From 91ac1fe6bb9897fa393da55d9470b0f00d4758e8 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 3 May 2026 14:23:33 +0200 Subject: [PATCH] 7.0-stable patches added patches: arm64-dts-ti-am62-verdin-enable-pullup-for-emmc-data-pins.patch crypto-qat-fix-irq-cleanup-on-6xxx-probe-failure.patch crypto-talitos-fix-sec1-32k-ahash-request-limitation.patch crypto-talitos-rename-first-last-to-first_desc-last_desc.patch firmware-exynos-acpm-drop-fake-const-on-handle-pointer.patch firmware-google-framebuffer-do-not-unregister-platform-device.patch hwmon-pt5161l-fix-bugs-in-pt5161l_read_block_data.patch kvm-arm64-account-for-resx-bits-in-__compute_fgt.patch kvm-nsvm-add-missing-consistency-check-for-efer-cr0-cr4-and-cs.patch kvm-nsvm-add-missing-consistency-check-for-ncr3-validity.patch kvm-nsvm-always-inject-a-gp-if-mapping-vmcb12-fails-on-nested-vmrun.patch kvm-nsvm-always-intercept-vmmcall-when-l2-is-active.patch kvm-nsvm-always-use-nextrip-as-vmcb02-s-nextrip-after-first-l2-vmrun.patch kvm-nsvm-avoid-clearing-vmcb_lbr-in-vmcb12.patch kvm-nsvm-clear-eventinj-fields-in-vmcb12-on-nested-vmexit.patch kvm-nsvm-clear-gif-on-nested-vmexit-invalid.patch kvm-nsvm-clear-tracking-of-l1-l2-nmi-and-soft-irq-on-nested-vmexit.patch kvm-nsvm-delay-setting-soft-irq-rip-tracking-fields-until-vcpu-run.patch kvm-nsvm-delay-stuffing-l2-s-current-rip-into-nextrip-until-vcpu-run.patch kvm-nsvm-drop-the-non-architectural-consistency-check-for-np_enable.patch kvm-nsvm-ensure-avic-is-inhibited-when-restoring-a-vcpu-to-guest-mode.patch kvm-nsvm-mark-all-of-vmcb02-dirty-when-restoring-nested-state.patch kvm-nsvm-raise-ud-if-unhandled-vmmcall-isn-t-intercepted-by-l1.patch kvm-nsvm-refactor-checking-lbrv-enablement-in-vmcb12-into-a-helper.patch kvm-nsvm-refactor-writing-vmcb12-on-nested-vmexit-as-a-helper.patch kvm-nsvm-sync-interrupt-shadow-to-cached-vmcb12-after-vmrun-of-l2.patch kvm-nsvm-sync-nextrip-to-cached-vmcb12-after-vmrun-of-l2.patch 
kvm-nsvm-triple-fault-if-mapping-vmcb12-fails-on-nested-vmexit.patch kvm-nsvm-triple-fault-if-restore-host-cr3-fails-on-nested-vmexit.patch kvm-nsvm-use-vcpu-arch.cr2-when-updating-vmcb12-on-nested-vmexit.patch kvm-svm-add-missing-save-restore-handling-of-lbr-msrs.patch kvm-svm-explicitly-mark-vmcb01-dirty-after-modifying-vmcb-intercepts.patch kvm-svm-inject-ud-for-invlpga-if-efer.svme-0.patch kvm-svm-switch-svm_copy_lbrs-to-a-macro.patch kvm-x86-defer-non-architectural-deliver-of-exception-payload-to-userspace-read.patch loongarch-kvm-use-csr_crmd_plv-in-kvm_arch_vcpu_in_kernel.patch mm-damon-core-disallow-non-power-of-two-min_region_sz-on-damon_start.patch mm-damon-core-disallow-time-quota-setting-zero-esz.patch mm-damon-core-use-time_in_range_open-for-damos-quota-window-start.patch mm-damon-core-validate-damos_quota_goal-nid-for-node_mem_-used-free-_bp.patch mm-damon-core-validate-damos_quota_goal-nid-for-node_memcg_-used-free-_bp.patch mm-damon-stat-fix-memory-leak-on-damon_start-failure-in-damon_stat_start.patch mm-memfd_luo-fix-physical-address-conversion-in-put_folios-cleanup.patch mm-mempolicy-fix-memory-leaks-in-weighted_interleave_auto_store.patch mm-vmalloc-take-vmap_purge_lock-in-shrinker.patch mmc-block-use-single-block-write-in-retry.patch mmc-sdhci-of-dwcmshc-disable-clock-before-dll-configuration.patch pwm-imx-tpm-count-the-number-of-enabled-channels-in-probe.patch randomize_kstack-maintain-kstack_offset-per-task.patch rtc-ntxec-fix-of-node-reference-imbalance.patch tpm-fix-auth-session-leak-in-tpm2_get_random-error-path.patch tpm-tpm_tis-add-error-logging-for-data-transfer.patch tpm-tpm_tis-stop-transmit-if-retries-are-exhausted.patch tpm-use-kfree_sensitive-to-free-auth-session-in-tpm_dev_release.patch tpm2-sessions-fix-missing-tpm_buf_destroy-in-tpm2_read_public.patch userfaultfd-allow-registration-of-ranges-below-mmap_min_addr.patch xfs-fix-a-resource-leak-in-xfs_alloc_buftarg.patch xfs-start-gc-on-zonegc_low_space-attribute-updates.patch --- 
...din-enable-pullup-for-emmc-data-pins.patch | 57 +++ ...ix-irq-cleanup-on-6xxx-probe-failure.patch | 55 +++ ...ix-sec1-32k-ahash-request-limitation.patch | 357 ++++++++++++++++++ ...e-first-last-to-first_desc-last_desc.patch | 199 ++++++++++ ...pm-drop-fake-const-on-handle-pointer.patch | 345 +++++++++++++++++ ...er-do-not-unregister-platform-device.patch | 69 ++++ ...-fix-bugs-in-pt5161l_read_block_data.patch | 58 +++ ...count-for-resx-bits-in-__compute_fgt.patch | 39 ++ ...stency-check-for-efer-cr0-cr4-and-cs.patch | 69 ++++ ...-consistency-check-for-ncr3-validity.patch | 49 +++ ...mapping-vmcb12-fails-on-nested-vmrun.patch | 41 ++ ...-intercept-vmmcall-when-l2-is-active.patch | 65 ++++ ...mcb02-s-nextrip-after-first-l2-vmrun.patch | 76 ++++ ...vm-avoid-clearing-vmcb_lbr-in-vmcb12.patch | 65 ++++ ...nj-fields-in-vmcb12-on-nested-vmexit.patch | 69 ++++ ...m-clear-gif-on-nested-vmexit-invalid.patch | 33 ++ ...l2-nmi-and-soft-irq-on-nested-vmexit.patch | 64 ++++ ...q-rip-tracking-fields-until-vcpu-run.patch | 139 +++++++ ...rent-rip-into-nextrip-until-vcpu-run.patch | 117 ++++++ ...ural-consistency-check-for-np_enable.patch | 67 ++++ ...-when-restoring-a-vcpu-to-guest-mode.patch | 42 +++ ...02-dirty-when-restoring-nested-state.patch | 42 +++ ...dled-vmmcall-isn-t-intercepted-by-l1.patch | 141 +++++++ ...v-enablement-in-vmcb12-into-a-helper.patch | 58 +++ ...-vmcb12-on-nested-vmexit-as-a-helper.patch | 142 +++++++ ...w-to-cached-vmcb12-after-vmrun-of-l2.patch | 52 +++ ...p-to-cached-vmcb12-after-vmrun-of-l2.patch | 55 +++ ...apping-vmcb12-fails-on-nested-vmexit.patch | 55 +++ ...tore-host-cr3-fails-on-nested-vmexit.patch | 137 +++++++ ...hen-updating-vmcb12-on-nested-vmexit.patch | 73 ++++ ...ng-save-restore-handling-of-lbr-msrs.patch | 133 +++++++ ...irty-after-modifying-vmcb-intercepts.patch | 42 +++ ...inject-ud-for-invlpga-if-efer.svme-0.patch | 36 ++ ...-svm-switch-svm_copy_lbrs-to-a-macro.patch | 94 +++++ ...-exception-payload-to-userspace-read.patch | 175 
+++++++++ ..._crmd_plv-in-kvm_arch_vcpu_in_kernel.patch | 33 ++ ...-of-two-min_region_sz-on-damon_start.patch | 43 +++ ...disallow-time-quota-setting-zero-esz.patch | 88 +++++ ...ge_open-for-damos-quota-window-start.patch | 56 +++ ...goal-nid-for-node_mem_-used-free-_bp.patch | 79 ++++ ...al-nid-for-node_memcg_-used-free-_bp.patch | 52 +++ ...on_start-failure-in-damon_stat_start.patch | 41 ++ ...ess-conversion-in-put_folios-cleanup.patch | 57 +++ ...ks-in-weighted_interleave_auto_store.patch | 76 ++++ ...loc-take-vmap_purge_lock-in-shrinker.patch | 42 +++ ...lock-use-single-block-write-in-retry.patch | 92 +++++ ...sable-clock-before-dll-configuration.patch | 81 ++++ ...-number-of-enabled-channels-in-probe.patch | 59 +++ ...tack-maintain-kstack_offset-per-task.patch | 155 ++++++++ ...txec-fix-of-node-reference-imbalance.patch | 41 ++ queue-7.0/series | 58 +++ ...n-leak-in-tpm2_get_random-error-path.patch | 47 +++ ...-add-error-logging-for-data-transfer.patch | 42 +++ ...op-transmit-if-retries-are-exhausted.patch | 48 +++ ...free-auth-session-in-tpm_dev_release.patch | 44 +++ ...-tpm_buf_destroy-in-tpm2_read_public.patch | 57 +++ ...ration-of-ranges-below-mmap_min_addr.patch | 60 +++ ...a-resource-leak-in-xfs_alloc_buftarg.patch | 32 ++ ...n-zonegc_low_space-attribute-updates.patch | 104 +++++ 59 files changed, 4797 insertions(+) create mode 100644 queue-7.0/arm64-dts-ti-am62-verdin-enable-pullup-for-emmc-data-pins.patch create mode 100644 queue-7.0/crypto-qat-fix-irq-cleanup-on-6xxx-probe-failure.patch create mode 100644 queue-7.0/crypto-talitos-fix-sec1-32k-ahash-request-limitation.patch create mode 100644 queue-7.0/crypto-talitos-rename-first-last-to-first_desc-last_desc.patch create mode 100644 queue-7.0/firmware-exynos-acpm-drop-fake-const-on-handle-pointer.patch create mode 100644 queue-7.0/firmware-google-framebuffer-do-not-unregister-platform-device.patch create mode 100644 queue-7.0/hwmon-pt5161l-fix-bugs-in-pt5161l_read_block_data.patch create mode 100644 
queue-7.0/kvm-arm64-account-for-resx-bits-in-__compute_fgt.patch create mode 100644 queue-7.0/kvm-nsvm-add-missing-consistency-check-for-efer-cr0-cr4-and-cs.patch create mode 100644 queue-7.0/kvm-nsvm-add-missing-consistency-check-for-ncr3-validity.patch create mode 100644 queue-7.0/kvm-nsvm-always-inject-a-gp-if-mapping-vmcb12-fails-on-nested-vmrun.patch create mode 100644 queue-7.0/kvm-nsvm-always-intercept-vmmcall-when-l2-is-active.patch create mode 100644 queue-7.0/kvm-nsvm-always-use-nextrip-as-vmcb02-s-nextrip-after-first-l2-vmrun.patch create mode 100644 queue-7.0/kvm-nsvm-avoid-clearing-vmcb_lbr-in-vmcb12.patch create mode 100644 queue-7.0/kvm-nsvm-clear-eventinj-fields-in-vmcb12-on-nested-vmexit.patch create mode 100644 queue-7.0/kvm-nsvm-clear-gif-on-nested-vmexit-invalid.patch create mode 100644 queue-7.0/kvm-nsvm-clear-tracking-of-l1-l2-nmi-and-soft-irq-on-nested-vmexit.patch create mode 100644 queue-7.0/kvm-nsvm-delay-setting-soft-irq-rip-tracking-fields-until-vcpu-run.patch create mode 100644 queue-7.0/kvm-nsvm-delay-stuffing-l2-s-current-rip-into-nextrip-until-vcpu-run.patch create mode 100644 queue-7.0/kvm-nsvm-drop-the-non-architectural-consistency-check-for-np_enable.patch create mode 100644 queue-7.0/kvm-nsvm-ensure-avic-is-inhibited-when-restoring-a-vcpu-to-guest-mode.patch create mode 100644 queue-7.0/kvm-nsvm-mark-all-of-vmcb02-dirty-when-restoring-nested-state.patch create mode 100644 queue-7.0/kvm-nsvm-raise-ud-if-unhandled-vmmcall-isn-t-intercepted-by-l1.patch create mode 100644 queue-7.0/kvm-nsvm-refactor-checking-lbrv-enablement-in-vmcb12-into-a-helper.patch create mode 100644 queue-7.0/kvm-nsvm-refactor-writing-vmcb12-on-nested-vmexit-as-a-helper.patch create mode 100644 queue-7.0/kvm-nsvm-sync-interrupt-shadow-to-cached-vmcb12-after-vmrun-of-l2.patch create mode 100644 queue-7.0/kvm-nsvm-sync-nextrip-to-cached-vmcb12-after-vmrun-of-l2.patch create mode 100644 queue-7.0/kvm-nsvm-triple-fault-if-mapping-vmcb12-fails-on-nested-vmexit.patch 
create mode 100644 queue-7.0/kvm-nsvm-triple-fault-if-restore-host-cr3-fails-on-nested-vmexit.patch create mode 100644 queue-7.0/kvm-nsvm-use-vcpu-arch.cr2-when-updating-vmcb12-on-nested-vmexit.patch create mode 100644 queue-7.0/kvm-svm-add-missing-save-restore-handling-of-lbr-msrs.patch create mode 100644 queue-7.0/kvm-svm-explicitly-mark-vmcb01-dirty-after-modifying-vmcb-intercepts.patch create mode 100644 queue-7.0/kvm-svm-inject-ud-for-invlpga-if-efer.svme-0.patch create mode 100644 queue-7.0/kvm-svm-switch-svm_copy_lbrs-to-a-macro.patch create mode 100644 queue-7.0/kvm-x86-defer-non-architectural-deliver-of-exception-payload-to-userspace-read.patch create mode 100644 queue-7.0/loongarch-kvm-use-csr_crmd_plv-in-kvm_arch_vcpu_in_kernel.patch create mode 100644 queue-7.0/mm-damon-core-disallow-non-power-of-two-min_region_sz-on-damon_start.patch create mode 100644 queue-7.0/mm-damon-core-disallow-time-quota-setting-zero-esz.patch create mode 100644 queue-7.0/mm-damon-core-use-time_in_range_open-for-damos-quota-window-start.patch create mode 100644 queue-7.0/mm-damon-core-validate-damos_quota_goal-nid-for-node_mem_-used-free-_bp.patch create mode 100644 queue-7.0/mm-damon-core-validate-damos_quota_goal-nid-for-node_memcg_-used-free-_bp.patch create mode 100644 queue-7.0/mm-damon-stat-fix-memory-leak-on-damon_start-failure-in-damon_stat_start.patch create mode 100644 queue-7.0/mm-memfd_luo-fix-physical-address-conversion-in-put_folios-cleanup.patch create mode 100644 queue-7.0/mm-mempolicy-fix-memory-leaks-in-weighted_interleave_auto_store.patch create mode 100644 queue-7.0/mm-vmalloc-take-vmap_purge_lock-in-shrinker.patch create mode 100644 queue-7.0/mmc-block-use-single-block-write-in-retry.patch create mode 100644 queue-7.0/mmc-sdhci-of-dwcmshc-disable-clock-before-dll-configuration.patch create mode 100644 queue-7.0/pwm-imx-tpm-count-the-number-of-enabled-channels-in-probe.patch create mode 100644 queue-7.0/randomize_kstack-maintain-kstack_offset-per-task.patch 
create mode 100644 queue-7.0/rtc-ntxec-fix-of-node-reference-imbalance.patch create mode 100644 queue-7.0/tpm-fix-auth-session-leak-in-tpm2_get_random-error-path.patch create mode 100644 queue-7.0/tpm-tpm_tis-add-error-logging-for-data-transfer.patch create mode 100644 queue-7.0/tpm-tpm_tis-stop-transmit-if-retries-are-exhausted.patch create mode 100644 queue-7.0/tpm-use-kfree_sensitive-to-free-auth-session-in-tpm_dev_release.patch create mode 100644 queue-7.0/tpm2-sessions-fix-missing-tpm_buf_destroy-in-tpm2_read_public.patch create mode 100644 queue-7.0/userfaultfd-allow-registration-of-ranges-below-mmap_min_addr.patch create mode 100644 queue-7.0/xfs-fix-a-resource-leak-in-xfs_alloc_buftarg.patch create mode 100644 queue-7.0/xfs-start-gc-on-zonegc_low_space-attribute-updates.patch diff --git a/queue-7.0/arm64-dts-ti-am62-verdin-enable-pullup-for-emmc-data-pins.patch b/queue-7.0/arm64-dts-ti-am62-verdin-enable-pullup-for-emmc-data-pins.patch new file mode 100644 index 0000000000..409e5e1fdf --- /dev/null +++ b/queue-7.0/arm64-dts-ti-am62-verdin-enable-pullup-for-emmc-data-pins.patch @@ -0,0 +1,57 @@ +From d5325810814ee995debfa0b6c4a22e0391598bef Mon Sep 17 00:00:00 2001 +From: Francesco Dolcini +Date: Fri, 20 Mar 2026 08:30:30 +0100 +Subject: arm64: dts: ti: am62-verdin: Enable pullup for eMMC data pins + +From: Francesco Dolcini + +commit d5325810814ee995debfa0b6c4a22e0391598bef upstream. + +Verdin AM62 board does not have external pullups on eMMC DAT1-DAT7 pins. 
+Enable internal pullups on DAT1-DAT7 considering: + + - without a host-side pullup, these lines rely solely on the eMMC + device's internal pullup (R_int, 10kohm-150kohm per JEDEC), which may + exceed the recommended 50kohm max for 1.8V VCCQ + - JEDEC JESD84-B51 Table 200 requires host-side pullups (R_DAT, + 10kohm-100kohm) on all data lines to prevent bus floating + +Fixes: 316b80246b16 ("arm64: dts: ti: add verdin am62") +Cc: stable@vger.kernel.org +Signed-off-by: Francesco Dolcini +Link: https://patch.msgid.link/20260320073032.10427-1-francesco@dolcini.it +Signed-off-by: Vignesh Raghavendra +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/boot/dts/ti/k3-am62-verdin.dtsi | 20 ++++++++++---------- + 1 file changed, 10 insertions(+), 10 deletions(-) + +--- a/arch/arm64/boot/dts/ti/k3-am62-verdin.dtsi ++++ b/arch/arm64/boot/dts/ti/k3-am62-verdin.dtsi +@@ -572,16 +572,16 @@ + /* On-module eMMC */ + pinctrl_sdhci0: main-mmc0-default-pins { + pinctrl-single,pins = < +- AM62X_IOPAD(0x220, PIN_INPUT, 0) /* (Y3) MMC0_CMD */ +- AM62X_IOPAD(0x218, PIN_INPUT, 0) /* (AB1) MMC0_CLK */ +- AM62X_IOPAD(0x214, PIN_INPUT, 0) /* (AA2) MMC0_DAT0 */ +- AM62X_IOPAD(0x210, PIN_INPUT, 0) /* (AA1) MMC0_DAT1 */ +- AM62X_IOPAD(0x20c, PIN_INPUT, 0) /* (AA3) MMC0_DAT2 */ +- AM62X_IOPAD(0x208, PIN_INPUT, 0) /* (Y4) MMC0_DAT3 */ +- AM62X_IOPAD(0x204, PIN_INPUT, 0) /* (AB2) MMC0_DAT4 */ +- AM62X_IOPAD(0x200, PIN_INPUT, 0) /* (AC1) MMC0_DAT5 */ +- AM62X_IOPAD(0x1fc, PIN_INPUT, 0) /* (AD2) MMC0_DAT6 */ +- AM62X_IOPAD(0x1f8, PIN_INPUT, 0) /* (AC2) MMC0_DAT7 */ ++ AM62X_IOPAD(0x220, PIN_INPUT, 0) /* (Y3) MMC0_CMD */ ++ AM62X_IOPAD(0x218, PIN_INPUT, 0) /* (AB1) MMC0_CLK */ ++ AM62X_IOPAD(0x214, PIN_INPUT, 0) /* (AA2) MMC0_DAT0 */ ++ AM62X_IOPAD(0x210, PIN_INPUT_PULLUP, 0) /* (AA1) MMC0_DAT1 */ ++ AM62X_IOPAD(0x20c, PIN_INPUT_PULLUP, 0) /* (AA3) MMC0_DAT2 */ ++ AM62X_IOPAD(0x208, PIN_INPUT_PULLUP, 0) /* (Y4) MMC0_DAT3 */ ++ AM62X_IOPAD(0x204, PIN_INPUT_PULLUP, 0) /* (AB2) MMC0_DAT4 */ ++ 
AM62X_IOPAD(0x200, PIN_INPUT_PULLUP, 0) /* (AC1) MMC0_DAT5 */ ++ AM62X_IOPAD(0x1fc, PIN_INPUT_PULLUP, 0) /* (AD2) MMC0_DAT6 */ ++ AM62X_IOPAD(0x1f8, PIN_INPUT_PULLUP, 0) /* (AC2) MMC0_DAT7 */ + >; + }; + diff --git a/queue-7.0/crypto-qat-fix-irq-cleanup-on-6xxx-probe-failure.patch b/queue-7.0/crypto-qat-fix-irq-cleanup-on-6xxx-probe-failure.patch new file mode 100644 index 0000000000..bc3570e18c --- /dev/null +++ b/queue-7.0/crypto-qat-fix-irq-cleanup-on-6xxx-probe-failure.patch @@ -0,0 +1,55 @@ +From 95aed2af87ec43fa7624cc81dd13d37824ad4972 Mon Sep 17 00:00:00 2001 +From: Giovanni Cabiddu +Date: Wed, 1 Apr 2026 10:31:11 +0100 +Subject: crypto: qat - fix IRQ cleanup on 6xxx probe failure + +From: Giovanni Cabiddu + +commit 95aed2af87ec43fa7624cc81dd13d37824ad4972 upstream. + +When adf_dev_up() partially completes and then fails, the IRQ +handlers registered during adf_isr_resource_alloc() are not detached +before the MSI-X vectors are released. + +Since the device is enabled with pcim_enable_device(), calling +pci_alloc_irq_vectors() internally registers pcim_msi_release() as a +devres action. On probe failure, devres runs pcim_msi_release() which +calls pci_free_irq_vectors(), tearing down the MSI-X vectors while IRQ +handlers (for example 'qat0-bundle0') are still attached. This causes +remove_proc_entry() warnings: + + [ 22.163964] remove_proc_entry: removing non-empty directory 'irq/143', leaking at least 'qat0-bundle0' + +Moving the devm_add_action_or_reset() before adf_dev_up() does not solve +the problem since devres runs in LIFO order and pcim_msi_release(), +registered later inside adf_dev_up(), would still fire before +adf_device_down(). + +Fix by calling adf_dev_down() explicitly when adf_dev_up() fails, to +properly free IRQ handlers before devres releases the MSI-X vectors. 
+ +Fixes: 17fd7514ae68 ("crypto: qat - add qat_6xxx driver") +Cc: stable@vger.kernel.org +Signed-off-by: Giovanni Cabiddu +Reviewed-by: Ahsan Atta +Reviewed-by: Laurent M Coquerel +Signed-off-by: Herbert Xu +Signed-off-by: Greg Kroah-Hartman +--- + drivers/crypto/intel/qat/qat_6xxx/adf_drv.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/drivers/crypto/intel/qat/qat_6xxx/adf_drv.c ++++ b/drivers/crypto/intel/qat/qat_6xxx/adf_drv.c +@@ -182,8 +182,10 @@ static int adf_probe(struct pci_dev *pde + return ret; + + ret = adf_dev_up(accel_dev, true); +- if (ret) ++ if (ret) { ++ adf_dev_down(accel_dev); + return ret; ++ } + + ret = devm_add_action_or_reset(dev, adf_device_down, accel_dev); + if (ret) diff --git a/queue-7.0/crypto-talitos-fix-sec1-32k-ahash-request-limitation.patch b/queue-7.0/crypto-talitos-fix-sec1-32k-ahash-request-limitation.patch new file mode 100644 index 0000000000..56a906b5eb --- /dev/null +++ b/queue-7.0/crypto-talitos-fix-sec1-32k-ahash-request-limitation.patch @@ -0,0 +1,357 @@ +From 655ef638a2bc3cd0a9eff99a02f83cab94a3a917 Mon Sep 17 00:00:00 2001 +From: Paul Louvel +Date: Mon, 30 Mar 2026 12:28:18 +0200 +Subject: crypto: talitos - fix SEC1 32k ahash request limitation + +From: Paul Louvel + +commit 655ef638a2bc3cd0a9eff99a02f83cab94a3a917 upstream. + +Since commit c662b043cdca ("crypto: af_alg/hash: Support +MSG_SPLICE_PAGES"), the crypto core may pass large scatterlists spanning +multiple pages to drivers supporting ahash operations. As a result, a +driver can now receive large ahash requests. + +The SEC1 engine has a limitation where a single descriptor cannot +process more than 32k of data. The current implementation attempts to +handle the entire request within a single descriptor, which leads to +failures raised by the driver: + + "length exceeds h/w max limit" + +Address this limitation by splitting large ahash requests into multiple +descriptors, each respecting the 32k hardware limit. 
This allows +processing arbitrarily large requests. + +Cc: stable@vger.kernel.org +Fixes: c662b043cdca ("crypto: af_alg/hash: Support MSG_SPLICE_PAGES") +Signed-off-by: Paul Louvel +Signed-off-by: Herbert Xu +Signed-off-by: Greg Kroah-Hartman +--- + drivers/crypto/talitos.c | 216 +++++++++++++++++++++++++++++++---------------- + 1 file changed, 147 insertions(+), 69 deletions(-) + +--- a/drivers/crypto/talitos.c ++++ b/drivers/crypto/talitos.c +@@ -12,6 +12,7 @@ + * All rights reserved. + */ + ++#include + #include + #include + #include +@@ -870,10 +871,18 @@ struct talitos_ahash_req_ctx { + unsigned int swinit; + unsigned int first; + unsigned int last; ++ unsigned int last_request; + unsigned int to_hash_later; + unsigned int nbuf; + struct scatterlist bufsl[2]; + struct scatterlist *psrc; ++ ++ struct scatterlist request_bufsl[2]; ++ struct ahash_request *areq; ++ struct scatterlist *request_sl; ++ unsigned int remaining_ahash_request_bytes; ++ unsigned int current_ahash_request_bytes; ++ struct work_struct sec1_ahash_process_remaining; + }; + + struct talitos_export_state { +@@ -1759,7 +1768,20 @@ static void ahash_done(struct device *de + + kfree(edesc); + +- ahash_request_complete(areq, err); ++ if (err) { ++ ahash_request_complete(areq, err); ++ return; ++ } ++ ++ req_ctx->remaining_ahash_request_bytes -= ++ req_ctx->current_ahash_request_bytes; ++ ++ if (!req_ctx->remaining_ahash_request_bytes) { ++ ahash_request_complete(areq, 0); ++ return; ++ } ++ ++ schedule_work(&req_ctx->sec1_ahash_process_remaining); + } + + /* +@@ -1925,60 +1947,7 @@ static struct talitos_edesc *ahash_edesc + nbytes, 0, 0, 0, areq->base.flags, false); + } + +-static int ahash_init(struct ahash_request *areq) +-{ +- struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq); +- struct talitos_ctx *ctx = crypto_ahash_ctx(tfm); +- struct device *dev = ctx->dev; +- struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq); +- unsigned int size; +- dma_addr_t dma; +- +- /* Initialize the 
context */ +- req_ctx->buf_idx = 0; +- req_ctx->nbuf = 0; +- req_ctx->first = 1; /* first indicates h/w must init its context */ +- req_ctx->swinit = 0; /* assume h/w init of context */ +- size = (crypto_ahash_digestsize(tfm) <= SHA256_DIGEST_SIZE) +- ? TALITOS_MDEU_CONTEXT_SIZE_MD5_SHA1_SHA256 +- : TALITOS_MDEU_CONTEXT_SIZE_SHA384_SHA512; +- req_ctx->hw_context_size = size; +- +- dma = dma_map_single(dev, req_ctx->hw_context, req_ctx->hw_context_size, +- DMA_TO_DEVICE); +- dma_unmap_single(dev, dma, req_ctx->hw_context_size, DMA_TO_DEVICE); +- +- return 0; +-} +- +-/* +- * on h/w without explicit sha224 support, we initialize h/w context +- * manually with sha224 constants, and tell it to run sha256. +- */ +-static int ahash_init_sha224_swinit(struct ahash_request *areq) +-{ +- struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq); +- +- req_ctx->hw_context[0] = SHA224_H0; +- req_ctx->hw_context[1] = SHA224_H1; +- req_ctx->hw_context[2] = SHA224_H2; +- req_ctx->hw_context[3] = SHA224_H3; +- req_ctx->hw_context[4] = SHA224_H4; +- req_ctx->hw_context[5] = SHA224_H5; +- req_ctx->hw_context[6] = SHA224_H6; +- req_ctx->hw_context[7] = SHA224_H7; +- +- /* init 64-bit count */ +- req_ctx->hw_context[8] = 0; +- req_ctx->hw_context[9] = 0; +- +- ahash_init(areq); +- req_ctx->swinit = 1;/* prevent h/w initting context with sha256 values*/ +- +- return 0; +-} +- +-static int ahash_process_req(struct ahash_request *areq, unsigned int nbytes) ++static int ahash_process_req_one(struct ahash_request *areq, unsigned int nbytes) + { + struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq); + struct talitos_ctx *ctx = crypto_ahash_ctx(tfm); +@@ -1997,12 +1966,12 @@ static int ahash_process_req(struct ahas + + if (!req_ctx->last && (nbytes + req_ctx->nbuf <= blocksize)) { + /* Buffer up to one whole block */ +- nents = sg_nents_for_len(areq->src, nbytes); ++ nents = sg_nents_for_len(req_ctx->request_sl, nbytes); + if (nents < 0) { + dev_err(dev, "Invalid number of src 
SG.\n"); + return nents; + } +- sg_copy_to_buffer(areq->src, nents, ++ sg_copy_to_buffer(req_ctx->request_sl, nents, + ctx_buf + req_ctx->nbuf, nbytes); + req_ctx->nbuf += nbytes; + return 0; +@@ -2029,7 +1998,7 @@ static int ahash_process_req(struct ahas + sg_init_table(req_ctx->bufsl, nsg); + sg_set_buf(req_ctx->bufsl, ctx_buf, req_ctx->nbuf); + if (nsg > 1) +- sg_chain(req_ctx->bufsl, 2, areq->src); ++ sg_chain(req_ctx->bufsl, 2, req_ctx->request_sl); + req_ctx->psrc = req_ctx->bufsl; + } else if (is_sec1 && req_ctx->nbuf && req_ctx->nbuf < blocksize) { + int offset; +@@ -2038,26 +2007,26 @@ static int ahash_process_req(struct ahas + offset = blocksize - req_ctx->nbuf; + else + offset = nbytes_to_hash - req_ctx->nbuf; +- nents = sg_nents_for_len(areq->src, offset); ++ nents = sg_nents_for_len(req_ctx->request_sl, offset); + if (nents < 0) { + dev_err(dev, "Invalid number of src SG.\n"); + return nents; + } +- sg_copy_to_buffer(areq->src, nents, ++ sg_copy_to_buffer(req_ctx->request_sl, nents, + ctx_buf + req_ctx->nbuf, offset); + req_ctx->nbuf += offset; +- req_ctx->psrc = scatterwalk_ffwd(req_ctx->bufsl, areq->src, ++ req_ctx->psrc = scatterwalk_ffwd(req_ctx->bufsl, req_ctx->request_sl, + offset); + } else +- req_ctx->psrc = areq->src; ++ req_ctx->psrc = req_ctx->request_sl; + + if (to_hash_later) { +- nents = sg_nents_for_len(areq->src, nbytes); ++ nents = sg_nents_for_len(req_ctx->request_sl, nbytes); + if (nents < 0) { + dev_err(dev, "Invalid number of src SG.\n"); + return nents; + } +- sg_pcopy_to_buffer(areq->src, nents, ++ sg_pcopy_to_buffer(req_ctx->request_sl, nents, + req_ctx->buf[(req_ctx->buf_idx + 1) & 1], + to_hash_later, + nbytes - to_hash_later); +@@ -2065,7 +2034,7 @@ static int ahash_process_req(struct ahas + req_ctx->to_hash_later = to_hash_later; + + /* Allocate extended descriptor */ +- edesc = ahash_edesc_alloc(areq, nbytes_to_hash); ++ edesc = ahash_edesc_alloc(req_ctx->areq, nbytes_to_hash); + if (IS_ERR(edesc)) + return PTR_ERR(edesc); 
+ +@@ -2087,14 +2056,123 @@ static int ahash_process_req(struct ahas + if (ctx->keylen && (req_ctx->first || req_ctx->last)) + edesc->desc.hdr |= DESC_HDR_MODE0_MDEU_HMAC; + +- return common_nonsnoop_hash(edesc, areq, nbytes_to_hash, ahash_done); ++ return common_nonsnoop_hash(edesc, req_ctx->areq, nbytes_to_hash, ahash_done); + } + +-static int ahash_update(struct ahash_request *areq) ++static void sec1_ahash_process_remaining(struct work_struct *work) + { ++ struct talitos_ahash_req_ctx *req_ctx = ++ container_of(work, struct talitos_ahash_req_ctx, ++ sec1_ahash_process_remaining); ++ int err = 0; ++ ++ req_ctx->request_sl = scatterwalk_ffwd(req_ctx->request_bufsl, ++ req_ctx->request_sl, TALITOS1_MAX_DATA_LEN); ++ ++ if (req_ctx->remaining_ahash_request_bytes > TALITOS1_MAX_DATA_LEN) ++ req_ctx->current_ahash_request_bytes = TALITOS1_MAX_DATA_LEN; ++ else { ++ req_ctx->current_ahash_request_bytes = ++ req_ctx->remaining_ahash_request_bytes; ++ ++ if (req_ctx->last_request) ++ req_ctx->last = 1; ++ } ++ ++ err = ahash_process_req_one(req_ctx->areq, ++ req_ctx->current_ahash_request_bytes); ++ ++ if (err != -EINPROGRESS) ++ ahash_request_complete(req_ctx->areq, err); ++} ++ ++static int ahash_process_req(struct ahash_request *areq, unsigned int nbytes) ++{ ++ struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq); ++ struct talitos_ctx *ctx = crypto_ahash_ctx(tfm); ++ struct device *dev = ctx->dev; ++ struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq); ++ struct talitos_private *priv = dev_get_drvdata(dev); ++ bool is_sec1 = has_ftr_sec1(priv); ++ ++ req_ctx->areq = areq; ++ req_ctx->request_sl = areq->src; ++ req_ctx->remaining_ahash_request_bytes = nbytes; ++ ++ if (is_sec1) { ++ if (nbytes > TALITOS1_MAX_DATA_LEN) ++ nbytes = TALITOS1_MAX_DATA_LEN; ++ else if (req_ctx->last_request) ++ req_ctx->last = 1; ++ } ++ ++ req_ctx->current_ahash_request_bytes = nbytes; ++ ++ return ahash_process_req_one(req_ctx->areq, ++ 
req_ctx->current_ahash_request_bytes); ++} ++ ++static int ahash_init(struct ahash_request *areq) ++{ ++ struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq); ++ struct talitos_ctx *ctx = crypto_ahash_ctx(tfm); ++ struct device *dev = ctx->dev; + struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq); ++ unsigned int size; ++ dma_addr_t dma; + ++ /* Initialize the context */ ++ req_ctx->buf_idx = 0; ++ req_ctx->nbuf = 0; ++ req_ctx->first = 1; /* first indicates h/w must init its context */ ++ req_ctx->swinit = 0; /* assume h/w init of context */ ++ size = (crypto_ahash_digestsize(tfm) <= SHA256_DIGEST_SIZE) ++ ? TALITOS_MDEU_CONTEXT_SIZE_MD5_SHA1_SHA256 ++ : TALITOS_MDEU_CONTEXT_SIZE_SHA384_SHA512; ++ req_ctx->hw_context_size = size; ++ req_ctx->last_request = 0; + req_ctx->last = 0; ++ INIT_WORK(&req_ctx->sec1_ahash_process_remaining, sec1_ahash_process_remaining); ++ ++ dma = dma_map_single(dev, req_ctx->hw_context, req_ctx->hw_context_size, ++ DMA_TO_DEVICE); ++ dma_unmap_single(dev, dma, req_ctx->hw_context_size, DMA_TO_DEVICE); ++ ++ return 0; ++} ++ ++/* ++ * on h/w without explicit sha224 support, we initialize h/w context ++ * manually with sha224 constants, and tell it to run sha256. 
++ */ ++static int ahash_init_sha224_swinit(struct ahash_request *areq) ++{ ++ struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq); ++ ++ req_ctx->hw_context[0] = SHA224_H0; ++ req_ctx->hw_context[1] = SHA224_H1; ++ req_ctx->hw_context[2] = SHA224_H2; ++ req_ctx->hw_context[3] = SHA224_H3; ++ req_ctx->hw_context[4] = SHA224_H4; ++ req_ctx->hw_context[5] = SHA224_H5; ++ req_ctx->hw_context[6] = SHA224_H6; ++ req_ctx->hw_context[7] = SHA224_H7; ++ ++ /* init 64-bit count */ ++ req_ctx->hw_context[8] = 0; ++ req_ctx->hw_context[9] = 0; ++ ++ ahash_init(areq); ++ req_ctx->swinit = 1;/* prevent h/w initting context with sha256 values*/ ++ ++ return 0; ++} ++ ++static int ahash_update(struct ahash_request *areq) ++{ ++ struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq); ++ ++ req_ctx->last_request = 0; + + return ahash_process_req(areq, areq->nbytes); + } +@@ -2103,7 +2181,7 @@ static int ahash_final(struct ahash_requ + { + struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq); + +- req_ctx->last = 1; ++ req_ctx->last_request = 1; + + return ahash_process_req(areq, 0); + } +@@ -2112,7 +2190,7 @@ static int ahash_finup(struct ahash_requ + { + struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq); + +- req_ctx->last = 1; ++ req_ctx->last_request = 1; + + return ahash_process_req(areq, areq->nbytes); + } diff --git a/queue-7.0/crypto-talitos-rename-first-last-to-first_desc-last_desc.patch b/queue-7.0/crypto-talitos-rename-first-last-to-first_desc-last_desc.patch new file mode 100644 index 0000000000..7d26de3a7f --- /dev/null +++ b/queue-7.0/crypto-talitos-rename-first-last-to-first_desc-last_desc.patch @@ -0,0 +1,199 @@ +From a1b80018b8cec27fc06a8b04a7f8b5f6cfe86eae Mon Sep 17 00:00:00 2001 +From: Paul Louvel +Date: Mon, 30 Mar 2026 12:28:19 +0200 +Subject: crypto: talitos - rename first/last to first_desc/last_desc + +From: Paul Louvel + +commit a1b80018b8cec27fc06a8b04a7f8b5f6cfe86eae upstream. 
+ +Previous commit introduces a new last_request variable in the context +structure. + +Renaming the first/last existing member variable in the context +structure to improve readability. + +Cc: stable@vger.kernel.org +Signed-off-by: Paul Louvel +Signed-off-by: Herbert Xu +Signed-off-by: Greg Kroah-Hartman +--- + drivers/crypto/talitos.c | 46 +++++++++++++++++++++++----------------------- + 1 file changed, 23 insertions(+), 23 deletions(-) + +--- a/drivers/crypto/talitos.c ++++ b/drivers/crypto/talitos.c +@@ -869,8 +869,8 @@ struct talitos_ahash_req_ctx { + u8 buf[2][HASH_MAX_BLOCK_SIZE]; + int buf_idx; + unsigned int swinit; +- unsigned int first; +- unsigned int last; ++ unsigned int first_desc; ++ unsigned int last_desc; + unsigned int last_request; + unsigned int to_hash_later; + unsigned int nbuf; +@@ -889,8 +889,8 @@ struct talitos_export_state { + u32 hw_context[TALITOS_MDEU_MAX_CONTEXT_SIZE / sizeof(u32)]; + u8 buf[HASH_MAX_BLOCK_SIZE]; + unsigned int swinit; +- unsigned int first; +- unsigned int last; ++ unsigned int first_desc; ++ unsigned int last_desc; + unsigned int to_hash_later; + unsigned int nbuf; + }; +@@ -1722,7 +1722,7 @@ static void common_nonsnoop_hash_unmap(s + if (desc->next_desc && + desc->ptr[5].ptr != desc2->ptr[5].ptr) + unmap_single_talitos_ptr(dev, &desc2->ptr[5], DMA_FROM_DEVICE); +- if (req_ctx->last) ++ if (req_ctx->last_desc) + memcpy(areq->result, req_ctx->hw_context, + crypto_ahash_digestsize(tfm)); + +@@ -1759,7 +1759,7 @@ static void ahash_done(struct device *de + container_of(desc, struct talitos_edesc, desc); + struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq); + +- if (!req_ctx->last && req_ctx->to_hash_later) { ++ if (!req_ctx->last_desc && req_ctx->to_hash_later) { + /* Position any partial block for next update/final/finup */ + req_ctx->buf_idx = (req_ctx->buf_idx + 1) & 1; + req_ctx->nbuf = req_ctx->to_hash_later; +@@ -1825,7 +1825,7 @@ static int common_nonsnoop_hash(struct t + /* first DWORD empty */ + + 
/* hash context in */ +- if (!req_ctx->first || req_ctx->swinit) { ++ if (!req_ctx->first_desc || req_ctx->swinit) { + map_single_talitos_ptr_nosync(dev, &desc->ptr[1], + req_ctx->hw_context_size, + req_ctx->hw_context, +@@ -1833,7 +1833,7 @@ static int common_nonsnoop_hash(struct t + req_ctx->swinit = 0; + } + /* Indicate next op is not the first. */ +- req_ctx->first = 0; ++ req_ctx->first_desc = 0; + + /* HMAC key */ + if (ctx->keylen) +@@ -1866,7 +1866,7 @@ static int common_nonsnoop_hash(struct t + /* fifth DWORD empty */ + + /* hash/HMAC out -or- hash context out */ +- if (req_ctx->last) ++ if (req_ctx->last_desc) + map_single_talitos_ptr(dev, &desc->ptr[5], + crypto_ahash_digestsize(tfm), + req_ctx->hw_context, DMA_FROM_DEVICE); +@@ -1908,7 +1908,7 @@ static int common_nonsnoop_hash(struct t + if (sg_count > 1) + sync_needed = true; + copy_talitos_ptr(&desc2->ptr[5], &desc->ptr[5], is_sec1); +- if (req_ctx->last) ++ if (req_ctx->last_desc) + map_single_talitos_ptr_nosync(dev, &desc->ptr[5], + req_ctx->hw_context_size, + req_ctx->hw_context, +@@ -1964,7 +1964,7 @@ static int ahash_process_req_one(struct + bool is_sec1 = has_ftr_sec1(priv); + u8 *ctx_buf = req_ctx->buf[req_ctx->buf_idx]; + +- if (!req_ctx->last && (nbytes + req_ctx->nbuf <= blocksize)) { ++ if (!req_ctx->last_desc && (nbytes + req_ctx->nbuf <= blocksize)) { + /* Buffer up to one whole block */ + nents = sg_nents_for_len(req_ctx->request_sl, nbytes); + if (nents < 0) { +@@ -1981,7 +1981,7 @@ static int ahash_process_req_one(struct + nbytes_to_hash = nbytes + req_ctx->nbuf; + to_hash_later = nbytes_to_hash & (blocksize - 1); + +- if (req_ctx->last) ++ if (req_ctx->last_desc) + to_hash_later = 0; + else if (to_hash_later) + /* There is a partial block. 
Hash the full block(s) now */ +@@ -2041,19 +2041,19 @@ static int ahash_process_req_one(struct + edesc->desc.hdr = ctx->desc_hdr_template; + + /* On last one, request SEC to pad; otherwise continue */ +- if (req_ctx->last) ++ if (req_ctx->last_desc) + edesc->desc.hdr |= DESC_HDR_MODE0_MDEU_PAD; + else + edesc->desc.hdr |= DESC_HDR_MODE0_MDEU_CONT; + + /* request SEC to INIT hash. */ +- if (req_ctx->first && !req_ctx->swinit) ++ if (req_ctx->first_desc && !req_ctx->swinit) + edesc->desc.hdr |= DESC_HDR_MODE0_MDEU_INIT; + + /* When the tfm context has a keylen, it's an HMAC. + * A first or last (ie. not middle) descriptor must request HMAC. + */ +- if (ctx->keylen && (req_ctx->first || req_ctx->last)) ++ if (ctx->keylen && (req_ctx->first_desc || req_ctx->last_desc)) + edesc->desc.hdr |= DESC_HDR_MODE0_MDEU_HMAC; + + return common_nonsnoop_hash(edesc, req_ctx->areq, nbytes_to_hash, ahash_done); +@@ -2076,7 +2076,7 @@ static void sec1_ahash_process_remaining + req_ctx->remaining_ahash_request_bytes; + + if (req_ctx->last_request) +- req_ctx->last = 1; ++ req_ctx->last_desc = 1; + } + + err = ahash_process_req_one(req_ctx->areq, +@@ -2103,7 +2103,7 @@ static int ahash_process_req(struct ahas + if (nbytes > TALITOS1_MAX_DATA_LEN) + nbytes = TALITOS1_MAX_DATA_LEN; + else if (req_ctx->last_request) +- req_ctx->last = 1; ++ req_ctx->last_desc = 1; + } + + req_ctx->current_ahash_request_bytes = nbytes; +@@ -2124,14 +2124,14 @@ static int ahash_init(struct ahash_reque + /* Initialize the context */ + req_ctx->buf_idx = 0; + req_ctx->nbuf = 0; +- req_ctx->first = 1; /* first indicates h/w must init its context */ ++ req_ctx->first_desc = 1; /* first_desc indicates h/w must init its context */ + req_ctx->swinit = 0; /* assume h/w init of context */ + size = (crypto_ahash_digestsize(tfm) <= SHA256_DIGEST_SIZE) + ? 
TALITOS_MDEU_CONTEXT_SIZE_MD5_SHA1_SHA256 + : TALITOS_MDEU_CONTEXT_SIZE_SHA384_SHA512; + req_ctx->hw_context_size = size; + req_ctx->last_request = 0; +- req_ctx->last = 0; ++ req_ctx->last_desc = 0; + INIT_WORK(&req_ctx->sec1_ahash_process_remaining, sec1_ahash_process_remaining); + + dma = dma_map_single(dev, req_ctx->hw_context, req_ctx->hw_context_size, +@@ -2224,8 +2224,8 @@ static int ahash_export(struct ahash_req + req_ctx->hw_context_size); + memcpy(export->buf, req_ctx->buf[req_ctx->buf_idx], req_ctx->nbuf); + export->swinit = req_ctx->swinit; +- export->first = req_ctx->first; +- export->last = req_ctx->last; ++ export->first_desc = req_ctx->first_desc; ++ export->last_desc = req_ctx->last_desc; + export->to_hash_later = req_ctx->to_hash_later; + export->nbuf = req_ctx->nbuf; + +@@ -2250,8 +2250,8 @@ static int ahash_import(struct ahash_req + memcpy(req_ctx->hw_context, export->hw_context, size); + memcpy(req_ctx->buf[0], export->buf, export->nbuf); + req_ctx->swinit = export->swinit; +- req_ctx->first = export->first; +- req_ctx->last = export->last; ++ req_ctx->first_desc = export->first_desc; ++ req_ctx->last_desc = export->last_desc; + req_ctx->to_hash_later = export->to_hash_later; + req_ctx->nbuf = export->nbuf; + diff --git a/queue-7.0/firmware-exynos-acpm-drop-fake-const-on-handle-pointer.patch b/queue-7.0/firmware-exynos-acpm-drop-fake-const-on-handle-pointer.patch new file mode 100644 index 0000000000..81e9718210 --- /dev/null +++ b/queue-7.0/firmware-exynos-acpm-drop-fake-const-on-handle-pointer.patch @@ -0,0 +1,345 @@ +From a2be37eedb52ea26938fa4cc9de1ff84963c57ad Mon Sep 17 00:00:00 2001 +From: Krzysztof Kozlowski +Date: Tue, 24 Feb 2026 11:42:04 +0100 +Subject: firmware: exynos-acpm: Drop fake 'const' on handle pointer + +From: Krzysztof Kozlowski + +commit a2be37eedb52ea26938fa4cc9de1ff84963c57ad upstream. + +All the functions operating on the 'handle' pointer are claiming it is a +pointer to const thus they should not modify the handle. 
In fact that's +a false statement, because first thing these functions do is drop the +cast to const with container_of: + + struct acpm_info *acpm = handle_to_acpm_info(handle); + +And with such cast the handle is easily writable with simple: + + acpm->handle.ops.pmic_ops.read_reg = NULL; + +The code is not correct logically, either, because functions like +acpm_get_by_node() and acpm_handle_put() are meant to modify the handle +reference counting, thus they must modify the handle. Modification here +happens anyway, even if the reference counting is stored in the +container which the handle is part of. + +The code does not have actual visible bug, but incorrect 'const' +annotations could lead to incorrect compiler decisions. + +Fixes: a88927b534ba ("firmware: add Exynos ACPM protocol driver") +Cc: stable@vger.kernel.org +Signed-off-by: Krzysztof Kozlowski +Link: https://patch.msgid.link/20260224104203.42950-2-krzysztof.kozlowski@oss.qualcomm.com +Signed-off-by: Krzysztof Kozlowski +Signed-off-by: Greg Kroah-Hartman +--- + drivers/clk/samsung/clk-acpm.c | 4 - + drivers/firmware/samsung/exynos-acpm-dvfs.c | 4 - + drivers/firmware/samsung/exynos-acpm-dvfs.h | 4 - + drivers/firmware/samsung/exynos-acpm-pmic.c | 10 ++-- + drivers/firmware/samsung/exynos-acpm-pmic.h | 10 ++-- + drivers/firmware/samsung/exynos-acpm.c | 16 ++++--- + drivers/firmware/samsung/exynos-acpm.h | 2 + drivers/mfd/sec-acpm.c | 10 ++-- + include/linux/firmware/samsung/exynos-acpm-protocol.h | 40 +++++++----------- + 9 files changed, 48 insertions(+), 52 deletions(-) + +--- a/drivers/clk/samsung/clk-acpm.c ++++ b/drivers/clk/samsung/clk-acpm.c +@@ -20,7 +20,7 @@ struct acpm_clk { + u32 id; + struct clk_hw hw; + unsigned int mbox_chan_id; +- const struct acpm_handle *handle; ++ struct acpm_handle *handle; + }; + + struct acpm_clk_variant { +@@ -113,7 +113,7 @@ static int acpm_clk_register(struct devi + + static int acpm_clk_probe(struct platform_device *pdev) + { +- const struct acpm_handle 
*acpm_handle; ++ struct acpm_handle *acpm_handle; + struct clk_hw_onecell_data *clk_data; + struct clk_hw **hws; + struct device *dev = &pdev->dev; +--- a/drivers/firmware/samsung/exynos-acpm-dvfs.c ++++ b/drivers/firmware/samsung/exynos-acpm-dvfs.c +@@ -42,7 +42,7 @@ static void acpm_dvfs_init_set_rate_cmd( + cmd[3] = ktime_to_ms(ktime_get()); + } + +-int acpm_dvfs_set_rate(const struct acpm_handle *handle, ++int acpm_dvfs_set_rate(struct acpm_handle *handle, + unsigned int acpm_chan_id, unsigned int clk_id, + unsigned long rate) + { +@@ -62,7 +62,7 @@ static void acpm_dvfs_init_get_rate_cmd( + cmd[3] = ktime_to_ms(ktime_get()); + } + +-unsigned long acpm_dvfs_get_rate(const struct acpm_handle *handle, ++unsigned long acpm_dvfs_get_rate(struct acpm_handle *handle, + unsigned int acpm_chan_id, unsigned int clk_id) + { + struct acpm_xfer xfer; +--- a/drivers/firmware/samsung/exynos-acpm-dvfs.h ++++ b/drivers/firmware/samsung/exynos-acpm-dvfs.h +@@ -11,10 +11,10 @@ + + struct acpm_handle; + +-int acpm_dvfs_set_rate(const struct acpm_handle *handle, ++int acpm_dvfs_set_rate(struct acpm_handle *handle, + unsigned int acpm_chan_id, unsigned int id, + unsigned long rate); +-unsigned long acpm_dvfs_get_rate(const struct acpm_handle *handle, ++unsigned long acpm_dvfs_get_rate(struct acpm_handle *handle, + unsigned int acpm_chan_id, + unsigned int clk_id); + +--- a/drivers/firmware/samsung/exynos-acpm-pmic.c ++++ b/drivers/firmware/samsung/exynos-acpm-pmic.c +@@ -77,7 +77,7 @@ static void acpm_pmic_init_read_cmd(u32 + cmd[3] = ktime_to_ms(ktime_get()); + } + +-int acpm_pmic_read_reg(const struct acpm_handle *handle, ++int acpm_pmic_read_reg(struct acpm_handle *handle, + unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan, + u8 *buf) + { +@@ -107,7 +107,7 @@ static void acpm_pmic_init_bulk_read_cmd + FIELD_PREP(ACPM_PMIC_VALUE, count); + } + +-int acpm_pmic_bulk_read(const struct acpm_handle *handle, ++int acpm_pmic_bulk_read(struct acpm_handle *handle, + unsigned int 
acpm_chan_id, u8 type, u8 reg, u8 chan, + u8 count, u8 *buf) + { +@@ -150,7 +150,7 @@ static void acpm_pmic_init_write_cmd(u32 + cmd[3] = ktime_to_ms(ktime_get()); + } + +-int acpm_pmic_write_reg(const struct acpm_handle *handle, ++int acpm_pmic_write_reg(struct acpm_handle *handle, + unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan, + u8 value) + { +@@ -187,7 +187,7 @@ static void acpm_pmic_init_bulk_write_cm + } + } + +-int acpm_pmic_bulk_write(const struct acpm_handle *handle, ++int acpm_pmic_bulk_write(struct acpm_handle *handle, + unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan, + u8 count, const u8 *buf) + { +@@ -220,7 +220,7 @@ static void acpm_pmic_init_update_cmd(u3 + cmd[3] = ktime_to_ms(ktime_get()); + } + +-int acpm_pmic_update_reg(const struct acpm_handle *handle, ++int acpm_pmic_update_reg(struct acpm_handle *handle, + unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan, + u8 value, u8 mask) + { +--- a/drivers/firmware/samsung/exynos-acpm-pmic.h ++++ b/drivers/firmware/samsung/exynos-acpm-pmic.h +@@ -11,19 +11,19 @@ + + struct acpm_handle; + +-int acpm_pmic_read_reg(const struct acpm_handle *handle, ++int acpm_pmic_read_reg(struct acpm_handle *handle, + unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan, + u8 *buf); +-int acpm_pmic_bulk_read(const struct acpm_handle *handle, ++int acpm_pmic_bulk_read(struct acpm_handle *handle, + unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan, + u8 count, u8 *buf); +-int acpm_pmic_write_reg(const struct acpm_handle *handle, ++int acpm_pmic_write_reg(struct acpm_handle *handle, + unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan, + u8 value); +-int acpm_pmic_bulk_write(const struct acpm_handle *handle, ++int acpm_pmic_bulk_write(struct acpm_handle *handle, + unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan, + u8 count, const u8 *buf); +-int acpm_pmic_update_reg(const struct acpm_handle *handle, ++int acpm_pmic_update_reg(struct acpm_handle *handle, + unsigned int acpm_chan_id, u8 type, u8 reg, u8 
chan, + u8 value, u8 mask); + #endif /* __EXYNOS_ACPM_PMIC_H__ */ +--- a/drivers/firmware/samsung/exynos-acpm.c ++++ b/drivers/firmware/samsung/exynos-acpm.c +@@ -412,7 +412,7 @@ static int acpm_wait_for_message_respons + * + * Return: 0 on success, -errno otherwise. + */ +-int acpm_do_xfer(const struct acpm_handle *handle, const struct acpm_xfer *xfer) ++int acpm_do_xfer(struct acpm_handle *handle, const struct acpm_xfer *xfer) + { + struct acpm_info *acpm = handle_to_acpm_info(handle); + struct exynos_mbox_msg msg; +@@ -674,7 +674,7 @@ static int acpm_probe(struct platform_de + * acpm_handle_put() - release the handle acquired by acpm_get_by_phandle. + * @handle: Handle acquired by acpm_get_by_phandle. + */ +-static void acpm_handle_put(const struct acpm_handle *handle) ++static void acpm_handle_put(struct acpm_handle *handle) + { + struct acpm_info *acpm = handle_to_acpm_info(handle); + struct device *dev = acpm->dev; +@@ -700,9 +700,11 @@ static void devm_acpm_release(struct dev + * @np: ACPM device tree node. + * + * Return: pointer to handle on success, ERR_PTR(-errno) otherwise. ++ * ++ * Note: handle CANNOT be pointer to const + */ +-static const struct acpm_handle *acpm_get_by_node(struct device *dev, +- struct device_node *np) ++static struct acpm_handle *acpm_get_by_node(struct device *dev, ++ struct device_node *np) + { + struct platform_device *pdev; + struct device_link *link; +@@ -743,10 +745,10 @@ static const struct acpm_handle *acpm_ge + * + * Return: pointer to handle on success, ERR_PTR(-errno) otherwise. 
+ */ +-const struct acpm_handle *devm_acpm_get_by_node(struct device *dev, +- struct device_node *np) ++struct acpm_handle *devm_acpm_get_by_node(struct device *dev, ++ struct device_node *np) + { +- const struct acpm_handle **ptr, *handle; ++ struct acpm_handle **ptr, *handle; + + ptr = devres_alloc(devm_acpm_release, sizeof(*ptr), GFP_KERNEL); + if (!ptr) +--- a/drivers/firmware/samsung/exynos-acpm.h ++++ b/drivers/firmware/samsung/exynos-acpm.h +@@ -17,7 +17,7 @@ struct acpm_xfer { + + struct acpm_handle; + +-int acpm_do_xfer(const struct acpm_handle *handle, ++int acpm_do_xfer(struct acpm_handle *handle, + const struct acpm_xfer *xfer); + + #endif /* __EXYNOS_ACPM_H__ */ +--- a/drivers/mfd/sec-acpm.c ++++ b/drivers/mfd/sec-acpm.c +@@ -367,7 +367,7 @@ static const struct regmap_config s2mpg1 + }; + + struct sec_pmic_acpm_shared_bus_context { +- const struct acpm_handle *acpm; ++ struct acpm_handle *acpm; + unsigned int acpm_chan_id; + u8 speedy_channel; + }; +@@ -390,7 +390,7 @@ static int sec_pmic_acpm_bus_write(void + size_t count) + { + struct sec_pmic_acpm_bus_context *ctx = context; +- const struct acpm_handle *acpm = ctx->shared->acpm; ++ struct acpm_handle *acpm = ctx->shared->acpm; + const struct acpm_pmic_ops *pmic_ops = &acpm->ops.pmic_ops; + size_t val_count = count - BITS_TO_BYTES(ACPM_ADDR_BITS); + const u8 *d = data; +@@ -410,7 +410,7 @@ static int sec_pmic_acpm_bus_read(void * + void *val_buf, size_t val_size) + { + struct sec_pmic_acpm_bus_context *ctx = context; +- const struct acpm_handle *acpm = ctx->shared->acpm; ++ struct acpm_handle *acpm = ctx->shared->acpm; + const struct acpm_pmic_ops *pmic_ops = &acpm->ops.pmic_ops; + const u8 *r = reg_buf; + u8 reg; +@@ -429,7 +429,7 @@ static int sec_pmic_acpm_bus_reg_update_ + unsigned int val) + { + struct sec_pmic_acpm_bus_context *ctx = context; +- const struct acpm_handle *acpm = ctx->shared->acpm; ++ struct acpm_handle *acpm = ctx->shared->acpm; + const struct acpm_pmic_ops *pmic_ops = 
&acpm->ops.pmic_ops; + + return pmic_ops->update_reg(acpm, ctx->shared->acpm_chan_id, ctx->type, reg & 0xff, +@@ -480,7 +480,7 @@ static int sec_pmic_acpm_probe(struct pl + struct regmap *regmap_common, *regmap_pmic, *regmap; + const struct sec_pmic_acpm_platform_data *pdata; + struct sec_pmic_acpm_shared_bus_context *shared_ctx; +- const struct acpm_handle *acpm; ++ struct acpm_handle *acpm; + struct device *dev = &pdev->dev; + int ret, irq; + +--- a/include/linux/firmware/samsung/exynos-acpm-protocol.h ++++ b/include/linux/firmware/samsung/exynos-acpm-protocol.h +@@ -14,30 +14,24 @@ struct acpm_handle; + struct device_node; + + struct acpm_dvfs_ops { +- int (*set_rate)(const struct acpm_handle *handle, +- unsigned int acpm_chan_id, unsigned int clk_id, +- unsigned long rate); +- unsigned long (*get_rate)(const struct acpm_handle *handle, ++ int (*set_rate)(struct acpm_handle *handle, unsigned int acpm_chan_id, ++ unsigned int clk_id, unsigned long rate); ++ unsigned long (*get_rate)(struct acpm_handle *handle, + unsigned int acpm_chan_id, + unsigned int clk_id); + }; + + struct acpm_pmic_ops { +- int (*read_reg)(const struct acpm_handle *handle, +- unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan, +- u8 *buf); +- int (*bulk_read)(const struct acpm_handle *handle, +- unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan, +- u8 count, u8 *buf); +- int (*write_reg)(const struct acpm_handle *handle, +- unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan, +- u8 value); +- int (*bulk_write)(const struct acpm_handle *handle, +- unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan, +- u8 count, const u8 *buf); +- int (*update_reg)(const struct acpm_handle *handle, +- unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan, +- u8 value, u8 mask); ++ int (*read_reg)(struct acpm_handle *handle, unsigned int acpm_chan_id, ++ u8 type, u8 reg, u8 chan, u8 *buf); ++ int (*bulk_read)(struct acpm_handle *handle, unsigned int acpm_chan_id, ++ u8 type, u8 reg, u8 chan, u8 count, u8 
*buf); ++ int (*write_reg)(struct acpm_handle *handle, unsigned int acpm_chan_id, ++ u8 type, u8 reg, u8 chan, u8 value); ++ int (*bulk_write)(struct acpm_handle *handle, unsigned int acpm_chan_id, ++ u8 type, u8 reg, u8 chan, u8 count, const u8 *buf); ++ int (*update_reg)(struct acpm_handle *handle, unsigned int acpm_chan_id, ++ u8 type, u8 reg, u8 chan, u8 value, u8 mask); + }; + + struct acpm_ops { +@@ -56,12 +50,12 @@ struct acpm_handle { + struct device; + + #if IS_ENABLED(CONFIG_EXYNOS_ACPM_PROTOCOL) +-const struct acpm_handle *devm_acpm_get_by_node(struct device *dev, +- struct device_node *np); ++struct acpm_handle *devm_acpm_get_by_node(struct device *dev, ++ struct device_node *np); + #else + +-static inline const struct acpm_handle *devm_acpm_get_by_node(struct device *dev, +- struct device_node *np) ++static inline struct acpm_handle *devm_acpm_get_by_node(struct device *dev, ++ struct device_node *np) + { + return NULL; + } diff --git a/queue-7.0/firmware-google-framebuffer-do-not-unregister-platform-device.patch b/queue-7.0/firmware-google-framebuffer-do-not-unregister-platform-device.patch new file mode 100644 index 0000000000..bb501154f0 --- /dev/null +++ b/queue-7.0/firmware-google-framebuffer-do-not-unregister-platform-device.patch @@ -0,0 +1,69 @@ +From 5cd28bd28c8ce426b56ce4230dbd17537181d5ad Mon Sep 17 00:00:00 2001 +From: Thomas Zimmermann +Date: Tue, 17 Feb 2026 16:56:11 +0100 +Subject: firmware: google: framebuffer: Do not unregister platform device + +From: Thomas Zimmermann + +commit 5cd28bd28c8ce426b56ce4230dbd17537181d5ad upstream. + +The native driver takes over the framebuffer aperture by removing the +system- framebuffer platform device. Afterwards the pointer in drvdata +is dangling. Remove the entire logic around drvdata and let the kernel's +aperture helpers handle this. The platform device depends on the native +hardware device instead of the coreboot device anyway. 
+ +When commit 851b4c14532d ("firmware: coreboot: Add coreboot framebuffer +driver") added the coreboot framebuffer code, the kernel did not support +device-based aperture management. Instead native drivers only removed +the conflicting fbdev device. At that point, unregistering the framebuffer +device most likely worked correctly. It was definitely broken after +commit d9702b2a2171 ("fbdev/simplefb: Do not use struct +fb_info.apertures"). So take this commit for the Fixes tag. Earlier +releases might work depending on the native hardware driver. + +Signed-off-by: Thomas Zimmermann +Fixes: d9702b2a2171 ("fbdev/simplefb: Do not use struct fb_info.apertures") +Acked-by: Tzung-Bi Shih +Acked-by: Julius Werner +Cc: Thomas Zimmermann +Cc: Javier Martinez Canillas +Cc: Hans de Goede +Cc: linux-fbdev@vger.kernel.org +Cc: # v6.3+ +Link: https://patch.msgid.link/20260217155836.96267-2-tzimmermann@suse.de +Signed-off-by: Greg Kroah-Hartman +--- + drivers/firmware/google/framebuffer-coreboot.c | 10 ---------- + 1 file changed, 10 deletions(-) + +--- a/drivers/firmware/google/framebuffer-coreboot.c ++++ b/drivers/firmware/google/framebuffer-coreboot.c +@@ -81,19 +81,10 @@ static int framebuffer_probe(struct core + sizeof(pdata)); + if (IS_ERR(pdev)) + pr_warn("coreboot: could not register framebuffer\n"); +- else +- dev_set_drvdata(&dev->dev, pdev); + + return PTR_ERR_OR_ZERO(pdev); + } + +-static void framebuffer_remove(struct coreboot_device *dev) +-{ +- struct platform_device *pdev = dev_get_drvdata(&dev->dev); +- +- platform_device_unregister(pdev); +-} +- + static const struct coreboot_device_id framebuffer_ids[] = { + { .tag = CB_TAG_FRAMEBUFFER }, + { /* sentinel */ } +@@ -102,7 +93,6 @@ MODULE_DEVICE_TABLE(coreboot, framebuffe + + static struct coreboot_driver framebuffer_driver = { + .probe = framebuffer_probe, +- .remove = framebuffer_remove, + .drv = { + .name = "framebuffer", + }, diff --git a/queue-7.0/hwmon-pt5161l-fix-bugs-in-pt5161l_read_block_data.patch
b/queue-7.0/hwmon-pt5161l-fix-bugs-in-pt5161l_read_block_data.patch new file mode 100644 index 0000000000..bf439e96bb --- /dev/null +++ b/queue-7.0/hwmon-pt5161l-fix-bugs-in-pt5161l_read_block_data.patch @@ -0,0 +1,58 @@ +From 24c73e93d6a756e1b8626bb259d2e07c5b89b370 Mon Sep 17 00:00:00 2001 +From: Sanman Pradhan +Date: Fri, 10 Apr 2026 00:25:55 +0000 +Subject: hwmon: (pt5161l) Fix bugs in pt5161l_read_block_data() + +From: Sanman Pradhan + +commit 24c73e93d6a756e1b8626bb259d2e07c5b89b370 upstream. + +Fix two bugs in pt5161l_read_block_data(): + +1. Buffer overrun: The local buffer rbuf is declared as u8 rbuf[24], + but i2c_smbus_read_block_data() can return up to + I2C_SMBUS_BLOCK_MAX (32) bytes. The i2c-core copies the data into + the caller's buffer before the return value can be checked, so + the post-read length validation does not prevent a stack overrun + if a device returns more than 24 bytes. Resize the buffer to + I2C_SMBUS_BLOCK_MAX. + +2. Unexpected positive return on length mismatch: When all three + retries are exhausted because the device returns data with an + unexpected length, i2c_smbus_read_block_data() returns a positive + byte count. The function returns this directly, and callers treat + any non-negative return as success, processing stale or incomplete + buffer contents. Return -EIO when retries are exhausted with a + positive return value, preserving the negative error code on I2C + failure. 
+ +Fixes: 1b2ca93cd0592 ("hwmon: Add driver for Astera Labs PT5161L retimer") +Cc: stable@vger.kernel.org +Signed-off-by: Sanman Pradhan +Link: https://lore.kernel.org/r/20260410002549.424162-1-sanman.pradhan@hpe.com +Signed-off-by: Guenter Roeck +Signed-off-by: Greg Kroah-Hartman +--- + drivers/hwmon/pt5161l.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/hwmon/pt5161l.c ++++ b/drivers/hwmon/pt5161l.c +@@ -121,7 +121,7 @@ static int pt5161l_read_block_data(struc + int ret, tries; + u8 remain_len = len; + u8 curr_len; +- u8 wbuf[16], rbuf[24]; ++ u8 wbuf[16], rbuf[I2C_SMBUS_BLOCK_MAX]; + u8 cmd = 0x08; /* [7]:pec_en, [4:2]:func, [1]:start, [0]:end */ + u8 config = 0x00; /* [6]:cfg_type, [4:1]:burst_len, [0]:address bit16 */ + +@@ -151,7 +151,7 @@ static int pt5161l_read_block_data(struc + break; + } + if (tries >= 3) +- return ret; ++ return ret < 0 ? ret : -EIO; + + memcpy(val, rbuf, curr_len); + val += curr_len; diff --git a/queue-7.0/kvm-arm64-account-for-resx-bits-in-__compute_fgt.patch b/queue-7.0/kvm-arm64-account-for-resx-bits-in-__compute_fgt.patch new file mode 100644 index 0000000000..16e62d99c1 --- /dev/null +++ b/queue-7.0/kvm-arm64-account-for-resx-bits-in-__compute_fgt.patch @@ -0,0 +1,39 @@ +From d70d4323dd9636e35696639f6b4c2b2735291516 Mon Sep 17 00:00:00 2001 +From: Marc Zyngier +Date: Wed, 1 Apr 2026 11:36:00 +0100 +Subject: KVM: arm64: Account for RESx bits in __compute_fgt() + +From: Marc Zyngier + +commit d70d4323dd9636e35696639f6b4c2b2735291516 upstream. + +When computing Fine Grained Traps, it is preferable to account for +the reserved bits. The HW will most probably ignore them, unless the +bits have been repurposed to do something else. 
+ +Use caution, and fold our view of the reserved bits in. + +Reviewed-by: Sascha Bischoff +Fixes: c259d763e6b09 ("KVM: arm64: Account for RES1 bits in DECLARE_FEAT_MAP() and co") +Link: https://sashiko.dev/#/patchset/20260319154937.3619520-1-sascha.bischoff%40arm.com +Cc: stable@vger.kernel.org +Link: https://patch.msgid.link/20260401103611.357092-6-maz@kernel.org +Signed-off-by: Marc Zyngier +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/kvm/config.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/arch/arm64/kvm/config.c ++++ b/arch/arm64/kvm/config.c +@@ -1585,8 +1585,8 @@ static __always_inline void __compute_fg + clear |= ~nested & m->nmask; + } + +- val |= set; +- val &= ~clear; ++ val |= set | m->res1; ++ val &= ~(clear | m->res0); + *vcpu_fgt(vcpu, reg) = val; + } + diff --git a/queue-7.0/kvm-nsvm-add-missing-consistency-check-for-efer-cr0-cr4-and-cs.patch b/queue-7.0/kvm-nsvm-add-missing-consistency-check-for-efer-cr0-cr4-and-cs.patch new file mode 100644 index 0000000000..96d5a291c5 --- /dev/null +++ b/queue-7.0/kvm-nsvm-add-missing-consistency-check-for-efer-cr0-cr4-and-cs.patch @@ -0,0 +1,69 @@ +From 96bd3e76a171a8e21a6387e54e4c420a81968492 Mon Sep 17 00:00:00 2001 +From: Yosry Ahmed +Date: Tue, 3 Mar 2026 00:34:10 +0000 +Subject: KVM: nSVM: Add missing consistency check for EFER, CR0, CR4, and CS +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Yosry Ahmed + +commit 96bd3e76a171a8e21a6387e54e4c420a81968492 upstream. + +According to the APM Volume #2, 15.5, Canonicalization and Consistency +Checks (24593—Rev. 3.42—March 2024), the following condition (among +others) results in a #VMEXIT with VMEXIT_INVALID (aka SVM_EXIT_ERR): + + EFER.LME, CR0.PG, CR4.PAE, CS.L, and CS.D are all non-zero. + +In the list of consistency checks done when EFER.LME and CR0.PG are set, +add a check that CS.L and CS.D are not both set, after the existing +check that CR4.PAE is set.
+ +This is functionally a nop because the nested VMRUN results in +SVM_EXIT_ERR in HW, which is forwarded to L1, but KVM makes all +consistency checks before a VMRUN is actually attempted. + +Fixes: 3d6368ef580a ("KVM: SVM: Add VMRUN handler") +Cc: stable@vger.kernel.org +Signed-off-by: Yosry Ahmed +Link: https://patch.msgid.link/20260303003421.2185681-17-yosry@kernel.org +Signed-off-by: Sean Christopherson +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/svm/nested.c | 6 ++++++ + arch/x86/kvm/svm/svm.h | 1 + + 2 files changed, 7 insertions(+) + +--- a/arch/x86/kvm/svm/nested.c ++++ b/arch/x86/kvm/svm/nested.c +@@ -392,6 +392,10 @@ static bool __nested_vmcb_check_save(str + CC(!(save->cr0 & X86_CR0_PE)) || + CC(!kvm_vcpu_is_legal_cr3(vcpu, save->cr3))) + return false; ++ ++ if (CC((save->cs.attrib & SVM_SELECTOR_L_MASK) && ++ (save->cs.attrib & SVM_SELECTOR_DB_MASK))) ++ return false; + } + + /* Note, SVM doesn't have any additional restrictions on CR4. */ +@@ -508,6 +512,8 @@ static void __nested_copy_vmcb_save_to_c + * Copy only fields that are validated, as we need them + * to avoid TOC/TOU races. 
+ */ ++ to->cs = from->cs; ++ + to->efer = from->efer; + to->cr0 = from->cr0; + to->cr3 = from->cr3; +--- a/arch/x86/kvm/svm/svm.h ++++ b/arch/x86/kvm/svm/svm.h +@@ -140,6 +140,7 @@ struct kvm_vmcb_info { + }; + + struct vmcb_save_area_cached { ++ struct vmcb_seg cs; + u64 efer; + u64 cr4; + u64 cr3; diff --git a/queue-7.0/kvm-nsvm-add-missing-consistency-check-for-ncr3-validity.patch b/queue-7.0/kvm-nsvm-add-missing-consistency-check-for-ncr3-validity.patch new file mode 100644 index 0000000000..47c0008cf0 --- /dev/null +++ b/queue-7.0/kvm-nsvm-add-missing-consistency-check-for-ncr3-validity.patch @@ -0,0 +1,49 @@ +From b71138fcc362c67ebe66747bb22cb4e6b4d6a651 Mon Sep 17 00:00:00 2001 +From: Yosry Ahmed +Date: Tue, 3 Mar 2026 00:34:09 +0000 +Subject: KVM: nSVM: Add missing consistency check for nCR3 validity +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Yosry Ahmed + +commit b71138fcc362c67ebe66747bb22cb4e6b4d6a651 upstream. + +From the APM Volume #2, 15.25.4 (24593—Rev. 3.42—March 2024): + + When VMRUN is executed with nested paging enabled (NP_ENABLE = 1), the + following conditions are considered illegal state combinations, in + addition to those mentioned in “Canonicalization and Consistency Checks”: + • Any MBZ bit of nCR3 is set. + • Any G_PAT.PA field has an unsupported type encoding or any + reserved field in G_PAT has a nonzero value. + +Add the consistency check for nCR3 being a legal GPA with no MBZ bits +set. Note, the G_PAT.PA check is being handled separately[*]. 
+ +Link: https://lore.kernel.org/kvm/20260205214326.1029278-3-jmattson@google.com [*] +Fixes: 4b16184c1cca ("KVM: SVM: Initialize Nested Nested MMU context on VMRUN") +Cc: stable@vger.kernel.org +Signed-off-by: Yosry Ahmed +Link: https://patch.msgid.link/20260303003421.2185681-16-yosry@kernel.org +[sean: capture everything in CC(), massage changelog formatting] +Signed-off-by: Sean Christopherson +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/svm/nested.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/arch/x86/kvm/svm/nested.c ++++ b/arch/x86/kvm/svm/nested.c +@@ -350,6 +350,10 @@ static bool __nested_vmcb_check_controls + if (CC(control->asid == 0)) + return false; + ++ if (CC((control->nested_ctl & SVM_NESTED_CTL_NP_ENABLE) && ++ !kvm_vcpu_is_legal_gpa(vcpu, control->nested_cr3))) ++ return false; ++ + if (CC(!nested_svm_check_bitmap_pa(vcpu, control->msrpm_base_pa, + MSRPM_SIZE))) + return false; diff --git a/queue-7.0/kvm-nsvm-always-inject-a-gp-if-mapping-vmcb12-fails-on-nested-vmrun.patch b/queue-7.0/kvm-nsvm-always-inject-a-gp-if-mapping-vmcb12-fails-on-nested-vmrun.patch new file mode 100644 index 0000000000..45f59642fb --- /dev/null +++ b/queue-7.0/kvm-nsvm-always-inject-a-gp-if-mapping-vmcb12-fails-on-nested-vmrun.patch @@ -0,0 +1,41 @@ +From 01ddcdc55e097ca38c28ae656711b8e6d1df71f8 Mon Sep 17 00:00:00 2001 +From: Yosry Ahmed +Date: Tue, 3 Mar 2026 00:33:59 +0000 +Subject: KVM: nSVM: Always inject a #GP if mapping VMCB12 fails on nested VMRUN + +From: Yosry Ahmed + +commit 01ddcdc55e097ca38c28ae656711b8e6d1df71f8 upstream. + +nested_svm_vmrun() currently only injects a #GP if kvm_vcpu_map() fails +with -EINVAL. But it could also fail with -EFAULT if creating a host +mapping failed. Inject a #GP in all cases, no reason to treat failure +modes differently. 
+ +Fixes: 8c5fbf1a7231 ("KVM/nSVM: Use the new mapping API for mapping guest memory") +CC: stable@vger.kernel.org +Co-developed-by: Sean Christopherson +Signed-off-by: Yosry Ahmed +Link: https://patch.msgid.link/20260303003421.2185681-6-yosry@kernel.org +Signed-off-by: Sean Christopherson +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/svm/nested.c | 5 +---- + 1 file changed, 1 insertion(+), 4 deletions(-) + +--- a/arch/x86/kvm/svm/nested.c ++++ b/arch/x86/kvm/svm/nested.c +@@ -1021,12 +1021,9 @@ int nested_svm_vmrun(struct kvm_vcpu *vc + } + + vmcb12_gpa = svm->vmcb->save.rax; +- ret = kvm_vcpu_map(vcpu, gpa_to_gfn(vmcb12_gpa), &map); +- if (ret == -EINVAL) { ++ if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcb12_gpa), &map)) { + kvm_inject_gp(vcpu, 0); + return 1; +- } else if (ret) { +- return kvm_skip_emulated_instruction(vcpu); + } + + ret = kvm_skip_emulated_instruction(vcpu); diff --git a/queue-7.0/kvm-nsvm-always-intercept-vmmcall-when-l2-is-active.patch b/queue-7.0/kvm-nsvm-always-intercept-vmmcall-when-l2-is-active.patch new file mode 100644 index 0000000000..9ef1a4f390 --- /dev/null +++ b/queue-7.0/kvm-nsvm-always-intercept-vmmcall-when-l2-is-active.patch @@ -0,0 +1,65 @@ +From 33d3617a52f9930d22b2af59f813c2fbdefa6dd5 Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Tue, 3 Mar 2026 16:22:23 -0800 +Subject: KVM: nSVM: Always intercept VMMCALL when L2 is active + +From: Sean Christopherson + +commit 33d3617a52f9930d22b2af59f813c2fbdefa6dd5 upstream. + +Always intercept VMMCALL now that KVM properly synthesizes a #UD as +appropriate, i.e. when L1 doesn't want to intercept VMMCALL, to avoid +putting L2 into an infinite #UD loop if KVM_X86_QUIRK_FIX_HYPERCALL_INSN +is enabled. + +By letting L2 execute VMMCALL natively and thus #UD, for all intents and +purposes KVM morphs the VMMCALL intercept into a #UD intercept (KVM always +intercepts #UD). 
When the hypercall quirk is enabled, KVM "emulates" +VMMCALL in response to the #UD by trying to fixup the opcode to the "right" +vendor, then restarts the guest, without skipping the VMMCALL. As a +result, the guest sees an endless stream of #UDs since it's already +executing the correct vendor hypercall instruction, i.e. the emulator +doesn't anticipate that the #UD could be due to lack of interception, as +opposed to a truly undefined opcode. + +Fixes: 0d945bd93511 ("KVM: SVM: Don't allow nested guest to VMMCALL into host") +Cc: stable@vger.kernel.org +Reviewed-by: Yosry Ahmed +Reviewed-by: Vitaly Kuznetsov +Link: https://patch.msgid.link/20260304002223.1105129-3-seanjc@google.com +Signed-off-by: Sean Christopherson +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/svm/hyperv.h | 4 ---- + arch/x86/kvm/svm/nested.c | 7 ------- + 2 files changed, 11 deletions(-) + +--- a/arch/x86/kvm/svm/hyperv.h ++++ b/arch/x86/kvm/svm/hyperv.h +@@ -51,10 +51,6 @@ static inline bool nested_svm_is_l2_tlb_ + void svm_hv_inject_synthetic_vmexit_post_tlb_flush(struct kvm_vcpu *vcpu); + #else /* CONFIG_KVM_HYPERV */ + static inline void nested_svm_hv_update_vm_vp_ids(struct kvm_vcpu *vcpu) {} +-static inline bool nested_svm_l2_tlb_flush_enabled(struct kvm_vcpu *vcpu) +-{ +- return false; +-} + static inline bool nested_svm_is_l2_tlb_flush_hcall(struct kvm_vcpu *vcpu) + { + return false; +--- a/arch/x86/kvm/svm/nested.c ++++ b/arch/x86/kvm/svm/nested.c +@@ -158,13 +158,6 @@ void recalc_intercepts(struct vcpu_svm * + vmcb_clr_intercept(c, INTERCEPT_VINTR); + } + +- /* +- * We want to see VMMCALLs from a nested guest only when Hyper-V L2 TLB +- * flush feature is enabled. 
+- */ +- if (!nested_svm_l2_tlb_flush_enabled(&svm->vcpu)) +- vmcb_clr_intercept(c, INTERCEPT_VMMCALL); +- + for (i = 0; i < MAX_INTERCEPT; i++) + c->intercepts[i] |= g->intercepts[i]; + diff --git a/queue-7.0/kvm-nsvm-always-use-nextrip-as-vmcb02-s-nextrip-after-first-l2-vmrun.patch b/queue-7.0/kvm-nsvm-always-use-nextrip-as-vmcb02-s-nextrip-after-first-l2-vmrun.patch new file mode 100644 index 0000000000..f7d04a3447 --- /dev/null +++ b/queue-7.0/kvm-nsvm-always-use-nextrip-as-vmcb02-s-nextrip-after-first-l2-vmrun.patch @@ -0,0 +1,76 @@ +From 8d397582f6b5e9fbcf09781c7c934b4910e94a50 Mon Sep 17 00:00:00 2001 +From: Yosry Ahmed +Date: Wed, 25 Feb 2026 00:59:47 +0000 +Subject: KVM: nSVM: Always use NextRIP as vmcb02's NextRIP after first L2 VMRUN + +From: Yosry Ahmed + +commit 8d397582f6b5e9fbcf09781c7c934b4910e94a50 upstream. + +For guests with NRIPS disabled, L1 does not provide NextRIP when running +an L2 with an injected soft interrupt, instead it advances the current RIP +before running it. KVM uses the current RIP as the NextRIP in vmcb02 to +emulate a CPU without NRIPS. + +However, after L2 runs the first time, NextRIP will be updated by the CPU +and/or KVM, and the current RIP is no longer the correct value to use in +vmcb02. Hence, after save/restore, use the current RIP if and only if a +nested run is pending, otherwise use NextRIP. Give soft_int_next_rip the +same treatment, as it's the same logic, just for a narrower use case. 
+ +Fixes: cc440cdad5b7 ("KVM: nSVM: implement KVM_GET_NESTED_STATE and KVM_SET_NESTED_STATE") +CC: stable@vger.kernel.org +Signed-off-by: Yosry Ahmed +Link: https://patch.msgid.link/20260225005950.3739782-6-yosry@kernel.org +[sean: give soft_int_next_rip the same treatment] +Signed-off-by: Sean Christopherson +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/svm/nested.c | 28 ++++++++++++++++++---------- + 1 file changed, 18 insertions(+), 10 deletions(-) + +--- a/arch/x86/kvm/svm/nested.c ++++ b/arch/x86/kvm/svm/nested.c +@@ -856,24 +856,32 @@ static void nested_vmcb02_prepare_contro + vmcb02->control.event_inj_err = svm->nested.ctl.event_inj_err; + + /* +- * next_rip is consumed on VMRUN as the return address pushed on the ++ * NextRIP is consumed on VMRUN as the return address pushed on the + * stack for injected soft exceptions/interrupts. If nrips is exposed +- * to L1, take it verbatim from vmcb12. If nrips is supported in +- * hardware but not exposed to L1, stuff the actual L2 RIP to emulate +- * what a nrips=0 CPU would do (L1 is responsible for advancing RIP +- * prior to injecting the event). ++ * to L1, take it verbatim from vmcb12. ++ * ++ * If nrips is supported in hardware but not exposed to L1, stuff the ++ * actual L2 RIP to emulate what a nrips=0 CPU would do (L1 is ++ * responsible for advancing RIP prior to injecting the event). This is ++ * only the case for the first L2 run after VMRUN. After that (e.g. ++ * during save/restore), NextRIP is updated by the CPU and/or KVM, and ++ * the value of the L2 RIP from vmcb12 should not be used. 
+ */ +- if (guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS)) +- vmcb02->control.next_rip = svm->nested.ctl.next_rip; +- else if (boot_cpu_has(X86_FEATURE_NRIPS)) +- vmcb02->control.next_rip = vmcb12_rip; ++ if (boot_cpu_has(X86_FEATURE_NRIPS)) { ++ if (guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS) || ++ !svm->nested.nested_run_pending) ++ vmcb02->control.next_rip = svm->nested.ctl.next_rip; ++ else ++ vmcb02->control.next_rip = vmcb12_rip; ++ } + + svm->nmi_l1_to_l2 = is_evtinj_nmi(vmcb02->control.event_inj); + if (is_evtinj_soft(vmcb02->control.event_inj)) { + svm->soft_int_injected = true; + svm->soft_int_csbase = vmcb12_csbase; + svm->soft_int_old_rip = vmcb12_rip; +- if (guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS)) ++ if (guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS) || ++ !svm->nested.nested_run_pending) + svm->soft_int_next_rip = svm->nested.ctl.next_rip; + else + svm->soft_int_next_rip = vmcb12_rip; diff --git a/queue-7.0/kvm-nsvm-avoid-clearing-vmcb_lbr-in-vmcb12.patch b/queue-7.0/kvm-nsvm-avoid-clearing-vmcb_lbr-in-vmcb12.patch new file mode 100644 index 0000000000..4281118acd --- /dev/null +++ b/queue-7.0/kvm-nsvm-avoid-clearing-vmcb_lbr-in-vmcb12.patch @@ -0,0 +1,65 @@ +From b53ab5167a81537777ac780bbd93d32613aa3bda Mon Sep 17 00:00:00 2001 +From: Yosry Ahmed +Date: Tue, 3 Mar 2026 00:33:55 +0000 +Subject: KVM: nSVM: Avoid clearing VMCB_LBR in vmcb12 + +From: Yosry Ahmed + +commit b53ab5167a81537777ac780bbd93d32613aa3bda upstream. + +svm_copy_lbrs() always marks VMCB_LBR dirty in the destination VMCB. +However, nested_svm_vmexit() uses it to copy LBRs to vmcb12, and +clearing clean bits in vmcb12 is not architecturally defined. + +Move vmcb_mark_dirty() to callers and drop it for vmcb12. + +This also facilitates incoming refactoring that does not pass the entire +VMCB to svm_copy_lbrs(). 
+ +Fixes: d20c796ca370 ("KVM: x86: nSVM: implement nested LBR virtualization") +Cc: stable@vger.kernel.org +Signed-off-by: Yosry Ahmed +Link: https://patch.msgid.link/20260303003421.2185681-2-yosry@kernel.org +Signed-off-by: Sean Christopherson +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/svm/nested.c | 7 +++++-- + arch/x86/kvm/svm/svm.c | 2 -- + 2 files changed, 5 insertions(+), 4 deletions(-) + +--- a/arch/x86/kvm/svm/nested.c ++++ b/arch/x86/kvm/svm/nested.c +@@ -726,6 +726,7 @@ static void nested_vmcb02_prepare_save(s + } else { + svm_copy_lbrs(vmcb02, vmcb01); + } ++ vmcb_mark_dirty(vmcb02, VMCB_LBR); + svm_update_lbrv(&svm->vcpu); + } + +@@ -1242,10 +1243,12 @@ int nested_svm_vmexit(struct vcpu_svm *s + kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); + + if (unlikely(guest_cpu_cap_has(vcpu, X86_FEATURE_LBRV) && +- (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) ++ (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) { + svm_copy_lbrs(vmcb12, vmcb02); +- else ++ } else { + svm_copy_lbrs(vmcb01, vmcb02); ++ vmcb_mark_dirty(vmcb01, VMCB_LBR); ++ } + + svm_update_lbrv(vcpu); + +--- a/arch/x86/kvm/svm/svm.c ++++ b/arch/x86/kvm/svm/svm.c +@@ -848,8 +848,6 @@ void svm_copy_lbrs(struct vmcb *to_vmcb, + to_vmcb->save.br_to = from_vmcb->save.br_to; + to_vmcb->save.last_excp_from = from_vmcb->save.last_excp_from; + to_vmcb->save.last_excp_to = from_vmcb->save.last_excp_to; +- +- vmcb_mark_dirty(to_vmcb, VMCB_LBR); + } + + static void __svm_enable_lbrv(struct kvm_vcpu *vcpu) diff --git a/queue-7.0/kvm-nsvm-clear-eventinj-fields-in-vmcb12-on-nested-vmexit.patch b/queue-7.0/kvm-nsvm-clear-eventinj-fields-in-vmcb12-on-nested-vmexit.patch new file mode 100644 index 0000000000..59a5f6955c --- /dev/null +++ b/queue-7.0/kvm-nsvm-clear-eventinj-fields-in-vmcb12-on-nested-vmexit.patch @@ -0,0 +1,69 @@ +From 69b721a86d0dcb026f6db7d111dcde7550442d2e Mon Sep 17 00:00:00 2001 +From: Yosry Ahmed +Date: Tue, 3 Mar 2026 00:34:05 +0000 +Subject: KVM: nSVM: Clear EVENTINJ fields 
in vmcb12 on nested #VMEXIT + +From: Yosry Ahmed + +commit 69b721a86d0dcb026f6db7d111dcde7550442d2e upstream. + +According to the APM, from the reference of the VMRUN instruction: + + Upon #VMEXIT, the processor performs the following actions in order to + return to the host execution context: + + ... + + clear EVENTINJ field in VMCB + +KVM already syncs EVENTINJ fields from vmcb02 to cached vmcb12 on every +L2->L0 #VMEXIT. Since these fields are zeroed by the CPU on #VMEXIT, they +will mostly be zeroed in vmcb12 on nested #VMEXIT by nested_svm_vmexit(). + +However, this is not the case when: + + 1. Consistency checks fail, as nested_svm_vmexit() is not called. + 2. Entering guest mode fails before L2 runs (e.g. due to failed load of + CR3). + +(2) was broken by commit 2d8a42be0e2b ("KVM: nSVM: synchronize VMCB +controls updated by the processor on every vmexit"), as prior to that +nested_svm_vmexit() always zeroed EVENTINJ fields. + +Explicitly clear the fields in all nested #VMEXIT code paths. 
+ +Fixes: 3d6368ef580a ("KVM: SVM: Add VMRUN handler") +Fixes: 2d8a42be0e2b ("KVM: nSVM: synchronize VMCB controls updated by the processor on every vmexit") +Cc: stable@vger.kernel.org +Signed-off-by: Yosry Ahmed +Link: https://patch.msgid.link/20260303003421.2185681-12-yosry@kernel.org +[sean: massage changelog formatting] +Signed-off-by: Sean Christopherson +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/svm/nested.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/arch/x86/kvm/svm/nested.c ++++ b/arch/x86/kvm/svm/nested.c +@@ -1045,6 +1045,8 @@ int nested_svm_vmrun(struct kvm_vcpu *vc + vmcb12->control.exit_code = SVM_EXIT_ERR; + vmcb12->control.exit_info_1 = 0; + vmcb12->control.exit_info_2 = 0; ++ vmcb12->control.event_inj = 0; ++ vmcb12->control.event_inj_err = 0; + svm_set_gif(svm, false); + goto out; + } +@@ -1188,9 +1190,9 @@ static int nested_svm_vmexit_update_vmcb + if (nested_vmcb12_has_lbrv(vcpu)) + svm_copy_lbrs(&vmcb12->save, &vmcb02->save); + ++ vmcb12->control.event_inj = 0; ++ vmcb12->control.event_inj_err = 0; + vmcb12->control.int_ctl = svm->nested.ctl.int_ctl; +- vmcb12->control.event_inj = svm->nested.ctl.event_inj; +- vmcb12->control.event_inj_err = svm->nested.ctl.event_inj_err; + + trace_kvm_nested_vmexit_inject(vmcb12->control.exit_code, + vmcb12->control.exit_info_1, diff --git a/queue-7.0/kvm-nsvm-clear-gif-on-nested-vmexit-invalid.patch b/queue-7.0/kvm-nsvm-clear-gif-on-nested-vmexit-invalid.patch new file mode 100644 index 0000000000..462c166619 --- /dev/null +++ b/queue-7.0/kvm-nsvm-clear-gif-on-nested-vmexit-invalid.patch @@ -0,0 +1,33 @@ +From f85a6ce06e4a0d49652f57967a649ab09e06287c Mon Sep 17 00:00:00 2001 +From: Yosry Ahmed +Date: Tue, 3 Mar 2026 00:34:04 +0000 +Subject: KVM: nSVM: Clear GIF on nested #VMEXIT(INVALID) + +From: Yosry Ahmed + +commit f85a6ce06e4a0d49652f57967a649ab09e06287c upstream. 
+ +According to the APM, GIF is set to 0 on any #VMEXIT, including +an #VMEXIT(INVALID) due to failed consistency checks. Clear GIF on +consistency check failures. + +Fixes: 3d6368ef580a ("KVM: SVM: Add VMRUN handler") +Cc: stable@vger.kernel.org +Signed-off-by: Yosry Ahmed +Link: https://patch.msgid.link/20260303003421.2185681-11-yosry@kernel.org +Signed-off-by: Sean Christopherson +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/svm/nested.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/arch/x86/kvm/svm/nested.c ++++ b/arch/x86/kvm/svm/nested.c +@@ -1045,6 +1045,7 @@ int nested_svm_vmrun(struct kvm_vcpu *vc + vmcb12->control.exit_code = SVM_EXIT_ERR; + vmcb12->control.exit_info_1 = 0; + vmcb12->control.exit_info_2 = 0; ++ svm_set_gif(svm, false); + goto out; + } + diff --git a/queue-7.0/kvm-nsvm-clear-tracking-of-l1-l2-nmi-and-soft-irq-on-nested-vmexit.patch b/queue-7.0/kvm-nsvm-clear-tracking-of-l1-l2-nmi-and-soft-irq-on-nested-vmexit.patch new file mode 100644 index 0000000000..3e71af08bd --- /dev/null +++ b/queue-7.0/kvm-nsvm-clear-tracking-of-l1-l2-nmi-and-soft-irq-on-nested-vmexit.patch @@ -0,0 +1,64 @@ +From 8998e1d012f3f45d0456f16706682cef04c3c436 Mon Sep 17 00:00:00 2001 +From: Yosry Ahmed +Date: Tue, 3 Mar 2026 00:34:06 +0000 +Subject: KVM: nSVM: Clear tracking of L1->L2 NMI and soft IRQ on nested #VMEXIT + +From: Yosry Ahmed + +commit 8998e1d012f3f45d0456f16706682cef04c3c436 upstream. + +KVM clears tracking of L1->L2 injected NMIs (i.e. nmi_l1_to_l2) and soft +IRQs (i.e. soft_int_injected) on a synthesized #VMEXIT(INVALID) due to +failed VMRUN. However, they are not explicitly cleared in other +synthesized #VMEXITs. + +soft_int_injected is always cleared after the first VMRUN of L2 when +completing interrupts, as any re-injection is then tracked by KVM +(instead of purely in vmcb02). 
+ +nmi_l1_to_l2 is not cleared after the first VMRUN if NMI injection +failed, as KVM still needs to keep track that the NMI originated from L1 +to avoid blocking NMIs for L1. It is only cleared when the NMI injection +succeeds. + +KVM could synthesize a #VMEXIT to L1 before successfully injecting the +NMI into L2 (e.g. due to a #NPF on L2's NMI handler in L1's NPTs). In +this case, nmi_l1_to_l2 will remain true, and KVM may not correctly mask +NMIs and intercept IRET when injecting an NMI into L1. + +Clear both nmi_l1_to_l2 and soft_int_injected in nested_svm_vmexit(), i.e. +for all #VMEXITs except those that occur due to failed consistency checks, +as those happen before nmi_l1_to_l2 or soft_int_injected are set. + +Fixes: 159fc6fa3b7d ("KVM: nSVM: Transparently handle L1 -> L2 NMI re-injection") +Cc: stable@vger.kernel.org +Signed-off-by: Yosry Ahmed +Link: https://patch.msgid.link/20260303003421.2185681-13-yosry@kernel.org +Signed-off-by: Sean Christopherson +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/svm/nested.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/arch/x86/kvm/svm/nested.c ++++ b/arch/x86/kvm/svm/nested.c +@@ -1074,8 +1074,6 @@ int nested_svm_vmrun(struct kvm_vcpu *vc + + out_exit_err: + svm->nested.nested_run_pending = 0; +- svm->nmi_l1_to_l2 = false; +- svm->soft_int_injected = false; + + svm->vmcb->control.exit_code = SVM_EXIT_ERR; + svm->vmcb->control.exit_info_1 = 0; +@@ -1331,6 +1329,10 @@ void nested_svm_vmexit(struct vcpu_svm * + if (nested_svm_load_cr3(vcpu, vmcb01->save.cr3, false, true)) + kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); + ++ /* Drop tracking for L1->L2 injected NMIs and soft IRQs */ ++ svm->nmi_l1_to_l2 = false; ++ svm->soft_int_injected = false; ++ + /* + * Drop what we picked up for L2 via svm_complete_interrupts() so it + * doesn't end up in L1. 
diff --git a/queue-7.0/kvm-nsvm-delay-setting-soft-irq-rip-tracking-fields-until-vcpu-run.patch b/queue-7.0/kvm-nsvm-delay-setting-soft-irq-rip-tracking-fields-until-vcpu-run.patch new file mode 100644 index 0000000000..1bef050f2b --- /dev/null +++ b/queue-7.0/kvm-nsvm-delay-setting-soft-irq-rip-tracking-fields-until-vcpu-run.patch @@ -0,0 +1,139 @@ +From c64bc6ed1764c1b7e3c0017019f743196074092f Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Wed, 4 Mar 2026 16:06:56 -0800 +Subject: KVM: nSVM: Delay setting soft IRQ RIP tracking fields until vCPU run + +From: Sean Christopherson + +commit c64bc6ed1764c1b7e3c0017019f743196074092f upstream. + +In the save+restore path, when restoring nested state, the values of RIP +and CS base passed into nested_vmcb02_prepare_control() are mostly +incorrect. They are both pulled from the vmcb02. For CS base, the value +is only correct if system regs are restored before nested state. The +value of RIP is whatever the vCPU had in vmcb02 before restoring nested +state (zero on a freshly created vCPU). + +Instead, take a similar approach to NextRIP, and delay initializing the +RIP tracking fields until shortly before the vCPU is run, to make sure +the most up-to-date values of RIP and CS base are used regardless of +KVM_SET_SREGS, KVM_SET_REGS, and KVM_SET_NESTED_STATE's relative +ordering. 
+ +Fixes: cc440cdad5b7 ("KVM: nSVM: implement KVM_GET_NESTED_STATE and KVM_SET_NESTED_STATE") +CC: stable@vger.kernel.org +Suggested-by: Sean Christopherson +Signed-off-by: Yosry Ahmed +Link: https://patch.msgid.link/20260225005950.3739782-8-yosry@kernel.org +[sean: deal with the svm_cancel_injection() madness] +Signed-off-by: Sean Christopherson +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/svm/nested.c | 17 ++++++++--------- + arch/x86/kvm/svm/svm.c | 29 +++++++++++++++++++++++++++++ + 2 files changed, 37 insertions(+), 9 deletions(-) + +--- a/arch/x86/kvm/svm/nested.c ++++ b/arch/x86/kvm/svm/nested.c +@@ -754,9 +754,7 @@ static bool is_evtinj_nmi(u32 evtinj) + return type == SVM_EVTINJ_TYPE_NMI; + } + +-static void nested_vmcb02_prepare_control(struct vcpu_svm *svm, +- unsigned long vmcb12_rip, +- unsigned long vmcb12_csbase) ++static void nested_vmcb02_prepare_control(struct vcpu_svm *svm) + { + u32 int_ctl_vmcb01_bits = V_INTR_MASKING_MASK; + u32 int_ctl_vmcb12_bits = V_TPR_MASK | V_IRQ_INJECTION_BITS_MASK; +@@ -868,15 +866,16 @@ static void nested_vmcb02_prepare_contro + vmcb02->control.next_rip = svm->nested.ctl.next_rip; + + svm->nmi_l1_to_l2 = is_evtinj_nmi(vmcb02->control.event_inj); ++ ++ /* ++ * soft_int_csbase, soft_int_old_rip, and soft_int_next_rip (if L1 ++ * doesn't have NRIPS) are initialized later, before the vCPU is run. 
++ */ + if (is_evtinj_soft(vmcb02->control.event_inj)) { + svm->soft_int_injected = true; +- svm->soft_int_csbase = vmcb12_csbase; +- svm->soft_int_old_rip = vmcb12_rip; + if (guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS) || + !svm->nested.nested_run_pending) + svm->soft_int_next_rip = svm->nested.ctl.next_rip; +- else +- svm->soft_int_next_rip = vmcb12_rip; + } + + /* LBR_CTL_ENABLE_MASK is controlled by svm_update_lbrv() */ +@@ -974,7 +973,7 @@ int enter_svm_guest_mode(struct kvm_vcpu + nested_svm_copy_common_state(svm->vmcb01.ptr, svm->nested.vmcb02.ptr); + + svm_switch_vmcb(svm, &svm->nested.vmcb02); +- nested_vmcb02_prepare_control(svm, vmcb12->save.rip, vmcb12->save.cs.base); ++ nested_vmcb02_prepare_control(svm); + nested_vmcb02_prepare_save(svm, vmcb12); + + ret = nested_svm_load_cr3(&svm->vcpu, svm->nested.save.cr3, +@@ -1920,7 +1919,7 @@ static int svm_set_nested_state(struct k + nested_copy_vmcb_control_to_cache(svm, ctl); + + svm_switch_vmcb(svm, &svm->nested.vmcb02); +- nested_vmcb02_prepare_control(svm, svm->vmcb->save.rip, svm->vmcb->save.cs.base); ++ nested_vmcb02_prepare_control(svm); + + /* + * Any previously restored state (e.g. 
KVM_SET_SREGS) would mark fields +--- a/arch/x86/kvm/svm/svm.c ++++ b/arch/x86/kvm/svm/svm.c +@@ -3639,6 +3639,16 @@ static int svm_handle_exit(struct kvm_vc + return svm_invoke_exit_handler(vcpu, svm->vmcb->control.exit_code); + } + ++static void svm_set_nested_run_soft_int_state(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ svm->soft_int_csbase = svm->vmcb->save.cs.base; ++ svm->soft_int_old_rip = kvm_rip_read(vcpu); ++ if (!guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS)) ++ svm->soft_int_next_rip = kvm_rip_read(vcpu); ++} ++ + static int pre_svm_run(struct kvm_vcpu *vcpu) + { + struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, vcpu->cpu); +@@ -3761,6 +3771,13 @@ static void svm_fixup_nested_rips(struct + if (boot_cpu_has(X86_FEATURE_NRIPS) && + !guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS)) + svm->vmcb->control.next_rip = kvm_rip_read(vcpu); ++ ++ /* ++ * Simiarly, initialize the soft int metadata here to use the most ++ * up-to-date values of RIP and CS base, regardless of restore order. ++ */ ++ if (svm->soft_int_injected) ++ svm_set_nested_run_soft_int_state(vcpu); + } + + void svm_complete_interrupt_delivery(struct kvm_vcpu *vcpu, int delivery_mode, +@@ -4131,6 +4148,18 @@ static void svm_complete_soft_interrupt( + struct vcpu_svm *svm = to_svm(vcpu); + + /* ++ * Initialize the soft int fields *before* reading them below if KVM ++ * aborted entry to the guest with a nested VMRUN pending. To ensure ++ * KVM uses up-to-date values for RIP and CS base across save/restore, ++ * regardless of restore order, KVM waits to set the soft int fields ++ * until VMRUN is imminent. But when canceling injection, KVM requeues ++ * the soft int and will reinject it via the standard injection flow, ++ * and so KVM needs to grab the state from the pending nested VMRUN. 
++ */ ++ if (is_guest_mode(vcpu) && svm->nested.nested_run_pending) ++ svm_set_nested_run_soft_int_state(vcpu); ++ ++ /* + * If NRIPS is enabled, KVM must snapshot the pre-VMRUN next_rip that's + * associated with the original soft exception/interrupt. next_rip is + * cleared on all exits that can occur while vectoring an event, so KVM diff --git a/queue-7.0/kvm-nsvm-delay-stuffing-l2-s-current-rip-into-nextrip-until-vcpu-run.patch b/queue-7.0/kvm-nsvm-delay-stuffing-l2-s-current-rip-into-nextrip-until-vcpu-run.patch new file mode 100644 index 0000000000..d053d109df --- /dev/null +++ b/queue-7.0/kvm-nsvm-delay-stuffing-l2-s-current-rip-into-nextrip-until-vcpu-run.patch @@ -0,0 +1,117 @@ +From a0592461f39c00b28f552fe842a063a00043eaa8 Mon Sep 17 00:00:00 2001 +From: Yosry Ahmed +Date: Wed, 25 Feb 2026 00:59:48 +0000 +Subject: KVM: nSVM: Delay stuffing L2's current RIP into NextRIP until vCPU run + +From: Yosry Ahmed + +commit a0592461f39c00b28f552fe842a063a00043eaa8 upstream. + +For guests with NRIPS disabled, L1 does not provide NextRIP when running +an L2 with an injected soft interrupt, instead it advances L2's RIP +before running it. KVM uses L2's current RIP as the NextRIP in vmcb02 to +emulate a CPU without NRIPS. + +However, in svm_set_nested_state(), the value used for L2's current RIP +comes from vmcb02, which is just whatever the vCPU had in vmcb02 before +restoring nested state (zero on a freshly created vCPU). Passing the +cached RIP value instead (i.e. kvm_rip_read()) would only fix the issue +if registers are restored before nested state. + +Instead, split the logic of setting NextRIP in vmcb02. Handle the +'normal' case of initializing vmcb02's NextRIP using NextRIP from vmcb12 +(or KVM_GET_NESTED_STATE's payload) in nested_vmcb02_prepare_control(). 
+Delay the special case of stuffing L2's current RIP into vmcb02's +NextRIP until shortly before the vCPU is run, to make sure the most +up-to-date value of RIP is used regardless of KVM_SET_REGS and +KVM_SET_NESTED_STATE's relative ordering. + +Fixes: cc440cdad5b7 ("KVM: nSVM: implement KVM_GET_NESTED_STATE and KVM_SET_NESTED_STATE") +CC: stable@vger.kernel.org +Suggested-by: Sean Christopherson +Signed-off-by: Yosry Ahmed +Link: https://patch.msgid.link/20260225005950.3739782-7-yosry@kernel.org +[sean: use new helper, svm_fixup_nested_rips()] +Signed-off-by: Sean Christopherson +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/svm/nested.c | 25 ++++++++----------------- + arch/x86/kvm/svm/svm.c | 25 +++++++++++++++++++++++++ + 2 files changed, 33 insertions(+), 17 deletions(-) + +--- a/arch/x86/kvm/svm/nested.c ++++ b/arch/x86/kvm/svm/nested.c +@@ -856,24 +856,15 @@ static void nested_vmcb02_prepare_contro + vmcb02->control.event_inj_err = svm->nested.ctl.event_inj_err; + + /* +- * NextRIP is consumed on VMRUN as the return address pushed on the +- * stack for injected soft exceptions/interrupts. If nrips is exposed +- * to L1, take it verbatim from vmcb12. +- * +- * If nrips is supported in hardware but not exposed to L1, stuff the +- * actual L2 RIP to emulate what a nrips=0 CPU would do (L1 is +- * responsible for advancing RIP prior to injecting the event). This is +- * only the case for the first L2 run after VMRUN. After that (e.g. +- * during save/restore), NextRIP is updated by the CPU and/or KVM, and +- * the value of the L2 RIP from vmcb12 should not be used. ++ * If nrips is exposed to L1, take NextRIP as-is. Otherwise, L1 ++ * advances L2's RIP before VMRUN instead of using NextRIP. KVM will ++ * stuff the current RIP as vmcb02's NextRIP before L2 is run. After ++ * the first run of L2 (e.g. after save+restore), NextRIP is updated by ++ * the CPU and/or KVM and should be used regardless of L1's support. 
+ */ +- if (boot_cpu_has(X86_FEATURE_NRIPS)) { +- if (guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS) || +- !svm->nested.nested_run_pending) +- vmcb02->control.next_rip = svm->nested.ctl.next_rip; +- else +- vmcb02->control.next_rip = vmcb12_rip; +- } ++ if (guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS) || ++ !svm->nested.nested_run_pending) ++ vmcb02->control.next_rip = svm->nested.ctl.next_rip; + + svm->nmi_l1_to_l2 = is_evtinj_nmi(vmcb02->control.event_inj); + if (is_evtinj_soft(vmcb02->control.event_inj)) { +--- a/arch/x86/kvm/svm/svm.c ++++ b/arch/x86/kvm/svm/svm.c +@@ -3742,6 +3742,29 @@ static void svm_inject_irq(struct kvm_vc + svm->vmcb->control.event_inj = intr->nr | SVM_EVTINJ_VALID | type; + } + ++static void svm_fixup_nested_rips(struct kvm_vcpu *vcpu) ++{ ++ struct vcpu_svm *svm = to_svm(vcpu); ++ ++ if (!is_guest_mode(vcpu) || !svm->nested.nested_run_pending) ++ return; ++ ++ /* ++ * If nrips is supported in hardware but not exposed to L1, stuff the ++ * actual L2 RIP to emulate what a nrips=0 CPU would do (L1 is ++ * responsible for advancing RIP prior to injecting the event). Once L2 ++ * runs after L1 executes VMRUN, NextRIP is updated by the CPU and/or ++ * KVM, and this is no longer needed. ++ * ++ * This is done here (as opposed to when preparing vmcb02) to use the ++ * most up-to-date value of RIP regardless of the order of restoring ++ * registers and nested state in the vCPU save+restore path. 
++ */ ++ if (boot_cpu_has(X86_FEATURE_NRIPS) && ++ !guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS)) ++ svm->vmcb->control.next_rip = kvm_rip_read(vcpu); ++} ++ + void svm_complete_interrupt_delivery(struct kvm_vcpu *vcpu, int delivery_mode, + int trig_mode, int vector) + { +@@ -4338,6 +4361,8 @@ static __no_kcsan fastpath_t svm_vcpu_ru + kvm_register_is_dirty(vcpu, VCPU_EXREG_ERAPS)) + svm->vmcb->control.erap_ctl |= ERAP_CONTROL_CLEAR_RAP; + ++ svm_fixup_nested_rips(vcpu); ++ + svm_hv_update_vp_id(svm->vmcb, vcpu); + + /* diff --git a/queue-7.0/kvm-nsvm-drop-the-non-architectural-consistency-check-for-np_enable.patch b/queue-7.0/kvm-nsvm-drop-the-non-architectural-consistency-check-for-np_enable.patch new file mode 100644 index 0000000000..3d93321952 --- /dev/null +++ b/queue-7.0/kvm-nsvm-drop-the-non-architectural-consistency-check-for-np_enable.patch @@ -0,0 +1,67 @@ +From e0b6f031d64c086edd563e7af9c0c0a2261dd2a4 Mon Sep 17 00:00:00 2001 +From: Yosry Ahmed +Date: Tue, 3 Mar 2026 00:34:08 +0000 +Subject: KVM: nSVM: Drop the non-architectural consistency check for NP_ENABLE + +From: Yosry Ahmed + +commit e0b6f031d64c086edd563e7af9c0c0a2261dd2a4 upstream. + +KVM currently fails a nested VMRUN and injects VMEXIT_INVALID (aka +SVM_EXIT_ERR) if L1 sets NP_ENABLE and the host does not support NPTs. +On first glance, it seems like the check should actually be for +guest_cpu_cap_has(X86_FEATURE_NPT) instead, as it is possible for the +host to support NPTs but the guest CPUID to not advertise it. + +However, the consistency check is not architectural to begin with. The +APM does not mention VMEXIT_INVALID if NP_ENABLE is set on a processor +that does not have X86_FEATURE_NPT. Hence, NP_ENABLE should be ignored +if X86_FEATURE_NPT is not available for L1, so sanitize it when copying +from the VMCB12 to KVM's cache. + +Apart from the consistency check, NP_ENABLE in VMCB12 is currently +ignored because the bit is actually copied from VMCB01 to VMCB02, not +from VMCB12. 
+ +Fixes: 4b16184c1cca ("KVM: SVM: Initialize Nested Nested MMU context on VMRUN") +Cc: stable@vger.kernel.org +Signed-off-by: Yosry Ahmed +Link: https://patch.msgid.link/20260303003421.2185681-15-yosry@kernel.org +Signed-off-by: Sean Christopherson +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/svm/nested.c | 9 +++++---- + 1 file changed, 5 insertions(+), 4 deletions(-) + +--- a/arch/x86/kvm/svm/nested.c ++++ b/arch/x86/kvm/svm/nested.c +@@ -350,9 +350,6 @@ static bool __nested_vmcb_check_controls + if (CC(control->asid == 0)) + return false; + +- if (CC((control->nested_ctl & SVM_NESTED_CTL_NP_ENABLE) && !npt_enabled)) +- return false; +- + if (CC(!nested_svm_check_bitmap_pa(vcpu, control->msrpm_base_pa, + MSRPM_SIZE))) + return false; +@@ -462,6 +459,11 @@ void __nested_copy_vmcb_control_to_cache + nested_svm_sanitize_intercept(vcpu, to, SKINIT); + nested_svm_sanitize_intercept(vcpu, to, RDPRU); + ++ /* Always clear SVM_NESTED_CTL_NP_ENABLE if the guest cannot use NPTs */ ++ to->nested_ctl = from->nested_ctl; ++ if (!guest_cpu_cap_has(vcpu, X86_FEATURE_NPT)) ++ to->nested_ctl &= ~SVM_NESTED_CTL_NP_ENABLE; ++ + to->iopm_base_pa = from->iopm_base_pa; + to->msrpm_base_pa = from->msrpm_base_pa; + to->tsc_offset = from->tsc_offset; +@@ -475,7 +477,6 @@ void __nested_copy_vmcb_control_to_cache + to->exit_info_2 = from->exit_info_2; + to->exit_int_info = from->exit_int_info; + to->exit_int_info_err = from->exit_int_info_err; +- to->nested_ctl = from->nested_ctl; + to->event_inj = from->event_inj; + to->event_inj_err = from->event_inj_err; + to->next_rip = from->next_rip; diff --git a/queue-7.0/kvm-nsvm-ensure-avic-is-inhibited-when-restoring-a-vcpu-to-guest-mode.patch b/queue-7.0/kvm-nsvm-ensure-avic-is-inhibited-when-restoring-a-vcpu-to-guest-mode.patch new file mode 100644 index 0000000000..896ae7d81a --- /dev/null +++ b/queue-7.0/kvm-nsvm-ensure-avic-is-inhibited-when-restoring-a-vcpu-to-guest-mode.patch @@ -0,0 +1,42 @@ +From 
24f7d36b824b65cf1a2db3db478059187b2a37b0 Mon Sep 17 00:00:00 2001 +From: Yosry Ahmed +Date: Tue, 24 Feb 2026 22:50:17 +0000 +Subject: KVM: nSVM: Ensure AVIC is inhibited when restoring a vCPU to guest mode + +From: Yosry Ahmed + +commit 24f7d36b824b65cf1a2db3db478059187b2a37b0 upstream. + +On nested VMRUN, KVM ensures AVIC is inhibited by requesting +KVM_REQ_APICV_UPDATE, triggering a check of inhibit reasons, finding +APICV_INHIBIT_REASON_NESTED, and disabling AVIC. + +However, when KVM_SET_NESTED_STATE is performed on a vCPU not in guest +mode with AVIC enabled, KVM_REQ_APICV_UPDATE is not requested, and AVIC +is not inhibited. + +Request KVM_REQ_APICV_UPDATE in the KVM_SET_NESTED_STATE path if AVIC is +active, similar to the nested VMRUN path. + +Fixes: f44509f849fe ("KVM: x86: SVM: allow AVIC to co-exist with a nested guest running") +Cc: stable@vger.kernel.org +Signed-off-by: Yosry Ahmed +Link: https://patch.msgid.link/20260224225017.3303870-1-yosry@kernel.org +Signed-off-by: Sean Christopherson +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/svm/nested.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/arch/x86/kvm/svm/nested.c ++++ b/arch/x86/kvm/svm/nested.c +@@ -1939,6 +1939,9 @@ static int svm_set_nested_state(struct k + + svm->nested.force_msr_bitmap_recalc = true; + ++ if (kvm_vcpu_apicv_active(vcpu)) ++ kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu); ++ + kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu); + ret = 0; + out_free: diff --git a/queue-7.0/kvm-nsvm-mark-all-of-vmcb02-dirty-when-restoring-nested-state.patch b/queue-7.0/kvm-nsvm-mark-all-of-vmcb02-dirty-when-restoring-nested-state.patch new file mode 100644 index 0000000000..7bbc6409c9 --- /dev/null +++ b/queue-7.0/kvm-nsvm-mark-all-of-vmcb02-dirty-when-restoring-nested-state.patch @@ -0,0 +1,42 @@ +From e63fb1379f4b9300a44739964e69549bebbcdca4 Mon Sep 17 00:00:00 2001 +From: Yosry Ahmed +Date: Tue, 10 Feb 2026 01:08:06 +0000 +Subject: KVM: nSVM: Mark all of vmcb02 dirty when 
restoring nested state + +From: Yosry Ahmed + +commit e63fb1379f4b9300a44739964e69549bebbcdca4 upstream. + +When restoring a vCPU in guest mode, any state restored before +KVM_SET_NESTED_STATE (e.g. KVM_SET_SREGS) will mark the corresponding +dirty bits in vmcb01, as it is the active VMCB before switching to +vmcb02 in svm_set_nested_state(). + +Hence, mark all fields in vmcb02 dirty in svm_set_nested_state() to +capture any previously restored fields. + +Fixes: cc440cdad5b7 ("KVM: nSVM: implement KVM_GET_NESTED_STATE and KVM_SET_NESTED_STATE") +CC: stable@vger.kernel.org +Signed-off-by: Yosry Ahmed +Link: https://patch.msgid.link/20260210010806.3204289-1-yosry.ahmed@linux.dev +Signed-off-by: Sean Christopherson +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/svm/nested.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/arch/x86/kvm/svm/nested.c ++++ b/arch/x86/kvm/svm/nested.c +@@ -1918,6 +1918,12 @@ static int svm_set_nested_state(struct k + nested_vmcb02_prepare_control(svm, svm->vmcb->save.rip, svm->vmcb->save.cs.base); + + /* ++ * Any previously restored state (e.g. KVM_SET_SREGS) would mark fields ++ * dirty in vmcb01 instead of vmcb02, so mark all of vmcb02 dirty here. ++ */ ++ vmcb_mark_all_dirty(svm->vmcb); ++ ++ /* + * While the nested guest CR3 is already checked and set by + * KVM_SET_SREGS, it was set when nested state was yet loaded, + * thus MMU might not be initialized correctly. 
diff --git a/queue-7.0/kvm-nsvm-raise-ud-if-unhandled-vmmcall-isn-t-intercepted-by-l1.patch b/queue-7.0/kvm-nsvm-raise-ud-if-unhandled-vmmcall-isn-t-intercepted-by-l1.patch new file mode 100644 index 0000000000..f3b6d400b3 --- /dev/null +++ b/queue-7.0/kvm-nsvm-raise-ud-if-unhandled-vmmcall-isn-t-intercepted-by-l1.patch @@ -0,0 +1,141 @@ +From c36991c6f8d2ab56ee67aff04e3c357f45cfc76c Mon Sep 17 00:00:00 2001 +From: Kevin Cheng +Date: Tue, 3 Mar 2026 16:22:22 -0800 +Subject: KVM: nSVM: Raise #UD if unhandled VMMCALL isn't intercepted by L1 + +From: Kevin Cheng + +commit c36991c6f8d2ab56ee67aff04e3c357f45cfc76c upstream. + +Explicitly synthesize a #UD for VMMCALL if L2 is active, L1 does NOT want +to intercept VMMCALL, nested_svm_l2_tlb_flush_enabled() is true, and the +hypercall is something other than one of the supported Hyper-V hypercalls. +When all of the above conditions are met, KVM will intercept VMMCALL but +never forward it to L1, i.e. will let L2 make hypercalls as if it were L1. + +The TLFS says a whole lot of nothing about this scenario, so go with the +architectural behavior, which says that VMMCALL #UDs if it's not +intercepted. + +Opportunistically do a 2-for-1 stub trade by stub-ifying the new API +instead of the helpers it uses. The last remaining "single" stub will +soon be dropped as well. 
+ +Suggested-by: Sean Christopherson +Fixes: 3f4a812edf5c ("KVM: nSVM: hyper-v: Enable L2 TLB flush") +Cc: Vitaly Kuznetsov +Cc: stable@vger.kernel.org +Signed-off-by: Kevin Cheng +Link: https://patch.msgid.link/20260228033328.2285047-5-chengkev@google.com +[sean: rewrite changelog and comment, tag for stable, remove defunct stubs] +Reviewed-by: Yosry Ahmed +Reviewed-by: Vitaly Kuznetsov +Link: https://patch.msgid.link/20260304002223.1105129-2-seanjc@google.com +Signed-off-by: Sean Christopherson +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/hyperv.h | 8 -------- + arch/x86/kvm/svm/hyperv.h | 11 +++++++++++ + arch/x86/kvm/svm/nested.c | 4 +--- + arch/x86/kvm/svm/svm.c | 19 ++++++++++++++++++- + 4 files changed, 30 insertions(+), 12 deletions(-) + +--- a/arch/x86/kvm/hyperv.h ++++ b/arch/x86/kvm/hyperv.h +@@ -305,14 +305,6 @@ static inline bool kvm_hv_has_stimer_pen + { + return false; + } +-static inline bool kvm_hv_is_tlb_flush_hcall(struct kvm_vcpu *vcpu) +-{ +- return false; +-} +-static inline bool guest_hv_cpuid_has_l2_tlb_flush(struct kvm_vcpu *vcpu) +-{ +- return false; +-} + static inline int kvm_hv_verify_vp_assist(struct kvm_vcpu *vcpu) + { + return 0; +--- a/arch/x86/kvm/svm/hyperv.h ++++ b/arch/x86/kvm/svm/hyperv.h +@@ -41,6 +41,13 @@ static inline bool nested_svm_l2_tlb_flu + return hv_vcpu->vp_assist_page.nested_control.features.directhypercall; + } + ++static inline bool nested_svm_is_l2_tlb_flush_hcall(struct kvm_vcpu *vcpu) ++{ ++ return guest_hv_cpuid_has_l2_tlb_flush(vcpu) && ++ nested_svm_l2_tlb_flush_enabled(vcpu) && ++ kvm_hv_is_tlb_flush_hcall(vcpu); ++} ++ + void svm_hv_inject_synthetic_vmexit_post_tlb_flush(struct kvm_vcpu *vcpu); + #else /* CONFIG_KVM_HYPERV */ + static inline void nested_svm_hv_update_vm_vp_ids(struct kvm_vcpu *vcpu) {} +@@ -48,6 +55,10 @@ static inline bool nested_svm_l2_tlb_flu + { + return false; + } ++static inline bool nested_svm_is_l2_tlb_flush_hcall(struct kvm_vcpu *vcpu) ++{ ++ return false; ++} + static 
inline void svm_hv_inject_synthetic_vmexit_post_tlb_flush(struct kvm_vcpu *vcpu) {} + #endif /* CONFIG_KVM_HYPERV */ + +--- a/arch/x86/kvm/svm/nested.c ++++ b/arch/x86/kvm/svm/nested.c +@@ -1711,9 +1711,7 @@ int nested_svm_exit_special(struct vcpu_ + } + case SVM_EXIT_VMMCALL: + /* Hyper-V L2 TLB flush hypercall is handled by L0 */ +- if (guest_hv_cpuid_has_l2_tlb_flush(vcpu) && +- nested_svm_l2_tlb_flush_enabled(vcpu) && +- kvm_hv_is_tlb_flush_hcall(vcpu)) ++ if (nested_svm_is_l2_tlb_flush_hcall(vcpu)) + return NESTED_EXIT_HOST; + break; + default: +--- a/arch/x86/kvm/svm/svm.c ++++ b/arch/x86/kvm/svm/svm.c +@@ -52,6 +52,7 @@ + #include "svm.h" + #include "svm_ops.h" + ++#include "hyperv.h" + #include "kvm_onhyperv.h" + #include "svm_onhyperv.h" + +@@ -3249,6 +3250,22 @@ static int bus_lock_exit(struct kvm_vcpu + return 0; + } + ++static int vmmcall_interception(struct kvm_vcpu *vcpu) ++{ ++ /* ++ * Inject a #UD if L2 is active and the VMMCALL isn't a Hyper-V TLB ++ * hypercall, as VMMCALL #UDs if it's not intercepted, and this path is ++ * reachable if and only if L1 doesn't want to intercept VMMCALL or has ++ * enabled L0 (KVM) handling of Hyper-V L2 TLB flush hypercalls. 
++ */ ++ if (is_guest_mode(vcpu) && !nested_svm_is_l2_tlb_flush_hcall(vcpu)) { ++ kvm_queue_exception(vcpu, UD_VECTOR); ++ return 1; ++ } ++ ++ return kvm_emulate_hypercall(vcpu); ++} ++ + static int (*const svm_exit_handlers[])(struct kvm_vcpu *vcpu) = { + [SVM_EXIT_READ_CR0] = cr_interception, + [SVM_EXIT_READ_CR3] = cr_interception, +@@ -3299,7 +3316,7 @@ static int (*const svm_exit_handlers[])( + [SVM_EXIT_TASK_SWITCH] = task_switch_interception, + [SVM_EXIT_SHUTDOWN] = shutdown_interception, + [SVM_EXIT_VMRUN] = vmrun_interception, +- [SVM_EXIT_VMMCALL] = kvm_emulate_hypercall, ++ [SVM_EXIT_VMMCALL] = vmmcall_interception, + [SVM_EXIT_VMLOAD] = vmload_interception, + [SVM_EXIT_VMSAVE] = vmsave_interception, + [SVM_EXIT_STGI] = stgi_interception, diff --git a/queue-7.0/kvm-nsvm-refactor-checking-lbrv-enablement-in-vmcb12-into-a-helper.patch b/queue-7.0/kvm-nsvm-refactor-checking-lbrv-enablement-in-vmcb12-into-a-helper.patch new file mode 100644 index 0000000000..c81f4d777e --- /dev/null +++ b/queue-7.0/kvm-nsvm-refactor-checking-lbrv-enablement-in-vmcb12-into-a-helper.patch @@ -0,0 +1,58 @@ +From 290c8d82023ab0e1d2782d37136541e017174d7c Mon Sep 17 00:00:00 2001 +From: Yosry Ahmed +Date: Tue, 3 Mar 2026 00:34:00 +0000 +Subject: KVM: nSVM: Refactor checking LBRV enablement in vmcb12 into a helper + +From: Yosry Ahmed + +commit 290c8d82023ab0e1d2782d37136541e017174d7c upstream. + +Refactor the vCPU cap and vmcb12 flag checks into a helper. The +unlikely() annotation is dropped, it's unlikely (huh) to make a +difference and the CPU will probably predict it better on its own. 
+ +CC: stable@vger.kernel.org +Co-developed-by: Sean Christopherson +Signed-off-by: Yosry Ahmed +Link: https://patch.msgid.link/20260303003421.2185681-7-yosry@kernel.org +Signed-off-by: Sean Christopherson +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/svm/nested.c | 12 ++++++++---- + 1 file changed, 8 insertions(+), 4 deletions(-) + +--- a/arch/x86/kvm/svm/nested.c ++++ b/arch/x86/kvm/svm/nested.c +@@ -651,6 +651,12 @@ void nested_vmcb02_compute_g_pat(struct + svm->nested.vmcb02.ptr->save.g_pat = svm->vmcb01.ptr->save.g_pat; + } + ++static bool nested_vmcb12_has_lbrv(struct kvm_vcpu *vcpu) ++{ ++ return guest_cpu_cap_has(vcpu, X86_FEATURE_LBRV) && ++ (to_svm(vcpu)->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK); ++} ++ + static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12) + { + bool new_vmcb12 = false; +@@ -715,8 +721,7 @@ static void nested_vmcb02_prepare_save(s + vmcb_mark_dirty(vmcb02, VMCB_DR); + } + +- if (unlikely(guest_cpu_cap_has(vcpu, X86_FEATURE_LBRV) && +- (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) { ++ if (nested_vmcb12_has_lbrv(vcpu)) { + /* + * Reserved bits of DEBUGCTL are ignored. Be consistent with + * svm_set_msr's definition of reserved bits. 
+@@ -1243,8 +1248,7 @@ int nested_svm_vmexit(struct vcpu_svm *s + if (!nested_exit_on_intr(svm)) + kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); + +- if (unlikely(guest_cpu_cap_has(vcpu, X86_FEATURE_LBRV) && +- (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) { ++ if (nested_vmcb12_has_lbrv(vcpu)) { + svm_copy_lbrs(&vmcb12->save, &vmcb02->save); + } else { + svm_copy_lbrs(&vmcb01->save, &vmcb02->save); diff --git a/queue-7.0/kvm-nsvm-refactor-writing-vmcb12-on-nested-vmexit-as-a-helper.patch b/queue-7.0/kvm-nsvm-refactor-writing-vmcb12-on-nested-vmexit-as-a-helper.patch new file mode 100644 index 0000000000..a2aa0e8d78 --- /dev/null +++ b/queue-7.0/kvm-nsvm-refactor-writing-vmcb12-on-nested-vmexit-as-a-helper.patch @@ -0,0 +1,142 @@ +From dcf3648ab71437b504abbfdc4e74622a0f1a56e3 Mon Sep 17 00:00:00 2001 +From: Yosry Ahmed +Date: Tue, 3 Mar 2026 00:34:01 +0000 +Subject: KVM: nSVM: Refactor writing vmcb12 on nested #VMEXIT as a helper + +From: Yosry Ahmed + +commit dcf3648ab71437b504abbfdc4e74622a0f1a56e3 upstream. + +Move mapping vmcb12 and updating it out of nested_svm_vmexit() into a +helper, no functional change intended. 
+ +CC: stable@vger.kernel.org +Co-developed-by: Sean Christopherson +Signed-off-by: Yosry Ahmed +Link: https://patch.msgid.link/20260303003421.2185681-8-yosry@kernel.org +Signed-off-by: Sean Christopherson +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/svm/nested.c | 77 ++++++++++++++++++++++++++-------------------- + 1 file changed, 44 insertions(+), 33 deletions(-) + +--- a/arch/x86/kvm/svm/nested.c ++++ b/arch/x86/kvm/svm/nested.c +@@ -1134,36 +1134,20 @@ void svm_copy_vmloadsave_state(struct vm + to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip; + } + +-int nested_svm_vmexit(struct vcpu_svm *svm) ++static int nested_svm_vmexit_update_vmcb12(struct kvm_vcpu *vcpu) + { +- struct kvm_vcpu *vcpu = &svm->vcpu; +- struct vmcb *vmcb01 = svm->vmcb01.ptr; ++ struct vcpu_svm *svm = to_svm(vcpu); + struct vmcb *vmcb02 = svm->nested.vmcb02.ptr; +- struct vmcb *vmcb12; + struct kvm_host_map map; ++ struct vmcb *vmcb12; + int rc; + + rc = kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.vmcb12_gpa), &map); +- if (rc) { +- if (rc == -EINVAL) +- kvm_inject_gp(vcpu, 0); +- return 1; +- } ++ if (rc) ++ return rc; + + vmcb12 = map.hva; + +- /* Exit Guest-Mode */ +- leave_guest_mode(vcpu); +- svm->nested.vmcb12_gpa = 0; +- WARN_ON_ONCE(svm->nested.nested_run_pending); +- +- kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu); +- +- /* in case we halted in L2 */ +- kvm_set_mp_state(vcpu, KVM_MP_STATE_RUNNABLE); +- +- /* Give the current vmcb to the guest */ +- + vmcb12->save.es = vmcb02->save.es; + vmcb12->save.cs = vmcb02->save.cs; + vmcb12->save.ss = vmcb02->save.ss; +@@ -1200,10 +1184,48 @@ int nested_svm_vmexit(struct vcpu_svm *s + if (guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS)) + vmcb12->control.next_rip = vmcb02->control.next_rip; + ++ if (nested_vmcb12_has_lbrv(vcpu)) ++ svm_copy_lbrs(&vmcb12->save, &vmcb02->save); ++ + vmcb12->control.int_ctl = svm->nested.ctl.int_ctl; + vmcb12->control.event_inj = svm->nested.ctl.event_inj; + vmcb12->control.event_inj_err = 
svm->nested.ctl.event_inj_err; + ++ trace_kvm_nested_vmexit_inject(vmcb12->control.exit_code, ++ vmcb12->control.exit_info_1, ++ vmcb12->control.exit_info_2, ++ vmcb12->control.exit_int_info, ++ vmcb12->control.exit_int_info_err, ++ KVM_ISA_SVM); ++ ++ kvm_vcpu_unmap(vcpu, &map); ++ return 0; ++} ++ ++int nested_svm_vmexit(struct vcpu_svm *svm) ++{ ++ struct kvm_vcpu *vcpu = &svm->vcpu; ++ struct vmcb *vmcb01 = svm->vmcb01.ptr; ++ struct vmcb *vmcb02 = svm->nested.vmcb02.ptr; ++ int rc; ++ ++ rc = nested_svm_vmexit_update_vmcb12(vcpu); ++ if (rc) { ++ if (rc == -EINVAL) ++ kvm_inject_gp(vcpu, 0); ++ return 1; ++ } ++ ++ /* Exit Guest-Mode */ ++ leave_guest_mode(vcpu); ++ svm->nested.vmcb12_gpa = 0; ++ WARN_ON_ONCE(svm->nested.nested_run_pending); ++ ++ kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu); ++ ++ /* in case we halted in L2 */ ++ kvm_set_mp_state(vcpu, KVM_MP_STATE_RUNNABLE); ++ + if (!kvm_pause_in_guest(vcpu->kvm)) { + vmcb01->control.pause_filter_count = vmcb02->control.pause_filter_count; + vmcb_mark_dirty(vmcb01, VMCB_INTERCEPTS); +@@ -1248,9 +1270,7 @@ int nested_svm_vmexit(struct vcpu_svm *s + if (!nested_exit_on_intr(svm)) + kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); + +- if (nested_vmcb12_has_lbrv(vcpu)) { +- svm_copy_lbrs(&vmcb12->save, &vmcb02->save); +- } else { ++ if (!nested_vmcb12_has_lbrv(vcpu)) { + svm_copy_lbrs(&vmcb01->save, &vmcb02->save); + vmcb_mark_dirty(vmcb01, VMCB_LBR); + } +@@ -1306,15 +1326,6 @@ int nested_svm_vmexit(struct vcpu_svm *s + svm->vcpu.arch.dr7 = DR7_FIXED_1; + kvm_update_dr7(&svm->vcpu); + +- trace_kvm_nested_vmexit_inject(vmcb12->control.exit_code, +- vmcb12->control.exit_info_1, +- vmcb12->control.exit_info_2, +- vmcb12->control.exit_int_info, +- vmcb12->control.exit_int_info_err, +- KVM_ISA_SVM); +- +- kvm_vcpu_unmap(vcpu, &map); +- + nested_svm_transition_tlb_flush(vcpu); + + nested_svm_uninit_mmu_context(vcpu); diff --git 
a/queue-7.0/kvm-nsvm-sync-interrupt-shadow-to-cached-vmcb12-after-vmrun-of-l2.patch b/queue-7.0/kvm-nsvm-sync-interrupt-shadow-to-cached-vmcb12-after-vmrun-of-l2.patch new file mode 100644 index 0000000000..e436ccd67e --- /dev/null +++ b/queue-7.0/kvm-nsvm-sync-interrupt-shadow-to-cached-vmcb12-after-vmrun-of-l2.patch @@ -0,0 +1,52 @@ +From 03bee264f8ebfd39e0254c98e112d033a7aa9055 Mon Sep 17 00:00:00 2001 +From: Yosry Ahmed +Date: Wed, 25 Feb 2026 00:59:44 +0000 +Subject: KVM: nSVM: Sync interrupt shadow to cached vmcb12 after VMRUN of L2 + +From: Yosry Ahmed + +commit 03bee264f8ebfd39e0254c98e112d033a7aa9055 upstream. + +After VMRUN in guest mode, nested_sync_control_from_vmcb02() syncs +fields written by the CPU from vmcb02 to the cached vmcb12. This is +because the cached vmcb12 is used as the authoritative copy of some of +the controls, and is the payload when saving/restoring nested state. + +int_state is also written by the CPU, specifically bit 0 (i.e. +SVM_INTERRUPT_SHADOW_MASK) for nested VMs, but it is not sync'd to +cached vmcb12. This does not cause a problem if KVM_SET_NESTED_STATE +precedes KVM_SET_VCPU_EVENTS in the restore path, as an interrupt shadow +would be correctly restored to vmcb02 (KVM_SET_VCPU_EVENTS overwrites +what KVM_SET_NESTED_STATE restored in int_state). + +However, if KVM_SET_VCPU_EVENTS precedes KVM_SET_NESTED_STATE, an +interrupt shadow would be restored into vmcb01 instead of vmcb02. This +would mostly be benign for L1 (delays an interrupt), but not for L2. For +L2, the vCPU could hang (e.g. if a wakeup interrupt is delivered before +a HLT that should have been in an interrupt shadow). + +Sync int_state to the cached vmcb12 in nested_sync_control_from_vmcb02() +to avoid this problem. With that, KVM_SET_NESTED_STATE restores the +correct interrupt shadow state, and if KVM_SET_VCPU_EVENTS follows it +would overwrite it with the same value. 
+ +Fixes: cc440cdad5b7 ("KVM: nSVM: implement KVM_GET_NESTED_STATE and KVM_SET_NESTED_STATE") +CC: stable@vger.kernel.org +Signed-off-by: Yosry Ahmed +Link: https://patch.msgid.link/20260225005950.3739782-3-yosry@kernel.org +Signed-off-by: Sean Christopherson +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/svm/nested.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/arch/x86/kvm/svm/nested.c ++++ b/arch/x86/kvm/svm/nested.c +@@ -530,6 +530,7 @@ void nested_sync_control_from_vmcb02(str + u32 mask; + svm->nested.ctl.event_inj = svm->vmcb->control.event_inj; + svm->nested.ctl.event_inj_err = svm->vmcb->control.event_inj_err; ++ svm->nested.ctl.int_state = svm->vmcb->control.int_state; + + /* Only a few fields of int_ctl are written by the processor. */ + mask = V_IRQ_MASK | V_TPR_MASK; diff --git a/queue-7.0/kvm-nsvm-sync-nextrip-to-cached-vmcb12-after-vmrun-of-l2.patch b/queue-7.0/kvm-nsvm-sync-nextrip-to-cached-vmcb12-after-vmrun-of-l2.patch new file mode 100644 index 0000000000..f8edd13210 --- /dev/null +++ b/queue-7.0/kvm-nsvm-sync-nextrip-to-cached-vmcb12-after-vmrun-of-l2.patch @@ -0,0 +1,55 @@ +From 778d8c1b2a6ffe622ddcd3bb35b620e6e41f4da0 Mon Sep 17 00:00:00 2001 +From: Yosry Ahmed +Date: Wed, 25 Feb 2026 00:59:43 +0000 +Subject: KVM: nSVM: Sync NextRIP to cached vmcb12 after VMRUN of L2 + +From: Yosry Ahmed + +commit 778d8c1b2a6ffe622ddcd3bb35b620e6e41f4da0 upstream. + +After VMRUN in guest mode, nested_sync_control_from_vmcb02() syncs +fields written by the CPU from vmcb02 to the cached vmcb12. This is +because the cached vmcb12 is used as the authoritative copy of some of +the controls, and is the payload when saving/restoring nested state. + +NextRIP is also written by the CPU (in some cases) after VMRUN, but is +not sync'd to the cached vmcb12. As a result, it is corrupted after +save/restore (replaced by the original value written by L1 on nested +VMRUN). This could cause problems for both KVM (e.g. when injecting a +soft IRQ) or L1 (e.g. 
when using NextRIP to advance RIP after emulating +an instruction). + +Fix this by sync'ing NextRIP to the cache after VMRUN of L2, but only +after completing interrupts (not in nested_sync_control_from_vmcb02()), +as KVM may update NextRIP (e.g. when re-injecting a soft IRQ). + +Fixes: cc440cdad5b7 ("KVM: nSVM: implement KVM_GET_NESTED_STATE and KVM_SET_NESTED_STATE") +CC: stable@vger.kernel.org +Co-developed-by: Sean Christopherson +Signed-off-by: Yosry Ahmed +Link: https://patch.msgid.link/20260225005950.3739782-2-yosry@kernel.org +Signed-off-by: Sean Christopherson +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/svm/svm.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +--- a/arch/x86/kvm/svm/svm.c ++++ b/arch/x86/kvm/svm/svm.c +@@ -4436,6 +4436,16 @@ static __no_kcsan fastpath_t svm_vcpu_ru + + svm_complete_interrupts(vcpu); + ++ /* ++ * Update the cache after completing interrupts to get an accurate ++ * NextRIP, e.g. when re-injecting a soft interrupt. ++ * ++ * FIXME: Rework svm_get_nested_state() to not pull data from the ++ * cache (except for maybe int_ctl). ++ */ ++ if (is_guest_mode(vcpu)) ++ svm->nested.ctl.next_rip = svm->vmcb->control.next_rip; ++ + return svm_exit_handlers_fastpath(vcpu); + } + diff --git a/queue-7.0/kvm-nsvm-triple-fault-if-mapping-vmcb12-fails-on-nested-vmexit.patch b/queue-7.0/kvm-nsvm-triple-fault-if-mapping-vmcb12-fails-on-nested-vmexit.patch new file mode 100644 index 0000000000..7b399b6238 --- /dev/null +++ b/queue-7.0/kvm-nsvm-triple-fault-if-mapping-vmcb12-fails-on-nested-vmexit.patch @@ -0,0 +1,55 @@ +From 1b30e7551767cb95b3e49bb169c72bbd76b56e05 Mon Sep 17 00:00:00 2001 +From: Yosry Ahmed +Date: Tue, 3 Mar 2026 00:34:02 +0000 +Subject: KVM: nSVM: Triple fault if mapping VMCB12 fails on nested #VMEXIT + +From: Yosry Ahmed + +commit 1b30e7551767cb95b3e49bb169c72bbd76b56e05 upstream. 
+ +KVM currently injects a #GP and hopes for the best if mapping VMCB12 +fails on nested #VMEXIT, and only if the failure mode is -EINVAL. +Mapping the VMCB12 could also fail if creating host mappings fails. + +After the #GP is injected, nested_svm_vmexit() bails early, without +cleaning up (e.g. KVM_REQ_GET_NESTED_STATE_PAGES is set, is_guest_mode() +is true, etc). + +Instead of optionally injecting a #GP, triple fault the guest if mapping +VMCB12 fails since KVM cannot make a sane recovery. The APM states that +a #VMEXIT will triple fault if host state is illegal or an exception +occurs while loading host state, so the behavior is not entirely made +up. + +Do not return early from nested_svm_vmexit(), continue cleaning up the +vCPU state (e.g. switch back to vmcb01), to handle the failure as +gracefully as possible. + +Fixes: cf74a78b229d ("KVM: SVM: Add VMEXIT handler and intercepts") +CC: stable@vger.kernel.org +Co-developed-by: Sean Christopherson +Signed-off-by: Yosry Ahmed +Link: https://patch.msgid.link/20260303003421.2185681-9-yosry@kernel.org +Signed-off-by: Sean Christopherson +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/svm/nested.c | 8 ++------ + 1 file changed, 2 insertions(+), 6 deletions(-) + +--- a/arch/x86/kvm/svm/nested.c ++++ b/arch/x86/kvm/svm/nested.c +@@ -1208,12 +1208,8 @@ void nested_svm_vmexit(struct vcpu_svm * + struct vmcb *vmcb01 = svm->vmcb01.ptr; + struct vmcb *vmcb02 = svm->nested.vmcb02.ptr; + +- rc = nested_svm_vmexit_update_vmcb12(vcpu); +- if (rc) { +- if (rc == -EINVAL) +- kvm_inject_gp(vcpu, 0); +- return 1; +- } ++ if (nested_svm_vmexit_update_vmcb12(vcpu)) ++ kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); + + /* Exit Guest-Mode */ + leave_guest_mode(vcpu); diff --git a/queue-7.0/kvm-nsvm-triple-fault-if-restore-host-cr3-fails-on-nested-vmexit.patch b/queue-7.0/kvm-nsvm-triple-fault-if-restore-host-cr3-fails-on-nested-vmexit.patch new file mode 100644 index 0000000000..49beb533a6 --- /dev/null +++ 
b/queue-7.0/kvm-nsvm-triple-fault-if-restore-host-cr3-fails-on-nested-vmexit.patch @@ -0,0 +1,137 @@ +From 5d291ef0585ed880ed4dd71ea1a5965e0a65fb53 Mon Sep 17 00:00:00 2001 +From: Yosry Ahmed +Date: Tue, 3 Mar 2026 00:34:03 +0000 +Subject: KVM: nSVM: Triple fault if restore host CR3 fails on nested #VMEXIT + +From: Yosry Ahmed + +commit 5d291ef0585ed880ed4dd71ea1a5965e0a65fb53 upstream. + +If loading L1's CR3 fails on a nested #VMEXIT, nested_svm_vmexit() +returns an error code that is ignored by most callers, and continues to +run L1 with corrupted state. A sane recovery is not possible in this +case, and HW behavior is to cause a shutdown. Inject a triple fault +instead, and do not return early from nested_svm_vmexit(). Continue +cleaning up the vCPU state (e.g. clear pending exceptions), to handle +the failure as gracefully as possible. + +From the APM: + + Upon #VMEXIT, the processor performs the following actions in order to + return to the host execution context: + + ... + + if (illegal host state loaded, or exception while loading host state) + shutdown + else + execute first host instruction following the VMRUN + +Remove the return value of nested_svm_vmexit(), which is mostly +unchecked anyway. 
+ +Fixes: d82aaef9c88a ("KVM: nSVM: use nested_svm_load_cr3() on guest->host switch") +CC: stable@vger.kernel.org +Signed-off-by: Yosry Ahmed +Link: https://patch.msgid.link/20260303003421.2185681-10-yosry@kernel.org +Signed-off-by: Sean Christopherson +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/svm/nested.c | 10 +++------- + arch/x86/kvm/svm/svm.c | 11 ++--------- + arch/x86/kvm/svm/svm.h | 6 +++--- + 3 files changed, 8 insertions(+), 19 deletions(-) + +--- a/arch/x86/kvm/svm/nested.c ++++ b/arch/x86/kvm/svm/nested.c +@@ -1202,12 +1202,11 @@ static int nested_svm_vmexit_update_vmcb + return 0; + } + +-int nested_svm_vmexit(struct vcpu_svm *svm) ++void nested_svm_vmexit(struct vcpu_svm *svm) + { + struct kvm_vcpu *vcpu = &svm->vcpu; + struct vmcb *vmcb01 = svm->vmcb01.ptr; + struct vmcb *vmcb02 = svm->nested.vmcb02.ptr; +- int rc; + + rc = nested_svm_vmexit_update_vmcb12(vcpu); + if (rc) { +@@ -1330,9 +1329,8 @@ int nested_svm_vmexit(struct vcpu_svm *s + + nested_svm_uninit_mmu_context(vcpu); + +- rc = nested_svm_load_cr3(vcpu, vmcb01->save.cr3, false, true); +- if (rc) +- return 1; ++ if (nested_svm_load_cr3(vcpu, vmcb01->save.cr3, false, true)) ++ kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); + + /* + * Drop what we picked up for L2 via svm_complete_interrupts() so it +@@ -1357,8 +1355,6 @@ int nested_svm_vmexit(struct vcpu_svm *s + */ + if (kvm_apicv_activated(vcpu->kvm)) + __kvm_vcpu_update_apicv(vcpu); +- +- return 0; + } + + static void nested_svm_triple_fault(struct kvm_vcpu *vcpu) +--- a/arch/x86/kvm/svm/svm.c ++++ b/arch/x86/kvm/svm/svm.c +@@ -2233,13 +2233,9 @@ static int emulate_svm_instr(struct kvm_ + [SVM_INSTR_VMSAVE] = vmsave_interception, + }; + struct vcpu_svm *svm = to_svm(vcpu); +- int ret; + + if (is_guest_mode(vcpu)) { +- /* Returns '1' or -errno on failure, '0' on success. 
*/ +- ret = nested_svm_simple_vmexit(svm, guest_mode_exit_codes[opcode]); +- if (ret) +- return ret; ++ nested_svm_simple_vmexit(svm, guest_mode_exit_codes[opcode]); + return 1; + } + return svm_instr_handlers[opcode](vcpu); +@@ -4872,7 +4868,6 @@ static int svm_enter_smm(struct kvm_vcpu + { + struct vcpu_svm *svm = to_svm(vcpu); + struct kvm_host_map map_save; +- int ret; + + if (!is_guest_mode(vcpu)) + return 0; +@@ -4892,9 +4887,7 @@ static int svm_enter_smm(struct kvm_vcpu + svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP]; + svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP]; + +- ret = nested_svm_simple_vmexit(svm, SVM_EXIT_SW); +- if (ret) +- return ret; ++ nested_svm_simple_vmexit(svm, SVM_EXIT_SW); + + /* + * KVM uses VMCB01 to store L1 host state while L2 runs but +--- a/arch/x86/kvm/svm/svm.h ++++ b/arch/x86/kvm/svm/svm.h +@@ -793,14 +793,14 @@ int nested_svm_vmrun(struct kvm_vcpu *vc + void svm_copy_vmrun_state(struct vmcb_save_area *to_save, + struct vmcb_save_area *from_save); + void svm_copy_vmloadsave_state(struct vmcb *to_vmcb, struct vmcb *from_vmcb); +-int nested_svm_vmexit(struct vcpu_svm *svm); ++void nested_svm_vmexit(struct vcpu_svm *svm); + +-static inline int nested_svm_simple_vmexit(struct vcpu_svm *svm, u32 exit_code) ++static inline void nested_svm_simple_vmexit(struct vcpu_svm *svm, u32 exit_code) + { + svm->vmcb->control.exit_code = exit_code; + svm->vmcb->control.exit_info_1 = 0; + svm->vmcb->control.exit_info_2 = 0; +- return nested_svm_vmexit(svm); ++ nested_svm_vmexit(svm); + } + + int nested_svm_exit_handled(struct vcpu_svm *svm); diff --git a/queue-7.0/kvm-nsvm-use-vcpu-arch.cr2-when-updating-vmcb12-on-nested-vmexit.patch b/queue-7.0/kvm-nsvm-use-vcpu-arch.cr2-when-updating-vmcb12-on-nested-vmexit.patch new file mode 100644 index 0000000000..389ee3f091 --- /dev/null +++ b/queue-7.0/kvm-nsvm-use-vcpu-arch.cr2-when-updating-vmcb12-on-nested-vmexit.patch @@ -0,0 +1,73 @@ +From 5c247d08bc81bbad4c662dcf5654137a2f8483ec Mon Sep 
17 00:00:00 2001 +From: Yosry Ahmed +Date: Tue, 3 Feb 2026 20:10:10 +0000 +Subject: KVM: nSVM: Use vcpu->arch.cr2 when updating vmcb12 on nested #VMEXIT + +From: Yosry Ahmed + +commit 5c247d08bc81bbad4c662dcf5654137a2f8483ec upstream. + +KVM currently uses the value of CR2 from vmcb02 to update vmcb12 on +nested #VMEXIT. This value is incorrect in some cases, causing L1 to run +L2 with a corrupted CR2. This could lead to segfaults or data corruption +if L2 is in the middle of handling a #PF and reads a corrupted CR2. Use +the correct value in vcpu->arch.cr2 instead. + +The value in vcpu->arch.cr2 is sync'd to vmcb02 shortly before a VMRUN +of L2, and sync'd back to vcpu->arch.cr2 shortly after. The values are +only out-of-sync in two cases: after save+restore, and after a #PF is +injected into L2. In either case, if a #VMEXIT to L1 is synthesized +before L2 runs, using the value in vmcb02 would be incorrect. + +After save+restore, the value of CR2 is restored by KVM_SET_SREGS into +vcpu->arch.cr2. It is not reflected in vmcb02 until a VMRUN of L2. Before +that, it holds whatever was in vmcb02 before restore, which would be +zero on a new vCPU that never ran nested. If a #VMEXIT to L1 is +synthesized before L2 ever runs, using vcpu->arch.cr2 to update vmcb12 +is the right thing to do. + +The #PF injection case is more nuanced. Although the APM is a bit +unclear about when CR2 is written during a #PF, the SDM is more clear: + + Processors update CR2 whenever a page fault is detected. If a + second page fault occurs while an earlier page fault is being + delivered, the faulting linear address of the second fault will + overwrite the contents of CR2 (replacing the previous address). + These updates to CR2 occur even if the page fault results in a + double fault or occurs during the delivery of a double fault. + +KVM injecting the exception surely counts as the #PF being "detected". 
+More importantly, when an exception is injected into L2 at the time of a +synthesized #VMEXIT, KVM updates exit_int_info in vmcb12 accordingly, +such that an L1 hypervisor can re-inject the exception. If CR2 is not +written at that point, the L1 hypervisor has no way of correctly +re-injecting the #PF. Hence, if a #VMEXIT to L1 is synthesized after +the #PF is injected into L2 but before it actually runs, using +vcpu->arch.cr2 to update vmcb12 is also the right thing to do. + +Note that KVM does _not_ update vcpu->arch.cr2 when a #PF is pending for +L2, only when it is injected. The distinction is important, because only +injected (but not intercepted) exceptions are propagated to L1 through +exit_int_info. It would be incorrect to update CR2 in vmcb12 for a +pending #PF, as L1 would perceive an updated CR2 value with no #PF. + +Cc: stable@vger.kernel.org +Signed-off-by: Yosry Ahmed +Link: https://patch.msgid.link/20260203201010.1871056-1-yosry.ahmed@linux.dev +Signed-off-by: Sean Christopherson +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/svm/nested.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/x86/kvm/svm/nested.c ++++ b/arch/x86/kvm/svm/nested.c +@@ -1166,7 +1166,7 @@ int nested_svm_vmexit(struct vcpu_svm *s + vmcb12->save.efer = svm->vcpu.arch.efer; + vmcb12->save.cr0 = kvm_read_cr0(vcpu); + vmcb12->save.cr3 = kvm_read_cr3(vcpu); +- vmcb12->save.cr2 = vmcb02->save.cr2; ++ vmcb12->save.cr2 = vcpu->arch.cr2; + vmcb12->save.cr4 = svm->vcpu.arch.cr4; + vmcb12->save.rflags = kvm_get_rflags(vcpu); + vmcb12->save.rip = kvm_rip_read(vcpu); diff --git a/queue-7.0/kvm-svm-add-missing-save-restore-handling-of-lbr-msrs.patch b/queue-7.0/kvm-svm-add-missing-save-restore-handling-of-lbr-msrs.patch new file mode 100644 index 0000000000..05666e664a --- /dev/null +++ b/queue-7.0/kvm-svm-add-missing-save-restore-handling-of-lbr-msrs.patch @@ -0,0 +1,133 @@ +From 3700f0788da6acf73b2df56690f4b201aa4aefd2 Mon Sep 17 00:00:00 2001 +From: Yosry 
Ahmed +Date: Tue, 3 Mar 2026 00:33:57 +0000 +Subject: KVM: SVM: Add missing save/restore handling of LBR MSRs + +From: Yosry Ahmed + +commit 3700f0788da6acf73b2df56690f4b201aa4aefd2 upstream. + +MSR_IA32_DEBUGCTLMSR and LBR MSRs are currently not enumerated by +KVM_GET_MSR_INDEX_LIST, and LBR MSRs cannot be set with KVM_SET_MSRS. So +save/restore is completely broken. + +Fix it by adding the MSRs to msrs_to_save_base, and allowing writes to +LBR MSRs from userspace only (as they are read-only MSRs) if LBR +virtualization is enabled. Additionally, to correctly restore L1's LBRs +while L2 is running, make sure the LBRs are copied from the captured +VMCB01 save area in svm_copy_vmrun_state(). + +Note, for VMX, this also fixes a flaw where MSR_IA32_DEBUGCTLMSR isn't +reported as an MSR to save/restore. + +Note #2, over-reporting MSR_IA32_LASTxxx on Intel is ok, as KVM already +handles unsupported reads and writes thanks to commit b5e2fec0ebc3 ("KVM: +Ignore DEBUGCTL MSRs with no effect") (kvm_do_msr_access() will morph the +unsupported userspace write into a nop). 
+ +Fixes: 24e09cbf480a ("KVM: SVM: enable LBR virtualization") +Cc: stable@vger.kernel.org +Reported-by: Jim Mattson +Signed-off-by: Yosry Ahmed +Link: https://patch.msgid.link/20260303003421.2185681-4-yosry@kernel.org +[sean: guard with lbrv checks, massage changelog] +Signed-off-by: Sean Christopherson +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/svm/nested.c | 5 +++++ + arch/x86/kvm/svm/svm.c | 42 +++++++++++++++++++++++++++++++++++++----- + arch/x86/kvm/x86.c | 3 +++ + 3 files changed, 45 insertions(+), 5 deletions(-) + +--- a/arch/x86/kvm/svm/nested.c ++++ b/arch/x86/kvm/svm/nested.c +@@ -1109,6 +1109,11 @@ void svm_copy_vmrun_state(struct vmcb_sa + to_save->isst_addr = from_save->isst_addr; + to_save->ssp = from_save->ssp; + } ++ ++ if (kvm_cpu_cap_has(X86_FEATURE_LBRV)) { ++ svm_copy_lbrs(to_save, from_save); ++ to_save->dbgctl &= ~DEBUGCTL_RESERVED_BITS; ++ } + } + + void svm_copy_vmloadsave_state(struct vmcb *to_vmcb, struct vmcb *from_vmcb) +--- a/arch/x86/kvm/svm/svm.c ++++ b/arch/x86/kvm/svm/svm.c +@@ -2788,19 +2788,19 @@ static int svm_get_msr(struct kvm_vcpu * + msr_info->data = svm->tsc_aux; + break; + case MSR_IA32_DEBUGCTLMSR: +- msr_info->data = svm->vmcb->save.dbgctl; ++ msr_info->data = lbrv ? svm->vmcb->save.dbgctl : 0; + break; + case MSR_IA32_LASTBRANCHFROMIP: +- msr_info->data = svm->vmcb->save.br_from; ++ msr_info->data = lbrv ? svm->vmcb->save.br_from : 0; + break; + case MSR_IA32_LASTBRANCHTOIP: +- msr_info->data = svm->vmcb->save.br_to; ++ msr_info->data = lbrv ? svm->vmcb->save.br_to : 0; + break; + case MSR_IA32_LASTINTFROMIP: +- msr_info->data = svm->vmcb->save.last_excp_from; ++ msr_info->data = lbrv ? svm->vmcb->save.last_excp_from : 0; + break; + case MSR_IA32_LASTINTTOIP: +- msr_info->data = svm->vmcb->save.last_excp_to; ++ msr_info->data = lbrv ? 
svm->vmcb->save.last_excp_to : 0; + break; + case MSR_VM_HSAVE_PA: + msr_info->data = svm->nested.hsave_msr; +@@ -3075,6 +3075,38 @@ static int svm_set_msr(struct kvm_vcpu * + vmcb_mark_dirty(svm->vmcb, VMCB_LBR); + svm_update_lbrv(vcpu); + break; ++ case MSR_IA32_LASTBRANCHFROMIP: ++ if (!lbrv) ++ return KVM_MSR_RET_UNSUPPORTED; ++ if (!msr->host_initiated) ++ return 1; ++ svm->vmcb->save.br_from = data; ++ vmcb_mark_dirty(svm->vmcb, VMCB_LBR); ++ break; ++ case MSR_IA32_LASTBRANCHTOIP: ++ if (!lbrv) ++ return KVM_MSR_RET_UNSUPPORTED; ++ if (!msr->host_initiated) ++ return 1; ++ svm->vmcb->save.br_to = data; ++ vmcb_mark_dirty(svm->vmcb, VMCB_LBR); ++ break; ++ case MSR_IA32_LASTINTFROMIP: ++ if (!lbrv) ++ return KVM_MSR_RET_UNSUPPORTED; ++ if (!msr->host_initiated) ++ return 1; ++ svm->vmcb->save.last_excp_from = data; ++ vmcb_mark_dirty(svm->vmcb, VMCB_LBR); ++ break; ++ case MSR_IA32_LASTINTTOIP: ++ if (!lbrv) ++ return KVM_MSR_RET_UNSUPPORTED; ++ if (!msr->host_initiated) ++ return 1; ++ svm->vmcb->save.last_excp_to = data; ++ vmcb_mark_dirty(svm->vmcb, VMCB_LBR); ++ break; + case MSR_VM_HSAVE_PA: + /* + * Old kernels did not validate the value written to +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -351,6 +351,9 @@ static const u32 msrs_to_save_base[] = { + MSR_IA32_U_CET, MSR_IA32_S_CET, + MSR_IA32_PL0_SSP, MSR_IA32_PL1_SSP, MSR_IA32_PL2_SSP, + MSR_IA32_PL3_SSP, MSR_IA32_INT_SSP_TAB, ++ MSR_IA32_DEBUGCTLMSR, ++ MSR_IA32_LASTBRANCHFROMIP, MSR_IA32_LASTBRANCHTOIP, ++ MSR_IA32_LASTINTFROMIP, MSR_IA32_LASTINTTOIP, + }; + + static const u32 msrs_to_save_pmu[] = { diff --git a/queue-7.0/kvm-svm-explicitly-mark-vmcb01-dirty-after-modifying-vmcb-intercepts.patch b/queue-7.0/kvm-svm-explicitly-mark-vmcb01-dirty-after-modifying-vmcb-intercepts.patch new file mode 100644 index 0000000000..984bf5c409 --- /dev/null +++ b/queue-7.0/kvm-svm-explicitly-mark-vmcb01-dirty-after-modifying-vmcb-intercepts.patch @@ -0,0 +1,42 @@ +From 
d5bde6113aed8315a2bfe708730b721be9c2f48b Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Wed, 18 Feb 2026 15:09:51 -0800 +Subject: KVM: SVM: Explicitly mark vmcb01 dirty after modifying VMCB intercepts + +From: Sean Christopherson + +commit d5bde6113aed8315a2bfe708730b721be9c2f48b upstream. + +When reacting to an intercept update, explicitly mark vmcb01's intercepts +dirty, as KVM always initially operates on vmcb01, and nested_svm_vmexit() +isn't guaranteed to mark VMCB_INTERCEPTS as dirty. I.e. if L2 is active, +KVM will modify the intercepts for L1, but might not mark them as dirty +before the next VMRUN of L1. + +Fixes: 116a0a23676e ("KVM: SVM: Add clean-bit for intercetps, tsc-offset and pause filter count") +Cc: stable@vger.kernel.org +Reviewed-by: Yosry Ahmed +Link: https://patch.msgid.link/20260218230958.2877682-2-seanjc@google.com +Signed-off-by: Sean Christopherson +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/svm/nested.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/arch/x86/kvm/svm/nested.c ++++ b/arch/x86/kvm/svm/nested.c +@@ -128,11 +128,13 @@ void recalc_intercepts(struct vcpu_svm * + struct vmcb_ctrl_area_cached *g; + unsigned int i; + +- vmcb_mark_dirty(svm->vmcb, VMCB_INTERCEPTS); ++ vmcb_mark_dirty(svm->vmcb01.ptr, VMCB_INTERCEPTS); + + if (!is_guest_mode(&svm->vcpu)) + return; + ++ vmcb_mark_dirty(svm->vmcb, VMCB_INTERCEPTS); ++ + c = &svm->vmcb->control; + h = &svm->vmcb01.ptr->control; + g = &svm->nested.ctl; diff --git a/queue-7.0/kvm-svm-inject-ud-for-invlpga-if-efer.svme-0.patch b/queue-7.0/kvm-svm-inject-ud-for-invlpga-if-efer.svme-0.patch new file mode 100644 index 0000000000..0a8c0d7a82 --- /dev/null +++ b/queue-7.0/kvm-svm-inject-ud-for-invlpga-if-efer.svme-0.patch @@ -0,0 +1,36 @@ +From d99df02ff427f461102230f9c5b90a6c64ee8e23 Mon Sep 17 00:00:00 2001 +From: Kevin Cheng +Date: Sat, 28 Feb 2026 03:33:26 +0000 +Subject: KVM: SVM: Inject #UD for INVLPGA if EFER.SVME=0 + +From: Kevin Cheng + 
+commit d99df02ff427f461102230f9c5b90a6c64ee8e23 upstream. + +INVLPGA should cause a #UD when EFER.SVME is not set. Add a check to +properly inject #UD when EFER.SVME=0. + +Fixes: ff092385e828 ("KVM: SVM: Implement INVLPGA") +Cc: stable@vger.kernel.org +Signed-off-by: Kevin Cheng +Reviewed-by: Yosry Ahmed +Link: https://patch.msgid.link/20260228033328.2285047-3-chengkev@google.com +[sean: tag for stable@] +Signed-off-by: Sean Christopherson +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/svm/svm.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/arch/x86/kvm/svm/svm.c ++++ b/arch/x86/kvm/svm/svm.c +@@ -2366,6 +2366,9 @@ static int invlpga_interception(struct k + gva_t gva = kvm_rax_read(vcpu); + u32 asid = kvm_rcx_read(vcpu); + ++ if (nested_svm_check_permissions(vcpu)) ++ return 1; ++ + /* FIXME: Handle an address size prefix. */ + if (!is_long_mode(vcpu)) + gva = (u32)gva; diff --git a/queue-7.0/kvm-svm-switch-svm_copy_lbrs-to-a-macro.patch b/queue-7.0/kvm-svm-switch-svm_copy_lbrs-to-a-macro.patch new file mode 100644 index 0000000000..723873f591 --- /dev/null +++ b/queue-7.0/kvm-svm-switch-svm_copy_lbrs-to-a-macro.patch @@ -0,0 +1,94 @@ +From 361dbe8173c460a2bf8aee23920f6c2dbdcabb94 Mon Sep 17 00:00:00 2001 +From: Yosry Ahmed +Date: Tue, 3 Mar 2026 00:33:56 +0000 +Subject: KVM: SVM: Switch svm_copy_lbrs() to a macro + +From: Yosry Ahmed + +commit 361dbe8173c460a2bf8aee23920f6c2dbdcabb94 upstream. + +In preparation for using svm_copy_lbrs() with 'struct vmcb_save_area' +without a containing 'struct vmcb', and later even 'struct +vmcb_save_area_cached', make it a macro. + +Macros are generally not preferred compared to functions, mainly due to +type-safety. However, in this case it seems like having a simple macro +copying a few fields is better than copy-pasting the same 5 lines of +code in different places. 
+ +Cc: stable@vger.kernel.org +Signed-off-by: Yosry Ahmed +Link: https://patch.msgid.link/20260303003421.2185681-3-yosry@kernel.org +Signed-off-by: Sean Christopherson +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/svm/nested.c | 8 ++++---- + arch/x86/kvm/svm/svm.c | 9 --------- + arch/x86/kvm/svm/svm.h | 10 +++++++++- + 3 files changed, 13 insertions(+), 14 deletions(-) + +--- a/arch/x86/kvm/svm/nested.c ++++ b/arch/x86/kvm/svm/nested.c +@@ -721,10 +721,10 @@ static void nested_vmcb02_prepare_save(s + * Reserved bits of DEBUGCTL are ignored. Be consistent with + * svm_set_msr's definition of reserved bits. + */ +- svm_copy_lbrs(vmcb02, vmcb12); ++ svm_copy_lbrs(&vmcb02->save, &vmcb12->save); + vmcb02->save.dbgctl &= ~DEBUGCTL_RESERVED_BITS; + } else { +- svm_copy_lbrs(vmcb02, vmcb01); ++ svm_copy_lbrs(&vmcb02->save, &vmcb01->save); + } + vmcb_mark_dirty(vmcb02, VMCB_LBR); + svm_update_lbrv(&svm->vcpu); +@@ -1243,9 +1243,9 @@ int nested_svm_vmexit(struct vcpu_svm *s + + if (unlikely(guest_cpu_cap_has(vcpu, X86_FEATURE_LBRV) && + (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) { +- svm_copy_lbrs(vmcb12, vmcb02); ++ svm_copy_lbrs(&vmcb12->save, &vmcb02->save); + } else { +- svm_copy_lbrs(vmcb01, vmcb02); ++ svm_copy_lbrs(&vmcb01->save, &vmcb02->save); + vmcb_mark_dirty(vmcb01, VMCB_LBR); + } + +--- a/arch/x86/kvm/svm/svm.c ++++ b/arch/x86/kvm/svm/svm.c +@@ -841,15 +841,6 @@ static void svm_recalc_msr_intercepts(st + */ + } + +-void svm_copy_lbrs(struct vmcb *to_vmcb, struct vmcb *from_vmcb) +-{ +- to_vmcb->save.dbgctl = from_vmcb->save.dbgctl; +- to_vmcb->save.br_from = from_vmcb->save.br_from; +- to_vmcb->save.br_to = from_vmcb->save.br_to; +- to_vmcb->save.last_excp_from = from_vmcb->save.last_excp_from; +- to_vmcb->save.last_excp_to = from_vmcb->save.last_excp_to; +-} +- + static void __svm_enable_lbrv(struct kvm_vcpu *vcpu) + { + to_svm(vcpu)->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK; +--- a/arch/x86/kvm/svm/svm.h ++++ b/arch/x86/kvm/svm/svm.h +@@ 
-713,8 +713,16 @@ static inline void *svm_vcpu_alloc_msrpm + return svm_alloc_permissions_map(MSRPM_SIZE, GFP_KERNEL_ACCOUNT); + } + ++#define svm_copy_lbrs(to, from) \ ++do { \ ++ (to)->dbgctl = (from)->dbgctl; \ ++ (to)->br_from = (from)->br_from; \ ++ (to)->br_to = (from)->br_to; \ ++ (to)->last_excp_from = (from)->last_excp_from; \ ++ (to)->last_excp_to = (from)->last_excp_to; \ ++} while (0) ++ + void svm_vcpu_free_msrpm(void *msrpm); +-void svm_copy_lbrs(struct vmcb *to_vmcb, struct vmcb *from_vmcb); + void svm_enable_lbrv(struct kvm_vcpu *vcpu); + void svm_update_lbrv(struct kvm_vcpu *vcpu); + diff --git a/queue-7.0/kvm-x86-defer-non-architectural-deliver-of-exception-payload-to-userspace-read.patch b/queue-7.0/kvm-x86-defer-non-architectural-deliver-of-exception-payload-to-userspace-read.patch new file mode 100644 index 0000000000..8a4c784c79 --- /dev/null +++ b/queue-7.0/kvm-x86-defer-non-architectural-deliver-of-exception-payload-to-userspace-read.patch @@ -0,0 +1,175 @@ +From d0ad1b05bbe6f8da159a4dfb6692b3b7ce30ccc8 Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Tue, 17 Feb 2026 16:54:38 -0800 +Subject: KVM: x86: Defer non-architectural deliver of exception payload to userspace read + +From: Sean Christopherson + +commit d0ad1b05bbe6f8da159a4dfb6692b3b7ce30ccc8 upstream. + +When attempting to play nice with userspace that hasn't enabled +KVM_CAP_EXCEPTION_PAYLOAD, defer KVM's non-architectural delivery of the +payload until userspace actually reads relevant vCPU state, and more +importantly, force delivery of the payload in *all* paths where userspace +saves relevant vCPU state, not just KVM_GET_VCPU_EVENTS. + +Ignoring userspace save/restore for the moment, delivering the payload +before the exception is injected is wrong regardless of whether L1 or L2 +is running. 
To make matters even more confusing, the flaw *currently* +being papered over by the !is_guest_mode() check isn't even the same bug +that commit da998b46d244 ("kvm: x86: Defer setting of CR2 until #PF +delivery") was trying to avoid. + +At the time of commit da998b46d244, KVM didn't correctly handle exception +intercepts, as KVM would wait until VM-Entry into L2 was imminent to check +if the queued exception should morph to a nested VM-Exit. I.e. KVM would +deliver the payload to L2 and then synthesize a VM-Exit into L1. But the +payload was only the most blatant issue, e.g. waiting to check exception +intercepts would also lead to KVM incorrectly escalating a +should-be-intercepted #PF into a #DF. + +That underlying bug was eventually fixed by commit 7709aba8f716 ("KVM: x86: +Morph pending exceptions to pending VM-Exits at queue time"), but in the +interim, commit a06230b62b89 ("KVM: x86: Deliver exception payload on +KVM_GET_VCPU_EVENTS") came along and subtly added another dependency on +the !is_guest_mode() check. + +While not recorded in the changelog, the motivation for deferring the +!exception_payload_enabled delivery was to fix a flaw where a synthesized +MTF (Monitor Trap Flag) VM-Exit would drop a pending #DB and clobber DR6. +On a VM-Exit, VMX CPUs save pending #DB information into the VMCS, which +is emulated by KVM in nested_vmx_update_pending_dbg() by grabbing the +payload from the queue/pending exception. I.e. prematurely delivering the +payload would cause the pending #DB to not be recorded in the VMCS, and of +course, clobber L2's DR6 as seen by L1. + +Jumping back to save+restore, the quirked behavior of forcing delivery of +the payload only works if userspace does KVM_GET_VCPU_EVENTS *before* +CR2 or DR6 is saved, i.e. before KVM_GET_SREGS{,2} and KVM_GET_DEBUGREGS. +E.g. if userspace does KVM_GET_SREGS before KVM_GET_VCPU_EVENTS, then the +CR2 saved by userspace won't contain the payload for the exception saved by +KVM_GET_VCPU_EVENTS. 
+ +Deliberately deliver the payload in the store_regs() path, as it's the +least awful option even though userspace may not be doing save+restore. +Because if userspace _is_ doing save restore, it could elide KVM_GET_SREGS +knowing that SREGS were already saved when the vCPU exited. + +Link: https://lore.kernel.org/all/20200207103608.110305-1-oupton@google.com +Cc: Yosry Ahmed +Cc: stable@vger.kernel.org +Reviewed-by: Yosry Ahmed +Tested-by: Yosry Ahmed +Link: https://patch.msgid.link/20260218005438.2619063-1-seanjc@google.com +Signed-off-by: Sean Christopherson +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/x86.c | 62 +++++++++++++++++++++++++++++++++-------------------- + 1 file changed, 39 insertions(+), 23 deletions(-) + +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -864,9 +864,6 @@ static void kvm_multiple_exception(struc + vcpu->arch.exception.error_code = error_code; + vcpu->arch.exception.has_payload = has_payload; + vcpu->arch.exception.payload = payload; +- if (!is_guest_mode(vcpu)) +- kvm_deliver_exception_payload(vcpu, +- &vcpu->arch.exception); + return; + } + +@@ -5531,18 +5528,8 @@ static int kvm_vcpu_ioctl_x86_set_mce(st + return 0; + } + +-static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, +- struct kvm_vcpu_events *events) ++static struct kvm_queued_exception *kvm_get_exception_to_save(struct kvm_vcpu *vcpu) + { +- struct kvm_queued_exception *ex; +- +- process_nmi(vcpu); +- +-#ifdef CONFIG_KVM_SMM +- if (kvm_check_request(KVM_REQ_SMI, vcpu)) +- process_smi(vcpu); +-#endif +- + /* + * KVM's ABI only allows for one exception to be migrated. 
Luckily, + * the only time there can be two queued exceptions is if there's a +@@ -5553,21 +5540,46 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_ + if (vcpu->arch.exception_vmexit.pending && + !vcpu->arch.exception.pending && + !vcpu->arch.exception.injected) +- ex = &vcpu->arch.exception_vmexit; +- else +- ex = &vcpu->arch.exception; ++ return &vcpu->arch.exception_vmexit; ++ ++ return &vcpu->arch.exception; ++} ++ ++static void kvm_handle_exception_payload_quirk(struct kvm_vcpu *vcpu) ++{ ++ struct kvm_queued_exception *ex = kvm_get_exception_to_save(vcpu); + + /* +- * In guest mode, payload delivery should be deferred if the exception +- * will be intercepted by L1, e.g. KVM should not modifying CR2 if L1 +- * intercepts #PF, ditto for DR6 and #DBs. If the per-VM capability, +- * KVM_CAP_EXCEPTION_PAYLOAD, is not set, userspace may or may not +- * propagate the payload and so it cannot be safely deferred. Deliver +- * the payload if the capability hasn't been requested. ++ * If KVM_CAP_EXCEPTION_PAYLOAD is disabled, then (prematurely) deliver ++ * the pending exception payload when userspace saves *any* vCPU state ++ * that interacts with exception payloads to avoid breaking userspace. ++ * ++ * Architecturally, KVM must not deliver an exception payload until the ++ * exception is actually injected, e.g. to avoid losing pending #DB ++ * information (which VMX tracks in the VMCS), and to avoid clobbering ++ * state if the exception is never injected for whatever reason. But ++ * if KVM_CAP_EXCEPTION_PAYLOAD isn't enabled, then userspace may or ++ * may not propagate the payload across save+restore, and so KVM can't ++ * safely defer delivery of the payload. 
+ */ + if (!vcpu->kvm->arch.exception_payload_enabled && + ex->pending && ex->has_payload) + kvm_deliver_exception_payload(vcpu, ex); ++} ++ ++static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, ++ struct kvm_vcpu_events *events) ++{ ++ struct kvm_queued_exception *ex = kvm_get_exception_to_save(vcpu); ++ ++ process_nmi(vcpu); ++ ++#ifdef CONFIG_KVM_SMM ++ if (kvm_check_request(KVM_REQ_SMI, vcpu)) ++ process_smi(vcpu); ++#endif ++ ++ kvm_handle_exception_payload_quirk(vcpu); + + memset(events, 0, sizeof(*events)); + +@@ -5746,6 +5758,8 @@ static int kvm_vcpu_ioctl_x86_get_debugr + vcpu->arch.guest_state_protected) + return -EINVAL; + ++ kvm_handle_exception_payload_quirk(vcpu); ++ + memset(dbgregs, 0, sizeof(*dbgregs)); + + BUILD_BUG_ON(ARRAY_SIZE(vcpu->arch.db) != ARRAY_SIZE(dbgregs->db)); +@@ -12148,6 +12162,8 @@ static void __get_sregs_common(struct kv + if (vcpu->arch.guest_state_protected) + goto skip_protected_regs; + ++ kvm_handle_exception_payload_quirk(vcpu); ++ + kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS); + kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS); + kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES); diff --git a/queue-7.0/loongarch-kvm-use-csr_crmd_plv-in-kvm_arch_vcpu_in_kernel.patch b/queue-7.0/loongarch-kvm-use-csr_crmd_plv-in-kvm_arch_vcpu_in_kernel.patch new file mode 100644 index 0000000000..870ca11c43 --- /dev/null +++ b/queue-7.0/loongarch-kvm-use-csr_crmd_plv-in-kvm_arch_vcpu_in_kernel.patch @@ -0,0 +1,33 @@ +From da773ea3f59032f659bfc4c450ca86e384786168 Mon Sep 17 00:00:00 2001 +From: Tao Cui +Date: Thu, 9 Apr 2026 18:56:36 +0800 +Subject: LoongArch: KVM: Use CSR_CRMD_PLV in kvm_arch_vcpu_in_kernel() + +From: Tao Cui + +commit da773ea3f59032f659bfc4c450ca86e384786168 upstream. + +The function reads LOONGARCH_CSR_CRMD but uses CSR_PRMD_PPLV to +extract the privilege level. While both masks have the same value +(0x3), CSR_CRMD_PLV is the semantically correct constant for CRMD. 
+ +Cc: stable@vger.kernel.org +Reviewed-by: Bibo Mao +Signed-off-by: Tao Cui +Signed-off-by: Huacai Chen +Signed-off-by: Greg Kroah-Hartman +--- + arch/loongarch/kvm/vcpu.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/loongarch/kvm/vcpu.c ++++ b/arch/loongarch/kvm/vcpu.c +@@ -402,7 +402,7 @@ bool kvm_arch_vcpu_in_kernel(struct kvm_ + val = gcsr_read(LOONGARCH_CSR_CRMD); + preempt_enable(); + +- return (val & CSR_PRMD_PPLV) == PLV_KERN; ++ return (val & CSR_CRMD_PLV) == PLV_KERN; + } + + #ifdef CONFIG_GUEST_PERF_EVENTS diff --git a/queue-7.0/mm-damon-core-disallow-non-power-of-two-min_region_sz-on-damon_start.patch b/queue-7.0/mm-damon-core-disallow-non-power-of-two-min_region_sz-on-damon_start.patch new file mode 100644 index 0000000000..b35b6a43b3 --- /dev/null +++ b/queue-7.0/mm-damon-core-disallow-non-power-of-two-min_region_sz-on-damon_start.patch @@ -0,0 +1,43 @@ +From 95093e5cb4c5b50a5b1a4b79f2942b62744bd66a Mon Sep 17 00:00:00 2001 +From: SeongJae Park +Date: Sat, 11 Apr 2026 14:36:36 -0700 +Subject: mm/damon/core: disallow non-power of two min_region_sz on damon_start() + +From: SeongJae Park + +commit 95093e5cb4c5b50a5b1a4b79f2942b62744bd66a upstream. + +Commit d8f867fa0825 ("mm/damon: add damon_ctx->min_sz_region") introduced +a bug that allows unaligned DAMON region address ranges. Commit +c80f46ac228b ("mm/damon/core: disallow non-power of two min_region_sz") +fixed it, but only for damon_commit_ctx() use case. Still, DAMON sysfs +interface can emit non-power of two min_region_sz via damon_start(). Fix +the path by adding the is_power_of_2() check on damon_start(). + +The issue was discovered by sashiko [1]. 
+ +Link: https://lore.kernel.org/20260411213638.77768-1-sj@kernel.org +Link: https://lore.kernel.org/20260403155530.64647-1-sj@kernel.org [1] +Fixes: d8f867fa0825 ("mm/damon: add damon_ctx->min_sz_region") +Signed-off-by: SeongJae Park +Cc: # 6.18.x +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/damon/core.c | 5 +++++ + 1 file changed, 5 insertions(+) + +--- a/mm/damon/core.c ++++ b/mm/damon/core.c +@@ -1368,6 +1368,11 @@ int damon_start(struct damon_ctx **ctxs, + int i; + int err = 0; + ++ for (i = 0; i < nr_ctxs; i++) { ++ if (!is_power_of_2(ctxs[i]->min_region_sz)) ++ return -EINVAL; ++ } ++ + mutex_lock(&damon_lock); + if ((exclusive && nr_running_ctxs) || + (!exclusive && running_exclusive_ctxs)) { diff --git a/queue-7.0/mm-damon-core-disallow-time-quota-setting-zero-esz.patch b/queue-7.0/mm-damon-core-disallow-time-quota-setting-zero-esz.patch new file mode 100644 index 0000000000..6c9a7cb1e7 --- /dev/null +++ b/queue-7.0/mm-damon-core-disallow-time-quota-setting-zero-esz.patch @@ -0,0 +1,88 @@ +From 8bbde987c2b84f80da0853f739f0a920386f8b99 Mon Sep 17 00:00:00 2001 +From: SeongJae Park +Date: Mon, 6 Apr 2026 17:31:52 -0700 +Subject: mm/damon/core: disallow time-quota setting zero esz + +From: SeongJae Park + +commit 8bbde987c2b84f80da0853f739f0a920386f8b99 upstream. + +When the throughput of a DAMOS scheme is very slow, DAMOS time quota can +make the effective size quota smaller than damon_ctx->min_region_sz. In +the case, damos_apply_scheme() will skip applying the action, because the +action is tried at region level, which requires >=min_region_sz size. +That is, the quota is effectively exceeded for the quota charge window. + +Because no action will be applied, the total_charged_sz and +total_charged_ns are also not updated. damos_set_effective_quota() will +try to update the effective size quota before starting the next charge +window. 
However, because the total_charged_sz and total_charged_ns have +not updated, the throughput and effective size quota are also not changed. +Since effective size quota can only be decreased, other effective size +quota update factors including DAMOS quota goals and size quota cannot +make any change, either. + +As a result, the scheme is unexpectedly deactivated until the user notices +and mitigates the situation. The users can mitigate this situation by +changing the time quota online or re-install the scheme. While the +mitigation is somewhat straightforward, finding the situation would be +challenging, because DAMON is not providing good observabilities for that. +Even if such observability is provided, doing the additional monitoring +and the mitigation is somewhat cumbersome and not aligned to the intention +of the time quota. The time quota was intended to help reduce the user's +administration overhead. + +Fix the problem by setting time quota-modified effective size quota be at +least min_region_sz always. + +The issue was discovered [1] by sashiko. 
+ +Link: https://lore.kernel.org/20260407003153.79589-1-sj@kernel.org +Link: https://lore.kernel.org/20260405192504.110014-1-sj@kernel.org [1] +Fixes: 1cd243030059 ("mm/damon/schemes: implement time quota") +Signed-off-by: SeongJae Park +Cc: # 5.16.x +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/damon/core.c | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +--- a/mm/damon/core.c ++++ b/mm/damon/core.c +@@ -2225,7 +2225,8 @@ static unsigned long damos_quota_score(s + /* + * Called only if quota->ms, or quota->sz are set, or quota->goals is not empty + */ +-static void damos_set_effective_quota(struct damos_quota *quota) ++static void damos_set_effective_quota(struct damos_quota *quota, ++ struct damon_ctx *ctx) + { + unsigned long throughput; + unsigned long esz = ULONG_MAX; +@@ -2251,6 +2252,7 @@ static void damos_set_effective_quota(st + else + throughput = PAGE_SIZE * 1024; + esz = min(throughput * quota->ms, esz); ++ esz = max(ctx->min_region_sz, esz); + } + + if (quota->sz && quota->sz < esz) +@@ -2287,7 +2289,7 @@ static void damos_adjust_quota(struct da + /* First charge window */ + if (!quota->total_charged_sz && !quota->charged_from) { + quota->charged_from = jiffies; +- damos_set_effective_quota(quota); ++ damos_set_effective_quota(quota, c); + } + + /* New charge window starts */ +@@ -2301,7 +2303,7 @@ static void damos_adjust_quota(struct da + quota->charged_sz = 0; + if (trace_damos_esz_enabled()) + cached_esz = quota->esz; +- damos_set_effective_quota(quota); ++ damos_set_effective_quota(quota, c); + if (trace_damos_esz_enabled() && quota->esz != cached_esz) + damos_trace_esz(c, s, quota); + } diff --git a/queue-7.0/mm-damon-core-use-time_in_range_open-for-damos-quota-window-start.patch b/queue-7.0/mm-damon-core-use-time_in_range_open-for-damos-quota-window-start.patch new file mode 100644 index 0000000000..ea14965d91 --- /dev/null +++ 
b/queue-7.0/mm-damon-core-use-time_in_range_open-for-damos-quota-window-start.patch @@ -0,0 +1,56 @@ +From 049a57421dd67a28c45ae7e92c36df758033e5fa Mon Sep 17 00:00:00 2001 +From: SeongJae Park +Date: Sun, 29 Mar 2026 08:23:05 -0700 +Subject: mm/damon/core: use time_in_range_open() for damos quota window start + +From: SeongJae Park + +commit 049a57421dd67a28c45ae7e92c36df758033e5fa upstream. + +damos_adjust_quota() uses time_after_eq() to show if it is time to start a +new quota charge window, comparing the current jiffies and the scheduled +next charge window start time. If it is, the next charge window start +time is updated and the new charge window starts. + +The time check and next window start time update is skipped while the +scheme is deactivated by the watermarks. Let's suppose the deactivation +is kept more than LONG_MAX jiffies (assuming CONFIG_HZ of 250, more than +99 days in 32 bit systems and more than one billion years in 64 bit +systems), resulting in having the jiffies larger than the next charge +window start time + LONG_MAX. Then, the time_after_eq() call can return +false until another LONG_MAX jiffies are passed. + +This means the scheme can continue working after being reactivated by the +watermarks. But, soon, the quota will be exceeded and the scheme will +again effectively stop working until the next charge window starts. +Because the current charge window is extended to up to LONG_MAX jiffies, +however, it will look like it stopped unexpectedly and indefinitely, from +the user's perspective. + +Fix this by using !time_in_range_open() instead. + +The issue was discovered [1] by sashiko. 
+ +Link: https://lore.kernel.org/20260329152306.45796-1-sj@kernel.org +Link: https://lore.kernel.org/20260324040722.57944-1-sj@kernel.org [1] +Fixes: ee801b7dd782 ("mm/damon/schemes: activate schemes based on a watermarks mechanism") +Signed-off-by: SeongJae Park +Cc: # 5.16.x +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/damon/core.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/mm/damon/core.c ++++ b/mm/damon/core.c +@@ -2291,7 +2291,8 @@ static void damos_adjust_quota(struct da + } + + /* New charge window starts */ +- if (time_after_eq(jiffies, quota->charged_from + ++ if (!time_in_range_open(jiffies, quota->charged_from, ++ quota->charged_from + + msecs_to_jiffies(quota->reset_interval))) { + if (quota->esz && quota->charged_sz >= quota->esz) + s->stat.qt_exceeds++; diff --git a/queue-7.0/mm-damon-core-validate-damos_quota_goal-nid-for-node_mem_-used-free-_bp.patch b/queue-7.0/mm-damon-core-validate-damos_quota_goal-nid-for-node_mem_-used-free-_bp.patch new file mode 100644 index 0000000000..0a61afe359 --- /dev/null +++ b/queue-7.0/mm-damon-core-validate-damos_quota_goal-nid-for-node_mem_-used-free-_bp.patch @@ -0,0 +1,79 @@ +From 40250b2dded0604a112be605f3828700d80ad7c2 Mon Sep 17 00:00:00 2001 +From: SeongJae Park +Date: Sat, 28 Mar 2026 21:38:59 -0700 +Subject: mm/damon/core: validate damos_quota_goal->nid for node_mem_{used,free}_bp + +From: SeongJae Park + +commit 40250b2dded0604a112be605f3828700d80ad7c2 upstream. + +Patch series "mm/damon/core: validate damos_quota_goal->nid". + +node_mem[cg]_{used,free}_bp DAMOS quota goals receive the node id. The +node id is used for si_meminfo_node() and NODE_DATA() without proper +validation. As a result, privileged users can trigger an out of bounds +memory access using DAMON_SYSFS. Fix the issues. + +The issue was originally reported [1] with a fix by another author. 
The +original author announced [2] that they will stop working including the fix that was still in the review stage. Hence I'm restarting this. + + +This patch (of 2): + +Users can set damos_quota_goal->nid with arbitrary value for +node_mem_{used,free}_bp. But DAMON core is using those for +si_meminfo_node() without the validation of the value. This can result in +out of bounds memory access. The issue can actually be triggered using DAMON +user-space tool (damo), like below. + + $ sudo ./damo start --damos_action stat \ + --damos_quota_goal node_mem_used_bp 50% -1 \ + --damos_quota_interval 1s + $ sudo dmesg + [...] + [ 65.565986] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000098 + +Fix this issue by adding the validation of the given node. If an invalid +node id is given, it returns 0% for used memory ratio, and 100% for free +memory ratio. + +Link: https://lore.kernel.org/20260329043902.46163-2-sj@kernel.org +Link: https://lore.kernel.org/20260325073034.140353-1-objecting@objecting.org [1] +Link: https://lore.kernel.org/20260327040924.68553-1-sj@kernel.org [2] +Fixes: 0e1c773b501f ("mm/damon/core: introduce damos quota goal metrics for memory node utilization") +Signed-off-by: SeongJae Park +Cc: # 6.16.x +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/damon/core.c | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +--- a/mm/damon/core.c ++++ b/mm/damon/core.c +@@ -2078,12 +2078,24 @@ static inline u64 damos_get_some_mem_psi + #endif /* CONFIG_PSI */ + + #ifdef CONFIG_NUMA ++static bool invalid_mem_node(int nid) ++{ ++ return nid < 0 || nid >= MAX_NUMNODES || !node_state(nid, N_MEMORY); ++} ++ + static __kernel_ulong_t damos_get_node_mem_bp( + struct damos_quota_goal *goal) + { + struct sysinfo i; + __kernel_ulong_t numerator; + ++ if (invalid_mem_node(goal->nid)) { ++ if (goal->metric == DAMOS_QUOTA_NODE_MEM_USED_BP) ++ return 0; ++ else /* DAMOS_QUOTA_NODE_MEM_FREE_BP */ ++ return 10000; ++ } ++ + 
si_meminfo_node(&i, goal->nid); + if (goal->metric == DAMOS_QUOTA_NODE_MEM_USED_BP) + numerator = i.totalram - i.freeram; diff --git a/queue-7.0/mm-damon-core-validate-damos_quota_goal-nid-for-node_memcg_-used-free-_bp.patch b/queue-7.0/mm-damon-core-validate-damos_quota_goal-nid-for-node_memcg_-used-free-_bp.patch new file mode 100644 index 0000000000..e20954571b --- /dev/null +++ b/queue-7.0/mm-damon-core-validate-damos_quota_goal-nid-for-node_memcg_-used-free-_bp.patch @@ -0,0 +1,52 @@ +From a34dac6482e53e2c76944f25b1489b9b7da3a6e6 Mon Sep 17 00:00:00 2001 +From: SeongJae Park +Date: Sat, 28 Mar 2026 21:39:00 -0700 +Subject: mm/damon/core: validate damos_quota_goal->nid for node_memcg_{used,free}_bp + +From: SeongJae Park + +commit a34dac6482e53e2c76944f25b1489b9b7da3a6e6 upstream. + +Users can set damos_quota_goal->nid with arbitrary value for +node_memcg_{used,free}_bp. But DAMON core is using those for NODE_DATA() +without a validation of the value. This can result in out of bounds +memory access. The issue can actually be triggered using DAMON user-space +tool (damo), like below. + + $ sudo mkdir /sys/fs/cgroup/foo + $ sudo ./damo start --damos_action stat --damos_quota_interval 1s \ + --damos_quota_goal node_memcg_used_bp 50% -1 /foo + $ sudo dmesg + [...] + [ 524.181426] Unable to handle kernel paging request at virtual address 0000000000002c00 + +Fix this issue by adding the validation of the given node id. If an +invalid node id is given, it returns 0% for used memory ratio, and 100% +for free memory ratio. 
+ +Link: https://lore.kernel.org/20260329043902.46163-3-sj@kernel.org +Fixes: b74a120bcf50 ("mm/damon/core: implement DAMOS_QUOTA_NODE_MEMCG_USED_BP") +Signed-off-by: SeongJae Park +Cc: # 6.19.x +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/damon/core.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +--- a/mm/damon/core.c ++++ b/mm/damon/core.c +@@ -2112,6 +2112,13 @@ static unsigned long damos_get_node_memc + unsigned long used_pages, numerator; + struct sysinfo i; + ++ if (invalid_mem_node(goal->nid)) { ++ if (goal->metric == DAMOS_QUOTA_NODE_MEMCG_USED_BP) ++ return 0; ++ else /* DAMOS_QUOTA_NODE_MEMCG_FREE_BP */ ++ return 10000; ++ } ++ + memcg = mem_cgroup_get_from_id(goal->memcg_id); + if (!memcg) { + if (goal->metric == DAMOS_QUOTA_NODE_MEMCG_USED_BP) diff --git a/queue-7.0/mm-damon-stat-fix-memory-leak-on-damon_start-failure-in-damon_stat_start.patch b/queue-7.0/mm-damon-stat-fix-memory-leak-on-damon_start-failure-in-damon_stat_start.patch new file mode 100644 index 0000000000..d1b2e211a3 --- /dev/null +++ b/queue-7.0/mm-damon-stat-fix-memory-leak-on-damon_start-failure-in-damon_stat_start.patch @@ -0,0 +1,41 @@ +From e04ed278d25bf15769800bf6e35c6737f137186f Mon Sep 17 00:00:00 2001 +From: Jackie Liu +Date: Tue, 31 Mar 2026 18:15:53 +0800 +Subject: mm/damon/stat: fix memory leak on damon_start() failure in damon_stat_start() + +From: Jackie Liu + +commit e04ed278d25bf15769800bf6e35c6737f137186f upstream. + +Destroy the DAMON context and reset the global pointer when damon_start() +fails. Otherwise, the context allocated by damon_stat_build_ctx() is +leaked, and the stale damon_stat_context pointer will be overwritten on +the next enable attempt, making the old allocation permanently +unreachable. 
+ +Link: https://lore.kernel.org/20260331101553.88422-1-liu.yun@linux.dev +Fixes: 369c415e6073 ("mm/damon: introduce DAMON_STAT module") +Signed-off-by: Jackie Liu +Reviewed-by: SeongJae Park +Cc: # 6.17.x +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/damon/stat.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/mm/damon/stat.c ++++ b/mm/damon/stat.c +@@ -255,8 +255,11 @@ static int damon_stat_start(void) + if (!damon_stat_context) + return -ENOMEM; + err = damon_start(&damon_stat_context, 1, true); +- if (err) ++ if (err) { ++ damon_destroy_ctx(damon_stat_context); ++ damon_stat_context = NULL; + return err; ++ } + + damon_stat_last_refresh_jiffies = jiffies; + call_control.data = damon_stat_context; diff --git a/queue-7.0/mm-memfd_luo-fix-physical-address-conversion-in-put_folios-cleanup.patch b/queue-7.0/mm-memfd_luo-fix-physical-address-conversion-in-put_folios-cleanup.patch new file mode 100644 index 0000000000..87443a5737 --- /dev/null +++ b/queue-7.0/mm-memfd_luo-fix-physical-address-conversion-in-put_folios-cleanup.patch @@ -0,0 +1,57 @@ +From 3538f90ab89aaf302782b4b073a0aae66904cd67 Mon Sep 17 00:00:00 2001 +From: Chenghao Duan +Date: Thu, 26 Mar 2026 16:47:25 +0800 +Subject: mm/memfd_luo: fix physical address conversion in put_folios cleanup + +From: Chenghao Duan + +commit 3538f90ab89aaf302782b4b073a0aae66904cd67 upstream. + +In memfd_luo_retrieve_folios()'s put_folios cleanup path: + +1. kho_restore_folio() expects a phys_addr_t (physical address) but + receives a raw PFN (pfolio->pfn). This causes kho_restore_page() to + check the wrong physical address (pfn << PAGE_SHIFT instead of the + actual physical address). + +2. This loop lacks the !pfolio->pfn check that exists in the main + retrieval loop and memfd_luo_discard_folios(), which could + incorrectly process sparse file holes where pfn=0. 
+ +Fix by converting PFN to physical address with PFN_PHYS() and adding +the !pfolio->pfn check, matching the pattern used elsewhere in this file. + +This issue was identified by the AI review. +https://sashiko.dev/#/patchset/20260323110747.193569-1-duanchenghao@kylinos.cn + +Link: https://lore.kernel.org/20260326084727.118437-6-duanchenghao@kylinos.cn +Fixes: b3749f174d68 ("mm: memfd_luo: allow preserving memfd") +Signed-off-by: Chenghao Duan +Reviewed-by: Pasha Tatashin +Reviewed-by: Pratyush Yadav +Cc: Haoran Jiang +Cc: Mike Rapoport (Microsoft) +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/memfd_luo.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +--- a/mm/memfd_luo.c ++++ b/mm/memfd_luo.c +@@ -466,8 +466,13 @@ put_folios: + */ + for (long j = i + 1; j < nr_folios; j++) { + const struct memfd_luo_folio_ser *pfolio = &folios_ser[j]; ++ phys_addr_t phys; + +- folio = kho_restore_folio(pfolio->pfn); ++ if (!pfolio->pfn) ++ continue; ++ ++ phys = PFN_PHYS(pfolio->pfn); ++ folio = kho_restore_folio(phys); + if (folio) + folio_put(folio); + } diff --git a/queue-7.0/mm-mempolicy-fix-memory-leaks-in-weighted_interleave_auto_store.patch b/queue-7.0/mm-mempolicy-fix-memory-leaks-in-weighted_interleave_auto_store.patch new file mode 100644 index 0000000000..20922f1936 --- /dev/null +++ b/queue-7.0/mm-mempolicy-fix-memory-leaks-in-weighted_interleave_auto_store.patch @@ -0,0 +1,76 @@ +From 6fae274ce0e3109cbbc4c18b354eaace1f0af7d7 Mon Sep 17 00:00:00 2001 +From: Jackie Liu +Date: Wed, 1 Apr 2026 08:57:02 +0800 +Subject: mm/mempolicy: fix memory leaks in weighted_interleave_auto_store() + +From: Jackie Liu + +commit 6fae274ce0e3109cbbc4c18b354eaace1f0af7d7 upstream. + +weighted_interleave_auto_store() fetches old_wi_state inside the if +(!input) block only. This causes two memory leaks: + +1. 
When a user writes "false" and the current mode is already manual, + the function returns early without freeing the freshly allocated + new_wi_state. + +2. When a user writes "true", old_wi_state stays NULL because the + fetch is skipped entirely. The old state is then overwritten by + rcu_assign_pointer() but never freed, since the cleanup path is + gated on old_wi_state being non-NULL. A user can trigger this + repeatedly by writing "1" in a loop. + +Fix both leaks by moving the old_wi_state fetch before the input check, +making it unconditional. This also allows a unified early return for both +"true" and "false" when the requested mode matches the current mode. + +Link: https://lore.kernel.org/20260401005702.7096-1-liu.yun@linux.dev +Link: https://sashiko.dev/#/patchset/20260331100740.84906-1-liu.yun@linux.dev +Fixes: e341f9c3c841 ("mm/mempolicy: Weighted Interleave Auto-tuning") +Signed-off-by: Jackie Liu +Reviewed-by: Joshua Hahn +Reviewed-by: Donet Tom +Cc: Gregory Price +Cc: Alistair Popple +Cc: Byungchul Park +Cc: David Hildenbrand +Cc: # v6.16+ +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/mempolicy.c | 23 ++++++++++++----------- + 1 file changed, 12 insertions(+), 11 deletions(-) + +--- a/mm/mempolicy.c ++++ b/mm/mempolicy.c +@@ -3706,18 +3706,19 @@ static ssize_t weighted_interleave_auto_ + new_wi_state->iw_table[i] = 1; + + mutex_lock(&wi_state_lock); +- if (!input) { +- old_wi_state = rcu_dereference_protected(wi_state, +- lockdep_is_held(&wi_state_lock)); +- if (!old_wi_state) +- goto update_wi_state; +- if (input == old_wi_state->mode_auto) { +- mutex_unlock(&wi_state_lock); +- return count; +- } ++ old_wi_state = rcu_dereference_protected(wi_state, ++ lockdep_is_held(&wi_state_lock)); ++ ++ if (old_wi_state && input == old_wi_state->mode_auto) { ++ mutex_unlock(&wi_state_lock); ++ kfree(new_wi_state); ++ return count; ++ } + +- memcpy(new_wi_state->iw_table, old_wi_state->iw_table, +- nr_node_ids * sizeof(u8)); ++ if 
(!input) { ++ if (old_wi_state) ++ memcpy(new_wi_state->iw_table, old_wi_state->iw_table, ++ nr_node_ids * sizeof(u8)); + goto update_wi_state; + } + diff --git a/queue-7.0/mm-vmalloc-take-vmap_purge_lock-in-shrinker.patch b/queue-7.0/mm-vmalloc-take-vmap_purge_lock-in-shrinker.patch new file mode 100644 index 0000000000..ba17894401 --- /dev/null +++ b/queue-7.0/mm-vmalloc-take-vmap_purge_lock-in-shrinker.patch @@ -0,0 +1,42 @@ +From ec05f51f1e65bce95528543eb73fda56fd201d94 Mon Sep 17 00:00:00 2001 +From: "Uladzislau Rezki (Sony)" +Date: Mon, 13 Apr 2026 21:26:46 +0200 +Subject: mm/vmalloc: take vmap_purge_lock in shrinker + +From: Uladzislau Rezki (Sony) + +commit ec05f51f1e65bce95528543eb73fda56fd201d94 upstream. + +decay_va_pool_node() can be invoked concurrently from two paths: +__purge_vmap_area_lazy() when pools are being purged, and the shrinker via +vmap_node_shrink_scan(). + +However, decay_va_pool_node() is not safe to run concurrently, and the +shrinker path currently lacks serialization, leading to races and possible +leaks. + +Protect decay_va_pool_node() by taking vmap_purge_lock in the shrinker +path to ensure serialization with purge users. 
+ +Link: https://lore.kernel.org/20260413192646.14683-1-urezki@gmail.com +Fixes: 7679ba6b36db ("mm: vmalloc: add a shrinker to drain vmap pools") +Signed-off-by: Uladzislau Rezki (Sony) +Reviewed-by: Baoquan He +Cc: chenyichong +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/vmalloc.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/mm/vmalloc.c ++++ b/mm/vmalloc.c +@@ -5416,6 +5416,7 @@ vmap_node_shrink_scan(struct shrinker *s + { + struct vmap_node *vn; + ++ guard(mutex)(&vmap_purge_lock); + for_each_vmap_node(vn) + decay_va_pool_node(vn, true); + diff --git a/queue-7.0/mmc-block-use-single-block-write-in-retry.patch b/queue-7.0/mmc-block-use-single-block-write-in-retry.patch new file mode 100644 index 0000000000..8fb107a31d --- /dev/null +++ b/queue-7.0/mmc-block-use-single-block-write-in-retry.patch @@ -0,0 +1,92 @@ +From c7c6d4f5103864f73ee3a78bfd6da241f84197dd Mon Sep 17 00:00:00 2001 +From: Bin Liu +Date: Wed, 25 Mar 2026 08:49:47 -0500 +Subject: mmc: block: use single block write in retry + +From: Bin Liu + +commit c7c6d4f5103864f73ee3a78bfd6da241f84197dd upstream. + +Due to errata i2493[0], multi-block write would still fail in retries. + +With i2493, the MMC interface has the potential of write failures when +issuing multi-block writes operating in HS200 mode with excessive IO +supply noise. + +While the errata provides guidance in hardware design and layout to +minimize the IO supply noise, in theory the write failure cannot be +resolved in hardware. The software solution to ensure the data integrity +is to add minimum 5us delay between block writes. Single-block write is +the practical way to introduce the delay. + +This patch reuses recovery_mode flag, and switches to single-block +write in retry when multi-block write fails. It covers both CQE and +non-CQE cases. 
+ +[0] https://www.ti.com/lit/pdf/sprz582 +Cc: stable@vger.kernel.org +Suggested-by: Jens Axboe +Signed-off-by: Bin Liu +Signed-off-by: Ulf Hansson +Signed-off-by: Greg Kroah-Hartman +--- + drivers/mmc/core/block.c | 12 ++++++++++-- + drivers/mmc/core/queue.h | 3 +++ + 2 files changed, 13 insertions(+), 2 deletions(-) + +--- a/drivers/mmc/core/block.c ++++ b/drivers/mmc/core/block.c +@@ -1401,6 +1401,9 @@ static void mmc_blk_data_prep(struct mmc + rq_data_dir(req) == WRITE && + (md->flags & MMC_BLK_REL_WR); + ++ if (mqrq->flags & MQRQ_XFER_SINGLE_BLOCK) ++ recovery_mode = 1; ++ + memset(brq, 0, sizeof(struct mmc_blk_request)); + + mmc_crypto_prepare_req(mqrq); +@@ -1540,10 +1543,13 @@ static void mmc_blk_cqe_complete_rq(stru + err = 0; + + if (err) { +- if (mqrq->retries++ < MMC_CQE_RETRIES) ++ if (mqrq->retries++ < MMC_CQE_RETRIES) { ++ if (rq_data_dir(req) == WRITE) ++ mqrq->flags |= MQRQ_XFER_SINGLE_BLOCK; + blk_mq_requeue_request(req, true); +- else ++ } else { + blk_mq_end_request(req, BLK_STS_IOERR); ++ } + } else if (mrq->data) { + if (blk_update_request(req, BLK_STS_OK, mrq->data->bytes_xfered)) + blk_mq_requeue_request(req, true); +@@ -2085,6 +2091,8 @@ static void mmc_blk_mq_complete_rq(struc + } else if (!blk_rq_bytes(req)) { + __blk_mq_end_request(req, BLK_STS_IOERR); + } else if (mqrq->retries++ < MMC_MAX_RETRIES) { ++ if (rq_data_dir(req) == WRITE) ++ mqrq->flags |= MQRQ_XFER_SINGLE_BLOCK; + blk_mq_requeue_request(req, true); + } else { + if (mmc_card_removed(mq->card)) +--- a/drivers/mmc/core/queue.h ++++ b/drivers/mmc/core/queue.h +@@ -61,6 +61,8 @@ enum mmc_drv_op { + MMC_DRV_OP_GET_EXT_CSD, + }; + ++#define MQRQ_XFER_SINGLE_BLOCK BIT(0) ++ + struct mmc_queue_req { + struct mmc_blk_request brq; + struct scatterlist *sg; +@@ -69,6 +71,7 @@ struct mmc_queue_req { + void *drv_op_data; + unsigned int ioc_count; + int retries; ++ u32 flags; + }; + + struct mmc_queue { diff --git 
a/queue-7.0/mmc-sdhci-of-dwcmshc-disable-clock-before-dll-configuration.patch b/queue-7.0/mmc-sdhci-of-dwcmshc-disable-clock-before-dll-configuration.patch new file mode 100644 index 0000000000..853763f3cd --- /dev/null +++ b/queue-7.0/mmc-sdhci-of-dwcmshc-disable-clock-before-dll-configuration.patch @@ -0,0 +1,81 @@ +From 6546a49bbe656981d99a389195560999058c89c4 Mon Sep 17 00:00:00 2001 +From: Shawn Lin +Date: Wed, 8 Apr 2026 15:18:49 +0800 +Subject: mmc: sdhci-of-dwcmshc: Disable clock before DLL configuration + +From: Shawn Lin + +commit 6546a49bbe656981d99a389195560999058c89c4 upstream. + +According to the ASIC design recommendations, the clock must be +disabled before operating the DLL to prevent glitches that could +affect the internal digital logic. In extreme cases, failing to +do so may cause the controller to malfunction completely. + +Adds a step to disable the clock before DLL configuration and +re-enables it at the end. + +Fixes: 08f3dff799d4 ("mmc: sdhci-of-dwcmshc: add rockchip platform support") +Cc: stable@vger.kernel.org +Signed-off-by: Shawn Lin +Acked-by: Adrian Hunter +Signed-off-by: Ulf Hansson +Signed-off-by: Greg Kroah-Hartman +--- + drivers/mmc/host/sdhci-of-dwcmshc.c | 19 ++++++++++++++++--- + 1 file changed, 16 insertions(+), 3 deletions(-) + +--- a/drivers/mmc/host/sdhci-of-dwcmshc.c ++++ b/drivers/mmc/host/sdhci-of-dwcmshc.c +@@ -738,12 +738,15 @@ static void dwcmshc_rk3568_set_clock(str + extra |= BIT(4); + sdhci_writel(host, extra, reg); + ++ /* Disable clock while config DLL */ ++ sdhci_writew(host, 0, SDHCI_CLOCK_CONTROL); ++ + if (clock <= 52000000) { + if (host->mmc->ios.timing == MMC_TIMING_MMC_HS200 || + host->mmc->ios.timing == MMC_TIMING_MMC_HS400) { + dev_err(mmc_dev(host->mmc), + "Can't reduce the clock below 52MHz in HS200/HS400 mode"); +- return; ++ goto enable_clk; + } + + /* +@@ -763,7 +766,7 @@ static void dwcmshc_rk3568_set_clock(str + DLL_STRBIN_DELAY_NUM_SEL | + DLL_STRBIN_DELAY_NUM_DEFAULT << 
DLL_STRBIN_DELAY_NUM_OFFSET; + sdhci_writel(host, extra, DWCMSHC_EMMC_DLL_STRBIN); +- return; ++ goto enable_clk; + } + + /* Reset DLL */ +@@ -790,7 +793,7 @@ static void dwcmshc_rk3568_set_clock(str + 500 * USEC_PER_MSEC); + if (err) { + dev_err(mmc_dev(host->mmc), "DLL lock timeout!\n"); +- return; ++ goto enable_clk; + } + + extra = 0x1 << 16 | /* tune clock stop en */ +@@ -823,6 +826,16 @@ static void dwcmshc_rk3568_set_clock(str + DLL_STRBIN_TAPNUM_DEFAULT | + DLL_STRBIN_TAPNUM_FROM_SW; + sdhci_writel(host, extra, DWCMSHC_EMMC_DLL_STRBIN); ++ ++enable_clk: ++ /* ++ * The sdclk frequency select bits in SDHCI_CLOCK_CONTROL are not functional ++ * on Rockchip's SDHCI implementation. Instead, the clock frequency is fully ++ * controlled via external clk provider by calling clk_set_rate(). Consequently, ++ * passing 0 to sdhci_enable_clk() only re-enables the already-configured clock, ++ * which matches the hardware's actual behavior. ++ */ ++ sdhci_enable_clk(host, 0); + } + + static void rk35xx_sdhci_reset(struct sdhci_host *host, u8 mask) diff --git a/queue-7.0/pwm-imx-tpm-count-the-number-of-enabled-channels-in-probe.patch b/queue-7.0/pwm-imx-tpm-count-the-number-of-enabled-channels-in-probe.patch new file mode 100644 index 0000000000..8c5c478126 --- /dev/null +++ b/queue-7.0/pwm-imx-tpm-count-the-number-of-enabled-channels-in-probe.patch @@ -0,0 +1,59 @@ +From 3962c24f2d14e8a7f8a23f56b7ce320523947342 Mon Sep 17 00:00:00 2001 +From: "Viorel Suman (OSS)" +Date: Wed, 11 Mar 2026 14:33:09 +0200 +Subject: pwm: imx-tpm: Count the number of enabled channels in probe +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Viorel Suman (OSS) + +commit 3962c24f2d14e8a7f8a23f56b7ce320523947342 upstream. 
+ +On a soft reset TPM PWM IP may preserve its internal state from previous +runtime, therefore on a subsequent OS boot and driver probe +"enable_count" value and TPM PWM IP internal channels "enabled" states +may get unaligned. In consequence on a suspend/resume cycle the call "if +(--tpm->enable_count == 0)" may lead to an "enable_count" overflow, with the +system being blocked from entering suspend due to: + + if (tpm->enable_count > 0) + return -EBUSY; + +Fix the problem by counting the enabled channels in the probe function. + +Signed-off-by: Viorel Suman (OSS) +Fixes: 738a1cfec2ed ("pwm: Add i.MX TPM PWM driver support") +Link: https://patch.msgid.link/20260311123309.348904-1-viorel.suman@oss.nxp.com +Cc: stable@vger.kernel.org +Signed-off-by: Uwe Kleine-König +Signed-off-by: Greg Kroah-Hartman +--- + drivers/pwm/pwm-imx-tpm.c | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +--- a/drivers/pwm/pwm-imx-tpm.c ++++ b/drivers/pwm/pwm-imx-tpm.c +@@ -352,7 +352,7 @@ static int pwm_imx_tpm_probe(struct plat + struct clk *clk; + void __iomem *base; + int ret; +- unsigned int npwm; ++ unsigned int i, npwm; + u32 val; + + base = devm_platform_ioremap_resource(pdev, 0); +@@ -382,6 +382,13 @@ static int pwm_imx_tpm_probe(struct plat + + mutex_init(&tpm->lock); + ++ /* count the enabled channels */ ++ for (i = 0; i < npwm; ++i) { ++ val = readl(base + PWM_IMX_TPM_CnSC(i)); ++ if (FIELD_GET(PWM_IMX_TPM_CnSC_ELS, val)) ++ ++tpm->enable_count; ++ } ++ + ret = devm_pwmchip_add(&pdev->dev, chip); + if (ret) + return dev_err_probe(&pdev->dev, ret, "failed to add PWM chip\n"); diff --git a/queue-7.0/randomize_kstack-maintain-kstack_offset-per-task.patch b/queue-7.0/randomize_kstack-maintain-kstack_offset-per-task.patch new file mode 100644 index 0000000000..92783ea208 --- /dev/null +++ b/queue-7.0/randomize_kstack-maintain-kstack_offset-per-task.patch @@ -0,0 +1,155 @@ +From 37beb42560165869838e7d91724f3e629db64129 Mon Sep 17 00:00:00 2001 +From: Ryan Roberts +Date: Tue, 3 
Mar 2026 15:08:38 +0000 +Subject: randomize_kstack: Maintain kstack_offset per task + +From: Ryan Roberts + +commit 37beb42560165869838e7d91724f3e629db64129 upstream. + +kstack_offset was previously maintained per-cpu, but this caused a +couple of issues. So let's instead make it per-task. + +Issue 1: add_random_kstack_offset() and choose_random_kstack_offset() +expected and required to be called with interrupts and preemption +disabled so that it could manipulate per-cpu state. But arm64, loongarch +and risc-v are calling them with interrupts and preemption enabled. I +don't _think_ this causes any functional issues, but it's certainly +unexpected and could lead to manipulating the wrong cpu's state, which +could cause a minor performance degradation due to bouncing the cache +lines. By maintaining the state per-task those functions can safely be +called in preemptible context. + +Issue 2: add_random_kstack_offset() is called before executing the +syscall and expands the stack using a previously chosen random offset. +choose_random_kstack_offset() is called after executing the syscall and +chooses and stores a new random offset for the next syscall. With +per-cpu storage for this offset, an attacker could force cpu migration +during the execution of the syscall and prevent the offset from being +updated for the original cpu such that it is predictable for the next +syscall on that cpu. By maintaining the state per-task, this problem +goes away because the per-task random offset is updated after the +syscall regardless of which cpu it is executing on. 
+ +Fixes: 39218ff4c625 ("stack: Optionally randomize kernel stack offset each syscall") +Closes: https://lore.kernel.org/all/dd8c37bc-795f-4c7a-9086-69e584d8ab24@arm.com/ +Cc: stable@vger.kernel.org +Acked-by: Mark Rutland +Signed-off-by: Ryan Roberts +Link: https://patch.msgid.link/20260303150840.3789438-2-ryan.roberts@arm.com +Signed-off-by: Kees Cook +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/randomize_kstack.h | 26 +++++++++++++++----------- + include/linux/sched.h | 4 ++++ + init/main.c | 1 - + kernel/fork.c | 2 ++ + 4 files changed, 21 insertions(+), 12 deletions(-) + +--- a/include/linux/randomize_kstack.h ++++ b/include/linux/randomize_kstack.h +@@ -9,7 +9,6 @@ + + DECLARE_STATIC_KEY_MAYBE(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT, + randomize_kstack_offset); +-DECLARE_PER_CPU(u32, kstack_offset); + + /* + * Do not use this anywhere else in the kernel. This is used here because +@@ -50,15 +49,14 @@ DECLARE_PER_CPU(u32, kstack_offset); + * add_random_kstack_offset - Increase stack utilization by previously + * chosen random offset + * +- * This should be used in the syscall entry path when interrupts and +- * preempt are disabled, and after user registers have been stored to +- * the stack. For testing the resulting entropy, please see: +- * tools/testing/selftests/lkdtm/stack-entropy.sh ++ * This should be used in the syscall entry path after user registers have been ++ * stored to the stack. Preemption may be enabled. For testing the resulting ++ * entropy, please see: tools/testing/selftests/lkdtm/stack-entropy.sh + */ + #define add_random_kstack_offset() do { \ + if (static_branch_maybe(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT, \ + &randomize_kstack_offset)) { \ +- u32 offset = raw_cpu_read(kstack_offset); \ ++ u32 offset = current->kstack_offset; \ + u8 *ptr = __kstack_alloca(KSTACK_OFFSET_MAX(offset)); \ + /* Keep allocation even after "ptr" loses scope. 
*/ \ + asm volatile("" :: "r"(ptr) : "memory"); \ +@@ -69,9 +67,9 @@ DECLARE_PER_CPU(u32, kstack_offset); + * choose_random_kstack_offset - Choose the random offset for the next + * add_random_kstack_offset() + * +- * This should only be used during syscall exit when interrupts and +- * preempt are disabled. This position in the syscall flow is done to +- * frustrate attacks from userspace attempting to learn the next offset: ++ * This should only be used during syscall exit. Preemption may be enabled. This ++ * position in the syscall flow is done to frustrate attacks from userspace ++ * attempting to learn the next offset: + * - Maximize the timing uncertainty visible from userspace: if the + * offset is chosen at syscall entry, userspace has much more control + * over the timing between choosing offsets. "How long will we be in +@@ -85,14 +83,20 @@ DECLARE_PER_CPU(u32, kstack_offset); + #define choose_random_kstack_offset(rand) do { \ + if (static_branch_maybe(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT, \ + &randomize_kstack_offset)) { \ +- u32 offset = raw_cpu_read(kstack_offset); \ ++ u32 offset = current->kstack_offset; \ + offset = ror32(offset, 5) ^ (rand); \ +- raw_cpu_write(kstack_offset, offset); \ ++ current->kstack_offset = offset; \ + } \ + } while (0) ++ ++static inline void random_kstack_task_init(struct task_struct *tsk) ++{ ++ tsk->kstack_offset = 0; ++} + #else /* CONFIG_RANDOMIZE_KSTACK_OFFSET */ + #define add_random_kstack_offset() do { } while (0) + #define choose_random_kstack_offset(rand) do { } while (0) ++#define random_kstack_task_init(tsk) do { } while (0) + #endif /* CONFIG_RANDOMIZE_KSTACK_OFFSET */ + + #endif +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -1592,6 +1592,10 @@ struct task_struct { + unsigned long prev_lowest_stack; + #endif + ++#ifdef CONFIG_RANDOMIZE_KSTACK_OFFSET ++ u32 kstack_offset; ++#endif ++ + #ifdef CONFIG_X86_MCE + void __user *mce_vaddr; + __u64 mce_kflags; +--- a/init/main.c ++++ b/init/main.c +@@ 
-833,7 +833,6 @@ static inline void initcall_debug_enable + #ifdef CONFIG_RANDOMIZE_KSTACK_OFFSET + DEFINE_STATIC_KEY_MAYBE_RO(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT, + randomize_kstack_offset); +-DEFINE_PER_CPU(u32, kstack_offset); + + static int __init early_randomize_kstack_offset(char *buf) + { +--- a/kernel/fork.c ++++ b/kernel/fork.c +@@ -95,6 +95,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -2233,6 +2234,7 @@ __latent_entropy struct task_struct *cop + if (retval) + goto bad_fork_cleanup_io; + ++ random_kstack_task_init(p); + stackleak_task_init(p); + + if (pid != &init_struct_pid) { diff --git a/queue-7.0/rtc-ntxec-fix-of-node-reference-imbalance.patch b/queue-7.0/rtc-ntxec-fix-of-node-reference-imbalance.patch new file mode 100644 index 0000000000..06f8ab75a2 --- /dev/null +++ b/queue-7.0/rtc-ntxec-fix-of-node-reference-imbalance.patch @@ -0,0 +1,41 @@ +From 30c4d2f26bb3538c328035cea2e6265c8320539e Mon Sep 17 00:00:00 2001 +From: Johan Hovold +Date: Tue, 7 Apr 2026 14:27:17 +0200 +Subject: rtc: ntxec: fix OF node reference imbalance +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Johan Hovold + +commit 30c4d2f26bb3538c328035cea2e6265c8320539e upstream. + +The driver reuses the OF node of the parent multi-function device but +fails to take another reference to balance the one dropped by the +platform bus code when unbinding the MFD and deregistering the child +devices. + +Fix this by using the intended helper for reusing OF nodes. 
+ +Fixes: 435af89786c6 ("rtc: New driver for RTC in Netronix embedded controller") +Cc: stable@vger.kernel.org # 5.13 +Cc: Jonathan Neuschäfer +Signed-off-by: Johan Hovold +Link: https://patch.msgid.link/20260407122717.2676774-1-johan@kernel.org +Signed-off-by: Alexandre Belloni +Signed-off-by: Greg Kroah-Hartman +--- + drivers/rtc/rtc-ntxec.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/rtc/rtc-ntxec.c ++++ b/drivers/rtc/rtc-ntxec.c +@@ -110,7 +110,7 @@ static int ntxec_rtc_probe(struct platfo + struct rtc_device *dev; + struct ntxec_rtc *rtc; + +- pdev->dev.of_node = pdev->dev.parent->of_node; ++ device_set_of_node_from_dev(&pdev->dev, pdev->dev.parent); + + rtc = devm_kzalloc(&pdev->dev, sizeof(*rtc), GFP_KERNEL); + if (!rtc) diff --git a/queue-7.0/series b/queue-7.0/series index ad00972e9e..5258dabea1 100644 --- a/queue-7.0/series +++ b/queue-7.0/series @@ -168,3 +168,61 @@ media-rzv2h-ivc-fix-axirx_vblank-register-write.patch fs-prepare-for-adding-lsm-blob-to-backing_file.patch lsm-add-backing_file-lsm-hooks.patch selinux-fix-overlayfs-mmap-and-mprotect-access-checks.patch +hwmon-pt5161l-fix-bugs-in-pt5161l_read_block_data.patch +randomize_kstack-maintain-kstack_offset-per-task.patch +mmc-block-use-single-block-write-in-retry.patch +mmc-sdhci-of-dwcmshc-disable-clock-before-dll-configuration.patch +arm64-dts-ti-am62-verdin-enable-pullup-for-emmc-data-pins.patch +crypto-qat-fix-irq-cleanup-on-6xxx-probe-failure.patch +xfs-start-gc-on-zonegc_low_space-attribute-updates.patch +xfs-fix-a-resource-leak-in-xfs_alloc_buftarg.patch +firmware-google-framebuffer-do-not-unregister-platform-device.patch +firmware-exynos-acpm-drop-fake-const-on-handle-pointer.patch +crypto-talitos-fix-sec1-32k-ahash-request-limitation.patch +crypto-talitos-rename-first-last-to-first_desc-last_desc.patch +pwm-imx-tpm-count-the-number-of-enabled-channels-in-probe.patch +tpm2-sessions-fix-missing-tpm_buf_destroy-in-tpm2_read_public.patch 
+tpm-fix-auth-session-leak-in-tpm2_get_random-error-path.patch +tpm-use-kfree_sensitive-to-free-auth-session-in-tpm_dev_release.patch +tpm-tpm_tis-add-error-logging-for-data-transfer.patch +tpm-tpm_tis-stop-transmit-if-retries-are-exhausted.patch +rtc-ntxec-fix-of-node-reference-imbalance.patch +mm-vmalloc-take-vmap_purge_lock-in-shrinker.patch +mm-memfd_luo-fix-physical-address-conversion-in-put_folios-cleanup.patch +mm-mempolicy-fix-memory-leaks-in-weighted_interleave_auto_store.patch +mm-damon-stat-fix-memory-leak-on-damon_start-failure-in-damon_stat_start.patch +mm-damon-core-validate-damos_quota_goal-nid-for-node_mem_-used-free-_bp.patch +mm-damon-core-validate-damos_quota_goal-nid-for-node_memcg_-used-free-_bp.patch +mm-damon-core-use-time_in_range_open-for-damos-quota-window-start.patch +mm-damon-core-disallow-time-quota-setting-zero-esz.patch +mm-damon-core-disallow-non-power-of-two-min_region_sz-on-damon_start.patch +userfaultfd-allow-registration-of-ranges-below-mmap_min_addr.patch +loongarch-kvm-use-csr_crmd_plv-in-kvm_arch_vcpu_in_kernel.patch +kvm-x86-defer-non-architectural-deliver-of-exception-payload-to-userspace-read.patch +kvm-nsvm-mark-all-of-vmcb02-dirty-when-restoring-nested-state.patch +kvm-nsvm-sync-nextrip-to-cached-vmcb12-after-vmrun-of-l2.patch +kvm-nsvm-sync-interrupt-shadow-to-cached-vmcb12-after-vmrun-of-l2.patch +kvm-svm-inject-ud-for-invlpga-if-efer.svme-0.patch +kvm-svm-explicitly-mark-vmcb01-dirty-after-modifying-vmcb-intercepts.patch +kvm-nsvm-ensure-avic-is-inhibited-when-restoring-a-vcpu-to-guest-mode.patch +kvm-nsvm-always-use-nextrip-as-vmcb02-s-nextrip-after-first-l2-vmrun.patch +kvm-nsvm-delay-stuffing-l2-s-current-rip-into-nextrip-until-vcpu-run.patch +kvm-nsvm-use-vcpu-arch.cr2-when-updating-vmcb12-on-nested-vmexit.patch +kvm-arm64-account-for-resx-bits-in-__compute_fgt.patch +kvm-nsvm-avoid-clearing-vmcb_lbr-in-vmcb12.patch +kvm-nsvm-delay-setting-soft-irq-rip-tracking-fields-until-vcpu-run.patch 
+kvm-svm-switch-svm_copy_lbrs-to-a-macro.patch +kvm-svm-add-missing-save-restore-handling-of-lbr-msrs.patch +kvm-nsvm-always-inject-a-gp-if-mapping-vmcb12-fails-on-nested-vmrun.patch +kvm-nsvm-refactor-checking-lbrv-enablement-in-vmcb12-into-a-helper.patch +kvm-nsvm-refactor-writing-vmcb12-on-nested-vmexit-as-a-helper.patch +kvm-nsvm-triple-fault-if-restore-host-cr3-fails-on-nested-vmexit.patch +kvm-nsvm-triple-fault-if-mapping-vmcb12-fails-on-nested-vmexit.patch +kvm-nsvm-clear-gif-on-nested-vmexit-invalid.patch +kvm-nsvm-clear-eventinj-fields-in-vmcb12-on-nested-vmexit.patch +kvm-nsvm-clear-tracking-of-l1-l2-nmi-and-soft-irq-on-nested-vmexit.patch +kvm-nsvm-add-missing-consistency-check-for-efer-cr0-cr4-and-cs.patch +kvm-nsvm-drop-the-non-architectural-consistency-check-for-np_enable.patch +kvm-nsvm-add-missing-consistency-check-for-ncr3-validity.patch +kvm-nsvm-raise-ud-if-unhandled-vmmcall-isn-t-intercepted-by-l1.patch +kvm-nsvm-always-intercept-vmmcall-when-l2-is-active.patch diff --git a/queue-7.0/tpm-fix-auth-session-leak-in-tpm2_get_random-error-path.patch b/queue-7.0/tpm-fix-auth-session-leak-in-tpm2_get_random-error-path.patch new file mode 100644 index 0000000000..5a3fc6e6f2 --- /dev/null +++ b/queue-7.0/tpm-fix-auth-session-leak-in-tpm2_get_random-error-path.patch @@ -0,0 +1,47 @@ +From 666c1a2ca603d8314231200bf8bbb3a81bd64c6b Mon Sep 17 00:00:00 2001 +From: Gunnar Kudrjavets +Date: Wed, 8 Apr 2026 12:00:27 +0300 +Subject: tpm: Fix auth session leak in tpm2_get_random() error path + +From: Gunnar Kudrjavets + +commit 666c1a2ca603d8314231200bf8bbb3a81bd64c6b upstream. + +When tpm_buf_fill_hmac_session() fails inside the do-while loop in +tpm2_get_random(), the function returns directly after destroying the +buffer, without ending the auth session via tpm2_end_auth_session(). + +This leaks the TPM auth session resource. All other error paths within +the loop correctly reach the 'out' label which calls both +tpm_buf_destroy() and tpm2_end_auth_session(). 
+ +Fix this by replacing the early return with a goto to the existing 'out' +label, which already handles both cleanup operations. The redundant +tpm_buf_destroy() call is removed since 'out' takes care of it. + +Cc: stable@vger.kernel.org # v6.19+ +Fixes: 6e9722e9a7bf ("tpm2-sessions: Fix out of range indexing in name_size") +Signed-off-by: Gunnar Kudrjavets +Reviewed-by: Justinien Bouron +Reviewed-by: Jarkko Sakkinen +Signed-off-by: Jarkko Sakkinen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/char/tpm/tpm2-cmd.c | 6 ++---- + 1 file changed, 2 insertions(+), 4 deletions(-) + +--- a/drivers/char/tpm/tpm2-cmd.c ++++ b/drivers/char/tpm/tpm2-cmd.c +@@ -295,10 +295,8 @@ int tpm2_get_random(struct tpm_chip *chi + } + tpm_buf_append_u16(&buf, num_bytes); + err = tpm_buf_fill_hmac_session(chip, &buf); +- if (err) { +- tpm_buf_destroy(&buf); +- return err; +- } ++ if (err) ++ goto out; + + err = tpm_transmit_cmd(chip, &buf, + offsetof(struct tpm2_get_random_out, diff --git a/queue-7.0/tpm-tpm_tis-add-error-logging-for-data-transfer.patch b/queue-7.0/tpm-tpm_tis-add-error-logging-for-data-transfer.patch new file mode 100644 index 0000000000..e7fad8ba74 --- /dev/null +++ b/queue-7.0/tpm-tpm_tis-add-error-logging-for-data-transfer.patch @@ -0,0 +1,42 @@ +From 0471921e2d1043dcc6de5cffb49dd37709521abe Mon Sep 17 00:00:00 2001 +From: Jacqueline Wong +Date: Wed, 15 Apr 2026 16:00:05 +0000 +Subject: tpm: tpm_tis: add error logging for data transfer + +From: Jacqueline Wong + +commit 0471921e2d1043dcc6de5cffb49dd37709521abe upstream. 
+ +Add logging to more easily determine reason for transmit failure + +Cc: stable@vger.kernel.org # v6.6+ +Fixes: 280db21e153d8 ("tpm_tis: Resend command to recover from data transfer errors") +Signed-off-by: Jacqueline Wong +Signed-off-by: Jordan Hand +Link: https://lore.kernel.org/r/20260415160006.2275325-2-jacqwong@google.com +Signed-off-by: Jarkko Sakkinen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/char/tpm/tpm_tis_core.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/drivers/char/tpm/tpm_tis_core.c ++++ b/drivers/char/tpm/tpm_tis_core.c +@@ -471,6 +471,8 @@ static int tpm_tis_send_data(struct tpm_ + status = tpm_tis_status(chip); + if (!itpm && (status & TPM_STS_DATA_EXPECT) == 0) { + rc = -EIO; ++ dev_err(&chip->dev, "TPM_STS_DATA_EXPECT should be set. sts = 0x%08x\n", ++ status); + goto out_err; + } + } +@@ -491,6 +493,8 @@ static int tpm_tis_send_data(struct tpm_ + status = tpm_tis_status(chip); + if (!itpm && (status & TPM_STS_DATA_EXPECT) != 0) { + rc = -EIO; ++ dev_err(&chip->dev, "TPM_STS_DATA_EXPECT should be unset. sts = 0x%08x\n", ++ status); + goto out_err; + } + diff --git a/queue-7.0/tpm-tpm_tis-stop-transmit-if-retries-are-exhausted.patch b/queue-7.0/tpm-tpm_tis-stop-transmit-if-retries-are-exhausted.patch new file mode 100644 index 0000000000..a1104746c3 --- /dev/null +++ b/queue-7.0/tpm-tpm_tis-stop-transmit-if-retries-are-exhausted.patch @@ -0,0 +1,48 @@ +From 949692da7211572fac419b2986b6abc0cd1aeb76 Mon Sep 17 00:00:00 2001 +From: Jacqueline Wong +Date: Wed, 15 Apr 2026 16:00:06 +0000 +Subject: tpm: tpm_tis: stop transmit if retries are exhausted + +From: Jacqueline Wong + +commit 949692da7211572fac419b2986b6abc0cd1aeb76 upstream. + +tpm_tis_send_main() will attempt to retry sending data TPM_RETRY times. +Currently, if those retries are exhausted, the driver will attempt to +call execute. The TPM will be in the wrong state, leading to the +operation simply timing out. 
+ +Instead, if there is still an error after retries are exhausted, return +that error immediately. + +Cc: stable@vger.kernel.org # v6.6+ +Fixes: 280db21e153d8 ("tpm_tis: Resend command to recover from data transfer errors") +Signed-off-by: Jacqueline Wong +Signed-off-by: Jordan Hand +Link: https://lore.kernel.org/r/20260415160006.2275325-3-jacqwong@google.com +Signed-off-by: Jarkko Sakkinen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/char/tpm/tpm_tis_core.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +--- a/drivers/char/tpm/tpm_tis_core.c ++++ b/drivers/char/tpm/tpm_tis_core.c +@@ -556,11 +556,16 @@ static int tpm_tis_send_main(struct tpm_ + break; + else if (rc != -EAGAIN && rc != -EIO) + /* Data transfer failed, not recoverable */ +- return rc; ++ goto out_err; + + usleep_range(priv->timeout_min, priv->timeout_max); + } + ++ if (rc == -EAGAIN || rc == -EIO) { ++ dev_err(&chip->dev, "Exhausted %d tpm_tis_send_data retries\n", TPM_RETRY); ++ goto out_err; ++ } ++ + /* go and do it */ + rc = tpm_tis_write8(priv, TPM_STS(priv->locality), TPM_STS_GO); + if (rc < 0) diff --git a/queue-7.0/tpm-use-kfree_sensitive-to-free-auth-session-in-tpm_dev_release.patch b/queue-7.0/tpm-use-kfree_sensitive-to-free-auth-session-in-tpm_dev_release.patch new file mode 100644 index 0000000000..ecac4efe4f --- /dev/null +++ b/queue-7.0/tpm-use-kfree_sensitive-to-free-auth-session-in-tpm_dev_release.patch @@ -0,0 +1,44 @@ +From c424d2664f08c77f08b4580b5f0cbaabf7c229b2 Mon Sep 17 00:00:00 2001 +From: Gunnar Kudrjavets +Date: Thu, 9 Apr 2026 17:20:54 +0000 +Subject: tpm: Use kfree_sensitive() to free auth session in tpm_dev_release() + +From: Gunnar Kudrjavets + +commit c424d2664f08c77f08b4580b5f0cbaabf7c229b2 upstream. + +tpm_dev_release() uses plain kfree() to free chip->auth, which contains +sensitive cryptographic material including HMAC session keys, nonces, +and passphrase data (struct tpm2_auth). 
+ +Every other code path that frees this structure uses kfree_sensitive() +to zero the memory before releasing it: both tpm2_end_auth_session() +and tpm_buf_check_hmac_response() do so. The tpm_dev_release() path +is the only one that does not, leaving key material in freed slab +memory until it is eventually overwritten. + +Use kfree_sensitive() for consistency with the rest of the driver and +to ensure session keys are scrubbed during device teardown. + +Cc: stable@vger.kernel.org # v6.10+ +Fixes: 699e3efd6c64 ("tpm: Add HMAC session start and end functions") +Signed-off-by: Gunnar Kudrjavets +Reviewed-by: Justinien Bouron +Reviewed-by: Jarkko Sakkinen +Signed-off-by: Jarkko Sakkinen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/char/tpm/tpm-chip.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/char/tpm/tpm-chip.c ++++ b/drivers/char/tpm/tpm-chip.c +@@ -247,7 +247,7 @@ static void tpm_dev_release(struct devic + kfree(chip->work_space.context_buf); + kfree(chip->work_space.session_buf); + #ifdef CONFIG_TCG_TPM2_HMAC +- kfree(chip->auth); ++ kfree_sensitive(chip->auth); + #endif + kfree(chip); + } diff --git a/queue-7.0/tpm2-sessions-fix-missing-tpm_buf_destroy-in-tpm2_read_public.patch b/queue-7.0/tpm2-sessions-fix-missing-tpm_buf_destroy-in-tpm2_read_public.patch new file mode 100644 index 0000000000..4b6f2204fe --- /dev/null +++ b/queue-7.0/tpm2-sessions-fix-missing-tpm_buf_destroy-in-tpm2_read_public.patch @@ -0,0 +1,57 @@ +From f0f75a3d98b7959a8677b6363e23190f3018636b Mon Sep 17 00:00:00 2001 +From: Gunnar Kudrjavets +Date: Wed, 15 Apr 2026 03:00:03 +0300 +Subject: tpm2-sessions: Fix missing tpm_buf_destroy() in tpm2_read_public() + +From: Gunnar Kudrjavets + +commit f0f75a3d98b7959a8677b6363e23190f3018636b upstream. + +tpm2_read_public() calls tpm_buf_init() but fails to call +tpm_buf_destroy() on two exit paths, leaking a page allocation: + +1. 
When name_size() returns an error (unrecognized hash algorithm), + the function returns directly without destroying the buffer. + +2. On the success path, the buffer is never destroyed before + returning. + +All other error paths in the function correctly call +tpm_buf_destroy() before returning. + +Fix both by adding the missing tpm_buf_destroy() calls. + +Cc: stable@vger.kernel.org # v6.19+ +Fixes: bda1cbf73c6e ("tpm2-sessions: Fix tpm2_read_public range checks") +Signed-off-by: Gunnar Kudrjavets +Reviewed-by: Justinien Bouron +Reviewed-by: Paul Menzel +Reviewed-by: Jarkko Sakkinen +Signed-off-by: Jarkko Sakkinen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/char/tpm/tpm2-sessions.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/drivers/char/tpm/tpm2-sessions.c ++++ b/drivers/char/tpm/tpm2-sessions.c +@@ -203,8 +203,10 @@ static int tpm2_read_public(struct tpm_c + rc = tpm_buf_read_u16(&buf, &offset); + name_size_alg = name_size(&buf.data[offset]); + +- if (name_size_alg < 0) ++ if (name_size_alg < 0) { ++ tpm_buf_destroy(&buf); + return name_size_alg; ++ } + + if (rc != name_size_alg) { + tpm_buf_destroy(&buf); +@@ -217,6 +219,7 @@ static int tpm2_read_public(struct tpm_c + } + + memcpy(name, &buf.data[offset], rc); ++ tpm_buf_destroy(&buf); + return name_size_alg; + } + #endif /* CONFIG_TCG_TPM2_HMAC */ diff --git a/queue-7.0/userfaultfd-allow-registration-of-ranges-below-mmap_min_addr.patch b/queue-7.0/userfaultfd-allow-registration-of-ranges-below-mmap_min_addr.patch new file mode 100644 index 0000000000..0995109bc8 --- /dev/null +++ b/queue-7.0/userfaultfd-allow-registration-of-ranges-below-mmap_min_addr.patch @@ -0,0 +1,60 @@ +From 161ce69c2c89781784b945d8e281ff2da9dede9c Mon Sep 17 00:00:00 2001 +From: "Denis M. Karpov" +Date: Thu, 9 Apr 2026 13:33:45 +0300 +Subject: userfaultfd: allow registration of ranges below mmap_min_addr + +From: Denis M. Karpov + +commit 161ce69c2c89781784b945d8e281ff2da9dede9c upstream. 
+ +The current implementation of validate_range() in fs/userfaultfd.c +performs a hard check against mmap_min_addr. This is redundant because +UFFDIO_REGISTER operates on memory ranges that must already be backed by a +VMA. + +Enforcing mmap_min_addr or capability checks again in userfaultfd is +unnecessary and prevents applications like binary compilers from using +UFFD for valid memory regions mapped by application. + +Remove the redundant check for mmap_min_addr. + +We started using UFFD instead of the classic mprotect approach in the +binary translator to track application writes. During development, we +encountered this bug. The translator cannot control where the translated +application chooses to map its memory and if the app requires a +low-address area, UFFD fails, whereas mprotect would work just fine. I +believe this is a genuine logic bug rather than an improvement, and I +would appreciate including the fix in stable. + +Link: https://lore.kernel.org/20260409103345.15044-1-komlomal@gmail.com +Fixes: 86039bd3b4e6 ("userfaultfd: add new syscall to provide memory externalization") +Signed-off-by: Denis M. Karpov +Reviewed-by: Lorenzo Stoakes +Acked-by: Harry Yoo (Oracle) +Reviewed-by: Pedro Falcato +Reviewed-by: Liam R. 
Howlett +Reviewed-by: Mike Rapoport (Microsoft) +Cc: Alexander Viro +Cc: Al Viro +Cc: Christian Brauner +Cc: Jan Kara +Cc: Jann Horn +Cc: Peter Xu +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + fs/userfaultfd.c | 2 -- + 1 file changed, 2 deletions(-) + +--- a/fs/userfaultfd.c ++++ b/fs/userfaultfd.c +@@ -1238,8 +1238,6 @@ static __always_inline int validate_unal + return -EINVAL; + if (!len) + return -EINVAL; +- if (start < mmap_min_addr) +- return -EINVAL; + if (start >= task_size) + return -EINVAL; + if (len > task_size - start) diff --git a/queue-7.0/xfs-fix-a-resource-leak-in-xfs_alloc_buftarg.patch b/queue-7.0/xfs-fix-a-resource-leak-in-xfs_alloc_buftarg.patch new file mode 100644 index 0000000000..03396935a5 --- /dev/null +++ b/queue-7.0/xfs-fix-a-resource-leak-in-xfs_alloc_buftarg.patch @@ -0,0 +1,32 @@ +From 29a7b2614357393b176ef06ba5bc3ff5afc8df69 Mon Sep 17 00:00:00 2001 +From: Haoxiang Li +Date: Wed, 1 Apr 2026 12:02:41 +0800 +Subject: xfs: fix a resource leak in xfs_alloc_buftarg() + +From: Haoxiang Li + +commit 29a7b2614357393b176ef06ba5bc3ff5afc8df69 upstream. + +In the error path, call fs_put_dax() to drop the DAX +device reference. + +Fixes: 6f643c57d57c ("xfs: implement ->notify_failure() for XFS") +Cc: stable@vger.kernel.org +Signed-off-by: Haoxiang Li +Reviewed-by: Darrick J. 
Wong +Signed-off-by: Carlos Maiolino +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/xfs_buf.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/fs/xfs/xfs_buf.c ++++ b/fs/xfs/xfs_buf.c +@@ -1831,6 +1831,7 @@ xfs_alloc_buftarg( + return btp; + + error_free: ++ fs_put_dax(btp->bt_daxdev, mp); + kfree(btp); + return ERR_PTR(error); + } diff --git a/queue-7.0/xfs-start-gc-on-zonegc_low_space-attribute-updates.patch b/queue-7.0/xfs-start-gc-on-zonegc_low_space-attribute-updates.patch new file mode 100644 index 0000000000..4d01219070 --- /dev/null +++ b/queue-7.0/xfs-start-gc-on-zonegc_low_space-attribute-updates.patch @@ -0,0 +1,104 @@ +From 181ea4e2de422aa0a66f355bd59bccccdd169826 Mon Sep 17 00:00:00 2001 +From: Hans Holmberg +Date: Wed, 25 Mar 2026 13:43:12 +0100 +Subject: xfs: start gc on zonegc_low_space attribute updates + +From: Hans Holmberg + +commit 181ea4e2de422aa0a66f355bd59bccccdd169826 upstream. + +Start gc if the aggressiveness of zone garbage collection is changed +by the user (if the file system is not read only). + +Without this change, the new setting will not be taken into account +until the gc thread is woken up by e.g. a write. 
+ +Cc: stable@vger.kernel.org # v6.15 +Fixes: 845abeb1f06a8a ("xfs: add tunable threshold parameter for triggering zone GC") +Signed-off-by: Hans Holmberg +Reviewed-by: Christoph Hellwig +Reviewed-by: Damien Le Moal +Signed-off-by: Carlos Maiolino +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/xfs_sysfs.c | 7 ++++++- + fs/xfs/xfs_zone_alloc.h | 4 ++++ + fs/xfs/xfs_zone_gc.c | 17 +++++++++++++++++ + 3 files changed, 27 insertions(+), 1 deletion(-) + +--- a/fs/xfs/xfs_sysfs.c ++++ b/fs/xfs/xfs_sysfs.c +@@ -14,6 +14,7 @@ + #include "xfs_log_priv.h" + #include "xfs_mount.h" + #include "xfs_zones.h" ++#include "xfs_zone_alloc.h" + + struct xfs_sysfs_attr { + struct attribute attr; +@@ -724,6 +725,7 @@ zonegc_low_space_store( + const char *buf, + size_t count) + { ++ struct xfs_mount *mp = zoned_to_mp(kobj); + int ret; + unsigned int val; + +@@ -734,7 +736,10 @@ zonegc_low_space_store( + if (val > 100) + return -EINVAL; + +- zoned_to_mp(kobj)->m_zonegc_low_space = val; ++ if (mp->m_zonegc_low_space != val) { ++ mp->m_zonegc_low_space = val; ++ xfs_zone_gc_wakeup(mp); ++ } + + return count; + } +--- a/fs/xfs/xfs_zone_alloc.h ++++ b/fs/xfs/xfs_zone_alloc.h +@@ -51,6 +51,7 @@ int xfs_mount_zones(struct xfs_mount *mp + void xfs_unmount_zones(struct xfs_mount *mp); + void xfs_zone_gc_start(struct xfs_mount *mp); + void xfs_zone_gc_stop(struct xfs_mount *mp); ++void xfs_zone_gc_wakeup(struct xfs_mount *mp); + #else + static inline int xfs_mount_zones(struct xfs_mount *mp) + { +@@ -65,6 +66,9 @@ static inline void xfs_zone_gc_start(str + static inline void xfs_zone_gc_stop(struct xfs_mount *mp) + { + } ++static inline void xfs_zone_gc_wakeup(struct xfs_mount *mp) ++{ ++} + #endif /* CONFIG_XFS_RT */ + + #endif /* _XFS_ZONE_ALLOC_H */ +--- a/fs/xfs/xfs_zone_gc.c ++++ b/fs/xfs/xfs_zone_gc.c +@@ -1159,6 +1159,23 @@ xfs_zone_gc_stop( + kthread_park(mp->m_zone_info->zi_gc_thread); + } + ++void ++xfs_zone_gc_wakeup( ++ struct xfs_mount *mp) ++{ ++ struct super_block *sb = 
mp->m_super; ++ ++ /* ++ * If we are unmounting the file system we must not try to ++ * wake gc as m_zone_info might have been freed already. ++ */ ++ if (down_read_trylock(&sb->s_umount)) { ++ if (!xfs_is_readonly(mp)) ++ wake_up_process(mp->m_zone_info->zi_gc_thread); ++ up_read(&sb->s_umount); ++ } ++} ++ + int + xfs_zone_gc_mount( + struct xfs_mount *mp) -- 2.47.3