--- /dev/null
+From stable+bounces-124192-greg=kroah.com@vger.kernel.org Thu Mar 13 00:49:47 2025
+From: Mark Brown <broonie@kernel.org>
+Date: Wed, 12 Mar 2025 23:49:09 +0000
+Subject: KVM: arm64: Calculate cptr_el2 traps on activating traps
+To: Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Marc Zyngier <maz@kernel.org>, Oliver Upton <oliver.upton@linux.dev>, Joey Gouly <joey.gouly@arm.com>, Suzuki K Poulose <suzuki.poulose@arm.com>, Catalin Marinas <catalin.marinas@arm.com>, Will Deacon <will@kernel.org>
+Cc: linux-arm-kernel@lists.infradead.org, kvmarm@lists.linux.dev, linux-kernel@vger.kernel.org, stable@vger.kernel.org, Mark Brown <broonie@kernel.org>, Fuad Tabba <tabba@google.com>, James Clark <james.clark@linaro.org>
+Message-ID: <20250312-stable-sve-6-13-v1-1-c7ba07a6f4f7@kernel.org>
+
+From: Fuad Tabba <tabba@google.com>
+
+[ Upstream commit 2fd5b4b0e7b440602455b79977bfa64dea101e6c ]
+
+Similar to VHE, calculate the value of cptr_el2 from scratch on
+activate traps. This removes the need to store cptr_el2 in every
+vcpu structure. Moreover, some traps, such as whether the guest
+owns the fp registers, need to be set on every vcpu run.
+
+Reported-by: James Clark <james.clark@linaro.org>
+Fixes: 5294afdbf45a ("KVM: arm64: Exclude FP ownership from kvm_vcpu_arch")
+Signed-off-by: Fuad Tabba <tabba@google.com>
+Link: https://lore.kernel.org/r/20241216105057.579031-13-tabba@google.com
+Signed-off-by: Marc Zyngier <maz@kernel.org>
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm64/include/asm/kvm_host.h | 1
+ arch/arm64/kvm/arm.c | 1
+ arch/arm64/kvm/hyp/nvhe/pkvm.c | 30 ----------------------
+ arch/arm64/kvm/hyp/nvhe/switch.c | 51 +++++++++++++++++++++++---------------
+ 4 files changed, 32 insertions(+), 51 deletions(-)
+
+--- a/arch/arm64/include/asm/kvm_host.h
++++ b/arch/arm64/include/asm/kvm_host.h
+@@ -708,7 +708,6 @@ struct kvm_vcpu_arch {
+ u64 hcr_el2;
+ u64 hcrx_el2;
+ u64 mdcr_el2;
+- u64 cptr_el2;
+
+ /* Exception Information */
+ struct kvm_vcpu_fault_info fault;
+--- a/arch/arm64/kvm/arm.c
++++ b/arch/arm64/kvm/arm.c
+@@ -1569,7 +1569,6 @@ static int kvm_arch_vcpu_ioctl_vcpu_init
+ }
+
+ vcpu_reset_hcr(vcpu);
+- vcpu->arch.cptr_el2 = kvm_get_reset_cptr_el2(vcpu);
+
+ /*
+ * Handle the "start in power-off" case.
+--- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
++++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
+@@ -31,8 +31,6 @@ static void pvm_init_traps_aa64pfr0(stru
+ const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64PFR0_EL1);
+ u64 hcr_set = HCR_RW;
+ u64 hcr_clear = 0;
+- u64 cptr_set = 0;
+- u64 cptr_clear = 0;
+
+ /* Protected KVM does not support AArch32 guests. */
+ BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0),
+@@ -62,21 +60,10 @@ static void pvm_init_traps_aa64pfr0(stru
+ /* Trap AMU */
+ if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_AMU), feature_ids)) {
+ hcr_clear |= HCR_AMVOFFEN;
+- cptr_set |= CPTR_EL2_TAM;
+- }
+-
+- /* Trap SVE */
+- if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_SVE), feature_ids)) {
+- if (has_hvhe())
+- cptr_clear |= CPACR_ELx_ZEN;
+- else
+- cptr_set |= CPTR_EL2_TZ;
+ }
+
+ vcpu->arch.hcr_el2 |= hcr_set;
+ vcpu->arch.hcr_el2 &= ~hcr_clear;
+- vcpu->arch.cptr_el2 |= cptr_set;
+- vcpu->arch.cptr_el2 &= ~cptr_clear;
+ }
+
+ /*
+@@ -106,7 +93,6 @@ static void pvm_init_traps_aa64dfr0(stru
+ const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64DFR0_EL1);
+ u64 mdcr_set = 0;
+ u64 mdcr_clear = 0;
+- u64 cptr_set = 0;
+
+ /* Trap/constrain PMU */
+ if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), feature_ids)) {
+@@ -133,21 +119,12 @@ static void pvm_init_traps_aa64dfr0(stru
+ if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_TraceFilt), feature_ids))
+ mdcr_set |= MDCR_EL2_TTRF;
+
+- /* Trap Trace */
+- if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_TraceVer), feature_ids)) {
+- if (has_hvhe())
+- cptr_set |= CPACR_EL1_TTA;
+- else
+- cptr_set |= CPTR_EL2_TTA;
+- }
+-
+ /* Trap External Trace */
+ if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_ExtTrcBuff), feature_ids))
+ mdcr_clear |= MDCR_EL2_E2TB_MASK;
+
+ vcpu->arch.mdcr_el2 |= mdcr_set;
+ vcpu->arch.mdcr_el2 &= ~mdcr_clear;
+- vcpu->arch.cptr_el2 |= cptr_set;
+ }
+
+ /*
+@@ -198,10 +175,6 @@ static void pvm_init_trap_regs(struct kv
+ /* Clear res0 and set res1 bits to trap potential new features. */
+ vcpu->arch.hcr_el2 &= ~(HCR_RES0);
+ vcpu->arch.mdcr_el2 &= ~(MDCR_EL2_RES0);
+- if (!has_hvhe()) {
+- vcpu->arch.cptr_el2 |= CPTR_NVHE_EL2_RES1;
+- vcpu->arch.cptr_el2 &= ~(CPTR_NVHE_EL2_RES0);
+- }
+ }
+
+ static void pkvm_vcpu_reset_hcr(struct kvm_vcpu *vcpu)
+@@ -236,7 +209,6 @@ static void pkvm_vcpu_reset_hcr(struct k
+ */
+ static void pkvm_vcpu_init_traps(struct kvm_vcpu *vcpu)
+ {
+- vcpu->arch.cptr_el2 = kvm_get_reset_cptr_el2(vcpu);
+ vcpu->arch.mdcr_el2 = 0;
+
+ pkvm_vcpu_reset_hcr(vcpu);
+@@ -693,8 +665,6 @@ unlock:
+ return ret;
+ }
+
+- hyp_vcpu->vcpu.arch.cptr_el2 = kvm_get_reset_cptr_el2(&hyp_vcpu->vcpu);
+-
+ return 0;
+ }
+
+--- a/arch/arm64/kvm/hyp/nvhe/switch.c
++++ b/arch/arm64/kvm/hyp/nvhe/switch.c
+@@ -36,33 +36,46 @@ DEFINE_PER_CPU(unsigned long, kvm_hyp_ve
+
+ extern void kvm_nvhe_prepare_backtrace(unsigned long fp, unsigned long pc);
+
+-static void __activate_traps(struct kvm_vcpu *vcpu)
++static void __activate_cptr_traps(struct kvm_vcpu *vcpu)
+ {
+- u64 val;
++ u64 val = CPTR_EL2_TAM; /* Same bit irrespective of E2H */
+
+- ___activate_traps(vcpu, vcpu->arch.hcr_el2);
+- __activate_traps_common(vcpu);
++ if (has_hvhe()) {
++ val |= CPACR_ELx_TTA;
+
+- val = vcpu->arch.cptr_el2;
+- val |= CPTR_EL2_TAM; /* Same bit irrespective of E2H */
+- val |= has_hvhe() ? CPACR_EL1_TTA : CPTR_EL2_TTA;
+- if (cpus_have_final_cap(ARM64_SME)) {
+- if (has_hvhe())
+- val &= ~CPACR_ELx_SMEN;
+- else
+- val |= CPTR_EL2_TSM;
+- }
++ if (guest_owns_fp_regs()) {
++ val |= CPACR_ELx_FPEN;
++ if (vcpu_has_sve(vcpu))
++ val |= CPACR_ELx_ZEN;
++ }
++ } else {
++ val |= CPTR_EL2_TTA | CPTR_NVHE_EL2_RES1;
++
++ /*
++ * Always trap SME since it's not supported in KVM.
++ * TSM is RES1 if SME isn't implemented.
++ */
++ val |= CPTR_EL2_TSM;
+
+- if (!guest_owns_fp_regs()) {
+- if (has_hvhe())
+- val &= ~(CPACR_ELx_FPEN | CPACR_ELx_ZEN);
+- else
+- val |= CPTR_EL2_TFP | CPTR_EL2_TZ;
++ if (!vcpu_has_sve(vcpu) || !guest_owns_fp_regs())
++ val |= CPTR_EL2_TZ;
+
+- __activate_traps_fpsimd32(vcpu);
++ if (!guest_owns_fp_regs())
++ val |= CPTR_EL2_TFP;
+ }
+
++ if (!guest_owns_fp_regs())
++ __activate_traps_fpsimd32(vcpu);
++
+ kvm_write_cptr_el2(val);
++}
++
++static void __activate_traps(struct kvm_vcpu *vcpu)
++{
++ ___activate_traps(vcpu, vcpu->arch.hcr_el2);
++ __activate_traps_common(vcpu);
++ __activate_cptr_traps(vcpu);
++
+ write_sysreg(__this_cpu_read(kvm_hyp_vector), vbar_el2);
+
+ if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
--- /dev/null
+From broonie@kernel.org Thu Mar 13 00:49:53 2025
+From: Mark Brown <broonie@kernel.org>
+Date: Wed, 12 Mar 2025 23:49:16 +0000
+Subject: KVM: arm64: Eagerly switch ZCR_EL{1,2}
+To: Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Marc Zyngier <maz@kernel.org>, Oliver Upton <oliver.upton@linux.dev>, Joey Gouly <joey.gouly@arm.com>, Suzuki K Poulose <suzuki.poulose@arm.com>, Catalin Marinas <catalin.marinas@arm.com>, Will Deacon <will@kernel.org>
+Cc: linux-arm-kernel@lists.infradead.org, kvmarm@lists.linux.dev, linux-kernel@vger.kernel.org, stable@vger.kernel.org, Mark Brown <broonie@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Fuad Tabba <tabba@google.com>
+Message-ID: <20250312-stable-sve-6-13-v1-8-c7ba07a6f4f7@kernel.org>
+
+From: Mark Rutland <mark.rutland@arm.com>
+
+In non-protected KVM modes, while the guest FPSIMD/SVE/SME state is live on the
+CPU, the host's active SVE VL may differ from the guest's maximum SVE VL:
+
+* For VHE hosts, when a VM uses NV, ZCR_EL2 contains a value constrained
+ by the guest hypervisor, which may be less than or equal to that
+ guest's maximum VL.
+
+ Note: in this case the value of ZCR_EL1 is immaterial due to E2H.
+
+* For nVHE/hVHE hosts, ZCR_EL1 contains a value written by the guest,
+ which may be less than or greater than the guest's maximum VL.
+
+ Note: in this case hyp code traps host SVE usage and lazily restores
+ ZCR_EL2 to the host's maximum VL, which may be greater than the
+ guest's maximum VL.
+
+This can be the case between exiting a guest and kvm_arch_vcpu_put_fp().
+If a softirq is taken during this period and the softirq handler tries
+to use kernel-mode NEON, then the kernel will fail to save the guest's
+FPSIMD/SVE state, and will pend a SIGKILL for the current thread.
+
+This happens because kvm_arch_vcpu_ctxsync_fp() binds the guest's live
+FPSIMD/SVE state with the guest's maximum SVE VL, and
+fpsimd_save_user_state() verifies that the live SVE VL is as expected
+before attempting to save the register state:
+
+| if (WARN_ON(sve_get_vl() != vl)) {
+| force_signal_inject(SIGKILL, SI_KERNEL, 0, 0);
+| return;
+| }
+
+Fix this and make this a bit easier to reason about by always eagerly
+switching ZCR_EL{1,2} at hyp during guest<->host transitions. With this
+happening, there's no need to trap host SVE usage, and the nVHE/hVHE
+__deactivate_cptr_traps() logic can be simplified to enable host access
+to all present FPSIMD/SVE/SME features.
+
+In protected nVHE/hVHE modes, the host's state is always saved/restored
+by hyp, and the guest's state is saved prior to exit to the host, so
+from the host's PoV the guest never has live FPSIMD/SVE/SME state, and
+the host's ZCR_EL1 is never clobbered by hyp.
+
+Fixes: 8c8010d69c132273 ("KVM: arm64: Save/restore SVE state for nVHE")
+Fixes: 2e3cf82063a00ea0 ("KVM: arm64: nv: Ensure correct VL is loaded before saving SVE state")
+Signed-off-by: Mark Rutland <mark.rutland@arm.com>
+Reviewed-by: Mark Brown <broonie@kernel.org>
+Tested-by: Mark Brown <broonie@kernel.org>
+Cc: Catalin Marinas <catalin.marinas@arm.com>
+Cc: Fuad Tabba <tabba@google.com>
+Cc: Marc Zyngier <maz@kernel.org>
+Cc: Oliver Upton <oliver.upton@linux.dev>
+Cc: Will Deacon <will@kernel.org>
+Reviewed-by: Oliver Upton <oliver.upton@linux.dev>
+Link: https://lore.kernel.org/r/20250210195226.1215254-9-mark.rutland@arm.com
+Signed-off-by: Marc Zyngier <maz@kernel.org>
+(cherry picked from commit 59419f10045bc955d2229819c7cf7a8b0b9c5b59)
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm64/kvm/fpsimd.c | 30 ----------------
+ arch/arm64/kvm/hyp/entry.S | 5 ++
+ arch/arm64/kvm/hyp/include/hyp/switch.h | 59 ++++++++++++++++++++++++++++++++
+ arch/arm64/kvm/hyp/nvhe/hyp-main.c | 13 +++----
+ arch/arm64/kvm/hyp/nvhe/switch.c | 33 +++++++++++++++--
+ arch/arm64/kvm/hyp/vhe/switch.c | 4 ++
+ 6 files changed, 103 insertions(+), 41 deletions(-)
+
+--- a/arch/arm64/kvm/fpsimd.c
++++ b/arch/arm64/kvm/fpsimd.c
+@@ -136,36 +136,6 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcp
+ local_irq_save(flags);
+
+ if (guest_owns_fp_regs()) {
+- if (vcpu_has_sve(vcpu)) {
+- u64 zcr = read_sysreg_el1(SYS_ZCR);
+-
+- /*
+- * If the vCPU is in the hyp context then ZCR_EL1 is
+- * loaded with its vEL2 counterpart.
+- */
+- __vcpu_sys_reg(vcpu, vcpu_sve_zcr_elx(vcpu)) = zcr;
+-
+- /*
+- * Restore the VL that was saved when bound to the CPU,
+- * which is the maximum VL for the guest. Because the
+- * layout of the data when saving the sve state depends
+- * on the VL, we need to use a consistent (i.e., the
+- * maximum) VL.
+- * Note that this means that at guest exit ZCR_EL1 is
+- * not necessarily the same as on guest entry.
+- *
+- * ZCR_EL2 holds the guest hypervisor's VL when running
+- * a nested guest, which could be smaller than the
+- * max for the vCPU. Similar to above, we first need to
+- * switch to a VL consistent with the layout of the
+- * vCPU's SVE state. KVM support for NV implies VHE, so
+- * using the ZCR_EL1 alias is safe.
+- */
+- if (!has_vhe() || (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)))
+- sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1,
+- SYS_ZCR_EL1);
+- }
+-
+ /*
+ * Flush (save and invalidate) the fpsimd/sve state so that if
+ * the host tries to use fpsimd/sve, it's not using stale data
+--- a/arch/arm64/kvm/hyp/entry.S
++++ b/arch/arm64/kvm/hyp/entry.S
+@@ -44,6 +44,11 @@ alternative_if ARM64_HAS_RAS_EXTN
+ alternative_else_nop_endif
+ mrs x1, isr_el1
+ cbz x1, 1f
++
++ // Ensure that __guest_enter() always provides a context
++ // synchronization event so that callers don't need ISBs for anything
++ // that would usually be synchronized by the ERET.
++ isb
+ mov x0, #ARM_EXCEPTION_IRQ
+ ret
+
+--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
++++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
+@@ -375,6 +375,65 @@ static inline void __hyp_sve_save_host(v
+ true);
+ }
+
++static inline void fpsimd_lazy_switch_to_guest(struct kvm_vcpu *vcpu)
++{
++ u64 zcr_el1, zcr_el2;
++
++ if (!guest_owns_fp_regs())
++ return;
++
++ if (vcpu_has_sve(vcpu)) {
++ /* A guest hypervisor may restrict the effective max VL. */
++ if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu))
++ zcr_el2 = __vcpu_sys_reg(vcpu, ZCR_EL2);
++ else
++ zcr_el2 = vcpu_sve_max_vq(vcpu) - 1;
++
++ write_sysreg_el2(zcr_el2, SYS_ZCR);
++
++ zcr_el1 = __vcpu_sys_reg(vcpu, vcpu_sve_zcr_elx(vcpu));
++ write_sysreg_el1(zcr_el1, SYS_ZCR);
++ }
++}
++
++static inline void fpsimd_lazy_switch_to_host(struct kvm_vcpu *vcpu)
++{
++ u64 zcr_el1, zcr_el2;
++
++ if (!guest_owns_fp_regs())
++ return;
++
++ /*
++ * When the guest owns the FP regs, we know that guest+hyp traps for
++ * any FPSIMD/SVE/SME features exposed to the guest have been disabled
++ * by either fpsimd_lazy_switch_to_guest() or kvm_hyp_handle_fpsimd()
++ * prior to __guest_enter(). As __guest_enter() guarantees a context
++ * synchronization event, we don't need an ISB here to avoid taking
++ * traps for anything that was exposed to the guest.
++ */
++ if (vcpu_has_sve(vcpu)) {
++ zcr_el1 = read_sysreg_el1(SYS_ZCR);
++ __vcpu_sys_reg(vcpu, vcpu_sve_zcr_elx(vcpu)) = zcr_el1;
++
++ /*
++ * The guest's state is always saved using the guest's max VL.
++ * Ensure that the host has the guest's max VL active such that
++ * the host can save the guest's state lazily, but don't
++ * artificially restrict the host to the guest's max VL.
++ */
++ if (has_vhe()) {
++ zcr_el2 = vcpu_sve_max_vq(vcpu) - 1;
++ write_sysreg_el2(zcr_el2, SYS_ZCR);
++ } else {
++ zcr_el2 = sve_vq_from_vl(kvm_host_sve_max_vl) - 1;
++ write_sysreg_el2(zcr_el2, SYS_ZCR);
++
++ zcr_el1 = vcpu_sve_max_vq(vcpu) - 1;
++ write_sysreg_el1(zcr_el1, SYS_ZCR);
++ }
++ }
++}
++
+ static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu)
+ {
+ /*
+--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
++++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+@@ -5,6 +5,7 @@
+ */
+
+ #include <hyp/adjust_pc.h>
++#include <hyp/switch.h>
+
+ #include <asm/pgtable-types.h>
+ #include <asm/kvm_asm.h>
+@@ -178,8 +179,12 @@ static void handle___kvm_vcpu_run(struct
+ sync_hyp_vcpu(hyp_vcpu);
+ pkvm_put_hyp_vcpu(hyp_vcpu);
+ } else {
++ struct kvm_vcpu *vcpu = kern_hyp_va(host_vcpu);
++
+ /* The host is fully trusted, run its vCPU directly. */
+- ret = __kvm_vcpu_run(host_vcpu);
++ fpsimd_lazy_switch_to_guest(vcpu);
++ ret = __kvm_vcpu_run(vcpu);
++ fpsimd_lazy_switch_to_host(vcpu);
+ }
+
+ out:
+@@ -480,12 +485,6 @@ void handle_trap(struct kvm_cpu_context
+ case ESR_ELx_EC_SMC64:
+ handle_host_smc(host_ctxt);
+ break;
+- case ESR_ELx_EC_SVE:
+- cpacr_clear_set(0, CPACR_ELx_ZEN);
+- isb();
+- sve_cond_update_zcr_vq(sve_vq_from_vl(kvm_host_sve_max_vl) - 1,
+- SYS_ZCR_EL2);
+- break;
+ case ESR_ELx_EC_IABT_LOW:
+ case ESR_ELx_EC_DABT_LOW:
+ handle_host_mem_abort(host_ctxt);
+--- a/arch/arm64/kvm/hyp/nvhe/switch.c
++++ b/arch/arm64/kvm/hyp/nvhe/switch.c
+@@ -40,6 +40,9 @@ static void __activate_cptr_traps(struct
+ {
+ u64 val = CPTR_EL2_TAM; /* Same bit irrespective of E2H */
+
++ if (!guest_owns_fp_regs())
++ __activate_traps_fpsimd32(vcpu);
++
+ if (has_hvhe()) {
+ val |= CPACR_ELx_TTA;
+
+@@ -48,6 +51,8 @@ static void __activate_cptr_traps(struct
+ if (vcpu_has_sve(vcpu))
+ val |= CPACR_ELx_ZEN;
+ }
++
++ write_sysreg(val, cpacr_el1);
+ } else {
+ val |= CPTR_EL2_TTA | CPTR_NVHE_EL2_RES1;
+
+@@ -62,12 +67,32 @@ static void __activate_cptr_traps(struct
+
+ if (!guest_owns_fp_regs())
+ val |= CPTR_EL2_TFP;
++
++ write_sysreg(val, cptr_el2);
+ }
++}
+
+- if (!guest_owns_fp_regs())
+- __activate_traps_fpsimd32(vcpu);
++static void __deactivate_cptr_traps(struct kvm_vcpu *vcpu)
++{
++ if (has_hvhe()) {
++ u64 val = CPACR_ELx_FPEN;
++
++ if (cpus_have_final_cap(ARM64_SVE))
++ val |= CPACR_ELx_ZEN;
++ if (cpus_have_final_cap(ARM64_SME))
++ val |= CPACR_ELx_SMEN;
++
++ write_sysreg(val, cpacr_el1);
++ } else {
++ u64 val = CPTR_NVHE_EL2_RES1;
++
++ if (!cpus_have_final_cap(ARM64_SVE))
++ val |= CPTR_EL2_TZ;
++ if (!cpus_have_final_cap(ARM64_SME))
++ val |= CPTR_EL2_TSM;
+
+- kvm_write_cptr_el2(val);
++ write_sysreg(val, cptr_el2);
++ }
+ }
+
+ static void __activate_traps(struct kvm_vcpu *vcpu)
+@@ -120,7 +145,7 @@ static void __deactivate_traps(struct kv
+
+ write_sysreg(this_cpu_ptr(&kvm_init_params)->hcr_el2, hcr_el2);
+
+- kvm_reset_cptr_el2(vcpu);
++ __deactivate_cptr_traps(vcpu);
+ write_sysreg(__kvm_hyp_host_vector, vbar_el2);
+ }
+
+--- a/arch/arm64/kvm/hyp/vhe/switch.c
++++ b/arch/arm64/kvm/hyp/vhe/switch.c
+@@ -462,6 +462,8 @@ static int __kvm_vcpu_run_vhe(struct kvm
+
+ sysreg_save_host_state_vhe(host_ctxt);
+
++ fpsimd_lazy_switch_to_guest(vcpu);
++
+ /*
+ * Note that ARM erratum 1165522 requires us to configure both stage 1
+ * and stage 2 translation for the guest context before we clear
+@@ -486,6 +488,8 @@ static int __kvm_vcpu_run_vhe(struct kvm
+
+ __deactivate_traps(vcpu);
+
++ fpsimd_lazy_switch_to_host(vcpu);
++
+ sysreg_restore_host_state_vhe(host_ctxt);
+
+ if (guest_owns_fp_regs())
--- /dev/null
+From broonie@kernel.org Thu Mar 13 00:49:50 2025
+From: Mark Brown <broonie@kernel.org>
+Date: Wed, 12 Mar 2025 23:49:15 +0000
+Subject: KVM: arm64: Mark some header functions as inline
+To: Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Marc Zyngier <maz@kernel.org>, Oliver Upton <oliver.upton@linux.dev>, Joey Gouly <joey.gouly@arm.com>, Suzuki K Poulose <suzuki.poulose@arm.com>, Catalin Marinas <catalin.marinas@arm.com>, Will Deacon <will@kernel.org>
+Cc: linux-arm-kernel@lists.infradead.org, kvmarm@lists.linux.dev, linux-kernel@vger.kernel.org, stable@vger.kernel.org, Mark Brown <broonie@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Fuad Tabba <tabba@google.com>
+Message-ID: <20250312-stable-sve-6-13-v1-7-c7ba07a6f4f7@kernel.org>
+
+From: Mark Rutland <mark.rutland@arm.com>
+
+[ Upstream commit f9dd00de1e53a47763dfad601635d18542c3836d ]
+
+The shared hyp switch header has a number of static functions which
+might not be used by all files that include the header, and when unused
+they will provoke compiler warnings, e.g.
+
+| In file included from arch/arm64/kvm/hyp/nvhe/hyp-main.c:8:
+| ./arch/arm64/kvm/hyp/include/hyp/switch.h:703:13: warning: 'kvm_hyp_handle_dabt_low' defined but not used [-Wunused-function]
+| 703 | static bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
+| | ^~~~~~~~~~~~~~~~~~~~~~~
+| ./arch/arm64/kvm/hyp/include/hyp/switch.h:682:13: warning: 'kvm_hyp_handle_cp15_32' defined but not used [-Wunused-function]
+| 682 | static bool kvm_hyp_handle_cp15_32(struct kvm_vcpu *vcpu, u64 *exit_code)
+| | ^~~~~~~~~~~~~~~~~~~~~~
+| ./arch/arm64/kvm/hyp/include/hyp/switch.h:662:13: warning: 'kvm_hyp_handle_sysreg' defined but not used [-Wunused-function]
+| 662 | static bool kvm_hyp_handle_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code)
+| | ^~~~~~~~~~~~~~~~~~~~~
+| ./arch/arm64/kvm/hyp/include/hyp/switch.h:458:13: warning: 'kvm_hyp_handle_fpsimd' defined but not used [-Wunused-function]
+| 458 | static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
+| | ^~~~~~~~~~~~~~~~~~~~~
+| ./arch/arm64/kvm/hyp/include/hyp/switch.h:329:13: warning: 'kvm_hyp_handle_mops' defined but not used [-Wunused-function]
+| 329 | static bool kvm_hyp_handle_mops(struct kvm_vcpu *vcpu, u64 *exit_code)
+| | ^~~~~~~~~~~~~~~~~~~
+
+Mark these functions as 'inline' to suppress this warning. This
+shouldn't result in any functional change.
+
+At the same time, avoid the use of __alias() in the header and alias
+kvm_hyp_handle_iabt_low() and kvm_hyp_handle_watchpt_low() to
+kvm_hyp_handle_memory_fault() using CPP, matching the style in the rest
+of the kernel. For consistency, kvm_hyp_handle_memory_fault() is also
+marked as 'inline'.
+
+Signed-off-by: Mark Rutland <mark.rutland@arm.com>
+Reviewed-by: Mark Brown <broonie@kernel.org>
+Tested-by: Mark Brown <broonie@kernel.org>
+Acked-by: Will Deacon <will@kernel.org>
+Cc: Catalin Marinas <catalin.marinas@arm.com>
+Cc: Fuad Tabba <tabba@google.com>
+Cc: Marc Zyngier <maz@kernel.org>
+Cc: Oliver Upton <oliver.upton@linux.dev>
+Reviewed-by: Oliver Upton <oliver.upton@linux.dev>
+Link: https://lore.kernel.org/r/20250210195226.1215254-8-mark.rutland@arm.com
+Signed-off-by: Marc Zyngier <maz@kernel.org>
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm64/kvm/hyp/include/hyp/switch.h | 19 +++++++++----------
+ 1 file changed, 9 insertions(+), 10 deletions(-)
+
+--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
++++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
+@@ -326,7 +326,7 @@ static inline bool __populate_fault_info
+ return __get_fault_info(vcpu->arch.fault.esr_el2, &vcpu->arch.fault);
+ }
+
+-static bool kvm_hyp_handle_mops(struct kvm_vcpu *vcpu, u64 *exit_code)
++static inline bool kvm_hyp_handle_mops(struct kvm_vcpu *vcpu, u64 *exit_code)
+ {
+ *vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR);
+ arm64_mops_reset_regs(vcpu_gp_regs(vcpu), vcpu->arch.fault.esr_el2);
+@@ -404,7 +404,7 @@ static void kvm_hyp_save_fpsimd_host(str
+ * If FP/SIMD is not implemented, handle the trap and inject an undefined
+ * instruction exception to the guest. Similarly for trapped SVE accesses.
+ */
+-static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
++static inline bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
+ {
+ bool sve_guest;
+ u8 esr_ec;
+@@ -595,7 +595,7 @@ static bool handle_ampere1_tcr(struct kv
+ return true;
+ }
+
+-static bool kvm_hyp_handle_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code)
++static inline bool kvm_hyp_handle_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code)
+ {
+ if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) &&
+ handle_tx2_tvm(vcpu))
+@@ -615,7 +615,7 @@ static bool kvm_hyp_handle_sysreg(struct
+ return false;
+ }
+
+-static bool kvm_hyp_handle_cp15_32(struct kvm_vcpu *vcpu, u64 *exit_code)
++static inline bool kvm_hyp_handle_cp15_32(struct kvm_vcpu *vcpu, u64 *exit_code)
+ {
+ if (static_branch_unlikely(&vgic_v3_cpuif_trap) &&
+ __vgic_v3_perform_cpuif_access(vcpu) == 1)
+@@ -624,19 +624,18 @@ static bool kvm_hyp_handle_cp15_32(struc
+ return false;
+ }
+
+-static bool kvm_hyp_handle_memory_fault(struct kvm_vcpu *vcpu, u64 *exit_code)
++static inline bool kvm_hyp_handle_memory_fault(struct kvm_vcpu *vcpu,
++ u64 *exit_code)
+ {
+ if (!__populate_fault_info(vcpu))
+ return true;
+
+ return false;
+ }
+-static bool kvm_hyp_handle_iabt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
+- __alias(kvm_hyp_handle_memory_fault);
+-static bool kvm_hyp_handle_watchpt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
+- __alias(kvm_hyp_handle_memory_fault);
++#define kvm_hyp_handle_iabt_low kvm_hyp_handle_memory_fault
++#define kvm_hyp_handle_watchpt_low kvm_hyp_handle_memory_fault
+
+-static bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
++static inline bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
+ {
+ if (kvm_hyp_handle_memory_fault(vcpu, exit_code))
+ return true;
--- /dev/null
+From broonie@kernel.org Thu Mar 13 00:49:47 2025
+From: Mark Brown <broonie@kernel.org>
+Date: Wed, 12 Mar 2025 23:49:14 +0000
+Subject: KVM: arm64: Refactor exit handlers
+To: Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Marc Zyngier <maz@kernel.org>, Oliver Upton <oliver.upton@linux.dev>, Joey Gouly <joey.gouly@arm.com>, Suzuki K Poulose <suzuki.poulose@arm.com>, Catalin Marinas <catalin.marinas@arm.com>, Will Deacon <will@kernel.org>
+Cc: linux-arm-kernel@lists.infradead.org, kvmarm@lists.linux.dev, linux-kernel@vger.kernel.org, stable@vger.kernel.org, Mark Brown <broonie@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Fuad Tabba <tabba@google.com>
+Message-ID: <20250312-stable-sve-6-13-v1-6-c7ba07a6f4f7@kernel.org>
+
+From: Mark Rutland <mark.rutland@arm.com>
+
+[ Upstream commit 9b66195063c5a145843547b1d692bd189be85287 ]
+
+The hyp exit handling logic is largely shared between VHE and nVHE/hVHE,
+with common logic in arch/arm64/kvm/hyp/include/hyp/switch.h. The code
+in the header depends on function definitions provided by
+arch/arm64/kvm/hyp/vhe/switch.c and arch/arm64/kvm/hyp/nvhe/switch.c
+when they include the header.
+
+This is an unusual header dependency, and prevents the use of
+arch/arm64/kvm/hyp/include/hyp/switch.h in other files as this would
+result in compiler warnings regarding missing definitions, e.g.
+
+| In file included from arch/arm64/kvm/hyp/nvhe/hyp-main.c:8:
+| ./arch/arm64/kvm/hyp/include/hyp/switch.h:733:31: warning: 'kvm_get_exit_handler_array' used but never defined
+| 733 | static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu);
+| | ^~~~~~~~~~~~~~~~~~~~~~~~~~
+| ./arch/arm64/kvm/hyp/include/hyp/switch.h:735:13: warning: 'early_exit_filter' used but never defined
+| 735 | static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code);
+| | ^~~~~~~~~~~~~~~~~
+
+Refactor the logic such that the header doesn't depend on anything from
+the C files. There should be no functional change as a result of this
+patch.
+
+Signed-off-by: Mark Rutland <mark.rutland@arm.com>
+Reviewed-by: Mark Brown <broonie@kernel.org>
+Tested-by: Mark Brown <broonie@kernel.org>
+Acked-by: Will Deacon <will@kernel.org>
+Cc: Catalin Marinas <catalin.marinas@arm.com>
+Cc: Fuad Tabba <tabba@google.com>
+Cc: Marc Zyngier <maz@kernel.org>
+Cc: Oliver Upton <oliver.upton@linux.dev>
+Reviewed-by: Oliver Upton <oliver.upton@linux.dev>
+Link: https://lore.kernel.org/r/20250210195226.1215254-7-mark.rutland@arm.com
+Signed-off-by: Marc Zyngier <maz@kernel.org>
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm64/kvm/hyp/include/hyp/switch.h | 30 ++++++------------------------
+ arch/arm64/kvm/hyp/nvhe/switch.c | 28 ++++++++++++++++------------
+ arch/arm64/kvm/hyp/vhe/switch.c | 9 ++++-----
+ 3 files changed, 26 insertions(+), 41 deletions(-)
+
+--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
++++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
+@@ -666,23 +666,16 @@ static bool kvm_hyp_handle_dabt_low(stru
+
+ typedef bool (*exit_handler_fn)(struct kvm_vcpu *, u64 *);
+
+-static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu);
+-
+-static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code);
+-
+ /*
+ * Allow the hypervisor to handle the exit with an exit handler if it has one.
+ *
+ * Returns true if the hypervisor handled the exit, and control should go back
+ * to the guest, or false if it hasn't.
+ */
+-static inline bool kvm_hyp_handle_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
++static inline bool kvm_hyp_handle_exit(struct kvm_vcpu *vcpu, u64 *exit_code,
++ const exit_handler_fn *handlers)
+ {
+- const exit_handler_fn *handlers = kvm_get_exit_handler_array(vcpu);
+- exit_handler_fn fn;
+-
+- fn = handlers[kvm_vcpu_trap_get_class(vcpu)];
+-
++ exit_handler_fn fn = handlers[kvm_vcpu_trap_get_class(vcpu)];
+ if (fn)
+ return fn(vcpu, exit_code);
+
+@@ -712,20 +705,9 @@ static inline void synchronize_vcpu_psta
+ * the guest, false when we should restore the host state and return to the
+ * main run loop.
+ */
+-static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
++static inline bool __fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code,
++ const exit_handler_fn *handlers)
+ {
+- /*
+- * Save PSTATE early so that we can evaluate the vcpu mode
+- * early on.
+- */
+- synchronize_vcpu_pstate(vcpu, exit_code);
+-
+- /*
+- * Check whether we want to repaint the state one way or
+- * another.
+- */
+- early_exit_filter(vcpu, exit_code);
+-
+ if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ)
+ vcpu->arch.fault.esr_el2 = read_sysreg_el2(SYS_ESR);
+
+@@ -755,7 +737,7 @@ static inline bool fixup_guest_exit(stru
+ goto exit;
+
+ /* Check if there's an exit handler and allow it to handle the exit. */
+- if (kvm_hyp_handle_exit(vcpu, exit_code))
++ if (kvm_hyp_handle_exit(vcpu, exit_code, handlers))
+ goto guest;
+ exit:
+ /* Return to the host kernel and handle the exit */
+--- a/arch/arm64/kvm/hyp/nvhe/switch.c
++++ b/arch/arm64/kvm/hyp/nvhe/switch.c
+@@ -224,19 +224,21 @@ static const exit_handler_fn *kvm_get_ex
+ return hyp_exit_handlers;
+ }
+
+-/*
+- * Some guests (e.g., protected VMs) are not be allowed to run in AArch32.
+- * The ARMv8 architecture does not give the hypervisor a mechanism to prevent a
+- * guest from dropping to AArch32 EL0 if implemented by the CPU. If the
+- * hypervisor spots a guest in such a state ensure it is handled, and don't
+- * trust the host to spot or fix it. The check below is based on the one in
+- * kvm_arch_vcpu_ioctl_run().
+- *
+- * Returns false if the guest ran in AArch32 when it shouldn't have, and
+- * thus should exit to the host, or true if a the guest run loop can continue.
+- */
+-static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code)
++static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
+ {
++ const exit_handler_fn *handlers = kvm_get_exit_handler_array(vcpu);
++
++ synchronize_vcpu_pstate(vcpu, exit_code);
++
++ /*
++ * Some guests (e.g., protected VMs) are not allowed to run in
++ * AArch32. The ARMv8 architecture does not give the hypervisor a
++ * mechanism to prevent a guest from dropping to AArch32 EL0 if
++ * implemented by the CPU. If the hypervisor spots a guest in such a
++ * state ensure it is handled, and don't trust the host to spot or fix
++ * it. The check below is based on the one in
++ * kvm_arch_vcpu_ioctl_run().
++ */
+ if (unlikely(vcpu_is_protected(vcpu) && vcpu_mode_is_32bit(vcpu))) {
+ /*
+ * As we have caught the guest red-handed, decide that it isn't
+@@ -249,6 +251,8 @@ static void early_exit_filter(struct kvm
+ *exit_code &= BIT(ARM_EXIT_WITH_SERROR_BIT);
+ *exit_code |= ARM_EXCEPTION_IL;
+ }
++
++ return __fixup_guest_exit(vcpu, exit_code, handlers);
+ }
+
+ /* Switch to the guest for legacy non-VHE systems */
+--- a/arch/arm64/kvm/hyp/vhe/switch.c
++++ b/arch/arm64/kvm/hyp/vhe/switch.c
+@@ -423,13 +423,10 @@ static const exit_handler_fn hyp_exit_ha
+ [ESR_ELx_EC_MOPS] = kvm_hyp_handle_mops,
+ };
+
+-static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu)
++static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
+ {
+- return hyp_exit_handlers;
+-}
++ synchronize_vcpu_pstate(vcpu, exit_code);
+
+-static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code)
+-{
+ /*
+ * If we were in HYP context on entry, adjust the PSTATE view
+ * so that the usual helpers work correctly.
+@@ -449,6 +446,8 @@ static void early_exit_filter(struct kvm
+ *vcpu_cpsr(vcpu) &= ~(PSR_MODE_MASK | PSR_MODE32_BIT);
+ *vcpu_cpsr(vcpu) |= mode;
+ }
++
++ return __fixup_guest_exit(vcpu, exit_code, hyp_exit_handlers);
+ }
+
+ /* Switch to the guest for VHE systems running in EL2 */
--- /dev/null
+From stable+bounces-124194-greg=kroah.com@vger.kernel.org Thu Mar 13 00:50:09 2025
+From: Mark Brown <broonie@kernel.org>
+Date: Wed, 12 Mar 2025 23:49:11 +0000
+Subject: KVM: arm64: Remove host FPSIMD saving for non-protected KVM
+To: Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Marc Zyngier <maz@kernel.org>, Oliver Upton <oliver.upton@linux.dev>, Joey Gouly <joey.gouly@arm.com>, Suzuki K Poulose <suzuki.poulose@arm.com>, Catalin Marinas <catalin.marinas@arm.com>, Will Deacon <will@kernel.org>
+Cc: linux-arm-kernel@lists.infradead.org, kvmarm@lists.linux.dev, linux-kernel@vger.kernel.org, stable@vger.kernel.org, Mark Brown <broonie@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Fuad Tabba <tabba@google.com>
+Message-ID: <20250312-stable-sve-6-13-v1-3-c7ba07a6f4f7@kernel.org>
+
+From: Mark Rutland <mark.rutland@arm.com>
+
+[ Upstream commit 8eca7f6d5100b6997df4f532090bc3f7e0203bef ]
+
+Now that the host eagerly saves its own FPSIMD/SVE/SME state,
+non-protected KVM never needs to save the host FPSIMD/SVE/SME state,
+and the code to do this is never used. Protected KVM still needs to
+save/restore the host FPSIMD/SVE state to avoid leaking guest state to
+the host (and to avoid revealing to the host whether the guest used
+FPSIMD/SVE/SME), and that code needs to be retained.
+
+Remove the unused code and data structures.
+
+To avoid the need for a stub copy of kvm_hyp_save_fpsimd_host() in the
+VHE hyp code, the nVHE/hVHE version is moved into the shared switch
+header, where it is only invoked when KVM is in protected mode.
+
+Signed-off-by: Mark Rutland <mark.rutland@arm.com>
+Reviewed-by: Mark Brown <broonie@kernel.org>
+Tested-by: Mark Brown <broonie@kernel.org>
+Acked-by: Will Deacon <will@kernel.org>
+Cc: Catalin Marinas <catalin.marinas@arm.com>
+Cc: Fuad Tabba <tabba@google.com>
+Cc: Marc Zyngier <maz@kernel.org>
+Cc: Oliver Upton <oliver.upton@linux.dev>
+Reviewed-by: Oliver Upton <oliver.upton@linux.dev>
+Link: https://lore.kernel.org/r/20250210195226.1215254-3-mark.rutland@arm.com
+Signed-off-by: Marc Zyngier <maz@kernel.org>
+[CPACR_EL1_ZEN -> CPACR_ELx_ZEN -- broonie]
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm64/include/asm/kvm_host.h | 18 ++++--------------
+ arch/arm64/kvm/arm.c | 8 --------
+ arch/arm64/kvm/fpsimd.c | 2 --
+ arch/arm64/kvm/hyp/include/hyp/switch.h | 25 +++++++++++++++++++++++--
+ arch/arm64/kvm/hyp/nvhe/hyp-main.c | 2 +-
+ arch/arm64/kvm/hyp/nvhe/switch.c | 28 ----------------------------
+ arch/arm64/kvm/hyp/vhe/switch.c | 8 --------
+ 7 files changed, 28 insertions(+), 63 deletions(-)
+
+--- a/arch/arm64/include/asm/kvm_host.h
++++ b/arch/arm64/include/asm/kvm_host.h
+@@ -613,23 +613,13 @@ struct kvm_host_data {
+ struct kvm_cpu_context host_ctxt;
+
+ /*
+- * All pointers in this union are hyp VA.
++ * Hyp VA.
+ * sve_state is only used in pKVM and if system_supports_sve().
+ */
+- union {
+- struct user_fpsimd_state *fpsimd_state;
+- struct cpu_sve_state *sve_state;
+- };
++ struct cpu_sve_state *sve_state;
+
+- union {
+- /* HYP VA pointer to the host storage for FPMR */
+- u64 *fpmr_ptr;
+- /*
+- * Used by pKVM only, as it needs to provide storage
+- * for the host
+- */
+- u64 fpmr;
+- };
++ /* Used by pKVM only. */
++ u64 fpmr;
+
+ /* Ownership of the FP regs */
+ enum {
+--- a/arch/arm64/kvm/arm.c
++++ b/arch/arm64/kvm/arm.c
+@@ -2468,14 +2468,6 @@ static void finalize_init_hyp_mode(void)
+ per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_state =
+ kern_hyp_va(sve_state);
+ }
+- } else {
+- for_each_possible_cpu(cpu) {
+- struct user_fpsimd_state *fpsimd_state;
+-
+- fpsimd_state = &per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->host_ctxt.fp_regs;
+- per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->fpsimd_state =
+- kern_hyp_va(fpsimd_state);
+- }
+ }
+ }
+
+--- a/arch/arm64/kvm/fpsimd.c
++++ b/arch/arm64/kvm/fpsimd.c
+@@ -64,8 +64,6 @@ void kvm_arch_vcpu_load_fp(struct kvm_vc
+ */
+ fpsimd_save_and_flush_cpu_state();
+ *host_data_ptr(fp_owner) = FP_STATE_FREE;
+- *host_data_ptr(fpsimd_state) = NULL;
+- *host_data_ptr(fpmr_ptr) = NULL;
+
+ vcpu_clear_flag(vcpu, HOST_SVE_ENABLED);
+ if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN)
+--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
++++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
+@@ -375,7 +375,28 @@ static inline void __hyp_sve_save_host(v
+ true);
+ }
+
+-static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu);
++static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu)
++{
++ /*
++ * Non-protected kvm relies on the host restoring its sve state.
++ * Protected kvm restores the host's sve state as not to reveal that
++ * fpsimd was used by a guest nor leak upper sve bits.
++ */
++ if (system_supports_sve()) {
++ __hyp_sve_save_host();
++
++ /* Re-enable SVE traps if not supported for the guest vcpu. */
++ if (!vcpu_has_sve(vcpu))
++ cpacr_clear_set(CPACR_ELx_ZEN, 0);
++
++ } else {
++ __fpsimd_save_state(host_data_ptr(host_ctxt.fp_regs));
++ }
++
++ if (kvm_has_fpmr(kern_hyp_va(vcpu->kvm)))
++ *host_data_ptr(fpmr) = read_sysreg_s(SYS_FPMR);
++}
++
+
+ /*
+ * We trap the first access to the FP/SIMD to save the host context and
+@@ -425,7 +446,7 @@ static bool kvm_hyp_handle_fpsimd(struct
+ isb();
+
+ /* Write out the host state if it's in the registers */
+- if (host_owns_fp_regs())
++ if (is_protected_kvm_enabled() && host_owns_fp_regs())
+ kvm_hyp_save_fpsimd_host(vcpu);
+
+ /* Restore the guest state */
+--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
++++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+@@ -83,7 +83,7 @@ static void fpsimd_sve_sync(struct kvm_v
+ if (system_supports_sve())
+ __hyp_sve_restore_host();
+ else
+- __fpsimd_restore_state(*host_data_ptr(fpsimd_state));
++ __fpsimd_restore_state(host_data_ptr(host_ctxt.fp_regs));
+
+ if (has_fpmr)
+ write_sysreg_s(*host_data_ptr(fpmr), SYS_FPMR);
+--- a/arch/arm64/kvm/hyp/nvhe/switch.c
++++ b/arch/arm64/kvm/hyp/nvhe/switch.c
+@@ -193,34 +193,6 @@ static bool kvm_handle_pvm_sys64(struct
+ kvm_handle_pvm_sysreg(vcpu, exit_code));
+ }
+
+-static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu)
+-{
+- /*
+- * Non-protected kvm relies on the host restoring its sve state.
+- * Protected kvm restores the host's sve state as not to reveal that
+- * fpsimd was used by a guest nor leak upper sve bits.
+- */
+- if (unlikely(is_protected_kvm_enabled() && system_supports_sve())) {
+- __hyp_sve_save_host();
+-
+- /* Re-enable SVE traps if not supported for the guest vcpu. */
+- if (!vcpu_has_sve(vcpu))
+- cpacr_clear_set(CPACR_ELx_ZEN, 0);
+-
+- } else {
+- __fpsimd_save_state(*host_data_ptr(fpsimd_state));
+- }
+-
+- if (kvm_has_fpmr(kern_hyp_va(vcpu->kvm))) {
+- u64 val = read_sysreg_s(SYS_FPMR);
+-
+- if (unlikely(is_protected_kvm_enabled()))
+- *host_data_ptr(fpmr) = val;
+- else
+- **host_data_ptr(fpmr_ptr) = val;
+- }
+-}
+-
+ static const exit_handler_fn hyp_exit_handlers[] = {
+ [0 ... ESR_ELx_EC_MAX] = NULL,
+ [ESR_ELx_EC_CP15_32] = kvm_hyp_handle_cp15_32,
+--- a/arch/arm64/kvm/hyp/vhe/switch.c
++++ b/arch/arm64/kvm/hyp/vhe/switch.c
+@@ -309,14 +309,6 @@ static bool kvm_hyp_handle_eret(struct k
+ return true;
+ }
+
+-static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu)
+-{
+- __fpsimd_save_state(*host_data_ptr(fpsimd_state));
+-
+- if (kvm_has_fpmr(vcpu->kvm))
+- **host_data_ptr(fpmr_ptr) = read_sysreg_s(SYS_FPMR);
+-}
+-
+ static bool kvm_hyp_handle_tlbi_el2(struct kvm_vcpu *vcpu, u64 *exit_code)
+ {
+ int ret = -EINVAL;
--- /dev/null
+From broonie@kernel.org Thu Mar 13 00:49:44 2025
+From: Mark Brown <broonie@kernel.org>
+Date: Wed, 12 Mar 2025 23:49:13 +0000
+Subject: KVM: arm64: Remove VHE host restore of CPACR_EL1.SMEN
+To: Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Marc Zyngier <maz@kernel.org>, Oliver Upton <oliver.upton@linux.dev>, Joey Gouly <joey.gouly@arm.com>, Suzuki K Poulose <suzuki.poulose@arm.com>, Catalin Marinas <catalin.marinas@arm.com>, Will Deacon <will@kernel.org>
+Cc: linux-arm-kernel@lists.infradead.org, kvmarm@lists.linux.dev, linux-kernel@vger.kernel.org, stable@vger.kernel.org, Mark Brown <broonie@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Fuad Tabba <tabba@google.com>
+Message-ID: <20250312-stable-sve-6-13-v1-5-c7ba07a6f4f7@kernel.org>
+
+From: Mark Rutland <mark.rutland@arm.com>
+
+[ Upstream commit 407a99c4654e8ea65393f412c421a55cac539f5b ]
+
+When KVM is in VHE mode, the host kernel tries to save and restore the
+configuration of CPACR_EL1.SMEN (i.e. CPTR_EL2.SMEN when HCR_EL2.E2H=1)
+across kvm_arch_vcpu_load_fp() and kvm_arch_vcpu_put_fp(), since the
+configuration may be clobbered by hyp when running a vCPU. This logic
+has historically been broken, and is currently redundant.
+
+This logic was originally introduced in commit:
+
+ 861262ab86270206 ("KVM: arm64: Handle SME host state when running guests")
+
+At the time, the VHE hyp code would reset CPTR_EL2.SMEN to 0b00 when
+returning to the host, trapping host access to SME state. Unfortunately,
+this was unsafe as the host could take a softirq before calling
+kvm_arch_vcpu_put_fp(), and if a softirq handler were to use kernel mode
+NEON the resulting attempt to save the live FPSIMD/SVE/SME state would
+result in a fatal trap.
+
+That issue was limited to VHE mode. For nVHE/hVHE modes, KVM always
+saved/restored the host kernel's CPACR_EL1 value, and configured
+CPTR_EL2.TSM to 0b0, ensuring that host usage of SME would not be
+trapped.
+
+The issue above was incidentally fixed by commit:
+
+ 375110ab51dec5dc ("KVM: arm64: Fix resetting SME trap values on reset for (h)VHE")
+
+That commit changed the VHE hyp code to configure CPTR_EL2.SMEN to 0b01
+when returning to the host, permitting host kernel usage of SME,
+avoiding the issue described above. At the time, this was not identified
+as a fix for commit 861262ab86270206.
+
+Now that the host eagerly saves and unbinds its own FPSIMD/SVE/SME
+state, there's no need to save/restore the state of the EL0 SME trap.
+The kernel can safely save/restore state without trapping, as described
+above, and will restore userspace state (including trap controls) before
+returning to userspace.
+
+Remove the redundant logic.
+
+Signed-off-by: Mark Rutland <mark.rutland@arm.com>
+Reviewed-by: Mark Brown <broonie@kernel.org>
+Tested-by: Mark Brown <broonie@kernel.org>
+Acked-by: Will Deacon <will@kernel.org>
+Cc: Catalin Marinas <catalin.marinas@arm.com>
+Cc: Fuad Tabba <tabba@google.com>
+Cc: Marc Zyngier <maz@kernel.org>
+Cc: Oliver Upton <oliver.upton@linux.dev>
+Reviewed-by: Oliver Upton <oliver.upton@linux.dev>
+Link: https://lore.kernel.org/r/20250210195226.1215254-5-mark.rutland@arm.com
+Signed-off-by: Marc Zyngier <maz@kernel.org>
+[Update for rework of flags storage -- broonie]
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm64/include/asm/kvm_host.h | 2 --
+ arch/arm64/kvm/fpsimd.c | 21 ---------------------
+ 2 files changed, 23 deletions(-)
+
+--- a/arch/arm64/include/asm/kvm_host.h
++++ b/arch/arm64/include/asm/kvm_host.h
+@@ -902,8 +902,6 @@ struct kvm_vcpu_arch {
+ /* Save TRBE context if active */
+ #define DEBUG_STATE_SAVE_TRBE __vcpu_single_flag(iflags, BIT(6))
+
+-/* SME enabled for EL0 */
+-#define HOST_SME_ENABLED __vcpu_single_flag(sflags, BIT(1))
+ /* Physical CPU not in supported_cpus */
+ #define ON_UNSUPPORTED_CPU __vcpu_single_flag(sflags, BIT(2))
+ /* WFIT instruction trapped */
+--- a/arch/arm64/kvm/fpsimd.c
++++ b/arch/arm64/kvm/fpsimd.c
+@@ -65,12 +65,6 @@ void kvm_arch_vcpu_load_fp(struct kvm_vc
+ fpsimd_save_and_flush_cpu_state();
+ *host_data_ptr(fp_owner) = FP_STATE_FREE;
+
+- if (system_supports_sme()) {
+- vcpu_clear_flag(vcpu, HOST_SME_ENABLED);
+- if (read_sysreg(cpacr_el1) & CPACR_EL1_SMEN_EL0EN)
+- vcpu_set_flag(vcpu, HOST_SME_ENABLED);
+- }
+-
+ /*
+ * If normal guests gain SME support, maintain this behavior for pKVM
+ * guests, which don't support SME.
+@@ -141,21 +135,6 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcp
+
+ local_irq_save(flags);
+
+- /*
+- * If we have VHE then the Hyp code will reset CPACR_EL1 to
+- * the default value and we need to reenable SME.
+- */
+- if (has_vhe() && system_supports_sme()) {
+- /* Also restore EL0 state seen on entry */
+- if (vcpu_get_flag(vcpu, HOST_SME_ENABLED))
+- sysreg_clear_set(CPACR_EL1, 0, CPACR_ELx_SMEN);
+- else
+- sysreg_clear_set(CPACR_EL1,
+- CPACR_EL1_SMEN_EL0EN,
+- CPACR_EL1_SMEN_EL1EN);
+- isb();
+- }
+-
+ if (guest_owns_fp_regs()) {
+ if (vcpu_has_sve(vcpu)) {
+ u64 zcr = read_sysreg_el1(SYS_ZCR);
--- /dev/null
+From stable+bounces-124195-greg=kroah.com@vger.kernel.org Thu Mar 13 00:50:19 2025
+From: Mark Brown <broonie@kernel.org>
+Date: Wed, 12 Mar 2025 23:49:12 +0000
+Subject: KVM: arm64: Remove VHE host restore of CPACR_EL1.ZEN
+To: Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Marc Zyngier <maz@kernel.org>, Oliver Upton <oliver.upton@linux.dev>, Joey Gouly <joey.gouly@arm.com>, Suzuki K Poulose <suzuki.poulose@arm.com>, Catalin Marinas <catalin.marinas@arm.com>, Will Deacon <will@kernel.org>
+Cc: linux-arm-kernel@lists.infradead.org, kvmarm@lists.linux.dev, linux-kernel@vger.kernel.org, stable@vger.kernel.org, Mark Brown <broonie@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Fuad Tabba <tabba@google.com>
+Message-ID: <20250312-stable-sve-6-13-v1-4-c7ba07a6f4f7@kernel.org>
+
+From: Mark Rutland <mark.rutland@arm.com>
+
+[ Upstream commit 459f059be702056d91537b99a129994aa6ccdd35 ]
+
+When KVM is in VHE mode, the host kernel tries to save and restore the
+configuration of CPACR_EL1.ZEN (i.e. CPTR_EL2.ZEN when HCR_EL2.E2H=1)
+across kvm_arch_vcpu_load_fp() and kvm_arch_vcpu_put_fp(), since the
+configuration may be clobbered by hyp when running a vCPU. This logic is
+currently redundant.
+
+The VHE hyp code unconditionally configures CPTR_EL2.ZEN to 0b01 when
+returning to the host, permitting host kernel usage of SVE.
+
+Now that the host eagerly saves and unbinds its own FPSIMD/SVE/SME
+state, there's no need to save/restore the state of the EL0 SVE trap.
+The kernel can safely save/restore state without trapping, as described
+above, and will restore userspace state (including trap controls) before
+returning to userspace.
+
+Remove the redundant logic.
+
+Signed-off-by: Mark Rutland <mark.rutland@arm.com>
+Reviewed-by: Mark Brown <broonie@kernel.org>
+Tested-by: Mark Brown <broonie@kernel.org>
+Acked-by: Will Deacon <will@kernel.org>
+Cc: Catalin Marinas <catalin.marinas@arm.com>
+Cc: Fuad Tabba <tabba@google.com>
+Cc: Marc Zyngier <maz@kernel.org>
+Cc: Oliver Upton <oliver.upton@linux.dev>
+Reviewed-by: Oliver Upton <oliver.upton@linux.dev>
+Link: https://lore.kernel.org/r/20250210195226.1215254-4-mark.rutland@arm.com
+Signed-off-by: Marc Zyngier <maz@kernel.org>
+[Rework for refactoring of where the flags are stored -- broonie]
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm64/include/asm/kvm_host.h | 2 --
+ arch/arm64/kvm/fpsimd.c | 16 ----------------
+ 2 files changed, 18 deletions(-)
+
+--- a/arch/arm64/include/asm/kvm_host.h
++++ b/arch/arm64/include/asm/kvm_host.h
+@@ -902,8 +902,6 @@ struct kvm_vcpu_arch {
+ /* Save TRBE context if active */
+ #define DEBUG_STATE_SAVE_TRBE __vcpu_single_flag(iflags, BIT(6))
+
+-/* SVE enabled for host EL0 */
+-#define HOST_SVE_ENABLED __vcpu_single_flag(sflags, BIT(0))
+ /* SME enabled for EL0 */
+ #define HOST_SME_ENABLED __vcpu_single_flag(sflags, BIT(1))
+ /* Physical CPU not in supported_cpus */
+--- a/arch/arm64/kvm/fpsimd.c
++++ b/arch/arm64/kvm/fpsimd.c
+@@ -65,10 +65,6 @@ void kvm_arch_vcpu_load_fp(struct kvm_vc
+ fpsimd_save_and_flush_cpu_state();
+ *host_data_ptr(fp_owner) = FP_STATE_FREE;
+
+- vcpu_clear_flag(vcpu, HOST_SVE_ENABLED);
+- if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN)
+- vcpu_set_flag(vcpu, HOST_SVE_ENABLED);
+-
+ if (system_supports_sme()) {
+ vcpu_clear_flag(vcpu, HOST_SME_ENABLED);
+ if (read_sysreg(cpacr_el1) & CPACR_EL1_SMEN_EL0EN)
+@@ -202,18 +198,6 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcp
+ * when needed.
+ */
+ fpsimd_save_and_flush_cpu_state();
+- } else if (has_vhe() && system_supports_sve()) {
+- /*
+- * The FPSIMD/SVE state in the CPU has not been touched, and we
+- * have SVE (and VHE): CPACR_EL1 (alias CPTR_EL2) has been
+- * reset by kvm_reset_cptr_el2() in the Hyp code, disabling SVE
+- * for EL0. To avoid spurious traps, restore the trap state
+- * seen by kvm_arch_vcpu_load_fp():
+- */
+- if (vcpu_get_flag(vcpu, HOST_SVE_ENABLED))
+- sysreg_clear_set(CPACR_EL1, 0, CPACR_EL1_ZEN_EL0EN);
+- else
+- sysreg_clear_set(CPACR_EL1, CPACR_EL1_ZEN_EL0EN, 0);
+ }
+
+ local_irq_restore(flags);
--- /dev/null
+From stable+bounces-124193-greg=kroah.com@vger.kernel.org Thu Mar 13 00:49:59 2025
+From: Mark Brown <broonie@kernel.org>
+Date: Wed, 12 Mar 2025 23:49:10 +0000
+Subject: KVM: arm64: Unconditionally save+flush host FPSIMD/SVE/SME state
+To: Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Marc Zyngier <maz@kernel.org>, Oliver Upton <oliver.upton@linux.dev>, Joey Gouly <joey.gouly@arm.com>, Suzuki K Poulose <suzuki.poulose@arm.com>, Catalin Marinas <catalin.marinas@arm.com>, Will Deacon <will@kernel.org>
+Cc: linux-arm-kernel@lists.infradead.org, kvmarm@lists.linux.dev, linux-kernel@vger.kernel.org, stable@vger.kernel.org, Mark Brown <broonie@kernel.org>, Mark Rutland <mark.rutland@arm.com>, Eric Auger <eauger@redhat.com>, Wilco Dijkstra <wilco.dijkstra@arm.com>, Eric Auger <eric.auger@redhat.com>, Florian Weimer <fweimer@redhat.com>, Fuad Tabba <tabba@google.com>, Jeremy Linton <jeremy.linton@arm.com>, Paolo Bonzini <pbonzini@redhat.com>
+Message-ID: <20250312-stable-sve-6-13-v1-2-c7ba07a6f4f7@kernel.org>
+
+From: Mark Rutland <mark.rutland@arm.com>
+
+[ Upstream commit fbc7e61195e23f744814e78524b73b59faa54ab4 ]
+
+There are several problems with the way hyp code lazily saves the host's
+FPSIMD/SVE state, including:
+
+* Host SVE being discarded unexpectedly due to inconsistent
+ configuration of TIF_SVE and CPACR_ELx.ZEN. This has been seen to
+ result in QEMU crashes where SVE is used by memmove(), as reported by
+ Eric Auger:
+
+ https://issues.redhat.com/browse/RHEL-68997
+
+* Host SVE state is discarded *after* modification by ptrace, which was an
+ unintentional ptrace ABI change introduced with lazy discarding of SVE state.
+
+* The host FPMR value can be discarded when running a non-protected VM,
+ where FPMR support is not exposed to a VM, and that VM uses
+ FPSIMD/SVE. In these cases the hyp code does not save the host's FPMR
+ before unbinding the host's FPSIMD/SVE/SME state, leaving a stale
+ value in memory.
+
+Avoid these by eagerly saving and "flushing" the host's FPSIMD/SVE/SME
+state when loading a vCPU such that KVM does not need to save any of the
+host's FPSIMD/SVE/SME state. For clarity, fpsimd_kvm_prepare() is
+removed and the necessary call to fpsimd_save_and_flush_cpu_state() is
+placed in kvm_arch_vcpu_load_fp(). As 'fpsimd_state' and 'fpmr_ptr'
+should not be used, they are set to NULL; all uses of these will be
+removed in subsequent patches.
+
+Historical problems go back at least as far as v5.17, e.g. erroneous
+assumptions about TIF_SVE being clear in commit:
+
+ 8383741ab2e773a9 ("KVM: arm64: Get rid of host SVE tracking/saving")
+
+... and so this eager save+flush probably needs to be backported to ALL
+stable trees.
+
+Fixes: 93ae6b01bafee8fa ("KVM: arm64: Discard any SVE state when entering KVM guests")
+Fixes: 8c845e2731041f0f ("arm64/sve: Leave SVE enabled on syscall if we don't context switch")
+Fixes: ef3be86021c3bdf3 ("KVM: arm64: Add save/restore support for FPMR")
+Reported-by: Eric Auger <eauger@redhat.com>
+Reported-by: Wilco Dijkstra <wilco.dijkstra@arm.com>
+Reviewed-by: Mark Brown <broonie@kernel.org>
+Tested-by: Mark Brown <broonie@kernel.org>
+Tested-by: Eric Auger <eric.auger@redhat.com>
+Acked-by: Will Deacon <will@kernel.org>
+Cc: Catalin Marinas <catalin.marinas@arm.com>
+Cc: Florian Weimer <fweimer@redhat.com>
+Cc: Fuad Tabba <tabba@google.com>
+Cc: Jeremy Linton <jeremy.linton@arm.com>
+Cc: Marc Zyngier <maz@kernel.org>
+Cc: Oliver Upton <oliver.upton@linux.dev>
+Cc: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Mark Rutland <mark.rutland@arm.com>
+Reviewed-by: Oliver Upton <oliver.upton@linux.dev>
+Link: https://lore.kernel.org/r/20250210195226.1215254-2-mark.rutland@arm.com
+Signed-off-by: Marc Zyngier <maz@kernel.org>
+[ Mark: Handle vcpu/host flag conflict ]
+Signed-off-by: Mark Rutland <mark.rutland@arm.com>
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm64/kernel/fpsimd.c | 25 -------------------------
+ arch/arm64/kvm/fpsimd.c | 35 ++++++++++-------------------------
+ 2 files changed, 10 insertions(+), 50 deletions(-)
+
+--- a/arch/arm64/kernel/fpsimd.c
++++ b/arch/arm64/kernel/fpsimd.c
+@@ -1695,31 +1695,6 @@ void fpsimd_signal_preserve_current_stat
+ }
+
+ /*
+- * Called by KVM when entering the guest.
+- */
+-void fpsimd_kvm_prepare(void)
+-{
+- if (!system_supports_sve())
+- return;
+-
+- /*
+- * KVM does not save host SVE state since we can only enter
+- * the guest from a syscall so the ABI means that only the
+- * non-saved SVE state needs to be saved. If we have left
+- * SVE enabled for performance reasons then update the task
+- * state to be FPSIMD only.
+- */
+- get_cpu_fpsimd_context();
+-
+- if (test_and_clear_thread_flag(TIF_SVE)) {
+- sve_to_fpsimd(current);
+- current->thread.fp_type = FP_STATE_FPSIMD;
+- }
+-
+- put_cpu_fpsimd_context();
+-}
+-
+-/*
+ * Associate current's FPSIMD context with this cpu
+ * The caller must have ownership of the cpu FPSIMD context before calling
+ * this function.
+--- a/arch/arm64/kvm/fpsimd.c
++++ b/arch/arm64/kvm/fpsimd.c
+@@ -54,16 +54,18 @@ void kvm_arch_vcpu_load_fp(struct kvm_vc
+ if (!system_supports_fpsimd())
+ return;
+
+- fpsimd_kvm_prepare();
+-
+ /*
+- * We will check TIF_FOREIGN_FPSTATE just before entering the
+- * guest in kvm_arch_vcpu_ctxflush_fp() and override this to
+- * FP_STATE_FREE if the flag set.
++ * Ensure that any host FPSIMD/SVE/SME state is saved and unbound such
++ * that the host kernel is responsible for restoring this state upon
++ * return to userspace, and the hyp code doesn't need to save anything.
++ *
++ * When the host may use SME, fpsimd_save_and_flush_cpu_state() ensures
++ * that PSTATE.{SM,ZA} == {0,0}.
+ */
+- *host_data_ptr(fp_owner) = FP_STATE_HOST_OWNED;
+- *host_data_ptr(fpsimd_state) = kern_hyp_va(&current->thread.uw.fpsimd_state);
+- *host_data_ptr(fpmr_ptr) = kern_hyp_va(&current->thread.uw.fpmr);
++ fpsimd_save_and_flush_cpu_state();
++ *host_data_ptr(fp_owner) = FP_STATE_FREE;
++ *host_data_ptr(fpsimd_state) = NULL;
++ *host_data_ptr(fpmr_ptr) = NULL;
+
+ vcpu_clear_flag(vcpu, HOST_SVE_ENABLED);
+ if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN)
+@@ -73,23 +75,6 @@ void kvm_arch_vcpu_load_fp(struct kvm_vc
+ vcpu_clear_flag(vcpu, HOST_SME_ENABLED);
+ if (read_sysreg(cpacr_el1) & CPACR_EL1_SMEN_EL0EN)
+ vcpu_set_flag(vcpu, HOST_SME_ENABLED);
+-
+- /*
+- * If PSTATE.SM is enabled then save any pending FP
+- * state and disable PSTATE.SM. If we leave PSTATE.SM
+- * enabled and the guest does not enable SME via
+- * CPACR_EL1.SMEN then operations that should be valid
+- * may generate SME traps from EL1 to EL1 which we
+- * can't intercept and which would confuse the guest.
+- *
+- * Do the same for PSTATE.ZA in the case where there
+- * is state in the registers which has not already
+- * been saved, this is very unlikely to happen.
+- */
+- if (read_sysreg_s(SYS_SVCR) & (SVCR_SM_MASK | SVCR_ZA_MASK)) {
+- *host_data_ptr(fp_owner) = FP_STATE_FREE;
+- fpsimd_save_and_flush_cpu_state();
+- }
+ }
+
+ /*
--- /dev/null
+From c50f8e6053b0503375c2975bf47f182445aebb4c Mon Sep 17 00:00:00 2001
+From: Barry Song <v-songbaohua@oppo.com>
+Date: Wed, 26 Feb 2025 13:14:00 +1300
+Subject: mm: fix kernel BUG when userfaultfd_move encounters swapcache
+
+From: Barry Song <v-songbaohua@oppo.com>
+
+commit c50f8e6053b0503375c2975bf47f182445aebb4c upstream.
+
+userfaultfd_move() checks whether the PTE entry is present or a
+swap entry.
+
+- If the PTE entry is present, move_present_pte() handles folio
+ migration by setting:
+
+ src_folio->index = linear_page_index(dst_vma, dst_addr);
+
+- If the PTE entry is a swap entry, move_swap_pte() simply copies
+ the PTE to the new dst_addr.
+
+This approach is incorrect because, even if the PTE is a swap entry,
+it can still reference a folio that remains in the swap cache.
+
+This creates a race window between steps 2 and 4.
+ 1. add_to_swap: The folio is added to the swapcache.
+ 2. try_to_unmap: PTEs are converted to swap entries.
+ 3. pageout: The folio is written back.
+ 4. Swapcache is cleared.
+If userfaultfd_move() occurs in the window between steps 2 and 4,
+after the swap PTE has been moved to the destination, accessing the
+destination triggers do_swap_page(), which may locate the folio in
+the swapcache. However, since the folio's index has not been updated
+to match the destination VMA, do_swap_page() will detect a mismatch.
+
+This can result in two critical issues depending on the system
+configuration.
+
+If KSM is disabled, both small and large folios can trigger a BUG
+during the add_rmap operation due to:
+
+ page_pgoff(folio, page) != linear_page_index(vma, address)
+
+[ 13.336953] page: refcount:6 mapcount:1 mapping:00000000f43db19c index:0xffffaf150 pfn:0x4667c
+[ 13.337520] head: order:2 mapcount:1 entire_mapcount:0 nr_pages_mapped:1 pincount:0
+[ 13.337716] memcg:ffff00000405f000
+[ 13.337849] anon flags: 0x3fffc0000020459(locked|uptodate|dirty|owner_priv_1|head|swapbacked|node=0|zone=0|lastcpupid=0xffff)
+[ 13.338630] raw: 03fffc0000020459 ffff80008507b538 ffff80008507b538 ffff000006260361
+[ 13.338831] raw: 0000000ffffaf150 0000000000004000 0000000600000000 ffff00000405f000
+[ 13.339031] head: 03fffc0000020459 ffff80008507b538 ffff80008507b538 ffff000006260361
+[ 13.339204] head: 0000000ffffaf150 0000000000004000 0000000600000000 ffff00000405f000
+[ 13.339375] head: 03fffc0000000202 fffffdffc0199f01 ffffffff00000000 0000000000000001
+[ 13.339546] head: 0000000000000004 0000000000000000 00000000ffffffff 0000000000000000
+[ 13.339736] page dumped because: VM_BUG_ON_PAGE(page_pgoff(folio, page) != linear_page_index(vma, address))
+[ 13.340190] ------------[ cut here ]------------
+[ 13.340316] kernel BUG at mm/rmap.c:1380!
+[ 13.340683] Internal error: Oops - BUG: 00000000f2000800 [#1] PREEMPT SMP
+[ 13.340969] Modules linked in:
+[ 13.341257] CPU: 1 UID: 0 PID: 107 Comm: a.out Not tainted 6.14.0-rc3-gcf42737e247a-dirty #299
+[ 13.341470] Hardware name: linux,dummy-virt (DT)
+[ 13.341671] pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
+[ 13.341815] pc : __page_check_anon_rmap+0xa0/0xb0
+[ 13.341920] lr : __page_check_anon_rmap+0xa0/0xb0
+[ 13.342018] sp : ffff80008752bb20
+[ 13.342093] x29: ffff80008752bb20 x28: fffffdffc0199f00 x27: 0000000000000001
+[ 13.342404] x26: 0000000000000000 x25: 0000000000000001 x24: 0000000000000001
+[ 13.342575] x23: 0000ffffaf0d0000 x22: 0000ffffaf0d0000 x21: fffffdffc0199f00
+[ 13.342731] x20: fffffdffc0199f00 x19: ffff000006210700 x18: 00000000ffffffff
+[ 13.342881] x17: 6c203d2120296567 x16: 6170202c6f696c6f x15: 662866666f67705f
+[ 13.343033] x14: 6567617028454741 x13: 2929737365726464 x12: ffff800083728ab0
+[ 13.343183] x11: ffff800082996bf8 x10: 0000000000000fd7 x9 : ffff80008011bc40
+[ 13.343351] x8 : 0000000000017fe8 x7 : 00000000fffff000 x6 : ffff8000829eebf8
+[ 13.343498] x5 : c0000000fffff000 x4 : 0000000000000000 x3 : 0000000000000000
+[ 13.343645] x2 : 0000000000000000 x1 : ffff0000062db980 x0 : 000000000000005f
+[ 13.343876] Call trace:
+[ 13.344045] __page_check_anon_rmap+0xa0/0xb0 (P)
+[ 13.344234] folio_add_anon_rmap_ptes+0x22c/0x320
+[ 13.344333] do_swap_page+0x1060/0x1400
+[ 13.344417] __handle_mm_fault+0x61c/0xbc8
+[ 13.344504] handle_mm_fault+0xd8/0x2e8
+[ 13.344586] do_page_fault+0x20c/0x770
+[ 13.344673] do_translation_fault+0xb4/0xf0
+[ 13.344759] do_mem_abort+0x48/0xa0
+[ 13.344842] el0_da+0x58/0x130
+[ 13.344914] el0t_64_sync_handler+0xc4/0x138
+[ 13.345002] el0t_64_sync+0x1ac/0x1b0
+[ 13.345208] Code: aa1503e0 f000f801 910f6021 97ff5779 (d4210000)
+[ 13.345504] ---[ end trace 0000000000000000 ]---
+[ 13.345715] note: a.out[107] exited with irqs disabled
+[ 13.345954] note: a.out[107] exited with preempt_count 2
+
+If KSM is enabled, Peter Xu also discovered that do_swap_page() may
+trigger an unexpected CoW operation for small folios because
+ksm_might_need_to_copy() allocates a new folio when the folio index
+does not match linear_page_index(vma, addr).
+
+This patch also checks the swapcache when handling swap entries. If a
+match is found in the swapcache, it processes it similarly to a present
+PTE.
+However, there are some differences. For example, the folio is no longer
+exclusive because folio_try_share_anon_rmap_pte() is performed during
+unmapping.
+Furthermore, in the case of swapcache, the folio has already been
+unmapped, eliminating the risk of concurrent rmap walks and removing the
+need to acquire src_folio's anon_vma or lock.
+
+Note that for large folios, in the swapcache handling path, we directly
+return -EBUSY since split_folio() will return -EBUSY regardless of
+whether the folio is under writeback or unmapped. This is not an urgent issue,
+so a follow-up patch may address it separately.
+
+[v-songbaohua@oppo.com: minor cleanup according to Peter Xu]
+ Link: https://lkml.kernel.org/r/20250226024411.47092-1-21cnbao@gmail.com
+Link: https://lkml.kernel.org/r/20250226001400.9129-1-21cnbao@gmail.com
+Fixes: adef440691ba ("userfaultfd: UFFDIO_MOVE uABI")
+Signed-off-by: Barry Song <v-songbaohua@oppo.com>
+Acked-by: Peter Xu <peterx@redhat.com>
+Reviewed-by: Suren Baghdasaryan <surenb@google.com>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Al Viro <viro@zeniv.linux.org.uk>
+Cc: Axel Rasmussen <axelrasmussen@google.com>
+Cc: Brian Geffon <bgeffon@google.com>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: David Hildenbrand <david@redhat.com>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: Jann Horn <jannh@google.com>
+Cc: Kalesh Singh <kaleshsingh@google.com>
+Cc: Liam R. Howlett <Liam.Howlett@oracle.com>
+Cc: Lokesh Gidra <lokeshgidra@google.com>
+Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Mike Rapoport (IBM) <rppt@kernel.org>
+Cc: Nicolas Geoffray <ngeoffray@google.com>
+Cc: Ryan Roberts <ryan.roberts@arm.com>
+Cc: Shuah Khan <shuah@kernel.org>
+Cc: ZhangPeng <zhangpeng362@huawei.com>
+Cc: Tangquan Zheng <zhengtangquan@oppo.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+[ surenb: resolved merge conflict caused by the difference in
+ move_swap_pte() arguments]
+Signed-off-by: Suren Baghdasaryan <surenb@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/userfaultfd.c | 75 ++++++++++++++++++++++++++++++++++++++++++++++++-------
+ 1 file changed, 66 insertions(+), 9 deletions(-)
+
+--- a/mm/userfaultfd.c
++++ b/mm/userfaultfd.c
+@@ -18,6 +18,7 @@
+ #include <asm/tlbflush.h>
+ #include <asm/tlb.h>
+ #include "internal.h"
++#include "swap.h"
+
+ static __always_inline
+ bool validate_dst_vma(struct vm_area_struct *dst_vma, unsigned long dst_end)
+@@ -1067,15 +1068,13 @@ out:
+ return err;
+ }
+
+-static int move_swap_pte(struct mm_struct *mm,
++static int move_swap_pte(struct mm_struct *mm, struct vm_area_struct *dst_vma,
+ unsigned long dst_addr, unsigned long src_addr,
+ pte_t *dst_pte, pte_t *src_pte,
+ pte_t orig_dst_pte, pte_t orig_src_pte,
+- spinlock_t *dst_ptl, spinlock_t *src_ptl)
++ spinlock_t *dst_ptl, spinlock_t *src_ptl,
++ struct folio *src_folio)
+ {
+- if (!pte_swp_exclusive(orig_src_pte))
+- return -EBUSY;
+-
+ double_pt_lock(dst_ptl, src_ptl);
+
+ if (!pte_same(ptep_get(src_pte), orig_src_pte) ||
+@@ -1084,6 +1083,16 @@ static int move_swap_pte(struct mm_struc
+ return -EAGAIN;
+ }
+
++ /*
++ * The src_folio resides in the swapcache, requiring an update to its
++ * index and mapping to align with the dst_vma, where a swap-in may
++ * occur and hit the swapcache after moving the PTE.
++ */
++ if (src_folio) {
++ folio_move_anon_rmap(src_folio, dst_vma);
++ src_folio->index = linear_page_index(dst_vma, dst_addr);
++ }
++
+ orig_src_pte = ptep_get_and_clear(mm, src_addr, src_pte);
+ set_pte_at(mm, dst_addr, dst_pte, orig_src_pte);
+ double_pt_unlock(dst_ptl, src_ptl);
+@@ -1130,6 +1139,7 @@ static int move_pages_pte(struct mm_stru
+ __u64 mode)
+ {
+ swp_entry_t entry;
++ struct swap_info_struct *si = NULL;
+ pte_t orig_src_pte, orig_dst_pte;
+ pte_t src_folio_pte;
+ spinlock_t *src_ptl, *dst_ptl;
+@@ -1321,6 +1331,8 @@ retry:
+ orig_dst_pte, orig_src_pte,
+ dst_ptl, src_ptl, src_folio);
+ } else {
++ struct folio *folio = NULL;
++
+ entry = pte_to_swp_entry(orig_src_pte);
+ if (non_swap_entry(entry)) {
+ if (is_migration_entry(entry)) {
+@@ -1334,10 +1346,53 @@ retry:
+ goto out;
+ }
+
+- err = move_swap_pte(mm, dst_addr, src_addr,
+- dst_pte, src_pte,
+- orig_dst_pte, orig_src_pte,
+- dst_ptl, src_ptl);
++ if (!pte_swp_exclusive(orig_src_pte)) {
++ err = -EBUSY;
++ goto out;
++ }
++
++ si = get_swap_device(entry);
++ if (unlikely(!si)) {
++ err = -EAGAIN;
++ goto out;
++ }
++ /*
++ * Verify the existence of the swapcache. If present, the folio's
++ * index and mapping must be updated even when the PTE is a swap
++ * entry. The anon_vma lock is not taken during this process since
++ * the folio has already been unmapped, and the swap entry is
++ * exclusive, preventing rmap walks.
++ *
++ * For large folios, return -EBUSY immediately, as split_folio()
++ * also returns -EBUSY when attempting to split unmapped large
++ * folios in the swapcache. This issue needs to be resolved
++ * separately to allow proper handling.
++ */
++ if (!src_folio)
++ folio = filemap_get_folio(swap_address_space(entry),
++ swap_cache_index(entry));
++ if (!IS_ERR_OR_NULL(folio)) {
++ if (folio_test_large(folio)) {
++ err = -EBUSY;
++ folio_put(folio);
++ goto out;
++ }
++ src_folio = folio;
++ src_folio_pte = orig_src_pte;
++ if (!folio_trylock(src_folio)) {
++ pte_unmap(&orig_src_pte);
++ pte_unmap(&orig_dst_pte);
++ src_pte = dst_pte = NULL;
++ put_swap_device(si);
++ si = NULL;
++ /* now we can block and wait */
++ folio_lock(src_folio);
++ goto retry;
++ }
++ }
++ err = move_swap_pte(mm, dst_vma, dst_addr, src_addr, dst_pte, src_pte,
++ orig_dst_pte, orig_src_pte,
++ dst_ptl, src_ptl, src_folio);
+ }
+
+ out:
+@@ -1354,6 +1409,8 @@ out:
+ if (src_pte)
+ pte_unmap(src_pte);
+ mmu_notifier_invalidate_range_end(&range);
++ if (si)
++ put_swap_device(si);
+
+ return err;
+ }
--- /dev/null
+From dfd3df31c9db752234d7d2e09bef2aeabb643ce4 Mon Sep 17 00:00:00 2001
+From: "Uladzislau Rezki (Sony)" <urezki@gmail.com>
+Date: Fri, 28 Feb 2025 13:13:56 +0100
+Subject: mm/slab/kvfree_rcu: Switch to WQ_MEM_RECLAIM wq
+
+From: Uladzislau Rezki (Sony) <urezki@gmail.com>
+
+commit dfd3df31c9db752234d7d2e09bef2aeabb643ce4 upstream.
+
+Currently kvfree_rcu() APIs use a system workqueue which is
+"system_unbound_wq" to drive the RCU machinery to reclaim memory.
+
+Recently, it has been noted that the following kernel warning can
+be observed:
+
+<snip>
+workqueue: WQ_MEM_RECLAIM nvme-wq:nvme_scan_work is flushing !WQ_MEM_RECLAIM events_unbound:kfree_rcu_work
+ WARNING: CPU: 21 PID: 330 at kernel/workqueue.c:3719 check_flush_dependency+0x112/0x120
+ Modules linked in: intel_uncore_frequency(E) intel_uncore_frequency_common(E) skx_edac(E) ...
+ CPU: 21 UID: 0 PID: 330 Comm: kworker/u144:6 Tainted: G E 6.13.2-0_g925d379822da #1
+ Hardware name: Wiwynn Twin Lakes MP/Twin Lakes Passive MP, BIOS YMM20 02/01/2023
+ Workqueue: nvme-wq nvme_scan_work
+ RIP: 0010:check_flush_dependency+0x112/0x120
+ Code: 05 9a 40 14 02 01 48 81 c6 c0 00 00 00 48 8b 50 18 48 81 c7 c0 00 00 00 48 89 f9 48 ...
+ RSP: 0018:ffffc90000df7bd8 EFLAGS: 00010082
+ RAX: 000000000000006a RBX: ffffffff81622390 RCX: 0000000000000027
+ RDX: 00000000fffeffff RSI: 000000000057ffa8 RDI: ffff88907f960c88
+ RBP: 0000000000000000 R08: ffffffff83068e50 R09: 000000000002fffd
+ R10: 0000000000000004 R11: 0000000000000000 R12: ffff8881001a4400
+ R13: 0000000000000000 R14: ffff88907f420fb8 R15: 0000000000000000
+ FS: 0000000000000000(0000) GS:ffff88907f940000(0000) knlGS:0000000000000000
+ CR2: 00007f60c3001000 CR3: 000000107d010005 CR4: 00000000007726f0
+ PKRU: 55555554
+ Call Trace:
+ <TASK>
+ ? __warn+0xa4/0x140
+ ? check_flush_dependency+0x112/0x120
+ ? report_bug+0xe1/0x140
+ ? check_flush_dependency+0x112/0x120
+ ? handle_bug+0x5e/0x90
+ ? exc_invalid_op+0x16/0x40
+ ? asm_exc_invalid_op+0x16/0x20
+ ? timer_recalc_next_expiry+0x190/0x190
+ ? check_flush_dependency+0x112/0x120
+ ? check_flush_dependency+0x112/0x120
+ __flush_work.llvm.1643880146586177030+0x174/0x2c0
+ flush_rcu_work+0x28/0x30
+ kvfree_rcu_barrier+0x12f/0x160
+ kmem_cache_destroy+0x18/0x120
+ bioset_exit+0x10c/0x150
+ disk_release.llvm.6740012984264378178+0x61/0xd0
+ device_release+0x4f/0x90
+ kobject_put+0x95/0x180
+ nvme_put_ns+0x23/0xc0
+ nvme_remove_invalid_namespaces+0xb3/0xd0
+ nvme_scan_work+0x342/0x490
+ process_scheduled_works+0x1a2/0x370
+ worker_thread+0x2ff/0x390
+ ? pwq_release_workfn+0x1e0/0x1e0
+ kthread+0xb1/0xe0
+ ? __kthread_parkme+0x70/0x70
+ ret_from_fork+0x30/0x40
+ ? __kthread_parkme+0x70/0x70
+ ret_from_fork_asm+0x11/0x20
+ </TASK>
+ ---[ end trace 0000000000000000 ]---
+<snip>
+
+To address this, switch to using an independent WQ_MEM_RECLAIM
+workqueue, so the rules are not violated from the workqueue framework
+point of view.
+
+Apart from that, since kvfree_rcu() does reclaim memory, it is worth
+going with a WQ_MEM_RECLAIM type of wq because it is designed for
+this purpose.
+
+Fixes: 6c6c47b063b5 ("mm, slab: call kvfree_rcu_barrier() from kmem_cache_destroy()")
+Reported-by: Keith Busch <kbusch@kernel.org>
+Closes: https://lore.kernel.org/all/Z7iqJtCjHKfo8Kho@kbusch-mbp/
+Cc: stable@vger.kernel.org
+Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
+Reviewed-by: Joel Fernandes <joelagnelf@nvidia.com>
+Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
+Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/rcu/tree.c | 14 ++++++++++----
+ 1 file changed, 10 insertions(+), 4 deletions(-)
+
+--- a/kernel/rcu/tree.c
++++ b/kernel/rcu/tree.c
+@@ -3191,6 +3191,8 @@ void call_rcu(struct rcu_head *head, rcu
+ }
+ EXPORT_SYMBOL_GPL(call_rcu);
+
++static struct workqueue_struct *rcu_reclaim_wq;
++
+ /* Maximum number of jiffies to wait before draining a batch. */
+ #define KFREE_DRAIN_JIFFIES (5 * HZ)
+ #define KFREE_N_BATCHES 2
+@@ -3519,10 +3521,10 @@ __schedule_delayed_monitor_work(struct k
+ if (delayed_work_pending(&krcp->monitor_work)) {
+ delay_left = krcp->monitor_work.timer.expires - jiffies;
+ if (delay < delay_left)
+- mod_delayed_work(system_unbound_wq, &krcp->monitor_work, delay);
++ mod_delayed_work(rcu_reclaim_wq, &krcp->monitor_work, delay);
+ return;
+ }
+- queue_delayed_work(system_unbound_wq, &krcp->monitor_work, delay);
++ queue_delayed_work(rcu_reclaim_wq, &krcp->monitor_work, delay);
+ }
+
+ static void
+@@ -3620,7 +3622,7 @@ kvfree_rcu_queue_batch(struct kfree_rcu_
+ // "free channels", the batch can handle. Break
+ // the loop since it is done with this CPU thus
+ // queuing an RCU work is _always_ success here.
+- queued = queue_rcu_work(system_unbound_wq, &krwp->rcu_work);
++ queued = queue_rcu_work(rcu_reclaim_wq, &krwp->rcu_work);
+ WARN_ON_ONCE(!queued);
+ break;
+ }
+@@ -3708,7 +3710,7 @@ run_page_cache_worker(struct kfree_rcu_c
+ if (rcu_scheduler_active == RCU_SCHEDULER_RUNNING &&
+ !atomic_xchg(&krcp->work_in_progress, 1)) {
+ if (atomic_read(&krcp->backoff_page_cache_fill)) {
+- queue_delayed_work(system_unbound_wq,
++ queue_delayed_work(rcu_reclaim_wq,
+ &krcp->page_cache_work,
+ msecs_to_jiffies(rcu_delay_page_cache_fill_msec));
+ } else {
+@@ -5654,6 +5656,10 @@ static void __init kfree_rcu_batch_init(
+ int i, j;
+ struct shrinker *kfree_rcu_shrinker;
+
++ rcu_reclaim_wq = alloc_workqueue("kvfree_rcu_reclaim",
++ WQ_UNBOUND | WQ_MEM_RECLAIM, 0);
++ WARN_ON(!rcu_reclaim_wq);
++
+ /* Clamp it to [0:100] seconds interval. */
+ if (rcu_delay_page_cache_fill_msec < 0 ||
+ rcu_delay_page_cache_fill_msec > 100 * MSEC_PER_SEC) {
--- /dev/null
+kvm-arm64-calculate-cptr_el2-traps-on-activating-traps.patch
+kvm-arm64-unconditionally-save-flush-host-fpsimd-sve-sme-state.patch
+kvm-arm64-remove-host-fpsimd-saving-for-non-protected-kvm.patch
+kvm-arm64-remove-vhe-host-restore-of-cpacr_el1.zen.patch
+kvm-arm64-remove-vhe-host-restore-of-cpacr_el1.smen.patch
+kvm-arm64-refactor-exit-handlers.patch
+kvm-arm64-mark-some-header-functions-as-inline.patch
+kvm-arm64-eagerly-switch-zcr_el-1-2.patch
+mm-fix-kernel-bug-when-userfaultfd_move-encounters-swapcache.patch
+userfaultfd-fix-pte-unmapping-stack-allocated-pte-copies.patch
+mm-slab-kvfree_rcu-switch-to-wq_mem_reclaim-wq.patch
+virt-sev-guest-move-snp-guest-request-data-pages-handling-under-snp_cmd_mutex.patch
--- /dev/null
+From 927e926d72d9155fde3264459fe9bfd7b5e40d28 Mon Sep 17 00:00:00 2001
+From: Suren Baghdasaryan <surenb@google.com>
+Date: Wed, 26 Feb 2025 10:55:09 -0800
+Subject: userfaultfd: fix PTE unmapping stack-allocated PTE copies
+
+From: Suren Baghdasaryan <surenb@google.com>
+
+commit 927e926d72d9155fde3264459fe9bfd7b5e40d28 upstream.
+
+Current implementation of move_pages_pte() copies source and destination
+PTEs in order to detect concurrent changes to PTEs involved in the move.
+However these copies are also used to unmap the PTEs, which will fail if
+CONFIG_HIGHPTE is enabled because the copies are allocated on the stack.
+Fix this by using the actual PTEs which were kmap()ed.
+
+Link: https://lkml.kernel.org/r/20250226185510.2732648-3-surenb@google.com
+Fixes: adef440691ba ("userfaultfd: UFFDIO_MOVE uABI")
+Signed-off-by: Suren Baghdasaryan <surenb@google.com>
+Reported-by: Peter Xu <peterx@redhat.com>
+Reviewed-by: Peter Xu <peterx@redhat.com>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Barry Song <21cnbao@gmail.com>
+Cc: Barry Song <v-songbaohua@oppo.com>
+Cc: David Hildenbrand <david@redhat.com>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: Jann Horn <jannh@google.com>
+Cc: Kalesh Singh <kaleshsingh@google.com>
+Cc: Liam R. Howlett <Liam.Howlett@Oracle.com>
+Cc: Lokesh Gidra <lokeshgidra@google.com>
+Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
+Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/userfaultfd.c | 20 ++++++++++----------
+ 1 file changed, 10 insertions(+), 10 deletions(-)
+
+--- a/mm/userfaultfd.c
++++ b/mm/userfaultfd.c
+@@ -1274,8 +1274,8 @@ retry:
+ spin_unlock(src_ptl);
+
+ if (!locked) {
+- pte_unmap(&orig_src_pte);
+- pte_unmap(&orig_dst_pte);
++ pte_unmap(src_pte);
++ pte_unmap(dst_pte);
+ src_pte = dst_pte = NULL;
+ /* now we can block and wait */
+ folio_lock(src_folio);
+@@ -1291,8 +1291,8 @@ retry:
+ /* at this point we have src_folio locked */
+ if (folio_test_large(src_folio)) {
+ /* split_folio() can block */
+- pte_unmap(&orig_src_pte);
+- pte_unmap(&orig_dst_pte);
++ pte_unmap(src_pte);
++ pte_unmap(dst_pte);
+ src_pte = dst_pte = NULL;
+ err = split_folio(src_folio);
+ if (err)
+@@ -1317,8 +1317,8 @@ retry:
+ goto out;
+ }
+ if (!anon_vma_trylock_write(src_anon_vma)) {
+- pte_unmap(&orig_src_pte);
+- pte_unmap(&orig_dst_pte);
++ pte_unmap(src_pte);
++ pte_unmap(dst_pte);
+ src_pte = dst_pte = NULL;
+ /* now we can block and wait */
+ anon_vma_lock_write(src_anon_vma);
+@@ -1336,8 +1336,8 @@ retry:
+ entry = pte_to_swp_entry(orig_src_pte);
+ if (non_swap_entry(entry)) {
+ if (is_migration_entry(entry)) {
+- pte_unmap(&orig_src_pte);
+- pte_unmap(&orig_dst_pte);
++ pte_unmap(src_pte);
++ pte_unmap(dst_pte);
+ src_pte = dst_pte = NULL;
+ migration_entry_wait(mm, src_pmd, src_addr);
+ err = -EAGAIN;
+@@ -1380,8 +1380,8 @@ retry:
+ src_folio = folio;
+ src_folio_pte = orig_src_pte;
+ if (!folio_trylock(src_folio)) {
+- pte_unmap(&orig_src_pte);
+- pte_unmap(&orig_dst_pte);
++ pte_unmap(src_pte);
++ pte_unmap(dst_pte);
+ src_pte = dst_pte = NULL;
+ put_swap_device(si);
+ si = NULL;
--- /dev/null
+From 3e385c0d6ce88ac9916dcf84267bd5855d830748 Mon Sep 17 00:00:00 2001
+From: Alexey Kardashevskiy <aik@amd.com>
+Date: Fri, 7 Mar 2025 12:37:00 +1100
+Subject: virt: sev-guest: Move SNP Guest Request data pages handling under snp_cmd_mutex
+
+From: Alexey Kardashevskiy <aik@amd.com>
+
+commit 3e385c0d6ce88ac9916dcf84267bd5855d830748 upstream.
+
+Compared to the SNP Guest Request, the "Extended" version adds data pages for
+receiving certificates. If not enough pages are provided, the HV can report to
+the VM how many are needed so the VM can reallocate and repeat.
+
+Commit
+
+ ae596615d93d ("virt: sev-guest: Reduce the scope of SNP command mutex")
+
+moved handling of the allocated/desired pages number out of scope of said
+mutex and created a possibility for a race (multiple instances trying to
+trigger Extended request in a VM) as there is just one instance of
+snp_msg_desc per /dev/sev-guest and no locking other than snp_cmd_mutex.
+
+Fix the issue by moving the data blob/size and the GHCB input struct
+(snp_req_data) into snp_guest_req which is allocated on stack now and accessed
+by the GHCB caller under that mutex.
+
+Stop allocating SEV_FW_BLOB_MAX_SIZE in snp_msg_alloc() as only one of four
+callers needs it. Free the received blob in get_ext_report() right after it is
+copied to the userspace. Possible future users of snp_send_guest_request() are
+likely to have different ideas about the buffer size anyways.
+
+Fixes: ae596615d93d ("virt: sev-guest: Reduce the scope of SNP command mutex")
+Signed-off-by: Alexey Kardashevskiy <aik@amd.com>
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Reviewed-by: Nikunj A Dadhania <nikunj@amd.com>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20250307013700.437505-3-aik@amd.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+
+---
+ arch/x86/include/asm/sev.h | 6 +--
+ drivers/virt/coco/sev-guest/sev-guest.c | 63 +++++++++++++++++++-------------
+ 2 files changed, 42 insertions(+), 27 deletions(-)
+
+--- a/arch/x86/include/asm/sev.h
++++ b/arch/x86/include/asm/sev.h
+@@ -185,6 +185,9 @@ struct snp_guest_req {
+ unsigned int vmpck_id;
+ u8 msg_version;
+ u8 msg_type;
++
++ struct snp_req_data input;
++ void *certs_data;
+ };
+
+ /*
+@@ -245,9 +248,6 @@ struct snp_msg_desc {
+ struct snp_guest_msg secret_request, secret_response;
+
+ struct snp_secrets_page *secrets;
+- struct snp_req_data input;
+-
+- void *certs_data;
+
+ struct aesgcm_ctx *ctx;
+
+--- a/drivers/virt/coco/sev-guest/sev-guest.c
++++ b/drivers/virt/coco/sev-guest/sev-guest.c
+@@ -249,7 +249,7 @@ retry_request:
+ * sequence number must be incremented or the VMPCK must be deleted to
+ * prevent reuse of the IV.
+ */
+- rc = snp_issue_guest_request(req, &mdesc->input, rio);
++ rc = snp_issue_guest_request(req, &req->input, rio);
+ switch (rc) {
+ case -ENOSPC:
+ /*
+@@ -259,7 +259,7 @@ retry_request:
+ * order to increment the sequence number and thus avoid
+ * IV reuse.
+ */
+- override_npages = mdesc->input.data_npages;
++ override_npages = req->input.data_npages;
+ req->exit_code = SVM_VMGEXIT_GUEST_REQUEST;
+
+ /*
+@@ -315,7 +315,7 @@ retry_request:
+ }
+
+ if (override_npages)
+- mdesc->input.data_npages = override_npages;
++ req->input.data_npages = override_npages;
+
+ return rc;
+ }
+@@ -354,6 +354,11 @@ static int snp_send_guest_request(struct
+ memcpy(mdesc->request, &mdesc->secret_request,
+ sizeof(mdesc->secret_request));
+
++ /* initial the input address for guest request */
++ req->input.req_gpa = __pa(mdesc->request);
++ req->input.resp_gpa = __pa(mdesc->response);
++ req->input.data_gpa = req->certs_data ? __pa(req->certs_data) : 0;
++
+ rc = __handle_guest_request(mdesc, req, rio);
+ if (rc) {
+ if (rc == -EIO &&
+@@ -495,6 +500,7 @@ static int get_ext_report(struct snp_gue
+ struct snp_guest_req req = {};
+ int ret, npages = 0, resp_len;
+ sockptr_t certs_address;
++ struct page *page;
+
+ if (sockptr_is_null(io->req_data) || sockptr_is_null(io->resp_data))
+ return -EINVAL;
+@@ -528,8 +534,20 @@ static int get_ext_report(struct snp_gue
+ * the host. If host does not supply any certs in it, then copy
+ * zeros to indicate that certificate data was not provided.
+ */
+- memset(mdesc->certs_data, 0, report_req->certs_len);
+ npages = report_req->certs_len >> PAGE_SHIFT;
++ page = alloc_pages(GFP_KERNEL_ACCOUNT | __GFP_ZERO,
++ get_order(report_req->certs_len));
++ if (!page)
++ return -ENOMEM;
++
++ req.certs_data = page_address(page);
++ ret = set_memory_decrypted((unsigned long)req.certs_data, npages);
++ if (ret) {
++ pr_err("failed to mark page shared, ret=%d\n", ret);
++ __free_pages(page, get_order(report_req->certs_len));
++ return -EFAULT;
++ }
++
+ cmd:
+ /*
+ * The intermediate response buffer is used while decrypting the
+@@ -538,10 +556,12 @@ cmd:
+ */
+ resp_len = sizeof(report_resp->data) + mdesc->ctx->authsize;
+ report_resp = kzalloc(resp_len, GFP_KERNEL_ACCOUNT);
+- if (!report_resp)
+- return -ENOMEM;
++ if (!report_resp) {
++ ret = -ENOMEM;
++ goto e_free_data;
++ }
+
+- mdesc->input.data_npages = npages;
++ req.input.data_npages = npages;
+
+ req.msg_version = arg->msg_version;
+ req.msg_type = SNP_MSG_REPORT_REQ;
+@@ -556,7 +576,7 @@ cmd:
+
+ /* If certs length is invalid then copy the returned length */
+ if (arg->vmm_error == SNP_GUEST_VMM_ERR_INVALID_LEN) {
+- report_req->certs_len = mdesc->input.data_npages << PAGE_SHIFT;
++ report_req->certs_len = req.input.data_npages << PAGE_SHIFT;
+
+ if (copy_to_sockptr(io->req_data, report_req, sizeof(*report_req)))
+ ret = -EFAULT;
+@@ -565,7 +585,7 @@ cmd:
+ if (ret)
+ goto e_free;
+
+- if (npages && copy_to_sockptr(certs_address, mdesc->certs_data, report_req->certs_len)) {
++ if (npages && copy_to_sockptr(certs_address, req.certs_data, report_req->certs_len)) {
+ ret = -EFAULT;
+ goto e_free;
+ }
+@@ -575,6 +595,13 @@ cmd:
+
+ e_free:
+ kfree(report_resp);
++e_free_data:
++ if (npages) {
++ if (set_memory_encrypted((unsigned long)req.certs_data, npages))
++ WARN_ONCE(ret, "failed to restore encryption mask (leak it)\n");
++ else
++ __free_pages(page, get_order(report_req->certs_len));
++ }
+ return ret;
+ }
+
+@@ -1048,35 +1075,26 @@ static int __init sev_guest_probe(struct
+ if (!mdesc->response)
+ goto e_free_request;
+
+- mdesc->certs_data = alloc_shared_pages(dev, SEV_FW_BLOB_MAX_SIZE);
+- if (!mdesc->certs_data)
+- goto e_free_response;
+-
+ ret = -EIO;
+ mdesc->ctx = snp_init_crypto(mdesc->vmpck, VMPCK_KEY_LEN);
+ if (!mdesc->ctx)
+- goto e_free_cert_data;
++ goto e_free_response;
+
+ misc = &snp_dev->misc;
+ misc->minor = MISC_DYNAMIC_MINOR;
+ misc->name = DEVICE_NAME;
+ misc->fops = &snp_guest_fops;
+
+- /* Initialize the input addresses for guest request */
+- mdesc->input.req_gpa = __pa(mdesc->request);
+- mdesc->input.resp_gpa = __pa(mdesc->response);
+- mdesc->input.data_gpa = __pa(mdesc->certs_data);
+-
+ /* Set the privlevel_floor attribute based on the vmpck_id */
+ sev_tsm_ops.privlevel_floor = vmpck_id;
+
+ ret = tsm_register(&sev_tsm_ops, snp_dev);
+ if (ret)
+- goto e_free_cert_data;
++ goto e_free_response;
+
+ ret = devm_add_action_or_reset(&pdev->dev, unregister_sev_tsm, NULL);
+ if (ret)
+- goto e_free_cert_data;
++ goto e_free_response;
+
+ ret = misc_register(misc);
+ if (ret)
+@@ -1088,8 +1106,6 @@ static int __init sev_guest_probe(struct
+
+ e_free_ctx:
+ kfree(mdesc->ctx);
+-e_free_cert_data:
+- free_shared_pages(mdesc->certs_data, SEV_FW_BLOB_MAX_SIZE);
+ e_free_response:
+ free_shared_pages(mdesc->response, sizeof(struct snp_guest_msg));
+ e_free_request:
+@@ -1104,7 +1120,6 @@ static void __exit sev_guest_remove(stru
+ struct snp_guest_dev *snp_dev = platform_get_drvdata(pdev);
+ struct snp_msg_desc *mdesc = snp_dev->msg_desc;
+
+- free_shared_pages(mdesc->certs_data, SEV_FW_BLOB_MAX_SIZE);
+ free_shared_pages(mdesc->response, sizeof(struct snp_guest_msg));
+ free_shared_pages(mdesc->request, sizeof(struct snp_guest_msg));
+ kfree(mdesc->ctx);