From b57fa742b63993364d9dfbe624930377c2b5ab5d Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 12 Aug 2022 16:39:44 +0200 Subject: [PATCH] 5.10-stable patches added patches: kvm-nvmx-let-userspace-set-nvmx-msr-to-any-_host_-supported-value.patch kvm-s390-pv-don-t-present-the-ecall-interrupt-twice.patch kvm-x86-mark-tss-busy-during-ltr-emulation-_after_-all-fault-checks.patch kvm-x86-set-error-code-to-segment-selector-on-lldt-ltr-non-canonical-gp.patch --- ...mx-msr-to-any-_host_-supported-value.patch | 175 ++++++++++++++++++ ...-t-present-the-ecall-interrupt-twice.patch | 100 ++++++++++ ...r-emulation-_after_-all-fault-checks.patch | 66 +++++++ ...elector-on-lldt-ltr-non-canonical-gp.patch | 41 ++++ queue-5.10/series | 4 + 5 files changed, 386 insertions(+) create mode 100644 queue-5.10/kvm-nvmx-let-userspace-set-nvmx-msr-to-any-_host_-supported-value.patch create mode 100644 queue-5.10/kvm-s390-pv-don-t-present-the-ecall-interrupt-twice.patch create mode 100644 queue-5.10/kvm-x86-mark-tss-busy-during-ltr-emulation-_after_-all-fault-checks.patch create mode 100644 queue-5.10/kvm-x86-set-error-code-to-segment-selector-on-lldt-ltr-non-canonical-gp.patch diff --git a/queue-5.10/kvm-nvmx-let-userspace-set-nvmx-msr-to-any-_host_-supported-value.patch b/queue-5.10/kvm-nvmx-let-userspace-set-nvmx-msr-to-any-_host_-supported-value.patch new file mode 100644 index 00000000000..7175a82114e --- /dev/null +++ b/queue-5.10/kvm-nvmx-let-userspace-set-nvmx-msr-to-any-_host_-supported-value.patch @@ -0,0 +1,175 @@ +From f8ae08f9789ad59d318ea75b570caa454aceda81 Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Tue, 7 Jun 2022 21:35:54 +0000 +Subject: KVM: nVMX: Let userspace set nVMX MSR to any _host_ supported value + +From: Sean Christopherson + +commit f8ae08f9789ad59d318ea75b570caa454aceda81 upstream. + +Restrict the nVMX MSRs based on KVM's config, not based on the guest's +current config. 
Using the guest's config to audit the new config +prevents userspace from restoring the original config (KVM's config) if +at any point in the past the guest's config was restricted in any way. + +Fixes: 62cc6b9dc61e ("KVM: nVMX: support restore of VMX capability MSRs") +Cc: stable@vger.kernel.org +Cc: David Matlack +Signed-off-by: Sean Christopherson +Message-Id: <20220607213604.3346000-6-seanjc@google.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/vmx/nested.c | 70 ++++++++++++++++++++++++---------------------- + 1 file changed, 37 insertions(+), 33 deletions(-) + +--- a/arch/x86/kvm/vmx/nested.c ++++ b/arch/x86/kvm/vmx/nested.c +@@ -1245,7 +1245,7 @@ static int vmx_restore_vmx_basic(struct + BIT_ULL(49) | BIT_ULL(54) | BIT_ULL(55) | + /* reserved */ + BIT_ULL(31) | GENMASK_ULL(47, 45) | GENMASK_ULL(63, 56); +- u64 vmx_basic = vmx->nested.msrs.basic; ++ u64 vmx_basic = vmcs_config.nested.basic; + + if (!is_bitwise_subset(vmx_basic, data, feature_and_reserved)) + return -EINVAL; +@@ -1268,36 +1268,42 @@ static int vmx_restore_vmx_basic(struct + return 0; + } + +-static int +-vmx_restore_control_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data) ++static void vmx_get_control_msr(struct nested_vmx_msrs *msrs, u32 msr_index, ++ u32 **low, u32 **high) + { +- u64 supported; +- u32 *lowp, *highp; +- + switch (msr_index) { + case MSR_IA32_VMX_TRUE_PINBASED_CTLS: +- lowp = &vmx->nested.msrs.pinbased_ctls_low; +- highp = &vmx->nested.msrs.pinbased_ctls_high; ++ *low = &msrs->pinbased_ctls_low; ++ *high = &msrs->pinbased_ctls_high; + break; + case MSR_IA32_VMX_TRUE_PROCBASED_CTLS: +- lowp = &vmx->nested.msrs.procbased_ctls_low; +- highp = &vmx->nested.msrs.procbased_ctls_high; ++ *low = &msrs->procbased_ctls_low; ++ *high = &msrs->procbased_ctls_high; + break; + case MSR_IA32_VMX_TRUE_EXIT_CTLS: +- lowp = &vmx->nested.msrs.exit_ctls_low; +- highp = &vmx->nested.msrs.exit_ctls_high; ++ *low = &msrs->exit_ctls_low; ++ *high = 
&msrs->exit_ctls_high; + break; + case MSR_IA32_VMX_TRUE_ENTRY_CTLS: +- lowp = &vmx->nested.msrs.entry_ctls_low; +- highp = &vmx->nested.msrs.entry_ctls_high; ++ *low = &msrs->entry_ctls_low; ++ *high = &msrs->entry_ctls_high; + break; + case MSR_IA32_VMX_PROCBASED_CTLS2: +- lowp = &vmx->nested.msrs.secondary_ctls_low; +- highp = &vmx->nested.msrs.secondary_ctls_high; ++ *low = &msrs->secondary_ctls_low; ++ *high = &msrs->secondary_ctls_high; + break; + default: + BUG(); + } ++} ++ ++static int ++vmx_restore_control_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data) ++{ ++ u32 *lowp, *highp; ++ u64 supported; ++ ++ vmx_get_control_msr(&vmcs_config.nested, msr_index, &lowp, &highp); + + supported = vmx_control_msr(*lowp, *highp); + +@@ -1309,6 +1315,7 @@ vmx_restore_control_msr(struct vcpu_vmx + if (!is_bitwise_subset(supported, data, GENMASK_ULL(63, 32))) + return -EINVAL; + ++ vmx_get_control_msr(&vmx->nested.msrs, msr_index, &lowp, &highp); + *lowp = data; + *highp = data >> 32; + return 0; +@@ -1322,10 +1329,8 @@ static int vmx_restore_vmx_misc(struct v + BIT_ULL(28) | BIT_ULL(29) | BIT_ULL(30) | + /* reserved */ + GENMASK_ULL(13, 9) | BIT_ULL(31); +- u64 vmx_misc; +- +- vmx_misc = vmx_control_msr(vmx->nested.msrs.misc_low, +- vmx->nested.msrs.misc_high); ++ u64 vmx_misc = vmx_control_msr(vmcs_config.nested.misc_low, ++ vmcs_config.nested.misc_high); + + if (!is_bitwise_subset(vmx_misc, data, feature_and_reserved_bits)) + return -EINVAL; +@@ -1353,10 +1358,8 @@ static int vmx_restore_vmx_misc(struct v + + static int vmx_restore_vmx_ept_vpid_cap(struct vcpu_vmx *vmx, u64 data) + { +- u64 vmx_ept_vpid_cap; +- +- vmx_ept_vpid_cap = vmx_control_msr(vmx->nested.msrs.ept_caps, +- vmx->nested.msrs.vpid_caps); ++ u64 vmx_ept_vpid_cap = vmx_control_msr(vmcs_config.nested.ept_caps, ++ vmcs_config.nested.vpid_caps); + + /* Every bit is either reserved or a feature bit. 
*/ + if (!is_bitwise_subset(vmx_ept_vpid_cap, data, -1ULL)) +@@ -1367,20 +1370,21 @@ static int vmx_restore_vmx_ept_vpid_cap( + return 0; + } + +-static int vmx_restore_fixed0_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data) ++static u64 *vmx_get_fixed0_msr(struct nested_vmx_msrs *msrs, u32 msr_index) + { +- u64 *msr; +- + switch (msr_index) { + case MSR_IA32_VMX_CR0_FIXED0: +- msr = &vmx->nested.msrs.cr0_fixed0; +- break; ++ return &msrs->cr0_fixed0; + case MSR_IA32_VMX_CR4_FIXED0: +- msr = &vmx->nested.msrs.cr4_fixed0; +- break; ++ return &msrs->cr4_fixed0; + default: + BUG(); + } ++} ++ ++static int vmx_restore_fixed0_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data) ++{ ++ const u64 *msr = vmx_get_fixed0_msr(&vmcs_config.nested, msr_index); + + /* + * 1 bits (which indicates bits which "must-be-1" during VMX operation) +@@ -1389,7 +1393,7 @@ static int vmx_restore_fixed0_msr(struct + if (!is_bitwise_subset(data, *msr, -1ULL)) + return -EINVAL; + +- *msr = data; ++ *vmx_get_fixed0_msr(&vmx->nested.msrs, msr_index) = data; + return 0; + } + +@@ -1450,7 +1454,7 @@ int vmx_set_vmx_msr(struct kvm_vcpu *vcp + vmx->nested.msrs.vmcs_enum = data; + return 0; + case MSR_IA32_VMX_VMFUNC: +- if (data & ~vmx->nested.msrs.vmfunc_controls) ++ if (data & ~vmcs_config.nested.vmfunc_controls) + return -EINVAL; + vmx->nested.msrs.vmfunc_controls = data; + return 0; diff --git a/queue-5.10/kvm-s390-pv-don-t-present-the-ecall-interrupt-twice.patch b/queue-5.10/kvm-s390-pv-don-t-present-the-ecall-interrupt-twice.patch new file mode 100644 index 00000000000..a82da2232a9 --- /dev/null +++ b/queue-5.10/kvm-s390-pv-don-t-present-the-ecall-interrupt-twice.patch @@ -0,0 +1,100 @@ +From c3f0e5fd2d33d80c5a5a8b5e5d2bab2841709cc8 Mon Sep 17 00:00:00 2001 +From: Nico Boehr +Date: Mon, 18 Jul 2022 15:04:34 +0200 +Subject: KVM: s390: pv: don't present the ecall interrupt twice + +From: Nico Boehr + +commit c3f0e5fd2d33d80c5a5a8b5e5d2bab2841709cc8 upstream. 
+ +When the SIGP interpretation facility is present and a VCPU sends an +ecall to another VCPU in enabled wait, the sending VCPU receives a 56 +intercept (partial execution), so KVM can wake up the receiving CPU. +Note that the SIGP interpretation facility will take care of the +interrupt delivery and KVM's only job is to wake the receiving VCPU. + +For PV, the sending VCPU will receive a 108 intercept (pv notify) and +should continue like in the non-PV case, i.e. wake the receiving VCPU. + +For PV and non-PV guests the interrupt delivery will occur through the +SIGP interpretation facility on SIE entry when SIE finds the X bit in +the status field set. + +However, in handle_pv_notification(), there was no special handling for +SIGP, which leads to interrupt injection being requested by KVM for the +next SIE entry. This results in the interrupt being delivered twice: +once by the SIGP interpretation facility and once by KVM through the +IICTL. + +Add the necessary special handling in handle_pv_notification(), similar +to handle_partial_execution(), which simply wakes the receiving VCPU and +leaves interrupt delivery to the SIGP interpretation facility. + +In contrast to external calls, emergency calls are not interpreted but +also cause a 108 intercept, which is why we still need to call +handle_instruction() for SIGP orders other than ecall. + +Since kvm_s390_handle_sigp_pei() is now called for all SIGP orders which +cause a 108 intercept - even if they are actually handled by +handle_instruction() - move the tracepoint in kvm_s390_handle_sigp_pei() +to avoid possibly confusing trace messages. 
+ +Signed-off-by: Nico Boehr +Cc: stable@vger.kernel.org # 5.7 +Fixes: da24a0cc58ed ("KVM: s390: protvirt: Instruction emulation") +Reviewed-by: Claudio Imbrenda +Reviewed-by: Janosch Frank +Reviewed-by: Christian Borntraeger +Link: https://lore.kernel.org/r/20220718130434.73302-1-nrb@linux.ibm.com +Message-Id: <20220718130434.73302-1-nrb@linux.ibm.com> +Signed-off-by: Claudio Imbrenda +Signed-off-by: Greg Kroah-Hartman +--- + arch/s390/kvm/intercept.c | 15 +++++++++++++++ + arch/s390/kvm/sigp.c | 4 ++-- + 2 files changed, 17 insertions(+), 2 deletions(-) + +--- a/arch/s390/kvm/intercept.c ++++ b/arch/s390/kvm/intercept.c +@@ -521,12 +521,27 @@ static int handle_pv_uvc(struct kvm_vcpu + + static int handle_pv_notification(struct kvm_vcpu *vcpu) + { ++ int ret; ++ + if (vcpu->arch.sie_block->ipa == 0xb210) + return handle_pv_spx(vcpu); + if (vcpu->arch.sie_block->ipa == 0xb220) + return handle_pv_sclp(vcpu); + if (vcpu->arch.sie_block->ipa == 0xb9a4) + return handle_pv_uvc(vcpu); ++ if (vcpu->arch.sie_block->ipa >> 8 == 0xae) { ++ /* ++ * Besides external call, other SIGP orders also cause a ++ * 108 (pv notify) intercept. In contrast to external call, ++ * these orders need to be emulated and hence the appropriate ++ * place to handle them is in handle_instruction(). ++ * So first try kvm_s390_handle_sigp_pei() and if that isn't ++ * successful, go on with handle_instruction(). 
++ */ ++ ret = kvm_s390_handle_sigp_pei(vcpu); ++ if (!ret) ++ return ret; ++ } + + return handle_instruction(vcpu); + } +--- a/arch/s390/kvm/sigp.c ++++ b/arch/s390/kvm/sigp.c +@@ -492,9 +492,9 @@ int kvm_s390_handle_sigp_pei(struct kvm_ + struct kvm_vcpu *dest_vcpu; + u8 order_code = kvm_s390_get_base_disp_rs(vcpu, NULL); + +- trace_kvm_s390_handle_sigp_pei(vcpu, order_code, cpu_addr); +- + if (order_code == SIGP_EXTERNAL_CALL) { ++ trace_kvm_s390_handle_sigp_pei(vcpu, order_code, cpu_addr); ++ + dest_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, cpu_addr); + BUG_ON(dest_vcpu == NULL); + diff --git a/queue-5.10/kvm-x86-mark-tss-busy-during-ltr-emulation-_after_-all-fault-checks.patch b/queue-5.10/kvm-x86-mark-tss-busy-during-ltr-emulation-_after_-all-fault-checks.patch new file mode 100644 index 00000000000..42dc4d4f13f --- /dev/null +++ b/queue-5.10/kvm-x86-mark-tss-busy-during-ltr-emulation-_after_-all-fault-checks.patch @@ -0,0 +1,66 @@ +From ec6e4d863258d4bfb36d48d5e3ef68140234d688 Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Mon, 11 Jul 2022 23:27:48 +0000 +Subject: KVM: x86: Mark TSS busy during LTR emulation _after_ all fault checks + +From: Sean Christopherson + +commit ec6e4d863258d4bfb36d48d5e3ef68140234d688 upstream. + +Wait to mark the TSS as busy during LTR emulation until after all fault +checks for the LTR have passed. Specifically, don't mark the TSS busy if +the new TSS base is non-canonical. + +Opportunistically drop the one-off !seg_desc.PRESENT check for TR as the +only reason for the early check was to avoid marking a !PRESENT TSS as +busy, i.e. the common !PRESENT is now done before setting the busy bit. 
+ +Fixes: e37a75a13cda ("KVM: x86: Emulator ignores LDTR/TR extended base on LLDT/LTR") +Reported-by: syzbot+760a73552f47a8cd0fd9@syzkaller.appspotmail.com +Cc: stable@vger.kernel.org +Cc: Tetsuo Handa +Cc: Hou Wenlong +Signed-off-by: Sean Christopherson +Reviewed-by: Maxim Levitsky +Link: https://lore.kernel.org/r/20220711232750.1092012-2-seanjc@google.com +Signed-off-by: Sean Christopherson +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/emulate.c | 19 +++++++++---------- + 1 file changed, 9 insertions(+), 10 deletions(-) + +--- a/arch/x86/kvm/emulate.c ++++ b/arch/x86/kvm/emulate.c +@@ -1772,16 +1772,6 @@ static int __load_segment_descriptor(str + case VCPU_SREG_TR: + if (seg_desc.s || (seg_desc.type != 1 && seg_desc.type != 9)) + goto exception; +- if (!seg_desc.p) { +- err_vec = NP_VECTOR; +- goto exception; +- } +- old_desc = seg_desc; +- seg_desc.type |= 2; /* busy */ +- ret = ctxt->ops->cmpxchg_emulated(ctxt, desc_addr, &old_desc, &seg_desc, +- sizeof(seg_desc), &ctxt->exception); +- if (ret != X86EMUL_CONTINUE) +- return ret; + break; + case VCPU_SREG_LDTR: + if (seg_desc.s || seg_desc.type != 2) +@@ -1822,6 +1812,15 @@ static int __load_segment_descriptor(str + ((u64)base3 << 32), ctxt)) + return emulate_gp(ctxt, 0); + } ++ ++ if (seg == VCPU_SREG_TR) { ++ old_desc = seg_desc; ++ seg_desc.type |= 2; /* busy */ ++ ret = ctxt->ops->cmpxchg_emulated(ctxt, desc_addr, &old_desc, &seg_desc, ++ sizeof(seg_desc), &ctxt->exception); ++ if (ret != X86EMUL_CONTINUE) ++ return ret; ++ } + load: + ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg); + if (desc) diff --git a/queue-5.10/kvm-x86-set-error-code-to-segment-selector-on-lldt-ltr-non-canonical-gp.patch b/queue-5.10/kvm-x86-set-error-code-to-segment-selector-on-lldt-ltr-non-canonical-gp.patch new file mode 100644 index 00000000000..1757d0e47d1 --- /dev/null +++ b/queue-5.10/kvm-x86-set-error-code-to-segment-selector-on-lldt-ltr-non-canonical-gp.patch @@ -0,0 +1,41 @@ +From 
2626206963ace9e8bf92b6eea5ff78dd674c555c Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Mon, 11 Jul 2022 23:27:49 +0000 +Subject: KVM: x86: Set error code to segment selector on LLDT/LTR non-canonical #GP + +From: Sean Christopherson + +commit 2626206963ace9e8bf92b6eea5ff78dd674c555c upstream. + +When injecting a #GP on LLDT/LTR due to a non-canonical LDT/TSS base, set +the error code to the selector. Intel's SDM says nothing about the #GP, +but AMD's APM explicitly states that both LLDT and LTR set the error code +to the selector, not zero. + +Note, a non-canonical memory operand on LLDT/LTR does generate a #GP(0), +but the KVM code in question is specific to the base from the descriptor. + +Fixes: e37a75a13cda ("KVM: x86: Emulator ignores LDTR/TR extended base on LLDT/LTR") +Cc: stable@vger.kernel.org +Signed-off-by: Sean Christopherson +Reviewed-by: Maxim Levitsky +Link: https://lore.kernel.org/r/20220711232750.1092012-3-seanjc@google.com +Signed-off-by: Sean Christopherson +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/emulate.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/arch/x86/kvm/emulate.c ++++ b/arch/x86/kvm/emulate.c +@@ -1809,8 +1809,8 @@ static int __load_segment_descriptor(str + if (ret != X86EMUL_CONTINUE) + return ret; + if (emul_is_noncanonical_address(get_desc_base(&seg_desc) | +- ((u64)base3 << 32), ctxt)) +- return emulate_gp(ctxt, 0); ++ ((u64)base3 << 32), ctxt)) ++ return emulate_gp(ctxt, err_code); + } + + if (seg == VCPU_SREG_TR) { diff --git a/queue-5.10/series b/queue-5.10/series index f6487c6568e..87c41f5f1ac 100644 --- a/queue-5.10/series +++ b/queue-5.10/series @@ -14,3 +14,7 @@ hid-wacom-don-t-register-pad_input-for-touch-switch.patch kvm-nvmx-snapshot-pre-vm-enter-bndcfgs-for-nested_run_pending-case.patch kvm-nvmx-snapshot-pre-vm-enter-debugctl-for-nested_run_pending-case.patch kvm-svm-don-t-bug-if-userspace-injects-an-interrupt-with-gif-0.patch 
+kvm-s390-pv-don-t-present-the-ecall-interrupt-twice.patch +kvm-nvmx-let-userspace-set-nvmx-msr-to-any-_host_-supported-value.patch +kvm-x86-mark-tss-busy-during-ltr-emulation-_after_-all-fault-checks.patch +kvm-x86-set-error-code-to-segment-selector-on-lldt-ltr-non-canonical-gp.patch -- 2.47.3