git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.15-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 12 Aug 2022 14:41:35 +0000 (16:41 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 12 Aug 2022 14:41:35 +0000 (16:41 +0200)
added patches:
kvm-nvmx-account-for-kvm-reserved-cr4-bits-in-consistency-checks.patch
kvm-nvmx-inject-ud-if-vmxon-is-attempted-with-incompatible-cr0-cr4.patch
kvm-nvmx-let-userspace-set-nvmx-msr-to-any-_host_-supported-value.patch
kvm-s390-pv-don-t-present-the-ecall-interrupt-twice.patch
kvm-x86-mark-tss-busy-during-ltr-emulation-_after_-all-fault-checks.patch
kvm-x86-set-error-code-to-segment-selector-on-lldt-ltr-non-canonical-gp.patch
kvm-x86-split-kvm_is_valid_cr4-and-export-only-the-non-vendor-bits.patch

queue-5.15/kvm-nvmx-account-for-kvm-reserved-cr4-bits-in-consistency-checks.patch [new file with mode: 0644]
queue-5.15/kvm-nvmx-inject-ud-if-vmxon-is-attempted-with-incompatible-cr0-cr4.patch [new file with mode: 0644]
queue-5.15/kvm-nvmx-let-userspace-set-nvmx-msr-to-any-_host_-supported-value.patch [new file with mode: 0644]
queue-5.15/kvm-s390-pv-don-t-present-the-ecall-interrupt-twice.patch [new file with mode: 0644]
queue-5.15/kvm-x86-mark-tss-busy-during-ltr-emulation-_after_-all-fault-checks.patch [new file with mode: 0644]
queue-5.15/kvm-x86-set-error-code-to-segment-selector-on-lldt-ltr-non-canonical-gp.patch [new file with mode: 0644]
queue-5.15/kvm-x86-split-kvm_is_valid_cr4-and-export-only-the-non-vendor-bits.patch [new file with mode: 0644]
queue-5.15/series

diff --git a/queue-5.15/kvm-nvmx-account-for-kvm-reserved-cr4-bits-in-consistency-checks.patch b/queue-5.15/kvm-nvmx-account-for-kvm-reserved-cr4-bits-in-consistency-checks.patch
new file mode 100644 (file)
index 0000000..2beb1c6
--- /dev/null
@@ -0,0 +1,45 @@
+From ca58f3aa53d165afe4ab74c755bc2f6d168617ac Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Tue, 7 Jun 2022 21:35:51 +0000
+Subject: KVM: nVMX: Account for KVM reserved CR4 bits in consistency checks
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit ca58f3aa53d165afe4ab74c755bc2f6d168617ac upstream.
+
+Check that the guest (L2) and host (L1) CR4 values that would be loaded
+by nested VM-Enter and VM-Exit respectively are valid with respect to
+KVM's (L0 host) allowed CR4 bits.  Failure to check KVM reserved bits
+would allow L1 to load an illegal CR4 (or trigger hardware VM-Fail or
+failed VM-Entry) by massaging guest CPUID to allow features that are not
+supported by KVM.  Amusingly, KVM itself is an accomplice in its doom, as
+KVM adjusts L1's MSR_IA32_VMX_CR4_FIXED1 to allow L1 to enable bits for
+L2 based on L1's CPUID model.
+
+Note, although nested_{guest,host}_cr4_valid() are _currently_ used if
+and only if the vCPU is post-VMXON (nested.vmxon == true), that may not
+be true in the future, e.g. emulating VMXON has a bug where it doesn't
+check the allowed/required CR0/CR4 bits.
+
+Cc: stable@vger.kernel.org
+Fixes: 3899152ccbf4 ("KVM: nVMX: fix checks on CR{0,4} during virtual VMX operation")
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20220607213604.3346000-3-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/vmx/nested.h |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/kvm/vmx/nested.h
++++ b/arch/x86/kvm/vmx/nested.h
+@@ -280,7 +280,8 @@ static inline bool nested_cr4_valid(stru
+       u64 fixed0 = to_vmx(vcpu)->nested.msrs.cr4_fixed0;
+       u64 fixed1 = to_vmx(vcpu)->nested.msrs.cr4_fixed1;
+-      return fixed_bits_valid(val, fixed0, fixed1);
++      return fixed_bits_valid(val, fixed0, fixed1) &&
++             __kvm_is_valid_cr4(vcpu, val);
+ }
+ /* No difference in the restrictions on guest and host CR4 in VMX operation. */
diff --git a/queue-5.15/kvm-nvmx-inject-ud-if-vmxon-is-attempted-with-incompatible-cr0-cr4.patch b/queue-5.15/kvm-nvmx-inject-ud-if-vmxon-is-attempted-with-incompatible-cr0-cr4.patch
new file mode 100644 (file)
index 0000000..cb94bd4
--- /dev/null
@@ -0,0 +1,75 @@
+From c7d855c2aff2d511fd60ee2e356134c4fb394799 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Tue, 7 Jun 2022 21:35:52 +0000
+Subject: KVM: nVMX: Inject #UD if VMXON is attempted with incompatible CR0/CR4
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit c7d855c2aff2d511fd60ee2e356134c4fb394799 upstream.
+
+Inject a #UD if L1 attempts VMXON with a CR0 or CR4 that is disallowed
+per the associated nested VMX MSRs' fixed0/1 settings.  KVM cannot rely
+on hardware to perform the checks, even for the few checks that have
+higher priority than VM-Exit, as (a) KVM may have forced CR0/CR4 bits in
+hardware while running the guest, (b) there may be incompatible CR0/CR4 bits
+that have lower priority than VM-Exit, e.g. CR0.NE, and (c) userspace may
+have further restricted the allowed CR0/CR4 values by manipulating the
+guest's nested VMX MSRs.
+
+Note, despite a very strong desire to throw shade at Jim, commit
+70f3aac964ae ("kvm: nVMX: Remove superfluous VMX instruction fault checks")
+is not to blame for the buggy behavior (though the comment...).  That
+commit only removed the CR0.PE, EFLAGS.VM, and COMPATIBILITY mode checks
+(though it did erroneously drop the CPL check, but that has already been
+remedied).  KVM may force CR0.PE=1, but will do so only when also
+forcing EFLAGS.VM=1 to emulate Real Mode, i.e. hardware will still #UD.
+
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=216033
+Fixes: ec378aeef9df ("KVM: nVMX: Implement VMXON and VMXOFF")
+Reported-by: Eric Li <ercli@ucdavis.edu>
+Cc: stable@vger.kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20220607213604.3346000-4-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/vmx/nested.c |   23 ++++++++++++++---------
+ 1 file changed, 14 insertions(+), 9 deletions(-)
+
+--- a/arch/x86/kvm/vmx/nested.c
++++ b/arch/x86/kvm/vmx/nested.c
+@@ -4952,20 +4952,25 @@ static int handle_vmon(struct kvm_vcpu *
+               | FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX;
+       /*
+-       * The Intel VMX Instruction Reference lists a bunch of bits that are
+-       * prerequisite to running VMXON, most notably cr4.VMXE must be set to
+-       * 1 (see vmx_is_valid_cr4() for when we allow the guest to set this).
+-       * Otherwise, we should fail with #UD.  But most faulting conditions
+-       * have already been checked by hardware, prior to the VM-exit for
+-       * VMXON.  We do test guest cr4.VMXE because processor CR4 always has
+-       * that bit set to 1 in non-root mode.
++       * Note, KVM cannot rely on hardware to perform the CR0/CR4 #UD checks
++       * that have higher priority than VM-Exit (see Intel SDM's pseudocode
++       * for VMXON), as KVM must load valid CR0/CR4 values into hardware while
++       * running the guest, i.e. KVM needs to check the _guest_ values.
++       *
++       * Rely on hardware for the other two pre-VM-Exit checks, !VM86 and
++       * !COMPATIBILITY modes.  KVM may run the guest in VM86 to emulate Real
++       * Mode, but KVM will never take the guest out of those modes.
+        */
+-      if (!kvm_read_cr4_bits(vcpu, X86_CR4_VMXE)) {
++      if (!nested_host_cr0_valid(vcpu, kvm_read_cr0(vcpu)) ||
++          !nested_host_cr4_valid(vcpu, kvm_read_cr4(vcpu))) {
+               kvm_queue_exception(vcpu, UD_VECTOR);
+               return 1;
+       }
+-      /* CPL=0 must be checked manually. */
++      /*
++       * CPL=0 and all other checks that are lower priority than VM-Exit must
++       * be checked manually.
++       */
+       if (vmx_get_cpl(vcpu)) {
+               kvm_inject_gp(vcpu, 0);
+               return 1;
diff --git a/queue-5.15/kvm-nvmx-let-userspace-set-nvmx-msr-to-any-_host_-supported-value.patch b/queue-5.15/kvm-nvmx-let-userspace-set-nvmx-msr-to-any-_host_-supported-value.patch
new file mode 100644 (file)
index 0000000..46659e2
--- /dev/null
@@ -0,0 +1,175 @@
+From f8ae08f9789ad59d318ea75b570caa454aceda81 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Tue, 7 Jun 2022 21:35:54 +0000
+Subject: KVM: nVMX: Let userspace set nVMX MSR to any _host_ supported value
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit f8ae08f9789ad59d318ea75b570caa454aceda81 upstream.
+
+Restrict the nVMX MSRs based on KVM's config, not based on the guest's
+current config.  Using the guest's config to audit the new config
+prevents userspace from restoring the original config (KVM's config) if
+at any point in the past the guest's config was restricted in any way.
+
+Fixes: 62cc6b9dc61e ("KVM: nVMX: support restore of VMX capability MSRs")
+Cc: stable@vger.kernel.org
+Cc: David Matlack <dmatlack@google.com>
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20220607213604.3346000-6-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/vmx/nested.c |   70 ++++++++++++++++++++++++----------------------
+ 1 file changed, 37 insertions(+), 33 deletions(-)
+
+--- a/arch/x86/kvm/vmx/nested.c
++++ b/arch/x86/kvm/vmx/nested.c
+@@ -1217,7 +1217,7 @@ static int vmx_restore_vmx_basic(struct
+               BIT_ULL(49) | BIT_ULL(54) | BIT_ULL(55) |
+               /* reserved */
+               BIT_ULL(31) | GENMASK_ULL(47, 45) | GENMASK_ULL(63, 56);
+-      u64 vmx_basic = vmx->nested.msrs.basic;
++      u64 vmx_basic = vmcs_config.nested.basic;
+       if (!is_bitwise_subset(vmx_basic, data, feature_and_reserved))
+               return -EINVAL;
+@@ -1240,36 +1240,42 @@ static int vmx_restore_vmx_basic(struct
+       return 0;
+ }
+-static int
+-vmx_restore_control_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data)
++static void vmx_get_control_msr(struct nested_vmx_msrs *msrs, u32 msr_index,
++                              u32 **low, u32 **high)
+ {
+-      u64 supported;
+-      u32 *lowp, *highp;
+-
+       switch (msr_index) {
+       case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
+-              lowp = &vmx->nested.msrs.pinbased_ctls_low;
+-              highp = &vmx->nested.msrs.pinbased_ctls_high;
++              *low = &msrs->pinbased_ctls_low;
++              *high = &msrs->pinbased_ctls_high;
+               break;
+       case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
+-              lowp = &vmx->nested.msrs.procbased_ctls_low;
+-              highp = &vmx->nested.msrs.procbased_ctls_high;
++              *low = &msrs->procbased_ctls_low;
++              *high = &msrs->procbased_ctls_high;
+               break;
+       case MSR_IA32_VMX_TRUE_EXIT_CTLS:
+-              lowp = &vmx->nested.msrs.exit_ctls_low;
+-              highp = &vmx->nested.msrs.exit_ctls_high;
++              *low = &msrs->exit_ctls_low;
++              *high = &msrs->exit_ctls_high;
+               break;
+       case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
+-              lowp = &vmx->nested.msrs.entry_ctls_low;
+-              highp = &vmx->nested.msrs.entry_ctls_high;
++              *low = &msrs->entry_ctls_low;
++              *high = &msrs->entry_ctls_high;
+               break;
+       case MSR_IA32_VMX_PROCBASED_CTLS2:
+-              lowp = &vmx->nested.msrs.secondary_ctls_low;
+-              highp = &vmx->nested.msrs.secondary_ctls_high;
++              *low = &msrs->secondary_ctls_low;
++              *high = &msrs->secondary_ctls_high;
+               break;
+       default:
+               BUG();
+       }
++}
++
++static int
++vmx_restore_control_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data)
++{
++      u32 *lowp, *highp;
++      u64 supported;
++
++      vmx_get_control_msr(&vmcs_config.nested, msr_index, &lowp, &highp);
+       supported = vmx_control_msr(*lowp, *highp);
+@@ -1281,6 +1287,7 @@ vmx_restore_control_msr(struct vcpu_vmx
+       if (!is_bitwise_subset(supported, data, GENMASK_ULL(63, 32)))
+               return -EINVAL;
++      vmx_get_control_msr(&vmx->nested.msrs, msr_index, &lowp, &highp);
+       *lowp = data;
+       *highp = data >> 32;
+       return 0;
+@@ -1294,10 +1301,8 @@ static int vmx_restore_vmx_misc(struct v
+               BIT_ULL(28) | BIT_ULL(29) | BIT_ULL(30) |
+               /* reserved */
+               GENMASK_ULL(13, 9) | BIT_ULL(31);
+-      u64 vmx_misc;
+-
+-      vmx_misc = vmx_control_msr(vmx->nested.msrs.misc_low,
+-                                 vmx->nested.msrs.misc_high);
++      u64 vmx_misc = vmx_control_msr(vmcs_config.nested.misc_low,
++                                     vmcs_config.nested.misc_high);
+       if (!is_bitwise_subset(vmx_misc, data, feature_and_reserved_bits))
+               return -EINVAL;
+@@ -1325,10 +1330,8 @@ static int vmx_restore_vmx_misc(struct v
+ static int vmx_restore_vmx_ept_vpid_cap(struct vcpu_vmx *vmx, u64 data)
+ {
+-      u64 vmx_ept_vpid_cap;
+-
+-      vmx_ept_vpid_cap = vmx_control_msr(vmx->nested.msrs.ept_caps,
+-                                         vmx->nested.msrs.vpid_caps);
++      u64 vmx_ept_vpid_cap = vmx_control_msr(vmcs_config.nested.ept_caps,
++                                             vmcs_config.nested.vpid_caps);
+       /* Every bit is either reserved or a feature bit. */
+       if (!is_bitwise_subset(vmx_ept_vpid_cap, data, -1ULL))
+@@ -1339,20 +1342,21 @@ static int vmx_restore_vmx_ept_vpid_cap(
+       return 0;
+ }
+-static int vmx_restore_fixed0_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data)
++static u64 *vmx_get_fixed0_msr(struct nested_vmx_msrs *msrs, u32 msr_index)
+ {
+-      u64 *msr;
+-
+       switch (msr_index) {
+       case MSR_IA32_VMX_CR0_FIXED0:
+-              msr = &vmx->nested.msrs.cr0_fixed0;
+-              break;
++              return &msrs->cr0_fixed0;
+       case MSR_IA32_VMX_CR4_FIXED0:
+-              msr = &vmx->nested.msrs.cr4_fixed0;
+-              break;
++              return &msrs->cr4_fixed0;
+       default:
+               BUG();
+       }
++}
++
++static int vmx_restore_fixed0_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data)
++{
++      const u64 *msr = vmx_get_fixed0_msr(&vmcs_config.nested, msr_index);
+       /*
+        * 1 bits (which indicates bits which "must-be-1" during VMX operation)
+@@ -1361,7 +1365,7 @@ static int vmx_restore_fixed0_msr(struct
+       if (!is_bitwise_subset(data, *msr, -1ULL))
+               return -EINVAL;
+-      *msr = data;
++      *vmx_get_fixed0_msr(&vmx->nested.msrs, msr_index) = data;
+       return 0;
+ }
+@@ -1422,7 +1426,7 @@ int vmx_set_vmx_msr(struct kvm_vcpu *vcp
+               vmx->nested.msrs.vmcs_enum = data;
+               return 0;
+       case MSR_IA32_VMX_VMFUNC:
+-              if (data & ~vmx->nested.msrs.vmfunc_controls)
++              if (data & ~vmcs_config.nested.vmfunc_controls)
+                       return -EINVAL;
+               vmx->nested.msrs.vmfunc_controls = data;
+               return 0;
diff --git a/queue-5.15/kvm-s390-pv-don-t-present-the-ecall-interrupt-twice.patch b/queue-5.15/kvm-s390-pv-don-t-present-the-ecall-interrupt-twice.patch
new file mode 100644 (file)
index 0000000..fd1af89
--- /dev/null
@@ -0,0 +1,100 @@
+From c3f0e5fd2d33d80c5a5a8b5e5d2bab2841709cc8 Mon Sep 17 00:00:00 2001
+From: Nico Boehr <nrb@linux.ibm.com>
+Date: Mon, 18 Jul 2022 15:04:34 +0200
+Subject: KVM: s390: pv: don't present the ecall interrupt twice
+
+From: Nico Boehr <nrb@linux.ibm.com>
+
+commit c3f0e5fd2d33d80c5a5a8b5e5d2bab2841709cc8 upstream.
+
+When the SIGP interpretation facility is present and a VCPU sends an
+ecall to another VCPU in enabled wait, the sending VCPU receives a 56
+intercept (partial execution), so KVM can wake up the receiving CPU.
+Note that the SIGP interpretation facility will take care of the
+interrupt delivery and KVM's only job is to wake the receiving VCPU.
+
+For PV, the sending VCPU will receive a 108 intercept (pv notify) and
+should continue like in the non-PV case, i.e. wake the receiving VCPU.
+
+For PV and non-PV guests the interrupt delivery will occur through the
+SIGP interpretation facility on SIE entry when SIE finds the X bit in
+the status field set.
+
+However, in handle_pv_notification(), there was no special handling for
+SIGP, which leads to interrupt injection being requested by KVM for the
+next SIE entry. This results in the interrupt being delivered twice:
+once by the SIGP interpretation facility and once by KVM through the
+IICTL.
+
+Add the necessary special handling in handle_pv_notification(), similar
+to handle_partial_execution(), which simply wakes the receiving VCPU and
+leaves interrupt delivery to the SIGP interpretation facility.
+
+In contrast to external calls, emergency calls are not interpreted but
+also cause a 108 intercept, which is why we still need to call
+handle_instruction() for SIGP orders other than ecall.
+
+Since kvm_s390_handle_sigp_pei() is now called for all SIGP orders which
+cause a 108 intercept - even if they are actually handled by
+handle_instruction() - move the tracepoint in kvm_s390_handle_sigp_pei()
+to avoid possibly confusing trace messages.
+
+Signed-off-by: Nico Boehr <nrb@linux.ibm.com>
+Cc: <stable@vger.kernel.org> # 5.7
+Fixes: da24a0cc58ed ("KVM: s390: protvirt: Instruction emulation")
+Reviewed-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
+Reviewed-by: Janosch Frank <frankja@linux.ibm.com>
+Reviewed-by: Christian Borntraeger <borntraeger@linux.ibm.com>
+Link: https://lore.kernel.org/r/20220718130434.73302-1-nrb@linux.ibm.com
+Message-Id: <20220718130434.73302-1-nrb@linux.ibm.com>
+Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/s390/kvm/intercept.c |   15 +++++++++++++++
+ arch/s390/kvm/sigp.c      |    4 ++--
+ 2 files changed, 17 insertions(+), 2 deletions(-)
+
+--- a/arch/s390/kvm/intercept.c
++++ b/arch/s390/kvm/intercept.c
+@@ -523,12 +523,27 @@ static int handle_pv_uvc(struct kvm_vcpu
+ static int handle_pv_notification(struct kvm_vcpu *vcpu)
+ {
++      int ret;
++
+       if (vcpu->arch.sie_block->ipa == 0xb210)
+               return handle_pv_spx(vcpu);
+       if (vcpu->arch.sie_block->ipa == 0xb220)
+               return handle_pv_sclp(vcpu);
+       if (vcpu->arch.sie_block->ipa == 0xb9a4)
+               return handle_pv_uvc(vcpu);
++      if (vcpu->arch.sie_block->ipa >> 8 == 0xae) {
++              /*
++               * Besides external call, other SIGP orders also cause a
++               * 108 (pv notify) intercept. In contrast to external call,
++               * these orders need to be emulated and hence the appropriate
++               * place to handle them is in handle_instruction().
++               * So first try kvm_s390_handle_sigp_pei() and if that isn't
++               * successful, go on with handle_instruction().
++               */
++              ret = kvm_s390_handle_sigp_pei(vcpu);
++              if (!ret)
++                      return ret;
++      }
+       return handle_instruction(vcpu);
+ }
+--- a/arch/s390/kvm/sigp.c
++++ b/arch/s390/kvm/sigp.c
+@@ -492,9 +492,9 @@ int kvm_s390_handle_sigp_pei(struct kvm_
+       struct kvm_vcpu *dest_vcpu;
+       u8 order_code = kvm_s390_get_base_disp_rs(vcpu, NULL);
+-      trace_kvm_s390_handle_sigp_pei(vcpu, order_code, cpu_addr);
+-
+       if (order_code == SIGP_EXTERNAL_CALL) {
++              trace_kvm_s390_handle_sigp_pei(vcpu, order_code, cpu_addr);
++
+               dest_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, cpu_addr);
+               BUG_ON(dest_vcpu == NULL);
diff --git a/queue-5.15/kvm-x86-mark-tss-busy-during-ltr-emulation-_after_-all-fault-checks.patch b/queue-5.15/kvm-x86-mark-tss-busy-during-ltr-emulation-_after_-all-fault-checks.patch
new file mode 100644 (file)
index 0000000..63bd40c
--- /dev/null
@@ -0,0 +1,66 @@
+From ec6e4d863258d4bfb36d48d5e3ef68140234d688 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Mon, 11 Jul 2022 23:27:48 +0000
+Subject: KVM: x86: Mark TSS busy during LTR emulation _after_ all fault checks
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit ec6e4d863258d4bfb36d48d5e3ef68140234d688 upstream.
+
+Wait to mark the TSS as busy during LTR emulation until after all fault
+checks for the LTR have passed.  Specifically, don't mark the TSS busy if
+the new TSS base is non-canonical.
+
+Opportunistically drop the one-off !seg_desc.PRESENT check for TR as the
+only reason for the early check was to avoid marking a !PRESENT TSS as
+busy, i.e. the common !PRESENT check is now done before setting the busy bit.
+
+Fixes: e37a75a13cda ("KVM: x86: Emulator ignores LDTR/TR extended base on LLDT/LTR")
+Reported-by: syzbot+760a73552f47a8cd0fd9@syzkaller.appspotmail.com
+Cc: stable@vger.kernel.org
+Cc: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>
+Cc: Hou Wenlong <houwenlong.hwl@antgroup.com>
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
+Link: https://lore.kernel.org/r/20220711232750.1092012-2-seanjc@google.com
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/emulate.c |   19 +++++++++----------
+ 1 file changed, 9 insertions(+), 10 deletions(-)
+
+--- a/arch/x86/kvm/emulate.c
++++ b/arch/x86/kvm/emulate.c
+@@ -1669,16 +1669,6 @@ static int __load_segment_descriptor(str
+       case VCPU_SREG_TR:
+               if (seg_desc.s || (seg_desc.type != 1 && seg_desc.type != 9))
+                       goto exception;
+-              if (!seg_desc.p) {
+-                      err_vec = NP_VECTOR;
+-                      goto exception;
+-              }
+-              old_desc = seg_desc;
+-              seg_desc.type |= 2; /* busy */
+-              ret = ctxt->ops->cmpxchg_emulated(ctxt, desc_addr, &old_desc, &seg_desc,
+-                                                sizeof(seg_desc), &ctxt->exception);
+-              if (ret != X86EMUL_CONTINUE)
+-                      return ret;
+               break;
+       case VCPU_SREG_LDTR:
+               if (seg_desc.s || seg_desc.type != 2)
+@@ -1719,6 +1709,15 @@ static int __load_segment_descriptor(str
+                               ((u64)base3 << 32), ctxt))
+                       return emulate_gp(ctxt, 0);
+       }
++
++      if (seg == VCPU_SREG_TR) {
++              old_desc = seg_desc;
++              seg_desc.type |= 2; /* busy */
++              ret = ctxt->ops->cmpxchg_emulated(ctxt, desc_addr, &old_desc, &seg_desc,
++                                                sizeof(seg_desc), &ctxt->exception);
++              if (ret != X86EMUL_CONTINUE)
++                      return ret;
++      }
+ load:
+       ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg);
+       if (desc)
diff --git a/queue-5.15/kvm-x86-set-error-code-to-segment-selector-on-lldt-ltr-non-canonical-gp.patch b/queue-5.15/kvm-x86-set-error-code-to-segment-selector-on-lldt-ltr-non-canonical-gp.patch
new file mode 100644 (file)
index 0000000..e5a6f82
--- /dev/null
@@ -0,0 +1,41 @@
+From 2626206963ace9e8bf92b6eea5ff78dd674c555c Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Mon, 11 Jul 2022 23:27:49 +0000
+Subject: KVM: x86: Set error code to segment selector on LLDT/LTR non-canonical #GP
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit 2626206963ace9e8bf92b6eea5ff78dd674c555c upstream.
+
+When injecting a #GP on LLDT/LTR due to a non-canonical LDT/TSS base, set
+the error code to the selector.  Intel's SDM says nothing about the #GP,
+but AMD's APM explicitly states that both LLDT and LTR set the error code
+to the selector, not zero.
+
+Note, a non-canonical memory operand on LLDT/LTR does generate a #GP(0),
+but the KVM code in question is specific to the base from the descriptor.
+
+Fixes: e37a75a13cda ("KVM: x86: Emulator ignores LDTR/TR extended base on LLDT/LTR")
+Cc: stable@vger.kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
+Link: https://lore.kernel.org/r/20220711232750.1092012-3-seanjc@google.com
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/emulate.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kvm/emulate.c
++++ b/arch/x86/kvm/emulate.c
+@@ -1706,8 +1706,8 @@ static int __load_segment_descriptor(str
+               if (ret != X86EMUL_CONTINUE)
+                       return ret;
+               if (emul_is_noncanonical_address(get_desc_base(&seg_desc) |
+-                              ((u64)base3 << 32), ctxt))
+-                      return emulate_gp(ctxt, 0);
++                                               ((u64)base3 << 32), ctxt))
++                      return emulate_gp(ctxt, err_code);
+       }
+       if (seg == VCPU_SREG_TR) {
diff --git a/queue-5.15/kvm-x86-split-kvm_is_valid_cr4-and-export-only-the-non-vendor-bits.patch b/queue-5.15/kvm-x86-split-kvm_is_valid_cr4-and-export-only-the-non-vendor-bits.patch
new file mode 100644 (file)
index 0000000..2a2f128
--- /dev/null
@@ -0,0 +1,104 @@
+From c33f6f2228fe8517e38941a508e9f905f99ecba9 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Tue, 7 Jun 2022 21:35:50 +0000
+Subject: KVM: x86: Split kvm_is_valid_cr4() and export only the non-vendor bits
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit c33f6f2228fe8517e38941a508e9f905f99ecba9 upstream.
+
+Split the common x86 parts of kvm_is_valid_cr4(), i.e. the reserved bits
+checks, into a separate helper, __kvm_is_valid_cr4(), and export only the
+inner helper to vendor code in order to prevent nested VMX from calling
+back into vmx_is_valid_cr4() via kvm_is_valid_cr4().
+
+On SVM, this is a nop as SVM doesn't place any additional restrictions on
+CR4.
+
+On VMX, this is also currently a nop, but only because nested VMX is
+missing checks on reserved CR4 bits for nested VM-Enter.  That bug will
+be fixed in a future patch, and could simply use kvm_is_valid_cr4() as-is,
+but nVMX has _another_ bug where VMXON emulation doesn't enforce VMX's
+restrictions on CR0/CR4.  The cleanest and most intuitive way to fix the
+VMXON bug is to use nested_host_cr{0,4}_valid().  If the CR4 variant
+routes through kvm_is_valid_cr4(), using nested_host_cr4_valid() won't do
+the right thing for the VMXON case as vmx_is_valid_cr4() enforces VMX's
+restrictions if and only if the vCPU is post-VMXON.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20220607213604.3346000-2-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c |    3 ++-
+ arch/x86/kvm/vmx/vmx.c    |    4 ++--
+ arch/x86/kvm/x86.c        |   12 +++++++++---
+ arch/x86/kvm/x86.h        |    2 +-
+ 4 files changed, 14 insertions(+), 7 deletions(-)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -275,7 +275,8 @@ static bool nested_vmcb_check_cr3_cr4(st
+                       return false;
+       }
+-      if (CC(!kvm_is_valid_cr4(vcpu, save->cr4)))
++      /* Note, SVM doesn't have any additional restrictions on CR4. */
++      if (CC(!__kvm_is_valid_cr4(vcpu, save->cr4)))
+               return false;
+       return true;
+--- a/arch/x86/kvm/vmx/vmx.c
++++ b/arch/x86/kvm/vmx/vmx.c
+@@ -3213,8 +3213,8 @@ static bool vmx_is_valid_cr4(struct kvm_
+ {
+       /*
+        * We operate under the default treatment of SMM, so VMX cannot be
+-       * enabled under SMM.  Note, whether or not VMXE is allowed at all is
+-       * handled by kvm_is_valid_cr4().
++       * enabled under SMM.  Note, whether or not VMXE is allowed at all,
++       * i.e. is a reserved bit, is handled by common x86 code.
+        */
+       if ((cr4 & X86_CR4_VMXE) && is_smm(vcpu))
+               return false;
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -1031,7 +1031,7 @@ int kvm_emulate_xsetbv(struct kvm_vcpu *
+ }
+ EXPORT_SYMBOL_GPL(kvm_emulate_xsetbv);
+-bool kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
++bool __kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
+ {
+       if (cr4 & cr4_reserved_bits)
+               return false;
+@@ -1039,9 +1039,15 @@ bool kvm_is_valid_cr4(struct kvm_vcpu *v
+       if (cr4 & vcpu->arch.cr4_guest_rsvd_bits)
+               return false;
+-      return static_call(kvm_x86_is_valid_cr4)(vcpu, cr4);
++      return true;
++}
++EXPORT_SYMBOL_GPL(__kvm_is_valid_cr4);
++
++static bool kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
++{
++      return __kvm_is_valid_cr4(vcpu, cr4) &&
++             static_call(kvm_x86_is_valid_cr4)(vcpu, cr4);
+ }
+-EXPORT_SYMBOL_GPL(kvm_is_valid_cr4);
+ void kvm_post_set_cr4(struct kvm_vcpu *vcpu, unsigned long old_cr4, unsigned long cr4)
+ {
+--- a/arch/x86/kvm/x86.h
++++ b/arch/x86/kvm/x86.h
+@@ -448,7 +448,7 @@ static inline void kvm_machine_check(voi
+ void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu);
+ void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu);
+ int kvm_spec_ctrl_test_value(u64 value);
+-bool kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
++bool __kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
+ int kvm_handle_memory_failure(struct kvm_vcpu *vcpu, int r,
+                             struct x86_exception *e);
+ int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva);
index 05b9864bf9bba67f0f1fc50ac32569194cf96de4..2564d7e9ee42d87efedecd368ac8820443880344 100644 (file)
@@ -20,3 +20,10 @@ hid-wacom-don-t-register-pad_input-for-touch-switch.patch
 kvm-nvmx-snapshot-pre-vm-enter-bndcfgs-for-nested_run_pending-case.patch
 kvm-nvmx-snapshot-pre-vm-enter-debugctl-for-nested_run_pending-case.patch
 kvm-svm-don-t-bug-if-userspace-injects-an-interrupt-with-gif-0.patch
+kvm-s390-pv-don-t-present-the-ecall-interrupt-twice.patch
+kvm-x86-split-kvm_is_valid_cr4-and-export-only-the-non-vendor-bits.patch
+kvm-nvmx-let-userspace-set-nvmx-msr-to-any-_host_-supported-value.patch
+kvm-nvmx-account-for-kvm-reserved-cr4-bits-in-consistency-checks.patch
+kvm-nvmx-inject-ud-if-vmxon-is-attempted-with-incompatible-cr0-cr4.patch
+kvm-x86-mark-tss-busy-during-ltr-emulation-_after_-all-fault-checks.patch
+kvm-x86-set-error-code-to-segment-selector-on-lldt-ltr-non-canonical-gp.patch