From b57fa742b63993364d9dfbe624930377c2b5ab5d Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Fri, 12 Aug 2022 16:39:44 +0200
Subject: [PATCH] 5.10-stable patches

added patches:
	kvm-nvmx-let-userspace-set-nvmx-msr-to-any-_host_-supported-value.patch
	kvm-s390-pv-don-t-present-the-ecall-interrupt-twice.patch
	kvm-x86-mark-tss-busy-during-ltr-emulation-_after_-all-fault-checks.patch
	kvm-x86-set-error-code-to-segment-selector-on-lldt-ltr-non-canonical-gp.patch
---
 ...mx-msr-to-any-_host_-supported-value.patch | 175 ++++++++++++++++++
 ...-t-present-the-ecall-interrupt-twice.patch | 100 ++++++++++
 ...r-emulation-_after_-all-fault-checks.patch |  66 +++++++
 ...elector-on-lldt-ltr-non-canonical-gp.patch |  41 ++++
 queue-5.10/series                             |   4 +
 5 files changed, 386 insertions(+)
 create mode 100644 queue-5.10/kvm-nvmx-let-userspace-set-nvmx-msr-to-any-_host_-supported-value.patch
 create mode 100644 queue-5.10/kvm-s390-pv-don-t-present-the-ecall-interrupt-twice.patch
 create mode 100644 queue-5.10/kvm-x86-mark-tss-busy-during-ltr-emulation-_after_-all-fault-checks.patch
 create mode 100644 queue-5.10/kvm-x86-set-error-code-to-segment-selector-on-lldt-ltr-non-canonical-gp.patch

diff --git a/queue-5.10/kvm-nvmx-let-userspace-set-nvmx-msr-to-any-_host_-supported-value.patch b/queue-5.10/kvm-nvmx-let-userspace-set-nvmx-msr-to-any-_host_-supported-value.patch
new file mode 100644
index 00000000000..7175a82114e
--- /dev/null
+++ b/queue-5.10/kvm-nvmx-let-userspace-set-nvmx-msr-to-any-_host_-supported-value.patch
@@ -0,0 +1,175 @@
+From f8ae08f9789ad59d318ea75b570caa454aceda81 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Tue, 7 Jun 2022 21:35:54 +0000
+Subject: KVM: nVMX: Let userspace set nVMX MSR to any _host_ supported value
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit f8ae08f9789ad59d318ea75b570caa454aceda81 upstream.
+
+Restrict the nVMX MSRs based on KVM's config, not based on the guest's
+current config.  Using the guest's config to audit the new config
+prevents userspace from restoring the original config (KVM's config) if
+at any point in the past the guest's config was restricted in any way.
+
+Fixes: 62cc6b9dc61e ("KVM: nVMX: support restore of VMX capability MSRs")
+Cc: stable@vger.kernel.org
+Cc: David Matlack <dmatlack@google.com>
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20220607213604.3346000-6-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/vmx/nested.c |   70 ++++++++++++++++++++++++----------------------
+ 1 file changed, 37 insertions(+), 33 deletions(-)
+
+--- a/arch/x86/kvm/vmx/nested.c
++++ b/arch/x86/kvm/vmx/nested.c
+@@ -1245,7 +1245,7 @@ static int vmx_restore_vmx_basic(struct
+ 		BIT_ULL(49) | BIT_ULL(54) | BIT_ULL(55) |
+ 		/* reserved */
+ 		BIT_ULL(31) | GENMASK_ULL(47, 45) | GENMASK_ULL(63, 56);
+-	u64 vmx_basic = vmx->nested.msrs.basic;
++	u64 vmx_basic = vmcs_config.nested.basic;
+ 
+ 	if (!is_bitwise_subset(vmx_basic, data, feature_and_reserved))
+ 		return -EINVAL;
+@@ -1268,36 +1268,42 @@ static int vmx_restore_vmx_basic(struct
+ 	return 0;
+ }
+ 
+-static int
+-vmx_restore_control_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data)
++static void vmx_get_control_msr(struct nested_vmx_msrs *msrs, u32 msr_index,
++				u32 **low, u32 **high)
+ {
+-	u64 supported;
+-	u32 *lowp, *highp;
+-
+ 	switch (msr_index) {
+ 	case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
+-		lowp = &vmx->nested.msrs.pinbased_ctls_low;
+-		highp = &vmx->nested.msrs.pinbased_ctls_high;
++		*low = &msrs->pinbased_ctls_low;
++		*high = &msrs->pinbased_ctls_high;
+ 		break;
+ 	case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
+-		lowp = &vmx->nested.msrs.procbased_ctls_low;
+-		highp = &vmx->nested.msrs.procbased_ctls_high;
++		*low = &msrs->procbased_ctls_low;
++		*high = &msrs->procbased_ctls_high;
+ 		break;
+ 	case MSR_IA32_VMX_TRUE_EXIT_CTLS:
+-		lowp = &vmx->nested.msrs.exit_ctls_low;
+-		highp = &vmx->nested.msrs.exit_ctls_high;
++		*low = &msrs->exit_ctls_low;
++		*high = &msrs->exit_ctls_high;
+ 		break;
+ 	case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
+-		lowp = &vmx->nested.msrs.entry_ctls_low;
+-		highp = &vmx->nested.msrs.entry_ctls_high;
++		*low = &msrs->entry_ctls_low;
++		*high = &msrs->entry_ctls_high;
+ 		break;
+ 	case MSR_IA32_VMX_PROCBASED_CTLS2:
+-		lowp = &vmx->nested.msrs.secondary_ctls_low;
+-		highp = &vmx->nested.msrs.secondary_ctls_high;
++		*low = &msrs->secondary_ctls_low;
++		*high = &msrs->secondary_ctls_high;
+ 		break;
+ 	default:
+ 		BUG();
+ 	}
++}
++
++static int
++vmx_restore_control_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data)
++{
++	u32 *lowp, *highp;
++	u64 supported;
++
++	vmx_get_control_msr(&vmcs_config.nested, msr_index, &lowp, &highp);
+ 
+ 	supported = vmx_control_msr(*lowp, *highp);
+ 
+@@ -1309,6 +1315,7 @@ vmx_restore_control_msr(struct vcpu_vmx
+ 	if (!is_bitwise_subset(supported, data, GENMASK_ULL(63, 32)))
+ 		return -EINVAL;
+ 
++	vmx_get_control_msr(&vmx->nested.msrs, msr_index, &lowp, &highp);
+ 	*lowp = data;
+ 	*highp = data >> 32;
+ 	return 0;
+@@ -1322,10 +1329,8 @@ static int vmx_restore_vmx_misc(struct v
+ 		BIT_ULL(28) | BIT_ULL(29) | BIT_ULL(30) |
+ 		/* reserved */
+ 		GENMASK_ULL(13, 9) | BIT_ULL(31);
+-	u64 vmx_misc;
+-
+-	vmx_misc = vmx_control_msr(vmx->nested.msrs.misc_low,
+-				   vmx->nested.msrs.misc_high);
++	u64 vmx_misc = vmx_control_msr(vmcs_config.nested.misc_low,
++				       vmcs_config.nested.misc_high);
+ 
+ 	if (!is_bitwise_subset(vmx_misc, data, feature_and_reserved_bits))
+ 		return -EINVAL;
+@@ -1353,10 +1358,8 @@ static int vmx_restore_vmx_misc(struct v
+ 
+ static int vmx_restore_vmx_ept_vpid_cap(struct vcpu_vmx *vmx, u64 data)
+ {
+-	u64 vmx_ept_vpid_cap;
+-
+-	vmx_ept_vpid_cap = vmx_control_msr(vmx->nested.msrs.ept_caps,
+-					   vmx->nested.msrs.vpid_caps);
++	u64 vmx_ept_vpid_cap = vmx_control_msr(vmcs_config.nested.ept_caps,
++					       vmcs_config.nested.vpid_caps);
+ 
+ 	/* Every bit is either reserved or a feature bit. */
+ 	if (!is_bitwise_subset(vmx_ept_vpid_cap, data, -1ULL))
+@@ -1367,20 +1370,21 @@ static int vmx_restore_vmx_ept_vpid_cap(
+ 	return 0;
+ }
+ 
+-static int vmx_restore_fixed0_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data)
++static u64 *vmx_get_fixed0_msr(struct nested_vmx_msrs *msrs, u32 msr_index)
+ {
+-	u64 *msr;
+-
+ 	switch (msr_index) {
+ 	case MSR_IA32_VMX_CR0_FIXED0:
+-		msr = &vmx->nested.msrs.cr0_fixed0;
+-		break;
++		return &msrs->cr0_fixed0;
+ 	case MSR_IA32_VMX_CR4_FIXED0:
+-		msr = &vmx->nested.msrs.cr4_fixed0;
+-		break;
++		return &msrs->cr4_fixed0;
+ 	default:
+ 		BUG();
+ 	}
++}
++
++static int vmx_restore_fixed0_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data)
++{
++	const u64 *msr = vmx_get_fixed0_msr(&vmcs_config.nested, msr_index);
+ 
+ 	/*
+ 	 * 1 bits (which indicates bits which "must-be-1" during VMX operation)
+@@ -1389,7 +1393,7 @@ static int vmx_restore_fixed0_msr(struct
+ 	if (!is_bitwise_subset(data, *msr, -1ULL))
+ 		return -EINVAL;
+ 
+-	*msr = data;
++	*vmx_get_fixed0_msr(&vmx->nested.msrs, msr_index) = data;
+ 	return 0;
+ }
+ 
+@@ -1450,7 +1454,7 @@ int vmx_set_vmx_msr(struct kvm_vcpu *vcp
+ 		vmx->nested.msrs.vmcs_enum = data;
+ 		return 0;
+ 	case MSR_IA32_VMX_VMFUNC:
+-		if (data & ~vmx->nested.msrs.vmfunc_controls)
++		if (data & ~vmcs_config.nested.vmfunc_controls)
+ 			return -EINVAL;
+ 		vmx->nested.msrs.vmfunc_controls = data;
+ 		return 0;
diff --git a/queue-5.10/kvm-s390-pv-don-t-present-the-ecall-interrupt-twice.patch b/queue-5.10/kvm-s390-pv-don-t-present-the-ecall-interrupt-twice.patch
new file mode 100644
index 00000000000..a82da2232a9
--- /dev/null
+++ b/queue-5.10/kvm-s390-pv-don-t-present-the-ecall-interrupt-twice.patch
@@ -0,0 +1,100 @@
+From c3f0e5fd2d33d80c5a5a8b5e5d2bab2841709cc8 Mon Sep 17 00:00:00 2001
+From: Nico Boehr <nrb@linux.ibm.com>
+Date: Mon, 18 Jul 2022 15:04:34 +0200
+Subject: KVM: s390: pv: don't present the ecall interrupt twice
+
+From: Nico Boehr <nrb@linux.ibm.com>
+
+commit c3f0e5fd2d33d80c5a5a8b5e5d2bab2841709cc8 upstream.
+
+When the SIGP interpretation facility is present and a VCPU sends an
+ecall to another VCPU in enabled wait, the sending VCPU receives a 56
+intercept (partial execution), so KVM can wake up the receiving CPU.
+Note that the SIGP interpretation facility will take care of the
+interrupt delivery and KVM's only job is to wake the receiving VCPU.
+
+For PV, the sending VCPU will receive a 108 intercept (pv notify) and
+should continue like in the non-PV case, i.e. wake the receiving VCPU.
+
+For PV and non-PV guests the interrupt delivery will occur through the
+SIGP interpretation facility on SIE entry when SIE finds the X bit in
+the status field set.
+
+However, in handle_pv_notification(), there was no special handling for
+SIGP, which leads to interrupt injection being requested by KVM for the
+next SIE entry. This results in the interrupt being delivered twice:
+once by the SIGP interpretation facility and once by KVM through the
+IICTL.
+
+Add the necessary special handling in handle_pv_notification(), similar
+to handle_partial_execution(), which simply wakes the receiving VCPU and
+leaves interrupt delivery to the SIGP interpretation facility.
+
+In contrast to external calls, emergency calls are not interpreted but
+also cause a 108 intercept, which is why we still need to call
+handle_instruction() for SIGP orders other than ecall.
+
+Since kvm_s390_handle_sigp_pei() is now called for all SIGP orders which
+cause a 108 intercept - even if they are actually handled by
+handle_instruction() - move the tracepoint in kvm_s390_handle_sigp_pei()
+to avoid possibly confusing trace messages.
+
+Signed-off-by: Nico Boehr <nrb@linux.ibm.com>
+Cc: <stable@vger.kernel.org> # 5.7
+Fixes: da24a0cc58ed ("KVM: s390: protvirt: Instruction emulation")
+Reviewed-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
+Reviewed-by: Janosch Frank <frankja@linux.ibm.com>
+Reviewed-by: Christian Borntraeger <borntraeger@linux.ibm.com>
+Link: https://lore.kernel.org/r/20220718130434.73302-1-nrb@linux.ibm.com
+Message-Id: <20220718130434.73302-1-nrb@linux.ibm.com>
+Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/s390/kvm/intercept.c |   15 +++++++++++++++
+ arch/s390/kvm/sigp.c      |    4 ++--
+ 2 files changed, 17 insertions(+), 2 deletions(-)
+
+--- a/arch/s390/kvm/intercept.c
++++ b/arch/s390/kvm/intercept.c
+@@ -521,12 +521,27 @@ static int handle_pv_uvc(struct kvm_vcpu
+ 
+ static int handle_pv_notification(struct kvm_vcpu *vcpu)
+ {
++	int ret;
++
+ 	if (vcpu->arch.sie_block->ipa == 0xb210)
+ 		return handle_pv_spx(vcpu);
+ 	if (vcpu->arch.sie_block->ipa == 0xb220)
+ 		return handle_pv_sclp(vcpu);
+ 	if (vcpu->arch.sie_block->ipa == 0xb9a4)
+ 		return handle_pv_uvc(vcpu);
++	if (vcpu->arch.sie_block->ipa >> 8 == 0xae) {
++		/*
++		 * Besides external call, other SIGP orders also cause a
++		 * 108 (pv notify) intercept. In contrast to external call,
++		 * these orders need to be emulated and hence the appropriate
++		 * place to handle them is in handle_instruction().
++		 * So first try kvm_s390_handle_sigp_pei() and if that isn't
++		 * successful, go on with handle_instruction().
++		 */
++		ret = kvm_s390_handle_sigp_pei(vcpu);
++		if (!ret)
++			return ret;
++	}
+ 
+ 	return handle_instruction(vcpu);
+ }
+--- a/arch/s390/kvm/sigp.c
++++ b/arch/s390/kvm/sigp.c
+@@ -492,9 +492,9 @@ int kvm_s390_handle_sigp_pei(struct kvm_
+ 	struct kvm_vcpu *dest_vcpu;
+ 	u8 order_code = kvm_s390_get_base_disp_rs(vcpu, NULL);
+ 
+-	trace_kvm_s390_handle_sigp_pei(vcpu, order_code, cpu_addr);
+-
+ 	if (order_code == SIGP_EXTERNAL_CALL) {
++		trace_kvm_s390_handle_sigp_pei(vcpu, order_code, cpu_addr);
++
+ 		dest_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, cpu_addr);
+ 		BUG_ON(dest_vcpu == NULL);
+ 
diff --git a/queue-5.10/kvm-x86-mark-tss-busy-during-ltr-emulation-_after_-all-fault-checks.patch b/queue-5.10/kvm-x86-mark-tss-busy-during-ltr-emulation-_after_-all-fault-checks.patch
new file mode 100644
index 00000000000..42dc4d4f13f
--- /dev/null
+++ b/queue-5.10/kvm-x86-mark-tss-busy-during-ltr-emulation-_after_-all-fault-checks.patch
@@ -0,0 +1,66 @@
+From ec6e4d863258d4bfb36d48d5e3ef68140234d688 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Mon, 11 Jul 2022 23:27:48 +0000
+Subject: KVM: x86: Mark TSS busy during LTR emulation _after_ all fault checks
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit ec6e4d863258d4bfb36d48d5e3ef68140234d688 upstream.
+
+Wait to mark the TSS as busy during LTR emulation until after all fault
+checks for the LTR have passed.  Specifically, don't mark the TSS busy if
+the new TSS base is non-canonical.
+
+Opportunistically drop the one-off !seg_desc.PRESENT check for TR as the
+only reason for the early check was to avoid marking a !PRESENT TSS as
+busy, i.e. the common !PRESENT check is now done before setting the busy bit.
+
+Fixes: e37a75a13cda ("KVM: x86: Emulator ignores LDTR/TR extended base on LLDT/LTR")
+Reported-by: syzbot+760a73552f47a8cd0fd9@syzkaller.appspotmail.com
+Cc: stable@vger.kernel.org
+Cc: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>
+Cc: Hou Wenlong <houwenlong.hwl@antgroup.com>
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
+Link: https://lore.kernel.org/r/20220711232750.1092012-2-seanjc@google.com
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/emulate.c |   19 +++++++++----------
+ 1 file changed, 9 insertions(+), 10 deletions(-)
+
+--- a/arch/x86/kvm/emulate.c
++++ b/arch/x86/kvm/emulate.c
+@@ -1772,16 +1772,6 @@ static int __load_segment_descriptor(str
+ 	case VCPU_SREG_TR:
+ 		if (seg_desc.s || (seg_desc.type != 1 && seg_desc.type != 9))
+ 			goto exception;
+-		if (!seg_desc.p) {
+-			err_vec = NP_VECTOR;
+-			goto exception;
+-		}
+-		old_desc = seg_desc;
+-		seg_desc.type |= 2; /* busy */
+-		ret = ctxt->ops->cmpxchg_emulated(ctxt, desc_addr, &old_desc, &seg_desc,
+-						  sizeof(seg_desc), &ctxt->exception);
+-		if (ret != X86EMUL_CONTINUE)
+-			return ret;
+ 		break;
+ 	case VCPU_SREG_LDTR:
+ 		if (seg_desc.s || seg_desc.type != 2)
+@@ -1822,6 +1812,15 @@ static int __load_segment_descriptor(str
+ 				((u64)base3 << 32), ctxt))
+ 			return emulate_gp(ctxt, 0);
+ 	}
++
++	if (seg == VCPU_SREG_TR) {
++		old_desc = seg_desc;
++		seg_desc.type |= 2; /* busy */
++		ret = ctxt->ops->cmpxchg_emulated(ctxt, desc_addr, &old_desc, &seg_desc,
++						  sizeof(seg_desc), &ctxt->exception);
++		if (ret != X86EMUL_CONTINUE)
++			return ret;
++	}
+ load:
+ 	ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg);
+ 	if (desc)
diff --git a/queue-5.10/kvm-x86-set-error-code-to-segment-selector-on-lldt-ltr-non-canonical-gp.patch b/queue-5.10/kvm-x86-set-error-code-to-segment-selector-on-lldt-ltr-non-canonical-gp.patch
new file mode 100644
index 00000000000..1757d0e47d1
--- /dev/null
+++ b/queue-5.10/kvm-x86-set-error-code-to-segment-selector-on-lldt-ltr-non-canonical-gp.patch
@@ -0,0 +1,41 @@
+From 2626206963ace9e8bf92b6eea5ff78dd674c555c Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Mon, 11 Jul 2022 23:27:49 +0000
+Subject: KVM: x86: Set error code to segment selector on LLDT/LTR non-canonical #GP
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit 2626206963ace9e8bf92b6eea5ff78dd674c555c upstream.
+
+When injecting a #GP on LLDT/LTR due to a non-canonical LDT/TSS base, set
+the error code to the selector.  Intel's SDM says nothing about the #GP,
+but AMD's APM explicitly states that both LLDT and LTR set the error code
+to the selector, not zero.
+
+Note, a non-canonical memory operand on LLDT/LTR does generate a #GP(0),
+but the KVM code in question is specific to the base from the descriptor.
+
+Fixes: e37a75a13cda ("KVM: x86: Emulator ignores LDTR/TR extended base on LLDT/LTR")
+Cc: stable@vger.kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
+Link: https://lore.kernel.org/r/20220711232750.1092012-3-seanjc@google.com
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/emulate.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kvm/emulate.c
++++ b/arch/x86/kvm/emulate.c
+@@ -1809,8 +1809,8 @@ static int __load_segment_descriptor(str
+ 		if (ret != X86EMUL_CONTINUE)
+ 			return ret;
+ 		if (emul_is_noncanonical_address(get_desc_base(&seg_desc) |
+-				((u64)base3 << 32), ctxt))
+-			return emulate_gp(ctxt, 0);
++						 ((u64)base3 << 32), ctxt))
++			return emulate_gp(ctxt, err_code);
+ 	}
+ 
+ 	if (seg == VCPU_SREG_TR) {
diff --git a/queue-5.10/series b/queue-5.10/series
index f6487c6568e..87c41f5f1ac 100644
--- a/queue-5.10/series
+++ b/queue-5.10/series
@@ -14,3 +14,7 @@ hid-wacom-don-t-register-pad_input-for-touch-switch.patch
 kvm-nvmx-snapshot-pre-vm-enter-bndcfgs-for-nested_run_pending-case.patch
 kvm-nvmx-snapshot-pre-vm-enter-debugctl-for-nested_run_pending-case.patch
 kvm-svm-don-t-bug-if-userspace-injects-an-interrupt-with-gif-0.patch
+kvm-s390-pv-don-t-present-the-ecall-interrupt-twice.patch
+kvm-nvmx-let-userspace-set-nvmx-msr-to-any-_host_-supported-value.patch
+kvm-x86-mark-tss-busy-during-ltr-emulation-_after_-all-fault-checks.patch
+kvm-x86-set-error-code-to-segment-selector-on-lldt-ltr-non-canonical-gp.patch
-- 
2.47.3