From: Greg Kroah-Hartman Date: Tue, 6 Feb 2018 20:40:33 +0000 (-0800) Subject: 4.9-stable patches X-Git-Tag: v3.18.94~5 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=2d6d1a82eaf5c2981bf90128425c667a0ff280ad;p=thirdparty%2Fkernel%2Fstable-queue.git 4.9-stable patches added patches: kvm-nvmx-eliminate-vmcs02-pool.patch kvm-nvmx-mark-vmcs12-pages-dirty-on-l2-exit.patch kvm-nvmx-vmx_complete_nested_posted_interrupt-can-t-fail.patch kvm-svm-allow-direct-access-to-msr_ia32_spec_ctrl.patch kvm-vmx-allow-direct-access-to-msr_ia32_spec_ctrl.patch kvm-vmx-emulate-msr_ia32_arch_capabilities.patch kvm-vmx-introduce-alloc_loaded_vmcs.patch kvm-vmx-make-msr-bitmaps-per-vcpu.patch kvm-x86-add-ibpb-support.patch --- diff --git a/queue-4.9/kvm-nvmx-eliminate-vmcs02-pool.patch b/queue-4.9/kvm-nvmx-eliminate-vmcs02-pool.patch new file mode 100644 index 00000000000..8a99c2ae01c --- /dev/null +++ b/queue-4.9/kvm-nvmx-eliminate-vmcs02-pool.patch @@ -0,0 +1,292 @@ +From de3a0021a60635de96aa92713c1a31a96747d72c Mon Sep 17 00:00:00 2001 +From: Jim Mattson +Date: Mon, 27 Nov 2017 17:22:25 -0600 +Subject: KVM: nVMX: Eliminate vmcs02 pool +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Jim Mattson + +commit de3a0021a60635de96aa92713c1a31a96747d72c upstream. + +The potential performance advantages of a vmcs02 pool have never been +realized. To simplify the code, eliminate the pool. Instead, a single +vmcs02 is allocated per VCPU when the VCPU enters VMX operation. + +Cc: stable@vger.kernel.org # prereq for Spectre mitigation +Signed-off-by: Jim Mattson +Signed-off-by: Mark Kanda +Reviewed-by: Ameya More +Reviewed-by: David Hildenbrand +Reviewed-by: Paolo Bonzini +Signed-off-by: Radim Krčmář +Signed-off-by: David Woodhouse +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/vmx.c | 146 ++++++++--------------------------------------------- + 1 file changed, 23 insertions(+), 123 deletions(-) + +--- a/arch/x86/kvm/vmx.c ++++ b/arch/x86/kvm/vmx.c +@@ -173,7 +173,6 @@ module_param(ple_window_max, int, S_IRUG + extern const ulong vmx_return; + + #define NR_AUTOLOAD_MSRS 8 +-#define VMCS02_POOL_SIZE 1 + + struct vmcs { + u32 revision_id; +@@ -207,7 +206,7 @@ struct shared_msr_entry { + * stored in guest memory specified by VMPTRLD, but is opaque to the guest, + * which must access it using VMREAD/VMWRITE/VMCLEAR instructions. + * More than one of these structures may exist, if L1 runs multiple L2 guests. +- * nested_vmx_run() will use the data here to build a vmcs02: a VMCS for the ++ * nested_vmx_run() will use the data here to build the vmcs02: a VMCS for the + * underlying hardware which will be used to run L2. + * This structure is packed to ensure that its layout is identical across + * machines (necessary for live migration). +@@ -386,13 +385,6 @@ struct __packed vmcs12 { + */ + #define VMCS12_SIZE 0x1000 + +-/* Used to remember the last vmcs02 used for some recently used vmcs12s */ +-struct vmcs02_list { +- struct list_head list; +- gpa_t vmptr; +- struct loaded_vmcs vmcs02; +-}; +- + /* + * The nested_vmx structure is part of vcpu_vmx, and holds information we need + * for correct emulation of VMX (i.e., nested VMX) on this vcpu. +@@ -419,15 +411,15 @@ struct nested_vmx { + */ + bool sync_shadow_vmcs; + +- /* vmcs02_list cache of VMCSs recently used to run L2 guests */ +- struct list_head vmcs02_pool; +- int vmcs02_num; + bool change_vmcs01_virtual_x2apic_mode; + /* L2 must run next, and mustn't decide to exit to L1. 
*/ + bool nested_run_pending; ++ ++ struct loaded_vmcs vmcs02; ++ + /* +- * Guest pages referred to in vmcs02 with host-physical pointers, so +- * we must keep them pinned while L2 runs. ++ * Guest pages referred to in the vmcs02 with host-physical ++ * pointers, so we must keep them pinned while L2 runs. + */ + struct page *apic_access_page; + struct page *virtual_apic_page; +@@ -6684,94 +6676,6 @@ static int handle_monitor(struct kvm_vcp + } + + /* +- * To run an L2 guest, we need a vmcs02 based on the L1-specified vmcs12. +- * We could reuse a single VMCS for all the L2 guests, but we also want the +- * option to allocate a separate vmcs02 for each separate loaded vmcs12 - this +- * allows keeping them loaded on the processor, and in the future will allow +- * optimizations where prepare_vmcs02 doesn't need to set all the fields on +- * every entry if they never change. +- * So we keep, in vmx->nested.vmcs02_pool, a cache of size VMCS02_POOL_SIZE +- * (>=0) with a vmcs02 for each recently loaded vmcs12s, most recent first. +- * +- * The following functions allocate and free a vmcs02 in this pool. +- */ +- +-/* Get a VMCS from the pool to use as vmcs02 for the current vmcs12. */ +-static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx) +-{ +- struct vmcs02_list *item; +- list_for_each_entry(item, &vmx->nested.vmcs02_pool, list) +- if (item->vmptr == vmx->nested.current_vmptr) { +- list_move(&item->list, &vmx->nested.vmcs02_pool); +- return &item->vmcs02; +- } +- +- if (vmx->nested.vmcs02_num >= max(VMCS02_POOL_SIZE, 1)) { +- /* Recycle the least recently used VMCS. */ +- item = list_last_entry(&vmx->nested.vmcs02_pool, +- struct vmcs02_list, list); +- item->vmptr = vmx->nested.current_vmptr; +- list_move(&item->list, &vmx->nested.vmcs02_pool); +- return &item->vmcs02; +- } +- +- /* Create a new VMCS */ +- item = kmalloc(sizeof(struct vmcs02_list), GFP_KERNEL); +- if (!item) +- return NULL; +- item->vmcs02.vmcs = alloc_vmcs(); +- item->vmcs02.shadow_vmcs = NULL; +- if (!item->vmcs02.vmcs) { +- kfree(item); +- return NULL; +- } +- loaded_vmcs_init(&item->vmcs02); +- item->vmptr = vmx->nested.current_vmptr; +- list_add(&(item->list), &(vmx->nested.vmcs02_pool)); +- vmx->nested.vmcs02_num++; +- return &item->vmcs02; +-} +- +-/* Free and remove from pool a vmcs02 saved for a vmcs12 (if there is one) */ +-static void nested_free_vmcs02(struct vcpu_vmx *vmx, gpa_t vmptr) +-{ +- struct vmcs02_list *item; +- list_for_each_entry(item, &vmx->nested.vmcs02_pool, list) +- if (item->vmptr == vmptr) { +- free_loaded_vmcs(&item->vmcs02); +- list_del(&item->list); +- kfree(item); +- vmx->nested.vmcs02_num--; +- return; +- } +-} +- +-/* +- * Free all VMCSs saved for this vcpu, except the one pointed by +- * vmx->loaded_vmcs. We must be running L1, so vmx->loaded_vmcs +- * must be &vmx->vmcs01. +- */ +-static void nested_free_all_saved_vmcss(struct vcpu_vmx *vmx) +-{ +- struct vmcs02_list *item, *n; +- +- WARN_ON(vmx->loaded_vmcs != &vmx->vmcs01); +- list_for_each_entry_safe(item, n, &vmx->nested.vmcs02_pool, list) { +- /* +- * Something will leak if the above WARN triggers. Better than +- * a use-after-free. 
+- */ +- if (vmx->loaded_vmcs == &item->vmcs02) +- continue; +- +- free_loaded_vmcs(&item->vmcs02); +- list_del(&item->list); +- kfree(item); +- vmx->nested.vmcs02_num--; +- } +-} +- +-/* + * The following 3 functions, nested_vmx_succeed()/failValid()/failInvalid(), + * set the success or error code of an emulated VMX instruction, as specified + * by Vol 2B, VMX Instruction Reference, "Conventions". +@@ -7084,6 +6988,12 @@ static int handle_vmon(struct kvm_vcpu * + return 1; + } + ++ vmx->nested.vmcs02.vmcs = alloc_vmcs(); ++ vmx->nested.vmcs02.shadow_vmcs = NULL; ++ if (!vmx->nested.vmcs02.vmcs) ++ goto out_vmcs02; ++ loaded_vmcs_init(&vmx->nested.vmcs02); ++ + if (cpu_has_vmx_msr_bitmap()) { + vmx->nested.msr_bitmap = + (unsigned long *)__get_free_page(GFP_KERNEL); +@@ -7106,9 +7016,6 @@ static int handle_vmon(struct kvm_vcpu * + vmx->vmcs01.shadow_vmcs = shadow_vmcs; + } + +- INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool)); +- vmx->nested.vmcs02_num = 0; +- + hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC, + HRTIMER_MODE_REL_PINNED); + vmx->nested.preemption_timer.function = vmx_preemption_timer_fn; +@@ -7126,6 +7033,9 @@ out_cached_vmcs12: + free_page((unsigned long)vmx->nested.msr_bitmap); + + out_msr_bitmap: ++ free_loaded_vmcs(&vmx->nested.vmcs02); ++ ++out_vmcs02: + return -ENOMEM; + } + +@@ -7211,7 +7121,7 @@ static void free_nested(struct vcpu_vmx + vmx->vmcs01.shadow_vmcs = NULL; + } + kfree(vmx->nested.cached_vmcs12); +- /* Unpin physical memory we referred to in current vmcs02 */ ++ /* Unpin physical memory we referred to in the vmcs02 */ + if (vmx->nested.apic_access_page) { + nested_release_page(vmx->nested.apic_access_page); + vmx->nested.apic_access_page = NULL; +@@ -7227,7 +7137,7 @@ static void free_nested(struct vcpu_vmx + vmx->nested.pi_desc = NULL; + } + +- nested_free_all_saved_vmcss(vmx); ++ free_loaded_vmcs(&vmx->nested.vmcs02); + } + + /* Emulate the VMXOFF instruction */ +@@ -7261,8 +7171,6 @@ static int handle_vmclear(struct kvm_vcp + vmptr + offsetof(struct vmcs12, launch_state), + &zero, sizeof(zero)); + +- nested_free_vmcs02(vmx, vmptr); +- + skip_emulated_instruction(vcpu); + nested_vmx_succeed(vcpu); + return 1; +@@ -8051,10 +7959,11 @@ static bool nested_vmx_exit_handled(stru + + /* + * The host physical addresses of some pages of guest memory +- * are loaded into VMCS02 (e.g. L1's Virtual APIC Page). The CPU +- * may write to these pages via their host physical address while +- * L2 is running, bypassing any address-translation-based dirty +- * tracking (e.g. EPT write protection). ++ * are loaded into the vmcs02 (e.g. vmcs12's Virtual APIC ++ * Page). The CPU may write to these pages via their host ++ * physical address while L2 is running, bypassing any ++ * address-translation-based dirty tracking (e.g. EPT write ++ * protection). + * + * Mark them dirty on every exit from L2 to prevent them from + * getting out of sync with dirty tracking. +@@ -10223,7 +10132,6 @@ static int nested_vmx_run(struct kvm_vcp + struct vmcs12 *vmcs12; + struct vcpu_vmx *vmx = to_vmx(vcpu); + int cpu; +- struct loaded_vmcs *vmcs02; + bool ia32e; + u32 msr_entry_idx; + +@@ -10363,17 +10271,13 @@ static int nested_vmx_run(struct kvm_vcp + * the nested entry. 
+ */ + +- vmcs02 = nested_get_current_vmcs02(vmx); +- if (!vmcs02) +- return -ENOMEM; +- + enter_guest_mode(vcpu); + + if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) + vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); + + cpu = get_cpu(); +- vmx->loaded_vmcs = vmcs02; ++ vmx->loaded_vmcs = &vmx->nested.vmcs02; + vmx_vcpu_put(vcpu); + vmx_vcpu_load(vcpu, cpu); + vcpu->cpu = cpu; +@@ -10888,10 +10792,6 @@ static void nested_vmx_vmexit(struct kvm + vm_exit_controls_reset_shadow(vmx); + vmx_segment_cache_clear(vmx); + +- /* if no vmcs02 cache requested, remove the one we used */ +- if (VMCS02_POOL_SIZE == 0) +- nested_free_vmcs02(vmx, vmx->nested.current_vmptr); +- + load_vmcs12_host_state(vcpu, vmcs12); + + /* Update any VMCS fields that might have changed while L2 ran */ diff --git a/queue-4.9/kvm-nvmx-mark-vmcs12-pages-dirty-on-l2-exit.patch b/queue-4.9/kvm-nvmx-mark-vmcs12-pages-dirty-on-l2-exit.patch new file mode 100644 index 00000000000..9361ea07fba --- /dev/null +++ b/queue-4.9/kvm-nvmx-mark-vmcs12-pages-dirty-on-l2-exit.patch @@ -0,0 +1,116 @@ +From c9f04407f2e0b3fc9ff7913c65fcfcb0a4b61570 Mon Sep 17 00:00:00 2001 +From: David Matlack +Date: Tue, 1 Aug 2017 14:00:40 -0700 +Subject: KVM: nVMX: mark vmcs12 pages dirty on L2 exit +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: David Matlack + +commit c9f04407f2e0b3fc9ff7913c65fcfcb0a4b61570 upstream. + +The host physical addresses of L1's Virtual APIC Page and Posted +Interrupt descriptor are loaded into the VMCS02. The CPU may write +to these pages via their host physical address while L2 is running, +bypassing address-translation-based dirty tracking (e.g. EPT write +protection). Mark them dirty on every exit from L2 to prevent them +from getting out of sync with dirty tracking. + +Also mark the virtual APIC page and the posted interrupt descriptor +dirty when KVM is virtualizing posted interrupt processing. + +Signed-off-by: David Matlack +Reviewed-by: Paolo Bonzini +Signed-off-by: Radim Krčmář +Signed-off-by: David Woodhouse +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/vmx.c | 53 +++++++++++++++++++++++++++++++++++++++++++---------- + 1 file changed, 43 insertions(+), 10 deletions(-) + +--- a/arch/x86/kvm/vmx.c ++++ b/arch/x86/kvm/vmx.c +@@ -4738,6 +4738,28 @@ static bool vmx_get_enable_apicv(void) + return enable_apicv; + } + ++static void nested_mark_vmcs12_pages_dirty(struct kvm_vcpu *vcpu) ++{ ++ struct vmcs12 *vmcs12 = get_vmcs12(vcpu); ++ gfn_t gfn; ++ ++ /* ++ * Don't need to mark the APIC access page dirty; it is never ++ * written to by the CPU during APIC virtualization. 
++ */ ++ ++ if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) { ++ gfn = vmcs12->virtual_apic_page_addr >> PAGE_SHIFT; ++ kvm_vcpu_mark_page_dirty(vcpu, gfn); ++ } ++ ++ if (nested_cpu_has_posted_intr(vmcs12)) { ++ gfn = vmcs12->posted_intr_desc_addr >> PAGE_SHIFT; ++ kvm_vcpu_mark_page_dirty(vcpu, gfn); ++ } ++} ++ ++ + static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) + { + struct vcpu_vmx *vmx = to_vmx(vcpu); +@@ -4745,18 +4767,15 @@ static void vmx_complete_nested_posted_i + void *vapic_page; + u16 status; + +- if (vmx->nested.pi_desc && +- vmx->nested.pi_pending) { +- vmx->nested.pi_pending = false; +- if (!pi_test_and_clear_on(vmx->nested.pi_desc)) +- return; +- +- max_irr = find_last_bit( +- (unsigned long *)vmx->nested.pi_desc->pir, 256); ++ if (!vmx->nested.pi_desc || !vmx->nested.pi_pending) ++ return; + +- if (max_irr == 256) +- return; ++ vmx->nested.pi_pending = false; ++ if (!pi_test_and_clear_on(vmx->nested.pi_desc)) ++ return; + ++ max_irr = find_last_bit((unsigned long *)vmx->nested.pi_desc->pir, 256); ++ if (max_irr != 256) { + vapic_page = kmap(vmx->nested.virtual_apic_page); + if (!vapic_page) { + WARN_ON(1); +@@ -4772,6 +4791,8 @@ static void vmx_complete_nested_posted_i + vmcs_write16(GUEST_INTR_STATUS, status); + } + } ++ ++ nested_mark_vmcs12_pages_dirty(vcpu); + } + + static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu) +@@ -8028,6 +8049,18 @@ static bool nested_vmx_exit_handled(stru + vmcs_read32(VM_EXIT_INTR_ERROR_CODE), + KVM_ISA_VMX); + ++ /* ++ * The host physical addresses of some pages of guest memory ++ * are loaded into VMCS02 (e.g. L1's Virtual APIC Page). The CPU ++ * may write to these pages via their host physical address while ++ * L2 is running, bypassing any address-translation-based dirty ++ * tracking (e.g. EPT write protection). ++ * ++ * Mark them dirty on every exit from L2 to prevent them from ++ * getting out of sync with dirty tracking. ++ */ ++ nested_mark_vmcs12_pages_dirty(vcpu); ++ + if (vmx->nested.nested_run_pending) + return false; + diff --git a/queue-4.9/kvm-nvmx-vmx_complete_nested_posted_interrupt-can-t-fail.patch b/queue-4.9/kvm-nvmx-vmx_complete_nested_posted_interrupt-can-t-fail.patch new file mode 100644 index 00000000000..9c5498f1bc9 --- /dev/null +++ b/queue-4.9/kvm-nvmx-vmx_complete_nested_posted_interrupt-can-t-fail.patch @@ -0,0 +1,65 @@ +From 6342c50ad12e8ce0736e722184a7dbdea4a3477f Mon Sep 17 00:00:00 2001 +From: David Hildenbrand +Date: Wed, 25 Jan 2017 11:58:58 +0100 +Subject: KVM: nVMX: vmx_complete_nested_posted_interrupt() can't fail + +From: David Hildenbrand + +commit 6342c50ad12e8ce0736e722184a7dbdea4a3477f upstream. + +vmx_complete_nested_posted_interrupt() can't fail, let's turn it into +a void function. 
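Every return path already produced 0, so the int return type carried no information; the only caller, vmx_check_nested_events(), now calls it and returns 0 itself. A condensed before/after of that call site, matching the hunks below:

	/* before */
	return vmx_complete_nested_posted_interrupt(vcpu);

	/* after */
	vmx_complete_nested_posted_interrupt(vcpu);
	return 0;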
+ +Signed-off-by: David Hildenbrand +Signed-off-by: Paolo Bonzini +Signed-off-by: David Woodhouse +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/vmx.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +--- a/arch/x86/kvm/vmx.c ++++ b/arch/x86/kvm/vmx.c +@@ -4738,7 +4738,7 @@ static bool vmx_get_enable_apicv(void) + return enable_apicv; + } + +-static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) ++static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) + { + struct vcpu_vmx *vmx = to_vmx(vcpu); + int max_irr; +@@ -4749,13 +4749,13 @@ static int vmx_complete_nested_posted_in + vmx->nested.pi_pending) { + vmx->nested.pi_pending = false; + if (!pi_test_and_clear_on(vmx->nested.pi_desc)) +- return 0; ++ return; + + max_irr = find_last_bit( + (unsigned long *)vmx->nested.pi_desc->pir, 256); + + if (max_irr == 256) +- return 0; ++ return; + + vapic_page = kmap(vmx->nested.virtual_apic_page); + if (!vapic_page) { +@@ -4772,7 +4772,6 @@ static int vmx_complete_nested_posted_in + vmcs_write16(GUEST_INTR_STATUS, status); + } + } +- return 0; + } + + static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu) +@@ -10493,7 +10492,8 @@ static int vmx_check_nested_events(struc + return 0; + } + +- return vmx_complete_nested_posted_interrupt(vcpu); ++ vmx_complete_nested_posted_interrupt(vcpu); ++ return 0; + } + + static u32 vmx_get_preemption_timer_value(struct kvm_vcpu *vcpu) diff --git a/queue-4.9/kvm-svm-allow-direct-access-to-msr_ia32_spec_ctrl.patch b/queue-4.9/kvm-svm-allow-direct-access-to-msr_ia32_spec_ctrl.patch new file mode 100644 index 00000000000..b81a525cb8b --- /dev/null +++ b/queue-4.9/kvm-svm-allow-direct-access-to-msr_ia32_spec_ctrl.patch @@ -0,0 +1,189 @@ +From b2ac58f90540e39324e7a29a7ad471407ae0bf48 Mon Sep 17 00:00:00 2001 +From: KarimAllah Ahmed +Date: Sat, 3 Feb 2018 15:56:23 +0100 +Subject: KVM/SVM: Allow direct access to MSR_IA32_SPEC_CTRL + +From: KarimAllah Ahmed + +commit b2ac58f90540e39324e7a29a7ad471407ae0bf48 upstream. + +[ Based on a patch from Paolo Bonzini ] + +... basically doing exactly what we do for VMX: + +- Passthrough SPEC_CTRL to guests (if enabled in guest CPUID) +- Save and restore SPEC_CTRL around VMExit and VMEntry only if the guest + actually used it. 
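The save/restore half of this is a pair of small additions bracketing the VMRUN sequence in svm_vcpu_run(); condensed from the hunks below (the comments here are added for illustration):

	if (svm->spec_ctrl)
		wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);

	/* ... inline asm issuing VMRUN ... */

	/* Save the guest value only if the MSR is really passed through,
	 * then make sure the host runs with SPEC_CTRL == 0 again. */
	if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))
		rdmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
	if (svm->spec_ctrl)
		wrmsrl(MSR_IA32_SPEC_CTRL, 0);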
+ +Signed-off-by: KarimAllah Ahmed +Signed-off-by: David Woodhouse +Signed-off-by: Thomas Gleixner +Reviewed-by: Darren Kenny +Reviewed-by: Konrad Rzeszutek Wilk +Cc: Andrea Arcangeli +Cc: Andi Kleen +Cc: Jun Nakajima +Cc: kvm@vger.kernel.org +Cc: Dave Hansen +Cc: Tim Chen +Cc: Andy Lutomirski +Cc: Asit Mallick +Cc: Arjan Van De Ven +Cc: Greg KH +Cc: Paolo Bonzini +Cc: Dan Williams +Cc: Linus Torvalds +Cc: Ashok Raj +Link: https://lkml.kernel.org/r/1517669783-20732-1-git-send-email-karahmed@amazon.de +Signed-off-by: David Woodhouse +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/svm.c | 88 +++++++++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 88 insertions(+) + +--- a/arch/x86/kvm/svm.c ++++ b/arch/x86/kvm/svm.c +@@ -183,6 +183,8 @@ struct vcpu_svm { + u64 gs_base; + } host; + ++ u64 spec_ctrl; ++ + u32 *msrpm; + + ulong nmi_iret_rip; +@@ -248,6 +250,7 @@ static const struct svm_direct_access_ms + { .index = MSR_CSTAR, .always = true }, + { .index = MSR_SYSCALL_MASK, .always = true }, + #endif ++ { .index = MSR_IA32_SPEC_CTRL, .always = false }, + { .index = MSR_IA32_PRED_CMD, .always = false }, + { .index = MSR_IA32_LASTBRANCHFROMIP, .always = false }, + { .index = MSR_IA32_LASTBRANCHTOIP, .always = false }, +@@ -863,6 +866,25 @@ static bool valid_msr_intercept(u32 inde + return false; + } + ++static bool msr_write_intercepted(struct kvm_vcpu *vcpu, unsigned msr) ++{ ++ u8 bit_write; ++ unsigned long tmp; ++ u32 offset; ++ u32 *msrpm; ++ ++ msrpm = is_guest_mode(vcpu) ? to_svm(vcpu)->nested.msrpm: ++ to_svm(vcpu)->msrpm; ++ ++ offset = svm_msrpm_offset(msr); ++ bit_write = 2 * (msr & 0x0f) + 1; ++ tmp = msrpm[offset]; ++ ++ BUG_ON(offset == MSR_INVALID); ++ ++ return !!test_bit(bit_write, &tmp); ++} ++ + static void set_msr_interception(u32 *msrpm, unsigned msr, + int read, int write) + { +@@ -1537,6 +1559,8 @@ static void svm_vcpu_reset(struct kvm_vc + u32 dummy; + u32 eax = 1; + ++ svm->spec_ctrl = 0; ++ + if (!init_event) { + svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE | + MSR_IA32_APICBASE_ENABLE; +@@ -3520,6 +3544,13 @@ static int svm_get_msr(struct kvm_vcpu * + case MSR_VM_CR: + msr_info->data = svm->nested.vm_cr_msr; + break; ++ case MSR_IA32_SPEC_CTRL: ++ if (!msr_info->host_initiated && ++ !guest_cpuid_has_ibrs(vcpu)) ++ return 1; ++ ++ msr_info->data = svm->spec_ctrl; ++ break; + case MSR_IA32_UCODE_REV: + msr_info->data = 0x01000065; + break; +@@ -3611,6 +3642,33 @@ static int svm_set_msr(struct kvm_vcpu * + case MSR_IA32_TSC: + kvm_write_tsc(vcpu, msr); + break; ++ case MSR_IA32_SPEC_CTRL: ++ if (!msr->host_initiated && ++ !guest_cpuid_has_ibrs(vcpu)) ++ return 1; ++ ++ /* The STIBP bit doesn't fault even if it's not advertised */ ++ if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP)) ++ return 1; ++ ++ svm->spec_ctrl = data; ++ ++ if (!data) ++ break; ++ ++ /* ++ * For non-nested: ++ * When it's written (to non-zero) for the first time, pass ++ * it through. ++ * ++ * For nested: ++ * The handling of the MSR bitmap for L2 guests is done in ++ * nested_svm_vmrun_msrpm. ++ * We update the L1 MSR bit as well since it will end up ++ * touching the MSR anyway now. ++ */ ++ set_msr_interception(svm->msrpm, MSR_IA32_SPEC_CTRL, 1, 1); ++ break; + case MSR_IA32_PRED_CMD: + if (!msr->host_initiated && + !guest_cpuid_has_ibpb(vcpu)) +@@ -4854,6 +4912,15 @@ static void svm_vcpu_run(struct kvm_vcpu + + local_irq_enable(); + ++ /* ++ * If this vCPU has touched SPEC_CTRL, restore the guest's value if ++ * it's non-zero. 
Since vmentry is serialising on affected CPUs, there ++ * is no need to worry about the conditional branch over the wrmsr ++ * being speculatively taken. ++ */ ++ if (svm->spec_ctrl) ++ wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl); ++ + asm volatile ( + "push %%" _ASM_BP "; \n\t" + "mov %c[rbx](%[svm]), %%" _ASM_BX " \n\t" +@@ -4946,6 +5013,27 @@ static void svm_vcpu_run(struct kvm_vcpu + #endif + ); + ++ /* ++ * We do not use IBRS in the kernel. If this vCPU has used the ++ * SPEC_CTRL MSR it may have left it on; save the value and ++ * turn it off. This is much more efficient than blindly adding ++ * it to the atomic save/restore list. Especially as the former ++ * (Saving guest MSRs on vmexit) doesn't even exist in KVM. ++ * ++ * For non-nested case: ++ * If the L01 MSR bitmap does not intercept the MSR, then we need to ++ * save it. ++ * ++ * For nested case: ++ * If the L02 MSR bitmap does not intercept the MSR, then we need to ++ * save it. ++ */ ++ if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)) ++ rdmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl); ++ ++ if (svm->spec_ctrl) ++ wrmsrl(MSR_IA32_SPEC_CTRL, 0); ++ + /* Eliminate branch target predictions from guest mode */ + vmexit_fill_RSB(); + diff --git a/queue-4.9/kvm-vmx-allow-direct-access-to-msr_ia32_spec_ctrl.patch b/queue-4.9/kvm-vmx-allow-direct-access-to-msr_ia32_spec_ctrl.patch new file mode 100644 index 00000000000..b94e36d76a1 --- /dev/null +++ b/queue-4.9/kvm-vmx-allow-direct-access-to-msr_ia32_spec_ctrl.patch @@ -0,0 +1,296 @@ +From d28b387fb74da95d69d2615732f50cceb38e9a4d Mon Sep 17 00:00:00 2001 +From: KarimAllah Ahmed +Date: Thu, 1 Feb 2018 22:59:45 +0100 +Subject: KVM/VMX: Allow direct access to MSR_IA32_SPEC_CTRL + +From: KarimAllah Ahmed + +commit d28b387fb74da95d69d2615732f50cceb38e9a4d upstream. + +[ Based on a patch from Ashok Raj ] + +Add direct access to MSR_IA32_SPEC_CTRL for guests. This is needed for +guests that will only mitigate Spectre V2 through IBRS+IBPB and will not +be using a retpoline+IBPB based approach. + +To avoid the overhead of saving and restoring the MSR_IA32_SPEC_CTRL for +guests that do not actually use the MSR, only start saving and restoring +when a non-zero is written to it. + +No attempt is made to handle STIBP here, intentionally. Filtering STIBP +may be added in a future patch, which may require trapping all writes +if we don't want to pass it through directly to the guest. 
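The lazy passthrough lives in the WRMSR handler: MSR_IA32_SPEC_CTRL stays intercepted until the guest writes a non-zero value, at which point the vmcs01 bitmap is opened for it. Condensed from the vmx_set_msr() hunk below (comments added here for illustration):

	case MSR_IA32_SPEC_CTRL:
		if (!msr_info->host_initiated && !guest_cpuid_has_ibrs(vcpu))
			return 1;	/* not advertised to this guest */

		/* The STIBP bit doesn't fault even if it's not advertised */
		if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP))
			return 1;

		vmx->spec_ctrl = data;
		if (!data)
			break;	/* keep intercepting until the first non-zero write */

		vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap,
					      MSR_IA32_SPEC_CTRL, MSR_TYPE_RW);
		break;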
+ +[dwmw2: Clean up CPUID bits, save/restore manually, handle reset] + +Signed-off-by: KarimAllah Ahmed +Signed-off-by: David Woodhouse +Signed-off-by: Thomas Gleixner +Reviewed-by: Darren Kenny +Reviewed-by: Konrad Rzeszutek Wilk +Reviewed-by: Jim Mattson +Cc: Andrea Arcangeli +Cc: Andi Kleen +Cc: Jun Nakajima +Cc: kvm@vger.kernel.org +Cc: Dave Hansen +Cc: Tim Chen +Cc: Andy Lutomirski +Cc: Asit Mallick +Cc: Arjan Van De Ven +Cc: Greg KH +Cc: Paolo Bonzini +Cc: Dan Williams +Cc: Linus Torvalds +Cc: Ashok Raj +Link: https://lkml.kernel.org/r/1517522386-18410-5-git-send-email-karahmed@amazon.de +Signed-off-by: David Woodhouse +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/cpuid.c | 8 ++- + arch/x86/kvm/cpuid.h | 11 +++++ + arch/x86/kvm/vmx.c | 103 ++++++++++++++++++++++++++++++++++++++++++++++++++- + arch/x86/kvm/x86.c | 2 + 4 files changed, 118 insertions(+), 6 deletions(-) + +--- a/arch/x86/kvm/cpuid.c ++++ b/arch/x86/kvm/cpuid.c +@@ -357,7 +357,7 @@ static inline int __do_cpuid_ent(struct + + /* cpuid 0x80000008.ebx */ + const u32 kvm_cpuid_8000_0008_ebx_x86_features = +- F(IBPB); ++ F(IBPB) | F(IBRS); + + /* cpuid 0xC0000001.edx */ + const u32 kvm_cpuid_C000_0001_edx_x86_features = +@@ -382,7 +382,7 @@ static inline int __do_cpuid_ent(struct + + /* cpuid 7.0.edx*/ + const u32 kvm_cpuid_7_0_edx_x86_features = +- F(ARCH_CAPABILITIES); ++ F(SPEC_CTRL) | F(ARCH_CAPABILITIES); + + /* all calls to cpuid_count() should be made on the same cpu */ + get_cpu(); +@@ -618,9 +618,11 @@ static inline int __do_cpuid_ent(struct + g_phys_as = phys_as; + entry->eax = g_phys_as | (virt_as << 8); + entry->edx = 0; +- /* IBPB isn't necessarily present in hardware cpuid */ ++ /* IBRS and IBPB aren't necessarily present in hardware cpuid */ + if (boot_cpu_has(X86_FEATURE_IBPB)) + entry->ebx |= F(IBPB); ++ if (boot_cpu_has(X86_FEATURE_IBRS)) ++ entry->ebx |= F(IBRS); + entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features; + cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX); + break; +--- a/arch/x86/kvm/cpuid.h ++++ b/arch/x86/kvm/cpuid.h +@@ -171,6 +171,17 @@ static inline bool guest_cpuid_has_ibpb( + return best && (best->edx & bit(X86_FEATURE_SPEC_CTRL)); + } + ++static inline bool guest_cpuid_has_ibrs(struct kvm_vcpu *vcpu) ++{ ++ struct kvm_cpuid_entry2 *best; ++ ++ best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); ++ if (best && (best->ebx & bit(X86_FEATURE_IBRS))) ++ return true; ++ best = kvm_find_cpuid_entry(vcpu, 7, 0); ++ return best && (best->edx & bit(X86_FEATURE_SPEC_CTRL)); ++} ++ + static inline bool guest_cpuid_has_arch_capabilities(struct kvm_vcpu *vcpu) + { + struct kvm_cpuid_entry2 *best; +--- a/arch/x86/kvm/vmx.c ++++ b/arch/x86/kvm/vmx.c +@@ -551,6 +551,7 @@ struct vcpu_vmx { + #endif + + u64 arch_capabilities; ++ u64 spec_ctrl; + + u32 vm_entry_controls_shadow; + u32 vm_exit_controls_shadow; +@@ -1854,6 +1855,29 @@ static void update_exception_bitmap(stru + } + + /* ++ * Check if MSR is intercepted for currently loaded MSR bitmap. ++ */ ++static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr) ++{ ++ unsigned long *msr_bitmap; ++ int f = sizeof(unsigned long); ++ ++ if (!cpu_has_vmx_msr_bitmap()) ++ return true; ++ ++ msr_bitmap = to_vmx(vcpu)->loaded_vmcs->msr_bitmap; ++ ++ if (msr <= 0x1fff) { ++ return !!test_bit(msr, msr_bitmap + 0x800 / f); ++ } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { ++ msr &= 0x1fff; ++ return !!test_bit(msr, msr_bitmap + 0xc00 / f); ++ } ++ ++ return true; ++} ++ ++/* + * Check if MSR is intercepted for L01 MSR bitmap. 
+ */ + static bool msr_write_intercepted_l01(struct kvm_vcpu *vcpu, u32 msr) +@@ -2983,6 +3007,13 @@ static int vmx_get_msr(struct kvm_vcpu * + case MSR_IA32_TSC: + msr_info->data = guest_read_tsc(vcpu); + break; ++ case MSR_IA32_SPEC_CTRL: ++ if (!msr_info->host_initiated && ++ !guest_cpuid_has_ibrs(vcpu)) ++ return 1; ++ ++ msr_info->data = to_vmx(vcpu)->spec_ctrl; ++ break; + case MSR_IA32_ARCH_CAPABILITIES: + if (!msr_info->host_initiated && + !guest_cpuid_has_arch_capabilities(vcpu)) +@@ -3093,6 +3124,36 @@ static int vmx_set_msr(struct kvm_vcpu * + case MSR_IA32_TSC: + kvm_write_tsc(vcpu, msr_info); + break; ++ case MSR_IA32_SPEC_CTRL: ++ if (!msr_info->host_initiated && ++ !guest_cpuid_has_ibrs(vcpu)) ++ return 1; ++ ++ /* The STIBP bit doesn't fault even if it's not advertised */ ++ if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP)) ++ return 1; ++ ++ vmx->spec_ctrl = data; ++ ++ if (!data) ++ break; ++ ++ /* ++ * For non-nested: ++ * When it's written (to non-zero) for the first time, pass ++ * it through. ++ * ++ * For nested: ++ * The handling of the MSR bitmap for L2 guests is done in ++ * nested_vmx_merge_msr_bitmap. We should not touch the ++ * vmcs02.msr_bitmap here since it gets completely overwritten ++ * in the merging. We update the vmcs01 here for L1 as well ++ * since it will end up touching the MSR anyway now. ++ */ ++ vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, ++ MSR_IA32_SPEC_CTRL, ++ MSR_TYPE_RW); ++ break; + case MSR_IA32_PRED_CMD: + if (!msr_info->host_initiated && + !guest_cpuid_has_ibpb(vcpu)) +@@ -5245,6 +5306,7 @@ static void vmx_vcpu_reset(struct kvm_vc + u64 cr0; + + vmx->rmode.vm86_active = 0; ++ vmx->spec_ctrl = 0; + + vmx->soft_vnmi_blocked = 0; + +@@ -8830,6 +8892,15 @@ static void __noclone vmx_vcpu_run(struc + + vmx_arm_hv_timer(vcpu); + ++ /* ++ * If this vCPU has touched SPEC_CTRL, restore the guest's value if ++ * it's non-zero. Since vmentry is serialising on affected CPUs, there ++ * is no need to worry about the conditional branch over the wrmsr ++ * being speculatively taken. ++ */ ++ if (vmx->spec_ctrl) ++ wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); ++ + vmx->__launched = vmx->loaded_vmcs->launched; + asm( + /* Store host registers */ +@@ -8948,6 +9019,27 @@ static void __noclone vmx_vcpu_run(struc + #endif + ); + ++ /* ++ * We do not use IBRS in the kernel. If this vCPU has used the ++ * SPEC_CTRL MSR it may have left it on; save the value and ++ * turn it off. This is much more efficient than blindly adding ++ * it to the atomic save/restore list. Especially as the former ++ * (Saving guest MSRs on vmexit) doesn't even exist in KVM. ++ * ++ * For non-nested case: ++ * If the L01 MSR bitmap does not intercept the MSR, then we need to ++ * save it. ++ * ++ * For nested case: ++ * If the L02 MSR bitmap does not intercept the MSR, then we need to ++ * save it. ++ */ ++ if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)) ++ rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); ++ ++ if (vmx->spec_ctrl) ++ wrmsrl(MSR_IA32_SPEC_CTRL, 0); ++ + /* Eliminate branch target predictions from guest mode */ + vmexit_fill_RSB(); + +@@ -9507,7 +9599,7 @@ static inline bool nested_vmx_merge_msr_ + unsigned long *msr_bitmap_l1; + unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap; + /* +- * pred_cmd is trying to verify two things: ++ * pred_cmd & spec_ctrl are trying to verify two things: + * + * 1. L0 gave a permission to L1 to actually passthrough the MSR. 
This + * ensures that we do not accidentally generate an L02 MSR bitmap +@@ -9520,9 +9612,10 @@ static inline bool nested_vmx_merge_msr_ + * the MSR. + */ + bool pred_cmd = msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD); ++ bool spec_ctrl = msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL); + + if (!nested_cpu_has_virt_x2apic_mode(vmcs12) && +- !pred_cmd) ++ !pred_cmd && !spec_ctrl) + return false; + + page = nested_get_page(vcpu, vmcs12->msr_bitmap); +@@ -9561,6 +9654,12 @@ static inline bool nested_vmx_merge_msr_ + } + } + ++ if (spec_ctrl) ++ nested_vmx_disable_intercept_for_msr( ++ msr_bitmap_l1, msr_bitmap_l0, ++ MSR_IA32_SPEC_CTRL, ++ MSR_TYPE_R | MSR_TYPE_W); ++ + if (pred_cmd) + nested_vmx_disable_intercept_for_msr( + msr_bitmap_l1, msr_bitmap_l0, +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -975,7 +975,7 @@ static u32 msrs_to_save[] = { + #endif + MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA, + MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX, +- MSR_IA32_ARCH_CAPABILITIES ++ MSR_IA32_SPEC_CTRL, MSR_IA32_ARCH_CAPABILITIES + }; + + static unsigned num_msrs_to_save; diff --git a/queue-4.9/kvm-vmx-emulate-msr_ia32_arch_capabilities.patch b/queue-4.9/kvm-vmx-emulate-msr_ia32_arch_capabilities.patch new file mode 100644 index 00000000000..4b8d14c941e --- /dev/null +++ b/queue-4.9/kvm-vmx-emulate-msr_ia32_arch_capabilities.patch @@ -0,0 +1,147 @@ +From 28c1c9fabf48d6ad596273a11c46e0d0da3e14cd Mon Sep 17 00:00:00 2001 +From: KarimAllah Ahmed +Date: Thu, 1 Feb 2018 22:59:44 +0100 +Subject: KVM/VMX: Emulate MSR_IA32_ARCH_CAPABILITIES + +From: KarimAllah Ahmed + +commit 28c1c9fabf48d6ad596273a11c46e0d0da3e14cd upstream. + +Intel processors use MSR_IA32_ARCH_CAPABILITIES MSR to indicate RDCL_NO +(bit 0) and IBRS_ALL (bit 1). This is a read-only MSR. By default the +contents will come directly from the hardware, but user-space can still +override it. + +[dwmw2: The bit in kvm_cpuid_7_0_edx_x86_features can be unconditional] + +Signed-off-by: KarimAllah Ahmed +Signed-off-by: David Woodhouse +Signed-off-by: Thomas Gleixner +Reviewed-by: Paolo Bonzini +Reviewed-by: Darren Kenny +Reviewed-by: Jim Mattson +Reviewed-by: Konrad Rzeszutek Wilk +Cc: Andrea Arcangeli +Cc: Andi Kleen +Cc: Jun Nakajima +Cc: kvm@vger.kernel.org +Cc: Dave Hansen +Cc: Linus Torvalds +Cc: Andy Lutomirski +Cc: Asit Mallick +Cc: Arjan Van De Ven +Cc: Greg KH +Cc: Dan Williams +Cc: Tim Chen +Cc: Ashok Raj +Link: https://lkml.kernel.org/r/1517522386-18410-4-git-send-email-karahmed@amazon.de +Signed-off-by: David Woodhouse +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/cpuid.c | 8 +++++++- + arch/x86/kvm/cpuid.h | 8 ++++++++ + arch/x86/kvm/vmx.c | 15 +++++++++++++++ + arch/x86/kvm/x86.c | 1 + + 4 files changed, 31 insertions(+), 1 deletion(-) + +--- a/arch/x86/kvm/cpuid.c ++++ b/arch/x86/kvm/cpuid.c +@@ -380,6 +380,10 @@ static inline int __do_cpuid_ent(struct + /* cpuid 7.0.ecx*/ + const u32 kvm_cpuid_7_0_ecx_x86_features = F(PKU) | 0 /*OSPKE*/; + ++ /* cpuid 7.0.edx*/ ++ const u32 kvm_cpuid_7_0_edx_x86_features = ++ F(ARCH_CAPABILITIES); ++ + /* all calls to cpuid_count() should be made on the same cpu */ + get_cpu(); + +@@ -462,12 +466,14 @@ static inline int __do_cpuid_ent(struct + /* PKU is not yet implemented for shadow paging. 
*/ + if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE)) + entry->ecx &= ~F(PKU); ++ entry->edx &= kvm_cpuid_7_0_edx_x86_features; ++ cpuid_mask(&entry->edx, CPUID_7_EDX); + } else { + entry->ebx = 0; + entry->ecx = 0; ++ entry->edx = 0; + } + entry->eax = 0; +- entry->edx = 0; + break; + } + case 9: +--- a/arch/x86/kvm/cpuid.h ++++ b/arch/x86/kvm/cpuid.h +@@ -171,6 +171,14 @@ static inline bool guest_cpuid_has_ibpb( + return best && (best->edx & bit(X86_FEATURE_SPEC_CTRL)); + } + ++static inline bool guest_cpuid_has_arch_capabilities(struct kvm_vcpu *vcpu) ++{ ++ struct kvm_cpuid_entry2 *best; ++ ++ best = kvm_find_cpuid_entry(vcpu, 7, 0); ++ return best && (best->edx & bit(X86_FEATURE_ARCH_CAPABILITIES)); ++} ++ + + /* + * NRIPS is provided through cpuidfn 0x8000000a.edx bit 3 +--- a/arch/x86/kvm/vmx.c ++++ b/arch/x86/kvm/vmx.c +@@ -550,6 +550,8 @@ struct vcpu_vmx { + u64 msr_guest_kernel_gs_base; + #endif + ++ u64 arch_capabilities; ++ + u32 vm_entry_controls_shadow; + u32 vm_exit_controls_shadow; + /* +@@ -2981,6 +2983,12 @@ static int vmx_get_msr(struct kvm_vcpu * + case MSR_IA32_TSC: + msr_info->data = guest_read_tsc(vcpu); + break; ++ case MSR_IA32_ARCH_CAPABILITIES: ++ if (!msr_info->host_initiated && ++ !guest_cpuid_has_arch_capabilities(vcpu)) ++ return 1; ++ msr_info->data = to_vmx(vcpu)->arch_capabilities; ++ break; + case MSR_IA32_SYSENTER_CS: + msr_info->data = vmcs_read32(GUEST_SYSENTER_CS); + break; +@@ -3112,6 +3120,11 @@ static int vmx_set_msr(struct kvm_vcpu * + vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, MSR_IA32_PRED_CMD, + MSR_TYPE_W); + break; ++ case MSR_IA32_ARCH_CAPABILITIES: ++ if (!msr_info->host_initiated) ++ return 1; ++ vmx->arch_capabilities = data; ++ break; + case MSR_IA32_CR_PAT: + if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { + if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data)) +@@ -5202,6 +5215,8 @@ static int vmx_vcpu_setup(struct vcpu_vm + ++vmx->nmsrs; + } + ++ if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) ++ rdmsrl(MSR_IA32_ARCH_CAPABILITIES, vmx->arch_capabilities); + + vm_exit_controls_init(vmx, vmcs_config.vmexit_ctrl); + +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -975,6 +975,7 @@ static u32 msrs_to_save[] = { + #endif + MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA, + MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX, ++ MSR_IA32_ARCH_CAPABILITIES + }; + + static unsigned num_msrs_to_save; diff --git a/queue-4.9/kvm-vmx-introduce-alloc_loaded_vmcs.patch b/queue-4.9/kvm-vmx-introduce-alloc_loaded_vmcs.patch new file mode 100644 index 00000000000..32d35f88e11 --- /dev/null +++ b/queue-4.9/kvm-vmx-introduce-alloc_loaded_vmcs.patch @@ -0,0 +1,101 @@ +From f21f165ef922c2146cc5bdc620f542953c41714b Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Thu, 11 Jan 2018 12:16:15 +0100 +Subject: KVM: VMX: introduce alloc_loaded_vmcs + +From: Paolo Bonzini + +commit f21f165ef922c2146cc5bdc620f542953c41714b upstream. + +Group together the calls to alloc_vmcs and loaded_vmcs_init. Soon we'll also +allocate an MSR bitmap there. 
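The helper is simply the existing three-step sequence factored out, so vmcs01 and the nested vmcs02 share one allocation path (as added in the hunk below):

	static int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
	{
		loaded_vmcs->vmcs = alloc_vmcs();
		if (!loaded_vmcs->vmcs)
			return -ENOMEM;

		loaded_vmcs->shadow_vmcs = NULL;
		loaded_vmcs_init(loaded_vmcs);
		return 0;
	}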
+ +Cc: stable@vger.kernel.org # prereq for Spectre mitigation +Signed-off-by: Paolo Bonzini +Signed-off-by: David Woodhouse +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/vmx.c | 38 +++++++++++++++++++++++--------------- + 1 file changed, 23 insertions(+), 15 deletions(-) + +--- a/arch/x86/kvm/vmx.c ++++ b/arch/x86/kvm/vmx.c +@@ -3524,11 +3524,6 @@ static struct vmcs *alloc_vmcs_cpu(int c + return vmcs; + } + +-static struct vmcs *alloc_vmcs(void) +-{ +- return alloc_vmcs_cpu(raw_smp_processor_id()); +-} +- + static void free_vmcs(struct vmcs *vmcs) + { + free_pages((unsigned long)vmcs, vmcs_config.order); +@@ -3547,6 +3542,22 @@ static void free_loaded_vmcs(struct load + WARN_ON(loaded_vmcs->shadow_vmcs != NULL); + } + ++static struct vmcs *alloc_vmcs(void) ++{ ++ return alloc_vmcs_cpu(raw_smp_processor_id()); ++} ++ ++static int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) ++{ ++ loaded_vmcs->vmcs = alloc_vmcs(); ++ if (!loaded_vmcs->vmcs) ++ return -ENOMEM; ++ ++ loaded_vmcs->shadow_vmcs = NULL; ++ loaded_vmcs_init(loaded_vmcs); ++ return 0; ++} ++ + static void free_kvm_area(void) + { + int cpu; +@@ -6949,6 +6960,7 @@ static int handle_vmon(struct kvm_vcpu * + struct vmcs *shadow_vmcs; + const u64 VMXON_NEEDED_FEATURES = FEATURE_CONTROL_LOCKED + | FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; ++ int r; + + /* The Intel VMX Instruction Reference lists a bunch of bits that + * are prerequisite to running VMXON, most notably cr4.VMXE must be +@@ -6988,11 +7000,9 @@ static int handle_vmon(struct kvm_vcpu * + return 1; + } + +- vmx->nested.vmcs02.vmcs = alloc_vmcs(); +- vmx->nested.vmcs02.shadow_vmcs = NULL; +- if (!vmx->nested.vmcs02.vmcs) ++ r = alloc_loaded_vmcs(&vmx->nested.vmcs02); ++ if (r < 0) + goto out_vmcs02; +- loaded_vmcs_init(&vmx->nested.vmcs02); + + if (cpu_has_vmx_msr_bitmap()) { + vmx->nested.msr_bitmap = +@@ -9113,17 +9123,15 @@ static struct kvm_vcpu *vmx_create_vcpu( + if (!vmx->guest_msrs) + goto free_pml; + +- vmx->loaded_vmcs = &vmx->vmcs01; +- vmx->loaded_vmcs->vmcs = alloc_vmcs(); +- vmx->loaded_vmcs->shadow_vmcs = NULL; +- if (!vmx->loaded_vmcs->vmcs) +- goto free_msrs; + if (!vmm_exclusive) + kvm_cpu_vmxon(__pa(per_cpu(vmxarea, raw_smp_processor_id()))); +- loaded_vmcs_init(vmx->loaded_vmcs); ++ err = alloc_loaded_vmcs(&vmx->vmcs01); + if (!vmm_exclusive) + kvm_cpu_vmxoff(); ++ if (err < 0) ++ goto free_msrs; + ++ vmx->loaded_vmcs = &vmx->vmcs01; + cpu = get_cpu(); + vmx_vcpu_load(&vmx->vcpu, cpu); + vmx->vcpu.cpu = cpu; diff --git a/queue-4.9/kvm-vmx-make-msr-bitmaps-per-vcpu.patch b/queue-4.9/kvm-vmx-make-msr-bitmaps-per-vcpu.patch new file mode 100644 index 00000000000..a831850727e --- /dev/null +++ b/queue-4.9/kvm-vmx-make-msr-bitmaps-per-vcpu.patch @@ -0,0 +1,582 @@ +From 904e14fb7cb96401a7dc803ca2863fd5ba32ffe6 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Tue, 16 Jan 2018 16:51:18 +0100 +Subject: KVM: VMX: make MSR bitmaps per-VCPU + +From: Paolo Bonzini + +commit 904e14fb7cb96401a7dc803ca2863fd5ba32ffe6 upstream. + +Place the MSR bitmap in struct loaded_vmcs, and update it in place +every time the x2apic or APICv state can change. This is rare and +the loop can handle 64 MSRs per iteration, in a similar fashion as +nested_vmx_prepare_msr_bitmap. + +This prepares for choosing, on a per-VM basis, whether to intercept +the SPEC_CTRL and PRED_CMD MSRs. 
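The "64 MSRs per iteration" refers to writing one bitmap word per BITS_PER_LONG (64 on x86-64) x2APIC MSRs whenever the bitmap mode changes; condensed from the vmx_update_msr_bitmap_x2apic() hunk below (comments added here for illustration):

	for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
		unsigned word = msr / BITS_PER_LONG;

		/* reads: pass through only when APICv is active */
		msr_bitmap[word] = (mode & MSR_BITMAP_MODE_X2APIC_APICV) ? 0 : ~0;
		/* writes: intercept by default; TPR/EOI/SELF_IPI opened up below */
		msr_bitmap[word + (0x800 / sizeof(long))] = ~0;
	}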
+ +Cc: stable@vger.kernel.org # prereq for Spectre mitigation +Suggested-by: Jim Mattson +Signed-off-by: Paolo Bonzini +Signed-off-by: David Woodhouse +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/vmx.c | 316 +++++++++++++++++++---------------------------------- + 1 file changed, 115 insertions(+), 201 deletions(-) + +--- a/arch/x86/kvm/vmx.c ++++ b/arch/x86/kvm/vmx.c +@@ -109,6 +109,14 @@ static u64 __read_mostly host_xss; + static bool __read_mostly enable_pml = 1; + module_param_named(pml, enable_pml, bool, S_IRUGO); + ++#define MSR_TYPE_R 1 ++#define MSR_TYPE_W 2 ++#define MSR_TYPE_RW 3 ++ ++#define MSR_BITMAP_MODE_X2APIC 1 ++#define MSR_BITMAP_MODE_X2APIC_APICV 2 ++#define MSR_BITMAP_MODE_LM 4 ++ + #define KVM_VMX_TSC_MULTIPLIER_MAX 0xffffffffffffffffULL + + /* Guest_tsc -> host_tsc conversion requires 64-bit division. */ +@@ -190,6 +198,7 @@ struct loaded_vmcs { + struct vmcs *shadow_vmcs; + int cpu; + int launched; ++ unsigned long *msr_bitmap; + struct list_head loaded_vmcss_on_cpu_link; + }; + +@@ -428,8 +437,6 @@ struct nested_vmx { + bool pi_pending; + u16 posted_intr_nv; + +- unsigned long *msr_bitmap; +- + struct hrtimer preemption_timer; + bool preemption_timer_expired; + +@@ -530,6 +537,7 @@ struct vcpu_vmx { + unsigned long host_rsp; + u8 fail; + bool nmi_known_unmasked; ++ u8 msr_bitmap_mode; + u32 exit_intr_info; + u32 idt_vectoring_info; + ulong rflags; +@@ -904,6 +912,7 @@ static u32 vmx_segment_access_rights(str + static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx); + static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx); + static int alloc_identity_pagetable(struct kvm *kvm); ++static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu); + + static DEFINE_PER_CPU(struct vmcs *, vmxarea); + static DEFINE_PER_CPU(struct vmcs *, current_vmcs); +@@ -923,12 +932,6 @@ static DEFINE_PER_CPU(spinlock_t, blocke + + static unsigned long *vmx_io_bitmap_a; + static unsigned long *vmx_io_bitmap_b; +-static unsigned long *vmx_msr_bitmap_legacy; +-static unsigned long *vmx_msr_bitmap_longmode; +-static unsigned long *vmx_msr_bitmap_legacy_x2apic; +-static unsigned long *vmx_msr_bitmap_longmode_x2apic; +-static unsigned long *vmx_msr_bitmap_legacy_x2apic_apicv_inactive; +-static unsigned long *vmx_msr_bitmap_longmode_x2apic_apicv_inactive; + static unsigned long *vmx_vmread_bitmap; + static unsigned long *vmx_vmwrite_bitmap; + +@@ -2522,36 +2525,6 @@ static void move_msr_up(struct vcpu_vmx + vmx->guest_msrs[from] = tmp; + } + +-static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu) +-{ +- unsigned long *msr_bitmap; +- +- if (is_guest_mode(vcpu)) +- msr_bitmap = to_vmx(vcpu)->nested.msr_bitmap; +- else if (cpu_has_secondary_exec_ctrls() && +- (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) & +- SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) { +- if (enable_apicv && kvm_vcpu_apicv_active(vcpu)) { +- if (is_long_mode(vcpu)) +- msr_bitmap = vmx_msr_bitmap_longmode_x2apic; +- else +- msr_bitmap = vmx_msr_bitmap_legacy_x2apic; +- } else { +- if (is_long_mode(vcpu)) +- msr_bitmap = vmx_msr_bitmap_longmode_x2apic_apicv_inactive; +- else +- msr_bitmap = vmx_msr_bitmap_legacy_x2apic_apicv_inactive; +- } +- } else { +- if (is_long_mode(vcpu)) +- msr_bitmap = vmx_msr_bitmap_longmode; +- else +- msr_bitmap = vmx_msr_bitmap_legacy; +- } +- +- vmcs_write64(MSR_BITMAP, __pa(msr_bitmap)); +-} +- + /* + * Set up the vmcs to automatically save and restore system + * msrs. 
Don't touch the 64-bit msrs if the guest is in legacy +@@ -2592,7 +2565,7 @@ static void setup_msrs(struct vcpu_vmx * + vmx->save_nmsrs = save_nmsrs; + + if (cpu_has_vmx_msr_bitmap()) +- vmx_set_msr_bitmap(&vmx->vcpu); ++ vmx_update_msr_bitmap(&vmx->vcpu); + } + + /* +@@ -3539,6 +3512,8 @@ static void free_loaded_vmcs(struct load + loaded_vmcs_clear(loaded_vmcs); + free_vmcs(loaded_vmcs->vmcs); + loaded_vmcs->vmcs = NULL; ++ if (loaded_vmcs->msr_bitmap) ++ free_page((unsigned long)loaded_vmcs->msr_bitmap); + WARN_ON(loaded_vmcs->shadow_vmcs != NULL); + } + +@@ -3555,7 +3530,18 @@ static int alloc_loaded_vmcs(struct load + + loaded_vmcs->shadow_vmcs = NULL; + loaded_vmcs_init(loaded_vmcs); ++ ++ if (cpu_has_vmx_msr_bitmap()) { ++ loaded_vmcs->msr_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); ++ if (!loaded_vmcs->msr_bitmap) ++ goto out_vmcs; ++ memset(loaded_vmcs->msr_bitmap, 0xff, PAGE_SIZE); ++ } + return 0; ++ ++out_vmcs: ++ free_loaded_vmcs(loaded_vmcs); ++ return -ENOMEM; + } + + static void free_kvm_area(void) +@@ -4564,10 +4550,8 @@ static void free_vpid(int vpid) + spin_unlock(&vmx_vpid_lock); + } + +-#define MSR_TYPE_R 1 +-#define MSR_TYPE_W 2 +-static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, +- u32 msr, int type) ++static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, ++ u32 msr, int type) + { + int f = sizeof(unsigned long); + +@@ -4601,8 +4585,8 @@ static void __vmx_disable_intercept_for_ + } + } + +-static void __vmx_enable_intercept_for_msr(unsigned long *msr_bitmap, +- u32 msr, int type) ++static void __always_inline vmx_enable_intercept_for_msr(unsigned long *msr_bitmap, ++ u32 msr, int type) + { + int f = sizeof(unsigned long); + +@@ -4636,6 +4620,15 @@ static void __vmx_enable_intercept_for_m + } + } + ++static void __always_inline vmx_set_intercept_for_msr(unsigned long *msr_bitmap, ++ u32 msr, int type, bool value) ++{ ++ if (value) ++ vmx_enable_intercept_for_msr(msr_bitmap, msr, type); ++ else ++ vmx_disable_intercept_for_msr(msr_bitmap, msr, type); ++} ++ + /* + * If a msr is allowed by L0, we should check whether it is allowed by L1. + * The corresponding bit will be cleared unless both of L0 and L1 allow it. 
+@@ -4682,58 +4675,68 @@ static void nested_vmx_disable_intercept + } + } + +-static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only) ++static u8 vmx_msr_bitmap_mode(struct kvm_vcpu *vcpu) + { +- if (!longmode_only) +- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, +- msr, MSR_TYPE_R | MSR_TYPE_W); +- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, +- msr, MSR_TYPE_R | MSR_TYPE_W); +-} +- +-static void vmx_enable_intercept_msr_read_x2apic(u32 msr, bool apicv_active) +-{ +- if (apicv_active) { +- __vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic, +- msr, MSR_TYPE_R); +- __vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic, +- msr, MSR_TYPE_R); +- } else { +- __vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic_apicv_inactive, +- msr, MSR_TYPE_R); +- __vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic_apicv_inactive, +- msr, MSR_TYPE_R); ++ u8 mode = 0; ++ ++ if (cpu_has_secondary_exec_ctrls() && ++ (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) & ++ SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) { ++ mode |= MSR_BITMAP_MODE_X2APIC; ++ if (enable_apicv && kvm_vcpu_apicv_active(vcpu)) ++ mode |= MSR_BITMAP_MODE_X2APIC_APICV; + } ++ ++ if (is_long_mode(vcpu)) ++ mode |= MSR_BITMAP_MODE_LM; ++ ++ return mode; + } + +-static void vmx_disable_intercept_msr_read_x2apic(u32 msr, bool apicv_active) ++#define X2APIC_MSR(r) (APIC_BASE_MSR + ((r) >> 4)) ++ ++static void vmx_update_msr_bitmap_x2apic(unsigned long *msr_bitmap, ++ u8 mode) + { +- if (apicv_active) { +- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic, +- msr, MSR_TYPE_R); +- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic, +- msr, MSR_TYPE_R); +- } else { +- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic_apicv_inactive, +- msr, MSR_TYPE_R); +- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic_apicv_inactive, +- msr, MSR_TYPE_R); ++ int msr; ++ ++ for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) { ++ unsigned word = msr / BITS_PER_LONG; ++ msr_bitmap[word] = (mode & MSR_BITMAP_MODE_X2APIC_APICV) ? 0 : ~0; ++ msr_bitmap[word + (0x800 / sizeof(long))] = ~0; ++ } ++ ++ if (mode & MSR_BITMAP_MODE_X2APIC) { ++ /* ++ * TPR reads and writes can be virtualized even if virtual interrupt ++ * delivery is not in use. 
++ */ ++ vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TASKPRI), MSR_TYPE_RW); ++ if (mode & MSR_BITMAP_MODE_X2APIC_APICV) { ++ vmx_enable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TMCCT), MSR_TYPE_R); ++ vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_EOI), MSR_TYPE_W); ++ vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_SELF_IPI), MSR_TYPE_W); ++ } + } + } + +-static void vmx_disable_intercept_msr_write_x2apic(u32 msr, bool apicv_active) ++static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu) + { +- if (apicv_active) { +- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic, +- msr, MSR_TYPE_W); +- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic, +- msr, MSR_TYPE_W); +- } else { +- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic_apicv_inactive, +- msr, MSR_TYPE_W); +- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic_apicv_inactive, +- msr, MSR_TYPE_W); +- } ++ struct vcpu_vmx *vmx = to_vmx(vcpu); ++ unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap; ++ u8 mode = vmx_msr_bitmap_mode(vcpu); ++ u8 changed = mode ^ vmx->msr_bitmap_mode; ++ ++ if (!changed) ++ return; ++ ++ vmx_set_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW, ++ !(mode & MSR_BITMAP_MODE_LM)); ++ ++ if (changed & (MSR_BITMAP_MODE_X2APIC | MSR_BITMAP_MODE_X2APIC_APICV)) ++ vmx_update_msr_bitmap_x2apic(msr_bitmap, mode); ++ ++ vmx->msr_bitmap_mode = mode; + } + + static bool vmx_get_enable_apicv(void) +@@ -4982,7 +4985,7 @@ static void vmx_refresh_apicv_exec_ctrl( + } + + if (cpu_has_vmx_msr_bitmap()) +- vmx_set_msr_bitmap(vcpu); ++ vmx_update_msr_bitmap(vcpu); + } + + static u32 vmx_exec_control(struct vcpu_vmx *vmx) +@@ -5071,7 +5074,7 @@ static int vmx_vcpu_setup(struct vcpu_vm + vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap)); + } + if (cpu_has_vmx_msr_bitmap()) +- vmcs_write64(MSR_BITMAP, __pa(vmx_msr_bitmap_legacy)); ++ vmcs_write64(MSR_BITMAP, __pa(vmx->vmcs01.msr_bitmap)); + + vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */ + +@@ -6402,7 +6405,7 @@ static void wakeup_handler(void) + + static __init int hardware_setup(void) + { +- int r = -ENOMEM, i, msr; ++ int r = -ENOMEM, i; + + rdmsrl_safe(MSR_EFER, &host_efer); + +@@ -6417,41 +6420,13 @@ static __init int hardware_setup(void) + if (!vmx_io_bitmap_b) + goto out; + +- vmx_msr_bitmap_legacy = (unsigned long *)__get_free_page(GFP_KERNEL); +- if (!vmx_msr_bitmap_legacy) +- goto out1; +- +- vmx_msr_bitmap_legacy_x2apic = +- (unsigned long *)__get_free_page(GFP_KERNEL); +- if (!vmx_msr_bitmap_legacy_x2apic) +- goto out2; +- +- vmx_msr_bitmap_legacy_x2apic_apicv_inactive = +- (unsigned long *)__get_free_page(GFP_KERNEL); +- if (!vmx_msr_bitmap_legacy_x2apic_apicv_inactive) +- goto out3; +- +- vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL); +- if (!vmx_msr_bitmap_longmode) +- goto out4; +- +- vmx_msr_bitmap_longmode_x2apic = +- (unsigned long *)__get_free_page(GFP_KERNEL); +- if (!vmx_msr_bitmap_longmode_x2apic) +- goto out5; +- +- vmx_msr_bitmap_longmode_x2apic_apicv_inactive = +- (unsigned long *)__get_free_page(GFP_KERNEL); +- if (!vmx_msr_bitmap_longmode_x2apic_apicv_inactive) +- goto out6; +- + vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); + if (!vmx_vmread_bitmap) +- goto out7; ++ goto out1; + + vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); + if (!vmx_vmwrite_bitmap) +- goto out8; ++ goto out2; + + memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE); + memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE); 
+@@ -6460,12 +6435,9 @@ static __init int hardware_setup(void) + + memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE); + +- memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE); +- memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE); +- + if (setup_vmcs_config(&vmcs_config) < 0) { + r = -EIO; +- goto out9; ++ goto out3; + } + + if (boot_cpu_has(X86_FEATURE_NX)) +@@ -6522,47 +6494,8 @@ static __init int hardware_setup(void) + kvm_tsc_scaling_ratio_frac_bits = 48; + } + +- vmx_disable_intercept_for_msr(MSR_FS_BASE, false); +- vmx_disable_intercept_for_msr(MSR_GS_BASE, false); +- vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true); +- vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false); +- vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false); +- vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false); +- +- memcpy(vmx_msr_bitmap_legacy_x2apic, +- vmx_msr_bitmap_legacy, PAGE_SIZE); +- memcpy(vmx_msr_bitmap_longmode_x2apic, +- vmx_msr_bitmap_longmode, PAGE_SIZE); +- memcpy(vmx_msr_bitmap_legacy_x2apic_apicv_inactive, +- vmx_msr_bitmap_legacy, PAGE_SIZE); +- memcpy(vmx_msr_bitmap_longmode_x2apic_apicv_inactive, +- vmx_msr_bitmap_longmode, PAGE_SIZE); +- + set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */ + +- /* +- * enable_apicv && kvm_vcpu_apicv_active() +- */ +- for (msr = 0x800; msr <= 0x8ff; msr++) +- vmx_disable_intercept_msr_read_x2apic(msr, true); +- +- /* TMCCT */ +- vmx_enable_intercept_msr_read_x2apic(0x839, true); +- /* TPR */ +- vmx_disable_intercept_msr_write_x2apic(0x808, true); +- /* EOI */ +- vmx_disable_intercept_msr_write_x2apic(0x80b, true); +- /* SELF-IPI */ +- vmx_disable_intercept_msr_write_x2apic(0x83f, true); +- +- /* +- * (enable_apicv && !kvm_vcpu_apicv_active()) || +- * !enable_apicv +- */ +- /* TPR */ +- vmx_disable_intercept_msr_read_x2apic(0x808, false); +- vmx_disable_intercept_msr_write_x2apic(0x808, false); +- + if (enable_ept) { + kvm_mmu_set_mask_ptes(VMX_EPT_READABLE_MASK, + (enable_ept_ad_bits) ? 
VMX_EPT_ACCESS_BIT : 0ull, +@@ -6608,22 +6541,10 @@ static __init int hardware_setup(void) + + return alloc_kvm_area(); + +-out9: +- free_page((unsigned long)vmx_vmwrite_bitmap); +-out8: +- free_page((unsigned long)vmx_vmread_bitmap); +-out7: +- free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic_apicv_inactive); +-out6: +- free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic); +-out5: +- free_page((unsigned long)vmx_msr_bitmap_longmode); +-out4: +- free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic_apicv_inactive); + out3: +- free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic); ++ free_page((unsigned long)vmx_vmwrite_bitmap); + out2: +- free_page((unsigned long)vmx_msr_bitmap_legacy); ++ free_page((unsigned long)vmx_vmread_bitmap); + out1: + free_page((unsigned long)vmx_io_bitmap_b); + out: +@@ -6634,12 +6555,6 @@ out: + + static __exit void hardware_unsetup(void) + { +- free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic); +- free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic_apicv_inactive); +- free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic); +- free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic_apicv_inactive); +- free_page((unsigned long)vmx_msr_bitmap_legacy); +- free_page((unsigned long)vmx_msr_bitmap_longmode); + free_page((unsigned long)vmx_io_bitmap_b); + free_page((unsigned long)vmx_io_bitmap_a); + free_page((unsigned long)vmx_vmwrite_bitmap); +@@ -7004,13 +6919,6 @@ static int handle_vmon(struct kvm_vcpu * + if (r < 0) + goto out_vmcs02; + +- if (cpu_has_vmx_msr_bitmap()) { +- vmx->nested.msr_bitmap = +- (unsigned long *)__get_free_page(GFP_KERNEL); +- if (!vmx->nested.msr_bitmap) +- goto out_msr_bitmap; +- } +- + vmx->nested.cached_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL); + if (!vmx->nested.cached_vmcs12) + goto out_cached_vmcs12; +@@ -7040,9 +6948,6 @@ out_shadow_vmcs: + kfree(vmx->nested.cached_vmcs12); + + out_cached_vmcs12: +- free_page((unsigned long)vmx->nested.msr_bitmap); +- +-out_msr_bitmap: + free_loaded_vmcs(&vmx->nested.vmcs02); + + out_vmcs02: +@@ -7121,10 +7026,6 @@ static void free_nested(struct vcpu_vmx + vmx->nested.vmxon = false; + free_vpid(vmx->nested.vpid02); + nested_release_vmcs12(vmx); +- if (vmx->nested.msr_bitmap) { +- free_page((unsigned long)vmx->nested.msr_bitmap); +- vmx->nested.msr_bitmap = NULL; +- } + if (enable_shadow_vmcs) { + vmcs_clear(vmx->vmcs01.shadow_vmcs); + free_vmcs(vmx->vmcs01.shadow_vmcs); +@@ -8471,7 +8372,7 @@ static void vmx_set_virtual_x2apic_mode( + } + vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control); + +- vmx_set_msr_bitmap(vcpu); ++ vmx_update_msr_bitmap(vcpu); + } + + static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa) +@@ -9091,6 +8992,7 @@ static struct kvm_vcpu *vmx_create_vcpu( + { + int err; + struct vcpu_vmx *vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); ++ unsigned long *msr_bitmap; + int cpu; + + if (!vmx) +@@ -9131,6 +9033,15 @@ static struct kvm_vcpu *vmx_create_vcpu( + if (err < 0) + goto free_msrs; + ++ msr_bitmap = vmx->vmcs01.msr_bitmap; ++ vmx_disable_intercept_for_msr(msr_bitmap, MSR_FS_BASE, MSR_TYPE_RW); ++ vmx_disable_intercept_for_msr(msr_bitmap, MSR_GS_BASE, MSR_TYPE_RW); ++ vmx_disable_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW); ++ vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW); ++ vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW); ++ vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW); ++ vmx->msr_bitmap_mode = 0; ++ + 
vmx->loaded_vmcs = &vmx->vmcs01; + cpu = get_cpu(); + vmx_vcpu_load(&vmx->vcpu, cpu); +@@ -9525,7 +9436,7 @@ static inline bool nested_vmx_merge_msr_ + int msr; + struct page *page; + unsigned long *msr_bitmap_l1; +- unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.msr_bitmap; ++ unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap; + + /* This shortcut is ok because we support only x2APIC MSRs so far. */ + if (!nested_cpu_has_virt_x2apic_mode(vmcs12)) +@@ -10045,6 +9956,9 @@ static void prepare_vmcs02(struct kvm_vc + if (kvm_has_tsc_control) + decache_tsc_multiplier(vmx); + ++ if (cpu_has_vmx_msr_bitmap()) ++ vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap)); ++ + if (enable_vpid) { + /* + * There is no direct mapping between vpid02 and vpid12, the +@@ -10749,7 +10663,7 @@ static void load_vmcs12_host_state(struc + vmcs_write64(GUEST_IA32_DEBUGCTL, 0); + + if (cpu_has_vmx_msr_bitmap()) +- vmx_set_msr_bitmap(vcpu); ++ vmx_update_msr_bitmap(vcpu); + + if (nested_vmx_load_msr(vcpu, vmcs12->vm_exit_msr_load_addr, + vmcs12->vm_exit_msr_load_count)) diff --git a/queue-4.9/kvm-x86-add-ibpb-support.patch b/queue-4.9/kvm-x86-add-ibpb-support.patch new file mode 100644 index 00000000000..c893472bd1a --- /dev/null +++ b/queue-4.9/kvm-x86-add-ibpb-support.patch @@ -0,0 +1,343 @@ +From 15d45071523d89b3fb7372e2135fbd72f6af9506 Mon Sep 17 00:00:00 2001 +From: Ashok Raj +Date: Thu, 1 Feb 2018 22:59:43 +0100 +Subject: KVM/x86: Add IBPB support + +From: Ashok Raj + +commit 15d45071523d89b3fb7372e2135fbd72f6af9506 upstream. + +The Indirect Branch Predictor Barrier (IBPB) is an indirect branch +control mechanism. It keeps earlier branches from influencing +later ones. + +Unlike IBRS and STIBP, IBPB does not define a new mode of operation. +It's a command that ensures predicted branch targets aren't used after +the barrier. Although IBRS and IBPB are enumerated by the same CPUID +enumeration, IBPB is very different. + +IBPB helps mitigate against three potential attacks: + +* Mitigate guests from being attacked by other guests. + - This is addressed by issing IBPB when we do a guest switch. + +* Mitigate attacks from guest/ring3->host/ring3. + These would require a IBPB during context switch in host, or after + VMEXIT. The host process has two ways to mitigate + - Either it can be compiled with retpoline + - If its going through context switch, and has set !dumpable then + there is a IBPB in that path. + (Tim's patch: https://patchwork.kernel.org/patch/10192871) + - The case where after a VMEXIT you return back to Qemu might make + Qemu attackable from guest when Qemu isn't compiled with retpoline. + There are issues reported when doing IBPB on every VMEXIT that resulted + in some tsc calibration woes in guest. + +* Mitigate guest/ring0->host/ring0 attacks. + When host kernel is using retpoline it is safe against these attacks. + If host kernel isn't using retpoline we might need to do a IBPB flush on + every VMEXIT. + +Even when using retpoline for indirect calls, in certain conditions 'ret' +can use the BTB on Skylake-era CPUs. There are other mitigations +available like RSB stuffing/clearing. + +* IBPB is issued only for SVM during svm_free_vcpu(). + VMX has a vmclear and SVM doesn't. Follow discussion here: + https://lkml.org/lkml/2018/1/15/146 + +Please refer to the following spec for more details on the enumeration +and control. + +Refer here to get documentation about mitigations. 
+ +https://software.intel.com/en-us/side-channel-security-support + +[peterz: rebase and changelog rewrite] +[karahmed: - rebase + - vmx: expose PRED_CMD if guest has it in CPUID + - svm: only pass through IBPB if guest has it in CPUID + - vmx: support !cpu_has_vmx_msr_bitmap()] + - vmx: support nested] +[dwmw2: Expose CPUID bit too (AMD IBPB only for now as we lack IBRS) + PRED_CMD is a write-only MSR] + +Signed-off-by: Ashok Raj +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: David Woodhouse +Signed-off-by: KarimAllah Ahmed +Signed-off-by: Thomas Gleixner +Reviewed-by: Konrad Rzeszutek Wilk +Cc: Andrea Arcangeli +Cc: Andi Kleen +Cc: kvm@vger.kernel.org +Cc: Asit Mallick +Cc: Linus Torvalds +Cc: Andy Lutomirski +Cc: Dave Hansen +Cc: Arjan Van De Ven +Cc: Greg KH +Cc: Jun Nakajima +Cc: Paolo Bonzini +Cc: Dan Williams +Cc: Tim Chen +Link: http://lkml.kernel.org/r/1515720739-43819-6-git-send-email-ashok.raj@intel.com +Link: https://lkml.kernel.org/r/1517522386-18410-3-git-send-email-karahmed@amazon.de +Signed-off-by: David Woodhouse +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/cpuid.c | 11 ++++++- + arch/x86/kvm/cpuid.h | 12 +++++++ + arch/x86/kvm/svm.c | 28 ++++++++++++++++++ + arch/x86/kvm/vmx.c | 79 +++++++++++++++++++++++++++++++++++++++++++++++++-- + 4 files changed, 127 insertions(+), 3 deletions(-) + +--- a/arch/x86/kvm/cpuid.c ++++ b/arch/x86/kvm/cpuid.c +@@ -355,6 +355,10 @@ static inline int __do_cpuid_ent(struct + F(3DNOWPREFETCH) | F(OSVW) | 0 /* IBS */ | F(XOP) | + 0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM); + ++ /* cpuid 0x80000008.ebx */ ++ const u32 kvm_cpuid_8000_0008_ebx_x86_features = ++ F(IBPB); ++ + /* cpuid 0xC0000001.edx */ + const u32 kvm_cpuid_C000_0001_edx_x86_features = + F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) | +@@ -607,7 +611,12 @@ static inline int __do_cpuid_ent(struct + if (!g_phys_as) + g_phys_as = phys_as; + entry->eax = g_phys_as | (virt_as << 8); +- entry->ebx = entry->edx = 0; ++ entry->edx = 0; ++ /* IBPB isn't necessarily present in hardware cpuid */ ++ if (boot_cpu_has(X86_FEATURE_IBPB)) ++ entry->ebx |= F(IBPB); ++ entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features; ++ cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX); + break; + } + case 0x80000019: +--- a/arch/x86/kvm/cpuid.h ++++ b/arch/x86/kvm/cpuid.h +@@ -160,6 +160,18 @@ static inline bool guest_cpuid_has_rdtsc + return best && (best->edx & bit(X86_FEATURE_RDTSCP)); + } + ++static inline bool guest_cpuid_has_ibpb(struct kvm_vcpu *vcpu) ++{ ++ struct kvm_cpuid_entry2 *best; ++ ++ best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); ++ if (best && (best->ebx & bit(X86_FEATURE_IBPB))) ++ return true; ++ best = kvm_find_cpuid_entry(vcpu, 7, 0); ++ return best && (best->edx & bit(X86_FEATURE_SPEC_CTRL)); ++} ++ ++ + /* + * NRIPS is provided through cpuidfn 0x8000000a.edx bit 3 + */ +--- a/arch/x86/kvm/svm.c ++++ b/arch/x86/kvm/svm.c +@@ -248,6 +248,7 @@ static const struct svm_direct_access_ms + { .index = MSR_CSTAR, .always = true }, + { .index = MSR_SYSCALL_MASK, .always = true }, + #endif ++ { .index = MSR_IA32_PRED_CMD, .always = false }, + { .index = MSR_IA32_LASTBRANCHFROMIP, .always = false }, + { .index = MSR_IA32_LASTBRANCHTOIP, .always = false }, + { .index = MSR_IA32_LASTINTFROMIP, .always = false }, +@@ -510,6 +511,7 @@ struct svm_cpu_data { + struct kvm_ldttss_desc *tss_desc; + + struct page *save_area; ++ struct vmcb *current_vmcb; + }; + + static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data); +@@ -1644,11 +1646,17 @@ static void svm_free_vcpu(struct kvm_vcp + 
__free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER); + kvm_vcpu_uninit(vcpu); + kmem_cache_free(kvm_vcpu_cache, svm); ++ /* ++ * The vmcb page can be recycled, causing a false negative in ++ * svm_vcpu_load(). So do a full IBPB now. ++ */ ++ indirect_branch_prediction_barrier(); + } + + static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) + { + struct vcpu_svm *svm = to_svm(vcpu); ++ struct svm_cpu_data *sd = per_cpu(svm_data, cpu); + int i; + + if (unlikely(cpu != vcpu->cpu)) { +@@ -1677,6 +1685,10 @@ static void svm_vcpu_load(struct kvm_vcp + if (static_cpu_has(X86_FEATURE_RDTSCP)) + wrmsrl(MSR_TSC_AUX, svm->tsc_aux); + ++ if (sd->current_vmcb != svm->vmcb) { ++ sd->current_vmcb = svm->vmcb; ++ indirect_branch_prediction_barrier(); ++ } + avic_vcpu_load(vcpu, cpu); + } + +@@ -3599,6 +3611,22 @@ static int svm_set_msr(struct kvm_vcpu * + case MSR_IA32_TSC: + kvm_write_tsc(vcpu, msr); + break; ++ case MSR_IA32_PRED_CMD: ++ if (!msr->host_initiated && ++ !guest_cpuid_has_ibpb(vcpu)) ++ return 1; ++ ++ if (data & ~PRED_CMD_IBPB) ++ return 1; ++ ++ if (!data) ++ break; ++ ++ wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB); ++ if (is_guest_mode(vcpu)) ++ break; ++ set_msr_interception(svm->msrpm, MSR_IA32_PRED_CMD, 0, 1); ++ break; + case MSR_STAR: + svm->vmcb->save.star = data; + break; +--- a/arch/x86/kvm/vmx.c ++++ b/arch/x86/kvm/vmx.c +@@ -549,6 +549,7 @@ struct vcpu_vmx { + u64 msr_host_kernel_gs_base; + u64 msr_guest_kernel_gs_base; + #endif ++ + u32 vm_entry_controls_shadow; + u32 vm_exit_controls_shadow; + /* +@@ -913,6 +914,8 @@ static void copy_vmcs12_to_shadow(struct + static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx); + static int alloc_identity_pagetable(struct kvm *kvm); + static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu); ++static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, ++ u32 msr, int type); + + static DEFINE_PER_CPU(struct vmcs *, vmxarea); + static DEFINE_PER_CPU(struct vmcs *, current_vmcs); +@@ -1848,6 +1851,29 @@ static void update_exception_bitmap(stru + vmcs_write32(EXCEPTION_BITMAP, eb); + } + ++/* ++ * Check if MSR is intercepted for L01 MSR bitmap. ++ */ ++static bool msr_write_intercepted_l01(struct kvm_vcpu *vcpu, u32 msr) ++{ ++ unsigned long *msr_bitmap; ++ int f = sizeof(unsigned long); ++ ++ if (!cpu_has_vmx_msr_bitmap()) ++ return true; ++ ++ msr_bitmap = to_vmx(vcpu)->vmcs01.msr_bitmap; ++ ++ if (msr <= 0x1fff) { ++ return !!test_bit(msr, msr_bitmap + 0x800 / f); ++ } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { ++ msr &= 0x1fff; ++ return !!test_bit(msr, msr_bitmap + 0xc00 / f); ++ } ++ ++ return true; ++} ++ + static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx, + unsigned long entry, unsigned long exit) + { +@@ -2257,6 +2283,7 @@ static void vmx_vcpu_load(struct kvm_vcp + if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) { + per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs; + vmcs_load(vmx->loaded_vmcs->vmcs); ++ indirect_branch_prediction_barrier(); + } + + if (!already_loaded) { +@@ -3058,6 +3085,33 @@ static int vmx_set_msr(struct kvm_vcpu * + case MSR_IA32_TSC: + kvm_write_tsc(vcpu, msr_info); + break; ++ case MSR_IA32_PRED_CMD: ++ if (!msr_info->host_initiated && ++ !guest_cpuid_has_ibpb(vcpu)) ++ return 1; ++ ++ if (data & ~PRED_CMD_IBPB) ++ return 1; ++ ++ if (!data) ++ break; ++ ++ wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB); ++ ++ /* ++ * For non-nested: ++ * When it's written (to non-zero) for the first time, pass ++ * it through. 
++ * ++ * For nested: ++ * The handling of the MSR bitmap for L2 guests is done in ++ * nested_vmx_merge_msr_bitmap. We should not touch the ++ * vmcs02.msr_bitmap here since it gets completely overwritten ++ * in the merging. ++ */ ++ vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, MSR_IA32_PRED_CMD, ++ MSR_TYPE_W); ++ break; + case MSR_IA32_CR_PAT: + if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { + if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data)) +@@ -9437,9 +9491,23 @@ static inline bool nested_vmx_merge_msr_ + struct page *page; + unsigned long *msr_bitmap_l1; + unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap; ++ /* ++ * pred_cmd is trying to verify two things: ++ * ++ * 1. L0 gave a permission to L1 to actually passthrough the MSR. This ++ * ensures that we do not accidentally generate an L02 MSR bitmap ++ * from the L12 MSR bitmap that is too permissive. ++ * 2. That L1 or L2s have actually used the MSR. This avoids ++ * unnecessarily merging of the bitmap if the MSR is unused. This ++ * works properly because we only update the L01 MSR bitmap lazily. ++ * So even if L0 should pass L1 these MSRs, the L01 bitmap is only ++ * updated to reflect this when L1 (or its L2s) actually write to ++ * the MSR. ++ */ ++ bool pred_cmd = msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD); + +- /* This shortcut is ok because we support only x2APIC MSRs so far. */ +- if (!nested_cpu_has_virt_x2apic_mode(vmcs12)) ++ if (!nested_cpu_has_virt_x2apic_mode(vmcs12) && ++ !pred_cmd) + return false; + + page = nested_get_page(vcpu, vmcs12->msr_bitmap); +@@ -9477,6 +9545,13 @@ static inline bool nested_vmx_merge_msr_ + MSR_TYPE_W); + } + } ++ ++ if (pred_cmd) ++ nested_vmx_disable_intercept_for_msr( ++ msr_bitmap_l1, msr_bitmap_l0, ++ MSR_IA32_PRED_CMD, ++ MSR_TYPE_W); ++ + kunmap(page); + nested_release_page_clean(page); + diff --git a/queue-4.9/series b/queue-4.9/series index f6d225945cb..af23d56d520 100644 --- a/queue-4.9/series +++ b/queue-4.9/series @@ -16,3 +16,12 @@ kaiser-fix-intel_bts-perf-crashes.patch x86-pti-make-unpoison-of-pgd-for-trusted-boot-work-for-real.patch kaiser-allocate-pgd-with-order-0-when-pti-off.patch serial-core-mark-port-as-initialized-after-successful-irq-change.patch +kvm-nvmx-vmx_complete_nested_posted_interrupt-can-t-fail.patch +kvm-nvmx-mark-vmcs12-pages-dirty-on-l2-exit.patch +kvm-nvmx-eliminate-vmcs02-pool.patch +kvm-vmx-introduce-alloc_loaded_vmcs.patch +kvm-vmx-make-msr-bitmaps-per-vcpu.patch +kvm-x86-add-ibpb-support.patch +kvm-vmx-emulate-msr_ia32_arch_capabilities.patch +kvm-vmx-allow-direct-access-to-msr_ia32_spec_ctrl.patch +kvm-svm-allow-direct-access-to-msr_ia32_spec_ctrl.patch
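
Illustrative note (not part of any patch above): the IBPB and per-vCPU bitmap
changes rely on the hardware MSR-bitmap layout that msr_write_intercepted_l01()
and vmx_disable_intercept_for_msr() index into: read-low at offset 0x0,
read-high at 0x400, write-low at 0x800, write-high at 0xc00, one bit per MSR,
with a set bit meaning "intercept".  The standalone userspace sketch below
mimics that layout and the lazy PRED_CMD pass-through from vmx_set_msr(): the
per-vCPU bitmap starts out intercepting everything, and the write intercept is
only cleared after the guest's first non-zero write.  The helper names here
(test_bit_u8, disable_write_intercept, and so on) are invented for the sketch
and do not exist in the kernel.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define MSR_IA32_PRED_CMD 0x00000049

static uint8_t msr_bitmap[4096];	/* stands in for vmcs01.msr_bitmap */

static bool test_bit_u8(unsigned int nr, const uint8_t *addr)
{
	return addr[nr / 8] & (1u << (nr % 8));
}

static void clear_bit_u8(unsigned int nr, uint8_t *addr)
{
	addr[nr / 8] &= (uint8_t)~(1u << (nr % 8));
}

/* Same range checks as msr_write_intercepted_l01() in the IBPB patch. */
static bool msr_write_intercepted(uint32_t msr)
{
	if (msr <= 0x1fff)
		return test_bit_u8(msr, msr_bitmap + 0x800);
	if (msr >= 0xc0000000 && msr <= 0xc0001fff)
		return test_bit_u8(msr & 0x1fff, msr_bitmap + 0xc00);
	return true;	/* MSRs outside both ranges always exit */
}

/* Rough equivalent of vmx_disable_intercept_for_msr(..., MSR_TYPE_W). */
static void disable_write_intercept(uint32_t msr)
{
	if (msr <= 0x1fff)
		clear_bit_u8(msr, msr_bitmap + 0x800);
	else if (msr >= 0xc0000000 && msr <= 0xc0001fff)
		clear_bit_u8(msr & 0x1fff, msr_bitmap + 0xc00);
}

int main(void)
{
	/* Fresh per-vCPU bitmap: intercept every MSR to start with. */
	memset(msr_bitmap, 0xff, sizeof(msr_bitmap));

	printf("PRED_CMD write intercepted before first use: %d\n",
	       msr_write_intercepted(MSR_IA32_PRED_CMD));

	/* First non-zero guest write: pass the MSR through from now on. */
	disable_write_intercept(MSR_IA32_PRED_CMD);

	printf("PRED_CMD write intercepted after first use:  %d\n",
	       msr_write_intercepted(MSR_IA32_PRED_CMD));
	return 0;
}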