From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Fri, 15 Nov 2019 03:13:22 +0000 (+0800)
Subject: 4.9-stable patches
X-Git-Tag: v4.4.202~3
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=1345f6930ed534ff474452e6129b047b5b38261c;p=thirdparty%2Fkernel%2Fstable-queue.git

4.9-stable patches

added patches:
	cpu-speculation-uninline-and-export-cpu-mitigations-helpers.patch
	documentation-add-itlb_multihit-documentation.patch
	kvm-add-helper-function-for-creating-vm-worker-threads.patch
	kvm-convert-kvm_lock-to-a-mutex.patch
	kvm-mmu-itlb_multihit-mitigation.patch
	kvm-vmx-svm-always-run-with-efer.nxe-1-when-shadow-paging-is-active.patch
	kvm-x86-add-is_executable_pte.patch
	kvm-x86-add-tracepoints-around-__direct_map-and-fname-fetch.patch
	kvm-x86-change-kvm_mmu_page_get_gfn-bug_on-to-warn_on.patch
	kvm-x86-do-not-release-the-page-inside-mmu_set_spte.patch
	kvm-x86-export-mds_no-0-to-guests-when-tsx-is-enabled.patch
	kvm-x86-extend-usage-of-ret_mmio_pf_-constants.patch
	kvm-x86-make-fname-fetch-and-__direct_map-more-similar.patch
	kvm-x86-mmu-recovery-of-shattered-nx-large-pages.patch
	kvm-x86-remove-now-unneeded-hugepage-gfn-adjustment.patch
	kvm-x86-simplify-ept_misconfig.patch
	kvm-x86-use-intel-speculation-bugs-and-features-as-derived-in-generic-x86-code.patch
	x86-bugs-add-itlb_multihit-bug-infrastructure.patch
	x86-cpu-add-a-helper-function-x86_read_arch_cap_msr.patch
	x86-cpu-add-a-tsx-cmdline-option-with-tsx-disabled-by-default.patch
	x86-msr-add-the-ia32_tsx_ctrl-msr.patch
	x86-speculation-taa-add-documentation-for-tsx-async-abort.patch
	x86-speculation-taa-add-mitigation-for-tsx-async-abort.patch
	x86-speculation-taa-add-sysfs-reporting-for-tsx-async-abort.patch
	x86-speculation-taa-fix-printing-of-taa_msg_smt-on-ibrs_all-cpus.patch
	x86-tsx-add-auto-option-to-the-tsx-cmdline-parameter.patch
	x86-tsx-add-config-options-to-set-tsx-on-off-auto.patch
---

diff --git a/queue-4.9/cpu-speculation-uninline-and-export-cpu-mitigations-helpers.patch b/queue-4.9/cpu-speculation-uninline-and-export-cpu-mitigations-helpers.patch
new file mode 100644
index 00000000000..f22d59cb57e
--- /dev/null
+++ b/queue-4.9/cpu-speculation-uninline-and-export-cpu-mitigations-helpers.patch
@@ -0,0 +1,102 @@
+From foo@baz Fri 15 Nov 2019 11:10:23 AM CST
+From: Tyler Hicks <tyhicks@canonical.com>
+Date: Mon, 4 Nov 2019 12:22:02 +0100
+Subject: cpu/speculation: Uninline and export CPU mitigations helpers
+
+From: Tyler Hicks <tyhicks@canonical.com>
+
+commit 731dc9df975a5da21237a18c3384f811a7a41cc6 upstream.
+
+A kernel module may need to check the value of the "mitigations=" kernel
+command line parameter as part of its setup when the module needs
+to perform software mitigations for a CPU flaw.
+
+Uninline and export the helper functions surrounding the cpu_mitigations
+enum to allow for their usage from a module.
+
+Lastly, privatize the enum and cpu_mitigations variable since the value of
+cpu_mitigations can be checked with the exported helper functions.
+
+Signed-off-by: Tyler Hicks <tyhicks@canonical.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/cpu.h |   25 ++-----------------------
+ kernel/cpu.c        |   27 ++++++++++++++++++++++++++-
+ 2 files changed, 28 insertions(+), 24 deletions(-)
+
+--- a/include/linux/cpu.h
++++ b/include/linux/cpu.h
+@@ -287,28 +287,7 @@ static inline int cpuhp_smt_enable(void)
+ static inline int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) { return 0; }
+ #endif
+ 
+-/*
+- * These are used for a global "mitigations=" cmdline option for toggling
+- * optional CPU mitigations.
+- */
+-enum cpu_mitigations {
+-	CPU_MITIGATIONS_OFF,
+-	CPU_MITIGATIONS_AUTO,
+-	CPU_MITIGATIONS_AUTO_NOSMT,
+-};
+-
+-extern enum cpu_mitigations cpu_mitigations;
+-
+-/* mitigations=off */
+-static inline bool cpu_mitigations_off(void)
+-{
+-	return cpu_mitigations == CPU_MITIGATIONS_OFF;
+-}
+-
+-/* mitigations=auto,nosmt */
+-static inline bool cpu_mitigations_auto_nosmt(void)
+-{
+-	return cpu_mitigations == CPU_MITIGATIONS_AUTO_NOSMT;
+-}
++extern bool cpu_mitigations_off(void);
++extern bool cpu_mitigations_auto_nosmt(void);
+ 
+ #endif /* _LINUX_CPU_H_ */
+--- a/kernel/cpu.c
++++ b/kernel/cpu.c
+@@ -2235,7 +2235,18 @@ void __init boot_cpu_hotplug_init(void)
+ 	this_cpu_write(cpuhp_state.state, CPUHP_ONLINE);
+ }
+ 
+-enum cpu_mitigations cpu_mitigations __ro_after_init = CPU_MITIGATIONS_AUTO;
++/*
++ * These are used for a global "mitigations=" cmdline option for toggling
++ * optional CPU mitigations.
++ */
++enum cpu_mitigations {
++	CPU_MITIGATIONS_OFF,
++	CPU_MITIGATIONS_AUTO,
++	CPU_MITIGATIONS_AUTO_NOSMT,
++};
++
++static enum cpu_mitigations cpu_mitigations __ro_after_init =
++	CPU_MITIGATIONS_AUTO;
+ 
+ static int __init mitigations_parse_cmdline(char *arg)
+ {
+@@ -2252,3 +2263,17 @@ static int __init mitigations_parse_cmdl
+ 	return 0;
+ }
+ early_param("mitigations", mitigations_parse_cmdline);
++
++/* mitigations=off */
++bool cpu_mitigations_off(void)
++{
++	return cpu_mitigations == CPU_MITIGATIONS_OFF;
++}
++EXPORT_SYMBOL_GPL(cpu_mitigations_off);
++
++/* mitigations=auto,nosmt */
++bool cpu_mitigations_auto_nosmt(void)
++{
++	return cpu_mitigations == CPU_MITIGATIONS_AUTO_NOSMT;
++}
++EXPORT_SYMBOL_GPL(cpu_mitigations_auto_nosmt);
diff --git a/queue-4.9/documentation-add-itlb_multihit-documentation.patch b/queue-4.9/documentation-add-itlb_multihit-documentation.patch
new file mode 100644
index 00000000000..f8c3896d5f6
--- /dev/null
+++ b/queue-4.9/documentation-add-itlb_multihit-documentation.patch
@@ -0,0 +1,199 @@
+From foo@baz Fri 15 Nov 2019 11:10:23 AM CST
+From: "Gomez Iglesias, Antonio" <antonio.gomez.iglesias@intel.com>
+Date: Mon, 4 Nov 2019 12:22:03 +0100
+Subject: Documentation: Add ITLB_MULTIHIT documentation
+
+From: "Gomez Iglesias, Antonio" <antonio.gomez.iglesias@intel.com>
+
+commit 7f00cc8d4a51074eb0ad4c3f16c15757b1ddfb7d upstream.
+
+Add the initial ITLB_MULTIHIT documentation.
+
+[ tglx: Add it to the index so it gets actually built. ]
+
+Signed-off-by: Antonio Gomez Iglesias <antonio.gomez.iglesias@intel.com>
+Signed-off-by: Nelson D'Souza <nelson.dsouza@linux.intel.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+[bwh: Backported to 4.9: adjust filenames]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/hw-vuln/index.rst    |    1 
+ Documentation/hw-vuln/multihit.rst |  163 +++++++++++++++++++++++++++++++++++++
+ 2 files changed, 164 insertions(+)
+ create mode 100644 Documentation/hw-vuln/multihit.rst
+
+--- a/Documentation/hw-vuln/index.rst
++++ b/Documentation/hw-vuln/index.rst
+@@ -12,3 +12,4 @@ are configurable at compile, boot or run
+    l1tf
+    mds
+    tsx_async_abort
++   multihit.rst
+--- /dev/null
++++ b/Documentation/hw-vuln/multihit.rst
+@@ -0,0 +1,163 @@
++iTLB multihit
++=============
++
++iTLB multihit is an erratum where some processors may incur a machine check
++error, possibly resulting in an unrecoverable CPU lockup, when an
++instruction fetch hits multiple entries in the instruction TLB. This can
++occur when the page size is changed along with either the physical address
++or cache type. A malicious guest running on a virtualized system can
++exploit this erratum to perform a denial of service attack.
++
++
++Affected processors
++-------------------
++
++Variations of this erratum are present on most Intel Core and Xeon processor
++models. The erratum is not present on:
++
++   - non-Intel processors
++
++   - Some Atoms (Airmont, Bonnell, Goldmont, GoldmontPlus, Saltwell, Silvermont)
++
++   - Intel processors that have the PSCHANGE_MC_NO bit set in the
++     IA32_ARCH_CAPABILITIES MSR.
++
++
++Related CVEs
++------------
++
++The following CVE entry is related to this issue:
++
++   ==============  =================================================
++   CVE-2018-12207  Machine Check Error Avoidance on Page Size Change
++   ==============  =================================================
++
++
++Problem
++-------
++
++Privileged software, including OS and virtual machine managers (VMM), is in
++charge of memory management. A key component in memory management is the control
++of the page tables. Modern processors use virtual memory, a technique that creates
++the illusion of a very large memory for processors. This virtual space is split
++into pages of a given size. Page tables translate virtual addresses to physical
++addresses.
++
++To reduce latency when performing a virtual to physical address translation,
++processors include a structure, called TLB, that caches recent translations.
++There are separate TLBs for instruction (iTLB) and data (dTLB).
++
++Under this erratum, instructions are fetched from a linear address translated
++using a 4 KB translation cached in the iTLB. Privileged software modifies the
++paging structure so that the same linear address is mapped using a large page
++size (2 MB, 4 MB, 1 GB) with a different physical address or memory type.  After the page
++structure modification but before the software invalidates any iTLB entries for
++the linear address, a code fetch that happens on the same linear address may
++cause a machine-check error which can result in a system hang or shutdown.
++
++
++Attack scenarios
++----------------
++
++Attacks against the iTLB multihit erratum can be mounted from malicious
++guests in a virtualized system.
++
++
++iTLB multihit system information
++--------------------------------
++
++The Linux kernel provides a sysfs interface to enumerate the current iTLB
++multihit status of the system: whether the system is vulnerable and which
++mitigations are active. The relevant sysfs file is:
++
++/sys/devices/system/cpu/vulnerabilities/itlb_multihit
++
++The possible values in this file are:
++
++.. list-table::
++
++     * - Not affected
++       - The processor is not vulnerable.
++     * - KVM: Mitigation: Split huge pages
++       - Software changes mitigate this issue.
++     * - KVM: Vulnerable
++       - The processor is vulnerable, but no mitigation is enabled.
++
++
++Enumeration of the erratum
++--------------------------------
++
++A new bit (PSCHANGE_MC_NO) has been allocated in the IA32_ARCH_CAPABILITIES MSR
++and will be set on CPUs which are mitigated against this issue.
++
++   =======================================   ===========   ===============================
++   IA32_ARCH_CAPABILITIES MSR                Not present   Possibly vulnerable,check model
++   IA32_ARCH_CAPABILITIES[PSCHANGE_MC_NO]    '0'           Likely vulnerable,check model
++   IA32_ARCH_CAPABILITIES[PSCHANGE_MC_NO]    '1'           Not vulnerable
++   =======================================   ===========   ===============================
++
++
++Mitigation mechanism
++-------------------------
++
++This erratum can be mitigated by restricting the use of large page sizes to
++non-executable pages.  This forces all iTLB entries to be 4K, and removes
++the possibility of multiple hits.
++
++In order to mitigate the vulnerability, KVM initially marks all huge pages
++as non-executable. If the guest attempts to execute in one of those pages,
++the page is broken down into 4K pages, which are then marked executable.
++
++If EPT is disabled or not available on the host, KVM is in control of TLB
++flushes and the problematic situation cannot happen.  However, the shadow
++EPT paging mechanism used by nested virtualization is vulnerable, because
++the nested guest can trigger multiple iTLB hits by modifying its own
++(non-nested) page tables.  For simplicity, KVM will make large pages
++non-executable in all shadow paging modes.
++
++Mitigation control on the kernel command line and KVM - module parameter
++------------------------------------------------------------------------
++
++The KVM hypervisor mitigation mechanism for marking huge pages as
++non-executable can be controlled with a module parameter "nx_huge_pages=".
++The iTLB multihit mitigation can also be controlled from the kernel command
++line at boot time with the option "kvm.nx_huge_pages=".
++
++The valid arguments for these options are:
++
++  ==========  ================================================================
++  force       Mitigation is enabled. In this case, the mitigation implements
++              non-executable huge pages in Linux kernel KVM module. All huge
++              pages in the EPT are marked as non-executable.
++              If a guest attempts to execute in one of those pages, the page is
++              broken down into 4K pages, which are then marked executable.
++
++  off	      Mitigation is disabled.
++
++  auto        Enable mitigation only if the platform is affected and the kernel
++              was not booted with the "mitigations=off" command line parameter.
++	      This is the default option.
++  ==========  ================================================================
++
++
++Mitigation selection guide
++--------------------------
++
++1. No virtualization in use
++^^^^^^^^^^^^^^^^^^^^^^^^^^^
++
++   The system is protected by the kernel unconditionally and no further
++   action is required.
++
++2. Virtualization with trusted guests
++^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
++
++   If the guest comes from a trusted source, you may assume that the guest will
++   not attempt to maliciously exploit this erratum and no further action is
++   required.
++
++3. Virtualization with untrusted guests
++^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
++   If the guest comes from an untrusted source, the host kernel will need
++   to apply iTLB multihit mitigation via the kernel command line or kvm
++   module parameter.
diff --git a/queue-4.9/kvm-add-helper-function-for-creating-vm-worker-threads.patch b/queue-4.9/kvm-add-helper-function-for-creating-vm-worker-threads.patch
new file mode 100644
index 00000000000..3a42eedcbb8
--- /dev/null
+++ b/queue-4.9/kvm-add-helper-function-for-creating-vm-worker-threads.patch
@@ -0,0 +1,134 @@
+From foo@baz Fri 15 Nov 2019 11:10:23 AM CST
+From: Junaid Shahid <junaids@google.com>
+Date: Mon, 4 Nov 2019 12:22:02 +0100
+Subject: kvm: Add helper function for creating VM worker threads
+
+From: Junaid Shahid <junaids@google.com>
+
+commit c57c80467f90e5504c8df9ad3555d2c78800bf94 upstream.
+
+Add a function to create a kernel thread associated with a given VM. In
+particular, it ensures that the worker thread inherits the priority and
+cgroups of the calling thread.
+
+Signed-off-by: Junaid Shahid <junaids@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+[bwh: Backported to 4.9: adjust context]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/kvm_host.h |    6 +++
+ virt/kvm/kvm_main.c      |   84 +++++++++++++++++++++++++++++++++++++++++++++++
+ 2 files changed, 90 insertions(+)
+
+--- a/include/linux/kvm_host.h
++++ b/include/linux/kvm_host.h
+@@ -1208,4 +1208,10 @@ static inline bool vcpu_valid_wakeup(str
+ }
+ #endif /* CONFIG_HAVE_KVM_INVALID_WAKEUPS */
+ 
++typedef int (*kvm_vm_thread_fn_t)(struct kvm *kvm, uintptr_t data);
++
++int kvm_vm_create_worker_thread(struct kvm *kvm, kvm_vm_thread_fn_t thread_fn,
++				uintptr_t data, const char *name,
++				struct task_struct **thread_ptr);
++
+ #endif
+--- a/virt/kvm/kvm_main.c
++++ b/virt/kvm/kvm_main.c
+@@ -49,6 +49,7 @@
+ #include <linux/slab.h>
+ #include <linux/sort.h>
+ #include <linux/bsearch.h>
++#include <linux/kthread.h>
+ 
+ #include <asm/processor.h>
+ #include <asm/io.h>
+@@ -3987,3 +3988,86 @@ void kvm_exit(void)
+ 	kvm_vfio_ops_exit();
+ }
+ EXPORT_SYMBOL_GPL(kvm_exit);
++
++struct kvm_vm_worker_thread_context {
++	struct kvm *kvm;
++	struct task_struct *parent;
++	struct completion init_done;
++	kvm_vm_thread_fn_t thread_fn;
++	uintptr_t data;
++	int err;
++};
++
++static int kvm_vm_worker_thread(void *context)
++{
++	/*
++	 * The init_context is allocated on the stack of the parent thread, so
++	 * we have to locally copy anything that is needed beyond initialization
++	 */
++	struct kvm_vm_worker_thread_context *init_context = context;
++	struct kvm *kvm = init_context->kvm;
++	kvm_vm_thread_fn_t thread_fn = init_context->thread_fn;
++	uintptr_t data = init_context->data;
++	int err;
++
++	err = kthread_park(current);
++	/* kthread_park(current) is never supposed to return an error */
++	WARN_ON(err != 0);
++	if (err)
++		goto init_complete;
++
++	err = cgroup_attach_task_all(init_context->parent, current);
++	if (err) {
++		kvm_err("%s: cgroup_attach_task_all failed with err %d\n",
++			__func__, err);
++		goto init_complete;
++	}
++
++	set_user_nice(current, task_nice(init_context->parent));
++
++init_complete:
++	init_context->err = err;
++	complete(&init_context->init_done);
++	init_context = NULL;
++
++	if (err)
++		return err;
++
++	/* Wait to be woken up by the spawner before proceeding. */
++	kthread_parkme();
++
++	if (!kthread_should_stop())
++		err = thread_fn(kvm, data);
++
++	return err;
++}
++
++int kvm_vm_create_worker_thread(struct kvm *kvm, kvm_vm_thread_fn_t thread_fn,
++				uintptr_t data, const char *name,
++				struct task_struct **thread_ptr)
++{
++	struct kvm_vm_worker_thread_context init_context = {};
++	struct task_struct *thread;
++
++	*thread_ptr = NULL;
++	init_context.kvm = kvm;
++	init_context.parent = current;
++	init_context.thread_fn = thread_fn;
++	init_context.data = data;
++	init_completion(&init_context.init_done);
++
++	thread = kthread_run(kvm_vm_worker_thread, &init_context,
++			     "%s-%d", name, task_pid_nr(current));
++	if (IS_ERR(thread))
++		return PTR_ERR(thread);
++
++	/* kthread_run is never supposed to return NULL */
++	WARN_ON(thread == NULL);
++
++	wait_for_completion(&init_context.init_done);
++
++	if (!init_context.err)
++		*thread_ptr = thread;
++
++	return init_context.err;
++}
diff --git a/queue-4.9/kvm-convert-kvm_lock-to-a-mutex.patch b/queue-4.9/kvm-convert-kvm_lock-to-a-mutex.patch
new file mode 100644
index 00000000000..b457cf39a9b
--- /dev/null
+++ b/queue-4.9/kvm-convert-kvm_lock-to-a-mutex.patch
@@ -0,0 +1,206 @@
+From foo@baz Fri 15 Nov 2019 11:10:23 AM CST
+From: Junaid Shahid <junaids@google.com>
+Date: Thu, 3 Jan 2019 17:14:28 -0800
+Subject: kvm: Convert kvm_lock to a mutex
+
+From: Junaid Shahid <junaids@google.com>
+
+commit 0d9ce162cf46c99628cc5da9510b959c7976735b upstream.
+
+It doesn't seem as if there is any particular need for kvm_lock to be a
+spinlock, so convert the lock to a mutex so that sleepable functions (in
+particular cond_resched()) can be called while holding it.
+
+Signed-off-by: Junaid Shahid <junaids@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+[bwh: Backported to 4.9:
+ - Drop changes in kvm_hyperv_tsc_notifier(), vm_stat_clear(),
+   vcpu_stat_clear(), kvm_uevent_notify_change()
+ - Adjust context]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/virtual/kvm/locking.txt |    6 +++---
+ arch/s390/kvm/kvm-s390.c              |    4 ++--
+ arch/x86/kvm/mmu.c                    |    4 ++--
+ arch/x86/kvm/x86.c                    |   10 +++++-----
+ include/linux/kvm_host.h              |    2 +-
+ virt/kvm/kvm_main.c                   |   18 +++++++++---------
+ 6 files changed, 22 insertions(+), 22 deletions(-)
+
+--- a/Documentation/virtual/kvm/locking.txt
++++ b/Documentation/virtual/kvm/locking.txt
+@@ -13,8 +13,8 @@ The acquisition orders for mutexes are a
+ - kvm->slots_lock is taken outside kvm->irq_lock, though acquiring
+   them together is quite rare.
+ 
+-For spinlocks, kvm_lock is taken outside kvm->mmu_lock.  Everything
+-else is a leaf: no other lock is taken inside the critical sections.
++Everything else is a leaf: no other lock is taken inside the critical
++sections.
+ 
+ 2: Exception
+ ------------
+@@ -142,7 +142,7 @@ See the comments in spte_has_volatile_bi
+ ------------
+ 
+ Name:		kvm_lock
+-Type:		spinlock_t
++Type:		mutex
+ Arch:		any
+ Protects:	- vm_list
+ 
+--- a/arch/s390/kvm/kvm-s390.c
++++ b/arch/s390/kvm/kvm-s390.c
+@@ -1422,13 +1422,13 @@ int kvm_arch_init_vm(struct kvm *kvm, un
+ 	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
+ 	if (!kvm->arch.sca)
+ 		goto out_err;
+-	spin_lock(&kvm_lock);
++	mutex_lock(&kvm_lock);
+ 	sca_offset += 16;
+ 	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
+ 		sca_offset = 0;
+ 	kvm->arch.sca = (struct bsca_block *)
+ 			((char *) kvm->arch.sca + sca_offset);
+-	spin_unlock(&kvm_lock);
++	mutex_unlock(&kvm_lock);
+ 
+ 	sprintf(debug_name, "kvm-%u", current->pid);
+ 
+--- a/arch/x86/kvm/mmu.c
++++ b/arch/x86/kvm/mmu.c
+@@ -4979,7 +4979,7 @@ mmu_shrink_scan(struct shrinker *shrink,
+ 	int nr_to_scan = sc->nr_to_scan;
+ 	unsigned long freed = 0;
+ 
+-	spin_lock(&kvm_lock);
++	mutex_lock(&kvm_lock);
+ 
+ 	list_for_each_entry(kvm, &vm_list, vm_list) {
+ 		int idx;
+@@ -5029,7 +5029,7 @@ unlock:
+ 		break;
+ 	}
+ 
+-	spin_unlock(&kvm_lock);
++	mutex_unlock(&kvm_lock);
+ 	return freed;
+ }
+ 
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -5978,17 +5978,17 @@ static int kvmclock_cpufreq_notifier(str
+ 
+ 	smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1);
+ 
+-	spin_lock(&kvm_lock);
++	mutex_lock(&kvm_lock);
+ 	list_for_each_entry(kvm, &vm_list, vm_list) {
+ 		kvm_for_each_vcpu(i, vcpu, kvm) {
+ 			if (vcpu->cpu != freq->cpu)
+ 				continue;
+ 			kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
+-			if (vcpu->cpu != smp_processor_id())
++			if (vcpu->cpu != raw_smp_processor_id())
+ 				send_ipi = 1;
+ 		}
+ 	}
+-	spin_unlock(&kvm_lock);
++	mutex_unlock(&kvm_lock);
+ 
+ 	if (freq->old < freq->new && send_ipi) {
+ 		/*
+@@ -6126,12 +6126,12 @@ static void pvclock_gtod_update_fn(struc
+ 	struct kvm_vcpu *vcpu;
+ 	int i;
+ 
+-	spin_lock(&kvm_lock);
++	mutex_lock(&kvm_lock);
+ 	list_for_each_entry(kvm, &vm_list, vm_list)
+ 		kvm_for_each_vcpu(i, vcpu, kvm)
+ 			kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
+ 	atomic_set(&kvm_guest_has_master_clock, 0);
+-	spin_unlock(&kvm_lock);
++	mutex_unlock(&kvm_lock);
+ }
+ 
+ static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn);
+--- a/include/linux/kvm_host.h
++++ b/include/linux/kvm_host.h
+@@ -129,7 +129,7 @@ static inline bool is_error_page(struct
+ 
+ extern struct kmem_cache *kvm_vcpu_cache;
+ 
+-extern spinlock_t kvm_lock;
++extern struct mutex kvm_lock;
+ extern struct list_head vm_list;
+ 
+ struct kvm_io_range {
+--- a/virt/kvm/kvm_main.c
++++ b/virt/kvm/kvm_main.c
+@@ -87,7 +87,7 @@ module_param(halt_poll_ns_shrink, uint,
+  *	kvm->lock --> kvm->slots_lock --> kvm->irq_lock
+  */
+ 
+-DEFINE_SPINLOCK(kvm_lock);
++DEFINE_MUTEX(kvm_lock);
+ static DEFINE_RAW_SPINLOCK(kvm_count_lock);
+ LIST_HEAD(vm_list);
+ 
+@@ -666,9 +666,9 @@ static struct kvm *kvm_create_vm(unsigne
+ 	if (r)
+ 		goto out_err;
+ 
+-	spin_lock(&kvm_lock);
++	mutex_lock(&kvm_lock);
+ 	list_add(&kvm->vm_list, &vm_list);
+-	spin_unlock(&kvm_lock);
++	mutex_unlock(&kvm_lock);
+ 
+ 	preempt_notifier_inc();
+ 
+@@ -724,9 +724,9 @@ static void kvm_destroy_vm(struct kvm *k
+ 
+ 	kvm_destroy_vm_debugfs(kvm);
+ 	kvm_arch_sync_events(kvm);
+-	spin_lock(&kvm_lock);
++	mutex_lock(&kvm_lock);
+ 	list_del(&kvm->vm_list);
+-	spin_unlock(&kvm_lock);
++	mutex_unlock(&kvm_lock);
+ 	kvm_free_irq_routing(kvm);
+ 	for (i = 0; i < KVM_NR_BUSES; i++) {
+ 		if (kvm->buses[i])
+@@ -3752,13 +3752,13 @@ static int vm_stat_get(void *_offset, u6
+ 	u64 tmp_val;
+ 
+ 	*val = 0;
+-	spin_lock(&kvm_lock);
++	mutex_lock(&kvm_lock);
+ 	list_for_each_entry(kvm, &vm_list, vm_list) {
+ 		stat_tmp.kvm = kvm;
+ 		vm_stat_get_per_vm((void *)&stat_tmp, &tmp_val);
+ 		*val += tmp_val;
+ 	}
+-	spin_unlock(&kvm_lock);
++	mutex_unlock(&kvm_lock);
+ 	return 0;
+ }
+ 
+@@ -3772,13 +3772,13 @@ static int vcpu_stat_get(void *_offset,
+ 	u64 tmp_val;
+ 
+ 	*val = 0;
+-	spin_lock(&kvm_lock);
++	mutex_lock(&kvm_lock);
+ 	list_for_each_entry(kvm, &vm_list, vm_list) {
+ 		stat_tmp.kvm = kvm;
+ 		vcpu_stat_get_per_vm((void *)&stat_tmp, &tmp_val);
+ 		*val += tmp_val;
+ 	}
+-	spin_unlock(&kvm_lock);
++	mutex_unlock(&kvm_lock);
+ 	return 0;
+ }
+ 
diff --git a/queue-4.9/kvm-mmu-itlb_multihit-mitigation.patch b/queue-4.9/kvm-mmu-itlb_multihit-mitigation.patch
new file mode 100644
index 00000000000..e15176a88d8
--- /dev/null
+++ b/queue-4.9/kvm-mmu-itlb_multihit-mitigation.patch
@@ -0,0 +1,497 @@
+From foo@baz Fri 15 Nov 2019 11:10:23 AM CST
+From: Paolo Bonzini <pbonzini@redhat.com>
+Date: Mon, 4 Nov 2019 12:22:02 +0100
+Subject: kvm: mmu: ITLB_MULTIHIT mitigation
+
+From: Paolo Bonzini <pbonzini@redhat.com>
+
+commit b8e8c8303ff28c61046a4d0f6ea99aea609a7dc0 upstream.
+
+With some Intel processors, putting the same virtual address in the TLB
+as both a 4 KiB and 2 MiB page can confuse the instruction fetch unit
+and cause the processor to issue a machine check resulting in a CPU lockup.
+
+Unfortunately when EPT page tables use huge pages, it is possible for a
+malicious guest to cause this situation.
+
+Add a knob to mark huge pages as non-executable. When the nx_huge_pages
+parameter is enabled (and we are using EPT), all huge pages are marked as
+NX. If the guest attempts to execute in one of those pages, the page is
+broken down into 4K pages, which are then marked executable.
+
+This is not an issue for shadow paging (except nested EPT), because then
+the host is in control of TLB flushes and the problematic situation cannot
+happen.  With nested EPT, the nested guest can again cause problems, so shadow
+and direct EPT are treated in the same way.
+
+[ tglx: Fixup default to auto and massage wording a bit ]
+
+Originally-by: Junaid Shahid <junaids@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+[bwh: Backported to 4.9:
+ - Use kvm_mmu_invalidate_zap_all_pages() instead of kvm_mmu_zap_all_fast()
+ - Don't provide mode for nx_largepages_splitted as all stats are read-only
+ - Adjust filename, context]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/kernel-parameters.txt |   19 ++++
+ arch/x86/include/asm/kvm_host.h     |    2 
+ arch/x86/kernel/cpu/bugs.c          |   13 +++
+ arch/x86/kvm/mmu.c                  |  141 ++++++++++++++++++++++++++++++++++--
+ arch/x86/kvm/paging_tmpl.h          |   29 +++++--
+ arch/x86/kvm/x86.c                  |    9 ++
+ 6 files changed, 200 insertions(+), 13 deletions(-)
+
+--- a/Documentation/kernel-parameters.txt
++++ b/Documentation/kernel-parameters.txt
+@@ -1975,6 +1975,19 @@ bytes respectively. Such letter suffixes
+ 			KVM MMU at runtime.
+ 			Default is 0 (off)
+ 
++	kvm.nx_huge_pages=
++			[KVM] Controls the software workaround for the
++			X86_BUG_ITLB_MULTIHIT bug.
++			force	: Always deploy workaround.
++			off	: Never deploy workaround.
++			auto    : Deploy workaround based on the presence of
++				  X86_BUG_ITLB_MULTIHIT.
++
++			Default is 'auto'.
++
++			If the software workaround is enabled for the host,
++			guests do not need to enable it for nested guests.
++
+ 	kvm-amd.nested=	[KVM,AMD] Allow nested virtualization in KVM/SVM.
+ 			Default is 1 (enabled)
+ 
+@@ -2491,6 +2504,12 @@ bytes respectively. Such letter suffixes
+ 					       l1tf=off [X86]
+ 					       mds=off [X86]
+ 					       tsx_async_abort=off [X86]
++					       kvm.nx_huge_pages=off [X86]
++
++				Exceptions:
++					       This does not have any effect on
++					       kvm.nx_huge_pages when
++					       kvm.nx_huge_pages=force.
+ 
+ 			auto (default)
+ 				Mitigate all CPU vulnerabilities, but leave SMT
+--- a/arch/x86/include/asm/kvm_host.h
++++ b/arch/x86/include/asm/kvm_host.h
+@@ -273,6 +273,7 @@ struct kvm_mmu_page {
+ 	/* hold the gfn of each spte inside spt */
+ 	gfn_t *gfns;
+ 	bool unsync;
++	bool lpage_disallowed; /* Can't be replaced by an equiv large page */
+ 	int root_count;          /* Currently serving as active root */
+ 	unsigned int unsync_children;
+ 	struct kvm_rmap_head parent_ptes; /* rmap pointers to parent sptes */
+@@ -811,6 +812,7 @@ struct kvm_vm_stat {
+ 	ulong mmu_unsync;
+ 	ulong remote_tlb_flush;
+ 	ulong lpages;
++	ulong nx_lpage_splits;
+ };
+ 
+ struct kvm_vcpu_stat {
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -1233,6 +1233,9 @@ void x86_spec_ctrl_setup_ap(void)
+ 		x86_amd_ssb_disable();
+ }
+ 
++bool itlb_multihit_kvm_mitigation;
++EXPORT_SYMBOL_GPL(itlb_multihit_kvm_mitigation);
++
+ #undef pr_fmt
+ #define pr_fmt(fmt)	"L1TF: " fmt
+ 
+@@ -1388,17 +1391,25 @@ static ssize_t l1tf_show_state(char *buf
+ 		       l1tf_vmx_states[l1tf_vmx_mitigation],
+ 		       sched_smt_active() ? "vulnerable" : "disabled");
+ }
++
++static ssize_t itlb_multihit_show_state(char *buf)
++{
++	if (itlb_multihit_kvm_mitigation)
++		return sprintf(buf, "KVM: Mitigation: Split huge pages\n");
++	else
++		return sprintf(buf, "KVM: Vulnerable\n");
++}
+ #else
+ static ssize_t l1tf_show_state(char *buf)
+ {
+ 	return sprintf(buf, "%s\n", L1TF_DEFAULT_MSG);
+ }
+-#endif
+ 
+ static ssize_t itlb_multihit_show_state(char *buf)
+ {
+ 	return sprintf(buf, "Processor vulnerable\n");
+ }
++#endif
+ 
+ static ssize_t mds_show_state(char *buf)
+ {
+--- a/arch/x86/kvm/mmu.c
++++ b/arch/x86/kvm/mmu.c
+@@ -44,6 +44,20 @@
+ #include <asm/vmx.h>
+ #include <asm/kvm_page_track.h>
+ 
++extern bool itlb_multihit_kvm_mitigation;
++
++static int __read_mostly nx_huge_pages = -1;
++
++static int set_nx_huge_pages(const char *val, const struct kernel_param *kp);
++
++static struct kernel_param_ops nx_huge_pages_ops = {
++	.set = set_nx_huge_pages,
++	.get = param_get_bool,
++};
++
++module_param_cb(nx_huge_pages, &nx_huge_pages_ops, &nx_huge_pages, 0644);
++__MODULE_PARM_TYPE(nx_huge_pages, "bool");
++
+ /*
+  * When setting this variable to true it enables Two-Dimensional-Paging
+  * where the hardware walks 2 page tables:
+@@ -202,6 +216,11 @@ void kvm_mmu_set_mmio_spte_mask(u64 mmio
+ }
+ EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask);
+ 
++static bool is_nx_huge_page_enabled(void)
++{
++	return READ_ONCE(nx_huge_pages);
++}
++
+ /*
+  * the low bit of the generation number is always presumed to be zero.
+  * This disables mmio caching during memslot updates.  The concept is
+@@ -855,6 +874,15 @@ static void account_shadowed(struct kvm
+ 	kvm_mmu_gfn_disallow_lpage(slot, gfn);
+ }
+ 
++static void account_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp)
++{
++	if (sp->lpage_disallowed)
++		return;
++
++	++kvm->stat.nx_lpage_splits;
++	sp->lpage_disallowed = true;
++}
++
+ static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
+ {
+ 	struct kvm_memslots *slots;
+@@ -872,6 +900,12 @@ static void unaccount_shadowed(struct kv
+ 	kvm_mmu_gfn_allow_lpage(slot, gfn);
+ }
+ 
++static void unaccount_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp)
++{
++	--kvm->stat.nx_lpage_splits;
++	sp->lpage_disallowed = false;
++}
++
+ static bool __mmu_gfn_lpage_is_disallowed(gfn_t gfn, int level,
+ 					  struct kvm_memory_slot *slot)
+ {
+@@ -2408,6 +2442,9 @@ static int kvm_mmu_prepare_zap_page(stru
+ 			kvm_reload_remote_mmus(kvm);
+ 	}
+ 
++	if (sp->lpage_disallowed)
++		unaccount_huge_nx_page(kvm, sp);
++
+ 	sp->role.invalid = 1;
+ 	return ret;
+ }
+@@ -2559,6 +2596,11 @@ static int set_spte(struct kvm_vcpu *vcp
+ 	if (!speculative)
+ 		spte |= shadow_accessed_mask;
+ 
++	if (level > PT_PAGE_TABLE_LEVEL && (pte_access & ACC_EXEC_MASK) &&
++	    is_nx_huge_page_enabled()) {
++		pte_access &= ~ACC_EXEC_MASK;
++	}
++
+ 	if (pte_access & ACC_EXEC_MASK)
+ 		spte |= shadow_x_mask;
+ 	else
+@@ -2766,9 +2808,32 @@ static void direct_pte_prefetch(struct k
+ 	__direct_pte_prefetch(vcpu, sp, sptep);
+ }
+ 
++static void disallowed_hugepage_adjust(struct kvm_shadow_walk_iterator it,
++				       gfn_t gfn, kvm_pfn_t *pfnp, int *levelp)
++{
++	int level = *levelp;
++	u64 spte = *it.sptep;
++
++	if (it.level == level && level > PT_PAGE_TABLE_LEVEL &&
++	    is_nx_huge_page_enabled() &&
++	    is_shadow_present_pte(spte) &&
++	    !is_large_pte(spte)) {
++		/*
++		 * A small SPTE exists for this pfn, but FNAME(fetch)
++		 * and __direct_map would like to create a large PTE
++		 * instead: just force them to go down another level,
++		 * patching back for them into pfn the next 9 bits of
++		 * the address.
++		 */
++		u64 page_mask = KVM_PAGES_PER_HPAGE(level) - KVM_PAGES_PER_HPAGE(level - 1);
++		*pfnp |= gfn & page_mask;
++		(*levelp)--;
++	}
++}
++
+ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, int write,
+ 			int map_writable, int level, kvm_pfn_t pfn,
+-			bool prefault)
++			bool prefault, bool lpage_disallowed)
+ {
+ 	struct kvm_shadow_walk_iterator it;
+ 	struct kvm_mmu_page *sp;
+@@ -2781,6 +2846,12 @@ static int __direct_map(struct kvm_vcpu
+ 
+ 	trace_kvm_mmu_spte_requested(gpa, level, pfn);
+ 	for_each_shadow_entry(vcpu, gpa, it) {
++		/*
++		 * We cannot overwrite existing page tables with an NX
++		 * large page, as the leaf could be executable.
++		 */
++		disallowed_hugepage_adjust(it, gfn, &pfn, &level);
++
+ 		base_gfn = gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
+ 		if (it.level == level)
+ 			break;
+@@ -2791,6 +2862,8 @@ static int __direct_map(struct kvm_vcpu
+ 					      it.level - 1, true, ACC_ALL);
+ 
+ 			link_shadow_page(vcpu, it.sptep, sp);
++			if (lpage_disallowed)
++				account_huge_nx_page(vcpu->kvm, sp);
+ 		}
+ 	}
+ 
+@@ -3031,11 +3104,14 @@ static int nonpaging_map(struct kvm_vcpu
+ {
+ 	int r;
+ 	int level;
+-	bool force_pt_level = false;
++	bool force_pt_level;
+ 	kvm_pfn_t pfn;
+ 	unsigned long mmu_seq;
+ 	bool map_writable, write = error_code & PFERR_WRITE_MASK;
++	bool lpage_disallowed = (error_code & PFERR_FETCH_MASK) &&
++				is_nx_huge_page_enabled();
+ 
++	force_pt_level = lpage_disallowed;
+ 	level = mapping_level(vcpu, gfn, &force_pt_level);
+ 	if (likely(!force_pt_level)) {
+ 		/*
+@@ -3068,7 +3144,8 @@ static int nonpaging_map(struct kvm_vcpu
+ 	make_mmu_pages_available(vcpu);
+ 	if (likely(!force_pt_level))
+ 		transparent_hugepage_adjust(vcpu, gfn, &pfn, &level);
+-	r = __direct_map(vcpu, v, write, map_writable, level, pfn, prefault);
++	r = __direct_map(vcpu, v, write, map_writable, level, pfn,
++			 prefault, false);
+ out_unlock:
+ 	spin_unlock(&vcpu->kvm->mmu_lock);
+ 	kvm_release_pfn_clean(pfn);
+@@ -3564,6 +3641,8 @@ static int tdp_page_fault(struct kvm_vcp
+ 	unsigned long mmu_seq;
+ 	int write = error_code & PFERR_WRITE_MASK;
+ 	bool map_writable;
++	bool lpage_disallowed = (error_code & PFERR_FETCH_MASK) &&
++				is_nx_huge_page_enabled();
+ 
+ 	MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
+ 
+@@ -3574,8 +3653,9 @@ static int tdp_page_fault(struct kvm_vcp
+ 	if (r)
+ 		return r;
+ 
+-	force_pt_level = !check_hugepage_cache_consistency(vcpu, gfn,
+-							   PT_DIRECTORY_LEVEL);
++	force_pt_level =
++		lpage_disallowed ||
++		!check_hugepage_cache_consistency(vcpu, gfn, PT_DIRECTORY_LEVEL);
+ 	level = mapping_level(vcpu, gfn, &force_pt_level);
+ 	if (likely(!force_pt_level)) {
+ 		if (level > PT_DIRECTORY_LEVEL &&
+@@ -3603,7 +3683,8 @@ static int tdp_page_fault(struct kvm_vcp
+ 	make_mmu_pages_available(vcpu);
+ 	if (likely(!force_pt_level))
+ 		transparent_hugepage_adjust(vcpu, gfn, &pfn, &level);
+-	r = __direct_map(vcpu, gpa, write, map_writable, level, pfn, prefault);
++	r = __direct_map(vcpu, gpa, write, map_writable, level, pfn,
++			 prefault, lpage_disallowed);
+ out_unlock:
+ 	spin_unlock(&vcpu->kvm->mmu_lock);
+ 	kvm_release_pfn_clean(pfn);
+@@ -5053,8 +5134,56 @@ static void mmu_destroy_caches(void)
+ 		kmem_cache_destroy(mmu_page_header_cache);
+ }
+ 
++static bool get_nx_auto_mode(void)
++{
++	/* Return true when CPU has the bug, and mitigations are ON */
++	return boot_cpu_has_bug(X86_BUG_ITLB_MULTIHIT) && !cpu_mitigations_off();
++}
++
++static void __set_nx_huge_pages(bool val)
++{
++	nx_huge_pages = itlb_multihit_kvm_mitigation = val;
++}
++
++static int set_nx_huge_pages(const char *val, const struct kernel_param *kp)
++{
++	bool old_val = nx_huge_pages;
++	bool new_val;
++
++	/* In "auto" mode deploy workaround only if CPU has the bug. */
++	if (sysfs_streq(val, "off"))
++		new_val = 0;
++	else if (sysfs_streq(val, "force"))
++		new_val = 1;
++	else if (sysfs_streq(val, "auto"))
++		new_val = get_nx_auto_mode();
++	else if (strtobool(val, &new_val) < 0)
++		return -EINVAL;
++
++	__set_nx_huge_pages(new_val);
++
++	if (new_val != old_val) {
++		struct kvm *kvm;
++		int idx;
++
++		mutex_lock(&kvm_lock);
++
++		list_for_each_entry(kvm, &vm_list, vm_list) {
++			idx = srcu_read_lock(&kvm->srcu);
++			kvm_mmu_invalidate_zap_all_pages(kvm);
++			srcu_read_unlock(&kvm->srcu, idx);
++		}
++		mutex_unlock(&kvm_lock);
++	}
++
++	return 0;
++}
++
+ int kvm_mmu_module_init(void)
+ {
++	if (nx_huge_pages == -1)
++		__set_nx_huge_pages(get_nx_auto_mode());
++
+ 	pte_list_desc_cache = kmem_cache_create("pte_list_desc",
+ 					    sizeof(struct pte_list_desc),
+ 					    0, SLAB_ACCOUNT, NULL);
+--- a/arch/x86/kvm/paging_tmpl.h
++++ b/arch/x86/kvm/paging_tmpl.h
+@@ -573,13 +573,14 @@ static void FNAME(pte_prefetch)(struct k
+ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
+ 			 struct guest_walker *gw,
+ 			 int write_fault, int hlevel,
+-			 kvm_pfn_t pfn, bool map_writable, bool prefault)
++			 kvm_pfn_t pfn, bool map_writable, bool prefault,
++			 bool lpage_disallowed)
+ {
+ 	struct kvm_mmu_page *sp = NULL;
+ 	struct kvm_shadow_walk_iterator it;
+ 	unsigned direct_access, access = gw->pt_access;
+ 	int top_level, ret;
+-	gfn_t base_gfn;
++	gfn_t gfn, base_gfn;
+ 
+ 	direct_access = gw->pte_access;
+ 
+@@ -624,13 +625,25 @@ static int FNAME(fetch)(struct kvm_vcpu
+ 			link_shadow_page(vcpu, it.sptep, sp);
+ 	}
+ 
+-	base_gfn = gw->gfn;
++	/*
++	 * FNAME(page_fault) might have clobbered the bottom bits of
++	 * gw->gfn, restore them from the virtual address.
++	 */
++	gfn = gw->gfn | ((addr & PT_LVL_OFFSET_MASK(gw->level)) >> PAGE_SHIFT);
++	base_gfn = gfn;
+ 
+ 	trace_kvm_mmu_spte_requested(addr, gw->level, pfn);
+ 
+ 	for (; shadow_walk_okay(&it); shadow_walk_next(&it)) {
+ 		clear_sp_write_flooding_count(it.sptep);
+-		base_gfn = gw->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
++
++		/*
++		 * We cannot overwrite existing page tables with an NX
++		 * large page, as the leaf could be executable.
++		 */
++		disallowed_hugepage_adjust(it, gfn, &pfn, &hlevel);
++
++		base_gfn = gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
+ 		if (it.level == hlevel)
+ 			break;
+ 
+@@ -642,6 +655,8 @@ static int FNAME(fetch)(struct kvm_vcpu
+ 			sp = kvm_mmu_get_page(vcpu, base_gfn, addr,
+ 					      it.level - 1, true, direct_access);
+ 			link_shadow_page(vcpu, it.sptep, sp);
++			if (lpage_disallowed)
++				account_huge_nx_page(vcpu->kvm, sp);
+ 		}
+ 	}
+ 
+@@ -718,9 +733,11 @@ static int FNAME(page_fault)(struct kvm_
+ 	int r;
+ 	kvm_pfn_t pfn;
+ 	int level = PT_PAGE_TABLE_LEVEL;
+-	bool force_pt_level = false;
+ 	unsigned long mmu_seq;
+ 	bool map_writable, is_self_change_mapping;
++	bool lpage_disallowed = (error_code & PFERR_FETCH_MASK) &&
++				is_nx_huge_page_enabled();
++	bool force_pt_level = lpage_disallowed;
+ 
+ 	pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);
+ 
+@@ -810,7 +827,7 @@ static int FNAME(page_fault)(struct kvm_
+ 	if (!force_pt_level)
+ 		transparent_hugepage_adjust(vcpu, walker.gfn, &pfn, &level);
+ 	r = FNAME(fetch)(vcpu, addr, &walker, write_fault,
+-			 level, pfn, map_writable, prefault);
++			 level, pfn, map_writable, prefault, lpage_disallowed);
+ 	kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT);
+ 
+ out_unlock:
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -191,6 +191,7 @@ struct kvm_stats_debugfs_item debugfs_en
+ 	{ "mmu_unsync", VM_STAT(mmu_unsync) },
+ 	{ "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
+ 	{ "largepages", VM_STAT(lpages) },
++	{ "nx_largepages_splitted", VM_STAT(nx_lpage_splits) },
+ 	{ NULL }
+ };
+ 
+@@ -1032,6 +1033,14 @@ u64 kvm_get_arch_capabilities(void)
+ 	rdmsrl_safe(MSR_IA32_ARCH_CAPABILITIES, &data);
+ 
+ 	/*
++	 * If nx_huge_pages is enabled, KVM's shadow paging will ensure that
++	 * the nested hypervisor runs with NX huge pages.  If it is not,
++	 * L1 is anyway vulnerable to ITLB_MULTIHIT exploits from other
++	 * L1 guests, so it need not worry about its own (L2) guests.
++	 */
++	data |= ARCH_CAP_PSCHANGE_MC_NO;
++
++	/*
+ 	 * If we're doing cache flushes (either "always" or "cond")
+ 	 * we will do one whenever the guest does a vmlaunch/vmresume.
+ 	 * If an outer hypervisor is doing the cache flush for us
diff --git a/queue-4.9/kvm-vmx-svm-always-run-with-efer.nxe-1-when-shadow-paging-is-active.patch b/queue-4.9/kvm-vmx-svm-always-run-with-efer.nxe-1-when-shadow-paging-is-active.patch
new file mode 100644
index 00000000000..e72a6081530
--- /dev/null
+++ b/queue-4.9/kvm-vmx-svm-always-run-with-efer.nxe-1-when-shadow-paging-is-active.patch
@@ -0,0 +1,71 @@
+From foo@baz Fri 15 Nov 2019 11:10:23 AM CST
+From: Paolo Bonzini <pbonzini@redhat.com>
+Date: Sun, 27 Oct 2019 16:23:23 +0100
+Subject: KVM: vmx, svm: always run with EFER.NXE=1 when shadow paging is active
+
+From: Paolo Bonzini <pbonzini@redhat.com>
+
+commit 9167ab79936206118cc60e47dcb926c3489f3bd5 upstream.
+
+VMX already does so if the host has SMEP, in order to support the combination of
+CR0.WP=1 and CR4.SMEP=1.  However, it is perfectly safe to always do so, and in
+fact VMX already ends up running with EFER.NXE=1 on old processors that lack the
+"load EFER" controls, because it may help avoid a slow MSR write.  Removing
+all the conditionals simplifies the code.
+
+SVM does not have similar code, but it should since recent AMD processors do
+support SMEP.  So this patch also makes the code for the two vendors more similar
+while fixing NPT=0, CR0.WP=1 and CR4.SMEP=1 on AMD processors.
+
+Cc: Joerg Roedel <jroedel@suse.de>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+[bwh: Backported to 4.9: adjust filename]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm.c |   10 ++++++++--
+ arch/x86/kvm/vmx.c |   14 +++-----------
+ 2 files changed, 11 insertions(+), 13 deletions(-)
+
+--- a/arch/x86/kvm/svm.c
++++ b/arch/x86/kvm/svm.c
+@@ -590,8 +590,14 @@ static int get_npt_level(void)
+ static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
+ {
+ 	vcpu->arch.efer = efer;
+-	if (!npt_enabled && !(efer & EFER_LMA))
+-		efer &= ~EFER_LME;
++
++	if (!npt_enabled) {
++		/* Shadow paging assumes NX to be available.  */
++		efer |= EFER_NX;
++
++		if (!(efer & EFER_LMA))
++			efer &= ~EFER_LME;
++	}
+ 
+ 	to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME;
+ 	mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR);
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -2219,17 +2219,9 @@ static bool update_transition_efer(struc
+ 	u64 guest_efer = vmx->vcpu.arch.efer;
+ 	u64 ignore_bits = 0;
+ 
+-	if (!enable_ept) {
+-		/*
+-		 * NX is needed to handle CR0.WP=1, CR4.SMEP=1.  Testing
+-		 * host CPUID is more efficient than testing guest CPUID
+-		 * or CR4.  Host SMEP is anyway a requirement for guest SMEP.
+-		 */
+-		if (boot_cpu_has(X86_FEATURE_SMEP))
+-			guest_efer |= EFER_NX;
+-		else if (!(guest_efer & EFER_NX))
+-			ignore_bits |= EFER_NX;
+-	}
++	/* Shadow paging assumes NX to be available.  */
++	if (!enable_ept)
++		guest_efer |= EFER_NX;
+ 
+ 	/*
+ 	 * LMA and LME handled by hardware; SCE meaningless outside long mode.
diff --git a/queue-4.9/kvm-x86-add-is_executable_pte.patch b/queue-4.9/kvm-x86-add-is_executable_pte.patch
new file mode 100644
index 00000000000..fd10ed46e5a
--- /dev/null
+++ b/queue-4.9/kvm-x86-add-is_executable_pte.patch
@@ -0,0 +1,30 @@
+From foo@baz Fri 15 Nov 2019 11:10:23 AM CST
+From: Ben Hutchings <ben@decadent.org.uk>
+Date: Sat, 19 Oct 2019 18:10:55 +0100
+Subject: KVM: x86: Add is_executable_pte()
+
+From: Ben Hutchings <ben@decadent.org.uk>
+
+Extracted from commit d3e328f2cb01 "kvm: x86: mmu: Verify that
+restored PTE has needed perms in fast page fault".
+
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/mmu.c |    5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/arch/x86/kvm/mmu.c
++++ b/arch/x86/kvm/mmu.c
+@@ -338,6 +338,11 @@ static int is_last_spte(u64 pte, int lev
+ 	return 0;
+ }
+ 
++static bool is_executable_pte(u64 spte)
++{
++	return (spte & (shadow_x_mask | shadow_nx_mask)) == shadow_x_mask;
++}
++
+ static kvm_pfn_t spte_to_pfn(u64 pte)
+ {
+ 	return (pte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
diff --git a/queue-4.9/kvm-x86-add-tracepoints-around-__direct_map-and-fname-fetch.patch b/queue-4.9/kvm-x86-add-tracepoints-around-__direct_map-and-fname-fetch.patch
new file mode 100644
index 00000000000..1195705457a
--- /dev/null
+++ b/queue-4.9/kvm-x86-add-tracepoints-around-__direct_map-and-fname-fetch.patch
@@ -0,0 +1,147 @@
+From foo@baz Fri 15 Nov 2019 11:10:23 AM CST
+From: Paolo Bonzini <pbonzini@redhat.com>
+Date: Mon, 1 Jul 2019 06:22:57 -0400
+Subject: KVM: x86: add tracepoints around __direct_map and FNAME(fetch)
+
+From: Paolo Bonzini <pbonzini@redhat.com>
+
+commit 335e192a3fa415e1202c8b9ecdaaecd643f823cc upstream.
+
+These are useful in debugging shadow paging.
+
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+[bwh: Backported to 4.9:
+ - Keep using PT_PRESENT_MASK to test page write permission
+ - Adjust context]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/mmu.c         |   13 ++++-----
+ arch/x86/kvm/mmutrace.h    |   59 +++++++++++++++++++++++++++++++++++++++++++++
+ arch/x86/kvm/paging_tmpl.h |    2 +
+ 3 files changed, 67 insertions(+), 7 deletions(-)
+
+--- a/arch/x86/kvm/mmu.c
++++ b/arch/x86/kvm/mmu.c
+@@ -131,9 +131,6 @@ module_param(dbg, bool, 0644);
+ 
+ #include <trace/events/kvm.h>
+ 
+-#define CREATE_TRACE_POINTS
+-#include "mmutrace.h"
+-
+ #define SPTE_HOST_WRITEABLE	(1ULL << PT_FIRST_AVAIL_BITS_SHIFT)
+ #define SPTE_MMU_WRITEABLE	(1ULL << (PT_FIRST_AVAIL_BITS_SHIFT + 1))
+ 
+@@ -193,8 +190,12 @@ static u64 __read_mostly shadow_mmio_mas
+ static u64 __read_mostly shadow_present_mask;
+ 
+ static void mmu_spte_set(u64 *sptep, u64 spte);
++static bool is_executable_pte(u64 spte);
+ static void mmu_free_roots(struct kvm_vcpu *vcpu);
+ 
++#define CREATE_TRACE_POINTS
++#include "mmutrace.h"
++
+ void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask)
+ {
+ 	shadow_mmio_mask = mmio_mask;
+@@ -2667,10 +2668,7 @@ static int mmu_set_spte(struct kvm_vcpu
+ 		ret = RET_PF_EMULATE;
+ 
+ 	pgprintk("%s: setting spte %llx\n", __func__, *sptep);
+-	pgprintk("instantiating %s PTE (%s) at %llx (%llx) addr %p\n",
+-		 is_large_pte(*sptep)? "2MB" : "4kB",
+-		 *sptep & PT_PRESENT_MASK ?"RW":"R", gfn,
+-		 *sptep, sptep);
++	trace_kvm_mmu_set_spte(level, gfn, sptep);
+ 	if (!was_rmapped && is_large_pte(*sptep))
+ 		++vcpu->kvm->stat.lpages;
+ 
+@@ -2781,6 +2779,7 @@ static int __direct_map(struct kvm_vcpu
+ 	if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
+ 		return RET_PF_RETRY;
+ 
++	trace_kvm_mmu_spte_requested(gpa, level, pfn);
+ 	for_each_shadow_entry(vcpu, gpa, it) {
+ 		base_gfn = gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
+ 		if (it.level == level)
+--- a/arch/x86/kvm/mmutrace.h
++++ b/arch/x86/kvm/mmutrace.h
+@@ -322,6 +322,65 @@ TRACE_EVENT(
+ 		  __entry->kvm_gen == __entry->spte_gen
+ 	)
+ );
++
++TRACE_EVENT(
++	kvm_mmu_set_spte,
++	TP_PROTO(int level, gfn_t gfn, u64 *sptep),
++	TP_ARGS(level, gfn, sptep),
++
++	TP_STRUCT__entry(
++		__field(u64, gfn)
++		__field(u64, spte)
++		__field(u64, sptep)
++		__field(u8, level)
++		/* These depend on page entry type, so compute them now.  */
++		__field(bool, r)
++		__field(bool, x)
++		__field(u8, u)
++	),
++
++	TP_fast_assign(
++		__entry->gfn = gfn;
++		__entry->spte = *sptep;
++		__entry->sptep = virt_to_phys(sptep);
++		__entry->level = level;
++		__entry->r = shadow_present_mask || (__entry->spte & PT_PRESENT_MASK);
++		__entry->x = is_executable_pte(__entry->spte);
++		__entry->u = shadow_user_mask ? !!(__entry->spte & shadow_user_mask) : -1;
++	),
++
++	TP_printk("gfn %llx spte %llx (%s%s%s%s) level %d at %llx",
++		  __entry->gfn, __entry->spte,
++		  __entry->r ? "r" : "-",
++		  __entry->spte & PT_PRESENT_MASK ? "w" : "-",
++		  __entry->x ? "x" : "-",
++		  __entry->u == -1 ? "" : (__entry->u ? "u" : "-"),
++		  __entry->level, __entry->sptep
++	)
++);
++
++TRACE_EVENT(
++	kvm_mmu_spte_requested,
++	TP_PROTO(gpa_t addr, int level, kvm_pfn_t pfn),
++	TP_ARGS(addr, level, pfn),
++
++	TP_STRUCT__entry(
++		__field(u64, gfn)
++		__field(u64, pfn)
++		__field(u8, level)
++	),
++
++	TP_fast_assign(
++		__entry->gfn = addr >> PAGE_SHIFT;
++		__entry->pfn = pfn | (__entry->gfn & (KVM_PAGES_PER_HPAGE(level) - 1));
++		__entry->level = level;
++	),
++
++	TP_printk("gfn %llx pfn %llx level %d",
++		  __entry->gfn, __entry->pfn, __entry->level
++	)
++);
++
+ #endif /* _TRACE_KVMMMU_H */
+ 
+ #undef TRACE_INCLUDE_PATH
+--- a/arch/x86/kvm/paging_tmpl.h
++++ b/arch/x86/kvm/paging_tmpl.h
+@@ -626,6 +626,8 @@ static int FNAME(fetch)(struct kvm_vcpu
+ 
+ 	base_gfn = gw->gfn;
+ 
++	trace_kvm_mmu_spte_requested(addr, gw->level, pfn);
++
+ 	for (; shadow_walk_okay(&it); shadow_walk_next(&it)) {
+ 		clear_sp_write_flooding_count(it.sptep);
+ 		base_gfn = gw->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
diff --git a/queue-4.9/kvm-x86-change-kvm_mmu_page_get_gfn-bug_on-to-warn_on.patch b/queue-4.9/kvm-x86-change-kvm_mmu_page_get_gfn-bug_on-to-warn_on.patch
new file mode 100644
index 00000000000..8a6556d7d6a
--- /dev/null
+++ b/queue-4.9/kvm-x86-change-kvm_mmu_page_get_gfn-bug_on-to-warn_on.patch
@@ -0,0 +1,43 @@
+From foo@baz Fri 15 Nov 2019 11:10:23 AM CST
+From: Paolo Bonzini <pbonzini@redhat.com>
+Date: Sun, 30 Jun 2019 08:36:21 -0400
+Subject: KVM: x86: change kvm_mmu_page_get_gfn BUG_ON to WARN_ON
+
+From: Paolo Bonzini <pbonzini@redhat.com>
+
+commit e9f2a760b158551bfbef6db31d2cae45ab8072e5 upstream.
+
+Note that in such a case it is quite likely that KVM will BUG_ON
+in __pte_list_remove when the VM is closed.  However, there is no
+immediate risk of memory corruption in the host so a WARN_ON is
+enough and it lets you gather traces for debugging.
+
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/mmu.c |   12 +++++++++---
+ 1 file changed, 9 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/kvm/mmu.c
++++ b/arch/x86/kvm/mmu.c
+@@ -781,10 +781,16 @@ static gfn_t kvm_mmu_page_get_gfn(struct
+ 
+ static void kvm_mmu_page_set_gfn(struct kvm_mmu_page *sp, int index, gfn_t gfn)
+ {
+-	if (sp->role.direct)
+-		BUG_ON(gfn != kvm_mmu_page_get_gfn(sp, index));
+-	else
++	if (!sp->role.direct) {
+ 		sp->gfns[index] = gfn;
++		return;
++	}
++
++	if (WARN_ON(gfn != kvm_mmu_page_get_gfn(sp, index)))
++		pr_err_ratelimited("gfn mismatch under direct page %llx "
++				   "(expected %llx, got %llx)\n",
++				   sp->gfn,
++				   kvm_mmu_page_get_gfn(sp, index), gfn);
+ }
+ 
+ /*
diff --git a/queue-4.9/kvm-x86-do-not-release-the-page-inside-mmu_set_spte.patch b/queue-4.9/kvm-x86-do-not-release-the-page-inside-mmu_set_spte.patch
new file mode 100644
index 00000000000..04365f6de2f
--- /dev/null
+++ b/queue-4.9/kvm-x86-do-not-release-the-page-inside-mmu_set_spte.patch
@@ -0,0 +1,137 @@
+From foo@baz Fri 15 Nov 2019 11:10:23 AM CST
+From: Junaid Shahid <junaids@google.com>
+Date: Thu, 3 Jan 2019 16:22:21 -0800
+Subject: kvm: x86: Do not release the page inside mmu_set_spte()
+
+From: Junaid Shahid <junaids@google.com>
+
+commit 43fdcda96e2550c6d1c46fb8a78801aa2f7276ed upstream.
+
+Release the page at the call-site where it was originally acquired.
+This makes the exit code cleaner for most call sites, since they
+do not need to duplicate code between success and the failure
+label.
+
+Signed-off-by: Junaid Shahid <junaids@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/mmu.c         |   18 +++++++-----------
+ arch/x86/kvm/paging_tmpl.h |    8 +++-----
+ 2 files changed, 10 insertions(+), 16 deletions(-)
+
+--- a/arch/x86/kvm/mmu.c
++++ b/arch/x86/kvm/mmu.c
+@@ -2671,8 +2671,6 @@ static int mmu_set_spte(struct kvm_vcpu
+ 		}
+ 	}
+ 
+-	kvm_release_pfn_clean(pfn);
+-
+ 	return ret;
+ }
+ 
+@@ -2707,9 +2705,11 @@ static int direct_pte_prefetch_many(stru
+ 	if (ret <= 0)
+ 		return -1;
+ 
+-	for (i = 0; i < ret; i++, gfn++, start++)
++	for (i = 0; i < ret; i++, gfn++, start++) {
+ 		mmu_set_spte(vcpu, start, access, 0, sp->role.level, gfn,
+ 			     page_to_pfn(pages[i]), true, true);
++		put_page(pages[i]);
++	}
+ 
+ 	return 0;
+ }
+@@ -3055,6 +3055,7 @@ static int nonpaging_map(struct kvm_vcpu
+ 	if (handle_abnormal_pfn(vcpu, v, gfn, pfn, ACC_ALL, &r))
+ 		return r;
+ 
++	r = RET_PF_RETRY;
+ 	spin_lock(&vcpu->kvm->mmu_lock);
+ 	if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
+ 		goto out_unlock;
+@@ -3062,14 +3063,11 @@ static int nonpaging_map(struct kvm_vcpu
+ 	if (likely(!force_pt_level))
+ 		transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level);
+ 	r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault);
+-	spin_unlock(&vcpu->kvm->mmu_lock);
+-
+-	return r;
+ 
+ out_unlock:
+ 	spin_unlock(&vcpu->kvm->mmu_lock);
+ 	kvm_release_pfn_clean(pfn);
+-	return RET_PF_RETRY;
++	return r;
+ }
+ 
+ 
+@@ -3593,6 +3591,7 @@ static int tdp_page_fault(struct kvm_vcp
+ 	if (handle_abnormal_pfn(vcpu, 0, gfn, pfn, ACC_ALL, &r))
+ 		return r;
+ 
++	r = RET_PF_RETRY;
+ 	spin_lock(&vcpu->kvm->mmu_lock);
+ 	if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
+ 		goto out_unlock;
+@@ -3600,14 +3599,11 @@ static int tdp_page_fault(struct kvm_vcp
+ 	if (likely(!force_pt_level))
+ 		transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level);
+ 	r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault);
+-	spin_unlock(&vcpu->kvm->mmu_lock);
+-
+-	return r;
+ 
+ out_unlock:
+ 	spin_unlock(&vcpu->kvm->mmu_lock);
+ 	kvm_release_pfn_clean(pfn);
+-	return RET_PF_RETRY;
++	return r;
+ }
+ 
+ static void nonpaging_init_context(struct kvm_vcpu *vcpu,
+--- a/arch/x86/kvm/paging_tmpl.h
++++ b/arch/x86/kvm/paging_tmpl.h
+@@ -499,6 +499,7 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vc
+ 	mmu_set_spte(vcpu, spte, pte_access, 0, PT_PAGE_TABLE_LEVEL, gfn, pfn,
+ 		     true, true);
+ 
++	kvm_release_pfn_clean(pfn);
+ 	return true;
+ }
+ 
+@@ -650,7 +651,6 @@ static int FNAME(fetch)(struct kvm_vcpu
+ 	return ret;
+ 
+ out_gpte_changed:
+-	kvm_release_pfn_clean(pfn);
+ 	return RET_PF_RETRY;
+ }
+ 
+@@ -799,6 +799,7 @@ static int FNAME(page_fault)(struct kvm_
+ 			walker.pte_access &= ~ACC_EXEC_MASK;
+ 	}
+ 
++	r = RET_PF_RETRY;
+ 	spin_lock(&vcpu->kvm->mmu_lock);
+ 	if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
+ 		goto out_unlock;
+@@ -811,14 +812,11 @@ static int FNAME(page_fault)(struct kvm_
+ 			 level, pfn, map_writable, prefault);
+ 	++vcpu->stat.pf_fixed;
+ 	kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT);
+-	spin_unlock(&vcpu->kvm->mmu_lock);
+-
+-	return r;
+ 
+ out_unlock:
+ 	spin_unlock(&vcpu->kvm->mmu_lock);
+ 	kvm_release_pfn_clean(pfn);
+-	return RET_PF_RETRY;
++	return r;
+ }
+ 
+ static gpa_t FNAME(get_level1_sp_gpa)(struct kvm_mmu_page *sp)
diff --git a/queue-4.9/kvm-x86-export-mds_no-0-to-guests-when-tsx-is-enabled.patch b/queue-4.9/kvm-x86-export-mds_no-0-to-guests-when-tsx-is-enabled.patch
new file mode 100644
index 00000000000..0dacf19ceea
--- /dev/null
+++ b/queue-4.9/kvm-x86-export-mds_no-0-to-guests-when-tsx-is-enabled.patch
@@ -0,0 +1,65 @@
+From foo@baz Fri 15 Nov 2019 11:10:23 AM CST
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Date: Wed, 23 Oct 2019 12:23:33 +0200
+Subject: kvm/x86: Export MDS_NO=0 to guests when TSX is enabled
+
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+
+commit e1d38b63acd843cfdd4222bf19a26700fd5c699e upstream.
+
+Export the IA32_ARCH_CAPABILITIES MSR bit MDS_NO=0 to guests on TSX
+Async Abort(TAA) affected hosts that have TSX enabled and updated
+microcode. This is required so that the guests don't complain,
+
+  "Vulnerable: Clear CPU buffers attempted, no microcode"
+
+when the host has the updated microcode to clear CPU buffers.
+
+Microcode update also adds support for MSR_IA32_TSX_CTRL which is
+enumerated by the ARCH_CAP_TSX_CTRL bit in IA32_ARCH_CAPABILITIES MSR.
+Guests can't do this check themselves when the ARCH_CAP_TSX_CTRL bit is
+not exported to the guests.
+
+In this case export MDS_NO=0 to the guests. When guests have
+CPUID.MD_CLEAR=1, they deploy MDS mitigation which also mitigates TAA.
+
+Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Tested-by: Neelima Krishnan <neelima.krishnan@intel.com>
+Reviewed-by: Tony Luck <tony.luck@intel.com>
+Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/x86.c |   19 +++++++++++++++++++
+ 1 file changed, 19 insertions(+)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -1050,6 +1050,25 @@ u64 kvm_get_arch_capabilities(void)
+ 	if (!boot_cpu_has_bug(X86_BUG_MDS))
+ 		data |= ARCH_CAP_MDS_NO;
+ 
++	/*
++	 * On TAA affected systems, export MDS_NO=0 when:
++	 *	- TSX is enabled on the host, i.e. X86_FEATURE_RTM=1.
++	 *	- Updated microcode is present. This is detected by
++	 *	  the presence of ARCH_CAP_TSX_CTRL_MSR and ensures
++	 *	  that VERW clears CPU buffers.
++	 *
++	 * When MDS_NO=0 is exported, guests deploy clear CPU buffer
++	 * mitigation and don't complain:
++	 *
++	 *	"Vulnerable: Clear CPU buffers attempted, no microcode"
++	 *
++	 * If TSX is disabled on the system, guests are also mitigated against
++	 * TAA and clear CPU buffer mitigation is not required for guests.
++	 */
++	if (boot_cpu_has_bug(X86_BUG_TAA) && boot_cpu_has(X86_FEATURE_RTM) &&
++	    (data & ARCH_CAP_TSX_CTRL_MSR))
++		data &= ~ARCH_CAP_MDS_NO;
++
+ 	return data;
+ }
+ 
diff --git a/queue-4.9/kvm-x86-extend-usage-of-ret_mmio_pf_-constants.patch b/queue-4.9/kvm-x86-extend-usage-of-ret_mmio_pf_-constants.patch
new file mode 100644
index 00000000000..d215864ad0f
--- /dev/null
+++ b/queue-4.9/kvm-x86-extend-usage-of-ret_mmio_pf_-constants.patch
@@ -0,0 +1,330 @@
+From foo@baz Fri 15 Nov 2019 11:10:23 AM CST
+From: Paolo Bonzini <pbonzini@redhat.com>
+Date: Thu, 17 Aug 2017 15:03:32 +0200
+Subject: KVM: x86: extend usage of RET_MMIO_PF_* constants
+
+From: Paolo Bonzini <pbonzini@redhat.com>
+
+commit 9b8ebbdb74b5ad76b9dfd8b101af17839174b126 upstream.
+
+The x86 MMU is full of code that returns 0 and 1 for retry/emulate.  Use
+the existing RET_MMIO_PF_RETRY/RET_MMIO_PF_EMULATE enum, renaming it to
+drop the MMIO part.
+
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+[bwh: Backported to 4.9: adjust context]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/mmu.c         |   93 +++++++++++++++++++++------------------------
+ arch/x86/kvm/paging_tmpl.h |   18 ++++----
+ 2 files changed, 54 insertions(+), 57 deletions(-)
+
+--- a/arch/x86/kvm/mmu.c
++++ b/arch/x86/kvm/mmu.c
+@@ -142,6 +142,20 @@ module_param(dbg, bool, 0644);
+ /* make pte_list_desc fit well in cache line */
+ #define PTE_LIST_EXT 3
+ 
++/*
++ * Return values of handle_mmio_page_fault and mmu.page_fault:
++ * RET_PF_RETRY: let CPU fault again on the address.
++ * RET_PF_EMULATE: mmio page fault, emulate the instruction directly.
++ *
++ * For handle_mmio_page_fault only:
++ * RET_PF_INVALID: the spte is invalid, let the real page fault path update it.
++ */
++enum {
++	RET_PF_RETRY = 0,
++	RET_PF_EMULATE = 1,
++	RET_PF_INVALID = 2,
++};
++
+ struct pte_list_desc {
+ 	u64 *sptes[PTE_LIST_EXT];
+ 	struct pte_list_desc *more;
+@@ -2598,13 +2612,13 @@ done:
+ 	return ret;
+ }
+ 
+-static bool mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access,
+-			 int write_fault, int level, gfn_t gfn, kvm_pfn_t pfn,
+-			 bool speculative, bool host_writable)
++static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access,
++			int write_fault, int level, gfn_t gfn, kvm_pfn_t pfn,
++		       	bool speculative, bool host_writable)
+ {
+ 	int was_rmapped = 0;
+ 	int rmap_count;
+-	bool emulate = false;
++	int ret = RET_PF_RETRY;
+ 
+ 	pgprintk("%s: spte %llx write_fault %d gfn %llx\n", __func__,
+ 		 *sptep, write_fault, gfn);
+@@ -2634,12 +2648,12 @@ static bool mmu_set_spte(struct kvm_vcpu
+ 	if (set_spte(vcpu, sptep, pte_access, level, gfn, pfn, speculative,
+ 	      true, host_writable)) {
+ 		if (write_fault)
+-			emulate = true;
++			ret = RET_PF_EMULATE;
+ 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+ 	}
+ 
+ 	if (unlikely(is_mmio_spte(*sptep)))
+-		emulate = true;
++		ret = RET_PF_EMULATE;
+ 
+ 	pgprintk("%s: setting spte %llx\n", __func__, *sptep);
+ 	pgprintk("instantiating %s PTE (%s) at %llx (%llx) addr %p\n",
+@@ -2659,7 +2673,7 @@ static bool mmu_set_spte(struct kvm_vcpu
+ 
+ 	kvm_release_pfn_clean(pfn);
+ 
+-	return emulate;
++	return ret;
+ }
+ 
+ static kvm_pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn,
+@@ -2798,14 +2812,13 @@ static int kvm_handle_bad_page(struct kv
+ 	 * Do not cache the mmio info caused by writing the readonly gfn
+ 	 * into the spte otherwise read access on readonly gfn also can
+ 	 * caused mmio page fault and treat it as mmio access.
+-	 * Return 1 to tell kvm to emulate it.
+ 	 */
+ 	if (pfn == KVM_PFN_ERR_RO_FAULT)
+-		return 1;
++		return RET_PF_EMULATE;
+ 
+ 	if (pfn == KVM_PFN_ERR_HWPOISON) {
+ 		kvm_send_hwpoison_signal(kvm_vcpu_gfn_to_hva(vcpu, gfn), current);
+-		return 0;
++		return RET_PF_RETRY;
+ 	}
+ 
+ 	return -EFAULT;
+@@ -3031,13 +3044,13 @@ static int nonpaging_map(struct kvm_vcpu
+ 	}
+ 
+ 	if (fast_page_fault(vcpu, v, level, error_code))
+-		return 0;
++		return RET_PF_RETRY;
+ 
+ 	mmu_seq = vcpu->kvm->mmu_notifier_seq;
+ 	smp_rmb();
+ 
+ 	if (try_async_pf(vcpu, prefault, gfn, v, &pfn, write, &map_writable))
+-		return 0;
++		return RET_PF_RETRY;
+ 
+ 	if (handle_abnormal_pfn(vcpu, v, gfn, pfn, ACC_ALL, &r))
+ 		return r;
+@@ -3056,7 +3069,7 @@ static int nonpaging_map(struct kvm_vcpu
+ out_unlock:
+ 	spin_unlock(&vcpu->kvm->mmu_lock);
+ 	kvm_release_pfn_clean(pfn);
+-	return 0;
++	return RET_PF_RETRY;
+ }
+ 
+ 
+@@ -3383,54 +3396,38 @@ exit:
+ 	return reserved;
+ }
+ 
+-/*
+- * Return values of handle_mmio_page_fault:
+- * RET_MMIO_PF_EMULATE: it is a real mmio page fault, emulate the instruction
+- *			directly.
+- * RET_MMIO_PF_INVALID: invalid spte is detected then let the real page
+- *			fault path update the mmio spte.
+- * RET_MMIO_PF_RETRY: let CPU fault again on the address.
+- * RET_MMIO_PF_BUG: a bug was detected (and a WARN was printed).
+- */
+-enum {
+-	RET_MMIO_PF_EMULATE = 1,
+-	RET_MMIO_PF_INVALID = 2,
+-	RET_MMIO_PF_RETRY = 0,
+-	RET_MMIO_PF_BUG = -1
+-};
+-
+ static int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct)
+ {
+ 	u64 spte;
+ 	bool reserved;
+ 
+ 	if (mmio_info_in_cache(vcpu, addr, direct))
+-		return RET_MMIO_PF_EMULATE;
++		return RET_PF_EMULATE;
+ 
+ 	reserved = walk_shadow_page_get_mmio_spte(vcpu, addr, &spte);
+ 	if (WARN_ON(reserved))
+-		return RET_MMIO_PF_BUG;
++		return -EINVAL;
+ 
+ 	if (is_mmio_spte(spte)) {
+ 		gfn_t gfn = get_mmio_spte_gfn(spte);
+ 		unsigned access = get_mmio_spte_access(spte);
+ 
+ 		if (!check_mmio_spte(vcpu, spte))
+-			return RET_MMIO_PF_INVALID;
++			return RET_PF_INVALID;
+ 
+ 		if (direct)
+ 			addr = 0;
+ 
+ 		trace_handle_mmio_page_fault(addr, gfn, access);
+ 		vcpu_cache_mmio_info(vcpu, addr, gfn, access);
+-		return RET_MMIO_PF_EMULATE;
++		return RET_PF_EMULATE;
+ 	}
+ 
+ 	/*
+ 	 * If the page table is zapped by other cpus, let CPU fault again on
+ 	 * the address.
+ 	 */
+-	return RET_MMIO_PF_RETRY;
++	return RET_PF_RETRY;
+ }
+ EXPORT_SYMBOL_GPL(handle_mmio_page_fault);
+ 
+@@ -3480,7 +3477,7 @@ static int nonpaging_page_fault(struct k
+ 	pgprintk("%s: gva %lx error %x\n", __func__, gva, error_code);
+ 
+ 	if (page_fault_handle_page_track(vcpu, error_code, gfn))
+-		return 1;
++		return RET_PF_EMULATE;
+ 
+ 	r = mmu_topup_memory_caches(vcpu);
+ 	if (r)
+@@ -3568,7 +3565,7 @@ static int tdp_page_fault(struct kvm_vcp
+ 	MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
+ 
+ 	if (page_fault_handle_page_track(vcpu, error_code, gfn))
+-		return 1;
++		return RET_PF_EMULATE;
+ 
+ 	r = mmu_topup_memory_caches(vcpu);
+ 	if (r)
+@@ -3585,13 +3582,13 @@ static int tdp_page_fault(struct kvm_vcp
+ 	}
+ 
+ 	if (fast_page_fault(vcpu, gpa, level, error_code))
+-		return 0;
++		return RET_PF_RETRY;
+ 
+ 	mmu_seq = vcpu->kvm->mmu_notifier_seq;
+ 	smp_rmb();
+ 
+ 	if (try_async_pf(vcpu, prefault, gfn, gpa, &pfn, write, &map_writable))
+-		return 0;
++		return RET_PF_RETRY;
+ 
+ 	if (handle_abnormal_pfn(vcpu, 0, gfn, pfn, ACC_ALL, &r))
+ 		return r;
+@@ -3610,7 +3607,7 @@ static int tdp_page_fault(struct kvm_vcp
+ out_unlock:
+ 	spin_unlock(&vcpu->kvm->mmu_lock);
+ 	kvm_release_pfn_clean(pfn);
+-	return 0;
++	return RET_PF_RETRY;
+ }
+ 
+ static void nonpaging_init_context(struct kvm_vcpu *vcpu,
+@@ -4526,24 +4523,24 @@ int kvm_mmu_page_fault(struct kvm_vcpu *
+ 	enum emulation_result er;
+ 	bool direct = vcpu->arch.mmu.direct_map || mmu_is_nested(vcpu);
+ 
++	r = RET_PF_INVALID;
+ 	if (unlikely(error_code & PFERR_RSVD_MASK)) {
+ 		r = handle_mmio_page_fault(vcpu, cr2, direct);
+-		if (r == RET_MMIO_PF_EMULATE) {
++		if (r == RET_PF_EMULATE) {
+ 			emulation_type = 0;
+ 			goto emulate;
+ 		}
+-		if (r == RET_MMIO_PF_RETRY)
+-			return 1;
+-		if (r < 0)
+-			return r;
+-		/* Must be RET_MMIO_PF_INVALID.  */
+ 	}
+ 
+-	r = vcpu->arch.mmu.page_fault(vcpu, cr2, error_code, false);
++	if (r == RET_PF_INVALID) {
++		r = vcpu->arch.mmu.page_fault(vcpu, cr2, error_code, false);
++		WARN_ON(r == RET_PF_INVALID);
++	}
++
++	if (r == RET_PF_RETRY)
++		return 1;
+ 	if (r < 0)
+ 		return r;
+-	if (!r)
+-		return 1;
+ 
+ 	if (mmio_info_in_cache(vcpu, cr2, direct))
+ 		emulation_type = 0;
+--- a/arch/x86/kvm/paging_tmpl.h
++++ b/arch/x86/kvm/paging_tmpl.h
+@@ -577,7 +577,7 @@ static int FNAME(fetch)(struct kvm_vcpu
+ 	struct kvm_mmu_page *sp = NULL;
+ 	struct kvm_shadow_walk_iterator it;
+ 	unsigned direct_access, access = gw->pt_access;
+-	int top_level, emulate;
++	int top_level, ret;
+ 
+ 	direct_access = gw->pte_access;
+ 
+@@ -643,15 +643,15 @@ static int FNAME(fetch)(struct kvm_vcpu
+ 	}
+ 
+ 	clear_sp_write_flooding_count(it.sptep);
+-	emulate = mmu_set_spte(vcpu, it.sptep, gw->pte_access, write_fault,
+-			       it.level, gw->gfn, pfn, prefault, map_writable);
++	ret = mmu_set_spte(vcpu, it.sptep, gw->pte_access, write_fault,
++			   it.level, gw->gfn, pfn, prefault, map_writable);
+ 	FNAME(pte_prefetch)(vcpu, gw, it.sptep);
+ 
+-	return emulate;
++	return ret;
+ 
+ out_gpte_changed:
+ 	kvm_release_pfn_clean(pfn);
+-	return 0;
++	return RET_PF_RETRY;
+ }
+ 
+  /*
+@@ -746,12 +746,12 @@ static int FNAME(page_fault)(struct kvm_
+ 		if (!prefault)
+ 			inject_page_fault(vcpu, &walker.fault);
+ 
+-		return 0;
++		return RET_PF_RETRY;
+ 	}
+ 
+ 	if (page_fault_handle_page_track(vcpu, error_code, walker.gfn)) {
+ 		shadow_page_table_clear_flood(vcpu, addr);
+-		return 1;
++		return RET_PF_EMULATE;
+ 	}
+ 
+ 	vcpu->arch.write_fault_to_shadow_pgtable = false;
+@@ -773,7 +773,7 @@ static int FNAME(page_fault)(struct kvm_
+ 
+ 	if (try_async_pf(vcpu, prefault, walker.gfn, addr, &pfn, write_fault,
+ 			 &map_writable))
+-		return 0;
++		return RET_PF_RETRY;
+ 
+ 	if (handle_abnormal_pfn(vcpu, mmu_is_nested(vcpu) ? 0 : addr,
+ 				walker.gfn, pfn, walker.pte_access, &r))
+@@ -818,7 +818,7 @@ static int FNAME(page_fault)(struct kvm_
+ out_unlock:
+ 	spin_unlock(&vcpu->kvm->mmu_lock);
+ 	kvm_release_pfn_clean(pfn);
+-	return 0;
++	return RET_PF_RETRY;
+ }
+ 
+ static gpa_t FNAME(get_level1_sp_gpa)(struct kvm_mmu_page *sp)
diff --git a/queue-4.9/kvm-x86-make-fname-fetch-and-__direct_map-more-similar.patch b/queue-4.9/kvm-x86-make-fname-fetch-and-__direct_map-more-similar.patch
new file mode 100644
index 00000000000..39fd29c253c
--- /dev/null
+++ b/queue-4.9/kvm-x86-make-fname-fetch-and-__direct_map-more-similar.patch
@@ -0,0 +1,173 @@
+From foo@baz Fri 15 Nov 2019 11:10:23 AM CST
+From: Paolo Bonzini <pbonzini@redhat.com>
+Date: Mon, 24 Jun 2019 13:06:21 +0200
+Subject: KVM: x86: make FNAME(fetch) and __direct_map more similar
+
+From: Paolo Bonzini <pbonzini@redhat.com>
+
+commit 3fcf2d1bdeb6a513523cb2c77012a6b047aa859c upstream.
+
+These two functions are basically doing the same thing through
+kvm_mmu_get_page, link_shadow_page and mmu_set_spte; yet, for historical
+reasons, their code looks very different.  This patch tries to take the
+best of each and make them very similar, so that it is easy to understand
+changes that apply to both of them.
+
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+[bwh: Backported to 4.9: adjust context]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/mmu.c         |   53 +++++++++++++++++++++------------------------
+ arch/x86/kvm/paging_tmpl.h |   30 +++++++++++--------------
+ 2 files changed, 39 insertions(+), 44 deletions(-)
+
+--- a/arch/x86/kvm/mmu.c
++++ b/arch/x86/kvm/mmu.c
+@@ -2757,40 +2757,39 @@ static void direct_pte_prefetch(struct k
+ 	__direct_pte_prefetch(vcpu, sp, sptep);
+ }
+ 
+-static int __direct_map(struct kvm_vcpu *vcpu, int write, int map_writable,
+-			int level, gfn_t gfn, kvm_pfn_t pfn, bool prefault)
++static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, int write,
++			int map_writable, int level, kvm_pfn_t pfn,
++			bool prefault)
+ {
+-	struct kvm_shadow_walk_iterator iterator;
++	struct kvm_shadow_walk_iterator it;
+ 	struct kvm_mmu_page *sp;
+-	int emulate = 0;
+-	gfn_t pseudo_gfn;
++	int ret;
++	gfn_t gfn = gpa >> PAGE_SHIFT;
++	gfn_t base_gfn = gfn;
+ 
+ 	if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
+-		return 0;
++		return RET_PF_RETRY;
+ 
+-	for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) {
+-		if (iterator.level == level) {
+-			emulate = mmu_set_spte(vcpu, iterator.sptep, ACC_ALL,
+-					       write, level, gfn, pfn, prefault,
+-					       map_writable);
+-			direct_pte_prefetch(vcpu, iterator.sptep);
+-			++vcpu->stat.pf_fixed;
++	for_each_shadow_entry(vcpu, gpa, it) {
++		base_gfn = gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
++		if (it.level == level)
+ 			break;
+-		}
+ 
+-		drop_large_spte(vcpu, iterator.sptep);
+-		if (!is_shadow_present_pte(*iterator.sptep)) {
+-			u64 base_addr = iterator.addr;
+-
+-			base_addr &= PT64_LVL_ADDR_MASK(iterator.level);
+-			pseudo_gfn = base_addr >> PAGE_SHIFT;
+-			sp = kvm_mmu_get_page(vcpu, pseudo_gfn, iterator.addr,
+-					      iterator.level - 1, 1, ACC_ALL);
++		drop_large_spte(vcpu, it.sptep);
++		if (!is_shadow_present_pte(*it.sptep)) {
++			sp = kvm_mmu_get_page(vcpu, base_gfn, it.addr,
++					      it.level - 1, true, ACC_ALL);
+ 
+-			link_shadow_page(vcpu, iterator.sptep, sp);
++			link_shadow_page(vcpu, it.sptep, sp);
+ 		}
+ 	}
+-	return emulate;
++
++	ret = mmu_set_spte(vcpu, it.sptep, ACC_ALL,
++			   write, level, base_gfn, pfn, prefault,
++			   map_writable);
++	direct_pte_prefetch(vcpu, it.sptep);
++	++vcpu->stat.pf_fixed;
++	return ret;
+ }
+ 
+ static void kvm_send_hwpoison_signal(unsigned long address, struct task_struct *tsk)
+@@ -3062,8 +3061,7 @@ static int nonpaging_map(struct kvm_vcpu
+ 	make_mmu_pages_available(vcpu);
+ 	if (likely(!force_pt_level))
+ 		transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level);
+-	r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault);
+-
++	r = __direct_map(vcpu, v, write, map_writable, level, pfn, prefault);
+ out_unlock:
+ 	spin_unlock(&vcpu->kvm->mmu_lock);
+ 	kvm_release_pfn_clean(pfn);
+@@ -3598,8 +3596,7 @@ static int tdp_page_fault(struct kvm_vcp
+ 	make_mmu_pages_available(vcpu);
+ 	if (likely(!force_pt_level))
+ 		transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level);
+-	r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault);
+-
++	r = __direct_map(vcpu, gpa, write, map_writable, level, pfn, prefault);
+ out_unlock:
+ 	spin_unlock(&vcpu->kvm->mmu_lock);
+ 	kvm_release_pfn_clean(pfn);
+--- a/arch/x86/kvm/paging_tmpl.h
++++ b/arch/x86/kvm/paging_tmpl.h
+@@ -579,6 +579,7 @@ static int FNAME(fetch)(struct kvm_vcpu
+ 	struct kvm_shadow_walk_iterator it;
+ 	unsigned direct_access, access = gw->pt_access;
+ 	int top_level, ret;
++	gfn_t base_gfn;
+ 
+ 	direct_access = gw->pte_access;
+ 
+@@ -623,31 +624,29 @@ static int FNAME(fetch)(struct kvm_vcpu
+ 			link_shadow_page(vcpu, it.sptep, sp);
+ 	}
+ 
+-	for (;
+-	     shadow_walk_okay(&it) && it.level > hlevel;
+-	     shadow_walk_next(&it)) {
+-		gfn_t direct_gfn;
++	base_gfn = gw->gfn;
+ 
++	for (; shadow_walk_okay(&it); shadow_walk_next(&it)) {
+ 		clear_sp_write_flooding_count(it.sptep);
++		base_gfn = gw->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
++		if (it.level == hlevel)
++			break;
++
+ 		validate_direct_spte(vcpu, it.sptep, direct_access);
+ 
+ 		drop_large_spte(vcpu, it.sptep);
+ 
+-		if (is_shadow_present_pte(*it.sptep))
+-			continue;
+-
+-		direct_gfn = gw->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
+-
+-		sp = kvm_mmu_get_page(vcpu, direct_gfn, addr, it.level-1,
+-				      true, direct_access);
+-		link_shadow_page(vcpu, it.sptep, sp);
++		if (!is_shadow_present_pte(*it.sptep)) {
++			sp = kvm_mmu_get_page(vcpu, base_gfn, addr,
++					      it.level - 1, true, direct_access);
++			link_shadow_page(vcpu, it.sptep, sp);
++		}
+ 	}
+ 
+-	clear_sp_write_flooding_count(it.sptep);
+ 	ret = mmu_set_spte(vcpu, it.sptep, gw->pte_access, write_fault,
+-			   it.level, gw->gfn, pfn, prefault, map_writable);
++			   it.level, base_gfn, pfn, prefault, map_writable);
+ 	FNAME(pte_prefetch)(vcpu, gw, it.sptep);
+-
++	++vcpu->stat.pf_fixed;
+ 	return ret;
+ 
+ out_gpte_changed:
+@@ -810,7 +809,6 @@ static int FNAME(page_fault)(struct kvm_
+ 		transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level);
+ 	r = FNAME(fetch)(vcpu, addr, &walker, write_fault,
+ 			 level, pfn, map_writable, prefault);
+-	++vcpu->stat.pf_fixed;
+ 	kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT);
+ 
+ out_unlock:
diff --git a/queue-4.9/kvm-x86-mmu-recovery-of-shattered-nx-large-pages.patch b/queue-4.9/kvm-x86-mmu-recovery-of-shattered-nx-large-pages.patch
new file mode 100644
index 00000000000..bfba6f9b8a7
--- /dev/null
+++ b/queue-4.9/kvm-x86-mmu-recovery-of-shattered-nx-large-pages.patch
@@ -0,0 +1,365 @@
+From foo@baz Fri 15 Nov 2019 11:10:23 AM CST
+From: Junaid Shahid <junaids@google.com>
+Date: Mon, 4 Nov 2019 12:22:03 +0100
+Subject: kvm: x86: mmu: Recovery of shattered NX large pages
+
+From: Junaid Shahid <junaids@google.com>
+
+commit 1aa9b9572b10529c2e64e2b8f44025d86e124308 upstream.
+
+The page table pages corresponding to broken down large pages are zapped in
+FIFO order, so that the large page can potentially be recovered, if it is
+no longer being used for execution.  This removes the performance penalty
+for walking deeper EPT page tables.
+
+By default, one large page will last about one hour once the guest
+reaches a steady state.
+
+Signed-off-by: Junaid Shahid <junaids@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+[bwh: Backported to 4.9:
+ - Update another error path in kvm_create_vm() to use out_err_no_mmu_notifier
+ - Adjust filename, context]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/kernel-parameters.txt |    6 +
+ arch/x86/include/asm/kvm_host.h     |    4 +
+ arch/x86/kvm/mmu.c                  |  129 ++++++++++++++++++++++++++++++++++++
+ arch/x86/kvm/mmu.h                  |    4 +
+ arch/x86/kvm/x86.c                  |   11 +++
+ virt/kvm/kvm_main.c                 |   30 ++++++++
+ 6 files changed, 183 insertions(+), 1 deletion(-)
+
+--- a/Documentation/kernel-parameters.txt
++++ b/Documentation/kernel-parameters.txt
+@@ -1988,6 +1988,12 @@ bytes respectively. Such letter suffixes
+ 			If the software workaround is enabled for the host,
+ 			guests do need not to enable it for nested guests.
+ 
++	kvm.nx_huge_pages_recovery_ratio=
++			[KVM] Controls how many 4KiB pages are periodically zapped
++			back to huge pages.  0 disables the recovery, otherwise if
++			the value is N KVM will zap 1/Nth of the 4KiB pages every
++			minute.  The default is 60.
++
+ 	kvm-amd.nested=	[KVM,AMD] Allow nested virtualization in KVM/SVM.
+ 			Default is 1 (enabled)
+ 
+--- a/arch/x86/include/asm/kvm_host.h
++++ b/arch/x86/include/asm/kvm_host.h
+@@ -261,6 +261,7 @@ struct kvm_rmap_head {
+ struct kvm_mmu_page {
+ 	struct list_head link;
+ 	struct hlist_node hash_link;
++	struct list_head lpage_disallowed_link;
+ 
+ 	/*
+ 	 * The following two entries are used to key the shadow page in the
+@@ -725,6 +726,7 @@ struct kvm_arch {
+ 	 */
+ 	struct list_head active_mmu_pages;
+ 	struct list_head zapped_obsolete_pages;
++	struct list_head lpage_disallowed_mmu_pages;
+ 	struct kvm_page_track_notifier_node mmu_sp_tracker;
+ 	struct kvm_page_track_notifier_head track_notifier_head;
+ 
+@@ -799,6 +801,8 @@ struct kvm_arch {
+ 
+ 	bool x2apic_format;
+ 	bool x2apic_broadcast_quirk_disabled;
++
++	struct task_struct *nx_lpage_recovery_thread;
+ };
+ 
+ struct kvm_vm_stat {
+--- a/arch/x86/kvm/mmu.c
++++ b/arch/x86/kvm/mmu.c
+@@ -37,6 +37,7 @@
+ #include <linux/srcu.h>
+ #include <linux/slab.h>
+ #include <linux/uaccess.h>
++#include <linux/kthread.h>
+ 
+ #include <asm/page.h>
+ #include <asm/cmpxchg.h>
+@@ -47,16 +48,26 @@
+ extern bool itlb_multihit_kvm_mitigation;
+ 
+ static int __read_mostly nx_huge_pages = -1;
++static uint __read_mostly nx_huge_pages_recovery_ratio = 60;
+ 
+ static int set_nx_huge_pages(const char *val, const struct kernel_param *kp);
++static int set_nx_huge_pages_recovery_ratio(const char *val, const struct kernel_param *kp);
+ 
+ static struct kernel_param_ops nx_huge_pages_ops = {
+ 	.set = set_nx_huge_pages,
+ 	.get = param_get_bool,
+ };
+ 
++static struct kernel_param_ops nx_huge_pages_recovery_ratio_ops = {
++	.set = set_nx_huge_pages_recovery_ratio,
++	.get = param_get_uint,
++};
++
+ module_param_cb(nx_huge_pages, &nx_huge_pages_ops, &nx_huge_pages, 0644);
+ __MODULE_PARM_TYPE(nx_huge_pages, "bool");
++module_param_cb(nx_huge_pages_recovery_ratio, &nx_huge_pages_recovery_ratio_ops,
++		&nx_huge_pages_recovery_ratio, 0644);
++__MODULE_PARM_TYPE(nx_huge_pages_recovery_ratio, "uint");
+ 
+ /*
+  * When setting this variable to true it enables Two-Dimensional-Paging
+@@ -880,6 +891,8 @@ static void account_huge_nx_page(struct
+ 		return;
+ 
+ 	++kvm->stat.nx_lpage_splits;
++	list_add_tail(&sp->lpage_disallowed_link,
++		      &kvm->arch.lpage_disallowed_mmu_pages);
+ 	sp->lpage_disallowed = true;
+ }
+ 
+@@ -904,6 +917,7 @@ static void unaccount_huge_nx_page(struc
+ {
+ 	--kvm->stat.nx_lpage_splits;
+ 	sp->lpage_disallowed = false;
++	list_del(&sp->lpage_disallowed_link);
+ }
+ 
+ static bool __mmu_gfn_lpage_is_disallowed(gfn_t gfn, int level,
+@@ -5172,6 +5186,8 @@ static int set_nx_huge_pages(const char
+ 			idx = srcu_read_lock(&kvm->srcu);
+ 			kvm_mmu_invalidate_zap_all_pages(kvm);
+ 			srcu_read_unlock(&kvm->srcu, idx);
++
++			wake_up_process(kvm->arch.nx_lpage_recovery_thread);
+ 		}
+ 		mutex_unlock(&kvm_lock);
+ 	}
+@@ -5247,3 +5263,116 @@ void kvm_mmu_module_exit(void)
+ 	unregister_shrinker(&mmu_shrinker);
+ 	mmu_audit_disable();
+ }
++
++static int set_nx_huge_pages_recovery_ratio(const char *val, const struct kernel_param *kp)
++{
++	unsigned int old_val;
++	int err;
++
++	old_val = nx_huge_pages_recovery_ratio;
++	err = param_set_uint(val, kp);
++	if (err)
++		return err;
++
++	if (READ_ONCE(nx_huge_pages) &&
++	    !old_val && nx_huge_pages_recovery_ratio) {
++		struct kvm *kvm;
++
++		mutex_lock(&kvm_lock);
++
++		list_for_each_entry(kvm, &vm_list, vm_list)
++			wake_up_process(kvm->arch.nx_lpage_recovery_thread);
++
++		mutex_unlock(&kvm_lock);
++	}
++
++	return err;
++}
++
++static void kvm_recover_nx_lpages(struct kvm *kvm)
++{
++	int rcu_idx;
++	struct kvm_mmu_page *sp;
++	unsigned int ratio;
++	LIST_HEAD(invalid_list);
++	ulong to_zap;
++
++	rcu_idx = srcu_read_lock(&kvm->srcu);
++	spin_lock(&kvm->mmu_lock);
++
++	ratio = READ_ONCE(nx_huge_pages_recovery_ratio);
++	to_zap = ratio ? DIV_ROUND_UP(kvm->stat.nx_lpage_splits, ratio) : 0;
++	while (to_zap && !list_empty(&kvm->arch.lpage_disallowed_mmu_pages)) {
++		/*
++		 * We use a separate list instead of just using active_mmu_pages
++		 * because the number of lpage_disallowed pages is expected to
++		 * be relatively small compared to the total.
++		 */
++		sp = list_first_entry(&kvm->arch.lpage_disallowed_mmu_pages,
++				      struct kvm_mmu_page,
++				      lpage_disallowed_link);
++		WARN_ON_ONCE(!sp->lpage_disallowed);
++		kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
++		WARN_ON_ONCE(sp->lpage_disallowed);
++
++		if (!--to_zap || need_resched() || spin_needbreak(&kvm->mmu_lock)) {
++			kvm_mmu_commit_zap_page(kvm, &invalid_list);
++			if (to_zap)
++				cond_resched_lock(&kvm->mmu_lock);
++		}
++	}
++
++	spin_unlock(&kvm->mmu_lock);
++	srcu_read_unlock(&kvm->srcu, rcu_idx);
++}
++
++static long get_nx_lpage_recovery_timeout(u64 start_time)
++{
++	return READ_ONCE(nx_huge_pages) && READ_ONCE(nx_huge_pages_recovery_ratio)
++		? start_time + 60 * HZ - get_jiffies_64()
++		: MAX_SCHEDULE_TIMEOUT;
++}
++
++static int kvm_nx_lpage_recovery_worker(struct kvm *kvm, uintptr_t data)
++{
++	u64 start_time;
++	long remaining_time;
++
++	while (true) {
++		start_time = get_jiffies_64();
++		remaining_time = get_nx_lpage_recovery_timeout(start_time);
++
++		set_current_state(TASK_INTERRUPTIBLE);
++		while (!kthread_should_stop() && remaining_time > 0) {
++			schedule_timeout(remaining_time);
++			remaining_time = get_nx_lpage_recovery_timeout(start_time);
++			set_current_state(TASK_INTERRUPTIBLE);
++		}
++
++		set_current_state(TASK_RUNNING);
++
++		if (kthread_should_stop())
++			return 0;
++
++		kvm_recover_nx_lpages(kvm);
++	}
++}
++
++int kvm_mmu_post_init_vm(struct kvm *kvm)
++{
++	int err;
++
++	err = kvm_vm_create_worker_thread(kvm, kvm_nx_lpage_recovery_worker, 0,
++					  "kvm-nx-lpage-recovery",
++					  &kvm->arch.nx_lpage_recovery_thread);
++	if (!err)
++		kthread_unpark(kvm->arch.nx_lpage_recovery_thread);
++
++	return err;
++}
++
++void kvm_mmu_pre_destroy_vm(struct kvm *kvm)
++{
++	if (kvm->arch.nx_lpage_recovery_thread)
++		kthread_stop(kvm->arch.nx_lpage_recovery_thread);
++}
+--- a/arch/x86/kvm/mmu.h
++++ b/arch/x86/kvm/mmu.h
+@@ -185,4 +185,8 @@ void kvm_mmu_gfn_disallow_lpage(struct k
+ void kvm_mmu_gfn_allow_lpage(struct kvm_memory_slot *slot, gfn_t gfn);
+ bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
+ 				    struct kvm_memory_slot *slot, u64 gfn);
++
++int kvm_mmu_post_init_vm(struct kvm *kvm);
++void kvm_mmu_pre_destroy_vm(struct kvm *kvm);
++
+ #endif
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -8108,6 +8108,7 @@ int kvm_arch_init_vm(struct kvm *kvm, un
+ 	INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list);
+ 	INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
+ 	INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages);
++	INIT_LIST_HEAD(&kvm->arch.lpage_disallowed_mmu_pages);
+ 	INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
+ 	atomic_set(&kvm->arch.noncoherent_dma_count, 0);
+ 
+@@ -8136,6 +8137,11 @@ int kvm_arch_init_vm(struct kvm *kvm, un
+ 	return 0;
+ }
+ 
++int kvm_arch_post_init_vm(struct kvm *kvm)
++{
++	return kvm_mmu_post_init_vm(kvm);
++}
++
+ static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
+ {
+ 	int r;
+@@ -8242,6 +8248,11 @@ int x86_set_memory_region(struct kvm *kv
+ }
+ EXPORT_SYMBOL_GPL(x86_set_memory_region);
+ 
++void kvm_arch_pre_destroy_vm(struct kvm *kvm)
++{
++	kvm_mmu_pre_destroy_vm(kvm);
++}
++
+ void kvm_arch_destroy_vm(struct kvm *kvm)
+ {
+ 	if (current->mm == kvm->mm) {
+--- a/virt/kvm/kvm_main.c
++++ b/virt/kvm/kvm_main.c
+@@ -613,6 +613,23 @@ static int kvm_create_vm_debugfs(struct
+ 	return 0;
+ }
+ 
++/*
++ * Called after the VM is otherwise initialized, but just before adding it to
++ * the vm_list.
++ */
++int __weak kvm_arch_post_init_vm(struct kvm *kvm)
++{
++	return 0;
++}
++
++/*
++ * Called just after removing the VM from the vm_list, but before doing any
++ * other destruction.
++ */
++void __weak kvm_arch_pre_destroy_vm(struct kvm *kvm)
++{
++}
++
+ static struct kvm *kvm_create_vm(unsigned long type)
+ {
+ 	int r, i;
+@@ -660,11 +677,15 @@ static struct kvm *kvm_create_vm(unsigne
+ 		kvm->buses[i] = kzalloc(sizeof(struct kvm_io_bus),
+ 					GFP_KERNEL);
+ 		if (!kvm->buses[i])
+-			goto out_err;
++			goto out_err_no_mmu_notifier;
+ 	}
+ 
+ 	r = kvm_init_mmu_notifier(kvm);
+ 	if (r)
++		goto out_err_no_mmu_notifier;
++
++	r = kvm_arch_post_init_vm(kvm);
++	if (r)
+ 		goto out_err;
+ 
+ 	mutex_lock(&kvm_lock);
+@@ -676,6 +697,11 @@ static struct kvm *kvm_create_vm(unsigne
+ 	return kvm;
+ 
+ out_err:
++#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
++	if (kvm->mmu_notifier.ops)
++		mmu_notifier_unregister(&kvm->mmu_notifier, current->mm);
++#endif
++out_err_no_mmu_notifier:
+ 	cleanup_srcu_struct(&kvm->irq_srcu);
+ out_err_no_irq_srcu:
+ 	cleanup_srcu_struct(&kvm->srcu);
+@@ -728,6 +754,8 @@ static void kvm_destroy_vm(struct kvm *k
+ 	mutex_lock(&kvm_lock);
+ 	list_del(&kvm->vm_list);
+ 	mutex_unlock(&kvm_lock);
++	kvm_arch_pre_destroy_vm(kvm);
++
+ 	kvm_free_irq_routing(kvm);
+ 	for (i = 0; i < KVM_NR_BUSES; i++) {
+ 		if (kvm->buses[i])
diff --git a/queue-4.9/kvm-x86-remove-now-unneeded-hugepage-gfn-adjustment.patch b/queue-4.9/kvm-x86-remove-now-unneeded-hugepage-gfn-adjustment.patch
new file mode 100644
index 00000000000..b3642107066
--- /dev/null
+++ b/queue-4.9/kvm-x86-remove-now-unneeded-hugepage-gfn-adjustment.patch
@@ -0,0 +1,74 @@
+From foo@baz Fri 15 Nov 2019 11:10:23 AM CST
+From: Paolo Bonzini <pbonzini@redhat.com>
+Date: Sun, 23 Jun 2019 19:15:49 +0200
+Subject: KVM: x86: remove now unneeded hugepage gfn adjustment
+
+From: Paolo Bonzini <pbonzini@redhat.com>
+
+commit d679b32611c0102ce33b9e1a4e4b94854ed1812a upstream.
+
+After the previous patch, the low bits of the gfn are masked in
+both FNAME(fetch) and __direct_map, so we do not need to clear them
+in transparent_hugepage_adjust.
+
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/mmu.c         |    9 +++------
+ arch/x86/kvm/paging_tmpl.h |    2 +-
+ 2 files changed, 4 insertions(+), 7 deletions(-)
+
+--- a/arch/x86/kvm/mmu.c
++++ b/arch/x86/kvm/mmu.c
+@@ -2824,11 +2824,10 @@ static int kvm_handle_bad_page(struct kv
+ }
+ 
+ static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu,
+-					gfn_t *gfnp, kvm_pfn_t *pfnp,
++					gfn_t gfn, kvm_pfn_t *pfnp,
+ 					int *levelp)
+ {
+ 	kvm_pfn_t pfn = *pfnp;
+-	gfn_t gfn = *gfnp;
+ 	int level = *levelp;
+ 
+ 	/*
+@@ -2855,8 +2854,6 @@ static void transparent_hugepage_adjust(
+ 		mask = KVM_PAGES_PER_HPAGE(level) - 1;
+ 		VM_BUG_ON((gfn & mask) != (pfn & mask));
+ 		if (pfn & mask) {
+-			gfn &= ~mask;
+-			*gfnp = gfn;
+ 			kvm_release_pfn_clean(pfn);
+ 			pfn &= ~mask;
+ 			kvm_get_pfn(pfn);
+@@ -3060,7 +3057,7 @@ static int nonpaging_map(struct kvm_vcpu
+ 		goto out_unlock;
+ 	make_mmu_pages_available(vcpu);
+ 	if (likely(!force_pt_level))
+-		transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level);
++		transparent_hugepage_adjust(vcpu, gfn, &pfn, &level);
+ 	r = __direct_map(vcpu, v, write, map_writable, level, pfn, prefault);
+ out_unlock:
+ 	spin_unlock(&vcpu->kvm->mmu_lock);
+@@ -3595,7 +3592,7 @@ static int tdp_page_fault(struct kvm_vcp
+ 		goto out_unlock;
+ 	make_mmu_pages_available(vcpu);
+ 	if (likely(!force_pt_level))
+-		transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level);
++		transparent_hugepage_adjust(vcpu, gfn, &pfn, &level);
+ 	r = __direct_map(vcpu, gpa, write, map_writable, level, pfn, prefault);
+ out_unlock:
+ 	spin_unlock(&vcpu->kvm->mmu_lock);
+--- a/arch/x86/kvm/paging_tmpl.h
++++ b/arch/x86/kvm/paging_tmpl.h
+@@ -806,7 +806,7 @@ static int FNAME(page_fault)(struct kvm_
+ 	kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT);
+ 	make_mmu_pages_available(vcpu);
+ 	if (!force_pt_level)
+-		transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level);
++		transparent_hugepage_adjust(vcpu, walker.gfn, &pfn, &level);
+ 	r = FNAME(fetch)(vcpu, addr, &walker, write_fault,
+ 			 level, pfn, map_writable, prefault);
+ 	kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT);
diff --git a/queue-4.9/kvm-x86-simplify-ept_misconfig.patch b/queue-4.9/kvm-x86-simplify-ept_misconfig.patch
new file mode 100644
index 00000000000..c3ace492b14
--- /dev/null
+++ b/queue-4.9/kvm-x86-simplify-ept_misconfig.patch
@@ -0,0 +1,110 @@
+From foo@baz Fri 15 Nov 2019 11:10:23 AM CST
+From: Paolo Bonzini <pbonzini@redhat.com>
+Date: Thu, 17 Aug 2017 18:36:56 +0200
+Subject: KVM: x86: simplify ept_misconfig
+
+From: Paolo Bonzini <pbonzini@redhat.com>
+
+commit e08d26f0712532c79b5ba6200862eaf2036f8df6 upstream.
+
+Calling handle_mmio_page_fault() has been unnecessary since commit
+e9ee956e311d ("KVM: x86: MMU: Move handle_mmio_page_fault() call to
+kvm_mmu_page_fault()", 2016-02-22).
+
+handle_mmio_page_fault() can now be made static.
+
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Reviewed-by: David Hildenbrand <david@redhat.com>
+Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
+[bwh: Backported to 4.9: adjust context]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/mmu.c |   19 ++++++++++++++++++-
+ arch/x86/kvm/mmu.h |   17 -----------------
+ arch/x86/kvm/vmx.c |   13 +++----------
+ 3 files changed, 21 insertions(+), 28 deletions(-)
+
+--- a/arch/x86/kvm/mmu.c
++++ b/arch/x86/kvm/mmu.c
+@@ -3383,7 +3383,23 @@ exit:
+ 	return reserved;
+ }
+ 
+-int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct)
++/*
++ * Return values of handle_mmio_page_fault:
++ * RET_MMIO_PF_EMULATE: it is a real mmio page fault, emulate the instruction
++ *			directly.
++ * RET_MMIO_PF_INVALID: invalid spte is detected then let the real page
++ *			fault path update the mmio spte.
++ * RET_MMIO_PF_RETRY: let CPU fault again on the address.
++ * RET_MMIO_PF_BUG: a bug was detected (and a WARN was printed).
++ */
++enum {
++	RET_MMIO_PF_EMULATE = 1,
++	RET_MMIO_PF_INVALID = 2,
++	RET_MMIO_PF_RETRY = 0,
++	RET_MMIO_PF_BUG = -1
++};
++
++static int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct)
+ {
+ 	u64 spte;
+ 	bool reserved;
+@@ -4520,6 +4536,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *
+ 			return 1;
+ 		if (r < 0)
+ 			return r;
++		/* Must be RET_MMIO_PF_INVALID.  */
+ 	}
+ 
+ 	r = vcpu->arch.mmu.page_fault(vcpu, cr2, error_code, false);
+--- a/arch/x86/kvm/mmu.h
++++ b/arch/x86/kvm/mmu.h
+@@ -56,23 +56,6 @@ void kvm_mmu_set_mmio_spte_mask(u64 mmio
+ void
+ reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context);
+ 
+-/*
+- * Return values of handle_mmio_page_fault:
+- * RET_MMIO_PF_EMULATE: it is a real mmio page fault, emulate the instruction
+- *			directly.
+- * RET_MMIO_PF_INVALID: invalid spte is detected then let the real page
+- *			fault path update the mmio spte.
+- * RET_MMIO_PF_RETRY: let CPU fault again on the address.
+- * RET_MMIO_PF_BUG: a bug was detected (and a WARN was printed).
+- */
+-enum {
+-	RET_MMIO_PF_EMULATE = 1,
+-	RET_MMIO_PF_INVALID = 2,
+-	RET_MMIO_PF_RETRY = 0,
+-	RET_MMIO_PF_BUG = -1
+-};
+-
+-int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct);
+ void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu);
+ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly);
+ bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu);
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -6556,16 +6556,9 @@ static int handle_ept_misconfig(struct k
+ 						       NULL, 0) == EMULATE_DONE;
+ 	}
+ 
+-	ret = handle_mmio_page_fault(vcpu, gpa, true);
+-	if (likely(ret == RET_MMIO_PF_EMULATE))
+-		return x86_emulate_instruction(vcpu, gpa, 0, NULL, 0) ==
+-					      EMULATE_DONE;
+-
+-	if (unlikely(ret == RET_MMIO_PF_INVALID))
+-		return kvm_mmu_page_fault(vcpu, gpa, 0, NULL, 0);
+-
+-	if (unlikely(ret == RET_MMIO_PF_RETRY))
+-		return 1;
++	ret = kvm_mmu_page_fault(vcpu, gpa, PFERR_RSVD_MASK, NULL, 0);
++	if (ret >= 0)
++		return ret;
+ 
+ 	/* It is the real ept misconfig */
+ 	WARN_ON(1);
diff --git a/queue-4.9/kvm-x86-use-intel-speculation-bugs-and-features-as-derived-in-generic-x86-code.patch b/queue-4.9/kvm-x86-use-intel-speculation-bugs-and-features-as-derived-in-generic-x86-code.patch
new file mode 100644
index 00000000000..cda9933e5f0
--- /dev/null
+++ b/queue-4.9/kvm-x86-use-intel-speculation-bugs-and-features-as-derived-in-generic-x86-code.patch
@@ -0,0 +1,62 @@
+From foo@baz Fri 15 Nov 2019 11:10:23 AM CST
+From: Paolo Bonzini <pbonzini@redhat.com>
+Date: Mon, 19 Aug 2019 17:24:07 +0200
+Subject: KVM: x86: use Intel speculation bugs and features as derived in generic x86 code
+
+From: Paolo Bonzini <pbonzini@redhat.com>
+
+commit 0c54914d0c52a15db9954a76ce80fee32cf318f4 upstream.
+
+Similar to AMD bits, set the Intel bits from the vendor-independent
+feature and bug flags, because KVM_GET_SUPPORTED_CPUID does not care
+about the vendor and they should be set on AMD processors as well.
+
+Suggested-by: Jim Mattson <jmattson@google.com>
+Reviewed-by: Jim Mattson <jmattson@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/cpuid.c |    8 ++++++++
+ arch/x86/kvm/x86.c   |    8 ++++++++
+ 2 files changed, 16 insertions(+)
+
+--- a/arch/x86/kvm/cpuid.c
++++ b/arch/x86/kvm/cpuid.c
+@@ -466,8 +466,16 @@ static inline int __do_cpuid_ent(struct
+ 			/* PKU is not yet implemented for shadow paging. */
+ 			if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE))
+ 				entry->ecx &= ~F(PKU);
++
+ 			entry->edx &= kvm_cpuid_7_0_edx_x86_features;
+ 			cpuid_mask(&entry->edx, CPUID_7_EDX);
++			if (boot_cpu_has(X86_FEATURE_IBPB) &&
++			    boot_cpu_has(X86_FEATURE_IBRS))
++				entry->edx |= F(SPEC_CTRL);
++			if (boot_cpu_has(X86_FEATURE_STIBP))
++				entry->edx |= F(INTEL_STIBP);
++			if (boot_cpu_has(X86_FEATURE_SSBD))
++				entry->edx |= F(SPEC_CTRL_SSBD);
+ 			/*
+ 			 * We emulate ARCH_CAPABILITIES in software even
+ 			 * if the host doesn't support it.
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -1043,8 +1043,16 @@ u64 kvm_get_arch_capabilities(void)
+ 	if (l1tf_vmx_mitigation != VMENTER_L1D_FLUSH_NEVER)
+ 		data |= ARCH_CAP_SKIP_VMENTRY_L1DFLUSH;
+ 
++	if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
++		data |= ARCH_CAP_RDCL_NO;
++	if (!boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS))
++		data |= ARCH_CAP_SSB_NO;
++	if (!boot_cpu_has_bug(X86_BUG_MDS))
++		data |= ARCH_CAP_MDS_NO;
++
+ 	return data;
+ }
++
+ EXPORT_SYMBOL_GPL(kvm_get_arch_capabilities);
+ 
+ static int kvm_get_msr_feature(struct kvm_msr_entry *msr)
diff --git a/queue-4.9/series b/queue-4.9/series
index b78224adbc9..fcecc359f3c 100644
--- a/queue-4.9/series
+++ b/queue-4.9/series
@@ -2,3 +2,30 @@ kvm-mmu-don-t-read-pdptes-when-paging-is-not-enabled.patch
 bluetooth-hci_ldisc-postpone-hci_uart_proto_ready-bit-set-in-hci_uart_set_proto.patch
 mips-bcm63xx-fix-switch-core-reset-on-bcm6368.patch
 usb-gadget-core-unmap-request-from-dma-only-if-previously-mapped.patch
+kvm-x86-use-intel-speculation-bugs-and-features-as-derived-in-generic-x86-code.patch
+x86-msr-add-the-ia32_tsx_ctrl-msr.patch
+x86-cpu-add-a-helper-function-x86_read_arch_cap_msr.patch
+x86-cpu-add-a-tsx-cmdline-option-with-tsx-disabled-by-default.patch
+x86-speculation-taa-add-mitigation-for-tsx-async-abort.patch
+x86-speculation-taa-add-sysfs-reporting-for-tsx-async-abort.patch
+kvm-x86-export-mds_no-0-to-guests-when-tsx-is-enabled.patch
+x86-tsx-add-auto-option-to-the-tsx-cmdline-parameter.patch
+x86-speculation-taa-add-documentation-for-tsx-async-abort.patch
+x86-tsx-add-config-options-to-set-tsx-on-off-auto.patch
+x86-speculation-taa-fix-printing-of-taa_msg_smt-on-ibrs_all-cpus.patch
+kvm-x86-simplify-ept_misconfig.patch
+kvm-x86-extend-usage-of-ret_mmio_pf_-constants.patch
+kvm-convert-kvm_lock-to-a-mutex.patch
+kvm-x86-do-not-release-the-page-inside-mmu_set_spte.patch
+kvm-x86-make-fname-fetch-and-__direct_map-more-similar.patch
+kvm-x86-remove-now-unneeded-hugepage-gfn-adjustment.patch
+kvm-x86-change-kvm_mmu_page_get_gfn-bug_on-to-warn_on.patch
+kvm-x86-add-is_executable_pte.patch
+kvm-x86-add-tracepoints-around-__direct_map-and-fname-fetch.patch
+kvm-vmx-svm-always-run-with-efer.nxe-1-when-shadow-paging-is-active.patch
+x86-bugs-add-itlb_multihit-bug-infrastructure.patch
+cpu-speculation-uninline-and-export-cpu-mitigations-helpers.patch
+kvm-mmu-itlb_multihit-mitigation.patch
+kvm-add-helper-function-for-creating-vm-worker-threads.patch
+kvm-x86-mmu-recovery-of-shattered-nx-large-pages.patch
+documentation-add-itlb_multihit-documentation.patch
diff --git a/queue-4.9/x86-bugs-add-itlb_multihit-bug-infrastructure.patch b/queue-4.9/x86-bugs-add-itlb_multihit-bug-infrastructure.patch
new file mode 100644
index 00000000000..fea99d608a9
--- /dev/null
+++ b/queue-4.9/x86-bugs-add-itlb_multihit-bug-infrastructure.patch
@@ -0,0 +1,258 @@
+From foo@baz Fri 15 Nov 2019 11:10:23 AM CST
+From: Vineela Tummalapalli <vineela.tummalapalli@intel.com>
+Date: Mon, 4 Nov 2019 12:22:01 +0100
+Subject: x86/bugs: Add ITLB_MULTIHIT bug infrastructure
+
+From: Vineela Tummalapalli <vineela.tummalapalli@intel.com>
+
+commit db4d30fbb71b47e4ecb11c4efa5d8aad4b03dfae upstream.
+
+Some processors may incur a machine check error possibly resulting in an
+unrecoverable CPU lockup when an instruction fetch encounters a TLB
+multi-hit in the instruction TLB. This can occur when the page size is
+changed along with either the physical address or cache type. The relevant
+erratum can be found here:
+
+   https://bugzilla.kernel.org/show_bug.cgi?id=205195
+
+There are other processors affected for which the erratum does not fully
+disclose the impact.
+
+This issue affects both bare-metal x86 page tables and EPT.
+
+It can be mitigated by either eliminating the use of large pages or by
+using careful TLB invalidations when changing the page size in the page
+tables.
+
+Just like Spectre, Meltdown, L1TF and MDS, a new bit has been allocated in
+MSR_IA32_ARCH_CAPABILITIES (PSCHANGE_MC_NO) and will be set on CPUs which
+are mitigated against this issue.
+
+Signed-off-by: Vineela Tummalapalli <vineela.tummalapalli@intel.com>
+Co-developed-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+[bwh: Backported to 4.9:
+ - No support for X86_VENDOR_HYGON, ATOM_AIRMONT_NP
+ - Adjust context, indentation]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/ABI/testing/sysfs-devices-system-cpu |    1 
+ arch/x86/include/asm/cpufeatures.h                 |    1 
+ arch/x86/include/asm/msr-index.h                   |    7 ++
+ arch/x86/kernel/cpu/bugs.c                         |   13 ++++
+ arch/x86/kernel/cpu/common.c                       |   61 +++++++++++----------
+ drivers/base/cpu.c                                 |    8 ++
+ include/linux/cpu.h                                |    2 
+ 7 files changed, 65 insertions(+), 28 deletions(-)
+
+--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
++++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
+@@ -359,6 +359,7 @@ What:		/sys/devices/system/cpu/vulnerabi
+ 		/sys/devices/system/cpu/vulnerabilities/l1tf
+ 		/sys/devices/system/cpu/vulnerabilities/mds
+ 		/sys/devices/system/cpu/vulnerabilities/tsx_async_abort
++		/sys/devices/system/cpu/vulnerabilities/itlb_multihit
+ Date:		January 2018
+ Contact:	Linux kernel mailing list <linux-kernel@vger.kernel.org>
+ Description:	Information about CPU vulnerabilities
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -358,5 +358,6 @@
+ #define X86_BUG_MSBDS_ONLY	X86_BUG(20) /* CPU is only affected by the  MSDBS variant of BUG_MDS */
+ #define X86_BUG_SWAPGS		X86_BUG(21) /* CPU is affected by speculation through SWAPGS */
+ #define X86_BUG_TAA		X86_BUG(22) /* CPU is affected by TSX Async Abort(TAA) */
++#define X86_BUG_ITLB_MULTIHIT	X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */
+ 
+ #endif /* _ASM_X86_CPUFEATURES_H */
+--- a/arch/x86/include/asm/msr-index.h
++++ b/arch/x86/include/asm/msr-index.h
+@@ -77,6 +77,13 @@
+ 						  * Microarchitectural Data
+ 						  * Sampling (MDS) vulnerabilities.
+ 						  */
++#define ARCH_CAP_PSCHANGE_MC_NO		BIT(6)	 /*
++						  * The processor is not susceptible to a
++						  * machine check error due to modifying the
++						  * code page size along with either the
++						  * physical address or cache type
++						  * without TLB invalidation.
++						  */
+ #define ARCH_CAP_TSX_CTRL_MSR		BIT(7)	/* MSR for TSX control is available. */
+ #define ARCH_CAP_TAA_NO			BIT(8)	/*
+ 						 * Not susceptible to
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -1395,6 +1395,11 @@ static ssize_t l1tf_show_state(char *buf
+ }
+ #endif
+ 
++static ssize_t itlb_multihit_show_state(char *buf)
++{
++	return sprintf(buf, "Processor vulnerable\n");
++}
++
+ static ssize_t mds_show_state(char *buf)
+ {
+ #ifdef CONFIG_HYPERVISOR_GUEST
+@@ -1497,6 +1502,9 @@ static ssize_t cpu_show_common(struct de
+ 	case X86_BUG_TAA:
+ 		return tsx_async_abort_show_state(buf);
+ 
++	case X86_BUG_ITLB_MULTIHIT:
++		return itlb_multihit_show_state(buf);
++
+ 	default:
+ 		break;
+ 	}
+@@ -1538,4 +1546,9 @@ ssize_t cpu_show_tsx_async_abort(struct
+ {
+ 	return cpu_show_common(dev, attr, buf, X86_BUG_TAA);
+ }
++
++ssize_t cpu_show_itlb_multihit(struct device *dev, struct device_attribute *attr, char *buf)
++{
++	return cpu_show_common(dev, attr, buf, X86_BUG_ITLB_MULTIHIT);
++}
+ #endif
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -891,13 +891,14 @@ static void identify_cpu_without_cpuid(s
+ 	c->x86_cache_bits = c->x86_phys_bits;
+ }
+ 
+-#define NO_SPECULATION	BIT(0)
+-#define NO_MELTDOWN	BIT(1)
+-#define NO_SSB		BIT(2)
+-#define NO_L1TF		BIT(3)
+-#define NO_MDS		BIT(4)
+-#define MSBDS_ONLY	BIT(5)
+-#define NO_SWAPGS	BIT(6)
++#define NO_SPECULATION		BIT(0)
++#define NO_MELTDOWN		BIT(1)
++#define NO_SSB			BIT(2)
++#define NO_L1TF			BIT(3)
++#define NO_MDS			BIT(4)
++#define MSBDS_ONLY		BIT(5)
++#define NO_SWAPGS		BIT(6)
++#define NO_ITLB_MULTIHIT	BIT(7)
+ 
+ #define VULNWL(_vendor, _family, _model, _whitelist)	\
+ 	{ X86_VENDOR_##_vendor, _family, _model, X86_FEATURE_ANY, _whitelist }
+@@ -915,26 +916,26 @@ static const __initconst struct x86_cpu_
+ 	VULNWL(NSC,	5, X86_MODEL_ANY,	NO_SPECULATION),
+ 
+ 	/* Intel Family 6 */
+-	VULNWL_INTEL(ATOM_SALTWELL,		NO_SPECULATION),
+-	VULNWL_INTEL(ATOM_SALTWELL_TABLET,	NO_SPECULATION),
+-	VULNWL_INTEL(ATOM_SALTWELL_MID,		NO_SPECULATION),
+-	VULNWL_INTEL(ATOM_BONNELL,		NO_SPECULATION),
+-	VULNWL_INTEL(ATOM_BONNELL_MID,		NO_SPECULATION),
+-
+-	VULNWL_INTEL(ATOM_SILVERMONT,		NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
+-	VULNWL_INTEL(ATOM_SILVERMONT_X,		NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
+-	VULNWL_INTEL(ATOM_SILVERMONT_MID,	NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
+-	VULNWL_INTEL(ATOM_AIRMONT,		NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
+-	VULNWL_INTEL(XEON_PHI_KNL,		NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
+-	VULNWL_INTEL(XEON_PHI_KNM,		NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
++	VULNWL_INTEL(ATOM_SALTWELL,		NO_SPECULATION | NO_ITLB_MULTIHIT),
++	VULNWL_INTEL(ATOM_SALTWELL_TABLET,	NO_SPECULATION | NO_ITLB_MULTIHIT),
++	VULNWL_INTEL(ATOM_SALTWELL_MID,		NO_SPECULATION | NO_ITLB_MULTIHIT),
++	VULNWL_INTEL(ATOM_BONNELL,		NO_SPECULATION | NO_ITLB_MULTIHIT),
++	VULNWL_INTEL(ATOM_BONNELL_MID,		NO_SPECULATION | NO_ITLB_MULTIHIT),
++
++	VULNWL_INTEL(ATOM_SILVERMONT,		NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
++	VULNWL_INTEL(ATOM_SILVERMONT_X,		NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
++	VULNWL_INTEL(ATOM_SILVERMONT_MID,	NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
++	VULNWL_INTEL(ATOM_AIRMONT,		NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
++	VULNWL_INTEL(XEON_PHI_KNL,		NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
++	VULNWL_INTEL(XEON_PHI_KNM,		NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
+ 
+ 	VULNWL_INTEL(CORE_YONAH,		NO_SSB),
+ 
+-	VULNWL_INTEL(ATOM_AIRMONT_MID,		NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
++	VULNWL_INTEL(ATOM_AIRMONT_MID,		NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
+ 
+-	VULNWL_INTEL(ATOM_GOLDMONT,		NO_MDS | NO_L1TF | NO_SWAPGS),
+-	VULNWL_INTEL(ATOM_GOLDMONT_X,		NO_MDS | NO_L1TF | NO_SWAPGS),
+-	VULNWL_INTEL(ATOM_GOLDMONT_PLUS,	NO_MDS | NO_L1TF | NO_SWAPGS),
++	VULNWL_INTEL(ATOM_GOLDMONT,		NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
++	VULNWL_INTEL(ATOM_GOLDMONT_X,		NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
++	VULNWL_INTEL(ATOM_GOLDMONT_PLUS,	NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
+ 
+ 	/*
+ 	 * Technically, swapgs isn't serializing on AMD (despite it previously
+@@ -945,13 +946,13 @@ static const __initconst struct x86_cpu_
+ 	 */
+ 
+ 	/* AMD Family 0xf - 0x12 */
+-	VULNWL_AMD(0x0f,	NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS),
+-	VULNWL_AMD(0x10,	NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS),
+-	VULNWL_AMD(0x11,	NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS),
+-	VULNWL_AMD(0x12,	NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS),
++	VULNWL_AMD(0x0f,	NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
++	VULNWL_AMD(0x10,	NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
++	VULNWL_AMD(0x11,	NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
++	VULNWL_AMD(0x12,	NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
+ 
+ 	/* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */
+-	VULNWL_AMD(X86_FAMILY_ANY,	NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS),
++	VULNWL_AMD(X86_FAMILY_ANY,	NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
+ 	{}
+ };
+ 
+@@ -976,6 +977,10 @@ static void __init cpu_set_bug_bits(stru
+ {
+ 	u64 ia32_cap = x86_read_arch_cap_msr();
+ 
++	/* Set ITLB_MULTIHIT bug if cpu is not in the whitelist and not mitigated */
++	if (!cpu_matches(NO_ITLB_MULTIHIT) && !(ia32_cap & ARCH_CAP_PSCHANGE_MC_NO))
++		setup_force_cpu_bug(X86_BUG_ITLB_MULTIHIT);
++
+ 	if (cpu_matches(NO_SPECULATION))
+ 		return;
+ 
+--- a/drivers/base/cpu.c
++++ b/drivers/base/cpu.c
+@@ -544,6 +544,12 @@ ssize_t __weak cpu_show_tsx_async_abort(
+ 	return sprintf(buf, "Not affected\n");
+ }
+ 
++ssize_t __weak cpu_show_itlb_multihit(struct device *dev,
++			    struct device_attribute *attr, char *buf)
++{
++	return sprintf(buf, "Not affected\n");
++}
++
+ static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
+ static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
+ static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL);
+@@ -551,6 +557,7 @@ static DEVICE_ATTR(spec_store_bypass, 04
+ static DEVICE_ATTR(l1tf, 0444, cpu_show_l1tf, NULL);
+ static DEVICE_ATTR(mds, 0444, cpu_show_mds, NULL);
+ static DEVICE_ATTR(tsx_async_abort, 0444, cpu_show_tsx_async_abort, NULL);
++static DEVICE_ATTR(itlb_multihit, 0444, cpu_show_itlb_multihit, NULL);
+ 
+ static struct attribute *cpu_root_vulnerabilities_attrs[] = {
+ 	&dev_attr_meltdown.attr,
+@@ -560,6 +567,7 @@ static struct attribute *cpu_root_vulner
+ 	&dev_attr_l1tf.attr,
+ 	&dev_attr_mds.attr,
+ 	&dev_attr_tsx_async_abort.attr,
++	&dev_attr_itlb_multihit.attr,
+ 	NULL
+ };
+ 
+--- a/include/linux/cpu.h
++++ b/include/linux/cpu.h
+@@ -59,6 +59,8 @@ extern ssize_t cpu_show_mds(struct devic
+ extern ssize_t cpu_show_tsx_async_abort(struct device *dev,
+ 					struct device_attribute *attr,
+ 					char *buf);
++extern ssize_t cpu_show_itlb_multihit(struct device *dev,
++				      struct device_attribute *attr, char *buf);
+ 
+ extern __printf(4, 5)
+ struct device *cpu_device_create(struct device *parent, void *drvdata,
diff --git a/queue-4.9/x86-cpu-add-a-helper-function-x86_read_arch_cap_msr.patch b/queue-4.9/x86-cpu-add-a-helper-function-x86_read_arch_cap_msr.patch
new file mode 100644
index 00000000000..2f1e17c0d31
--- /dev/null
+++ b/queue-4.9/x86-cpu-add-a-helper-function-x86_read_arch_cap_msr.patch
@@ -0,0 +1,67 @@
+From foo@baz Fri 15 Nov 2019 11:10:23 AM CST
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Date: Wed, 23 Oct 2019 10:52:35 +0200
+Subject: x86/cpu: Add a helper function x86_read_arch_cap_msr()
+
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+
+commit 286836a70433fb64131d2590f4bf512097c255e1 upstream.
+
+Add a helper function to read the IA32_ARCH_CAPABILITIES MSR.
+
+Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Tested-by: Neelima Krishnan <neelima.krishnan@intel.com>
+Reviewed-by: Mark Gross <mgross@linux.intel.com>
+Reviewed-by: Tony Luck <tony.luck@intel.com>
+Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/common.c |   15 +++++++++++----
+ arch/x86/kernel/cpu/cpu.h    |    2 ++
+ 2 files changed, 13 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -962,19 +962,26 @@ static bool __init cpu_matches(unsigned
+ 	return m && !!(m->driver_data & which);
+ }
+ 
+-static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
++u64 x86_read_arch_cap_msr(void)
+ {
+ 	u64 ia32_cap = 0;
+ 
++	if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
++		rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);
++
++	return ia32_cap;
++}
++
++static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
++{
++	u64 ia32_cap = x86_read_arch_cap_msr();
++
+ 	if (cpu_matches(NO_SPECULATION))
+ 		return;
+ 
+ 	setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
+ 	setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
+ 
+-	if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES))
+-		rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);
+-
+ 	if (!cpu_matches(NO_SSB) && !(ia32_cap & ARCH_CAP_SSB_NO) &&
+ 	   !cpu_has(c, X86_FEATURE_AMD_SSB_NO))
+ 		setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS);
+--- a/arch/x86/kernel/cpu/cpu.h
++++ b/arch/x86/kernel/cpu/cpu.h
+@@ -51,4 +51,6 @@ extern int detect_ht_early(struct cpuinf
+ 
+ extern void x86_spec_ctrl_setup_ap(void);
+ 
++extern u64 x86_read_arch_cap_msr(void);
++
+ #endif /* ARCH_X86_CPU_H */
diff --git a/queue-4.9/x86-cpu-add-a-tsx-cmdline-option-with-tsx-disabled-by-default.patch b/queue-4.9/x86-cpu-add-a-tsx-cmdline-option-with-tsx-disabled-by-default.patch
new file mode 100644
index 00000000000..504438f1d4c
--- /dev/null
+++ b/queue-4.9/x86-cpu-add-a-tsx-cmdline-option-with-tsx-disabled-by-default.patch
@@ -0,0 +1,264 @@
+From foo@baz Fri 15 Nov 2019 11:10:23 AM CST
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Date: Wed, 23 Oct 2019 11:01:53 +0200
+Subject: x86/cpu: Add a "tsx=" cmdline option with TSX disabled by default
+
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+
+commit 95c5824f75f3ba4c9e8e5a4b1a623c95390ac266 upstream.
+
+Add a kernel cmdline parameter "tsx" to control the Transactional
+Synchronization Extensions (TSX) feature. On CPUs that support TSX
+control, use "tsx=on|off" to enable or disable TSX. Not specifying this
+option is equivalent to "tsx=off". This is because on certain processors
+TSX may be used as a part of a speculative side channel attack.
+
+Carve out the TSX controlling functionality into a separate compilation
+unit because TSX is a CPU feature while the TSX async abort control
+machinery will go to cpu/bugs.c.
+
+ [ bp: - Massage, shorten and clear the arg buffer.
+       - Clarifications of the tsx= possible options - Josh.
+       - Expand on TSX_CTRL availability - Pawan. ]
+
+Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
+[bwh: Backported to 4.9: adjust filenames, context]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/kernel-parameters.txt |   26 +++++++
+ arch/x86/kernel/cpu/Makefile        |    2 
+ arch/x86/kernel/cpu/common.c        |    2 
+ arch/x86/kernel/cpu/cpu.h           |   16 ++++
+ arch/x86/kernel/cpu/intel.c         |    5 +
+ arch/x86/kernel/cpu/tsx.c           |  125 ++++++++++++++++++++++++++++++++++++
+ 6 files changed, 175 insertions(+), 1 deletion(-)
+ create mode 100644 arch/x86/kernel/cpu/tsx.c
+
+--- a/Documentation/kernel-parameters.txt
++++ b/Documentation/kernel-parameters.txt
+@@ -4516,6 +4516,32 @@ bytes respectively. Such letter suffixes
+ 			platforms where RDTSC is slow and this accounting
+ 			can add overhead.
+ 
++	tsx=		[X86] Control Transactional Synchronization
++			Extensions (TSX) feature in Intel processors that
++			support TSX control.
++
++			This parameter controls the TSX feature. The options are:
++
++			on	- Enable TSX on the system. Although there are
++				mitigations for all known security vulnerabilities,
++				TSX has been known to be an accelerator for
++				several previous speculation-related CVEs, and
++				so there may be unknown security risks associated
++				with leaving it enabled.
++
++			off	- Disable TSX on the system. (Note that this
++				option takes effect only on newer CPUs which are
++				not vulnerable to MDS, i.e., have
++				MSR_IA32_ARCH_CAPABILITIES.MDS_NO=1 and which get
++				the new IA32_TSX_CTRL MSR through a microcode
++				update. This new MSR allows for the reliable
++				deactivation of the TSX functionality.)
++
++			Not specifying this option is equivalent to tsx=off.
++
++			See Documentation/hw-vuln/tsx_async_abort.rst
++			for more details.
++
+ 	turbografx.map[2|3]=	[HW,JOY]
+ 			TurboGraFX parallel port interface
+ 			Format:
+--- a/arch/x86/kernel/cpu/Makefile
++++ b/arch/x86/kernel/cpu/Makefile
+@@ -25,7 +25,7 @@ obj-y			+= bugs.o
+ obj-$(CONFIG_PROC_FS)	+= proc.o
+ obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o
+ 
+-obj-$(CONFIG_CPU_SUP_INTEL)		+= intel.o
++obj-$(CONFIG_CPU_SUP_INTEL)		+= intel.o tsx.o
+ obj-$(CONFIG_CPU_SUP_AMD)		+= amd.o
+ obj-$(CONFIG_CPU_SUP_CYRIX_32)		+= cyrix.o
+ obj-$(CONFIG_CPU_SUP_CENTAUR)		+= centaur.o
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -1416,6 +1416,8 @@ void __init identify_boot_cpu(void)
+ 	enable_sep_cpu();
+ #endif
+ 	cpu_detect_tlb(&boot_cpu_data);
++
++	tsx_init();
+ }
+ 
+ void identify_secondary_cpu(struct cpuinfo_x86 *c)
+--- a/arch/x86/kernel/cpu/cpu.h
++++ b/arch/x86/kernel/cpu/cpu.h
+@@ -44,6 +44,22 @@ struct _tlb_table {
+ extern const struct cpu_dev *const __x86_cpu_dev_start[],
+ 			    *const __x86_cpu_dev_end[];
+ 
++#ifdef CONFIG_CPU_SUP_INTEL
++enum tsx_ctrl_states {
++	TSX_CTRL_ENABLE,
++	TSX_CTRL_DISABLE,
++	TSX_CTRL_NOT_SUPPORTED,
++};
++
++extern __ro_after_init enum tsx_ctrl_states tsx_ctrl_state;
++
++extern void __init tsx_init(void);
++extern void tsx_enable(void);
++extern void tsx_disable(void);
++#else
++static inline void tsx_init(void) { }
++#endif /* CONFIG_CPU_SUP_INTEL */
++
+ extern void get_cpu_cap(struct cpuinfo_x86 *c);
+ extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c);
+ extern int detect_extended_topology_early(struct cpuinfo_x86 *c);
+--- a/arch/x86/kernel/cpu/intel.c
++++ b/arch/x86/kernel/cpu/intel.c
+@@ -642,6 +642,11 @@ static void init_intel(struct cpuinfo_x8
+ 		detect_vmx_virtcap(c);
+ 
+ 	init_intel_energy_perf(c);
++
++	if (tsx_ctrl_state == TSX_CTRL_ENABLE)
++		tsx_enable();
++	if (tsx_ctrl_state == TSX_CTRL_DISABLE)
++		tsx_disable();
+ }
+ 
+ #ifdef CONFIG_X86_32
+--- /dev/null
++++ b/arch/x86/kernel/cpu/tsx.c
+@@ -0,0 +1,125 @@
++// SPDX-License-Identifier: GPL-2.0
++/*
++ * Intel Transactional Synchronization Extensions (TSX) control.
++ *
++ * Copyright (C) 2019 Intel Corporation
++ *
++ * Author:
++ *	Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
++ */
++
++#include <linux/cpufeature.h>
++
++#include <asm/cmdline.h>
++
++#include "cpu.h"
++
++enum tsx_ctrl_states tsx_ctrl_state __ro_after_init = TSX_CTRL_NOT_SUPPORTED;
++
++void tsx_disable(void)
++{
++	u64 tsx;
++
++	rdmsrl(MSR_IA32_TSX_CTRL, tsx);
++
++	/* Force all transactions to immediately abort */
++	tsx |= TSX_CTRL_RTM_DISABLE;
++
++	/*
++	 * Ensure TSX support is not enumerated in CPUID.
++	 * This is visible to userspace and will ensure they
++	 * do not waste resources trying TSX transactions that
++	 * will always abort.
++	 */
++	tsx |= TSX_CTRL_CPUID_CLEAR;
++
++	wrmsrl(MSR_IA32_TSX_CTRL, tsx);
++}
++
++void tsx_enable(void)
++{
++	u64 tsx;
++
++	rdmsrl(MSR_IA32_TSX_CTRL, tsx);
++
++	/* Enable the RTM feature in the cpu */
++	tsx &= ~TSX_CTRL_RTM_DISABLE;
++
++	/*
++	 * Ensure TSX support is enumerated in CPUID.
++	 * This is visible to userspace and will ensure they
++	 * can enumerate and use the TSX feature.
++	 */
++	tsx &= ~TSX_CTRL_CPUID_CLEAR;
++
++	wrmsrl(MSR_IA32_TSX_CTRL, tsx);
++}
++
++static bool __init tsx_ctrl_is_supported(void)
++{
++	u64 ia32_cap = x86_read_arch_cap_msr();
++
++	/*
++	 * TSX is controlled via MSR_IA32_TSX_CTRL.  However, support for this
++	 * MSR is enumerated by ARCH_CAP_TSX_CTRL_MSR bit in MSR_IA32_ARCH_CAPABILITIES.
++	 *
++	 * TSX control (aka MSR_IA32_TSX_CTRL) is only available after a
++	 * microcode update on CPUs that have their MSR_IA32_ARCH_CAPABILITIES
++	 * bit MDS_NO=1. CPUs with MDS_NO=0 are not planned to get
++	 * MSR_IA32_TSX_CTRL support even after a microcode update. Thus,
++	 * tsx= cmdline requests will do nothing on CPUs without
++	 * MSR_IA32_TSX_CTRL support.
++	 */
++	return !!(ia32_cap & ARCH_CAP_TSX_CTRL_MSR);
++}
++
++void __init tsx_init(void)
++{
++	char arg[4] = {};
++	int ret;
++
++	if (!tsx_ctrl_is_supported())
++		return;
++
++	ret = cmdline_find_option(boot_command_line, "tsx", arg, sizeof(arg));
++	if (ret >= 0) {
++		if (!strcmp(arg, "on")) {
++			tsx_ctrl_state = TSX_CTRL_ENABLE;
++		} else if (!strcmp(arg, "off")) {
++			tsx_ctrl_state = TSX_CTRL_DISABLE;
++		} else {
++			tsx_ctrl_state = TSX_CTRL_DISABLE;
++			pr_err("tsx: invalid option, defaulting to off\n");
++		}
++	} else {
++		/* tsx= not provided, defaulting to off */
++		tsx_ctrl_state = TSX_CTRL_DISABLE;
++	}
++
++	if (tsx_ctrl_state == TSX_CTRL_DISABLE) {
++		tsx_disable();
++
++		/*
++		 * tsx_disable() will change the state of the
++		 * RTM CPUID bit.  Clear it here since it is now
++		 * expected to be not set.
++		 */
++		setup_clear_cpu_cap(X86_FEATURE_RTM);
++	} else if (tsx_ctrl_state == TSX_CTRL_ENABLE) {
++
++		/*
++		 * HW defaults TSX to be enabled at bootup.
++		 * We may still need the TSX enable support
++		 * during init for special cases like
++		 * kexec after TSX is disabled.
++		 */
++		tsx_enable();
++
++		/*
++		 * tsx_enable() will change the state of the
++		 * RTM CPUID bit.  Force it here since it is now
++		 * expected to be set.
++		 */
++		setup_force_cpu_cap(X86_FEATURE_RTM);
++	}
++}
diff --git a/queue-4.9/x86-msr-add-the-ia32_tsx_ctrl-msr.patch b/queue-4.9/x86-msr-add-the-ia32_tsx_ctrl-msr.patch
new file mode 100644
index 00000000000..08c35389eab
--- /dev/null
+++ b/queue-4.9/x86-msr-add-the-ia32_tsx_ctrl-msr.patch
@@ -0,0 +1,84 @@
+From foo@baz Fri 15 Nov 2019 11:10:23 AM CST
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Date: Wed, 23 Oct 2019 10:45:50 +0200
+Subject: x86/msr: Add the IA32_TSX_CTRL MSR
+
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+
+commit c2955f270a84762343000f103e0640d29c7a96f3 upstream.
+
+Transactional Synchronization Extensions (TSX) may be used on certain
+processors as part of a speculative side channel attack.  A microcode
+update for existing processors that are vulnerable to this attack will
+add a new MSR - IA32_TSX_CTRL to allow the system administrator the
+option to disable TSX as one of the possible mitigations.
+
+The CPUs which get this new MSR after a microcode upgrade are the ones
+which set MSR_IA32_ARCH_CAPABILITIES.MDS_NO (bit 5). CPUs which do not
+set it have CPUID.MD_CLEAR, i.e., the VERW implementation which clears
+all CPU buffers takes care of the TAA case as well.
+
+  [ Note that future processors that are not vulnerable will also
+    support the IA32_TSX_CTRL MSR. ]
+
+Add defines for the new IA32_TSX_CTRL MSR and its bits.
+
+TSX has two sub-features:
+
+1. Restricted Transactional Memory (RTM) is an explicitly-used feature
+   where new instructions begin and end TSX transactions.
+2. Hardware Lock Elision (HLE) is implicitly used when certain kinds of
+   "old" style locks are used by software.
+
+Bit 7 of the IA32_ARCH_CAPABILITIES indicates the presence of the
+IA32_TSX_CTRL MSR.
+
+There are two control bits in IA32_TSX_CTRL MSR:
+
+  Bit 0: When set, it disables the Restricted Transactional Memory (RTM)
+         sub-feature of TSX (will force all transactions to abort on the
+	 XBEGIN instruction).
+
+  Bit 1: When set, it disables the enumeration of the RTM and HLE feature
+         (i.e. it will make CPUID(EAX=7).EBX{bit4} and
+	  CPUID(EAX=7).EBX{bit11} read as 0).
+
+The other TSX sub-feature, Hardware Lock Elision (HLE), is
+unconditionally disabled by the new microcode but still enumerated
+as present by CPUID(EAX=7).EBX{bit4}, unless disabled by
+IA32_TSX_CTRL_MSR[1] - TSX_CTRL_CPUID_CLEAR.
+
+Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Tested-by: Neelima Krishnan <neelima.krishnan@intel.com>
+Reviewed-by: Mark Gross <mgross@linux.intel.com>
+Reviewed-by: Tony Luck <tony.luck@intel.com>
+Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/msr-index.h |    5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/arch/x86/include/asm/msr-index.h
++++ b/arch/x86/include/asm/msr-index.h
+@@ -77,6 +77,7 @@
+ 						  * Microarchitectural Data
+ 						  * Sampling (MDS) vulnerabilities.
+ 						  */
++#define ARCH_CAP_TSX_CTRL_MSR		BIT(7)	/* MSR for TSX control is available. */
+ 
+ #define MSR_IA32_FLUSH_CMD		0x0000010b
+ #define L1D_FLUSH			BIT(0)	/*
+@@ -87,6 +88,10 @@
+ #define MSR_IA32_BBL_CR_CTL		0x00000119
+ #define MSR_IA32_BBL_CR_CTL3		0x0000011e
+ 
++#define MSR_IA32_TSX_CTRL		0x00000122
++#define TSX_CTRL_RTM_DISABLE		BIT(0)	/* Disable RTM feature */
++#define TSX_CTRL_CPUID_CLEAR		BIT(1)	/* Disable TSX enumeration */
++
+ #define MSR_IA32_SYSENTER_CS		0x00000174
+ #define MSR_IA32_SYSENTER_ESP		0x00000175
+ #define MSR_IA32_SYSENTER_EIP		0x00000176
diff --git a/queue-4.9/x86-speculation-taa-add-documentation-for-tsx-async-abort.patch b/queue-4.9/x86-speculation-taa-add-documentation-for-tsx-async-abort.patch
new file mode 100644
index 00000000000..13c5e159ae3
--- /dev/null
+++ b/queue-4.9/x86-speculation-taa-add-documentation-for-tsx-async-abort.patch
@@ -0,0 +1,522 @@
+From foo@baz Fri 15 Nov 2019 11:10:23 AM CST
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Date: Wed, 23 Oct 2019 12:32:55 +0200
+Subject: x86/speculation/taa: Add documentation for TSX Async Abort
+
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+
+commit a7a248c593e4fd7a67c50b5f5318fe42a0db335e upstream.
+
+Add the documentation for TSX Async Abort. Include the description of
+the issue, how to check the mitigation state, control the mitigation,
+guidance for system administrators.
+
+ [ bp: Add proper SPDX tags, touch ups by Josh and me. ]
+
+Co-developed-by: Antonio Gomez Iglesias <antonio.gomez.iglesias@intel.com>
+
+Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Signed-off-by: Antonio Gomez Iglesias <antonio.gomez.iglesias@intel.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Mark Gross <mgross@linux.intel.com>
+Reviewed-by: Tony Luck <tony.luck@intel.com>
+Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
+[bwh: Backported to 4.9: adjust filenames, context]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/ABI/testing/sysfs-devices-system-cpu |    1 
+ Documentation/hw-vuln/index.rst                    |    1 
+ Documentation/hw-vuln/tsx_async_abort.rst          |  276 +++++++++++++++++++++
+ Documentation/kernel-parameters.txt                |   38 ++
+ Documentation/x86/index.rst                        |    1 
+ Documentation/x86/tsx_async_abort.rst              |  117 ++++++++
+ 6 files changed, 434 insertions(+)
+ create mode 100644 Documentation/hw-vuln/tsx_async_abort.rst
+ create mode 100644 Documentation/x86/tsx_async_abort.rst
+
+--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
++++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
+@@ -358,6 +358,7 @@ What:		/sys/devices/system/cpu/vulnerabi
+ 		/sys/devices/system/cpu/vulnerabilities/spec_store_bypass
+ 		/sys/devices/system/cpu/vulnerabilities/l1tf
+ 		/sys/devices/system/cpu/vulnerabilities/mds
++		/sys/devices/system/cpu/vulnerabilities/tsx_async_abort
+ Date:		January 2018
+ Contact:	Linux kernel mailing list <linux-kernel@vger.kernel.org>
+ Description:	Information about CPU vulnerabilities
+--- a/Documentation/hw-vuln/index.rst
++++ b/Documentation/hw-vuln/index.rst
+@@ -11,3 +11,4 @@ are configurable at compile, boot or run
+ 
+    l1tf
+    mds
++   tsx_async_abort
+--- /dev/null
++++ b/Documentation/hw-vuln/tsx_async_abort.rst
+@@ -0,0 +1,276 @@
++.. SPDX-License-Identifier: GPL-2.0
++
++TAA - TSX Asynchronous Abort
++======================================
++
++TAA is a hardware vulnerability that allows unprivileged speculative access to
++data which is available in various CPU internal buffers by using asynchronous
++aborts within an Intel TSX transactional region.
++
++Affected processors
++-------------------
++
++This vulnerability only affects Intel processors that support Intel
++Transactional Synchronization Extensions (TSX) when the TAA_NO bit (bit 8)
++is 0 in the IA32_ARCH_CAPABILITIES MSR.  On processors where the MDS_NO bit
++(bit 5) is 0 in the IA32_ARCH_CAPABILITIES MSR, the existing MDS mitigations
++also mitigate against TAA.
++
++Whether a processor is affected or not can be read out from the TAA
++vulnerability file in sysfs. See :ref:`tsx_async_abort_sys_info`.
++
++Related CVEs
++------------
++
++The following CVE entry is related to this TAA issue:
++
++   ==============  =====  ===================================================
++   CVE-2019-11135  TAA    TSX Asynchronous Abort (TAA) condition on some
++                          microprocessors utilizing speculative execution may
++                          allow an authenticated user to potentially enable
++                          information disclosure via a side channel with
++                          local access.
++   ==============  =====  ===================================================
++
++Problem
++-------
++
++When performing store, load or L1 refill operations, processors write
++data into temporary microarchitectural structures (buffers). The data in
++those buffers can be forwarded to load operations as an optimization.
++
++Intel TSX is an extension to the x86 instruction set architecture that adds
++hardware transactional memory support to improve performance of multi-threaded
++software. TSX lets the processor expose and exploit concurrency hidden in an
++application due to dynamically avoiding unnecessary synchronization.
++
++TSX supports atomic memory transactions that are either committed (success) or
++aborted. During an abort, operations that happened within the transactional region
++are rolled back. An asynchronous abort takes place, among other options, when a
++different thread accesses a cache line that is also used within the transactional
++region when that access might lead to a data race.
++
++Immediately after an uncompleted asynchronous abort, certain speculatively
++executed loads may read data from those internal buffers and pass it to dependent
++operations. This can be then used to infer the value via a cache side channel
++attack.
++
++Because the buffers are potentially shared between Hyper-Threads, cross
++Hyper-Thread attacks are possible.
++
++The victim of a malicious actor does not need to make use of TSX. Only the
++attacker needs to begin a TSX transaction and raise an asynchronous abort
++which in turn potentially leaks data stored in the buffers.
++
++More detailed technical information is available in the TAA specific x86
++architecture section: :ref:`Documentation/x86/tsx_async_abort.rst <tsx_async_abort>`.
++
++
++Attack scenarios
++----------------
++
++Attacks against the TAA vulnerability can be implemented from unprivileged
++applications running on hosts or guests.
++
++As for MDS, the attacker has no control over the memory addresses that can
++be leaked. Only the victim is responsible for bringing data to the CPU. As
++a result, the malicious actor has to sample as much data as possible and
++then postprocess it to try to infer any useful information from it.
++
++A potential attacker only has read access to the data. Also, there is no direct
++privilege escalation by using this technique.
++
++
++.. _tsx_async_abort_sys_info:
++
++TAA system information
++-----------------------
++
++The Linux kernel provides a sysfs interface to enumerate the current TAA status
++of mitigated systems. The relevant sysfs file is:
++
++/sys/devices/system/cpu/vulnerabilities/tsx_async_abort
++
++The possible values in this file are:
++
++.. list-table::
++
++   * - 'Vulnerable'
++     - The CPU is affected by this vulnerability and the microcode and kernel mitigation are not applied.
++   * - 'Vulnerable: Clear CPU buffers attempted, no microcode'
++     - The system tries to clear the buffers but the microcode might not support the operation.
++   * - 'Mitigation: Clear CPU buffers'
++     - The microcode has been updated to clear the buffers. TSX is still enabled.
++   * - 'Mitigation: TSX disabled'
++     - TSX is disabled.
++   * - 'Not affected'
++     - The CPU is not affected by this issue.
++
++.. _ucode_needed:
++
++Best effort mitigation mode
++^^^^^^^^^^^^^^^^^^^^^^^^^^^
++
++If the processor is vulnerable, but the availability of the microcode-based
++mitigation mechanism is not advertised via CPUID the kernel selects a best
++effort mitigation mode.  This mode invokes the mitigation instructions
++without a guarantee that they clear the CPU buffers.
++
++This is done to address virtualization scenarios where the host has the
++microcode update applied, but the hypervisor is not yet updated to expose the
++CPUID to the guest. If the host has updated microcode the protection takes
++effect; otherwise a few CPU cycles are wasted pointlessly.
++
++The state in the tsx_async_abort sysfs file reflects this situation
++accordingly.
++
++
++Mitigation mechanism
++--------------------
++
++The kernel detects the affected CPUs and the presence of the microcode which is
++required. If a CPU is affected and the microcode is available, then the kernel
++enables the mitigation by default.
++
++
++The mitigation can be controlled at boot time via a kernel command line option.
++See :ref:`taa_mitigation_control_command_line`.
++
++.. _virt_mechanism:
++
++Virtualization mitigation
++^^^^^^^^^^^^^^^^^^^^^^^^^
++
++Affected systems where the host has TAA microcode and TAA is mitigated by
++having disabled TSX previously, are not vulnerable regardless of the status
++of the VMs.
++
++In all other cases, if the host either does not have the TAA microcode or
++the kernel is not mitigated, the system might be vulnerable.
++
++
++.. _taa_mitigation_control_command_line:
++
++Mitigation control on the kernel command line
++---------------------------------------------
++
++The kernel command line allows to control the TAA mitigations at boot time with
++the option "tsx_async_abort=". The valid arguments for this option are:
++
++  ============  =============================================================
++  off		This option disables the TAA mitigation on affected platforms.
++                If the system has TSX enabled (see next parameter) and the CPU
++                is affected, the system is vulnerable.
++
++  full	        TAA mitigation is enabled. If TSX is enabled, on an affected
++                system it will clear CPU buffers on ring transitions. On
++                systems which are MDS-affected and deploy MDS mitigation,
++                TAA is also mitigated. Specifying this option on those
++                systems will have no effect.
++
++  full,nosmt    The same as tsx_async_abort=full, with SMT disabled on
++                vulnerable CPUs that have TSX enabled. This is the complete
++                mitigation. When TSX is disabled, SMT is not disabled because
++                CPU is not vulnerable to cross-thread TAA attacks.
++  ============  =============================================================
++
++Not specifying this option is equivalent to "tsx_async_abort=full".
++
++The kernel command line also allows to control the TSX feature using the
++parameter "tsx=" on CPUs which support TSX control. MSR_IA32_TSX_CTRL is used
++to control the TSX feature and the enumeration of the TSX feature bits (RTM
++and HLE) in CPUID.
++
++The valid options are:
++
++  ============  =============================================================
++  off		Disables TSX on the system.
++
++                Note that this option takes effect only on newer CPUs which are
++                not vulnerable to MDS, i.e., have MSR_IA32_ARCH_CAPABILITIES.MDS_NO=1
++                and which get the new IA32_TSX_CTRL MSR through a microcode
++                update. This new MSR allows for the reliable deactivation of
++                the TSX functionality.
++
++  on		Enables TSX.
++
++                Although there are mitigations for all known security
++                vulnerabilities, TSX has been known to be an accelerator for
++                several previous speculation-related CVEs, and so there may be
++                unknown security risks associated with leaving it enabled.
++
++  auto		Disables TSX if X86_BUG_TAA is present, otherwise enables TSX
++                on the system.
++  ============  =============================================================
++
++Not specifying this option is equivalent to "tsx=off".
++
++The following combinations of the "tsx_async_abort" and "tsx" are possible. For
++affected platforms tsx=auto is equivalent to tsx=off and the result will be:
++
++  =========  ==========================   =========================================
++  tsx=on     tsx_async_abort=full         The system will use VERW to clear CPU
++                                          buffers. Cross-thread attacks are still
++					  possible on SMT machines.
++  tsx=on     tsx_async_abort=full,nosmt   As above, cross-thread attacks on SMT
++                                          mitigated.
++  tsx=on     tsx_async_abort=off          The system is vulnerable.
++  tsx=off    tsx_async_abort=full         TSX might be disabled if microcode
++                                          provides a TSX control MSR. If so,
++					  system is not vulnerable.
++  tsx=off    tsx_async_abort=full,nosmt   Ditto
++  tsx=off    tsx_async_abort=off          Ditto
++  =========  ==========================   =========================================
++
++
++For unaffected platforms "tsx=on" and "tsx_async_abort=full" do not clear CPU
++buffers.  For platforms without TSX control (MSR_IA32_ARCH_CAPABILITIES.MDS_NO=0)
++the "tsx" command line argument has no effect.
++
++For the affected platforms, the table below indicates the mitigation status for
++the combinations of CPUID bit MD_CLEAR and IA32_ARCH_CAPABILITIES MSR bits MDS_NO
++and TSX_CTRL_MSR.
++
++  =======  =========  =============  ========================================
++  MDS_NO   MD_CLEAR   TSX_CTRL_MSR   Status
++  =======  =========  =============  ========================================
++    0          0            0        Vulnerable (needs microcode)
++    0          1            0        MDS and TAA mitigated via VERW
++    1          1            0        MDS fixed, TAA vulnerable if TSX enabled
++                                     because MD_CLEAR has no meaning and
++                                     VERW is not guaranteed to clear buffers
++    1          X            1        MDS fixed, TAA can be mitigated by
++                                     VERW or TSX_CTRL_MSR
++  =======  =========  =============  ========================================
++
++Mitigation selection guide
++--------------------------
++
++1. Trusted userspace and guests
++^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
++
++If all user space applications are from a trusted source and do not execute
++untrusted code which is supplied externally, then the mitigation can be
++disabled. The same applies to virtualized environments with trusted guests.
++
++
++2. Untrusted userspace and guests
++^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
++
++If there are untrusted applications or guests on the system, enabling TSX
++might allow a malicious actor to leak data from the host or from other
++processes running on the same physical core.
++
++If the microcode is available and the TSX is disabled on the host, attacks
++are prevented in a virtualized environment as well, even if the VMs do not
++explicitly enable the mitigation.
++
++
++.. _taa_default_mitigations:
++
++Default mitigations
++-------------------
++
++The kernel's default action for vulnerable processors is:
++
++  - Deploy TSX disable mitigation (tsx_async_abort=full tsx=off).
+--- a/Documentation/kernel-parameters.txt
++++ b/Documentation/kernel-parameters.txt
+@@ -2490,6 +2490,7 @@ bytes respectively. Such letter suffixes
+ 					       spec_store_bypass_disable=off [X86]
+ 					       l1tf=off [X86]
+ 					       mds=off [X86]
++					       tsx_async_abort=off [X86]
+ 
+ 			auto (default)
+ 				Mitigate all CPU vulnerabilities, but leave SMT
+@@ -2505,6 +2506,7 @@ bytes respectively. Such letter suffixes
+ 				be fully mitigated, even if it means losing SMT.
+ 				Equivalent to: l1tf=flush,nosmt [X86]
+ 					       mds=full,nosmt [X86]
++					       tsx_async_abort=full,nosmt [X86]
+ 
+ 	mminit_loglevel=
+ 			[KNL] When CONFIG_DEBUG_MEMORY_INIT is set, this
+@@ -4545,6 +4547,42 @@ bytes respectively. Such letter suffixes
+ 			See Documentation/hw-vuln/tsx_async_abort.rst
+ 			for more details.
+ 
++	tsx_async_abort= [X86,INTEL] Control mitigation for the TSX Async
++			Abort (TAA) vulnerability.
++
++			Similar to Micro-architectural Data Sampling (MDS)
++			certain CPUs that support Transactional
++			Synchronization Extensions (TSX) are vulnerable to an
++			exploit against CPU internal buffers which can forward
++			information to a disclosure gadget under certain
++			conditions.
++
++			In vulnerable processors, the speculatively forwarded
++			data can be used in a cache side channel attack, to
++			access data to which the attacker does not have direct
++			access.
++
++			This parameter controls the TAA mitigation.  The
++			options are:
++
++			full       - Enable TAA mitigation on vulnerable CPUs
++				     if TSX is enabled.
++
++			full,nosmt - Enable TAA mitigation and disable SMT on
++				     vulnerable CPUs. If TSX is disabled, SMT
++				     is not disabled because CPU is not
++				     vulnerable to cross-thread TAA attacks.
++			off        - Unconditionally disable TAA mitigation
++
++			Not specifying this option is equivalent to
++			tsx_async_abort=full.  On CPUs which are MDS affected
++			and deploy MDS mitigation, TAA mitigation is not
++			required and doesn't provide any additional
++			mitigation.
++
++			For details see:
++			Documentation/hw-vuln/tsx_async_abort.rst
++
+ 	turbografx.map[2|3]=	[HW,JOY]
+ 			TurboGraFX parallel port interface
+ 			Format:
+--- a/Documentation/x86/index.rst
++++ b/Documentation/x86/index.rst
+@@ -6,3 +6,4 @@ x86 architecture specifics
+    :maxdepth: 1
+ 
+    mds
++   tsx_async_abort
+--- /dev/null
++++ b/Documentation/x86/tsx_async_abort.rst
+@@ -0,0 +1,117 @@
++.. SPDX-License-Identifier: GPL-2.0
++
++TSX Async Abort (TAA) mitigation
++================================
++
++.. _tsx_async_abort:
++
++Overview
++--------
++
++TSX Async Abort (TAA) is a side channel attack on internal buffers in some
++Intel processors similar to Microarchitectural Data Sampling (MDS).  In this
++case certain loads may speculatively pass invalid data to dependent operations
++when an asynchronous abort condition is pending in a Transactional
++Synchronization Extensions (TSX) transaction.  This includes loads with no
++fault or assist condition. Such loads may speculatively expose stale data from
++the same uarch data structures as in MDS, with same scope of exposure i.e.
++same-thread and cross-thread. This issue affects all current processors that
++support TSX.
++
++Mitigation strategy
++-------------------
++
++a) TSX disable - one of the mitigations is to disable TSX. A new MSR
++IA32_TSX_CTRL will be available in future and current processors after
++microcode update which can be used to disable TSX. In addition, it
++controls the enumeration of the TSX feature bits (RTM and HLE) in CPUID.
++
++b) Clear CPU buffers - similar to MDS, clearing the CPU buffers mitigates this
++vulnerability. More details on this approach can be found in
++:ref:`Documentation/hw-vuln/mds.rst <mds>`.
++
++Kernel internal mitigation modes
++--------------------------------
++
++ =============    ============================================================
++ off              Mitigation is disabled. Either the CPU is not affected or
++                  tsx_async_abort=off is supplied on the kernel command line.
++
++ tsx disabled     Mitigation is enabled. TSX feature is disabled by default at
++                  bootup on processors that support TSX control.
++
++ verw             Mitigation is enabled. CPU is affected and MD_CLEAR is
++                  advertised in CPUID.
++
++ ucode needed     Mitigation is enabled. CPU is affected and MD_CLEAR is not
++                  advertised in CPUID. That is mainly for virtualization
++                  scenarios where the host has the updated microcode but the
++                  hypervisor does not expose MD_CLEAR in CPUID. It's a best
++                  effort approach without guarantee.
++ =============    ============================================================
++
++If the CPU is affected and the "tsx_async_abort" kernel command line parameter is
++not provided then the kernel selects an appropriate mitigation depending on the
++status of RTM and MD_CLEAR CPUID bits.
++
++The tables below indicate the impact of tsx=on|off|auto cmdline options on the
++state of TAA mitigation, VERW behavior and TSX feature for various combinations
++of MSR_IA32_ARCH_CAPABILITIES bits.
++
++1. "tsx=off"
++
++=========  =========  ============  ============  ==============  ===================  ======================
++MSR_IA32_ARCH_CAPABILITIES bits     Result with cmdline tsx=off
++----------------------------------  -------------------------------------------------------------------------
++TAA_NO     MDS_NO     TSX_CTRL_MSR  TSX state     VERW can clear  TAA mitigation       TAA mitigation
++                                    after bootup  CPU buffers     tsx_async_abort=off  tsx_async_abort=full
++=========  =========  ============  ============  ==============  ===================  ======================
++    0          0           0         HW default         Yes           Same as MDS           Same as MDS
++    0          0           1        Invalid case   Invalid case       Invalid case          Invalid case
++    0          1           0         HW default         No         Need ucode update     Need ucode update
++    0          1           1          Disabled          Yes           TSX disabled          TSX disabled
++    1          X           1          Disabled           X             None needed           None needed
++=========  =========  ============  ============  ==============  ===================  ======================
++
++2. "tsx=on"
++
++=========  =========  ============  ============  ==============  ===================  ======================
++MSR_IA32_ARCH_CAPABILITIES bits     Result with cmdline tsx=on
++----------------------------------  -------------------------------------------------------------------------
++TAA_NO     MDS_NO     TSX_CTRL_MSR  TSX state     VERW can clear  TAA mitigation       TAA mitigation
++                                    after bootup  CPU buffers     tsx_async_abort=off  tsx_async_abort=full
++=========  =========  ============  ============  ==============  ===================  ======================
++    0          0           0         HW default        Yes            Same as MDS          Same as MDS
++    0          0           1        Invalid case   Invalid case       Invalid case         Invalid case
++    0          1           0         HW default        No          Need ucode update     Need ucode update
++    0          1           1          Enabled          Yes               None              Same as MDS
++    1          X           1          Enabled          X              None needed          None needed
++=========  =========  ============  ============  ==============  ===================  ======================
++
++3. "tsx=auto"
++
++=========  =========  ============  ============  ==============  ===================  ======================
++MSR_IA32_ARCH_CAPABILITIES bits     Result with cmdline tsx=auto
++----------------------------------  -------------------------------------------------------------------------
++TAA_NO     MDS_NO     TSX_CTRL_MSR  TSX state     VERW can clear  TAA mitigation       TAA mitigation
++                                    after bootup  CPU buffers     tsx_async_abort=off  tsx_async_abort=full
++=========  =========  ============  ============  ==============  ===================  ======================
++    0          0           0         HW default    Yes                Same as MDS           Same as MDS
++    0          0           1        Invalid case  Invalid case        Invalid case          Invalid case
++    0          1           0         HW default    No              Need ucode update     Need ucode update
++    0          1           1          Disabled      Yes               TSX disabled          TSX disabled
++    1          X           1          Enabled       X                 None needed           None needed
++=========  =========  ============  ============  ==============  ===================  ======================
++
++In the tables, TSX_CTRL_MSR is a new bit in MSR_IA32_ARCH_CAPABILITIES that
++indicates whether MSR_IA32_TSX_CTRL is supported.
++
++There are two control bits in IA32_TSX_CTRL MSR:
++
++      Bit 0: When set it disables the Restricted Transactional Memory (RTM)
++             sub-feature of TSX (will force all transactions to abort on the
++             XBEGIN instruction).
++
++      Bit 1: When set it disables the enumeration of the RTM and HLE feature
++             (i.e. it will make CPUID(EAX=7).EBX{bit4} and
++             CPUID(EAX=7).EBX{bit11} read as 0).
diff --git a/queue-4.9/x86-speculation-taa-add-mitigation-for-tsx-async-abort.patch b/queue-4.9/x86-speculation-taa-add-mitigation-for-tsx-async-abort.patch
new file mode 100644
index 00000000000..5711a58ea74
--- /dev/null
+++ b/queue-4.9/x86-speculation-taa-add-mitigation-for-tsx-async-abort.patch
@@ -0,0 +1,307 @@
+From foo@baz Fri 15 Nov 2019 11:10:23 AM CST
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Date: Wed, 23 Oct 2019 11:30:45 +0200
+Subject: x86/speculation/taa: Add mitigation for TSX Async Abort
+
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+
+commit 1b42f017415b46c317e71d41c34ec088417a1883 upstream.
+
+TSX Async Abort (TAA) is a side channel vulnerability to the internal
+buffers in some Intel processors similar to Microarchitectural Data
+Sampling (MDS). In this case, certain loads may speculatively pass
+invalid data to dependent operations when an asynchronous abort
+condition is pending in a TSX transaction.
+
+This includes loads with no fault or assist condition. Such loads may
+speculatively expose stale data from the uarch data structures as in
+MDS. Scope of exposure is within the same-thread and cross-thread. This
+issue affects all current processors that support TSX, but do not have
+ARCH_CAP_TAA_NO (bit 8) set in MSR_IA32_ARCH_CAPABILITIES.
+
+On CPUs which have their IA32_ARCH_CAPABILITIES MSR bit MDS_NO=0,
+CPUID.MD_CLEAR=1 and the MDS mitigation is clearing the CPU buffers
+using VERW or L1D_FLUSH, there is no additional mitigation needed for
+TAA. On affected CPUs with MDS_NO=1 this issue can be mitigated by
+disabling the Transactional Synchronization Extensions (TSX) feature.
+
+A new MSR IA32_TSX_CTRL in future and current processors after a
+microcode update can be used to control the TSX feature. There are two
+bits in that MSR:
+
+* TSX_CTRL_RTM_DISABLE disables the TSX sub-feature Restricted
+Transactional Memory (RTM).
+
+* TSX_CTRL_CPUID_CLEAR clears the RTM enumeration in CPUID. The other
+TSX sub-feature, Hardware Lock Elision (HLE), is unconditionally
+disabled with updated microcode but still enumerated as present by
+CPUID(EAX=7).EBX{bit4}.
+
+The second mitigation approach is similar to MDS which is clearing the
+affected CPU buffers on return to user space and when entering a guest.
+Relevant microcode update is required for the mitigation to work.  More
+details on this approach can be found here:
+
+  https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html
+
+The TSX feature can be controlled by the "tsx" command line parameter.
+If it is force-enabled then "Clear CPU buffers" (MDS mitigation) is
+deployed. The effective mitigation state can be read from sysfs.
+
+ [ bp:
+   - massage + comments cleanup
+   - s/TAA_MITIGATION_TSX_DISABLE/TAA_MITIGATION_TSX_DISABLED/g - Josh.
+   - remove partial TAA mitigation in update_mds_branch_idle() - Josh.
+   - s/tsx_async_abort_cmdline/tsx_async_abort_parse_cmdline/g
+ ]
+
+Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
+[bwh: Backported to 4.9:
+ - Add #include "cpu.h" in bugs.c
+ - Adjust context, indentation]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/cpufeatures.h   |    1 
+ arch/x86/include/asm/msr-index.h     |    4 +
+ arch/x86/include/asm/nospec-branch.h |    4 -
+ arch/x86/include/asm/processor.h     |    7 ++
+ arch/x86/kernel/cpu/bugs.c           |  110 +++++++++++++++++++++++++++++++++++
+ arch/x86/kernel/cpu/common.c         |   15 ++++
+ 6 files changed, 139 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -357,5 +357,6 @@
+ #define X86_BUG_MDS		X86_BUG(19) /* CPU is affected by Microarchitectural data sampling */
+ #define X86_BUG_MSBDS_ONLY	X86_BUG(20) /* CPU is only affected by the  MSDBS variant of BUG_MDS */
+ #define X86_BUG_SWAPGS		X86_BUG(21) /* CPU is affected by speculation through SWAPGS */
++#define X86_BUG_TAA		X86_BUG(22) /* CPU is affected by TSX Async Abort(TAA) */
+ 
+ #endif /* _ASM_X86_CPUFEATURES_H */
+--- a/arch/x86/include/asm/msr-index.h
++++ b/arch/x86/include/asm/msr-index.h
+@@ -78,6 +78,10 @@
+ 						  * Sampling (MDS) vulnerabilities.
+ 						  */
+ #define ARCH_CAP_TSX_CTRL_MSR		BIT(7)	/* MSR for TSX control is available. */
++#define ARCH_CAP_TAA_NO			BIT(8)	/*
++						 * Not susceptible to
++						 * TSX Async Abort (TAA) vulnerabilities.
++						 */
+ 
+ #define MSR_IA32_FLUSH_CMD		0x0000010b
+ #define L1D_FLUSH			BIT(0)	/*
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -314,7 +314,7 @@ DECLARE_STATIC_KEY_FALSE(mds_idle_clear)
+ #include <asm/segment.h>
+ 
+ /**
+- * mds_clear_cpu_buffers - Mitigation for MDS vulnerability
++ * mds_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability
+  *
+  * This uses the otherwise unused and obsolete VERW instruction in
+  * combination with microcode which triggers a CPU buffer flush when the
+@@ -337,7 +337,7 @@ static inline void mds_clear_cpu_buffers
+ }
+ 
+ /**
+- * mds_user_clear_cpu_buffers - Mitigation for MDS vulnerability
++ * mds_user_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability
+  *
+  * Clear CPU buffers if the corresponding static key is enabled
+  */
+--- a/arch/x86/include/asm/processor.h
++++ b/arch/x86/include/asm/processor.h
+@@ -880,4 +880,11 @@ enum mds_mitigations {
+ 	MDS_MITIGATION_VMWERV,
+ };
+ 
++enum taa_mitigations {
++	TAA_MITIGATION_OFF,
++	TAA_MITIGATION_UCODE_NEEDED,
++	TAA_MITIGATION_VERW,
++	TAA_MITIGATION_TSX_DISABLED,
++};
++
+ #endif /* _ASM_X86_PROCESSOR_H */
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -31,11 +31,14 @@
+ #include <asm/intel-family.h>
+ #include <asm/e820.h>
+ 
++#include "cpu.h"
++
+ static void __init spectre_v1_select_mitigation(void);
+ static void __init spectre_v2_select_mitigation(void);
+ static void __init ssb_select_mitigation(void);
+ static void __init l1tf_select_mitigation(void);
+ static void __init mds_select_mitigation(void);
++static void __init taa_select_mitigation(void);
+ 
+ /* The base value of the SPEC_CTRL MSR that always has to be preserved. */
+ u64 x86_spec_ctrl_base;
+@@ -102,6 +105,7 @@ void __init check_bugs(void)
+ 	ssb_select_mitigation();
+ 	l1tf_select_mitigation();
+ 	mds_select_mitigation();
++	taa_select_mitigation();
+ 
+ 	arch_smt_update();
+ 
+@@ -266,6 +270,100 @@ static int __init mds_cmdline(char *str)
+ early_param("mds", mds_cmdline);
+ 
+ #undef pr_fmt
++#define pr_fmt(fmt)	"TAA: " fmt
++
++/* Default mitigation for TAA-affected CPUs */
++static enum taa_mitigations taa_mitigation __ro_after_init = TAA_MITIGATION_VERW;
++static bool taa_nosmt __ro_after_init;
++
++static const char * const taa_strings[] = {
++	[TAA_MITIGATION_OFF]		= "Vulnerable",
++	[TAA_MITIGATION_UCODE_NEEDED]	= "Vulnerable: Clear CPU buffers attempted, no microcode",
++	[TAA_MITIGATION_VERW]		= "Mitigation: Clear CPU buffers",
++	[TAA_MITIGATION_TSX_DISABLED]	= "Mitigation: TSX disabled",
++};
++
++static void __init taa_select_mitigation(void)
++{
++	u64 ia32_cap;
++
++	if (!boot_cpu_has_bug(X86_BUG_TAA)) {
++		taa_mitigation = TAA_MITIGATION_OFF;
++		return;
++	}
++
++	/* TSX previously disabled by tsx=off */
++	if (!boot_cpu_has(X86_FEATURE_RTM)) {
++		taa_mitigation = TAA_MITIGATION_TSX_DISABLED;
++		goto out;
++	}
++
++	if (cpu_mitigations_off()) {
++		taa_mitigation = TAA_MITIGATION_OFF;
++		return;
++	}
++
++	/* TAA mitigation is turned off on the cmdline (tsx_async_abort=off) */
++	if (taa_mitigation == TAA_MITIGATION_OFF)
++		goto out;
++
++	if (boot_cpu_has(X86_FEATURE_MD_CLEAR))
++		taa_mitigation = TAA_MITIGATION_VERW;
++	else
++		taa_mitigation = TAA_MITIGATION_UCODE_NEEDED;
++
++	/*
++	 * VERW doesn't clear the CPU buffers when MD_CLEAR=1 and MDS_NO=1.
++	 * A microcode update fixes this behavior to clear CPU buffers. It also
++	 * adds support for MSR_IA32_TSX_CTRL which is enumerated by the
++	 * ARCH_CAP_TSX_CTRL_MSR bit.
++	 *
++	 * On MDS_NO=1 CPUs if ARCH_CAP_TSX_CTRL_MSR is not set, microcode
++	 * update is required.
++	 */
++	ia32_cap = x86_read_arch_cap_msr();
++	if ( (ia32_cap & ARCH_CAP_MDS_NO) &&
++	    !(ia32_cap & ARCH_CAP_TSX_CTRL_MSR))
++		taa_mitigation = TAA_MITIGATION_UCODE_NEEDED;
++
++	/*
++	 * TSX is enabled, select alternate mitigation for TAA which is
++	 * the same as MDS. Enable MDS static branch to clear CPU buffers.
++	 *
++	 * For guests that can't determine whether the correct microcode is
++	 * present on host, enable the mitigation for UCODE_NEEDED as well.
++	 */
++	static_branch_enable(&mds_user_clear);
++
++	if (taa_nosmt || cpu_mitigations_auto_nosmt())
++		cpu_smt_disable(false);
++
++out:
++	pr_info("%s\n", taa_strings[taa_mitigation]);
++}
++
++static int __init tsx_async_abort_parse_cmdline(char *str)
++{
++	if (!boot_cpu_has_bug(X86_BUG_TAA))
++		return 0;
++
++	if (!str)
++		return -EINVAL;
++
++	if (!strcmp(str, "off")) {
++		taa_mitigation = TAA_MITIGATION_OFF;
++	} else if (!strcmp(str, "full")) {
++		taa_mitigation = TAA_MITIGATION_VERW;
++	} else if (!strcmp(str, "full,nosmt")) {
++		taa_mitigation = TAA_MITIGATION_VERW;
++		taa_nosmt = true;
++	}
++
++	return 0;
++}
++early_param("tsx_async_abort", tsx_async_abort_parse_cmdline);
++
++#undef pr_fmt
+ #define pr_fmt(fmt)     "Spectre V1 : " fmt
+ 
+ enum spectre_v1_mitigation {
+@@ -780,6 +878,7 @@ static void update_mds_branch_idle(void)
+ }
+ 
+ #define MDS_MSG_SMT "MDS CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html for more details.\n"
++#define TAA_MSG_SMT "TAA CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/tsx_async_abort.html for more details.\n"
+ 
+ void arch_smt_update(void)
+ {
+@@ -812,6 +911,17 @@ void arch_smt_update(void)
+ 		break;
+ 	}
+ 
++	switch (taa_mitigation) {
++	case TAA_MITIGATION_VERW:
++	case TAA_MITIGATION_UCODE_NEEDED:
++		if (sched_smt_active())
++			pr_warn_once(TAA_MSG_SMT);
++		break;
++	case TAA_MITIGATION_TSX_DISABLED:
++	case TAA_MITIGATION_OFF:
++		break;
++	}
++
+ 	mutex_unlock(&spec_ctrl_mutex);
+ }
+ 
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -998,6 +998,21 @@ static void __init cpu_set_bug_bits(stru
+ 	if (!cpu_matches(NO_SWAPGS))
+ 		setup_force_cpu_bug(X86_BUG_SWAPGS);
+ 
++	/*
++	 * When the CPU is not mitigated for TAA (TAA_NO=0) set TAA bug when:
++	 *	- TSX is supported or
++	 *	- TSX_CTRL is present
++	 *
++	 * TSX_CTRL check is needed for cases when TSX could be disabled before
++	 * the kernel boot e.g. kexec.
++	 * TSX_CTRL check alone is not sufficient for cases when the microcode
++	 * update is not present or running as guest that don't get TSX_CTRL.
++	 */
++	if (!(ia32_cap & ARCH_CAP_TAA_NO) &&
++	    (cpu_has(c, X86_FEATURE_RTM) ||
++	     (ia32_cap & ARCH_CAP_TSX_CTRL_MSR)))
++		setup_force_cpu_bug(X86_BUG_TAA);
++
+ 	if (cpu_matches(NO_MELTDOWN))
+ 		return;
+ 
diff --git a/queue-4.9/x86-speculation-taa-add-sysfs-reporting-for-tsx-async-abort.patch b/queue-4.9/x86-speculation-taa-add-sysfs-reporting-for-tsx-async-abort.patch
new file mode 100644
index 00000000000..25a352470db
--- /dev/null
+++ b/queue-4.9/x86-speculation-taa-add-sysfs-reporting-for-tsx-async-abort.patch
@@ -0,0 +1,119 @@
+From foo@baz Fri 15 Nov 2019 11:10:23 AM CST
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Date: Wed, 23 Oct 2019 12:19:51 +0200
+Subject: x86/speculation/taa: Add sysfs reporting for TSX Async Abort
+
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+
+commit 6608b45ac5ecb56f9e171252229c39580cc85f0f upstream.
+
+Add the sysfs reporting file for TSX Async Abort. It exposes the
+vulnerability and the mitigation state similar to the existing files for
+the other hardware vulnerabilities.
+
+Sysfs file path is:
+/sys/devices/system/cpu/vulnerabilities/tsx_async_abort
+
+Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Tested-by: Neelima Krishnan <neelima.krishnan@intel.com>
+Reviewed-by: Mark Gross <mgross@linux.intel.com>
+Reviewed-by: Tony Luck <tony.luck@intel.com>
+Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/bugs.c |   23 +++++++++++++++++++++++
+ drivers/base/cpu.c         |    9 +++++++++
+ include/linux/cpu.h        |    3 +++
+ 3 files changed, 35 insertions(+)
+
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -1418,6 +1418,21 @@ static ssize_t mds_show_state(char *buf)
+ 		       sched_smt_active() ? "vulnerable" : "disabled");
+ }
+ 
++static ssize_t tsx_async_abort_show_state(char *buf)
++{
++	if ((taa_mitigation == TAA_MITIGATION_TSX_DISABLED) ||
++	    (taa_mitigation == TAA_MITIGATION_OFF))
++		return sprintf(buf, "%s\n", taa_strings[taa_mitigation]);
++
++	if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
++		return sprintf(buf, "%s; SMT Host state unknown\n",
++			       taa_strings[taa_mitigation]);
++	}
++
++	return sprintf(buf, "%s; SMT %s\n", taa_strings[taa_mitigation],
++		       sched_smt_active() ? "vulnerable" : "disabled");
++}
++
+ static char *stibp_state(void)
+ {
+ 	if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
+@@ -1483,6 +1498,9 @@ static ssize_t cpu_show_common(struct de
+ 	case X86_BUG_MDS:
+ 		return mds_show_state(buf);
+ 
++	case X86_BUG_TAA:
++		return tsx_async_abort_show_state(buf);
++
+ 	default:
+ 		break;
+ 	}
+@@ -1519,4 +1537,9 @@ ssize_t cpu_show_mds(struct device *dev,
+ {
+ 	return cpu_show_common(dev, attr, buf, X86_BUG_MDS);
+ }
++
++ssize_t cpu_show_tsx_async_abort(struct device *dev, struct device_attribute *attr, char *buf)
++{
++	return cpu_show_common(dev, attr, buf, X86_BUG_TAA);
++}
+ #endif
+--- a/drivers/base/cpu.c
++++ b/drivers/base/cpu.c
+@@ -537,12 +537,20 @@ ssize_t __weak cpu_show_mds(struct devic
+ 	return sprintf(buf, "Not affected\n");
+ }
+ 
++ssize_t __weak cpu_show_tsx_async_abort(struct device *dev,
++					struct device_attribute *attr,
++					char *buf)
++{
++	return sprintf(buf, "Not affected\n");
++}
++
+ static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
+ static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
+ static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL);
+ static DEVICE_ATTR(spec_store_bypass, 0444, cpu_show_spec_store_bypass, NULL);
+ static DEVICE_ATTR(l1tf, 0444, cpu_show_l1tf, NULL);
+ static DEVICE_ATTR(mds, 0444, cpu_show_mds, NULL);
++static DEVICE_ATTR(tsx_async_abort, 0444, cpu_show_tsx_async_abort, NULL);
+ 
+ static struct attribute *cpu_root_vulnerabilities_attrs[] = {
+ 	&dev_attr_meltdown.attr,
+@@ -551,6 +559,7 @@ static struct attribute *cpu_root_vulner
+ 	&dev_attr_spec_store_bypass.attr,
+ 	&dev_attr_l1tf.attr,
+ 	&dev_attr_mds.attr,
++	&dev_attr_tsx_async_abort.attr,
+ 	NULL
+ };
+ 
+--- a/include/linux/cpu.h
++++ b/include/linux/cpu.h
+@@ -56,6 +56,9 @@ extern ssize_t cpu_show_l1tf(struct devi
+ 			     struct device_attribute *attr, char *buf);
+ extern ssize_t cpu_show_mds(struct device *dev,
+ 			    struct device_attribute *attr, char *buf);
++extern ssize_t cpu_show_tsx_async_abort(struct device *dev,
++					struct device_attribute *attr,
++					char *buf);
+ 
+ extern __printf(4, 5)
+ struct device *cpu_device_create(struct device *parent, void *drvdata,
diff --git a/queue-4.9/x86-speculation-taa-fix-printing-of-taa_msg_smt-on-ibrs_all-cpus.patch b/queue-4.9/x86-speculation-taa-fix-printing-of-taa_msg_smt-on-ibrs_all-cpus.patch
new file mode 100644
index 00000000000..1cd2da60282
--- /dev/null
+++ b/queue-4.9/x86-speculation-taa-fix-printing-of-taa_msg_smt-on-ibrs_all-cpus.patch
@@ -0,0 +1,49 @@
+From foo@baz Fri 15 Nov 2019 11:10:23 AM CST
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+Date: Wed, 6 Nov 2019 20:26:46 -0600
+Subject: x86/speculation/taa: Fix printing of TAA_MSG_SMT on IBRS_ALL CPUs
+
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+
+commit 012206a822a8b6ac09125bfaa210a95b9eb8f1c1 upstream.
+
+For new IBRS_ALL CPUs, the Enhanced IBRS check at the beginning of
+cpu_bugs_smt_update() causes the function to return early, unintentionally
+skipping the MDS and TAA logic.
+
+This is not a problem for MDS, because there appears to be no overlap
+between IBRS_ALL and MDS-affected CPUs.  So the MDS mitigation would be
+disabled and nothing would need to be done in this function anyway.
+
+But for TAA, the TAA_MSG_SMT string will never get printed on Cascade
+Lake and newer.
+
+The check is superfluous anyway: when 'spectre_v2_enabled' is
+SPECTRE_V2_IBRS_ENHANCED, 'spectre_v2_user' is always
+SPECTRE_V2_USER_NONE, and so the 'spectre_v2_user' switch statement
+handles it appropriately by doing nothing.  So just remove the check.
+
+Fixes: 1b42f017415b ("x86/speculation/taa: Add mitigation for TSX Async Abort")
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Tyler Hicks <tyhicks@canonical.com>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/bugs.c |    4 ----
+ 1 file changed, 4 deletions(-)
+
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -882,10 +882,6 @@ static void update_mds_branch_idle(void)
+ 
+ void arch_smt_update(void)
+ {
+-	/* Enhanced IBRS implies STIBP. No update required. */
+-	if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
+-		return;
+-
+ 	mutex_lock(&spec_ctrl_mutex);
+ 
+ 	switch (spectre_v2_user) {
diff --git a/queue-4.9/x86-tsx-add-auto-option-to-the-tsx-cmdline-parameter.patch b/queue-4.9/x86-tsx-add-auto-option-to-the-tsx-cmdline-parameter.patch
new file mode 100644
index 00000000000..11c22f99528
--- /dev/null
+++ b/queue-4.9/x86-tsx-add-auto-option-to-the-tsx-cmdline-parameter.patch
@@ -0,0 +1,66 @@
+From foo@baz Fri 15 Nov 2019 11:10:23 AM CST
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Date: Wed, 23 Oct 2019 12:28:57 +0200
+Subject: x86/tsx: Add "auto" option to the tsx= cmdline parameter
+
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+
+commit 7531a3596e3272d1f6841e0d601a614555dc6b65 upstream.
+
+Platforms which are not affected by X86_BUG_TAA may want the TSX feature
+enabled. Add "auto" option to the TSX cmdline parameter. When tsx=auto
+disable TSX when X86_BUG_TAA is present, otherwise enable TSX.
+
+More details on X86_BUG_TAA can be found here:
+https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/tsx_async_abort.html
+
+ [ bp: Extend the arg buffer to accommodate "auto\0". ]
+
+Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Tony Luck <tony.luck@intel.com>
+Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
+[bwh: Backported to 4.9: adjust filename]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/kernel-parameters.txt |    3 +++
+ arch/x86/kernel/cpu/tsx.c           |    7 ++++++-
+ 2 files changed, 9 insertions(+), 1 deletion(-)
+
+--- a/Documentation/kernel-parameters.txt
++++ b/Documentation/kernel-parameters.txt
+@@ -4537,6 +4537,9 @@ bytes respectively. Such letter suffixes
+ 				update. This new MSR allows for the reliable
+ 				deactivation of the TSX functionality.)
+ 
++			auto	- Disable TSX if X86_BUG_TAA is present,
++				  otherwise enable TSX on the system.
++
+ 			Not specifying this option is equivalent to tsx=off.
+ 
+ 			See Documentation/hw-vuln/tsx_async_abort.rst
+--- a/arch/x86/kernel/cpu/tsx.c
++++ b/arch/x86/kernel/cpu/tsx.c
+@@ -75,7 +75,7 @@ static bool __init tsx_ctrl_is_supported
+ 
+ void __init tsx_init(void)
+ {
+-	char arg[4] = {};
++	char arg[5] = {};
+ 	int ret;
+ 
+ 	if (!tsx_ctrl_is_supported())
+@@ -87,6 +87,11 @@ void __init tsx_init(void)
+ 			tsx_ctrl_state = TSX_CTRL_ENABLE;
+ 		} else if (!strcmp(arg, "off")) {
+ 			tsx_ctrl_state = TSX_CTRL_DISABLE;
++		} else if (!strcmp(arg, "auto")) {
++			if (boot_cpu_has_bug(X86_BUG_TAA))
++				tsx_ctrl_state = TSX_CTRL_DISABLE;
++			else
++				tsx_ctrl_state = TSX_CTRL_ENABLE;
+ 		} else {
+ 			tsx_ctrl_state = TSX_CTRL_DISABLE;
+ 			pr_err("tsx: invalid option, defaulting to off\n");
diff --git a/queue-4.9/x86-tsx-add-config-options-to-set-tsx-on-off-auto.patch b/queue-4.9/x86-tsx-add-config-options-to-set-tsx-on-off-auto.patch
new file mode 100644
index 00000000000..20f8b77e6fe
--- /dev/null
+++ b/queue-4.9/x86-tsx-add-config-options-to-set-tsx-on-off-auto.patch
@@ -0,0 +1,137 @@
+From foo@baz Fri 15 Nov 2019 11:10:23 AM CST
+From: Michal Hocko <mhocko@suse.com>
+Date: Wed, 23 Oct 2019 12:35:50 +0200
+Subject: x86/tsx: Add config options to set tsx=on|off|auto
+
+From: Michal Hocko <mhocko@suse.com>
+
+commit db616173d787395787ecc93eef075fa975227b10 upstream.
+
+There is a general consensus that TSX usage is not largely spread while
+the history shows there is a non trivial space for side channel attacks
+possible. Therefore the tsx is disabled by default even on platforms
+that might have a safe implementation of TSX according to the current
+knowledge. This is a fair trade off to make.
+
+There are, however, workloads that really do benefit from using TSX and
+updating to a newer kernel with TSX disabled might introduce a
+noticeable regression. This would be especially a problem for Linux
+distributions which will provide TAA mitigations.
+
+Introduce config options X86_INTEL_TSX_MODE_OFF, X86_INTEL_TSX_MODE_ON
+and X86_INTEL_TSX_MODE_AUTO to control the TSX feature. The config
+setting can be overridden by the tsx cmdline options.
+
+ [ bp: Text cleanups from Josh. ]
+
+Suggested-by: Borislav Petkov <bpetkov@suse.de>
+Signed-off-by: Michal Hocko <mhocko@suse.com>
+Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
+[bwh: Backported to 4.9: adjust doc filename]
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/Kconfig          |   45 +++++++++++++++++++++++++++++++++++++++++++++
+ arch/x86/kernel/cpu/tsx.c |   22 ++++++++++++++++------
+ 2 files changed, 61 insertions(+), 6 deletions(-)
+
+--- a/arch/x86/Kconfig
++++ b/arch/x86/Kconfig
+@@ -1755,6 +1755,51 @@ config X86_INTEL_MEMORY_PROTECTION_KEYS
+ 
+ 	  If unsure, say y.
+ 
++choice
++	prompt "TSX enable mode"
++	depends on CPU_SUP_INTEL
++	default X86_INTEL_TSX_MODE_OFF
++	help
++	  Intel's TSX (Transactional Synchronization Extensions) feature
++	  allows to optimize locking protocols through lock elision which
++	  can lead to a noticeable performance boost.
++
++	  On the other hand it has been shown that TSX can be exploited
++	  to form side channel attacks (e.g. TAA) and chances are there
++	  will be more of those attacks discovered in the future.
++
++	  Therefore TSX is not enabled by default (aka tsx=off). An admin
++	  might override this decision with the tsx=on command line parameter.
++	  Even with TSX enabled, the kernel will attempt to enable the best
++	  possible TAA mitigation setting depending on the microcode available
++	  for the particular machine.
++
++	  This option allows to set the default tsx mode between tsx=on, =off
++	  and =auto. See Documentation/kernel-parameters.txt for more
++	  details.
++
++	  Say off if not sure, auto if TSX is in use but it should be used on safe
++	  platforms or on if TSX is in use and the security aspect of tsx is not
++	  relevant.
++
++config X86_INTEL_TSX_MODE_OFF
++	bool "off"
++	help
++	  TSX is disabled if possible - equals to tsx=off command line parameter.
++
++config X86_INTEL_TSX_MODE_ON
++	bool "on"
++	help
++	  TSX is always enabled on TSX capable HW - equals the tsx=on command
++	  line parameter.
++
++config X86_INTEL_TSX_MODE_AUTO
++	bool "auto"
++	help
++	  TSX is enabled on TSX capable HW that is believed to be safe against
++	  side channel attacks - equals the tsx=auto command line parameter.
++endchoice
++
+ config EFI
+ 	bool "EFI runtime service support"
+ 	depends on ACPI
+--- a/arch/x86/kernel/cpu/tsx.c
++++ b/arch/x86/kernel/cpu/tsx.c
+@@ -73,6 +73,14 @@ static bool __init tsx_ctrl_is_supported
+ 	return !!(ia32_cap & ARCH_CAP_TSX_CTRL_MSR);
+ }
+ 
++static enum tsx_ctrl_states x86_get_tsx_auto_mode(void)
++{
++	if (boot_cpu_has_bug(X86_BUG_TAA))
++		return TSX_CTRL_DISABLE;
++
++	return TSX_CTRL_ENABLE;
++}
++
+ void __init tsx_init(void)
+ {
+ 	char arg[5] = {};
+@@ -88,17 +96,19 @@ void __init tsx_init(void)
+ 		} else if (!strcmp(arg, "off")) {
+ 			tsx_ctrl_state = TSX_CTRL_DISABLE;
+ 		} else if (!strcmp(arg, "auto")) {
+-			if (boot_cpu_has_bug(X86_BUG_TAA))
+-				tsx_ctrl_state = TSX_CTRL_DISABLE;
+-			else
+-				tsx_ctrl_state = TSX_CTRL_ENABLE;
++			tsx_ctrl_state = x86_get_tsx_auto_mode();
+ 		} else {
+ 			tsx_ctrl_state = TSX_CTRL_DISABLE;
+ 			pr_err("tsx: invalid option, defaulting to off\n");
+ 		}
+ 	} else {
+-		/* tsx= not provided, defaulting to off */
+-		tsx_ctrl_state = TSX_CTRL_DISABLE;
++		/* tsx= not provided */
++		if (IS_ENABLED(CONFIG_X86_INTEL_TSX_MODE_AUTO))
++			tsx_ctrl_state = x86_get_tsx_auto_mode();
++		else if (IS_ENABLED(CONFIG_X86_INTEL_TSX_MODE_OFF))
++			tsx_ctrl_state = TSX_CTRL_DISABLE;
++		else
++			tsx_ctrl_state = TSX_CTRL_ENABLE;
+ 	}
+ 
+ 	if (tsx_ctrl_state == TSX_CTRL_DISABLE) {