git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.14-stable patches
author    Greg Kroah-Hartman <gregkh@linuxfoundation.org>
          Tue, 12 Nov 2019 18:10:15 +0000 (19:10 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
          Tue, 12 Nov 2019 18:10:15 +0000 (19:10 +0100)
added patches:
cpu-speculation-uninline-and-export-cpu-mitigations-helpers.patch
documentation-add-itlb_multihit-documentation.patch
drm-i915-add-gen9-bcs-cmdparsing.patch
drm-i915-add-support-for-mandatory-cmdparsing.patch
drm-i915-allow-parsing-of-unsized-batches.patch
drm-i915-cmdparser-add-support-for-backward-jumps.patch
drm-i915-cmdparser-check-reg_table_count-before-derefencing.patch
drm-i915-cmdparser-do-not-check-past-the-cmd-length.patch
drm-i915-cmdparser-fix-jump-whitelist-clearing.patch
drm-i915-cmdparser-ignore-length-operands-during-command-matching.patch
drm-i915-cmdparser-use-explicit-goto-for-error-paths.patch
drm-i915-disable-secure-batches-for-gen6.patch
drm-i915-don-t-use-gpu-relocations-prior-to-cmdparser-stalls.patch
drm-i915-gen8-add-rc6-ctx-corruption-wa.patch
drm-i915-gtt-add-read-only-pages-to-gen8_pte_encode.patch
drm-i915-gtt-disable-read-only-support-under-gvt.patch
drm-i915-gtt-read-only-pages-for-insert_entries-on-bdw.patch
drm-i915-lower-rm-timeout-to-avoid-dsi-hard-hangs.patch
drm-i915-move-engine-needs_cmd_parser-to-engine-flags.patch
drm-i915-prevent-writing-into-a-read-only-object-via-a-ggtt-mmap.patch
drm-i915-remove-master-tables-from-cmdparser.patch
drm-i915-rename-gen7-cmdparser-tables.patch
drm-i915-silence-smatch-for-cmdparser.patch
drm-i915-support-ro-ppgtt-mapped-cmdparser-shadow-buffers.patch
kvm-add-helper-function-for-creating-vm-worker-threads.patch
kvm-convert-kvm_lock-to-a-mutex.patch
kvm-mmu-do-not-release-the-page-inside-mmu_set_spte.patch
kvm-mmu-itlb_multihit-mitigation.patch
kvm-vmx-svm-always-run-with-efer.nxe-1-when-shadow-paging-is-active.patch
kvm-x86-add-tracepoints-around-__direct_map-and-fname-fetch.patch
kvm-x86-change-kvm_mmu_page_get_gfn-bug_on-to-warn_on.patch
kvm-x86-export-mds_no-0-to-guests-when-tsx-is-enabled.patch
kvm-x86-make-fname-fetch-and-__direct_map-more-similar.patch
kvm-x86-mmu-recovery-of-shattered-nx-large-pages.patch
kvm-x86-powerpc-do-not-allow-clearing-largepages-debugfs-entry.patch
kvm-x86-remove-now-unneeded-hugepage-gfn-adjustment.patch
kvm-x86-use-intel-speculation-bugs-and-features-as-derived-in-generic-x86-code.patch
x86-bugs-add-itlb_multihit-bug-infrastructure.patch
x86-cpu-add-a-helper-function-x86_read_arch_cap_msr.patch
x86-cpu-add-a-tsx-cmdline-option-with-tsx-disabled-by-default.patch
x86-cpu-add-tremont-to-the-cpu-vulnerability-whitelist.patch
x86-msr-add-the-ia32_tsx_ctrl-msr.patch
x86-speculation-taa-add-documentation-for-tsx-async-abort.patch
x86-speculation-taa-add-mitigation-for-tsx-async-abort.patch
x86-speculation-taa-add-sysfs-reporting-for-tsx-async-abort.patch
x86-speculation-taa-fix-printing-of-taa_msg_smt-on-ibrs_all-cpus.patch
x86-tsx-add-auto-option-to-the-tsx-cmdline-parameter.patch
x86-tsx-add-config-options-to-set-tsx-on-off-auto.patch

49 files changed:
queue-4.14/cpu-speculation-uninline-and-export-cpu-mitigations-helpers.patch [new file with mode: 0644]
queue-4.14/documentation-add-itlb_multihit-documentation.patch [new file with mode: 0644]
queue-4.14/drm-i915-add-gen9-bcs-cmdparsing.patch [new file with mode: 0644]
queue-4.14/drm-i915-add-support-for-mandatory-cmdparsing.patch [new file with mode: 0644]
queue-4.14/drm-i915-allow-parsing-of-unsized-batches.patch [new file with mode: 0644]
queue-4.14/drm-i915-cmdparser-add-support-for-backward-jumps.patch [new file with mode: 0644]
queue-4.14/drm-i915-cmdparser-check-reg_table_count-before-derefencing.patch [new file with mode: 0644]
queue-4.14/drm-i915-cmdparser-do-not-check-past-the-cmd-length.patch [new file with mode: 0644]
queue-4.14/drm-i915-cmdparser-fix-jump-whitelist-clearing.patch [new file with mode: 0644]
queue-4.14/drm-i915-cmdparser-ignore-length-operands-during-command-matching.patch [new file with mode: 0644]
queue-4.14/drm-i915-cmdparser-use-explicit-goto-for-error-paths.patch [new file with mode: 0644]
queue-4.14/drm-i915-disable-secure-batches-for-gen6.patch [new file with mode: 0644]
queue-4.14/drm-i915-don-t-use-gpu-relocations-prior-to-cmdparser-stalls.patch [new file with mode: 0644]
queue-4.14/drm-i915-gen8-add-rc6-ctx-corruption-wa.patch [new file with mode: 0644]
queue-4.14/drm-i915-gtt-add-read-only-pages-to-gen8_pte_encode.patch [new file with mode: 0644]
queue-4.14/drm-i915-gtt-disable-read-only-support-under-gvt.patch [new file with mode: 0644]
queue-4.14/drm-i915-gtt-read-only-pages-for-insert_entries-on-bdw.patch [new file with mode: 0644]
queue-4.14/drm-i915-lower-rm-timeout-to-avoid-dsi-hard-hangs.patch [new file with mode: 0644]
queue-4.14/drm-i915-move-engine-needs_cmd_parser-to-engine-flags.patch [new file with mode: 0644]
queue-4.14/drm-i915-prevent-writing-into-a-read-only-object-via-a-ggtt-mmap.patch [new file with mode: 0644]
queue-4.14/drm-i915-remove-master-tables-from-cmdparser.patch [new file with mode: 0644]
queue-4.14/drm-i915-rename-gen7-cmdparser-tables.patch [new file with mode: 0644]
queue-4.14/drm-i915-silence-smatch-for-cmdparser.patch [new file with mode: 0644]
queue-4.14/drm-i915-support-ro-ppgtt-mapped-cmdparser-shadow-buffers.patch [new file with mode: 0644]
queue-4.14/kvm-add-helper-function-for-creating-vm-worker-threads.patch [new file with mode: 0644]
queue-4.14/kvm-convert-kvm_lock-to-a-mutex.patch [new file with mode: 0644]
queue-4.14/kvm-mmu-do-not-release-the-page-inside-mmu_set_spte.patch [new file with mode: 0644]
queue-4.14/kvm-mmu-itlb_multihit-mitigation.patch [new file with mode: 0644]
queue-4.14/kvm-vmx-svm-always-run-with-efer.nxe-1-when-shadow-paging-is-active.patch [new file with mode: 0644]
queue-4.14/kvm-x86-add-tracepoints-around-__direct_map-and-fname-fetch.patch [new file with mode: 0644]
queue-4.14/kvm-x86-change-kvm_mmu_page_get_gfn-bug_on-to-warn_on.patch [new file with mode: 0644]
queue-4.14/kvm-x86-export-mds_no-0-to-guests-when-tsx-is-enabled.patch [new file with mode: 0644]
queue-4.14/kvm-x86-make-fname-fetch-and-__direct_map-more-similar.patch [new file with mode: 0644]
queue-4.14/kvm-x86-mmu-recovery-of-shattered-nx-large-pages.patch [new file with mode: 0644]
queue-4.14/kvm-x86-powerpc-do-not-allow-clearing-largepages-debugfs-entry.patch [new file with mode: 0644]
queue-4.14/kvm-x86-remove-now-unneeded-hugepage-gfn-adjustment.patch [new file with mode: 0644]
queue-4.14/kvm-x86-use-intel-speculation-bugs-and-features-as-derived-in-generic-x86-code.patch [new file with mode: 0644]
queue-4.14/series
queue-4.14/x86-bugs-add-itlb_multihit-bug-infrastructure.patch [new file with mode: 0644]
queue-4.14/x86-cpu-add-a-helper-function-x86_read_arch_cap_msr.patch [new file with mode: 0644]
queue-4.14/x86-cpu-add-a-tsx-cmdline-option-with-tsx-disabled-by-default.patch [new file with mode: 0644]
queue-4.14/x86-cpu-add-tremont-to-the-cpu-vulnerability-whitelist.patch [new file with mode: 0644]
queue-4.14/x86-msr-add-the-ia32_tsx_ctrl-msr.patch [new file with mode: 0644]
queue-4.14/x86-speculation-taa-add-documentation-for-tsx-async-abort.patch [new file with mode: 0644]
queue-4.14/x86-speculation-taa-add-mitigation-for-tsx-async-abort.patch [new file with mode: 0644]
queue-4.14/x86-speculation-taa-add-sysfs-reporting-for-tsx-async-abort.patch [new file with mode: 0644]
queue-4.14/x86-speculation-taa-fix-printing-of-taa_msg_smt-on-ibrs_all-cpus.patch [new file with mode: 0644]
queue-4.14/x86-tsx-add-auto-option-to-the-tsx-cmdline-parameter.patch [new file with mode: 0644]
queue-4.14/x86-tsx-add-config-options-to-set-tsx-on-off-auto.patch [new file with mode: 0644]

diff --git a/queue-4.14/cpu-speculation-uninline-and-export-cpu-mitigations-helpers.patch b/queue-4.14/cpu-speculation-uninline-and-export-cpu-mitigations-helpers.patch
new file mode 100644 (file)
index 0000000..4569eea
--- /dev/null
@@ -0,0 +1,101 @@
+From foo@baz Tue 12 Nov 2019 04:11:14 PM CET
+From: Tyler Hicks <tyhicks@canonical.com>
+Date: Mon, 4 Nov 2019 12:22:02 +0100
+Subject: cpu/speculation: Uninline and export CPU mitigations helpers
+
+From: Tyler Hicks <tyhicks@canonical.com>
+
+commit 731dc9df975a5da21237a18c3384f811a7a41cc6 upstream.
+
+A kernel module may need to check the value of the "mitigations=" kernel
+command line parameter as part of its setup when the module needs
+to perform software mitigations for a CPU flaw.
+
+Uninline and export the helper functions surrounding the cpu_mitigations
+enum to allow for their usage from a module.
+
+Lastly, privatize the enum and cpu_mitigations variable since the value of
+cpu_mitigations can be checked with the exported helper functions.
+
+Signed-off-by: Tyler Hicks <tyhicks@canonical.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/cpu.h |   25 ++-----------------------
+ kernel/cpu.c        |   27 ++++++++++++++++++++++++++-
+ 2 files changed, 28 insertions(+), 24 deletions(-)
+
+--- a/include/linux/cpu.h
++++ b/include/linux/cpu.h
+@@ -206,28 +206,7 @@ static inline int cpuhp_smt_enable(void)
+ static inline int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) { return 0; }
+ #endif
+-/*
+- * These are used for a global "mitigations=" cmdline option for toggling
+- * optional CPU mitigations.
+- */
+-enum cpu_mitigations {
+-      CPU_MITIGATIONS_OFF,
+-      CPU_MITIGATIONS_AUTO,
+-      CPU_MITIGATIONS_AUTO_NOSMT,
+-};
+-
+-extern enum cpu_mitigations cpu_mitigations;
+-
+-/* mitigations=off */
+-static inline bool cpu_mitigations_off(void)
+-{
+-      return cpu_mitigations == CPU_MITIGATIONS_OFF;
+-}
+-
+-/* mitigations=auto,nosmt */
+-static inline bool cpu_mitigations_auto_nosmt(void)
+-{
+-      return cpu_mitigations == CPU_MITIGATIONS_AUTO_NOSMT;
+-}
++extern bool cpu_mitigations_off(void);
++extern bool cpu_mitigations_auto_nosmt(void);
+ #endif /* _LINUX_CPU_H_ */
+--- a/kernel/cpu.c
++++ b/kernel/cpu.c
+@@ -2301,7 +2301,18 @@ void __init boot_cpu_hotplug_init(void)
+       this_cpu_write(cpuhp_state.state, CPUHP_ONLINE);
+ }
+-enum cpu_mitigations cpu_mitigations __ro_after_init = CPU_MITIGATIONS_AUTO;
++/*
++ * These are used for a global "mitigations=" cmdline option for toggling
++ * optional CPU mitigations.
++ */
++enum cpu_mitigations {
++      CPU_MITIGATIONS_OFF,
++      CPU_MITIGATIONS_AUTO,
++      CPU_MITIGATIONS_AUTO_NOSMT,
++};
++
++static enum cpu_mitigations cpu_mitigations __ro_after_init =
++      CPU_MITIGATIONS_AUTO;
+ static int __init mitigations_parse_cmdline(char *arg)
+ {
+@@ -2318,3 +2329,17 @@ static int __init mitigations_parse_cmdl
+       return 0;
+ }
+ early_param("mitigations", mitigations_parse_cmdline);
++
++/* mitigations=off */
++bool cpu_mitigations_off(void)
++{
++      return cpu_mitigations == CPU_MITIGATIONS_OFF;
++}
++EXPORT_SYMBOL_GPL(cpu_mitigations_off);
++
++/* mitigations=auto,nosmt */
++bool cpu_mitigations_auto_nosmt(void)
++{
++      return cpu_mitigations == CPU_MITIGATIONS_AUTO_NOSMT;
++}
++EXPORT_SYMBOL_GPL(cpu_mitigations_auto_nosmt);
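
For context, a minimal sketch of how a module might consume the helpers this patch exports. Everything below except cpu_mitigations_off() and cpu_mitigations_auto_nosmt() is hypothetical (module name, messages), and the GPL license tag is required because the symbols are exported with EXPORT_SYMBOL_GPL():

    #include <linux/cpu.h>
    #include <linux/module.h>
    #include <linux/printk.h>

    static int __init mitigation_check_init(void)
    {
            if (cpu_mitigations_off())              /* booted with mitigations=off */
                    pr_info("example: skipping software mitigation\n");
            else if (cpu_mitigations_auto_nosmt())  /* booted with mitigations=auto,nosmt */
                    pr_info("example: mitigation enabled, SMT disabled by policy\n");
            else                                    /* default: mitigations=auto */
                    pr_info("example: mitigation enabled\n");
            return 0;
    }
    module_init(mitigation_check_init);

    MODULE_LICENSE("GPL");
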
diff --git a/queue-4.14/documentation-add-itlb_multihit-documentation.patch b/queue-4.14/documentation-add-itlb_multihit-documentation.patch
new file mode 100644 (file)
index 0000000..f6b9b07
--- /dev/null
@@ -0,0 +1,197 @@
+From foo@baz Tue 12 Nov 2019 04:11:14 PM CET
+From: "Gomez Iglesias, Antonio" <antonio.gomez.iglesias@intel.com>
+Date: Mon, 4 Nov 2019 12:22:03 +0100
+Subject: Documentation: Add ITLB_MULTIHIT documentation
+
+From: "Gomez Iglesias, Antonio" <antonio.gomez.iglesias@intel.com>
+
+commit 7f00cc8d4a51074eb0ad4c3f16c15757b1ddfb7d upstream.
+
+Add the initial ITLB_MULTIHIT documentation.
+
+[ tglx: Add it to the index so it gets actually built. ]
+
+Signed-off-by: Antonio Gomez Iglesias <antonio.gomez.iglesias@intel.com>
+Signed-off-by: Nelson D'Souza <nelson.dsouza@linux.intel.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/admin-guide/hw-vuln/index.rst    |    1 
+ Documentation/admin-guide/hw-vuln/multihit.rst |  163 +++++++++++++++++++++++++
+ 2 files changed, 164 insertions(+)
+ create mode 100644 Documentation/admin-guide/hw-vuln/multihit.rst
+
+--- a/Documentation/admin-guide/hw-vuln/index.rst
++++ b/Documentation/admin-guide/hw-vuln/index.rst
+@@ -13,3 +13,4 @@ are configurable at compile, boot or run
+    l1tf
+    mds
+    tsx_async_abort
++   multihit.rst
+--- /dev/null
++++ b/Documentation/admin-guide/hw-vuln/multihit.rst
+@@ -0,0 +1,163 @@
++iTLB multihit
++=============
++
++iTLB multihit is an erratum where some processors may incur a machine check
++error, possibly resulting in an unrecoverable CPU lockup, when an
++instruction fetch hits multiple entries in the instruction TLB. This can
++occur when the page size is changed along with either the physical address
++or cache type. A malicious guest running on a virtualized system can
++exploit this erratum to perform a denial of service attack.
++
++
++Affected processors
++-------------------
++
++Variations of this erratum are present on most Intel Core and Xeon processor
++models. The erratum is not present on:
++
++   - non-Intel processors
++
++   - Some Atoms (Airmont, Bonnell, Goldmont, GoldmontPlus, Saltwell, Silvermont)
++
++   - Intel processors that have the PSCHANGE_MC_NO bit set in the
++     IA32_ARCH_CAPABILITIES MSR.
++
++
++Related CVEs
++------------
++
++The following CVE entry is related to this issue:
++
++   ==============  =================================================
++   CVE-2018-12207  Machine Check Error Avoidance on Page Size Change
++   ==============  =================================================
++
++
++Problem
++-------
++
++Privileged software, including the OS and virtual machine managers (VMM), is in
++charge of memory management. A key component in memory management is the control
++of the page tables. Modern processors use virtual memory, a technique that creates
++the illusion of a very large memory for processes. This virtual space is split
++into pages of a given size. Page tables translate virtual addresses to physical
++addresses.
++
++To reduce latency when performing a virtual to physical address translation,
++processors include a structure, called TLB, that caches recent translations.
++There are separate TLBs for instruction (iTLB) and data (dTLB).
++
++Under this erratum, instructions are fetched from a linear address translated
++using a 4 KB translation cached in the iTLB. Privileged software then modifies
++the paging structure so that the same linear address is mapped using a large
++page size (2 MB, 4 MB, 1 GB) with a different physical address or memory type.
++After the page structure modification, but before the software invalidates any
++iTLB entries for the linear address, a code fetch on the same linear address
++may cause a machine-check error which can result in a system hang or shutdown.
++
++
++Attack scenarios
++----------------
++
++Attacks against the iTLB multihit erratum can be mounted from malicious
++guests in a virtualized system.
++
++
++iTLB multihit system information
++--------------------------------
++
++The Linux kernel provides a sysfs interface to enumerate the current iTLB
++multihit status of the system: whether the system is vulnerable and which
++mitigations are active. The relevant sysfs file is:
++
++/sys/devices/system/cpu/vulnerabilities/itlb_multihit
++
++The possible values in this file are:
++
++.. list-table::
++
++     * - Not affected
++       - The processor is not vulnerable.
++     * - KVM: Mitigation: Split huge pages
++       - Software changes mitigate this issue.
++     * - KVM: Vulnerable
++       - The processor is vulnerable, but no mitigation enabled
++
++
++Enumeration of the erratum
++--------------------------------
++
++A new bit, PSCHANGE_MC_NO, has been allocated in the IA32_ARCH_CAPABILITIES MSR
++and will be set on CPUs which are mitigated against this issue.
++
++   =======================================   ===========   ===============================
++   IA32_ARCH_CAPABILITIES MSR                Not present   Possibly vulnerable,check model
++   IA32_ARCH_CAPABILITIES[PSCHANGE_MC_NO]    '0'           Likely vulnerable,check model
++   IA32_ARCH_CAPABILITIES[PSCHANGE_MC_NO]    '1'           Not vulnerable
++   =======================================   ===========   ===============================
++
++
++Mitigation mechanism
++-------------------------
++
++This erratum can be mitigated by restricting the use of large page sizes to
++non-executable pages.  This forces all iTLB entries to be 4K, and removes
++the possibility of multiple hits.
++
++In order to mitigate the vulnerability, KVM initially marks all huge pages
++as non-executable. If the guest attempts to execute in one of those pages,
++the page is broken down into 4K pages, which are then marked executable.
++
++If EPT is disabled or not available on the host, KVM is in control of TLB
++flushes and the problematic situation cannot happen.  However, the shadow
++EPT paging mechanism used by nested virtualization is vulnerable, because
++the nested guest can trigger multiple iTLB hits by modifying its own
++(non-nested) page tables.  For simplicity, KVM will make large pages
++non-executable in all shadow paging modes.
++
++Mitigation control on the kernel command line and KVM - module parameter
++------------------------------------------------------------------------
++
++The KVM hypervisor mitigation mechanism for marking huge pages as
++non-executable can be controlled with a module parameter "nx_huge_pages=".
++The kernel command line allows controlling the iTLB multihit mitigations at
++boot time with the option "kvm.nx_huge_pages=".
++
++The valid arguments for these options are:
++
++  ==========  ================================================================
++  force       Mitigation is enabled. In this case, the mitigation implements
++              non-executable huge pages in Linux kernel KVM module. All huge
++              pages in the EPT are marked as non-executable.
++              If a guest attempts to execute in one of those pages, the page is
++              broken down into 4K pages, which are then marked executable.
++
++  off       Mitigation is disabled.
++
++  auto        Enable mitigation only if the platform is affected and the kernel
++              was not booted with the "mitigations=off" command line parameter.
++            This is the default option.
++  ==========  ================================================================
++
++
++Mitigation selection guide
++--------------------------
++
++1. No virtualization in use
++^^^^^^^^^^^^^^^^^^^^^^^^^^^
++
++   The system is protected by the kernel unconditionally and no further
++   action is required.
++
++2. Virtualization with trusted guests
++^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
++
++   If the guest comes from a trusted source, you may assume that the guest will
++   not attempt to maliciously exploit these errata and no further action is
++   required.
++
++3. Virtualization with untrusted guests
++^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
++   If the guest comes from an untrusted source, the host kernel will need
++   to apply iTLB multihit mitigation via the kernel command line or kvm
++   module parameter.
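
As a rough illustration of the sysfs interface described above, a userspace program could query the reported status like this (plain C sketch, not part of the patch; the path comes from the documentation, everything else is illustrative):

    #include <stdio.h>

    int main(void)
    {
            char status[128];
            FILE *f = fopen("/sys/devices/system/cpu/vulnerabilities/itlb_multihit", "r");

            if (!f) {
                    perror("itlb_multihit");        /* entry absent on older kernels */
                    return 1;
            }
            if (fgets(status, sizeof(status), f))
                    /* e.g. "KVM: Mitigation: Split huge pages" or "Not affected" */
                    printf("iTLB multihit: %s", status);
            fclose(f);
            return 0;
    }
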
diff --git a/queue-4.14/drm-i915-add-gen9-bcs-cmdparsing.patch b/queue-4.14/drm-i915-add-gen9-bcs-cmdparsing.patch
new file mode 100644 (file)
index 0000000..9200737
--- /dev/null
@@ -0,0 +1,264 @@
+From foo@baz Tue 12 Nov 2019 04:10:24 PM CET
+From: Jon Bloomfield <jon.bloomfield@intel.com>
+Date: Mon, 23 Apr 2018 11:12:15 -0700
+Subject: drm/i915: Add gen9 BCS cmdparsing
+
+From: Jon Bloomfield <jon.bloomfield@intel.com>
+
+commit 0f2f39758341df70202ae1c42d5a1e4ee392b6d3 upstream.
+
+For gen9 we enable cmdparsing on the BCS ring, specifically
+to catch inadvertent accesses to sensitive registers
+
+Unlike gen7/hsw, we use the parser only to block certain
+registers. We can rely on h/w to block restricted commands,
+so the command tables only provide enough info to allow the
+parser to delineate each command, and identify commands that
+access registers.
+
+Note: This patch deliberately ignores checkpatch issues in
+favour of matching the style of the surrounding code. We'll
+correct the entire file in one go in a later patch.
+
+v3: rebase (Mika)
+v4: Add RING_TIMESTAMP registers to whitelist (Jon)
+
+Signed-off-by: Jon Bloomfield <jon.bloomfield@intel.com>
+Cc: Tony Luck <tony.luck@intel.com>
+Cc: Dave Airlie <airlied@redhat.com>
+Cc: Takashi Iwai <tiwai@suse.de>
+Cc: Tyler Hicks <tyhicks@canonical.com>
+Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
+Reviewed-by: Chris Wilson <chris.p.wilson@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/i915/i915_cmd_parser.c |  116 ++++++++++++++++++++++++++++++---
+ drivers/gpu/drm/i915/i915_gem_gtt.c    |    3 
+ drivers/gpu/drm/i915/i915_reg.h        |    4 +
+ 3 files changed, 112 insertions(+), 11 deletions(-)
+
+--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
++++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
+@@ -442,6 +442,47 @@ static const struct drm_i915_cmd_descrip
+       CMD(  MI_LOAD_SCAN_LINES_EXCL,          SMI,   !F,  0x3F,   R  ),
+ };
++/*
++ * For Gen9 we can still rely on the h/w to enforce cmd security, and only
++ * need to re-enforce the register access checks. We therefore only need to
++ * teach the cmdparser how to find the end of each command, and identify
++ * register accesses. The table doesn't need to reject any commands, and so
++ * the only commands listed here are:
++ *   1) Those that touch registers
++ *   2) Those that do not have the default 8-bit length
++ *
++ * Note that the default MI length mask chosen for this table is 0xFF, not
++ * the 0x3F used on older devices. This is because the vast majority of MI
++ * cmds on Gen9 use a standard 8-bit Length field.
++ * All the Gen9 blitter instructions are standard 0xFF length mask, and
++ * none allow access to non-general registers, so in fact no BLT cmds are
++ * included in the table at all.
++ *
++ */
++static const struct drm_i915_cmd_descriptor gen9_blt_cmds[] = {
++      CMD(  MI_NOOP,                          SMI,    F,  1,      S  ),
++      CMD(  MI_USER_INTERRUPT,                SMI,    F,  1,      S  ),
++      CMD(  MI_WAIT_FOR_EVENT,                SMI,    F,  1,      S  ),
++      CMD(  MI_FLUSH,                         SMI,    F,  1,      S  ),
++      CMD(  MI_ARB_CHECK,                     SMI,    F,  1,      S  ),
++      CMD(  MI_REPORT_HEAD,                   SMI,    F,  1,      S  ),
++      CMD(  MI_ARB_ON_OFF,                    SMI,    F,  1,      S  ),
++      CMD(  MI_SUSPEND_FLUSH,                 SMI,    F,  1,      S  ),
++      CMD(  MI_LOAD_SCAN_LINES_INCL,          SMI,   !F,  0x3F,   S  ),
++      CMD(  MI_LOAD_SCAN_LINES_EXCL,          SMI,   !F,  0x3F,   S  ),
++      CMD(  MI_STORE_DWORD_IMM,               SMI,   !F,  0x3FF,  S  ),
++      CMD(  MI_LOAD_REGISTER_IMM(1),          SMI,   !F,  0xFF,   W,
++            .reg = { .offset = 1, .mask = 0x007FFFFC, .step = 2 }    ),
++      CMD(  MI_UPDATE_GTT,                    SMI,   !F,  0x3FF,  S  ),
++      CMD(  MI_STORE_REGISTER_MEM_GEN8,       SMI,    F,  4,      W,
++            .reg = { .offset = 1, .mask = 0x007FFFFC }               ),
++      CMD(  MI_FLUSH_DW,                      SMI,   !F,  0x3F,   S  ),
++      CMD(  MI_LOAD_REGISTER_MEM_GEN8,        SMI,    F,  4,      W,
++            .reg = { .offset = 1, .mask = 0x007FFFFC }               ),
++      CMD(  MI_LOAD_REGISTER_REG,             SMI,    !F,  0xFF,  W,
++            .reg = { .offset = 1, .mask = 0x007FFFFC, .step = 1 }    ),
++};
++
+ static const struct drm_i915_cmd_descriptor noop_desc =
+       CMD(MI_NOOP, SMI, F, 1, S);
+@@ -488,6 +529,11 @@ static const struct drm_i915_cmd_table h
+       { hsw_blt_cmds, ARRAY_SIZE(hsw_blt_cmds) },
+ };
++static const struct drm_i915_cmd_table gen9_blt_cmd_table[] = {
++      { gen9_blt_cmds, ARRAY_SIZE(gen9_blt_cmds) },
++};
++
++
+ /*
+  * Register whitelists, sorted by increasing register offset.
+  */
+@@ -603,6 +649,29 @@ static const struct drm_i915_reg_descrip
+       REG64_IDX(RING_TIMESTAMP, BLT_RING_BASE),
+ };
++static const struct drm_i915_reg_descriptor gen9_blt_regs[] = {
++      REG64_IDX(RING_TIMESTAMP, RENDER_RING_BASE),
++      REG64_IDX(RING_TIMESTAMP, BSD_RING_BASE),
++      REG32(BCS_SWCTRL),
++      REG64_IDX(RING_TIMESTAMP, BLT_RING_BASE),
++      REG64_IDX(BCS_GPR, 0),
++      REG64_IDX(BCS_GPR, 1),
++      REG64_IDX(BCS_GPR, 2),
++      REG64_IDX(BCS_GPR, 3),
++      REG64_IDX(BCS_GPR, 4),
++      REG64_IDX(BCS_GPR, 5),
++      REG64_IDX(BCS_GPR, 6),
++      REG64_IDX(BCS_GPR, 7),
++      REG64_IDX(BCS_GPR, 8),
++      REG64_IDX(BCS_GPR, 9),
++      REG64_IDX(BCS_GPR, 10),
++      REG64_IDX(BCS_GPR, 11),
++      REG64_IDX(BCS_GPR, 12),
++      REG64_IDX(BCS_GPR, 13),
++      REG64_IDX(BCS_GPR, 14),
++      REG64_IDX(BCS_GPR, 15),
++};
++
+ #undef REG64
+ #undef REG32
+@@ -628,6 +697,10 @@ static const struct drm_i915_reg_table h
+       { gen7_blt_regs, ARRAY_SIZE(gen7_blt_regs) },
+ };
++static const struct drm_i915_reg_table gen9_blt_reg_tables[] = {
++      { gen9_blt_regs, ARRAY_SIZE(gen9_blt_regs) },
++};
++
+ static u32 gen7_render_get_cmd_length_mask(u32 cmd_header)
+ {
+       u32 client = cmd_header >> INSTR_CLIENT_SHIFT;
+@@ -683,6 +756,17 @@ static u32 gen7_blt_get_cmd_length_mask(
+       return 0;
+ }
++static u32 gen9_blt_get_cmd_length_mask(u32 cmd_header)
++{
++      u32 client = cmd_header >> INSTR_CLIENT_SHIFT;
++
++      if (client == INSTR_MI_CLIENT || client == INSTR_BC_CLIENT)
++              return 0xFF;
++
++      DRM_DEBUG_DRIVER("CMD: Abnormal blt cmd length! 0x%08X\n", cmd_header);
++      return 0;
++}
++
+ static bool validate_cmds_sorted(const struct intel_engine_cs *engine,
+                                const struct drm_i915_cmd_table *cmd_tables,
+                                int cmd_table_count)
+@@ -840,7 +924,8 @@ void intel_engine_init_cmd_parser(struct
+       int cmd_table_count;
+       int ret;
+-      if (!IS_GEN7(engine->i915))
++      if (!IS_GEN7(engine->i915) && !(IS_GEN9(engine->i915) &&
++                                      engine->id == BCS))
+               return;
+       switch (engine->id) {
+@@ -861,7 +946,6 @@ void intel_engine_init_cmd_parser(struct
+                       engine->reg_tables = ivb_render_reg_tables;
+                       engine->reg_table_count = ARRAY_SIZE(ivb_render_reg_tables);
+               }
+-
+               engine->get_cmd_length_mask = gen7_render_get_cmd_length_mask;
+               break;
+       case VCS:
+@@ -870,7 +954,16 @@ void intel_engine_init_cmd_parser(struct
+               engine->get_cmd_length_mask = gen7_bsd_get_cmd_length_mask;
+               break;
+       case BCS:
+-              if (IS_HASWELL(engine->i915)) {
++              engine->get_cmd_length_mask = gen7_blt_get_cmd_length_mask;
++              if (IS_GEN9(engine->i915)) {
++                      cmd_tables = gen9_blt_cmd_table;
++                      cmd_table_count = ARRAY_SIZE(gen9_blt_cmd_table);
++                      engine->get_cmd_length_mask =
++                              gen9_blt_get_cmd_length_mask;
++
++                      /* BCS Engine unsafe without parser */
++                      engine->flags |= I915_ENGINE_REQUIRES_CMD_PARSER;
++              } else if (IS_HASWELL(engine->i915)) {
+                       cmd_tables = hsw_blt_ring_cmd_table;
+                       cmd_table_count = ARRAY_SIZE(hsw_blt_ring_cmd_table);
+               } else {
+@@ -878,15 +971,17 @@ void intel_engine_init_cmd_parser(struct
+                       cmd_table_count = ARRAY_SIZE(gen7_blt_cmd_table);
+               }
+-              if (IS_HASWELL(engine->i915)) {
++              if (IS_GEN9(engine->i915)) {
++                      engine->reg_tables = gen9_blt_reg_tables;
++                      engine->reg_table_count =
++                              ARRAY_SIZE(gen9_blt_reg_tables);
++              } else if (IS_HASWELL(engine->i915)) {
+                       engine->reg_tables = hsw_blt_reg_tables;
+                       engine->reg_table_count = ARRAY_SIZE(hsw_blt_reg_tables);
+               } else {
+                       engine->reg_tables = ivb_blt_reg_tables;
+                       engine->reg_table_count = ARRAY_SIZE(ivb_blt_reg_tables);
+               }
+-
+-              engine->get_cmd_length_mask = gen7_blt_get_cmd_length_mask;
+               break;
+       case VECS:
+               cmd_tables = hsw_vebox_cmd_table;
+@@ -1260,9 +1355,9 @@ int intel_engine_cmd_parser(struct intel
+               }
+               /*
+-               * If the batch buffer contains a chained batch, return an
+-               * error that tells the caller to abort and dispatch the
+-               * workload as a non-secure batch.
++               * We don't try to handle BATCH_BUFFER_START because it adds
++               * non-trivial complexity. Instead we abort the scan and return
++               * and error to indicate that the batch is unsafe.
+                */
+               if (desc->cmd.value == MI_BATCH_BUFFER_START) {
+                       ret = -EACCES;
+@@ -1342,6 +1437,7 @@ int i915_cmd_parser_get_version(struct d
+        *    the parser enabled.
+        * 9. Don't whitelist or handle oacontrol specially, as ownership
+        *    for oacontrol state is moving to i915-perf.
++       * 10. Support for Gen9 BCS Parsing
+        */
+-      return 9;
++      return 10;
+ }
+--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
++++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
+@@ -159,7 +159,8 @@ int intel_sanitize_enable_ppgtt(struct d
+       if (enable_ppgtt == 0 && INTEL_GEN(dev_priv) < 9)
+               return 0;
+-      if (enable_ppgtt == 1)
++      /* Full PPGTT is required by the Gen9 cmdparser */
++      if (enable_ppgtt == 1 && INTEL_GEN(dev_priv) != 9)
+               return 1;
+       if (enable_ppgtt == 2 && has_full_ppgtt)
+--- a/drivers/gpu/drm/i915/i915_reg.h
++++ b/drivers/gpu/drm/i915/i915_reg.h
+@@ -703,6 +703,10 @@ static inline bool i915_mmio_reg_valid(i
+  */
+ #define BCS_SWCTRL _MMIO(0x22200)
++/* There are 16 GPR registers */
++#define BCS_GPR(n)    _MMIO(0x22600 + (n) * 8)
++#define BCS_GPR_UDW(n)        _MMIO(0x22600 + (n) * 8 + 4)
++
+ #define GPGPU_THREADS_DISPATCHED        _MMIO(0x2290)
+ #define GPGPU_THREADS_DISPATCHED_UDW  _MMIO(0x2290 + 4)
+ #define HS_INVOCATION_COUNT             _MMIO(0x2300)
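
The commit message notes that, for the Gen9 blitter, the tables only need to let the parser delineate commands and flag register accesses. How delineation works may be easier to see in isolation; the sketch below is simplified from the parser loop in i915_cmd_parser.c and reuses the real descriptor fields, but it is illustrative only, not code added by this patch:

    /*
     * Dword count of the command at 'cmd_header'.  Fixed-size commands take
     * their size from the table; variable-size commands carry a length field
     * in the header, extracted with the descriptor's mask.  For commands not
     * in the table the mask comes from the engine's get_cmd_length_mask()
     * callback - 0xFF for MI/blitter clients on Gen9, per the hunk above.
     */
    static u32 cmd_dword_count(u32 cmd_header,
                               const struct drm_i915_cmd_descriptor *desc)
    {
            if (desc->flags & CMD_DESC_FIXED)
                    return desc->length.fixed;

            return (cmd_header & desc->length.mask) + LENGTH_BIAS; /* LENGTH_BIAS == 2 */
    }
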
diff --git a/queue-4.14/drm-i915-add-support-for-mandatory-cmdparsing.patch b/queue-4.14/drm-i915-add-support-for-mandatory-cmdparsing.patch
new file mode 100644 (file)
index 0000000..2b6d1b6
--- /dev/null
@@ -0,0 +1,106 @@
+From foo@baz Tue 12 Nov 2019 04:10:24 PM CET
+From: Jon Bloomfield <jon.bloomfield@intel.com>
+Date: Wed, 1 Aug 2018 09:33:59 -0700
+Subject: drm/i915: Add support for mandatory cmdparsing
+
+From: Jon Bloomfield <jon.bloomfield@intel.com>
+
+commit 311a50e76a33d1e029563c24b2ff6db0c02b5afe upstream.
+
+The existing cmdparser for gen7 can be bypassed by specifying
+batch_len=0 in the execbuf call. This is safe because bypassing
+simply reduces the cmd-set available.
+
+In a later patch we will introduce cmdparsing for gen9, as a
+security measure, which must be strictly enforced since without
+it we are vulnerable to DoS attacks.
+
+Introduce the concept of 'required' cmd parsing that cannot be
+bypassed by submitting zero-length bb's.
+
+v2: rebase (Mika)
+v2: rebase (Mika)
+v3: fix conflict on engine flags (Mika)
+
+Signed-off-by: Jon Bloomfield <jon.bloomfield@intel.com>
+Cc: Joonas Lahtinen <joonas.lahtinen@intel.com>
+Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/i915/i915_cmd_parser.c     |    6 +++---
+ drivers/gpu/drm/i915/i915_gem_execbuffer.c |    3 ++-
+ drivers/gpu/drm/i915/intel_ringbuffer.h    |   14 +++++++++++---
+ 3 files changed, 16 insertions(+), 7 deletions(-)
+
+--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
++++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
+@@ -916,7 +916,7 @@ void intel_engine_init_cmd_parser(struct
+               return;
+       }
+-      engine->flags |= I915_ENGINE_NEEDS_CMD_PARSER;
++      engine->flags |= I915_ENGINE_USING_CMD_PARSER;
+ }
+ /**
+@@ -928,7 +928,7 @@ void intel_engine_init_cmd_parser(struct
+  */
+ void intel_engine_cleanup_cmd_parser(struct intel_engine_cs *engine)
+ {
+-      if (!intel_engine_needs_cmd_parser(engine))
++      if (!intel_engine_using_cmd_parser(engine))
+               return;
+       fini_hash_table(engine);
+@@ -1317,7 +1317,7 @@ int i915_cmd_parser_get_version(struct d
+       /* If the command parser is not enabled, report 0 - unsupported */
+       for_each_engine(engine, dev_priv, id) {
+-              if (intel_engine_needs_cmd_parser(engine)) {
++              if (intel_engine_using_cmd_parser(engine)) {
+                       active = true;
+                       break;
+               }
+--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
++++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+@@ -270,7 +270,8 @@ static inline u64 gen8_noncanonical_addr
+ static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb)
+ {
+-      return intel_engine_needs_cmd_parser(eb->engine) && eb->batch_len;
++      return intel_engine_requires_cmd_parser(eb->engine) ||
++              (intel_engine_using_cmd_parser(eb->engine) && eb->batch_len);
+ }
+ static int eb_create(struct i915_execbuffer *eb)
+--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
++++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
+@@ -417,7 +417,8 @@ struct intel_engine_cs {
+       struct intel_engine_hangcheck hangcheck;
+-#define I915_ENGINE_NEEDS_CMD_PARSER BIT(0)
++#define I915_ENGINE_USING_CMD_PARSER  BIT(0)
++#define I915_ENGINE_REQUIRES_CMD_PARSER       BIT(3)
+       unsigned int flags;
+       /*
+@@ -445,9 +446,16 @@ struct intel_engine_cs {
+       u32 (*get_cmd_length_mask)(u32 cmd_header);
+ };
+-static inline bool intel_engine_needs_cmd_parser(struct intel_engine_cs *engine)
++static inline bool
++intel_engine_using_cmd_parser(const struct intel_engine_cs *engine)
+ {
+-      return engine->flags & I915_ENGINE_NEEDS_CMD_PARSER;
++      return engine->flags & I915_ENGINE_USING_CMD_PARSER;
++}
++
++static inline bool
++intel_engine_requires_cmd_parser(const struct intel_engine_cs *engine)
++{
++      return engine->flags & I915_ENGINE_REQUIRES_CMD_PARSER;
+ }
+ static inline unsigned int
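
To summarize the policy this patch introduces in one place (a paraphrase of eb_use_cmdparser() above, not extra code from the patch):

    /*
     * REQUIRES_CMD_PARSER set              -> always parse (mandatory, e.g. Gen9 BCS later)
     * USING_CMD_PARSER set, batch_len != 0 -> parse (existing gen7 behaviour)
     * USING_CMD_PARSER set, batch_len == 0 -> bypass allowed; only reduces the cmd set
     * neither flag set                     -> never parse
     */
    static bool should_use_cmdparser(const struct intel_engine_cs *engine,
                                     u32 batch_len)
    {
            return intel_engine_requires_cmd_parser(engine) ||
                   (intel_engine_using_cmd_parser(engine) && batch_len);
    }
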
diff --git a/queue-4.14/drm-i915-allow-parsing-of-unsized-batches.patch b/queue-4.14/drm-i915-allow-parsing-of-unsized-batches.patch
new file mode 100644 (file)
index 0000000..e989c05
--- /dev/null
@@ -0,0 +1,60 @@
+From foo@baz Tue 12 Nov 2019 04:10:24 PM CET
+From: Jon Bloomfield <jon.bloomfield@intel.com>
+Date: Wed, 1 Aug 2018 09:45:50 -0700
+Subject: drm/i915: Allow parsing of unsized batches
+
+From: Jon Bloomfield <jon.bloomfield@intel.com>
+
+commit 435e8fc059dbe0eec823a75c22da2972390ba9e0 upstream.
+
+In "drm/i915: Add support for mandatory cmdparsing" we introduced the
+concept of mandatory parsing. This allows the cmdparser to be invoked
+even when user passes batch_len=0 to the execbuf ioctl's.
+
+However, the cmdparser needs to know the extents of the buffer being
+scanned. Refactor the code to ensure the cmdparser uses the actual
+object size, instead of the incoming length, if user passes 0.
+
+Signed-off-by: Jon Bloomfield <jon.bloomfield@intel.com>
+Cc: Tony Luck <tony.luck@intel.com>
+Cc: Dave Airlie <airlied@redhat.com>
+Cc: Takashi Iwai <tiwai@suse.de>
+Cc: Tyler Hicks <tyhicks@canonical.com>
+Reviewed-by: Chris Wilson <chris.p.wilson@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/i915/i915_gem_execbuffer.c |    9 +++++----
+ 1 file changed, 5 insertions(+), 4 deletions(-)
+
+--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
++++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+@@ -271,7 +271,8 @@ static inline u64 gen8_noncanonical_addr
+ static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb)
+ {
+       return intel_engine_requires_cmd_parser(eb->engine) ||
+-              (intel_engine_using_cmd_parser(eb->engine) && eb->batch_len);
++              (intel_engine_using_cmd_parser(eb->engine) &&
++               eb->args->batch_len);
+ }
+ static int eb_create(struct i915_execbuffer *eb)
+@@ -2359,6 +2360,9 @@ i915_gem_do_execbuffer(struct drm_device
+               goto err_vma;
+       }
++      if (eb.batch_len == 0)
++              eb.batch_len = eb.batch->size - eb.batch_start_offset;
++
+       if (eb_use_cmdparser(&eb)) {
+               struct i915_vma *vma;
+@@ -2369,9 +2373,6 @@ i915_gem_do_execbuffer(struct drm_device
+               }
+       }
+-      if (eb.batch_len == 0)
+-              eb.batch_len = eb.batch->size - eb.batch_start_offset;
+-
+       /*
+        * snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
+        * batch" bit. Hence we need to pin secure batches into the global gtt.
diff --git a/queue-4.14/drm-i915-cmdparser-add-support-for-backward-jumps.patch b/queue-4.14/drm-i915-cmdparser-add-support-for-backward-jumps.patch
new file mode 100644 (file)
index 0000000..a2dc5d9
--- /dev/null
@@ -0,0 +1,404 @@
+From foo@baz Tue 12 Nov 2019 04:10:24 PM CET
+From: Jon Bloomfield <jon.bloomfield@intel.com>
+Date: Thu, 20 Sep 2018 09:58:36 -0700
+Subject: drm/i915/cmdparser: Add support for backward jumps
+
+From: Jon Bloomfield <jon.bloomfield@intel.com>
+
+commit f8c08d8faee5567803c8c533865296ca30286bbf upstream.
+
+To keep things manageable, the pre-gen9 cmdparser does not
+attempt to track any form of nested BB_START's. This did not
+prevent usermode from using nested starts, or even chained
+batches because the cmdparser is not strictly enforced pre gen9.
+
+Instead, the existence of a nested BB_START would cause the batch
+to be emitted in insecure mode, and any privileged capabilities
+would not be available.
+
+For Gen9, the cmdparser becomes mandatory (for BCS at least), and
+so not providing any form of nested BB_START support becomes
+overly restrictive. Any such batch will simply not run.
+
+We make heavy use of backward jumps in igt, and it is much easier
+to add support for this restricted subset of nested jumps, than to
+rewrite the whole of our test suite to avoid them.
+
+Add the required logic to support limited backward jumps, to
+instructions that have already been validated by the parser.
+
+Note that it's not sufficient to simply approve any BB_START
+that jumps backwards in the buffer because this would allow an
+attacker to embed a rogue instruction sequence within the
+operand words of a harmless instruction (say LRI) and jump to
+that.
+
+We introduce a bit array to track every instr offset successfully
+validated, and test the target of BB_START against this. If the
+target offset hits, it is re-written to the same offset in the
+shadow buffer and the BB_START cmd is allowed.
+
+Note: This patch deliberately ignores checkpatch issues in the
+cmdtables, in order to match the style of the surrounding code.
+We'll correct the entire file in one go in a later patch.
+
+v2: set dispatch secure late (Mika)
+v3: rebase (Mika)
+v4: Clear whitelist on each parse
+Minor review updates (Chris)
+v5: Correct backward jump batching
+v6: fix compilation error due to struct eb shuffle (Mika)
+
+Signed-off-by: Jon Bloomfield <jon.bloomfield@intel.com>
+Cc: Tony Luck <tony.luck@intel.com>
+Cc: Dave Airlie <airlied@redhat.com>
+Cc: Takashi Iwai <tiwai@suse.de>
+Cc: Tyler Hicks <tyhicks@canonical.com>
+Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
+Reviewed-by: Chris Wilson <chris.p.wilson@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/i915/i915_cmd_parser.c     |  151 ++++++++++++++++++++++++++---
+ drivers/gpu/drm/i915/i915_drv.h            |    9 +
+ drivers/gpu/drm/i915/i915_gem_context.c    |    5 
+ drivers/gpu/drm/i915/i915_gem_context.h    |    6 +
+ drivers/gpu/drm/i915/i915_gem_execbuffer.c |   32 ++++--
+ 5 files changed, 177 insertions(+), 26 deletions(-)
+
+--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
++++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
+@@ -481,6 +481,19 @@ static const struct drm_i915_cmd_descrip
+             .reg = { .offset = 1, .mask = 0x007FFFFC }               ),
+       CMD(  MI_LOAD_REGISTER_REG,             SMI,    !F,  0xFF,  W,
+             .reg = { .offset = 1, .mask = 0x007FFFFC, .step = 1 }    ),
++
++      /*
++       * We allow BB_START but apply further checks. We just sanitize the
++       * basic fields here.
++       */
++#define MI_BB_START_OPERAND_MASK   GENMASK(SMI-1, 0)
++#define MI_BB_START_OPERAND_EXPECT (MI_BATCH_PPGTT_HSW | 1)
++      CMD(  MI_BATCH_BUFFER_START_GEN8,       SMI,    !F,  0xFF,  B,
++            .bits = {{
++                      .offset = 0,
++                      .mask = MI_BB_START_OPERAND_MASK,
++                      .expected = MI_BB_START_OPERAND_EXPECT,
++            }},                                                      ),
+ };
+ static const struct drm_i915_cmd_descriptor noop_desc =
+@@ -1292,15 +1305,113 @@ static bool check_cmd(const struct intel
+       return true;
+ }
++static int check_bbstart(const struct i915_gem_context *ctx,
++                       u32 *cmd, u32 offset, u32 length,
++                       u32 batch_len,
++                       u64 batch_start,
++                       u64 shadow_batch_start)
++{
++      u64 jump_offset, jump_target;
++      u32 target_cmd_offset, target_cmd_index;
++
++      /* For igt compatibility on older platforms */
++      if (CMDPARSER_USES_GGTT(ctx->i915)) {
++              DRM_DEBUG("CMD: Rejecting BB_START for ggtt based submission\n");
++              return -EACCES;
++      }
++
++      if (length != 3) {
++              DRM_DEBUG("CMD: Recursive BB_START with bad length(%u)\n",
++                        length);
++              return -EINVAL;
++      }
++
++      jump_target = *(u64*)(cmd+1);
++      jump_offset = jump_target - batch_start;
++
++      /*
++       * Any underflow of jump_target is guaranteed to be outside the range
++       * of a u32, so >= test catches both too large and too small
++       */
++      if (jump_offset >= batch_len) {
++              DRM_DEBUG("CMD: BB_START to 0x%llx jumps out of BB\n",
++                        jump_target);
++              return -EINVAL;
++      }
++
++      /*
++       * This cannot overflow a u32 because we already checked jump_offset
++       * is within the BB, and the batch_len is a u32
++       */
++      target_cmd_offset = lower_32_bits(jump_offset);
++      target_cmd_index = target_cmd_offset / sizeof(u32);
++
++      *(u64*)(cmd + 1) = shadow_batch_start + target_cmd_offset;
++
++      if (target_cmd_index == offset)
++              return 0;
++
++      if (ctx->jump_whitelist_cmds <= target_cmd_index) {
++              DRM_DEBUG("CMD: Rejecting BB_START - truncated whitelist array\n");
++              return -EINVAL;
++      } else if (!test_bit(target_cmd_index, ctx->jump_whitelist)) {
++              DRM_DEBUG("CMD: BB_START to 0x%llx not a previously executed cmd\n",
++                        jump_target);
++              return -EINVAL;
++      }
++
++      return 0;
++}
++
++static void init_whitelist(struct i915_gem_context *ctx, u32 batch_len)
++{
++      const u32 batch_cmds = DIV_ROUND_UP(batch_len, sizeof(u32));
++      const u32 exact_size = BITS_TO_LONGS(batch_cmds);
++      u32 next_size = BITS_TO_LONGS(roundup_pow_of_two(batch_cmds));
++      unsigned long *next_whitelist;
++
++      if (CMDPARSER_USES_GGTT(ctx->i915))
++              return;
++
++      if (batch_cmds <= ctx->jump_whitelist_cmds) {
++              memset(ctx->jump_whitelist, 0, exact_size * sizeof(u32));
++              return;
++      }
++
++again:
++      next_whitelist = kcalloc(next_size, sizeof(long), GFP_KERNEL);
++      if (next_whitelist) {
++              kfree(ctx->jump_whitelist);
++              ctx->jump_whitelist = next_whitelist;
++              ctx->jump_whitelist_cmds =
++                      next_size * BITS_PER_BYTE * sizeof(long);
++              return;
++      }
++
++      if (next_size > exact_size) {
++              next_size = exact_size;
++              goto again;
++      }
++
++      DRM_DEBUG("CMD: Failed to extend whitelist. BB_START may be disallowed\n");
++      memset(ctx->jump_whitelist, 0,
++             BITS_TO_LONGS(ctx->jump_whitelist_cmds) * sizeof(u32));
++
++      return;
++}
++
+ #define LENGTH_BIAS 2
+ /**
+  * i915_parse_cmds() - parse a submitted batch buffer for privilege violations
++ * @ctx: the context in which the batch is to execute
+  * @engine: the engine on which the batch is to execute
+  * @batch_obj: the batch buffer in question
+- * @shadow_batch_obj: copy of the batch buffer in question
++ * @batch_start: Canonical base address of batch
+  * @batch_start_offset: byte offset in the batch at which execution starts
+  * @batch_len: length of the commands in batch_obj
++ * @shadow_batch_obj: copy of the batch buffer in question
++ * @shadow_batch_start: Canonical base address of shadow_batch_obj
+  *
+  * Parses the specified batch buffer looking for privilege violations as
+  * described in the overview.
+@@ -1308,13 +1419,17 @@ static bool check_cmd(const struct intel
+  * Return: non-zero if the parser finds violations or otherwise fails; -EACCES
+  * if the batch appears legal but should use hardware parsing
+  */
+-int intel_engine_cmd_parser(struct intel_engine_cs *engine,
++
++int intel_engine_cmd_parser(struct i915_gem_context *ctx,
++                          struct intel_engine_cs *engine,
+                           struct drm_i915_gem_object *batch_obj,
+-                          struct drm_i915_gem_object *shadow_batch_obj,
++                          u64 batch_start,
+                           u32 batch_start_offset,
+-                          u32 batch_len)
++                          u32 batch_len,
++                          struct drm_i915_gem_object *shadow_batch_obj,
++                          u64 shadow_batch_start)
+ {
+-      u32 *cmd, *batch_end;
++      u32 *cmd, *batch_end, offset = 0;
+       struct drm_i915_cmd_descriptor default_desc = noop_desc;
+       const struct drm_i915_cmd_descriptor *desc = &default_desc;
+       bool needs_clflush_after = false;
+@@ -1328,6 +1443,8 @@ int intel_engine_cmd_parser(struct intel
+               return PTR_ERR(cmd);
+       }
++      init_whitelist(ctx, batch_len);
++
+       /*
+        * We use the batch length as size because the shadow object is as
+        * large or larger and copy_batch() will write MI_NOPs to the extra
+@@ -1348,16 +1465,6 @@ int intel_engine_cmd_parser(struct intel
+                       goto err;
+               }
+-              /*
+-               * We don't try to handle BATCH_BUFFER_START because it adds
+-               * non-trivial complexity. Instead we abort the scan and return
+-               * and error to indicate that the batch is unsafe.
+-               */
+-              if (desc->cmd.value == MI_BATCH_BUFFER_START) {
+-                      ret = -EACCES;
+-                      goto err;
+-              }
+-
+               if (desc->flags & CMD_DESC_FIXED)
+                       length = desc->length.fixed;
+               else
+@@ -1377,7 +1484,21 @@ int intel_engine_cmd_parser(struct intel
+                       goto err;
+               }
++              if (desc->cmd.value == MI_BATCH_BUFFER_START) {
++                      ret = check_bbstart(ctx, cmd, offset, length,
++                                          batch_len, batch_start,
++                                          shadow_batch_start);
++
++                      if (ret)
++                              goto err;
++                      break;
++              }
++
++              if (ctx->jump_whitelist_cmds > offset)
++                      set_bit(offset, ctx->jump_whitelist);
++
+               cmd += length;
++              offset += length;
+               if  (cmd >= batch_end) {
+                       DRM_DEBUG_DRIVER("CMD: Got to the end of the buffer w/o a BBE cmd!\n");
+                       ret = -EINVAL;
+--- a/drivers/gpu/drm/i915/i915_drv.h
++++ b/drivers/gpu/drm/i915/i915_drv.h
+@@ -3857,11 +3857,14 @@ const char *i915_cache_level_str(struct
+ int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv);
+ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine);
+ void intel_engine_cleanup_cmd_parser(struct intel_engine_cs *engine);
+-int intel_engine_cmd_parser(struct intel_engine_cs *engine,
++int intel_engine_cmd_parser(struct i915_gem_context *cxt,
++                          struct intel_engine_cs *engine,
+                           struct drm_i915_gem_object *batch_obj,
+-                          struct drm_i915_gem_object *shadow_batch_obj,
++                          u64 user_batch_start,
+                           u32 batch_start_offset,
+-                          u32 batch_len);
++                          u32 batch_len,
++                          struct drm_i915_gem_object *shadow_batch_obj,
++                          u64 shadow_batch_start);
+ /* i915_perf.c */
+ extern void i915_perf_init(struct drm_i915_private *dev_priv);
+--- a/drivers/gpu/drm/i915/i915_gem_context.c
++++ b/drivers/gpu/drm/i915/i915_gem_context.c
+@@ -141,6 +141,8 @@ static void i915_gem_context_free(struct
+               __i915_gem_object_release_unless_active(ce->state->obj);
+       }
++      kfree(ctx->jump_whitelist);
++
+       kfree(ctx->name);
+       put_pid(ctx->pid);
+@@ -321,6 +323,9 @@ __create_hw_context(struct drm_i915_priv
+       else
+               ctx->ggtt_offset_bias = I915_GTT_PAGE_SIZE;
++      ctx->jump_whitelist = NULL;
++      ctx->jump_whitelist_cmds = 0;
++
+       return ctx;
+ err_pid:
+--- a/drivers/gpu/drm/i915/i915_gem_context.h
++++ b/drivers/gpu/drm/i915/i915_gem_context.h
+@@ -181,6 +181,12 @@ struct i915_gem_context {
+       /** remap_slice: Bitmask of cache lines that need remapping */
+       u8 remap_slice;
++      /** jump_whitelist: Bit array for tracking cmds during cmdparsing */
++      unsigned long *jump_whitelist;
++
++      /** jump_whitelist_cmds: No of cmd slots available */
++      u32 jump_whitelist_cmds;
++
+       /** handles_vma: rbtree to look up our context specific obj/vma for
+        * the user handle. (user handles are per fd, but the binding is
+        * per vm, which may be one per context or shared with the global GTT)
+--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
++++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+@@ -1927,7 +1927,6 @@ shadow_batch_pin(struct i915_execbuffer
+       if (CMDPARSER_USES_GGTT(dev_priv)) {
+               flags = PIN_GLOBAL;
+               vm = &dev_priv->ggtt.base;
+-              eb->batch_flags |= I915_DISPATCH_SECURE;
+       } else if (eb->vm->has_read_only) {
+               flags = PIN_USER;
+               vm = eb->vm;
+@@ -1944,6 +1943,8 @@ static struct i915_vma *eb_parse(struct
+ {
+       struct drm_i915_gem_object *shadow_batch_obj;
+       struct i915_vma *vma;
++      u64 batch_start;
++      u64 shadow_batch_start;
+       int err;
+       shadow_batch_obj = i915_gem_batch_pool_get(&eb->engine->batch_pool,
+@@ -1951,12 +1952,27 @@ static struct i915_vma *eb_parse(struct
+       if (IS_ERR(shadow_batch_obj))
+               return ERR_CAST(shadow_batch_obj);
+-      err = intel_engine_cmd_parser(eb->engine,
++      vma = shadow_batch_pin(eb, shadow_batch_obj);
++      if (IS_ERR(vma))
++              goto out;
++
++      batch_start = gen8_canonical_addr(eb->batch->node.start) +
++                    eb->batch_start_offset;
++
++      shadow_batch_start = gen8_canonical_addr(vma->node.start);
++
++      err = intel_engine_cmd_parser(eb->ctx,
++                                    eb->engine,
+                                     eb->batch->obj,
+-                                    shadow_batch_obj,
++                                    batch_start,
+                                     eb->batch_start_offset,
+-                                    eb->batch_len);
++                                    eb->batch_len,
++                                    shadow_batch_obj,
++                                    shadow_batch_start);
++
+       if (err) {
++              i915_vma_unpin(vma);
++
+               /*
+                * Unsafe GGTT-backed buffers can still be submitted safely
+                * as non-secure.
+@@ -1968,12 +1984,9 @@ static struct i915_vma *eb_parse(struct
+                       vma = NULL;
+               else
+                       vma = ERR_PTR(err);
+-              goto out;
+-      }
+-      vma = shadow_batch_pin(eb, shadow_batch_obj);
+-      if (IS_ERR(vma))
+               goto out;
++      }
+       eb->vma[eb->buffer_count] = i915_vma_get(vma);
+       eb->flags[eb->buffer_count] =
+@@ -1984,6 +1997,9 @@ static struct i915_vma *eb_parse(struct
+       eb->batch = vma;
+       /* eb->batch_len unchanged */
++      if (CMDPARSER_USES_GGTT(eb->i915))
++              eb->batch_flags |= I915_DISPATCH_SECURE;
++
+ out:
+       i915_gem_object_unpin_pages(shadow_batch_obj);
+       return vma;
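
One subtlety worth spelling out from check_bbstart() above: a single unsigned comparison of jump_offset against batch_len rejects targets on both sides of the batch, because an underflowing subtraction wraps to a huge u64. A worked example with made-up addresses (C comment form, illustrative only):

    /*
     * Assume a 4 KiB batch with batch_start = 0x10000 (batch_len = 0x1000):
     *
     *   jump_target = 0x0ff00: jump_offset = 0x0ff00 - 0x10000
     *                          = 0xffffffffffffff00 (u64 wrap) >= 0x1000 -> rejected
     *   jump_target = 0x11000: jump_offset = 0x1000  >= 0x1000           -> rejected
     *   jump_target = 0x10040: jump_offset = 0x0040  <  0x1000           -> accepted,
     *                          but only if bit 0x10 (= 0x40 / sizeof(u32)) is set in
     *                          the jump whitelist; the BB_START operand is then
     *                          rewritten to shadow_batch_start + 0x40.
     */
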
diff --git a/queue-4.14/drm-i915-cmdparser-check-reg_table_count-before-derefencing.patch b/queue-4.14/drm-i915-cmdparser-check-reg_table_count-before-derefencing.patch
new file mode 100644 (file)
index 0000000..1b2103b
--- /dev/null
@@ -0,0 +1,55 @@
+From foo@baz Tue 12 Nov 2019 04:10:24 PM CET
+From: Michal Srb <msrb@suse.com>
+Date: Mon, 5 Feb 2018 16:04:37 +0000
+Subject: drm/i915/cmdparser: Check reg_table_count before derefencing.
+
+From: Michal Srb <msrb@suse.com>
+
+commit b18224e95cb13ef7517aa26e6b47c85117327f11 upstream.
+
+The find_reg function was assuming that there is always at least one table in
+reg_tables. It is not always true.
+
+In case of VCS or VECS, the reg_tables is NULL and reg_table_count is 0,
+implying that no register-accessing commands are allowed. However, the command
+tables include commands such as MI_STORE_REGISTER_MEM. When trying to check
+such command, the find_reg would dereference NULL pointer.
+
+Now it will just return NULL meaning that the register was not found and the
+command will be rejected.
+
+Fixes: 76ff480ec963 ("drm/i915/cmdparser: Use binary search for faster register lookup")
+Signed-off-by: Michal Srb <msrb@suse.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20180205142916.27092-2-msrb@suse.com
+Cc: Chris Wilson <chris@chris-wilson.co.uk>
+Cc: Matthew Auld <matthew.auld@intel.com>
+Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
+Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+Link: https://patchwork.freedesktop.org/patch/msgid/20180205160438.3267-1-chris@chris-wilson.co.uk
+Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/i915/i915_cmd_parser.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
++++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
+@@ -1038,7 +1038,7 @@ find_reg(const struct intel_engine_cs *e
+       const struct drm_i915_reg_table *table = engine->reg_tables;
+       int count = engine->reg_table_count;
+-      do {
++      for (; count > 0; ++table, --count) {
+               if (!table->master || is_master) {
+                       const struct drm_i915_reg_descriptor *reg;
+@@ -1046,7 +1046,7 @@ find_reg(const struct intel_engine_cs *e
+                       if (reg != NULL)
+                               return reg;
+               }
+-      } while (table++, --count);
++      }
+       return NULL;
+ }
diff --git a/queue-4.14/drm-i915-cmdparser-do-not-check-past-the-cmd-length.patch b/queue-4.14/drm-i915-cmdparser-do-not-check-past-the-cmd-length.patch
new file mode 100644 (file)
index 0000000..302296a
--- /dev/null
@@ -0,0 +1,43 @@
+From foo@baz Tue 12 Nov 2019 04:10:24 PM CET
+From: Michal Srb <msrb@suse.com>
+Date: Mon, 5 Feb 2018 16:04:38 +0000
+Subject: drm/i915/cmdparser: Do not check past the cmd length.
+
+From: Michal Srb <msrb@suse.com>
+
+commit b3ad99ed45917f42884fee731fa3cf9b8229a26c upstream.
+
+The command MEDIA_VFE_STATE checks bits at offset +2 dwords. However, it is
+possible to have MEDIA_VFE_STATE command with length = 0 + LENGTH_BIAS = 2.
+In that case check_cmd will read bits from the following command, or even past
+the end of the buffer.
+
+If the offset ends up outside of the command length, reject the command.
+
+Fixes: 351e3db2b363 ("drm/i915: Implement command buffer parsing logic")
+Signed-off-by: Michal Srb <msrb@suse.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20180205151745.29292-1-msrb@suse.com
+Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
+Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+Link: https://patchwork.freedesktop.org/patch/msgid/20180205160438.3267-2-chris@chris-wilson.co.uk
+Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/i915/i915_cmd_parser.c |    6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
++++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
+@@ -1218,6 +1218,12 @@ static bool check_cmd(const struct intel
+                                       continue;
+                       }
++                      if (desc->bits[i].offset >= length) {
++                              DRM_DEBUG_DRIVER("CMD: Rejected command 0x%08X, too short to check bitmask (%s)\n",
++                                               *cmd, engine->name);
++                              return false;
++                      }
++
+                       dword = cmd[desc->bits[i].offset] &
+                               desc->bits[i].mask;
diff --git a/queue-4.14/drm-i915-cmdparser-fix-jump-whitelist-clearing.patch b/queue-4.14/drm-i915-cmdparser-fix-jump-whitelist-clearing.patch
new file mode 100644 (file)
index 0000000..868c502
--- /dev/null
@@ -0,0 +1,49 @@
+From foo@baz Tue 12 Nov 2019 04:10:24 PM CET
+From: Ben Hutchings <ben@decadent.org.uk>
+Date: Mon, 11 Nov 2019 08:13:24 -0800
+Subject: drm/i915/cmdparser: Fix jump whitelist clearing
+
+From: Ben Hutchings <ben@decadent.org.uk>
+
+commit ea0b163b13ffc52818c079adb00d55e227a6da6f upstream.
+
+When a jump_whitelist bitmap is reused, it needs to be cleared.
+Currently this is done with memset() and the size calculation assumes
+bitmaps are made of 32-bit words, not longs.  So on 64-bit
+architectures, only the first half of the bitmap is cleared.
+
+If some whitelist bits are carried over between successive batches
+submitted on the same context, this will presumably allow embedding
+the rogue instructions that we're trying to reject.
+
+Use bitmap_zero() instead, which gets the calculation right.
+
+Fixes: f8c08d8faee5 ("drm/i915/cmdparser: Add support for backward jumps")
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Signed-off-by: Jon Bloomfield <jon.bloomfield@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/i915/i915_cmd_parser.c |    5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
++++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
+@@ -1374,7 +1374,7 @@ static void init_whitelist(struct i915_g
+               return;
+       if (batch_cmds <= ctx->jump_whitelist_cmds) {
+-              memset(ctx->jump_whitelist, 0, exact_size * sizeof(u32));
++              bitmap_zero(ctx->jump_whitelist, batch_cmds);
+               return;
+       }
+@@ -1394,8 +1394,7 @@ again:
+       }
+       DRM_DEBUG("CMD: Failed to extend whitelist. BB_START may be disallowed\n");
+-      memset(ctx->jump_whitelist, 0,
+-             BITS_TO_LONGS(ctx->jump_whitelist_cmds) * sizeof(u32));
++      bitmap_zero(ctx->jump_whitelist, ctx->jump_whitelist_cmds);
+       return;
+ }
diff --git a/queue-4.14/drm-i915-cmdparser-ignore-length-operands-during-command-matching.patch b/queue-4.14/drm-i915-cmdparser-ignore-length-operands-during-command-matching.patch
new file mode 100644 (file)
index 0000000..6acc050
--- /dev/null
@@ -0,0 +1,40 @@
+From foo@baz Tue 12 Nov 2019 04:10:24 PM CET
+From: Jon Bloomfield <jon.bloomfield@intel.com>
+Date: Thu, 20 Sep 2018 09:45:10 -0700
+Subject: drm/i915/cmdparser: Ignore Length operands during command matching
+
+From: Jon Bloomfield <jon.bloomfield@intel.com>
+
+commit 926abff21a8f29ef159a3ac893b05c6e50e043c3 upstream.
+
+Some of the gen instruction macros (e.g. MI_DISPLAY_FLIP) have the
+length directly encoded in them. Since these are used directly in
+the tables, the Length becomes part of the comparison used for
+matching during parsing. Thus, if the cmd being parsed has a
+different length to that in the table, it is not matched and the
+cmd is accepted via the default variable length path.
+
+Fix by masking out everything except the Opcode in the cmd tables
+
+Cc: Tony Luck <tony.luck@intel.com>
+Cc: Dave Airlie <airlied@redhat.com>
+Cc: Takashi Iwai <tiwai@suse.de>
+Cc: Tyler Hicks <tyhicks@canonical.com>
+Signed-off-by: Jon Bloomfield <jon.bloomfield@intel.com>
+Reviewed-by: Chris Wilson <chris.p.wilson@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/i915/i915_cmd_parser.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
++++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
+@@ -187,7 +187,7 @@ struct drm_i915_cmd_table {
+ #define CMD(op, opm, f, lm, fl, ...)                          \
+       {                                                       \
+               .flags = (fl) | ((f) ? CMD_DESC_FIXED : 0),     \
+-              .cmd = { (op), ~0u << (opm) },                  \
++              .cmd = { (op & ~0u << (opm)), ~0u << (opm) },   \
+               .length = { (lm) },                             \
+               __VA_ARGS__                                     \
+       }
diff --git a/queue-4.14/drm-i915-cmdparser-use-explicit-goto-for-error-paths.patch b/queue-4.14/drm-i915-cmdparser-use-explicit-goto-for-error-paths.patch
new file mode 100644 (file)
index 0000000..2fb9a65
--- /dev/null
@@ -0,0 +1,98 @@
+From foo@baz Tue 12 Nov 2019 04:10:24 PM CET
+From: Jon Bloomfield <jon.bloomfield@intel.com>
+Date: Thu, 27 Sep 2018 10:23:17 -0700
+Subject: drm/i915/cmdparser: Use explicit goto for error paths
+
+From: Jon Bloomfield <jon.bloomfield@intel.com>
+
+commit 0546a29cd884fb8184731c79ab008927ca8859d0 upstream.
+
+In the next patch we will be adding a second valid
+termination condition which will require a small
+amount of refactoring to share logic with the BB_END
+case.
+
+Refactor all error conditions to jump to a dedicated
+exit path, with 'break' reserved only for a successful
+parse.
+
+Signed-off-by: Jon Bloomfield <jon.bloomfield@intel.com>
+Cc: Tony Luck <tony.luck@intel.com>
+Cc: Dave Airlie <airlied@redhat.com>
+Cc: Takashi Iwai <tiwai@suse.de>
+Cc: Tyler Hicks <tyhicks@canonical.com>
+Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
+Reviewed-by: Chris Wilson <chris.p.wilson@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/i915/i915_cmd_parser.c |   25 +++++++++++++------------
+ 1 file changed, 13 insertions(+), 12 deletions(-)
+
+--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
++++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
+@@ -1337,21 +1337,15 @@ int intel_engine_cmd_parser(struct intel
+       do {
+               u32 length;
+-              if (*cmd == MI_BATCH_BUFFER_END) {
+-                      if (needs_clflush_after) {
+-                              void *ptr = page_mask_bits(shadow_batch_obj->mm.mapping);
+-                              drm_clflush_virt_range(ptr,
+-                                                     (void *)(cmd + 1) - ptr);
+-                      }
++              if (*cmd == MI_BATCH_BUFFER_END)
+                       break;
+-              }
+               desc = find_cmd(engine, *cmd, desc, &default_desc);
+               if (!desc) {
+                       DRM_DEBUG_DRIVER("CMD: Unrecognized command: 0x%08X\n",
+                                        *cmd);
+                       ret = -EINVAL;
+-                      break;
++                      goto err;
+               }
+               /*
+@@ -1361,7 +1355,7 @@ int intel_engine_cmd_parser(struct intel
+                */
+               if (desc->cmd.value == MI_BATCH_BUFFER_START) {
+                       ret = -EACCES;
+-                      break;
++                      goto err;
+               }
+               if (desc->flags & CMD_DESC_FIXED)
+@@ -1375,22 +1369,29 @@ int intel_engine_cmd_parser(struct intel
+                                        length,
+                                        batch_end - cmd);
+                       ret = -EINVAL;
+-                      break;
++                      goto err;
+               }
+               if (!check_cmd(engine, desc, cmd, length)) {
+                       ret = -EACCES;
+-                      break;
++                      goto err;
+               }
+               cmd += length;
+               if  (cmd >= batch_end) {
+                       DRM_DEBUG_DRIVER("CMD: Got to the end of the buffer w/o a BBE cmd!\n");
+                       ret = -EINVAL;
+-                      break;
++                      goto err;
+               }
+       } while (1);
++      if (needs_clflush_after) {
++              void *ptr = page_mask_bits(shadow_batch_obj->mm.mapping);
++
++              drm_clflush_virt_range(ptr, (void *)(cmd + 1) - ptr);
++      }
++
++err:
+       i915_gem_object_unpin_map(shadow_batch_obj);
+       return ret;
+ }
diff --git a/queue-4.14/drm-i915-disable-secure-batches-for-gen6.patch b/queue-4.14/drm-i915-disable-secure-batches-for-gen6.patch
new file mode 100644 (file)
index 0000000..45277ee
--- /dev/null
@@ -0,0 +1,96 @@
+From foo@baz Tue 12 Nov 2019 04:10:24 PM CET
+From: Jon Bloomfield <jon.bloomfield@intel.com>
+Date: Fri, 8 Jun 2018 08:53:46 -0700
+Subject: drm/i915: Disable Secure Batches for gen6+
+
+From: Jon Bloomfield <jon.bloomfield@intel.com>
+
+commit 44157641d448cbc0c4b73c5231d2b911f0cb0427 upstream.
+
+Retroactively stop reporting support for secure batches
+through the api for gen6+ so that older binaries trigger
+the fallback path instead.
+
+Older binaries use secure batches pre gen6 to access resources
+that are not available to normal usermode processes. However,
+all known userspace explicitly checks for HAS_SECURE_BATCHES
+before relying on the secure batch feature.
+
+Since there are no known binaries relying on this for newer gens
+we can kill secure batches from gen6, via I915_PARAM_HAS_SECURE_BATCHES.
+
+v2: rebase (Mika)
+v3: rebase (Mika)
+
+Signed-off-by: Jon Bloomfield <jon.bloomfield@intel.com>
+Cc: Tony Luck <tony.luck@intel.com>
+Cc: Dave Airlie <airlied@redhat.com>
+Cc: Takashi Iwai <tiwai@suse.de>
+Cc: Tyler Hicks <tyhicks@canonical.com>
+Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
+Reviewed-by: Chris Wilson <chris.p.wilson@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/i915/i915_drv.c            |    2 +-
+ drivers/gpu/drm/i915/i915_drv.h            |    2 ++
+ drivers/gpu/drm/i915/i915_gem_execbuffer.c |   12 ++++++++++--
+ 3 files changed, 13 insertions(+), 3 deletions(-)
+
+--- a/drivers/gpu/drm/i915/i915_drv.c
++++ b/drivers/gpu/drm/i915/i915_drv.c
+@@ -323,7 +323,7 @@ static int i915_getparam(struct drm_devi
+               value = i915.semaphores;
+               break;
+       case I915_PARAM_HAS_SECURE_BATCHES:
+-              value = capable(CAP_SYS_ADMIN);
++              value = HAS_SECURE_BATCHES(dev_priv) && capable(CAP_SYS_ADMIN);
+               break;
+       case I915_PARAM_CMD_PARSER_VERSION:
+               value = i915_cmd_parser_get_version(dev_priv);
+--- a/drivers/gpu/drm/i915/i915_drv.h
++++ b/drivers/gpu/drm/i915/i915_drv.h
+@@ -2996,6 +2996,8 @@ intel_info(const struct drm_i915_private
+ #define HAS_BLT(dev_priv)     HAS_ENGINE(dev_priv, BCS)
+ #define HAS_VEBOX(dev_priv)   HAS_ENGINE(dev_priv, VECS)
++#define HAS_SECURE_BATCHES(dev_priv) (INTEL_GEN(dev_priv) < 6)
++
+ #define HAS_LLC(dev_priv)     ((dev_priv)->info.has_llc)
+ #define HAS_SNOOP(dev_priv)   ((dev_priv)->info.has_snoop)
+ #define HAS_EDRAM(dev_priv)   (!!((dev_priv)->edram_cap & EDRAM_ENABLED))
+--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
++++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+@@ -2195,6 +2195,7 @@ i915_gem_do_execbuffer(struct drm_device
+                      struct drm_i915_gem_exec_object2 *exec,
+                      struct drm_syncobj **fences)
+ {
++      struct drm_i915_private *dev_priv = to_i915(dev);
+       struct i915_execbuffer eb;
+       struct dma_fence *in_fence = NULL;
+       struct sync_file *out_fence = NULL;
+@@ -2204,7 +2205,7 @@ i915_gem_do_execbuffer(struct drm_device
+       BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS &
+                    ~__EXEC_OBJECT_UNKNOWN_FLAGS);
+-      eb.i915 = to_i915(dev);
++      eb.i915 = dev_priv;
+       eb.file = file;
+       eb.args = args;
+       if (DBG_FORCE_RELOC || !(args->flags & I915_EXEC_NO_RELOC))
+@@ -2226,8 +2227,15 @@ i915_gem_do_execbuffer(struct drm_device
+       eb.batch_flags = 0;
+       if (args->flags & I915_EXEC_SECURE) {
++              if (INTEL_GEN(dev_priv) >= 11)
++                      return -ENODEV;
++
++              /* Return -EPERM to trigger fallback code on old binaries. */
++              if (!HAS_SECURE_BATCHES(dev_priv))
++                      return -EPERM;
++
+               if (!drm_is_current_master(file) || !capable(CAP_SYS_ADMIN))
+-                  return -EPERM;
++                      return -EPERM;
+               eb.batch_flags |= I915_DISPATCH_SECURE;
+       }
diff --git a/queue-4.14/drm-i915-don-t-use-gpu-relocations-prior-to-cmdparser-stalls.patch b/queue-4.14/drm-i915-don-t-use-gpu-relocations-prior-to-cmdparser-stalls.patch
new file mode 100644 (file)
index 0000000..8043b62
--- /dev/null
@@ -0,0 +1,57 @@
+From foo@baz Tue 12 Nov 2019 04:10:24 PM CET
+From: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Sat, 26 Aug 2017 14:56:20 +0100
+Subject: drm/i915: Don't use GPU relocations prior to cmdparser stalls
+
+From: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 3dbf26ed7b9b40a8cb008ab9ad25703363af815d upstream.
+
+If we are using the cmdparser, we will have to copy the batch and so
+stall for the relocations. Rather than prolong that stall by adding more
+relocation requests, just use CPU relocations and do the stall upfront.
+
+Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20170826135620.25949-1-chris@chris-wilson.co.uk
+Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
+Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/i915/i915_gem_execbuffer.c |   11 ++++++++++-
+ 1 file changed, 10 insertions(+), 1 deletion(-)
+
+--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
++++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+@@ -268,6 +268,11 @@ static inline u64 gen8_noncanonical_addr
+       return address & GENMASK_ULL(GEN8_HIGH_ADDRESS_BIT, 0);
+ }
++static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb)
++{
++      return eb->engine->needs_cmd_parser && eb->batch_len;
++}
++
+ static int eb_create(struct i915_execbuffer *eb)
+ {
+       if (!(eb->args->flags & I915_EXEC_HANDLE_LUT)) {
+@@ -1165,6 +1170,10 @@ static u32 *reloc_gpu(struct i915_execbu
+       if (unlikely(!cache->rq)) {
+               int err;
++              /* If we need to copy for the cmdparser, we will stall anyway */
++              if (eb_use_cmdparser(eb))
++                      return ERR_PTR(-EWOULDBLOCK);
++
+               err = __reloc_gpu_alloc(eb, vma, len);
+               if (unlikely(err))
+                       return ERR_PTR(err);
+@@ -2305,7 +2314,7 @@ i915_gem_do_execbuffer(struct drm_device
+               goto err_vma;
+       }
+-      if (eb.engine->needs_cmd_parser && eb.batch_len) {
++      if (eb_use_cmdparser(&eb)) {
+               struct i915_vma *vma;
+               vma = eb_parse(&eb, drm_is_current_master(file));
diff --git a/queue-4.14/drm-i915-gen8-add-rc6-ctx-corruption-wa.patch b/queue-4.14/drm-i915-gen8-add-rc6-ctx-corruption-wa.patch
new file mode 100644 (file)
index 0000000..4cad9bc
--- /dev/null
@@ -0,0 +1,354 @@
+From foo@baz Tue 12 Nov 2019 04:10:24 PM CET
+From: Imre Deak <imre.deak@intel.com>
+Date: Mon, 9 Jul 2018 18:24:27 +0300
+Subject: drm/i915/gen8+: Add RC6 CTX corruption WA
+
+From: Imre Deak <imre.deak@intel.com>
+
+commit 7e34f4e4aad3fd34c02b294a3cf2321adf5b4438 upstream.
+
+In some circumstances the RC6 context can get corrupted. We can detect
+this and take the required action, that is disable RC6 and runtime PM.
+The HW recovers from the corrupted state after a system suspend/resume
+cycle, so detect the recovery and re-enable RC6 and runtime PM.
+
+v2: rebase (Mika)
+v3:
+- Move intel_suspend_gt_powersave() to the end of the GEM suspend
+  sequence.
+- Add commit message.
+v4:
+- Rebased on intel_uncore_forcewake_put(i915->uncore, ...) API
+  change.
+v5:
+- Rebased on latest upstream gt_pm refactoring.
+
+Signed-off-by: Imre Deak <imre.deak@intel.com>
+Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/i915/i915_drv.c         |    2 
+ drivers/gpu/drm/i915/i915_drv.h         |    6 +
+ drivers/gpu/drm/i915/i915_gem.c         |    6 +
+ drivers/gpu/drm/i915/i915_gem_request.c |    4 
+ drivers/gpu/drm/i915/i915_reg.h         |    2 
+ drivers/gpu/drm/i915/intel_drv.h        |    3 
+ drivers/gpu/drm/i915/intel_pm.c         |  153 ++++++++++++++++++++++++++++----
+ 7 files changed, 158 insertions(+), 18 deletions(-)
+
+--- a/drivers/gpu/drm/i915/i915_drv.c
++++ b/drivers/gpu/drm/i915/i915_drv.c
+@@ -1564,6 +1564,7 @@ static int i915_drm_suspend_late(struct
+       disable_rpm_wakeref_asserts(dev_priv);
+       intel_display_set_init_power(dev_priv, false);
++      i915_rc6_ctx_wa_suspend(dev_priv);
+       fw_csr = !IS_GEN9_LP(dev_priv) &&
+               suspend_to_idle(dev_priv) && dev_priv->csr.dmc_payload;
+@@ -1800,6 +1801,7 @@ static int i915_drm_resume_early(struct
+               intel_display_set_init_power(dev_priv, true);
+       i915_gem_sanitize(dev_priv);
++      i915_rc6_ctx_wa_resume(dev_priv);
+       enable_rpm_wakeref_asserts(dev_priv);
+--- a/drivers/gpu/drm/i915/i915_drv.h
++++ b/drivers/gpu/drm/i915/i915_drv.h
+@@ -1320,6 +1320,7 @@ struct intel_gen6_power_mgmt {
+       enum { LOW_POWER, BETWEEN, HIGH_POWER } power;
+       bool enabled;
++      bool ctx_corrupted;
+       struct delayed_work autoenable_work;
+       atomic_t num_waiters;
+       atomic_t boosts;
+@@ -3025,9 +3026,12 @@ intel_info(const struct drm_i915_private
+ /* Early gen2 have a totally busted CS tlb and require pinned batches. */
+ #define HAS_BROKEN_CS_TLB(dev_priv)   (IS_I830(dev_priv) || IS_I845G(dev_priv))
++#define NEEDS_RC6_CTX_CORRUPTION_WA(dev_priv) \
++      (IS_BROADWELL(dev_priv) || INTEL_GEN(dev_priv) == 9)
++
+ /* WaRsDisableCoarsePowerGating:skl,bxt */
+ #define NEEDS_WaRsDisableCoarsePowerGating(dev_priv) \
+-      (IS_SKL_GT3(dev_priv) || IS_SKL_GT4(dev_priv))
++      (INTEL_GEN(dev_priv) == 9)
+ /*
+  * dp aux and gmbus irq on gen4 seems to be able to generate legacy interrupts
+--- a/drivers/gpu/drm/i915/i915_gem.c
++++ b/drivers/gpu/drm/i915/i915_gem.c
+@@ -3243,6 +3243,12 @@ i915_gem_idle_work_handler(struct work_s
+       if (INTEL_GEN(dev_priv) >= 6)
+               gen6_rps_idle(dev_priv);
++
++      if (NEEDS_RC6_CTX_CORRUPTION_WA(dev_priv)) {
++              i915_rc6_ctx_wa_check(dev_priv);
++              intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
++      }
++
+       intel_runtime_pm_put(dev_priv);
+ out_unlock:
+       mutex_unlock(&dev->struct_mutex);
+--- a/drivers/gpu/drm/i915/i915_gem_request.c
++++ b/drivers/gpu/drm/i915/i915_gem_request.c
+@@ -252,6 +252,10 @@ static void mark_busy(struct drm_i915_pr
+       GEM_BUG_ON(!i915->gt.active_requests);
+       intel_runtime_pm_get_noresume(i915);
++
++      if (NEEDS_RC6_CTX_CORRUPTION_WA(i915))
++              intel_uncore_forcewake_get(i915, FORCEWAKE_ALL);
++
+       i915->gt.awake = true;
+       intel_enable_gt_powersave(i915);
+--- a/drivers/gpu/drm/i915/i915_reg.h
++++ b/drivers/gpu/drm/i915/i915_reg.h
+@@ -358,6 +358,8 @@ static inline bool i915_mmio_reg_valid(i
+ #define GEN8_CONFIG0                  _MMIO(0xD00)
+ #define  GEN9_DEFAULT_FIXES           (1 << 3 | 1 << 2 | 1 << 1)
++#define GEN8_RC6_CTX_INFO             _MMIO(0x8504)
++
+ #define GAC_ECO_BITS                  _MMIO(0x14090)
+ #define   ECOBITS_SNB_BIT             (1<<13)
+ #define   ECOBITS_PPGTT_CACHE64B      (3<<8)
+--- a/drivers/gpu/drm/i915/intel_drv.h
++++ b/drivers/gpu/drm/i915/intel_drv.h
+@@ -1838,6 +1838,9 @@ void intel_enable_gt_powersave(struct dr
+ void intel_autoenable_gt_powersave(struct drm_i915_private *dev_priv);
+ void intel_disable_gt_powersave(struct drm_i915_private *dev_priv);
+ void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv);
++bool i915_rc6_ctx_wa_check(struct drm_i915_private *i915);
++void i915_rc6_ctx_wa_suspend(struct drm_i915_private *i915);
++void i915_rc6_ctx_wa_resume(struct drm_i915_private *i915);
+ void gen6_rps_busy(struct drm_i915_private *dev_priv);
+ void gen6_rps_reset_ei(struct drm_i915_private *dev_priv);
+ void gen6_rps_idle(struct drm_i915_private *dev_priv);
+--- a/drivers/gpu/drm/i915/intel_pm.c
++++ b/drivers/gpu/drm/i915/intel_pm.c
+@@ -6282,19 +6282,23 @@ static void gen9_disable_rps(struct drm_
+       I915_WRITE(GEN6_RP_CONTROL, 0);
+ }
+-static void gen6_disable_rps(struct drm_i915_private *dev_priv)
++static void gen6_disable_rc6(struct drm_i915_private *dev_priv)
+ {
+       I915_WRITE(GEN6_RC_CONTROL, 0);
++}
++
++static void gen6_disable_rps(struct drm_i915_private *dev_priv)
++{
+       I915_WRITE(GEN6_RPNSWREQ, 1 << 31);
+       I915_WRITE(GEN6_RP_CONTROL, 0);
+ }
+-static void cherryview_disable_rps(struct drm_i915_private *dev_priv)
++static void cherryview_disable_rc6(struct drm_i915_private *dev_priv)
+ {
+       I915_WRITE(GEN6_RC_CONTROL, 0);
+ }
+-static void valleyview_disable_rps(struct drm_i915_private *dev_priv)
++static void valleyview_disable_rc6(struct drm_i915_private *dev_priv)
+ {
+       /* we're doing forcewake before Disabling RC6,
+        * This what the BIOS expects when going into suspend */
+@@ -6545,7 +6549,8 @@ static void gen9_enable_rc6(struct drm_i
+       I915_WRITE(GEN9_RENDER_PG_IDLE_HYSTERESIS, 25);
+       /* 3a: Enable RC6 */
+-      if (intel_enable_rc6() & INTEL_RC6_ENABLE)
++      if (!dev_priv->rps.ctx_corrupted &&
++          intel_enable_rc6() & INTEL_RC6_ENABLE)
+               rc6_mask = GEN6_RC_CTL_RC6_ENABLE;
+       DRM_INFO("RC6 %s\n", onoff(rc6_mask & GEN6_RC_CTL_RC6_ENABLE));
+       I915_WRITE(GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */
+@@ -6594,7 +6599,8 @@ static void gen8_enable_rps(struct drm_i
+               I915_WRITE(GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */
+       /* 3: Enable RC6 */
+-      if (intel_enable_rc6() & INTEL_RC6_ENABLE)
++      if (!dev_priv->rps.ctx_corrupted &&
++          intel_enable_rc6() & INTEL_RC6_ENABLE)
+               rc6_mask = GEN6_RC_CTL_RC6_ENABLE;
+       intel_print_rc6_info(dev_priv, rc6_mask);
+       if (IS_BROADWELL(dev_priv))
+@@ -7775,6 +7781,95 @@ static void intel_init_emon(struct drm_i
+       dev_priv->ips.corr = (lcfuse & LCFUSE_HIV_MASK);
+ }
++static bool i915_rc6_ctx_corrupted(struct drm_i915_private *dev_priv)
++{
++      return !I915_READ(GEN8_RC6_CTX_INFO);
++}
++
++static void i915_rc6_ctx_wa_init(struct drm_i915_private *i915)
++{
++      if (!NEEDS_RC6_CTX_CORRUPTION_WA(i915))
++              return;
++
++      if (i915_rc6_ctx_corrupted(i915)) {
++              DRM_INFO("RC6 context corrupted, disabling runtime power management\n");
++              i915->rps.ctx_corrupted = true;
++              intel_runtime_pm_get(i915);
++      }
++}
++
++static void i915_rc6_ctx_wa_cleanup(struct drm_i915_private *i915)
++{
++      if (i915->rps.ctx_corrupted) {
++              intel_runtime_pm_put(i915);
++              i915->rps.ctx_corrupted = false;
++      }
++}
++
++/**
++ * i915_rc6_ctx_wa_suspend - system suspend sequence for the RC6 CTX WA
++ * @i915: i915 device
++ *
++ * Perform any steps needed to clean up the RC6 CTX WA before system suspend.
++ */
++void i915_rc6_ctx_wa_suspend(struct drm_i915_private *i915)
++{
++      if (i915->rps.ctx_corrupted)
++              intel_runtime_pm_put(i915);
++}
++
++/**
++ * i915_rc6_ctx_wa_resume - system resume sequence for the RC6 CTX WA
++ * @i915: i915 device
++ *
++ * Perform any steps needed to re-init the RC6 CTX WA after system resume.
++ */
++void i915_rc6_ctx_wa_resume(struct drm_i915_private *i915)
++{
++      if (!i915->rps.ctx_corrupted)
++              return;
++
++      if (i915_rc6_ctx_corrupted(i915)) {
++              intel_runtime_pm_get(i915);
++              return;
++      }
++
++      DRM_INFO("RC6 context restored, re-enabling runtime power management\n");
++      i915->rps.ctx_corrupted = false;
++}
++
++static void intel_disable_rc6(struct drm_i915_private *dev_priv);
++
++/**
++ * i915_rc6_ctx_wa_check - check for a new RC6 CTX corruption
++ * @i915: i915 device
++ *
++ * Check if an RC6 CTX corruption has happened since the last check and if so
++ * disable RC6 and runtime power management.
++ *
++ * Return false if no context corruption has happened since the last call of
++ * this function, true otherwise.
++*/
++bool i915_rc6_ctx_wa_check(struct drm_i915_private *i915)
++{
++      if (!NEEDS_RC6_CTX_CORRUPTION_WA(i915))
++              return false;
++
++      if (i915->rps.ctx_corrupted)
++              return false;
++
++      if (!i915_rc6_ctx_corrupted(i915))
++              return false;
++
++      DRM_NOTE("RC6 context corruption, disabling runtime power management\n");
++
++      intel_disable_rc6(i915);
++      i915->rps.ctx_corrupted = true;
++      intel_runtime_pm_get_noresume(i915);
++
++      return true;
++}
++
+ void intel_init_gt_powersave(struct drm_i915_private *dev_priv)
+ {
+       /*
+@@ -7789,6 +7884,8 @@ void intel_init_gt_powersave(struct drm_
+       mutex_lock(&dev_priv->drm.struct_mutex);
+       mutex_lock(&dev_priv->rps.hw_lock);
++      i915_rc6_ctx_wa_init(dev_priv);
++
+       /* Initialize RPS limits (for userspace) */
+       if (IS_CHERRYVIEW(dev_priv))
+               cherryview_init_gt_powersave(dev_priv);
+@@ -7838,6 +7935,8 @@ void intel_cleanup_gt_powersave(struct d
+       if (IS_VALLEYVIEW(dev_priv))
+               valleyview_cleanup_gt_powersave(dev_priv);
++      i915_rc6_ctx_wa_cleanup(dev_priv);
++
+       if (!i915.enable_rc6)
+               intel_runtime_pm_put(dev_priv);
+ }
+@@ -7869,27 +7968,47 @@ void intel_sanitize_gt_powersave(struct
+       gen6_reset_rps_interrupts(dev_priv);
+ }
+-void intel_disable_gt_powersave(struct drm_i915_private *dev_priv)
++static void __intel_disable_rc6(struct drm_i915_private *dev_priv)
+ {
+-      if (!READ_ONCE(dev_priv->rps.enabled))
+-              return;
++      if (INTEL_GEN(dev_priv) >= 9)
++              gen9_disable_rc6(dev_priv);
++      else if (IS_CHERRYVIEW(dev_priv))
++              cherryview_disable_rc6(dev_priv);
++      else if (IS_VALLEYVIEW(dev_priv))
++              valleyview_disable_rc6(dev_priv);
++      else if (INTEL_GEN(dev_priv) >= 6)
++              gen6_disable_rc6(dev_priv);
++}
++static void intel_disable_rc6(struct drm_i915_private *dev_priv)
++{
+       mutex_lock(&dev_priv->rps.hw_lock);
++      __intel_disable_rc6(dev_priv);
++      mutex_unlock(&dev_priv->rps.hw_lock);
++}
+-      if (INTEL_GEN(dev_priv) >= 9) {
+-              gen9_disable_rc6(dev_priv);
++static void intel_disable_rps(struct drm_i915_private *dev_priv)
++{
++      if (INTEL_GEN(dev_priv) >= 9)
+               gen9_disable_rps(dev_priv);
+-      } else if (IS_CHERRYVIEW(dev_priv)) {
+-              cherryview_disable_rps(dev_priv);
+-      } else if (IS_VALLEYVIEW(dev_priv)) {
+-              valleyview_disable_rps(dev_priv);
+-      } else if (INTEL_GEN(dev_priv) >= 6) {
++      else if (INTEL_GEN(dev_priv) >= 6)
+               gen6_disable_rps(dev_priv);
+-      }  else if (IS_IRONLAKE_M(dev_priv)) {
++      else if (IS_IRONLAKE_M(dev_priv))
+               ironlake_disable_drps(dev_priv);
+-      }
++}
++
++void intel_disable_gt_powersave(struct drm_i915_private *dev_priv)
++{
++      if (!READ_ONCE(dev_priv->rps.enabled))
++              return;
++
++      mutex_lock(&dev_priv->rps.hw_lock);
++
++      __intel_disable_rc6(dev_priv);
++      intel_disable_rps(dev_priv);
+       dev_priv->rps.enabled = false;
++
+       mutex_unlock(&dev_priv->rps.hw_lock);
+ }
diff --git a/queue-4.14/drm-i915-gtt-add-read-only-pages-to-gen8_pte_encode.patch b/queue-4.14/drm-i915-gtt-add-read-only-pages-to-gen8_pte_encode.patch
new file mode 100644 (file)
index 0000000..ce13f34
--- /dev/null
@@ -0,0 +1,109 @@
+From foo@baz Tue 12 Nov 2019 04:10:24 PM CET
+From: Jon Bloomfield <jon.bloomfield@intel.com>
+Date: Thu, 12 Jul 2018 19:53:10 +0100
+Subject: drm/i915/gtt: Add read only pages to gen8_pte_encode
+
+From: Jon Bloomfield <jon.bloomfield@intel.com>
+
+commit 25dda4dabeeb12af5209b0183c788ef2a88dabbe upstream.
+
+We can set a bit inside the ppGTT PTE to indicate a page is read-only;
+writes from the GPU will be discarded. We can use this to protect pages
+and in particular support read-only userptr mappings (necessary for
+importing PROT_READ vma).
+
+Signed-off-by: Jon Bloomfield <jon.bloomfield@intel.com>
+Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
+Cc: Matthew Auld <matthew.william.auld@gmail.com>
+Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
+Reviewed-by: Matthew Auld <matthew.william.auld@gmail.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20180712185315.3288-1-chris@chris-wilson.co.uk
+Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/i915/i915_gem_gtt.c |   23 +++++++++++++----------
+ 1 file changed, 13 insertions(+), 10 deletions(-)
+
+--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
++++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
+@@ -223,10 +223,13 @@ static void ppgtt_unbind_vma(struct i915
+ }
+ static gen8_pte_t gen8_pte_encode(dma_addr_t addr,
+-                                enum i915_cache_level level)
++                                enum i915_cache_level level,
++                                u32 flags)
+ {
+-      gen8_pte_t pte = _PAGE_PRESENT | _PAGE_RW;
+-      pte |= addr;
++      gen8_pte_t pte = addr | _PAGE_PRESENT | _PAGE_RW;
++
++      if (unlikely(flags & PTE_READ_ONLY))
++              pte &= ~_PAGE_RW;
+       switch (level) {
+       case I915_CACHE_NONE:
+@@ -487,7 +490,7 @@ static void gen8_initialize_pt(struct i9
+                              struct i915_page_table *pt)
+ {
+       fill_px(vm, pt,
+-              gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC));
++              gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0));
+ }
+ static void gen6_initialize_pt(struct i915_address_space *vm,
+@@ -691,7 +694,7 @@ static bool gen8_ppgtt_clear_pt(struct i
+       unsigned int pte = gen8_pte_index(start);
+       unsigned int pte_end = pte + num_entries;
+       const gen8_pte_t scratch_pte =
+-              gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC);
++              gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0);
+       gen8_pte_t *vaddr;
+       GEM_BUG_ON(num_entries > pt->used_ptes);
+@@ -866,7 +869,7 @@ gen8_ppgtt_insert_pte_entries(struct i91
+                             enum i915_cache_level cache_level)
+ {
+       struct i915_page_directory *pd;
+-      const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level);
++      const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, 0);
+       gen8_pte_t *vaddr;
+       bool ret;
+@@ -1264,7 +1267,7 @@ static void gen8_dump_ppgtt(struct i915_
+ {
+       struct i915_address_space *vm = &ppgtt->base;
+       const gen8_pte_t scratch_pte =
+-              gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC);
++              gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0);
+       u64 start = 0, length = ppgtt->base.total;
+       if (use_4lvl(vm)) {
+@@ -2078,7 +2081,7 @@ static void gen8_ggtt_insert_page(struct
+       gen8_pte_t __iomem *pte =
+               (gen8_pte_t __iomem *)ggtt->gsm + (offset >> PAGE_SHIFT);
+-      gen8_set_pte(pte, gen8_pte_encode(addr, level));
++      gen8_set_pte(pte, gen8_pte_encode(addr, level, 0));
+       ggtt->invalidate(vm->i915);
+ }
+@@ -2091,7 +2094,7 @@ static void gen8_ggtt_insert_entries(str
+       struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+       struct sgt_iter sgt_iter;
+       gen8_pte_t __iomem *gtt_entries;
+-      const gen8_pte_t pte_encode = gen8_pte_encode(0, level);
++      const gen8_pte_t pte_encode = gen8_pte_encode(0, level, 0);
+       dma_addr_t addr;
+       gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm;
+@@ -2162,7 +2165,7 @@ static void gen8_ggtt_clear_range(struct
+       unsigned first_entry = start >> PAGE_SHIFT;
+       unsigned num_entries = length >> PAGE_SHIFT;
+       const gen8_pte_t scratch_pte =
+-              gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC);
++              gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0);
+       gen8_pte_t __iomem *gtt_base =
+               (gen8_pte_t __iomem *)ggtt->gsm + first_entry;
+       const int max_entries = ggtt_total_entries(ggtt) - first_entry;
diff --git a/queue-4.14/drm-i915-gtt-disable-read-only-support-under-gvt.patch b/queue-4.14/drm-i915-gtt-disable-read-only-support-under-gvt.patch
new file mode 100644 (file)
index 0000000..e273008
--- /dev/null
@@ -0,0 +1,42 @@
+From foo@baz Tue 12 Nov 2019 04:10:24 PM CET
+From: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu, 12 Jul 2018 19:53:12 +0100
+Subject: drm/i915/gtt: Disable read-only support under GVT
+
+From: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit c9e666880de5a1fed04dc412b046916d542b72dd upstream.
+
+GVT is not propagating the PTE bits, and is always setting the
+read-write bit, thus breaking read-only support.
+
+Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+Cc: Zhenyu Wang <zhenyuw@linux.intel.com>
+Cc: Jon Bloomfield <jon.bloomfield@intel.com>
+Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
+Cc: Matthew Auld <matthew.william.auld@gmail.com>
+Reviewed-by: Jon Bloomfield <jon.bloomfield@intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20180712185315.3288-3-chris@chris-wilson.co.uk
+Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/i915/i915_gem_gtt.c |    8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
++++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
+@@ -1343,8 +1343,12 @@ static int gen8_ppgtt_init(struct i915_h
+               return ret;
+       }
+-      /* From bdw, there is support for read-only pages in the PPGTT */
+-      ppgtt->base.has_read_only = true;
++      /*
++       * From bdw, there is support for read-only pages in the PPGTT.
++       *
++       * XXX GVT is not honouring the lack of RW in the PTE bits.
++       */
++      ppgtt->base.has_read_only = !intel_vgpu_active(dev_priv);
+       /* There are only few exceptions for gen >=6. chv and bxt.
+        * And we are not sure about the latter so play safe for now.
diff --git a/queue-4.14/drm-i915-gtt-read-only-pages-for-insert_entries-on-bdw.patch b/queue-4.14/drm-i915-gtt-read-only-pages-for-insert_entries-on-bdw.patch
new file mode 100644 (file)
index 0000000..1ebcd2f
--- /dev/null
@@ -0,0 +1,220 @@
+From foo@baz Tue 12 Nov 2019 04:10:24 PM CET
+From: "Vivi, Rodrigo" <rodrigo.vivi@intel.com>
+Date: Mon, 6 Aug 2018 14:10:48 -0700
+Subject: drm/i915/gtt: Read-only pages for insert_entries on bdw+
+
+From: "Vivi, Rodrigo" <rodrigo.vivi@intel.com>
+
+commit 250f8c8140ac0a5e5acb91891d6813f12778b224 upstream.
+
+Hook up the flags to allow read-only ppGTT mappings for gen8+
+
+v2: Include a selftest to check that writes to a readonly PTE are
+dropped
+v3: Don't duplicate cpu_check() as we can just reuse it, and even worse
+don't wholesale copy the theory-of-operation comment from igt_ctx_exec
+without changing it to explain the intention behind the new test!
+v4: Joonas really likes magic mystery values
+
+Signed-off-by: Jon Bloomfield <jon.bloomfield@intel.com>
+Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
+Cc: Matthew Auld <matthew.william.auld@gmail.com>
+Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
+Reviewed-by: Matthew Auld <matthew.william.auld@gmail.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20180712185315.3288-2-chris@chris-wilson.co.uk
+Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/i915/i915_gem_gtt.c     |   37 ++++++++++++++++++++------------
+ drivers/gpu/drm/i915/i915_gem_gtt.h     |    7 +++++-
+ drivers/gpu/drm/i915/intel_ringbuffer.c |   11 ++++++---
+ 3 files changed, 38 insertions(+), 17 deletions(-)
+
+--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
++++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
+@@ -207,7 +207,7 @@ static int ppgtt_bind_vma(struct i915_vm
+       vma->pages = vma->obj->mm.pages;
+-      /* Currently applicable only to VLV */
++      /* Applicable to VLV, and gen8+ */
+       pte_flags = 0;
+       if (vma->obj->gt_ro)
+               pte_flags |= PTE_READ_ONLY;
+@@ -866,10 +866,11 @@ gen8_ppgtt_insert_pte_entries(struct i91
+                             struct i915_page_directory_pointer *pdp,
+                             struct sgt_dma *iter,
+                             struct gen8_insert_pte *idx,
+-                            enum i915_cache_level cache_level)
++                            enum i915_cache_level cache_level,
++                            u32 flags)
+ {
+       struct i915_page_directory *pd;
+-      const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, 0);
++      const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
+       gen8_pte_t *vaddr;
+       bool ret;
+@@ -920,20 +921,20 @@ gen8_ppgtt_insert_pte_entries(struct i91
+ static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm,
+                                  struct i915_vma *vma,
+                                  enum i915_cache_level cache_level,
+-                                 u32 unused)
++                                 u32 flags)
+ {
+       struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
+       struct sgt_dma iter = sgt_dma(vma);
+       struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start);
+       gen8_ppgtt_insert_pte_entries(ppgtt, &ppgtt->pdp, &iter, &idx,
+-                                    cache_level);
++                                    cache_level, flags);
+ }
+ static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm,
+                                  struct i915_vma *vma,
+                                  enum i915_cache_level cache_level,
+-                                 u32 unused)
++                                 u32 flags)
+ {
+       struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
+       struct sgt_dma iter = sgt_dma(vma);
+@@ -941,7 +942,7 @@ static void gen8_ppgtt_insert_4lvl(struc
+       struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start);
+       while (gen8_ppgtt_insert_pte_entries(ppgtt, pdps[idx.pml4e++], &iter,
+-                                           &idx, cache_level))
++                                           &idx, cache_level, flags))
+               GEM_BUG_ON(idx.pml4e >= GEN8_PML4ES_PER_PML4);
+ }
+@@ -1342,6 +1343,9 @@ static int gen8_ppgtt_init(struct i915_h
+               return ret;
+       }
++      /* From bdw, there is support for read-only pages in the PPGTT */
++      ppgtt->base.has_read_only = true;
++
+       /* There are only few exceptions for gen >=6. chv and bxt.
+        * And we are not sure about the latter so play safe for now.
+        */
+@@ -2089,7 +2093,7 @@ static void gen8_ggtt_insert_page(struct
+ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
+                                    struct i915_vma *vma,
+                                    enum i915_cache_level level,
+-                                   u32 unused)
++                                   u32 flags)
+ {
+       struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+       struct sgt_iter sgt_iter;
+@@ -2097,6 +2101,9 @@ static void gen8_ggtt_insert_entries(str
+       const gen8_pte_t pte_encode = gen8_pte_encode(0, level, 0);
+       dma_addr_t addr;
++      /* The GTT does not support read-only mappings */
++      GEM_BUG_ON(flags & PTE_READ_ONLY);
++
+       gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm;
+       gtt_entries += vma->node.start >> PAGE_SHIFT;
+       for_each_sgt_dma(addr, sgt_iter, vma->pages)
+@@ -2226,13 +2233,14 @@ struct insert_entries {
+       struct i915_address_space *vm;
+       struct i915_vma *vma;
+       enum i915_cache_level level;
++      u32 flags;
+ };
+ static int bxt_vtd_ggtt_insert_entries__cb(void *_arg)
+ {
+       struct insert_entries *arg = _arg;
+-      gen8_ggtt_insert_entries(arg->vm, arg->vma, arg->level, 0);
++      gen8_ggtt_insert_entries(arg->vm, arg->vma, arg->level, arg->flags);
+       bxt_vtd_ggtt_wa(arg->vm);
+       return 0;
+@@ -2241,9 +2249,9 @@ static int bxt_vtd_ggtt_insert_entries__
+ static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm,
+                                            struct i915_vma *vma,
+                                            enum i915_cache_level level,
+-                                           u32 unused)
++                                           u32 flags)
+ {
+-      struct insert_entries arg = { vm, vma, level };
++      struct insert_entries arg = { vm, vma, level, flags };
+       stop_machine(bxt_vtd_ggtt_insert_entries__cb, &arg, NULL);
+ }
+@@ -2340,7 +2348,7 @@ static int ggtt_bind_vma(struct i915_vma
+                       return ret;
+       }
+-      /* Currently applicable only to VLV */
++      /* Applicable to VLV (gen8+ do not support RO in the GGTT) */
+       pte_flags = 0;
+       if (obj->gt_ro)
+               pte_flags |= PTE_READ_ONLY;
+@@ -3066,6 +3074,10 @@ int i915_ggtt_init_hw(struct drm_i915_pr
+        */
+       mutex_lock(&dev_priv->drm.struct_mutex);
+       i915_address_space_init(&ggtt->base, dev_priv, "[global]");
++
++      /* Only VLV supports read-only GGTT mappings */
++      ggtt->base.has_read_only = IS_VALLEYVIEW(dev_priv);
++
+       if (!HAS_LLC(dev_priv) && !USES_PPGTT(dev_priv))
+               ggtt->base.mm.color_adjust = i915_gtt_color_adjust;
+       mutex_unlock(&dev_priv->drm.struct_mutex);
+@@ -3098,7 +3110,6 @@ int i915_ggtt_enable_hw(struct drm_i915_
+ {
+       if (INTEL_GEN(dev_priv) < 6 && !intel_enable_gtt())
+               return -EIO;
+-
+       return 0;
+ }
+--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
++++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
+@@ -295,7 +295,12 @@ struct i915_address_space {
+       struct list_head unbound_list;
+       struct pagevec free_pages;
+-      bool pt_kmap_wc;
++
++      /* Some systems require uncached updates of the page directories */
++      bool pt_kmap_wc:1;
++
++      /* Some systems support read-only mappings for GGTT and/or PPGTT */
++      bool has_read_only:1;
+       /* FIXME: Need a more generic return type */
+       gen6_pte_t (*pte_encode)(dma_addr_t addr,
+--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
++++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
+@@ -1358,6 +1358,7 @@ void intel_ring_unpin(struct intel_ring
+ static struct i915_vma *
+ intel_ring_create_vma(struct drm_i915_private *dev_priv, int size)
+ {
++      struct i915_address_space *vm = &dev_priv->ggtt.base;
+       struct drm_i915_gem_object *obj;
+       struct i915_vma *vma;
+@@ -1367,10 +1368,14 @@ intel_ring_create_vma(struct drm_i915_pr
+       if (IS_ERR(obj))
+               return ERR_CAST(obj);
+-      /* mark ring buffers as read-only from GPU side by default */
+-      obj->gt_ro = 1;
++      /*
++       * Mark ring buffers as read-only from GPU side (so no stray overwrites)
++       * if supported by the platform's GGTT.
++       */
++      if (vm->has_read_only)
++              obj->gt_ro = 1;
+-      vma = i915_vma_instance(obj, &dev_priv->ggtt.base, NULL);
++      vma = i915_vma_instance(obj, vm, NULL);
+       if (IS_ERR(vma))
+               goto err;
diff --git a/queue-4.14/drm-i915-lower-rm-timeout-to-avoid-dsi-hard-hangs.patch b/queue-4.14/drm-i915-lower-rm-timeout-to-avoid-dsi-hard-hangs.patch
new file mode 100644 (file)
index 0000000..d41a0f5
--- /dev/null
@@ -0,0 +1,75 @@
+From foo@baz Tue 12 Nov 2019 04:10:24 PM CET
+From: Uma Shankar <uma.shankar@intel.com>
+Date: Tue, 7 Aug 2018 21:15:35 +0530
+Subject: drm/i915: Lower RM timeout to avoid DSI hard hangs
+
+From: Uma Shankar <uma.shankar@intel.com>
+
+commit 1d85a299c4db57c55e0229615132c964d17aa765 upstream.
+
+In BXT/APL, device 2 MMIO reads from MIPI controller requires its PLL
+to be turned ON. When MIPI PLL is turned off (MIPI Display is not
+active or connected), and someone (host or GT engine) tries to read
+MIPI registers, it causes hard hang. This is a hardware restriction
+or limitation.
+
+The driver by itself doesn't read MIPI registers when the MIPI
+display is off. But any userspace application can submit an
+unprivileged batch buffer for execution, and that batch buffer can
+contain MMIO reads, which are allowed even for unprivileged
+applications. If such a register read targets the MIPI DSI controller
+while the MIPI display is not active, the MMIO read operation causes
+a system hard hang and the only way to recover is a hard reboot. A
+genuine process/application won't submit a batch buffer like this and
+doesn't cause any issue. But on a compromised system, a malicious
+userspace process can generate such a batch buffer and trigger a
+system hard hang (denial of service attack).
+
+The fix is to lower the internal MMIO timeout value to an optimum
+value of 950us, as recommended by the hardware team. A timeout beyond
+1ms (which any chosen value will hit if an MMIO READ of a DSI-specific
+register is performed without the PLL on) causes the system hang. But
+if the timeout value is lower than that, it stays below the threshold
+(even if a timeout happens) and the system will not get into a hung
+state. This avoids a system hang without losing any programming or GT
+interrupts, taking the worst case of the lowest CDCLK frequency and an
+early DC5 abort into account.
+
+Signed-off-by: Uma Shankar <uma.shankar@intel.com>
+Reviewed-by: Jon Bloomfield <jon.bloomfield@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/i915/i915_reg.h |    4 ++++
+ drivers/gpu/drm/i915/intel_pm.c |    8 ++++++++
+ 2 files changed, 12 insertions(+)
+
+--- a/drivers/gpu/drm/i915/i915_reg.h
++++ b/drivers/gpu/drm/i915/i915_reg.h
+@@ -6726,6 +6726,10 @@ enum {
+ #define SKL_CSR_DC5_DC6_COUNT _MMIO(0x8002C)
+ #define BXT_CSR_DC3_DC5_COUNT _MMIO(0x80038)
++/* Display Internal Timeout Register */
++#define RM_TIMEOUT            _MMIO(0x42060)
++#define  MMIO_TIMEOUT_US(us)  ((us) << 0)
++
+ /* interrupts */
+ #define DE_MASTER_IRQ_CONTROL   (1 << 31)
+ #define DE_SPRITEB_FLIP_DONE    (1 << 29)
+--- a/drivers/gpu/drm/i915/intel_pm.c
++++ b/drivers/gpu/drm/i915/intel_pm.c
+@@ -121,6 +121,14 @@ static void bxt_init_clock_gating(struct
+        */
+       I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) |
+                  PWM1_GATING_DIS | PWM2_GATING_DIS);
++
++      /*
++       * Lower the display internal timeout.
++       * This is needed to avoid any hard hangs when DSI port PLL
++       * is off and a MMIO access is attempted by any privilege
++       * application, using batch buffers or any other means.
++       */
++      I915_WRITE(RM_TIMEOUT, MMIO_TIMEOUT_US(950));
+ }
+ static void glk_init_clock_gating(struct drm_i915_private *dev_priv)
diff --git a/queue-4.14/drm-i915-move-engine-needs_cmd_parser-to-engine-flags.patch b/queue-4.14/drm-i915-move-engine-needs_cmd_parser-to-engine-flags.patch
new file mode 100644 (file)
index 0000000..502c27f
--- /dev/null
@@ -0,0 +1,101 @@
+From foo@baz Tue 12 Nov 2019 04:10:24 PM CET
+From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
+Date: Wed, 29 Nov 2017 08:24:09 +0000
+Subject: drm/i915: Move engine->needs_cmd_parser to engine->flags
+
+From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
+
+commit 439e2ee4ca520e72870e4fa44aa0076060ad6857 upstream.
+
+Will be adding a new per-engine flag shortly, so it makes sense
+to consolidate.
+
+v2: Keep the original code flow in intel_engine_cleanup_cmd_parser.
+    (Joonas Lahtinen)
+
+Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
+Suggested-by: Chris Wilson <chris@chris-wilson.co.uk>
+Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
+Reviewed-by: Sagar Arun Kamble <sagar.a.kamble@intel.com>
+Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20171129082409.18189-1-tvrtko.ursulin@linux.intel.com
+Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/i915/i915_cmd_parser.c     |    7 ++++---
+ drivers/gpu/drm/i915/i915_gem_execbuffer.c |    2 +-
+ drivers/gpu/drm/i915/intel_ringbuffer.h    |    8 +++++++-
+ 3 files changed, 12 insertions(+), 5 deletions(-)
+
+--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
++++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
+@@ -26,6 +26,7 @@
+  */
+ #include "i915_drv.h"
++#include "intel_ringbuffer.h"
+ /**
+  * DOC: batch buffer command parser
+@@ -940,7 +941,7 @@ void intel_engine_init_cmd_parser(struct
+               return;
+       }
+-      engine->needs_cmd_parser = true;
++      engine->flags |= I915_ENGINE_NEEDS_CMD_PARSER;
+ }
+ /**
+@@ -952,7 +953,7 @@ void intel_engine_init_cmd_parser(struct
+  */
+ void intel_engine_cleanup_cmd_parser(struct intel_engine_cs *engine)
+ {
+-      if (!engine->needs_cmd_parser)
++      if (!intel_engine_needs_cmd_parser(engine))
+               return;
+       fini_hash_table(engine);
+@@ -1356,7 +1357,7 @@ int i915_cmd_parser_get_version(struct d
+       /* If the command parser is not enabled, report 0 - unsupported */
+       for_each_engine(engine, dev_priv, id) {
+-              if (engine->needs_cmd_parser) {
++              if (intel_engine_needs_cmd_parser(engine)) {
+                       active = true;
+                       break;
+               }
+--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
++++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+@@ -270,7 +270,7 @@ static inline u64 gen8_noncanonical_addr
+ static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb)
+ {
+-      return eb->engine->needs_cmd_parser && eb->batch_len;
++      return intel_engine_needs_cmd_parser(eb->engine) && eb->batch_len;
+ }
+ static int eb_create(struct i915_execbuffer *eb)
+--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
++++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
+@@ -417,7 +417,8 @@ struct intel_engine_cs {
+       struct intel_engine_hangcheck hangcheck;
+-      bool needs_cmd_parser;
++#define I915_ENGINE_NEEDS_CMD_PARSER BIT(0)
++      unsigned int flags;
+       /*
+        * Table of commands the command parser needs to know about
+@@ -444,6 +445,11 @@ struct intel_engine_cs {
+       u32 (*get_cmd_length_mask)(u32 cmd_header);
+ };
++static inline bool intel_engine_needs_cmd_parser(struct intel_engine_cs *engine)
++{
++      return engine->flags & I915_ENGINE_NEEDS_CMD_PARSER;
++}
++
+ static inline unsigned int
+ intel_engine_flag(const struct intel_engine_cs *engine)
+ {
diff --git a/queue-4.14/drm-i915-prevent-writing-into-a-read-only-object-via-a-ggtt-mmap.patch b/queue-4.14/drm-i915-prevent-writing-into-a-read-only-object-via-a-ggtt-mmap.patch
new file mode 100644 (file)
index 0000000..95ea210
--- /dev/null
@@ -0,0 +1,164 @@
+From foo@baz Tue 12 Nov 2019 04:10:24 PM CET
+From: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Thu, 12 Jul 2018 19:53:13 +0100
+Subject: drm/i915: Prevent writing into a read-only object via a GGTT mmap
+
+From: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 3e977ac6179b39faa3c0eda5fce4f00663ae298d upstream.
+
+If the user has created a read-only object, they should not be allowed
+to circumvent the write protection by using a GGTT mmapping. Deny it.
+
+Also most machines do not support read-only GGTT PTEs, so again we have
+to reject attempted writes. Fortunately, this is known a priori, so we
+can at least reject in the call to create the mmap (with a sanity check
+in the fault handler).
+
+v2: Check the vma->vm_flags during mmap() to allow readonly access.
+v3: Remove VM_MAYWRITE to curtail mprotect()
+
+Testcase: igt/gem_userptr_blits/readonly_mmap*
+Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+Cc: Jon Bloomfield <jon.bloomfield@intel.com>
+Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
+Cc: Matthew Auld <matthew.william.auld@gmail.com>
+Cc: David Herrmann <dh.herrmann@gmail.com>
+Reviewed-by: Matthew Auld <matthew.william.auld@gmail.com> #v1
+Reviewed-by: Jon Bloomfield <jon.bloomfield@intel.com>
+Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20180712185315.3288-4-chris@chris-wilson.co.uk
+Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/drm_gem.c               |    9 +++++++++
+ drivers/gpu/drm/i915/i915_gem.c         |    4 ++++
+ drivers/gpu/drm/i915/i915_gem_gtt.c     |   12 +++++++-----
+ drivers/gpu/drm/i915/i915_gem_object.h  |   13 ++++++++++++-
+ drivers/gpu/drm/i915/intel_ringbuffer.c |    2 +-
+ include/drm/drm_vma_manager.h           |    1 +
+ 6 files changed, 34 insertions(+), 7 deletions(-)
+
+--- a/drivers/gpu/drm/drm_gem.c
++++ b/drivers/gpu/drm/drm_gem.c
+@@ -1035,6 +1035,15 @@ int drm_gem_mmap(struct file *filp, stru
+               return -EACCES;
+       }
++      if (node->readonly) {
++              if (vma->vm_flags & VM_WRITE) {
++                      drm_gem_object_put_unlocked(obj);
++                      return -EINVAL;
++              }
++
++              vma->vm_flags &= ~VM_MAYWRITE;
++      }
++
+       ret = drm_gem_mmap_obj(obj, drm_vma_node_size(node) << PAGE_SHIFT,
+                              vma);
+--- a/drivers/gpu/drm/i915/i915_gem.c
++++ b/drivers/gpu/drm/i915/i915_gem.c
+@@ -1834,6 +1834,10 @@ int i915_gem_fault(struct vm_fault *vmf)
+       unsigned int flags;
+       int ret;
++      /* Sanity check that we allow writing into this object */
++      if (i915_gem_object_is_readonly(obj) && write)
++              return VM_FAULT_SIGBUS;
++
+       /* We don't use vmf->pgoff since that has the fake offset */
+       page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT;
+--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
++++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
+@@ -209,7 +209,7 @@ static int ppgtt_bind_vma(struct i915_vm
+       /* Applicable to VLV, and gen8+ */
+       pte_flags = 0;
+-      if (vma->obj->gt_ro)
++      if (i915_gem_object_is_readonly(vma->obj))
+               pte_flags |= PTE_READ_ONLY;
+       vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
+@@ -2105,8 +2105,10 @@ static void gen8_ggtt_insert_entries(str
+       const gen8_pte_t pte_encode = gen8_pte_encode(0, level, 0);
+       dma_addr_t addr;
+-      /* The GTT does not support read-only mappings */
+-      GEM_BUG_ON(flags & PTE_READ_ONLY);
++      /*
++       * Note that we ignore PTE_READ_ONLY here. The caller must be careful
++       * not to allow the user to override access to a read only page.
++       */
+       gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm;
+       gtt_entries += vma->node.start >> PAGE_SHIFT;
+@@ -2354,7 +2356,7 @@ static int ggtt_bind_vma(struct i915_vma
+       /* Applicable to VLV (gen8+ do not support RO in the GGTT) */
+       pte_flags = 0;
+-      if (obj->gt_ro)
++      if (i915_gem_object_is_readonly(obj))
+               pte_flags |= PTE_READ_ONLY;
+       intel_runtime_pm_get(i915);
+@@ -2396,7 +2398,7 @@ static int aliasing_gtt_bind_vma(struct
+       /* Currently applicable only to VLV */
+       pte_flags = 0;
+-      if (vma->obj->gt_ro)
++      if (i915_gem_object_is_readonly(vma->obj))
+               pte_flags |= PTE_READ_ONLY;
+       if (flags & I915_VMA_LOCAL_BIND) {
+--- a/drivers/gpu/drm/i915/i915_gem_object.h
++++ b/drivers/gpu/drm/i915/i915_gem_object.h
+@@ -140,7 +140,6 @@ struct drm_i915_gem_object {
+        * Is the object to be mapped as read-only to the GPU
+        * Only honoured if hardware has relevant pte bit
+        */
+-      unsigned long gt_ro:1;
+       unsigned int cache_level:3;
+       unsigned int cache_coherent:2;
+ #define I915_BO_CACHE_COHERENT_FOR_READ BIT(0)
+@@ -313,6 +312,18 @@ static inline void i915_gem_object_unloc
+       reservation_object_unlock(obj->resv);
+ }
++static inline void
++i915_gem_object_set_readonly(struct drm_i915_gem_object *obj)
++{
++      obj->base.vma_node.readonly = true;
++}
++
++static inline bool
++i915_gem_object_is_readonly(const struct drm_i915_gem_object *obj)
++{
++      return obj->base.vma_node.readonly;
++}
++
+ static inline bool
+ i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj)
+ {
+--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
++++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
+@@ -1373,7 +1373,7 @@ intel_ring_create_vma(struct drm_i915_pr
+        * if supported by the platform's GGTT.
+        */
+       if (vm->has_read_only)
+-              obj->gt_ro = 1;
++              i915_gem_object_set_readonly(obj);
+       vma = i915_vma_instance(obj, vm, NULL);
+       if (IS_ERR(vma))
+--- a/include/drm/drm_vma_manager.h
++++ b/include/drm/drm_vma_manager.h
+@@ -41,6 +41,7 @@ struct drm_vma_offset_node {
+       rwlock_t vm_lock;
+       struct drm_mm_node vm_node;
+       struct rb_root vm_files;
++      bool readonly:1;
+ };
+ struct drm_vma_offset_manager {
diff --git a/queue-4.14/drm-i915-remove-master-tables-from-cmdparser.patch b/queue-4.14/drm-i915-remove-master-tables-from-cmdparser.patch
new file mode 100644
index 0000000..0f5aee7
--- /dev/null
@@ -0,0 +1,298 @@
+From foo@baz Tue 12 Nov 2019 04:10:24 PM CET
+From: Jon Bloomfield <jon.bloomfield@intel.com>
+Date: Fri, 8 Jun 2018 10:05:26 -0700
+Subject: drm/i915: Remove Master tables from cmdparser
+
+From: Jon Bloomfield <jon.bloomfield@intel.com>
+
+commit 66d8aba1cd6db34af10de465c0d52af679288cb6 upstream.
+
+The previous patch has killed support for secure batches
+on gen6+, and hence the cmdparser's master tables are
+now dead code. Remove them.
+
+Signed-off-by: Jon Bloomfield <jon.bloomfield@intel.com>
+Cc: Tony Luck <tony.luck@intel.com>
+Cc: Dave Airlie <airlied@redhat.com>
+Cc: Takashi Iwai <tiwai@suse.de>
+Cc: Tyler Hicks <tyhicks@canonical.com>
+Reviewed-by: Chris Wilson <chris.p.wilson@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/i915/i915_cmd_parser.c     |   84 +++++++----------------------
+ drivers/gpu/drm/i915/i915_drv.h            |    3 -
+ drivers/gpu/drm/i915/i915_gem_execbuffer.c |    7 +-
+ 3 files changed, 26 insertions(+), 68 deletions(-)
+
+--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
++++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
+@@ -51,13 +51,11 @@
+  * granting userspace undue privileges. There are three categories of privilege.
+  *
+  * First, commands which are explicitly defined as privileged or which should
+- * only be used by the kernel driver. The parser generally rejects such
+- * commands, though it may allow some from the drm master process.
++ * only be used by the kernel driver. The parser rejects such commands
+  *
+  * Second, commands which access registers. To support correct/enhanced
+  * userspace functionality, particularly certain OpenGL extensions, the parser
+- * provides a whitelist of registers which userspace may safely access (for both
+- * normal and drm master processes).
++ * provides a whitelist of registers which userspace may safely access
+  *
+  * Third, commands which access privileged memory (i.e. GGTT, HWS page, etc).
+  * The parser always rejects such commands.
+@@ -82,9 +80,9 @@
+  * in the per-engine command tables.
+  *
+  * Other command table entries map fairly directly to high level categories
+- * mentioned above: rejected, master-only, register whitelist. The parser
+- * implements a number of checks, including the privileged memory checks, via a
+- * general bitmasking mechanism.
++ * mentioned above: rejected, register whitelist. The parser implements a number
++ * of checks, including the privileged memory checks, via a general bitmasking
++ * mechanism.
+  */
+ /*
+@@ -102,8 +100,6 @@ struct drm_i915_cmd_descriptor {
+        * CMD_DESC_REJECT: The command is never allowed
+        * CMD_DESC_REGISTER: The command should be checked against the
+        *                    register whitelist for the appropriate ring
+-       * CMD_DESC_MASTER: The command is allowed if the submitting process
+-       *                  is the DRM master
+        */
+       u32 flags;
+ #define CMD_DESC_FIXED    (1<<0)
+@@ -111,7 +107,6 @@ struct drm_i915_cmd_descriptor {
+ #define CMD_DESC_REJECT   (1<<2)
+ #define CMD_DESC_REGISTER (1<<3)
+ #define CMD_DESC_BITMASK  (1<<4)
+-#define CMD_DESC_MASTER   (1<<5)
+       /*
+        * The command's unique identification bits and the bitmask to get them.
+@@ -207,14 +202,13 @@ struct drm_i915_cmd_table {
+ #define R CMD_DESC_REJECT
+ #define W CMD_DESC_REGISTER
+ #define B CMD_DESC_BITMASK
+-#define M CMD_DESC_MASTER
+ /*            Command                          Mask   Fixed Len   Action
+             ---------------------------------------------------------- */
+ static const struct drm_i915_cmd_descriptor gen7_common_cmds[] = {
+       CMD(  MI_NOOP,                          SMI,    F,  1,      S  ),
+       CMD(  MI_USER_INTERRUPT,                SMI,    F,  1,      R  ),
+-      CMD(  MI_WAIT_FOR_EVENT,                SMI,    F,  1,      M  ),
++      CMD(  MI_WAIT_FOR_EVENT,                SMI,    F,  1,      R  ),
+       CMD(  MI_ARB_CHECK,                     SMI,    F,  1,      S  ),
+       CMD(  MI_REPORT_HEAD,                   SMI,    F,  1,      S  ),
+       CMD(  MI_SUSPEND_FLUSH,                 SMI,    F,  1,      S  ),
+@@ -311,7 +305,7 @@ static const struct drm_i915_cmd_descrip
+       CMD(  MI_URB_ATOMIC_ALLOC,              SMI,    F,  1,      S  ),
+       CMD(  MI_SET_APPID,                     SMI,    F,  1,      S  ),
+       CMD(  MI_RS_CONTEXT,                    SMI,    F,  1,      S  ),
+-      CMD(  MI_LOAD_SCAN_LINES_INCL,          SMI,   !F,  0x3F,   M  ),
++      CMD(  MI_LOAD_SCAN_LINES_INCL,          SMI,   !F,  0x3F,   R  ),
+       CMD(  MI_LOAD_SCAN_LINES_EXCL,          SMI,   !F,  0x3F,   R  ),
+       CMD(  MI_LOAD_REGISTER_REG,             SMI,   !F,  0xFF,   W,
+             .reg = { .offset = 1, .mask = 0x007FFFFC, .step = 1 }    ),
+@@ -444,7 +438,7 @@ static const struct drm_i915_cmd_descrip
+ };
+ static const struct drm_i915_cmd_descriptor hsw_blt_cmds[] = {
+-      CMD(  MI_LOAD_SCAN_LINES_INCL,          SMI,   !F,  0x3F,   M  ),
++      CMD(  MI_LOAD_SCAN_LINES_INCL,          SMI,   !F,  0x3F,   R  ),
+       CMD(  MI_LOAD_SCAN_LINES_EXCL,          SMI,   !F,  0x3F,   R  ),
+ };
+@@ -461,7 +455,6 @@ static const struct drm_i915_cmd_descrip
+ #undef R
+ #undef W
+ #undef B
+-#undef M
+ static const struct drm_i915_cmd_table gen7_render_cmd_table[] = {
+       { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) },
+@@ -610,47 +603,29 @@ static const struct drm_i915_reg_descrip
+       REG64_IDX(RING_TIMESTAMP, BLT_RING_BASE),
+ };
+-static const struct drm_i915_reg_descriptor ivb_master_regs[] = {
+-      REG32(FORCEWAKE_MT),
+-      REG32(DERRMR),
+-      REG32(GEN7_PIPE_DE_LOAD_SL(PIPE_A)),
+-      REG32(GEN7_PIPE_DE_LOAD_SL(PIPE_B)),
+-      REG32(GEN7_PIPE_DE_LOAD_SL(PIPE_C)),
+-};
+-
+-static const struct drm_i915_reg_descriptor hsw_master_regs[] = {
+-      REG32(FORCEWAKE_MT),
+-      REG32(DERRMR),
+-};
+-
+ #undef REG64
+ #undef REG32
+ struct drm_i915_reg_table {
+       const struct drm_i915_reg_descriptor *regs;
+       int num_regs;
+-      bool master;
+ };
+ static const struct drm_i915_reg_table ivb_render_reg_tables[] = {
+-      { gen7_render_regs, ARRAY_SIZE(gen7_render_regs), false },
+-      { ivb_master_regs, ARRAY_SIZE(ivb_master_regs), true },
++      { gen7_render_regs, ARRAY_SIZE(gen7_render_regs) },
+ };
+ static const struct drm_i915_reg_table ivb_blt_reg_tables[] = {
+-      { gen7_blt_regs, ARRAY_SIZE(gen7_blt_regs), false },
+-      { ivb_master_regs, ARRAY_SIZE(ivb_master_regs), true },
++      { gen7_blt_regs, ARRAY_SIZE(gen7_blt_regs) },
+ };
+ static const struct drm_i915_reg_table hsw_render_reg_tables[] = {
+-      { gen7_render_regs, ARRAY_SIZE(gen7_render_regs), false },
+-      { hsw_render_regs, ARRAY_SIZE(hsw_render_regs), false },
+-      { hsw_master_regs, ARRAY_SIZE(hsw_master_regs), true },
++      { gen7_render_regs, ARRAY_SIZE(gen7_render_regs) },
++      { hsw_render_regs, ARRAY_SIZE(hsw_render_regs) },
+ };
+ static const struct drm_i915_reg_table hsw_blt_reg_tables[] = {
+-      { gen7_blt_regs, ARRAY_SIZE(gen7_blt_regs), false },
+-      { hsw_master_regs, ARRAY_SIZE(hsw_master_regs), true },
++      { gen7_blt_regs, ARRAY_SIZE(gen7_blt_regs) },
+ };
+ static u32 gen7_render_get_cmd_length_mask(u32 cmd_header)
+@@ -1027,22 +1002,16 @@ __find_reg(const struct drm_i915_reg_des
+ }
+ static const struct drm_i915_reg_descriptor *
+-find_reg(const struct intel_engine_cs *engine, bool is_master, u32 addr)
++find_reg(const struct intel_engine_cs *engine, u32 addr)
+ {
+       const struct drm_i915_reg_table *table = engine->reg_tables;
++      const struct drm_i915_reg_descriptor *reg = NULL;
+       int count = engine->reg_table_count;
+-      for (; count > 0; ++table, --count) {
+-              if (!table->master || is_master) {
+-                      const struct drm_i915_reg_descriptor *reg;
+-
+-                      reg = __find_reg(table->regs, table->num_regs, addr);
+-                      if (reg != NULL)
+-                              return reg;
+-              }
+-      }
++      for (; !reg && (count > 0); ++table, --count)
++              reg = __find_reg(table->regs, table->num_regs, addr);
+-      return NULL;
++      return reg;
+ }
+ /* Returns a vmap'd pointer to dst_obj, which the caller must unmap */
+@@ -1127,8 +1096,7 @@ unpin_src:
+ static bool check_cmd(const struct intel_engine_cs *engine,
+                     const struct drm_i915_cmd_descriptor *desc,
+-                    const u32 *cmd, u32 length,
+-                    const bool is_master)
++                    const u32 *cmd, u32 length)
+ {
+       if (desc->flags & CMD_DESC_SKIP)
+               return true;
+@@ -1138,12 +1106,6 @@ static bool check_cmd(const struct intel
+               return false;
+       }
+-      if ((desc->flags & CMD_DESC_MASTER) && !is_master) {
+-              DRM_DEBUG_DRIVER("CMD: Rejected master-only command: 0x%08X\n",
+-                               *cmd);
+-              return false;
+-      }
+-
+       if (desc->flags & CMD_DESC_REGISTER) {
+               /*
+                * Get the distance between individual register offset
+@@ -1157,7 +1119,7 @@ static bool check_cmd(const struct intel
+                    offset += step) {
+                       const u32 reg_addr = cmd[offset] & desc->reg.mask;
+                       const struct drm_i915_reg_descriptor *reg =
+-                              find_reg(engine, is_master, reg_addr);
++                              find_reg(engine, reg_addr);
+                       if (!reg) {
+                               DRM_DEBUG_DRIVER("CMD: Rejected register 0x%08X in command: 0x%08X (%s)\n",
+@@ -1244,7 +1206,6 @@ static bool check_cmd(const struct intel
+  * @shadow_batch_obj: copy of the batch buffer in question
+  * @batch_start_offset: byte offset in the batch at which execution starts
+  * @batch_len: length of the commands in batch_obj
+- * @is_master: is the submitting process the drm master?
+  *
+  * Parses the specified batch buffer looking for privilege violations as
+  * described in the overview.
+@@ -1256,8 +1217,7 @@ int intel_engine_cmd_parser(struct intel
+                           struct drm_i915_gem_object *batch_obj,
+                           struct drm_i915_gem_object *shadow_batch_obj,
+                           u32 batch_start_offset,
+-                          u32 batch_len,
+-                          bool is_master)
++                          u32 batch_len)
+ {
+       u32 *cmd, *batch_end;
+       struct drm_i915_cmd_descriptor default_desc = noop_desc;
+@@ -1323,7 +1283,7 @@ int intel_engine_cmd_parser(struct intel
+                       break;
+               }
+-              if (!check_cmd(engine, desc, cmd, length, is_master)) {
++              if (!check_cmd(engine, desc, cmd, length)) {
+                       ret = -EACCES;
+                       break;
+               }
+--- a/drivers/gpu/drm/i915/i915_drv.h
++++ b/drivers/gpu/drm/i915/i915_drv.h
+@@ -3847,8 +3847,7 @@ int intel_engine_cmd_parser(struct intel
+                           struct drm_i915_gem_object *batch_obj,
+                           struct drm_i915_gem_object *shadow_batch_obj,
+                           u32 batch_start_offset,
+-                          u32 batch_len,
+-                          bool is_master);
++                          u32 batch_len);
+ /* i915_perf.c */
+ extern void i915_perf_init(struct drm_i915_private *dev_priv);
+--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
++++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+@@ -1911,7 +1911,7 @@ static int i915_reset_gen7_sol_offsets(s
+       return 0;
+ }
+-static struct i915_vma *eb_parse(struct i915_execbuffer *eb, bool is_master)
++static struct i915_vma *eb_parse(struct i915_execbuffer *eb)
+ {
+       struct drm_i915_gem_object *shadow_batch_obj;
+       struct i915_vma *vma;
+@@ -1926,8 +1926,7 @@ static struct i915_vma *eb_parse(struct
+                                     eb->batch->obj,
+                                     shadow_batch_obj,
+                                     eb->batch_start_offset,
+-                                    eb->batch_len,
+-                                    is_master);
++                                    eb->batch_len);
+       if (err) {
+               if (err == -EACCES) /* unhandled chained batch */
+                       vma = NULL;
+@@ -2325,7 +2324,7 @@ i915_gem_do_execbuffer(struct drm_device
+       if (eb_use_cmdparser(&eb)) {
+               struct i915_vma *vma;
+-              vma = eb_parse(&eb, drm_is_current_master(file));
++              vma = eb_parse(&eb);
+               if (IS_ERR(vma)) {
+                       err = PTR_ERR(vma);
+                       goto err_vma;
diff --git a/queue-4.14/drm-i915-rename-gen7-cmdparser-tables.patch b/queue-4.14/drm-i915-rename-gen7-cmdparser-tables.patch
new file mode 100644
index 0000000..18b0bec
--- /dev/null
@@ -0,0 +1,177 @@
+From foo@baz Tue 12 Nov 2019 04:10:24 PM CET
+From: Jon Bloomfield <jon.bloomfield@intel.com>
+Date: Fri, 20 Apr 2018 14:26:01 -0700
+Subject: drm/i915: Rename gen7 cmdparser tables
+
+From: Jon Bloomfield <jon.bloomfield@intel.com>
+
+commit 0a2f661b6c21815a7fa60e30babe975fee8e73c6 upstream.
+
+We're about to introduce some new tables for later gens, and the
+current naming for the gen7 tables will no longer make sense.
+
+Signed-off-by: Jon Bloomfield <jon.bloomfield@intel.com>
+Cc: Tony Luck <tony.luck@intel.com>
+Cc: Dave Airlie <airlied@redhat.com>
+Cc: Takashi Iwai <tiwai@suse.de>
+Cc: Tyler Hicks <tyhicks@canonical.com>
+Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
+Reviewed-by: Chris Wilson <chris.p.wilson@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/i915/i915_cmd_parser.c |   70 ++++++++++++++++-----------------
+ 1 file changed, 35 insertions(+), 35 deletions(-)
+
+--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
++++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
+@@ -211,7 +211,7 @@ struct drm_i915_cmd_table {
+ /*            Command                          Mask   Fixed Len   Action
+             ---------------------------------------------------------- */
+-static const struct drm_i915_cmd_descriptor common_cmds[] = {
++static const struct drm_i915_cmd_descriptor gen7_common_cmds[] = {
+       CMD(  MI_NOOP,                          SMI,    F,  1,      S  ),
+       CMD(  MI_USER_INTERRUPT,                SMI,    F,  1,      R  ),
+       CMD(  MI_WAIT_FOR_EVENT,                SMI,    F,  1,      M  ),
+@@ -244,7 +244,7 @@ static const struct drm_i915_cmd_descrip
+       CMD(  MI_BATCH_BUFFER_START,            SMI,   !F,  0xFF,   S  ),
+ };
+-static const struct drm_i915_cmd_descriptor render_cmds[] = {
++static const struct drm_i915_cmd_descriptor gen7_render_cmds[] = {
+       CMD(  MI_FLUSH,                         SMI,    F,  1,      S  ),
+       CMD(  MI_ARB_ON_OFF,                    SMI,    F,  1,      R  ),
+       CMD(  MI_PREDICATE,                     SMI,    F,  1,      S  ),
+@@ -328,7 +328,7 @@ static const struct drm_i915_cmd_descrip
+       CMD(  GFX_OP_3DSTATE_BINDING_TABLE_EDIT_PS,  S3D,   !F,  0x1FF,  S  ),
+ };
+-static const struct drm_i915_cmd_descriptor video_cmds[] = {
++static const struct drm_i915_cmd_descriptor gen7_video_cmds[] = {
+       CMD(  MI_ARB_ON_OFF,                    SMI,    F,  1,      R  ),
+       CMD(  MI_SET_APPID,                     SMI,    F,  1,      S  ),
+       CMD(  MI_STORE_DWORD_IMM,               SMI,   !F,  0xFF,   B,
+@@ -372,7 +372,7 @@ static const struct drm_i915_cmd_descrip
+       CMD(  MFX_WAIT,                         SMFX,   F,  1,      S  ),
+ };
+-static const struct drm_i915_cmd_descriptor vecs_cmds[] = {
++static const struct drm_i915_cmd_descriptor gen7_vecs_cmds[] = {
+       CMD(  MI_ARB_ON_OFF,                    SMI,    F,  1,      R  ),
+       CMD(  MI_SET_APPID,                     SMI,    F,  1,      S  ),
+       CMD(  MI_STORE_DWORD_IMM,               SMI,   !F,  0xFF,   B,
+@@ -410,7 +410,7 @@ static const struct drm_i915_cmd_descrip
+             }},                                                      ),
+ };
+-static const struct drm_i915_cmd_descriptor blt_cmds[] = {
++static const struct drm_i915_cmd_descriptor gen7_blt_cmds[] = {
+       CMD(  MI_DISPLAY_FLIP,                  SMI,   !F,  0xFF,   R  ),
+       CMD(  MI_STORE_DWORD_IMM,               SMI,   !F,  0x3FF,  B,
+             .bits = {{
+@@ -463,35 +463,35 @@ static const struct drm_i915_cmd_descrip
+ #undef B
+ #undef M
+-static const struct drm_i915_cmd_table gen7_render_cmds[] = {
+-      { common_cmds, ARRAY_SIZE(common_cmds) },
+-      { render_cmds, ARRAY_SIZE(render_cmds) },
++static const struct drm_i915_cmd_table gen7_render_cmd_table[] = {
++      { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) },
++      { gen7_render_cmds, ARRAY_SIZE(gen7_render_cmds) },
+ };
+-static const struct drm_i915_cmd_table hsw_render_ring_cmds[] = {
+-      { common_cmds, ARRAY_SIZE(common_cmds) },
+-      { render_cmds, ARRAY_SIZE(render_cmds) },
++static const struct drm_i915_cmd_table hsw_render_ring_cmd_table[] = {
++      { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) },
++      { gen7_render_cmds, ARRAY_SIZE(gen7_render_cmds) },
+       { hsw_render_cmds, ARRAY_SIZE(hsw_render_cmds) },
+ };
+-static const struct drm_i915_cmd_table gen7_video_cmds[] = {
+-      { common_cmds, ARRAY_SIZE(common_cmds) },
+-      { video_cmds, ARRAY_SIZE(video_cmds) },
++static const struct drm_i915_cmd_table gen7_video_cmd_table[] = {
++      { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) },
++      { gen7_video_cmds, ARRAY_SIZE(gen7_video_cmds) },
+ };
+-static const struct drm_i915_cmd_table hsw_vebox_cmds[] = {
+-      { common_cmds, ARRAY_SIZE(common_cmds) },
+-      { vecs_cmds, ARRAY_SIZE(vecs_cmds) },
++static const struct drm_i915_cmd_table hsw_vebox_cmd_table[] = {
++      { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) },
++      { gen7_vecs_cmds, ARRAY_SIZE(gen7_vecs_cmds) },
+ };
+-static const struct drm_i915_cmd_table gen7_blt_cmds[] = {
+-      { common_cmds, ARRAY_SIZE(common_cmds) },
+-      { blt_cmds, ARRAY_SIZE(blt_cmds) },
++static const struct drm_i915_cmd_table gen7_blt_cmd_table[] = {
++      { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) },
++      { gen7_blt_cmds, ARRAY_SIZE(gen7_blt_cmds) },
+ };
+-static const struct drm_i915_cmd_table hsw_blt_ring_cmds[] = {
+-      { common_cmds, ARRAY_SIZE(common_cmds) },
+-      { blt_cmds, ARRAY_SIZE(blt_cmds) },
++static const struct drm_i915_cmd_table hsw_blt_ring_cmd_table[] = {
++      { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) },
++      { gen7_blt_cmds, ARRAY_SIZE(gen7_blt_cmds) },
+       { hsw_blt_cmds, ARRAY_SIZE(hsw_blt_cmds) },
+ };
+@@ -871,12 +871,12 @@ void intel_engine_init_cmd_parser(struct
+       switch (engine->id) {
+       case RCS:
+               if (IS_HASWELL(engine->i915)) {
+-                      cmd_tables = hsw_render_ring_cmds;
++                      cmd_tables = hsw_render_ring_cmd_table;
+                       cmd_table_count =
+-                              ARRAY_SIZE(hsw_render_ring_cmds);
++                              ARRAY_SIZE(hsw_render_ring_cmd_table);
+               } else {
+-                      cmd_tables = gen7_render_cmds;
+-                      cmd_table_count = ARRAY_SIZE(gen7_render_cmds);
++                      cmd_tables = gen7_render_cmd_table;
++                      cmd_table_count = ARRAY_SIZE(gen7_render_cmd_table);
+               }
+               if (IS_HASWELL(engine->i915)) {
+@@ -890,17 +890,17 @@ void intel_engine_init_cmd_parser(struct
+               engine->get_cmd_length_mask = gen7_render_get_cmd_length_mask;
+               break;
+       case VCS:
+-              cmd_tables = gen7_video_cmds;
+-              cmd_table_count = ARRAY_SIZE(gen7_video_cmds);
++              cmd_tables = gen7_video_cmd_table;
++              cmd_table_count = ARRAY_SIZE(gen7_video_cmd_table);
+               engine->get_cmd_length_mask = gen7_bsd_get_cmd_length_mask;
+               break;
+       case BCS:
+               if (IS_HASWELL(engine->i915)) {
+-                      cmd_tables = hsw_blt_ring_cmds;
+-                      cmd_table_count = ARRAY_SIZE(hsw_blt_ring_cmds);
++                      cmd_tables = hsw_blt_ring_cmd_table;
++                      cmd_table_count = ARRAY_SIZE(hsw_blt_ring_cmd_table);
+               } else {
+-                      cmd_tables = gen7_blt_cmds;
+-                      cmd_table_count = ARRAY_SIZE(gen7_blt_cmds);
++                      cmd_tables = gen7_blt_cmd_table;
++                      cmd_table_count = ARRAY_SIZE(gen7_blt_cmd_table);
+               }
+               if (IS_HASWELL(engine->i915)) {
+@@ -914,8 +914,8 @@ void intel_engine_init_cmd_parser(struct
+               engine->get_cmd_length_mask = gen7_blt_get_cmd_length_mask;
+               break;
+       case VECS:
+-              cmd_tables = hsw_vebox_cmds;
+-              cmd_table_count = ARRAY_SIZE(hsw_vebox_cmds);
++              cmd_tables = hsw_vebox_cmd_table;
++              cmd_table_count = ARRAY_SIZE(hsw_vebox_cmd_table);
+               /* VECS can use the same length_mask function as VCS */
+               engine->get_cmd_length_mask = gen7_bsd_get_cmd_length_mask;
+               break;
diff --git a/queue-4.14/drm-i915-silence-smatch-for-cmdparser.patch b/queue-4.14/drm-i915-silence-smatch-for-cmdparser.patch
new file mode 100644
index 0000000..17cc831
--- /dev/null
@@ -0,0 +1,69 @@
+From foo@baz Tue 12 Nov 2019 04:10:24 PM CET
+From: Chris Wilson <chris@chris-wilson.co.uk>
+Date: Tue, 7 Nov 2017 15:40:55 +0000
+Subject: drm/i915: Silence smatch for cmdparser
+
+From: Chris Wilson <chris@chris-wilson.co.uk>
+
+commit 0ffba1fc98e8ec35caae8d50b657296ebb9a9a51 upstream.
+
+drivers/gpu/drm/i915/i915_cmd_parser.c:808:23: error: not an lvalue
+drivers/gpu/drm/i915/i915_cmd_parser.c:811:23: error: not an lvalue
+drivers/gpu/drm/i915/i915_cmd_parser.c:814:23: error: not an lvalue
+drivers/gpu/drm/i915/i915_cmd_parser.c:808:23: error: not an lvalue
+drivers/gpu/drm/i915/i915_cmd_parser.c:811:23: error: not an lvalue
+drivers/gpu/drm/i915/i915_cmd_parser.c:814:23: error: not an lvalue
+drivers/gpu/drm/i915/i915_cmd_parser.c:808:23: error: not an lvalue
+drivers/gpu/drm/i915/i915_cmd_parser.c:811:23: error: not an lvalue
+drivers/gpu/drm/i915/i915_cmd_parser.c:814:23: error: not an lvalue
+drivers/gpu/drm/i915/i915_cmd_parser.c:808:23: error: not an lvalue
+drivers/gpu/drm/i915/i915_cmd_parser.c:811:23: error: not an lvalue
+drivers/gpu/drm/i915/i915_cmd_parser.c:814:23: error: not an lvalue
+
+If we move the shift into each case, not only do we kill the warning from
+smatch, but we also shrink the code slightly:
+
+   text           data     bss     dec     hex filename
+1267906          20587    3168 1291661  13b58d before
+1267890          20587    3168 1291645  13b57d after
+
+Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
+Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
+Cc: Matthew Auld <matthew.william.auld@gmail.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20171107154055.19460-1-chris@chris-wilson.co.uk
+Reviewed-by: Matthew Auld <matthew.william.auld@gmail.com>
+Reviewed-by: Gabriel Krisman Bertazi <krisman@collabora.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/i915/i915_cmd_parser.c |   13 +++----------
+ 1 file changed, 3 insertions(+), 10 deletions(-)
+
+--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
++++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
+@@ -798,22 +798,15 @@ struct cmd_node {
+  */
+ static inline u32 cmd_header_key(u32 x)
+ {
+-      u32 shift;
+-
+       switch (x >> INSTR_CLIENT_SHIFT) {
+       default:
+       case INSTR_MI_CLIENT:
+-              shift = STD_MI_OPCODE_SHIFT;
+-              break;
++              return x >> STD_MI_OPCODE_SHIFT;
+       case INSTR_RC_CLIENT:
+-              shift = STD_3D_OPCODE_SHIFT;
+-              break;
++              return x >> STD_3D_OPCODE_SHIFT;
+       case INSTR_BC_CLIENT:
+-              shift = STD_2D_OPCODE_SHIFT;
+-              break;
++              return x >> STD_2D_OPCODE_SHIFT;
+       }
+-
+-      return x >> shift;
+ }
+ static int init_hash_table(struct intel_engine_cs *engine,
diff --git a/queue-4.14/drm-i915-support-ro-ppgtt-mapped-cmdparser-shadow-buffers.patch b/queue-4.14/drm-i915-support-ro-ppgtt-mapped-cmdparser-shadow-buffers.patch
new file mode 100644
index 0000000..0107061
--- /dev/null
@@ -0,0 +1,200 @@
+From foo@baz Tue 12 Nov 2019 04:10:24 PM CET
+From: Jon Bloomfield <jon.bloomfield@intel.com>
+Date: Tue, 22 May 2018 13:59:06 -0700
+Subject: drm/i915: Support ro ppgtt mapped cmdparser shadow buffers
+
+From: Jon Bloomfield <jon.bloomfield@intel.com>
+
+commit 4f7af1948abcb18b4772fe1bcd84d7d27d96258c upstream.
+
+For Gen7, the original cmdparser motive was to permit limited
+use of register read/write instructions in unprivileged BBs.
+This worked by copying the user-supplied bb to a kmd-owned
+bb and running it in secure mode from the ggtt, but only if
+the scanner found no unsafe commands or registers.
+
+For Gen8+ we can't use this same technique, because running bbs
+from the ggtt also disables access to ppgtt space. But we also
+do not actually require 'secure' execution, since we are only
+trying to reduce the available command/register set. Instead we
+copy the user buffer to a kmd-owned read-only bb in the ppgtt
+and run it in the usual non-secure mode.
+
+Note that ro pages are only supported by the ppgtt (not the ggtt),
+but luckily that's exactly what we need.
+
+Add the required paths to map the shadow buffer read-only in the
+ppgtt for Gen8+.
+
+v2: IS_GEN7/IS_GEN (Mika)
+v3: rebase
+v4: rebase
+v5: rebase
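+
+To connect this with the read-only page support added earlier in the
+series, here is a condensed sketch of the Gen8+ path (simplified and
+illustrative only; the real code is shadow_batch_pin() in the hunk
+below):
+
+    /* mark the shadow BO read-only before binding it into the ppgtt */
+    i915_gem_object_set_readonly(shadow_batch_obj);
+    vma = i915_gem_object_pin(shadow_batch_obj, eb->vm, NULL, 0, 0, PIN_USER);
+    /*
+     * ppgtt_bind_vma() then sees i915_gem_object_is_readonly() and writes
+     * the PTEs with PTE_READ_ONLY, so the GPU cannot modify the scanned
+     * copy after the parser has approved it.
+     */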
+
+Signed-off-by: Jon Bloomfield <jon.bloomfield@intel.com>
+Cc: Tony Luck <tony.luck@intel.com>
+Cc: Dave Airlie <airlied@redhat.com>
+Cc: Takashi Iwai <tiwai@suse.de>
+Cc: Tyler Hicks <tyhicks@canonical.com>
+Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
+Reviewed-by: Chris Wilson <chris.p.wilson@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/i915/i915_drv.h            |   14 +++++++
+ drivers/gpu/drm/i915/i915_gem.c            |   16 +++++++-
+ drivers/gpu/drm/i915/i915_gem_execbuffer.c |   56 ++++++++++++++++++++---------
+ 3 files changed, 68 insertions(+), 18 deletions(-)
+
+--- a/drivers/gpu/drm/i915/i915_drv.h
++++ b/drivers/gpu/drm/i915/i915_drv.h
+@@ -2980,6 +2980,12 @@ intel_info(const struct drm_i915_private
+ #define IS_GEN9_LP(dev_priv)  (IS_GEN9(dev_priv) && IS_LP(dev_priv))
+ #define IS_GEN9_BC(dev_priv)  (IS_GEN9(dev_priv) && !IS_LP(dev_priv))
++/*
++ * The Gen7 cmdparser copies the scanned buffer to the ggtt for execution
++ * All later gens can run the final buffer from the ppgtt
++ */
++#define CMDPARSER_USES_GGTT(dev_priv) IS_GEN7(dev_priv)
++
+ #define ENGINE_MASK(id)       BIT(id)
+ #define RENDER_RING   ENGINE_MASK(RCS)
+ #define BSD_RING      ENGINE_MASK(VCS)
+@@ -3393,6 +3399,14 @@ i915_gem_object_ggtt_pin(struct drm_i915
+                        u64 alignment,
+                        u64 flags);
++struct i915_vma * __must_check
++i915_gem_object_pin(struct drm_i915_gem_object *obj,
++                  struct i915_address_space *vm,
++                  const struct i915_ggtt_view *view,
++                  u64 size,
++                  u64 alignment,
++                  u64 flags);
++
+ int i915_gem_object_unbind(struct drm_i915_gem_object *obj);
+ void i915_gem_release_mmap(struct drm_i915_gem_object *obj);
+--- a/drivers/gpu/drm/i915/i915_gem.c
++++ b/drivers/gpu/drm/i915/i915_gem.c
+@@ -4000,6 +4000,20 @@ i915_gem_object_ggtt_pin(struct drm_i915
+ {
+       struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
+       struct i915_address_space *vm = &dev_priv->ggtt.base;
++
++      return i915_gem_object_pin(obj, vm, view, size, alignment,
++                                 flags | PIN_GLOBAL);
++}
++
++struct i915_vma *
++i915_gem_object_pin(struct drm_i915_gem_object *obj,
++                  struct i915_address_space *vm,
++                  const struct i915_ggtt_view *view,
++                  u64 size,
++                  u64 alignment,
++                  u64 flags)
++{
++      struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
+       struct i915_vma *vma;
+       int ret;
+@@ -4057,7 +4071,7 @@ i915_gem_object_ggtt_pin(struct drm_i915
+                       return ERR_PTR(ret);
+       }
+-      ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL);
++      ret = i915_vma_pin(vma, size, alignment, flags);
+       if (ret)
+               return ERR_PTR(ret);
+--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
++++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+@@ -1912,6 +1912,33 @@ static int i915_reset_gen7_sol_offsets(s
+       return 0;
+ }
++static struct i915_vma *
++shadow_batch_pin(struct i915_execbuffer *eb, struct drm_i915_gem_object *obj)
++{
++      struct drm_i915_private *dev_priv = eb->i915;
++      struct i915_address_space *vm;
++      u64 flags;
++
++      /*
++       * PPGTT backed shadow buffers must be mapped RO, to prevent
++       * post-scan tampering
++       */
++      if (CMDPARSER_USES_GGTT(dev_priv)) {
++              flags = PIN_GLOBAL;
++              vm = &dev_priv->ggtt.base;
++              eb->batch_flags |= I915_DISPATCH_SECURE;
++      } else if (eb->vm->has_read_only) {
++              flags = PIN_USER;
++              vm = eb->vm;
++              i915_gem_object_set_readonly(obj);
++      } else {
++              DRM_DEBUG("Cannot prevent post-scan tampering without RO capable vm\n");
++              return ERR_PTR(-EINVAL);
++      }
++
++      return i915_gem_object_pin(obj, vm, NULL, 0, 0, flags);
++}
++
+ static struct i915_vma *eb_parse(struct i915_execbuffer *eb)
+ {
+       struct drm_i915_gem_object *shadow_batch_obj;
+@@ -1929,14 +1956,21 @@ static struct i915_vma *eb_parse(struct
+                                     eb->batch_start_offset,
+                                     eb->batch_len);
+       if (err) {
+-              if (err == -EACCES) /* unhandled chained batch */
++              /*
++               * Unsafe GGTT-backed buffers can still be submitted safely
++               * as non-secure.
++               * For PPGTT backing however, we have no choice but to forcibly
++               * reject unsafe buffers
++               */
++              if (CMDPARSER_USES_GGTT(eb->i915) && (err == -EACCES))
++                      /* Execute original buffer non-secure */
+                       vma = NULL;
+               else
+                       vma = ERR_PTR(err);
+               goto out;
+       }
+-      vma = i915_gem_object_ggtt_pin(shadow_batch_obj, NULL, 0, 0, 0);
++      vma = shadow_batch_pin(eb, shadow_batch_obj);
+       if (IS_ERR(vma))
+               goto out;
+@@ -1945,6 +1979,9 @@ static struct i915_vma *eb_parse(struct
+               __EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_REF;
+       vma->exec_flags = &eb->flags[eb->buffer_count];
+       eb->buffer_count++;
++      eb->batch_start_offset = 0;
++      eb->batch = vma;
++      /* eb->batch_len unchanged */
+ out:
+       i915_gem_object_unpin_pages(shadow_batch_obj);
+@@ -2330,21 +2367,6 @@ i915_gem_do_execbuffer(struct drm_device
+                       err = PTR_ERR(vma);
+                       goto err_vma;
+               }
+-
+-              if (vma) {
+-                      /*
+-                       * Batch parsed and accepted:
+-                       *
+-                       * Set the DISPATCH_SECURE bit to remove the NON_SECURE
+-                       * bit from MI_BATCH_BUFFER_START commands issued in
+-                       * the dispatch_execbuffer implementations. We
+-                       * specifically don't want that set on batches the
+-                       * command parser has accepted.
+-                       */
+-                      eb.batch_flags |= I915_DISPATCH_SECURE;
+-                      eb.batch_start_offset = 0;
+-                      eb.batch = vma;
+-              }
+       }
+       if (eb.batch_len == 0)
diff --git a/queue-4.14/kvm-add-helper-function-for-creating-vm-worker-threads.patch b/queue-4.14/kvm-add-helper-function-for-creating-vm-worker-threads.patch
new file mode 100644
index 0000000..d333d7c
--- /dev/null
@@ -0,0 +1,134 @@
+From foo@baz Tue 12 Nov 2019 04:11:14 PM CET
+From: Junaid Shahid <junaids@google.com>
+Date: Fri, 1 Nov 2019 00:14:08 +0100
+Subject: kvm: Add helper function for creating VM worker threads
+
+From: Junaid Shahid <junaids@google.com>
+
+commit c57c80467f90e5504c8df9ad3555d2c78800bf94 upstream.
+
+Add a function to create a kernel thread associated with a given VM. In
+particular, it ensures that the worker thread inherits the priority and
+cgroups of the calling thread.
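+
+A sketch of how a caller might use the new helper (the worker function
+and names below are made up for illustration; the real user is the NX
+huge page recovery thread added by a later patch in this series):
+
+    /* assumes <linux/kvm_host.h> and <linux/kthread.h> */
+    static int example_worker(struct kvm *kvm, uintptr_t data)
+    {
+            while (!kthread_should_stop()) {
+                    /* periodic per-VM background work goes here */
+                    schedule_timeout_interruptible(HZ);
+            }
+            return 0;
+    }
+
+    static int example_start_worker(struct kvm *kvm)
+    {
+            struct task_struct *thread;
+            int err;
+
+            err = kvm_vm_create_worker_thread(kvm, example_worker, 0,
+                                              "kvm-example", &thread);
+            if (err)
+                    return err;
+
+            /* the thread parks itself after init; unpark it to start work */
+            kthread_unpark(thread);
+            return 0;
+    }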
+
+Signed-off-by: Junaid Shahid <junaids@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/kvm_host.h |    6 +++
+ virt/kvm/kvm_main.c      |   84 +++++++++++++++++++++++++++++++++++++++++++++++
+ 2 files changed, 90 insertions(+)
+
+--- a/include/linux/kvm_host.h
++++ b/include/linux/kvm_host.h
+@@ -1260,4 +1260,10 @@ static inline bool vcpu_valid_wakeup(str
+ }
+ #endif /* CONFIG_HAVE_KVM_INVALID_WAKEUPS */
++typedef int (*kvm_vm_thread_fn_t)(struct kvm *kvm, uintptr_t data);
++
++int kvm_vm_create_worker_thread(struct kvm *kvm, kvm_vm_thread_fn_t thread_fn,
++                              uintptr_t data, const char *name,
++                              struct task_struct **thread_ptr);
++
+ #endif
+--- a/virt/kvm/kvm_main.c
++++ b/virt/kvm/kvm_main.c
+@@ -51,6 +51,7 @@
+ #include <linux/slab.h>
+ #include <linux/sort.h>
+ #include <linux/bsearch.h>
++#include <linux/kthread.h>
+ #include <asm/processor.h>
+ #include <asm/io.h>
+@@ -4155,3 +4156,86 @@ void kvm_exit(void)
+       kvm_vfio_ops_exit();
+ }
+ EXPORT_SYMBOL_GPL(kvm_exit);
++
++struct kvm_vm_worker_thread_context {
++      struct kvm *kvm;
++      struct task_struct *parent;
++      struct completion init_done;
++      kvm_vm_thread_fn_t thread_fn;
++      uintptr_t data;
++      int err;
++};
++
++static int kvm_vm_worker_thread(void *context)
++{
++      /*
++       * The init_context is allocated on the stack of the parent thread, so
++       * we have to locally copy anything that is needed beyond initialization
++       */
++      struct kvm_vm_worker_thread_context *init_context = context;
++      struct kvm *kvm = init_context->kvm;
++      kvm_vm_thread_fn_t thread_fn = init_context->thread_fn;
++      uintptr_t data = init_context->data;
++      int err;
++
++      err = kthread_park(current);
++      /* kthread_park(current) is never supposed to return an error */
++      WARN_ON(err != 0);
++      if (err)
++              goto init_complete;
++
++      err = cgroup_attach_task_all(init_context->parent, current);
++      if (err) {
++              kvm_err("%s: cgroup_attach_task_all failed with err %d\n",
++                      __func__, err);
++              goto init_complete;
++      }
++
++      set_user_nice(current, task_nice(init_context->parent));
++
++init_complete:
++      init_context->err = err;
++      complete(&init_context->init_done);
++      init_context = NULL;
++
++      if (err)
++              return err;
++
++      /* Wait to be woken up by the spawner before proceeding. */
++      kthread_parkme();
++
++      if (!kthread_should_stop())
++              err = thread_fn(kvm, data);
++
++      return err;
++}
++
++int kvm_vm_create_worker_thread(struct kvm *kvm, kvm_vm_thread_fn_t thread_fn,
++                              uintptr_t data, const char *name,
++                              struct task_struct **thread_ptr)
++{
++      struct kvm_vm_worker_thread_context init_context = {};
++      struct task_struct *thread;
++
++      *thread_ptr = NULL;
++      init_context.kvm = kvm;
++      init_context.parent = current;
++      init_context.thread_fn = thread_fn;
++      init_context.data = data;
++      init_completion(&init_context.init_done);
++
++      thread = kthread_run(kvm_vm_worker_thread, &init_context,
++                           "%s-%d", name, task_pid_nr(current));
++      if (IS_ERR(thread))
++              return PTR_ERR(thread);
++
++      /* kthread_run is never supposed to return NULL */
++      WARN_ON(thread == NULL);
++
++      wait_for_completion(&init_context.init_done);
++
++      if (!init_context.err)
++              *thread_ptr = thread;
++
++      return init_context.err;
++}
diff --git a/queue-4.14/kvm-convert-kvm_lock-to-a-mutex.patch b/queue-4.14/kvm-convert-kvm_lock-to-a-mutex.patch
new file mode 100644
index 0000000..ab1af20
--- /dev/null
@@ -0,0 +1,248 @@
+From foo@baz Tue 12 Nov 2019 04:11:14 PM CET
+From: Junaid Shahid <junaids@google.com>
+Date: Thu, 3 Jan 2019 17:14:28 -0800
+Subject: kvm: Convert kvm_lock to a mutex
+
+From: Junaid Shahid <junaids@google.com>
+
+commit 0d9ce162cf46c99628cc5da9510b959c7976735b upstream.
+
+It doesn't seem as if there is any particular need for kvm_lock to be a
+spinlock, so convert the lock to a mutex so that sleepable functions (in
+particular cond_resched()) can be called while holding it.
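+
+A minimal sketch of the pattern this enables (the function below is
+hypothetical; kvm_lock and vm_list are the real symbols touched by this
+patch, declared in <linux/kvm_host.h>):
+
+    static void example_walk_vms(void)
+    {
+            struct kvm *kvm;
+
+            mutex_lock(&kvm_lock);
+            list_for_each_entry(kvm, &vm_list, vm_list) {
+                    /* potentially long per-VM work */
+                    cond_resched();  /* sleeping here is legal under a mutex */
+            }
+            mutex_unlock(&kvm_lock);
+    }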
+
+Signed-off-by: Junaid Shahid <junaids@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/virtual/kvm/locking.txt |    4 +---
+ arch/s390/kvm/kvm-s390.c              |    4 ++--
+ arch/x86/kvm/mmu.c                    |    4 ++--
+ arch/x86/kvm/x86.c                    |   10 +++++-----
+ include/linux/kvm_host.h              |    2 +-
+ virt/kvm/kvm_main.c                   |   30 +++++++++++++++---------------
+ 6 files changed, 26 insertions(+), 28 deletions(-)
+
+--- a/Documentation/virtual/kvm/locking.txt
++++ b/Documentation/virtual/kvm/locking.txt
+@@ -15,8 +15,6 @@ The acquisition orders for mutexes are a
+ On x86, vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock.
+-For spinlocks, kvm_lock is taken outside kvm->mmu_lock.
+-
+ Everything else is a leaf: no other lock is taken inside the critical
+ sections.
+@@ -169,7 +167,7 @@ which time it will be set using the Dirt
+ ------------
+ Name:         kvm_lock
+-Type:         spinlock_t
++Type:         mutex
+ Arch:         any
+ Protects:     - vm_list
+--- a/arch/s390/kvm/kvm-s390.c
++++ b/arch/s390/kvm/kvm-s390.c
+@@ -1926,13 +1926,13 @@ int kvm_arch_init_vm(struct kvm *kvm, un
+       kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
+       if (!kvm->arch.sca)
+               goto out_err;
+-      spin_lock(&kvm_lock);
++      mutex_lock(&kvm_lock);
+       sca_offset += 16;
+       if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
+               sca_offset = 0;
+       kvm->arch.sca = (struct bsca_block *)
+                       ((char *) kvm->arch.sca + sca_offset);
+-      spin_unlock(&kvm_lock);
++      mutex_unlock(&kvm_lock);
+       sprintf(debug_name, "kvm-%u", current->pid);
+--- a/arch/x86/kvm/mmu.c
++++ b/arch/x86/kvm/mmu.c
+@@ -5454,7 +5454,7 @@ mmu_shrink_scan(struct shrinker *shrink,
+       int nr_to_scan = sc->nr_to_scan;
+       unsigned long freed = 0;
+-      spin_lock(&kvm_lock);
++      mutex_lock(&kvm_lock);
+       list_for_each_entry(kvm, &vm_list, vm_list) {
+               int idx;
+@@ -5504,7 +5504,7 @@ unlock:
+               break;
+       }
+-      spin_unlock(&kvm_lock);
++      mutex_unlock(&kvm_lock);
+       return freed;
+ }
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -6156,17 +6156,17 @@ static int kvmclock_cpufreq_notifier(str
+       smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1);
+-      spin_lock(&kvm_lock);
++      mutex_lock(&kvm_lock);
+       list_for_each_entry(kvm, &vm_list, vm_list) {
+               kvm_for_each_vcpu(i, vcpu, kvm) {
+                       if (vcpu->cpu != freq->cpu)
+                               continue;
+                       kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
+-                      if (vcpu->cpu != smp_processor_id())
++                      if (vcpu->cpu != raw_smp_processor_id())
+                               send_ipi = 1;
+               }
+       }
+-      spin_unlock(&kvm_lock);
++      mutex_unlock(&kvm_lock);
+       if (freq->old < freq->new && send_ipi) {
+               /*
+@@ -6303,12 +6303,12 @@ static void pvclock_gtod_update_fn(struc
+       struct kvm_vcpu *vcpu;
+       int i;
+-      spin_lock(&kvm_lock);
++      mutex_lock(&kvm_lock);
+       list_for_each_entry(kvm, &vm_list, vm_list)
+               kvm_for_each_vcpu(i, vcpu, kvm)
+                       kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
+       atomic_set(&kvm_guest_has_master_clock, 0);
+-      spin_unlock(&kvm_lock);
++      mutex_unlock(&kvm_lock);
+ }
+ static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn);
+--- a/include/linux/kvm_host.h
++++ b/include/linux/kvm_host.h
+@@ -140,7 +140,7 @@ static inline bool is_error_page(struct
+ extern struct kmem_cache *kvm_vcpu_cache;
+-extern spinlock_t kvm_lock;
++extern struct mutex kvm_lock;
+ extern struct list_head vm_list;
+ struct kvm_io_range {
+--- a/virt/kvm/kvm_main.c
++++ b/virt/kvm/kvm_main.c
+@@ -92,7 +92,7 @@ EXPORT_SYMBOL_GPL(halt_poll_ns_shrink);
+  *    kvm->lock --> kvm->slots_lock --> kvm->irq_lock
+  */
+-DEFINE_SPINLOCK(kvm_lock);
++DEFINE_MUTEX(kvm_lock);
+ static DEFINE_RAW_SPINLOCK(kvm_count_lock);
+ LIST_HEAD(vm_list);
+@@ -668,9 +668,9 @@ static struct kvm *kvm_create_vm(unsigne
+       if (r)
+               goto out_err;
+-      spin_lock(&kvm_lock);
++      mutex_lock(&kvm_lock);
+       list_add(&kvm->vm_list, &vm_list);
+-      spin_unlock(&kvm_lock);
++      mutex_unlock(&kvm_lock);
+       preempt_notifier_inc();
+@@ -716,9 +716,9 @@ static void kvm_destroy_vm(struct kvm *k
+       kvm_uevent_notify_change(KVM_EVENT_DESTROY_VM, kvm);
+       kvm_destroy_vm_debugfs(kvm);
+       kvm_arch_sync_events(kvm);
+-      spin_lock(&kvm_lock);
++      mutex_lock(&kvm_lock);
+       list_del(&kvm->vm_list);
+-      spin_unlock(&kvm_lock);
++      mutex_unlock(&kvm_lock);
+       kvm_free_irq_routing(kvm);
+       for (i = 0; i < KVM_NR_BUSES; i++) {
+               struct kvm_io_bus *bus = kvm_get_bus(kvm, i);
+@@ -3830,13 +3830,13 @@ static int vm_stat_get(void *_offset, u6
+       u64 tmp_val;
+       *val = 0;
+-      spin_lock(&kvm_lock);
++      mutex_lock(&kvm_lock);
+       list_for_each_entry(kvm, &vm_list, vm_list) {
+               stat_tmp.kvm = kvm;
+               vm_stat_get_per_vm((void *)&stat_tmp, &tmp_val);
+               *val += tmp_val;
+       }
+-      spin_unlock(&kvm_lock);
++      mutex_unlock(&kvm_lock);
+       return 0;
+ }
+@@ -3849,12 +3849,12 @@ static int vm_stat_clear(void *_offset,
+       if (val)
+               return -EINVAL;
+-      spin_lock(&kvm_lock);
++      mutex_lock(&kvm_lock);
+       list_for_each_entry(kvm, &vm_list, vm_list) {
+               stat_tmp.kvm = kvm;
+               vm_stat_clear_per_vm((void *)&stat_tmp, 0);
+       }
+-      spin_unlock(&kvm_lock);
++      mutex_unlock(&kvm_lock);
+       return 0;
+ }
+@@ -3869,13 +3869,13 @@ static int vcpu_stat_get(void *_offset,
+       u64 tmp_val;
+       *val = 0;
+-      spin_lock(&kvm_lock);
++      mutex_lock(&kvm_lock);
+       list_for_each_entry(kvm, &vm_list, vm_list) {
+               stat_tmp.kvm = kvm;
+               vcpu_stat_get_per_vm((void *)&stat_tmp, &tmp_val);
+               *val += tmp_val;
+       }
+-      spin_unlock(&kvm_lock);
++      mutex_unlock(&kvm_lock);
+       return 0;
+ }
+@@ -3888,12 +3888,12 @@ static int vcpu_stat_clear(void *_offset
+       if (val)
+               return -EINVAL;
+-      spin_lock(&kvm_lock);
++      mutex_lock(&kvm_lock);
+       list_for_each_entry(kvm, &vm_list, vm_list) {
+               stat_tmp.kvm = kvm;
+               vcpu_stat_clear_per_vm((void *)&stat_tmp, 0);
+       }
+-      spin_unlock(&kvm_lock);
++      mutex_unlock(&kvm_lock);
+       return 0;
+ }
+@@ -3914,7 +3914,7 @@ static void kvm_uevent_notify_change(uns
+       if (!kvm_dev.this_device || !kvm)
+               return;
+-      spin_lock(&kvm_lock);
++      mutex_lock(&kvm_lock);
+       if (type == KVM_EVENT_CREATE_VM) {
+               kvm_createvm_count++;
+               kvm_active_vms++;
+@@ -3923,7 +3923,7 @@ static void kvm_uevent_notify_change(uns
+       }
+       created = kvm_createvm_count;
+       active = kvm_active_vms;
+-      spin_unlock(&kvm_lock);
++      mutex_unlock(&kvm_lock);
+       env = kzalloc(sizeof(*env), GFP_KERNEL);
+       if (!env)
diff --git a/queue-4.14/kvm-mmu-do-not-release-the-page-inside-mmu_set_spte.patch b/queue-4.14/kvm-mmu-do-not-release-the-page-inside-mmu_set_spte.patch
new file mode 100644
index 0000000..7f67848
--- /dev/null
@@ -0,0 +1,137 @@
+From foo@baz Tue 12 Nov 2019 04:11:14 PM CET
+From: Junaid Shahid <junaids@google.com>
+Date: Thu, 3 Jan 2019 16:22:21 -0800
+Subject: kvm: mmu: Do not release the page inside mmu_set_spte()
+
+From: Junaid Shahid <junaids@google.com>
+
+commit 43fdcda96e2550c6d1c46fb8a78801aa2f7276ed upstream.
+
+Release the page at the call-site where it was originally acquired.
+This makes the exit code cleaner for most call sites, since they
+do not need to duplicate code between success and the failure
+label.
+
+Signed-off-by: Junaid Shahid <junaids@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/mmu.c         |   18 +++++++-----------
+ arch/x86/kvm/paging_tmpl.h |    8 +++-----
+ 2 files changed, 10 insertions(+), 16 deletions(-)
+
+--- a/arch/x86/kvm/mmu.c
++++ b/arch/x86/kvm/mmu.c
+@@ -2918,8 +2918,6 @@ static int mmu_set_spte(struct kvm_vcpu
+               }
+       }
+-      kvm_release_pfn_clean(pfn);
+-
+       return ret;
+ }
+@@ -2954,9 +2952,11 @@ static int direct_pte_prefetch_many(stru
+       if (ret <= 0)
+               return -1;
+-      for (i = 0; i < ret; i++, gfn++, start++)
++      for (i = 0; i < ret; i++, gfn++, start++) {
+               mmu_set_spte(vcpu, start, access, 0, sp->role.level, gfn,
+                            page_to_pfn(pages[i]), true, true);
++              put_page(pages[i]);
++      }
+       return 0;
+ }
+@@ -3361,6 +3361,7 @@ static int nonpaging_map(struct kvm_vcpu
+       if (handle_abnormal_pfn(vcpu, v, gfn, pfn, ACC_ALL, &r))
+               return r;
++      r = RET_PF_RETRY;
+       spin_lock(&vcpu->kvm->mmu_lock);
+       if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
+               goto out_unlock;
+@@ -3369,14 +3370,11 @@ static int nonpaging_map(struct kvm_vcpu
+       if (likely(!force_pt_level))
+               transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level);
+       r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault);
+-      spin_unlock(&vcpu->kvm->mmu_lock);
+-
+-      return r;
+ out_unlock:
+       spin_unlock(&vcpu->kvm->mmu_lock);
+       kvm_release_pfn_clean(pfn);
+-      return RET_PF_RETRY;
++      return r;
+ }
+@@ -3954,6 +3952,7 @@ static int tdp_page_fault(struct kvm_vcp
+       if (handle_abnormal_pfn(vcpu, 0, gfn, pfn, ACC_ALL, &r))
+               return r;
++      r = RET_PF_RETRY;
+       spin_lock(&vcpu->kvm->mmu_lock);
+       if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
+               goto out_unlock;
+@@ -3962,14 +3961,11 @@ static int tdp_page_fault(struct kvm_vcp
+       if (likely(!force_pt_level))
+               transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level);
+       r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault);
+-      spin_unlock(&vcpu->kvm->mmu_lock);
+-
+-      return r;
+ out_unlock:
+       spin_unlock(&vcpu->kvm->mmu_lock);
+       kvm_release_pfn_clean(pfn);
+-      return RET_PF_RETRY;
++      return r;
+ }
+ static void nonpaging_init_context(struct kvm_vcpu *vcpu,
+--- a/arch/x86/kvm/paging_tmpl.h
++++ b/arch/x86/kvm/paging_tmpl.h
+@@ -522,6 +522,7 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vc
+       mmu_set_spte(vcpu, spte, pte_access, 0, PT_PAGE_TABLE_LEVEL, gfn, pfn,
+                    true, true);
++      kvm_release_pfn_clean(pfn);
+       return true;
+ }
+@@ -673,7 +674,6 @@ static int FNAME(fetch)(struct kvm_vcpu
+       return ret;
+ out_gpte_changed:
+-      kvm_release_pfn_clean(pfn);
+       return RET_PF_RETRY;
+ }
+@@ -821,6 +821,7 @@ static int FNAME(page_fault)(struct kvm_
+                       walker.pte_access &= ~ACC_EXEC_MASK;
+       }
++      r = RET_PF_RETRY;
+       spin_lock(&vcpu->kvm->mmu_lock);
+       if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
+               goto out_unlock;
+@@ -834,14 +835,11 @@ static int FNAME(page_fault)(struct kvm_
+                        level, pfn, map_writable, prefault);
+       ++vcpu->stat.pf_fixed;
+       kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT);
+-      spin_unlock(&vcpu->kvm->mmu_lock);
+-
+-      return r;
+ out_unlock:
+       spin_unlock(&vcpu->kvm->mmu_lock);
+       kvm_release_pfn_clean(pfn);
+-      return RET_PF_RETRY;
++      return r;
+ }
+ static gpa_t FNAME(get_level1_sp_gpa)(struct kvm_mmu_page *sp)
diff --git a/queue-4.14/kvm-mmu-itlb_multihit-mitigation.patch b/queue-4.14/kvm-mmu-itlb_multihit-mitigation.patch
new file mode 100644
index 0000000..91d9c07
--- /dev/null
@@ -0,0 +1,494 @@
+From foo@baz Tue 12 Nov 2019 04:11:14 PM CET
+From: Paolo Bonzini <pbonzini@redhat.com>
+Date: Mon, 4 Nov 2019 12:22:02 +0100
+Subject: kvm: mmu: ITLB_MULTIHIT mitigation
+
+From: Paolo Bonzini <pbonzini@redhat.com>
+
+commit b8e8c8303ff28c61046a4d0f6ea99aea609a7dc0 upstream.
+
+With some Intel processors, putting the same virtual address in the TLB
+as both a 4 KiB and 2 MiB page can confuse the instruction fetch unit
+and cause the processor to issue a machine check resulting in a CPU lockup.
+
+Unfortunately when EPT page tables use huge pages, it is possible for a
+malicious guest to cause this situation.
+
+Add a knob to mark huge pages as non-executable. When the nx_huge_pages
+parameter is enabled (and we are using EPT), all huge pages are marked as
+NX. If the guest attempts to execute in one of those pages, the page is
+broken down into 4K pages, which are then marked executable.
+
+This is not an issue for shadow paging (except nested EPT), because then
+the host is in control of TLB flushes and the problematic situation cannot
+happen. With nested EPT the nested guest can again cause problems, so
+shadow and direct EPT are treated in the same way.
+
+[ tglx: Fixup default to auto and massage wording a bit ]
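+
+For reference, once the whole series is applied the resulting mitigation
+state is reported via sysfs; a minimal userspace check (illustrative
+only) looks like this:
+
+    #include <stdio.h>
+
+    int main(void)
+    {
+            char buf[128];
+            FILE *f = fopen("/sys/devices/system/cpu/vulnerabilities/itlb_multihit", "r");
+
+            if (!f)
+                    return 1;  /* file absent on unaffected or older kernels */
+            if (fgets(buf, sizeof(buf), f))
+                    fputs(buf, stdout);  /* e.g. "KVM: Mitigation: Split huge pages" */
+            fclose(f);
+            return 0;
+    }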
+
+Originally-by: Junaid Shahid <junaids@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/admin-guide/kernel-parameters.txt |   19 +++
+ arch/x86/include/asm/kvm_host.h                 |    2 
+ arch/x86/kernel/cpu/bugs.c                      |   13 ++
+ arch/x86/kvm/mmu.c                              |  141 ++++++++++++++++++++++--
+ arch/x86/kvm/paging_tmpl.h                      |   29 +++-
+ arch/x86/kvm/x86.c                              |    9 +
+ 6 files changed, 200 insertions(+), 13 deletions(-)
+
+--- a/Documentation/admin-guide/kernel-parameters.txt
++++ b/Documentation/admin-guide/kernel-parameters.txt
+@@ -1852,6 +1852,19 @@
+                       KVM MMU at runtime.
+                       Default is 0 (off)
++      kvm.nx_huge_pages=
++                      [KVM] Controls the software workaround for the
++                      X86_BUG_ITLB_MULTIHIT bug.
++                      force   : Always deploy workaround.
++                      off     : Never deploy workaround.
++                      auto    : Deploy workaround based on the presence of
++                                X86_BUG_ITLB_MULTIHIT.
++
++                      Default is 'auto'.
++
++                      If the software workaround is enabled for the host,
++                      guests do need not to enable it for nested guests.
++
+       kvm-amd.nested= [KVM,AMD] Allow nested virtualization in KVM/SVM.
+                       Default is 1 (enabled)
+@@ -2410,6 +2423,12 @@
+                                              l1tf=off [X86]
+                                              mds=off [X86]
+                                              tsx_async_abort=off [X86]
++                                             kvm.nx_huge_pages=off [X86]
++
++                              Exceptions:
++                                             This does not have any effect on
++                                             kvm.nx_huge_pages when
++                                             kvm.nx_huge_pages=force.
+                       auto (default)
+                               Mitigate all CPU vulnerabilities, but leave SMT
+--- a/arch/x86/include/asm/kvm_host.h
++++ b/arch/x86/include/asm/kvm_host.h
+@@ -289,6 +289,7 @@ struct kvm_mmu_page {
+       /* hold the gfn of each spte inside spt */
+       gfn_t *gfns;
+       bool unsync;
++      bool lpage_disallowed; /* Can't be replaced by an equiv large page */
+       int root_count;          /* Currently serving as active root */
+       unsigned int unsync_children;
+       struct kvm_rmap_head parent_ptes; /* rmap pointers to parent sptes */
+@@ -867,6 +868,7 @@ struct kvm_vm_stat {
+       ulong mmu_unsync;
+       ulong remote_tlb_flush;
+       ulong lpages;
++      ulong nx_lpage_splits;
+       ulong max_mmu_page_hash_collisions;
+ };
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -1225,6 +1225,9 @@ void x86_spec_ctrl_setup_ap(void)
+               x86_amd_ssb_disable();
+ }
++bool itlb_multihit_kvm_mitigation;
++EXPORT_SYMBOL_GPL(itlb_multihit_kvm_mitigation);
++
+ #undef pr_fmt
+ #define pr_fmt(fmt)   "L1TF: " fmt
+@@ -1380,17 +1383,25 @@ static ssize_t l1tf_show_state(char *buf
+                      l1tf_vmx_states[l1tf_vmx_mitigation],
+                      sched_smt_active() ? "vulnerable" : "disabled");
+ }
++
++static ssize_t itlb_multihit_show_state(char *buf)
++{
++      if (itlb_multihit_kvm_mitigation)
++              return sprintf(buf, "KVM: Mitigation: Split huge pages\n");
++      else
++              return sprintf(buf, "KVM: Vulnerable\n");
++}
+ #else
+ static ssize_t l1tf_show_state(char *buf)
+ {
+       return sprintf(buf, "%s\n", L1TF_DEFAULT_MSG);
+ }
+-#endif
+ static ssize_t itlb_multihit_show_state(char *buf)
+ {
+       return sprintf(buf, "Processor vulnerable\n");
+ }
++#endif
+ static ssize_t mds_show_state(char *buf)
+ {
+--- a/arch/x86/kvm/mmu.c
++++ b/arch/x86/kvm/mmu.c
+@@ -48,6 +48,20 @@
+ #include <asm/kvm_page_track.h>
+ #include "trace.h"
++extern bool itlb_multihit_kvm_mitigation;
++
++static int __read_mostly nx_huge_pages = -1;
++
++static int set_nx_huge_pages(const char *val, const struct kernel_param *kp);
++
++static struct kernel_param_ops nx_huge_pages_ops = {
++      .set = set_nx_huge_pages,
++      .get = param_get_bool,
++};
++
++module_param_cb(nx_huge_pages, &nx_huge_pages_ops, &nx_huge_pages, 0644);
++__MODULE_PARM_TYPE(nx_huge_pages, "bool");
++
+ /*
+  * When setting this variable to true it enables Two-Dimensional-Paging
+  * where the hardware walks 2 page tables:
+@@ -266,6 +280,11 @@ static inline bool spte_ad_enabled(u64 s
+       return !(spte & shadow_acc_track_value);
+ }
++static bool is_nx_huge_page_enabled(void)
++{
++      return READ_ONCE(nx_huge_pages);
++}
++
+ static inline u64 spte_shadow_accessed_mask(u64 spte)
+ {
+       MMU_WARN_ON((spte & shadow_mmio_mask) == shadow_mmio_value);
+@@ -1078,6 +1097,15 @@ static void account_shadowed(struct kvm
+       kvm_mmu_gfn_disallow_lpage(slot, gfn);
+ }
++static void account_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp)
++{
++      if (sp->lpage_disallowed)
++              return;
++
++      ++kvm->stat.nx_lpage_splits;
++      sp->lpage_disallowed = true;
++}
++
+ static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
+ {
+       struct kvm_memslots *slots;
+@@ -1095,6 +1123,12 @@ static void unaccount_shadowed(struct kv
+       kvm_mmu_gfn_allow_lpage(slot, gfn);
+ }
++static void unaccount_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp)
++{
++      --kvm->stat.nx_lpage_splits;
++      sp->lpage_disallowed = false;
++}
++
+ static bool __mmu_gfn_lpage_is_disallowed(gfn_t gfn, int level,
+                                         struct kvm_memory_slot *slot)
+ {
+@@ -2642,6 +2676,9 @@ static int kvm_mmu_prepare_zap_page(stru
+                       kvm_reload_remote_mmus(kvm);
+       }
++      if (sp->lpage_disallowed)
++              unaccount_huge_nx_page(kvm, sp);
++
+       sp->role.invalid = 1;
+       return ret;
+ }
+@@ -2796,6 +2833,11 @@ static int set_spte(struct kvm_vcpu *vcp
+       if (!speculative)
+               spte |= spte_shadow_accessed_mask(spte);
++      if (level > PT_PAGE_TABLE_LEVEL && (pte_access & ACC_EXEC_MASK) &&
++          is_nx_huge_page_enabled()) {
++              pte_access &= ~ACC_EXEC_MASK;
++      }
++
+       if (pte_access & ACC_EXEC_MASK)
+               spte |= shadow_x_mask;
+       else
+@@ -3009,9 +3051,32 @@ static void direct_pte_prefetch(struct k
+       __direct_pte_prefetch(vcpu, sp, sptep);
+ }
++static void disallowed_hugepage_adjust(struct kvm_shadow_walk_iterator it,
++                                     gfn_t gfn, kvm_pfn_t *pfnp, int *levelp)
++{
++      int level = *levelp;
++      u64 spte = *it.sptep;
++
++      if (it.level == level && level > PT_PAGE_TABLE_LEVEL &&
++          is_nx_huge_page_enabled() &&
++          is_shadow_present_pte(spte) &&
++          !is_large_pte(spte)) {
++              /*
++               * A small SPTE exists for this pfn, but FNAME(fetch)
++               * and __direct_map would like to create a large PTE
++               * instead: just force them to go down another level,
++               * patching back for them into pfn the next 9 bits of
++               * the address.
++               */
++              u64 page_mask = KVM_PAGES_PER_HPAGE(level) - KVM_PAGES_PER_HPAGE(level - 1);
++              *pfnp |= gfn & page_mask;
++              (*levelp)--;
++      }
++}
++
+ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, int write,
+                       int map_writable, int level, kvm_pfn_t pfn,
+-                      bool prefault)
++                      bool prefault, bool lpage_disallowed)
+ {
+       struct kvm_shadow_walk_iterator it;
+       struct kvm_mmu_page *sp;
+@@ -3024,6 +3089,12 @@ static int __direct_map(struct kvm_vcpu
+       trace_kvm_mmu_spte_requested(gpa, level, pfn);
+       for_each_shadow_entry(vcpu, gpa, it) {
++              /*
++               * We cannot overwrite existing page tables with an NX
++               * large page, as the leaf could be executable.
++               */
++              disallowed_hugepage_adjust(it, gfn, &pfn, &level);
++
+               base_gfn = gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
+               if (it.level == level)
+                       break;
+@@ -3034,6 +3105,8 @@ static int __direct_map(struct kvm_vcpu
+                                             it.level - 1, true, ACC_ALL);
+                       link_shadow_page(vcpu, it.sptep, sp);
++                      if (lpage_disallowed)
++                              account_huge_nx_page(vcpu->kvm, sp);
+               }
+       }
+@@ -3333,11 +3406,14 @@ static int nonpaging_map(struct kvm_vcpu
+ {
+       int r;
+       int level;
+-      bool force_pt_level = false;
++      bool force_pt_level;
+       kvm_pfn_t pfn;
+       unsigned long mmu_seq;
+       bool map_writable, write = error_code & PFERR_WRITE_MASK;
++      bool lpage_disallowed = (error_code & PFERR_FETCH_MASK) &&
++                              is_nx_huge_page_enabled();
++      force_pt_level = lpage_disallowed;
+       level = mapping_level(vcpu, gfn, &force_pt_level);
+       if (likely(!force_pt_level)) {
+               /*
+@@ -3371,7 +3447,8 @@ static int nonpaging_map(struct kvm_vcpu
+               goto out_unlock;
+       if (likely(!force_pt_level))
+               transparent_hugepage_adjust(vcpu, gfn, &pfn, &level);
+-      r = __direct_map(vcpu, v, write, map_writable, level, pfn, prefault);
++      r = __direct_map(vcpu, v, write, map_writable, level, pfn,
++                       prefault, false);
+ out_unlock:
+       spin_unlock(&vcpu->kvm->mmu_lock);
+       kvm_release_pfn_clean(pfn);
+@@ -3921,6 +3998,8 @@ static int tdp_page_fault(struct kvm_vcp
+       unsigned long mmu_seq;
+       int write = error_code & PFERR_WRITE_MASK;
+       bool map_writable;
++      bool lpage_disallowed = (error_code & PFERR_FETCH_MASK) &&
++                              is_nx_huge_page_enabled();
+       MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
+@@ -3931,8 +4010,9 @@ static int tdp_page_fault(struct kvm_vcp
+       if (r)
+               return r;
+-      force_pt_level = !check_hugepage_cache_consistency(vcpu, gfn,
+-                                                         PT_DIRECTORY_LEVEL);
++      force_pt_level =
++              lpage_disallowed ||
++              !check_hugepage_cache_consistency(vcpu, gfn, PT_DIRECTORY_LEVEL);
+       level = mapping_level(vcpu, gfn, &force_pt_level);
+       if (likely(!force_pt_level)) {
+               if (level > PT_DIRECTORY_LEVEL &&
+@@ -3961,7 +4041,8 @@ static int tdp_page_fault(struct kvm_vcp
+               goto out_unlock;
+       if (likely(!force_pt_level))
+               transparent_hugepage_adjust(vcpu, gfn, &pfn, &level);
+-      r = __direct_map(vcpu, gpa, write, map_writable, level, pfn, prefault);
++      r = __direct_map(vcpu, gpa, write, map_writable, level, pfn,
++                       prefault, lpage_disallowed);
+ out_unlock:
+       spin_unlock(&vcpu->kvm->mmu_lock);
+       kvm_release_pfn_clean(pfn);
+@@ -5524,8 +5605,56 @@ static void mmu_destroy_caches(void)
+               kmem_cache_destroy(mmu_page_header_cache);
+ }
++static bool get_nx_auto_mode(void)
++{
++      /* Return true when CPU has the bug, and mitigations are ON */
++      return boot_cpu_has_bug(X86_BUG_ITLB_MULTIHIT) && !cpu_mitigations_off();
++}
++
++static void __set_nx_huge_pages(bool val)
++{
++      nx_huge_pages = itlb_multihit_kvm_mitigation = val;
++}
++
++static int set_nx_huge_pages(const char *val, const struct kernel_param *kp)
++{
++      bool old_val = nx_huge_pages;
++      bool new_val;
++
++      /* In "auto" mode deploy workaround only if CPU has the bug. */
++      if (sysfs_streq(val, "off"))
++              new_val = 0;
++      else if (sysfs_streq(val, "force"))
++              new_val = 1;
++      else if (sysfs_streq(val, "auto"))
++              new_val = get_nx_auto_mode();
++      else if (strtobool(val, &new_val) < 0)
++              return -EINVAL;
++
++      __set_nx_huge_pages(new_val);
++
++      if (new_val != old_val) {
++              struct kvm *kvm;
++              int idx;
++
++              mutex_lock(&kvm_lock);
++
++              list_for_each_entry(kvm, &vm_list, vm_list) {
++                      idx = srcu_read_lock(&kvm->srcu);
++                      kvm_mmu_invalidate_zap_all_pages(kvm);
++                      srcu_read_unlock(&kvm->srcu, idx);
++              }
++              mutex_unlock(&kvm_lock);
++      }
++
++      return 0;
++}
++
+ int kvm_mmu_module_init(void)
+ {
++      if (nx_huge_pages == -1)
++              __set_nx_huge_pages(get_nx_auto_mode());
++
+       kvm_mmu_reset_all_pte_masks();
+       pte_list_desc_cache = kmem_cache_create("pte_list_desc",
+--- a/arch/x86/kvm/paging_tmpl.h
++++ b/arch/x86/kvm/paging_tmpl.h
+@@ -596,13 +596,14 @@ static void FNAME(pte_prefetch)(struct k
+ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
+                        struct guest_walker *gw,
+                        int write_fault, int hlevel,
+-                       kvm_pfn_t pfn, bool map_writable, bool prefault)
++                       kvm_pfn_t pfn, bool map_writable, bool prefault,
++                       bool lpage_disallowed)
+ {
+       struct kvm_mmu_page *sp = NULL;
+       struct kvm_shadow_walk_iterator it;
+       unsigned direct_access, access = gw->pt_access;
+       int top_level, ret;
+-      gfn_t base_gfn;
++      gfn_t gfn, base_gfn;
+       direct_access = gw->pte_access;
+@@ -647,13 +648,25 @@ static int FNAME(fetch)(struct kvm_vcpu
+                       link_shadow_page(vcpu, it.sptep, sp);
+       }
+-      base_gfn = gw->gfn;
++      /*
++       * FNAME(page_fault) might have clobbered the bottom bits of
++       * gw->gfn, restore them from the virtual address.
++       */
++      gfn = gw->gfn | ((addr & PT_LVL_OFFSET_MASK(gw->level)) >> PAGE_SHIFT);
++      base_gfn = gfn;
+       trace_kvm_mmu_spte_requested(addr, gw->level, pfn);
+       for (; shadow_walk_okay(&it); shadow_walk_next(&it)) {
+               clear_sp_write_flooding_count(it.sptep);
+-              base_gfn = gw->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
++
++              /*
++               * We cannot overwrite existing page tables with an NX
++               * large page, as the leaf could be executable.
++               */
++              disallowed_hugepage_adjust(it, gfn, &pfn, &hlevel);
++
++              base_gfn = gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
+               if (it.level == hlevel)
+                       break;
+@@ -665,6 +678,8 @@ static int FNAME(fetch)(struct kvm_vcpu
+                       sp = kvm_mmu_get_page(vcpu, base_gfn, addr,
+                                             it.level - 1, true, direct_access);
+                       link_shadow_page(vcpu, it.sptep, sp);
++                      if (lpage_disallowed)
++                              account_huge_nx_page(vcpu->kvm, sp);
+               }
+       }
+@@ -741,9 +756,11 @@ static int FNAME(page_fault)(struct kvm_
+       int r;
+       kvm_pfn_t pfn;
+       int level = PT_PAGE_TABLE_LEVEL;
+-      bool force_pt_level = false;
+       unsigned long mmu_seq;
+       bool map_writable, is_self_change_mapping;
++      bool lpage_disallowed = (error_code & PFERR_FETCH_MASK) &&
++                              is_nx_huge_page_enabled();
++      bool force_pt_level = lpage_disallowed;
+       pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);
+@@ -833,7 +850,7 @@ static int FNAME(page_fault)(struct kvm_
+       if (!force_pt_level)
+               transparent_hugepage_adjust(vcpu, walker.gfn, &pfn, &level);
+       r = FNAME(fetch)(vcpu, addr, &walker, write_fault,
+-                       level, pfn, map_writable, prefault);
++                       level, pfn, map_writable, prefault, lpage_disallowed);
+       kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT);
+ out_unlock:
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -192,6 +192,7 @@ struct kvm_stats_debugfs_item debugfs_en
+       { "mmu_unsync", VM_STAT(mmu_unsync) },
+       { "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
+       { "largepages", VM_STAT(lpages, .mode = 0444) },
++      { "nx_largepages_splitted", VM_STAT(nx_lpage_splits, .mode = 0444) },
+       { "max_mmu_page_hash_collisions",
+               VM_STAT(max_mmu_page_hash_collisions) },
+       { NULL }
+@@ -1070,6 +1071,14 @@ u64 kvm_get_arch_capabilities(void)
+       rdmsrl_safe(MSR_IA32_ARCH_CAPABILITIES, &data);
+       /*
++       * If nx_huge_pages is enabled, KVM's shadow paging will ensure that
++       * the nested hypervisor runs with NX huge pages.  If it is not,
++       * L1 is anyway vulnerable to ITLB_MULTIHIT explots from other
++       * L1 guests, so it need not worry about its own (L2) guests.
++       */
++      data |= ARCH_CAP_PSCHANGE_MC_NO;
++
++      /*
+        * If we're doing cache flushes (either "always" or "cond")
+        * we will do one whenever the guest does a vmlaunch/vmresume.
+        * If an outer hypervisor is doing the cache flush for us
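For reference, a minimal user-space sketch (not part of the patch above) showing how the two knobs it introduces can be inspected at run time: the kvm.nx_huge_pages module parameter and the read-only nx_largepages_splitted statistic. The file paths are assumptions that follow from module_param_cb() and the kvm debugfs directory; reading them needs root and a mounted debugfs.

/*
 * Illustrative sketch only: print the kvm.nx_huge_pages parameter
 * (exposed as Y/N via param_get_bool) and the nx_largepages_splitted
 * counter added by the patch above.  Assumes debugfs is mounted at
 * /sys/kernel/debug and sufficient privileges.
 */
#include <stdio.h>
#include <string.h>

static void show(const char *path)
{
        char buf[64];
        FILE *f = fopen(path, "r");

        if (!f) {
                printf("%s: <unavailable>\n", path);
                return;
        }
        if (fgets(buf, sizeof(buf), f)) {
                buf[strcspn(buf, "\n")] = '\0';
                printf("%s: %s\n", path, buf);
        }
        fclose(f);
}

int main(void)
{
        /* Writable (0644) workaround switch created by module_param_cb(). */
        show("/sys/module/kvm/parameters/nx_huge_pages");
        /* Read-only (0444) count of large pages split because of NX. */
        show("/sys/kernel/debug/kvm/nx_largepages_splitted");
        return 0;
}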
diff --git a/queue-4.14/kvm-vmx-svm-always-run-with-efer.nxe-1-when-shadow-paging-is-active.patch b/queue-4.14/kvm-vmx-svm-always-run-with-efer.nxe-1-when-shadow-paging-is-active.patch
new file mode 100644 (file)
index 0000000..7b9996b
--- /dev/null
@@ -0,0 +1,70 @@
+From foo@baz Tue 12 Nov 2019 04:11:14 PM CET
+From: Paolo Bonzini <pbonzini@redhat.com>
+Date: Sun, 27 Oct 2019 09:36:37 +0100
+Subject: KVM: vmx, svm: always run with EFER.NXE=1 when shadow paging is active
+
+From: Paolo Bonzini <pbonzini@redhat.com>
+
+commit 9167ab79936206118cc60e47dcb926c3489f3bd5 upstream.
+
+VMX already does so if the host has SMEP, in order to support the combination of
+CR0.WP=1 and CR4.SMEP=1.  However, it is perfectly safe to always do so, and in
+fact VMX also ends up running with EFER.NXE=1 on old processors that lack the
+"load EFER" controls, because it may help avoiding a slow MSR write.
+
+SVM does not have similar code, but it should since recent AMD processors do
+support SMEP.  So this patch makes the code for the two vendors simpler and
+more similar, while fixing an issue with CR0.WP=1 and CR4.SMEP=1 on AMD.
+
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Joerg Roedel <jroedel@suse.de>
+Cc: stable@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm.c |   10 ++++++++--
+ arch/x86/kvm/vmx.c |   14 +++-----------
+ 2 files changed, 11 insertions(+), 13 deletions(-)
+
+--- a/arch/x86/kvm/svm.c
++++ b/arch/x86/kvm/svm.c
+@@ -608,8 +608,14 @@ static int get_npt_level(struct kvm_vcpu
+ static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
+ {
+       vcpu->arch.efer = efer;
+-      if (!npt_enabled && !(efer & EFER_LMA))
+-              efer &= ~EFER_LME;
++
++      if (!npt_enabled) {
++              /* Shadow paging assumes NX to be available.  */
++              efer |= EFER_NX;
++
++              if (!(efer & EFER_LMA))
++                      efer &= ~EFER_LME;
++      }
+       to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME;
+       mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR);
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -2259,17 +2259,9 @@ static bool update_transition_efer(struc
+       u64 guest_efer = vmx->vcpu.arch.efer;
+       u64 ignore_bits = 0;
+-      if (!enable_ept) {
+-              /*
+-               * NX is needed to handle CR0.WP=1, CR4.SMEP=1.  Testing
+-               * host CPUID is more efficient than testing guest CPUID
+-               * or CR4.  Host SMEP is anyway a requirement for guest SMEP.
+-               */
+-              if (boot_cpu_has(X86_FEATURE_SMEP))
+-                      guest_efer |= EFER_NX;
+-              else if (!(guest_efer & EFER_NX))
+-                      ignore_bits |= EFER_NX;
+-      }
++      /* Shadow paging assumes NX to be available.  */
++      if (!enable_ept)
++              guest_efer |= EFER_NX;
+       /*
+        * LMA and LME handled by hardware; SCE meaningless outside long mode.
diff --git a/queue-4.14/kvm-x86-add-tracepoints-around-__direct_map-and-fname-fetch.patch b/queue-4.14/kvm-x86-add-tracepoints-around-__direct_map-and-fname-fetch.patch
new file mode 100644 (file)
index 0000000..7e730ab
--- /dev/null
@@ -0,0 +1,143 @@
+From foo@baz Tue 12 Nov 2019 04:11:14 PM CET
+From: Paolo Bonzini <pbonzini@redhat.com>
+Date: Thu, 4 Jul 2019 05:14:13 -0400
+Subject: KVM: x86: add tracepoints around __direct_map and FNAME(fetch)
+
+From: Paolo Bonzini <pbonzini@redhat.com>
+
+commit 335e192a3fa415e1202c8b9ecdaaecd643f823cc upstream.
+
+These are useful in debugging shadow paging.
+
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/mmu.c         |   14 +++++-----
+ arch/x86/kvm/mmutrace.h    |   59 +++++++++++++++++++++++++++++++++++++++++++++
+ arch/x86/kvm/paging_tmpl.h |    2 +
+ 3 files changed, 68 insertions(+), 7 deletions(-)
+
+--- a/arch/x86/kvm/mmu.c
++++ b/arch/x86/kvm/mmu.c
+@@ -139,9 +139,6 @@ module_param(dbg, bool, 0644);
+ #include <trace/events/kvm.h>
+-#define CREATE_TRACE_POINTS
+-#include "mmutrace.h"
+-
+ #define SPTE_HOST_WRITEABLE   (1ULL << PT_FIRST_AVAIL_BITS_SHIFT)
+ #define SPTE_MMU_WRITEABLE    (1ULL << (PT_FIRST_AVAIL_BITS_SHIFT + 1))
+@@ -244,6 +241,11 @@ static u64 __read_mostly shadow_nonprese
+ static void mmu_spte_set(u64 *sptep, u64 spte);
+ static void mmu_free_roots(struct kvm_vcpu *vcpu);
++static bool is_executable_pte(u64 spte);
++
++#define CREATE_TRACE_POINTS
++#include "mmutrace.h"
++
+ void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value)
+ {
+@@ -2909,10 +2911,7 @@ static int mmu_set_spte(struct kvm_vcpu
+               ret = RET_PF_EMULATE;
+       pgprintk("%s: setting spte %llx\n", __func__, *sptep);
+-      pgprintk("instantiating %s PTE (%s) at %llx (%llx) addr %p\n",
+-               is_large_pte(*sptep)? "2MB" : "4kB",
+-               *sptep & PT_WRITABLE_MASK ? "RW" : "R", gfn,
+-               *sptep, sptep);
++      trace_kvm_mmu_set_spte(level, gfn, sptep);
+       if (!was_rmapped && is_large_pte(*sptep))
+               ++vcpu->kvm->stat.lpages;
+@@ -3023,6 +3022,7 @@ static int __direct_map(struct kvm_vcpu
+       if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
+               return RET_PF_RETRY;
++      trace_kvm_mmu_spte_requested(gpa, level, pfn);
+       for_each_shadow_entry(vcpu, gpa, it) {
+               base_gfn = gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
+               if (it.level == level)
+--- a/arch/x86/kvm/mmutrace.h
++++ b/arch/x86/kvm/mmutrace.h
+@@ -325,6 +325,65 @@ TRACE_EVENT(
+                 __entry->kvm_gen == __entry->spte_gen
+       )
+ );
++
++TRACE_EVENT(
++      kvm_mmu_set_spte,
++      TP_PROTO(int level, gfn_t gfn, u64 *sptep),
++      TP_ARGS(level, gfn, sptep),
++
++      TP_STRUCT__entry(
++              __field(u64, gfn)
++              __field(u64, spte)
++              __field(u64, sptep)
++              __field(u8, level)
++              /* These depend on page entry type, so compute them now.  */
++              __field(bool, r)
++              __field(bool, x)
++              __field(u8, u)
++      ),
++
++      TP_fast_assign(
++              __entry->gfn = gfn;
++              __entry->spte = *sptep;
++              __entry->sptep = virt_to_phys(sptep);
++              __entry->level = level;
++              __entry->r = shadow_present_mask || (__entry->spte & PT_PRESENT_MASK);
++              __entry->x = is_executable_pte(__entry->spte);
++              __entry->u = shadow_user_mask ? !!(__entry->spte & shadow_user_mask) : -1;
++      ),
++
++      TP_printk("gfn %llx spte %llx (%s%s%s%s) level %d at %llx",
++                __entry->gfn, __entry->spte,
++                __entry->r ? "r" : "-",
++                __entry->spte & PT_WRITABLE_MASK ? "w" : "-",
++                __entry->x ? "x" : "-",
++                __entry->u == -1 ? "" : (__entry->u ? "u" : "-"),
++                __entry->level, __entry->sptep
++      )
++);
++
++TRACE_EVENT(
++      kvm_mmu_spte_requested,
++      TP_PROTO(gpa_t addr, int level, kvm_pfn_t pfn),
++      TP_ARGS(addr, level, pfn),
++
++      TP_STRUCT__entry(
++              __field(u64, gfn)
++              __field(u64, pfn)
++              __field(u8, level)
++      ),
++
++      TP_fast_assign(
++              __entry->gfn = addr >> PAGE_SHIFT;
++              __entry->pfn = pfn | (__entry->gfn & (KVM_PAGES_PER_HPAGE(level) - 1));
++              __entry->level = level;
++      ),
++
++      TP_printk("gfn %llx pfn %llx level %d",
++                __entry->gfn, __entry->pfn, __entry->level
++      )
++);
++
+ #endif /* _TRACE_KVMMMU_H */
+ #undef TRACE_INCLUDE_PATH
+--- a/arch/x86/kvm/paging_tmpl.h
++++ b/arch/x86/kvm/paging_tmpl.h
+@@ -649,6 +649,8 @@ static int FNAME(fetch)(struct kvm_vcpu
+       base_gfn = gw->gfn;
++      trace_kvm_mmu_spte_requested(addr, gw->level, pfn);
++
+       for (; shadow_walk_okay(&it); shadow_walk_next(&it)) {
+               clear_sp_write_flooding_count(it.sptep);
+               base_gfn = gw->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
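A rough usage sketch, not part of the patch above: the two tracepoints it adds can be enabled through tracefs and watched via trace_pipe. The "kvmmmu" event-group name and the /sys/kernel/debug/tracing mount point are assumptions based on the usual mmutrace.h conventions, not something shown in this diff.

/*
 * Sketch: enable kvm_mmu_spte_requested and kvm_mmu_set_spte, then dump
 * a few trace lines.  Paths and the "kvmmmu" group are assumptions;
 * adjust to the local setup.  Reading trace_pipe blocks until events
 * arrive (i.e. until a guest takes MMU faults).
 */
#include <stdio.h>

static int poke(const char *path, const char *val)
{
        FILE *f = fopen(path, "w");

        if (!f)
                return -1;
        fputs(val, f);
        fclose(f);
        return 0;
}

int main(void)
{
        const char *base = "/sys/kernel/debug/tracing";
        char path[256], line[512];
        FILE *pipe;
        int i;

        snprintf(path, sizeof(path), "%s/events/kvmmmu/kvm_mmu_spte_requested/enable", base);
        poke(path, "1");
        snprintf(path, sizeof(path), "%s/events/kvmmmu/kvm_mmu_set_spte/enable", base);
        poke(path, "1");

        snprintf(path, sizeof(path), "%s/trace_pipe", base);
        pipe = fopen(path, "r");
        if (!pipe)
                return 1;
        for (i = 0; i < 20 && fgets(line, sizeof(line), pipe); i++)
                fputs(line, stdout);    /* e.g. "gfn ... pfn ... level ..." */
        fclose(pipe);
        return 0;
}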
diff --git a/queue-4.14/kvm-x86-change-kvm_mmu_page_get_gfn-bug_on-to-warn_on.patch b/queue-4.14/kvm-x86-change-kvm_mmu_page_get_gfn-bug_on-to-warn_on.patch
new file mode 100644 (file)
index 0000000..13b9f89
--- /dev/null
@@ -0,0 +1,43 @@
+From foo@baz Tue 12 Nov 2019 04:11:14 PM CET
+From: Paolo Bonzini <pbonzini@redhat.com>
+Date: Sun, 30 Jun 2019 08:36:21 -0400
+Subject: KVM: x86: change kvm_mmu_page_get_gfn BUG_ON to WARN_ON
+
+From: Paolo Bonzini <pbonzini@redhat.com>
+
+commit e9f2a760b158551bfbef6db31d2cae45ab8072e5 upstream.
+
+Note that in such a case it is quite likely that KVM will BUG_ON
+in __pte_list_remove when the VM is closed.  However, there is no
+immediate risk of memory corruption in the host so a WARN_ON is
+enough and it lets you gather traces for debugging.
+
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/mmu.c |   12 +++++++++---
+ 1 file changed, 9 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/kvm/mmu.c
++++ b/arch/x86/kvm/mmu.c
+@@ -1008,10 +1008,16 @@ static gfn_t kvm_mmu_page_get_gfn(struct
+ static void kvm_mmu_page_set_gfn(struct kvm_mmu_page *sp, int index, gfn_t gfn)
+ {
+-      if (sp->role.direct)
+-              BUG_ON(gfn != kvm_mmu_page_get_gfn(sp, index));
+-      else
++      if (!sp->role.direct) {
+               sp->gfns[index] = gfn;
++              return;
++      }
++
++      if (WARN_ON(gfn != kvm_mmu_page_get_gfn(sp, index)))
++              pr_err_ratelimited("gfn mismatch under direct page %llx "
++                                 "(expected %llx, got %llx)\n",
++                                 sp->gfn,
++                                 kvm_mmu_page_get_gfn(sp, index), gfn);
+ }
+ /*
diff --git a/queue-4.14/kvm-x86-export-mds_no-0-to-guests-when-tsx-is-enabled.patch b/queue-4.14/kvm-x86-export-mds_no-0-to-guests-when-tsx-is-enabled.patch
new file mode 100644 (file)
index 0000000..edec6c5
--- /dev/null
@@ -0,0 +1,64 @@
+From foo@baz Tue 12 Nov 2019 04:11:14 PM CET
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Date: Wed, 23 Oct 2019 12:23:33 +0200
+Subject: kvm/x86: Export MDS_NO=0 to guests when TSX is enabled
+
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+
+commit e1d38b63acd843cfdd4222bf19a26700fd5c699e upstream.
+
+Export the IA32_ARCH_CAPABILITIES MSR bit MDS_NO=0 to guests on TSX
+Async Abort (TAA) affected hosts that have TSX enabled and updated
+microcode. This is required so that the guests don't complain,
+
+  "Vulnerable: Clear CPU buffers attempted, no microcode"
+
+when the host has the updated microcode to clear CPU buffers.
+
+Microcode update also adds support for MSR_IA32_TSX_CTRL which is
+enumerated by the ARCH_CAP_TSX_CTRL bit in IA32_ARCH_CAPABILITIES MSR.
+Guests can't do this check themselves when the ARCH_CAP_TSX_CTRL bit is
+not exported to the guests.
+
+In this case export MDS_NO=0 to the guests. When guests have
+CPUID.MD_CLEAR=1, they deploy MDS mitigation which also mitigates TAA.
+
+Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Tested-by: Neelima Krishnan <neelima.krishnan@intel.com>
+Reviewed-by: Tony Luck <tony.luck@intel.com>
+Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/x86.c |   19 +++++++++++++++++++
+ 1 file changed, 19 insertions(+)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -1088,6 +1088,25 @@ u64 kvm_get_arch_capabilities(void)
+       if (!boot_cpu_has_bug(X86_BUG_MDS))
+               data |= ARCH_CAP_MDS_NO;
++      /*
++       * On TAA affected systems, export MDS_NO=0 when:
++       *      - TSX is enabled on the host, i.e. X86_FEATURE_RTM=1.
++       *      - Updated microcode is present. This is detected by
++       *        the presence of ARCH_CAP_TSX_CTRL_MSR and ensures
++       *        that VERW clears CPU buffers.
++       *
++       * When MDS_NO=0 is exported, guests deploy clear CPU buffer
++       * mitigation and don't complain:
++       *
++       *      "Vulnerable: Clear CPU buffers attempted, no microcode"
++       *
++       * If TSX is disabled on the system, guests are also mitigated against
++       * TAA and clear CPU buffer mitigation is not required for guests.
++       */
++      if (boot_cpu_has_bug(X86_BUG_TAA) && boot_cpu_has(X86_FEATURE_RTM) &&
++          (data & ARCH_CAP_TSX_CTRL_MSR))
++              data &= ~ARCH_CAP_MDS_NO;
++
+       return data;
+ }
diff --git a/queue-4.14/kvm-x86-make-fname-fetch-and-__direct_map-more-similar.patch b/queue-4.14/kvm-x86-make-fname-fetch-and-__direct_map-more-similar.patch
new file mode 100644 (file)
index 0000000..bc845bc
--- /dev/null
@@ -0,0 +1,172 @@
+From foo@baz Tue 12 Nov 2019 04:11:14 PM CET
+From: Paolo Bonzini <pbonzini@redhat.com>
+Date: Mon, 24 Jun 2019 13:06:21 +0200
+Subject: KVM: x86: make FNAME(fetch) and __direct_map more similar
+
+From: Paolo Bonzini <pbonzini@redhat.com>
+
+commit 3fcf2d1bdeb6a513523cb2c77012a6b047aa859c upstream.
+
+These two functions are basically doing the same thing through
+kvm_mmu_get_page, link_shadow_page and mmu_set_spte; yet, for historical
+reasons, their code looks very different.  This patch tries to take the
+best of each and make them very similar, so that it is easy to understand
+changes that apply to both of them.
+
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/mmu.c         |   53 +++++++++++++++++++++------------------------
+ arch/x86/kvm/paging_tmpl.h |   30 +++++++++++--------------
+ 2 files changed, 39 insertions(+), 44 deletions(-)
+
+--- a/arch/x86/kvm/mmu.c
++++ b/arch/x86/kvm/mmu.c
+@@ -3004,40 +3004,39 @@ static void direct_pte_prefetch(struct k
+       __direct_pte_prefetch(vcpu, sp, sptep);
+ }
+-static int __direct_map(struct kvm_vcpu *vcpu, int write, int map_writable,
+-                      int level, gfn_t gfn, kvm_pfn_t pfn, bool prefault)
++static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, int write,
++                      int map_writable, int level, kvm_pfn_t pfn,
++                      bool prefault)
+ {
+-      struct kvm_shadow_walk_iterator iterator;
++      struct kvm_shadow_walk_iterator it;
+       struct kvm_mmu_page *sp;
+-      int emulate = 0;
+-      gfn_t pseudo_gfn;
++      int ret;
++      gfn_t gfn = gpa >> PAGE_SHIFT;
++      gfn_t base_gfn = gfn;
+       if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
+-              return 0;
++              return RET_PF_RETRY;
+-      for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) {
+-              if (iterator.level == level) {
+-                      emulate = mmu_set_spte(vcpu, iterator.sptep, ACC_ALL,
+-                                             write, level, gfn, pfn, prefault,
+-                                             map_writable);
+-                      direct_pte_prefetch(vcpu, iterator.sptep);
+-                      ++vcpu->stat.pf_fixed;
++      for_each_shadow_entry(vcpu, gpa, it) {
++              base_gfn = gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
++              if (it.level == level)
+                       break;
+-              }
+-              drop_large_spte(vcpu, iterator.sptep);
+-              if (!is_shadow_present_pte(*iterator.sptep)) {
+-                      u64 base_addr = iterator.addr;
++              drop_large_spte(vcpu, it.sptep);
++              if (!is_shadow_present_pte(*it.sptep)) {
++                      sp = kvm_mmu_get_page(vcpu, base_gfn, it.addr,
++                                            it.level - 1, true, ACC_ALL);
+-                      base_addr &= PT64_LVL_ADDR_MASK(iterator.level);
+-                      pseudo_gfn = base_addr >> PAGE_SHIFT;
+-                      sp = kvm_mmu_get_page(vcpu, pseudo_gfn, iterator.addr,
+-                                            iterator.level - 1, 1, ACC_ALL);
+-
+-                      link_shadow_page(vcpu, iterator.sptep, sp);
++                      link_shadow_page(vcpu, it.sptep, sp);
+               }
+       }
+-      return emulate;
++
++      ret = mmu_set_spte(vcpu, it.sptep, ACC_ALL,
++                         write, level, base_gfn, pfn, prefault,
++                         map_writable);
++      direct_pte_prefetch(vcpu, it.sptep);
++      ++vcpu->stat.pf_fixed;
++      return ret;
+ }
+ static void kvm_send_hwpoison_signal(unsigned long address, struct task_struct *tsk)
+@@ -3369,8 +3368,7 @@ static int nonpaging_map(struct kvm_vcpu
+               goto out_unlock;
+       if (likely(!force_pt_level))
+               transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level);
+-      r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault);
+-
++      r = __direct_map(vcpu, v, write, map_writable, level, pfn, prefault);
+ out_unlock:
+       spin_unlock(&vcpu->kvm->mmu_lock);
+       kvm_release_pfn_clean(pfn);
+@@ -3960,8 +3958,7 @@ static int tdp_page_fault(struct kvm_vcp
+               goto out_unlock;
+       if (likely(!force_pt_level))
+               transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level);
+-      r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault);
+-
++      r = __direct_map(vcpu, gpa, write, map_writable, level, pfn, prefault);
+ out_unlock:
+       spin_unlock(&vcpu->kvm->mmu_lock);
+       kvm_release_pfn_clean(pfn);
+--- a/arch/x86/kvm/paging_tmpl.h
++++ b/arch/x86/kvm/paging_tmpl.h
+@@ -602,6 +602,7 @@ static int FNAME(fetch)(struct kvm_vcpu
+       struct kvm_shadow_walk_iterator it;
+       unsigned direct_access, access = gw->pt_access;
+       int top_level, ret;
++      gfn_t base_gfn;
+       direct_access = gw->pte_access;
+@@ -646,31 +647,29 @@ static int FNAME(fetch)(struct kvm_vcpu
+                       link_shadow_page(vcpu, it.sptep, sp);
+       }
+-      for (;
+-           shadow_walk_okay(&it) && it.level > hlevel;
+-           shadow_walk_next(&it)) {
+-              gfn_t direct_gfn;
++      base_gfn = gw->gfn;
++      for (; shadow_walk_okay(&it); shadow_walk_next(&it)) {
+               clear_sp_write_flooding_count(it.sptep);
++              base_gfn = gw->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
++              if (it.level == hlevel)
++                      break;
++
+               validate_direct_spte(vcpu, it.sptep, direct_access);
+               drop_large_spte(vcpu, it.sptep);
+-              if (is_shadow_present_pte(*it.sptep))
+-                      continue;
+-
+-              direct_gfn = gw->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
+-
+-              sp = kvm_mmu_get_page(vcpu, direct_gfn, addr, it.level-1,
+-                                    true, direct_access);
+-              link_shadow_page(vcpu, it.sptep, sp);
++              if (!is_shadow_present_pte(*it.sptep)) {
++                      sp = kvm_mmu_get_page(vcpu, base_gfn, addr,
++                                            it.level - 1, true, direct_access);
++                      link_shadow_page(vcpu, it.sptep, sp);
++              }
+       }
+-      clear_sp_write_flooding_count(it.sptep);
+       ret = mmu_set_spte(vcpu, it.sptep, gw->pte_access, write_fault,
+-                         it.level, gw->gfn, pfn, prefault, map_writable);
++                         it.level, base_gfn, pfn, prefault, map_writable);
+       FNAME(pte_prefetch)(vcpu, gw, it.sptep);
+-
++      ++vcpu->stat.pf_fixed;
+       return ret;
+ out_gpte_changed:
+@@ -833,7 +832,6 @@ static int FNAME(page_fault)(struct kvm_
+               transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level);
+       r = FNAME(fetch)(vcpu, addr, &walker, write_fault,
+                        level, pfn, map_writable, prefault);
+-      ++vcpu->stat.pf_fixed;
+       kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT);
+ out_unlock:
diff --git a/queue-4.14/kvm-x86-mmu-recovery-of-shattered-nx-large-pages.patch b/queue-4.14/kvm-x86-mmu-recovery-of-shattered-nx-large-pages.patch
new file mode 100644 (file)
index 0000000..f176367
--- /dev/null
@@ -0,0 +1,363 @@
+From foo@baz Tue 12 Nov 2019 04:11:14 PM CET
+From: Junaid Shahid <junaids@google.com>
+Date: Fri, 1 Nov 2019 00:14:14 +0100
+Subject: kvm: x86: mmu: Recovery of shattered NX large pages
+
+From: Junaid Shahid <junaids@google.com>
+
+commit 1aa9b9572b10529c2e64e2b8f44025d86e124308 upstream.
+
+The page table pages corresponding to broken down large pages are zapped in
+FIFO order, so that the large page can potentially be recovered, if it is
+no longer being used for execution.  This removes the performance penalty
+for walking deeper EPT page tables.
+
+By default, one large page will last about one hour once the guest
+reaches a steady state.
+
+Signed-off-by: Junaid Shahid <junaids@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/admin-guide/kernel-parameters.txt |    6 +
+ arch/x86/include/asm/kvm_host.h                 |    4 
+ arch/x86/kvm/mmu.c                              |  129 ++++++++++++++++++++++++
+ arch/x86/kvm/mmu.h                              |    4 
+ arch/x86/kvm/x86.c                              |   11 ++
+ virt/kvm/kvm_main.c                             |   30 +++++
+ 6 files changed, 183 insertions(+), 1 deletion(-)
+
+--- a/Documentation/admin-guide/kernel-parameters.txt
++++ b/Documentation/admin-guide/kernel-parameters.txt
+@@ -1865,6 +1865,12 @@
+                       If the software workaround is enabled for the host,
+                       guests do need not to enable it for nested guests.
++      kvm.nx_huge_pages_recovery_ratio=
++                      [KVM] Controls how many 4KiB pages are periodically zapped
++                      back to huge pages.  0 disables the recovery, otherwise if
++                      the value is N KVM will zap 1/Nth of the 4KiB pages every
++                      minute.  The default is 60.
++
+       kvm-amd.nested= [KVM,AMD] Allow nested virtualization in KVM/SVM.
+                       Default is 1 (enabled)
+--- a/arch/x86/include/asm/kvm_host.h
++++ b/arch/x86/include/asm/kvm_host.h
+@@ -277,6 +277,7 @@ struct kvm_rmap_head {
+ struct kvm_mmu_page {
+       struct list_head link;
+       struct hlist_node hash_link;
++      struct list_head lpage_disallowed_link;
+       /*
+        * The following two entries are used to key the shadow page in the
+@@ -780,6 +781,7 @@ struct kvm_arch {
+        */
+       struct list_head active_mmu_pages;
+       struct list_head zapped_obsolete_pages;
++      struct list_head lpage_disallowed_mmu_pages;
+       struct kvm_page_track_notifier_node mmu_sp_tracker;
+       struct kvm_page_track_notifier_head track_notifier_head;
+@@ -855,6 +857,8 @@ struct kvm_arch {
+       bool x2apic_format;
+       bool x2apic_broadcast_quirk_disabled;
++
++      struct task_struct *nx_lpage_recovery_thread;
+ };
+ struct kvm_vm_stat {
+--- a/arch/x86/kvm/mmu.c
++++ b/arch/x86/kvm/mmu.c
+@@ -40,6 +40,7 @@
+ #include <linux/uaccess.h>
+ #include <linux/hash.h>
+ #include <linux/kern_levels.h>
++#include <linux/kthread.h>
+ #include <asm/page.h>
+ #include <asm/cmpxchg.h>
+@@ -51,16 +52,26 @@
+ extern bool itlb_multihit_kvm_mitigation;
+ static int __read_mostly nx_huge_pages = -1;
++static uint __read_mostly nx_huge_pages_recovery_ratio = 60;
+ static int set_nx_huge_pages(const char *val, const struct kernel_param *kp);
++static int set_nx_huge_pages_recovery_ratio(const char *val, const struct kernel_param *kp);
+ static struct kernel_param_ops nx_huge_pages_ops = {
+       .set = set_nx_huge_pages,
+       .get = param_get_bool,
+ };
++static struct kernel_param_ops nx_huge_pages_recovery_ratio_ops = {
++      .set = set_nx_huge_pages_recovery_ratio,
++      .get = param_get_uint,
++};
++
+ module_param_cb(nx_huge_pages, &nx_huge_pages_ops, &nx_huge_pages, 0644);
+ __MODULE_PARM_TYPE(nx_huge_pages, "bool");
++module_param_cb(nx_huge_pages_recovery_ratio, &nx_huge_pages_recovery_ratio_ops,
++              &nx_huge_pages_recovery_ratio, 0644);
++__MODULE_PARM_TYPE(nx_huge_pages_recovery_ratio, "uint");
+ /*
+  * When setting this variable to true it enables Two-Dimensional-Paging
+@@ -1103,6 +1114,8 @@ static void account_huge_nx_page(struct
+               return;
+       ++kvm->stat.nx_lpage_splits;
++      list_add_tail(&sp->lpage_disallowed_link,
++                    &kvm->arch.lpage_disallowed_mmu_pages);
+       sp->lpage_disallowed = true;
+ }
+@@ -1127,6 +1140,7 @@ static void unaccount_huge_nx_page(struc
+ {
+       --kvm->stat.nx_lpage_splits;
+       sp->lpage_disallowed = false;
++      list_del(&sp->lpage_disallowed_link);
+ }
+ static bool __mmu_gfn_lpage_is_disallowed(gfn_t gfn, int level,
+@@ -5643,6 +5657,8 @@ static int set_nx_huge_pages(const char
+                       idx = srcu_read_lock(&kvm->srcu);
+                       kvm_mmu_invalidate_zap_all_pages(kvm);
+                       srcu_read_unlock(&kvm->srcu, idx);
++
++                      wake_up_process(kvm->arch.nx_lpage_recovery_thread);
+               }
+               mutex_unlock(&kvm_lock);
+       }
+@@ -5720,3 +5736,116 @@ void kvm_mmu_module_exit(void)
+       unregister_shrinker(&mmu_shrinker);
+       mmu_audit_disable();
+ }
++
++static int set_nx_huge_pages_recovery_ratio(const char *val, const struct kernel_param *kp)
++{
++      unsigned int old_val;
++      int err;
++
++      old_val = nx_huge_pages_recovery_ratio;
++      err = param_set_uint(val, kp);
++      if (err)
++              return err;
++
++      if (READ_ONCE(nx_huge_pages) &&
++          !old_val && nx_huge_pages_recovery_ratio) {
++              struct kvm *kvm;
++
++              mutex_lock(&kvm_lock);
++
++              list_for_each_entry(kvm, &vm_list, vm_list)
++                      wake_up_process(kvm->arch.nx_lpage_recovery_thread);
++
++              mutex_unlock(&kvm_lock);
++      }
++
++      return err;
++}
++
++static void kvm_recover_nx_lpages(struct kvm *kvm)
++{
++      int rcu_idx;
++      struct kvm_mmu_page *sp;
++      unsigned int ratio;
++      LIST_HEAD(invalid_list);
++      ulong to_zap;
++
++      rcu_idx = srcu_read_lock(&kvm->srcu);
++      spin_lock(&kvm->mmu_lock);
++
++      ratio = READ_ONCE(nx_huge_pages_recovery_ratio);
++      to_zap = ratio ? DIV_ROUND_UP(kvm->stat.nx_lpage_splits, ratio) : 0;
++      while (to_zap && !list_empty(&kvm->arch.lpage_disallowed_mmu_pages)) {
++              /*
++               * We use a separate list instead of just using active_mmu_pages
++               * because the number of lpage_disallowed pages is expected to
++               * be relatively small compared to the total.
++               */
++              sp = list_first_entry(&kvm->arch.lpage_disallowed_mmu_pages,
++                                    struct kvm_mmu_page,
++                                    lpage_disallowed_link);
++              WARN_ON_ONCE(!sp->lpage_disallowed);
++              kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
++              WARN_ON_ONCE(sp->lpage_disallowed);
++
++              if (!--to_zap || need_resched() || spin_needbreak(&kvm->mmu_lock)) {
++                      kvm_mmu_commit_zap_page(kvm, &invalid_list);
++                      if (to_zap)
++                              cond_resched_lock(&kvm->mmu_lock);
++              }
++      }
++
++      spin_unlock(&kvm->mmu_lock);
++      srcu_read_unlock(&kvm->srcu, rcu_idx);
++}
++
++static long get_nx_lpage_recovery_timeout(u64 start_time)
++{
++      return READ_ONCE(nx_huge_pages) && READ_ONCE(nx_huge_pages_recovery_ratio)
++              ? start_time + 60 * HZ - get_jiffies_64()
++              : MAX_SCHEDULE_TIMEOUT;
++}
++
++static int kvm_nx_lpage_recovery_worker(struct kvm *kvm, uintptr_t data)
++{
++      u64 start_time;
++      long remaining_time;
++
++      while (true) {
++              start_time = get_jiffies_64();
++              remaining_time = get_nx_lpage_recovery_timeout(start_time);
++
++              set_current_state(TASK_INTERRUPTIBLE);
++              while (!kthread_should_stop() && remaining_time > 0) {
++                      schedule_timeout(remaining_time);
++                      remaining_time = get_nx_lpage_recovery_timeout(start_time);
++                      set_current_state(TASK_INTERRUPTIBLE);
++              }
++
++              set_current_state(TASK_RUNNING);
++
++              if (kthread_should_stop())
++                      return 0;
++
++              kvm_recover_nx_lpages(kvm);
++      }
++}
++
++int kvm_mmu_post_init_vm(struct kvm *kvm)
++{
++      int err;
++
++      err = kvm_vm_create_worker_thread(kvm, kvm_nx_lpage_recovery_worker, 0,
++                                        "kvm-nx-lpage-recovery",
++                                        &kvm->arch.nx_lpage_recovery_thread);
++      if (!err)
++              kthread_unpark(kvm->arch.nx_lpage_recovery_thread);
++
++      return err;
++}
++
++void kvm_mmu_pre_destroy_vm(struct kvm *kvm)
++{
++      if (kvm->arch.nx_lpage_recovery_thread)
++              kthread_stop(kvm->arch.nx_lpage_recovery_thread);
++}
+--- a/arch/x86/kvm/mmu.h
++++ b/arch/x86/kvm/mmu.h
+@@ -195,4 +195,8 @@ void kvm_mmu_gfn_allow_lpage(struct kvm_
+ bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
+                                   struct kvm_memory_slot *slot, u64 gfn);
+ int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu);
++
++int kvm_mmu_post_init_vm(struct kvm *kvm);
++void kvm_mmu_pre_destroy_vm(struct kvm *kvm);
++
+ #endif
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -8370,6 +8370,7 @@ int kvm_arch_init_vm(struct kvm *kvm, un
+       INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list);
+       INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
+       INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages);
++      INIT_LIST_HEAD(&kvm->arch.lpage_disallowed_mmu_pages);
+       INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
+       atomic_set(&kvm->arch.noncoherent_dma_count, 0);
+@@ -8399,6 +8400,11 @@ int kvm_arch_init_vm(struct kvm *kvm, un
+       return 0;
+ }
++int kvm_arch_post_init_vm(struct kvm *kvm)
++{
++      return kvm_mmu_post_init_vm(kvm);
++}
++
+ static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
+ {
+       int r;
+@@ -8502,6 +8508,11 @@ int x86_set_memory_region(struct kvm *kv
+ }
+ EXPORT_SYMBOL_GPL(x86_set_memory_region);
++void kvm_arch_pre_destroy_vm(struct kvm *kvm)
++{
++      kvm_mmu_pre_destroy_vm(kvm);
++}
++
+ void kvm_arch_destroy_vm(struct kvm *kvm)
+ {
+       if (current->mm == kvm->mm) {
+--- a/virt/kvm/kvm_main.c
++++ b/virt/kvm/kvm_main.c
+@@ -608,6 +608,23 @@ static int kvm_create_vm_debugfs(struct
+       return 0;
+ }
++/*
++ * Called after the VM is otherwise initialized, but just before adding it to
++ * the vm_list.
++ */
++int __weak kvm_arch_post_init_vm(struct kvm *kvm)
++{
++      return 0;
++}
++
++/*
++ * Called just after removing the VM from the vm_list, but before doing any
++ * other destruction.
++ */
++void __weak kvm_arch_pre_destroy_vm(struct kvm *kvm)
++{
++}
++
+ static struct kvm *kvm_create_vm(unsigned long type)
+ {
+       int r, i;
+@@ -662,11 +679,15 @@ static struct kvm *kvm_create_vm(unsigne
+               rcu_assign_pointer(kvm->buses[i],
+                       kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL));
+               if (!kvm->buses[i])
+-                      goto out_err;
++                      goto out_err_no_mmu_notifier;
+       }
+       r = kvm_init_mmu_notifier(kvm);
+       if (r)
++              goto out_err_no_mmu_notifier;
++
++      r = kvm_arch_post_init_vm(kvm);
++      if (r)
+               goto out_err;
+       mutex_lock(&kvm_lock);
+@@ -678,6 +699,11 @@ static struct kvm *kvm_create_vm(unsigne
+       return kvm;
+ out_err:
++#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
++      if (kvm->mmu_notifier.ops)
++              mmu_notifier_unregister(&kvm->mmu_notifier, current->mm);
++#endif
++out_err_no_mmu_notifier:
+       cleanup_srcu_struct(&kvm->irq_srcu);
+ out_err_no_irq_srcu:
+       cleanup_srcu_struct(&kvm->srcu);
+@@ -720,6 +746,8 @@ static void kvm_destroy_vm(struct kvm *k
+       mutex_lock(&kvm_lock);
+       list_del(&kvm->vm_list);
+       mutex_unlock(&kvm_lock);
++      kvm_arch_pre_destroy_vm(kvm);
++
+       kvm_free_irq_routing(kvm);
+       for (i = 0; i < KVM_NR_BUSES; i++) {
+               struct kvm_io_bus *bus = kvm_get_bus(kvm, i);
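To make the recovery cadence described above concrete, a stand-alone sketch of the zap budget computed by kvm_recover_nx_lpages(): roughly once a minute the worker zaps 1/ratio of the current nx_lpage_splits count, so at the default ratio of 60 the FIFO list turns over in about an hour at steady state, matching the changelog. The input numbers below are made up; only the formula mirrors the patch.

/*
 * Stand-alone sketch of the per-wakeup zap budget:
 *   to_zap = DIV_ROUND_UP(nx_lpage_splits, ratio)
 * evaluated about once a minute while the workaround is active.
 * Example inputs are arbitrary; ratio 60 matches the default.
 */
#include <stdio.h>

#define DIV_ROUND_UP(n, d)      (((n) + (d) - 1) / (d))

int main(void)
{
        unsigned long nx_lpage_splits = 1200;   /* hypothetical current count */
        unsigned int ratio = 60;                /* nx_huge_pages_recovery_ratio */
        unsigned long to_zap = ratio ? DIV_ROUND_UP(nx_lpage_splits, ratio) : 0;

        printf("zap %lu of %lu split pages this minute\n", to_zap, nx_lpage_splits);
        printf("steady-state FIFO turnover: about %u minutes\n", ratio);
        return 0;
}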
diff --git a/queue-4.14/kvm-x86-powerpc-do-not-allow-clearing-largepages-debugfs-entry.patch b/queue-4.14/kvm-x86-powerpc-do-not-allow-clearing-largepages-debugfs-entry.patch
new file mode 100644 (file)
index 0000000..78a183a
--- /dev/null
@@ -0,0 +1,99 @@
+From foo@baz Tue 12 Nov 2019 04:11:14 PM CET
+From: Paolo Bonzini <pbonzini@redhat.com>
+Date: Fri, 11 Oct 2019 11:59:48 +0200
+Subject: kvm: x86, powerpc: do not allow clearing largepages debugfs entry
+
+From: Paolo Bonzini <pbonzini@redhat.com>
+
+commit 833b45de69a6016c4b0cebe6765d526a31a81580 upstream.
+
+The largepages debugfs entry is incremented/decremented as shadow
+pages are created or destroyed.  Clearing it will result in an
+underflow, which is harmless to KVM but ugly (and could be
+misinterpreted by tools that use debugfs information), so make
+this particular statistic read-only.
+
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: kvm-ppc@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/x86.c       |    6 +++---
+ include/linux/kvm_host.h |    2 ++
+ virt/kvm/kvm_main.c      |   10 +++++++---
+ 3 files changed, 12 insertions(+), 6 deletions(-)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -90,8 +90,8 @@ u64 __read_mostly efer_reserved_bits = ~
+ static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE);
+ #endif
+-#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
+-#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
++#define VM_STAT(x, ...) offsetof(struct kvm, stat.x), KVM_STAT_VM, ## __VA_ARGS__
++#define VCPU_STAT(x, ...) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU, ## __VA_ARGS__
+ #define KVM_X2APIC_API_VALID_FLAGS (KVM_X2APIC_API_USE_32BIT_IDS | \
+                                     KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK)
+@@ -191,7 +191,7 @@ struct kvm_stats_debugfs_item debugfs_en
+       { "mmu_cache_miss", VM_STAT(mmu_cache_miss) },
+       { "mmu_unsync", VM_STAT(mmu_unsync) },
+       { "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
+-      { "largepages", VM_STAT(lpages) },
++      { "largepages", VM_STAT(lpages, .mode = 0444) },
+       { "max_mmu_page_hash_collisions",
+               VM_STAT(max_mmu_page_hash_collisions) },
+       { NULL }
+--- a/include/linux/kvm_host.h
++++ b/include/linux/kvm_host.h
+@@ -1013,6 +1013,7 @@ enum kvm_stat_kind {
+ struct kvm_stat_data {
+       int offset;
++      int mode;
+       struct kvm *kvm;
+ };
+@@ -1020,6 +1021,7 @@ struct kvm_stats_debugfs_item {
+       const char *name;
+       int offset;
+       enum kvm_stat_kind kind;
++      int mode;
+ };
+ extern struct kvm_stats_debugfs_item debugfs_entries[];
+ extern struct dentry *kvm_debugfs_dir;
+--- a/virt/kvm/kvm_main.c
++++ b/virt/kvm/kvm_main.c
+@@ -596,8 +596,9 @@ static int kvm_create_vm_debugfs(struct
+               stat_data->kvm = kvm;
+               stat_data->offset = p->offset;
++              stat_data->mode = p->mode ? p->mode : 0644;
+               kvm->debugfs_stat_data[p - debugfs_entries] = stat_data;
+-              if (!debugfs_create_file(p->name, 0644,
++              if (!debugfs_create_file(p->name, stat_data->mode,
+                                        kvm->debugfs_dentry,
+                                        stat_data,
+                                        stat_fops_per_vm[p->kind]))
+@@ -3713,7 +3714,9 @@ static int kvm_debugfs_open(struct inode
+       if (!refcount_inc_not_zero(&stat_data->kvm->users_count))
+               return -ENOENT;
+-      if (simple_attr_open(inode, file, get, set, fmt)) {
++      if (simple_attr_open(inode, file, get,
++                           stat_data->mode & S_IWUGO ? set : NULL,
++                           fmt)) {
+               kvm_put_kvm(stat_data->kvm);
+               return -ENOMEM;
+       }
+@@ -3964,7 +3967,8 @@ static int kvm_init_debug(void)
+       kvm_debugfs_num_entries = 0;
+       for (p = debugfs_entries; p->name; ++p, kvm_debugfs_num_entries++) {
+-              if (!debugfs_create_file(p->name, 0644, kvm_debugfs_dir,
++              int mode = p->mode ? p->mode : 0644;
++              if (!debugfs_create_file(p->name, mode, kvm_debugfs_dir,
+                                        (void *)(long)p->offset,
+                                        stat_fops[p->kind]))
+                       goto out_dir;
diff --git a/queue-4.14/kvm-x86-remove-now-unneeded-hugepage-gfn-adjustment.patch b/queue-4.14/kvm-x86-remove-now-unneeded-hugepage-gfn-adjustment.patch
new file mode 100644 (file)
index 0000000..608097c
--- /dev/null
@@ -0,0 +1,74 @@
+From foo@baz Tue 12 Nov 2019 04:11:14 PM CET
+From: Paolo Bonzini <pbonzini@redhat.com>
+Date: Sun, 23 Jun 2019 19:15:49 +0200
+Subject: KVM: x86: remove now unneeded hugepage gfn adjustment
+
+From: Paolo Bonzini <pbonzini@redhat.com>
+
+commit d679b32611c0102ce33b9e1a4e4b94854ed1812a upstream.
+
+After the previous patch, the low bits of the gfn are masked in
+both FNAME(fetch) and __direct_map, so we do not need to clear them
+in transparent_hugepage_adjust.
+
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/mmu.c         |    9 +++------
+ arch/x86/kvm/paging_tmpl.h |    2 +-
+ 2 files changed, 4 insertions(+), 7 deletions(-)
+
+--- a/arch/x86/kvm/mmu.c
++++ b/arch/x86/kvm/mmu.c
+@@ -3071,11 +3071,10 @@ static int kvm_handle_bad_page(struct kv
+ }
+ static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu,
+-                                      gfn_t *gfnp, kvm_pfn_t *pfnp,
++                                      gfn_t gfn, kvm_pfn_t *pfnp,
+                                       int *levelp)
+ {
+       kvm_pfn_t pfn = *pfnp;
+-      gfn_t gfn = *gfnp;
+       int level = *levelp;
+       /*
+@@ -3102,8 +3101,6 @@ static void transparent_hugepage_adjust(
+               mask = KVM_PAGES_PER_HPAGE(level) - 1;
+               VM_BUG_ON((gfn & mask) != (pfn & mask));
+               if (pfn & mask) {
+-                      gfn &= ~mask;
+-                      *gfnp = gfn;
+                       kvm_release_pfn_clean(pfn);
+                       pfn &= ~mask;
+                       kvm_get_pfn(pfn);
+@@ -3367,7 +3364,7 @@ static int nonpaging_map(struct kvm_vcpu
+       if (make_mmu_pages_available(vcpu) < 0)
+               goto out_unlock;
+       if (likely(!force_pt_level))
+-              transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level);
++              transparent_hugepage_adjust(vcpu, gfn, &pfn, &level);
+       r = __direct_map(vcpu, v, write, map_writable, level, pfn, prefault);
+ out_unlock:
+       spin_unlock(&vcpu->kvm->mmu_lock);
+@@ -3957,7 +3954,7 @@ static int tdp_page_fault(struct kvm_vcp
+       if (make_mmu_pages_available(vcpu) < 0)
+               goto out_unlock;
+       if (likely(!force_pt_level))
+-              transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level);
++              transparent_hugepage_adjust(vcpu, gfn, &pfn, &level);
+       r = __direct_map(vcpu, gpa, write, map_writable, level, pfn, prefault);
+ out_unlock:
+       spin_unlock(&vcpu->kvm->mmu_lock);
+--- a/arch/x86/kvm/paging_tmpl.h
++++ b/arch/x86/kvm/paging_tmpl.h
+@@ -829,7 +829,7 @@ static int FNAME(page_fault)(struct kvm_
+       if (make_mmu_pages_available(vcpu) < 0)
+               goto out_unlock;
+       if (!force_pt_level)
+-              transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level);
++              transparent_hugepage_adjust(vcpu, walker.gfn, &pfn, &level);
+       r = FNAME(fetch)(vcpu, addr, &walker, write_fault,
+                        level, pfn, map_writable, prefault);
+       kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT);
diff --git a/queue-4.14/kvm-x86-use-intel-speculation-bugs-and-features-as-derived-in-generic-x86-code.patch b/queue-4.14/kvm-x86-use-intel-speculation-bugs-and-features-as-derived-in-generic-x86-code.patch
new file mode 100644 (file)
index 0000000..fe0bc57
--- /dev/null
@@ -0,0 +1,61 @@
+From foo@baz Tue 12 Nov 2019 04:11:14 PM CET
+From: Paolo Bonzini <pbonzini@redhat.com>
+Date: Mon, 19 Aug 2019 17:24:07 +0200
+Subject: KVM: x86: use Intel speculation bugs and features as derived in generic x86 code
+
+From: Paolo Bonzini <pbonzini@redhat.com>
+
+commit 0c54914d0c52a15db9954a76ce80fee32cf318f4 upstream.
+
+Similar to AMD bits, set the Intel bits from the vendor-independent
+feature and bug flags, because KVM_GET_SUPPORTED_CPUID does not care
+about the vendor and they should be set on AMD processors as well.
+
+Suggested-by: Jim Mattson <jmattson@google.com>
+Reviewed-by: Jim Mattson <jmattson@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/cpuid.c |    8 ++++++++
+ arch/x86/kvm/x86.c   |    8 ++++++++
+ 2 files changed, 16 insertions(+)
+
+--- a/arch/x86/kvm/cpuid.c
++++ b/arch/x86/kvm/cpuid.c
+@@ -481,8 +481,16 @@ static inline int __do_cpuid_ent(struct
+                       /* PKU is not yet implemented for shadow paging. */
+                       if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE))
+                               entry->ecx &= ~F(PKU);
++
+                       entry->edx &= kvm_cpuid_7_0_edx_x86_features;
+                       cpuid_mask(&entry->edx, CPUID_7_EDX);
++                      if (boot_cpu_has(X86_FEATURE_IBPB) &&
++                          boot_cpu_has(X86_FEATURE_IBRS))
++                              entry->edx |= F(SPEC_CTRL);
++                      if (boot_cpu_has(X86_FEATURE_STIBP))
++                              entry->edx |= F(INTEL_STIBP);
++                      if (boot_cpu_has(X86_FEATURE_SSBD))
++                              entry->edx |= F(SPEC_CTRL_SSBD);
+                       /*
+                        * We emulate ARCH_CAPABILITIES in software even
+                        * if the host doesn't support it.
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -1081,8 +1081,16 @@ u64 kvm_get_arch_capabilities(void)
+       if (l1tf_vmx_mitigation != VMENTER_L1D_FLUSH_NEVER)
+               data |= ARCH_CAP_SKIP_VMENTRY_L1DFLUSH;
++      if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
++              data |= ARCH_CAP_RDCL_NO;
++      if (!boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS))
++              data |= ARCH_CAP_SSB_NO;
++      if (!boot_cpu_has_bug(X86_BUG_MDS))
++              data |= ARCH_CAP_MDS_NO;
++
+       return data;
+ }
++
+ EXPORT_SYMBOL_GPL(kvm_get_arch_capabilities);
+ static int kvm_get_msr_feature(struct kvm_msr_entry *msr)
index 628f20255eb1da441eb065da3bccedc17183cb00..4f07b9e847bb6f11e7fd7908ea1f8f1c0162dde2 100644 (file)
@@ -104,3 +104,51 @@ mm-filemap.c-don-t-initiate-writeback-if-mapping-has-no-dirty-pages.patch
 cgroup-writeback-don-t-switch-wbs-immediately-on-dead-wbs-if-the-memcg-is-dead.patch
 usbip-fix-free-of-unallocated-memory-in-vhci-tx.patch
 net-prevent-load-store-tearing-on-sk-sk_stamp.patch
+drm-i915-gtt-add-read-only-pages-to-gen8_pte_encode.patch
+drm-i915-gtt-read-only-pages-for-insert_entries-on-bdw.patch
+drm-i915-gtt-disable-read-only-support-under-gvt.patch
+drm-i915-prevent-writing-into-a-read-only-object-via-a-ggtt-mmap.patch
+drm-i915-cmdparser-check-reg_table_count-before-derefencing.patch
+drm-i915-cmdparser-do-not-check-past-the-cmd-length.patch
+drm-i915-silence-smatch-for-cmdparser.patch
+drm-i915-don-t-use-gpu-relocations-prior-to-cmdparser-stalls.patch
+drm-i915-move-engine-needs_cmd_parser-to-engine-flags.patch
+drm-i915-rename-gen7-cmdparser-tables.patch
+drm-i915-disable-secure-batches-for-gen6.patch
+drm-i915-remove-master-tables-from-cmdparser.patch
+drm-i915-add-support-for-mandatory-cmdparsing.patch
+drm-i915-support-ro-ppgtt-mapped-cmdparser-shadow-buffers.patch
+drm-i915-allow-parsing-of-unsized-batches.patch
+drm-i915-add-gen9-bcs-cmdparsing.patch
+drm-i915-cmdparser-use-explicit-goto-for-error-paths.patch
+drm-i915-cmdparser-add-support-for-backward-jumps.patch
+drm-i915-cmdparser-ignore-length-operands-during-command-matching.patch
+drm-i915-lower-rm-timeout-to-avoid-dsi-hard-hangs.patch
+drm-i915-gen8-add-rc6-ctx-corruption-wa.patch
+drm-i915-cmdparser-fix-jump-whitelist-clearing.patch
+kvm-x86-use-intel-speculation-bugs-and-features-as-derived-in-generic-x86-code.patch
+x86-msr-add-the-ia32_tsx_ctrl-msr.patch
+x86-cpu-add-a-helper-function-x86_read_arch_cap_msr.patch
+x86-cpu-add-a-tsx-cmdline-option-with-tsx-disabled-by-default.patch
+x86-speculation-taa-add-mitigation-for-tsx-async-abort.patch
+x86-speculation-taa-add-sysfs-reporting-for-tsx-async-abort.patch
+kvm-x86-export-mds_no-0-to-guests-when-tsx-is-enabled.patch
+x86-tsx-add-auto-option-to-the-tsx-cmdline-parameter.patch
+x86-speculation-taa-add-documentation-for-tsx-async-abort.patch
+x86-tsx-add-config-options-to-set-tsx-on-off-auto.patch
+x86-speculation-taa-fix-printing-of-taa_msg_smt-on-ibrs_all-cpus.patch
+x86-bugs-add-itlb_multihit-bug-infrastructure.patch
+x86-cpu-add-tremont-to-the-cpu-vulnerability-whitelist.patch
+cpu-speculation-uninline-and-export-cpu-mitigations-helpers.patch
+documentation-add-itlb_multihit-documentation.patch
+kvm-x86-powerpc-do-not-allow-clearing-largepages-debugfs-entry.patch
+kvm-convert-kvm_lock-to-a-mutex.patch
+kvm-mmu-do-not-release-the-page-inside-mmu_set_spte.patch
+kvm-x86-make-fname-fetch-and-__direct_map-more-similar.patch
+kvm-x86-remove-now-unneeded-hugepage-gfn-adjustment.patch
+kvm-x86-change-kvm_mmu_page_get_gfn-bug_on-to-warn_on.patch
+kvm-x86-add-tracepoints-around-__direct_map-and-fname-fetch.patch
+kvm-vmx-svm-always-run-with-efer.nxe-1-when-shadow-paging-is-active.patch
+kvm-mmu-itlb_multihit-mitigation.patch
+kvm-add-helper-function-for-creating-vm-worker-threads.patch
+kvm-x86-mmu-recovery-of-shattered-nx-large-pages.patch
diff --git a/queue-4.14/x86-bugs-add-itlb_multihit-bug-infrastructure.patch b/queue-4.14/x86-bugs-add-itlb_multihit-bug-infrastructure.patch
new file mode 100644 (file)
index 0000000..954b3e7
--- /dev/null
@@ -0,0 +1,254 @@
+From foo@baz Tue 12 Nov 2019 04:11:14 PM CET
+From: Vineela Tummalapalli <vineela.tummalapalli@intel.com>
+Date: Mon, 4 Nov 2019 12:22:01 +0100
+Subject: x86/bugs: Add ITLB_MULTIHIT bug infrastructure
+
+From: Vineela Tummalapalli <vineela.tummalapalli@intel.com>
+
+commit db4d30fbb71b47e4ecb11c4efa5d8aad4b03dfae upstream.
+
+Some processors may incur a machine check error possibly resulting in an
+unrecoverable CPU lockup when an instruction fetch encounters a TLB
+multi-hit in the instruction TLB. This can occur when the page size is
+changed along with either the physical address or cache type. The relevant
+erratum can be found here:
+
+   https://bugzilla.kernel.org/show_bug.cgi?id=205195
+
+There are other processors affected for which the erratum does not fully
+disclose the impact.
+
+This issue affects both bare-metal x86 page tables and EPT.
+
+It can be mitigated by either eliminating the use of large pages or by
+using careful TLB invalidations when changing the page size in the page
+tables.
+
+Just like Spectre, Meltdown, L1TF and MDS, a new bit has been allocated in
+MSR_IA32_ARCH_CAPABILITIES (PSCHANGE_MC_NO) and will be set on CPUs which
+are mitigated against this issue.
+
+Signed-off-by: Vineela Tummalapalli <vineela.tummalapalli@intel.com>
+Co-developed-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/ABI/testing/sysfs-devices-system-cpu |    1 
+ arch/x86/include/asm/cpufeatures.h                 |    1 
+ arch/x86/include/asm/msr-index.h                   |    7 ++
+ arch/x86/kernel/cpu/bugs.c                         |   13 ++++
+ arch/x86/kernel/cpu/common.c                       |   61 +++++++++++----------
+ drivers/base/cpu.c                                 |    8 ++
+ include/linux/cpu.h                                |    2 
+ 7 files changed, 65 insertions(+), 28 deletions(-)
+
+--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
++++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
+@@ -382,6 +382,7 @@ What:              /sys/devices/system/cpu/vulnerabi
+               /sys/devices/system/cpu/vulnerabilities/l1tf
+               /sys/devices/system/cpu/vulnerabilities/mds
+               /sys/devices/system/cpu/vulnerabilities/tsx_async_abort
++              /sys/devices/system/cpu/vulnerabilities/itlb_multihit
+ Date:         January 2018
+ Contact:      Linux kernel mailing list <linux-kernel@vger.kernel.org>
+ Description:  Information about CPU vulnerabilities
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -389,5 +389,6 @@
+ #define X86_BUG_MSBDS_ONLY            X86_BUG(20) /* CPU is only affected by the  MSDBS variant of BUG_MDS */
+ #define X86_BUG_SWAPGS                        X86_BUG(21) /* CPU is affected by speculation through SWAPGS */
+ #define X86_BUG_TAA                   X86_BUG(22) /* CPU is affected by TSX Async Abort(TAA) */
++#define X86_BUG_ITLB_MULTIHIT         X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */
+ #endif /* _ASM_X86_CPUFEATURES_H */
+--- a/arch/x86/include/asm/msr-index.h
++++ b/arch/x86/include/asm/msr-index.h
+@@ -84,6 +84,13 @@
+                                                 * Microarchitectural Data
+                                                 * Sampling (MDS) vulnerabilities.
+                                                 */
++#define ARCH_CAP_PSCHANGE_MC_NO               BIT(6)   /*
++                                                * The processor is not susceptible to a
++                                                * machine check error due to modifying the
++                                                * code page size along with either the
++                                                * physical address or cache type
++                                                * without TLB invalidation.
++                                                */
+ #define ARCH_CAP_TSX_CTRL_MSR         BIT(7)  /* MSR for TSX control is available. */
+ #define ARCH_CAP_TAA_NO                       BIT(8)  /*
+                                                * Not susceptible to
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -1387,6 +1387,11 @@ static ssize_t l1tf_show_state(char *buf
+ }
+ #endif
++static ssize_t itlb_multihit_show_state(char *buf)
++{
++      return sprintf(buf, "Processor vulnerable\n");
++}
++
+ static ssize_t mds_show_state(char *buf)
+ {
+       if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
+@@ -1487,6 +1492,9 @@ static ssize_t cpu_show_common(struct de
+       case X86_BUG_TAA:
+               return tsx_async_abort_show_state(buf);
++      case X86_BUG_ITLB_MULTIHIT:
++              return itlb_multihit_show_state(buf);
++
+       default:
+               break;
+       }
+@@ -1528,4 +1536,9 @@ ssize_t cpu_show_tsx_async_abort(struct
+ {
+       return cpu_show_common(dev, attr, buf, X86_BUG_TAA);
+ }
++
++ssize_t cpu_show_itlb_multihit(struct device *dev, struct device_attribute *attr, char *buf)
++{
++      return cpu_show_common(dev, attr, buf, X86_BUG_ITLB_MULTIHIT);
++}
+ #endif
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -897,13 +897,14 @@ static void identify_cpu_without_cpuid(s
+       c->x86_cache_bits = c->x86_phys_bits;
+ }
+-#define NO_SPECULATION        BIT(0)
+-#define NO_MELTDOWN   BIT(1)
+-#define NO_SSB                BIT(2)
+-#define NO_L1TF               BIT(3)
+-#define NO_MDS                BIT(4)
+-#define MSBDS_ONLY    BIT(5)
+-#define NO_SWAPGS     BIT(6)
++#define NO_SPECULATION                BIT(0)
++#define NO_MELTDOWN           BIT(1)
++#define NO_SSB                        BIT(2)
++#define NO_L1TF                       BIT(3)
++#define NO_MDS                        BIT(4)
++#define MSBDS_ONLY            BIT(5)
++#define NO_SWAPGS             BIT(6)
++#define NO_ITLB_MULTIHIT      BIT(7)
+ #define VULNWL(_vendor, _family, _model, _whitelist)  \
+       { X86_VENDOR_##_vendor, _family, _model, X86_FEATURE_ANY, _whitelist }
+@@ -921,26 +922,26 @@ static const __initconst struct x86_cpu_
+       VULNWL(NSC,     5, X86_MODEL_ANY,       NO_SPECULATION),
+       /* Intel Family 6 */
+-      VULNWL_INTEL(ATOM_SALTWELL,             NO_SPECULATION),
+-      VULNWL_INTEL(ATOM_SALTWELL_TABLET,      NO_SPECULATION),
+-      VULNWL_INTEL(ATOM_SALTWELL_MID,         NO_SPECULATION),
+-      VULNWL_INTEL(ATOM_BONNELL,              NO_SPECULATION),
+-      VULNWL_INTEL(ATOM_BONNELL_MID,          NO_SPECULATION),
+-
+-      VULNWL_INTEL(ATOM_SILVERMONT,           NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
+-      VULNWL_INTEL(ATOM_SILVERMONT_X,         NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
+-      VULNWL_INTEL(ATOM_SILVERMONT_MID,       NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
+-      VULNWL_INTEL(ATOM_AIRMONT,              NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
+-      VULNWL_INTEL(XEON_PHI_KNL,              NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
+-      VULNWL_INTEL(XEON_PHI_KNM,              NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
++      VULNWL_INTEL(ATOM_SALTWELL,             NO_SPECULATION | NO_ITLB_MULTIHIT),
++      VULNWL_INTEL(ATOM_SALTWELL_TABLET,      NO_SPECULATION | NO_ITLB_MULTIHIT),
++      VULNWL_INTEL(ATOM_SALTWELL_MID,         NO_SPECULATION | NO_ITLB_MULTIHIT),
++      VULNWL_INTEL(ATOM_BONNELL,              NO_SPECULATION | NO_ITLB_MULTIHIT),
++      VULNWL_INTEL(ATOM_BONNELL_MID,          NO_SPECULATION | NO_ITLB_MULTIHIT),
++
++      VULNWL_INTEL(ATOM_SILVERMONT,           NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
++      VULNWL_INTEL(ATOM_SILVERMONT_X,         NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
++      VULNWL_INTEL(ATOM_SILVERMONT_MID,       NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
++      VULNWL_INTEL(ATOM_AIRMONT,              NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
++      VULNWL_INTEL(XEON_PHI_KNL,              NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
++      VULNWL_INTEL(XEON_PHI_KNM,              NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
+       VULNWL_INTEL(CORE_YONAH,                NO_SSB),
+-      VULNWL_INTEL(ATOM_AIRMONT_MID,          NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
++      VULNWL_INTEL(ATOM_AIRMONT_MID,          NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
+-      VULNWL_INTEL(ATOM_GOLDMONT,             NO_MDS | NO_L1TF | NO_SWAPGS),
+-      VULNWL_INTEL(ATOM_GOLDMONT_X,           NO_MDS | NO_L1TF | NO_SWAPGS),
+-      VULNWL_INTEL(ATOM_GOLDMONT_PLUS,        NO_MDS | NO_L1TF | NO_SWAPGS),
++      VULNWL_INTEL(ATOM_GOLDMONT,             NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
++      VULNWL_INTEL(ATOM_GOLDMONT_X,           NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
++      VULNWL_INTEL(ATOM_GOLDMONT_PLUS,        NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
+       /*
+        * Technically, swapgs isn't serializing on AMD (despite it previously
+@@ -951,13 +952,13 @@ static const __initconst struct x86_cpu_
+        */
+       /* AMD Family 0xf - 0x12 */
+-      VULNWL_AMD(0x0f,        NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS),
+-      VULNWL_AMD(0x10,        NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS),
+-      VULNWL_AMD(0x11,        NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS),
+-      VULNWL_AMD(0x12,        NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS),
++      VULNWL_AMD(0x0f,        NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
++      VULNWL_AMD(0x10,        NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
++      VULNWL_AMD(0x11,        NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
++      VULNWL_AMD(0x12,        NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
+       /* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */
+-      VULNWL_AMD(X86_FAMILY_ANY,      NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS),
++      VULNWL_AMD(X86_FAMILY_ANY,      NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
+       {}
+ };
+@@ -982,6 +983,10 @@ static void __init cpu_set_bug_bits(stru
+ {
+       u64 ia32_cap = x86_read_arch_cap_msr();
++      /* Set ITLB_MULTIHIT bug if cpu is not in the whitelist and not mitigated */
++      if (!cpu_matches(NO_ITLB_MULTIHIT) && !(ia32_cap & ARCH_CAP_PSCHANGE_MC_NO))
++              setup_force_cpu_bug(X86_BUG_ITLB_MULTIHIT);
++
+       if (cpu_matches(NO_SPECULATION))
+               return;
+--- a/drivers/base/cpu.c
++++ b/drivers/base/cpu.c
+@@ -546,6 +546,12 @@ ssize_t __weak cpu_show_tsx_async_abort(
+       return sprintf(buf, "Not affected\n");
+ }
++ssize_t __weak cpu_show_itlb_multihit(struct device *dev,
++                          struct device_attribute *attr, char *buf)
++{
++      return sprintf(buf, "Not affected\n");
++}
++
+ static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
+ static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
+ static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL);
+@@ -553,6 +559,7 @@ static DEVICE_ATTR(spec_store_bypass, 04
+ static DEVICE_ATTR(l1tf, 0444, cpu_show_l1tf, NULL);
+ static DEVICE_ATTR(mds, 0444, cpu_show_mds, NULL);
+ static DEVICE_ATTR(tsx_async_abort, 0444, cpu_show_tsx_async_abort, NULL);
++static DEVICE_ATTR(itlb_multihit, 0444, cpu_show_itlb_multihit, NULL);
+ static struct attribute *cpu_root_vulnerabilities_attrs[] = {
+       &dev_attr_meltdown.attr,
+@@ -562,6 +569,7 @@ static struct attribute *cpu_root_vulner
+       &dev_attr_l1tf.attr,
+       &dev_attr_mds.attr,
+       &dev_attr_tsx_async_abort.attr,
++      &dev_attr_itlb_multihit.attr,
+       NULL
+ };
+--- a/include/linux/cpu.h
++++ b/include/linux/cpu.h
+@@ -62,6 +62,8 @@ extern ssize_t cpu_show_mds(struct devic
+ extern ssize_t cpu_show_tsx_async_abort(struct device *dev,
+                                       struct device_attribute *attr,
+                                       char *buf);
++extern ssize_t cpu_show_itlb_multihit(struct device *dev,
++                                    struct device_attribute *attr, char *buf);
+ extern __printf(4, 5)
+ struct device *cpu_device_create(struct device *parent, void *drvdata,
diff --git a/queue-4.14/x86-cpu-add-a-helper-function-x86_read_arch_cap_msr.patch b/queue-4.14/x86-cpu-add-a-helper-function-x86_read_arch_cap_msr.patch
new file mode 100644 (file)
index 0000000..d236e51
--- /dev/null
@@ -0,0 +1,66 @@
+From foo@baz Tue 12 Nov 2019 04:11:14 PM CET
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Date: Wed, 23 Oct 2019 10:52:35 +0200
+Subject: x86/cpu: Add a helper function x86_read_arch_cap_msr()
+
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+
+commit 286836a70433fb64131d2590f4bf512097c255e1 upstream.
+
+Add a helper function to read the IA32_ARCH_CAPABILITIES MSR.
+
+Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Tested-by: Neelima Krishnan <neelima.krishnan@intel.com>
+Reviewed-by: Mark Gross <mgross@linux.intel.com>
+Reviewed-by: Tony Luck <tony.luck@intel.com>
+Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/common.c |   15 +++++++++++----
+ arch/x86/kernel/cpu/cpu.h    |    2 ++
+ 2 files changed, 13 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -968,19 +968,26 @@ static bool __init cpu_matches(unsigned
+       return m && !!(m->driver_data & which);
+ }
+-static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
++u64 x86_read_arch_cap_msr(void)
+ {
+       u64 ia32_cap = 0;
++      if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
++              rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);
++
++      return ia32_cap;
++}
++
++static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
++{
++      u64 ia32_cap = x86_read_arch_cap_msr();
++
+       if (cpu_matches(NO_SPECULATION))
+               return;
+       setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
+       setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
+-      if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES))
+-              rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);
+-
+       if (!cpu_matches(NO_SSB) && !(ia32_cap & ARCH_CAP_SSB_NO) &&
+          !cpu_has(c, X86_FEATURE_AMD_SSB_NO))
+               setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS);
+--- a/arch/x86/kernel/cpu/cpu.h
++++ b/arch/x86/kernel/cpu/cpu.h
+@@ -54,4 +54,6 @@ unsigned int aperfmperf_get_khz(int cpu)
+ extern void x86_spec_ctrl_setup_ap(void);
++extern u64 x86_read_arch_cap_msr(void);
++
+ #endif /* ARCH_X86_CPU_H */
diff --git a/queue-4.14/x86-cpu-add-a-tsx-cmdline-option-with-tsx-disabled-by-default.patch b/queue-4.14/x86-cpu-add-a-tsx-cmdline-option-with-tsx-disabled-by-default.patch
new file mode 100644 (file)
index 0000000..34c515d
--- /dev/null
@@ -0,0 +1,261 @@
+From foo@baz Tue 12 Nov 2019 04:11:14 PM CET
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Date: Wed, 23 Oct 2019 11:01:53 +0200
+Subject: x86/cpu: Add a "tsx=" cmdline option with TSX disabled by default
+
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+
+commit 95c5824f75f3ba4c9e8e5a4b1a623c95390ac266 upstream.
+
+Add a kernel cmdline parameter "tsx" to control the Transactional
+Synchronization Extensions (TSX) feature. On CPUs that support TSX
+control, use "tsx=on|off" to enable or disable TSX. Not specifying this
+option is equivalent to "tsx=off". This is because on certain processors
+TSX may be used as a part of a speculative side channel attack.
+
+Carve out the TSX controlling functionality into a separate compilation
+unit because TSX is a CPU feature while the TSX async abort control
+machinery will go to cpu/bugs.c.
+
+ [ bp: - Massage, shorten and clear the arg buffer.
+       - Clarifications of the tsx= possible options - Josh.
+       - Expand on TSX_CTRL availability - Pawan. ]
+
+Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/admin-guide/kernel-parameters.txt |   26 ++++
+ arch/x86/kernel/cpu/Makefile                    |    2 
+ arch/x86/kernel/cpu/common.c                    |    1 
+ arch/x86/kernel/cpu/cpu.h                       |   16 +++
+ arch/x86/kernel/cpu/intel.c                     |    5 
+ arch/x86/kernel/cpu/tsx.c                       |  125 ++++++++++++++++++++++++
+ 6 files changed, 174 insertions(+), 1 deletion(-)
+ create mode 100644 arch/x86/kernel/cpu/tsx.c
+
+--- a/Documentation/admin-guide/kernel-parameters.txt
++++ b/Documentation/admin-guide/kernel-parameters.txt
+@@ -4505,6 +4505,32 @@
+                       platforms where RDTSC is slow and this accounting
+                       can add overhead.
++      tsx=            [X86] Control Transactional Synchronization
++                      Extensions (TSX) feature in Intel processors that
++                      support TSX control.
++
++                      This parameter controls the TSX feature. The options are:
++
++                      on      - Enable TSX on the system. Although there are
++                              mitigations for all known security vulnerabilities,
++                              TSX has been known to be an accelerator for
++                              several previous speculation-related CVEs, and
++                              so there may be unknown security risks associated
++                              with leaving it enabled.
++
++                      off     - Disable TSX on the system. (Note that this
++                              option takes effect only on newer CPUs which are
++                              not vulnerable to MDS, i.e., have
++                              MSR_IA32_ARCH_CAPABILITIES.MDS_NO=1 and which get
++                              the new IA32_TSX_CTRL MSR through a microcode
++                              update. This new MSR allows for the reliable
++                              deactivation of the TSX functionality.)
++
++                      Not specifying this option is equivalent to tsx=off.
++
++                      See Documentation/admin-guide/hw-vuln/tsx_async_abort.rst
++                      for more details.
++
+       turbografx.map[2|3]=    [HW,JOY]
+                       TurboGraFX parallel port interface
+                       Format:
+--- a/arch/x86/kernel/cpu/Makefile
++++ b/arch/x86/kernel/cpu/Makefile
+@@ -28,7 +28,7 @@ obj-y                        += cpuid-deps.o
+ obj-$(CONFIG_PROC_FS) += proc.o
+ obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o
+-obj-$(CONFIG_CPU_SUP_INTEL)           += intel.o
++obj-$(CONFIG_CPU_SUP_INTEL)           += intel.o tsx.o
+ obj-$(CONFIG_CPU_SUP_AMD)             += amd.o
+ obj-$(CONFIG_CPU_SUP_CYRIX_32)                += cyrix.o
+ obj-$(CONFIG_CPU_SUP_CENTAUR)         += centaur.o
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -1414,6 +1414,7 @@ void __init identify_boot_cpu(void)
+       enable_sep_cpu();
+ #endif
+       cpu_detect_tlb(&boot_cpu_data);
++      tsx_init();
+ }
+ void identify_secondary_cpu(struct cpuinfo_x86 *c)
+--- a/arch/x86/kernel/cpu/cpu.h
++++ b/arch/x86/kernel/cpu/cpu.h
+@@ -45,6 +45,22 @@ struct _tlb_table {
+ extern const struct cpu_dev *const __x86_cpu_dev_start[],
+                           *const __x86_cpu_dev_end[];
++#ifdef CONFIG_CPU_SUP_INTEL
++enum tsx_ctrl_states {
++      TSX_CTRL_ENABLE,
++      TSX_CTRL_DISABLE,
++      TSX_CTRL_NOT_SUPPORTED,
++};
++
++extern __ro_after_init enum tsx_ctrl_states tsx_ctrl_state;
++
++extern void __init tsx_init(void);
++extern void tsx_enable(void);
++extern void tsx_disable(void);
++#else
++static inline void tsx_init(void) { }
++#endif /* CONFIG_CPU_SUP_INTEL */
++
+ extern void get_cpu_cap(struct cpuinfo_x86 *c);
+ extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c);
+ extern int detect_extended_topology_early(struct cpuinfo_x86 *c);
+--- a/arch/x86/kernel/cpu/intel.c
++++ b/arch/x86/kernel/cpu/intel.c
+@@ -695,6 +695,11 @@ static void init_intel(struct cpuinfo_x8
+       init_intel_energy_perf(c);
+       init_intel_misc_features(c);
++
++      if (tsx_ctrl_state == TSX_CTRL_ENABLE)
++              tsx_enable();
++      if (tsx_ctrl_state == TSX_CTRL_DISABLE)
++              tsx_disable();
+ }
+ #ifdef CONFIG_X86_32
+--- /dev/null
++++ b/arch/x86/kernel/cpu/tsx.c
+@@ -0,0 +1,125 @@
++// SPDX-License-Identifier: GPL-2.0
++/*
++ * Intel Transactional Synchronization Extensions (TSX) control.
++ *
++ * Copyright (C) 2019 Intel Corporation
++ *
++ * Author:
++ *    Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
++ */
++
++#include <linux/cpufeature.h>
++
++#include <asm/cmdline.h>
++
++#include "cpu.h"
++
++enum tsx_ctrl_states tsx_ctrl_state __ro_after_init = TSX_CTRL_NOT_SUPPORTED;
++
++void tsx_disable(void)
++{
++      u64 tsx;
++
++      rdmsrl(MSR_IA32_TSX_CTRL, tsx);
++
++      /* Force all transactions to immediately abort */
++      tsx |= TSX_CTRL_RTM_DISABLE;
++
++      /*
++       * Ensure TSX support is not enumerated in CPUID.
++       * This is visible to userspace and will ensure they
++       * do not waste resources trying TSX transactions that
++       * will always abort.
++       */
++      tsx |= TSX_CTRL_CPUID_CLEAR;
++
++      wrmsrl(MSR_IA32_TSX_CTRL, tsx);
++}
++
++void tsx_enable(void)
++{
++      u64 tsx;
++
++      rdmsrl(MSR_IA32_TSX_CTRL, tsx);
++
++      /* Enable the RTM feature in the cpu */
++      tsx &= ~TSX_CTRL_RTM_DISABLE;
++
++      /*
++       * Ensure TSX support is enumerated in CPUID.
++       * This is visible to userspace and will ensure they
++       * can enumerate and use the TSX feature.
++       */
++      tsx &= ~TSX_CTRL_CPUID_CLEAR;
++
++      wrmsrl(MSR_IA32_TSX_CTRL, tsx);
++}
++
++static bool __init tsx_ctrl_is_supported(void)
++{
++      u64 ia32_cap = x86_read_arch_cap_msr();
++
++      /*
++       * TSX is controlled via MSR_IA32_TSX_CTRL.  However, support for this
++       * MSR is enumerated by ARCH_CAP_TSX_MSR bit in MSR_IA32_ARCH_CAPABILITIES.
++       *
++       * TSX control (aka MSR_IA32_TSX_CTRL) is only available after a
++       * microcode update on CPUs that have their MSR_IA32_ARCH_CAPABILITIES
++       * bit MDS_NO=1. CPUs with MDS_NO=0 are not planned to get
++       * MSR_IA32_TSX_CTRL support even after a microcode update. Thus,
++       * tsx= cmdline requests will do nothing on CPUs without
++       * MSR_IA32_TSX_CTRL support.
++       */
++      return !!(ia32_cap & ARCH_CAP_TSX_CTRL_MSR);
++}
++
++void __init tsx_init(void)
++{
++      char arg[4] = {};
++      int ret;
++
++      if (!tsx_ctrl_is_supported())
++              return;
++
++      ret = cmdline_find_option(boot_command_line, "tsx", arg, sizeof(arg));
++      if (ret >= 0) {
++              if (!strcmp(arg, "on")) {
++                      tsx_ctrl_state = TSX_CTRL_ENABLE;
++              } else if (!strcmp(arg, "off")) {
++                      tsx_ctrl_state = TSX_CTRL_DISABLE;
++              } else {
++                      tsx_ctrl_state = TSX_CTRL_DISABLE;
++                      pr_err("tsx: invalid option, defaulting to off\n");
++              }
++      } else {
++              /* tsx= not provided, defaulting to off */
++              tsx_ctrl_state = TSX_CTRL_DISABLE;
++      }
++
++      if (tsx_ctrl_state == TSX_CTRL_DISABLE) {
++              tsx_disable();
++
++              /*
++               * tsx_disable() will change the state of the
++               * RTM CPUID bit.  Clear it here since it is now
++               * expected to be not set.
++               */
++              setup_clear_cpu_cap(X86_FEATURE_RTM);
++      } else if (tsx_ctrl_state == TSX_CTRL_ENABLE) {
++
++              /*
++               * HW defaults TSX to be enabled at bootup.
++               * We may still need the TSX enable support
++               * during init for special cases like
++               * kexec after TSX is disabled.
++               */
++              tsx_enable();
++
++              /*
++               * tsx_enable() will change the state of the
++               * RTM CPUID bit.  Force it here since it is now
++               * expected to be set.
++               */
++              setup_force_cpu_cap(X86_FEATURE_RTM);
++      }
++}
diff --git a/queue-4.14/x86-cpu-add-tremont-to-the-cpu-vulnerability-whitelist.patch b/queue-4.14/x86-cpu-add-tremont-to-the-cpu-vulnerability-whitelist.patch
new file mode 100644 (file)
index 0000000..0dca97c
--- /dev/null
@@ -0,0 +1,34 @@
+From foo@baz Tue 12 Nov 2019 04:11:14 PM CET
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Date: Mon, 4 Nov 2019 12:22:01 +0100
+Subject: x86/cpu: Add Tremont to the cpu vulnerability whitelist
+
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+
+commit cad14885a8d32c1c0d8eaa7bf5c0152a22b6080e upstream.
+
+Add the new cpu family ATOM_TREMONT_D to the cpu vulnerability
+whitelist. ATOM_TREMONT_D is not affected by X86_BUG_ITLB_MULTIHIT.
+
+ATOM_TREMONT_D might have mitigations against other issues as well, but
+only the ITLB multihit mitigation is confirmed at this point.
+
+Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/common.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -951,6 +951,8 @@ static const __initconst struct x86_cpu_
+        * good enough for our purposes.
+        */
++      VULNWL_INTEL(ATOM_TREMONT_X,            NO_ITLB_MULTIHIT),
++
+       /* AMD Family 0xf - 0x12 */
+       VULNWL_AMD(0x0f,        NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
+       VULNWL_AMD(0x10,        NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
diff --git a/queue-4.14/x86-msr-add-the-ia32_tsx_ctrl-msr.patch b/queue-4.14/x86-msr-add-the-ia32_tsx_ctrl-msr.patch
new file mode 100644 (file)
index 0000000..1457fb6
--- /dev/null
@@ -0,0 +1,83 @@
+From foo@baz Tue 12 Nov 2019 04:11:14 PM CET
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Date: Wed, 23 Oct 2019 10:45:50 +0200
+Subject: x86/msr: Add the IA32_TSX_CTRL MSR
+
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+
+commit c2955f270a84762343000f103e0640d29c7a96f3 upstream.
+
+Transactional Synchronization Extensions (TSX) may be used on certain
+processors as part of a speculative side channel attack.  A microcode
+update for existing processors that are vulnerable to this attack will
+add a new MSR - IA32_TSX_CTRL to allow the system administrator the
+option to disable TSX as one of the possible mitigations.
+
+The CPUs which get this new MSR after a microcode upgrade are the ones
+which do not set MSR_IA32_ARCH_CAPABILITIES.MDS_NO (bit 5) because those
+CPUs have CPUID.MD_CLEAR, i.e., the VERW implementation which clears all
+CPU buffers takes care of the TAA case as well.
+
+  [ Note that future processors that are not vulnerable will also
+    support the IA32_TSX_CTRL MSR. ]
+
+Add defines for the new IA32_TSX_CTRL MSR and its bits.
+
+TSX has two sub-features:
+
+1. Restricted Transactional Memory (RTM) is an explicitly-used feature
+   where new instructions begin and end TSX transactions.
+2. Hardware Lock Elision (HLE) is implicitly used when certain kinds of
+   "old" style locks are used by software.
+
+Bit 7 of the IA32_ARCH_CAPABILITIES indicates the presence of the
+IA32_TSX_CTRL MSR.
+
+There are two control bits in IA32_TSX_CTRL MSR:
+
+  Bit 0: When set, it disables the Restricted Transactional Memory (RTM)
+         sub-feature of TSX (will force all transactions to abort on the
+        XBEGIN instruction).
+
+  Bit 1: When set, it disables the enumeration of the RTM and HLE feature
+         (i.e. it will make CPUID(EAX=7).EBX{bit4} and
+         CPUID(EAX=7).EBX{bit11} read as 0).
+
+The other TSX sub-feature, Hardware Lock Elision (HLE), is
+unconditionally disabled by the new microcode but still enumerated
+as present by CPUID(EAX=7).EBX{bit4}, unless disabled by
+IA32_TSX_CTRL_MSR[1] - TSX_CTRL_CPUID_CLEAR.
+
+Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Tested-by: Neelima Krishnan <neelima.krishnan@intel.com>
+Reviewed-by: Mark Gross <mgross@linux.intel.com>
+Reviewed-by: Tony Luck <tony.luck@intel.com>
+Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/msr-index.h |    5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/arch/x86/include/asm/msr-index.h
++++ b/arch/x86/include/asm/msr-index.h
+@@ -84,6 +84,7 @@
+                                                 * Microarchitectural Data
+                                                 * Sampling (MDS) vulnerabilities.
+                                                 */
++#define ARCH_CAP_TSX_CTRL_MSR         BIT(7)  /* MSR for TSX control is available. */
+ #define MSR_IA32_FLUSH_CMD            0x0000010b
+ #define L1D_FLUSH                     BIT(0)  /*
+@@ -94,6 +95,10 @@
+ #define MSR_IA32_BBL_CR_CTL           0x00000119
+ #define MSR_IA32_BBL_CR_CTL3          0x0000011e
++#define MSR_IA32_TSX_CTRL             0x00000122
++#define TSX_CTRL_RTM_DISABLE          BIT(0)  /* Disable RTM feature */
++#define TSX_CTRL_CPUID_CLEAR          BIT(1)  /* Disable TSX enumeration */
++
+ #define MSR_IA32_SYSENTER_CS          0x00000174
+ #define MSR_IA32_SYSENTER_ESP         0x00000175
+ #define MSR_IA32_SYSENTER_EIP         0x00000176
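
The three #defines added above fully describe the new interface: IA32_TSX_CTRL lives at MSR index 0x122, bit 0 forces RTM transactions to abort, and bit 1 hides the RTM/HLE enumeration in CPUID. The short standalone C sketch below is illustrative only and is not part of any patch in this queue; it mirrors those definitions and prints the value the kernel would write to disable TSX, without touching any MSR itself.

/* Illustration only: mirrors the IA32_TSX_CTRL definitions added above.
 * The program computes and prints MSR values; it never accesses an MSR.
 */
#include <stdio.h>
#include <stdint.h>

#define MSR_IA32_TSX_CTRL     0x00000122    /* MSR index from the patch */
#define TSX_CTRL_RTM_DISABLE  (1ULL << 0)   /* bit 0: force RTM transactions to abort */
#define TSX_CTRL_CPUID_CLEAR  (1ULL << 1)   /* bit 1: hide RTM/HLE enumeration in CPUID */

int main(void)
{
        uint64_t tsx_off = TSX_CTRL_RTM_DISABLE | TSX_CTRL_CPUID_CLEAR;

        printf("write 0x%llx to MSR 0x%x to disable TSX and its enumeration\n",
               (unsigned long long)tsx_off, MSR_IA32_TSX_CTRL);
        return 0;
}
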
diff --git a/queue-4.14/x86-speculation-taa-add-documentation-for-tsx-async-abort.patch b/queue-4.14/x86-speculation-taa-add-documentation-for-tsx-async-abort.patch
new file mode 100644 (file)
index 0000000..5fabe23
--- /dev/null
@@ -0,0 +1,520 @@
+From foo@baz Tue 12 Nov 2019 04:11:14 PM CET
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Date: Wed, 23 Oct 2019 12:32:55 +0200
+Subject: x86/speculation/taa: Add documentation for TSX Async Abort
+
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+
+commit a7a248c593e4fd7a67c50b5f5318fe42a0db335e upstream.
+
+Add the documentation for TSX Async Abort. Include a description of
+the issue, how to check the mitigation state, how to control the
+mitigation, and guidance for system administrators.
+
+ [ bp: Add proper SPDX tags, touch ups by Josh and me. ]
+
+Co-developed-by: Antonio Gomez Iglesias <antonio.gomez.iglesias@intel.com>
+
+Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Signed-off-by: Antonio Gomez Iglesias <antonio.gomez.iglesias@intel.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Mark Gross <mgross@linux.intel.com>
+Reviewed-by: Tony Luck <tony.luck@intel.com>
+Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/ABI/testing/sysfs-devices-system-cpu    |    1 
+ Documentation/admin-guide/hw-vuln/index.rst           |    1 
+ Documentation/admin-guide/hw-vuln/tsx_async_abort.rst |  276 ++++++++++++++++++
+ Documentation/admin-guide/kernel-parameters.txt       |   38 ++
+ Documentation/x86/index.rst                           |    1 
+ Documentation/x86/tsx_async_abort.rst                 |  117 +++++++
+ 6 files changed, 434 insertions(+)
+ create mode 100644 Documentation/admin-guide/hw-vuln/tsx_async_abort.rst
+ create mode 100644 Documentation/x86/tsx_async_abort.rst
+
+--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
++++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
+@@ -381,6 +381,7 @@ What:              /sys/devices/system/cpu/vulnerabi
+               /sys/devices/system/cpu/vulnerabilities/spec_store_bypass
+               /sys/devices/system/cpu/vulnerabilities/l1tf
+               /sys/devices/system/cpu/vulnerabilities/mds
++              /sys/devices/system/cpu/vulnerabilities/tsx_async_abort
+ Date:         January 2018
+ Contact:      Linux kernel mailing list <linux-kernel@vger.kernel.org>
+ Description:  Information about CPU vulnerabilities
+--- a/Documentation/admin-guide/hw-vuln/index.rst
++++ b/Documentation/admin-guide/hw-vuln/index.rst
+@@ -12,3 +12,4 @@ are configurable at compile, boot or run
+    spectre
+    l1tf
+    mds
++   tsx_async_abort
+--- /dev/null
++++ b/Documentation/admin-guide/hw-vuln/tsx_async_abort.rst
+@@ -0,0 +1,276 @@
++.. SPDX-License-Identifier: GPL-2.0
++
++TAA - TSX Asynchronous Abort
++======================================
++
++TAA is a hardware vulnerability that allows unprivileged speculative access to
++data which is available in various CPU internal buffers by using asynchronous
++aborts within an Intel TSX transactional region.
++
++Affected processors
++-------------------
++
++This vulnerability only affects Intel processors that support Intel
++Transactional Synchronization Extensions (TSX) when the TAA_NO bit (bit 8)
++is 0 in the IA32_ARCH_CAPABILITIES MSR.  On processors where the MDS_NO bit
++(bit 5) is 0 in the IA32_ARCH_CAPABILITIES MSR, the existing MDS mitigations
++also mitigate against TAA.
++
++Whether a processor is affected or not can be read out from the TAA
++vulnerability file in sysfs. See :ref:`tsx_async_abort_sys_info`.
++
++Related CVEs
++------------
++
++The following CVE entry is related to this TAA issue:
++
++   ==============  =====  ===================================================
++   CVE-2019-11135  TAA    TSX Asynchronous Abort (TAA) condition on some
++                          microprocessors utilizing speculative execution may
++                          allow an authenticated user to potentially enable
++                          information disclosure via a side channel with
++                          local access.
++   ==============  =====  ===================================================
++
++Problem
++-------
++
++When performing store, load or L1 refill operations, processors write
++data into temporary microarchitectural structures (buffers). The data in
++those buffers can be forwarded to load operations as an optimization.
++
++Intel TSX is an extension to the x86 instruction set architecture that adds
++hardware transactional memory support to improve performance of multi-threaded
++software. TSX lets the processor expose and exploit concurrency hidden in an
++application by dynamically avoiding unnecessary synchronization.
++
++TSX supports atomic memory transactions that are either committed (success) or
++aborted. During an abort, operations that happened within the transactional region
++are rolled back. An asynchronous abort takes place, among other options, when a
++different thread accesses a cache line that is also used within the transactional
++region when that access might lead to a data race.
++
++Immediately after an uncompleted asynchronous abort, certain speculatively
++executed loads may read data from those internal buffers and pass it to dependent
++operations. This can be then used to infer the value via a cache side channel
++attack.
++
++Because the buffers are potentially shared between Hyper-Threads, cross
++Hyper-Thread attacks are possible.
++
++The victim of a malicious actor does not need to make use of TSX. Only the
++attacker needs to begin a TSX transaction and raise an asynchronous abort
++which in turn potentially leaks data stored in the buffers.
++
++More detailed technical information is available in the TAA specific x86
++architecture section: :ref:`Documentation/x86/tsx_async_abort.rst <tsx_async_abort>`.
++
++
++Attack scenarios
++----------------
++
++Attacks against the TAA vulnerability can be implemented from unprivileged
++applications running on hosts or guests.
++
++As for MDS, the attacker has no control over the memory addresses that can
++be leaked. Only the victim is responsible for bringing data to the CPU. As
++a result, the malicious actor has to sample as much data as possible and
++then postprocess it to try to infer any useful information from it.
++
++A potential attacker only has read access to the data. Also, there is no direct
++privilege escalation by using this technique.
++
++
++.. _tsx_async_abort_sys_info:
++
++TAA system information
++-----------------------
++
++The Linux kernel provides a sysfs interface to enumerate the current TAA status
++of mitigated systems. The relevant sysfs file is:
++
++/sys/devices/system/cpu/vulnerabilities/tsx_async_abort
++
++The possible values in this file are:
++
++.. list-table::
++
++   * - 'Vulnerable'
++     - The CPU is affected by this vulnerability and the microcode and kernel mitigation are not applied.
++   * - 'Vulnerable: Clear CPU buffers attempted, no microcode'
++     - The system tries to clear the buffers but the microcode might not support the operation.
++   * - 'Mitigation: Clear CPU buffers'
++     - The microcode has been updated to clear the buffers. TSX is still enabled.
++   * - 'Mitigation: TSX disabled'
++     - TSX is disabled.
++   * - 'Not affected'
++     - The CPU is not affected by this issue.
++
++.. _ucode_needed:
++
++Best effort mitigation mode
++^^^^^^^^^^^^^^^^^^^^^^^^^^^
++
++If the processor is vulnerable, but the availability of the microcode-based
++mitigation mechanism is not advertised via CPUID, the kernel selects a best
++effort mitigation mode.  This mode invokes the mitigation instructions
++without a guarantee that they clear the CPU buffers.
++
++This is done to address virtualization scenarios where the host has the
++microcode update applied, but the hypervisor is not yet updated to expose the
++CPUID to the guest. If the host has updated microcode the protection takes
++effect; otherwise a few CPU cycles are wasted pointlessly.
++
++The state in the tsx_async_abort sysfs file reflects this situation
++accordingly.
++
++
++Mitigation mechanism
++--------------------
++
++The kernel detects the affected CPUs and the presence of the microcode which is
++required. If a CPU is affected and the microcode is available, then the kernel
++enables the mitigation by default.
++
++
++The mitigation can be controlled at boot time via a kernel command line option.
++See :ref:`taa_mitigation_control_command_line`.
++
++.. _virt_mechanism:
++
++Virtualization mitigation
++^^^^^^^^^^^^^^^^^^^^^^^^^
++
++Affected systems where the host has TAA microcode and TAA is mitigated by
++having disabled TSX previously are not vulnerable regardless of the status
++of the VMs.
++
++In all other cases, if the host either does not have the TAA microcode or
++the kernel is not mitigated, the system might be vulnerable.
++
++
++.. _taa_mitigation_control_command_line:
++
++Mitigation control on the kernel command line
++---------------------------------------------
++
++The kernel command line allows the TAA mitigations to be controlled at boot time with
++the option "tsx_async_abort=". The valid arguments for this option are:
++
++  ============  =============================================================
++  off         This option disables the TAA mitigation on affected platforms.
++                If the system has TSX enabled (see next parameter) and the CPU
++                is affected, the system is vulnerable.
++
++  full                TAA mitigation is enabled. If TSX is enabled, on an affected
++                system it will clear CPU buffers on ring transitions. On
++                systems which are MDS-affected and deploy MDS mitigation,
++                TAA is also mitigated. Specifying this option on those
++                systems will have no effect.
++
++  full,nosmt    The same as tsx_async_abort=full, with SMT disabled on
++                vulnerable CPUs that have TSX enabled. This is the complete
++                mitigation. When TSX is disabled, SMT is not disabled because
++                CPU is not vulnerable to cross-thread TAA attacks.
++  ============  =============================================================
++
++Not specifying this option is equivalent to "tsx_async_abort=full".
++
++The kernel command line also allows the TSX feature to be controlled using the
++parameter "tsx=" on CPUs which support TSX control. MSR_IA32_TSX_CTRL is used
++to control the TSX feature and the enumeration of the TSX feature bits (RTM
++and HLE) in CPUID.
++
++The valid options are:
++
++  ============  =============================================================
++  off         Disables TSX on the system.
++
++                Note that this option takes effect only on newer CPUs which are
++                not vulnerable to MDS, i.e., have MSR_IA32_ARCH_CAPABILITIES.MDS_NO=1
++                and which get the new IA32_TSX_CTRL MSR through a microcode
++                update. This new MSR allows for the reliable deactivation of
++                the TSX functionality.
++
++  on          Enables TSX.
++
++                Although there are mitigations for all known security
++                vulnerabilities, TSX has been known to be an accelerator for
++                several previous speculation-related CVEs, and so there may be
++                unknown security risks associated with leaving it enabled.
++
++  auto                Disables TSX if X86_BUG_TAA is present, otherwise enables TSX
++                on the system.
++  ============  =============================================================
++
++Not specifying this option is equivalent to "tsx=off".
++
++The following combinations of the "tsx_async_abort" and "tsx" are possible. For
++affected platforms tsx=auto is equivalent to tsx=off and the result will be:
++
++  =========  ==========================   =========================================
++  tsx=on     tsx_async_abort=full         The system will use VERW to clear CPU
++                                          buffers. Cross-thread attacks are still
++                                        possible on SMT machines.
++  tsx=on     tsx_async_abort=full,nosmt   As above, cross-thread attacks on SMT
++                                          mitigated.
++  tsx=on     tsx_async_abort=off          The system is vulnerable.
++  tsx=off    tsx_async_abort=full         TSX might be disabled if microcode
++                                          provides a TSX control MSR. If so,
++                                        system is not vulnerable.
++  tsx=off    tsx_async_abort=full,nosmt   Ditto
++  tsx=off    tsx_async_abort=off          ditto
++  =========  ==========================   =========================================
++
++
++For unaffected platforms "tsx=on" and "tsx_async_abort=full" do not clear CPU
++buffers.  For platforms without TSX control (MSR_IA32_ARCH_CAPABILITIES.MDS_NO=0),
++the "tsx" command line argument has no effect.
++
++For the affected platforms, the table below indicates the mitigation status for the
++combinations of CPUID bit MD_CLEAR and IA32_ARCH_CAPABILITIES MSR bits MDS_NO
++and TSX_CTRL_MSR.
++
++  =======  =========  =============  ========================================
++  MDS_NO   MD_CLEAR   TSX_CTRL_MSR   Status
++  =======  =========  =============  ========================================
++    0          0            0        Vulnerable (needs microcode)
++    0          1            0        MDS and TAA mitigated via VERW
++    1          1            0        MDS fixed, TAA vulnerable if TSX enabled
++                                     because MD_CLEAR has no meaning and
++                                     VERW is not guaranteed to clear buffers
++    1          X            1        MDS fixed, TAA can be mitigated by
++                                     VERW or TSX_CTRL_MSR
++  =======  =========  =============  ========================================
++
++Mitigation selection guide
++--------------------------
++
++1. Trusted userspace and guests
++^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
++
++If all user space applications are from a trusted source and do not execute
++untrusted code which is supplied externally, then the mitigation can be
++disabled. The same applies to virtualized environments with trusted guests.
++
++
++2. Untrusted userspace and guests
++^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
++
++If there are untrusted applications or guests on the system, enabling TSX
++might allow a malicious actor to leak data from the host or from other
++processes running on the same physical core.
++
++If the microcode is available and TSX is disabled on the host, attacks
++are prevented in a virtualized environment as well, even if the VMs do not
++explicitly enable the mitigation.
++
++
++.. _taa_default_mitigations:
++
++Default mitigations
++-------------------
++
++The kernel's default action for vulnerable processors is:
++
++  - Deploy TSX disable mitigation (tsx_async_abort=full tsx=off).
+--- a/Documentation/admin-guide/kernel-parameters.txt
++++ b/Documentation/admin-guide/kernel-parameters.txt
+@@ -2409,6 +2409,7 @@
+                                              ssbd=force-off [ARM64]
+                                              l1tf=off [X86]
+                                              mds=off [X86]
++                                             tsx_async_abort=off [X86]
+                       auto (default)
+                               Mitigate all CPU vulnerabilities, but leave SMT
+@@ -2424,6 +2425,7 @@
+                               be fully mitigated, even if it means losing SMT.
+                               Equivalent to: l1tf=flush,nosmt [X86]
+                                              mds=full,nosmt [X86]
++                                             tsx_async_abort=full,nosmt [X86]
+       mminit_loglevel=
+                       [KNL] When CONFIG_DEBUG_MEMORY_INIT is set, this
+@@ -4534,6 +4536,42 @@
+                       See Documentation/admin-guide/hw-vuln/tsx_async_abort.rst
+                       for more details.
++      tsx_async_abort= [X86,INTEL] Control mitigation for the TSX Async
++                      Abort (TAA) vulnerability.
++
++                      Similar to Micro-architectural Data Sampling (MDS)
++                      certain CPUs that support Transactional
++                      Synchronization Extensions (TSX) are vulnerable to an
++                      exploit against CPU internal buffers which can forward
++                      information to a disclosure gadget under certain
++                      conditions.
++
++                      In vulnerable processors, the speculatively forwarded
++                      data can be used in a cache side channel attack, to
++                      access data to which the attacker does not have direct
++                      access.
++
++                      This parameter controls the TAA mitigation.  The
++                      options are:
++
++                      full       - Enable TAA mitigation on vulnerable CPUs
++                                   if TSX is enabled.
++
++                      full,nosmt - Enable TAA mitigation and disable SMT on
++                                   vulnerable CPUs. If TSX is disabled, SMT
++                                   is not disabled because CPU is not
++                                   vulnerable to cross-thread TAA attacks.
++                      off        - Unconditionally disable TAA mitigation
++
++                      Not specifying this option is equivalent to
++                      tsx_async_abort=full.  On CPUs which are MDS affected
++                      and deploy MDS mitigation, TAA mitigation is not
++                      required and doesn't provide any additional
++                      mitigation.
++
++                      For details see:
++                      Documentation/admin-guide/hw-vuln/tsx_async_abort.rst
++
+       turbografx.map[2|3]=    [HW,JOY]
+                       TurboGraFX parallel port interface
+                       Format:
+--- a/Documentation/x86/index.rst
++++ b/Documentation/x86/index.rst
+@@ -6,3 +6,4 @@ x86 architecture specifics
+    :maxdepth: 1
+    mds
++   tsx_async_abort
+--- /dev/null
++++ b/Documentation/x86/tsx_async_abort.rst
+@@ -0,0 +1,117 @@
++.. SPDX-License-Identifier: GPL-2.0
++
++TSX Async Abort (TAA) mitigation
++================================
++
++.. _tsx_async_abort:
++
++Overview
++--------
++
++TSX Async Abort (TAA) is a side channel attack on internal buffers in some
++Intel processors similar to Microarchitectural Data Sampling (MDS).  In this
++case certain loads may speculatively pass invalid data to dependent operations
++when an asynchronous abort condition is pending in a Transactional
++Synchronization Extensions (TSX) transaction.  This includes loads with no
++fault or assist condition. Such loads may speculatively expose stale data from
++the same uarch data structures as in MDS, with the same scope of exposure, i.e.
++same-thread and cross-thread. This issue affects all current processors that
++support TSX.
++
++Mitigation strategy
++-------------------
++
++a) TSX disable - one of the mitigations is to disable TSX. A new MSR,
++IA32_TSX_CTRL, is available on current processors after a microcode update
++(and on future processors) and can be used to disable TSX. In addition, it
++controls the enumeration of the TSX feature bits (RTM and HLE) in CPUID.
++
++b) Clear CPU buffers - similar to MDS, clearing the CPU buffers mitigates this
++vulnerability. More details on this approach can be found in
++:ref:`Documentation/admin-guide/hw-vuln/mds.rst <mds>`.
++
++Kernel internal mitigation modes
++--------------------------------
++
++ =============    ============================================================
++ off              Mitigation is disabled. Either the CPU is not affected or
++                  tsx_async_abort=off is supplied on the kernel command line.
++
++ tsx disabled     Mitigation is enabled. TSX feature is disabled by default at
++                  bootup on processors that support TSX control.
++
++ verw             Mitigation is enabled. CPU is affected and MD_CLEAR is
++                  advertised in CPUID.
++
++ ucode needed     Mitigation is enabled. CPU is affected and MD_CLEAR is not
++                  advertised in CPUID. That is mainly for virtualization
++                  scenarios where the host has the updated microcode but the
++                  hypervisor does not expose MD_CLEAR in CPUID. It's a best
++                  effort approach without guarantee.
++ =============    ============================================================
++
++If the CPU is affected and the "tsx_async_abort" kernel command line parameter is
++not provided, then the kernel selects an appropriate mitigation depending on the
++status of RTM and MD_CLEAR CPUID bits.
++
++The tables below indicate the impact of the tsx=on|off|auto cmdline options on the state of
++TAA mitigation, VERW behavior and TSX feature for various combinations of
++MSR_IA32_ARCH_CAPABILITIES bits.
++
++1. "tsx=off"
++
++=========  =========  ============  ============  ==============  ===================  ======================
++MSR_IA32_ARCH_CAPABILITIES bits     Result with cmdline tsx=off
++----------------------------------  -------------------------------------------------------------------------
++TAA_NO     MDS_NO     TSX_CTRL_MSR  TSX state     VERW can clear  TAA mitigation       TAA mitigation
++                                    after bootup  CPU buffers     tsx_async_abort=off  tsx_async_abort=full
++=========  =========  ============  ============  ==============  ===================  ======================
++    0          0           0         HW default         Yes           Same as MDS           Same as MDS
++    0          0           1        Invalid case   Invalid case       Invalid case          Invalid case
++    0          1           0         HW default         No         Need ucode update     Need ucode update
++    0          1           1          Disabled          Yes           TSX disabled          TSX disabled
++    1          X           1          Disabled           X             None needed           None needed
++=========  =========  ============  ============  ==============  ===================  ======================
++
++2. "tsx=on"
++
++=========  =========  ============  ============  ==============  ===================  ======================
++MSR_IA32_ARCH_CAPABILITIES bits     Result with cmdline tsx=on
++----------------------------------  -------------------------------------------------------------------------
++TAA_NO     MDS_NO     TSX_CTRL_MSR  TSX state     VERW can clear  TAA mitigation       TAA mitigation
++                                    after bootup  CPU buffers     tsx_async_abort=off  tsx_async_abort=full
++=========  =========  ============  ============  ==============  ===================  ======================
++    0          0           0         HW default        Yes            Same as MDS          Same as MDS
++    0          0           1        Invalid case   Invalid case       Invalid case         Invalid case
++    0          1           0         HW default        No          Need ucode update     Need ucode update
++    0          1           1          Enabled          Yes               None              Same as MDS
++    1          X           1          Enabled          X              None needed          None needed
++=========  =========  ============  ============  ==============  ===================  ======================
++
++3. "tsx=auto"
++
++=========  =========  ============  ============  ==============  ===================  ======================
++MSR_IA32_ARCH_CAPABILITIES bits     Result with cmdline tsx=auto
++----------------------------------  -------------------------------------------------------------------------
++TAA_NO     MDS_NO     TSX_CTRL_MSR  TSX state     VERW can clear  TAA mitigation       TAA mitigation
++                                    after bootup  CPU buffers     tsx_async_abort=off  tsx_async_abort=full
++=========  =========  ============  ============  ==============  ===================  ======================
++    0          0           0         HW default    Yes                Same as MDS           Same as MDS
++    0          0           1        Invalid case  Invalid case        Invalid case          Invalid case
++    0          1           0         HW default    No              Need ucode update     Need ucode update
++    0          1           1          Disabled      Yes               TSX disabled          TSX disabled
++    1          X           1          Enabled       X                 None needed           None needed
++=========  =========  ============  ============  ==============  ===================  ======================
++
++In the tables, TSX_CTRL_MSR is a new bit in MSR_IA32_ARCH_CAPABILITIES that
++indicates whether MSR_IA32_TSX_CTRL is supported.
++
++There are two control bits in IA32_TSX_CTRL MSR:
++
++      Bit 0: When set, it disables the Restricted Transactional Memory (RTM)
++             sub-feature of TSX (will force all transactions to abort on the
++             XBEGIN instruction).
++
++      Bit 1: When set, it disables the enumeration of the RTM and HLE features
++             (i.e. it will make CPUID(EAX=7).EBX{bit4} and
++             CPUID(EAX=7).EBX{bit11} read as 0).
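For readers of this queue, the two bits above correspond to the TSX_CTRL_RTM_DISABLE
and TSX_CTRL_CPUID_CLEAR definitions that the IA32_TSX_CTRL MSR patch earlier in this
series introduces. A minimal sketch of how a TSX-disable helper is expected to use
them (illustrative only; not one of the queued patches):

    /* Illustrative sketch, not part of any queued patch. */
    static void tsx_disable_sketch(void)
    {
            u64 tsx;

            rdmsrl(MSR_IA32_TSX_CTRL, tsx);

            /* Bit 0: force all RTM transactions to abort at XBEGIN */
            tsx |= TSX_CTRL_RTM_DISABLE;

            /* Bit 1: stop enumerating RTM/HLE in CPUID(EAX=7).EBX */
            tsx |= TSX_CTRL_CPUID_CLEAR;

            wrmsrl(MSR_IA32_TSX_CTRL, tsx);
    }
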
diff --git a/queue-4.14/x86-speculation-taa-add-mitigation-for-tsx-async-abort.patch b/queue-4.14/x86-speculation-taa-add-mitigation-for-tsx-async-abort.patch
new file mode 100644 (file)
index 0000000..c978438
--- /dev/null
@@ -0,0 +1,303 @@
+From foo@baz Tue 12 Nov 2019 04:11:14 PM CET
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Date: Wed, 23 Oct 2019 11:30:45 +0200
+Subject: x86/speculation/taa: Add mitigation for TSX Async Abort
+
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+
+commit 1b42f017415b46c317e71d41c34ec088417a1883 upstream.
+
+TSX Async Abort (TAA) is a side channel vulnerability to the internal
+buffers in some Intel processors, similar to Microarchitectural Data
+Sampling (MDS). In this case, certain loads may speculatively pass
+invalid data to dependent operations when an asynchronous abort
+condition is pending in a TSX transaction.
+
+This includes loads with no fault or assist condition. Such loads may
+speculatively expose stale data from the uarch data structures as in
+MDS. The scope of exposure is same-thread and cross-thread. This
+issue affects all current processors that support TSX, but do not have
+ARCH_CAP_TAA_NO (bit 8) set in MSR_IA32_ARCH_CAPABILITIES.
+
+On CPUs which have their IA32_ARCH_CAPABILITIES MSR bit MDS_NO=0,
+CPUID.MD_CLEAR=1 and the MDS mitigation is clearing the CPU buffers
+using VERW or L1D_FLUSH, there is no additional mitigation needed for
+TAA. On affected CPUs with MDS_NO=1 this issue can be mitigated by
+disabling the Transactional Synchronization Extensions (TSX) feature.
+
+A new MSR, IA32_TSX_CTRL, available in future processors and in current
+processors after a microcode update, can be used to control the TSX
+feature. There are two bits in that MSR:
+
+* TSX_CTRL_RTM_DISABLE disables the TSX sub-feature Restricted
+Transactional Memory (RTM).
+
+* TSX_CTRL_CPUID_CLEAR clears the RTM enumeration in CPUID. The other
+TSX sub-feature, Hardware Lock Elision (HLE), is unconditionally
+disabled with updated microcode but still enumerated as present by
+CPUID(EAX=7).EBX{bit4}.
+
+The second mitigation approach is similar to MDS: it clears the
+affected CPU buffers on return to user space and when entering a guest.
+A relevant microcode update is required for the mitigation to work. More
+details on this approach can be found here:
+
+  https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html
+
+The TSX feature can be controlled by the "tsx" command line parameter.
+If it is force-enabled then "Clear CPU buffers" (MDS mitigation) is
+deployed. The effective mitigation state can be read from sysfs.
+
+ [ bp:
+   - massage + comments cleanup
+   - s/TAA_MITIGATION_TSX_DISABLE/TAA_MITIGATION_TSX_DISABLED/g - Josh.
+   - remove partial TAA mitigation in update_mds_branch_idle() - Josh.
+   - s/tsx_async_abort_cmdline/tsx_async_abort_parse_cmdline/g
+ ]
+
+Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/cpufeatures.h   |    1 
+ arch/x86/include/asm/msr-index.h     |    4 +
+ arch/x86/include/asm/nospec-branch.h |    4 -
+ arch/x86/include/asm/processor.h     |    7 ++
+ arch/x86/kernel/cpu/bugs.c           |  110 +++++++++++++++++++++++++++++++++++
+ arch/x86/kernel/cpu/common.c         |   15 ++++
+ 6 files changed, 139 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/include/asm/cpufeatures.h
++++ b/arch/x86/include/asm/cpufeatures.h
+@@ -388,5 +388,6 @@
+ #define X86_BUG_MDS                   X86_BUG(19) /* CPU is affected by Microarchitectural data sampling */
+ #define X86_BUG_MSBDS_ONLY            X86_BUG(20) /* CPU is only affected by the  MSDBS variant of BUG_MDS */
+ #define X86_BUG_SWAPGS                        X86_BUG(21) /* CPU is affected by speculation through SWAPGS */
++#define X86_BUG_TAA                   X86_BUG(22) /* CPU is affected by TSX Async Abort(TAA) */
+ #endif /* _ASM_X86_CPUFEATURES_H */
+--- a/arch/x86/include/asm/msr-index.h
++++ b/arch/x86/include/asm/msr-index.h
+@@ -85,6 +85,10 @@
+                                                 * Sampling (MDS) vulnerabilities.
+                                                 */
+ #define ARCH_CAP_TSX_CTRL_MSR         BIT(7)  /* MSR for TSX control is available. */
++#define ARCH_CAP_TAA_NO                       BIT(8)  /*
++                                               * Not susceptible to
++                                               * TSX Async Abort (TAA) vulnerabilities.
++                                               */
+ #define MSR_IA32_FLUSH_CMD            0x0000010b
+ #define L1D_FLUSH                     BIT(0)  /*
+--- a/arch/x86/include/asm/nospec-branch.h
++++ b/arch/x86/include/asm/nospec-branch.h
+@@ -323,7 +323,7 @@ DECLARE_STATIC_KEY_FALSE(mds_idle_clear)
+ #include <asm/segment.h>
+ /**
+- * mds_clear_cpu_buffers - Mitigation for MDS vulnerability
++ * mds_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability
+  *
+  * This uses the otherwise unused and obsolete VERW instruction in
+  * combination with microcode which triggers a CPU buffer flush when the
+@@ -346,7 +346,7 @@ static inline void mds_clear_cpu_buffers
+ }
+ /**
+- * mds_user_clear_cpu_buffers - Mitigation for MDS vulnerability
++ * mds_user_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability
+  *
+  * Clear CPU buffers if the corresponding static key is enabled
+  */
+--- a/arch/x86/include/asm/processor.h
++++ b/arch/x86/include/asm/processor.h
+@@ -994,4 +994,11 @@ enum mds_mitigations {
+       MDS_MITIGATION_VMWERV,
+ };
++enum taa_mitigations {
++      TAA_MITIGATION_OFF,
++      TAA_MITIGATION_UCODE_NEEDED,
++      TAA_MITIGATION_VERW,
++      TAA_MITIGATION_TSX_DISABLED,
++};
++
+ #endif /* _ASM_X86_PROCESSOR_H */
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -32,11 +32,14 @@
+ #include <asm/intel-family.h>
+ #include <asm/e820/api.h>
++#include "cpu.h"
++
+ static void __init spectre_v1_select_mitigation(void);
+ static void __init spectre_v2_select_mitigation(void);
+ static void __init ssb_select_mitigation(void);
+ static void __init l1tf_select_mitigation(void);
+ static void __init mds_select_mitigation(void);
++static void __init taa_select_mitigation(void);
+ /* The base value of the SPEC_CTRL MSR that always has to be preserved. */
+ u64 x86_spec_ctrl_base;
+@@ -103,6 +106,7 @@ void __init check_bugs(void)
+       ssb_select_mitigation();
+       l1tf_select_mitigation();
+       mds_select_mitigation();
++      taa_select_mitigation();
+       arch_smt_update();
+@@ -267,6 +271,100 @@ static int __init mds_cmdline(char *str)
+ early_param("mds", mds_cmdline);
+ #undef pr_fmt
++#define pr_fmt(fmt)   "TAA: " fmt
++
++/* Default mitigation for TAA-affected CPUs */
++static enum taa_mitigations taa_mitigation __ro_after_init = TAA_MITIGATION_VERW;
++static bool taa_nosmt __ro_after_init;
++
++static const char * const taa_strings[] = {
++      [TAA_MITIGATION_OFF]            = "Vulnerable",
++      [TAA_MITIGATION_UCODE_NEEDED]   = "Vulnerable: Clear CPU buffers attempted, no microcode",
++      [TAA_MITIGATION_VERW]           = "Mitigation: Clear CPU buffers",
++      [TAA_MITIGATION_TSX_DISABLED]   = "Mitigation: TSX disabled",
++};
++
++static void __init taa_select_mitigation(void)
++{
++      u64 ia32_cap;
++
++      if (!boot_cpu_has_bug(X86_BUG_TAA)) {
++              taa_mitigation = TAA_MITIGATION_OFF;
++              return;
++      }
++
++      /* TSX previously disabled by tsx=off */
++      if (!boot_cpu_has(X86_FEATURE_RTM)) {
++              taa_mitigation = TAA_MITIGATION_TSX_DISABLED;
++              goto out;
++      }
++
++      if (cpu_mitigations_off()) {
++              taa_mitigation = TAA_MITIGATION_OFF;
++              return;
++      }
++
++      /* TAA mitigation is turned off on the cmdline (tsx_async_abort=off) */
++      if (taa_mitigation == TAA_MITIGATION_OFF)
++              goto out;
++
++      if (boot_cpu_has(X86_FEATURE_MD_CLEAR))
++              taa_mitigation = TAA_MITIGATION_VERW;
++      else
++              taa_mitigation = TAA_MITIGATION_UCODE_NEEDED;
++
++      /*
++       * VERW doesn't clear the CPU buffers when MD_CLEAR=1 and MDS_NO=1.
++       * A microcode update fixes this behavior to clear CPU buffers. It also
++       * adds support for MSR_IA32_TSX_CTRL which is enumerated by the
++       * ARCH_CAP_TSX_CTRL_MSR bit.
++       *
++       * On MDS_NO=1 CPUs if ARCH_CAP_TSX_CTRL_MSR is not set, microcode
++       * update is required.
++       */
++      ia32_cap = x86_read_arch_cap_msr();
++      if ( (ia32_cap & ARCH_CAP_MDS_NO) &&
++          !(ia32_cap & ARCH_CAP_TSX_CTRL_MSR))
++              taa_mitigation = TAA_MITIGATION_UCODE_NEEDED;
++
++      /*
++       * TSX is enabled, select alternate mitigation for TAA which is
++       * the same as MDS. Enable MDS static branch to clear CPU buffers.
++       *
++       * For guests that can't determine whether the correct microcode is
++       * present on host, enable the mitigation for UCODE_NEEDED as well.
++       */
++      static_branch_enable(&mds_user_clear);
++
++      if (taa_nosmt || cpu_mitigations_auto_nosmt())
++              cpu_smt_disable(false);
++
++out:
++      pr_info("%s\n", taa_strings[taa_mitigation]);
++}
++
++static int __init tsx_async_abort_parse_cmdline(char *str)
++{
++      if (!boot_cpu_has_bug(X86_BUG_TAA))
++              return 0;
++
++      if (!str)
++              return -EINVAL;
++
++      if (!strcmp(str, "off")) {
++              taa_mitigation = TAA_MITIGATION_OFF;
++      } else if (!strcmp(str, "full")) {
++              taa_mitigation = TAA_MITIGATION_VERW;
++      } else if (!strcmp(str, "full,nosmt")) {
++              taa_mitigation = TAA_MITIGATION_VERW;
++              taa_nosmt = true;
++      }
++
++      return 0;
++}
++early_param("tsx_async_abort", tsx_async_abort_parse_cmdline);
++
++#undef pr_fmt
+ #define pr_fmt(fmt)     "Spectre V1 : " fmt
+ enum spectre_v1_mitigation {
+@@ -772,6 +870,7 @@ static void update_mds_branch_idle(void)
+ }
+ #define MDS_MSG_SMT "MDS CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html for more details.\n"
++#define TAA_MSG_SMT "TAA CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/tsx_async_abort.html for more details.\n"
+ void arch_smt_update(void)
+ {
+@@ -804,6 +903,17 @@ void arch_smt_update(void)
+               break;
+       }
++      switch (taa_mitigation) {
++      case TAA_MITIGATION_VERW:
++      case TAA_MITIGATION_UCODE_NEEDED:
++              if (sched_smt_active())
++                      pr_warn_once(TAA_MSG_SMT);
++              break;
++      case TAA_MITIGATION_TSX_DISABLED:
++      case TAA_MITIGATION_OFF:
++              break;
++      }
++
+       mutex_unlock(&spec_ctrl_mutex);
+ }
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -1004,6 +1004,21 @@ static void __init cpu_set_bug_bits(stru
+       if (!cpu_matches(NO_SWAPGS))
+               setup_force_cpu_bug(X86_BUG_SWAPGS);
++      /*
++       * When the CPU is not mitigated for TAA (TAA_NO=0) set TAA bug when:
++       *      - TSX is supported or
++       *      - TSX_CTRL is present
++       *
++       * TSX_CTRL check is needed for cases when TSX could be disabled before
++       * the kernel boot e.g. kexec.
++       * TSX_CTRL check alone is not sufficient for cases when the microcode
++       * update is not present or running as guest that don't get TSX_CTRL.
++       */
++      if (!(ia32_cap & ARCH_CAP_TAA_NO) &&
++          (cpu_has(c, X86_FEATURE_RTM) ||
++           (ia32_cap & ARCH_CAP_TSX_CTRL_MSR)))
++              setup_force_cpu_bug(X86_BUG_TAA);
++
+       if (cpu_matches(NO_MELTDOWN))
+               return;
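taa_select_mitigation() above calls x86_read_arch_cap_msr(), which is introduced by a
separate patch in this series and is not shown in this hunk. For reference, a hedged
sketch of the expected behaviour (a reconstruction, not the queued patch itself):

    /* Sketch only: return MSR_IA32_ARCH_CAPABILITIES, or 0 when the MSR is
     * not enumerated, so callers can test ARCH_CAP_* bits unconditionally.
     */
    u64 x86_read_arch_cap_msr(void)
    {
            u64 ia32_cap = 0;

            if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
                    rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);

            return ia32_cap;
    }
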
diff --git a/queue-4.14/x86-speculation-taa-add-sysfs-reporting-for-tsx-async-abort.patch b/queue-4.14/x86-speculation-taa-add-sysfs-reporting-for-tsx-async-abort.patch
new file mode 100644 (file)
index 0000000..8f76056
--- /dev/null
@@ -0,0 +1,118 @@
+From foo@baz Tue 12 Nov 2019 04:11:14 PM CET
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Date: Wed, 23 Oct 2019 12:19:51 +0200
+Subject: x86/speculation/taa: Add sysfs reporting for TSX Async Abort
+
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+
+commit 6608b45ac5ecb56f9e171252229c39580cc85f0f upstream.
+
+Add the sysfs reporting file for TSX Async Abort. It exposes the
+vulnerability and the mitigation state similar to the existing files for
+the other hardware vulnerabilities.
+
+Sysfs file path is:
+/sys/devices/system/cpu/vulnerabilities/tsx_async_abort
+
+Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Tested-by: Neelima Krishnan <neelima.krishnan@intel.com>
+Reviewed-by: Mark Gross <mgross@linux.intel.com>
+Reviewed-by: Tony Luck <tony.luck@intel.com>
+Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/bugs.c |   23 +++++++++++++++++++++++
+ drivers/base/cpu.c         |    9 +++++++++
+ include/linux/cpu.h        |    3 +++
+ 3 files changed, 35 insertions(+)
+
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -1408,6 +1408,21 @@ static ssize_t mds_show_state(char *buf)
+                      sched_smt_active() ? "vulnerable" : "disabled");
+ }
++static ssize_t tsx_async_abort_show_state(char *buf)
++{
++      if ((taa_mitigation == TAA_MITIGATION_TSX_DISABLED) ||
++          (taa_mitigation == TAA_MITIGATION_OFF))
++              return sprintf(buf, "%s\n", taa_strings[taa_mitigation]);
++
++      if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
++              return sprintf(buf, "%s; SMT Host state unknown\n",
++                             taa_strings[taa_mitigation]);
++      }
++
++      return sprintf(buf, "%s; SMT %s\n", taa_strings[taa_mitigation],
++                     sched_smt_active() ? "vulnerable" : "disabled");
++}
++
+ static char *stibp_state(void)
+ {
+       if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
+@@ -1473,6 +1488,9 @@ static ssize_t cpu_show_common(struct de
+       case X86_BUG_MDS:
+               return mds_show_state(buf);
++      case X86_BUG_TAA:
++              return tsx_async_abort_show_state(buf);
++
+       default:
+               break;
+       }
+@@ -1509,4 +1527,9 @@ ssize_t cpu_show_mds(struct device *dev,
+ {
+       return cpu_show_common(dev, attr, buf, X86_BUG_MDS);
+ }
++
++ssize_t cpu_show_tsx_async_abort(struct device *dev, struct device_attribute *attr, char *buf)
++{
++      return cpu_show_common(dev, attr, buf, X86_BUG_TAA);
++}
+ #endif
+--- a/drivers/base/cpu.c
++++ b/drivers/base/cpu.c
+@@ -539,12 +539,20 @@ ssize_t __weak cpu_show_mds(struct devic
+       return sprintf(buf, "Not affected\n");
+ }
++ssize_t __weak cpu_show_tsx_async_abort(struct device *dev,
++                                      struct device_attribute *attr,
++                                      char *buf)
++{
++      return sprintf(buf, "Not affected\n");
++}
++
+ static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
+ static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
+ static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL);
+ static DEVICE_ATTR(spec_store_bypass, 0444, cpu_show_spec_store_bypass, NULL);
+ static DEVICE_ATTR(l1tf, 0444, cpu_show_l1tf, NULL);
+ static DEVICE_ATTR(mds, 0444, cpu_show_mds, NULL);
++static DEVICE_ATTR(tsx_async_abort, 0444, cpu_show_tsx_async_abort, NULL);
+ static struct attribute *cpu_root_vulnerabilities_attrs[] = {
+       &dev_attr_meltdown.attr,
+@@ -553,6 +561,7 @@ static struct attribute *cpu_root_vulner
+       &dev_attr_spec_store_bypass.attr,
+       &dev_attr_l1tf.attr,
+       &dev_attr_mds.attr,
++      &dev_attr_tsx_async_abort.attr,
+       NULL
+ };
+--- a/include/linux/cpu.h
++++ b/include/linux/cpu.h
+@@ -59,6 +59,9 @@ extern ssize_t cpu_show_l1tf(struct devi
+                            struct device_attribute *attr, char *buf);
+ extern ssize_t cpu_show_mds(struct device *dev,
+                           struct device_attribute *attr, char *buf);
++extern ssize_t cpu_show_tsx_async_abort(struct device *dev,
++                                      struct device_attribute *attr,
++                                      char *buf);
+ extern __printf(4, 5)
+ struct device *cpu_device_create(struct device *parent, void *drvdata,
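The new attribute is consumed like any other file under
/sys/devices/system/cpu/vulnerabilities/. A small, hypothetical userspace check (not
part of the patch; only the sysfs path and the possible strings come from the code
above) might look like this:

    #include <stdio.h>

    int main(void)
    {
            char line[128];
            FILE *f = fopen("/sys/devices/system/cpu/vulnerabilities/tsx_async_abort", "r");

            if (!f) {
                    /* Kernel without this patch, or attribute not exposed */
                    puts("tsx_async_abort: not reported");
                    return 1;
            }
            /* e.g. "Mitigation: Clear CPU buffers; SMT vulnerable" */
            if (fgets(line, sizeof(line), f))
                    fputs(line, stdout);
            fclose(f);
            return 0;
    }
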
diff --git a/queue-4.14/x86-speculation-taa-fix-printing-of-taa_msg_smt-on-ibrs_all-cpus.patch b/queue-4.14/x86-speculation-taa-fix-printing-of-taa_msg_smt-on-ibrs_all-cpus.patch
new file mode 100644 (file)
index 0000000..64ea152
--- /dev/null
@@ -0,0 +1,48 @@
+From foo@baz Tue 12 Nov 2019 04:11:14 PM CET
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+Date: Wed, 6 Nov 2019 20:26:46 -0600
+Subject: x86/speculation/taa: Fix printing of TAA_MSG_SMT on IBRS_ALL CPUs
+
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+
+commit 012206a822a8b6ac09125bfaa210a95b9eb8f1c1 upstream.
+
+For new IBRS_ALL CPUs, the Enhanced IBRS check at the beginning of
+cpu_bugs_smt_update() causes the function to return early, unintentionally
+skipping the MDS and TAA logic.
+
+This is not a problem for MDS, because there appears to be no overlap
+between IBRS_ALL and MDS-affected CPUs.  So the MDS mitigation would be
+disabled and nothing would need to be done in this function anyway.
+
+But for TAA, the TAA_MSG_SMT string will never get printed on Cascade
+Lake and newer.
+
+The check is superfluous anyway: when 'spectre_v2_enabled' is
+SPECTRE_V2_IBRS_ENHANCED, 'spectre_v2_user' is always
+SPECTRE_V2_USER_NONE, and so the 'spectre_v2_user' switch statement
+handles it appropriately by doing nothing.  So just remove the check.
+
+Fixes: 1b42f017415b ("x86/speculation/taa: Add mitigation for TSX Async Abort")
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Tyler Hicks <tyhicks@canonical.com>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/bugs.c |    4 ----
+ 1 file changed, 4 deletions(-)
+
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -874,10 +874,6 @@ static void update_mds_branch_idle(void)
+ void arch_smt_update(void)
+ {
+-      /* Enhanced IBRS implies STIBP. No update required. */
+-      if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
+-              return;
+-
+       mutex_lock(&spec_ctrl_mutex);
+       switch (spectre_v2_user) {
diff --git a/queue-4.14/x86-tsx-add-auto-option-to-the-tsx-cmdline-parameter.patch b/queue-4.14/x86-tsx-add-auto-option-to-the-tsx-cmdline-parameter.patch
new file mode 100644 (file)
index 0000000..4e61e21
--- /dev/null
@@ -0,0 +1,64 @@
+From foo@baz Tue 12 Nov 2019 04:11:14 PM CET
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Date: Wed, 23 Oct 2019 12:28:57 +0200
+Subject: x86/tsx: Add "auto" option to the tsx= cmdline parameter
+
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+
+commit 7531a3596e3272d1f6841e0d601a614555dc6b65 upstream.
+
+Platforms which are not affected by X86_BUG_TAA may want the TSX feature
+enabled. Add an "auto" option to the TSX cmdline parameter. With tsx=auto,
+TSX is disabled when X86_BUG_TAA is present, otherwise it is enabled.
+
+More details on X86_BUG_TAA can be found here:
+https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/tsx_async_abort.html
+
+ [ bp: Extend the arg buffer to accommodate "auto\0". ]
+
+Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Tony Luck <tony.luck@intel.com>
+Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/admin-guide/kernel-parameters.txt |    3 +++
+ arch/x86/kernel/cpu/tsx.c                       |    7 ++++++-
+ 2 files changed, 9 insertions(+), 1 deletion(-)
+
+--- a/Documentation/admin-guide/kernel-parameters.txt
++++ b/Documentation/admin-guide/kernel-parameters.txt
+@@ -4526,6 +4526,9 @@
+                               update. This new MSR allows for the reliable
+                               deactivation of the TSX functionality.)
++                      auto    - Disable TSX if X86_BUG_TAA is present,
++                                otherwise enable TSX on the system.
++
+                       Not specifying this option is equivalent to tsx=off.
+                       See Documentation/admin-guide/hw-vuln/tsx_async_abort.rst
+--- a/arch/x86/kernel/cpu/tsx.c
++++ b/arch/x86/kernel/cpu/tsx.c
+@@ -75,7 +75,7 @@ static bool __init tsx_ctrl_is_supported
+ void __init tsx_init(void)
+ {
+-      char arg[4] = {};
++      char arg[5] = {};
+       int ret;
+       if (!tsx_ctrl_is_supported())
+@@ -87,6 +87,11 @@ void __init tsx_init(void)
+                       tsx_ctrl_state = TSX_CTRL_ENABLE;
+               } else if (!strcmp(arg, "off")) {
+                       tsx_ctrl_state = TSX_CTRL_DISABLE;
++              } else if (!strcmp(arg, "auto")) {
++                      if (boot_cpu_has_bug(X86_BUG_TAA))
++                              tsx_ctrl_state = TSX_CTRL_DISABLE;
++                      else
++                              tsx_ctrl_state = TSX_CTRL_ENABLE;
+               } else {
+                       tsx_ctrl_state = TSX_CTRL_DISABLE;
+                       pr_err("tsx: invalid option, defaulting to off\n");
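The hunk above only shows tsx_ctrl_is_supported() as hunk context; the helper itself
comes from an earlier patch in this series. As a hedged sketch of what it is expected
to do (it gates tsx_init() on the TSX_CTRL enumeration bit in
MSR_IA32_ARCH_CAPABILITIES):

    /* Sketch only; the real helper is added elsewhere in this series. */
    static bool __init tsx_ctrl_is_supported(void)
    {
            u64 ia32_cap = x86_read_arch_cap_msr();

            /* MSR_IA32_TSX_CTRL exists only when ARCH_CAP_TSX_CTRL_MSR is set. */
            return !!(ia32_cap & ARCH_CAP_TSX_CTRL_MSR);
    }
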
diff --git a/queue-4.14/x86-tsx-add-config-options-to-set-tsx-on-off-auto.patch b/queue-4.14/x86-tsx-add-config-options-to-set-tsx-on-off-auto.patch
new file mode 100644 (file)
index 0000000..203590c
--- /dev/null
@@ -0,0 +1,135 @@
+From foo@baz Tue 12 Nov 2019 04:11:14 PM CET
+From: Michal Hocko <mhocko@suse.com>
+Date: Wed, 23 Oct 2019 12:35:50 +0200
+Subject: x86/tsx: Add config options to set tsx=on|off|auto
+
+From: Michal Hocko <mhocko@suse.com>
+
+commit db616173d787395787ecc93eef075fa975227b10 upstream.
+
+There is a general consensus that TSX usage is not widespread, while
+history shows there is non-trivial room for possible side channel
+attacks. Therefore TSX is disabled by default even on platforms that
+might have a safe implementation of TSX according to current knowledge.
+This is a fair trade off to make.
+
+There are, however, workloads that really do benefit from using TSX, and
+updating to a newer kernel with TSX disabled might introduce noticeable
+regressions. This would be especially a problem for Linux distributions
+which will provide TAA mitigations.
+
+Introduce config options X86_INTEL_TSX_MODE_OFF, X86_INTEL_TSX_MODE_ON
+and X86_INTEL_TSX_MODE_AUTO to control the TSX feature. The config
+setting can be overridden by the tsx cmdline options.
+
+ [ bp: Text cleanups from Josh. ]
+
+Suggested-by: Borislav Petkov <bpetkov@suse.de>
+Signed-off-by: Michal Hocko <mhocko@suse.com>
+Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/Kconfig          |   45 +++++++++++++++++++++++++++++++++++++++++++++
+ arch/x86/kernel/cpu/tsx.c |   22 ++++++++++++++++------
+ 2 files changed, 61 insertions(+), 6 deletions(-)
+
+--- a/arch/x86/Kconfig
++++ b/arch/x86/Kconfig
+@@ -1853,6 +1853,51 @@ config X86_INTEL_MEMORY_PROTECTION_KEYS
+         If unsure, say y.
++choice
++      prompt "TSX enable mode"
++      depends on CPU_SUP_INTEL
++      default X86_INTEL_TSX_MODE_OFF
++      help
++        Intel's TSX (Transactional Synchronization Extensions) feature
++        allows optimizing locking protocols through lock elision, which
++        can lead to a noticeable performance boost.
++
++        On the other hand it has been shown that TSX can be exploited
++        to form side channel attacks (e.g. TAA) and chances are there
++        will be more of those attacks discovered in the future.
++
++        Therefore TSX is not enabled by default (aka tsx=off). An admin
++        might override this decision with the tsx=on command line parameter.
++        Even with TSX enabled, the kernel will attempt to enable the best
++        possible TAA mitigation setting depending on the microcode available
++        for the particular machine.
++
++        This option allows setting the default tsx mode between tsx=on, =off
++        and =auto. See Documentation/admin-guide/kernel-parameters.txt for
++        more details.
++
++        Say off if not sure, auto if TSX is in use but should only be enabled
++        on platforms believed to be safe, or on if TSX is in use and the
++        security aspect of TSX is not relevant.
++
++config X86_INTEL_TSX_MODE_OFF
++      bool "off"
++      help
++        TSX is disabled if possible - equals the tsx=off command line parameter.
++
++config X86_INTEL_TSX_MODE_ON
++      bool "on"
++      help
++        TSX is always enabled on TSX capable HW - equals the tsx=on command
++        line parameter.
++
++config X86_INTEL_TSX_MODE_AUTO
++      bool "auto"
++      help
++        TSX is enabled on TSX capable HW that is believed to be safe against
++        side channel attacks - equals the tsx=auto command line parameter.
++endchoice
++
+ config EFI
+       bool "EFI runtime service support"
+       depends on ACPI
+--- a/arch/x86/kernel/cpu/tsx.c
++++ b/arch/x86/kernel/cpu/tsx.c
+@@ -73,6 +73,14 @@ static bool __init tsx_ctrl_is_supported
+       return !!(ia32_cap & ARCH_CAP_TSX_CTRL_MSR);
+ }
++static enum tsx_ctrl_states x86_get_tsx_auto_mode(void)
++{
++      if (boot_cpu_has_bug(X86_BUG_TAA))
++              return TSX_CTRL_DISABLE;
++
++      return TSX_CTRL_ENABLE;
++}
++
+ void __init tsx_init(void)
+ {
+       char arg[5] = {};
+@@ -88,17 +96,19 @@ void __init tsx_init(void)
+               } else if (!strcmp(arg, "off")) {
+                       tsx_ctrl_state = TSX_CTRL_DISABLE;
+               } else if (!strcmp(arg, "auto")) {
+-                      if (boot_cpu_has_bug(X86_BUG_TAA))
+-                              tsx_ctrl_state = TSX_CTRL_DISABLE;
+-                      else
+-                              tsx_ctrl_state = TSX_CTRL_ENABLE;
++                      tsx_ctrl_state = x86_get_tsx_auto_mode();
+               } else {
+                       tsx_ctrl_state = TSX_CTRL_DISABLE;
+                       pr_err("tsx: invalid option, defaulting to off\n");
+               }
+       } else {
+-              /* tsx= not provided, defaulting to off */
+-              tsx_ctrl_state = TSX_CTRL_DISABLE;
++              /* tsx= not provided */
++              if (IS_ENABLED(CONFIG_X86_INTEL_TSX_MODE_AUTO))
++                      tsx_ctrl_state = x86_get_tsx_auto_mode();
++              else if (IS_ENABLED(CONFIG_X86_INTEL_TSX_MODE_OFF))
++                      tsx_ctrl_state = TSX_CTRL_DISABLE;
++              else
++                      tsx_ctrl_state = TSX_CTRL_ENABLE;
+       }
+       if (tsx_ctrl_state == TSX_CTRL_DISABLE) {