Merge branch 'x86-pti-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git...

author Linus Torvalds <torvalds@linux-foundation.org>
Tue, 30 Jan 2018 03:08:02 +0000 (19:08 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Tue, 30 Jan 2018 03:08:02 +0000 (19:08 -0800)
diff --combined arch/x86/entry/entry_64.S

index ff6f8022612c565e7ed17ab6200ecf195f4d7bed,b4f00984089e98cd346f20e054ac78b5fba0bf77..a835704951629f2d741cc222ea479a362244ce06
--- 1/arch/x86/entry/entry_64.S
--- 2/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@@ -53,19 -53,15 +53,19 @@@ ENTRY(native_usergs_sysret64
   END(native_usergs_sysret64)
   #endif /* CONFIG_PARAVIRT */
   
- -.macro TRACE_IRQS_IRETQ
+ +.macro TRACE_IRQS_FLAGS flags:req
   #ifdef CONFIG_TRACE_IRQFLAGS
- -      bt      $9, EFLAGS(%rsp)                /* interrupts off? */
+ +      bt      $9, \flags              /* interrupts off? */
         jnc     1f
         TRACE_IRQS_ON
   1:
   #endif
   .endm
   
+ +.macro TRACE_IRQS_IRETQ
+ +      TRACE_IRQS_FLAGS EFLAGS(%rsp)
+ +.endm
+ +
   /*
    * When dynamic function tracer is enabled it will add a breakpoint
    * to all locations that it is about to modify, sync CPUs, update
@@@ -219,6 -215,8 +219,6 @@@ ENTRY(entry_SYSCALL_64
         movq    %rsp, PER_CPU_VAR(rsp_scratch)
         movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
   
- -      TRACE_IRQS_OFF
- -
         /* Construct struct pt_regs on stack */
         pushq   $__USER_DS                      /* pt_regs->ss */
         pushq   PER_CPU_VAR(rsp_scratch)        /* pt_regs->sp */
@@@ -239,8 -237,6 +239,8 @@@ GLOBAL(entry_SYSCALL_64_after_hwframe
         sub     $(6*8), %rsp                    /* pt_regs->bp, bx, r12-15 not saved */
         UNWIND_HINT_REGS extra=0
   
+ +      TRACE_IRQS_OFF
+ +
         /*
          * If we need to do entry work or if we guess we'll need to do
          * exit work, go straight to the slow path.
@@@ -499,7 -495,8 +499,8 @@@ ENTRY(__switch_to_asm
          * exist, overwrite the RSB with entries which capture
          * speculative execution to prevent attack.
          */
-       FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+       /* Clobbers %rbx */
+       FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
   #endif
   
         /* restore callee-saved registers */
@@@ -1113,13 -1110,11 +1114,13 @@@ ENTRY(native_load_gs_index
         FRAME_BEGIN
         pushfq
         DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI)
+ +      TRACE_IRQS_OFF
         SWAPGS
   .Lgs_change:
         movl    %edi, %gs
   2:    ALTERNATIVE "", "mfence", X86_BUG_SWAPGS_FENCE
         SWAPGS
+ +      TRACE_IRQS_FLAGS (%rsp)
         popfq
         FRAME_END
         ret
diff --combined arch/x86/include/asm/cpufeatures.h

index 67bbfaa1448b60e02df7527424c85aebef582cac,73b5fff159a434f13351bf0830f723885c1879cf..1d9199e1c2ad45531b21f5300439b5ef18943193
--- 1/arch/x86/include/asm/cpufeatures.h
--- 2/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@@ -13,7 -13,7 +13,7 @@@
   /*
    * Defines x86 CPU feature bits
    */
- #define NCAPINTS                      18         /* N 32-bit words worth of info */
+ #define NCAPINTS                      19         /* N 32-bit words worth of info */
   #define NBUGINTS                      1          /* N 32-bit bug flags */
   
   /*
@@@ -203,15 -203,14 +203,15 @@@
   #define X86_FEATURE_PROC_FEEDBACK     ( 7*32+ 9) /* AMD ProcFeedbackInterface */
   #define X86_FEATURE_SME                       ( 7*32+10) /* AMD Secure Memory Encryption */
   #define X86_FEATURE_PTI                       ( 7*32+11) /* Kernel Page Table Isolation enabled */
- #define X86_FEATURE_RETPOLINE         ( 7*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */
- #define X86_FEATURE_RETPOLINE_AMD     ( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */
+ #define X86_FEATURE_RETPOLINE         ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
+ #define X86_FEATURE_RETPOLINE_AMD     ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */
   #define X86_FEATURE_INTEL_PPIN                ( 7*32+14) /* Intel Processor Inventory Number */
- #define X86_FEATURE_AVX512_4VNNIW     ( 7*32+16) /* AVX-512 Neural Network Instructions */
- #define X86_FEATURE_AVX512_4FMAPS     ( 7*32+17) /* AVX-512 Multiply Accumulation Single precision */
+ +#define X86_FEATURE_CDP_L2            ( 7*32+15) /* Code and Data Prioritization L2 */
   
   #define X86_FEATURE_MBA                       ( 7*32+18) /* Memory Bandwidth Allocation */
- #define X86_FEATURE_RSB_CTXSW         ( 7*32+19) /* Fill RSB on context switches */
+ #define X86_FEATURE_RSB_CTXSW         ( 7*32+19) /* "" Fill RSB on context switches */
+ 
+ #define X86_FEATURE_USE_IBPB          ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */
   
   /* Virtualization flags: Linux defined, word 8 */
   #define X86_FEATURE_TPR_SHADOW                ( 8*32+ 0) /* Intel TPR Shadow */
@@@ -272,6 -271,9 +272,9 @@@
   #define X86_FEATURE_CLZERO            (13*32+ 0) /* CLZERO instruction */
   #define X86_FEATURE_IRPERF            (13*32+ 1) /* Instructions Retired Count */
   #define X86_FEATURE_XSAVEERPTR                (13*32+ 2) /* Always save/restore FP error pointers */
+ #define X86_FEATURE_IBPB              (13*32+12) /* Indirect Branch Prediction Barrier */
+ #define X86_FEATURE_IBRS              (13*32+14) /* Indirect Branch Restricted Speculation */
+ #define X86_FEATURE_STIBP             (13*32+15) /* Single Thread Indirect Branch Predictors */
   
   /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
   #define X86_FEATURE_DTHERM            (14*32+ 0) /* Digital Thermal Sensor */
@@@ -320,6 -322,13 +323,13 @@@
   #define X86_FEATURE_SUCCOR            (17*32+ 1) /* Uncorrectable error containment and recovery */
   #define X86_FEATURE_SMCA              (17*32+ 3) /* Scalable MCA */
   
+ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */
+ #define X86_FEATURE_AVX512_4VNNIW     (18*32+ 2) /* AVX-512 Neural Network Instructions */
+ #define X86_FEATURE_AVX512_4FMAPS     (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */
+ #define X86_FEATURE_SPEC_CTRL         (18*32+26) /* "" Speculation Control (IBRS + IBPB) */
+ #define X86_FEATURE_INTEL_STIBP               (18*32+27) /* "" Single Thread Indirect Branch Predictors */
+ #define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */
+ 
   /*
    * BUG word(s)
    */
diff --combined arch/x86/include/asm/disabled-features.h

index b027633e73003e121d7c043438ac7dbd10fc07a4,c6a3af198294e6128b623197107ac6e24cd30521..33833d1909afda4b31f266e2c25ce6226abb9e19
--- 1/arch/x86/include/asm/disabled-features.h
--- 2/arch/x86/include/asm/disabled-features.h
+++ b/arch/x86/include/asm/disabled-features.h
@@@ -16,12 -16,6 +16,12 @@@
   # define DISABLE_MPX  (1<<(X86_FEATURE_MPX & 31))
   #endif
   
+ +#ifdef CONFIG_X86_INTEL_UMIP
+ +# define DISABLE_UMIP 0
+ +#else
+ +# define DISABLE_UMIP (1<<(X86_FEATURE_UMIP & 31))
+ +#endif
+ +
   #ifdef CONFIG_X86_64
   # define DISABLE_VME          (1<<(X86_FEATURE_VME & 31))
   # define DISABLE_K6_MTRR      (1<<(X86_FEATURE_K6_MTRR & 31))
@@@ -75,8 -69,9 +75,9 @@@
   #define DISABLED_MASK13       0
   #define DISABLED_MASK14       0
   #define DISABLED_MASK15       0
- -#define DISABLED_MASK16       (DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57)
+ +#define DISABLED_MASK16       (DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57|DISABLE_UMIP)
   #define DISABLED_MASK17       0
- #define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18)
+ #define DISABLED_MASK18       0
+ #define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19)
   
   #endif /* _ASM_X86_DISABLED_FEATURES_H */
diff --combined arch/x86/include/asm/msr-index.h

index e7b983a355060a6c5b5db76f1fc18475eb647ae5,eb83ff1bae8f1a709039975aa383ebcb29eafc40..e520a1e6fc114ab0de9a9aa966574f1d01195cb1
--- 1/arch/x86/include/asm/msr-index.h
--- 2/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@@ -39,6 -39,13 +39,13 @@@
   
   /* Intel MSRs. Some also available on other CPUs */
   
+ #define MSR_IA32_SPEC_CTRL            0x00000048 /* Speculation Control */
+ #define SPEC_CTRL_IBRS                        (1 << 0)   /* Indirect Branch Restricted Speculation */
+ #define SPEC_CTRL_STIBP                       (1 << 1)   /* Single Thread Indirect Branch Predictors */
+ 
+ #define MSR_IA32_PRED_CMD             0x00000049 /* Prediction Command */
+ #define PRED_CMD_IBPB                 (1 << 0)   /* Indirect Branch Prediction Barrier */
+ 
   #define MSR_PPIN_CTL                  0x0000004e
   #define MSR_PPIN                      0x0000004f
   
@@@ -57,6 -64,11 +64,11 @@@
   #define SNB_C3_AUTO_UNDEMOTE          (1UL << 28)
   
   #define MSR_MTRRcap                   0x000000fe
+ 
+ #define MSR_IA32_ARCH_CAPABILITIES    0x0000010a
+ #define ARCH_CAP_RDCL_NO              (1 << 0)   /* Not susceptible to Meltdown */
+ #define ARCH_CAP_IBRS_ALL             (1 << 1)   /* Enhanced IBRS support */
+ 
   #define MSR_IA32_BBL_CR_CTL           0x00000119
   #define MSR_IA32_BBL_CR_CTL3          0x0000011e
   
@@@ -324,9 -336,6 +336,9 @@@
   #define MSR_AMD64_IBSBRTARGET         0xc001103b
   #define MSR_AMD64_IBSOPDATA4          0xc001103d
   #define MSR_AMD64_IBS_REG_COUNT_MAX   8 /* includes MSR_AMD64_IBSBRTARGET */
+ +#define MSR_AMD64_SEV                 0xc0010131
+ +#define MSR_AMD64_SEV_ENABLED_BIT     0
+ +#define MSR_AMD64_SEV_ENABLED         BIT_ULL(MSR_AMD64_SEV_ENABLED_BIT)
   
   /* Fam 17h MSRs */
   #define MSR_F17H_IRPERF                       0xc00000e9
diff --combined arch/x86/include/asm/processor.h

index d3a67fba200ae2a5c03f52a7815dca00b43c63ad,881ca3b1d6d49ffe8dcb23d82df717aeee772af5..efbde088a71826fe0ae933084b3e831a2e45b10e
--- 1/arch/x86/include/asm/processor.h
--- 2/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@@ -132,7 -132,6 +132,7 @@@ struct cpuinfo_x86 
         /* Index into per_cpu list: */
         u16                     cpu_index;
         u32                     microcode;
+ +      unsigned                initialized : 1;
   } __randomize_layout;
   
   struct cpuid_regs {
@@@ -971,4 -970,7 +971,7 @@@ bool xen_set_default_idle(void)
   
   void stop_this_cpu(void *dummy);
   void df_debug(struct pt_regs *regs, long error_code);
+ 
+ void __ibp_barrier(void);
+ 
   #endif /* _ASM_X86_PROCESSOR_H */
diff --combined arch/x86/kernel/alternative.c

index 4817d743c26359c697559053a23518a5e25b241b,14a52c7d23d46804fbf7f7382c8e9a505c2f1a86..30571fdaaf6ff9d54efb5e3b3c07f41a7e473a55
--- 1/arch/x86/kernel/alternative.c
--- 2/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@@ -298,7 -298,7 +298,7 @@@ recompute_jump(struct alt_instr *a, u8 
         tgt_rip  = next_rip + o_dspl;
         n_dspl = tgt_rip - orig_insn;
   
-       DPRINTK("target RIP: %p, new_displ: 0x%x", tgt_rip, n_dspl);
+       DPRINTK("target RIP: %px, new_displ: 0x%x", tgt_rip, n_dspl);
   
         if (tgt_rip - orig_insn >= 0) {
                 if (n_dspl - 2 <= 127)
@@@ -355,7 -355,7 +355,7 @@@ static void __init_or_module noinline o
         add_nops(instr + (a->instrlen - a->padlen), a->padlen);
         local_irq_restore(flags);
   
-       DUMP_BYTES(instr, a->instrlen, "%p: [%d:%d) optimized NOPs: ",
+       DUMP_BYTES(instr, a->instrlen, "%px: [%d:%d) optimized NOPs: ",
                    instr, a->instrlen - a->padlen, a->padlen);
   }
   
@@@ -376,7 -376,7 +376,7 @@@ void __init_or_module noinline apply_al
         u8 *instr, *replacement;
         u8 insnbuf[MAX_PATCH_LEN];
   
-       DPRINTK("alt table %p -> %p", start, end);
+       DPRINTK("alt table %px, -> %px", start, end);
         /*
          * The scan order should be from start to end. A later scanned
          * alternative code can overwrite previously scanned alternative code.
@@@ -400,14 -400,14 +400,14 @@@
                         continue;
                 }
   
-               DPRINTK("feat: %d*32+%d, old: (%p, len: %d), repl: (%p, len: %d), pad: %d",
+               DPRINTK("feat: %d*32+%d, old: (%px len: %d), repl: (%px, len: %d), pad: %d",
                         a->cpuid >> 5,
                         a->cpuid & 0x1f,
                         instr, a->instrlen,
                         replacement, a->replacementlen, a->padlen);
   
-               DUMP_BYTES(instr, a->instrlen, "%p: old_insn: ", instr);
-               DUMP_BYTES(replacement, a->replacementlen, "%p: rpl_insn: ", replacement);
+               DUMP_BYTES(instr, a->instrlen, "%px: old_insn: ", instr);
+               DUMP_BYTES(replacement, a->replacementlen, "%px: rpl_insn: ", replacement);
   
                 memcpy(insnbuf, replacement, a->replacementlen);
                 insnbuf_sz = a->replacementlen;
@@@ -433,7 -433,7 +433,7 @@@
                                  a->instrlen - a->replacementlen);
                         insnbuf_sz += a->instrlen - a->replacementlen;
                 }
-               DUMP_BYTES(insnbuf, insnbuf_sz, "%p: final_insn: ", instr);
+               DUMP_BYTES(insnbuf, insnbuf_sz, "%px: final_insn: ", instr);
   
                 text_poke_early(instr, insnbuf, insnbuf_sz);
         }
@@@ -445,6 -445,7 +445,6 @@@ static void alternatives_smp_lock(cons
   {
         const s32 *poff;
   
- -      mutex_lock(&text_mutex);
         for (poff = start; poff < end; poff++) {
                 u8 *ptr = (u8 *)poff + *poff;
   
@@@ -454,6 -455,7 +454,6 @@@
                 if (*ptr == 0x3e)
                         text_poke(ptr, ((unsigned char []){0xf0}), 1);
         }
- -      mutex_unlock(&text_mutex);
   }
   
   static void alternatives_smp_unlock(const s32 *start, const s32 *end,
@@@ -461,6 -463,7 +461,6 @@@
   {
         const s32 *poff;
   
- -      mutex_lock(&text_mutex);
         for (poff = start; poff < end; poff++) {
                 u8 *ptr = (u8 *)poff + *poff;
   
@@@ -470,6 -473,7 +470,6 @@@
                 if (*ptr == 0xf0)
                         text_poke(ptr, ((unsigned char []){0x3E}), 1);
         }
- -      mutex_unlock(&text_mutex);
   }
   
   struct smp_alt_module {
@@@ -488,7 -492,8 +488,7 @@@
         struct list_head next;
   };
   static LIST_HEAD(smp_alt_modules);
- -static DEFINE_MUTEX(smp_alt);
- -static bool uniproc_patched = false;  /* protected by smp_alt */
+ +static bool uniproc_patched = false;  /* protected by text_mutex */
   
   void __init_or_module alternatives_smp_module_add(struct module *mod,
                                                   char *name,
@@@ -497,7 -502,7 +497,7 @@@
   {
         struct smp_alt_module *smp;
   
- -      mutex_lock(&smp_alt);
+ +      mutex_lock(&text_mutex);
         if (!uniproc_patched)
                 goto unlock;
   
@@@ -524,14 -529,14 +524,14 @@@
   smp_unlock:
         alternatives_smp_unlock(locks, locks_end, text, text_end);
   unlock:
- -      mutex_unlock(&smp_alt);
+ +      mutex_unlock(&text_mutex);
   }
   
   void __init_or_module alternatives_smp_module_del(struct module *mod)
   {
         struct smp_alt_module *item;
   
- -      mutex_lock(&smp_alt);
+ +      mutex_lock(&text_mutex);
         list_for_each_entry(item, &smp_alt_modules, next) {
                 if (mod != item->mod)
                         continue;
@@@ -539,7 -544,7 +539,7 @@@
                 kfree(item);
                 break;
         }
- -      mutex_unlock(&smp_alt);
+ +      mutex_unlock(&text_mutex);
   }
   
   void alternatives_enable_smp(void)
@@@ -549,7 -554,7 +549,7 @@@
         /* Why bother if there are no other CPUs? */
         BUG_ON(num_possible_cpus() == 1);
   
- -      mutex_lock(&smp_alt);
+ +      mutex_lock(&text_mutex);
   
         if (uniproc_patched) {
                 pr_info("switching to SMP code\n");
@@@ -561,13 -566,10 +561,13 @@@
                                               mod->text, mod->text_end);
                 uniproc_patched = false;
         }
- -      mutex_unlock(&smp_alt);
+ +      mutex_unlock(&text_mutex);
   }
   
- -/* Return 1 if the address range is reserved for smp-alternatives */
+ +/*
+ + * Return 1 if the address range is reserved for SMP-alternatives.
+ + * Must hold text_mutex.
+ + */
   int alternatives_text_reserved(void *start, void *end)
   {
         struct smp_alt_module *mod;
@@@ -575,8 -577,6 +575,8 @@@
         u8 *text_start = start;
         u8 *text_end = end;
   
+ +      lockdep_assert_held(&text_mutex);
+ +
         list_for_each_entry(mod, &smp_alt_modules, next) {
                 if (mod->text > text_end || mod->text_end < text_start)
                         continue;
diff --combined arch/x86/kernel/cpu/common.c

index ef29ad001991d6acdd5b59cd707d85f9ff99f9ea,970ee06dc8aad645dc80d1a8f839aad4f667181e..c7c996a692fd9d6af8d36ac57d6ec70a3e47e620
--- 1/arch/x86/kernel/cpu/common.c
--- 2/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@@ -47,6 -47,8 +47,8 @@@
   #include <asm/pat.h>
   #include <asm/microcode.h>
   #include <asm/microcode_intel.h>
+ #include <asm/intel-family.h>
+ #include <asm/cpu_device_id.h>
   
   #ifdef CONFIG_X86_LOCAL_APIC
   #include <asm/uv/uv.h>
@@@ -329,30 -331,6 +331,30 @@@ static __always_inline void setup_smap(
         }
   }
   
+ +static __always_inline void setup_umip(struct cpuinfo_x86 *c)
+ +{
+ +      /* Check the boot processor, plus build option for UMIP. */
+ +      if (!cpu_feature_enabled(X86_FEATURE_UMIP))
+ +              goto out;
+ +
+ +      /* Check the current processor's cpuid bits. */
+ +      if (!cpu_has(c, X86_FEATURE_UMIP))
+ +              goto out;
+ +
+ +      cr4_set_bits(X86_CR4_UMIP);
+ +
+ +      pr_info("x86/cpu: Activated the Intel User Mode Instruction Prevention (UMIP) CPU feature\n");
+ +
+ +      return;
+ +
+ +out:
+ +      /*
+ +       * Make sure UMIP is disabled in case it was enabled in a
+ +       * previous boot (e.g., via kexec).
+ +       */
+ +      cr4_clear_bits(X86_CR4_UMIP);
+ +}
+ +
   /*
    * Protection Keys are not available in 32-bit mode.
    */
@@@ -769,6 -747,7 +771,7 @@@ void get_cpu_cap(struct cpuinfo_x86 *c
                 cpuid_count(0x00000007, 0, &eax, &ebx, &ecx, &edx);
                 c->x86_capability[CPUID_7_0_EBX] = ebx;
                 c->x86_capability[CPUID_7_ECX] = ecx;
+               c->x86_capability[CPUID_7_EDX] = edx;
         }
   
         /* Extended state features: level 0x0000000d */
@@@ -876,14 -855,49 +879,49 @@@ static void identify_cpu_without_cpuid(
   #endif
   }
   
+ static const __initdata struct x86_cpu_id cpu_no_speculation[] = {
+       { X86_VENDOR_INTEL,     6, INTEL_FAM6_ATOM_CEDARVIEW,   X86_FEATURE_ANY },
+       { X86_VENDOR_INTEL,     6, INTEL_FAM6_ATOM_CLOVERVIEW,  X86_FEATURE_ANY },
+       { X86_VENDOR_INTEL,     6, INTEL_FAM6_ATOM_LINCROFT,    X86_FEATURE_ANY },
+       { X86_VENDOR_INTEL,     6, INTEL_FAM6_ATOM_PENWELL,     X86_FEATURE_ANY },
+       { X86_VENDOR_INTEL,     6, INTEL_FAM6_ATOM_PINEVIEW,    X86_FEATURE_ANY },
+       { X86_VENDOR_CENTAUR,   5 },
+       { X86_VENDOR_INTEL,     5 },
+       { X86_VENDOR_NSC,       5 },
+       { X86_VENDOR_ANY,       4 },
+       {}
+ };
+ 
+ static const __initdata struct x86_cpu_id cpu_no_meltdown[] = {
+       { X86_VENDOR_AMD },
+       {}
+ };
+ 
+ static bool __init cpu_vulnerable_to_meltdown(struct cpuinfo_x86 *c)
+ {
+       u64 ia32_cap = 0;
+ 
+       if (x86_match_cpu(cpu_no_meltdown))
+               return false;
+ 
+       if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES))
+               rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);
+ 
+       /* Rogue Data Cache Load? No! */
+       if (ia32_cap & ARCH_CAP_RDCL_NO)
+               return false;
+ 
+       return true;
+ }
+ 
   /*
    * Do minimum CPU detection early.
    * Fields really needed: vendor, cpuid_level, family, model, mask,
    * cache alignment.
    * The others are not touched to avoid unwanted side effects.
    *
- - * WARNING: this function is only called on the BP.  Don't add code here
- - * that is supposed to run on all CPUs.
+ + * WARNING: this function is only called on the boot CPU.  Don't add code
+ + * here that is supposed to run on all CPUs.
    */
   static void __init early_identify_cpu(struct cpuinfo_x86 *c)
   {
@@@ -923,11 -937,12 +961,12 @@@
   
         setup_force_cpu_cap(X86_FEATURE_ALWAYS);
   
-       if (c->x86_vendor != X86_VENDOR_AMD)
-               setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
- 
-       setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
-       setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
+       if (!x86_match_cpu(cpu_no_speculation)) {
+               if (cpu_vulnerable_to_meltdown(c))
+                       setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
+               setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
+               setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
+       }
   
         fpu__init_system(c);
   
@@@ -1173,10 -1188,9 +1212,10 @@@ static void identify_cpu(struct cpuinfo
         /* Disable the PN if appropriate */
         squash_the_stupid_serial_number(c);
   
- -      /* Set up SMEP/SMAP */
+ +      /* Set up SMEP/SMAP/UMIP */
         setup_smep(c);
         setup_smap(c);
+ +      setup_umip(c);
   
         /*
          * The vendor-specific functions might have changed features.
diff --combined arch/x86/kernel/cpu/intel.c

index b1af22073e28cd59e773518a5f32f947287b5567,0c8b916abced5c7e22bb94df1f53471a38656cac..6936d14d4c779c77abb32b7890ad14cdc2d12aee
--- 1/arch/x86/kernel/cpu/intel.c
--- 2/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@@ -102,6 -102,59 +102,59 @@@ static void probe_xeon_phi_r3mwait(stru
                 ELF_HWCAP2 |= HWCAP2_RING3MWAIT;
   }
   
+ /*
+  * Early microcode releases for the Spectre v2 mitigation were broken.
+  * Information taken from;
+  * - https://newsroom.intel.com/wp-content/uploads/sites/11/2018/01/microcode-update-guidance.pdf
+  * - https://kb.vmware.com/s/article/52345
+  * - Microcode revisions observed in the wild
+  * - Release note from 20180108 microcode release
+  */
+ struct sku_microcode {
+       u8 model;
+       u8 stepping;
+       u32 microcode;
+ };
+ static const struct sku_microcode spectre_bad_microcodes[] = {
+       { INTEL_FAM6_KABYLAKE_DESKTOP,  0x0B,   0x84 },
+       { INTEL_FAM6_KABYLAKE_DESKTOP,  0x0A,   0x84 },
+       { INTEL_FAM6_KABYLAKE_DESKTOP,  0x09,   0x84 },
+       { INTEL_FAM6_KABYLAKE_MOBILE,   0x0A,   0x84 },
+       { INTEL_FAM6_KABYLAKE_MOBILE,   0x09,   0x84 },
+       { INTEL_FAM6_SKYLAKE_X,         0x03,   0x0100013e },
+       { INTEL_FAM6_SKYLAKE_X,         0x04,   0x0200003c },
+       { INTEL_FAM6_SKYLAKE_MOBILE,    0x03,   0xc2 },
+       { INTEL_FAM6_SKYLAKE_DESKTOP,   0x03,   0xc2 },
+       { INTEL_FAM6_BROADWELL_CORE,    0x04,   0x28 },
+       { INTEL_FAM6_BROADWELL_GT3E,    0x01,   0x1b },
+       { INTEL_FAM6_BROADWELL_XEON_D,  0x02,   0x14 },
+       { INTEL_FAM6_BROADWELL_XEON_D,  0x03,   0x07000011 },
+       { INTEL_FAM6_BROADWELL_X,       0x01,   0x0b000025 },
+       { INTEL_FAM6_HASWELL_ULT,       0x01,   0x21 },
+       { INTEL_FAM6_HASWELL_GT3E,      0x01,   0x18 },
+       { INTEL_FAM6_HASWELL_CORE,      0x03,   0x23 },
+       { INTEL_FAM6_HASWELL_X,         0x02,   0x3b },
+       { INTEL_FAM6_HASWELL_X,         0x04,   0x10 },
+       { INTEL_FAM6_IVYBRIDGE_X,       0x04,   0x42a },
+       /* Updated in the 20180108 release; blacklist until we know otherwise */
+       { INTEL_FAM6_ATOM_GEMINI_LAKE,  0x01,   0x22 },
+       /* Observed in the wild */
+       { INTEL_FAM6_SANDYBRIDGE_X,     0x06,   0x61b },
+       { INTEL_FAM6_SANDYBRIDGE_X,     0x07,   0x712 },
+ };
+ 
+ static bool bad_spectre_microcode(struct cpuinfo_x86 *c)
+ {
+       int i;
+ 
+       for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) {
+               if (c->x86_model == spectre_bad_microcodes[i].model &&
+                   c->x86_mask == spectre_bad_microcodes[i].stepping)
+                       return (c->microcode <= spectre_bad_microcodes[i].microcode);
+       }
+       return false;
+ }
+ 
   static void early_init_intel(struct cpuinfo_x86 *c)
   {
         u64 misc_enable;
@@@ -122,6 -175,30 +175,30 @@@
         if (c->x86 >= 6 && !cpu_has(c, X86_FEATURE_IA64))
                 c->microcode = intel_get_microcode_revision();
   
+       /*
+        * The Intel SPEC_CTRL CPUID bit implies IBRS and IBPB support,
+        * and they also have a different bit for STIBP support. Also,
+        * a hypervisor might have set the individual AMD bits even on
+        * Intel CPUs, for finer-grained selection of what's available.
+        */
+       if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) {
+               set_cpu_cap(c, X86_FEATURE_IBRS);
+               set_cpu_cap(c, X86_FEATURE_IBPB);
+       }
+       if (cpu_has(c, X86_FEATURE_INTEL_STIBP))
+               set_cpu_cap(c, X86_FEATURE_STIBP);
+ 
+       /* Now if any of them are set, check the blacklist and clear the lot */
+       if ((cpu_has(c, X86_FEATURE_IBRS) || cpu_has(c, X86_FEATURE_IBPB) ||
+            cpu_has(c, X86_FEATURE_STIBP)) && bad_spectre_microcode(c)) {
+               pr_warn("Intel Spectre v2 broken microcode detected; disabling Speculation Control\n");
+               clear_cpu_cap(c, X86_FEATURE_IBRS);
+               clear_cpu_cap(c, X86_FEATURE_IBPB);
+               clear_cpu_cap(c, X86_FEATURE_STIBP);
+               clear_cpu_cap(c, X86_FEATURE_SPEC_CTRL);
+               clear_cpu_cap(c, X86_FEATURE_INTEL_STIBP);
+       }
+ 
         /*
          * Atom erratum AAE44/AAF40/AAG38/AAH41:
          *
@@@ -187,6 -264,21 +264,6 @@@
         if (c->x86 == 6 && c->x86_model < 15)
                 clear_cpu_cap(c, X86_FEATURE_PAT);
   
- -#ifdef CONFIG_KMEMCHECK
- -      /*
- -       * P4s have a "fast strings" feature which causes single-
- -       * stepping REP instructions to only generate a #DB on
- -       * cache-line boundaries.
- -       *
- -       * Ingo Molnar reported a Pentium D (model 6) and a Xeon
- -       * (model 2) with the same problem.
- -       */
- -      if (c->x86 == 15)
- -              if (msr_clear_bit(MSR_IA32_MISC_ENABLE,
- -                                MSR_IA32_MISC_ENABLE_FAST_STRING_BIT) > 0)
- -                      pr_info("kmemcheck: Disabling fast string operations\n");
- -#endif
- -
         /*
          * If fast string is not enabled in IA32_MISC_ENABLE for any reason,
          * clear the fast string and enhanced fast string CPU capabilities.
diff --combined arch/x86/kernel/cpu/scattered.c

index df4d8f7595a5fa5787819d22b025d870762bcfdf,df11f5d604be83fba4b377861a240350e87b14c9..4075d2be5357e0605bb95e3face77d678cf369a2
--- 1/arch/x86/kernel/cpu/scattered.c
--- 2/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@@ -21,12 -21,9 +21,10 @@@ struct cpuid_bit 
   static const struct cpuid_bit cpuid_bits[] = {
         { X86_FEATURE_APERFMPERF,       CPUID_ECX,  0, 0x00000006, 0 },
         { X86_FEATURE_EPB,              CPUID_ECX,  3, 0x00000006, 0 },
-       { X86_FEATURE_AVX512_4VNNIW,    CPUID_EDX,  2, 0x00000007, 0 },
-       { X86_FEATURE_AVX512_4FMAPS,    CPUID_EDX,  3, 0x00000007, 0 },
         { X86_FEATURE_CAT_L3,           CPUID_EBX,  1, 0x00000010, 0 },
         { X86_FEATURE_CAT_L2,           CPUID_EBX,  2, 0x00000010, 0 },
         { X86_FEATURE_CDP_L3,           CPUID_ECX,  2, 0x00000010, 1 },
+ +      { X86_FEATURE_CDP_L2,           CPUID_ECX,  2, 0x00000010, 2 },
         { X86_FEATURE_MBA,              CPUID_EBX,  3, 0x00000010, 0 },
         { X86_FEATURE_HW_PSTATE,        CPUID_EDX,  7, 0x80000007, 0 },
         { X86_FEATURE_CPB,              CPUID_EDX,  9, 0x80000007, 0 },
diff --combined arch/x86/kvm/emulate.c

index b514b2b2845a334d4b53f28ed0b73c96f12d0e6a,453d8c99010822222b8a1d9433d0bdfc6dc50fd9..290ecf711aec2d6684fd296d07d7379a88232027
--- 1/arch/x86/kvm/emulate.c
--- 2/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@@ -25,6 -25,7 +25,7 @@@
   #include <asm/kvm_emulate.h>
   #include <linux/stringify.h>
   #include <asm/debugreg.h>
+ #include <asm/nospec-branch.h>
   
   #include "x86.h"
   #include "tss.h"
@@@ -1021,8 -1022,8 +1022,8 @@@ static __always_inline u8 test_cc(unsig
         void (*fop)(void) = (void *)em_setcc + 4 * (condition & 0xf);
   
         flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF;
-       asm("push %[flags]; popf; call *%[fastop]"
-           : "=a"(rc) : [fastop]"r"(fop), [flags]"r"(flags));
+       asm("push %[flags]; popf; " CALL_NOSPEC
+           : "=a"(rc) : [thunk_target]"r"(fop), [flags]"r"(flags));
         return rc;
   }
   
@@@ -1046,6 -1047,7 +1047,6 @@@ static void fetch_register_operand(stru
   
   static void read_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg)
   {
- -      ctxt->ops->get_fpu(ctxt);
         switch (reg) {
         case 0: asm("movdqa %%xmm0, %0" : "=m"(*data)); break;
         case 1: asm("movdqa %%xmm1, %0" : "=m"(*data)); break;
@@@ -1067,11 -1069,13 +1068,11 @@@
   #endif
         default: BUG();
         }
- -      ctxt->ops->put_fpu(ctxt);
   }
   
   static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data,
                           int reg)
   {
- -      ctxt->ops->get_fpu(ctxt);
         switch (reg) {
         case 0: asm("movdqa %0, %%xmm0" : : "m"(*data)); break;
         case 1: asm("movdqa %0, %%xmm1" : : "m"(*data)); break;
@@@ -1093,10 -1097,12 +1094,10 @@@
   #endif
         default: BUG();
         }
- -      ctxt->ops->put_fpu(ctxt);
   }
   
   static void read_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg)
   {
- -      ctxt->ops->get_fpu(ctxt);
         switch (reg) {
         case 0: asm("movq %%mm0, %0" : "=m"(*data)); break;
         case 1: asm("movq %%mm1, %0" : "=m"(*data)); break;
@@@ -1108,10 -1114,12 +1109,10 @@@
         case 7: asm("movq %%mm7, %0" : "=m"(*data)); break;
         default: BUG();
         }
- -      ctxt->ops->put_fpu(ctxt);
   }
   
   static void write_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg)
   {
- -      ctxt->ops->get_fpu(ctxt);
         switch (reg) {
         case 0: asm("movq %0, %%mm0" : : "m"(*data)); break;
         case 1: asm("movq %0, %%mm1" : : "m"(*data)); break;
@@@ -1123,6 -1131,7 +1124,6 @@@
         case 7: asm("movq %0, %%mm7" : : "m"(*data)); break;
         default: BUG();
         }
- -      ctxt->ops->put_fpu(ctxt);
   }
   
   static int em_fninit(struct x86_emulate_ctxt *ctxt)
@@@ -1130,7 -1139,9 +1131,7 @@@
         if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
                 return emulate_nm(ctxt);
   
- -      ctxt->ops->get_fpu(ctxt);
         asm volatile("fninit");
- -      ctxt->ops->put_fpu(ctxt);
         return X86EMUL_CONTINUE;
   }
   
@@@ -1141,7 -1152,9 +1142,7 @@@ static int em_fnstcw(struct x86_emulate
         if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
                 return emulate_nm(ctxt);
   
- -      ctxt->ops->get_fpu(ctxt);
         asm volatile("fnstcw %0": "+m"(fcw));
- -      ctxt->ops->put_fpu(ctxt);
   
         ctxt->dst.val = fcw;
   
@@@ -1155,7 -1168,9 +1156,7 @@@ static int em_fnstsw(struct x86_emulate
         if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
                 return emulate_nm(ctxt);
   
- -      ctxt->ops->get_fpu(ctxt);
         asm volatile("fnstsw %0": "+m"(fsw));
- -      ctxt->ops->put_fpu(ctxt);
   
         ctxt->dst.val = fsw;
   
@@@ -2390,21 -2405,9 +2391,21 @@@ static int rsm_load_seg_64(struct x86_e
   }
   
   static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
- -                                   u64 cr0, u64 cr4)
+ +                                  u64 cr0, u64 cr3, u64 cr4)
   {
         int bad;
+ +      u64 pcid;
+ +
+ +      /* In order to later set CR4.PCIDE, CR3[11:0] must be zero.  */
+ +      pcid = 0;
+ +      if (cr4 & X86_CR4_PCIDE) {
+ +              pcid = cr3 & 0xfff;
+ +              cr3 &= ~0xfff;
+ +      }
+ +
+ +      bad = ctxt->ops->set_cr(ctxt, 3, cr3);
+ +      if (bad)
+ +              return X86EMUL_UNHANDLEABLE;
   
         /*
          * First enable PAE, long mode needs it before CR0.PG = 1 is set.
@@@ -2423,12 -2426,6 +2424,12 @@@
                 bad = ctxt->ops->set_cr(ctxt, 4, cr4);
                 if (bad)
                         return X86EMUL_UNHANDLEABLE;
+ +              if (pcid) {
+ +                      bad = ctxt->ops->set_cr(ctxt, 3, cr3 | pcid);
+ +                      if (bad)
+ +                              return X86EMUL_UNHANDLEABLE;
+ +              }
+ +
         }
   
         return X86EMUL_CONTINUE;
@@@ -2439,11 -2436,11 +2440,11 @@@ static int rsm_load_state_32(struct x86
         struct desc_struct desc;
         struct desc_ptr dt;
         u16 selector;
- -      u32 val, cr0, cr4;
+ +      u32 val, cr0, cr3, cr4;
         int i;
   
         cr0 =                      GET_SMSTATE(u32, smbase, 0x7ffc);
- -      ctxt->ops->set_cr(ctxt, 3, GET_SMSTATE(u32, smbase, 0x7ff8));
+ +      cr3 =                      GET_SMSTATE(u32, smbase, 0x7ff8);
         ctxt->eflags =             GET_SMSTATE(u32, smbase, 0x7ff4) | X86_EFLAGS_FIXED;
         ctxt->_eip =               GET_SMSTATE(u32, smbase, 0x7ff0);
   
@@@ -2485,14 -2482,14 +2486,14 @@@
   
         ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7ef8));
   
- -      return rsm_enter_protected_mode(ctxt, cr0, cr4);
+ +      return rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
   }
   
   static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
   {
         struct desc_struct desc;
         struct desc_ptr dt;
- -      u64 val, cr0, cr4;
+ +      u64 val, cr0, cr3, cr4;
         u32 base3;
         u16 selector;
         int i, r;
@@@ -2509,7 -2506,7 +2510,7 @@@
         ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1);
   
         cr0 =                       GET_SMSTATE(u64, smbase, 0x7f58);
- -      ctxt->ops->set_cr(ctxt, 3,  GET_SMSTATE(u64, smbase, 0x7f50));
+ +      cr3 =                       GET_SMSTATE(u64, smbase, 0x7f50);
         cr4 =                       GET_SMSTATE(u64, smbase, 0x7f48);
         ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7f00));
         val =                       GET_SMSTATE(u64, smbase, 0x7ed0);
@@@ -2537,7 -2534,7 +2538,7 @@@
         dt.address =                GET_SMSTATE(u64, smbase, 0x7e68);
         ctxt->ops->set_gdt(ctxt, &dt);
   
- -      r = rsm_enter_protected_mode(ctxt, cr0, cr4);
+ +      r = rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
         if (r != X86EMUL_CONTINUE)
                 return r;
   
@@@ -2595,15 -2592,6 +2596,15 @@@ static int em_rsm(struct x86_emulate_ct
         ctxt->ops->set_msr(ctxt, MSR_EFER, efer);
   
         smbase = ctxt->ops->get_smbase(ctxt);
+ +
+ +      /*
+ +       * Give pre_leave_smm() a chance to make ISA-specific changes to the
+ +       * vCPU state (e.g. enter guest mode) before loading state from the SMM
+ +       * state-save area.
+ +       */
+ +      if (ctxt->ops->pre_leave_smm(ctxt, smbase))
+ +              return X86EMUL_UNHANDLEABLE;
+ +
         if (emulator_has_longmode(ctxt))
                 ret = rsm_load_state_64(ctxt, smbase + 0x8000);
         else
@@@ -4005,8 -3993,12 +4006,8 @@@ static int em_fxsave(struct x86_emulate
         if (rc != X86EMUL_CONTINUE)
                 return rc;
   
- -      ctxt->ops->get_fpu(ctxt);
- -
         rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_state));
   
- -      ctxt->ops->put_fpu(ctxt);
- -
         if (rc != X86EMUL_CONTINUE)
                 return rc;
   
@@@ -4014,26 -4006,6 +4015,26 @@@
                                    fxstate_size(ctxt));
   }
   
+ +/*
+ + * FXRSTOR might restore XMM registers not provided by the guest. Fill
+ + * in the host registers (via FXSAVE) instead, so they won't be modified.
+ + * (preemption has to stay disabled until FXRSTOR).
+ + *
+ + * Use noinline to keep the stack for other functions called by callers small.
+ + */
+ +static noinline int fxregs_fixup(struct fxregs_state *fx_state,
+ +                               const size_t used_size)
+ +{
+ +      struct fxregs_state fx_tmp;
+ +      int rc;
+ +
+ +      rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_tmp));
+ +      memcpy((void *)fx_state + used_size, (void *)&fx_tmp + used_size,
+ +             __fxstate_size(16) - used_size);
+ +
+ +      return rc;
+ +}
+ +
   static int em_fxrstor(struct x86_emulate_ctxt *ctxt)
   {
         struct fxregs_state fx_state;
@@@ -4044,17 -4016,19 +4045,17 @@@
         if (rc != X86EMUL_CONTINUE)
                 return rc;
   
- -      ctxt->ops->get_fpu(ctxt);
- -
         size = fxstate_size(ctxt);
+ +      rc = segmented_read_std(ctxt, ctxt->memop.addr.mem, &fx_state, size);
+ +      if (rc != X86EMUL_CONTINUE)
+ +              return rc;
+ +
         if (size < __fxstate_size(16)) {
- -              rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_state));
+ +              rc = fxregs_fixup(&fx_state, size);
                 if (rc != X86EMUL_CONTINUE)
                         goto out;
         }
   
- -      rc = segmented_read_std(ctxt, ctxt->memop.addr.mem, &fx_state, size);
- -      if (rc != X86EMUL_CONTINUE)
- -              goto out;
- -
         if (fx_state.mxcsr >> 16) {
                 rc = emulate_gp(ctxt, 0);
                 goto out;
@@@ -4064,6 -4038,8 +4065,6 @@@
                 rc = asm_safe("fxrstor %[fx]", : [fx] "m"(fx_state));
   
   out:
- -      ctxt->ops->put_fpu(ctxt);
- -
         return rc;
   }
   
@@@ -5016,8 -4992,6 +5017,8 @@@ int x86_decode_insn(struct x86_emulate_
         bool op_prefix = false;
         bool has_seg_override = false;
         struct opcode opcode;
+ +      u16 dummy;
+ +      struct desc_struct desc;
   
         ctxt->memop.type = OP_NONE;
         ctxt->memopp = NULL;
@@@ -5036,11 -5010,6 +5037,11 @@@
         switch (mode) {
         case X86EMUL_MODE_REAL:
         case X86EMUL_MODE_VM86:
+ +              def_op_bytes = def_ad_bytes = 2;
+ +              ctxt->ops->get_segment(ctxt, &dummy, &desc, NULL, VCPU_SREG_CS);
+ +              if (desc.d)
+ +                      def_op_bytes = def_ad_bytes = 4;
+ +              break;
         case X86EMUL_MODE_PROT16:
                 def_op_bytes = def_ad_bytes = 2;
                 break;
@@@ -5313,7 -5282,9 +5314,7 @@@ static int flush_pending_x87_faults(str
   {
         int rc;
   
- -      ctxt->ops->get_fpu(ctxt);
         rc = asm_safe("fwait");
- -      ctxt->ops->put_fpu(ctxt);
   
         if (unlikely(rc != X86EMUL_CONTINUE))
                 return emulate_exception(ctxt, MF_VECTOR, 0, false);
@@@ -5335,9 -5306,9 +5336,9 @@@ static int fastop(struct x86_emulate_ct
         if (!(ctxt->d & ByteOp))
                 fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE;
   
-       asm("push %[flags]; popf; call *%[fastop]; pushf; pop %[flags]\n"
+       asm("push %[flags]; popf; " CALL_NOSPEC " ; pushf; pop %[flags]\n"
             : "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags),
-             [fastop]"+S"(fop), ASM_CALL_CONSTRAINT
+             [thunk_target]"+S"(fop), ASM_CALL_CONSTRAINT
             : "c"(ctxt->src2.val));
   
         ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK);
diff --combined arch/x86/kvm/vmx.c

index c829d89e2e63f85bc36c6821b0ca1d7712297043,924589c5342298a8d200c1a31dd1d5d45411889b..a8b96dc4cd83effdd0239363f700192e2e19b4fb
--- 1/arch/x86/kvm/vmx.c
--- 2/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@@ -71,9 -71,6 +71,9 @@@ MODULE_DEVICE_TABLE(x86cpu, vmx_cpu_id)
   static bool __read_mostly enable_vpid = 1;
   module_param_named(vpid, enable_vpid, bool, 0444);
   
+ +static bool __read_mostly enable_vnmi = 1;
+ +module_param_named(vnmi, enable_vnmi, bool, S_IRUGO);
+ +
   static bool __read_mostly flexpriority_enabled = 1;
   module_param_named(flexpriority, flexpriority_enabled, bool, S_IRUGO);
   
@@@ -206,10 -203,6 +206,10 @@@ struct loaded_vmcs 
         bool nmi_known_unmasked;
         unsigned long vmcs_host_cr3;    /* May not match real cr3 */
         unsigned long vmcs_host_cr4;    /* May not match real cr4 */
+ +      /* Support for vnmi-less CPUs */
+ +      int soft_vnmi_blocked;
+ +      ktime_t entry_time;
+ +      s64 vnmi_blocked_time;
         struct list_head loaded_vmcss_on_cpu_link;
   };
   
@@@ -494,14 -487,6 +494,14 @@@ struct nested_vmx 
         u64 nested_vmx_cr4_fixed1;
         u64 nested_vmx_vmcs_enum;
         u64 nested_vmx_vmfunc_controls;
+ +
+ +      /* SMM related state */
+ +      struct {
+ +              /* in VMX operation on SMM entry? */
+ +              bool vmxon;
+ +              /* in guest mode on SMM entry? */
+ +              bool guest_mode;
+ +      } smm;
   };
   
   #define POSTED_INTR_ON  0
@@@ -900,16 -885,8 +900,16 @@@ static inline short vmcs_field_to_offse
   {
         BUILD_BUG_ON(ARRAY_SIZE(vmcs_field_to_offset_table) > SHRT_MAX);
   
- -      if (field >= ARRAY_SIZE(vmcs_field_to_offset_table) ||
- -          vmcs_field_to_offset_table[field] == 0)
+ +      if (field >= ARRAY_SIZE(vmcs_field_to_offset_table))
+ +              return -ENOENT;
+ +
+ +      /*
+ +       * FIXME: Mitigation for CVE-2017-5753.  To be replaced with a
+ +       * generic mechanism.
+ +       */
+ +      asm("lfence");
+ +
+ +      if (vmcs_field_to_offset_table[field] == 0)
                 return -ENOENT;
   
         return vmcs_field_to_offset_table[field];
@@@ -924,13 -901,16 +924,13 @@@ static bool nested_ept_ad_enabled(struc
   static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu);
   static u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa);
   static bool vmx_xsaves_supported(void);
- -static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr);
   static void vmx_set_segment(struct kvm_vcpu *vcpu,
                             struct kvm_segment *var, int seg);
   static void vmx_get_segment(struct kvm_vcpu *vcpu,
                             struct kvm_segment *var, int seg);
   static bool guest_state_valid(struct kvm_vcpu *vcpu);
   static u32 vmx_segment_access_rights(struct kvm_segment *var);
- -static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx);
   static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx);
- -static int alloc_identity_pagetable(struct kvm *kvm);
   static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu);
   static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked);
   static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12,
@@@ -1307,11 -1287,6 +1307,11 @@@ static inline bool cpu_has_vmx_invpcid(
                 SECONDARY_EXEC_ENABLE_INVPCID;
   }
   
+ +static inline bool cpu_has_virtual_nmis(void)
+ +{
+ +      return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS;
+ +}
+ +
   static inline bool cpu_has_vmx_wbinvd_exit(void)
   {
         return vmcs_config.cpu_based_2nd_exec_ctrl &
@@@ -1369,6 -1344,11 +1369,6 @@@ static inline bool nested_cpu_has2(stru
                 (vmcs12->secondary_vm_exec_control & bit);
   }
   
- -static inline bool nested_cpu_has_virtual_nmis(struct vmcs12 *vmcs12)
- -{
- -      return vmcs12->pin_based_vm_exec_control & PIN_BASED_VIRTUAL_NMIS;
- -}
- -
   static inline bool nested_cpu_has_preemption_timer(struct vmcs12 *vmcs12)
   {
         return vmcs12->pin_based_vm_exec_control &
@@@ -1619,15 -1599,18 +1619,15 @@@ static inline void vpid_sync_context(in
   
   static inline void ept_sync_global(void)
   {
- -      if (cpu_has_vmx_invept_global())
- -              __invept(VMX_EPT_EXTENT_GLOBAL, 0, 0);
+ +      __invept(VMX_EPT_EXTENT_GLOBAL, 0, 0);
   }
   
   static inline void ept_sync_context(u64 eptp)
   {
- -      if (enable_ept) {
- -              if (cpu_has_vmx_invept_context())
- -                      __invept(VMX_EPT_EXTENT_CONTEXT, eptp, 0);
- -              else
- -                      ept_sync_global();
- -      }
+ +      if (cpu_has_vmx_invept_context())
+ +              __invept(VMX_EPT_EXTENT_CONTEXT, eptp, 0);
+ +      else
+ +              ept_sync_global();
   }
   
   static __always_inline void vmcs_check16(unsigned long field)
@@@ -2849,7 -2832,8 +2849,7 @@@ static void nested_vmx_setup_ctls_msrs(
                                 SECONDARY_EXEC_ENABLE_PML;
                         vmx->nested.nested_vmx_ept_caps |= VMX_EPT_AD_BIT;
                 }
- -      } else
- -              vmx->nested.nested_vmx_ept_caps = 0;
+ +      }
   
         if (cpu_has_vmx_vmfunc()) {
                 vmx->nested.nested_vmx_secondary_ctls_high |=
@@@ -2858,9 -2842,8 +2858,9 @@@
                  * Advertise EPTP switching unconditionally
                  * since we emulate it
                  */
- -              vmx->nested.nested_vmx_vmfunc_controls =
- -                      VMX_VMFUNC_EPTP_SWITCHING;
+ +              if (enable_ept)
+ +                      vmx->nested.nested_vmx_vmfunc_controls =
+ +                              VMX_VMFUNC_EPTP_SWITCHING;
         }
   
         /*
@@@ -2874,7 -2857,8 +2874,7 @@@
                         SECONDARY_EXEC_ENABLE_VPID;
                 vmx->nested.nested_vmx_vpid_caps = VMX_VPID_INVVPID_BIT |
                         VMX_VPID_EXTENT_SUPPORTED_MASK;
- -      } else
- -              vmx->nested.nested_vmx_vpid_caps = 0;
+ +      }
   
         if (enable_unrestricted_guest)
                 vmx->nested.nested_vmx_secondary_ctls_high |=
@@@ -3561,8 -3545,7 +3561,8 @@@ static int hardware_enable(void
                 wrmsrl(MSR_IA32_FEATURE_CONTROL, old | test_bits);
         }
         kvm_cpu_vmxon(phys_addr);
- -      ept_sync_global();
+ +      if (enable_ept)
+ +              ept_sync_global();
   
         return 0;
   }
@@@ -3675,8 -3658,8 +3675,8 @@@ static __init int setup_vmcs_config(str
                         SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
                         SECONDARY_EXEC_SHADOW_VMCS |
                         SECONDARY_EXEC_XSAVES |
- -                      SECONDARY_EXEC_RDSEED |
- -                      SECONDARY_EXEC_RDRAND |
+ +                      SECONDARY_EXEC_RDSEED_EXITING |
+ +                      SECONDARY_EXEC_RDRAND_EXITING |
                         SECONDARY_EXEC_ENABLE_PML |
                         SECONDARY_EXEC_TSC_SCALING |
                         SECONDARY_EXEC_ENABLE_VMFUNC;
@@@ -3697,25 -3680,14 +3697,25 @@@
                                 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
                                 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
   
+ +      rdmsr_safe(MSR_IA32_VMX_EPT_VPID_CAP,
+ +              &vmx_capability.ept, &vmx_capability.vpid);
+ +
         if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) {
                 /* CR3 accesses and invlpg don't need to cause VM Exits when EPT
                    enabled */
                 _cpu_based_exec_control &= ~(CPU_BASED_CR3_LOAD_EXITING |
                                              CPU_BASED_CR3_STORE_EXITING |
                                              CPU_BASED_INVLPG_EXITING);
- -              rdmsr(MSR_IA32_VMX_EPT_VPID_CAP,
- -                    vmx_capability.ept, vmx_capability.vpid);
+ +      } else if (vmx_capability.ept) {
+ +              vmx_capability.ept = 0;
+ +              pr_warn_once("EPT CAP should not exist if not support "
+ +                              "1-setting enable EPT VM-execution control\n");
+ +      }
+ +      if (!(_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_VPID) &&
+ +              vmx_capability.vpid) {
+ +              vmx_capability.vpid = 0;
+ +              pr_warn_once("VPID CAP should not exist if not support "
+ +                              "1-setting enable VPID VM-execution control\n");
         }
   
         min = VM_EXIT_SAVE_DEBUG_CONTROLS | VM_EXIT_ACK_INTR_ON_EXIT;
@@@ -3728,9 -3700,9 +3728,9 @@@
                                 &_vmexit_control) < 0)
                 return -EIO;
   
- -      min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING |
- -              PIN_BASED_VIRTUAL_NMIS;
- -      opt = PIN_BASED_POSTED_INTR | PIN_BASED_VMX_PREEMPTION_TIMER;
+ +      min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
+ +      opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR |
+ +               PIN_BASED_VMX_PREEMPTION_TIMER;
         if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
                                 &_pin_based_exec_control) < 0)
                 return -EIO;
@@@ -4810,18 -4782,18 +4810,18 @@@ static int init_rmode_identity_map(stru
         kvm_pfn_t identity_map_pfn;
         u32 tmp;
   
- -      if (!enable_ept)
- -              return 0;
- -
         /* Protect kvm->arch.ept_identity_pagetable_done. */
         mutex_lock(&kvm->slots_lock);
   
         if (likely(kvm->arch.ept_identity_pagetable_done))
                 goto out2;
   
+ +      if (!kvm->arch.ept_identity_map_addr)
+ +              kvm->arch.ept_identity_map_addr = VMX_EPT_IDENTITY_PAGETABLE_ADDR;
         identity_map_pfn = kvm->arch.ept_identity_map_addr >> PAGE_SHIFT;
   
- -      r = alloc_identity_pagetable(kvm);
+ +      r = __x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT,
+ +                                  kvm->arch.ept_identity_map_addr, PAGE_SIZE);
         if (r < 0)
                 goto out2;
   
@@@ -4893,6 -4865,20 +4893,6 @@@ out
         return r;
   }
   
- -static int alloc_identity_pagetable(struct kvm *kvm)
- -{
- -      /* Called with kvm->slots_lock held. */
- -
- -      int r = 0;
- -
- -      BUG_ON(kvm->arch.ept_identity_pagetable_done);
- -
- -      r = __x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT,
- -                                  kvm->arch.ept_identity_map_addr, PAGE_SIZE);
- -
- -      return r;
- -}
- -
   static int allocate_vpid(void)
   {
         int vpid;
@@@ -5248,10 -5234,6 +5248,10 @@@ static u32 vmx_pin_based_exec_ctrl(stru
   
         if (!kvm_vcpu_apicv_active(&vmx->vcpu))
                 pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR;
+ +
+ +      if (!enable_vnmi)
+ +              pin_based_exec_ctrl &= ~PIN_BASED_VIRTUAL_NMIS;
+ +
         /* Enable the preemption timer dynamically */
         pin_based_exec_ctrl &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
         return pin_based_exec_ctrl;
@@@ -5301,13 -5283,13 +5301,13 @@@ static u32 vmx_exec_control(struct vcpu
   static bool vmx_rdrand_supported(void)
   {
         return vmcs_config.cpu_based_2nd_exec_ctrl &
- -              SECONDARY_EXEC_RDRAND;
+ +              SECONDARY_EXEC_RDRAND_EXITING;
   }
   
   static bool vmx_rdseed_supported(void)
   {
         return vmcs_config.cpu_based_2nd_exec_ctrl &
- -              SECONDARY_EXEC_RDSEED;
+ +              SECONDARY_EXEC_RDSEED_EXITING;
   }
   
   static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
@@@ -5401,30 -5383,30 +5401,30 @@@
         if (vmx_rdrand_supported()) {
                 bool rdrand_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDRAND);
                 if (rdrand_enabled)
- -                      exec_control &= ~SECONDARY_EXEC_RDRAND;
+ +                      exec_control &= ~SECONDARY_EXEC_RDRAND_EXITING;
   
                 if (nested) {
                         if (rdrand_enabled)
                                 vmx->nested.nested_vmx_secondary_ctls_high |=
- -                                      SECONDARY_EXEC_RDRAND;
+ +                                      SECONDARY_EXEC_RDRAND_EXITING;
                         else
                                 vmx->nested.nested_vmx_secondary_ctls_high &=
- -                                      ~SECONDARY_EXEC_RDRAND;
+ +                                      ~SECONDARY_EXEC_RDRAND_EXITING;
                 }
         }
   
         if (vmx_rdseed_supported()) {
                 bool rdseed_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDSEED);
                 if (rdseed_enabled)
- -                      exec_control &= ~SECONDARY_EXEC_RDSEED;
+ +                      exec_control &= ~SECONDARY_EXEC_RDSEED_EXITING;
   
                 if (nested) {
                         if (rdseed_enabled)
                                 vmx->nested.nested_vmx_secondary_ctls_high |=
- -                                      SECONDARY_EXEC_RDSEED;
+ +                                      SECONDARY_EXEC_RDSEED_EXITING;
                         else
                                 vmx->nested.nested_vmx_secondary_ctls_high &=
- -                                      ~SECONDARY_EXEC_RDSEED;
+ +                                      ~SECONDARY_EXEC_RDSEED_EXITING;
                 }
         }
   
@@@ -5445,7 -5427,7 +5445,7 @@@ static void ept_set_mmio_spte_mask(void
   /*
    * Sets up the vmcs for emulated real mode.
    */
- -static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
+ +static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
   {
   #ifdef CONFIG_X86_64
         unsigned long a;
@@@ -5558,6 -5540,8 +5558,6 @@@
                 vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
                 vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
         }
- -
- -      return 0;
   }
   
   static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
@@@ -5609,7 -5593,7 +5609,7 @@@
                 vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
         }
   
- -      vmcs_writel(GUEST_RFLAGS, 0x02);
+ +      kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
         kvm_rip_write(vcpu, 0xfff0);
   
         vmcs_writel(GUEST_GDTR_BASE, 0);
@@@ -5621,8 -5605,6 +5621,8 @@@
         vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
         vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0);
         vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, 0);
+ +      if (kvm_mpx_supported())
+ +              vmcs_write64(GUEST_BNDCFGS, 0);
   
         setup_msrs(vmx);
   
@@@ -5686,8 -5668,7 +5686,8 @@@ static void enable_irq_window(struct kv
   
   static void enable_nmi_window(struct kvm_vcpu *vcpu)
   {
- -      if (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) {
+ +      if (!enable_vnmi ||
+ +          vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) {
                 enable_irq_window(vcpu);
                 return;
         }
@@@ -5727,19 -5708,6 +5727,19 @@@ static void vmx_inject_nmi(struct kvm_v
   {
         struct vcpu_vmx *vmx = to_vmx(vcpu);
   
+ +      if (!enable_vnmi) {
+ +              /*
+ +               * Tracking the NMI-blocked state in software is built upon
+ +               * finding the next open IRQ window. This, in turn, depends on
+ +               * well-behaving guests: They have to keep IRQs disabled at
+ +               * least as long as the NMI handler runs. Otherwise we may
+ +               * cause NMI nesting, maybe breaking the guest. But as this is
+ +               * highly unlikely, we can live with the residual risk.
+ +               */
+ +              vmx->loaded_vmcs->soft_vnmi_blocked = 1;
+ +              vmx->loaded_vmcs->vnmi_blocked_time = 0;
+ +      }
+ +
         ++vcpu->stat.nmi_injections;
         vmx->loaded_vmcs->nmi_known_unmasked = false;
   
@@@ -5758,8 -5726,6 +5758,8 @@@ static bool vmx_get_nmi_mask(struct kvm
         struct vcpu_vmx *vmx = to_vmx(vcpu);
         bool masked;
   
+ +      if (!enable_vnmi)
+ +              return vmx->loaded_vmcs->soft_vnmi_blocked;
         if (vmx->loaded_vmcs->nmi_known_unmasked)
                 return false;
         masked = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI;
@@@ -5771,20 -5737,13 +5771,20 @@@ static void vmx_set_nmi_mask(struct kvm
   {
         struct vcpu_vmx *vmx = to_vmx(vcpu);
   
- -      vmx->loaded_vmcs->nmi_known_unmasked = !masked;
- -      if (masked)
- -              vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
- -                            GUEST_INTR_STATE_NMI);
- -      else
- -              vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
- -                              GUEST_INTR_STATE_NMI);
+ +      if (!enable_vnmi) {
+ +              if (vmx->loaded_vmcs->soft_vnmi_blocked != masked) {
+ +                      vmx->loaded_vmcs->soft_vnmi_blocked = masked;
+ +                      vmx->loaded_vmcs->vnmi_blocked_time = 0;
+ +              }
+ +      } else {
+ +              vmx->loaded_vmcs->nmi_known_unmasked = !masked;
+ +              if (masked)
+ +                      vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
+ +                                    GUEST_INTR_STATE_NMI);
+ +              else
+ +                      vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
+ +                                      GUEST_INTR_STATE_NMI);
+ +      }
   }
   
   static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
@@@ -5792,10 -5751,6 +5792,10 @@@
         if (to_vmx(vcpu)->nested.nested_run_pending)
                 return 0;
   
+ +      if (!enable_vnmi &&
+ +          to_vmx(vcpu)->loaded_vmcs->soft_vnmi_blocked)
+ +              return 0;
+ +
         return  !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
                   (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI
                    | GUEST_INTR_STATE_NMI));
@@@ -5924,9 -5879,11 +5924,9 @@@ static int handle_exception(struct kvm_
                 return 1;  /* already handled by vmx_vcpu_run() */
   
         if (is_invalid_opcode(intr_info)) {
- -              if (is_guest_mode(vcpu)) {
- -                      kvm_queue_exception(vcpu, UD_VECTOR);
- -                      return 1;
- -              }
                 er = emulate_instruction(vcpu, EMULTYPE_TRAP_UD);
+ +              if (er == EMULATE_USER_EXIT)
+ +                      return 0;
                 if (er != EMULATE_DONE)
                         kvm_queue_exception(vcpu, UD_VECTOR);
                 return 1;
@@@ -5956,7 -5913,8 +5956,7 @@@
                 cr2 = vmcs_readl(EXIT_QUALIFICATION);
                 /* EPT won't cause page fault directly */
                 WARN_ON_ONCE(!vcpu->arch.apf.host_apf_reason && enable_ept);
- -              return kvm_handle_page_fault(vcpu, error_code, cr2, NULL, 0,
- -                              true);
+ +              return kvm_handle_page_fault(vcpu, error_code, cr2, NULL, 0);
         }
   
         ex_no = intr_info & INTR_INFO_VECTOR_MASK;
@@@ -6521,7 -6479,6 +6521,7 @@@ static int handle_ept_violation(struct 
          * AAK134, BY25.
          */
         if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
+ +                      enable_vnmi &&
                         (exit_qualification & INTR_INFO_UNBLOCK_NMI))
                 vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI);
   
@@@ -6581,7 -6538,6 +6581,7 @@@ static int handle_ept_misconfig(struct 
   
   static int handle_nmi_window(struct kvm_vcpu *vcpu)
   {
+ +      WARN_ON_ONCE(!enable_vnmi);
         vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL,
                         CPU_BASED_VIRTUAL_NMI_PENDING);
         ++vcpu->stat.nmi_window_exits;
@@@ -6609,7 -6565,7 +6609,7 @@@ static int handle_invalid_guest_state(s
                 if (kvm_test_request(KVM_REQ_EVENT, vcpu))
                         return 1;
   
- -              err = emulate_instruction(vcpu, EMULTYPE_NO_REEXECUTE);
+ +              err = emulate_instruction(vcpu, 0);
   
                 if (err == EMULATE_USER_EXIT) {
                         ++vcpu->stat.mmio_exits;
@@@ -6757,10 -6713,16 +6757,10 @@@ static __init int hardware_setup(void
                         goto out;
         }
   
- -      vmx_io_bitmap_b = (unsigned long *)__get_free_page(GFP_KERNEL);
         memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
         memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
   
- -      /*
- -       * Allow direct access to the PC debug port (it is often used for I/O
- -       * delays, but the vmexits simply slow things down).
- -       */
         memset(vmx_io_bitmap_a, 0xff, PAGE_SIZE);
- -      clear_bit(0x80, vmx_io_bitmap_a);
   
         memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE);
   
@@@ -6786,22 -6748,21 +6786,22 @@@
   
         if (!cpu_has_vmx_ept() ||
             !cpu_has_vmx_ept_4levels() ||
- -          !cpu_has_vmx_ept_mt_wb()) {
+ +          !cpu_has_vmx_ept_mt_wb() ||
+ +          !cpu_has_vmx_invept_global())
                 enable_ept = 0;
- -              enable_unrestricted_guest = 0;
- -              enable_ept_ad_bits = 0;
- -      }
   
         if (!cpu_has_vmx_ept_ad_bits() || !enable_ept)
                 enable_ept_ad_bits = 0;
   
- -      if (!cpu_has_vmx_unrestricted_guest())
+ +      if (!cpu_has_vmx_unrestricted_guest() || !enable_ept)
                 enable_unrestricted_guest = 0;
   
         if (!cpu_has_vmx_flexpriority())
                 flexpriority_enabled = 0;
   
+ +      if (!cpu_has_virtual_nmis())
+ +              enable_vnmi = 0;
+ +
         /*
          * set_apic_access_page_addr() is used to reload apic access
          * page upon invalidation.  No need to do anything if not
@@@ -6816,13 -6777,8 +6816,13 @@@
         if (enable_ept && !cpu_has_vmx_ept_2m_page())
                 kvm_disable_largepages();
   
- -      if (!cpu_has_vmx_ple())
+ +      if (!cpu_has_vmx_ple()) {
                 ple_gap = 0;
+ +              ple_window = 0;
+ +              ple_window_grow = 0;
+ +              ple_window_max = 0;
+ +              ple_window_shrink = 0;
+ +      }
   
         if (!cpu_has_vmx_apicv()) {
                 enable_apicv = 0;
@@@ -7006,7 -6962,7 +7006,7 @@@ static struct loaded_vmcs *nested_get_c
         }
   
         /* Create a new VMCS */
- -      item = kmalloc(sizeof(struct vmcs02_list), GFP_KERNEL);
+ +      item = kzalloc(sizeof(struct vmcs02_list), GFP_KERNEL);
         if (!item)
                 return NULL;
         item->vmcs02.vmcs = alloc_vmcs();
@@@ -7415,11 -7371,10 +7415,11 @@@ static inline void nested_release_vmcs1
    */
   static void free_nested(struct vcpu_vmx *vmx)
   {
- -      if (!vmx->nested.vmxon)
+ +      if (!vmx->nested.vmxon && !vmx->nested.smm.vmxon)
                 return;
   
         vmx->nested.vmxon = false;
+ +      vmx->nested.smm.vmxon = false;
         free_vpid(vmx->nested.vpid02);
         vmx->nested.posted_intr_nv = -1;
         vmx->nested.current_vmptr = -1ull;
@@@ -8024,7 -7979,6 +8024,7 @@@ static int handle_pml_full(struct kvm_v
          * "blocked by NMI" bit has to be set before next VM entry.
          */
         if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
+ +                      enable_vnmi &&
                         (exit_qualification & INTR_INFO_UNBLOCK_NMI))
                 vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
                                 GUEST_INTR_STATE_NMI);
@@@ -8462,9 -8416,9 +8462,9 @@@ static bool nested_vmx_exit_reflected(s
         case EXIT_REASON_RDPMC:
                 return nested_cpu_has(vmcs12, CPU_BASED_RDPMC_EXITING);
         case EXIT_REASON_RDRAND:
- -              return nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDRAND);
+ +              return nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDRAND_EXITING);
         case EXIT_REASON_RDSEED:
- -              return nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDSEED);
+ +              return nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDSEED_EXITING);
         case EXIT_REASON_RDTSC: case EXIT_REASON_RDTSCP:
                 return nested_cpu_has(vmcs12, CPU_BASED_RDTSC_EXITING);
         case EXIT_REASON_VMCALL: case EXIT_REASON_VMCLEAR:
@@@ -8869,25 -8823,6 +8869,25 @@@ static int vmx_handle_exit(struct kvm_v
                 return 0;
         }
   
+ +      if (unlikely(!enable_vnmi &&
+ +                   vmx->loaded_vmcs->soft_vnmi_blocked)) {
+ +              if (vmx_interrupt_allowed(vcpu)) {
+ +                      vmx->loaded_vmcs->soft_vnmi_blocked = 0;
+ +              } else if (vmx->loaded_vmcs->vnmi_blocked_time > 1000000000LL &&
+ +                         vcpu->arch.nmi_pending) {
+ +                      /*
+ +                       * This CPU doesn't support us in finding the end of an
+ +                       * NMI-blocked window if the guest runs with IRQs
+ +                       * disabled. So we pull the trigger after 1 s of
+ +                       * futile waiting, but inform the user about this.
+ +                       */
+ +                      printk(KERN_WARNING "%s: Breaking out of NMI-blocked "
+ +                             "state on VCPU %d after 1 s timeout\n",
+ +                             __func__, vcpu->vcpu_id);
+ +                      vmx->loaded_vmcs->soft_vnmi_blocked = 0;
+ +              }
+ +      }
+ +
         if (exit_reason < kvm_vmx_max_exit_handlers
             && kvm_vmx_exit_handlers[exit_reason])
                 return kvm_vmx_exit_handlers[exit_reason](vcpu);
@@@ -9129,14 -9064,14 +9129,14 @@@ static void vmx_handle_external_intr(st
   #endif
                         "pushf\n\t"
                         __ASM_SIZE(push) " $%c[cs]\n\t"
-                       "call *%[entry]\n\t"
+                       CALL_NOSPEC
                         :
   #ifdef CONFIG_X86_64
                         [sp]"=&r"(tmp),
   #endif
                         ASM_CALL_CONSTRAINT
                         :
-                       [entry]"r"(entry),
+                       THUNK_TARGET(entry),
                         [ss]"i"(__KERNEL_DS),
                         [cs]"i"(__KERNEL_CS)
                         );
@@@ -9170,38 -9105,33 +9170,38 @@@ static void vmx_recover_nmi_blocking(st
   
         idtv_info_valid = vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK;
   
- -      if (vmx->loaded_vmcs->nmi_known_unmasked)
- -              return;
- -      /*
- -       * Can't use vmx->exit_intr_info since we're not sure what
- -       * the exit reason is.
- -       */
- -      exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
- -      unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
- -      vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
- -      /*
- -       * SDM 3: 27.7.1.2 (September 2008)
- -       * Re-set bit "block by NMI" before VM entry if vmexit caused by
- -       * a guest IRET fault.
- -       * SDM 3: 23.2.2 (September 2008)
- -       * Bit 12 is undefined in any of the following cases:
- -       *  If the VM exit sets the valid bit in the IDT-vectoring
- -       *   information field.
- -       *  If the VM exit is due to a double fault.
- -       */
- -      if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi &&
- -          vector != DF_VECTOR && !idtv_info_valid)
- -              vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
- -                            GUEST_INTR_STATE_NMI);
- -      else
- -              vmx->loaded_vmcs->nmi_known_unmasked =
- -                      !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO)
- -                        & GUEST_INTR_STATE_NMI);
+ +      if (enable_vnmi) {
+ +              if (vmx->loaded_vmcs->nmi_known_unmasked)
+ +                      return;
+ +              /*
+ +               * Can't use vmx->exit_intr_info since we're not sure what
+ +               * the exit reason is.
+ +               */
+ +              exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
+ +              unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
+ +              vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
+ +              /*
+ +               * SDM 3: 27.7.1.2 (September 2008)
+ +               * Re-set bit "block by NMI" before VM entry if vmexit caused by
+ +               * a guest IRET fault.
+ +               * SDM 3: 23.2.2 (September 2008)
+ +               * Bit 12 is undefined in any of the following cases:
+ +               *  If the VM exit sets the valid bit in the IDT-vectoring
+ +               *   information field.
+ +               *  If the VM exit is due to a double fault.
+ +               */
+ +              if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi &&
+ +                  vector != DF_VECTOR && !idtv_info_valid)
+ +                      vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
+ +                                    GUEST_INTR_STATE_NMI);
+ +              else
+ +                      vmx->loaded_vmcs->nmi_known_unmasked =
+ +                              !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO)
+ +                                & GUEST_INTR_STATE_NMI);
+ +      } else if (unlikely(vmx->loaded_vmcs->soft_vnmi_blocked))
+ +              vmx->loaded_vmcs->vnmi_blocked_time +=
+ +                      ktime_to_ns(ktime_sub(ktime_get(),
+ +                                            vmx->loaded_vmcs->entry_time));
   }
   
   static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu,
@@@ -9318,11 -9248,6 +9318,11 @@@ static void __noclone vmx_vcpu_run(stru
         struct vcpu_vmx *vmx = to_vmx(vcpu);
         unsigned long debugctlmsr, cr3, cr4;
   
+ +      /* Record the guest's net vcpu time for enforced NMI injections. */
+ +      if (unlikely(!enable_vnmi &&
+ +                   vmx->loaded_vmcs->soft_vnmi_blocked))
+ +              vmx->loaded_vmcs->entry_time = ktime_get();
+ +
         /* Don't enter VMX if guest state is invalid, let the exit handler
            start emulation until we arrive back to a valid state */
         if (vmx->emulation_required)
@@@ -9421,7 -9346,6 +9421,7 @@@
                 /* Save guest registers, load host registers, keep flags */
                 "mov %0, %c[wordsize](%%" _ASM_SP ") \n\t"
                 "pop %0 \n\t"
+ +              "setbe %c[fail](%0)\n\t"
                 "mov %%" _ASM_AX ", %c[rax](%0) \n\t"
                 "mov %%" _ASM_BX ", %c[rbx](%0) \n\t"
                 __ASM_SIZE(pop) " %c[rcx](%0) \n\t"
@@@ -9438,23 -9362,12 +9438,23 @@@
                 "mov %%r13, %c[r13](%0) \n\t"
                 "mov %%r14, %c[r14](%0) \n\t"
                 "mov %%r15, %c[r15](%0) \n\t"
+ +              "xor %%r8d,  %%r8d \n\t"
+ +              "xor %%r9d,  %%r9d \n\t"
+ +              "xor %%r10d, %%r10d \n\t"
+ +              "xor %%r11d, %%r11d \n\t"
+ +              "xor %%r12d, %%r12d \n\t"
+ +              "xor %%r13d, %%r13d \n\t"
+ +              "xor %%r14d, %%r14d \n\t"
+ +              "xor %%r15d, %%r15d \n\t"
   #endif
                 "mov %%cr2, %%" _ASM_AX "   \n\t"
                 "mov %%" _ASM_AX ", %c[cr2](%0) \n\t"
   
+ +              "xor %%eax, %%eax \n\t"
+ +              "xor %%ebx, %%ebx \n\t"
+ +              "xor %%esi, %%esi \n\t"
+ +              "xor %%edi, %%edi \n\t"
                 "pop  %%" _ASM_BP "; pop  %%" _ASM_DX " \n\t"
- -              "setbe %c[fail](%0) \n\t"
                 ".pushsection .rodata \n\t"
                 ".global vmx_return \n\t"
                 "vmx_return: " _ASM_PTR " 2b \n\t"
@@@ -9566,6 -9479,7 +9566,6 @@@ static void vmx_switch_vmcs(struct kvm_
         vmx->loaded_vmcs = vmcs;
         vmx_vcpu_put(vcpu);
         vmx_vcpu_load(vcpu, cpu);
- -      vcpu->cpu = cpu;
         put_cpu();
   }
   
@@@ -9646,9 -9560,11 +9646,9 @@@ static struct kvm_vcpu *vmx_create_vcpu
         cpu = get_cpu();
         vmx_vcpu_load(&vmx->vcpu, cpu);
         vmx->vcpu.cpu = cpu;
- -      err = vmx_vcpu_setup(vmx);
+ +      vmx_vcpu_setup(vmx);
         vmx_vcpu_put(&vmx->vcpu);
         put_cpu();
- -      if (err)
- -              goto free_vmcs;
         if (cpu_need_virtualize_apic_accesses(&vmx->vcpu)) {
                 err = alloc_apic_access_page(kvm);
                 if (err)
@@@ -9656,6 -9572,9 +9656,6 @@@
         }
   
         if (enable_ept) {
- -              if (!kvm->arch.ept_identity_map_addr)
- -                      kvm->arch.ept_identity_map_addr =
- -                              VMX_EPT_IDENTITY_PAGETABLE_ADDR;
                 err = init_rmode_identity_map(kvm);
                 if (err)
                         goto free_vmcs;
@@@ -9817,7 -9736,8 +9817,7 @@@ static void nested_vmx_cr_fixed1_bits_u
         cr4_fixed1_update(X86_CR4_SMEP,       ebx, bit(X86_FEATURE_SMEP));
         cr4_fixed1_update(X86_CR4_SMAP,       ebx, bit(X86_FEATURE_SMAP));
         cr4_fixed1_update(X86_CR4_PKE,        ecx, bit(X86_FEATURE_PKU));
- -      /* TODO: Use X86_CR4_UMIP and X86_FEATURE_UMIP macros */
- -      cr4_fixed1_update(bit(11),            ecx, bit(2));
+ +      cr4_fixed1_update(X86_CR4_UMIP,       ecx, bit(X86_FEATURE_UMIP));
   
   #undef cr4_fixed1_update
   }
@@@ -10891,11 -10811,6 +10891,11 @@@ static int check_vmentry_postreqs(struc
                         return 1;
         }
   
+ +      if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS) &&
+ +              (is_noncanonical_address(vmcs12->guest_bndcfgs & PAGE_MASK, vcpu) ||
+ +              (vmcs12->guest_bndcfgs & MSR_IA32_BNDCFGS_RSVD)))
+ +                      return 1;
+ +
         return 0;
   }
   
@@@ -11120,12 -11035,13 +11120,12 @@@ static int vmx_check_nested_events(stru
   {
         struct vcpu_vmx *vmx = to_vmx(vcpu);
         unsigned long exit_qual;
- -
- -      if (kvm_event_needs_reinjection(vcpu))
- -              return -EBUSY;
+ +      bool block_nested_events =
+ +          vmx->nested.nested_run_pending || kvm_event_needs_reinjection(vcpu);
   
         if (vcpu->arch.exception.pending &&
                 nested_vmx_check_exception(vcpu, &exit_qual)) {
- -              if (vmx->nested.nested_run_pending)
+ +              if (block_nested_events)
                         return -EBUSY;
                 nested_vmx_inject_exception_vmexit(vcpu, exit_qual);
                 vcpu->arch.exception.pending = false;
@@@ -11134,14 -11050,14 +11134,14 @@@
   
         if (nested_cpu_has_preemption_timer(get_vmcs12(vcpu)) &&
             vmx->nested.preemption_timer_expired) {
- -              if (vmx->nested.nested_run_pending)
+ +              if (block_nested_events)
                         return -EBUSY;
                 nested_vmx_vmexit(vcpu, EXIT_REASON_PREEMPTION_TIMER, 0, 0);
                 return 0;
         }
   
         if (vcpu->arch.nmi_pending && nested_exit_on_nmi(vcpu)) {
- -              if (vmx->nested.nested_run_pending)
+ +              if (block_nested_events)
                         return -EBUSY;
                 nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
                                   NMI_VECTOR | INTR_TYPE_NMI_INTR |
@@@ -11157,7 -11073,7 +11157,7 @@@
   
         if ((kvm_cpu_has_interrupt(vcpu) || external_intr) &&
             nested_exit_on_intr(vcpu)) {
- -              if (vmx->nested.nested_run_pending)
+ +              if (block_nested_events)
                         return -EBUSY;
                 nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, 0, 0);
                 return 0;
@@@ -11344,24 -11260,6 +11344,24 @@@ static void prepare_vmcs12(struct kvm_v
         kvm_clear_interrupt_queue(vcpu);
   }
   
+ +static void load_vmcs12_mmu_host_state(struct kvm_vcpu *vcpu,
+ +                      struct vmcs12 *vmcs12)
+ +{
+ +      u32 entry_failure_code;
+ +
+ +      nested_ept_uninit_mmu_context(vcpu);
+ +
+ +      /*
+ +       * Only PDPTE load can fail as the value of cr3 was checked on entry and
+ +       * couldn't have changed.
+ +       */
+ +      if (nested_vmx_load_cr3(vcpu, vmcs12->host_cr3, false, &entry_failure_code))
+ +              nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_PDPTE_FAIL);
+ +
+ +      if (!enable_ept)
+ +              vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault;
+ +}
+ +
   /*
    * A part of what we need to when the nested L2 guest exits and we want to
    * run its L1 parent, is to reset L1's guest state to the host state specified
@@@ -11375,6 -11273,7 +11375,6 @@@ static void load_vmcs12_host_state(stru
                                    struct vmcs12 *vmcs12)
   {
         struct kvm_segment seg;
- -      u32 entry_failure_code;
   
         if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER)
                 vcpu->arch.efer = vmcs12->host_ia32_efer;
@@@ -11401,7 -11300,17 +11401,7 @@@
         vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK);
         vmx_set_cr4(vcpu, vmcs12->host_cr4);
   
- -      nested_ept_uninit_mmu_context(vcpu);
- -
- -      /*
- -       * Only PDPTE load can fail as the value of cr3 was checked on entry and
- -       * couldn't have changed.
- -       */
- -      if (nested_vmx_load_cr3(vcpu, vmcs12->host_cr3, false, &entry_failure_code))
- -              nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_PDPTE_FAIL);
- -
- -      if (!enable_ept)
- -              vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault;
+ +      load_vmcs12_mmu_host_state(vcpu, vmcs12);
   
         if (enable_vpid) {
                 /*
@@@ -11420,8 -11329,6 +11420,8 @@@
         vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->host_ia32_sysenter_eip);
         vmcs_writel(GUEST_IDTR_BASE, vmcs12->host_idtr_base);
         vmcs_writel(GUEST_GDTR_BASE, vmcs12->host_gdtr_base);
+ +      vmcs_write32(GUEST_IDTR_LIMIT, 0xFFFF);
+ +      vmcs_write32(GUEST_GDTR_LIMIT, 0xFFFF);
   
         /* If not VM_EXIT_CLEAR_BNDCFGS, the L2 value propagates to L1.  */
         if (vmcs12->vm_exit_controls & VM_EXIT_CLEAR_BNDCFGS)
@@@ -11518,11 -11425,8 +11518,11 @@@ static void nested_vmx_vmexit(struct kv
         leave_guest_mode(vcpu);
   
         if (likely(!vmx->fail)) {
- -              prepare_vmcs12(vcpu, vmcs12, exit_reason, exit_intr_info,
- -                             exit_qualification);
+ +              if (exit_reason == -1)
+ +                      sync_vmcs12(vcpu, vmcs12);
+ +              else
+ +                      prepare_vmcs12(vcpu, vmcs12, exit_reason, exit_intr_info,
+ +                                     exit_qualification);
   
                 if (nested_vmx_store_msr(vcpu, vmcs12->vm_exit_msr_store_addr,
                                          vmcs12->vm_exit_msr_store_count))
@@@ -11586,7 -11490,7 +11586,7 @@@
          */
         kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
   
- -      if (enable_shadow_vmcs)
+ +      if (enable_shadow_vmcs && exit_reason != -1)
                 vmx->nested.sync_shadow_vmcs = true;
   
         /* in case we halted in L2 */
@@@ -11610,13 -11514,12 +11610,13 @@@
                                 INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR;
                 }
   
- -              trace_kvm_nested_vmexit_inject(vmcs12->vm_exit_reason,
- -                                             vmcs12->exit_qualification,
- -                                             vmcs12->idt_vectoring_info_field,
- -                                             vmcs12->vm_exit_intr_info,
- -                                             vmcs12->vm_exit_intr_error_code,
- -                                             KVM_ISA_VMX);
+ +              if (exit_reason != -1)
+ +                      trace_kvm_nested_vmexit_inject(vmcs12->vm_exit_reason,
+ +                                                     vmcs12->exit_qualification,
+ +                                                     vmcs12->idt_vectoring_info_field,
+ +                                                     vmcs12->vm_exit_intr_info,
+ +                                                     vmcs12->vm_exit_intr_error_code,
+ +                                                     KVM_ISA_VMX);
   
                 load_vmcs12_host_state(vcpu, vmcs12);
   
@@@ -11631,9 -11534,6 +11631,9 @@@
          * accordingly.
          */
         nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
+ +
+ +      load_vmcs12_mmu_host_state(vcpu, vmcs12);
+ +
         /*
          * The emulated instruction was already skipped in
          * nested_vmx_run, but the updated RIP was never
@@@ -12042,54 -11942,6 +12042,54 @@@ static void vmx_setup_mce(struct kvm_vc
                         ~FEATURE_CONTROL_LMCE;
   }
   
+ +static int vmx_smi_allowed(struct kvm_vcpu *vcpu)
+ +{
+ +      /* we need a nested vmexit to enter SMM, postpone if run is pending */
+ +      if (to_vmx(vcpu)->nested.nested_run_pending)
+ +              return 0;
+ +      return 1;
+ +}
+ +
+ +static int vmx_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
+ +{
+ +      struct vcpu_vmx *vmx = to_vmx(vcpu);
+ +
+ +      vmx->nested.smm.guest_mode = is_guest_mode(vcpu);
+ +      if (vmx->nested.smm.guest_mode)
+ +              nested_vmx_vmexit(vcpu, -1, 0, 0);
+ +
+ +      vmx->nested.smm.vmxon = vmx->nested.vmxon;
+ +      vmx->nested.vmxon = false;
+ +      return 0;
+ +}
+ +
+ +static int vmx_pre_leave_smm(struct kvm_vcpu *vcpu, u64 smbase)
+ +{
+ +      struct vcpu_vmx *vmx = to_vmx(vcpu);
+ +      int ret;
+ +
+ +      if (vmx->nested.smm.vmxon) {
+ +              vmx->nested.vmxon = true;
+ +              vmx->nested.smm.vmxon = false;
+ +      }
+ +
+ +      if (vmx->nested.smm.guest_mode) {
+ +              vcpu->arch.hflags &= ~HF_SMM_MASK;
+ +              ret = enter_vmx_non_root_mode(vcpu, false);
+ +              vcpu->arch.hflags |= HF_SMM_MASK;
+ +              if (ret)
+ +                      return ret;
+ +
+ +              vmx->nested.smm.guest_mode = false;
+ +      }
+ +      return 0;
+ +}
+ +
+ +static int enable_smi_window(struct kvm_vcpu *vcpu)
+ +{
+ +      return 0;
+ +}
+ +
   static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
         .cpu_has_kvm_support = cpu_has_kvm_support,
         .disabled_by_bios = vmx_disabled_by_bios,
@@@ -12215,11 -12067,6 +12215,11 @@@
   #endif
   
         .setup_mce = vmx_setup_mce,
+ +
+ +      .smi_allowed = vmx_smi_allowed,
+ +      .pre_enter_smm = vmx_pre_enter_smm,
+ +      .pre_leave_smm = vmx_pre_leave_smm,
+ +      .enable_smi_window = enable_smi_window,
   };
   
   static int __init vmx_init(void)
diff --combined arch/x86/lib/Makefile

index f23934bbaf4ebe6a55331669160d6b0aa72ba5d4,d0a3170e6804a3de173358d97f8fee118e6e6176..69a473919260bf1daa5592ac56c34f656fd3a28d
--- 1/arch/x86/lib/Makefile
--- 2/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@@ -24,9 -24,10 +24,10 @@@ lib-y := delay.o misc.o cmdline.o cpu.
   lib-y += usercopy_$(BITS).o usercopy.o getuser.o putuser.o
   lib-y += memcpy_$(BITS).o
   lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
- -lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o
+ +lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o insn-eval.o
   lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
   lib-$(CONFIG_RETPOLINE) += retpoline.o
+ OBJECT_FILES_NON_STANDARD_retpoline.o :=y
   
   obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o
   
diff --combined include/linux/module.h

index c69b49abe8775417587387a8d7def5497e476fc8,b1cc541f2ddf230972259b619f90bf3a8f4668fc..1d8f245967be7f1adad079bc16c55766e1c8edaf
--- 1/include/linux/module.h
--- 2/include/linux/module.h
+++ b/include/linux/module.h
@@@ -639,8 -639,6 +639,8 @@@ static inline bool is_livepatch_module(
   }
   #endif /* CONFIG_LIVEPATCH */
   
+ +bool is_module_sig_enforced(void);
+ +
   #else /* !CONFIG_MODULES... */
   
   static inline struct module *__module_address(unsigned long addr)
@@@ -755,11 -753,6 +755,11 @@@ static inline bool module_requested_asy
         return false;
   }
   
+ +static inline bool is_module_sig_enforced(void)
+ +{
+ +      return false;
+ +}
+ +
   #endif /* CONFIG_MODULES */
   
   #ifdef CONFIG_SYSFS
@@@ -801,6 -794,15 +801,15 @@@ static inline void module_bug_finalize(
   static inline void module_bug_cleanup(struct module *mod) {}
   #endif        /* CONFIG_GENERIC_BUG */
   
+ #ifdef RETPOLINE
+ extern bool retpoline_module_ok(bool has_retpoline);
+ #else
+ static inline bool retpoline_module_ok(bool has_retpoline)
+ {
+       return true;
+ }
+ #endif
+ 
   #ifdef CONFIG_MODULE_SIG
   static inline bool module_sig_ok(struct module *module)
   {
diff --combined kernel/module.c

index dea01ac9cb74c4ef619c51b5eba4e869d9e4fdc8,690c0651c40f4c2121c0e4227660948ff981bac0..09e48eee4d55156d973d96310f4947f32302962d
--- 1/kernel/module.c
--- 2/kernel/module.c
+++ b/kernel/module.c
@@@ -278,16 -278,6 +278,16 @@@ static bool sig_enforce = IS_ENABLED(CO
   module_param(sig_enforce, bool_enable_only, 0644);
   #endif /* !CONFIG_MODULE_SIG_FORCE */
   
+ +/*
+ + * Export the sig_enforce kernel cmdline parameter so that other subsystems can
+ + * rely on it instead of depending directly on CONFIG_MODULE_SIG_FORCE.
+ + */
+ +bool is_module_sig_enforced(void)
+ +{
+ +      return sig_enforce;
+ +}
+ +EXPORT_SYMBOL(is_module_sig_enforced);
+ +
   /* Block module loading/unloading? */
   int modules_disabled = 0;
   core_param(nomodule, modules_disabled, bint, 0);
@@@ -847,8 -837,10 +847,8 @@@ static int add_module_usage(struct modu
   
         pr_debug("Allocating new usage for %s.\n", a->name);
         use = kmalloc(sizeof(*use), GFP_ATOMIC);
- -      if (!use) {
- -              pr_warn("%s: out of memory loading\n", a->name);
+ +      if (!use)
                 return -ENOMEM;
- -      }
   
         use->source = a;
         use->target = b;
@@@ -1524,7 -1516,7 +1524,7 @@@ static void add_sect_attrs(struct modul
                 sattr->mattr.show = module_sect_show;
                 sattr->mattr.store = NULL;
                 sattr->mattr.attr.name = sattr->name;
- -              sattr->mattr.attr.mode = S_IRUGO;
+ +              sattr->mattr.attr.mode = S_IRUSR;
                 *(gattr++) = &(sattr++)->mattr.attr;
         }
         *gattr = NULL;
@@@ -2863,6 -2855,15 +2863,15 @@@ static int check_modinfo_livepatch(stru
   }
   #endif /* CONFIG_LIVEPATCH */
   
+ static void check_modinfo_retpoline(struct module *mod, struct load_info *info)
+ {
+       if (retpoline_module_ok(get_modinfo(info, "retpoline")))
+               return;
+ 
+       pr_warn("%s: loading module not compiled with retpoline compiler.\n",
+               mod->name);
+ }
+ 
   /* Sets info->hdr and info->len. */
   static int copy_module_from_user(const void __user *umod, unsigned long len,
                                   struct load_info *info)
@@@ -3029,6 -3030,8 +3038,8 @@@ static int check_modinfo(struct module 
                 add_taint_module(mod, TAINT_OOT_MODULE, LOCKDEP_STILL_OK);
         }
   
+       check_modinfo_retpoline(mod, info);
+ 
         if (get_modinfo(info, "staging")) {
                 add_taint_module(mod, TAINT_CRAP, LOCKDEP_STILL_OK);
                 pr_warn("%s: module is from the staging directory, the quality "
@@@ -3481,8 -3484,6 +3492,8 @@@ static noinline int do_init_module(stru
         if (!mod->async_probe_requested && (current->flags & PF_USED_ASYNC))
                 async_synchronize_full();
   
+ +      ftrace_free_mem(mod, mod->init_layout.base, mod->init_layout.base +
+ +                      mod->init_layout.size);
         mutex_lock(&module_mutex);
         /* Drop initial reference. */
         module_put(mod);
@@@ -4157,7 -4158,6 +4168,7 @@@ static int m_show(struct seq_file *m, v
   {
         struct module *mod = list_entry(p, struct module, list);
         char buf[MODULE_FLAGS_BUF_SIZE];
+ +      void *value;
   
         /* We always ignore unformed modules. */
         if (mod->state == MODULE_STATE_UNFORMED)
@@@ -4173,8 -4173,7 +4184,8 @@@
                    mod->state == MODULE_STATE_COMING ? "Loading" :
                    "Live");
         /* Used by oprofile and other similar tools. */
- -      seq_printf(m, " 0x%pK", mod->core_layout.base);
+ +      value = m->private ? NULL : mod->core_layout.base;
+ +      seq_printf(m, " 0x%px", value);
   
         /* Taints info */
         if (mod->taints)
@@@ -4196,23 -4195,9 +4207,23 @@@ static const struct seq_operations modu
         .show   = m_show
   };
   
+ +/*
+ + * On success this also sets the "private" pointer to non-NULL
+ + * if the kernel pointers should be hidden (so you can just test
+ + * "m->private" to see if you should keep the values private),
+ + * using the same logic as for /proc/kallsyms.
+ + * Returns 0 on success, or the error from seq_open() on failure.
+ + */
   static int modules_open(struct inode *inode, struct file *file)
   {
- -      return seq_open(file, &modules_op);
+ +      int err = seq_open(file, &modules_op);
+ +
+ +      if (!err) {
+ +              struct seq_file *m = file->private_data;
+ +              m->private = kallsyms_show_value() ? NULL : (void *)8ul;
+ +      }
+ +
+ +      return err;
   }
   
   static const struct file_operations proc_modules_operations = {
diff --combined scripts/mod/modpost.c

index f51cf977c65b22131ff1ac7aa568015771e94f65,54deaa1066cf04a5df710695ed6fa36a7036f9ef..6510536c06df3d4d43cac41f061e49d6e883e91b
--- 1/scripts/mod/modpost.c
--- 2/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@@ -1963,7 -1963,7 +1963,7 @@@ static void read_symbols(char *modname
         }
   
         license = get_modinfo(info.modinfo, info.modinfo_len, "license");
- -      if (info.modinfo && !license && !is_vmlinux(modname))
+ +      if (!license && !is_vmlinux(modname))
                 warn("modpost: missing MODULE_LICENSE() in %s\n"
                      "see include/linux/module.h for "
                      "more information\n", modname);
@@@ -2165,6 -2165,14 +2165,14 @@@ static void add_intree_flag(struct buff
                 buf_printf(b, "\nMODULE_INFO(intree, \"Y\");\n");
   }
   
+ /* Cannot check for assembler */
+ static void add_retpoline(struct buffer *b)
+ {
+       buf_printf(b, "\n#ifdef RETPOLINE\n");
+       buf_printf(b, "MODULE_INFO(retpoline, \"Y\");\n");
+       buf_printf(b, "#endif\n");
+ }
+ 
   static void add_staging_flag(struct buffer *b, const char *name)
   {
         static const char *staging_dir = "drivers/staging";
@@@ -2506,6 -2514,7 +2514,7 @@@ int main(int argc, char **argv
                 err |= check_modname_len(mod);
                 add_header(&buf, mod);
                 add_intree_flag(&buf, !external_module);
+               add_retpoline(&buf);
                 add_staging_flag(&buf, mod->name);
                 err |= add_versions(&buf, mod);
                 add_depends(&buf, mod, modules);
author	Linus Torvalds <torvalds@linux-foundation.org>
	Tue, 30 Jan 2018 03:08:02 +0000 (19:08 -0800)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Tue, 30 Jan 2018 03:08:02 +0000 (19:08 -0800)
		1	2
arch/x86/entry/entry_64.S	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/include/asm/cpufeatures.h	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/include/asm/disabled-features.h	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/include/asm/msr-index.h	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/include/asm/processor.h	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/alternative.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/cpu/common.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/cpu/intel.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/cpu/scattered.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kvm/emulate.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kvm/vmx.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/lib/Makefile	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/module.h	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/module.c	patch \|	diff1 \|	diff2 \|	blob \| history
scripts/mod/modpost.c	patch \|	diff1 \|	diff2 \|	blob \| history