--- /dev/null
+From 25a068b8e9a4eb193d755d58efcb3c98928636e0 Mon Sep 17 00:00:00 2001
+From: Dave Hansen <dave.hansen@linux.intel.com>
+Date: Thu, 5 Mar 2020 09:47:08 -0800
+Subject: x86/apic: Add extra serialization for non-serializing MSRs
+
+From: Dave Hansen <dave.hansen@linux.intel.com>
+
+commit 25a068b8e9a4eb193d755d58efcb3c98928636e0 upstream.
+
+Jan Kiszka reported that the x2apic_wrmsr_fence() function uses a plain
+MFENCE while the Intel SDM (10.12.3 MSR Access in x2APIC Mode) calls for
+MFENCE; LFENCE.
+
+Short summary: we have special MSRs that have weaker ordering than all
+the rest. Add fencing consistent with current SDM recommendations.
+
+This is not known to cause any issues in practice, only in theory.
+
+Longer story below:
+
+The reason the kernel uses a different semantic is that the SDM changed
+(roughly in late 2017). The SDM changed because folks at Intel were
+auditing all of the recommended fences in the SDM and realized that the
+x2apic fences were insufficient.
+
+Why was the plain MFENCE judged insufficient?
+
+WRMSR itself is normally a serializing instruction. No fences are needed
+because the instruction itself serializes everything.
+
+But, there are explicit exceptions for this serializing behavior written
+into the WRMSR instruction documentation for two classes of MSRs:
+IA32_TSC_DEADLINE and the X2APIC MSRs.
+
+Back to x2apic: WRMSR is *not* serializing in this specific case.
+But why is MFENCE insufficient? MFENCE makes writes visible, but
+only affects load/store instructions. WRMSR is unfortunately not a
+load/store instruction and is unaffected by MFENCE. This means that a
+non-serializing WRMSR could be reordered by the CPU to execute before
+the writes made visible by the MFENCE have even occurred in the first
+place.
+
+This means that an x2apic IPI could theoretically be triggered before
+there is any (visible) data to process.
+
+Does this affect anything in practice? I honestly don't know. It seems
+quite possible that by the time an interrupt gets to consume the (not
+yet) MFENCE'd data, it has become visible, mostly by accident.
+
+To be safe, add the SDM-recommended fences for all x2apic WRMSRs.
+
+This also leaves open the question of the _other_ weakly-ordered WRMSR:
+MSR_IA32_TSC_DEADLINE. While it has the same ordering architecture as
+the x2APIC MSRs, it seems substantially less likely to be a problem in
+practice. While writes to the in-memory Local Vector Table (LVT) might
+theoretically be reordered with respect to a weakly-ordered WRMSR like
+TSC_DEADLINE, the SDM has this to say:
+
+ In x2APIC mode, the WRMSR instruction is used to write to the LVT
+ entry. The processor ensures the ordering of this write and any
+ subsequent WRMSR to the deadline; no fencing is required.
+
+But, that might still leave xAPIC exposed. The safest thing to do for
+now is to add the extra, recommended LFENCE.
+
+ [ bp: Massage commit message, fix typos, drop accidentally added
+ newline to tools/arch/x86/include/asm/barrier.h. ]
+
+Reported-by: Jan Kiszka <jan.kiszka@siemens.com>
+Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Acked-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: <stable@vger.kernel.org>
+Link: https://lkml.kernel.org/r/20200305174708.F77040DD@viggo.jf.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/apic.h | 10 ----------
+ arch/x86/include/asm/barrier.h | 18 ++++++++++++++++++
+ arch/x86/kernel/apic/apic.c | 4 ++++
+ arch/x86/kernel/apic/x2apic_cluster.c | 6 ++++--
+ arch/x86/kernel/apic/x2apic_phys.c | 6 ++++--
+ 5 files changed, 30 insertions(+), 14 deletions(-)
+
+--- a/arch/x86/include/asm/apic.h
++++ b/arch/x86/include/asm/apic.h
+@@ -190,16 +190,6 @@ static inline void lapic_assign_legacy_v
+ #endif /* !CONFIG_X86_LOCAL_APIC */
+
+ #ifdef CONFIG_X86_X2APIC
+-/*
+- * Make previous memory operations globally visible before
+- * sending the IPI through x2apic wrmsr. We need a serializing instruction or
+- * mfence for this.
+- */
+-static inline void x2apic_wrmsr_fence(void)
+-{
+- asm volatile("mfence" : : : "memory");
+-}
+-
+ static inline void native_apic_msr_write(u32 reg, u32 v)
+ {
+ if (reg == APIC_DFR || reg == APIC_ID || reg == APIC_LDR ||
+--- a/arch/x86/include/asm/barrier.h
++++ b/arch/x86/include/asm/barrier.h
+@@ -85,4 +85,22 @@ do { \
+
+ #include <asm-generic/barrier.h>
+
++/*
++ * Make previous memory operations globally visible before
++ * a WRMSR.
++ *
++ * MFENCE makes writes visible, but only affects load/store
++ * instructions. WRMSR is unfortunately not a load/store
++ * instruction and is unaffected by MFENCE. The LFENCE ensures
++ * that the WRMSR is not reordered.
++ *
++ * Most WRMSRs are fully serializing instructions themselves and
++ * do not require this barrier. This is only required for the
++ * IA32_TSC_DEADLINE and X2APIC MSRs.
++ */
++static inline void weak_wrmsr_fence(void)
++{
++ asm volatile("mfence; lfence" : : : "memory");
++}
++
+ #endif /* _ASM_X86_BARRIER_H */
+--- a/arch/x86/kernel/apic/apic.c
++++ b/arch/x86/kernel/apic/apic.c
+@@ -41,6 +41,7 @@
+ #include <asm/x86_init.h>
+ #include <asm/pgalloc.h>
+ #include <linux/atomic.h>
++#include <asm/barrier.h>
+ #include <asm/mpspec.h>
+ #include <asm/i8259.h>
+ #include <asm/proto.h>
+@@ -465,6 +466,9 @@ static int lapic_next_deadline(unsigned
+ {
+ u64 tsc;
+
++ /* This MSR is special and needs a special fence: */
++ weak_wrmsr_fence();
++
+ tsc = rdtsc();
+ wrmsrl(MSR_IA32_TSC_DEADLINE, tsc + (((u64) delta) * TSC_DIVISOR));
+ return 0;
+--- a/arch/x86/kernel/apic/x2apic_cluster.c
++++ b/arch/x86/kernel/apic/x2apic_cluster.c
+@@ -31,7 +31,8 @@ static void x2apic_send_IPI(int cpu, int
+ {
+ u32 dest = per_cpu(x86_cpu_to_logical_apicid, cpu);
+
+- x2apic_wrmsr_fence();
++ /* x2apic MSRs are special and need a special fence: */
++ weak_wrmsr_fence();
+ __x2apic_send_IPI_dest(dest, vector, APIC_DEST_LOGICAL);
+ }
+
+@@ -43,7 +44,8 @@ __x2apic_send_IPI_mask(const struct cpum
+ unsigned long flags;
+ u32 dest;
+
+- x2apic_wrmsr_fence();
++ /* x2apic MSRs are special and need a special fence: */
++ weak_wrmsr_fence();
+ local_irq_save(flags);
+
+ tmpmsk = this_cpu_cpumask_var_ptr(ipi_mask);
+--- a/arch/x86/kernel/apic/x2apic_phys.c
++++ b/arch/x86/kernel/apic/x2apic_phys.c
+@@ -48,7 +48,8 @@ static void x2apic_send_IPI(int cpu, int
+ {
+ u32 dest = per_cpu(x86_cpu_to_apicid, cpu);
+
+- x2apic_wrmsr_fence();
++ /* x2apic MSRs are special and need a special fence: */
++ weak_wrmsr_fence();
+ __x2apic_send_IPI_dest(dest, vector, APIC_DEST_PHYSICAL);
+ }
+
+@@ -59,7 +60,8 @@ __x2apic_send_IPI_mask(const struct cpum
+ unsigned long this_cpu;
+ unsigned long flags;
+
+- x2apic_wrmsr_fence();
++ /* x2apic MSRs are special and need a special fence: */
++ weak_wrmsr_fence();
+
+ local_irq_save(flags);
+
--- /dev/null
+From 20bf2b378729c4a0366a53e2018a0b70ace94bcd Mon Sep 17 00:00:00 2001
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+Date: Thu, 28 Jan 2021 15:52:19 -0600
+Subject: x86/build: Disable CET instrumentation in the kernel
+
+From: Josh Poimboeuf <jpoimboe@redhat.com>
+
+commit 20bf2b378729c4a0366a53e2018a0b70ace94bcd upstream.
+
+With retpolines disabled, some configurations of GCC, and specifically
+the GCC versions 9 and 10 in Ubuntu will add Intel CET instrumentation
+to the kernel by default. That breaks certain tracing scenarios by
+adding a superfluous ENDBR64 instruction before the fentry call, for
+functions which can be called indirectly.
+
+CET instrumentation isn't currently necessary in the kernel, as CET is
+only supported in user space. Disable it unconditionally and move it
+into the x86's Makefile as CET/CFI... enablement should be a per-arch
+decision anyway.
+
+ [ bp: Massage and extend commit message. ]
+
+Fixes: 29be86d7f9cb ("kbuild: add -fcf-protection=none when using retpoline flags")
+Reported-by: Nikolay Borisov <nborisov@suse.com>
+Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Nikolay Borisov <nborisov@suse.com>
+Tested-by: Nikolay Borisov <nborisov@suse.com>
+Cc: <stable@vger.kernel.org>
+Cc: Seth Forshee <seth.forshee@canonical.com>
+Cc: Masahiro Yamada <yamada.masahiro@socionext.com>
+Link: https://lkml.kernel.org/r/20210128215219.6kct3h2eiustncws@treble
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Makefile | 6 ------
+ arch/x86/Makefile | 3 +++
+ 2 files changed, 3 insertions(+), 6 deletions(-)
+
+--- a/Makefile
++++ b/Makefile
+@@ -859,12 +859,6 @@ KBUILD_CFLAGS += $(call cc-option,-Wer
+ # change __FILE__ to the relative path from the srctree
+ KBUILD_CFLAGS += $(call cc-option,-fmacro-prefix-map=$(srctree)/=)
+
+-# ensure -fcf-protection is disabled when using retpoline as it is
+-# incompatible with -mindirect-branch=thunk-extern
+-ifdef CONFIG_RETPOLINE
+-KBUILD_CFLAGS += $(call cc-option,-fcf-protection=none)
+-endif
+-
+ # use the deterministic mode of AR if available
+ KBUILD_ARFLAGS := $(call ar-option,D)
+
+--- a/arch/x86/Makefile
++++ b/arch/x86/Makefile
+@@ -132,6 +132,9 @@ else
+ KBUILD_CFLAGS += -mno-red-zone
+ KBUILD_CFLAGS += -mcmodel=kernel
+
++ # Intel CET isn't enabled in the kernel
++ KBUILD_CFLAGS += $(call cc-option,-fcf-protection=none)
++
+ # -funit-at-a-time shrinks the kernel .text considerably
+ # unfortunately it makes reading oopses harder.
+ KBUILD_CFLAGS += $(call cc-option,-funit-at-a-time)