]> git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
powerpc/64s: Add support for RFI flush of L1-D cache
author Michael Ellerman <mpe@ellerman.id.au>
Tue, 9 Jan 2018 16:07:15 +0000 (03:07 +1100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 13 Feb 2018 11:35:54 +0000 (12:35 +0100)
commit aa8a5e0062ac940f7659394f4817c948dc8c0667 upstream.

On some CPUs we can prevent the Meltdown vulnerability by flushing the
L1-D cache on exit from kernel to user mode, and from hypervisor to
guest.

This is known to be the case on at least Power7, Power8 and Power9. At
this time we do not know the status of the vulnerability on other CPUs
such as the 970 (Apple G5), pasemi CPUs (AmigaOne X1000) or Freescale
CPUs. As more information comes to light we can enable this, or other
mechanisms on those CPUs.

The vulnerability occurs when the load of an architecturally
inaccessible memory region (eg. userspace load of kernel memory) is
speculatively executed to the point where its result can influence the
address of a subsequent speculatively executed load.

In order for that to happen, the first load must hit in the L1,
because before the load is sent to the L2 the permission check is
performed. Therefore, if no kernel addresses hit in the L1, the
vulnerability cannot occur. We can ensure that is the case by
flushing the L1 whenever we return to userspace. Similarly for
hypervisor vs guest.

In order to flush the L1-D cache on exit, we add a section of nops at
each (h)rfi location that returns to a lower privileged context, and
patch that with some sequence. Newer firmwares are able to advertise
to us that there is a special nop instruction that flushes the L1-D.
If we do not see that advertised, we fall back to doing a displacement
flush in software.

For guest kernels we support migration between some CPU versions, and
different CPUs may use different flush instructions. So that we are
prepared to migrate to a machine with a different flush instruction
activated, we may have to patch more than one flush instruction at
boot if the hypervisor tells us to.

In the end this patch is mostly the work of Nicholas Piggin and
Michael Ellerman. However, a cast of thousands contributed to analysis
of the issue, earlier versions of the patch, backports, testing etc.
Many thanks to all of them.

Tested-by: Jon Masters <jcm@redhat.com>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
[Balbir - back ported to stable with changes]
Signed-off-by: Balbir Singh <bsingharora@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
arch/powerpc/include/asm/exception-64s.h
arch/powerpc/include/asm/feature-fixups.h
arch/powerpc/include/asm/paca.h
arch/powerpc/include/asm/setup.h
arch/powerpc/kernel/asm-offsets.c
arch/powerpc/kernel/exceptions-64s.S
arch/powerpc/kernel/setup_64.c
arch/powerpc/kernel/vmlinux.lds.S
arch/powerpc/lib/feature-fixups.c

index 6771cbe44594154eb5008a2f7f3475da5351849e..cab6d2a46c415ca311c7fa4e06e2925a2e6d4e47 100644 (file)
 #define EX_PPR         88      /* SMT thread status register (priority) */
 #define EX_CTR         96
 
-/* Macros for annotating the expected destination of (h)rfid */
+/*
+ * Macros for annotating the expected destination of (h)rfid
+ *
+ * The nop instructions allow us to insert one or more instructions to flush the
+ * L1-D cache when returning to userspace or a guest.
+ */
+#define RFI_FLUSH_SLOT                                                 \
+       RFI_FLUSH_FIXUP_SECTION;                                        \
+       nop;                                                            \
+       nop;                                                            \
+       nop
 
 #define RFI_TO_KERNEL                                                  \
        rfid
 
 #define RFI_TO_USER                                                    \
-       rfid
+       RFI_FLUSH_SLOT;                                                 \
+       rfid;                                                           \
+       b       rfi_flush_fallback
 
 #define RFI_TO_USER_OR_KERNEL                                          \
-       rfid
+       RFI_FLUSH_SLOT;                                                 \
+       rfid;                                                           \
+       b       rfi_flush_fallback
 
 #define RFI_TO_GUEST                                                   \
-       rfid
+       RFI_FLUSH_SLOT;                                                 \
+       rfid;                                                           \
+       b       rfi_flush_fallback
 
 #define HRFI_TO_KERNEL                                                 \
        hrfid
 
 #define HRFI_TO_USER                                                   \
-       hrfid
+       RFI_FLUSH_SLOT;                                                 \
+       hrfid;                                                          \
+       b       hrfi_flush_fallback
 
 #define HRFI_TO_USER_OR_KERNEL                                         \
-       hrfid
+       RFI_FLUSH_SLOT;                                                 \
+       hrfid;                                                          \
+       b       hrfi_flush_fallback
 
 #define HRFI_TO_GUEST                                                  \
-       hrfid
+       RFI_FLUSH_SLOT;                                                 \
+       hrfid;                                                          \
+       b       hrfi_flush_fallback
 
 #define HRFI_TO_UNKNOWN                                                        \
-       hrfid
+       RFI_FLUSH_SLOT;                                                 \
+       hrfid;                                                          \
+       b       hrfi_flush_fallback
 
 #ifdef CONFIG_RELOCATABLE
 #define __EXCEPTION_RELON_PROLOG_PSERIES_1(label, h)                   \
index ddf54f5bbdd1c05efbd286ec305beac3c459d8d6..7b332342071c2d2a7a051a3679426a758d470176 100644 (file)
@@ -189,4 +189,19 @@ void apply_feature_fixups(void);
 void setup_feature_keys(void);
 #endif
 
+/*
+ * Record the location of the code that follows this macro (local label 951)
+ * as one entry in the __rfi_flush_fixup section. The entry is emitted with
+ * FTR_ENTRY_OFFSET as 951b-952b, i.e. relative to the entry itself (local
+ * label 952); do_rfi_flush_fixups() recovers the code address by adding the
+ * stored value to the address of the entry.
+ */
+#define RFI_FLUSH_FIXUP_SECTION                                \
+951:                                                   \
+       .pushsection __rfi_flush_fixup,"a";             \
+       .align 2;                                       \
+952:                                                   \
+       FTR_ENTRY_OFFSET 951b-952b;                     \
+       .popsection;
+
+
+#ifndef __ASSEMBLY__
+
+extern long __start___rfi_flush_fixup, __stop___rfi_flush_fixup;
+
+#endif
+
 #endif /* __ASM_POWERPC_FEATURE_FIXUPS_H */
index 6a6792bb39fbc8616034732322bd497ed30c5961..ea43897183fda6057f5f6270eb8465bc174b5219 100644 (file)
@@ -205,6 +205,16 @@ struct paca_struct {
        struct sibling_subcore_state *sibling_subcore_state;
 #endif
 #endif
+#ifdef CONFIG_PPC_BOOK3S_64
+       /*
+        * rfi fallback flush must be in its own cacheline to prevent
+        * other paca data leaking into the L1d
+        */
+       u64 exrfi[13] __aligned(0x80);
+       void *rfi_flush_fallback_area;
+       u64 l1d_flush_congruence;
+       u64 l1d_flush_sets;
+#endif
 };
 
 #ifdef CONFIG_PPC_BOOK3S
index 654d64c9f3acd8366fdcf109111f3b5c16cdde21..6825a67cc3db33936416b8fc1053efc21f847f7c 100644 (file)
@@ -38,6 +38,19 @@ static inline void pseries_big_endian_exceptions(void) {}
 static inline void pseries_little_endian_exceptions(void) {}
 #endif /* CONFIG_PPC_PSERIES */
 
+/* Patch the RFI flush slots in (true) or back out to nops (false). */
+void rfi_flush_enable(bool enable);
+
+/*
+ * L1-D flush mechanisms that can be patched into the RFI flush slots.
+ *
+ * These are bit flags: several flush types may be requested together.
+ */
+enum l1d_flush_type {
+       L1D_FLUSH_NONE          = 0x1,  /* leave the slots as nops */
+       L1D_FLUSH_FALLBACK      = 0x2,  /* branch to the displacement flush */
+       L1D_FLUSH_ORI           = 0x4,  /* ori 31,31,0 ; ori 30,30,0 */
+       L1D_FLUSH_MTTRIG        = 0x8,  /* mtspr TRIG2,r0 */
+};
+
+/* Record the available flush types and apply the initial enable state. */
+void __init setup_rfi_flush(enum l1d_flush_type types, bool enable);
+/* Rewrite every RFI flush slot according to the given flush types. */
+void do_rfi_flush_fixups(enum l1d_flush_type types);
+
 #endif /* !__ASSEMBLY__ */
 
 #endif /* _ASM_POWERPC_SETUP_H */
index c833d88c423d8be96859a5bb5f2cd37b87ece0a0..64bcbd5804950397b96b6ab1b64ff369747b56f2 100644 (file)
@@ -240,6 +240,10 @@ int main(void)
 #ifdef CONFIG_PPC_BOOK3S_64
        DEFINE(PACAMCEMERGSP, offsetof(struct paca_struct, mc_emergency_sp));
        DEFINE(PACA_IN_MCE, offsetof(struct paca_struct, in_mce));
+       DEFINE(PACA_RFI_FLUSH_FALLBACK_AREA, offsetof(struct paca_struct, rfi_flush_fallback_area));
+       DEFINE(PACA_EXRFI, offsetof(struct paca_struct, exrfi));
+       DEFINE(PACA_L1D_FLUSH_CONGRUENCE, offsetof(struct paca_struct, l1d_flush_congruence));
+       DEFINE(PACA_L1D_FLUSH_SETS, offsetof(struct paca_struct, l1d_flush_sets));
 #endif
        DEFINE(PACAHWCPUID, offsetof(struct paca_struct, hw_cpu_id));
        DEFINE(PACAKEXECSTATE, offsetof(struct paca_struct, kexec_state));
index fc72f81411c4e3874ec586ec85e79d864e455503..96db6c3adebe0e314c129a37093e4dc39fcc7fd3 100644 (file)
@@ -1594,6 +1594,92 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
        bl      kernel_bad_stack
        b       1b
 
+/*
+ * Software (displacement) flush of the L1-D, followed by rfid.
+ *
+ * This is the target of the "b rfi_flush_fallback" that follows rfid in the
+ * RFI_TO_* macros; that branch is reached once the flush slot in front of
+ * the rfid has been patched with "b .+16" (L1D_FLUSH_FALLBACK).
+ *
+ * r8-r12 are saved in and restored from the paca's exrfi area, CTR is held
+ * in r9 for the duration of the loop, and r13 is stashed with SET_SCRATCH0
+ * and recovered with GET_SCRATCH0.
+ */
+       .globl rfi_flush_fallback
+rfi_flush_fallback:
+       SET_SCRATCH0(r13);
+       GET_PACA(r13);
+       std     r9,PACA_EXRFI+EX_R9(r13)
+       std     r10,PACA_EXRFI+EX_R10(r13)
+       std     r11,PACA_EXRFI+EX_R11(r13)
+       std     r12,PACA_EXRFI+EX_R12(r13)
+       /* r8 is saved in the EX_R13 slot of the exrfi area */
+       std     r8,PACA_EXRFI+EX_R13(r13)
+       /* CTR is used as the loop counter below, keep the old value in r9 */
+       mfctr   r9
+       /* r10 = flush area, r11 = number of sets, r12 = congruence size */
+       ld      r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)
+       ld      r11,PACA_L1D_FLUSH_SETS(r13)
+       ld      r12,PACA_L1D_FLUSH_CONGRUENCE(r13)
+       /*
+        * The load addresses are at staggered offsets within cachelines,
+        * which suits some pipelines better (on others it should not
+        * hurt).
+        */
+       addi    r12,r12,8
+       mtctr   r11
+       DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
+
+       /* order ld/st prior to dcbt stop all streams with flushing */
+       sync
+       /*
+        * One iteration per set: eight loads starting at r10, each r12 bytes
+        * past the previous one, then r10 advances to the next cache line.
+        */
+1:     li      r8,0
+       .rept   8 /* 8-way set associative */
+       ldx     r11,r10,r8
+       add     r8,r8,r12
+       xor     r11,r11,r11     // Ensure r11 is 0 even if fallback area is not
+       add     r8,r8,r11       // Add 0, this creates a dependency on the ldx
+       .endr
+       addi    r10,r10,128 /* 128 byte cache line */
+       bdnz    1b
+
+       /* Put back CTR and the saved GPRs, then return with rfid */
+       mtctr   r9
+       ld      r9,PACA_EXRFI+EX_R9(r13)
+       ld      r10,PACA_EXRFI+EX_R10(r13)
+       ld      r11,PACA_EXRFI+EX_R11(r13)
+       ld      r12,PACA_EXRFI+EX_R12(r13)
+       ld      r8,PACA_EXRFI+EX_R13(r13)
+       GET_SCRATCH0(r13);
+       rfid
+
+/*
+ * Software (displacement) flush of the L1-D, followed by hrfid.
+ *
+ * This is the target of the "b hrfi_flush_fallback" that follows hrfid in
+ * the HRFI_TO_* macros; that branch is reached once the flush slot in front
+ * of the hrfid has been patched with "b .+16" (L1D_FLUSH_FALLBACK).
+ *
+ * The body is the same sequence as rfi_flush_fallback; only the final
+ * return instruction differs.
+ */
+       .globl hrfi_flush_fallback
+hrfi_flush_fallback:
+       SET_SCRATCH0(r13);
+       GET_PACA(r13);
+       std     r9,PACA_EXRFI+EX_R9(r13)
+       std     r10,PACA_EXRFI+EX_R10(r13)
+       std     r11,PACA_EXRFI+EX_R11(r13)
+       std     r12,PACA_EXRFI+EX_R12(r13)
+       /* r8 is saved in the EX_R13 slot of the exrfi area */
+       std     r8,PACA_EXRFI+EX_R13(r13)
+       /* CTR is used as the loop counter below, keep the old value in r9 */
+       mfctr   r9
+       /* r10 = flush area, r11 = number of sets, r12 = congruence size */
+       ld      r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)
+       ld      r11,PACA_L1D_FLUSH_SETS(r13)
+       ld      r12,PACA_L1D_FLUSH_CONGRUENCE(r13)
+       /*
+        * The load addresses are at staggered offsets within cachelines,
+        * which suits some pipelines better (on others it should not
+        * hurt).
+        */
+       addi    r12,r12,8
+       mtctr   r11
+       DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
+
+       /* order ld/st prior to dcbt stop all streams with flushing */
+       sync
+       /*
+        * One iteration per set: eight loads starting at r10, each r12 bytes
+        * past the previous one, then r10 advances to the next cache line.
+        */
+1:     li      r8,0
+       .rept   8 /* 8-way set associative */
+       ldx     r11,r10,r8
+       add     r8,r8,r12
+       xor     r11,r11,r11     // Ensure r11 is 0 even if fallback area is not
+       add     r8,r8,r11       // Add 0, this creates a dependency on the ldx
+       .endr
+       addi    r10,r10,128 /* 128 byte cache line */
+       bdnz    1b
+
+       /* Put back CTR and the saved GPRs, then return with hrfid */
+       mtctr   r9
+       ld      r9,PACA_EXRFI+EX_R9(r13)
+       ld      r10,PACA_EXRFI+EX_R10(r13)
+       ld      r11,PACA_EXRFI+EX_R11(r13)
+       ld      r12,PACA_EXRFI+EX_R12(r13)
+       ld      r8,PACA_EXRFI+EX_R13(r13)
+       GET_SCRATCH0(r13);
+       hrfid
+
 /*
  * Called from arch_local_irq_enable when an interrupt needs
  * to be resent. r3 contains 0x500, 0x900, 0xa00 or 0xe80 to indicate
index a12be60181bf8def5dd68f165a3bcd4d7df53632..849d086288c6cd4f2673b5a79835fa5b0d143054 100644 (file)
@@ -678,4 +678,83 @@ static int __init disable_hardlockup_detector(void)
        return 0;
 }
 early_initcall(disable_hardlockup_detector);
+
+#ifdef CONFIG_PPC_BOOK3S_64
+/* Flush types recorded by setup_rfi_flush(); patched in when the flush is enabled */
+static enum l1d_flush_type enabled_flush_types;
+/* Buffer loaded from by the fallback flush; allocated in init_fallback_flush() */
+static void *l1d_flush_fallback_area;
+/* Current enable state; only written by rfi_flush_enable() */
+bool rfi_flush;
+
+/*
+ * Deliberately empty callback handed to on_each_cpu() by rfi_flush_enable().
+ * The argument is ignored.
+ */
+static void do_nothing(void *unused)
+{
+       /*
+        * We don't need to do the flush explicitly, just enter+exit kernel is
+        * sufficient, the RFI exit handlers will do the right thing.
+        */
+}
+
+/*
+ * Switch the RFI flush on or off.
+ *
+ * Enabling patches the flush slots with the types recorded in
+ * enabled_flush_types and then runs an empty function on every CPU;
+ * disabling patches the slots back with L1D_FLUSH_NONE. The new state is
+ * remembered in rfi_flush, and a request for the current state is a no-op.
+ */
+void rfi_flush_enable(bool enable)
+{
+       /* Already in the requested state: nothing to patch. */
+       if (enable == rfi_flush)
+               return;
+
+       if (!enable) {
+               do_rfi_flush_fixups(L1D_FLUSH_NONE);
+       } else {
+               do_rfi_flush_fixups(enabled_flush_types);
+               on_each_cpu(do_nothing, NULL, 1);
+       }
+
+       rfi_flush = enable;
+}
+
+/*
+ * Allocate and zero the buffer used by the fallback displacement flush, then
+ * publish its address and the flush geometry in the paca of every possible
+ * CPU.
+ */
+static void init_fallback_flush(void)
+{
+       u64 l1d_size = ppc64_caches.dsize;
+       u64 limit = min(safe_stack_limit(), ppc64_rma_size);
+       u64 congruence, nr_sets;
+       int cpu;
+
+       /*
+        * The area is aligned to the L1d size and is twice that size, so
+        * that possible hardware prefetch runoff is caught as well. We don't
+        * have a recipe for load patterns to reliably avoid the prefetcher.
+        */
+       l1d_flush_fallback_area = __va(memblock_alloc_base(l1d_size * 2, l1d_size, limit));
+       memset(l1d_flush_fallback_area, 0, l1d_size * 2);
+
+       /*
+        * The fallback flush is currently coded for 8-way associativity.
+        * Different associativity is possible, but it will be treated as
+        * 8-way and may not evict the lines as effectively.
+        *
+        * 128 byte lines are mandatory.
+        */
+       congruence = l1d_size / 8;
+       nr_sets = congruence / 128;
+
+       /* The geometry is the same for every CPU, only the paca differs. */
+       for_each_possible_cpu(cpu) {
+               paca[cpu].rfi_flush_fallback_area = l1d_flush_fallback_area;
+               paca[cpu].l1d_flush_congruence = congruence;
+               paca[cpu].l1d_flush_sets = nr_sets;
+       }
+}
+
+/*
+ * Record which L1-D flush types are available and apply the initial state.
+ *
+ * @types:  bit mask of enum l1d_flush_type values; each selected type is
+ *          reported, and the fallback type also gets its buffer set up.
+ * @enable: initial state handed to rfi_flush_enable().
+ */
+void __init setup_rfi_flush(enum l1d_flush_type types, bool enable)
+{
+       if ((types & L1D_FLUSH_FALLBACK) != 0) {
+               pr_info("rfi-flush: Using fallback displacement flush\n");
+               init_fallback_flush();
+       }
+
+       if ((types & L1D_FLUSH_ORI) != 0) {
+               pr_info("rfi-flush: Using ori type flush\n");
+       }
+
+       if ((types & L1D_FLUSH_MTTRIG) != 0) {
+               pr_info("rfi-flush: Using mttrig type flush\n");
+       }
+
+       /* Must be recorded before enabling: rfi_flush_enable() reads it. */
+       enabled_flush_types = types;
+
+       rfi_flush_enable(enable);
+}
+#endif /* CONFIG_PPC_BOOK3S_64 */
 #endif
index 7394b770ae1f6b2402ef2b04fe23e92eae2c7ee2..b61fb7902018e82e68a2b2f5042de63136adb276 100644 (file)
@@ -132,6 +132,15 @@ SECTIONS
        /* Read-only data */
        RODATA
 
+#ifdef CONFIG_PPC64
+       /*
+        * Table of RFI flush slot locations. The entries come from the
+        * __rfi_flush_fixup input sections; the __start/__stop symbols
+        * bound the table that do_rfi_flush_fixups() walks.
+        */
+       . = ALIGN(8);
+       __rfi_flush_fixup : AT(ADDR(__rfi_flush_fixup) - LOAD_OFFSET) {
+               __start___rfi_flush_fixup = .;
+               *(__rfi_flush_fixup)
+               __stop___rfi_flush_fixup = .;
+       }
+#endif
+
        EXCEPTION_TABLE(0)
 
        NOTES :kernel :notes
index 043415f0bdb1646fa85f7bb26d04f0241c68ff63..e86bfa111f3c9774350bbdce4c08a3d530efa1d9 100644 (file)
@@ -23,6 +23,7 @@
 #include <asm/sections.h>
 #include <asm/setup.h>
 #include <asm/firmware.h>
+#include <asm/setup.h>
 
 struct fixup_entry {
        unsigned long   mask;
@@ -115,6 +116,47 @@ void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end)
        }
 }
 
+#ifdef CONFIG_PPC_BOOK3S_64
+/*
+ * Rewrite every RFI flush slot listed in the __rfi_flush_fixup section.
+ *
+ * A slot is three instructions wide and sits directly in front of an
+ * (h)rfid. All three instructions are patched to nops unless @types selects
+ * a flush:
+ *   - L1D_FLUSH_FALLBACK: the first instruction becomes "b .+16", which
+ *     lands on the branch to the fallback flush placed after the (h)rfid.
+ *   - L1D_FLUSH_ORI:      "ori 31,31,0" followed by "ori 30,30,0".
+ *   - L1D_FLUSH_MTTRIG:   "mtspr TRIG2,r0".
+ *
+ * The ORI and MTTRIG instructions are packed from the start of the slot, so
+ * when either is requested together with the fallback type it replaces the
+ * fallback branch.
+ */
+void do_rfi_flush_fixups(enum l1d_flush_type types)
+{
+       unsigned int instrs[3], *dest;
+       long *start, *end;
+       int i;
+
+       start = PTRRELOC(&__start___rfi_flush_fixup);
+       end = PTRRELOC(&__stop___rfi_flush_fixup);
+
+       instrs[0] = 0x60000000; /* nop */
+       instrs[1] = 0x60000000; /* nop */
+       instrs[2] = 0x60000000; /* nop */
+
+       if (types & L1D_FLUSH_FALLBACK)
+               /* b .+16 to fallback flush */
+               instrs[0] = 0x48000010;
+
+       /* Here i is the next free position within the slot */
+       i = 0;
+       if (types & L1D_FLUSH_ORI) {
+               instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */
+               instrs[i++] = 0x63de0000; /* ori 30,30,0 L1d flush*/
+       }
+
+       if (types & L1D_FLUSH_MTTRIG)
+               instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */
+
+       /* From here on i counts the locations that have been patched */
+       for (i = 0; start < end; start++, i++) {
+               /* Each entry holds the offset from itself to its slot */
+               dest = (void *)start + *start;
+
+               pr_devel("patching dest %lx\n", (unsigned long)dest);
+
+               patch_instruction(dest, instrs[0]);
+               patch_instruction(dest + 1, instrs[1]);
+               patch_instruction(dest + 2, instrs[2]);
+       }
+
+       printk(KERN_DEBUG "rfi-flush: patched %d locations\n", i);
+}
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
 void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end)
 {
        long *start, *end;