From fd0e786d9d09024f67bd71ec094b110237dc3840 Mon Sep 17 00:00:00 2001
From: Tony Luck <tony.luck@intel.com>
Date: Thu, 25 Jan 2018 14:23:48 -0800
Subject: x86/mm, mm/hwpoison: Don't unconditionally unmap kernel 1:1 pages

From: Tony Luck <tony.luck@intel.com>

commit fd0e786d9d09024f67bd71ec094b110237dc3840 upstream.

In the following commit:

  ce0fa3e56ad2 ("x86/mm, mm/hwpoison: Clear PRESENT bit for kernel 1:1 mappings of poison pages")

... we added code to memory_failure() to unmap the page from the
kernel 1:1 virtual address space to avoid speculative access to the
page logging additional errors.

But memory_failure() may not always succeed in taking the page offline,
especially if the page belongs to the kernel. This can happen if
there are too many corrected errors on a page and either mcelog(8)
or drivers/ras/cec.c asks to take a page offline.

Since we remove the 1:1 mapping early in memory_failure(), we can
end up with the page unmapped, but still in use. On the next access
the kernel crashes :-(

There are also various debug paths that call memory_failure() to simulate
occurrence of an error. Since there is no actual error in memory, we
don't need to map out the page for those cases.

Revert most of the previous attempt and keep the solution local to
arch/x86/kernel/cpu/mcheck/mce.c. Unmap the page only when:

  1) there is a real error
  2) memory_failure() succeeds.

All of this only applies to 64-bit systems. A 32-bit kernel doesn't map
all of memory into kernel space. It isn't worth adding the code to unmap
the piece that is mapped because nobody would run a 32-bit kernel on a
machine that has recoverable machine checks.

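A minimal sketch of the resulting control flow (editorial, not part of the patch; the wrapper name is hypothetical, while memory_failure(), MCE_VECTOR and mce_unmap_kpfn() are the identifiers used in the diff below):

static void handle_ao_error_sketch(unsigned long pfn)
{
	/* memory_failure() returns 0 only when the page really was taken offline */
	if (!memory_failure(pfn, MCE_VECTOR, 0))
		mce_unmap_kpfn(pfn);	/* only then is it safe to drop the 1:1 mapping */
}
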
Signed-off-by: Tony Luck <tony.luck@intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@suse.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave <dave.hansen@intel.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Robert (Persistent Memory) <elliott@hpe.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-mm@kvack.org
Cc: stable@vger.kernel.org #v4.14
Fixes: ce0fa3e56ad2 ("x86/mm, mm/hwpoison: Clear PRESENT bit for kernel 1:1 mappings of poison pages")
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 arch/x86/include/asm/page_64.h            |    4 ----
 arch/x86/kernel/cpu/mcheck/mce-internal.h |   15 +++++++++++++++
 arch/x86/kernel/cpu/mcheck/mce.c          |   17 +++++++++++------
 include/linux/mm_inline.h                 |    6 ------
 mm/memory-failure.c                       |    2 --
 5 files changed, 26 insertions(+), 18 deletions(-)

--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -52,10 +52,6 @@ static inline void clear_page(void *page
 
 void copy_page(void *to, void *from);
 
-#ifdef CONFIG_X86_MCE
-#define arch_unmap_kpfn arch_unmap_kpfn
-#endif
-
 #endif /* !__ASSEMBLY__ */
 
 #ifdef CONFIG_X86_VSYSCALL_EMULATION
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -115,4 +115,19 @@ static inline void mce_unregister_inject
 
 extern struct mca_config mca_cfg;
 
+#ifndef CONFIG_X86_64
+/*
+ * On 32-bit systems it would be difficult to safely unmap a poison page
+ * from the kernel 1:1 map because there are no non-canonical addresses that
+ * we can use to refer to the address without risking a speculative access.
+ * However, this isn't much of an issue because:
+ * 1) Few unmappable pages are in the 1:1 map. Most are in HIGHMEM which
+ *    are only mapped into the kernel as needed
+ * 2) Few people would run a 32-bit kernel on a machine that supports
+ *    recoverable errors because they have too much memory to boot 32-bit.
+ */
+static inline void mce_unmap_kpfn(unsigned long pfn) {}
+#define mce_unmap_kpfn mce_unmap_kpfn
+#endif
+
 #endif /* __X86_MCE_INTERNAL_H__ */
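Editorial note, not part of the patch: the "#define mce_unmap_kpfn mce_unmap_kpfn" line above is the usual kernel idiom for letting a header signal that it already provides the function, so mce.c can guard its own version with #ifndef. A condensed, illustrative view of how the two files interact (names taken from the diff):

/* mce-internal.h: 32-bit builds get a no-op stub and mark it as provided */
#ifndef CONFIG_X86_64
static inline void mce_unmap_kpfn(unsigned long pfn) {}
#define mce_unmap_kpfn mce_unmap_kpfn
#endif

/* mce.c: declare and define the real helper only when no stub exists */
#ifndef mce_unmap_kpfn
static void mce_unmap_kpfn(unsigned long pfn);
#endif
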
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -106,6 +106,10 @@ static struct irq_work mce_irq_work;
 
 static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs);
 
+#ifndef mce_unmap_kpfn
+static void mce_unmap_kpfn(unsigned long pfn);
+#endif
+
 /*
  * CPU/chipset specific EDAC code can register a notifier call here to print
  * MCE errors in a human-readable form.
@@ -582,7 +586,8 @@ static int srao_decode_notifier(struct n
 
 	if (mce_usable_address(mce) && (mce->severity == MCE_AO_SEVERITY)) {
 		pfn = mce->addr >> PAGE_SHIFT;
-		memory_failure(pfn, MCE_VECTOR, 0);
+		if (!memory_failure(pfn, MCE_VECTOR, 0))
+			mce_unmap_kpfn(pfn);
 	}
 
 	return NOTIFY_OK;
@@ -1049,12 +1054,13 @@ static int do_memory_failure(struct mce
 	ret = memory_failure(m->addr >> PAGE_SHIFT, MCE_VECTOR, flags);
 	if (ret)
 		pr_err("Memory error not recovered");
+	else
+		mce_unmap_kpfn(m->addr >> PAGE_SHIFT);
 	return ret;
 }
 
-#if defined(arch_unmap_kpfn) && defined(CONFIG_MEMORY_FAILURE)
-
-void arch_unmap_kpfn(unsigned long pfn)
+#ifndef mce_unmap_kpfn
+static void mce_unmap_kpfn(unsigned long pfn)
 {
 	unsigned long decoy_addr;
 
@@ -1065,7 +1071,7 @@ void arch_unmap_kpfn(unsigned long pfn)
 	 * We would like to just call:
 	 *	set_memory_np((unsigned long)pfn_to_kaddr(pfn), 1);
 	 * but doing that would radically increase the odds of a
-	 * speculative access to the posion page because we'd have
+	 * speculative access to the poison page because we'd have
 	 * the virtual address of the kernel 1:1 mapping sitting
 	 * around in registers.
 	 * Instead we get tricky. We create a non-canonical address
@@ -1090,7 +1096,6 @@ void arch_unmap_kpfn(unsigned long pfn)
 
 	if (set_memory_np(decoy_addr, 1))
 		pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn);
-
 }
 #endif
 
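Editorial note, not part of the patch: the "non-canonical address" trick referenced in the hunk above works roughly as sketched below. The function name is hypothetical and the decoy expression is recalled from the upstream helper (the hunks here only show its surroundings), so treat it as illustrative; set_memory_np() and pr_warn() are the calls visible in the diff, and PAGE_SHIFT, PAGE_OFFSET and BIT() are standard kernel macros.

static void mce_unmap_kpfn_sketch(unsigned long pfn)
{
	unsigned long decoy_addr;

	/*
	 * Alias the 1:1 mapping of the pfn with bit 63 flipped, so the real
	 * kernel virtual address of the poisoned page never sits in a
	 * register where a speculative load could dereference it.
	 */
	decoy_addr = (pfn << PAGE_SHIFT) + (PAGE_OFFSET ^ BIT(63));

	/* set_memory_np() clears the PRESENT bit on the underlying mapping. */
	if (set_memory_np(decoy_addr, 1))
		pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn);
}
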
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -127,10 +127,4 @@ static __always_inline enum lru_list pag
 
 #define lru_to_page(head) (list_entry((head)->prev, struct page, lru))
 
-#ifdef arch_unmap_kpfn
-extern void arch_unmap_kpfn(unsigned long pfn);
-#else
-static __always_inline void arch_unmap_kpfn(unsigned long pfn) { }
-#endif
-
 #endif
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1146,8 +1146,6 @@ int memory_failure(unsigned long pfn, in
 		return 0;
 	}
 
-	arch_unmap_kpfn(pfn);
-
 	orig_head = hpage = compound_head(p);
 	num_poisoned_pages_inc();
 