From fd89a137924e0710078c3ae855e7cec1c43cb845 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Mon, 16 Aug 2010 14:38:33 +0200
Subject: x86-32: Separate 1:1 pagetables from swapper_pg_dir

From: Joerg Roedel <joerg.roedel@amd.com>

commit fd89a137924e0710078c3ae855e7cec1c43cb845 upstream.

This patch fixes machine crashes which occur when heavily exercising the
CPU hotplug codepaths on a 32-bit kernel. These crashes are caused by
AMD Erratum 383 and result in a fatal machine check exception. Here's
the scenario:

1. On 32-bit, the swapper_pg_dir page table is used as the initial page
table for booting a secondary CPU.

2. To make this work, swapper_pg_dir needs a direct mapping of physical
memory in it (the low mappings). By adding those low, large page (2M)
mappings (PAE kernel), we create the necessary conditions for Erratum
383 to occur.

3. Other CPUs which do not participate in the off- and onlining game may
use swapper_pg_dir while the low mappings are present (when leave_mm is
called). For all steps below, the CPU referred to is a CPU that is using
swapper_pg_dir, and not the CPU which is being onlined.

4. The presence of the low mappings in swapper_pg_dir can result
in TLB entries for addresses below __PAGE_OFFSET being established
speculatively. These TLB entries are marked global and large.

5. When the CPU with such a TLB entry switches to another page table, this
TLB entry remains because it is global.

6. The process then generates an access to an address covered by the
above TLB entry but there is a permission mismatch - the TLB entry
covers a large global page not accessible to userspace.

7. Due to this permission mismatch a new 4kb, user TLB entry gets
established. Further, Erratum 383 provides for a small window of time
where both TLB entries are present. This results in an uncorrectable
machine check exception signalling a TLB multimatch which panics the
machine.

There are two ways to fix this issue:

1. Always do a global TLB flush when a new cr3 is loaded and the
old page table was swapper_pg_dir. I consider this a hack that is hard
to understand and that has performance implications.

2. Do not use swapper_pg_dir to boot secondary CPUs like 64-bit
does.

This patch implements solution 2. It introduces a trampoline_pg_dir
which has the same layout as swapper_pg_dir with low_mappings. This page
table is used as the initial page table of the booting CPU. Later in the
bringup process, it switches to swapper_pg_dir and does a global TLB
flush. This fixes the crashes in our test cases.

-v2: switch to swapper_pg_dir right after entering start_secondary() so
that we are able to access percpu data which might not be mapped in the
trampoline page table.

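A minimal sketch of the resulting 32-bit bring-up flow, condensed from the
hunks below (illustrative only; unrelated code is elided):

  /* Boot CPU, called from setup_arch() after paging_init(): */
  void __init setup_trampoline_page_table(void)
  {
  #ifdef CONFIG_X86_32
  	/* Kernel half: same entries as swapper_pg_dir */
  	clone_pgd_range(trampoline_pg_dir + KERNEL_PGD_BOUNDARY,
  			swapper_pg_dir + KERNEL_PGD_BOUNDARY,
  			min_t(unsigned long, KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY));
  	/* Low half: the 1:1 mappings the AP needs while paging is enabled */
  	clone_pgd_range(trampoline_pg_dir,
  			swapper_pg_dir + KERNEL_PGD_BOUNDARY,
  			min_t(unsigned long, KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY));
  #endif
  }

  /* Secondary CPU, at the very top of start_secondary(): */
  notrace static void __cpuinit start_secondary(void *unused)
  {
  #ifdef CONFIG_X86_32
  	load_cr3(swapper_pg_dir);	/* leave the trampoline page table */
  	__flush_tlb_all();		/* flush its global low-mapping TLB entries */
  #endif
  	/* ... the rest of the existing secondary bring-up ... */
  }
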
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
LKML-Reference: <20100816123833.GB28147@aftab>
Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

---
 arch/x86/include/asm/pgtable_32.h |    1 +
 arch/x86/include/asm/trampoline.h |    3 +++
 arch/x86/kernel/head_32.S         |    8 +++++++-
 arch/x86/kernel/setup.c           |    2 ++
 arch/x86/kernel/smpboot.c         |   32 +++++++++++++-------------------
 arch/x86/kernel/trampoline.c      |   18 ++++++++++++++++++
 6 files changed, 44 insertions(+), 20 deletions(-)

--- a/arch/x86/include/asm/pgtable_32.h
+++ b/arch/x86/include/asm/pgtable_32.h
@@ -26,6 +26,7 @@ struct mm_struct;
 struct vm_area_struct;
 
 extern pgd_t swapper_pg_dir[1024];
+extern pgd_t trampoline_pg_dir[1024];
 
 static inline void pgtable_cache_init(void) { }
 static inline void check_pgt_cache(void) { }
--- a/arch/x86/include/asm/trampoline.h
+++ b/arch/x86/include/asm/trampoline.h
@@ -13,14 +13,17 @@ extern unsigned char *trampoline_base;
 
 extern unsigned long init_rsp;
 extern unsigned long initial_code;
+extern unsigned long initial_page_table;
 extern unsigned long initial_gs;
 
 #define TRAMPOLINE_SIZE roundup(trampoline_end - trampoline_data, PAGE_SIZE)
 
 extern unsigned long setup_trampoline(void);
+extern void __init setup_trampoline_page_table(void);
 extern void __init reserve_trampoline_memory(void);
 #else
 static inline void reserve_trampoline_memory(void) {};
+extern void __init setup_trampoline_page_table(void) {};
 #endif /* CONFIG_X86_TRAMPOLINE */
 
 #endif /* __ASSEMBLY__ */
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -328,7 +328,7 @@ ENTRY(startup_32_smp)
 /*
  * Enable paging
  */
-	movl $pa(swapper_pg_dir),%eax
+	movl pa(initial_page_table), %eax
 	movl %eax,%cr3		/* set the page table pointer.. */
 	movl %cr0,%eax
 	orl $X86_CR0_PG,%eax
@@ -608,6 +608,8 @@ ignore_int:
 	.align 4
 ENTRY(initial_code)
 	.long i386_start_kernel
+ENTRY(initial_page_table)
+	.long pa(swapper_pg_dir)
 
 /*
  * BSS section
@@ -623,6 +625,10 @@ ENTRY(swapper_pg_dir)
 #endif
 swapper_pg_fixmap:
 	.fill 1024,4,0
+#ifdef CONFIG_X86_TRAMPOLINE
+ENTRY(trampoline_pg_dir)
+	.fill 1024,4,0
+#endif
 ENTRY(empty_zero_page)
 	.fill 4096,1,0
 
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1008,6 +1008,8 @@ void __init setup_arch(char **cmdline_p)
 	paging_init();
 	x86_init.paging.pagetable_setup_done(swapper_pg_dir);
 
+	setup_trampoline_page_table();
+
 	tboot_probe();
 
 #ifdef CONFIG_X86_64
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -73,7 +73,6 @@
 
 #ifdef CONFIG_X86_32
 u8 apicid_2_node[MAX_APICID];
-static int low_mappings;
 #endif
 
 /* State of each CPU */
@@ -300,6 +299,18 @@ notrace static void __cpuinit start_seco
 	 * fragile that we want to limit the things done here to the
 	 * most necessary things.
 	 */
+
+#ifdef CONFIG_X86_32
+	/*
+	 * Switch away from the trampoline page-table
+	 *
+	 * Do this before cpu_init() because it needs to access per-cpu
+	 * data which may not be mapped in the trampoline page-table.
+	 */
+	load_cr3(swapper_pg_dir);
+	__flush_tlb_all();
+#endif
+
 	vmi_bringup();
 	cpu_init();
 	preempt_disable();
@@ -318,12 +329,6 @@ notrace static void __cpuinit start_seco
 		legacy_pic->chip->unmask(0);
 	}
 
-#ifdef CONFIG_X86_32
-	while (low_mappings)
-		cpu_relax();
-	__flush_tlb_all();
-#endif
-
 	/* This must be done before setting cpu_online_mask */
 	set_cpu_sibling_map(raw_smp_processor_id());
 	wmb();
@@ -773,6 +778,7 @@ do_rest:
 #ifdef CONFIG_X86_32
 	/* Stack for startup_32 can be just as for start_secondary onwards */
 	irq_ctx_init(cpu);
+	initial_page_table = __pa(&trampoline_pg_dir);
 #else
 	clear_tsk_thread_flag(c_idle.idle, TIF_FORK);
 	initial_gs = per_cpu_offset(cpu);
@@ -920,20 +926,8 @@ int __cpuinit native_cpu_up(unsigned int
 
 	per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
 
-#ifdef CONFIG_X86_32
-	/* init low mem mapping */
-	clone_pgd_range(swapper_pg_dir, swapper_pg_dir + KERNEL_PGD_BOUNDARY,
-		min_t(unsigned long, KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY));
-	flush_tlb_all();
-	low_mappings = 1;
-
 	err = do_boot_cpu(apicid, cpu);
 
-	zap_low_mappings(false);
-	low_mappings = 0;
-#else
-	err = do_boot_cpu(apicid, cpu);
-#endif
 	if (err) {
 		pr_debug("do_boot_cpu failed %d\n", err);
 		return -EIO;
--- a/arch/x86/kernel/trampoline.c
+++ b/arch/x86/kernel/trampoline.c
@@ -1,6 +1,7 @@
 #include <linux/io.h>
 
 #include <asm/trampoline.h>
+#include <asm/pgtable.h>
 #include <asm/e820.h>
 
 #if defined(CONFIG_X86_64) && defined(CONFIG_ACPI_SLEEP)
@@ -37,3 +38,20 @@ unsigned long __trampinit setup_trampoli
 	memcpy(trampoline_base, trampoline_data, TRAMPOLINE_SIZE);
 	return virt_to_phys(trampoline_base);
 }
+
+void __init setup_trampoline_page_table(void)
+{
+#ifdef CONFIG_X86_32
+	/* Copy kernel address range */
+	clone_pgd_range(trampoline_pg_dir + KERNEL_PGD_BOUNDARY,
+			swapper_pg_dir + KERNEL_PGD_BOUNDARY,
+			min_t(unsigned long, KERNEL_PGD_PTRS,
+			      KERNEL_PGD_BOUNDARY));
+
+	/* Initialize low mappings */
+	clone_pgd_range(trampoline_pg_dir,
+			swapper_pg_dir + KERNEL_PGD_BOUNDARY,
+			min_t(unsigned long, KERNEL_PGD_PTRS,
+			      KERNEL_PGD_BOUNDARY));
+#endif
+}