Commit | Line | Data |
---|---|---|
cc90b958 BS |
1 | From: jbeulich@novell.com |
2 | Subject: don't require order-1 allocations for pgd-s | |
3 | Patch-mainline: obsolete | |
4 | ||
5 | At the same time remove the useless user mode pair of init_level4_pgt. | |
6 | ||
7 | Index: head-2008-12-01/arch/x86/kernel/cpu/common_64-xen.c | |
8 | =================================================================== | |
9 | --- head-2008-12-01.orig/arch/x86/kernel/cpu/common_64-xen.c 2008-12-01 12:13:15.000000000 +0100 | |
10 | +++ head-2008-12-01/arch/x86/kernel/cpu/common_64-xen.c 2008-12-01 12:13:27.000000000 +0100 | |
11 | @@ -530,8 +530,7 @@ static void __init_refok switch_pt(int c | |
12 | #ifdef CONFIG_XEN | |
13 | if (cpu == 0) | |
14 | xen_init_pt(); | |
15 | - xen_pt_switch(__pa_symbol(init_level4_pgt)); | |
16 | - xen_new_user_pt(__pa_symbol(__user_pgd(init_level4_pgt))); | |
17 | + xen_pt_switch(init_level4_pgt); | |
18 | #endif | |
19 | } | |
20 | ||
21 | Index: head-2008-12-01/arch/x86/kernel/head_64-xen.S | |
22 | =================================================================== | |
23 | --- head-2008-12-01.orig/arch/x86/kernel/head_64-xen.S 2008-12-01 11:49:07.000000000 +0100 | |
24 | +++ head-2008-12-01/arch/x86/kernel/head_64-xen.S 2008-12-01 12:13:27.000000000 +0100 | |
25 | @@ -44,14 +44,6 @@ ENTRY(name) | |
26 | ||
27 | NEXT_PAGE(init_level4_pgt) | |
28 | .fill 512,8,0 | |
29 | - /* | |
30 | - * We update two pgd entries to make kernel and user pgd consistent | |
31 | - * at pgd_populate(). It can be used for kernel modules. So we place | |
32 | - * this page here for those cases to avoid memory corruption. | |
33 | - * We also use this page to establish the initial mapping for the | |
34 | - * vsyscall area. | |
35 | - */ | |
36 | - .fill 512,8,0 | |
37 | ||
38 | NEXT_PAGE(level3_kernel_pgt) | |
39 | .fill 512,8,0 | |
40 | Index: head-2008-12-01/arch/x86/mm/hypervisor.c | |
41 | =================================================================== | |
42 | --- head-2008-12-01.orig/arch/x86/mm/hypervisor.c 2008-12-01 12:13:22.000000000 +0100 | |
43 | +++ head-2008-12-01/arch/x86/mm/hypervisor.c 2008-12-01 12:13:27.000000000 +0100 | |
44 | @@ -422,7 +422,7 @@ void xen_l3_entry_update(pud_t *ptr, pud | |
45 | #endif | |
46 | ||
47 | #ifdef CONFIG_X86_64 | |
48 | -void xen_l4_entry_update(pgd_t *ptr, int user, pgd_t val) | |
49 | +void xen_l4_entry_update(pgd_t *ptr, pgd_t val) | |
50 | { | |
51 | mmu_update_t u[2]; | |
52 | struct page *page = NULL; | |
53 | @@ -435,8 +435,10 @@ void xen_l4_entry_update(pgd_t *ptr, int | |
54 | } | |
55 | u[0].ptr = virt_to_machine(ptr); | |
56 | u[0].val = __pgd_val(val); | |
57 | - if (user) { | |
58 | - u[1].ptr = virt_to_machine(__user_pgd(ptr)); | |
59 | + if (((unsigned long)ptr & ~PAGE_MASK) | |
60 | + < pgd_index(__HYPERVISOR_VIRT_START) * sizeof(*ptr) | |
61 | + && (ptr = __user_pgd(ptr)) != NULL) { | |
62 | + u[1].ptr = virt_to_machine(ptr); | |
63 | u[1].val = __pgd_val(val); | |
64 | do_lN_entry_update(u, 2, page); | |
65 | } else | |
66 | @@ -444,21 +446,25 @@ void xen_l4_entry_update(pgd_t *ptr, int | |
67 | } | |
68 | #endif /* CONFIG_X86_64 */ | |
69 | ||
70 | -void xen_pt_switch(unsigned long ptr) | |
71 | +#ifdef CONFIG_X86_64 | |
72 | +void xen_pt_switch(pgd_t *pgd) | |
73 | { | |
74 | struct mmuext_op op; | |
75 | op.cmd = MMUEXT_NEW_BASEPTR; | |
76 | - op.arg1.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT); | |
77 | + op.arg1.mfn = pfn_to_mfn(__pa(pgd) >> PAGE_SHIFT); | |
78 | BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); | |
79 | } | |
80 | ||
81 | -void xen_new_user_pt(unsigned long ptr) | |
82 | +void xen_new_user_pt(pgd_t *pgd) | |
83 | { | |
84 | struct mmuext_op op; | |
85 | + | |
86 | + pgd = __user_pgd(pgd); | |
87 | op.cmd = MMUEXT_NEW_USER_BASEPTR; | |
88 | - op.arg1.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT); | |
89 | + op.arg1.mfn = pgd ? pfn_to_mfn(__pa(pgd) >> PAGE_SHIFT) : 0; | |
90 | BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); | |
91 | } | |
92 | +#endif | |
93 | ||
94 | void xen_tlb_flush(void) | |
95 | { | |
96 | @@ -526,28 +532,38 @@ void xen_invlpg_mask(cpumask_t *mask, un | |
97 | void xen_pgd_pin(pgd_t *pgd) | |
98 | { | |
99 | struct mmuext_op op[NR_PGD_PIN_OPS]; | |
100 | + unsigned int nr = NR_PGD_PIN_OPS; | |
101 | ||
102 | op[0].cmd = MMUEXT_PIN_L3_TABLE; | |
103 | op[0].arg1.mfn = pfn_to_mfn(__pa(pgd) >> PAGE_SHIFT); | |
104 | #ifdef CONFIG_X86_64 | |
105 | op[1].cmd = op[0].cmd = MMUEXT_PIN_L4_TABLE; | |
106 | - op[1].arg1.mfn = pfn_to_mfn(__pa(__user_pgd(pgd)) >> PAGE_SHIFT); | |
107 | + pgd = __user_pgd(pgd); | |
108 | + if (pgd) | |
109 | + op[1].arg1.mfn = pfn_to_mfn(__pa(pgd) >> PAGE_SHIFT); | |
110 | + else | |
111 | + nr = 1; | |
112 | #endif | |
113 | - if (HYPERVISOR_mmuext_op(op, NR_PGD_PIN_OPS, NULL, DOMID_SELF) < 0) | |
114 | + if (HYPERVISOR_mmuext_op(op, nr, NULL, DOMID_SELF) < 0) | |
115 | BUG(); | |
116 | } | |
117 | ||
118 | void xen_pgd_unpin(pgd_t *pgd) | |
119 | { | |
120 | struct mmuext_op op[NR_PGD_PIN_OPS]; | |
121 | + unsigned int nr = NR_PGD_PIN_OPS; | |
122 | ||
123 | op[0].cmd = MMUEXT_UNPIN_TABLE; | |
124 | op[0].arg1.mfn = pfn_to_mfn(__pa(pgd) >> PAGE_SHIFT); | |
125 | #ifdef CONFIG_X86_64 | |
126 | - op[1].cmd = MMUEXT_UNPIN_TABLE; | |
127 | - op[1].arg1.mfn = pfn_to_mfn(__pa(__user_pgd(pgd)) >> PAGE_SHIFT); | |
128 | + pgd = __user_pgd(pgd); | |
129 | + if (pgd) { | |
130 | + op[1].cmd = MMUEXT_UNPIN_TABLE; | |
131 | + op[1].arg1.mfn = pfn_to_mfn(__pa(pgd) >> PAGE_SHIFT); | |
132 | + } else | |
133 | + nr = 1; | |
134 | #endif | |
135 | - if (HYPERVISOR_mmuext_op(op, NR_PGD_PIN_OPS, NULL, DOMID_SELF) < 0) | |
136 | + if (HYPERVISOR_mmuext_op(op, nr, NULL, DOMID_SELF) < 0) | |
137 | BUG(); | |
138 | } | |
139 | ||
140 | Index: head-2008-12-01/arch/x86/mm/init_64-xen.c | |
141 | =================================================================== | |
142 | --- head-2008-12-01.orig/arch/x86/mm/init_64-xen.c 2008-12-01 12:13:22.000000000 +0100 | |
143 | +++ head-2008-12-01/arch/x86/mm/init_64-xen.c 2008-12-01 12:13:27.000000000 +0100 | |
144 | @@ -604,9 +604,6 @@ void __init xen_init_pt(void) | |
145 | __pud(__pa_symbol(level2_kernel_pgt) | _PAGE_TABLE); | |
146 | memcpy(level2_kernel_pgt, page, PAGE_SIZE); | |
147 | ||
148 | - __user_pgd(init_level4_pgt)[pgd_index(VSYSCALL_START)] = | |
149 | - __pgd(__pa_symbol(level3_user_pgt) | _PAGE_TABLE); | |
150 | - | |
151 | /* Do an early initialization of the fixmap area. */ | |
152 | addr = __fix_to_virt(FIX_EARLYCON_MEM_BASE); | |
153 | level3_kernel_pgt[pud_index(addr)] = | |
154 | @@ -616,8 +613,6 @@ void __init xen_init_pt(void) | |
155 | ||
156 | early_make_page_readonly(init_level4_pgt, | |
157 | XENFEAT_writable_page_tables); | |
158 | - early_make_page_readonly(__user_pgd(init_level4_pgt), | |
159 | - XENFEAT_writable_page_tables); | |
160 | early_make_page_readonly(level3_kernel_pgt, | |
161 | XENFEAT_writable_page_tables); | |
162 | early_make_page_readonly(level3_user_pgt, | |
163 | Index: head-2008-12-01/arch/x86/mm/pgtable-xen.c | |
164 | =================================================================== | |
165 | --- head-2008-12-01.orig/arch/x86/mm/pgtable-xen.c 2008-12-01 12:13:22.000000000 +0100 | |
166 | +++ head-2008-12-01/arch/x86/mm/pgtable-xen.c 2008-12-01 12:13:27.000000000 +0100 | |
167 | @@ -270,9 +270,11 @@ static void pgd_walk(pgd_t *pgd_base, pg | |
168 | BUG(); | |
169 | seq = 0; | |
170 | } | |
171 | + pgd = __user_pgd(pgd_base); | |
172 | + BUG_ON(!pgd); | |
173 | MULTI_update_va_mapping(mcl + seq, | |
174 | - (unsigned long)__user_pgd(pgd_base), | |
175 | - pfn_pte(virt_to_phys(__user_pgd(pgd_base))>>PAGE_SHIFT, flags), | |
176 | + (unsigned long)pgd, | |
177 | + pfn_pte(virt_to_phys(pgd)>>PAGE_SHIFT, flags), | |
178 | 0); | |
179 | MULTI_update_va_mapping(mcl + seq + 1, | |
180 | (unsigned long)pgd_base, | |
181 | @@ -658,12 +660,29 @@ static void pgd_prepopulate_pmd(struct m | |
182 | } | |
183 | } | |
184 | ||
185 | +static inline pgd_t *user_pgd_alloc(pgd_t *pgd) | |
186 | +{ | |
187 | #ifdef CONFIG_X86_64 | |
188 | -/* We allocate two contiguous pages for kernel and user. */ | |
189 | -#define PGD_ORDER 1 | |
190 | -#else | |
191 | -#define PGD_ORDER 0 | |
192 | + if (pgd) { | |
193 | + pgd_t *upgd = (void *)__get_free_page(GFP_KERNEL|__GFP_ZERO); | |
194 | + | |
195 | + if (upgd) | |
196 | + virt_to_page(pgd)->index = (long)upgd; | |
197 | + else { | |
198 | + free_page((unsigned long)pgd); | |
199 | + pgd = NULL; | |
200 | + } | |
201 | + } | |
202 | +#endif | |
203 | + return pgd; | |
204 | +} | |
205 | + | |
206 | +static inline void user_pgd_free(pgd_t *pgd) | |
207 | +{ | |
208 | +#ifdef CONFIG_X86_64 | |
209 | + free_page(virt_to_page(pgd)->index); | |
210 | #endif | |
211 | +} | |
212 | ||
213 | pgd_t *pgd_alloc(struct mm_struct *mm) | |
214 | { | |
215 | @@ -671,7 +690,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm) | |
216 | pmd_t *pmds[PREALLOCATED_PMDS]; | |
217 | unsigned long flags; | |
218 | ||
219 | - pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, PGD_ORDER); | |
220 | + pgd = user_pgd_alloc((void *)__get_free_page(GFP_KERNEL|__GFP_ZERO)); | |
221 | ||
222 | if (pgd == NULL) | |
223 | goto out; | |
224 | @@ -710,7 +729,8 @@ pgd_t *pgd_alloc(struct mm_struct *mm) | |
225 | out_free_pmds: | |
226 | free_pmds(pmds, mm, !xen_feature(XENFEAT_pae_pgdir_above_4gb)); | |
227 | out_free_pgd: | |
228 | - free_pages((unsigned long)pgd, PGD_ORDER); | |
229 | + user_pgd_free(pgd); | |
230 | + free_page((unsigned long)pgd); | |
231 | out: | |
232 | return NULL; | |
233 | } | |
234 | @@ -729,7 +749,8 @@ void pgd_free(struct mm_struct *mm, pgd_ | |
235 | ||
236 | pgd_mop_up_pmds(mm, pgd); | |
237 | paravirt_pgd_free(mm, pgd); | |
238 | - free_pages((unsigned long)pgd, PGD_ORDER); | |
239 | + user_pgd_free(pgd); | |
240 | + free_page((unsigned long)pgd); | |
241 | } | |
242 | ||
243 | /* blktap and gntdev need this, as otherwise they would implicitly (and | |
244 | Index: head-2008-12-01/drivers/xen/core/machine_reboot.c | |
245 | =================================================================== | |
246 | --- head-2008-12-01.orig/drivers/xen/core/machine_reboot.c 2008-12-01 12:13:13.000000000 +0100 | |
247 | +++ head-2008-12-01/drivers/xen/core/machine_reboot.c 2008-12-01 12:13:27.000000000 +0100 | |
248 | @@ -199,8 +199,7 @@ static int take_machine_down(void *_susp | |
249 | * in fast-suspend mode as that implies a new enough Xen. | |
250 | */ | |
251 | if (!suspend->fast_suspend) | |
252 | - xen_new_user_pt(__pa(__user_pgd( | |
253 | - current->active_mm->pgd))); | |
254 | + xen_new_user_pt(current->active_mm->pgd); | |
255 | #endif | |
256 | } | |
257 | ||
258 | Index: head-2008-12-01/include/asm-x86/mach-xen/asm/hypervisor.h | |
259 | =================================================================== | |
260 | --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/hypervisor.h 2008-12-01 12:13:22.000000000 +0100 | |
261 | +++ head-2008-12-01/include/asm-x86/mach-xen/asm/hypervisor.h 2008-12-01 12:13:27.000000000 +0100 | |
262 | @@ -85,8 +85,8 @@ void do_hypervisor_callback(struct pt_re | |
263 | * be MACHINE addresses. | |
264 | */ | |
265 | ||
266 | -void xen_pt_switch(unsigned long ptr); | |
267 | -void xen_new_user_pt(unsigned long ptr); /* x86_64 only */ | |
268 | +void xen_pt_switch(pgd_t *); | |
269 | +void xen_new_user_pt(pgd_t *); /* x86_64 only */ | |
270 | void xen_load_gs(unsigned int selector); /* x86_64 only */ | |
271 | void xen_tlb_flush(void); | |
272 | void xen_invlpg(unsigned long ptr); | |
273 | @@ -94,7 +94,7 @@ void xen_invlpg(unsigned long ptr); | |
274 | void xen_l1_entry_update(pte_t *ptr, pte_t val); | |
275 | void xen_l2_entry_update(pmd_t *ptr, pmd_t val); | |
276 | void xen_l3_entry_update(pud_t *ptr, pud_t val); /* x86_64/PAE */ | |
277 | -void xen_l4_entry_update(pgd_t *ptr, int user, pgd_t val); /* x86_64 only */ | |
278 | +void xen_l4_entry_update(pgd_t *ptr, pgd_t val); /* x86_64 only */ | |
279 | void xen_pgd_pin(pgd_t *); | |
280 | void xen_pgd_unpin(pgd_t *); | |
281 | ||
282 | Index: head-2008-12-01/include/asm-x86/mach-xen/asm/mmu_context_64.h | |
283 | =================================================================== | |
284 | --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/mmu_context_64.h 2008-12-01 11:49:07.000000000 +0100 | |
285 | +++ head-2008-12-01/include/asm-x86/mach-xen/asm/mmu_context_64.h 2008-12-01 12:13:27.000000000 +0100 | |
286 | @@ -46,6 +46,7 @@ static inline void switch_mm(struct mm_s | |
287 | { | |
288 | unsigned cpu = smp_processor_id(); | |
289 | struct mmuext_op _op[3], *op = _op; | |
290 | + pgd_t *upgd; | |
291 | ||
292 | if (likely(prev != next)) { | |
293 | BUG_ON(!xen_feature(XENFEAT_writable_page_tables) && | |
294 | @@ -64,9 +65,11 @@ static inline void switch_mm(struct mm_s | |
295 | op->arg1.mfn = pfn_to_mfn(__pa(next->pgd) >> PAGE_SHIFT); | |
296 | op++; | |
297 | ||
298 | - /* xen_new_user_pt(__pa(__user_pgd(next->pgd))) */ | |
299 | + /* xen_new_user_pt(next->pgd) */ | |
300 | op->cmd = MMUEXT_NEW_USER_BASEPTR; | |
301 | - op->arg1.mfn = pfn_to_mfn(__pa(__user_pgd(next->pgd)) >> PAGE_SHIFT); | |
302 | + upgd = __user_pgd(next->pgd); | |
303 | + op->arg1.mfn = likely(upgd) | |
304 | + ? pfn_to_mfn(__pa(upgd) >> PAGE_SHIFT) : 0; | |
305 | op++; | |
306 | ||
307 | if (unlikely(next->context.ldt != prev->context.ldt)) { | |
308 | @@ -90,7 +93,7 @@ static inline void switch_mm(struct mm_s | |
309 | * to make sure to use no freed page tables. | |
310 | */ | |
311 | load_cr3(next->pgd); | |
312 | - xen_new_user_pt(__pa(__user_pgd(next->pgd))); | |
313 | + xen_new_user_pt(next->pgd); | |
314 | load_LDT_nolock(&next->context); | |
315 | } | |
316 | } | |
317 | Index: head-2008-12-01/include/asm-x86/mach-xen/asm/pgalloc.h | |
318 | =================================================================== | |
319 | --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/pgalloc.h 2008-12-01 12:13:06.000000000 +0100 | |
320 | +++ head-2008-12-01/include/asm-x86/mach-xen/asm/pgalloc.h 2008-12-01 12:13:27.000000000 +0100 | |
321 | @@ -106,15 +106,13 @@ static inline void pud_populate(struct m | |
322 | #endif /* CONFIG_X86_PAE */ | |
323 | ||
324 | #if PAGETABLE_LEVELS > 3 | |
325 | -#define __user_pgd(pgd) ((pgd) + PTRS_PER_PGD) | |
326 | - | |
327 | static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) | |
328 | { | |
329 | pgd_t ent = __pgd(_PAGE_TABLE | __pa(pud)); | |
330 | ||
331 | paravirt_alloc_pud(mm, __pa(pud) >> PAGE_SHIFT); | |
332 | if (unlikely(PagePinned(virt_to_page(pgd)))) | |
333 | - xen_l4_entry_update(pgd, 1, ent); | |
334 | + xen_l4_entry_update(pgd, ent); | |
335 | else | |
336 | *__user_pgd(pgd) = *pgd = ent; | |
337 | } | |
338 | Index: head-2008-12-01/include/asm-x86/mach-xen/asm/pgtable_64.h | |
339 | =================================================================== | |
340 | --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/pgtable_64.h 2008-12-01 12:13:13.000000000 +0100 | |
341 | +++ head-2008-12-01/include/asm-x86/mach-xen/asm/pgtable_64.h 2008-12-01 12:13:27.000000000 +0100 | |
342 | @@ -131,18 +131,25 @@ static inline void xen_set_pud(pud_t *pu | |
343 | : (void)(*__pudp = xen_make_pud(0)); \ | |
344 | }) | |
345 | ||
346 | -#define __user_pgd(pgd) ((pgd) + PTRS_PER_PGD) | |
347 | +static inline pgd_t *__user_pgd(pgd_t *pgd) | |
348 | +{ | |
349 | + if (unlikely(((unsigned long)pgd & PAGE_MASK) | |
350 | + == (unsigned long)init_level4_pgt)) | |
351 | + return NULL; | |
352 | + return (pgd_t *)(virt_to_page(pgd)->index | |
353 | + + ((unsigned long)pgd & ~PAGE_MASK)); | |
354 | +} | |
355 | ||
356 | static inline void xen_set_pgd(pgd_t *pgdp, pgd_t pgd) | |
357 | { | |
358 | - xen_l4_entry_update(pgdp, 0, pgd); | |
359 | + xen_l4_entry_update(pgdp, pgd); | |
360 | } | |
361 | ||
362 | #define xen_pgd_clear(pgd) \ | |
363 | ({ \ | |
364 | pgd_t *__pgdp = (pgd); \ | |
365 | PagePinned(virt_to_page(__pgdp)) \ | |
366 | - ? xen_l4_entry_update(__pgdp, 1, xen_make_pgd(0)) \ | |
367 | + ? xen_l4_entry_update(__pgdp, xen_make_pgd(0)) \ | |
368 | : (void)(*__user_pgd(__pgdp) = *__pgdp = xen_make_pgd(0)); \ | |
369 | }) | |
370 |