]>
Commit | Line | Data |
---|---|---|
cc90b958 BS |
1 | From: jbeulich@novell.com |
2 | Subject: don't require order-1 allocations for pgd-s | |
3 | Patch-mainline: obsolete | |
4 | ||
5 | At the same time, remove the useless user-mode counterpart page of init_level4_pgt. | |
6 | ||
00e5a55c BS |
7 | --- sle11-2009-04-09.orig/arch/x86/kernel/cpu/common_64-xen.c 2008-11-17 14:07:10.000000000 +0100 |
8 | +++ sle11-2009-04-09/arch/x86/kernel/cpu/common_64-xen.c 2009-03-16 16:40:52.000000000 +0100 | |
cc90b958 BS |
9 | @@ -530,8 +530,7 @@ static void __init_refok switch_pt(int c |
10 | #ifdef CONFIG_XEN | |
11 | if (cpu == 0) | |
12 | xen_init_pt(); | |
13 | - xen_pt_switch(__pa_symbol(init_level4_pgt)); | |
14 | - xen_new_user_pt(__pa_symbol(__user_pgd(init_level4_pgt))); | |
15 | + xen_pt_switch(init_level4_pgt); | |
16 | #endif | |
17 | } | |
18 | ||
00e5a55c BS |
19 | --- sle11-2009-04-09.orig/arch/x86/kernel/head_64-xen.S 2009-03-16 16:38:16.000000000 +0100 |
20 | +++ sle11-2009-04-09/arch/x86/kernel/head_64-xen.S 2009-03-16 16:40:52.000000000 +0100 | |
cc90b958 BS |
21 | @@ -44,14 +44,6 @@ ENTRY(name) |
22 | ||
23 | NEXT_PAGE(init_level4_pgt) | |
24 | .fill 512,8,0 | |
25 | - /* | |
26 | - * We update two pgd entries to make kernel and user pgd consistent | |
27 | - * at pgd_populate(). It can be used for kernel modules. So we place | |
28 | - * this page here for those cases to avoid memory corruption. | |
29 | - * We also use this page to establish the initial mapping for the | |
30 | - * vsyscall area. | |
31 | - */ | |
32 | - .fill 512,8,0 | |
33 | ||
34 | NEXT_PAGE(level3_kernel_pgt) | |
35 | .fill 512,8,0 | |
00e5a55c BS |
36 | --- sle11-2009-04-09.orig/arch/x86/mm/hypervisor.c 2009-03-16 16:40:50.000000000 +0100 |
37 | +++ sle11-2009-04-09/arch/x86/mm/hypervisor.c 2009-03-30 12:19:20.000000000 +0200 | |
38 | @@ -423,7 +423,7 @@ void xen_l3_entry_update(pud_t *ptr, pud | |
cc90b958 BS |
39 | #endif |
40 | ||
41 | #ifdef CONFIG_X86_64 | |
42 | -void xen_l4_entry_update(pgd_t *ptr, int user, pgd_t val) | |
43 | +void xen_l4_entry_update(pgd_t *ptr, pgd_t val) | |
44 | { | |
45 | mmu_update_t u[2]; | |
46 | struct page *page = NULL; | |
00e5a55c | 47 | @@ -436,8 +436,10 @@ void xen_l4_entry_update(pgd_t *ptr, int |
cc90b958 BS |
48 | } |
49 | u[0].ptr = virt_to_machine(ptr); | |
50 | u[0].val = __pgd_val(val); | |
51 | - if (user) { | |
52 | - u[1].ptr = virt_to_machine(__user_pgd(ptr)); | |
53 | + if (((unsigned long)ptr & ~PAGE_MASK) | |
54 | + < pgd_index(__HYPERVISOR_VIRT_START) * sizeof(*ptr) | |
55 | + && (ptr = __user_pgd(ptr)) != NULL) { | |
56 | + u[1].ptr = virt_to_machine(ptr); | |
57 | u[1].val = __pgd_val(val); | |
58 | do_lN_entry_update(u, 2, page); | |
59 | } else | |
00e5a55c | 60 | @@ -445,21 +447,25 @@ void xen_l4_entry_update(pgd_t *ptr, int |
cc90b958 BS |
61 | } |
62 | #endif /* CONFIG_X86_64 */ | |
63 | ||
64 | -void xen_pt_switch(unsigned long ptr) | |
65 | +#ifdef CONFIG_X86_64 | |
66 | +void xen_pt_switch(pgd_t *pgd) | |
67 | { | |
68 | struct mmuext_op op; | |
69 | op.cmd = MMUEXT_NEW_BASEPTR; | |
70 | - op.arg1.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT); | |
71 | + op.arg1.mfn = pfn_to_mfn(__pa(pgd) >> PAGE_SHIFT); | |
72 | BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); | |
73 | } | |
74 | ||
75 | -void xen_new_user_pt(unsigned long ptr) | |
76 | +void xen_new_user_pt(pgd_t *pgd) | |
77 | { | |
78 | struct mmuext_op op; | |
79 | + | |
80 | + pgd = __user_pgd(pgd); | |
81 | op.cmd = MMUEXT_NEW_USER_BASEPTR; | |
82 | - op.arg1.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT); | |
83 | + op.arg1.mfn = pgd ? pfn_to_mfn(__pa(pgd) >> PAGE_SHIFT) : 0; | |
84 | BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); | |
85 | } | |
86 | +#endif | |
87 | ||
88 | void xen_tlb_flush(void) | |
89 | { | |
00e5a55c | 90 | @@ -529,28 +535,38 @@ void xen_invlpg_mask(cpumask_t *mask, un |
cc90b958 BS |
91 | void xen_pgd_pin(pgd_t *pgd) |
92 | { | |
93 | struct mmuext_op op[NR_PGD_PIN_OPS]; | |
94 | + unsigned int nr = NR_PGD_PIN_OPS; | |
95 | ||
96 | op[0].cmd = MMUEXT_PIN_L3_TABLE; | |
97 | op[0].arg1.mfn = pfn_to_mfn(__pa(pgd) >> PAGE_SHIFT); | |
98 | #ifdef CONFIG_X86_64 | |
99 | op[1].cmd = op[0].cmd = MMUEXT_PIN_L4_TABLE; | |
100 | - op[1].arg1.mfn = pfn_to_mfn(__pa(__user_pgd(pgd)) >> PAGE_SHIFT); | |
101 | + pgd = __user_pgd(pgd); | |
102 | + if (pgd) | |
103 | + op[1].arg1.mfn = pfn_to_mfn(__pa(pgd) >> PAGE_SHIFT); | |
104 | + else | |
105 | + nr = 1; | |
106 | #endif | |
107 | - if (HYPERVISOR_mmuext_op(op, NR_PGD_PIN_OPS, NULL, DOMID_SELF) < 0) | |
108 | + if (HYPERVISOR_mmuext_op(op, nr, NULL, DOMID_SELF) < 0) | |
109 | BUG(); | |
110 | } | |
111 | ||
112 | void xen_pgd_unpin(pgd_t *pgd) | |
113 | { | |
114 | struct mmuext_op op[NR_PGD_PIN_OPS]; | |
115 | + unsigned int nr = NR_PGD_PIN_OPS; | |
116 | ||
117 | op[0].cmd = MMUEXT_UNPIN_TABLE; | |
118 | op[0].arg1.mfn = pfn_to_mfn(__pa(pgd) >> PAGE_SHIFT); | |
119 | #ifdef CONFIG_X86_64 | |
120 | - op[1].cmd = MMUEXT_UNPIN_TABLE; | |
121 | - op[1].arg1.mfn = pfn_to_mfn(__pa(__user_pgd(pgd)) >> PAGE_SHIFT); | |
122 | + pgd = __user_pgd(pgd); | |
123 | + if (pgd) { | |
124 | + op[1].cmd = MMUEXT_UNPIN_TABLE; | |
125 | + op[1].arg1.mfn = pfn_to_mfn(__pa(pgd) >> PAGE_SHIFT); | |
126 | + } else | |
127 | + nr = 1; | |
128 | #endif | |
129 | - if (HYPERVISOR_mmuext_op(op, NR_PGD_PIN_OPS, NULL, DOMID_SELF) < 0) | |
130 | + if (HYPERVISOR_mmuext_op(op, nr, NULL, DOMID_SELF) < 0) | |
131 | BUG(); | |
132 | } | |
133 | ||
00e5a55c BS |
134 | --- sle11-2009-04-09.orig/arch/x86/mm/init_64-xen.c 2009-03-16 16:40:50.000000000 +0100 |
135 | +++ sle11-2009-04-09/arch/x86/mm/init_64-xen.c 2009-03-16 16:40:52.000000000 +0100 | |
136 | @@ -616,9 +616,6 @@ void __init xen_init_pt(void) | |
cc90b958 BS |
137 | __pud(__pa_symbol(level2_kernel_pgt) | _PAGE_TABLE); |
138 | memcpy(level2_kernel_pgt, page, PAGE_SIZE); | |
139 | ||
140 | - __user_pgd(init_level4_pgt)[pgd_index(VSYSCALL_START)] = | |
141 | - __pgd(__pa_symbol(level3_user_pgt) | _PAGE_TABLE); | |
142 | - | |
143 | /* Do an early initialization of the fixmap area. */ | |
144 | addr = __fix_to_virt(FIX_EARLYCON_MEM_BASE); | |
145 | level3_kernel_pgt[pud_index(addr)] = | |
00e5a55c | 146 | @@ -628,8 +625,6 @@ void __init xen_init_pt(void) |
cc90b958 BS |
147 | |
148 | early_make_page_readonly(init_level4_pgt, | |
149 | XENFEAT_writable_page_tables); | |
150 | - early_make_page_readonly(__user_pgd(init_level4_pgt), | |
151 | - XENFEAT_writable_page_tables); | |
152 | early_make_page_readonly(level3_kernel_pgt, | |
153 | XENFEAT_writable_page_tables); | |
154 | early_make_page_readonly(level3_user_pgt, | |
00e5a55c BS |
155 | --- sle11-2009-04-09.orig/arch/x86/mm/pgtable-xen.c 2009-03-16 16:40:50.000000000 +0100 |
156 | +++ sle11-2009-04-09/arch/x86/mm/pgtable-xen.c 2009-04-09 14:54:18.000000000 +0200 | |
cc90b958 BS |
157 | @@ -270,9 +270,11 @@ static void pgd_walk(pgd_t *pgd_base, pg |
158 | BUG(); | |
159 | seq = 0; | |
160 | } | |
161 | + pgd = __user_pgd(pgd_base); | |
162 | + BUG_ON(!pgd); | |
163 | MULTI_update_va_mapping(mcl + seq, | |
164 | - (unsigned long)__user_pgd(pgd_base), | |
165 | - pfn_pte(virt_to_phys(__user_pgd(pgd_base))>>PAGE_SHIFT, flags), | |
166 | + (unsigned long)pgd, | |
167 | + pfn_pte(virt_to_phys(pgd)>>PAGE_SHIFT, flags), | |
168 | 0); | |
169 | MULTI_update_va_mapping(mcl + seq + 1, | |
170 | (unsigned long)pgd_base, | |
00e5a55c | 171 | @@ -662,12 +664,29 @@ static void pgd_prepopulate_pmd(struct m |
cc90b958 BS |
172 | } |
173 | } | |
174 | ||
175 | +static inline pgd_t *user_pgd_alloc(pgd_t *pgd) | |
176 | +{ | |
177 | #ifdef CONFIG_X86_64 | |
178 | -/* We allocate two contiguous pages for kernel and user. */ | |
179 | -#define PGD_ORDER 1 | |
180 | -#else | |
181 | -#define PGD_ORDER 0 | |
182 | + if (pgd) { | |
183 | + pgd_t *upgd = (void *)__get_free_page(GFP_KERNEL|__GFP_ZERO); | |
184 | + | |
185 | + if (upgd) | |
186 | + virt_to_page(pgd)->index = (long)upgd; | |
187 | + else { | |
188 | + free_page((unsigned long)pgd); | |
189 | + pgd = NULL; | |
190 | + } | |
191 | + } | |
192 | +#endif | |
193 | + return pgd; | |
194 | +} | |
195 | + | |
196 | +static inline void user_pgd_free(pgd_t *pgd) | |
197 | +{ | |
198 | +#ifdef CONFIG_X86_64 | |
199 | + free_page(virt_to_page(pgd)->index); | |
200 | #endif | |
201 | +} | |
202 | ||
203 | pgd_t *pgd_alloc(struct mm_struct *mm) | |
204 | { | |
00e5a55c | 205 | @@ -675,7 +694,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm) |
cc90b958 BS |
206 | pmd_t *pmds[PREALLOCATED_PMDS]; |
207 | unsigned long flags; | |
208 | ||
209 | - pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, PGD_ORDER); | |
210 | + pgd = user_pgd_alloc((void *)__get_free_page(GFP_KERNEL|__GFP_ZERO)); | |
211 | ||
212 | if (pgd == NULL) | |
213 | goto out; | |
00e5a55c | 214 | @@ -714,7 +733,8 @@ pgd_t *pgd_alloc(struct mm_struct *mm) |
cc90b958 BS |
215 | out_free_pmds: |
216 | free_pmds(pmds, mm, !xen_feature(XENFEAT_pae_pgdir_above_4gb)); | |
217 | out_free_pgd: | |
218 | - free_pages((unsigned long)pgd, PGD_ORDER); | |
219 | + user_pgd_free(pgd); | |
220 | + free_page((unsigned long)pgd); | |
221 | out: | |
222 | return NULL; | |
223 | } | |
00e5a55c | 224 | @@ -733,7 +753,8 @@ void pgd_free(struct mm_struct *mm, pgd_ |
cc90b958 BS |
225 | |
226 | pgd_mop_up_pmds(mm, pgd); | |
227 | paravirt_pgd_free(mm, pgd); | |
228 | - free_pages((unsigned long)pgd, PGD_ORDER); | |
229 | + user_pgd_free(pgd); | |
230 | + free_page((unsigned long)pgd); | |
231 | } | |
232 | ||
233 | /* blktap and gntdev need this, as otherwise they would implicitly (and | |
00e5a55c BS |
234 | --- sle11-2009-04-09.orig/drivers/xen/core/machine_reboot.c 2009-02-17 12:25:29.000000000 +0100 |
235 | +++ sle11-2009-04-09/drivers/xen/core/machine_reboot.c 2009-03-16 16:40:52.000000000 +0100 | |
236 | @@ -191,8 +191,7 @@ static int take_machine_down(void *_susp | |
cc90b958 BS |
237 | * in fast-suspend mode as that implies a new enough Xen. |
238 | */ | |
239 | if (!suspend->fast_suspend) | |
240 | - xen_new_user_pt(__pa(__user_pgd( | |
241 | - current->active_mm->pgd))); | |
242 | + xen_new_user_pt(current->active_mm->pgd); | |
243 | #endif | |
244 | } | |
245 | ||
00e5a55c BS |
246 | --- sle11-2009-04-09.orig/include/asm-x86/mach-xen/asm/hypervisor.h 2009-03-16 16:40:50.000000000 +0100 |
247 | +++ sle11-2009-04-09/include/asm-x86/mach-xen/asm/hypervisor.h 2009-03-16 16:40:52.000000000 +0100 | |
cc90b958 BS |
248 | @@ -85,8 +85,8 @@ void do_hypervisor_callback(struct pt_re |
249 | * be MACHINE addresses. | |
250 | */ | |
251 | ||
252 | -void xen_pt_switch(unsigned long ptr); | |
253 | -void xen_new_user_pt(unsigned long ptr); /* x86_64 only */ | |
254 | +void xen_pt_switch(pgd_t *); | |
255 | +void xen_new_user_pt(pgd_t *); /* x86_64 only */ | |
256 | void xen_load_gs(unsigned int selector); /* x86_64 only */ | |
257 | void xen_tlb_flush(void); | |
258 | void xen_invlpg(unsigned long ptr); | |
259 | @@ -94,7 +94,7 @@ void xen_invlpg(unsigned long ptr); | |
260 | void xen_l1_entry_update(pte_t *ptr, pte_t val); | |
261 | void xen_l2_entry_update(pmd_t *ptr, pmd_t val); | |
262 | void xen_l3_entry_update(pud_t *ptr, pud_t val); /* x86_64/PAE */ | |
263 | -void xen_l4_entry_update(pgd_t *ptr, int user, pgd_t val); /* x86_64 only */ | |
264 | +void xen_l4_entry_update(pgd_t *ptr, pgd_t val); /* x86_64 only */ | |
265 | void xen_pgd_pin(pgd_t *); | |
266 | void xen_pgd_unpin(pgd_t *); | |
267 | ||
00e5a55c BS |
268 | --- sle11-2009-04-09.orig/include/asm-x86/mach-xen/asm/mmu_context_64.h 2009-03-16 16:38:16.000000000 +0100 |
269 | +++ sle11-2009-04-09/include/asm-x86/mach-xen/asm/mmu_context_64.h 2009-03-16 16:40:52.000000000 +0100 | |
cc90b958 BS |
270 | @@ -46,6 +46,7 @@ static inline void switch_mm(struct mm_s |
271 | { | |
272 | unsigned cpu = smp_processor_id(); | |
273 | struct mmuext_op _op[3], *op = _op; | |
274 | + pgd_t *upgd; | |
275 | ||
276 | if (likely(prev != next)) { | |
277 | BUG_ON(!xen_feature(XENFEAT_writable_page_tables) && | |
278 | @@ -64,9 +65,11 @@ static inline void switch_mm(struct mm_s | |
279 | op->arg1.mfn = pfn_to_mfn(__pa(next->pgd) >> PAGE_SHIFT); | |
280 | op++; | |
281 | ||
282 | - /* xen_new_user_pt(__pa(__user_pgd(next->pgd))) */ | |
283 | + /* xen_new_user_pt(next->pgd) */ | |
284 | op->cmd = MMUEXT_NEW_USER_BASEPTR; | |
285 | - op->arg1.mfn = pfn_to_mfn(__pa(__user_pgd(next->pgd)) >> PAGE_SHIFT); | |
286 | + upgd = __user_pgd(next->pgd); | |
287 | + op->arg1.mfn = likely(upgd) | |
288 | + ? pfn_to_mfn(__pa(upgd) >> PAGE_SHIFT) : 0; | |
289 | op++; | |
290 | ||
291 | if (unlikely(next->context.ldt != prev->context.ldt)) { | |
292 | @@ -90,7 +93,7 @@ static inline void switch_mm(struct mm_s | |
293 | * to make sure to use no freed page tables. | |
294 | */ | |
295 | load_cr3(next->pgd); | |
296 | - xen_new_user_pt(__pa(__user_pgd(next->pgd))); | |
297 | + xen_new_user_pt(next->pgd); | |
298 | load_LDT_nolock(&next->context); | |
299 | } | |
300 | } | |
00e5a55c BS |
301 | --- sle11-2009-04-09.orig/include/asm-x86/mach-xen/asm/pgalloc.h 2009-03-16 16:40:37.000000000 +0100 |
302 | +++ sle11-2009-04-09/include/asm-x86/mach-xen/asm/pgalloc.h 2009-03-16 16:40:52.000000000 +0100 | |
cc90b958 BS |
303 | @@ -106,15 +106,13 @@ static inline void pud_populate(struct m |
304 | #endif /* CONFIG_X86_PAE */ | |
305 | ||
306 | #if PAGETABLE_LEVELS > 3 | |
307 | -#define __user_pgd(pgd) ((pgd) + PTRS_PER_PGD) | |
308 | - | |
309 | static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) | |
310 | { | |
311 | pgd_t ent = __pgd(_PAGE_TABLE | __pa(pud)); | |
312 | ||
313 | paravirt_alloc_pud(mm, __pa(pud) >> PAGE_SHIFT); | |
314 | if (unlikely(PagePinned(virt_to_page(pgd)))) | |
315 | - xen_l4_entry_update(pgd, 1, ent); | |
316 | + xen_l4_entry_update(pgd, ent); | |
317 | else | |
318 | *__user_pgd(pgd) = *pgd = ent; | |
319 | } | |
00e5a55c BS |
320 | --- sle11-2009-04-09.orig/include/asm-x86/mach-xen/asm/pgtable_64.h 2009-03-16 16:40:37.000000000 +0100 |
321 | +++ sle11-2009-04-09/include/asm-x86/mach-xen/asm/pgtable_64.h 2009-03-16 16:40:52.000000000 +0100 | |
cc90b958 BS |
322 | @@ -131,18 +131,25 @@ static inline void xen_set_pud(pud_t *pu |
323 | : (void)(*__pudp = xen_make_pud(0)); \ | |
324 | }) | |
325 | ||
326 | -#define __user_pgd(pgd) ((pgd) + PTRS_PER_PGD) | |
327 | +static inline pgd_t *__user_pgd(pgd_t *pgd) | |
328 | +{ | |
329 | + if (unlikely(((unsigned long)pgd & PAGE_MASK) | |
330 | + == (unsigned long)init_level4_pgt)) | |
331 | + return NULL; | |
332 | + return (pgd_t *)(virt_to_page(pgd)->index | |
333 | + + ((unsigned long)pgd & ~PAGE_MASK)); | |
334 | +} | |
335 | ||
336 | static inline void xen_set_pgd(pgd_t *pgdp, pgd_t pgd) | |
337 | { | |
338 | - xen_l4_entry_update(pgdp, 0, pgd); | |
339 | + xen_l4_entry_update(pgdp, pgd); | |
340 | } | |
341 | ||
342 | #define xen_pgd_clear(pgd) \ | |
343 | ({ \ | |
344 | pgd_t *__pgdp = (pgd); \ | |
345 | PagePinned(virt_to_page(__pgdp)) \ | |
346 | - ? xen_l4_entry_update(__pgdp, 1, xen_make_pgd(0)) \ | |
347 | + ? xen_l4_entry_update(__pgdp, xen_make_pgd(0)) \ | |
348 | : (void)(*__user_pgd(__pgdp) = *__pgdp = xen_make_pgd(0)); \ | |
349 | }) | |
350 |