From: jbeulich@novell.com
Subject: don't require order-1 allocations for pgd-s
Patch-mainline: obsolete

At the same time remove the useless user mode pair of init_level4_pgt.

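The background, summarized from the hunks below: the user mode half of
each pgd (what Xen's MMUEXT_NEW_USER_BASEPTR consumes) was so far the
second page of a single order-1 allocation, making __user_pgd() plain
pointer arithmetic. With this patch the two halves become independent
order-0 pages, and the kernel half's struct page remembers its user
companion in the otherwise unused ->index field. init_level4_pgt loses
its user mode pair entirely, so __user_pgd() now returns NULL for it
and all callers have to cope with that. As a sketch, old versus new
lookup (identifiers exactly as in the hunks):

	/* old: user pgd is page two of a contiguous order-1 pair */
	#define __user_pgd(pgd) ((pgd) + PTRS_PER_PGD)

	/* new: user pgd is a separate order-0 page linked through
	 * page->index; NULL for init_level4_pgt, which has none */
	static inline pgd_t *__user_pgd(pgd_t *pgd)
	{
		if (unlikely(((unsigned long)pgd & PAGE_MASK)
			     == (unsigned long)init_level4_pgt))
			return NULL;
		return (pgd_t *)(virt_to_page(pgd)->index
				 + ((unsigned long)pgd & ~PAGE_MASK));
	}
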
Index: head-2008-12-01/arch/x86/kernel/cpu/common_64-xen.c
===================================================================
--- head-2008-12-01.orig/arch/x86/kernel/cpu/common_64-xen.c	2008-12-01 12:13:15.000000000 +0100
+++ head-2008-12-01/arch/x86/kernel/cpu/common_64-xen.c	2008-12-01 12:13:27.000000000 +0100
@@ -530,8 +530,7 @@ static void __init_refok switch_pt(int c
 #ifdef CONFIG_XEN
 	if (cpu == 0)
 		xen_init_pt();
-	xen_pt_switch(__pa_symbol(init_level4_pgt));
-	xen_new_user_pt(__pa_symbol(__user_pgd(init_level4_pgt)));
+	xen_pt_switch(init_level4_pgt);
 #endif
 }
 
Index: head-2008-12-01/arch/x86/kernel/head_64-xen.S
===================================================================
--- head-2008-12-01.orig/arch/x86/kernel/head_64-xen.S	2008-12-01 11:49:07.000000000 +0100
+++ head-2008-12-01/arch/x86/kernel/head_64-xen.S	2008-12-01 12:13:27.000000000 +0100
@@ -44,14 +44,6 @@ ENTRY(name)
 
 NEXT_PAGE(init_level4_pgt)
 	.fill	512,8,0
-	/*
-	 * We update two pgd entries to make kernel and user pgd consistent
-	 * at pgd_populate(). It can be used for kernel modules. So we place
-	 * this page here for those cases to avoid memory corruption.
-	 * We also use this page to establish the initial mapping for the
-	 * vsyscall area.
-	 */
-	.fill	512,8,0
 
 NEXT_PAGE(level3_kernel_pgt)
 	.fill	512,8,0
Index: head-2008-12-01/arch/x86/mm/hypervisor.c
===================================================================
--- head-2008-12-01.orig/arch/x86/mm/hypervisor.c	2008-12-01 12:13:22.000000000 +0100
+++ head-2008-12-01/arch/x86/mm/hypervisor.c	2008-12-01 12:13:27.000000000 +0100
@@ -422,7 +422,7 @@ void xen_l3_entry_update(pud_t *ptr, pud
 #endif
 
 #ifdef CONFIG_X86_64
-void xen_l4_entry_update(pgd_t *ptr, int user, pgd_t val)
+void xen_l4_entry_update(pgd_t *ptr, pgd_t val)
 {
 	mmu_update_t u[2];
 	struct page *page = NULL;
@@ -435,8 +435,10 @@ void xen_l4_entry_update(pgd_t *ptr, int
 	}
 	u[0].ptr = virt_to_machine(ptr);
 	u[0].val = __pgd_val(val);
-	if (user) {
-		u[1].ptr = virt_to_machine(__user_pgd(ptr));
+	if (((unsigned long)ptr & ~PAGE_MASK)
+	    < pgd_index(__HYPERVISOR_VIRT_START) * sizeof(*ptr)
+	    && (ptr = __user_pgd(ptr)) != NULL) {
+		u[1].ptr = virt_to_machine(ptr);
 		u[1].val = __pgd_val(val);
 		do_lN_entry_update(u, 2, page);
 	} else
@@ -444,21 +446,25 @@ void xen_l4_entry_update(pgd_t *ptr, int
 }
 #endif /* CONFIG_X86_64 */
 
-void xen_pt_switch(unsigned long ptr)
+#ifdef CONFIG_X86_64
+void xen_pt_switch(pgd_t *pgd)
 {
 	struct mmuext_op op;
 	op.cmd = MMUEXT_NEW_BASEPTR;
-	op.arg1.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
+	op.arg1.mfn = pfn_to_mfn(__pa(pgd) >> PAGE_SHIFT);
 	BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
 }
 
-void xen_new_user_pt(unsigned long ptr)
+void xen_new_user_pt(pgd_t *pgd)
 {
 	struct mmuext_op op;
+
+	pgd = __user_pgd(pgd);
 	op.cmd = MMUEXT_NEW_USER_BASEPTR;
-	op.arg1.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
+	op.arg1.mfn = pgd ? pfn_to_mfn(__pa(pgd) >> PAGE_SHIFT) : 0;
 	BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
 }
+#endif
 
 void xen_tlb_flush(void)
 {
@@ -526,28 +532,38 @@ void xen_invlpg_mask(cpumask_t *mask, un
 void xen_pgd_pin(pgd_t *pgd)
 {
 	struct mmuext_op op[NR_PGD_PIN_OPS];
+	unsigned int nr = NR_PGD_PIN_OPS;
 
 	op[0].cmd = MMUEXT_PIN_L3_TABLE;
 	op[0].arg1.mfn = pfn_to_mfn(__pa(pgd) >> PAGE_SHIFT);
 #ifdef CONFIG_X86_64
 	op[1].cmd = op[0].cmd = MMUEXT_PIN_L4_TABLE;
-	op[1].arg1.mfn = pfn_to_mfn(__pa(__user_pgd(pgd)) >> PAGE_SHIFT);
+	pgd = __user_pgd(pgd);
+	if (pgd)
+		op[1].arg1.mfn = pfn_to_mfn(__pa(pgd) >> PAGE_SHIFT);
+	else
+		nr = 1;
 #endif
-	if (HYPERVISOR_mmuext_op(op, NR_PGD_PIN_OPS, NULL, DOMID_SELF) < 0)
+	if (HYPERVISOR_mmuext_op(op, nr, NULL, DOMID_SELF) < 0)
 		BUG();
 }
 
 void xen_pgd_unpin(pgd_t *pgd)
 {
 	struct mmuext_op op[NR_PGD_PIN_OPS];
+	unsigned int nr = NR_PGD_PIN_OPS;
 
 	op[0].cmd = MMUEXT_UNPIN_TABLE;
 	op[0].arg1.mfn = pfn_to_mfn(__pa(pgd) >> PAGE_SHIFT);
 #ifdef CONFIG_X86_64
-	op[1].cmd = MMUEXT_UNPIN_TABLE;
-	op[1].arg1.mfn = pfn_to_mfn(__pa(__user_pgd(pgd)) >> PAGE_SHIFT);
+	pgd = __user_pgd(pgd);
+	if (pgd) {
+		op[1].cmd = MMUEXT_UNPIN_TABLE;
+		op[1].arg1.mfn = pfn_to_mfn(__pa(pgd) >> PAGE_SHIFT);
+	} else
+		nr = 1;
 #endif
-	if (HYPERVISOR_mmuext_op(op, NR_PGD_PIN_OPS, NULL, DOMID_SELF) < 0)
+	if (HYPERVISOR_mmuext_op(op, nr, NULL, DOMID_SELF) < 0)
 		BUG();
 }
 
Index: head-2008-12-01/arch/x86/mm/init_64-xen.c
===================================================================
--- head-2008-12-01.orig/arch/x86/mm/init_64-xen.c	2008-12-01 12:13:22.000000000 +0100
+++ head-2008-12-01/arch/x86/mm/init_64-xen.c	2008-12-01 12:13:27.000000000 +0100
@@ -604,9 +604,6 @@ void __init xen_init_pt(void)
 		__pud(__pa_symbol(level2_kernel_pgt) | _PAGE_TABLE);
 	memcpy(level2_kernel_pgt, page, PAGE_SIZE);
 
-	__user_pgd(init_level4_pgt)[pgd_index(VSYSCALL_START)] =
-		__pgd(__pa_symbol(level3_user_pgt) | _PAGE_TABLE);
-
 	/* Do an early initialization of the fixmap area. */
 	addr = __fix_to_virt(FIX_EARLYCON_MEM_BASE);
 	level3_kernel_pgt[pud_index(addr)] =
@@ -616,8 +613,6 @@ void __init xen_init_pt(void)
 
 	early_make_page_readonly(init_level4_pgt,
				 XENFEAT_writable_page_tables);
-	early_make_page_readonly(__user_pgd(init_level4_pgt),
-				 XENFEAT_writable_page_tables);
 	early_make_page_readonly(level3_kernel_pgt,
				 XENFEAT_writable_page_tables);
 	early_make_page_readonly(level3_user_pgt,
Index: head-2008-12-01/arch/x86/mm/pgtable-xen.c
===================================================================
--- head-2008-12-01.orig/arch/x86/mm/pgtable-xen.c	2008-12-01 12:13:22.000000000 +0100
+++ head-2008-12-01/arch/x86/mm/pgtable-xen.c	2008-12-01 12:13:27.000000000 +0100
@@ -270,9 +270,11 @@ static void pgd_walk(pgd_t *pgd_base, pg
 				BUG();
 			seq = 0;
 		}
+		pgd = __user_pgd(pgd_base);
+		BUG_ON(!pgd);
 		MULTI_update_va_mapping(mcl + seq,
-			(unsigned long)__user_pgd(pgd_base),
-			pfn_pte(virt_to_phys(__user_pgd(pgd_base))>>PAGE_SHIFT, flags),
+			(unsigned long)pgd,
+			pfn_pte(virt_to_phys(pgd)>>PAGE_SHIFT, flags),
 			0);
 		MULTI_update_va_mapping(mcl + seq + 1,
 			(unsigned long)pgd_base,
@@ -658,12 +660,29 @@ static void pgd_prepopulate_pmd(struct m
 	}
 }
 
+static inline pgd_t *user_pgd_alloc(pgd_t *pgd)
+{
 #ifdef CONFIG_X86_64
-/* We allocate two contiguous pages for kernel and user. */
-#define PGD_ORDER 1
-#else
-#define PGD_ORDER 0
+	if (pgd) {
+		pgd_t *upgd = (void *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
+
+		if (upgd)
+			virt_to_page(pgd)->index = (long)upgd;
+		else {
+			free_page((unsigned long)pgd);
+			pgd = NULL;
+		}
+	}
+#endif
+	return pgd;
+}
+
+static inline void user_pgd_free(pgd_t *pgd)
+{
+#ifdef CONFIG_X86_64
+	free_page(virt_to_page(pgd)->index);
 #endif
+}
 
 pgd_t *pgd_alloc(struct mm_struct *mm)
 {
@@ -671,7 +690,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
 	pmd_t *pmds[PREALLOCATED_PMDS];
 	unsigned long flags;
 
-	pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, PGD_ORDER);
+	pgd = user_pgd_alloc((void *)__get_free_page(GFP_KERNEL|__GFP_ZERO));
 
 	if (pgd == NULL)
 		goto out;
@@ -710,7 +729,8 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
 out_free_pmds:
 	free_pmds(pmds, mm, !xen_feature(XENFEAT_pae_pgdir_above_4gb));
 out_free_pgd:
-	free_pages((unsigned long)pgd, PGD_ORDER);
+	user_pgd_free(pgd);
+	free_page((unsigned long)pgd);
 out:
 	return NULL;
 }
@@ -729,7 +749,8 @@ void pgd_free(struct mm_struct *mm, pgd_
 
 	pgd_mop_up_pmds(mm, pgd);
 	paravirt_pgd_free(mm, pgd);
-	free_pages((unsigned long)pgd, PGD_ORDER);
+	user_pgd_free(pgd);
+	free_page((unsigned long)pgd);
 }
 
 /* blktap and gntdev need this, as otherwise they would implicitly (and
Index: head-2008-12-01/drivers/xen/core/machine_reboot.c
===================================================================
--- head-2008-12-01.orig/drivers/xen/core/machine_reboot.c	2008-12-01 12:13:13.000000000 +0100
+++ head-2008-12-01/drivers/xen/core/machine_reboot.c	2008-12-01 12:13:27.000000000 +0100
@@ -199,8 +199,7 @@ static int take_machine_down(void *_susp
 		 * in fast-suspend mode as that implies a new enough Xen.
 		 */
 		if (!suspend->fast_suspend)
-			xen_new_user_pt(__pa(__user_pgd(
-				current->active_mm->pgd)));
+			xen_new_user_pt(current->active_mm->pgd);
 #endif
 	}
 
Index: head-2008-12-01/include/asm-x86/mach-xen/asm/hypervisor.h
===================================================================
--- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/hypervisor.h	2008-12-01 12:13:22.000000000 +0100
+++ head-2008-12-01/include/asm-x86/mach-xen/asm/hypervisor.h	2008-12-01 12:13:27.000000000 +0100
@@ -85,8 +85,8 @@ void do_hypervisor_callback(struct pt_re
  * be MACHINE addresses.
  */
 
-void xen_pt_switch(unsigned long ptr);
-void xen_new_user_pt(unsigned long ptr); /* x86_64 only */
+void xen_pt_switch(pgd_t *);
+void xen_new_user_pt(pgd_t *); /* x86_64 only */
 void xen_load_gs(unsigned int selector); /* x86_64 only */
 void xen_tlb_flush(void);
 void xen_invlpg(unsigned long ptr);
@@ -94,7 +94,7 @@ void xen_invlpg(unsigned long ptr);
 void xen_l1_entry_update(pte_t *ptr, pte_t val);
 void xen_l2_entry_update(pmd_t *ptr, pmd_t val);
 void xen_l3_entry_update(pud_t *ptr, pud_t val); /* x86_64/PAE */
-void xen_l4_entry_update(pgd_t *ptr, int user, pgd_t val); /* x86_64 only */
+void xen_l4_entry_update(pgd_t *ptr, pgd_t val); /* x86_64 only */
 void xen_pgd_pin(pgd_t *);
 void xen_pgd_unpin(pgd_t *);
 
Index: head-2008-12-01/include/asm-x86/mach-xen/asm/mmu_context_64.h
===================================================================
--- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/mmu_context_64.h	2008-12-01 11:49:07.000000000 +0100
+++ head-2008-12-01/include/asm-x86/mach-xen/asm/mmu_context_64.h	2008-12-01 12:13:27.000000000 +0100
@@ -46,6 +46,7 @@ static inline void switch_mm(struct mm_s
 {
 	unsigned cpu = smp_processor_id();
 	struct mmuext_op _op[3], *op = _op;
+	pgd_t *upgd;
 
 	if (likely(prev != next)) {
 		BUG_ON(!xen_feature(XENFEAT_writable_page_tables) &&
@@ -64,9 +65,11 @@ static inline void switch_mm(struct mm_s
 		op->arg1.mfn = pfn_to_mfn(__pa(next->pgd) >> PAGE_SHIFT);
 		op++;
 
-		/* xen_new_user_pt(__pa(__user_pgd(next->pgd))) */
+		/* xen_new_user_pt(next->pgd) */
 		op->cmd = MMUEXT_NEW_USER_BASEPTR;
-		op->arg1.mfn = pfn_to_mfn(__pa(__user_pgd(next->pgd)) >> PAGE_SHIFT);
+		upgd = __user_pgd(next->pgd);
+		op->arg1.mfn = likely(upgd)
+			       ? pfn_to_mfn(__pa(upgd) >> PAGE_SHIFT) : 0;
 		op++;
 
 		if (unlikely(next->context.ldt != prev->context.ldt)) {
@@ -90,7 +93,7 @@ static inline void switch_mm(struct mm_s
 		 * to make sure to use no freed page tables.
 		 */
 		load_cr3(next->pgd);
-		xen_new_user_pt(__pa(__user_pgd(next->pgd)));
+		xen_new_user_pt(next->pgd);
 		load_LDT_nolock(&next->context);
 	}
 }
Index: head-2008-12-01/include/asm-x86/mach-xen/asm/pgalloc.h
===================================================================
--- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/pgalloc.h	2008-12-01 12:13:06.000000000 +0100
+++ head-2008-12-01/include/asm-x86/mach-xen/asm/pgalloc.h	2008-12-01 12:13:27.000000000 +0100
@@ -106,15 +106,13 @@ static inline void pud_populate(struct m
 #endif /* CONFIG_X86_PAE */
 
 #if PAGETABLE_LEVELS > 3
-#define __user_pgd(pgd) ((pgd) + PTRS_PER_PGD)
-
 static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
 {
 	pgd_t ent = __pgd(_PAGE_TABLE | __pa(pud));
 
 	paravirt_alloc_pud(mm, __pa(pud) >> PAGE_SHIFT);
 	if (unlikely(PagePinned(virt_to_page(pgd))))
-		xen_l4_entry_update(pgd, 1, ent);
+		xen_l4_entry_update(pgd, ent);
 	else
 		*__user_pgd(pgd) = *pgd = ent;
 }
Index: head-2008-12-01/include/asm-x86/mach-xen/asm/pgtable_64.h
===================================================================
--- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/pgtable_64.h	2008-12-01 12:13:13.000000000 +0100
+++ head-2008-12-01/include/asm-x86/mach-xen/asm/pgtable_64.h	2008-12-01 12:13:27.000000000 +0100
@@ -131,18 +131,25 @@ static inline void xen_set_pud(pud_t *pu
 	: (void)(*__pudp = xen_make_pud(0)); \
 })
 
-#define __user_pgd(pgd) ((pgd) + PTRS_PER_PGD)
+static inline pgd_t *__user_pgd(pgd_t *pgd)
+{
+	if (unlikely(((unsigned long)pgd & PAGE_MASK)
+		     == (unsigned long)init_level4_pgt))
+		return NULL;
+	return (pgd_t *)(virt_to_page(pgd)->index
+			 + ((unsigned long)pgd & ~PAGE_MASK));
+}
 
 static inline void xen_set_pgd(pgd_t *pgdp, pgd_t pgd)
 {
-	xen_l4_entry_update(pgdp, 0, pgd);
+	xen_l4_entry_update(pgdp, pgd);
 }
 
 #define xen_pgd_clear(pgd) \
 ({ \
 	pgd_t *__pgdp = (pgd); \
 	PagePinned(virt_to_page(__pgdp)) \
-	? xen_l4_entry_update(__pgdp, 1, xen_make_pgd(0)) \
+	? xen_l4_entry_update(__pgdp, xen_make_pgd(0)) \
 	: (void)(*__user_pgd(__pgdp) = *__pgdp = xen_make_pgd(0)); \
 })
 