git.ipfire.org Git - people/teissler/ipfire-2.x.git/blobdiff - src/patches/suse-2.6.27.31/patches.xen/xen-x86_64-pgd-alloc-order
Move xen patchset to new version's subdir.
[people/teissler/ipfire-2.x.git] / src / patches / suse-2.6.27.31 / patches.xen / xen-x86_64-pgd-alloc-order
diff --git a/src/patches/suse-2.6.27.31/patches.xen/xen-x86_64-pgd-alloc-order b/src/patches/suse-2.6.27.31/patches.xen/xen-x86_64-pgd-alloc-order
new file mode 100644 (file)
index 0000000..5d4f4f7
--- /dev/null
@@ -0,0 +1,350 @@
+From: jbeulich@novell.com
+Subject: don't require order-1 allocations for pgd-s
+Patch-mainline: obsolete
+
+At the same time, remove the useless user-mode counterpart of init_level4_pgt.
+
+--- sle11-2009-04-09.orig/arch/x86/kernel/cpu/common_64-xen.c  2008-11-17 14:07:10.000000000 +0100
++++ sle11-2009-04-09/arch/x86/kernel/cpu/common_64-xen.c       2009-03-16 16:40:52.000000000 +0100
+@@ -530,8 +530,7 @@ static void __init_refok switch_pt(int c
+ #ifdef CONFIG_XEN
+       if (cpu == 0)
+               xen_init_pt();
+-      xen_pt_switch(__pa_symbol(init_level4_pgt));
+-      xen_new_user_pt(__pa_symbol(__user_pgd(init_level4_pgt)));
++      xen_pt_switch(init_level4_pgt);
+ #endif
+ }
+--- sle11-2009-04-09.orig/arch/x86/kernel/head_64-xen.S        2009-03-16 16:38:16.000000000 +0100
++++ sle11-2009-04-09/arch/x86/kernel/head_64-xen.S     2009-03-16 16:40:52.000000000 +0100
+@@ -44,14 +44,6 @@ ENTRY(name)
+ NEXT_PAGE(init_level4_pgt)
+       .fill   512,8,0
+-        /*
+-         * We update two pgd entries to make kernel and user pgd consistent
+-         * at pgd_populate(). It can be used for kernel modules. So we place 
+-         * this page here for those cases to avoid memory corruption.
+-         * We also use this page to establish the initial mapping for the
+-         * vsyscall area.
+-         */
+-      .fill   512,8,0
+ NEXT_PAGE(level3_kernel_pgt)
+       .fill   512,8,0
+--- sle11-2009-04-09.orig/arch/x86/mm/hypervisor.c     2009-03-16 16:40:50.000000000 +0100
++++ sle11-2009-04-09/arch/x86/mm/hypervisor.c  2009-03-30 12:19:20.000000000 +0200
+@@ -423,7 +423,7 @@ void xen_l3_entry_update(pud_t *ptr, pud
+ #endif
+ #ifdef CONFIG_X86_64
+-void xen_l4_entry_update(pgd_t *ptr, int user, pgd_t val)
++void xen_l4_entry_update(pgd_t *ptr, pgd_t val)
+ {
+       mmu_update_t u[2];
+       struct page *page = NULL;
+@@ -436,8 +436,10 @@ void xen_l4_entry_update(pgd_t *ptr, int
+       }
+       u[0].ptr = virt_to_machine(ptr);
+       u[0].val = __pgd_val(val);
+-      if (user) {
+-              u[1].ptr = virt_to_machine(__user_pgd(ptr));
++      if (((unsigned long)ptr & ~PAGE_MASK)
++          < pgd_index(__HYPERVISOR_VIRT_START) * sizeof(*ptr)
++          && (ptr = __user_pgd(ptr)) != NULL) {
++              u[1].ptr = virt_to_machine(ptr);
+               u[1].val = __pgd_val(val);
+               do_lN_entry_update(u, 2, page);
+       } else
+@@ -445,21 +447,25 @@ void xen_l4_entry_update(pgd_t *ptr, int
+ }
+ #endif /* CONFIG_X86_64 */
+-void xen_pt_switch(unsigned long ptr)
++#ifdef CONFIG_X86_64
++void xen_pt_switch(pgd_t *pgd)
+ {
+       struct mmuext_op op;
+       op.cmd = MMUEXT_NEW_BASEPTR;
+-      op.arg1.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
++      op.arg1.mfn = pfn_to_mfn(__pa(pgd) >> PAGE_SHIFT);
+       BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
+ }
+-void xen_new_user_pt(unsigned long ptr)
++void xen_new_user_pt(pgd_t *pgd)
+ {
+       struct mmuext_op op;
++
++      pgd = __user_pgd(pgd);
+       op.cmd = MMUEXT_NEW_USER_BASEPTR;
+-      op.arg1.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
++      op.arg1.mfn = pgd ? pfn_to_mfn(__pa(pgd) >> PAGE_SHIFT) : 0;
+       BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
+ }
++#endif
+ void xen_tlb_flush(void)
+ {
+@@ -529,28 +535,38 @@ void xen_invlpg_mask(cpumask_t *mask, un
+ void xen_pgd_pin(pgd_t *pgd)
+ {
+       struct mmuext_op op[NR_PGD_PIN_OPS];
++      unsigned int nr = NR_PGD_PIN_OPS;
+       op[0].cmd = MMUEXT_PIN_L3_TABLE;
+       op[0].arg1.mfn = pfn_to_mfn(__pa(pgd) >> PAGE_SHIFT);
+ #ifdef CONFIG_X86_64
+       op[1].cmd = op[0].cmd = MMUEXT_PIN_L4_TABLE;
+-      op[1].arg1.mfn = pfn_to_mfn(__pa(__user_pgd(pgd)) >> PAGE_SHIFT);
++      pgd = __user_pgd(pgd);
++      if (pgd)
++              op[1].arg1.mfn = pfn_to_mfn(__pa(pgd) >> PAGE_SHIFT);
++      else
++              nr = 1;
+ #endif
+-      if (HYPERVISOR_mmuext_op(op, NR_PGD_PIN_OPS, NULL, DOMID_SELF) < 0)
++      if (HYPERVISOR_mmuext_op(op, nr, NULL, DOMID_SELF) < 0)
+               BUG();
+ }
+ void xen_pgd_unpin(pgd_t *pgd)
+ {
+       struct mmuext_op op[NR_PGD_PIN_OPS];
++      unsigned int nr = NR_PGD_PIN_OPS;
+       op[0].cmd = MMUEXT_UNPIN_TABLE;
+       op[0].arg1.mfn = pfn_to_mfn(__pa(pgd) >> PAGE_SHIFT);
+ #ifdef CONFIG_X86_64
+-      op[1].cmd = MMUEXT_UNPIN_TABLE;
+-      op[1].arg1.mfn = pfn_to_mfn(__pa(__user_pgd(pgd)) >> PAGE_SHIFT);
++      pgd = __user_pgd(pgd);
++      if (pgd) {
++              op[1].cmd = MMUEXT_UNPIN_TABLE;
++              op[1].arg1.mfn = pfn_to_mfn(__pa(pgd) >> PAGE_SHIFT);
++      } else
++              nr = 1;
+ #endif
+-      if (HYPERVISOR_mmuext_op(op, NR_PGD_PIN_OPS, NULL, DOMID_SELF) < 0)
++      if (HYPERVISOR_mmuext_op(op, nr, NULL, DOMID_SELF) < 0)
+               BUG();
+ }
+--- sle11-2009-04-09.orig/arch/x86/mm/init_64-xen.c    2009-03-16 16:40:50.000000000 +0100
++++ sle11-2009-04-09/arch/x86/mm/init_64-xen.c 2009-03-16 16:40:52.000000000 +0100
+@@ -616,9 +616,6 @@ void __init xen_init_pt(void)
+               __pud(__pa_symbol(level2_kernel_pgt) | _PAGE_TABLE);
+       memcpy(level2_kernel_pgt, page, PAGE_SIZE);
+-      __user_pgd(init_level4_pgt)[pgd_index(VSYSCALL_START)] =
+-              __pgd(__pa_symbol(level3_user_pgt) | _PAGE_TABLE);
+-
+       /* Do an early initialization of the fixmap area. */
+       addr = __fix_to_virt(FIX_EARLYCON_MEM_BASE);
+       level3_kernel_pgt[pud_index(addr)] =
+@@ -628,8 +625,6 @@ void __init xen_init_pt(void)
+       early_make_page_readonly(init_level4_pgt,
+                                XENFEAT_writable_page_tables);
+-      early_make_page_readonly(__user_pgd(init_level4_pgt),
+-                               XENFEAT_writable_page_tables);
+       early_make_page_readonly(level3_kernel_pgt,
+                                XENFEAT_writable_page_tables);
+       early_make_page_readonly(level3_user_pgt,
+--- sle11-2009-04-09.orig/arch/x86/mm/pgtable-xen.c    2009-03-16 16:40:50.000000000 +0100
++++ sle11-2009-04-09/arch/x86/mm/pgtable-xen.c 2009-04-09 14:54:18.000000000 +0200
+@@ -270,9 +270,11 @@ static void pgd_walk(pgd_t *pgd_base, pg
+                       BUG();
+               seq = 0;
+       }
++      pgd = __user_pgd(pgd_base);
++      BUG_ON(!pgd);
+       MULTI_update_va_mapping(mcl + seq,
+-             (unsigned long)__user_pgd(pgd_base),
+-             pfn_pte(virt_to_phys(__user_pgd(pgd_base))>>PAGE_SHIFT, flags),
++             (unsigned long)pgd,
++             pfn_pte(virt_to_phys(pgd)>>PAGE_SHIFT, flags),
+              0);
+       MULTI_update_va_mapping(mcl + seq + 1,
+              (unsigned long)pgd_base,
+@@ -662,12 +664,29 @@ static void pgd_prepopulate_pmd(struct m
+       }
+ }
++static inline pgd_t *user_pgd_alloc(pgd_t *pgd)
++{
+ #ifdef CONFIG_X86_64
+-/* We allocate two contiguous pages for kernel and user. */
+-#define PGD_ORDER 1
+-#else
+-#define PGD_ORDER 0
++      if (pgd) {
++              pgd_t *upgd = (void *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
++
++              if (upgd)
++                      virt_to_page(pgd)->index = (long)upgd;
++              else {
++                      free_page((unsigned long)pgd);
++                      pgd = NULL;
++              }
++      }
++#endif
++      return pgd;
++}
++
++static inline void user_pgd_free(pgd_t *pgd)
++{
++#ifdef CONFIG_X86_64
++      free_page(virt_to_page(pgd)->index);
+ #endif
++}
+ pgd_t *pgd_alloc(struct mm_struct *mm)
+ {
+@@ -675,7 +694,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
+       pmd_t *pmds[PREALLOCATED_PMDS];
+       unsigned long flags;
+-      pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, PGD_ORDER);
++      pgd = user_pgd_alloc((void *)__get_free_page(GFP_KERNEL|__GFP_ZERO));
+       if (pgd == NULL)
+               goto out;
+@@ -714,7 +733,8 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
+ out_free_pmds:
+       free_pmds(pmds, mm, !xen_feature(XENFEAT_pae_pgdir_above_4gb));
+ out_free_pgd:
+-      free_pages((unsigned long)pgd, PGD_ORDER);
++      user_pgd_free(pgd);
++      free_page((unsigned long)pgd);
+ out:
+       return NULL;
+ }
+@@ -733,7 +753,8 @@ void pgd_free(struct mm_struct *mm, pgd_
+       pgd_mop_up_pmds(mm, pgd);
+       paravirt_pgd_free(mm, pgd);
+-      free_pages((unsigned long)pgd, PGD_ORDER);
++      user_pgd_free(pgd);
++      free_page((unsigned long)pgd);
+ }
+ /* blktap and gntdev need this, as otherwise they would implicitly (and
+--- sle11-2009-04-09.orig/drivers/xen/core/machine_reboot.c    2009-02-17 12:25:29.000000000 +0100
++++ sle11-2009-04-09/drivers/xen/core/machine_reboot.c 2009-03-16 16:40:52.000000000 +0100
+@@ -191,8 +191,7 @@ static int take_machine_down(void *_susp
+                * in fast-suspend mode as that implies a new enough Xen.
+                */
+               if (!suspend->fast_suspend)
+-                      xen_new_user_pt(__pa(__user_pgd(
+-                              current->active_mm->pgd)));
++                      xen_new_user_pt(current->active_mm->pgd);
+ #endif
+       }
+--- sle11-2009-04-09.orig/include/asm-x86/mach-xen/asm/hypervisor.h    2009-03-16 16:40:50.000000000 +0100
++++ sle11-2009-04-09/include/asm-x86/mach-xen/asm/hypervisor.h 2009-03-16 16:40:52.000000000 +0100
+@@ -85,8 +85,8 @@ void do_hypervisor_callback(struct pt_re
+  * be MACHINE addresses.
+  */
+-void xen_pt_switch(unsigned long ptr);
+-void xen_new_user_pt(unsigned long ptr); /* x86_64 only */
++void xen_pt_switch(pgd_t *);
++void xen_new_user_pt(pgd_t *); /* x86_64 only */
+ void xen_load_gs(unsigned int selector); /* x86_64 only */
+ void xen_tlb_flush(void);
+ void xen_invlpg(unsigned long ptr);
+@@ -94,7 +94,7 @@ void xen_invlpg(unsigned long ptr);
+ void xen_l1_entry_update(pte_t *ptr, pte_t val);
+ void xen_l2_entry_update(pmd_t *ptr, pmd_t val);
+ void xen_l3_entry_update(pud_t *ptr, pud_t val); /* x86_64/PAE */
+-void xen_l4_entry_update(pgd_t *ptr, int user, pgd_t val); /* x86_64 only */
++void xen_l4_entry_update(pgd_t *ptr, pgd_t val); /* x86_64 only */
+ void xen_pgd_pin(pgd_t *);
+ void xen_pgd_unpin(pgd_t *);
+--- sle11-2009-04-09.orig/include/asm-x86/mach-xen/asm/mmu_context_64.h        2009-03-16 16:38:16.000000000 +0100
++++ sle11-2009-04-09/include/asm-x86/mach-xen/asm/mmu_context_64.h     2009-03-16 16:40:52.000000000 +0100
+@@ -46,6 +46,7 @@ static inline void switch_mm(struct mm_s
+ {
+       unsigned cpu = smp_processor_id();
+       struct mmuext_op _op[3], *op = _op;
++      pgd_t *upgd;
+       if (likely(prev != next)) {
+               BUG_ON(!xen_feature(XENFEAT_writable_page_tables) &&
+@@ -64,9 +65,11 @@ static inline void switch_mm(struct mm_s
+               op->arg1.mfn = pfn_to_mfn(__pa(next->pgd) >> PAGE_SHIFT);
+               op++;
+-              /* xen_new_user_pt(__pa(__user_pgd(next->pgd))) */
++              /* xen_new_user_pt(next->pgd) */
+               op->cmd = MMUEXT_NEW_USER_BASEPTR;
+-              op->arg1.mfn = pfn_to_mfn(__pa(__user_pgd(next->pgd)) >> PAGE_SHIFT);
++              upgd = __user_pgd(next->pgd);
++              op->arg1.mfn = likely(upgd)
++                             ? pfn_to_mfn(__pa(upgd) >> PAGE_SHIFT) : 0;
+               op++;
+               
+               if (unlikely(next->context.ldt != prev->context.ldt)) {
+@@ -90,7 +93,7 @@ static inline void switch_mm(struct mm_s
+                        * to make sure to use no freed page tables.
+                        */
+                         load_cr3(next->pgd);
+-                        xen_new_user_pt(__pa(__user_pgd(next->pgd)));         
++                      xen_new_user_pt(next->pgd);
+                       load_LDT_nolock(&next->context);
+               }
+       }
+--- sle11-2009-04-09.orig/include/asm-x86/mach-xen/asm/pgalloc.h       2009-03-16 16:40:37.000000000 +0100
++++ sle11-2009-04-09/include/asm-x86/mach-xen/asm/pgalloc.h    2009-03-16 16:40:52.000000000 +0100
+@@ -106,15 +106,13 @@ static inline void pud_populate(struct m
+ #endif        /* CONFIG_X86_PAE */
+ #if PAGETABLE_LEVELS > 3
+-#define __user_pgd(pgd) ((pgd) + PTRS_PER_PGD)
+-
+ static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
+ {
+       pgd_t ent = __pgd(_PAGE_TABLE | __pa(pud));
+       paravirt_alloc_pud(mm, __pa(pud) >> PAGE_SHIFT);
+       if (unlikely(PagePinned(virt_to_page(pgd))))
+-              xen_l4_entry_update(pgd, 1, ent);
++              xen_l4_entry_update(pgd, ent);
+       else
+               *__user_pgd(pgd) = *pgd = ent;
+ }
+--- sle11-2009-04-09.orig/include/asm-x86/mach-xen/asm/pgtable_64.h    2009-03-16 16:40:37.000000000 +0100
++++ sle11-2009-04-09/include/asm-x86/mach-xen/asm/pgtable_64.h 2009-03-16 16:40:52.000000000 +0100
+@@ -131,18 +131,25 @@ static inline void xen_set_pud(pud_t *pu
+       : (void)(*__pudp = xen_make_pud(0));    \
+ })
+-#define __user_pgd(pgd) ((pgd) + PTRS_PER_PGD)
++static inline pgd_t *__user_pgd(pgd_t *pgd)
++{
++      if (unlikely(((unsigned long)pgd & PAGE_MASK)
++                   == (unsigned long)init_level4_pgt))
++              return NULL;
++      return (pgd_t *)(virt_to_page(pgd)->index
++                       + ((unsigned long)pgd & ~PAGE_MASK));
++}
+ static inline void xen_set_pgd(pgd_t *pgdp, pgd_t pgd)
+ {
+-      xen_l4_entry_update(pgdp, 0, pgd);
++      xen_l4_entry_update(pgdp, pgd);
+ }
+ #define xen_pgd_clear(pgd)                    \
+ ({                                            \
+       pgd_t *__pgdp = (pgd);                  \
+       PagePinned(virt_to_page(__pgdp))        \
+-      ? xen_l4_entry_update(__pgdp, 1, xen_make_pgd(0)) \
++      ? xen_l4_entry_update(__pgdp, xen_make_pgd(0)) \
+       : (void)(*__user_pgd(__pgdp) = *__pgdp = xen_make_pgd(0)); \
+ })