Updated xen patches taken from suse.

[people/teissler/ipfire-2.x.git] / src / patches / suse-2.6.27.25 / patches.xen / xen3-patch-2.6.26
diff --git a/src/patches/60035_xen3-patch-2.6.26.patch1 b/src/patches/suse-2.6.27.25/patches.xen/xen3-patch-2.6.26

similarity index 94%

rename from src/patches/60035_xen3-patch-2.6.26.patch1

rename to src/patches/suse-2.6.27.25/patches.xen/xen3-patch-2.6.26

index 74dd6ee21f8412114e029f570e5e1aaad14b9826..0f2c30f2fcd64d83a4e6b15211a602db03b9134b 100644 (file)
--- a/src/patches/60035_xen3-patch-2.6.26.patch1
+++ b/src/patches/suse-2.6.27.25/patches.xen/xen3-patch-2.6.26
@@ -5,139 +5,53 @@ Patch-mainline: 2.6.26
  Acked-by: Jeff Mahoney <jeffm@suse.com>
  Automatically created from "patches.kernel.org/patch-2.6.26" by xen-port-patches.py
  
----
- arch/x86/Kconfig                              |   10 
- arch/x86/ia32/ia32entry-xen.S                 |   14 
- arch/x86/kernel/Makefile                      |    5 
- arch/x86/kernel/acpi/Makefile                 |    2 
- arch/x86/kernel/acpi/boot.c                   |    8 
- arch/x86/kernel/acpi/sleep-xen.c              |   87 +
- arch/x86/kernel/cpu/common-xen.c              |  158 +--
- arch/x86/kernel/cpu/mtrr/main-xen.c           |  138 +++
- arch/x86/kernel/e820_32-xen.c                 |   32 
- arch/x86/kernel/e820_64-xen.c                 |  197 +++-
- arch/x86/kernel/early_printk-xen.c            |   24 
- arch/x86/kernel/entry_32-xen.S                |   44 
- arch/x86/kernel/entry_64-xen.S                |    8 
- arch/x86/kernel/genapic_64-xen.c              |   55 +
- arch/x86/kernel/genapic_xen_64.c              |    4 
- arch/x86/kernel/head64-xen.c                  |  101 +-
- arch/x86/kernel/head_32-xen.S                 |    2 
- arch/x86/kernel/init_task-xen.c               |    1 
- arch/x86/kernel/io_apic_32-xen.c              |  155 +--
- arch/x86/kernel/io_apic_64-xen.c              |   67 -
- arch/x86/kernel/ipi-xen.c                     |  232 +++++
- arch/x86/kernel/irq_32-xen.c                  |    6 
- arch/x86/kernel/machine_kexec_64.c            |    2 
- arch/x86/kernel/microcode-xen.c               |    2 
- arch/x86/kernel/mmconf-fam10h_64.c            |   10 
- arch/x86/kernel/mpparse-xen.c                 | 1104 ++++++++++++++++++++++++
- arch/x86/kernel/mpparse_32-xen.c              | 1161 --------------------------
- arch/x86/kernel/mpparse_64-xen.c              |  879 -------------------
- arch/x86/kernel/pci-dma-xen.c                 |  735 +++++++++-------
- arch/x86/kernel/pci-nommu-xen.c               |  103 ++
- arch/x86/kernel/process-xen.c                 |  188 ++++
- arch/x86/kernel/process_32-xen.c              |  146 +--
- arch/x86/kernel/process_64-xen.c              |  165 ++-
- arch/x86/kernel/setup-xen.c                   |  141 +++
- arch/x86/kernel/setup64-xen.c                 |  103 --
- arch/x86/kernel/setup_32-xen.c                |  127 ++
- arch/x86/kernel/setup_64-xen.c                |  303 +++---
- arch/x86/kernel/smp-xen.c                     |  329 +++++++
- arch/x86/kernel/smp_32-xen.c                  |  647 --------------
- arch/x86/kernel/smp_64-xen.c                  |  554 ------------
- arch/x86/kernel/time_32-xen.c                 |    2 
- arch/x86/kernel/traps_32-xen.c                |  592 +++++++------
- arch/x86/kernel/traps_64-xen.c                |   46 -
- arch/x86/kernel/vsyscall_64-xen.c             |    2 
- arch/x86/mm/fault-xen.c                       |   11 
- arch/x86/mm/highmem_32-xen.c                  |    1 
- arch/x86/mm/init_32-xen.c                     |  122 +-
- arch/x86/mm/init_64-xen.c                     |  292 +++++-
- arch/x86/mm/ioremap-xen.c                     |  269 ++++--
- arch/x86/mm/pageattr-xen.c                    |  481 ++--------
- arch/x86/mm/pat-xen.c                         |  602 +++++++++++++
- arch/x86/mm/pgtable-xen.c                     |  709 +++++++++++++++
- arch/x86/mm/pgtable_32-xen.c                  |  242 -----
- arch/x86/pci/i386.c                           |    4 
- arch/x86/pci/irq-xen.c                        |   23 
- arch/x86/vdso/vdso32-setup-xen.c              |   15 
- drivers/acpi/processor_core.c                 |    2 
- drivers/input/xen-kbdfront.c                  |    1 
- drivers/oprofile/cpu_buffer.c                 |    2 
- drivers/pci/msi-xen.c                         |   12 
- drivers/video/Kconfig                         |    2 
- drivers/video/xen-fbfront.c                   |    1 
- drivers/xen/Kconfig                           |    2 
- drivers/xen/Makefile                          |    8 
- drivers/xen/blkfront/blkfront.c               |    4 
- drivers/xen/blkfront/block.h                  |    1 
- drivers/xen/blkfront/vbd.c                    |   58 -
- drivers/xen/blktap/blktap.c                   |   27 
- drivers/xen/char/mem.c                        |   53 +
- drivers/xen/console/console.c                 |   13 
- drivers/xen/core/machine_kexec.c              |    8 
- drivers/xen/core/machine_reboot.c             |    8 
- drivers/xen/core/smpboot.c                    |   23 
- drivers/xen/core/xen_proc.c                   |    2 
- drivers/xen/fbfront/xenfb.c                   |   24 
- drivers/xen/gntdev/gntdev.c                   |    8 
- drivers/xen/netfront/netfront.c               |    6 
- drivers/xen/privcmd/privcmd.c                 |    8 
- drivers/xen/xenbus/xenbus_client.c            |    6 
- drivers/xen/xenbus/xenbus_probe.c             |   25 
- fs/aio.c                                      |   15 
- include/asm-x86/dma-mapping.h                 |    5 
- include/asm-x86/genapic_64.h                  |    5 
- include/asm-x86/mach-xen/asm/desc.h           |   65 -
- include/asm-x86/mach-xen/asm/dma-mapping.h    |   22 
- include/asm-x86/mach-xen/asm/dma-mapping_32.h |  141 ---
- include/asm-x86/mach-xen/asm/dma-mapping_64.h |  205 ----
- include/asm-x86/mach-xen/asm/fixmap.h         |    8 
- include/asm-x86/mach-xen/asm/fixmap_32.h      |   22 
- include/asm-x86/mach-xen/asm/fixmap_64.h      |   27 
- include/asm-x86/mach-xen/asm/highmem.h        |    2 
- include/asm-x86/mach-xen/asm/io.h             |   17 
- include/asm-x86/mach-xen/asm/io_32.h          |  156 +--
- include/asm-x86/mach-xen/asm/io_64.h          |  124 +-
- include/asm-x86/mach-xen/asm/irqflags.h       |    8 
- include/asm-x86/mach-xen/asm/mmu_context_32.h |   12 
- include/asm-x86/mach-xen/asm/mmu_context_64.h |   15 
- include/asm-x86/mach-xen/asm/page.h           |   20 
- include/asm-x86/mach-xen/asm/page_64.h        |   10 
- include/asm-x86/mach-xen/asm/pci.h            |   11 
- include/asm-x86/mach-xen/asm/pci_64.h         |   16 
- include/asm-x86/mach-xen/asm/pgalloc.h        |  152 +++
- include/asm-x86/mach-xen/asm/pgalloc_32.h     |  111 --
- include/asm-x86/mach-xen/asm/pgalloc_64.h     |  179 ----
- include/asm-x86/mach-xen/asm/pgtable-3level.h |   43 
- include/asm-x86/mach-xen/asm/pgtable.h        |  292 ++++--
- include/asm-x86/mach-xen/asm/pgtable_32.h     |  107 +-
- include/asm-x86/mach-xen/asm/pgtable_64.h     |  156 +--
- include/asm-x86/mach-xen/asm/processor.h      |  688 ++++++++-------
- include/asm-x86/mach-xen/asm/segment.h        |    3 
- include/asm-x86/mach-xen/asm/smp.h            |  228 +++++
- include/asm-x86/mach-xen/asm/smp_32.h         |  178 ---
- include/asm-x86/mach-xen/asm/smp_64.h         |  103 --
- include/asm-x86/mach-xen/asm/spinlock.h       |   18 
- include/asm-x86/mach-xen/asm/swiotlb.h        |   13 
- include/asm-x86/mach-xen/asm/swiotlb_32.h     |   43 
- include/asm-x86/mach-xen/asm/system.h         |  107 +-
- include/asm-x86/mach-xen/asm/tlbflush.h       |    3 
- include/asm-x86/mach-xen/asm/vga.h            |    4 
- include/asm-x86/mach-xen/asm/xor_64.h         |  294 +++---
- include/asm-x86/scatterlist.h                 |    2 
- include/linux/page-flags.h                    |   31 
- include/xen/balloon.h                         |   10 
- include/xen/interface/grant_table.h           |    7 
- include/xen/interface/io/fbif.h               |    5 
- include/xen/interface/memory.h                |   17 
- include/xen/interface/vcpu.h                  |    4 
- lib/swiotlb-xen.c                             |  236 ++---
- 128 files changed, 8046 insertions(+), 7660 deletions(-)
-
---- a/arch/x86/ia32/ia32entry-xen.S
-+++ b/arch/x86/ia32/ia32entry-xen.S
+--- sle11-2009-05-14.orig/arch/x86/Kconfig     2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/arch/x86/Kconfig  2009-03-16 16:38:05.000000000 +0100
+@@ -28,7 +28,7 @@ config X86
+       select HAVE_DYNAMIC_FTRACE
+       select HAVE_FTRACE
+       select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64) && !XEN
+-      select HAVE_ARCH_KGDB if !X86_VOYAGER
++      select HAVE_ARCH_KGDB if !X86_VOYAGER && !XEN
+       select HAVE_ARCH_TRACEHOOK
+       select HAVE_GENERIC_DMA_COHERENT if X86_32
+       select HAVE_EFFICIENT_UNALIGNED_ACCESS
+@@ -486,6 +486,7 @@ config PARAVIRT_DEBUG
+ 
+ config MEMTEST
+       bool "Memtest"
++      depends on !XEN
+       help
+         This option adds a kernel parameter 'memtest', which allows memtest
+         to be set.
+@@ -1007,7 +1008,7 @@ config X86_PAE
+ config DIRECT_GBPAGES
+       bool "Enable 1GB pages for kernel pagetables" if EMBEDDED
+       default y
+-      depends on X86_64
++      depends on X86_64 && !XEN
+       help
+         Allow the kernel linear mapping to use 1GB pages on CPUs that
+         support it. This can improve the kernel's performance a tiny bit by
+@@ -1349,8 +1350,7 @@ source kernel/Kconfig.hz
+ 
+ config KEXEC
+       bool "kexec system call"
+-      depends on X86_BIOS_REBOOT
+-      depends on !XEN_UNPRIVILEGED_GUEST
++      depends on X86_BIOS_REBOOT || (XEN && !XEN_UNPRIVILEGED_GUEST)
+       help
+         kexec is a system call that implements the ability to shutdown your
+         current kernel, and to start another kernel.  It is like a reboot
+@@ -1948,6 +1948,4 @@ source "crypto/Kconfig"
+ 
+ source "arch/x86/kvm/Kconfig"
+ 
+-source "drivers/xen/Kconfig"
+-
+ source "lib/Kconfig"
+--- sle11-2009-05-14.orig/arch/x86/ia32/ia32entry-xen.S        2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/arch/x86/ia32/ia32entry-xen.S     2009-03-16 16:38:05.000000000 +0100
  @@ -129,12 +129,14 @@ sysenter_tracesys:
         SAVE_REST
         CLEAR_RREGS
@@ -198,43 +112,29 @@ Automatically created from "patches.kernel.org/patch-2.6.26" by xen-port-patches
         .quad sys_alarm
         .quad sys_fstat /* (old)fstat */
         .quad sys_pause
---- a/arch/x86/Kconfig
-+++ b/arch/x86/Kconfig
-@@ -28,6 +28,6 @@ config X86
-       select HAVE_DYNAMIC_FTRACE
-       select HAVE_FTRACE
-       select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64) && !XEN
--      select HAVE_ARCH_KGDB if !X86_VOYAGER
-+      select HAVE_ARCH_KGDB if !X86_VOYAGER && !XEN
-       select HAVE_GENERIC_DMA_COHERENT if X86_32
-       select HAVE_EFFICIENT_UNALIGNED_ACCESS
-@@ -482,6 +482,7 @@ config PARAVIRT_DEBUG
- 
- config MEMTEST
-       bool "Memtest"
-+      depends on !XEN
-       help
-         This option adds a kernel parameter 'memtest', which allows memtest
-         to be set.
-@@ -1345,8 +1346,7 @@ source kernel/Kconfig.hz
+--- sle11-2009-05-14.orig/arch/x86/kernel/Makefile     2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/arch/x86/kernel/Makefile  2009-03-16 16:38:05.000000000 +0100
+@@ -122,8 +122,7 @@ ifeq ($(CONFIG_X86_64),y)
   
- config KEXEC
-       bool "kexec system call"
--      depends on X86_BIOS_REBOOT
--      depends on !XEN_UNPRIVILEGED_GUEST
-+      depends on X86_BIOS_REBOOT || (XEN && !XEN_UNPRIVILEGED_GUEST)
-       help
-         kexec is a system call that implements the ability to shutdown your
-         current kernel, and to start another kernel.  It is like a reboot
-@@ -1944,6 +1944,4 @@ source "crypto/Kconfig"
+       obj-$(CONFIG_XEN)               += nmi_64.o
+       time_64-$(CONFIG_XEN)           += time_32.o
+-      pci-dma_64-$(CONFIG_XEN)        += pci-dma_32.o
+ endif
   
- source "arch/x86/kvm/Kconfig"
+-disabled-obj-$(CONFIG_XEN) := early-quirks.o hpet.o i8253.o i8259_$(BITS).o reboot.o \
+-      smpboot_$(BITS).o tsc_$(BITS).o tsc_sync.o
++disabled-obj-$(CONFIG_XEN) := crash.o early-quirks.o hpet.o i8253.o i8259_$(BITS).o \
++      pci-swiotlb_64.o reboot.o smpboot.o tlb_$(BITS).o tsc_$(BITS).o tsc_sync.o vsmp_64.o
+--- sle11-2009-05-14.orig/arch/x86/kernel/acpi/Makefile        2008-12-01 11:11:08.000000000 +0100
++++ sle11-2009-05-14/arch/x86/kernel/acpi/Makefile     2009-03-16 16:38:05.000000000 +0100
+@@ -15,4 +15,4 @@ $(obj)/wakeup_rm.o:    $(obj)/realmode/w
+ $(obj)/realmode/wakeup.bin: FORCE
+       $(Q)$(MAKE) $(build)=$(obj)/realmode
   
--source "drivers/xen/Kconfig"
--
- source "lib/Kconfig"
---- a/arch/x86/kernel/acpi/boot.c
-+++ b/arch/x86/kernel/acpi/boot.c
+-disabled-obj-$(CONFIG_XEN)    := cstate.o wakeup_$(BITS).o
++disabled-obj-$(CONFIG_XEN)    := cstate.o wakeup_%.o
+--- sle11-2009-05-14.orig/arch/x86/kernel/acpi/boot.c  2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/arch/x86/kernel/acpi/boot.c       2009-03-16 16:38:05.000000000 +0100
  @@ -251,19 +251,23 @@ static int __init acpi_parse_madt(struct
   
   static void __cpuinit acpi_register_lapic(int id, u8 enabled)
@@ -277,16 +177,8 @@ Automatically created from "patches.kernel.org/patch-2.6.26" by xen-port-patches
   
   static int __init early_acpi_parse_madt_lapic_addr_ovr(void)
   {
---- a/arch/x86/kernel/acpi/Makefile
-+++ b/arch/x86/kernel/acpi/Makefile
-@@ -15,4 +15,4 @@ $(obj)/wakeup_rm.o:    $(obj)/realmode/w
- $(obj)/realmode/wakeup.bin: FORCE
-       $(Q)$(MAKE) $(build)=$(obj)/realmode
- 
--disabled-obj-$(CONFIG_XEN)    := cstate.o wakeup_$(BITS).o
-+disabled-obj-$(CONFIG_XEN)    := cstate.o wakeup_%.o
---- a/arch/x86/kernel/acpi/sleep-xen.c
-+++ b/arch/x86/kernel/acpi/sleep-xen.c
+--- sle11-2009-05-14.orig/arch/x86/kernel/acpi/sleep-xen.c     2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/arch/x86/kernel/acpi/sleep-xen.c  2009-03-16 16:38:05.000000000 +0100
  @@ -10,15 +10,19 @@
   #include <linux/dmi.h>
   #include <linux/cpumask.h>
@@ -411,8 +303,8 @@ Automatically created from "patches.kernel.org/patch-2.6.26" by xen-port-patches
   #endif
   }
   
---- a/arch/x86/kernel/cpu/common-xen.c
-+++ b/arch/x86/kernel/cpu/common-xen.c
+--- sle11-2009-05-14.orig/arch/x86/kernel/cpu/common-xen.c     2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/arch/x86/kernel/cpu/common-xen.c  2009-03-16 16:38:05.000000000 +0100
  @@ -5,7 +5,6 @@
   #include <linux/module.h>
   #include <linux/percpu.h>
@@ -805,8 +697,8 @@ Automatically created from "patches.kernel.org/patch-2.6.26" by xen-port-patches
   void __cpuinit cpu_uninit(void)
   {
         int cpu = raw_smp_processor_id();
---- a/arch/x86/kernel/cpu/mtrr/main-xen.c
-+++ b/arch/x86/kernel/cpu/mtrr/main-xen.c
+--- sle11-2009-05-14.orig/arch/x86/kernel/cpu/mtrr/main-xen.c  2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/arch/x86/kernel/cpu/mtrr/main-xen.c       2009-03-16 16:38:05.000000000 +0100
  @@ -35,6 +35,8 @@ struct mtrr_ops *mtrr_if = &generic_mtrr
   unsigned int num_var_ranges;
   unsigned int mtrr_usage_table[MAX_VAR_RANGES];
@@ -961,8 +853,8 @@ Automatically created from "patches.kernel.org/patch-2.6.26" by xen-port-patches
   }
   
   void mtrr_ap_init(void)
---- a/arch/x86/kernel/e820_32-xen.c
-+++ b/arch/x86/kernel/e820_32-xen.c
+--- sle11-2009-05-14.orig/arch/x86/kernel/e820_32-xen.c        2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/arch/x86/kernel/e820_32-xen.c     2009-03-16 16:38:05.000000000 +0100
  @@ -469,7 +469,7 @@ int __init sanitize_e820_map(struct e820
    * thinkpad 560x, for example, does not cooperate with the memory
    * detection code.)
@@ -1029,8 +921,8 @@ Automatically created from "patches.kernel.org/patch-2.6.26" by xen-port-patches
                 saved_max_pfn = max_pfn;
   #endif
                 e820.nr_map = 0;
---- a/arch/x86/kernel/e820_64-xen.c
-+++ b/arch/x86/kernel/e820_64-xen.c
+--- sle11-2009-05-14.orig/arch/x86/kernel/e820_64-xen.c        2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/arch/x86/kernel/e820_64-xen.c     2009-03-16 16:38:05.000000000 +0100
  @@ -40,11 +40,11 @@ struct e820map machine_e820;
   unsigned long end_pfn;
   
@@ -1346,8 +1238,8 @@ Automatically created from "patches.kernel.org/patch-2.6.26" by xen-port-patches
                 e820.nr_map = 0;
                 userdef = 1;
                 return 0;
---- a/arch/x86/kernel/early_printk-xen.c
-+++ b/arch/x86/kernel/early_printk-xen.c
+--- sle11-2009-05-14.orig/arch/x86/kernel/early_printk-xen.c   2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/arch/x86/kernel/early_printk-xen.c        2009-03-16 16:38:05.000000000 +0100
  @@ -13,7 +13,7 @@
   
   #ifndef CONFIG_XEN
@@ -1416,8 +1308,8 @@ Automatically created from "patches.kernel.org/patch-2.6.26" by xen-port-patches
   #ifdef CONFIG_XEN
         } else if (!strncmp(buf, "xen", 3)) {
                 early_console = &xenboot_console;
---- a/arch/x86/kernel/entry_32-xen.S
-+++ b/arch/x86/kernel/entry_32-xen.S
+--- sle11-2009-05-14.orig/arch/x86/kernel/entry_32-xen.S       2009-05-14 11:18:32.000000000 +0200
++++ sle11-2009-05-14/arch/x86/kernel/entry_32-xen.S    2009-03-16 16:38:05.000000000 +0100
  @@ -1,5 +1,4 @@
   /*
  - *  linux/arch/i386/entry.S
@@ -1585,8 +1477,8 @@ Automatically created from "patches.kernel.org/patch-2.6.26" by xen-port-patches
         GET_THREAD_INFO(%ebp)
         movl $-EFAULT,PT_EAX(%esp)
         jmp resume_userspace
---- a/arch/x86/kernel/entry_64-xen.S
-+++ b/arch/x86/kernel/entry_64-xen.S
+--- sle11-2009-05-14.orig/arch/x86/kernel/entry_64-xen.S       2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/arch/x86/kernel/entry_64-xen.S    2009-03-16 16:38:05.000000000 +0100
  @@ -338,19 +338,17 @@ badsys:
         /* Do syscall tracing */
   tracesys:                      
@@ -1610,8 +1502,8 @@ Automatically created from "patches.kernel.org/patch-2.6.26" by xen-port-patches
         /* Use IRET because user could have changed frame */
                 
   /* 
---- a/arch/x86/kernel/genapic_64-xen.c
-+++ b/arch/x86/kernel/genapic_64-xen.c
+--- sle11-2009-05-14.orig/arch/x86/kernel/genapic_64-xen.c     2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/arch/x86/kernel/genapic_64-xen.c  2009-03-16 16:38:05.000000000 +0100
  @@ -15,6 +15,7 @@
   #include <linux/kernel.h>
   #include <linux/ctype.h>
@@ -1702,8 +1594,8 @@ Automatically created from "patches.kernel.org/patch-2.6.26" by xen-port-patches
  +      return uv_system_type != UV_NONE;
  +}
  +#endif
---- a/arch/x86/kernel/genapic_xen_64.c
-+++ b/arch/x86/kernel/genapic_xen_64.c
+--- sle11-2009-05-14.orig/arch/x86/kernel/genapic_xen_64.c     2008-12-15 11:27:22.000000000 +0100
++++ sle11-2009-05-14/arch/x86/kernel/genapic_xen_64.c  2009-03-16 16:38:05.000000000 +0100
  @@ -72,9 +72,7 @@ static cpumask_t xen_target_cpus(void)
   
   static cpumask_t xen_vector_allocation_domain(int cpu)
@@ -1715,19 +1607,8 @@ Automatically created from "patches.kernel.org/patch-2.6.26" by xen-port-patches
   }
   
   /*
---- a/arch/x86/kernel/head_32-xen.S
-+++ b/arch/x86/kernel/head_32-xen.S
-@@ -69,7 +69,7 @@ ENTRY(startup_32)
-       cld                     # gcc2 wants the direction flag cleared at all times
- 
-       pushl $0                # fake return address for unwinder
--      jmp start_kernel
-+      jmp i386_start_kernel
- 
- #define HYPERCALL_PAGE_OFFSET 0x1000
- .org HYPERCALL_PAGE_OFFSET
---- a/arch/x86/kernel/head64-xen.c
-+++ b/arch/x86/kernel/head64-xen.c
+--- sle11-2009-05-14.orig/arch/x86/kernel/head64-xen.c 2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/arch/x86/kernel/head64-xen.c      2009-03-16 16:38:05.000000000 +0100
  @@ -17,6 +17,7 @@
   #include <linux/string.h>
   #include <linux/percpu.h>
@@ -1879,8 +1760,19 @@ Automatically created from "patches.kernel.org/patch-2.6.26" by xen-port-patches
   
         /*
          * At this point everything still needed from the boot loader
---- a/arch/x86/kernel/init_task-xen.c
-+++ b/arch/x86/kernel/init_task-xen.c
+--- sle11-2009-05-14.orig/arch/x86/kernel/head_32-xen.S        2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/arch/x86/kernel/head_32-xen.S     2009-03-16 16:38:05.000000000 +0100
+@@ -69,7 +69,7 @@ ENTRY(startup_32)
+       cld                     # gcc2 wants the direction flag cleared at all times
+ 
+       pushl $0                # fake return address for unwinder
+-      jmp start_kernel
++      jmp i386_start_kernel
+ 
+ #define HYPERCALL_PAGE_OFFSET 0x1000
+ .org HYPERCALL_PAGE_OFFSET
+--- sle11-2009-05-14.orig/arch/x86/kernel/init_task-xen.c      2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/arch/x86/kernel/init_task-xen.c   2009-03-16 16:38:05.000000000 +0100
  @@ -11,7 +11,6 @@
   #include <asm/desc.h>
   
@@ -1889,8 +1781,8 @@ Automatically created from "patches.kernel.org/patch-2.6.26" by xen-port-patches
   static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
   static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
   #ifdef CONFIG_X86_XEN
---- a/arch/x86/kernel/io_apic_32-xen.c
-+++ b/arch/x86/kernel/io_apic_32-xen.c
+--- sle11-2009-05-14.orig/arch/x86/kernel/io_apic_32-xen.c     2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/arch/x86/kernel/io_apic_32-xen.c  2009-03-16 16:38:05.000000000 +0100
  @@ -88,6 +88,16 @@ int sis_apic_bug = -1;
    */
   int nr_ioapic_registers[MAX_IO_APICS];
@@ -2219,8 +2111,8 @@ Automatically created from "patches.kernel.org/patch-2.6.26" by xen-port-patches
   
         return 0;
   }
---- a/arch/x86/kernel/io_apic_64-xen.c
-+++ b/arch/x86/kernel/io_apic_64-xen.c
+--- sle11-2009-05-14.orig/arch/x86/kernel/io_apic_64-xen.c     2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/arch/x86/kernel/io_apic_64-xen.c  2009-03-16 16:38:05.000000000 +0100
  @@ -43,13 +43,15 @@
   #include <asm/smp.h>
   #include <asm/desc.h>
@@ -2418,8 +2310,8 @@ Automatically created from "patches.kernel.org/patch-2.6.26" by xen-port-patches
                 mem += sizeof(struct resource) * nr_ioapics;
   
                 for (i = 0; i < nr_ioapics; i++) {
---- /dev/null
-+++ b/arch/x86/kernel/ipi-xen.c
+--- /dev/null  1970-01-01 00:00:00.000000000 +0000
++++ sle11-2009-05-14/arch/x86/kernel/ipi-xen.c 2009-03-16 16:38:05.000000000 +0100
  @@ -0,0 +1,232 @@
  +#include <linux/cpumask.h>
  +#include <linux/interrupt.h>
@@ -2653,8 +2545,8 @@ Automatically created from "patches.kernel.org/patch-2.6.26" by xen-port-patches
  +}
  +#endif
  +#endif
---- a/arch/x86/kernel/irq_32-xen.c
-+++ b/arch/x86/kernel/irq_32-xen.c
+--- sle11-2009-05-14.orig/arch/x86/kernel/irq_32-xen.c 2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/arch/x86/kernel/irq_32-xen.c      2009-03-16 16:38:05.000000000 +0100
  @@ -79,7 +79,7 @@ unsigned int do_IRQ(struct pt_regs *regs
   
         if (unlikely((unsigned)irq >= NR_IRQS)) {
@@ -2682,8 +2574,8 @@ Automatically created from "patches.kernel.org/patch-2.6.26" by xen-port-patches
   asmlinkage void do_softirq(void)
   {
         unsigned long flags;
---- a/arch/x86/kernel/machine_kexec_64.c
-+++ b/arch/x86/kernel/machine_kexec_64.c
+--- sle11-2009-05-14.orig/arch/x86/kernel/machine_kexec_64.c   2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/arch/x86/kernel/machine_kexec_64.c        2009-03-16 16:38:05.000000000 +0100
  @@ -120,8 +120,6 @@ int __init machine_kexec_setup_resources
         return 0;
   }
@@ -2693,21 +2585,8 @@ Automatically created from "patches.kernel.org/patch-2.6.26" by xen-port-patches
   #else /* CONFIG_XEN */
   
   #define x__pmd(x) __pmd(x)
---- a/arch/x86/kernel/Makefile
-+++ b/arch/x86/kernel/Makefile
-@@ -122,8 +122,7 @@ ifeq ($(CONFIG_X86_64),y)
- 
-       obj-$(CONFIG_XEN)               += nmi_64.o
-       time_64-$(CONFIG_XEN)           += time_32.o
--      pci-dma_64-$(CONFIG_XEN)        += pci-dma_32.o
- endif
- 
--disabled-obj-$(CONFIG_XEN) := early-quirks.o hpet.o i8253.o i8259_$(BITS).o reboot.o \
--      smpboot_$(BITS).o tsc_$(BITS).o tsc_sync.o
-+disabled-obj-$(CONFIG_XEN) := early-quirks.o hpet.o i8253.o i8259_$(BITS).o \
-+      pci-swiotlb_64.o reboot.o smpboot.o tlb_$(BITS).o tsc_$(BITS).o tsc_sync.o vsmp_64.o
---- a/arch/x86/kernel/microcode-xen.c
-+++ b/arch/x86/kernel/microcode-xen.c
+--- sle11-2009-05-14.orig/arch/x86/kernel/microcode-xen.c      2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/arch/x86/kernel/microcode-xen.c   2009-03-16 16:38:05.000000000 +0100
  @@ -162,7 +162,7 @@ static int request_microcode(void)
                 c->x86, c->x86_model, c->x86_mask);
         error = request_firmware(&firmware, name, &microcode_pdev->dev);
@@ -2717,8 +2596,8 @@ Automatically created from "patches.kernel.org/patch-2.6.26" by xen-port-patches
                 return error;
         }
   
---- a/arch/x86/kernel/mmconf-fam10h_64.c
-+++ b/arch/x86/kernel/mmconf-fam10h_64.c
+--- sle11-2009-05-14.orig/arch/x86/kernel/mmconf-fam10h_64.c   2009-05-14 10:56:29.000000000 +0200
++++ sle11-2009-05-14/arch/x86/kernel/mmconf-fam10h_64.c        2009-03-16 16:38:05.000000000 +0100
  @@ -219,6 +219,16 @@ void __cpuinit fam10h_check_enable_mmcfg
         val |= fam10h_pci_mmconf_base | (8 << FAM10H_MMIO_CONF_BUSRANGE_SHIFT) |
                FAM10H_MMIO_CONF_ENABLE;
@@ -2736,17348 +2615,16559 @@ Automatically created from "patches.kernel.org/patch-2.6.26" by xen-port-patches
   }
   
   static int __devinit set_check_enable_amd_mmconf(const struct dmi_system_id *d)
---- a/arch/x86/kernel/mpparse_32-xen.c
-+++ /dev/null
-@@ -1,1161 +0,0 @@
--/*
-- *    Intel Multiprocessor Specification 1.1 and 1.4
-- *    compliant MP-table parsing routines.
-- *
-- *    (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
-- *    (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
-- *
-- *    Fixes
-- *            Erich Boleyn    :       MP v1.4 and additional changes.
-- *            Alan Cox        :       Added EBDA scanning
-- *            Ingo Molnar     :       various cleanups and rewrites
-- *            Maciej W. Rozycki:      Bits for default MP configurations
-- *            Paul Diefenbaugh:       Added full ACPI support
-- */
--
--#include <linux/mm.h>
--#include <linux/init.h>
--#include <linux/acpi.h>
--#include <linux/delay.h>
--#include <linux/bootmem.h>
--#include <linux/kernel_stat.h>
--#include <linux/mc146818rtc.h>
--#include <linux/bitops.h>
--
--#include <asm/smp.h>
--#include <asm/acpi.h>
--#include <asm/mtrr.h>
--#include <asm/mpspec.h>
--#include <asm/io_apic.h>
--
--#include <mach_apic.h>
--#include <mach_apicdef.h>
--#include <mach_mpparse.h>
--#include <bios_ebda.h>
--
--/* Have we found an MP table */
--int smp_found_config;
--unsigned int __cpuinitdata maxcpus = NR_CPUS;
--
--/*
-- * Various Linux-internal data structures created from the
-- * MP-table.
-- */
--int apic_version [MAX_APICS];
--int mp_bus_id_to_type [MAX_MP_BUSSES];
--int mp_bus_id_to_node [MAX_MP_BUSSES];
--int mp_bus_id_to_local [MAX_MP_BUSSES];
--int quad_local_to_mp_bus_id [NR_CPUS/4][4];
--int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
--static int mp_current_pci_id;
--
--/* I/O APIC entries */
--struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
--
--/* # of MP IRQ source entries */
--struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
--
--/* MP IRQ source entries */
--int mp_irq_entries;
--
--int nr_ioapics;
--
--int pic_mode;
--unsigned long mp_lapic_addr;
--
--unsigned int def_to_bigsmp = 0;
--
--/* Processor that is doing the boot up */
--unsigned int boot_cpu_physical_apicid = -1U;
--/* Internal processor count */
--unsigned int num_processors;
--
--/* Bitmask of physically existing CPUs */
--physid_mask_t phys_cpu_present_map;
--
--u8 bios_cpu_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
--
--/*
-- * Intel MP BIOS table parsing routines:
-- */
--
--
--/*
-- * Checksum an MP configuration block.
-- */
--
--static int __init mpf_checksum(unsigned char *mp, int len)
--{
--      int sum = 0;
--
--      while (len--)
--              sum += *mp++;
--
--      return sum & 0xFF;
--}
--
--/*
-- * Have to match translation table entries to main table entries by counter
-- * hence the mpc_record variable .... can't see a less disgusting way of
-- * doing this ....
-- */
--
--static int mpc_record; 
--static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] __cpuinitdata;
--
--#ifndef CONFIG_XEN
--static void __cpuinit MP_processor_info (struct mpc_config_processor *m)
--{
--      int ver, apicid;
--      physid_mask_t phys_cpu;
--      
--      if (!(m->mpc_cpuflag & CPU_ENABLED))
--              return;
--
--      apicid = mpc_apic_id(m, translation_table[mpc_record]);
--
--      if (m->mpc_featureflag&(1<<0))
--              Dprintk("    Floating point unit present.\n");
--      if (m->mpc_featureflag&(1<<7))
--              Dprintk("    Machine Exception supported.\n");
--      if (m->mpc_featureflag&(1<<8))
--              Dprintk("    64 bit compare & exchange supported.\n");
--      if (m->mpc_featureflag&(1<<9))
--              Dprintk("    Internal APIC present.\n");
--      if (m->mpc_featureflag&(1<<11))
--              Dprintk("    SEP present.\n");
--      if (m->mpc_featureflag&(1<<12))
--              Dprintk("    MTRR  present.\n");
--      if (m->mpc_featureflag&(1<<13))
--              Dprintk("    PGE  present.\n");
--      if (m->mpc_featureflag&(1<<14))
--              Dprintk("    MCA  present.\n");
--      if (m->mpc_featureflag&(1<<15))
--              Dprintk("    CMOV  present.\n");
--      if (m->mpc_featureflag&(1<<16))
--              Dprintk("    PAT  present.\n");
--      if (m->mpc_featureflag&(1<<17))
--              Dprintk("    PSE  present.\n");
--      if (m->mpc_featureflag&(1<<18))
--              Dprintk("    PSN  present.\n");
--      if (m->mpc_featureflag&(1<<19))
--              Dprintk("    Cache Line Flush Instruction present.\n");
--      /* 20 Reserved */
--      if (m->mpc_featureflag&(1<<21))
--              Dprintk("    Debug Trace and EMON Store present.\n");
--      if (m->mpc_featureflag&(1<<22))
--              Dprintk("    ACPI Thermal Throttle Registers  present.\n");
--      if (m->mpc_featureflag&(1<<23))
--              Dprintk("    MMX  present.\n");
--      if (m->mpc_featureflag&(1<<24))
--              Dprintk("    FXSR  present.\n");
--      if (m->mpc_featureflag&(1<<25))
--              Dprintk("    XMM  present.\n");
--      if (m->mpc_featureflag&(1<<26))
--              Dprintk("    Willamette New Instructions  present.\n");
--      if (m->mpc_featureflag&(1<<27))
--              Dprintk("    Self Snoop  present.\n");
--      if (m->mpc_featureflag&(1<<28))
--              Dprintk("    HT  present.\n");
--      if (m->mpc_featureflag&(1<<29))
--              Dprintk("    Thermal Monitor present.\n");
--      /* 30, 31 Reserved */
--
--
--      if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
--              Dprintk("    Bootup CPU\n");
--              boot_cpu_physical_apicid = m->mpc_apicid;
--      }
--
--      ver = m->mpc_apicver;
--
--      /*
--       * Validate version
--       */
--      if (ver == 0x0) {
--              printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! "
--                              "fixing up to 0x10. (tell your hw vendor)\n",
--                              m->mpc_apicid);
--              ver = 0x10;
--      }
--      apic_version[m->mpc_apicid] = ver;
--
--      phys_cpu = apicid_to_cpu_present(apicid);
--      physids_or(phys_cpu_present_map, phys_cpu_present_map, phys_cpu);
--
--      if (num_processors >= NR_CPUS) {
--              printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
--                      "  Processor ignored.\n", NR_CPUS);
--              return;
--      }
--
--      if (num_processors >= maxcpus) {
--              printk(KERN_WARNING "WARNING: maxcpus limit of %i reached."
--                      " Processor ignored.\n", maxcpus);
--              return;
--      }
--
--      cpu_set(num_processors, cpu_possible_map);
--      num_processors++;
--
--      /*
--       * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y
--       * but we need to work other dependencies like SMP_SUSPEND etc
--       * before this can be done without some confusion.
--       * if (CPU_HOTPLUG_ENABLED || num_processors > 8)
--       *       - Ashok Raj <ashok.raj@intel.com>
--       */
--      if (num_processors > 8) {
--              switch (boot_cpu_data.x86_vendor) {
--              case X86_VENDOR_INTEL:
--                      if (!APIC_XAPIC(ver)) {
--                              def_to_bigsmp = 0;
--                              break;
--                      }
--                      /* If P4 and above fall through */
--              case X86_VENDOR_AMD:
--                      def_to_bigsmp = 1;
--              }
--      }
--      bios_cpu_apicid[num_processors - 1] = m->mpc_apicid;
--}
--#else
--static void __cpuinit MP_processor_info (struct mpc_config_processor *m)
--{
--      num_processors++;
--}
--#endif /* CONFIG_XEN */
--
--static void __init MP_bus_info (struct mpc_config_bus *m)
--{
--      char str[7];
--
--      memcpy(str, m->mpc_bustype, 6);
--      str[6] = 0;
--
--      mpc_oem_bus_info(m, str, translation_table[mpc_record]);
--
--#if MAX_MP_BUSSES < 256
--      if (m->mpc_busid >= MAX_MP_BUSSES) {
--              printk(KERN_WARNING "MP table busid value (%d) for bustype %s "
--                      " is too large, max. supported is %d\n",
--                      m->mpc_busid, str, MAX_MP_BUSSES - 1);
--              return;
--      }
--#endif
--
--      if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA)-1) == 0) {
--              mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
--      } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA)-1) == 0) {
--              mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA;
--      } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI)-1) == 0) {
--              mpc_oem_pci_bus(m, translation_table[mpc_record]);
--              mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI;
--              mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id;
--              mp_current_pci_id++;
--      } else if (strncmp(str, BUSTYPE_MCA, sizeof(BUSTYPE_MCA)-1) == 0) {
--              mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA;
--      } else {
--              printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str);
--      }
--}
--
--static void __init MP_ioapic_info (struct mpc_config_ioapic *m)
--{
--      if (!(m->mpc_flags & MPC_APIC_USABLE))
--              return;
--
--      printk(KERN_INFO "I/O APIC #%d Version %d at 0x%X.\n",
--              m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr);
--      if (nr_ioapics >= MAX_IO_APICS) {
--              printk(KERN_CRIT "Max # of I/O APICs (%d) exceeded (found %d).\n",
--                      MAX_IO_APICS, nr_ioapics);
--              panic("Recompile kernel with bigger MAX_IO_APICS!.\n");
--      }
--      if (!m->mpc_apicaddr) {
--              printk(KERN_ERR "WARNING: bogus zero I/O APIC address"
--                      " found in MP table, skipping!\n");
--              return;
--      }
--      mp_ioapics[nr_ioapics] = *m;
--      nr_ioapics++;
--}
--
--static void __init MP_intsrc_info (struct mpc_config_intsrc *m)
--{
--      mp_irqs [mp_irq_entries] = *m;
--      Dprintk("Int: type %d, pol %d, trig %d, bus %d,"
--              " IRQ %02x, APIC ID %x, APIC INT %02x\n",
--                      m->mpc_irqtype, m->mpc_irqflag & 3,
--                      (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus,
--                      m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq);
--      if (++mp_irq_entries == MAX_IRQ_SOURCES)
--              panic("Max # of irq sources exceeded!!\n");
--}
--
--static void __init MP_lintsrc_info (struct mpc_config_lintsrc *m)
--{
--      Dprintk("Lint: type %d, pol %d, trig %d, bus %d,"
--              " IRQ %02x, APIC ID %x, APIC LINT %02x\n",
--                      m->mpc_irqtype, m->mpc_irqflag & 3,
--                      (m->mpc_irqflag >> 2) &3, m->mpc_srcbusid,
--                      m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
--}
--
--#ifdef CONFIG_X86_NUMAQ
--static void __init MP_translation_info (struct mpc_config_translation *m)
--{
--      printk(KERN_INFO "Translation: record %d, type %d, quad %d, global %d, local %d\n", mpc_record, m->trans_type, m->trans_quad, m->trans_global, m->trans_local);
--
--      if (mpc_record >= MAX_MPC_ENTRY) 
--              printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n");
--      else
--              translation_table[mpc_record] = m; /* stash this for later */
--      if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad))
--              node_set_online(m->trans_quad);
--}
--
--/*
-- * Read/parse the MPC oem tables
-- */
--
--static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable, \
--      unsigned short oemsize)
--{
--      int count = sizeof (*oemtable); /* the header size */
--      unsigned char *oemptr = ((unsigned char *)oemtable)+count;
--      
--      mpc_record = 0;
--      printk(KERN_INFO "Found an OEM MPC table at %8p - parsing it ... \n", oemtable);
--      if (memcmp(oemtable->oem_signature,MPC_OEM_SIGNATURE,4))
--      {
--              printk(KERN_WARNING "SMP mpc oemtable: bad signature [%c%c%c%c]!\n",
--                      oemtable->oem_signature[0],
--                      oemtable->oem_signature[1],
--                      oemtable->oem_signature[2],
--                      oemtable->oem_signature[3]);
--              return;
--      }
--      if (mpf_checksum((unsigned char *)oemtable,oemtable->oem_length))
--      {
--              printk(KERN_WARNING "SMP oem mptable: checksum error!\n");
--              return;
--      }
--      while (count < oemtable->oem_length) {
--              switch (*oemptr) {
--                      case MP_TRANSLATION:
--                      {
--                              struct mpc_config_translation *m=
--                                      (struct mpc_config_translation *)oemptr;
--                              MP_translation_info(m);
--                              oemptr += sizeof(*m);
--                              count += sizeof(*m);
--                              ++mpc_record;
--                              break;
--                      }
--                      default:
--                      {
--                              printk(KERN_WARNING "Unrecognised OEM table entry type! - %d\n", (int) *oemptr);
--                              return;
--                      }
--              }
--       }
--}
--
--static inline void mps_oem_check(struct mp_config_table *mpc, char *oem,
--              char *productid)
--{
--      if (strncmp(oem, "IBM NUMA", 8))
--              printk("Warning!  May not be a NUMA-Q system!\n");
--      if (mpc->mpc_oemptr)
--              smp_read_mpc_oem((struct mp_config_oemtable *) mpc->mpc_oemptr,
--                              mpc->mpc_oemsize);
--}
--#endif        /* CONFIG_X86_NUMAQ */
--
--/*
-- * Read/parse the MPC
-- */
--
--static int __init smp_read_mpc(struct mp_config_table *mpc)
--{
--      char str[16];
--      char oem[10];
--      int count=sizeof(*mpc);
--      unsigned char *mpt=((unsigned char *)mpc)+count;
--
--      if (memcmp(mpc->mpc_signature,MPC_SIGNATURE,4)) {
--              printk(KERN_ERR "SMP mptable: bad signature [0x%x]!\n",
--                      *(u32 *)mpc->mpc_signature);
--              return 0;
--      }
--      if (mpf_checksum((unsigned char *)mpc,mpc->mpc_length)) {
--              printk(KERN_ERR "SMP mptable: checksum error!\n");
--              return 0;
--      }
--      if (mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04) {
--              printk(KERN_ERR "SMP mptable: bad table version (%d)!!\n",
--                      mpc->mpc_spec);
--              return 0;
--      }
--      if (!mpc->mpc_lapic) {
--              printk(KERN_ERR "SMP mptable: null local APIC address!\n");
--              return 0;
--      }
--      memcpy(oem,mpc->mpc_oem,8);
--      oem[8]=0;
--      printk(KERN_INFO "OEM ID: %s ",oem);
--
--      memcpy(str,mpc->mpc_productid,12);
--      str[12]=0;
--      printk("Product ID: %s ",str);
--
--      mps_oem_check(mpc, oem, str);
--
--      printk("APIC at: 0x%X\n", mpc->mpc_lapic);
--
--      /*
--       * Save the local APIC address (it might be non-default) -- but only
--       * if we're not using ACPI.
--       */
--      if (!acpi_lapic)
--              mp_lapic_addr = mpc->mpc_lapic;
--
--      /*
--       *      Now process the configuration blocks.
--       */
--      mpc_record = 0;
--      while (count < mpc->mpc_length) {
--              switch(*mpt) {
--                      case MP_PROCESSOR:
--                      {
--                              struct mpc_config_processor *m=
--                                      (struct mpc_config_processor *)mpt;
--                              /* ACPI may have already provided this data */
--                              if (!acpi_lapic)
--                                      MP_processor_info(m);
--                              mpt += sizeof(*m);
--                              count += sizeof(*m);
--                              break;
--                      }
--                      case MP_BUS:
--                      {
--                              struct mpc_config_bus *m=
--                                      (struct mpc_config_bus *)mpt;
--                              MP_bus_info(m);
--                              mpt += sizeof(*m);
--                              count += sizeof(*m);
--                              break;
--                      }
--                      case MP_IOAPIC:
--                      {
--                              struct mpc_config_ioapic *m=
--                                      (struct mpc_config_ioapic *)mpt;
--                              MP_ioapic_info(m);
--                              mpt+=sizeof(*m);
--                              count+=sizeof(*m);
--                              break;
--                      }
--                      case MP_INTSRC:
--                      {
--                              struct mpc_config_intsrc *m=
--                                      (struct mpc_config_intsrc *)mpt;
--
--                              MP_intsrc_info(m);
--                              mpt+=sizeof(*m);
--                              count+=sizeof(*m);
--                              break;
--                      }
--                      case MP_LINTSRC:
--                      {
--                              struct mpc_config_lintsrc *m=
--                                      (struct mpc_config_lintsrc *)mpt;
--                              MP_lintsrc_info(m);
--                              mpt+=sizeof(*m);
--                              count+=sizeof(*m);
--                              break;
--                      }
--                      default:
--                      {
--                              count = mpc->mpc_length;
--                              break;
--                      }
--              }
--              ++mpc_record;
--      }
--      setup_apic_routing();
--      if (!num_processors)
--              printk(KERN_ERR "SMP mptable: no processors registered!\n");
--      return num_processors;
--}
--
--static int __init ELCR_trigger(unsigned int irq)
--{
--      unsigned int port;
--
--      port = 0x4d0 + (irq >> 3);
--      return (inb(port) >> (irq & 7)) & 1;
--}
--
--static void __init construct_default_ioirq_mptable(int mpc_default_type)
--{
--      struct mpc_config_intsrc intsrc;
--      int i;
--      int ELCR_fallback = 0;
--
--      intsrc.mpc_type = MP_INTSRC;
--      intsrc.mpc_irqflag = 0;                 /* conforming */
--      intsrc.mpc_srcbus = 0;
--      intsrc.mpc_dstapic = mp_ioapics[0].mpc_apicid;
--
--      intsrc.mpc_irqtype = mp_INT;
--
--      /*
--       *  If true, we have an ISA/PCI system with no IRQ entries
--       *  in the MP table. To prevent the PCI interrupts from being set up
--       *  incorrectly, we try to use the ELCR. The sanity check to see if
--       *  there is good ELCR data is very simple - IRQ0, 1, 2 and 13 can
--       *  never be level sensitive, so we simply see if the ELCR agrees.
--       *  If it does, we assume it's valid.
--       */
--      if (mpc_default_type == 5) {
--              printk(KERN_INFO "ISA/PCI bus type with no IRQ information... falling back to ELCR\n");
--
--              if (ELCR_trigger(0) || ELCR_trigger(1) || ELCR_trigger(2) || ELCR_trigger(13))
--                      printk(KERN_WARNING "ELCR contains invalid data... not using ELCR\n");
--              else {
--                      printk(KERN_INFO "Using ELCR to identify PCI interrupts\n");
--                      ELCR_fallback = 1;
--              }
--      }
--
--      for (i = 0; i < 16; i++) {
--              switch (mpc_default_type) {
--              case 2:
--                      if (i == 0 || i == 13)
--                              continue;       /* IRQ0 & IRQ13 not connected */
--                      /* fall through */
--              default:
--                      if (i == 2)
--                              continue;       /* IRQ2 is never connected */
--              }
--
--              if (ELCR_fallback) {
--                      /*
--                       *  If the ELCR indicates a level-sensitive interrupt, we
--                       *  copy that information over to the MP table in the
--                       *  irqflag field (level sensitive, active high polarity).
--                       */
--                      if (ELCR_trigger(i))
--                              intsrc.mpc_irqflag = 13;
--                      else
--                              intsrc.mpc_irqflag = 0;
--              }
--
--              intsrc.mpc_srcbusirq = i;
--              intsrc.mpc_dstirq = i ? i : 2;          /* IRQ0 to INTIN2 */
--              MP_intsrc_info(&intsrc);
--      }
--
--      intsrc.mpc_irqtype = mp_ExtINT;
--      intsrc.mpc_srcbusirq = 0;
--      intsrc.mpc_dstirq = 0;                          /* 8259A to INTIN0 */
--      MP_intsrc_info(&intsrc);
--}
--
--static inline void __init construct_default_ISA_mptable(int mpc_default_type)
--{
--      struct mpc_config_processor processor;
--      struct mpc_config_bus bus;
--      struct mpc_config_ioapic ioapic;
--      struct mpc_config_lintsrc lintsrc;
--      int linttypes[2] = { mp_ExtINT, mp_NMI };
--      int i;
--
--      /*
--       * local APIC has default address
--       */
--      mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
--
--      /*
--       * 2 CPUs, numbered 0 & 1.
--       */
--      processor.mpc_type = MP_PROCESSOR;
--      /* Either an integrated APIC or a discrete 82489DX. */
--      processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
--      processor.mpc_cpuflag = CPU_ENABLED;
--      processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
--                                 (boot_cpu_data.x86_model << 4) |
--                                 boot_cpu_data.x86_mask;
--      processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
--      processor.mpc_reserved[0] = 0;
--      processor.mpc_reserved[1] = 0;
--      for (i = 0; i < 2; i++) {
--              processor.mpc_apicid = i;
--              MP_processor_info(&processor);
--      }
--
--      bus.mpc_type = MP_BUS;
--      bus.mpc_busid = 0;
--      switch (mpc_default_type) {
--              default:
--                      printk("???\n");
--                      printk(KERN_ERR "Unknown standard configuration %d\n",
--                              mpc_default_type);
--                      /* fall through */
--              case 1:
--              case 5:
--                      memcpy(bus.mpc_bustype, "ISA   ", 6);
--                      break;
--              case 2:
--              case 6:
--              case 3:
--                      memcpy(bus.mpc_bustype, "EISA  ", 6);
--                      break;
--              case 4:
--              case 7:
--                      memcpy(bus.mpc_bustype, "MCA   ", 6);
--      }
--      MP_bus_info(&bus);
--      if (mpc_default_type > 4) {
--              bus.mpc_busid = 1;
--              memcpy(bus.mpc_bustype, "PCI   ", 6);
--              MP_bus_info(&bus);
--      }
--
--      ioapic.mpc_type = MP_IOAPIC;
--      ioapic.mpc_apicid = 2;
--      ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
--      ioapic.mpc_flags = MPC_APIC_USABLE;
--      ioapic.mpc_apicaddr = 0xFEC00000;
--      MP_ioapic_info(&ioapic);
--
--      /*
--       * We set up most of the low 16 IO-APIC pins according to MPS rules.
--       */
--      construct_default_ioirq_mptable(mpc_default_type);
--
--      lintsrc.mpc_type = MP_LINTSRC;
--      lintsrc.mpc_irqflag = 0;                /* conforming */
--      lintsrc.mpc_srcbusid = 0;
--      lintsrc.mpc_srcbusirq = 0;
--      lintsrc.mpc_destapic = MP_APIC_ALL;
--      for (i = 0; i < 2; i++) {
--              lintsrc.mpc_irqtype = linttypes[i];
--              lintsrc.mpc_destapiclint = i;
--              MP_lintsrc_info(&lintsrc);
--      }
--}
--
--static struct intel_mp_floating *mpf_found;
--
--/*
-- * Scan the memory blocks for an SMP configuration block.
-- */
--void __init get_smp_config (void)
--{
--      struct intel_mp_floating *mpf = mpf_found;
--
--      /*
--       * ACPI supports both logical (e.g. Hyper-Threading) and physical 
--       * processors, where MPS only supports physical.
--       */
--      if (acpi_lapic && acpi_ioapic) {
--              printk(KERN_INFO "Using ACPI (MADT) for SMP configuration information\n");
--              return;
--      }
--      else if (acpi_lapic)
--              printk(KERN_INFO "Using ACPI for processor (LAPIC) configuration information\n");
--
--      printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification);
--      if (mpf->mpf_feature2 & (1<<7)) {
--              printk(KERN_INFO "    IMCR and PIC compatibility mode.\n");
--              pic_mode = 1;
--      } else {
--              printk(KERN_INFO "    Virtual Wire compatibility mode.\n");
--              pic_mode = 0;
--      }
--
--      /*
--       * Now see if we need to read further.
--       */
--      if (mpf->mpf_feature1 != 0) {
--
--              printk(KERN_INFO "Default MP configuration #%d\n", mpf->mpf_feature1);
--              construct_default_ISA_mptable(mpf->mpf_feature1);
--
--      } else if (mpf->mpf_physptr) {
--
--              /*
--               * Read the physical hardware table.  Anything here will
--               * override the defaults.
--               */
--              if (!smp_read_mpc(isa_bus_to_virt(mpf->mpf_physptr))) {
--                      smp_found_config = 0;
--                      printk(KERN_ERR "BIOS bug, MP table errors detected!...\n");
--                      printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n");
--                      return;
--              }
--              /*
--               * If there are no explicit MP IRQ entries, then we are
--               * broken.  We set up most of the low 16 IO-APIC pins to
--               * ISA defaults and hope it will work.
--               */
--              if (!mp_irq_entries) {
--                      struct mpc_config_bus bus;
--
--                      printk(KERN_ERR "BIOS bug, no explicit IRQ entries, using default mptable. (tell your hw vendor)\n");
--
--                      bus.mpc_type = MP_BUS;
--                      bus.mpc_busid = 0;
--                      memcpy(bus.mpc_bustype, "ISA   ", 6);
--                      MP_bus_info(&bus);
--
--                      construct_default_ioirq_mptable(0);
--              }
--
--      } else
--              BUG();
--
--      printk(KERN_INFO "Processors: %d\n", num_processors);
--      /*
--       * Only use the first configuration found.
--       */
--}
--
--static int __init smp_scan_config (unsigned long base, unsigned long length)
--{
--      unsigned long *bp = isa_bus_to_virt(base);
--      struct intel_mp_floating *mpf;
--
--      printk(KERN_INFO "Scan SMP from %p for %ld bytes.\n", bp,length);
--      if (sizeof(*mpf) != 16)
--              printk("Error: MPF size\n");
--
--      while (length > 0) {
--              mpf = (struct intel_mp_floating *)bp;
--              if ((*bp == SMP_MAGIC_IDENT) &&
--                      (mpf->mpf_length == 1) &&
--                      !mpf_checksum((unsigned char *)bp, 16) &&
--                      ((mpf->mpf_specification == 1)
--                              || (mpf->mpf_specification == 4)) ) {
--
--                      smp_found_config = 1;
--#ifndef CONFIG_XEN
--                      printk(KERN_INFO "found SMP MP-table at [%p] %08lx\n",
--                              mpf, virt_to_phys(mpf));
--                      reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE,
--                                      BOOTMEM_DEFAULT);
--                      if (mpf->mpf_physptr) {
--                              /*
--                               * We cannot access to MPC table to compute
--                               * table size yet, as only few megabytes from
--                               * the bottom is mapped now.
--                               * PC-9800's MPC table places on the very last
--                               * of physical memory; so that simply reserving
--                               * PAGE_SIZE from mpg->mpf_physptr yields BUG()
--                               * in reserve_bootmem.
--                               */
--                              unsigned long size = PAGE_SIZE;
--                              unsigned long end = max_low_pfn * PAGE_SIZE;
--                              if (mpf->mpf_physptr + size > end)
--                                      size = end - mpf->mpf_physptr;
--                              reserve_bootmem(mpf->mpf_physptr, size,
--                                              BOOTMEM_DEFAULT);
--                      }
--#else
--                      printk(KERN_INFO "found SMP MP-table at [%p] %08lx\n",
--                              mpf, ((void *)bp - isa_bus_to_virt(base)) + base);
--#endif
--
--                      mpf_found = mpf;
--                      return 1;
--              }
--              bp += 4;
--              length -= 16;
--      }
--      return 0;
--}
--
--void __init find_smp_config (void)
--{
--#ifndef CONFIG_XEN
--      unsigned int address;
--#endif
--
--      /*
--       * FIXME: Linux assumes you have 640K of base ram..
--       * this continues the error...
--       *
--       * 1) Scan the bottom 1K for a signature
--       * 2) Scan the top 1K of base RAM
--       * 3) Scan the 64K of bios
--       */
--      if (smp_scan_config(0x0,0x400) ||
--              smp_scan_config(639*0x400,0x400) ||
--                      smp_scan_config(0xF0000,0x10000))
--              return;
--      /*
--       * If it is an SMP machine we should know now, unless the
--       * configuration is in an EISA/MCA bus machine with an
--       * extended bios data area.
--       *
--       * there is a real-mode segmented pointer pointing to the
--       * 4K EBDA area at 0x40E, calculate and scan it here.
--       *
--       * NOTE! There are Linux loaders that will corrupt the EBDA
--       * area, and as such this kind of SMP config may be less
--       * trustworthy, simply because the SMP table may have been
--       * stomped on during early boot. These loaders are buggy and
--       * should be fixed.
--       *
--       * MP1.4 SPEC states to only scan first 1K of 4K EBDA.
--       */
--
--#ifndef CONFIG_XEN
--      address = get_bios_ebda();
--      if (address)
--              smp_scan_config(address, 0x400);
--#endif
--}
--
--int es7000_plat;
--
--/* --------------------------------------------------------------------------
--                            ACPI-based MP Configuration
--   -------------------------------------------------------------------------- */
--
--#ifdef CONFIG_ACPI
--
--void __init mp_register_lapic_address(u64 address)
--{
--#ifndef CONFIG_XEN
--      mp_lapic_addr = (unsigned long) address;
--
--      set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr);
--
--      if (boot_cpu_physical_apicid == -1U)
--              boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
--
--      Dprintk("Boot CPU = %d\n", boot_cpu_physical_apicid);
--#endif
--}
--
--void __cpuinit mp_register_lapic (u8 id, u8 enabled)
--{
--      struct mpc_config_processor processor;
--      int boot_cpu = 0;
--      
--      if (MAX_APICS - id <= 0) {
--              printk(KERN_WARNING "Processor #%d invalid (max %d)\n",
--                      id, MAX_APICS);
--              return;
--      }
--
--      if (id == boot_cpu_physical_apicid)
--              boot_cpu = 1;
--
--#ifndef CONFIG_XEN
--      processor.mpc_type = MP_PROCESSOR;
--      processor.mpc_apicid = id;
--      processor.mpc_apicver = GET_APIC_VERSION(apic_read(APIC_LVR));
--      processor.mpc_cpuflag = (enabled ? CPU_ENABLED : 0);
--      processor.mpc_cpuflag |= (boot_cpu ? CPU_BOOTPROCESSOR : 0);
--      processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) | 
--              (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask;
--      processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
--      processor.mpc_reserved[0] = 0;
--      processor.mpc_reserved[1] = 0;
--#endif
--
--      MP_processor_info(&processor);
--}
--
--#ifdef        CONFIG_X86_IO_APIC
--
--#define MP_ISA_BUS            0
--#define MP_MAX_IOAPIC_PIN     127
--
--static struct mp_ioapic_routing {
--      int                     apic_id;
--      int                     gsi_base;
--      int                     gsi_end;
--      u32                     pin_programmed[4];
--} mp_ioapic_routing[MAX_IO_APICS];
--
--static int mp_find_ioapic (int gsi)
--{
--      int i = 0;
--
--      /* Find the IOAPIC that manages this GSI. */
--      for (i = 0; i < nr_ioapics; i++) {
--              if ((gsi >= mp_ioapic_routing[i].gsi_base)
--                      && (gsi <= mp_ioapic_routing[i].gsi_end))
--                      return i;
--      }
--
--      printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi);
--
--      return -1;
--}
--
--void __init mp_register_ioapic(u8 id, u32 address, u32 gsi_base)
--{
--      int idx = 0;
--      int tmpid;
--
--      if (nr_ioapics >= MAX_IO_APICS) {
--              printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
--                      "(found %d)\n", MAX_IO_APICS, nr_ioapics);
--              panic("Recompile kernel with bigger MAX_IO_APICS!\n");
--      }
--      if (!address) {
--              printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address"
--                      " found in MADT table, skipping!\n");
--              return;
--      }
--
--      idx = nr_ioapics++;
--
--      mp_ioapics[idx].mpc_type = MP_IOAPIC;
--      mp_ioapics[idx].mpc_flags = MPC_APIC_USABLE;
--      mp_ioapics[idx].mpc_apicaddr = address;
--
--#ifndef CONFIG_XEN
--      set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
--#endif
--      if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
--              && !APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
--              tmpid = io_apic_get_unique_id(idx, id);
--      else
--              tmpid = id;
--      if (tmpid == -1) {
--              nr_ioapics--;
--              return;
--      }
--      mp_ioapics[idx].mpc_apicid = tmpid;
--      mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx);
--      
--      /* 
--       * Build basic GSI lookup table to facilitate gsi->io_apic lookups
--       * and to prevent reprogramming of IOAPIC pins (PCI GSIs).
--       */
--      mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mpc_apicid;
--      mp_ioapic_routing[idx].gsi_base = gsi_base;
--      mp_ioapic_routing[idx].gsi_end = gsi_base +
--              io_apic_get_redir_entries(idx);
--
--      printk("IOAPIC[%d]: apic_id %d, version %d, address 0x%x, "
--             "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid,
--             mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr,
--             mp_ioapic_routing[idx].gsi_base,
--             mp_ioapic_routing[idx].gsi_end);
--}
--
--void __init
--mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
--{
--      struct mpc_config_intsrc intsrc;
--      int                     ioapic = -1;
--      int                     pin = -1;
--
--      /* 
--       * Convert 'gsi' to 'ioapic.pin'.
--       */
--      ioapic = mp_find_ioapic(gsi);
--      if (ioapic < 0)
--              return;
--      pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
--
--      /*
--       * TBD: This check is for faulty timer entries, where the override
--       *      erroneously sets the trigger to level, resulting in a HUGE 
--       *      increase of timer interrupts!
--       */
--      if ((bus_irq == 0) && (trigger == 3))
--              trigger = 1;
--
--      intsrc.mpc_type = MP_INTSRC;
--      intsrc.mpc_irqtype = mp_INT;
--      intsrc.mpc_irqflag = (trigger << 2) | polarity;
--      intsrc.mpc_srcbus = MP_ISA_BUS;
--      intsrc.mpc_srcbusirq = bus_irq;                                /* IRQ */
--      intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid;        /* APIC ID */
--      intsrc.mpc_dstirq = pin;                                    /* INTIN# */
--
--      Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, %d-%d\n",
--              intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3, 
--              (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus, 
--              intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, intsrc.mpc_dstirq);
--
--      mp_irqs[mp_irq_entries] = intsrc;
--      if (++mp_irq_entries == MAX_IRQ_SOURCES)
--              panic("Max # of irq sources exceeded!\n");
--}
--
--void __init mp_config_acpi_legacy_irqs (void)
--{
--      struct mpc_config_intsrc intsrc;
--      int i = 0;
--      int ioapic = -1;
--
--      /* 
--       * Fabricate the legacy ISA bus (bus #31).
--       */
--      mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA;
--      Dprintk("Bus #%d is ISA\n", MP_ISA_BUS);
--
--      /*
--       * Older generations of ES7000 have no legacy identity mappings
--       */
--      if (es7000_plat == 1)
--              return;
--
--      /* 
--       * Locate the IOAPIC that manages the ISA IRQs (0-15). 
--       */
--      ioapic = mp_find_ioapic(0);
--      if (ioapic < 0)
--              return;
--
--      intsrc.mpc_type = MP_INTSRC;
--      intsrc.mpc_irqflag = 0;                                 /* Conforming */
--      intsrc.mpc_srcbus = MP_ISA_BUS;
--      intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid;
--
--      /* 
--       * Use the default configuration for the IRQs 0-15.  Unless
--       * overridden by (MADT) interrupt source override entries.
--       */
--      for (i = 0; i < 16; i++) {
--              int idx;
--
--              for (idx = 0; idx < mp_irq_entries; idx++) {
--                      struct mpc_config_intsrc *irq = mp_irqs + idx;
--
--                      /* Do we already have a mapping for this ISA IRQ? */
--                      if (irq->mpc_srcbus == MP_ISA_BUS && irq->mpc_srcbusirq == i)
--                              break;
--
--                      /* Do we already have a mapping for this IOAPIC pin */
--                      if ((irq->mpc_dstapic == intsrc.mpc_dstapic) &&
--                              (irq->mpc_dstirq == i))
--                              break;
--              }
--
--              if (idx != mp_irq_entries) {
--                      printk(KERN_DEBUG "ACPI: IRQ%d used by override.\n", i);
--                      continue;                       /* IRQ already used */
--              }
--
--              intsrc.mpc_irqtype = mp_INT;
--              intsrc.mpc_srcbusirq = i;                  /* Identity mapped */
--              intsrc.mpc_dstirq = i;
--
--              Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, "
--                      "%d-%d\n", intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3, 
--                      (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus, 
--                      intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, 
--                      intsrc.mpc_dstirq);
--
--              mp_irqs[mp_irq_entries] = intsrc;
--              if (++mp_irq_entries == MAX_IRQ_SOURCES)
--                      panic("Max # of irq sources exceeded!\n");
--      }
--}
--
--#define MAX_GSI_NUM   4096
--#define IRQ_COMPRESSION_START 64
--
--int mp_register_gsi(u32 gsi, int triggering, int polarity)
--{
--      int ioapic = -1;
--      int ioapic_pin = 0;
--      int idx, bit = 0;
--      static int pci_irq = IRQ_COMPRESSION_START;
--      /*
--       * Mapping between Global System Interrupts, which
--       * represent all possible interrupts, and IRQs
--       * assigned to actual devices.
--       */
--      static int              gsi_to_irq[MAX_GSI_NUM];
--
--      /* Don't set up the ACPI SCI because it's already set up */
--      if (acpi_gbl_FADT.sci_interrupt == gsi)
--              return gsi;
--
--      ioapic = mp_find_ioapic(gsi);
--      if (ioapic < 0) {
--              printk(KERN_WARNING "No IOAPIC for GSI %u\n", gsi);
--              return gsi;
--      }
--
--      ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
--
--      if (ioapic_renumber_irq)
--              gsi = ioapic_renumber_irq(ioapic, gsi);
--
--      /* 
--       * Avoid pin reprogramming.  PRTs typically include entries  
--       * with redundant pin->gsi mappings (but unique PCI devices);
--       * we only program the IOAPIC on the first.
--       */
--      bit = ioapic_pin % 32;
--      idx = (ioapic_pin < 32) ? 0 : (ioapic_pin / 32);
--      if (idx > 3) {
--              printk(KERN_ERR "Invalid reference to IOAPIC pin "
--                      "%d-%d\n", mp_ioapic_routing[ioapic].apic_id, 
--                      ioapic_pin);
--              return gsi;
--      }
--      if ((1<<bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) {
--              Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n",
--                      mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
--              return (gsi < IRQ_COMPRESSION_START ? gsi : gsi_to_irq[gsi]);
--      }
--
--      mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1<<bit);
--
--      /*
--       * For GSI >= 64, use IRQ compression
--       */
--      if ((gsi >= IRQ_COMPRESSION_START)
--              && (triggering == ACPI_LEVEL_SENSITIVE)) {
--              /*
--               * For PCI devices assign IRQs in order, avoiding gaps
--               * due to unused I/O APIC pins.
--               */
--              int irq = gsi;
--              if (gsi < MAX_GSI_NUM) {
--                      /*
--                       * Retain the VIA chipset work-around (gsi > 15), but
--                       * avoid a problem where the 8254 timer (IRQ0) is setup
--                       * via an override (so it's not on pin 0 of the ioapic),
--                       * and at the same time, the pin 0 interrupt is a PCI
--                       * type.  The gsi > 15 test could cause these two pins
--                       * to be shared as IRQ0, and they are not shareable.
--                       * So test for this condition, and if necessary, avoid
--                       * the pin collision.
--                       */
--                      if (gsi > 15 || (gsi == 0 && !timer_uses_ioapic_pin_0))
--                              gsi = pci_irq++;
--                      /*
--                       * Don't assign IRQ used by ACPI SCI
--                       */
--                      if (gsi == acpi_gbl_FADT.sci_interrupt)
--                              gsi = pci_irq++;
--                      gsi_to_irq[irq] = gsi;
--              } else {
--                      printk(KERN_ERR "GSI %u is too high\n", gsi);
--                      return gsi;
--              }
--      }
--
--      io_apic_set_pci_routing(ioapic, ioapic_pin, gsi,
--                  triggering == ACPI_EDGE_SENSITIVE ? 0 : 1,
--                  polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
--      return gsi;
--}
--
--#endif /* CONFIG_X86_IO_APIC */
--#endif /* CONFIG_ACPI */
---- a/arch/x86/kernel/mpparse_64-xen.c
-+++ /dev/null
-@@ -1,879 +0,0 @@
--/*
-- *    Intel Multiprocessor Specification 1.1 and 1.4
-- *    compliant MP-table parsing routines.
-- *
-- *    (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
-- *    (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
-- *
-- *    Fixes
-- *            Erich Boleyn    :       MP v1.4 and additional changes.
-- *            Alan Cox        :       Added EBDA scanning
-- *            Ingo Molnar     :       various cleanups and rewrites
-- *            Maciej W. Rozycki:      Bits for default MP configurations
-- *            Paul Diefenbaugh:       Added full ACPI support
-- */
--
--#include <linux/mm.h>
--#include <linux/init.h>
--#include <linux/delay.h>
--#include <linux/bootmem.h>
--#include <linux/kernel_stat.h>
--#include <linux/mc146818rtc.h>
--#include <linux/acpi.h>
--#include <linux/module.h>
--
--#include <asm/smp.h>
--#include <asm/mtrr.h>
--#include <asm/mpspec.h>
--#include <asm/pgalloc.h>
--#include <asm/io_apic.h>
--#include <asm/proto.h>
--#include <asm/acpi.h>
--
--/* Have we found an MP table */
--int smp_found_config;
--
--/*
-- * Various Linux-internal data structures created from the
-- * MP-table.
-- */
--DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
--int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
--
--static int mp_current_pci_id = 0;
--/* I/O APIC entries */
--struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
--
--/* # of MP IRQ source entries */
--struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
--
--/* MP IRQ source entries */
--int mp_irq_entries;
--
--int nr_ioapics;
--unsigned long mp_lapic_addr = 0;
--
--
--
--/* Processor that is doing the boot up */
--unsigned int boot_cpu_id = -1U;
--EXPORT_SYMBOL(boot_cpu_id);
--
--/* Internal processor count */
--unsigned int num_processors;
--
--unsigned disabled_cpus __cpuinitdata;
--
--/* Bitmask of physically existing CPUs */
--physid_mask_t phys_cpu_present_map = PHYSID_MASK_NONE;
--
--#ifndef CONFIG_XEN
--u16 x86_bios_cpu_apicid_init[NR_CPUS] __initdata
--                              = { [0 ... NR_CPUS-1] = BAD_APICID };
--void *x86_bios_cpu_apicid_early_ptr;
--#endif
--DEFINE_PER_CPU(u16, x86_bios_cpu_apicid) = BAD_APICID;
--EXPORT_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
--
--
--/*
-- * Intel MP BIOS table parsing routines:
-- */
--
--/*
-- * Checksum an MP configuration block.
-- */
--
--static int __init mpf_checksum(unsigned char *mp, int len)
--{
--      int sum = 0;
--
--      while (len--)
--              sum += *mp++;
--
--      return sum & 0xFF;
--}
--
--#ifndef CONFIG_XEN
--static void __cpuinit MP_processor_info(struct mpc_config_processor *m)
--{
--      int cpu;
--      cpumask_t tmp_map;
--      char *bootup_cpu = "";
--
--      if (!(m->mpc_cpuflag & CPU_ENABLED)) {
--              disabled_cpus++;
--              return;
--      }
--      if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
--              bootup_cpu = " (Bootup-CPU)";
--              boot_cpu_id = m->mpc_apicid;
--      }
--
--      printk(KERN_INFO "Processor #%d%s\n", m->mpc_apicid, bootup_cpu);
--
--      if (num_processors >= NR_CPUS) {
--              printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
--                      " Processor ignored.\n", NR_CPUS);
--              return;
--      }
--
--      num_processors++;
--      cpus_complement(tmp_map, cpu_present_map);
--      cpu = first_cpu(tmp_map);
--
--      physid_set(m->mpc_apicid, phys_cpu_present_map);
--      if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
--              /*
--               * x86_bios_cpu_apicid is required to have processors listed
--               * in same order as logical cpu numbers. Hence the first
--               * entry is BSP, and so on.
--               */
--              cpu = 0;
--      }
--      /* are we being called early in kernel startup? */
--      if (x86_cpu_to_apicid_early_ptr) {
--              u16 *cpu_to_apicid = x86_cpu_to_apicid_early_ptr;
--              u16 *bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr;
--
--              cpu_to_apicid[cpu] = m->mpc_apicid;
--              bios_cpu_apicid[cpu] = m->mpc_apicid;
--      } else {
--              per_cpu(x86_cpu_to_apicid, cpu) = m->mpc_apicid;
--              per_cpu(x86_bios_cpu_apicid, cpu) = m->mpc_apicid;
--      }
--
--      cpu_set(cpu, cpu_possible_map);
--      cpu_set(cpu, cpu_present_map);
--}
--#else
--static void __cpuinit MP_processor_info(struct mpc_config_processor *m)
--{
--      num_processors++;
--}
--#endif /* CONFIG_XEN */
--
--static void __init MP_bus_info (struct mpc_config_bus *m)
--{
--      char str[7];
--
--      memcpy(str, m->mpc_bustype, 6);
--      str[6] = 0;
--      Dprintk("Bus #%d is %s\n", m->mpc_busid, str);
--
--      if (strncmp(str, "ISA", 3) == 0) {
--              set_bit(m->mpc_busid, mp_bus_not_pci);
--      } else if (strncmp(str, "PCI", 3) == 0) {
--              clear_bit(m->mpc_busid, mp_bus_not_pci);
--              mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id;
--              mp_current_pci_id++;
--      } else {
--              printk(KERN_ERR "Unknown bustype %s\n", str);
--      }
--}
--
--static int bad_ioapic(unsigned long address)
--{
--      if (nr_ioapics >= MAX_IO_APICS) {
--              printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
--                      "(found %d)\n", MAX_IO_APICS, nr_ioapics);
--              panic("Recompile kernel with bigger MAX_IO_APICS!\n");
--      }
--      if (!address) {
--              printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address"
--                      " found in table, skipping!\n");
--              return 1;
--      }
--      return 0;
--}
--
--static void __init MP_ioapic_info (struct mpc_config_ioapic *m)
--{
--      if (!(m->mpc_flags & MPC_APIC_USABLE))
--              return;
--
--      printk("I/O APIC #%d at 0x%X.\n",
--              m->mpc_apicid, m->mpc_apicaddr);
--
--      if (bad_ioapic(m->mpc_apicaddr))
--              return;
--
--      mp_ioapics[nr_ioapics] = *m;
--      nr_ioapics++;
--}
--
--static void __init MP_intsrc_info (struct mpc_config_intsrc *m)
--{
--      mp_irqs [mp_irq_entries] = *m;
--      Dprintk("Int: type %d, pol %d, trig %d, bus %d,"
--              " IRQ %02x, APIC ID %x, APIC INT %02x\n",
--                      m->mpc_irqtype, m->mpc_irqflag & 3,
--                      (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus,
--                      m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq);
--      if (++mp_irq_entries >= MAX_IRQ_SOURCES)
--              panic("Max # of irq sources exceeded!!\n");
--}
--
--static void __init MP_lintsrc_info (struct mpc_config_lintsrc *m)
--{
--      Dprintk("Lint: type %d, pol %d, trig %d, bus %d,"
--              " IRQ %02x, APIC ID %x, APIC LINT %02x\n",
--                      m->mpc_irqtype, m->mpc_irqflag & 3,
--                      (m->mpc_irqflag >> 2) &3, m->mpc_srcbusid,
--                      m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
--}
--
--/*
-- * Read/parse the MPC
-- */
--
--static int __init smp_read_mpc(struct mp_config_table *mpc)
--{
--      char str[16];
--      int count=sizeof(*mpc);
--      unsigned char *mpt=((unsigned char *)mpc)+count;
--
--      if (memcmp(mpc->mpc_signature,MPC_SIGNATURE,4)) {
--              printk("MPTABLE: bad signature [%c%c%c%c]!\n",
--                      mpc->mpc_signature[0],
--                      mpc->mpc_signature[1],
--                      mpc->mpc_signature[2],
--                      mpc->mpc_signature[3]);
--              return 0;
--      }
--      if (mpf_checksum((unsigned char *)mpc,mpc->mpc_length)) {
--              printk("MPTABLE: checksum error!\n");
--              return 0;
--      }
--      if (mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04) {
--              printk(KERN_ERR "MPTABLE: bad table version (%d)!!\n",
--                      mpc->mpc_spec);
--              return 0;
--      }
--      if (!mpc->mpc_lapic) {
--              printk(KERN_ERR "MPTABLE: null local APIC address!\n");
--              return 0;
--      }
--      memcpy(str,mpc->mpc_oem,8);
--      str[8] = 0;
--      printk(KERN_INFO "MPTABLE: OEM ID: %s ",str);
--
--      memcpy(str,mpc->mpc_productid,12);
--      str[12] = 0;
--      printk("MPTABLE: Product ID: %s ",str);
--
--      printk("MPTABLE: APIC at: 0x%X\n",mpc->mpc_lapic);
--
--      /* save the local APIC address, it might be non-default */
--      if (!acpi_lapic)
--              mp_lapic_addr = mpc->mpc_lapic;
--
--      /*
--       *      Now process the configuration blocks.
--       */
--      while (count < mpc->mpc_length) {
--              switch(*mpt) {
--                      case MP_PROCESSOR:
--                      {
--                              struct mpc_config_processor *m=
--                                      (struct mpc_config_processor *)mpt;
--                              if (!acpi_lapic)
--                                      MP_processor_info(m);
--                              mpt += sizeof(*m);
--                              count += sizeof(*m);
--                              break;
--                      }
--                      case MP_BUS:
--                      {
--                              struct mpc_config_bus *m=
--                                      (struct mpc_config_bus *)mpt;
--                              MP_bus_info(m);
--                              mpt += sizeof(*m);
--                              count += sizeof(*m);
--                              break;
--                      }
--                      case MP_IOAPIC:
--                      {
--                              struct mpc_config_ioapic *m=
--                                      (struct mpc_config_ioapic *)mpt;
--                              MP_ioapic_info(m);
--                              mpt += sizeof(*m);
--                              count += sizeof(*m);
--                              break;
--                      }
--                      case MP_INTSRC:
--                      {
--                              struct mpc_config_intsrc *m=
--                                      (struct mpc_config_intsrc *)mpt;
--
--                              MP_intsrc_info(m);
--                              mpt += sizeof(*m);
--                              count += sizeof(*m);
--                              break;
--                      }
--                      case MP_LINTSRC:
--                      {
--                              struct mpc_config_lintsrc *m=
--                                      (struct mpc_config_lintsrc *)mpt;
--                              MP_lintsrc_info(m);
--                              mpt += sizeof(*m);
--                              count += sizeof(*m);
--                              break;
--                      }
--              }
--      }
--      setup_apic_routing();
--      if (!num_processors)
--              printk(KERN_ERR "MPTABLE: no processors registered!\n");
--      return num_processors;
--}
--
--static int __init ELCR_trigger(unsigned int irq)
--{
--      unsigned int port;
--
--      port = 0x4d0 + (irq >> 3);
--      return (inb(port) >> (irq & 7)) & 1;
--}
--
--static void __init construct_default_ioirq_mptable(int mpc_default_type)
--{
--      struct mpc_config_intsrc intsrc;
--      int i;
--      int ELCR_fallback = 0;
--
--      intsrc.mpc_type = MP_INTSRC;
--      intsrc.mpc_irqflag = 0;                 /* conforming */
--      intsrc.mpc_srcbus = 0;
--      intsrc.mpc_dstapic = mp_ioapics[0].mpc_apicid;
--
--      intsrc.mpc_irqtype = mp_INT;
--
--      /*
--       *  If true, we have an ISA/PCI system with no IRQ entries
--       *  in the MP table. To prevent the PCI interrupts from being set up
--       *  incorrectly, we try to use the ELCR. The sanity check to see if
--       *  there is good ELCR data is very simple - IRQ0, 1, 2 and 13 can
--       *  never be level sensitive, so we simply see if the ELCR agrees.
--       *  If it does, we assume it's valid.
--       */
--      if (mpc_default_type == 5) {
--              printk(KERN_INFO "ISA/PCI bus type with no IRQ information... falling back to ELCR\n");
--
--              if (ELCR_trigger(0) || ELCR_trigger(1) || ELCR_trigger(2) || ELCR_trigger(13))
--                      printk(KERN_ERR "ELCR contains invalid data... not using ELCR\n");
--              else {
--                      printk(KERN_INFO "Using ELCR to identify PCI interrupts\n");
--                      ELCR_fallback = 1;
--              }
--      }
--
--      for (i = 0; i < 16; i++) {
--              switch (mpc_default_type) {
--              case 2:
--                      if (i == 0 || i == 13)
--                              continue;       /* IRQ0 & IRQ13 not connected */
--                      /* fall through */
--              default:
--                      if (i == 2)
--                              continue;       /* IRQ2 is never connected */
--              }
--
--              if (ELCR_fallback) {
--                      /*
--                       *  If the ELCR indicates a level-sensitive interrupt, we
--                       *  copy that information over to the MP table in the
--                       *  irqflag field (level sensitive, active high polarity).
--                       */
--                      if (ELCR_trigger(i))
--                              intsrc.mpc_irqflag = 13;
--                      else
--                              intsrc.mpc_irqflag = 0;
--              }
--
--              intsrc.mpc_srcbusirq = i;
--              intsrc.mpc_dstirq = i ? i : 2;          /* IRQ0 to INTIN2 */
--              MP_intsrc_info(&intsrc);
--      }
--
--      intsrc.mpc_irqtype = mp_ExtINT;
--      intsrc.mpc_srcbusirq = 0;
--      intsrc.mpc_dstirq = 0;                          /* 8259A to INTIN0 */
--      MP_intsrc_info(&intsrc);
--}
--
--static inline void __init construct_default_ISA_mptable(int mpc_default_type)
--{
--      struct mpc_config_processor processor;
--      struct mpc_config_bus bus;
--      struct mpc_config_ioapic ioapic;
--      struct mpc_config_lintsrc lintsrc;
--      int linttypes[2] = { mp_ExtINT, mp_NMI };
--      int i;
--
--      /*
--       * local APIC has default address
--       */
--      mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
--
--      /*
--       * 2 CPUs, numbered 0 & 1.
--       */
--      processor.mpc_type = MP_PROCESSOR;
--      processor.mpc_apicver = 0;
--      processor.mpc_cpuflag = CPU_ENABLED;
--      processor.mpc_cpufeature = 0;
--      processor.mpc_featureflag = 0;
--      processor.mpc_reserved[0] = 0;
--      processor.mpc_reserved[1] = 0;
--      for (i = 0; i < 2; i++) {
--              processor.mpc_apicid = i;
--              MP_processor_info(&processor);
--      }
--
--      bus.mpc_type = MP_BUS;
--      bus.mpc_busid = 0;
--      switch (mpc_default_type) {
--              default:
--                      printk(KERN_ERR "???\nUnknown standard configuration %d\n",
--                              mpc_default_type);
--                      /* fall through */
--              case 1:
--              case 5:
--                      memcpy(bus.mpc_bustype, "ISA   ", 6);
--                      break;
--      }
--      MP_bus_info(&bus);
--      if (mpc_default_type > 4) {
--              bus.mpc_busid = 1;
--              memcpy(bus.mpc_bustype, "PCI   ", 6);
--              MP_bus_info(&bus);
--      }
--
--      ioapic.mpc_type = MP_IOAPIC;
--      ioapic.mpc_apicid = 2;
--      ioapic.mpc_apicver = 0;
--      ioapic.mpc_flags = MPC_APIC_USABLE;
--      ioapic.mpc_apicaddr = 0xFEC00000;
--      MP_ioapic_info(&ioapic);
--
--      /*
--       * We set up most of the low 16 IO-APIC pins according to MPS rules.
--       */
--      construct_default_ioirq_mptable(mpc_default_type);
--
--      lintsrc.mpc_type = MP_LINTSRC;
--      lintsrc.mpc_irqflag = 0;                /* conforming */
--      lintsrc.mpc_srcbusid = 0;
--      lintsrc.mpc_srcbusirq = 0;
--      lintsrc.mpc_destapic = MP_APIC_ALL;
--      for (i = 0; i < 2; i++) {
--              lintsrc.mpc_irqtype = linttypes[i];
--              lintsrc.mpc_destapiclint = i;
--              MP_lintsrc_info(&lintsrc);
--      }
--}
--
--static struct intel_mp_floating *mpf_found;
--
--/*
-- * Scan the memory blocks for an SMP configuration block.
-- */
--void __init get_smp_config (void)
--{
--      struct intel_mp_floating *mpf = mpf_found;
--
--      /*
--       * ACPI supports both logical (e.g. Hyper-Threading) and physical 
--       * processors, where MPS only supports physical.
--       */
--      if (acpi_lapic && acpi_ioapic) {
--              printk(KERN_INFO "Using ACPI (MADT) for SMP configuration information\n");
--              return;
--      }
--      else if (acpi_lapic)
--              printk(KERN_INFO "Using ACPI for processor (LAPIC) configuration information\n");
--
--      printk("Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification);
--
--      /*
--       * Now see if we need to read further.
--       */
--      if (mpf->mpf_feature1 != 0) {
--
--              printk(KERN_INFO "Default MP configuration #%d\n", mpf->mpf_feature1);
--              construct_default_ISA_mptable(mpf->mpf_feature1);
--
--      } else if (mpf->mpf_physptr) {
--
--              /*
--               * Read the physical hardware table.  Anything here will
--               * override the defaults.
--               */
--              if (!smp_read_mpc(isa_bus_to_virt(mpf->mpf_physptr))) {
--                      smp_found_config = 0;
--                      printk(KERN_ERR "BIOS bug, MP table errors detected!...\n");
--                      printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n");
--                      return;
--              }
--              /*
--               * If there are no explicit MP IRQ entries, then we are
--               * broken.  We set up most of the low 16 IO-APIC pins to
--               * ISA defaults and hope it will work.
--               */
--              if (!mp_irq_entries) {
--                      struct mpc_config_bus bus;
--
--                      printk(KERN_ERR "BIOS bug, no explicit IRQ entries, using default mptable. (tell your hw vendor)\n");
--
--                      bus.mpc_type = MP_BUS;
--                      bus.mpc_busid = 0;
--                      memcpy(bus.mpc_bustype, "ISA   ", 6);
--                      MP_bus_info(&bus);
--
--                      construct_default_ioirq_mptable(0);
--              }
--
--      } else
--              BUG();
--
--      printk(KERN_INFO "Processors: %d\n", num_processors);
--      /*
--       * Only use the first configuration found.
--       */
--}
--
--static int __init smp_scan_config (unsigned long base, unsigned long length)
--{
--      extern void __bad_mpf_size(void); 
--      unsigned int *bp = isa_bus_to_virt(base);
--      struct intel_mp_floating *mpf;
--
--      Dprintk("Scan SMP from %p for %ld bytes.\n", bp,length);
--      if (sizeof(*mpf) != 16)
--              __bad_mpf_size();
--
--      while (length > 0) {
--              mpf = (struct intel_mp_floating *)bp;
--              if ((*bp == SMP_MAGIC_IDENT) &&
--                      (mpf->mpf_length == 1) &&
--                      !mpf_checksum((unsigned char *)bp, 16) &&
--                      ((mpf->mpf_specification == 1)
--                              || (mpf->mpf_specification == 4)) ) {
--
--                      smp_found_config = 1;
--                      mpf_found = mpf;
--                      return 1;
--              }
--              bp += 4;
--              length -= 16;
--      }
--      return 0;
--}
--
--void __init find_smp_config(void)
--{
--      unsigned int address;
--
--      /*
--       * FIXME: Linux assumes you have 640K of base ram..
--       * this continues the error...
--       *
--       * 1) Scan the bottom 1K for a signature
--       * 2) Scan the top 1K of base RAM
--       * 3) Scan the 64K of bios
--       */
--      if (smp_scan_config(0x0,0x400) ||
--              smp_scan_config(639*0x400,0x400) ||
--                      smp_scan_config(0xF0000,0x10000))
--              return;
--      /*
--       * If it is an SMP machine we should know now.
--       *
--       * there is a real-mode segmented pointer pointing to the
--       * 4K EBDA area at 0x40E, calculate and scan it here.
--       *
--       * NOTE! There are Linux loaders that will corrupt the EBDA
--       * area, and as such this kind of SMP config may be less
--       * trustworthy, simply because the SMP table may have been
--       * stomped on during early boot. These loaders are buggy and
--       * should be fixed.
--       */
--
--      address = *(unsigned short *)phys_to_virt(0x40E);
--      address <<= 4;
--      if (smp_scan_config(address, 0x1000))
--              return;
--
--      /* If we have come this far, we did not find an MP table  */
--       printk(KERN_INFO "No mptable found.\n");
--}
--
--/* --------------------------------------------------------------------------
--                            ACPI-based MP Configuration
--   -------------------------------------------------------------------------- */
--
--#ifdef CONFIG_ACPI
--
--void __init mp_register_lapic_address(u64 address)
--{
--#ifndef CONFIG_XEN
--      mp_lapic_addr = (unsigned long) address;
--      set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr);
--      if (boot_cpu_id == -1U)
--              boot_cpu_id = GET_APIC_ID(apic_read(APIC_ID));
--#endif
--}
--
--void __cpuinit mp_register_lapic (u8 id, u8 enabled)
--{
--      struct mpc_config_processor processor;
--      int                     boot_cpu = 0;
--      
--      if (id == boot_cpu_id)
--              boot_cpu = 1;
--
--#ifndef CONFIG_XEN
--      processor.mpc_type = MP_PROCESSOR;
--      processor.mpc_apicid = id;
--      processor.mpc_apicver = 0;
--      processor.mpc_cpuflag = (enabled ? CPU_ENABLED : 0);
--      processor.mpc_cpuflag |= (boot_cpu ? CPU_BOOTPROCESSOR : 0);
--      processor.mpc_cpufeature = 0;
--      processor.mpc_featureflag = 0;
--      processor.mpc_reserved[0] = 0;
--      processor.mpc_reserved[1] = 0;
--#endif
--
--      MP_processor_info(&processor);
--}
--
--#define MP_ISA_BUS            0
--#define MP_MAX_IOAPIC_PIN     127
--
--static struct mp_ioapic_routing {
--      int                     apic_id;
--      int                     gsi_start;
--      int                     gsi_end;
--      u32                     pin_programmed[4];
--} mp_ioapic_routing[MAX_IO_APICS];
--
--static int mp_find_ioapic(int gsi)
--{
--      int i = 0;
--
--      /* Find the IOAPIC that manages this GSI. */
--      for (i = 0; i < nr_ioapics; i++) {
--              if ((gsi >= mp_ioapic_routing[i].gsi_start)
--                      && (gsi <= mp_ioapic_routing[i].gsi_end))
--                      return i;
--      }
--
--      printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi);
--      return -1;
--}
--
--static u8 uniq_ioapic_id(u8 id)
--{
--      int i;
--      DECLARE_BITMAP(used, 256);
--      bitmap_zero(used, 256);
--      for (i = 0; i < nr_ioapics; i++) {
--              struct mpc_config_ioapic *ia = &mp_ioapics[i];
--              __set_bit(ia->mpc_apicid, used);
--      }
--      if (!test_bit(id, used))
--              return id;
--      return find_first_zero_bit(used, 256);
--}
--
--void __init mp_register_ioapic(u8 id, u32 address, u32 gsi_base)
--{
--      int idx = 0;
--
--      if (bad_ioapic(address))
--              return;
--
--      idx = nr_ioapics;
--
--      mp_ioapics[idx].mpc_type = MP_IOAPIC;
--      mp_ioapics[idx].mpc_flags = MPC_APIC_USABLE;
--      mp_ioapics[idx].mpc_apicaddr = address;
--
--#ifndef CONFIG_XEN
--      set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
--#endif
--      mp_ioapics[idx].mpc_apicid = uniq_ioapic_id(id);
--      mp_ioapics[idx].mpc_apicver = 0;
--      
--      /* 
--       * Build basic IRQ lookup table to facilitate gsi->io_apic lookups
--       * and to prevent reprogramming of IOAPIC pins (PCI IRQs).
--       */
--      mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mpc_apicid;
--      mp_ioapic_routing[idx].gsi_start = gsi_base;
--      mp_ioapic_routing[idx].gsi_end = gsi_base + 
--              io_apic_get_redir_entries(idx);
--
--      printk(KERN_INFO "IOAPIC[%d]: apic_id %d, address 0x%x, "
--              "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid, 
--              mp_ioapics[idx].mpc_apicaddr,
--              mp_ioapic_routing[idx].gsi_start,
--              mp_ioapic_routing[idx].gsi_end);
--
--      nr_ioapics++;
--}
--
--void __init
--mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32       gsi)
--{
--      struct mpc_config_intsrc intsrc;
--      int                     ioapic = -1;
--      int                     pin = -1;
--
--      /* 
--       * Convert 'gsi' to 'ioapic.pin'.
--       */
--      ioapic = mp_find_ioapic(gsi);
--      if (ioapic < 0)
--              return;
--      pin = gsi - mp_ioapic_routing[ioapic].gsi_start;
--
--      /*
--       * TBD: This check is for faulty timer entries, where the override
--       *      erroneously sets the trigger to level, resulting in a HUGE 
--       *      increase of timer interrupts!
--       */
--      if ((bus_irq == 0) && (trigger == 3))
--              trigger = 1;
--
--      intsrc.mpc_type = MP_INTSRC;
--      intsrc.mpc_irqtype = mp_INT;
--      intsrc.mpc_irqflag = (trigger << 2) | polarity;
--      intsrc.mpc_srcbus = MP_ISA_BUS;
--      intsrc.mpc_srcbusirq = bus_irq;                                /* IRQ */
--      intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid;        /* APIC ID */
--      intsrc.mpc_dstirq = pin;                                    /* INTIN# */
--
--      Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, %d-%d\n", 
--              intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3, 
--              (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus, 
--              intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, intsrc.mpc_dstirq);
--
--      mp_irqs[mp_irq_entries] = intsrc;
--      if (++mp_irq_entries == MAX_IRQ_SOURCES)
--              panic("Max # of irq sources exceeded!\n");
--}
--
--void __init mp_config_acpi_legacy_irqs(void)
--{
--      struct mpc_config_intsrc intsrc;
--      int i = 0;
--      int ioapic = -1;
--
--      /* 
--       * Fabricate the legacy ISA bus (bus #31).
--       */
--      set_bit(MP_ISA_BUS, mp_bus_not_pci);
--
--      /* 
--       * Locate the IOAPIC that manages the ISA IRQs (0-15). 
--       */
--      ioapic = mp_find_ioapic(0);
--      if (ioapic < 0)
--              return;
--
--      intsrc.mpc_type = MP_INTSRC;
--      intsrc.mpc_irqflag = 0;                                 /* Conforming */
--      intsrc.mpc_srcbus = MP_ISA_BUS;
--      intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid;
--
--      /* 
--       * Use the default configuration for the IRQs 0-15.  Unless
--       * overridden by (MADT) interrupt source override entries.
--       */
--      for (i = 0; i < 16; i++) {
--              int idx;
--
--              for (idx = 0; idx < mp_irq_entries; idx++) {
--                      struct mpc_config_intsrc *irq = mp_irqs + idx;
--
--                      /* Do we already have a mapping for this ISA IRQ? */
--                      if (irq->mpc_srcbus == MP_ISA_BUS && irq->mpc_srcbusirq == i)
--                              break;
--
--                      /* Do we already have a mapping for this IOAPIC pin */
--                      if ((irq->mpc_dstapic == intsrc.mpc_dstapic) &&
--                              (irq->mpc_dstirq == i))
--                              break;
--              }
--
--              if (idx != mp_irq_entries) {
--                      printk(KERN_DEBUG "ACPI: IRQ%d used by override.\n", i);
--                      continue;                       /* IRQ already used */
--              }
--
--              intsrc.mpc_irqtype = mp_INT;
--              intsrc.mpc_srcbusirq = i;                  /* Identity mapped */
--              intsrc.mpc_dstirq = i;
--
--              Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, "
--                      "%d-%d\n", intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3, 
--                      (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus, 
--                      intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, 
--                      intsrc.mpc_dstirq);
--
--              mp_irqs[mp_irq_entries] = intsrc;
--              if (++mp_irq_entries == MAX_IRQ_SOURCES)
--                      panic("Max # of irq sources exceeded!\n");
--      }
--}
--
--int mp_register_gsi(u32 gsi, int triggering, int polarity)
--{
--      int ioapic = -1;
--      int ioapic_pin = 0;
--      int idx, bit = 0;
--
--      if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC)
--              return gsi;
--
--      /* Don't set up the ACPI SCI because it's already set up */
--      if (acpi_gbl_FADT.sci_interrupt == gsi)
--              return gsi;
--
--      ioapic = mp_find_ioapic(gsi);
--      if (ioapic < 0) {
--              printk(KERN_WARNING "No IOAPIC for GSI %u\n", gsi);
--              return gsi;
--      }
--
--      ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_start;
--
--      /* 
--       * Avoid pin reprogramming.  PRTs typically include entries  
--       * with redundant pin->gsi mappings (but unique PCI devices);
--       * we only program the IOAPIC on the first.
--       */
--      bit = ioapic_pin % 32;
--      idx = (ioapic_pin < 32) ? 0 : (ioapic_pin / 32);
--      if (idx > 3) {
--              printk(KERN_ERR "Invalid reference to IOAPIC pin "
--                      "%d-%d\n", mp_ioapic_routing[ioapic].apic_id, 
--                      ioapic_pin);
--              return gsi;
--      }
--      if ((1<<bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) {
--              Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n",
--                      mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
--              return gsi;
--      }
--
--      mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1<<bit);
--
--      io_apic_set_pci_routing(ioapic, ioapic_pin, gsi,
--              triggering == ACPI_EDGE_SENSITIVE ? 0 : 1,
--              polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
--      return gsi;
--}
--#endif /*CONFIG_ACPI*/
---- /dev/null
-+++ b/arch/x86/kernel/mpparse-xen.c
-@@ -0,0 +1,1104 @@
-+/*
-+ *    Intel Multiprocessor Specification 1.1 and 1.4
-+ *    compliant MP-table parsing routines.
-+ *
-+ *    (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
-+ *    (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
-+ *      (c) 2008 Alexey Starikovskiy <astarikovskiy@suse.de>
-+ */
-+
-+#include <linux/mm.h>
-+#include <linux/init.h>
-+#include <linux/delay.h>
-+#include <linux/bootmem.h>
-+#include <linux/kernel_stat.h>
-+#include <linux/mc146818rtc.h>
-+#include <linux/bitops.h>
-+#include <linux/acpi.h>
-+#include <linux/module.h>
-+
-+#include <asm/smp.h>
-+#include <asm/mtrr.h>
-+#include <asm/mpspec.h>
-+#include <asm/pgalloc.h>
-+#include <asm/io_apic.h>
-+#include <asm/proto.h>
-+#include <asm/acpi.h>
-+#include <asm/bios_ebda.h>
-+
-+#include <mach_apic.h>
-+#ifdef CONFIG_X86_32
-+#include <mach_apicdef.h>
-+#include <mach_mpparse.h>
-+#endif
-+
-+/* Have we found an MP table */
-+int smp_found_config;
-+
-+/*
-+ * Various Linux-internal data structures created from the
-+ * MP-table.
-+ */
-+#if defined (CONFIG_MCA) || defined (CONFIG_EISA)
-+int mp_bus_id_to_type[MAX_MP_BUSSES];
-+#endif
-+
-+DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
-+int mp_bus_id_to_pci_bus[MAX_MP_BUSSES] = {[0 ... MAX_MP_BUSSES - 1] = -1 };
-+
-+static int mp_current_pci_id;
-+
-+int pic_mode;
-+
-+/*
-+ * Intel MP BIOS table parsing routines:
-+ */
-+
-+/*
-+ * Checksum an MP configuration block.
-+ */
-+
-+static int __init mpf_checksum(unsigned char *mp, int len)
-+{
-+      int sum = 0;
-+
-+      while (len--)
-+              sum += *mp++;
-+
-+      return sum & 0xFF;
-+}
-+
-+#ifdef CONFIG_X86_NUMAQ
-+/*
-+ * Have to match translation table entries to main table entries by counter
-+ * hence the mpc_record variable .... can't see a less disgusting way of
-+ * doing this ....
-+ */
-+
-+static int mpc_record;
-+static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY]
-+    __cpuinitdata;
-+#endif
-+
-+#ifndef CONFIG_XEN
-+static void __cpuinit MP_processor_info(struct mpc_config_processor *m)
-+{
-+      int apicid;
-+      char *bootup_cpu = "";
-+
-+      if (!(m->mpc_cpuflag & CPU_ENABLED)) {
-+              disabled_cpus++;
-+              return;
-+      }
-+#ifdef CONFIG_X86_NUMAQ
-+      apicid = mpc_apic_id(m, translation_table[mpc_record]);
-+#else
-+      apicid = m->mpc_apicid;
-+#endif
-+      if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
-+              bootup_cpu = " (Bootup-CPU)";
-+              boot_cpu_physical_apicid = m->mpc_apicid;
-+      }
-+
-+      printk(KERN_INFO "Processor #%d%s\n", m->mpc_apicid, bootup_cpu);
-+      generic_processor_info(apicid, m->mpc_apicver);
-+}
-+#else
-+static void __cpuinit MP_processor_info(struct mpc_config_processor *m)
-+{
-+      num_processors++;
-+}
-+#endif /* CONFIG_XEN */
-+
-+static void __init MP_bus_info(struct mpc_config_bus *m)
-+{
-+      char str[7];
-+
-+      memcpy(str, m->mpc_bustype, 6);
-+      str[6] = 0;
-+
-+#ifdef CONFIG_X86_NUMAQ
-+      mpc_oem_bus_info(m, str, translation_table[mpc_record]);
-+#else
-+      Dprintk("Bus #%d is %s\n", m->mpc_busid, str);
-+#endif
-+
-+#if MAX_MP_BUSSES < 256
-+      if (m->mpc_busid >= MAX_MP_BUSSES) {
-+              printk(KERN_WARNING "MP table busid value (%d) for bustype %s "
-+                     " is too large, max. supported is %d\n",
-+                     m->mpc_busid, str, MAX_MP_BUSSES - 1);
-+              return;
-+      }
-+#endif
-+
-+      if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA) - 1) == 0) {
-+               set_bit(m->mpc_busid, mp_bus_not_pci);
-+#if defined(CONFIG_EISA) || defined (CONFIG_MCA)
-+              mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
-+#endif
-+      } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) {
-+#ifdef CONFIG_X86_NUMAQ
-+              mpc_oem_pci_bus(m, translation_table[mpc_record]);
-+#endif
-+              clear_bit(m->mpc_busid, mp_bus_not_pci);
-+              mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id;
-+              mp_current_pci_id++;
-+#if defined(CONFIG_EISA) || defined (CONFIG_MCA)
-+              mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI;
-+      } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA) - 1) == 0) {
-+              mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA;
-+      } else if (strncmp(str, BUSTYPE_MCA, sizeof(BUSTYPE_MCA) - 1) == 0) {
-+              mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA;
-+#endif
-+      } else
-+              printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str);
-+}
-+
-+#ifdef CONFIG_X86_IO_APIC
-+
-+static int bad_ioapic(unsigned long address)
-+{
-+      if (nr_ioapics >= MAX_IO_APICS) {
-+              printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
-+                     "(found %d)\n", MAX_IO_APICS, nr_ioapics);
-+              panic("Recompile kernel with bigger MAX_IO_APICS!\n");
-+      }
-+      if (!address) {
-+              printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address"
-+                     " found in table, skipping!\n");
-+              return 1;
-+      }
-+      return 0;
-+}
-+
-+static void __init MP_ioapic_info(struct mpc_config_ioapic *m)
-+{
-+      if (!(m->mpc_flags & MPC_APIC_USABLE))
-+              return;
-+
-+      printk(KERN_INFO "I/O APIC #%d Version %d at 0x%X.\n",
-+             m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr);
-+
-+      if (bad_ioapic(m->mpc_apicaddr))
-+              return;
-+
-+      mp_ioapics[nr_ioapics] = *m;
-+      nr_ioapics++;
-+}
-+
-+static void __init MP_intsrc_info(struct mpc_config_intsrc *m)
-+{
-+      mp_irqs[mp_irq_entries] = *m;
-+      Dprintk("Int: type %d, pol %d, trig %d, bus %d,"
-+              " IRQ %02x, APIC ID %x, APIC INT %02x\n",
-+              m->mpc_irqtype, m->mpc_irqflag & 3,
-+              (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus,
-+              m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq);
-+      if (++mp_irq_entries == MAX_IRQ_SOURCES)
-+              panic("Max # of irq sources exceeded!!\n");
-+}
-+
-+#endif
-+
-+static void __init MP_lintsrc_info(struct mpc_config_lintsrc *m)
-+{
-+      Dprintk("Lint: type %d, pol %d, trig %d, bus %d,"
-+              " IRQ %02x, APIC ID %x, APIC LINT %02x\n",
-+              m->mpc_irqtype, m->mpc_irqflag & 3,
-+              (m->mpc_irqflag >> 2) & 3, m->mpc_srcbusid,
-+              m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
-+}
-+
-+#ifdef CONFIG_X86_NUMAQ
-+static void __init MP_translation_info(struct mpc_config_translation *m)
-+{
-+      printk(KERN_INFO
-+             "Translation: record %d, type %d, quad %d, global %d, local %d\n",
-+             mpc_record, m->trans_type, m->trans_quad, m->trans_global,
-+             m->trans_local);
-+
-+      if (mpc_record >= MAX_MPC_ENTRY)
-+              printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n");
-+      else
-+              translation_table[mpc_record] = m;      /* stash this for later */
-+      if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad))
-+              node_set_online(m->trans_quad);
-+}
-+
-+/*
-+ * Read/parse the MPC oem tables
-+ */
-+
-+static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable,
-+                                  unsigned short oemsize)
-+{
-+      int count = sizeof(*oemtable);  /* the header size */
-+      unsigned char *oemptr = ((unsigned char *)oemtable) + count;
-+
-+      mpc_record = 0;
-+      printk(KERN_INFO "Found an OEM MPC table at %8p - parsing it ... \n",
-+             oemtable);
-+      if (memcmp(oemtable->oem_signature, MPC_OEM_SIGNATURE, 4)) {
-+              printk(KERN_WARNING
-+                     "SMP mpc oemtable: bad signature [%c%c%c%c]!\n",
-+                     oemtable->oem_signature[0], oemtable->oem_signature[1],
-+                     oemtable->oem_signature[2], oemtable->oem_signature[3]);
-+              return;
-+      }
-+      if (mpf_checksum((unsigned char *)oemtable, oemtable->oem_length)) {
-+              printk(KERN_WARNING "SMP oem mptable: checksum error!\n");
-+              return;
-+      }
-+      while (count < oemtable->oem_length) {
-+              switch (*oemptr) {
-+              case MP_TRANSLATION:
-+                      {
-+                              struct mpc_config_translation *m =
-+                                  (struct mpc_config_translation *)oemptr;
-+                              MP_translation_info(m);
-+                              oemptr += sizeof(*m);
-+                              count += sizeof(*m);
-+                              ++mpc_record;
-+                              break;
-+                      }
-+              default:
-+                      {
-+                              printk(KERN_WARNING
-+                                     "Unrecognised OEM table entry type! - %d\n",
-+                                     (int)*oemptr);
-+                              return;
-+                      }
-+              }
-+      }
-+}
-+
-+static inline void mps_oem_check(struct mp_config_table *mpc, char *oem,
-+                               char *productid)
-+{
-+      if (strncmp(oem, "IBM NUMA", 8))
-+              printk("Warning!  May not be a NUMA-Q system!\n");
-+      if (mpc->mpc_oemptr)
-+              smp_read_mpc_oem((struct mp_config_oemtable *)mpc->mpc_oemptr,
-+                               mpc->mpc_oemsize);
-+}
-+#endif /* CONFIG_X86_NUMAQ */
-+
-+/*
-+ * Read/parse the MPC
-+ */
-+
-+static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early)
-+{
-+      char str[16];
-+      char oem[10];
-+      int count = sizeof(*mpc);
-+      unsigned char *mpt = ((unsigned char *)mpc) + count;
-+
-+      if (memcmp(mpc->mpc_signature, MPC_SIGNATURE, 4)) {
-+              printk(KERN_ERR "MPTABLE: bad signature [%c%c%c%c]!\n",
-+                     mpc->mpc_signature[0], mpc->mpc_signature[1],
-+                     mpc->mpc_signature[2], mpc->mpc_signature[3]);
-+              return 0;
-+      }
-+      if (mpf_checksum((unsigned char *)mpc, mpc->mpc_length)) {
-+              printk(KERN_ERR "MPTABLE: checksum error!\n");
-+              return 0;
-+      }
-+      if (mpc->mpc_spec != 0x01 && mpc->mpc_spec != 0x04) {
-+              printk(KERN_ERR "MPTABLE: bad table version (%d)!!\n",
-+                     mpc->mpc_spec);
-+              return 0;
-+      }
-+      if (!mpc->mpc_lapic) {
-+              printk(KERN_ERR "MPTABLE: null local APIC address!\n");
-+              return 0;
-+      }
-+      memcpy(oem, mpc->mpc_oem, 8);
-+      oem[8] = 0;
-+      printk(KERN_INFO "MPTABLE: OEM ID: %s ", oem);
-+
-+      memcpy(str, mpc->mpc_productid, 12);
-+      str[12] = 0;
-+      printk("Product ID: %s ", str);
-+
-+#ifdef CONFIG_X86_32
-+      mps_oem_check(mpc, oem, str);
-+#endif
-+      printk(KERN_INFO "MPTABLE: Product ID: %s ", str);
-+
-+      printk(KERN_INFO "MPTABLE: APIC at: 0x%X\n", mpc->mpc_lapic);
-+
-+      /* save the local APIC address, it might be non-default */
-+      if (!acpi_lapic)
-+              mp_lapic_addr = mpc->mpc_lapic;
-+
-+      if (early)
-+              return 1;
-+
-+      /*
-+       *      Now process the configuration blocks.
-+       */
-+#ifdef CONFIG_X86_NUMAQ
-+      mpc_record = 0;
-+#endif
-+      while (count < mpc->mpc_length) {
-+              switch (*mpt) {
-+              case MP_PROCESSOR:
-+                      {
-+                              struct mpc_config_processor *m =
-+                                  (struct mpc_config_processor *)mpt;
-+                              /* ACPI may have already provided this data */
-+                              if (!acpi_lapic)
-+                                      MP_processor_info(m);
-+                              mpt += sizeof(*m);
-+                              count += sizeof(*m);
-+                              break;
-+                      }
-+              case MP_BUS:
-+                      {
-+                              struct mpc_config_bus *m =
-+                                  (struct mpc_config_bus *)mpt;
-+                              MP_bus_info(m);
-+                              mpt += sizeof(*m);
-+                              count += sizeof(*m);
-+                              break;
-+                      }
-+              case MP_IOAPIC:
-+                      {
-+#ifdef CONFIG_X86_IO_APIC
-+                              struct mpc_config_ioapic *m =
-+                                  (struct mpc_config_ioapic *)mpt;
-+                              MP_ioapic_info(m);
-+#endif
-+                              mpt += sizeof(struct mpc_config_ioapic);
-+                              count += sizeof(struct mpc_config_ioapic);
-+                              break;
-+                      }
-+              case MP_INTSRC:
-+                      {
-+#ifdef CONFIG_X86_IO_APIC
-+                              struct mpc_config_intsrc *m =
-+                                  (struct mpc_config_intsrc *)mpt;
-+
-+                              MP_intsrc_info(m);
-+#endif
-+                              mpt += sizeof(struct mpc_config_intsrc);
-+                              count += sizeof(struct mpc_config_intsrc);
-+                              break;
-+                      }
-+              case MP_LINTSRC:
-+                      {
-+                              struct mpc_config_lintsrc *m =
-+                                  (struct mpc_config_lintsrc *)mpt;
-+                              MP_lintsrc_info(m);
-+                              mpt += sizeof(*m);
-+                              count += sizeof(*m);
-+                              break;
-+                      }
-+              default:
-+                      /* wrong mptable */
-+                      printk(KERN_ERR "Your mptable is wrong, contact your HW vendor!\n");
-+                      printk(KERN_ERR "type %x\n", *mpt);
-+                      print_hex_dump(KERN_ERR, "  ", DUMP_PREFIX_ADDRESS, 16,
-+                                      1, mpc, mpc->mpc_length, 1);
-+                      count = mpc->mpc_length;
-+                      break;
-+              }
-+#ifdef CONFIG_X86_NUMAQ
-+              ++mpc_record;
-+#endif
-+      }
-+      setup_apic_routing();
-+      if (!num_processors)
-+              printk(KERN_ERR "MPTABLE: no processors registered!\n");
-+      return num_processors;
-+}
-+
-+#ifdef CONFIG_X86_IO_APIC
-+
-+static int __init ELCR_trigger(unsigned int irq)
-+{
-+      unsigned int port;
-+
-+      port = 0x4d0 + (irq >> 3);
-+      return (inb(port) >> (irq & 7)) & 1;
-+}
-+
-+static void __init construct_default_ioirq_mptable(int mpc_default_type)
-+{
-+      struct mpc_config_intsrc intsrc;
-+      int i;
-+      int ELCR_fallback = 0;
-+
-+      intsrc.mpc_type = MP_INTSRC;
-+      intsrc.mpc_irqflag = 0; /* conforming */
-+      intsrc.mpc_srcbus = 0;
-+      intsrc.mpc_dstapic = mp_ioapics[0].mpc_apicid;
-+
-+      intsrc.mpc_irqtype = mp_INT;
-+
-+      /*
-+       *  If true, we have an ISA/PCI system with no IRQ entries
-+       *  in the MP table. To prevent the PCI interrupts from being set up
-+       *  incorrectly, we try to use the ELCR. The sanity check to see if
-+       *  there is good ELCR data is very simple - IRQ0, 1, 2 and 13 can
-+       *  never be level sensitive, so we simply see if the ELCR agrees.
-+       *  If it does, we assume it's valid.
-+       */
-+      if (mpc_default_type == 5) {
-+              printk(KERN_INFO "ISA/PCI bus type with no IRQ information... "
-+                     "falling back to ELCR\n");
-+
-+              if (ELCR_trigger(0) || ELCR_trigger(1) || ELCR_trigger(2) ||
-+                  ELCR_trigger(13))
-+                      printk(KERN_ERR "ELCR contains invalid data... "
-+                             "not using ELCR\n");
-+              else {
-+                      printk(KERN_INFO
-+                             "Using ELCR to identify PCI interrupts\n");
-+                      ELCR_fallback = 1;
-+              }
-+      }
-+
-+      for (i = 0; i < 16; i++) {
-+              switch (mpc_default_type) {
-+              case 2:
-+                      if (i == 0 || i == 13)
-+                              continue;       /* IRQ0 & IRQ13 not connected */
-+                      /* fall through */
-+              default:
-+                      if (i == 2)
-+                              continue;       /* IRQ2 is never connected */
-+              }
-+
-+              if (ELCR_fallback) {
-+                      /*
-+                       *  If the ELCR indicates a level-sensitive interrupt, we
-+                       *  copy that information over to the MP table in the
-+                       *  irqflag field (level sensitive, active high polarity).
-+                       */
-+                      if (ELCR_trigger(i))
-+                              intsrc.mpc_irqflag = 13;
-+                      else
-+                              intsrc.mpc_irqflag = 0;
-+              }
-+
-+              intsrc.mpc_srcbusirq = i;
-+              intsrc.mpc_dstirq = i ? i : 2;  /* IRQ0 to INTIN2 */
-+              MP_intsrc_info(&intsrc);
-+      }
-+
-+      intsrc.mpc_irqtype = mp_ExtINT;
-+      intsrc.mpc_srcbusirq = 0;
-+      intsrc.mpc_dstirq = 0;  /* 8259A to INTIN0 */
-+      MP_intsrc_info(&intsrc);
-+}
-+
-+#endif
-+
-+static inline void __init construct_default_ISA_mptable(int mpc_default_type)
-+{
-+      struct mpc_config_processor processor;
-+      struct mpc_config_bus bus;
-+#ifdef CONFIG_X86_IO_APIC
-+      struct mpc_config_ioapic ioapic;
-+#endif
-+      struct mpc_config_lintsrc lintsrc;
-+      int linttypes[2] = { mp_ExtINT, mp_NMI };
-+      int i;
-+
-+      /*
-+       * local APIC has default address
-+       */
-+      mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
-+
-+      /*
-+       * 2 CPUs, numbered 0 & 1.
-+       */
-+      processor.mpc_type = MP_PROCESSOR;
-+      /* Either an integrated APIC or a discrete 82489DX. */
-+      processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
-+      processor.mpc_cpuflag = CPU_ENABLED;
-+      processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
-+          (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask;
-+      processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
-+      processor.mpc_reserved[0] = 0;
-+      processor.mpc_reserved[1] = 0;
-+      for (i = 0; i < 2; i++) {
-+              processor.mpc_apicid = i;
-+              MP_processor_info(&processor);
-+      }
-+
-+      bus.mpc_type = MP_BUS;
-+      bus.mpc_busid = 0;
-+      switch (mpc_default_type) {
-+      default:
-+              printk(KERN_ERR "???\nUnknown standard configuration %d\n",
-+                     mpc_default_type);
-+              /* fall through */
-+      case 1:
-+      case 5:
-+              memcpy(bus.mpc_bustype, "ISA   ", 6);
-+              break;
-+      case 2:
-+      case 6:
-+      case 3:
-+              memcpy(bus.mpc_bustype, "EISA  ", 6);
-+              break;
-+      case 4:
-+      case 7:
-+              memcpy(bus.mpc_bustype, "MCA   ", 6);
-+      }
-+      MP_bus_info(&bus);
-+      if (mpc_default_type > 4) {
-+              bus.mpc_busid = 1;
-+              memcpy(bus.mpc_bustype, "PCI   ", 6);
-+              MP_bus_info(&bus);
-+      }
-+
-+#ifdef CONFIG_X86_IO_APIC
-+      ioapic.mpc_type = MP_IOAPIC;
-+      ioapic.mpc_apicid = 2;
-+      ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
-+      ioapic.mpc_flags = MPC_APIC_USABLE;
-+      ioapic.mpc_apicaddr = 0xFEC00000;
-+      MP_ioapic_info(&ioapic);
-+
-+      /*
-+       * We set up most of the low 16 IO-APIC pins according to MPS rules.
-+       */
-+      construct_default_ioirq_mptable(mpc_default_type);
-+#endif
-+      lintsrc.mpc_type = MP_LINTSRC;
-+      lintsrc.mpc_irqflag = 0;        /* conforming */
-+      lintsrc.mpc_srcbusid = 0;
-+      lintsrc.mpc_srcbusirq = 0;
-+      lintsrc.mpc_destapic = MP_APIC_ALL;
-+      for (i = 0; i < 2; i++) {
-+              lintsrc.mpc_irqtype = linttypes[i];
-+              lintsrc.mpc_destapiclint = i;
-+              MP_lintsrc_info(&lintsrc);
-+      }
-+}
-+
-+static struct intel_mp_floating *mpf_found;
-+
-+/*
-+ * Scan the memory blocks for an SMP configuration block.
-+ */
-+static void __init __get_smp_config(unsigned early)
-+{
-+      struct intel_mp_floating *mpf = mpf_found;
-+
-+      if (acpi_lapic && early)
-+              return;
-+      /*
-+       * ACPI supports both logical (e.g. Hyper-Threading) and physical
-+       * processors, where MPS only supports physical.
-+       */
-+      if (acpi_lapic && acpi_ioapic) {
-+              printk(KERN_INFO "Using ACPI (MADT) for SMP configuration "
-+                     "information\n");
-+              return;
-+      } else if (acpi_lapic)
-+              printk(KERN_INFO "Using ACPI for processor (LAPIC) "
-+                     "configuration information\n");
-+
-+      printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n",
-+             mpf->mpf_specification);
-+#ifdef CONFIG_X86_32
-+      if (mpf->mpf_feature2 & (1 << 7)) {
-+              printk(KERN_INFO "    IMCR and PIC compatibility mode.\n");
-+              pic_mode = 1;
-+      } else {
-+              printk(KERN_INFO "    Virtual Wire compatibility mode.\n");
-+              pic_mode = 0;
-+      }
-+#endif
-+      /*
-+       * Now see if we need to read further.
-+       */
-+      if (mpf->mpf_feature1 != 0) {
-+              if (early) {
-+                      /*
-+                       * local APIC has default address
-+                       */
-+                      mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
-+                      return;
-+              }
-+
-+              printk(KERN_INFO "Default MP configuration #%d\n",
-+                     mpf->mpf_feature1);
-+              construct_default_ISA_mptable(mpf->mpf_feature1);
-+
-+      } else if (mpf->mpf_physptr) {
-+
-+              /*
-+               * Read the physical hardware table.  Anything here will
-+               * override the defaults.
-+               */
-+              if (!smp_read_mpc(isa_bus_to_virt(mpf->mpf_physptr), early)) {
-+                      smp_found_config = 0;
-+                      printk(KERN_ERR
-+                             "BIOS bug, MP table errors detected!...\n");
-+                      printk(KERN_ERR "... disabling SMP support. "
-+                             "(tell your hw vendor)\n");
-+                      return;
-+              }
-+
-+              if (early)
-+                      return;
-+#ifdef CONFIG_X86_IO_APIC
-+              /*
-+               * If there are no explicit MP IRQ entries, then we are
-+               * broken.  We set up most of the low 16 IO-APIC pins to
-+               * ISA defaults and hope it will work.
-+               */
-+              if (!mp_irq_entries) {
-+                      struct mpc_config_bus bus;
-+
-+                      printk(KERN_ERR "BIOS bug, no explicit IRQ entries, "
-+                             "using default mptable. "
-+                             "(tell your hw vendor)\n");
-+
-+                      bus.mpc_type = MP_BUS;
-+                      bus.mpc_busid = 0;
-+                      memcpy(bus.mpc_bustype, "ISA   ", 6);
-+                      MP_bus_info(&bus);
-+
-+                      construct_default_ioirq_mptable(0);
-+              }
-+#endif
-+      } else
-+              BUG();
-+
-+      if (!early)
-+              printk(KERN_INFO "Processors: %d\n", num_processors);
-+      /*
-+       * Only use the first configuration found.
-+       */
-+}
-+
-+void __init early_get_smp_config(void)
-+{
-+      __get_smp_config(1);
-+}
-+
-+void __init get_smp_config(void)
-+{
-+      __get_smp_config(0);
-+}
-+
-+static int __init smp_scan_config(unsigned long base, unsigned long length,
-+                                unsigned reserve)
-+{
-+      unsigned int *bp = isa_bus_to_virt(base);
-+      struct intel_mp_floating *mpf;
-+
-+      Dprintk("Scan SMP from %p for %ld bytes.\n", bp, length);
-+      BUILD_BUG_ON(sizeof(*mpf) != 16);
-+
-+      while (length > 0) {
-+              mpf = (struct intel_mp_floating *)bp;
-+              if ((*bp == SMP_MAGIC_IDENT) &&
-+                  (mpf->mpf_length == 1) &&
-+                  !mpf_checksum((unsigned char *)bp, 16) &&
-+                  ((mpf->mpf_specification == 1)
-+                   || (mpf->mpf_specification == 4))) {
-+
-+                      smp_found_config = 1;
-+                      mpf_found = mpf;
-+#ifdef CONFIG_X86_32
-+#ifndef CONFIG_XEN
-+                      printk(KERN_INFO "found SMP MP-table at [%p] %08lx\n",
-+                             mpf, virt_to_phys(mpf));
-+                      reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE,
-+                                      BOOTMEM_DEFAULT);
-+                      if (mpf->mpf_physptr) {
-+                              /*
-+                               * We cannot access to MPC table to compute
-+                               * table size yet, as only few megabytes from
-+                               * the bottom is mapped now.
-+                               * PC-9800's MPC table places on the very last
-+                               * of physical memory; so that simply reserving
-+                               * PAGE_SIZE from mpg->mpf_physptr yields BUG()
-+                               * in reserve_bootmem.
-+                               */
-+                              unsigned long size = PAGE_SIZE;
-+                              unsigned long end = max_low_pfn * PAGE_SIZE;
-+                              if (mpf->mpf_physptr + size > end)
-+                                      size = end - mpf->mpf_physptr;
-+                              reserve_bootmem(mpf->mpf_physptr, size,
-+                                              BOOTMEM_DEFAULT);
-+                      }
-+#else
-+                      printk(KERN_INFO "found SMP MP-table at [%p] %08lx\n",
-+                              mpf, ((void *)bp - isa_bus_to_virt(base)) + base);
-+#endif
-+#elif !defined(CONFIG_XEN)
-+                      if (!reserve)
-+                              return 1;
-+
-+                      reserve_bootmem_generic(virt_to_phys(mpf), PAGE_SIZE);
-+                      if (mpf->mpf_physptr)
-+                              reserve_bootmem_generic(mpf->mpf_physptr,
-+                                                      PAGE_SIZE);
-+#endif
-+              return 1;
-+              }
-+              bp += 4;
-+              length -= 16;
-+      }
-+      return 0;
-+}
-+
-+static void __init __find_smp_config(unsigned reserve)
-+{
-+#ifndef CONFIG_XEN
-+      unsigned int address;
-+#endif
-+
-+      /*
-+       * FIXME: Linux assumes you have 640K of base ram..
-+       * this continues the error...
-+       *
-+       * 1) Scan the bottom 1K for a signature
-+       * 2) Scan the top 1K of base RAM
-+       * 3) Scan the 64K of bios
-+       */
-+      if (smp_scan_config(0x0, 0x400, reserve) ||
-+          smp_scan_config(639 * 0x400, 0x400, reserve) ||
-+          smp_scan_config(0xF0000, 0x10000, reserve))
-+              return;
-+      /*
-+       * If it is an SMP machine we should know now, unless the
-+       * configuration is in an EISA/MCA bus machine with an
-+       * extended bios data area.
-+       *
-+       * there is a real-mode segmented pointer pointing to the
-+       * 4K EBDA area at 0x40E, calculate and scan it here.
-+       *
-+       * NOTE! There are Linux loaders that will corrupt the EBDA
-+       * area, and as such this kind of SMP config may be less
-+       * trustworthy, simply because the SMP table may have been
-+       * stomped on during early boot. These loaders are buggy and
-+       * should be fixed.
-+       *
-+       * MP1.4 SPEC states to only scan first 1K of 4K EBDA.
-+       */
-+
-+#ifndef CONFIG_XEN
-+      address = get_bios_ebda();
-+      if (address)
-+              smp_scan_config(address, 0x400, reserve);
-+#endif
-+}
-+
-+void __init early_find_smp_config(void)
-+{
-+      __find_smp_config(0);
-+}
-+
-+void __init find_smp_config(void)
-+{
-+      __find_smp_config(1);
-+}
-+
-+/* --------------------------------------------------------------------------
-+                            ACPI-based MP Configuration
-+   -------------------------------------------------------------------------- */
-+
-+/*
-+ * Keep this outside and initialized to 0, for !CONFIG_ACPI builds:
-+ */
-+int es7000_plat;
-+
-+#ifdef CONFIG_ACPI
-+
-+#ifdef        CONFIG_X86_IO_APIC
-+
-+#define MP_ISA_BUS            0
-+
-+extern struct mp_ioapic_routing mp_ioapic_routing[MAX_IO_APICS];
-+
-+static int mp_find_ioapic(int gsi)
-+{
-+      int i = 0;
-+
-+      /* Find the IOAPIC that manages this GSI. */
-+      for (i = 0; i < nr_ioapics; i++) {
-+              if ((gsi >= mp_ioapic_routing[i].gsi_base)
-+                  && (gsi <= mp_ioapic_routing[i].gsi_end))
-+                      return i;
-+      }
-+
-+      printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi);
-+      return -1;
-+}
-+
-+static u8 __init uniq_ioapic_id(u8 id)
-+{
-+#ifdef CONFIG_X86_32
-+      if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
-+          !APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
-+              return io_apic_get_unique_id(nr_ioapics, id);
-+      else
-+              return id;
-+#else
-+      int i;
-+      DECLARE_BITMAP(used, 256);
-+      bitmap_zero(used, 256);
-+      for (i = 0; i < nr_ioapics; i++) {
-+              struct mpc_config_ioapic *ia = &mp_ioapics[i];
-+              __set_bit(ia->mpc_apicid, used);
-+      }
-+      if (!test_bit(id, used))
-+              return id;
-+      return find_first_zero_bit(used, 256);
-+#endif
-+}
-+
-+void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
-+{
-+      int idx = 0;
-+
-+      if (bad_ioapic(address))
-+              return;
-+
-+      idx = nr_ioapics;
-+
-+      mp_ioapics[idx].mpc_type = MP_IOAPIC;
-+      mp_ioapics[idx].mpc_flags = MPC_APIC_USABLE;
-+      mp_ioapics[idx].mpc_apicaddr = address;
-+
-+#ifndef CONFIG_XEN
-+      set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
-+#endif
-+      mp_ioapics[idx].mpc_apicid = uniq_ioapic_id(id);
-+#ifdef CONFIG_X86_32
-+      mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx);
-+#else
-+      mp_ioapics[idx].mpc_apicver = 0;
-+#endif
-+      /*
-+       * Build basic GSI lookup table to facilitate gsi->io_apic lookups
-+       * and to prevent reprogramming of IOAPIC pins (PCI GSIs).
-+       */
-+      mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mpc_apicid;
-+      mp_ioapic_routing[idx].gsi_base = gsi_base;
-+      mp_ioapic_routing[idx].gsi_end = gsi_base +
-+          io_apic_get_redir_entries(idx);
-+
-+      printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, "
-+             "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid,
-+             mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr,
-+             mp_ioapic_routing[idx].gsi_base, mp_ioapic_routing[idx].gsi_end);
-+
-+      nr_ioapics++;
-+}
-+
-+void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
-+{
-+      struct mpc_config_intsrc intsrc;
-+      int ioapic = -1;
-+      int pin = -1;
-+
-+      /*
-+       * Convert 'gsi' to 'ioapic.pin'.
-+       */
-+      ioapic = mp_find_ioapic(gsi);
-+      if (ioapic < 0)
-+              return;
-+      pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
-+
-+      /*
-+       * TBD: This check is for faulty timer entries, where the override
-+       *      erroneously sets the trigger to level, resulting in a HUGE
-+       *      increase of timer interrupts!
-+       */
-+      if ((bus_irq == 0) && (trigger == 3))
-+              trigger = 1;
-+
-+      intsrc.mpc_type = MP_INTSRC;
-+      intsrc.mpc_irqtype = mp_INT;
-+      intsrc.mpc_irqflag = (trigger << 2) | polarity;
-+      intsrc.mpc_srcbus = MP_ISA_BUS;
-+      intsrc.mpc_srcbusirq = bus_irq; /* IRQ */
-+      intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid;     /* APIC ID */
-+      intsrc.mpc_dstirq = pin;        /* INTIN# */
-+
-+      MP_intsrc_info(&intsrc);
-+}
-+
-+void __init mp_config_acpi_legacy_irqs(void)
-+{
-+      struct mpc_config_intsrc intsrc;
-+      int i = 0;
-+      int ioapic = -1;
-+
-+#if defined (CONFIG_MCA) || defined (CONFIG_EISA)
-+      /*
-+       * Fabricate the legacy ISA bus (bus #0, i.e. MP_ISA_BUS).
-+       */
-+      mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA;
-+#endif
-+      set_bit(MP_ISA_BUS, mp_bus_not_pci);
-+      Dprintk("Bus #%d is ISA\n", MP_ISA_BUS);
-+
-+      /*
-+       * Older generations of ES7000 have no legacy identity mappings
-+       */
-+      if (es7000_plat == 1)
-+              return;
-+
-+      /*
-+       * Locate the IOAPIC that manages the ISA IRQs (0-15).
-+       */
-+      ioapic = mp_find_ioapic(0);
-+      if (ioapic < 0)
-+              return;
-+
-+      intsrc.mpc_type = MP_INTSRC;
-+      intsrc.mpc_irqflag = 0; /* Conforming */
-+      intsrc.mpc_srcbus = MP_ISA_BUS;
-+#ifdef CONFIG_X86_IO_APIC
-+      intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid;
-+#endif
-+      /*
-+       * Use the default configuration for the IRQs 0-15.  Unless
-+       * overridden by (MADT) interrupt source override entries.
-+       */
-+      for (i = 0; i < 16; i++) {
-+              int idx;
-+
-+              for (idx = 0; idx < mp_irq_entries; idx++) {
-+                      struct mpc_config_intsrc *irq = mp_irqs + idx;
-+
-+                      /* Do we already have a mapping for this ISA IRQ? */
-+                      if (irq->mpc_srcbus == MP_ISA_BUS
-+                          && irq->mpc_srcbusirq == i)
-+                              break;
-+
-+                      /* Do we already have a mapping for this IOAPIC pin? */
-+                      if ((irq->mpc_dstapic == intsrc.mpc_dstapic) &&
-+                          (irq->mpc_dstirq == i))
-+                              break;
-+              }
-+
-+              if (idx != mp_irq_entries) {
-+                      printk(KERN_DEBUG "ACPI: IRQ%d used by override.\n", i);
-+                      continue;       /* IRQ already used */
-+              }
-+
-+              intsrc.mpc_irqtype = mp_INT;
-+              intsrc.mpc_srcbusirq = i;       /* Identity mapped */
-+              intsrc.mpc_dstirq = i;
-+
-+              MP_intsrc_info(&intsrc);
-+      }
-+}
-+
-+int mp_register_gsi(u32 gsi, int triggering, int polarity)
-+{
-+      int ioapic;
-+      int ioapic_pin;
-+#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN)
-+#define MAX_GSI_NUM   4096
-+#define IRQ_COMPRESSION_START 64
-+
-+      static int pci_irq = IRQ_COMPRESSION_START;
-+      /*
-+       * Mapping between Global System Interrupts, which
-+       * represent all possible interrupts, and IRQs
-+       * assigned to actual devices.
-+       */
-+      static int gsi_to_irq[MAX_GSI_NUM];
-+#else
-+
-+      if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC)
-+              return gsi;
-+#endif
-+
-+      /* Don't set up the ACPI SCI because it's already set up */
-+      if (acpi_gbl_FADT.sci_interrupt == gsi)
-+              return gsi;
-+
-+      ioapic = mp_find_ioapic(gsi);
-+      if (ioapic < 0) {
-+              printk(KERN_WARNING "No IOAPIC for GSI %u\n", gsi);
-+              return gsi;
-+      }
-+
-+      ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
-+
-+#ifndef CONFIG_X86_32
-+      if (ioapic_renumber_irq)
-+              gsi = ioapic_renumber_irq(ioapic, gsi);
-+#endif
-+
-+      /*
-+       * Avoid pin reprogramming.  PRTs typically include entries
-+       * with redundant pin->gsi mappings (but unique PCI devices);
-+       * we only program the IOAPIC on the first.
-+       */
-+      if (ioapic_pin > MP_MAX_IOAPIC_PIN) {
-+              printk(KERN_ERR "Invalid reference to IOAPIC pin "
-+                     "%d-%d\n", mp_ioapic_routing[ioapic].apic_id,
-+                     ioapic_pin);
-+              return gsi;
-+      }
-+      if (test_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed)) {
-+              Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n",
-+                      mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
-+#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN)
-+              return (gsi < IRQ_COMPRESSION_START ? gsi : gsi_to_irq[gsi]);
-+#else
-+              return gsi;
-+#endif
-+      }
-+
-+      set_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed);
-+#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN)
-+      /*
-+       * For GSI >= 64, use IRQ compression
-+       */
-+      if ((gsi >= IRQ_COMPRESSION_START)
-+          && (triggering == ACPI_LEVEL_SENSITIVE)) {
-+              /*
-+               * For PCI devices assign IRQs in order, avoiding gaps
-+               * due to unused I/O APIC pins.
-+               */
-+              int irq = gsi;
-+              if (gsi < MAX_GSI_NUM) {
-+                      /*
-+                       * Retain the VIA chipset work-around (gsi > 15), but
-+                       * avoid a problem where the 8254 timer (IRQ0) is set up
-+                       * via an override (so it's not on pin 0 of the ioapic),
-+                       * and at the same time, the pin 0 interrupt is a PCI
-+                       * type.  The gsi > 15 test could cause these two pins
-+                       * to be shared as IRQ0, and they are not shareable.
-+                       * So test for this condition, and if necessary, avoid
-+                       * the pin collision.
-+                       */
-+                      gsi = pci_irq++;
-+                      /*
-+                       * Don't assign IRQ used by ACPI SCI
-+                       */
-+                      if (gsi == acpi_gbl_FADT.sci_interrupt)
-+                              gsi = pci_irq++;
-+                      gsi_to_irq[irq] = gsi;
-+              } else {
-+                      printk(KERN_ERR "GSI %u is too high\n", gsi);
-+                      return gsi;
-+              }
-+      }
-+#endif
-+      io_apic_set_pci_routing(ioapic, ioapic_pin, gsi,
-+                              triggering == ACPI_EDGE_SENSITIVE ? 0 : 1,
-+                              polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
-+      return gsi;
-+}
-+
-+#endif /* CONFIG_X86_IO_APIC */
-+#endif /* CONFIG_ACPI */
---- a/arch/x86/kernel/pci-dma-xen.c
-+++ b/arch/x86/kernel/pci-dma-xen.c
-@@ -1,283 +1,251 @@
--/*
-- * Dynamic DMA mapping support.
-- *
-- * On i386 there is no hardware dynamic DMA address translation,
-- * so consistent alloc/free are merely page allocation/freeing.
-- * The rest of the dynamic DMA mapping interface is implemented
-- * in asm/pci.h.
-- */
--
--#include <linux/types.h>
--#include <linux/mm.h>
--#include <linux/string.h>
-+#include <linux/dma-mapping.h>
-+#include <linux/dmar.h>
-+#include <linux/bootmem.h>
- #include <linux/pci.h>
--#include <linux/module.h>
--#include <linux/version.h>
--#include <asm/io.h>
--#include <xen/balloon.h>
--#include <xen/gnttab.h>
--#include <asm/swiotlb.h>
--#include <asm/tlbflush.h>
--#include <asm/swiotlb_32.h>
--#include <asm/gnttab_dma.h>
--#include <asm/bug.h>
- 
--#ifdef __x86_64__
--#include <asm/iommu.h>
-+#include <asm/proto.h>
-+#include <asm/dma.h>
-+#include <asm/gart.h>
-+#include <asm/calgary.h>
-+
-+int forbid_dac __read_mostly;
-+EXPORT_SYMBOL(forbid_dac);
-+
-+const struct dma_mapping_ops *dma_ops;
-+EXPORT_SYMBOL(dma_ops);
-+
-+static int iommu_sac_force __read_mostly;
-+
-+#ifdef CONFIG_IOMMU_DEBUG
-+int panic_on_overflow __read_mostly = 1;
-+int force_iommu __read_mostly = 1;
-+#else
-+int panic_on_overflow __read_mostly = 0;
-+int force_iommu __read_mostly = 0;
-+#endif
- 
- int iommu_merge __read_mostly = 0;
--EXPORT_SYMBOL(iommu_merge);
- 
--dma_addr_t bad_dma_address __read_mostly;
--EXPORT_SYMBOL(bad_dma_address);
-+int no_iommu __read_mostly;
-+/* Set this to 1 if there is a HW IOMMU in the system */
-+int iommu_detected __read_mostly = 0;
- 
- /* This tells the BIO block layer to assume merging. Default to off
-    because we cannot guarantee merging later. */
- int iommu_bio_merge __read_mostly = 0;
- EXPORT_SYMBOL(iommu_bio_merge);
- 
--int force_iommu __read_mostly= 0;
-+dma_addr_t bad_dma_address __read_mostly = 0;
-+EXPORT_SYMBOL(bad_dma_address);
- 
--__init int iommu_setup(char *p)
--{
--    return 1;
--}
-+/* Dummy device used for NULL arguments (normally ISA). A smaller DMA
-+   mask would probably be better, but this is bug-for-bug compatible
-+   with older i386. */
-+struct device fallback_dev = {
-+      .bus_id = "fallback device",
-+      .coherent_dma_mask = DMA_32BIT_MASK,
-+      .dma_mask = &fallback_dev.coherent_dma_mask,
-+};
- 
--void __init pci_iommu_alloc(void)
-+int dma_set_mask(struct device *dev, u64 mask)
- {
--#ifdef CONFIG_SWIOTLB
--      pci_swiotlb_init();
--#endif
--}
-+      if (!dev->dma_mask || !dma_supported(dev, mask))
-+              return -EIO;
-+
-+      *dev->dma_mask = mask;
- 
--static int __init pci_iommu_init(void)
--{
--      no_iommu_init();
-       return 0;
- }
-+EXPORT_SYMBOL(dma_set_mask);
- 
--/* Must execute after PCI subsystem */
--fs_initcall(pci_iommu_init);
--#endif
--
--struct dma_coherent_mem {
--      void            *virt_base;
--      u32             device_base;
--      int             size;
--      int             flags;
--      unsigned long   *bitmap;
--};
--
--#define IOMMU_BUG_ON(test)                            \
--do {                                                  \
--      if (unlikely(test)) {                           \
--              printk(KERN_ALERT "Fatal DMA error! "   \
--                     "Please use 'swiotlb=force'\n"); \
--              BUG();                                  \
--      }                                               \
--} while (0)
-+#if defined(CONFIG_X86_64) && !defined(CONFIG_XEN)
-+static __initdata void *dma32_bootmem_ptr;
-+static unsigned long dma32_bootmem_size __initdata = (128ULL<<20);
- 
--static int check_pages_physically_contiguous(unsigned long pfn, 
--                                           unsigned int offset,
--                                           size_t length)
-+static int __init parse_dma32_size_opt(char *p)
- {
--      unsigned long next_mfn;
--      int i;
--      int nr_pages;
--      
--      next_mfn = pfn_to_mfn(pfn);
--      nr_pages = (offset + length + PAGE_SIZE-1) >> PAGE_SHIFT;
--      
--      for (i = 1; i < nr_pages; i++) {
--              if (pfn_to_mfn(++pfn) != ++next_mfn) 
--                      return 0;
--      }
--      return 1;
-+      if (!p)
-+              return -EINVAL;
-+      dma32_bootmem_size = memparse(p, &p);
-+      return 0;
- }
-+early_param("dma32_size", parse_dma32_size_opt);
- 
--int range_straddles_page_boundary(paddr_t p, size_t size)
-+void __init dma32_reserve_bootmem(void)
- {
--      unsigned long pfn = p >> PAGE_SHIFT;
--      unsigned int offset = p & ~PAGE_MASK;
-+      unsigned long size, align;
-+      if (end_pfn <= MAX_DMA32_PFN)
-+              return;
- 
--      return ((offset + size > PAGE_SIZE) &&
--              !check_pages_physically_contiguous(pfn, offset, size));
-+      align = 64ULL<<20;
-+      size = round_up(dma32_bootmem_size, align);
-+      dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align,
-+                               __pa(MAX_DMA_ADDRESS));
-+      if (dma32_bootmem_ptr)
-+              dma32_bootmem_size = size;
-+      else
-+              dma32_bootmem_size = 0;
- }
--
--int
--dma_map_sg(struct device *hwdev, struct scatterlist *sgl, int nents,
--         enum dma_data_direction direction)
-+static void __init dma32_free_bootmem(void)
- {
--      int i, rc;
-+      int node;
-+
-+      if (end_pfn <= MAX_DMA32_PFN)
-+              return;
- 
--      BUG_ON(!valid_dma_direction(direction));
--      WARN_ON(nents == 0 || sgl->length == 0);
-+      if (!dma32_bootmem_ptr)
-+              return;
- 
--      if (swiotlb) {
--              rc = swiotlb_map_sg(hwdev, sgl, nents, direction);
--      } else {
--              struct scatterlist *sg;
--
--              for_each_sg(sgl, sg, nents, i) {
--                      BUG_ON(!sg_page(sg));
--                      sg->dma_address =
--                              gnttab_dma_map_page(sg_page(sg)) + sg->offset;
--                      sg->dma_length  = sg->length;
--                      IOMMU_BUG_ON(address_needs_mapping(
--                              hwdev, sg->dma_address));
--                      IOMMU_BUG_ON(range_straddles_page_boundary(
--                              page_to_pseudophys(sg_page(sg)) + sg->offset,
--                              sg->length));
--              }
--              rc = nents;
--      }
-+      for_each_online_node(node)
-+              free_bootmem_node(NODE_DATA(node), __pa(dma32_bootmem_ptr),
-+                                dma32_bootmem_size);
- 
--      flush_write_buffers();
--      return rc;
-+      dma32_bootmem_ptr = NULL;
-+      dma32_bootmem_size = 0;
- }
--EXPORT_SYMBOL(dma_map_sg);
-+#else
-+#define dma32_free_bootmem() ((void)0)
-+#endif
- 
--void
--dma_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nents,
--           enum dma_data_direction direction)
--{
--      int i;
-+static const struct dma_mapping_ops swiotlb_dma_ops = {
-+      .mapping_error = swiotlb_dma_mapping_error,
-+      .map_single = swiotlb_map_single_phys,
-+      .unmap_single = swiotlb_unmap_single,
-+      .sync_single_for_cpu = swiotlb_sync_single_for_cpu,
-+      .sync_single_for_device = swiotlb_sync_single_for_device,
-+      .sync_single_range_for_cpu = swiotlb_sync_single_range_for_cpu,
-+      .sync_single_range_for_device = swiotlb_sync_single_range_for_device,
-+      .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
-+      .sync_sg_for_device = swiotlb_sync_sg_for_device,
-+      .map_sg = swiotlb_map_sg,
-+      .unmap_sg = swiotlb_unmap_sg,
-+      .dma_supported = swiotlb_dma_supported
-+};
- 
--      BUG_ON(!valid_dma_direction(direction));
--      if (swiotlb)
--              swiotlb_unmap_sg(hwdev, sgl, nents, direction);
--      else {
--              struct scatterlist *sg;
-+void __init pci_iommu_alloc(void)
-+{
-+      /* free the range so iommu could get some range less than 4G */
-+      dma32_free_bootmem();
-+      /*
-+       * The order of these functions is important for
-+       * fall-back/fail-over reasons
-+       */
-+#ifdef CONFIG_GART_IOMMU
-+      gart_iommu_hole_init();
-+#endif
- 
--              for_each_sg(sgl, sg, nents, i)
--                      gnttab_dma_unmap_page(sg->dma_address);
--      }
--}
--EXPORT_SYMBOL(dma_unmap_sg);
-+#ifdef CONFIG_CALGARY_IOMMU
-+      detect_calgary();
-+#endif
- 
--#ifdef CONFIG_HIGHMEM
--dma_addr_t
--dma_map_page(struct device *dev, struct page *page, unsigned long offset,
--           size_t size, enum dma_data_direction direction)
--{
--      dma_addr_t dma_addr;
-+      detect_intel_iommu();
- 
--      BUG_ON(!valid_dma_direction(direction));
-+#ifdef CONFIG_SWIOTLB
-+      swiotlb_init();
-       if (swiotlb) {
--              dma_addr = swiotlb_map_page(
--                      dev, page, offset, size, direction);
--      } else {
--              dma_addr = gnttab_dma_map_page(page) + offset;
--              IOMMU_BUG_ON(address_needs_mapping(dev, dma_addr));
-+              printk(KERN_INFO "PCI-DMA: Using software bounce buffering for IO (SWIOTLB)\n");
-+              dma_ops = &swiotlb_dma_ops;
-       }
--
--      return dma_addr;
-+#endif
- }
--EXPORT_SYMBOL(dma_map_page);
- 
--void
--dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
--             enum dma_data_direction direction)
-+/*
-+ * See <Documentation/x86_64/boot-options.txt> for the iommu kernel parameter
-+ * documentation.
-+ */
-+static __init int iommu_setup(char *p)
- {
--      BUG_ON(!valid_dma_direction(direction));
--      if (swiotlb)
--              swiotlb_unmap_page(dev, dma_address, size, direction);
--      else
--              gnttab_dma_unmap_page(dma_address);
--}
--EXPORT_SYMBOL(dma_unmap_page);
--#endif /* CONFIG_HIGHMEM */
-+      iommu_merge = 1;
- 
--int
--dma_mapping_error(dma_addr_t dma_addr)
--{
--      if (swiotlb)
--              return swiotlb_dma_mapping_error(dma_addr);
--      return 0;
--}
--EXPORT_SYMBOL(dma_mapping_error);
-+      if (!p)
-+              return -EINVAL;
- 
--int
--dma_supported(struct device *dev, u64 mask)
--{
--      if (swiotlb)
--              return swiotlb_dma_supported(dev, mask);
--      /*
--       * By default we'll BUG when an infeasible DMA is requested, and
--       * request swiotlb=force (see IOMMU_BUG_ON).
--       */
--      return 1;
--}
--EXPORT_SYMBOL(dma_supported);
-+      while (*p) {
-+              if (!strncmp(p, "off", 3))
-+                      no_iommu = 1;
-+              /* gart_parse_options has more force support */
-+              if (!strncmp(p, "force", 5))
-+                      force_iommu = 1;
-+              if (!strncmp(p, "noforce", 7)) {
-+                      iommu_merge = 0;
-+                      force_iommu = 0;
-+              }
- 
--void *dma_alloc_coherent(struct device *dev, size_t size,
--                         dma_addr_t *dma_handle, gfp_t gfp)
--{
--      void *ret;
--      struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
--      unsigned int order = get_order(size);
--      unsigned long vstart;
--      u64 mask;
-+              if (!strncmp(p, "biomerge", 8)) {
-+                      iommu_bio_merge = 4096;
-+                      iommu_merge = 1;
-+                      force_iommu = 1;
-+              }
-+              if (!strncmp(p, "panic", 5))
-+                      panic_on_overflow = 1;
-+              if (!strncmp(p, "nopanic", 7))
-+                      panic_on_overflow = 0;
-+              if (!strncmp(p, "merge", 5)) {
-+                      iommu_merge = 1;
-+                      force_iommu = 1;
-+              }
-+              if (!strncmp(p, "nomerge", 7))
-+                      iommu_merge = 0;
-+              if (!strncmp(p, "forcesac", 8))
-+                      iommu_sac_force = 1;
-+              if (!strncmp(p, "allowdac", 8))
-+                      forbid_dac = 0;
-+              if (!strncmp(p, "nodac", 5))
-+                      forbid_dac = -1;
-+              if (!strncmp(p, "usedac", 6)) {
-+                      forbid_dac = -1;
-+                      return 1;
-+              }
-+#ifdef CONFIG_SWIOTLB
-+              if (!strncmp(p, "soft", 4))
-+                      swiotlb = 1;
-+#endif
- 
--      /* ignore region specifiers */
--      gfp &= ~(__GFP_DMA | __GFP_HIGHMEM);
-+#ifdef CONFIG_GART_IOMMU
-+              gart_parse_options(p);
-+#endif
- 
--      if (mem) {
--              int page = bitmap_find_free_region(mem->bitmap, mem->size,
--                                                   order);
--              if (page >= 0) {
--                      *dma_handle = mem->device_base + (page << PAGE_SHIFT);
--                      ret = mem->virt_base + (page << PAGE_SHIFT);
--                      memset(ret, 0, size);
--                      return ret;
--              }
--              if (mem->flags & DMA_MEMORY_EXCLUSIVE)
--                      return NULL;
-+#ifdef CONFIG_CALGARY_IOMMU
-+              if (!strncmp(p, "calgary", 7))
-+                      use_calgary = 1;
-+#endif /* CONFIG_CALGARY_IOMMU */
-+
-+              p += strcspn(p, ",");
-+              if (*p == ',')
-+                      ++p;
-       }
-+      return 0;
-+}
-+early_param("iommu", iommu_setup);
- 
--      if (dev == NULL || (dev->coherent_dma_mask < 0xffffffff))
--              gfp |= GFP_DMA;
--
--      vstart = __get_free_pages(gfp, order);
--      ret = (void *)vstart;
-+static int check_pages_physically_contiguous(unsigned long pfn,
-+                                           unsigned int offset,
-+                                           size_t length)
-+{
-+      unsigned long next_mfn;
-+      int i;
-+      int nr_pages;
- 
--      if (dev != NULL && dev->coherent_dma_mask)
--              mask = dev->coherent_dma_mask;
--      else
--              mask = 0xffffffff;
-+      next_mfn = pfn_to_mfn(pfn);
-+      nr_pages = (offset + length + PAGE_SIZE-1) >> PAGE_SHIFT;
- 
--      if (ret != NULL) {
--              if (xen_create_contiguous_region(vstart, order,
--                                               fls64(mask)) != 0) {
--                      free_pages(vstart, order);
--                      return NULL;
--              }
--              memset(ret, 0, size);
--              *dma_handle = virt_to_bus(ret);
-+      for (i = 1; i < nr_pages; i++) {
-+              if (pfn_to_mfn(++pfn) != ++next_mfn)
-+                      return 0;
-       }
--      return ret;
-+      return 1;
- }
--EXPORT_SYMBOL(dma_alloc_coherent);
- 
--void dma_free_coherent(struct device *dev, size_t size,
--                       void *vaddr, dma_addr_t dma_handle)
-+int range_straddles_page_boundary(paddr_t p, size_t size)
- {
--      struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
--      int order = get_order(size);
--
--      WARN_ON(irqs_disabled());       /* for portability */
--      if (mem && vaddr >= mem->virt_base && vaddr < (mem->virt_base + (mem->size << PAGE_SHIFT))) {
--              int page = (vaddr - mem->virt_base) >> PAGE_SHIFT;
-+      unsigned long pfn = p >> PAGE_SHIFT;
-+      unsigned int offset = p & ~PAGE_MASK;
- 
--              bitmap_release_region(mem->bitmap, page, order);
--      } else {
--              xen_destroy_contiguous_region((unsigned long)vaddr, order);
--              free_pages((unsigned long)vaddr, order);
--      }
-+      return ((offset + size > PAGE_SIZE) &&
-+              !check_pages_physically_contiguous(pfn, offset, size));
- }
--EXPORT_SYMBOL(dma_free_coherent);
- 
--#ifdef ARCH_HAS_DMA_DECLARE_COHERENT_MEMORY
-+#ifdef CONFIG_X86_32
- int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
-                               dma_addr_t device_addr, size_t size, int flags)
- {
-@@ -327,8 +295,8 @@ EXPORT_SYMBOL(dma_declare_coherent_memor
- void dma_release_declared_memory(struct device *dev)
- {
-       struct dma_coherent_mem *mem = dev->dma_mem;
--      
--      if(!mem)
-+
-+      if (!mem)
-               return;
-       dev->dma_mem = NULL;
-       iounmap(mem->virt_base);
-@@ -341,8 +309,10 @@ void *dma_mark_declared_memory_occupied(
-                                       dma_addr_t device_addr, size_t size)
- {
-       struct dma_coherent_mem *mem = dev->dma_mem;
--      int pages = (size + (device_addr & ~PAGE_MASK) + PAGE_SIZE - 1) >> PAGE_SHIFT;
-       int pos, err;
-+      int pages = (size + (device_addr & ~PAGE_MASK) + PAGE_SIZE - 1);
-+
-+      pages >>= PAGE_SHIFT;
- 
-       if (!mem)
-               return ERR_PTR(-EINVAL);
-@@ -354,103 +324,270 @@ void *dma_mark_declared_memory_occupied(
-       return mem->virt_base + (pos << PAGE_SHIFT);
- }
- EXPORT_SYMBOL(dma_mark_declared_memory_occupied);
--#endif /* ARCH_HAS_DMA_DECLARE_COHERENT_MEMORY */
--
--#if defined(CONFIG_PCI) && !defined(CONFIG_XEN)
--/* Many VIA bridges seem to corrupt data for DAC. Disable it here */
- 
--int forbid_dac;
--EXPORT_SYMBOL(forbid_dac);
--
--static __devinit void via_no_dac(struct pci_dev *dev)
-+static int dma_alloc_from_coherent_mem(struct device *dev, ssize_t size,
-+                                     dma_addr_t *dma_handle, void **ret)
- {
--      if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) {
--              printk(KERN_INFO "PCI: VIA PCI bridge detected. Disabling DAC.\n");
--              forbid_dac = 1;
-+      struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
-+      int order = get_order(size);
-+
-+      if (mem) {
-+              int page = bitmap_find_free_region(mem->bitmap, mem->size,
-+                                                   order);
-+              if (page >= 0) {
-+                      *dma_handle = mem->device_base + (page << PAGE_SHIFT);
-+                      *ret = mem->virt_base + (page << PAGE_SHIFT);
-+                      memset(*ret, 0, size);
-+              }
-+              if (mem->flags & DMA_MEMORY_EXCLUSIVE)
-+                      *ret = NULL;
-       }
-+      return (mem != NULL);
- }
--DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID, via_no_dac);
- 
--static int check_iommu(char *s)
-+static int dma_release_coherent(struct device *dev, int order, void *vaddr)
- {
--      if (!strcmp(s, "usedac")) {
--              forbid_dac = -1;
-+      struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
-+
-+      if (mem && vaddr >= mem->virt_base && vaddr <
-+                 (mem->virt_base + (mem->size << PAGE_SHIFT))) {
-+              int page = (vaddr - mem->virt_base) >> PAGE_SHIFT;
-+
-+              bitmap_release_region(mem->bitmap, page, order);
-               return 1;
-       }
-       return 0;
- }
--__setup("iommu=", check_iommu);
-+#else
-+#define dma_alloc_from_coherent_mem(dev, size, handle, ret) (0)
-+#define dma_release_coherent(dev, order, vaddr) (0)
-+#endif /* CONFIG_X86_32 */
-+
-+int dma_supported(struct device *dev, u64 mask)
-+{
-+#ifdef CONFIG_PCI
-+      if (mask > 0xffffffff && forbid_dac > 0) {
-+              printk(KERN_INFO "PCI: Disallowing DAC for device %s\n",
-+                               dev->bus_id);
-+              return 0;
-+      }
- #endif
- 
--dma_addr_t
--dma_map_single(struct device *dev, void *ptr, size_t size,
--             enum dma_data_direction direction)
-+      if (dma_ops->dma_supported)
-+              return dma_ops->dma_supported(dev, mask);
-+
-+      /* Copied from i386. Doesn't make much sense, because it will
-+         only work for pci_alloc_coherent.
-+         The caller just has to use GFP_DMA in this case. */
-+      if (mask < DMA_24BIT_MASK)
-+              return 0;
-+
-+      /* Tell the device to use SAC when IOMMU force is on.  This
-+         allows the driver to use cheaper accesses in some cases.
-+
-+         Problem with this is that if we overflow the IOMMU area and
-+         return DAC as fallback address the device may not handle it
-+         correctly.
-+
-+         As a special case some controllers have a 39bit address
-+         mode that is as efficient as 32bit (aic79xx). Don't force
-+         SAC for these.  Assume all masks <= 40 bits are of this
-+         type. Normally this doesn't make any difference, but gives
-+         more gentle handling of IOMMU overflow. */
-+      if (iommu_sac_force && (mask >= DMA_40BIT_MASK)) {
-+              printk(KERN_INFO "%s: Force SAC with mask %Lx\n",
-+                               dev->bus_id, mask);
-+              return 0;
-+      }
-+
-+      return 1;
-+}
-+EXPORT_SYMBOL(dma_supported);
-+
-+/* Allocate DMA memory on node near device */
-+static struct page *
-+dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order)
- {
--      dma_addr_t dma;
-+      int node;
- 
--      BUG_ON(!valid_dma_direction(direction));
--      WARN_ON(size == 0);
-+      node = dev_to_node(dev);
- 
--      if (swiotlb) {
--              dma = swiotlb_map_single(dev, ptr, size, direction);
--      } else {
--              dma = gnttab_dma_map_page(virt_to_page(ptr)) +
--                    offset_in_page(ptr);
--              IOMMU_BUG_ON(range_straddles_page_boundary(__pa(ptr), size));
--              IOMMU_BUG_ON(address_needs_mapping(dev, dma));
--      }
--
--      flush_write_buffers();
--      return dma;
--}
--EXPORT_SYMBOL(dma_map_single);
--
--void
--dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
--               enum dma_data_direction direction)
--{
--      BUG_ON(!valid_dma_direction(direction));
--      if (swiotlb)
--              swiotlb_unmap_single(dev, dma_addr, size, direction);
--      else
--              gnttab_dma_unmap_page(dma_addr);
-+      return alloc_pages_node(node, gfp, order);
-+}
-+
-+/*
-+ * Allocate memory for a coherent mapping.
-+ */
-+void *
-+dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
-+                 gfp_t gfp)
-+{
-+      void *memory = NULL;
-+      struct page *page;
-+      unsigned long dma_mask = 0;
-+      int noretry = 0;
-+      unsigned int order = get_order(size);
-+
-+      /* ignore region specifiers */
-+      gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
-+
-+      if (dma_alloc_from_coherent_mem(dev, size, dma_handle, &memory))
-+              return memory;
-+
-+      if (!dev) {
-+              dev = &fallback_dev;
-+              gfp |= GFP_DMA;
-+      }
-+      dma_mask = dev->coherent_dma_mask;
-+      if (dma_mask == 0)
-+              dma_mask = (gfp & GFP_DMA) ? DMA_24BIT_MASK : DMA_32BIT_MASK;
-+
-+      /* Device not DMA able */
-+      if (dev->dma_mask == NULL)
-+              return NULL;
-+
-+      /* Don't invoke OOM killer or retry in lower 16MB DMA zone */
-+      if (gfp & __GFP_DMA)
-+              noretry = 1;
-+
-+#ifdef CONFIG_XEN
-+      gfp &= ~(__GFP_DMA | __GFP_DMA32);
-+#else
-+#ifdef CONFIG_X86_64
-+      /* Why <=? Even when the mask is smaller than 4GB it is often
-+         larger than 16MB and in this case we have a chance of
-+         finding fitting memory in the next higher zone first. If
-+         not retry with true GFP_DMA. -AK */
-+      if (dma_mask <= DMA_32BIT_MASK && !(gfp & GFP_DMA)) {
-+              gfp |= GFP_DMA32;
-+#endif
-+
-+ again:
-+#endif
-+      page = dma_alloc_pages(dev,
-+              noretry ? gfp | __GFP_NORETRY : gfp, order);
-+      if (page == NULL)
-+              return NULL;
-+
-+#ifndef CONFIG_XEN
-+      {
-+              int high, mmu;
-+              dma_addr_t bus = page_to_phys(page);
-+              memory = page_address(page);
-+              high = (bus + size) >= dma_mask;
-+              mmu = high;
-+              if (force_iommu && !(gfp & GFP_DMA))
-+                      mmu = 1;
-+              else if (high) {
-+                      free_pages((unsigned long)memory, order);
-+
-+                      /* Don't use the 16MB ZONE_DMA unless absolutely
-+                         needed. It's better to use remapping first. */
-+                      if (dma_mask < DMA_32BIT_MASK && !(gfp & GFP_DMA)) {
-+                              gfp = (gfp & ~GFP_DMA32) | GFP_DMA;
-+                              goto again;
-+                      }
-+
-+                      /* Let low level make its own zone decisions */
-+                      gfp &= ~(GFP_DMA32|GFP_DMA);
-+
-+                      if (dma_ops->alloc_coherent)
-+                              return dma_ops->alloc_coherent(dev, size,
-+                                                         dma_handle, gfp);
-+                      return NULL;
-+              }
-+
-+              memset(memory, 0, size);
-+              if (!mmu) {
-+                      *dma_handle = bus;
-+                      return memory;
-+              }
-+      }
-+
-+      if (dma_ops->alloc_coherent) {
-+              free_pages((unsigned long)memory, order);
-+              gfp &= ~(GFP_DMA|GFP_DMA32);
-+              return dma_ops->alloc_coherent(dev, size, dma_handle, gfp);
-+      }
-+
-+      if (dma_ops->map_simple) {
-+              *dma_handle = dma_ops->map_simple(dev, virt_to_bus(memory),
-+                                            size,
-+                                            PCI_DMA_BIDIRECTIONAL);
-+              if (*dma_handle != bad_dma_address)
-+                      return memory;
-+      }
-+#else
-+      memory = page_address(page);
-+      if (xen_create_contiguous_region((unsigned long)memory, order,
-+                                       fls64(dma_mask)) == 0) {
-+              memset(memory, 0, size);
-+              *dma_handle = virt_to_bus(memory);
-+              return memory;
-+      }
-+#endif
-+
-+      if (panic_on_overflow)
-+              panic("dma_alloc_coherent: IOMMU overflow by %lu bytes\n",
-+                    (unsigned long)size);
-+      free_pages((unsigned long)memory, order);
-+      return NULL;
- }
--EXPORT_SYMBOL(dma_unmap_single);
-+EXPORT_SYMBOL(dma_alloc_coherent);
- 
--void
--dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size,
--                      enum dma_data_direction direction)
-+/*
-+ * Unmap coherent memory.
-+ * The caller must ensure that the device has finished accessing the mapping.
-+ */
-+void dma_free_coherent(struct device *dev, size_t size,
-+                       void *vaddr, dma_addr_t bus)
- {
--      if (swiotlb)
--              swiotlb_sync_single_for_cpu(dev, dma_handle, size, direction);
-+      int order = get_order(size);
-+      WARN_ON(irqs_disabled());       /* for portability */
-+      if (dma_release_coherent(dev, order, vaddr))
-+              return;
-+#ifndef CONFIG_XEN
-+      if (dma_ops->unmap_single)
-+              dma_ops->unmap_single(dev, bus, size, 0);
-+#endif
-+      xen_destroy_contiguous_region((unsigned long)vaddr, order);
-+      free_pages((unsigned long)vaddr, order);
- }
--EXPORT_SYMBOL(dma_sync_single_for_cpu);
-+EXPORT_SYMBOL(dma_free_coherent);
- 
--void
--dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, size_t size,
--                           enum dma_data_direction direction)
-+static int __init pci_iommu_init(void)
- {
--      if (swiotlb)
--              swiotlb_sync_single_for_device(dev, dma_handle, size, direction);
-+#ifdef CONFIG_CALGARY_IOMMU
-+      calgary_iommu_init();
-+#endif
-+
-+      intel_iommu_init();
-+
-+#ifdef CONFIG_GART_IOMMU
-+      gart_iommu_init();
-+#endif
-+
-+      no_iommu_init();
-+      return 0;
- }
--EXPORT_SYMBOL(dma_sync_single_for_device);
- 
--void
--dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems,
--                  enum dma_data_direction direction)
-+void pci_iommu_shutdown(void)
- {
--      if (swiotlb)
--              swiotlb_sync_sg_for_cpu(dev,sg,nelems,direction);
--      flush_write_buffers();
-+      gart_iommu_shutdown();
- }
--EXPORT_SYMBOL(dma_sync_sg_for_cpu);
-+/* Must execute after PCI subsystem */
-+fs_initcall(pci_iommu_init);
-+
-+#ifdef CONFIG_PCI
-+/* Many VIA bridges seem to corrupt data for DAC. Disable it here */
- 
--void
--dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems,
--                  enum dma_data_direction direction)
-+static __devinit void via_no_dac(struct pci_dev *dev)
- {
--      if (swiotlb)
--              swiotlb_sync_sg_for_device(dev,sg,nelems,direction);
--      flush_write_buffers();
-+      if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) {
-+              printk(KERN_INFO "PCI: VIA PCI bridge detected."
-+                               "Disabling DAC.\n");
-+              forbid_dac = 1;
-+      }
- }
--EXPORT_SYMBOL(dma_sync_sg_for_device);
-+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID, via_no_dac);
-+#endif
---- /dev/null
-+++ b/arch/x86/kernel/pci-nommu-xen.c
-@@ -0,0 +1,103 @@
-+#include <linux/dma-mapping.h>
-+#include <linux/dmar.h>
-+#include <linux/bootmem.h>
-+#include <linux/pci.h>
-+
-+#include <xen/gnttab.h>
-+
-+#include <asm/proto.h>
-+#include <asm/dma.h>
-+#include <asm/swiotlb.h>
-+#include <asm/tlbflush.h>
-+#include <asm/gnttab_dma.h>
-+#include <asm/bug.h>
-+
-+#define IOMMU_BUG_ON(test)                            \
-+do {                                                  \
-+      if (unlikely(test)) {                           \
-+              printk(KERN_ALERT "Fatal DMA error! "   \
-+                     "Please use 'swiotlb=force'\n"); \
-+              BUG();                                  \
-+      }                                               \
-+} while (0)
-+
-+static int
-+gnttab_map_sg(struct device *hwdev, struct scatterlist *sgl, int nents,
-+            int direction)
-+{
-+      unsigned int i;
-+      struct scatterlist *sg;
-+
-+      WARN_ON(nents == 0 || sgl->length == 0);
-+
-+      for_each_sg(sgl, sg, nents, i) {
-+              BUG_ON(!sg_page(sg));
-+              sg->dma_address =
-+                      gnttab_dma_map_page(sg_page(sg)) + sg->offset;
-+              sg->dma_length  = sg->length;
-+              IOMMU_BUG_ON(address_needs_mapping(
-+                      hwdev, sg->dma_address));
-+              IOMMU_BUG_ON(range_straddles_page_boundary(
-+                      page_to_pseudophys(sg_page(sg)) + sg->offset,
-+                      sg->length));
-+      }
-+
-+      return nents;
-+}
-+
-+static void
-+gnttab_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nents,
-+              int direction)
-+{
-+      unsigned int i;
-+      struct scatterlist *sg;
-+
-+      for_each_sg(sgl, sg, nents, i)
-+              gnttab_dma_unmap_page(sg->dma_address);
-+}
-+
-+static dma_addr_t
-+gnttab_map_single(struct device *dev, phys_addr_t paddr, size_t size,
-+                int direction)
-+{
-+      dma_addr_t dma;
-+
-+      WARN_ON(size == 0);
-+
-+      dma = gnttab_dma_map_page(pfn_to_page(paddr >> PAGE_SHIFT)) +
-+            offset_in_page(paddr);
-+      IOMMU_BUG_ON(range_straddles_page_boundary(paddr, size));
-+      IOMMU_BUG_ON(address_needs_mapping(dev, dma));
-+
-+      return dma;
-+}
-+
-+static void
-+gnttab_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
-+                  int direction)
-+{
-+      gnttab_dma_unmap_page(dma_addr);
-+}
-+
-+static int nommu_mapping_error(dma_addr_t dma_addr)
-+{
-+      return (dma_addr == bad_dma_address);
-+}
-+
-+static const struct dma_mapping_ops nommu_dma_ops = {
-+      .map_single = gnttab_map_single,
-+      .unmap_single = gnttab_unmap_single,
-+      .map_sg = gnttab_map_sg,
-+      .unmap_sg = gnttab_unmap_sg,
-+      .dma_supported = swiotlb_dma_supported,
-+      .mapping_error = nommu_mapping_error
-+};
-+
-+void __init no_iommu_init(void)
-+{
-+      if (dma_ops)
-+              return;
-+
-+      force_iommu = 0; /* no HW IOMMU */
-+      dma_ops = &nommu_dma_ops;
-+}
---- a/arch/x86/kernel/process_32-xen.c
-+++ b/arch/x86/kernel/process_32-xen.c
-@@ -36,6 +36,7 @@
- #include <linux/personality.h>
- #include <linux/tick.h>
- #include <linux/percpu.h>
-+#include <linux/prctl.h>
- 
- #include <asm/uaccess.h>
- #include <asm/pgtable.h>
-@@ -45,7 +46,6 @@
- #include <asm/processor.h>
- #include <asm/i387.h>
- #include <asm/desc.h>
--#include <asm/vm86.h>
- #ifdef CONFIG_MATH_EMULATION
- #include <asm/math_emu.h>
- #endif
-@@ -102,16 +102,6 @@ void enable_hlt(void)
- 
- EXPORT_SYMBOL(enable_hlt);
- 
--/*
-- * On SMP it's slightly faster (but much more power-consuming!)
-- * to poll the ->work.need_resched flag instead of waiting for the
-- * cross-CPU IPI to arrive. Use this option with caution.
-- */
--static void poll_idle(void)
--{
--      cpu_relax();
--}
--
- static void xen_idle(void)
- {
-       current_thread_info()->status &= ~TS_POLLING;
-@@ -121,20 +111,10 @@ static void xen_idle(void)
-        */
-       smp_mb();
- 
--      local_irq_disable();
--      if (!need_resched()) {
--              ktime_t t0, t1;
--              u64 t0n, t1n;
--
--              t0 = ktime_get();
--              t0n = ktime_to_ns(t0);
-+      if (!need_resched())
-               safe_halt();    /* enables interrupts racelessly */
--              local_irq_disable();
--              t1 = ktime_get();
--              t1n = ktime_to_ns(t1);
--              sched_clock_idle_wakeup_event(t1n - t0n);
--      }
--      local_irq_enable();
-+      else
-+              local_irq_enable();
-       current_thread_info()->status |= TS_POLLING;
- }
- #ifdef CONFIG_APM_MODULE
-@@ -142,7 +122,6 @@ EXPORT_SYMBOL(default_idle);
- #endif
- 
- #ifdef CONFIG_HOTPLUG_CPU
--extern cpumask_t cpu_initialized;
- static inline void play_dead(void)
- {
-       idle_task_exit();
-@@ -187,6 +166,7 @@ void cpu_idle(void)
-                       if (cpu_is_offline(cpu))
-                               play_dead();
- 
-+                      local_irq_disable();
-                       __get_cpu_var(irq_stat).idle_timestamp = jiffies;
-                       idle();
-               }
-@@ -197,44 +177,6 @@ void cpu_idle(void)
-       }
- }
- 
--static void do_nothing(void *unused)
--{
--}
--
--/*
-- * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
-- * pm_idle and update to new pm_idle value. Required while changing pm_idle
-- * handler on SMP systems.
-- *
-- * Caller must have changed pm_idle to the new value before the call. Old
-- * pm_idle value will not be used by any CPU after the return of this function.
-- */
--void cpu_idle_wait(void)
--{
--      smp_mb();
--      /* kick all the CPUs so that they exit out of pm_idle */
--      smp_call_function(do_nothing, NULL, 0, 1);
--}
--EXPORT_SYMBOL_GPL(cpu_idle_wait);
--
--void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
--{
--}
--
--static int __init idle_setup(char *str)
--{
--      if (!strcmp(str, "poll")) {
--              printk("using polling idle threads.\n");
--              pm_idle = poll_idle;
--      }
--      else
--              return -1;
--
--      boot_option_idle_override = 1;
--      return 0;
--}
--early_param("idle", idle_setup);
--
- void __show_registers(struct pt_regs *regs, int all)
- {
-       unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
-@@ -260,7 +202,7 @@ void __show_registers(struct pt_regs *re
-                       init_utsname()->version);
- 
-       printk("EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n",
--                      0xffff & regs->cs, regs->ip, regs->flags,
-+                      (u16)regs->cs, regs->ip, regs->flags,
-                       smp_processor_id());
-       print_symbol("EIP is at %s\n", regs->ip);
- 
-@@ -269,8 +211,7 @@ void __show_registers(struct pt_regs *re
-       printk("ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n",
-               regs->si, regs->di, regs->bp, sp);
-       printk(" DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x\n",
--             regs->ds & 0xffff, regs->es & 0xffff,
--             regs->fs & 0xffff, gs, ss);
-+             (u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, ss);
- 
-       if (!all)
-               return;
-@@ -367,6 +308,7 @@ void flush_thread(void)
-       /*
-        * Forget coprocessor state..
-        */
-+      tsk->fpu_counter = 0;
-       clear_fpu(tsk);
-       clear_used_math();
- }
-@@ -437,11 +379,30 @@ int copy_thread(int nr, unsigned long cl
-       return err;
- }
- 
--#ifdef CONFIG_SECCOMP
-+void
-+start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
-+{
-+      __asm__("movl %0, %%gs" :: "r"(0));
-+      regs->fs                = 0;
-+      set_fs(USER_DS);
-+      regs->ds                = __USER_DS;
-+      regs->es                = __USER_DS;
-+      regs->ss                = __USER_DS;
-+      regs->cs                = __USER_CS;
-+      regs->ip                = new_ip;
-+      regs->sp                = new_sp;
-+      /*
-+       * Free the old FP and other extended state
-+       */
-+      free_thread_xstate(current);
-+}
-+EXPORT_SYMBOL_GPL(start_thread);
-+
- static void hard_disable_TSC(void)
- {
-       write_cr4(read_cr4() | X86_CR4_TSD);
- }
-+
- void disable_TSC(void)
- {
-       preempt_disable();
-@@ -453,11 +414,47 @@ void disable_TSC(void)
-               hard_disable_TSC();
-       preempt_enable();
- }
-+
- static void hard_enable_TSC(void)
- {
-       write_cr4(read_cr4() & ~X86_CR4_TSD);
- }
--#endif /* CONFIG_SECCOMP */
-+
-+static void enable_TSC(void)
-+{
-+      preempt_disable();
-+      if (test_and_clear_thread_flag(TIF_NOTSC))
-+              /*
-+               * Must flip the CPU state synchronously with
-+               * TIF_NOTSC in the current running context.
-+               */
-+              hard_enable_TSC();
-+      preempt_enable();
-+}
-+
-+int get_tsc_mode(unsigned long adr)
-+{
-+      unsigned int val;
-+
-+      if (test_thread_flag(TIF_NOTSC))
-+              val = PR_TSC_SIGSEGV;
-+      else
-+              val = PR_TSC_ENABLE;
-+
-+      return put_user(val, (unsigned int __user *)adr);
-+}
-+
-+int set_tsc_mode(unsigned int val)
-+{
-+      if (val == PR_TSC_SIGSEGV)
-+              disable_TSC();
-+      else if (val == PR_TSC_ENABLE)
-+              enable_TSC();
-+      else
-+              return -EINVAL;
-+
-+      return 0;
-+}
- 
- static noinline void
- __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p)
-@@ -473,12 +470,12 @@ __switch_to_xtra(struct task_struct *pre
-               /* we clear debugctl to make sure DS
-                * is not in use when we change it */
-               debugctl = 0;
--              wrmsrl(MSR_IA32_DEBUGCTLMSR, 0);
-+              update_debugctlmsr(0);
-               wrmsr(MSR_IA32_DS_AREA, next->ds_area_msr, 0);
-       }
- 
-       if (next->debugctlmsr != debugctl)
--              wrmsr(MSR_IA32_DEBUGCTLMSR, next->debugctlmsr, 0);
-+              update_debugctlmsr(next->debugctlmsr);
- 
-       if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
-               set_debugreg(next->debugreg0, 0);
-@@ -490,7 +487,6 @@ __switch_to_xtra(struct task_struct *pre
-               set_debugreg(next->debugreg7, 7);
-       }
- 
--#ifdef CONFIG_SECCOMP
-       if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
-           test_tsk_thread_flag(next_p, TIF_NOTSC)) {
-               /* prev and next are different */
-@@ -499,7 +495,6 @@ __switch_to_xtra(struct task_struct *pre
-               else
-                       hard_enable_TSC();
-       }
--#endif
- 
- #ifdef X86_BTS
-       if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
-@@ -637,7 +632,7 @@ struct task_struct * __switch_to(struct 
- 
-       /* we're going to use this soon, after a few expensive things */
-       if (next_p->fpu_counter > 5)
--              prefetch(&next->i387.fxsave);
-+              prefetch(next->xstate);
- 
-       /*
-        * Now maybe handle debug registers
-@@ -658,8 +653,11 @@ struct task_struct * __switch_to(struct 
-       /* If the task has used fpu the last 5 timeslices, just do a full
-        * restore of the math state immediately to avoid the trap; the
-        * chances of needing FPU soon are obviously high now
-+       *
-+       * tsk_used_math() checks prevent calling math_state_restore(),
-+       * which can sleep in the case of !tsk_used_math()
-        */
--      if (next_p->fpu_counter > 5)
-+      if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
-               math_state_restore();
- 
-       /*
---- a/arch/x86/kernel/process_64-xen.c
-+++ b/arch/x86/kernel/process_64-xen.c
-@@ -39,6 +39,7 @@
- #include <linux/kprobes.h>
- #include <linux/kdebug.h>
- #include <linux/tick.h>
-+#include <linux/prctl.h>
- 
- #include <asm/uaccess.h>
- #include <asm/pgtable.h>
-@@ -102,17 +103,6 @@ void exit_idle(void)
-       __exit_idle();
- }
- 
--/*
-- * On SMP it's slightly faster (but much more power-consuming!)
-- * to poll the ->need_resched flag instead of waiting for the
-- * cross-CPU IPI to arrive. Use this option with caution.
-- */
--static void poll_idle(void)
--{
--      local_irq_enable();
--      cpu_relax();
--}
--
- static void xen_idle(void)
- {
-       current_thread_info()->status &= ~TS_POLLING;
-@@ -121,20 +111,10 @@ static void xen_idle(void)
-        * test NEED_RESCHED:
-        */
-       smp_mb();
--      local_irq_disable();
--      if (!need_resched()) {
--              ktime_t t0, t1;
--              u64 t0n, t1n;
--
--              t0 = ktime_get();
--              t0n = ktime_to_ns(t0);
-+      if (!need_resched())
-               safe_halt();    /* enables interrupts racelessly */
--              local_irq_disable();
--              t1 = ktime_get();
--              t1n = ktime_to_ns(t1);
--              sched_clock_idle_wakeup_event(t1n - t0n);
--      }
--      local_irq_enable();
-+      else
-+              local_irq_enable();
-       current_thread_info()->status |= TS_POLLING;
- }
- 
-@@ -195,45 +175,6 @@ void cpu_idle(void)
-       }
- }
- 
--static void do_nothing(void *unused)
--{
--}
--
--/*
-- * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
-- * pm_idle and update to new pm_idle value. Required while changing pm_idle
-- * handler on SMP systems.
-- *
-- * Caller must have changed pm_idle to the new value before the call. Old
-- * pm_idle value will not be used by any CPU after the return of this function.
-- */
--void cpu_idle_wait(void)
--{
--      smp_mb();
--      /* kick all the CPUs so that they exit out of pm_idle */
--      smp_call_function(do_nothing, NULL, 0, 1);
--}
--EXPORT_SYMBOL_GPL(cpu_idle_wait);
--
--void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
--{
--}
--
--static int __init idle_setup(char *str)
--{
--      if (!strcmp(str, "poll")) {
--              printk("using polling idle threads.\n");
--              pm_idle = poll_idle;
--      } else if (!strcmp(str, "mwait"))
--              force_mwait = 1;
--      else
--              return -1;
--
--      boot_option_idle_override = 1;
--      return 0;
--}
--early_param("idle", idle_setup);
--
- /* Prints also some state that isn't saved in the pt_regs */
- void __show_regs(struct pt_regs * regs)
- {
-@@ -360,6 +301,7 @@ void flush_thread(void)
-       /*
-        * Forget coprocessor state..
-        */
-+      tsk->fpu_counter = 0;
-       clear_fpu(tsk);
-       clear_used_math();
- }
-@@ -472,6 +414,83 @@ out:
-       return err;
- }
- 
-+void
-+start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
-+{
-+      asm volatile("movl %0, %%fs; movl %0, %%es; movl %0, %%ds" :: "r"(0));
-+      load_gs_index(0);
-+      regs->ip                = new_ip;
-+      regs->sp                = new_sp;
-+      write_pda(oldrsp, new_sp);
-+      regs->cs                = __USER_CS;
-+      regs->ss                = __USER_DS;
-+      regs->flags             = 0x200;
-+      set_fs(USER_DS);
-+      /*
-+       * Free the old FP and other extended state
-+       */
-+      free_thread_xstate(current);
-+}
-+EXPORT_SYMBOL_GPL(start_thread);
-+
-+static void hard_disable_TSC(void)
-+{
-+      write_cr4(read_cr4() | X86_CR4_TSD);
-+}
-+
-+void disable_TSC(void)
-+{
-+      preempt_disable();
-+      if (!test_and_set_thread_flag(TIF_NOTSC))
-+              /*
-+               * Must flip the CPU state synchronously with
-+               * TIF_NOTSC in the current running context.
-+               */
-+              hard_disable_TSC();
-+      preempt_enable();
-+}
-+
-+static void hard_enable_TSC(void)
-+{
-+      write_cr4(read_cr4() & ~X86_CR4_TSD);
-+}
-+
-+static void enable_TSC(void)
-+{
-+      preempt_disable();
-+      if (test_and_clear_thread_flag(TIF_NOTSC))
-+              /*
-+               * Must flip the CPU state synchronously with
-+               * TIF_NOTSC in the current running context.
-+               */
-+              hard_enable_TSC();
-+      preempt_enable();
-+}
-+
-+int get_tsc_mode(unsigned long adr)
-+{
-+      unsigned int val;
-+
-+      if (test_thread_flag(TIF_NOTSC))
-+              val = PR_TSC_SIGSEGV;
-+      else
-+              val = PR_TSC_ENABLE;
-+
-+      return put_user(val, (unsigned int __user *)adr);
-+}
-+
-+int set_tsc_mode(unsigned int val)
-+{
-+      if (val == PR_TSC_SIGSEGV)
-+              disable_TSC();
-+      else if (val == PR_TSC_ENABLE)
-+              enable_TSC();
-+      else
-+              return -EINVAL;
-+
-+      return 0;
-+}
-+
- /*
-  * This special macro can be used to load a debugging register
-  */
-@@ -491,12 +510,12 @@ static inline void __switch_to_xtra(stru
-               /* we clear debugctl to make sure DS
-                * is not in use when we change it */
-               debugctl = 0;
--              wrmsrl(MSR_IA32_DEBUGCTLMSR, 0);
-+              update_debugctlmsr(0);
-               wrmsrl(MSR_IA32_DS_AREA, next->ds_area_msr);
-       }
- 
-       if (next->debugctlmsr != debugctl)
--              wrmsrl(MSR_IA32_DEBUGCTLMSR, next->debugctlmsr);
-+              update_debugctlmsr(next->debugctlmsr);
- 
-       if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
-               loaddebug(next, 0);
-@@ -508,6 +527,15 @@ static inline void __switch_to_xtra(stru
-               loaddebug(next, 7);
-       }
- 
-+      if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
-+          test_tsk_thread_flag(next_p, TIF_NOTSC)) {
-+              /* prev and next are different */
-+              if (test_tsk_thread_flag(next_p, TIF_NOTSC))
-+                      hard_disable_TSC();
-+              else
-+                      hard_enable_TSC();
-+      }
-+
- #ifdef X86_BTS
-       if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
-               ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);
-@@ -547,7 +575,7 @@ __switch_to(struct task_struct *prev_p, 
- 
-       /* we're going to use this soon, after a few expensive things */
-       if (next_p->fpu_counter>5)
--              prefetch(&next->i387.fxsave);
-+              prefetch(next->xstate);
- 
-       /*
-        * This is basically '__unlazy_fpu', except that we queue a
-@@ -680,8 +708,11 @@ __switch_to(struct task_struct *prev_p, 
-       /* If the task has used fpu the last 5 timeslices, just do a full
-        * restore of the math state immediately to avoid the trap; the
-        * chances of needing FPU soon are obviously high now
-+       *
-+       * tsk_used_math() checks prevent calling math_state_restore(),
-+       * which can sleep in the case of !tsk_used_math()
-        */
--      if (next_p->fpu_counter>5)
-+      if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
-               math_state_restore();
-       return prev_p;
- }
---- /dev/null
-+++ b/arch/x86/kernel/process-xen.c
-@@ -0,0 +1,188 @@
-+#include <linux/errno.h>
-+#include <linux/kernel.h>
-+#include <linux/mm.h>
-+#include <linux/smp.h>
-+#include <linux/slab.h>
-+#include <linux/sched.h>
-+#include <linux/module.h>
-+#include <linux/pm.h>
-+
-+struct kmem_cache *task_xstate_cachep;
-+
-+int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
-+{
-+      *dst = *src;
-+      if (src->thread.xstate) {
-+              dst->thread.xstate = kmem_cache_alloc(task_xstate_cachep,
-+                                                    GFP_KERNEL);
-+              if (!dst->thread.xstate)
-+                      return -ENOMEM;
-+              WARN_ON((unsigned long)dst->thread.xstate & 15);
-+              memcpy(dst->thread.xstate, src->thread.xstate, xstate_size);
-+      }
-+      return 0;
-+}
-+
-+void free_thread_xstate(struct task_struct *tsk)
-+{
-+      if (tsk->thread.xstate) {
-+              kmem_cache_free(task_xstate_cachep, tsk->thread.xstate);
-+              tsk->thread.xstate = NULL;
-+      }
-+}
-+
-+void free_thread_info(struct thread_info *ti)
-+{
-+      free_thread_xstate(ti->task);
-+      free_pages((unsigned long)ti, get_order(THREAD_SIZE));
-+}
-+
-+void arch_task_cache_init(void)
-+{
-+        task_xstate_cachep =
-+              kmem_cache_create("task_xstate", xstate_size,
-+                                __alignof__(union thread_xstate),
-+                                SLAB_PANIC, NULL);
-+}
-+
-+static void do_nothing(void *unused)
-+{
-+}
-+
-+/*
-+ * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
-+ * pm_idle and update to new pm_idle value. Required while changing pm_idle
-+ * handler on SMP systems.
-+ *
-+ * Caller must have changed pm_idle to the new value before the call. Old
-+ * pm_idle value will not be used by any CPU after the return of this function.
-+ */
-+void cpu_idle_wait(void)
-+{
-+      smp_mb();
-+      /* kick all the CPUs so that they exit out of pm_idle */
-+      smp_call_function(do_nothing, NULL, 0, 1);
-+}
-+EXPORT_SYMBOL_GPL(cpu_idle_wait);
-+
-+#ifndef CONFIG_XEN
-+/*
-+ * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
-+ * which can obviate IPI to trigger checking of need_resched.
-+ * We execute MONITOR against need_resched and enter optimized wait state
-+ * through MWAIT. Whenever someone changes need_resched, we would be woken
-+ * up from MWAIT (without an IPI).
-+ *
-+ * New with Core Duo processors, MWAIT can take some hints based on CPU
-+ * capability.
-+ */
-+void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
-+{
-+      if (!need_resched()) {
-+              __monitor((void *)&current_thread_info()->flags, 0, 0);
-+              smp_mb();
-+              if (!need_resched())
-+                      __mwait(ax, cx);
-+      }
-+}
-+
-+/* Default MONITOR/MWAIT with no hints, used for default C1 state */
-+static void mwait_idle(void)
-+{
-+      if (!need_resched()) {
-+              __monitor((void *)&current_thread_info()->flags, 0, 0);
-+              smp_mb();
-+              if (!need_resched())
-+                      __sti_mwait(0, 0);
-+              else
-+                      local_irq_enable();
-+      } else
-+              local_irq_enable();
-+}
-+#endif
-+
-+/*
-+ * On SMP it's slightly faster (but much more power-consuming!)
-+ * to poll the ->work.need_resched flag instead of waiting for the
-+ * cross-CPU IPI to arrive. Use this option with caution.
-+ */
-+static void poll_idle(void)
-+{
-+      local_irq_enable();
-+      cpu_relax();
-+}
-+
-+#ifndef CONFIG_XEN
-+/*
-+ * mwait selection logic:
-+ *
-+ * It depends on the CPU. For AMD CPUs that support MWAIT this is
-+ * wrong. Family 0x10 and 0x11 CPUs will enter C1 on HLT. Powersavings
-+ * then depend on a clock divisor and current Pstate of the core. If
-+ * all cores of a processor are in halt state (C1) the processor can
-+ * enter the C1E (C1 enhanced) state. If mwait is used this will never
-+ * happen.
-+ *
-+ * idle=mwait overrides this decision and forces the usage of mwait.
-+ */
-+static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c)
-+{
-+      if (force_mwait)
-+              return 1;
-+
-+      if (c->x86_vendor == X86_VENDOR_AMD) {
-+              switch(c->x86) {
-+              case 0x10:
-+              case 0x11:
-+                      return 0;
-+              }
-+      }
-+      return 1;
-+}
-+#endif
-+
-+void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
-+{
-+#ifndef CONFIG_XEN
-+      static int selected;
-+
-+      if (selected)
-+              return;
-+#ifdef CONFIG_X86_SMP
-+      if (pm_idle == poll_idle && smp_num_siblings > 1) {
-+              printk(KERN_WARNING "WARNING: polling idle and HT enabled,"
-+                      " performance may degrade.\n");
-+      }
-+#endif
-+      if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) {
-+              /*
-+               * Skip, if setup has overridden idle.
-+               * One CPU supports mwait => All CPUs supports mwait
-+               */
-+              if (!pm_idle) {
-+                      printk(KERN_INFO "using mwait in idle threads.\n");
-+                      pm_idle = mwait_idle;
-+              }
-+      }
-+      selected = 1;
-+#endif
-+}
-+
-+static int __init idle_setup(char *str)
-+{
-+      if (!strcmp(str, "poll")) {
-+              printk("using polling idle threads.\n");
-+              pm_idle = poll_idle;
-+      }
-+#ifndef CONFIG_XEN
-+      else if (!strcmp(str, "mwait"))
-+              force_mwait = 1;
-+#endif
-+      else
-+              return -1;
-+
-+      boot_option_idle_override = 1;
-+      return 0;
-+}
-+early_param("idle", idle_setup);
-+
---- a/arch/x86/kernel/setup_32-xen.c
-+++ b/arch/x86/kernel/setup_32-xen.c
-@@ -39,6 +39,7 @@
- #include <linux/efi.h>
- #include <linux/init.h>
- #include <linux/edd.h>
-+#include <linux/iscsi_ibft.h>
- #include <linux/nodemask.h>
- #include <linux/kernel.h>
- #include <linux/percpu.h>
-@@ -49,6 +50,7 @@
- #include <linux/pfn.h>
- #include <linux/pci.h>
- #include <linux/init_ohci1394_dma.h>
-+#include <linux/kvm_para.h>
- 
- #include <video/edid.h>
- 
-@@ -70,8 +72,9 @@
- #include <xen/firmware.h>
- #include <xen/xencons.h>
- #include <setup_arch.h>
--#include <bios_ebda.h>
-+#include <asm/bios_ebda.h>
- #include <asm/cacheflush.h>
-+#include <asm/processor.h>
- 
- #ifdef CONFIG_XEN
- #include <xen/interface/kexec.h>
-@@ -136,7 +139,12 @@ static struct resource standard_io_resou
- }, {
-       .name   = "keyboard",
-       .start  = 0x0060,
--      .end    = 0x006f,
-+      .end    = 0x0060,
-+      .flags  = IORESOURCE_BUSY | IORESOURCE_IO
-+}, {
-+      .name   = "keyboard",
-+      .start  = 0x0064,
-+      .end    = 0x0064,
-       .flags  = IORESOURCE_BUSY | IORESOURCE_IO
- }, {
-       .name   = "dma page reg",
-@@ -166,6 +174,8 @@ struct cpuinfo_x86 new_cpu_data __cpuini
- struct cpuinfo_x86 boot_cpu_data __read_mostly = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
- EXPORT_SYMBOL(boot_cpu_data);
- 
-+unsigned int def_to_bigsmp;
-+
- #ifndef CONFIG_X86_PAE
- unsigned long mmu_cr4_features;
- #else
-@@ -204,7 +214,7 @@ EXPORT_SYMBOL(ist_info);
- extern void early_cpu_init(void);
- extern int root_mountflags;
- 
--unsigned long saved_videomode;
-+unsigned long saved_video_mode;
- 
- #define RAMDISK_IMAGE_START_MASK      0x07FF
- #define RAMDISK_PROMPT_FLAG           0x8000
-@@ -259,7 +269,7 @@ static inline void copy_edd(void)
- }
- #endif
- 
--int __initdata user_defined_memmap = 0;
-+int __initdata user_defined_memmap;
- 
- /*
-  * "mem=nopentium" disables the 4MB page tables.
-@@ -420,20 +430,59 @@ unsigned long __init find_max_low_pfn(vo
- }
- 
- #ifndef CONFIG_XEN
-+#define BIOS_LOWMEM_KILOBYTES 0x413
-+
- /*
-- * workaround for Dell systems that neglect to reserve EBDA
-+ * The BIOS places the EBDA/XBDA at the top of conventional
-+ * memory, and usually decreases the reported amount of
-+ * conventional memory (int 0x12) too. This also contains a
-+ * workaround for Dell systems that neglect to reserve EBDA.
-+ * The same workaround also avoids a problem with the AMD768MPX
-+ * chipset: reserve a page before VGA to prevent PCI prefetch
-+ * into it (errata #56). Usually the page is reserved anyways,
-+ * unless you have no PS/2 mouse plugged in.
-  */
- static void __init reserve_ebda_region(void)
- {
--      unsigned int addr;
--      addr = get_bios_ebda();
--      if (addr)
--              reserve_bootmem(addr, PAGE_SIZE, BOOTMEM_DEFAULT);
-+      unsigned int lowmem, ebda_addr;
-+
-+      /* To determine the position of the EBDA and the */
-+      /* end of conventional memory, we need to look at */
-+      /* the BIOS data area. In a paravirtual environment */
-+      /* that area is absent. We'll just have to assume */
-+      /* that the paravirt case can handle memory setup */
-+      /* correctly, without our help. */
-+      if (paravirt_enabled())
-+              return;
-+
-+      /* end of low (conventional) memory */
-+      lowmem = *(unsigned short *)__va(BIOS_LOWMEM_KILOBYTES);
-+      lowmem <<= 10;
-+
-+      /* start of EBDA area */
-+      ebda_addr = get_bios_ebda();
-+
-+      /* Fixup: bios puts an EBDA in the top 64K segment */
-+      /* of conventional memory, but does not adjust lowmem. */
-+      if ((lowmem - ebda_addr) <= 0x10000)
-+              lowmem = ebda_addr;
-+
-+      /* Fixup: bios does not report an EBDA at all. */
-+      /* Some old Dells seem to need 4k anyhow (bugzilla 2990) */
-+      if ((ebda_addr == 0) && (lowmem >= 0x9f000))
-+              lowmem = 0x9f000;
-+
-+      /* Paranoia: should never happen, but... */
-+      if ((lowmem == 0) || (lowmem >= 0x100000))
-+              lowmem = 0x9f000;
-+
-+      /* reserve all memory between lowmem and the 1MB mark */
-+      reserve_bootmem(lowmem, 0x100000 - lowmem, BOOTMEM_DEFAULT);
- }
- #endif
- 
- #ifndef CONFIG_NEED_MULTIPLE_NODES
--void __init setup_bootmem_allocator(void);
-+static void __init setup_bootmem_allocator(void);
- static unsigned long __init setup_memory(void)
- {
-       /*
-@@ -469,7 +518,7 @@ static unsigned long __init setup_memory
-       return max_low_pfn;
- }
- 
--void __init zone_sizes_init(void)
-+static void __init zone_sizes_init(void)
- {
-       unsigned long max_zone_pfns[MAX_NR_ZONES];
-       memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
-@@ -521,10 +570,16 @@ static void __init reserve_crashkernel(v
-                                       (unsigned long)(crash_size >> 20),
-                                       (unsigned long)(crash_base >> 20),
-                                       (unsigned long)(total_mem >> 20));
-+
-+                      if (reserve_bootmem(crash_base, crash_size,
-+                                      BOOTMEM_EXCLUSIVE) < 0) {
-+                              printk(KERN_INFO "crashkernel reservation "
-+                                      "failed - memory is in use\n");
-+                              return;
-+                      }
-+
-                       crashk_res.start = crash_base;
-                       crashk_res.end   = crash_base + crash_size - 1;
--                      reserve_bootmem(crash_base, crash_size,
--                                      BOOTMEM_DEFAULT);
-               } else
-                       printk(KERN_INFO "crashkernel reservation failed - "
-                                       "you have to specify a base address\n");
-@@ -658,16 +713,9 @@ void __init setup_bootmem_allocator(void
-        */
-       reserve_bootmem(0, PAGE_SIZE, BOOTMEM_DEFAULT);
- 
--      /* reserve EBDA region, it's a 4K region */
-+      /* reserve EBDA region */
-       reserve_ebda_region();
- 
--    /* could be an AMD 768MPX chipset. Reserve a page  before VGA to prevent
--       PCI prefetch into it (errata #56). Usually the page is reserved anyways,
--       unless you have no PS/2 mouse plugged in. */
--      if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
--          boot_cpu_data.x86 == 6)
--           reserve_bootmem(0xa0000 - 4096, 4096, BOOTMEM_DEFAULT);
--
- #ifdef CONFIG_SMP
-       /*
-        * But first pinch a few for the stack/trampoline stuff
-@@ -689,6 +737,8 @@ void __init setup_bootmem_allocator(void
- #endif
-       numa_kva_reserve();
-       reserve_crashkernel();
-+
-+      reserve_ibft_region();
- }
- 
- /*
-@@ -724,6 +774,18 @@ char * __init __attribute__((weak)) memo
-       return machine_specific_memory_setup();
- }
- 
-+#ifdef CONFIG_NUMA
-+/*
-+ * In the golden day, when everything among i386 and x86_64 will be
-+ * integrated, this will not live here
-+ */
-+void *x86_cpu_to_node_map_early_ptr;
-+int x86_cpu_to_node_map_init[NR_CPUS] = {
-+      [0 ... NR_CPUS-1] = NUMA_NO_NODE
-+};
-+DEFINE_PER_CPU(int, x86_cpu_to_node_map) = NUMA_NO_NODE;
-+#endif
-+
- /*
-  * Determine if we were loaded by an EFI loader.  If so, then we have also been
-  * passed the efi memmap, systab, etc., so we should use these data structures
-@@ -773,7 +835,7 @@ void __init setup_arch(char **cmdline_p)
-       copy_edid();
-       apm_info.bios = boot_params.apm_bios_info;
-       ist_info = boot_params.ist_info;
--      saved_videomode = boot_params.hdr.vid_mode;
-+      saved_video_mode = boot_params.hdr.vid_mode;
-       if( boot_params.sys_desc_table.length != 0 ) {
-               set_mca_bus(boot_params.sys_desc_table.table[3] & 0x2);
-               machine_id = boot_params.sys_desc_table.table[0];
-@@ -840,15 +902,19 @@ void __init setup_arch(char **cmdline_p)
-               efi_init();
- 
-       /* update e820 for memory not covered by WB MTRRs */
--      find_max_pfn();
-+      propagate_e820_map();
-       mtrr_bp_init();
- #ifndef CONFIG_XEN
-       if (mtrr_trim_uncached_memory(max_pfn))
--              find_max_pfn();
-+              propagate_e820_map();
- #endif
- 
-       max_low_pfn = setup_memory();
- 
-+#ifdef CONFIG_KVM_CLOCK
-+      kvmclock_init();
-+#endif
-+
- #ifdef CONFIG_VMI
-       /*
-        * Must be after max_low_pfn is determined, and before kernel
-@@ -856,6 +922,7 @@ void __init setup_arch(char **cmdline_p)
-        */
-       vmi_init();
- #endif
-+      kvm_guest_init();
- 
-       /*
-        * NOTE: before this point _nobody_ is allowed to allocate
-@@ -977,6 +1044,18 @@ void __init setup_arch(char **cmdline_p)
- 
-       io_delay_init();
- 
-+#if defined(CONFIG_X86_SMP) && !defined(CONFIG_XEN)
-+      /*
-+       * setup to use the early static init tables during kernel startup
-+       * X86_SMP will exclude sub-arches that don't deal well with it.
-+       */
-+      x86_cpu_to_apicid_early_ptr = (void *)x86_cpu_to_apicid_init;
-+      x86_bios_cpu_apicid_early_ptr = (void *)x86_bios_cpu_apicid_init;
-+#ifdef CONFIG_NUMA
-+      x86_cpu_to_node_map_early_ptr = (void *)x86_cpu_to_node_map_init;
-+#endif
-+#endif
-+
- #ifdef CONFIG_X86_GENERICARCH
-       generic_apic_probe();
- #endif
---- a/arch/x86/kernel/setup_64-xen.c
-+++ b/arch/x86/kernel/setup_64-xen.c
-@@ -29,18 +29,22 @@
- #include <linux/crash_dump.h>
- #include <linux/root_dev.h>
- #include <linux/pci.h>
-+#include <asm/pci-direct.h>
- #include <linux/efi.h>
- #include <linux/acpi.h>
- #include <linux/kallsyms.h>
- #include <linux/edd.h>
-+#include <linux/iscsi_ibft.h>
- #include <linux/mmzone.h>
- #include <linux/kexec.h>
- #include <linux/cpufreq.h>
- #include <linux/dmi.h>
- #include <linux/dma-mapping.h>
- #include <linux/ctype.h>
-+#include <linux/sort.h>
- #include <linux/uaccess.h>
- #include <linux/init_ohci1394_dma.h>
-+#include <linux/kvm_para.h>
- 
- #include <asm/mtrr.h>
- #include <asm/uaccess.h>
-@@ -58,7 +62,6 @@
- #include <asm/mmu_context.h>
- #include <asm/proto.h>
- #include <asm/setup.h>
--#include <asm/mach_apic.h>
- #include <asm/numa.h>
- #include <asm/sections.h>
- #include <asm/dmi.h>
-@@ -66,6 +69,9 @@
- #include <asm/mce.h>
- #include <asm/ds.h>
- #include <asm/topology.h>
-+#include <asm/pat.h>
-+
-+#include <mach_apic.h>
- #ifdef CONFIG_XEN
- #include <linux/percpu.h>
- #include <xen/interface/physdev.h>
-@@ -149,7 +155,7 @@ extern int root_mountflags;
- 
- char __initdata command_line[COMMAND_LINE_SIZE];
- 
--struct resource standard_io_resources[] = {
-+static struct resource standard_io_resources[] = {
-       { .name = "dma1", .start = 0x00, .end = 0x1f,
-               .flags = IORESOURCE_BUSY | IORESOURCE_IO },
-       { .name = "pic1", .start = 0x20, .end = 0x21,
-@@ -158,7 +164,9 @@ struct resource standard_io_resources[] 
-               .flags = IORESOURCE_BUSY | IORESOURCE_IO },
-       { .name = "timer1", .start = 0x50, .end = 0x53,
-               .flags = IORESOURCE_BUSY | IORESOURCE_IO },
--      { .name = "keyboard", .start = 0x60, .end = 0x6f,
-+      { .name = "keyboard", .start = 0x60, .end = 0x60,
-+              .flags = IORESOURCE_BUSY | IORESOURCE_IO },
-+      { .name = "keyboard", .start = 0x64, .end = 0x64,
-               .flags = IORESOURCE_BUSY | IORESOURCE_IO },
-       { .name = "dma page reg", .start = 0x80, .end = 0x8f,
-               .flags = IORESOURCE_BUSY | IORESOURCE_IO },
-@@ -224,8 +232,10 @@ contig_initmem_init(unsigned long start_
-       e820_register_active_regions(0, start_pfn, end_pfn);
- #ifdef CONFIG_XEN
-       free_bootmem_with_active_regions(0, xen_start_info->nr_pages);
-+      early_res_to_bootmem(0, xen_start_info->nr_pages<<PAGE_SHIFT);
- #else
-       free_bootmem_with_active_regions(0, end_pfn);
-+      early_res_to_bootmem(0, end_pfn<<PAGE_SHIFT);
- #endif
-       reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT);
- }
-@@ -290,6 +300,7 @@ static void __init reserve_crashkernel(v
-                               (unsigned long)(total_mem >> 20));
-               crashk_res.start = crash_base;
-               crashk_res.end   = crash_base + crash_size - 1;
-+              insert_resource(&iomem_resource, &crashk_res);
-       }
- }
- #else
-@@ -306,6 +317,40 @@ void __attribute__((weak)) __init memory
-        machine_specific_memory_setup();
- }
- 
-+static void __init parse_setup_data(void)
-+{
-+      struct setup_data *data;
-+      unsigned long pa_data;
-+
-+      if (boot_params.hdr.version < 0x0209)
-+              return;
-+      pa_data = boot_params.hdr.setup_data;
-+      while (pa_data) {
-+              data = early_ioremap(pa_data, PAGE_SIZE);
-+              switch (data->type) {
-+              default:
-+                      break;
-+              }
-+#ifndef CONFIG_DEBUG_BOOT_PARAMS
-+              free_early(pa_data, pa_data+sizeof(*data)+data->len);
-+#endif
-+              pa_data = data->next;
-+              early_iounmap(data, PAGE_SIZE);
-+      }
-+}
-+
-+#ifdef CONFIG_PCI_MMCONFIG
-+extern void __cpuinit fam10h_check_enable_mmcfg(void);
-+extern void __init check_enable_amd_mmconf_dmi(void);
-+#else
-+void __cpuinit fam10h_check_enable_mmcfg(void)
-+{
-+}
-+void __init check_enable_amd_mmconf_dmi(void)
-+{
-+}
-+#endif
-+
- /*
-  * setup_arch - architecture-specific boot-time initializations
-  *
-@@ -389,6 +434,8 @@ void __init setup_arch(char **cmdline_p)
-       strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
-       *cmdline_p = command_line;
- 
-+      parse_setup_data();
-+
-       parse_early_param();
- 
- #ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
-@@ -398,6 +445,13 @@ void __init setup_arch(char **cmdline_p)
- 
-       finish_e820_parsing();
- 
-+#ifndef CONFIG_XEN
-+      /* after parse_early_param, so could debug it */
-+      insert_resource(&iomem_resource, &code_resource);
-+      insert_resource(&iomem_resource, &data_resource);
-+      insert_resource(&iomem_resource, &bss_resource);
-+#endif
-+
-       early_gart_iommu_check();
- 
-       e820_register_active_regions(0, 0, -1UL);
-@@ -420,15 +474,23 @@ void __init setup_arch(char **cmdline_p)
- 
-       check_efer();
- 
--      init_memory_mapping(0, (end_pfn_map << PAGE_SHIFT));
-+      max_pfn_mapped = init_memory_mapping(0, (max_pfn_mapped << PAGE_SHIFT));
-       if (efi_enabled)
-               efi_init();
- 
-+#ifndef CONFIG_XEN
-+      vsmp_init();
-+#endif
-+
-       if (is_initial_xendomain())
-               dmi_scan_machine();
- 
-       io_delay_init();
- 
-+#ifdef CONFIG_KVM_CLOCK
-+      kvmclock_init();
-+#endif
-+
- #if defined(CONFIG_SMP) && !defined(CONFIG_XEN)
-       /* setup to use the early static init tables during kernel startup */
-       x86_cpu_to_apicid_early_ptr = (void *)x86_cpu_to_apicid_init;
-@@ -459,9 +521,9 @@ void __init setup_arch(char **cmdline_p)
-       contig_initmem_init(0, end_pfn);
- #endif
- 
--      early_res_to_bootmem();
--
- #ifndef CONFIG_XEN
-+      dma32_reserve_bootmem();
-+
- #ifdef CONFIG_ACPI_SLEEP
-       /*
-        * Reserve low memory region for sleep support.
-@@ -487,16 +549,17 @@ void __init setup_arch(char **cmdline_p)
-               unsigned long end_of_mem    = end_pfn << PAGE_SHIFT;
- 
-               if (ramdisk_end <= end_of_mem) {
--#ifndef CONFIG_XEN
--                      reserve_bootmem_generic(ramdisk_image, ramdisk_size);
--#endif
-+                      /*
-+                       * don't need to reserve again, already reserved early
-+                       * in x86_64_start_kernel, and early_res_to_bootmem
-+                       * convert that to reserved in bootmem
-+                       */
-                       initrd_start = ramdisk_image + PAGE_OFFSET;
-                       initrd_end = initrd_start+ramdisk_size;
- #ifdef CONFIG_XEN
-                       initrd_below_start_ok = 1;
- #endif
-               } else {
--                      /* Assumes everything on node 0 */
-                       free_bootmem(ramdisk_image, ramdisk_size);
-                       printk(KERN_ERR "initrd extends beyond end of memory "
-                              "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
-@@ -506,6 +569,9 @@ void __init setup_arch(char **cmdline_p)
-       }
- #endif
-       reserve_crashkernel();
-+
-+      reserve_ibft_region();
-+
-       paging_init();
-       map_vsyscall();
- #ifdef CONFIG_X86_LOCAL_APIC
-@@ -633,16 +699,16 @@ void __init setup_arch(char **cmdline_p)
-       prefill_possible_map();
- #endif
- 
-+      kvm_guest_init();
-+
-       /*
-        * We trust e820 completely. No explicit ROM probing in memory.
-        */
- #ifdef CONFIG_XEN
-       if (is_initial_xendomain())
--              e820_reserve_resources(machine_e820.map, machine_e820.nr_map,
--                                     &code_resource, &data_resource, &bss_resource);
-+              e820_reserve_resources(machine_e820.map, machine_e820.nr_map);
- #else
--      e820_reserve_resources(e820.map, e820.nr_map,
--                             &code_resource, &data_resource, &bss_resource);
-+      e820_reserve_resources(e820.map, e820.nr_map);
-       e820_mark_nosave_regions();
- #endif
- 
-@@ -690,6 +756,9 @@ void __init setup_arch(char **cmdline_p)
- #endif
- 
- #endif /* !CONFIG_XEN */
-+
-+      /* do this before identify_cpu for boot cpu */
-+      check_enable_amd_mmconf_dmi();
- }
- 
- #ifdef CONFIG_XEN
-@@ -786,9 +855,9 @@ static void __cpuinit amd_detect_cmp(str
-       bits = c->x86_coreid_bits;
- 
-       /* Low order bits define the core id (index of core in socket) */
--      c->cpu_core_id = c->phys_proc_id & ((1 << bits)-1);
--      /* Convert the APIC ID into the socket ID */
--      c->phys_proc_id = phys_pkg_id(bits);
-+      c->cpu_core_id = c->initial_apicid & ((1 << bits)-1);
-+      /* Convert the initial APIC ID into the socket ID */
-+      c->phys_proc_id = c->initial_apicid >> bits;
- 
- #ifdef CONFIG_NUMA
-       node = c->phys_proc_id;
-@@ -805,7 +874,7 @@ static void __cpuinit amd_detect_cmp(str
-                  If that doesn't result in a usable node fall back to the
-                  path for the previous case.  */
- 
--              int ht_nodeid = apicid - (cpu_data(0).phys_proc_id << bits);
-+              int ht_nodeid = c->initial_apicid;
- 
-               if (ht_nodeid >= 0 &&
-                   apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
-@@ -913,7 +982,7 @@ static void __cpuinit init_amd(struct cp
- 
-       /* Bit 31 in normal CPUID used for nonstandard 3DNow ID;
-          3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */
--      clear_bit(0*32+31, (unsigned long *)&c->x86_capability);
-+      clear_cpu_cap(c, 0*32+31);
- 
-       /* On C+ stepping K8 rep microcode works well for copy/memset */
-       level = cpuid_eax(1);
-@@ -955,9 +1024,25 @@ static void __cpuinit init_amd(struct cp
-       /* MFENCE stops RDTSC speculation */
-       set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
- 
-+      if (c->x86 == 0x10)
-+              fam10h_check_enable_mmcfg();
-+
- #ifndef CONFIG_XEN
-       if (amd_apic_timer_broken())
-               disable_apic_timer = 1;
-+
-+      if (c == &boot_cpu_data && c->x86 >= 0xf && c->x86 <= 0x11) {
-+              unsigned long long tseg;
-+
-+              /*
-+               * Split up direct mapping around the TSEG SMM area.
-+               * Don't do it for gbpages because there seems very little
-+               * benefit in doing so.
-+               */
-+              if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg) &&
-+              (tseg >> PMD_SHIFT) < (max_pfn_mapped >> (PMD_SHIFT-PAGE_SHIFT)))
-+                      set_memory_4k((unsigned long)__va(tseg), 1);
-+      }
- #endif
- }
- 
-@@ -1051,7 +1136,7 @@ static void __cpuinit early_init_intel(s
- {
-       if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
-           (c->x86 == 0x6 && c->x86_model >= 0x0e))
--              set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
-+              set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
- }
- 
- static void __cpuinit init_intel(struct cpuinfo_x86 *c)
-@@ -1094,9 +1179,6 @@ static void __cpuinit init_intel(struct 
- 
-       if (c->x86 == 15)
-               c->x86_cache_alignment = c->x86_clflush_size * 2;
--      if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
--          (c->x86 == 0x6 && c->x86_model >= 0x0e))
--              set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
-       if (c->x86 == 6)
-               set_cpu_cap(c, X86_FEATURE_REP_GOOD);
-       set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
-@@ -1105,6 +1187,32 @@ static void __cpuinit init_intel(struct 
-       srat_detect_node();
- }
- 
-+static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c)
-+{
-+      if (c->x86 == 0x6 && c->x86_model >= 0xf)
-+              set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
-+}
-+
-+static void __cpuinit init_centaur(struct cpuinfo_x86 *c)
-+{
-+      /* Cache sizes */
-+      unsigned n;
-+
-+      n = c->extended_cpuid_level;
-+      if (n >= 0x80000008) {
-+              unsigned eax = cpuid_eax(0x80000008);
-+              c->x86_virt_bits = (eax >> 8) & 0xff;
-+              c->x86_phys_bits = eax & 0xff;
-+      }
-+
-+      if (c->x86 == 0x6 && c->x86_model >= 0xf) {
-+              c->x86_cache_alignment = c->x86_clflush_size * 2;
-+              set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
-+              set_cpu_cap(c, X86_FEATURE_REP_GOOD);
-+      }
-+      set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
-+}
-+
- static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
- {
-       char *v = c->x86_vendor_id;
-@@ -1113,6 +1221,8 @@ static void __cpuinit get_cpu_vendor(str
-               c->x86_vendor = X86_VENDOR_AMD;
-       else if (!strcmp(v, "GenuineIntel"))
-               c->x86_vendor = X86_VENDOR_INTEL;
-+      else if (!strcmp(v, "CentaurHauls"))
-+              c->x86_vendor = X86_VENDOR_CENTAUR;
-       else
-               c->x86_vendor = X86_VENDOR_UNKNOWN;
- }
-@@ -1160,15 +1270,16 @@ static void __cpuinit early_identify_cpu
-                       c->x86 += (tfms >> 20) & 0xff;
-               if (c->x86 >= 0x6)
-                       c->x86_model += ((tfms >> 16) & 0xF) << 4;
--              if (c->x86_capability[0] & (1<<19))
-+              if (test_cpu_cap(c, X86_FEATURE_CLFLSH))
-                       c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
-       } else {
-               /* Have CPUID level 0 only - unheard of */
-               c->x86 = 4;
-       }
- 
-+      c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xff;
- #ifdef CONFIG_SMP
--      c->phys_proc_id = (cpuid_ebx(1) >> 24) & 0xff;
-+      c->phys_proc_id = c->initial_apicid;
- #endif
-       /* AMD-defined flags: level 0x80000001 */
-       xlvl = cpuid_eax(0x80000000);
-@@ -1201,8 +1312,12 @@ static void __cpuinit early_identify_cpu
-       case X86_VENDOR_INTEL:
-               early_init_intel(c);
-               break;
-+      case X86_VENDOR_CENTAUR:
-+              early_init_centaur(c);
-+              break;
-       }
- 
-+      validate_pat_support(c);
- }
- 
- /*
-@@ -1237,6 +1352,10 @@ void __cpuinit identify_cpu(struct cpuin
-               init_intel(c);
-               break;
- 
-+      case X86_VENDOR_CENTAUR:
-+              init_centaur(c);
-+              break;
-+
-       case X86_VENDOR_UNKNOWN:
-       default:
-               display_cacheinfo(c);
-@@ -1266,14 +1385,24 @@ void __cpuinit identify_cpu(struct cpuin
- #endif
-       select_idle_routine(c);
- 
--      if (c != &boot_cpu_data)
--              mtrr_ap_init();
- #ifdef CONFIG_NUMA
-       numa_add_cpu(smp_processor_id());
- #endif
- 
- }
- 
-+void __cpuinit identify_boot_cpu(void)
-+{
-+      identify_cpu(&boot_cpu_data);
-+}
-+
-+void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
-+{
-+      BUG_ON(c == &boot_cpu_data);
-+      identify_cpu(c);
-+      mtrr_ap_init();
-+}
-+
- static __init int setup_noclflush(char *arg)
- {
-       setup_clear_cpu_cap(X86_FEATURE_CLFLSH);
-@@ -1302,123 +1431,3 @@ static __init int setup_disablecpuid(cha
-       return 1;
- }
- __setup("clearcpuid=", setup_disablecpuid);
--
--/*
-- *    Get CPU information for use by the procfs.
-- */
--
--static int show_cpuinfo(struct seq_file *m, void *v)
--{
--      struct cpuinfo_x86 *c = v;
--      int cpu = 0, i;
--
--#ifdef CONFIG_SMP
--      cpu = c->cpu_index;
--#endif
--
--      seq_printf(m, "processor\t: %u\n"
--                 "vendor_id\t: %s\n"
--                 "cpu family\t: %d\n"
--                 "model\t\t: %d\n"
--                 "model name\t: %s\n",
--                 (unsigned)cpu,
--                 c->x86_vendor_id[0] ? c->x86_vendor_id : "unknown",
--                 c->x86,
--                 (int)c->x86_model,
--                 c->x86_model_id[0] ? c->x86_model_id : "unknown");
--
--      if (c->x86_mask || c->cpuid_level >= 0)
--              seq_printf(m, "stepping\t: %d\n", c->x86_mask);
--      else
--              seq_printf(m, "stepping\t: unknown\n");
--
--      if (cpu_has(c, X86_FEATURE_TSC)) {
--              unsigned int freq = cpufreq_quick_get((unsigned)cpu);
--
--              if (!freq)
--                      freq = cpu_khz;
--              seq_printf(m, "cpu MHz\t\t: %u.%03u\n",
--                         freq / 1000, (freq % 1000));
--      }
--
--      /* Cache size */
--      if (c->x86_cache_size >= 0)
--              seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size);
--
--#ifdef CONFIG_SMP
--      if (smp_num_siblings * c->x86_max_cores > 1) {
--              seq_printf(m, "physical id\t: %d\n", c->phys_proc_id);
--              seq_printf(m, "siblings\t: %d\n",
--                             cpus_weight(per_cpu(cpu_core_map, cpu)));
--              seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id);
--              seq_printf(m, "cpu cores\t: %d\n", c->booted_cores);
--      }
--#endif
--
--      seq_printf(m,
--                 "fpu\t\t: yes\n"
--                 "fpu_exception\t: yes\n"
--                 "cpuid level\t: %d\n"
--                 "wp\t\t: yes\n"
--                 "flags\t\t:",
--                 c->cpuid_level);
--
--      for (i = 0; i < 32*NCAPINTS; i++)
--              if (cpu_has(c, i) && x86_cap_flags[i] != NULL)
--                      seq_printf(m, " %s", x86_cap_flags[i]);
--
--      seq_printf(m, "\nbogomips\t: %lu.%02lu\n",
--                 c->loops_per_jiffy/(500000/HZ),
--                 (c->loops_per_jiffy/(5000/HZ)) % 100);
--
--      if (c->x86_tlbsize > 0)
--              seq_printf(m, "TLB size\t: %d 4K pages\n", c->x86_tlbsize);
--      seq_printf(m, "clflush size\t: %d\n", c->x86_clflush_size);
--      seq_printf(m, "cache_alignment\t: %d\n", c->x86_cache_alignment);
--
--      seq_printf(m, "address sizes\t: %u bits physical, %u bits virtual\n",
--                 c->x86_phys_bits, c->x86_virt_bits);
--
--      seq_printf(m, "power management:");
--      for (i = 0; i < 32; i++) {
--              if (c->x86_power & (1 << i)) {
--                      if (i < ARRAY_SIZE(x86_power_flags) &&
--                          x86_power_flags[i])
--                              seq_printf(m, "%s%s",
--                                         x86_power_flags[i][0]?" ":"",
--                                         x86_power_flags[i]);
--                      else
--                              seq_printf(m, " [%d]", i);
--              }
--      }
--
--      seq_printf(m, "\n\n");
--
--      return 0;
--}
--
--static void *c_start(struct seq_file *m, loff_t *pos)
--{
--      if (*pos == 0)  /* just in case, cpu 0 is not the first */
--              *pos = first_cpu(cpu_online_map);
--      if ((*pos) < NR_CPUS && cpu_online(*pos))
--              return &cpu_data(*pos);
--      return NULL;
--}
--
--static void *c_next(struct seq_file *m, void *v, loff_t *pos)
--{
--      *pos = next_cpu(*pos, cpu_online_map);
--      return c_start(m, pos);
--}
--
--static void c_stop(struct seq_file *m, void *v)
--{
--}
--
--const struct seq_operations cpuinfo_op = {
--      .start = c_start,
--      .next = c_next,
--      .stop = c_stop,
--      .show = show_cpuinfo,
--};
---- a/arch/x86/kernel/setup64-xen.c
-+++ b/arch/x86/kernel/setup64-xen.c
-@@ -15,6 +15,7 @@
- #include <linux/bootmem.h>
- #include <linux/bitops.h>
- #include <linux/module.h>
-+#include <linux/kgdb.h>
- #include <asm/pda.h>
- #include <asm/pgtable.h>
- #include <asm/processor.h>
-@@ -27,6 +28,7 @@
- #include <asm/proto.h>
- #include <asm/sections.h>
- #include <asm/setup.h>
-+#include <asm/genapic.h>
- #ifdef CONFIG_XEN
- #include <asm/hypervisor.h>
- #endif
-@@ -81,8 +83,8 @@ int force_personality32 = 0; 
- Control non executable heap for 32bit processes.
- To control the stack too use noexec=off
- 
--on    PROT_READ does not imply PROT_EXEC for 32bit processes
--off   PROT_READ implies PROT_EXEC (default)
-+on    PROT_READ does not imply PROT_EXEC for 32bit processes (default)
-+off   PROT_READ implies PROT_EXEC
- */
- static int __init nonx32_setup(char *str)
- {
-@@ -94,85 +96,6 @@ static int __init nonx32_setup(char *str
- }
- __setup("noexec32=", nonx32_setup);
- 
--/*
-- * Copy data used in early init routines from the initial arrays to the
-- * per cpu data areas.  These arrays then become expendable and the
-- * *_early_ptr's are zeroed indicating that the static arrays are gone.
-- */
--static void __init setup_per_cpu_maps(void)
--{
--#ifndef CONFIG_XEN
--      int cpu;
--
--      for_each_possible_cpu(cpu) {
--#ifdef CONFIG_SMP
--              if (per_cpu_offset(cpu)) {
--#endif
--                      per_cpu(x86_cpu_to_apicid, cpu) =
--                                              x86_cpu_to_apicid_init[cpu];
--                      per_cpu(x86_bios_cpu_apicid, cpu) =
--                                              x86_bios_cpu_apicid_init[cpu];
--#ifdef CONFIG_NUMA
--                      per_cpu(x86_cpu_to_node_map, cpu) =
--                                              x86_cpu_to_node_map_init[cpu];
--#endif
--#ifdef CONFIG_SMP
--              }
--              else
--                      printk(KERN_NOTICE "per_cpu_offset zero for cpu %d\n",
--                                                                      cpu);
--#endif
--      }
--
--      /* indicate the early static arrays will soon be gone */
--      x86_cpu_to_apicid_early_ptr = NULL;
--      x86_bios_cpu_apicid_early_ptr = NULL;
--#ifdef CONFIG_NUMA
--      x86_cpu_to_node_map_early_ptr = NULL;
--#endif
--#endif
--}
--
--/*
-- * Great future plan:
-- * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data.
-- * Always point %gs to its beginning
-- */
--void __init setup_per_cpu_areas(void)
--{ 
--      int i;
--      unsigned long size;
--
--#ifdef CONFIG_HOTPLUG_CPU
--      prefill_possible_map();
--#endif
--
--      /* Copy section for each CPU (we discard the original) */
--      size = PERCPU_ENOUGH_ROOM;
--
--      printk(KERN_INFO "PERCPU: Allocating %lu bytes of per cpu data\n", size);
--      for_each_cpu_mask (i, cpu_possible_map) {
--              char *ptr;
--#ifndef CONFIG_NEED_MULTIPLE_NODES
--              ptr = alloc_bootmem_pages(size);
--#else
--              int node = early_cpu_to_node(i);
--
--              if (!node_online(node) || !NODE_DATA(node))
--                      ptr = alloc_bootmem_pages(size);
--              else
--                      ptr = alloc_bootmem_pages_node(NODE_DATA(node), size);
--#endif
--              if (!ptr)
--                      panic("Cannot allocate cpu data for CPU %d\n", i);
--              cpu_pda(i)->data_offset = ptr - __per_cpu_start;
--              memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
--      }
--
--      /* setup percpu data maps early */
--      setup_per_cpu_maps();
--} 
--
- #ifdef CONFIG_XEN
- static void __init_refok switch_pt(int cpu)
- {
-@@ -410,6 +333,17 @@ void __cpuinit cpu_init (void)
- #endif
-       load_LDT(&init_mm.context);
- 
-+#ifdef CONFIG_KGDB
-+      /*
-+       * If the kgdb is connected no debug regs should be altered.  This
-+       * is only applicable when KGDB and a KGDB I/O module are built
-+       * into the kernel and you are using early debugging with
-+       * kgdbwait. KGDB will control the kernel HW breakpoint registers.
-+       */
-+      if (kgdb_connected && arch_kgdb_ops.correct_hw_break)
-+              arch_kgdb_ops.correct_hw_break();
-+      else {
-+#endif
-       /*
-        * Clear all 6 debug registers:
-        */
-@@ -420,10 +354,17 @@ void __cpuinit cpu_init (void)
-       set_debugreg(0UL, 3);
-       set_debugreg(0UL, 6);
-       set_debugreg(0UL, 7);
-+#ifdef CONFIG_KGDB
-+      /* If the kgdb is connected no debug regs should be altered. */
-+      }
-+#endif
- 
-       fpu_init(); 
- 
-       asm ("pushfq; popq %0" : "=rm" (kernel_eflags));
-       if (raw_irqs_disabled())
-               kernel_eflags &= ~X86_EFLAGS_IF;
-+
-+      if (is_uv_system())
-+              uv_cpu_init();
- }
---- /dev/null
-+++ b/arch/x86/kernel/setup-xen.c
-@@ -0,0 +1,141 @@
-+#include <linux/kernel.h>
-+#include <linux/module.h>
-+#include <linux/init.h>
-+#include <linux/bootmem.h>
-+#include <linux/percpu.h>
-+#include <asm/smp.h>
-+#include <asm/percpu.h>
-+#include <asm/sections.h>
-+#include <asm/processor.h>
-+#include <asm/setup.h>
-+#include <asm/topology.h>
-+#include <asm/mpspec.h>
-+#include <asm/apicdef.h>
-+
-+#ifdef CONFIG_X86_LOCAL_APIC
-+unsigned int num_processors;
-+unsigned disabled_cpus __cpuinitdata;
-+/* Processor that is doing the boot up */
-+unsigned int boot_cpu_physical_apicid = -1U;
-+EXPORT_SYMBOL(boot_cpu_physical_apicid);
-+
-+DEFINE_PER_CPU(u16, x86_cpu_to_apicid) = BAD_APICID;
-+EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid);
-+
-+/* Bitmask of physically existing CPUs */
-+physid_mask_t phys_cpu_present_map;
-+#endif
-+
-+#if defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) && defined(CONFIG_X86_SMP)
-+/*
-+ * Copy data used in early init routines from the initial arrays to the
-+ * per cpu data areas.  These arrays then become expendable and the
-+ * *_early_ptr's are zeroed indicating that the static arrays are gone.
-+ */
-+static void __init setup_per_cpu_maps(void)
-+{
-+#ifndef CONFIG_XEN
-+      int cpu;
-+
-+      for_each_possible_cpu(cpu) {
-+              per_cpu(x86_cpu_to_apicid, cpu) = x86_cpu_to_apicid_init[cpu];
-+              per_cpu(x86_bios_cpu_apicid, cpu) =
-+                                              x86_bios_cpu_apicid_init[cpu];
-+#ifdef CONFIG_NUMA
-+              per_cpu(x86_cpu_to_node_map, cpu) =
-+                                              x86_cpu_to_node_map_init[cpu];
-+#endif
-+      }
-+
-+      /* indicate the early static arrays will soon be gone */
-+      x86_cpu_to_apicid_early_ptr = NULL;
-+      x86_bios_cpu_apicid_early_ptr = NULL;
-+#ifdef CONFIG_NUMA
-+      x86_cpu_to_node_map_early_ptr = NULL;
-+#endif
-+#endif
-+}
-+
-+#ifdef CONFIG_HAVE_CPUMASK_OF_CPU_MAP
-+cpumask_t *cpumask_of_cpu_map __read_mostly;
-+EXPORT_SYMBOL(cpumask_of_cpu_map);
-+
-+/* requires nr_cpu_ids to be initialized */
-+static void __init setup_cpumask_of_cpu(void)
-+{
-+      int i;
-+
-+      /* alloc_bootmem zeroes memory */
-+      cpumask_of_cpu_map = alloc_bootmem_low(sizeof(cpumask_t) * nr_cpu_ids);
-+      for (i = 0; i < nr_cpu_ids; i++)
-+              cpu_set(i, cpumask_of_cpu_map[i]);
-+}
-+#else
-+static inline void setup_cpumask_of_cpu(void) { }
-+#endif
-+
-+#ifdef CONFIG_X86_32
-+/*
-+ * Great future not-so-futuristic plan: make i386 and x86_64 do it
-+ * the same way
-+ */
-+unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
-+EXPORT_SYMBOL(__per_cpu_offset);
-+#endif
-+
-+/*
-+ * Great future plan:
-+ * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data.
-+ * Always point %gs to its beginning
-+ */
-+void __init setup_per_cpu_areas(void)
-+{
-+      int i, highest_cpu = 0;
-+      unsigned long size;
-+
-+#ifdef CONFIG_HOTPLUG_CPU
-+      prefill_possible_map();
-+#endif
-+
-+      /* Copy section for each CPU (we discard the original) */
-+      size = PERCPU_ENOUGH_ROOM;
-+      printk(KERN_INFO "PERCPU: Allocating %lu bytes of per cpu data\n",
-+                        size);
-+
-+      for_each_possible_cpu(i) {
-+              char *ptr;
-+#ifndef CONFIG_NEED_MULTIPLE_NODES
-+              ptr = alloc_bootmem_pages(size);
-+#else
-+              int node = early_cpu_to_node(i);
-+              if (!node_online(node) || !NODE_DATA(node)) {
-+                      ptr = alloc_bootmem_pages(size);
-+                      printk(KERN_INFO
-+                             "cpu %d has no node or node-local memory\n", i);
-+              }
-+              else
-+                      ptr = alloc_bootmem_pages_node(NODE_DATA(node), size);
-+#endif
-+              if (!ptr)
-+                      panic("Cannot allocate cpu data for CPU %d\n", i);
-+#ifdef CONFIG_X86_64
-+              cpu_pda(i)->data_offset = ptr - __per_cpu_start;
-+#else
-+              __per_cpu_offset[i] = ptr - __per_cpu_start;
-+#endif
-+              memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
-+
-+              highest_cpu = i;
-+      }
-+
-+      nr_cpu_ids = highest_cpu + 1;
-+      printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d\n", NR_CPUS, nr_cpu_ids);
-+
-+      /* Setup percpu data maps */
-+      setup_per_cpu_maps();
-+
-+      /* Setup cpumask_of_cpu map */
-+      setup_cpumask_of_cpu();
-+}
-+
-+#endif
---- a/arch/x86/kernel/smp_32-xen.c
-+++ /dev/null
-@@ -1,647 +0,0 @@
--/*
-- *    Intel SMP support routines.
-- *
-- *    (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
-- *    (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com>
-- *
-- *    This code is released under the GNU General Public License version 2 or
-- *    later.
-- */
--
--#include <linux/init.h>
--
--#include <linux/mm.h>
--#include <linux/delay.h>
--#include <linux/spinlock.h>
--#include <linux/kernel_stat.h>
--#include <linux/mc146818rtc.h>
--#include <linux/cache.h>
--#include <linux/interrupt.h>
--#include <linux/cpu.h>
--#include <linux/module.h>
--
--#include <asm/mtrr.h>
--#include <asm/tlbflush.h>
--#include <asm/mmu_context.h>
--#if 0
--#include <mach_apic.h>
--#endif
--#include <xen/evtchn.h>
--
--/*
-- *    Some notes on x86 processor bugs affecting SMP operation:
-- *
-- *    Pentium, Pentium Pro, II, III (and all CPUs) have bugs.
-- *    The Linux implications for SMP are handled as follows:
-- *
-- *    Pentium III / [Xeon]
-- *            None of the E1AP-E3AP errata are visible to the user.
-- *
-- *    E1AP.   see PII A1AP
-- *    E2AP.   see PII A2AP
-- *    E3AP.   see PII A3AP
-- *
-- *    Pentium II / [Xeon]
-- *            None of the A1AP-A3AP errata are visible to the user.
-- *
-- *    A1AP.   see PPro 1AP
-- *    A2AP.   see PPro 2AP
-- *    A3AP.   see PPro 7AP
-- *
-- *    Pentium Pro
-- *            None of 1AP-9AP errata are visible to the normal user,
-- *    except occasional delivery of 'spurious interrupt' as trap #15.
-- *    This is very rare and a non-problem.
-- *
-- *    1AP.    Linux maps APIC as non-cacheable
-- *    2AP.    worked around in hardware
-- *    3AP.    fixed in C0 and above steppings microcode update.
-- *            Linux does not use excessive STARTUP_IPIs.
-- *    4AP.    worked around in hardware
-- *    5AP.    symmetric IO mode (normal Linux operation) not affected.
-- *            'noapic' mode has vector 0xf filled out properly.
-- *    6AP.    'noapic' mode might be affected - fixed in later steppings
-- *    7AP.    We do not assume writes to the LVT deasserting IRQs
-- *    8AP.    We do not enable low power mode (deep sleep) during MP bootup
-- *    9AP.    We do not use mixed mode
-- *
-- *    Pentium
-- *            There is a marginal case where REP MOVS on 100MHz SMP
-- *    machines with B stepping processors can fail. XXX should provide
-- *    an L1cache=Writethrough or L1cache=off option.
-- *
-- *            B stepping CPUs may hang. There are hardware work arounds
-- *    for this. We warn about it in case your board doesn't have the work
-- *    arounds. Basically that's so I can tell anyone with a B stepping
-- *    CPU and SMP problems "tough".
-- *
-- *    Specific items [From Pentium Processor Specification Update]
-- *
-- *    1AP.    Linux doesn't use remote read
-- *    2AP.    Linux doesn't trust APIC errors
-- *    3AP.    We work around this
-- *    4AP.    Linux never generated 3 interrupts of the same priority
-- *            to cause a lost local interrupt.
-- *    5AP.    Remote read is never used
-- *    6AP.    not affected - worked around in hardware
-- *    7AP.    not affected - worked around in hardware
-- *    8AP.    worked around in hardware - we get explicit CS errors if not
-- *    9AP.    only 'noapic' mode affected. Might generate spurious
-- *            interrupts, we log only the first one and count the
-- *            rest silently.
-- *    10AP.   not affected - worked around in hardware
-- *    11AP.   Linux reads the APIC between writes to avoid this, as per
-- *            the documentation. Make sure you preserve this as it affects
-- *            the C stepping chips too.
-- *    12AP.   not affected - worked around in hardware
-- *    13AP.   not affected - worked around in hardware
-- *    14AP.   we always deassert INIT during bootup
-- *    15AP.   not affected - worked around in hardware
-- *    16AP.   not affected - worked around in hardware
-- *    17AP.   not affected - worked around in hardware
-- *    18AP.   not affected - worked around in hardware
-- *    19AP.   not affected - worked around in BIOS
-- *
-- *    If this sounds worrying believe me these bugs are either ___RARE___,
-- *    or are signal timing bugs worked around in hardware and there's
-- *    about nothing of note with C stepping upwards.
-- */
--
--DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate) ____cacheline_aligned = { &init_mm, 0, };
--
--/*
-- * the following functions deal with sending IPIs between CPUs.
-- *
-- * We use 'broadcast', CPU->CPU IPIs and self-IPIs too.
-- */
--
--static inline int __prepare_ICR (unsigned int shortcut, int vector)
--{
--      unsigned int icr = shortcut | APIC_DEST_LOGICAL;
--
--      switch (vector) {
--      default:
--              icr |= APIC_DM_FIXED | vector;
--              break;
--      case NMI_VECTOR:
--              icr |= APIC_DM_NMI;
--              break;
--      }
--      return icr;
--}
--
--static inline int __prepare_ICR2 (unsigned int mask)
--{
--      return SET_APIC_DEST_FIELD(mask);
--}
--
--DECLARE_PER_CPU(int, ipi_to_irq[NR_IPIS]);
--
--static inline void __send_IPI_one(unsigned int cpu, int vector)
--{
--      int irq = per_cpu(ipi_to_irq, cpu)[vector];
--      BUG_ON(irq < 0);
--      notify_remote_via_irq(irq);
--}
--
--void __send_IPI_shortcut(unsigned int shortcut, int vector)
--{
--      int cpu;
--
--      switch (shortcut) {
--      case APIC_DEST_SELF:
--              __send_IPI_one(smp_processor_id(), vector);
--              break;
--      case APIC_DEST_ALLBUT:
--              for (cpu = 0; cpu < NR_CPUS; ++cpu) {
--                      if (cpu == smp_processor_id())
--                              continue;
--                      if (cpu_isset(cpu, cpu_online_map)) {
--                              __send_IPI_one(cpu, vector);
--                      }
--              }
--              break;
--      default:
--              printk("XXXXXX __send_IPI_shortcut %08x vector %d\n", shortcut,
--                     vector);
--              break;
--      }
--}
--
--void send_IPI_self(int vector)
--{
--      __send_IPI_shortcut(APIC_DEST_SELF, vector);
--}
--
--/*
-- * This is only used on smaller machines.
-- */
--void send_IPI_mask_bitmask(cpumask_t mask, int vector)
--{
--      unsigned long flags;
--      unsigned int cpu;
--
--      local_irq_save(flags);
--      WARN_ON(cpus_addr(mask)[0] & ~cpus_addr(cpu_online_map)[0]);
--
--      for (cpu = 0; cpu < NR_CPUS; ++cpu) {
--              if (cpu_isset(cpu, mask)) {
--                      __send_IPI_one(cpu, vector);
--              }
--      }
--
--      local_irq_restore(flags);
--}
--
--void send_IPI_mask_sequence(cpumask_t mask, int vector)
--{
--
--      send_IPI_mask_bitmask(mask, vector);
--}
--
--#include <mach_ipi.h> /* must come after the send_IPI functions above for inlining */
--
--#if 0 /* XEN */
--/*
-- *    Smarter SMP flushing macros. 
-- *            c/o Linus Torvalds.
-- *
-- *    These mean you can really definitely utterly forget about
-- *    writing to user space from interrupts. (It's not allowed anyway).
-- *
-- *    Optimizations Manfred Spraul <manfred@colorfullife.com>
-- */
--
--static cpumask_t flush_cpumask;
--static struct mm_struct * flush_mm;
--static unsigned long flush_va;
--static DEFINE_SPINLOCK(tlbstate_lock);
--
--/*
-- * We cannot call mmdrop() because we are in interrupt context,
-- * instead update mm->cpu_vm_mask.
-- *
-- * We need to reload %cr3 since the page tables may be going
-- * away from under us..
-- */
--void leave_mm(int cpu)
--{
--      if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK)
--              BUG();
--      cpu_clear(cpu, per_cpu(cpu_tlbstate, cpu).active_mm->cpu_vm_mask);
--      load_cr3(swapper_pg_dir);
--}
--EXPORT_SYMBOL_GPL(leave_mm);
--
--/*
-- *
-- * The flush IPI assumes that a thread switch happens in this order:
-- * [cpu0: the cpu that switches]
-- * 1) switch_mm() either 1a) or 1b)
-- * 1a) thread switch to a different mm
-- * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask);
-- *    Stop ipi delivery for the old mm. This is not synchronized with
-- *    the other cpus, but smp_invalidate_interrupt ignores flush ipis
-- *    for the wrong mm, and in the worst case we perform a superfluous
-- *    tlb flush.
-- * 1a2) set cpu_tlbstate to TLBSTATE_OK
-- *    Now the smp_invalidate_interrupt won't call leave_mm if cpu0
-- *    was in lazy tlb mode.
-- * 1a3) update cpu_tlbstate[].active_mm
-- *    Now cpu0 accepts tlb flushes for the new mm.
-- * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask);
-- *    Now the other cpus will send tlb flush ipis.
-- * 1a5) change cr3.
-- * 1b) thread switch without mm change
-- *    cpu_tlbstate[].active_mm is correct, cpu0 already handles
-- *    flush ipis.
-- * 1b1) set cpu_tlbstate to TLBSTATE_OK
-- * 1b2) test_and_set the cpu bit in cpu_vm_mask.
-- *    Atomically set the bit [other cpus will start sending flush ipis],
-- *    and test the bit.
-- * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
-- * 2) switch %%esp, ie current
-- *
-- * The interrupt must handle 2 special cases:
-- * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm.
-- * - the cpu performs speculative tlb reads, i.e. even if the cpu only
-- *   runs in kernel space, the cpu could load tlb entries for user space
-- *   pages.
-- *
-- * The good news is that cpu_tlbstate is local to each cpu, no
-- * write/read ordering problems.
-- */
--
--/*
-- * TLB flush IPI:
-- *
-- * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
-- * 2) Leave the mm if we are in the lazy tlb mode.
-- */
--
--irqreturn_t smp_invalidate_interrupt(int irq, void *dev_id)
--{
--      unsigned long cpu;
--
--      cpu = get_cpu();
--
--      if (!cpu_isset(cpu, flush_cpumask))
--              goto out;
--              /* 
--               * This was a BUG() but until someone can quote me the
--               * line from the intel manual that guarantees an IPI to
--               * multiple CPUs is retried _only_ on the erroring CPUs
--               * it's staying as a return
--               *
--               * BUG();
--               */
--               
--      if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) {
--              if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) {
--                      if (flush_va == TLB_FLUSH_ALL)
--                              local_flush_tlb();
--                      else
--                              __flush_tlb_one(flush_va);
--              } else
--                      leave_mm(cpu);
--      }
--      smp_mb__before_clear_bit();
--      cpu_clear(cpu, flush_cpumask);
--      smp_mb__after_clear_bit();
--out:
--      put_cpu_no_resched();
--      __get_cpu_var(irq_stat).irq_tlb_count++;
--
--      return IRQ_HANDLED;
--}
--
--void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
--                           unsigned long va)
--{
--      cpumask_t cpumask = *cpumaskp;
--
--      /*
--       * A couple of (to be removed) sanity checks:
--       *
--       * - current CPU must not be in mask
--       * - mask must exist :)
--       */
--      BUG_ON(cpus_empty(cpumask));
--      BUG_ON(cpu_isset(smp_processor_id(), cpumask));
--      BUG_ON(!mm);
--
--#ifdef CONFIG_HOTPLUG_CPU
--      /* If a CPU which we ran on has gone down, OK. */
--      cpus_and(cpumask, cpumask, cpu_online_map);
--      if (unlikely(cpus_empty(cpumask)))
--              return;
--#endif
--
--      /*
--       * i'm not happy about this global shared spinlock in the
--       * MM hot path, but we'll see how contended it is.
--       * AK: x86-64 has a faster method that could be ported.
--       */
--      spin_lock(&tlbstate_lock);
--      
--      flush_mm = mm;
--      flush_va = va;
--      cpus_or(flush_cpumask, cpumask, flush_cpumask);
--      /*
--       * We have to send the IPI only to
--       * CPUs affected.
--       */
--      send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR);
--
--      while (!cpus_empty(flush_cpumask))
--              /* nothing. lockup detection does not belong here */
--              cpu_relax();
--
--      flush_mm = NULL;
--      flush_va = 0;
--      spin_unlock(&tlbstate_lock);
--}
--      
--void flush_tlb_current_task(void)
--{
--      struct mm_struct *mm = current->mm;
--      cpumask_t cpu_mask;
--
--      preempt_disable();
--      cpu_mask = mm->cpu_vm_mask;
--      cpu_clear(smp_processor_id(), cpu_mask);
--
--      local_flush_tlb();
--      if (!cpus_empty(cpu_mask))
--              flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
--      preempt_enable();
--}
--
--void flush_tlb_mm (struct mm_struct * mm)
--{
--      cpumask_t cpu_mask;
--
--      preempt_disable();
--      cpu_mask = mm->cpu_vm_mask;
--      cpu_clear(smp_processor_id(), cpu_mask);
--
--      if (current->active_mm == mm) {
--              if (current->mm)
--                      local_flush_tlb();
--              else
--                      leave_mm(smp_processor_id());
--      }
--      if (!cpus_empty(cpu_mask))
--              flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
--
--      preempt_enable();
--}
--
--void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
--{
--      struct mm_struct *mm = vma->vm_mm;
--      cpumask_t cpu_mask;
--
--      preempt_disable();
--      cpu_mask = mm->cpu_vm_mask;
--      cpu_clear(smp_processor_id(), cpu_mask);
--
--      if (current->active_mm == mm) {
--              if(current->mm)
--                      __flush_tlb_one(va);
--              else
--                      leave_mm(smp_processor_id());
--      }
--
--      if (!cpus_empty(cpu_mask))
--              flush_tlb_others(cpu_mask, mm, va);
--
--      preempt_enable();
--}
--EXPORT_SYMBOL(flush_tlb_page);
--
--static void do_flush_tlb_all(void* info)
--{
--      unsigned long cpu = smp_processor_id();
--
--      __flush_tlb_all();
--      if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_LAZY)
--              leave_mm(cpu);
--}
--
--void flush_tlb_all(void)
--{
--      on_each_cpu(do_flush_tlb_all, NULL, 1, 1);
--}
--
--#endif /* XEN */
--
--/*
-- * this function sends a 'reschedule' IPI to another CPU.
-- * it goes straight through and wastes no time serializing
-- * anything. Worst case is that we lose a reschedule ...
-- */
--void xen_smp_send_reschedule(int cpu)
--{
--      WARN_ON(cpu_is_offline(cpu));
--      send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
--}
--
--/*
-- * Structure and data for smp_call_function(). This is designed to minimise
-- * static memory requirements. It also looks cleaner.
-- */
--static DEFINE_SPINLOCK(call_lock);
--
--struct call_data_struct {
--      void (*func) (void *info);
--      void *info;
--      atomic_t started;
--      atomic_t finished;
--      int wait;
--};
--
--void lock_ipi_call_lock(void)
--{
--      spin_lock_irq(&call_lock);
--}
--
--void unlock_ipi_call_lock(void)
--{
--      spin_unlock_irq(&call_lock);
--}
--
--static struct call_data_struct *call_data;
--
--static void __smp_call_function(void (*func) (void *info), void *info,
--                              int nonatomic, int wait)
--{
--      struct call_data_struct data;
--      int cpus = num_online_cpus() - 1;
--
--      if (!cpus)
--              return;
--
--      data.func = func;
--      data.info = info;
--      atomic_set(&data.started, 0);
--      data.wait = wait;
--      if (wait)
--              atomic_set(&data.finished, 0);
--
--      call_data = &data;
--      mb();
--
--      /* Send a message to all other CPUs and wait for them to respond */
--      send_IPI_allbutself(CALL_FUNCTION_VECTOR);
--
--      /* Wait for response */
--      while (atomic_read(&data.started) != cpus)
--              cpu_relax();
--
--      if (wait)
--              while (atomic_read(&data.finished) != cpus)
--                      cpu_relax();
--}
--
--
--/**
-- * smp_call_function_mask(): Run a function on a set of other CPUs.
-- * @mask: The set of cpus to run on.  Must not include the current cpu.
-- * @func: The function to run. This must be fast and non-blocking.
-- * @info: An arbitrary pointer to pass to the function.
-- * @wait: If true, wait (atomically) until function has completed on other CPUs.
-- *
-- * Returns 0 on success, else a negative status code.
-- *
-- * If @wait is true, then returns once @func has returned; otherwise
-- * it returns just before the target cpu calls @func.
-- *
-- * You must not call this function with disabled interrupts or from a
-- * hardware interrupt handler or from a bottom half handler.
-- */
--int
--xen_smp_call_function_mask(cpumask_t mask,
--                            void (*func)(void *), void *info,
--                            int wait)
--{
--      struct call_data_struct data;
--      cpumask_t allbutself;
--      int cpus;
--
--      /* Can deadlock when called with interrupts disabled */
--      WARN_ON(irqs_disabled());
--
--      /* Holding any lock stops cpus from going down. */
--      spin_lock(&call_lock);
--
--      allbutself = cpu_online_map;
--      cpu_clear(smp_processor_id(), allbutself);
--
--      cpus_and(mask, mask, allbutself);
--      cpus = cpus_weight(mask);
--
--      if (!cpus) {
--              spin_unlock(&call_lock);
--              return 0;
--      }
--
--      data.func = func;
--      data.info = info;
--      atomic_set(&data.started, 0);
--      data.wait = wait;
--      if (wait)
--              atomic_set(&data.finished, 0);
--
--      call_data = &data;
--      mb();
--
--      /* Send a message to other CPUs */
--      if (cpus_equal(mask, allbutself))
--              send_IPI_allbutself(CALL_FUNCTION_VECTOR);
--      else
--              send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
--
--      /* Wait for response */
--      while (atomic_read(&data.started) != cpus)
--              cpu_relax();
--
--      if (wait)
--              while (atomic_read(&data.finished) != cpus)
--                      cpu_relax();
--      spin_unlock(&call_lock);
--
--      return 0;
--}
--
--static void stop_this_cpu (void * dummy)
--{
--      local_irq_disable();
--      /*
--       * Remove this CPU:
--       */
--      cpu_clear(smp_processor_id(), cpu_online_map);
--      disable_all_local_evtchn();
--      if (cpu_data(smp_processor_id()).hlt_works_ok)
--              for(;;) halt();
--      for (;;);
--}
--
--/*
-- * this function calls the 'stop' function on all other CPUs in the system.
-- */
--
--void xen_smp_send_stop(void)
--{
--      /* Don't deadlock on the call lock in panic */
--      int nolock = !spin_trylock(&call_lock);
--      unsigned long flags;
--
--      local_irq_save(flags);
--      __smp_call_function(stop_this_cpu, NULL, 0, 0);
--      if (!nolock)
--              spin_unlock(&call_lock);
--      disable_all_local_evtchn();
--      local_irq_restore(flags);
--}
--
--/*
-- * Reschedule call back. Nothing to do,
-- * all the work is done automatically when
-- * we return from the interrupt.
-- */
--irqreturn_t smp_reschedule_interrupt(int irq, void *dev_id)
--{
--      __get_cpu_var(irq_stat).irq_resched_count++;
--
--      return IRQ_HANDLED;
--}
--
--#include <linux/kallsyms.h>
--irqreturn_t smp_call_function_interrupt(int irq, void *dev_id)
--{
--      void (*func) (void *info) = call_data->func;
--      void *info = call_data->info;
--      int wait = call_data->wait;
--
--      /*
--       * Notify initiating CPU that I've grabbed the data and am
--       * about to execute the function
--       */
--      mb();
--      atomic_inc(&call_data->started);
--      /*
--       * At this point the info structure may be out of scope unless wait==1
--       */
--      irq_enter();
--      (*func)(info);
--      __get_cpu_var(irq_stat).irq_call_count++;
--      irq_exit();
--
--      if (wait) {
--              mb();
--              atomic_inc(&call_data->finished);
--      }
--
--      return IRQ_HANDLED;
--}
---- a/arch/x86/kernel/smp_64-xen.c
-+++ /dev/null
-@@ -1,554 +0,0 @@
--/*
-- *    Intel SMP support routines.
-- *
-- *    (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
-- *    (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com>
-- *      (c) 2002,2003 Andi Kleen, SuSE Labs.
-- *
-- *    This code is released under the GNU General Public License version 2 or
-- *    later.
-- */
--
--#include <linux/init.h>
--
--#include <linux/mm.h>
--#include <linux/delay.h>
--#include <linux/spinlock.h>
--#include <linux/smp.h>
--#include <linux/kernel_stat.h>
--#include <linux/mc146818rtc.h>
--#include <linux/interrupt.h>
--
--#include <asm/mtrr.h>
--#include <asm/pgalloc.h>
--#include <asm/tlbflush.h>
--#include <asm/mach_apic.h>
--#include <asm/mmu_context.h>
--#include <asm/proto.h>
--#include <asm/apicdef.h>
--#include <asm/idle.h>
--#ifdef CONFIG_XEN
--#include <xen/evtchn.h>
--#endif
--
--#ifndef CONFIG_XEN
--/*
-- *    Smarter SMP flushing macros.
-- *            c/o Linus Torvalds.
-- *
-- *    These mean you can really definitely utterly forget about
-- *    writing to user space from interrupts. (It's not allowed anyway).
-- *
-- *    Optimizations Manfred Spraul <manfred@colorfullife.com>
-- *
-- *    More scalable flush, from Andi Kleen
-- *
-- *    To avoid global state use 8 different call vectors.
-- *    Each CPU uses a specific vector to trigger flushes on other
-- *    CPUs. Depending on the received vector the target CPUs look into
-- *    the right per cpu variable for the flush data.
-- *
-- *    With more than 8 CPUs they are hashed to the 8 available
-- *    vectors. The limited global vector space forces us to this right now.
-- *    In future when interrupts are split into per CPU domains this could be
-- *    fixed, at the cost of triggering multiple IPIs in some cases.
-- */
--
--union smp_flush_state {
--      struct {
--              cpumask_t flush_cpumask;
--              struct mm_struct *flush_mm;
--              unsigned long flush_va;
--              spinlock_t tlbstate_lock;
--      };
--      char pad[SMP_CACHE_BYTES];
--} ____cacheline_aligned;
--
--/* State is put into the per CPU data section, but padded
--   to a full cache line because other CPUs can access it and we don't
--   want false sharing in the per cpu data segment. */
--static DEFINE_PER_CPU(union smp_flush_state, flush_state);
--
--/*
-- * We cannot call mmdrop() because we are in interrupt context,
-- * instead update mm->cpu_vm_mask.
-- */
--void leave_mm(int cpu)
--{
--      if (read_pda(mmu_state) == TLBSTATE_OK)
--              BUG();
--      cpu_clear(cpu, read_pda(active_mm)->cpu_vm_mask);
--      load_cr3(swapper_pg_dir);
--}
--EXPORT_SYMBOL_GPL(leave_mm);
--
--/*
-- *
-- * The flush IPI assumes that a thread switch happens in this order:
-- * [cpu0: the cpu that switches]
-- * 1) switch_mm() either 1a) or 1b)
-- * 1a) thread switch to a different mm
-- * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask);
-- *    Stop ipi delivery for the old mm. This is not synchronized with
-- *    the other cpus, but smp_invalidate_interrupt ignores flush ipis
-- *    for the wrong mm, and in the worst case we perform a superfluous
-- *    tlb flush.
-- * 1a2) set cpu mmu_state to TLBSTATE_OK
-- *    Now the smp_invalidate_interrupt won't call leave_mm if cpu0
-- *    was in lazy tlb mode.
-- * 1a3) update cpu active_mm
-- *    Now cpu0 accepts tlb flushes for the new mm.
-- * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask);
-- *    Now the other cpus will send tlb flush ipis.
-- * 1a5) change cr3.
-- * 1b) thread switch without mm change
-- *    cpu active_mm is correct, cpu0 already handles
-- *    flush ipis.
-- * 1b1) set cpu mmu_state to TLBSTATE_OK
-- * 1b2) test_and_set the cpu bit in cpu_vm_mask.
-- *    Atomically set the bit [other cpus will start sending flush ipis],
-- *    and test the bit.
-- * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
-- * 2) switch %%esp, ie current
-- *
-- * The interrupt must handle 2 special cases:
-- * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm.
-- * - the cpu performs speculative tlb reads, i.e. even if the cpu only
-- *   runs in kernel space, the cpu could load tlb entries for user space
-- *   pages.
-- *
-- * The good news is that cpu mmu_state is local to each cpu, no
-- * write/read ordering problems.
-- */
--
--/*
-- * TLB flush IPI:
-- *
-- * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
-- * 2) Leave the mm if we are in the lazy tlb mode.
-- *
-- * Interrupts are disabled.
-- */
--
--asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs)
--{
--      int cpu;
--      int sender;
--      union smp_flush_state *f;
--
--      cpu = smp_processor_id();
--      /*
--       * orig_rax contains the negated interrupt vector.
--       * Use that to determine where the sender put the data.
--       */
--      sender = ~regs->orig_ax - INVALIDATE_TLB_VECTOR_START;
--      f = &per_cpu(flush_state, sender);
--
--      if (!cpu_isset(cpu, f->flush_cpumask))
--              goto out;
--              /*
--               * This was a BUG() but until someone can quote me the
--               * line from the intel manual that guarantees an IPI to
--               * multiple CPUs is retried _only_ on the erroring CPUs
--               * it's staying as a return
--               *
--               * BUG();
--               */
--
--      if (f->flush_mm == read_pda(active_mm)) {
--              if (read_pda(mmu_state) == TLBSTATE_OK) {
--                      if (f->flush_va == TLB_FLUSH_ALL)
--                              local_flush_tlb();
--                      else
--                              __flush_tlb_one(f->flush_va);
--              } else
--                      leave_mm(cpu);
--      }
--out:
--      ack_APIC_irq();
--      cpu_clear(cpu, f->flush_cpumask);
--      add_pda(irq_tlb_count, 1);
--}
--
--void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
--                           unsigned long va)
--{
--      int sender;
--      union smp_flush_state *f;
--      cpumask_t cpumask = *cpumaskp;
--
--      /* Caller has disabled preemption */
--      sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS;
--      f = &per_cpu(flush_state, sender);
--
--      /*
--       * Could avoid this lock when
--       * num_online_cpus() <= NUM_INVALIDATE_TLB_VECTORS, but it is
--       * probably not worth checking this for a cache-hot lock.
--       */
--      spin_lock(&f->tlbstate_lock);
--
--      f->flush_mm = mm;
--      f->flush_va = va;
--      cpus_or(f->flush_cpumask, cpumask, f->flush_cpumask);
--
--      /*
--       * We have to send the IPI only to
--       * CPUs affected.
--       */
--      send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR_START + sender);
--
--      while (!cpus_empty(f->flush_cpumask))
--              cpu_relax();
--
--      f->flush_mm = NULL;
--      f->flush_va = 0;
--      spin_unlock(&f->tlbstate_lock);
--}
--
--int __cpuinit init_smp_flush(void)
--{
--      int i;
--
--      for_each_cpu_mask(i, cpu_possible_map) {
--              spin_lock_init(&per_cpu(flush_state, i).tlbstate_lock);
--      }
--      return 0;
--}
--core_initcall(init_smp_flush);
--
--void flush_tlb_current_task(void)
--{
--      struct mm_struct *mm = current->mm;
--      cpumask_t cpu_mask;
--
--      preempt_disable();
--      cpu_mask = mm->cpu_vm_mask;
--      cpu_clear(smp_processor_id(), cpu_mask);
--
--      local_flush_tlb();
--      if (!cpus_empty(cpu_mask))
--              flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
--      preempt_enable();
--}
--
--void flush_tlb_mm (struct mm_struct * mm)
--{
--      cpumask_t cpu_mask;
--
--      preempt_disable();
--      cpu_mask = mm->cpu_vm_mask;
--      cpu_clear(smp_processor_id(), cpu_mask);
--
--      if (current->active_mm == mm) {
--              if (current->mm)
--                      local_flush_tlb();
--              else
--                      leave_mm(smp_processor_id());
--      }
--      if (!cpus_empty(cpu_mask))
--              flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
--
--      preempt_enable();
--}
--
--void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
--{
--      struct mm_struct *mm = vma->vm_mm;
--      cpumask_t cpu_mask;
--
--      preempt_disable();
--      cpu_mask = mm->cpu_vm_mask;
--      cpu_clear(smp_processor_id(), cpu_mask);
--
--      if (current->active_mm == mm) {
--              if(current->mm)
--                      __flush_tlb_one(va);
--              else
--                      leave_mm(smp_processor_id());
--      }
--
--      if (!cpus_empty(cpu_mask))
--              flush_tlb_others(cpu_mask, mm, va);
--
--      preempt_enable();
--}
--
--static void do_flush_tlb_all(void* info)
--{
--      unsigned long cpu = smp_processor_id();
--
--      __flush_tlb_all();
--      if (read_pda(mmu_state) == TLBSTATE_LAZY)
--              leave_mm(cpu);
--}
--
--void flush_tlb_all(void)
--{
--      on_each_cpu(do_flush_tlb_all, NULL, 1, 1);
--}
--#endif /* Xen */
--
--/*
-- * this function sends a 'reschedule' IPI to another CPU.
-- * it goes straight through and wastes no time serializing
-- * anything. Worst case is that we lose a reschedule ...
-- */
--
--void smp_send_reschedule(int cpu)
--{
--      send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
--}
--
--/*
-- * Structure and data for smp_call_function(). This is designed to minimise
-- * static memory requirements. It also looks cleaner.
-- */
--static DEFINE_SPINLOCK(call_lock);
--
--struct call_data_struct {
--      void (*func) (void *info);
--      void *info;
--      atomic_t started;
--      atomic_t finished;
--      int wait;
--};
--
--static struct call_data_struct * call_data;
--
--void lock_ipi_call_lock(void)
--{
--      spin_lock_irq(&call_lock);
--}
--
--void unlock_ipi_call_lock(void)
--{
--      spin_unlock_irq(&call_lock);
--}
--
--/*
-- * this function sends a 'generic call function' IPI to all other CPU
-- * of the system defined in the mask.
-- */
--static int __smp_call_function_mask(cpumask_t mask,
--                                  void (*func)(void *), void *info,
--                                  int wait)
--{
--      struct call_data_struct data;
--      cpumask_t allbutself;
--      int cpus;
--
--      allbutself = cpu_online_map;
--      cpu_clear(smp_processor_id(), allbutself);
--
--      cpus_and(mask, mask, allbutself);
--      cpus = cpus_weight(mask);
--
--      if (!cpus)
--              return 0;
--
--      data.func = func;
--      data.info = info;
--      atomic_set(&data.started, 0);
--      data.wait = wait;
--      if (wait)
--              atomic_set(&data.finished, 0);
--
--      call_data = &data;
--      wmb();
--
--      /* Send a message to other CPUs */
--      if (cpus_equal(mask, allbutself))
--              send_IPI_allbutself(CALL_FUNCTION_VECTOR);
--      else
--              send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
--
--      /* Wait for response */
--      while (atomic_read(&data.started) != cpus)
--              cpu_relax();
--
--      if (!wait)
--              return 0;
--
--      while (atomic_read(&data.finished) != cpus)
--              cpu_relax();
--
--      return 0;
--}
--/**
-- * smp_call_function_mask(): Run a function on a set of other CPUs.
-- * @mask: The set of cpus to run on.  Must not include the current cpu.
-- * @func: The function to run. This must be fast and non-blocking.
-- * @info: An arbitrary pointer to pass to the function.
-- * @wait: If true, wait (atomically) until function has completed on other CPUs.
-- *
-- * Returns 0 on success, else a negative status code.
-- *
-- * If @wait is true, then returns once @func has returned; otherwise
-- * it returns just before the target cpu calls @func.
-- *
-- * You must not call this function with disabled interrupts or from a
-- * hardware interrupt handler or from a bottom half handler.
-- */
--int smp_call_function_mask(cpumask_t mask,
--                         void (*func)(void *), void *info,
--                         int wait)
--{
--      int ret;
--
--      /* Can deadlock when called with interrupts disabled */
--      WARN_ON(irqs_disabled());
--
--      spin_lock(&call_lock);
--      ret = __smp_call_function_mask(mask, func, info, wait);
--      spin_unlock(&call_lock);
--      return ret;
--}
--EXPORT_SYMBOL(smp_call_function_mask);
--
--/*
-- * smp_call_function_single - Run a function on a specific CPU
-- * @func: The function to run. This must be fast and non-blocking.
-- * @info: An arbitrary pointer to pass to the function.
-- * @nonatomic: Currently unused.
-- * @wait: If true, wait until function has completed on other CPUs.
-- *
-- * Retrurns 0 on success, else a negative status code.
-- *
-- * Does not return until the remote CPU is nearly ready to execute <func>
-- * or is or has executed.
-- */
--
--int smp_call_function_single (int cpu, void (*func) (void *info), void *info,
--                            int nonatomic, int wait)
--{
--      /* prevent preemption and reschedule on another processor */
--      int ret, me = get_cpu();
--
--      /* Can deadlock when called with interrupts disabled */
--      WARN_ON(irqs_disabled());
--
--      if (cpu == me) {
--              local_irq_disable();
--              func(info);
--              local_irq_enable();
--              put_cpu();
--              return 0;
--      }
--
--      ret = smp_call_function_mask(cpumask_of_cpu(cpu), func, info, wait);
--
--      put_cpu();
--      return ret;
--}
--EXPORT_SYMBOL(smp_call_function_single);
--
--/*
-- * smp_call_function - run a function on all other CPUs.
-- * @func: The function to run. This must be fast and non-blocking.
-- * @info: An arbitrary pointer to pass to the function.
-- * @nonatomic: currently unused.
-- * @wait: If true, wait (atomically) until function has completed on other
-- *        CPUs.
-- *
-- * Returns 0 on success, else a negative status code. Does not return until
-- * remote CPUs are nearly ready to execute func or are or have executed.
-- *
-- * You must not call this function with disabled interrupts or from a
-- * hardware interrupt handler or from a bottom half handler.
-- * Actually there are a few legal cases, like panic.
-- */
--int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
--                      int wait)
--{
--      return smp_call_function_mask(cpu_online_map, func, info, wait);
--}
--EXPORT_SYMBOL(smp_call_function);
--
--static void stop_this_cpu(void *dummy)
--{
--      local_irq_disable();
--      /*
--       * Remove this CPU:
--       */
--      cpu_clear(smp_processor_id(), cpu_online_map);
--      disable_all_local_evtchn();
--      for (;;)
--              halt();
--}
--
--void smp_send_stop(void)
--{
--      int nolock;
--      unsigned long flags;
--
--#ifndef CONFIG_XEN
--      if (reboot_force)
--              return;
--#endif
--
--      /* Don't deadlock on the call lock in panic */
--      nolock = !spin_trylock(&call_lock);
--      local_irq_save(flags);
--      __smp_call_function_mask(cpu_online_map, stop_this_cpu, NULL, 0);
--      if (!nolock)
--              spin_unlock(&call_lock);
--      disable_all_local_evtchn();
--      local_irq_restore(flags);
--}
--
--/*
-- * Reschedule call back. Nothing to do,
-- * all the work is done automatically when
-- * we return from the interrupt.
-- */
--#ifndef CONFIG_XEN
--asmlinkage void smp_reschedule_interrupt(void)
--#else
--asmlinkage irqreturn_t smp_reschedule_interrupt(int irq, void *ctx)
--#endif
--{
--#ifndef CONFIG_XEN
--      ack_APIC_irq();
--#endif
--      add_pda(irq_resched_count, 1);
--#ifdef CONFIG_XEN
--      return IRQ_HANDLED;
--#endif
--}
--
--#ifndef CONFIG_XEN
--asmlinkage void smp_call_function_interrupt(void)
--#else
--asmlinkage irqreturn_t smp_call_function_interrupt(int irq, void *ctx)
--#endif
--{
--      void (*func) (void *info) = call_data->func;
--      void *info = call_data->info;
--      int wait = call_data->wait;
--
--#ifndef CONFIG_XEN
--      ack_APIC_irq();
--#endif
--      /*
--       * Notify initiating CPU that I've grabbed the data and am
--       * about to execute the function
--       */
--      mb();
--      atomic_inc(&call_data->started);
--      /*
--       * At this point the info structure may be out of scope unless wait==1
--       */
--      exit_idle();
--      irq_enter();
--      (*func)(info);
--      add_pda(irq_call_count, 1);
--      irq_exit();
--      if (wait) {
--              mb();
--              atomic_inc(&call_data->finished);
--      }
--#ifdef CONFIG_XEN
--      return IRQ_HANDLED;
--#endif
--}
---- /dev/null
-+++ b/arch/x86/kernel/smp-xen.c
-@@ -0,0 +1,329 @@
+--- /dev/null  1970-01-01 00:00:00.000000000 +0000
++++ sle11-2009-05-14/arch/x86/kernel/mpparse-xen.c     2009-03-16 16:38:05.000000000 +0100
+@@ -0,0 +1,1101 @@
++/*
++ *    Intel Multiprocessor Specification 1.1 and 1.4
++ *    compliant MP-table parsing routines.
++ *
++ *    (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
++ *    (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
++ *      (c) 2008 Alexey Starikovskiy <astarikovskiy@suse.de>
++ */
++
++#include <linux/mm.h>
++#include <linux/init.h>
++#include <linux/delay.h>
++#include <linux/bootmem.h>
++#include <linux/kernel_stat.h>
++#include <linux/mc146818rtc.h>
++#include <linux/bitops.h>
++#include <linux/acpi.h>
++#include <linux/module.h>
++
++#include <asm/smp.h>
++#include <asm/mtrr.h>
++#include <asm/mpspec.h>
++#include <asm/pgalloc.h>
++#include <asm/io_apic.h>
++#include <asm/proto.h>
++#include <asm/acpi.h>
++#include <asm/bios_ebda.h>
++
++#include <mach_apic.h>
++#ifdef CONFIG_X86_32
++#include <mach_apicdef.h>
++#include <mach_mpparse.h>
++#endif
++
++/* Have we found an MP table */
++int smp_found_config;
++
++/*
++ * Various Linux-internal data structures created from the
++ * MP-table.
++ */
++#if defined (CONFIG_MCA) || defined (CONFIG_EISA)
++int mp_bus_id_to_type[MAX_MP_BUSSES];
++#endif
++
++DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
++int mp_bus_id_to_pci_bus[MAX_MP_BUSSES] = {[0 ... MAX_MP_BUSSES - 1] = -1 };
++
++static int mp_current_pci_id;
++
++int pic_mode;
++
++/*
++ * Intel MP BIOS table parsing routines:
++ */
++
++/*
++ * Checksum an MP configuration block: add up @len bytes starting at @mp
++ * and return the low 8 bits; the callers here treat non-zero as an error.
++ */
++static int __init mpf_checksum(unsigned char *mp, int len)
++{
++      int sum = 0;
++
++      while (len--)
++              sum += *mp++;
++
++      return sum & 0xFF;
++}
++
++#ifdef CONFIG_X86_NUMAQ
++/*
++ * Have to match translation table entries to main table entries by counter
++ * hence the mpc_record variable .... can't see a less disgusting way of
++ * doing this ....
++ */
++
++static int mpc_record;
++static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY]
++    __cpuinitdata;
++#endif
++/* Record one MP-table processor entry; the Xen build only counts it. */
++static void __cpuinit MP_processor_info(struct mpc_config_processor *m)
++{
++#ifndef CONFIG_XEN
++      int apicid;
++      char *bootup_cpu = "";
++
++      if (!(m->mpc_cpuflag & CPU_ENABLED)) {
++              disabled_cpus++;        /* entry present but not enabled */
++              return;
++      }
++#ifdef CONFIG_X86_NUMAQ
++      apicid = mpc_apic_id(m, translation_table[mpc_record]);
++#else
++      apicid = m->mpc_apicid;
++#endif
++      if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
++              bootup_cpu = " (Bootup-CPU)";
++              boot_cpu_physical_apicid = m->mpc_apicid;
++      }
++
++      printk(KERN_INFO "Processor #%d%s\n", m->mpc_apicid, bootup_cpu);
++      generic_processor_info(apicid, m->mpc_apicver);
++#else /* CONFIG_XEN */
++      num_processors++;
++#endif
++}
++/* Note the bus type of one MP-table bus entry in the mp_bus_* tables. */
++static void __init MP_bus_info(struct mpc_config_bus *m)
++{
++      char str[7];
++
++      memcpy(str, m->mpc_bustype, 6);
++      str[6] = 0;     /* terminate the 6 copied bytes so str prints as a string */
++
++#ifdef CONFIG_X86_NUMAQ
++      mpc_oem_bus_info(m, str, translation_table[mpc_record]);
++#else
++      Dprintk("Bus #%d is %s\n", m->mpc_busid, str);
++#endif
++
++#if MAX_MP_BUSSES < 256
++      if (m->mpc_busid >= MAX_MP_BUSSES) {
++              printk(KERN_WARNING "MP table busid value (%d) for bustype %s "
++                     " is too large, max. supported is %d\n",
++                     m->mpc_busid, str, MAX_MP_BUSSES - 1);
++              return;
++      }
++#endif
++
++      if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA) - 1) == 0) {
++               set_bit(m->mpc_busid, mp_bus_not_pci);
++#if defined(CONFIG_EISA) || defined (CONFIG_MCA)
++              mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
++#endif
++      } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) {
++#ifdef CONFIG_X86_NUMAQ
++              mpc_oem_pci_bus(m, translation_table[mpc_record]);
++#endif
++              clear_bit(m->mpc_busid, mp_bus_not_pci);
++              mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id;
++              mp_current_pci_id++;    /* PCI ids follow table order */
++#if defined(CONFIG_EISA) || defined (CONFIG_MCA)
++              mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI;
++      } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA) - 1) == 0) {
++              mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA;
++      } else if (strncmp(str, BUSTYPE_MCA, sizeof(BUSTYPE_MCA) - 1) == 0) {
++              mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA;
++#endif
++      } else
++              printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str);
++}
++
++#ifdef CONFIG_X86_IO_APIC
++/* Panic if the I/O APIC table is full; return 1 for a zero @address, else 0. */
++static int bad_ioapic(unsigned long address)
++{
++      if (nr_ioapics >= MAX_IO_APICS) {
++              printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
++                     "(found %d)\n", MAX_IO_APICS, nr_ioapics);
++              panic("Recompile kernel with bigger MAX_IO_APICS!\n");
++      }
++      if (!address) {
++              printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address"
++                     " found in table, skipping!\n");
++              return 1;
++      }
++      return 0;
++}
++/* Append a usable MP-table I/O APIC entry to mp_ioapics[]. */
++static void __init MP_ioapic_info(struct mpc_config_ioapic *m)
++{
++      if (!(m->mpc_flags & MPC_APIC_USABLE))
++              return;
++
++      printk(KERN_INFO "I/O APIC #%d Version %d at 0x%X.\n",
++             m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr);
++
++      if (bad_ioapic(m->mpc_apicaddr))
++              return;
++
++      mp_ioapics[nr_ioapics] = *m;
++      nr_ioapics++;
++}
++/* Copy one interrupt source entry into mp_irqs[]; panic once the table fills. */
++static void __init MP_intsrc_info(struct mpc_config_intsrc *m)
++{
++      mp_irqs[mp_irq_entries] = *m;
++      Dprintk("Int: type %d, pol %d, trig %d, bus %d,"
++              " IRQ %02x, APIC ID %x, APIC INT %02x\n",
++              m->mpc_irqtype, m->mpc_irqflag & 3,
++              (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus,
++              m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq);
++      if (++mp_irq_entries == MAX_IRQ_SOURCES)
++              panic("Max # of irq sources exceeded!!\n");
++}
++
++#endif
++/* Local interrupt source entries are only logged via Dprintk, never stored. */
++static void __init MP_lintsrc_info(struct mpc_config_lintsrc *m)
++{
++      Dprintk("Lint: type %d, pol %d, trig %d, bus %d,"
++              " IRQ %02x, APIC ID %x, APIC LINT %02x\n",
++              m->mpc_irqtype, m->mpc_irqflag & 3,
++              (m->mpc_irqflag >> 2) & 3, m->mpc_srcbusid,
++              m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
++}
++
++#ifdef CONFIG_X86_NUMAQ
++static void __init MP_translation_info(struct mpc_config_translation *m)
++{
++      printk(KERN_INFO
++             "Translation: record %d, type %d, quad %d, global %d, local %d\n",
++             mpc_record, m->trans_type, m->trans_quad, m->trans_global,
++             m->trans_local);
++
++      if (mpc_record >= MAX_MPC_ENTRY)
++              printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n"); /* not stored */
++      else
++              translation_table[mpc_record] = m;      /* stash this for later */
++      if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad))
++              node_set_online(m->trans_quad); /* mark the entry's quad online */
++}
++
++/*
++ * Read/parse the MPC OEM table: check signature and checksum, then walk
++ * the entries, handing each MP_TRANSLATION record to MP_translation_info().
++ */
++static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable,
++                                  unsigned short oemsize)
++{
++      int count = sizeof(*oemtable);  /* the header size */
++      unsigned char *oemptr = ((unsigned char *)oemtable) + count;
++
++      mpc_record = 0;
++      printk(KERN_INFO "Found an OEM MPC table at %8p - parsing it ... \n",
++             oemtable);
++      if (memcmp(oemtable->oem_signature, MPC_OEM_SIGNATURE, 4)) {
++              printk(KERN_WARNING
++                     "SMP mpc oemtable: bad signature [%c%c%c%c]!\n",
++                     oemtable->oem_signature[0], oemtable->oem_signature[1],
++                     oemtable->oem_signature[2], oemtable->oem_signature[3]);
++              return;
++      }
++      if (mpf_checksum((unsigned char *)oemtable, oemtable->oem_length)) {
++              printk(KERN_WARNING "SMP oem mptable: checksum error!\n");
++              return;
++      }
++      while (count < oemtable->oem_length) {  /* @oemsize is not consulted */
++              switch (*oemptr) {
++              case MP_TRANSLATION:
++                      {
++                              struct mpc_config_translation *m =
++                                  (struct mpc_config_translation *)oemptr;
++                              MP_translation_info(m);
++                              oemptr += sizeof(*m);
++                              count += sizeof(*m);
++                              ++mpc_record;
++                              break;
++                      }
++              default:
++                      {
++                              printk(KERN_WARNING
++                                     "Unrecognised OEM table entry type! - %d\n",
++                                     (int)*oemptr);
++                              return;
++                      }
++              }
++      }
++}
++/* Warn unless @oem begins "IBM NUMA"; parse the OEM table when one is given. */
++static inline void mps_oem_check(struct mp_config_table *mpc, char *oem,
++                               char *productid)
++{
++      if (strncmp(oem, "IBM NUMA", 8))
++              printk("Warning!  May not be a NUMA-Q system!\n");
++      if (mpc->mpc_oemptr)
++              smp_read_mpc_oem((struct mp_config_oemtable *)mpc->mpc_oemptr,
++                               mpc->mpc_oemsize);
++}
++#endif /* CONFIG_X86_NUMAQ */
++
++/*
++ * Read/parse the MPC.  Returns 0 if the header fails validation, 1 after
++ * the header pass when @early is set, otherwise num_processors.
++ */
++static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early)
++{
++      char str[16];
++      char oem[10];
++      int count = sizeof(*mpc);
++      unsigned char *mpt = ((unsigned char *)mpc) + count;
++
++      if (memcmp(mpc->mpc_signature, MPC_SIGNATURE, 4)) {
++              printk(KERN_ERR "MPTABLE: bad signature [%c%c%c%c]!\n",
++                     mpc->mpc_signature[0], mpc->mpc_signature[1],
++                     mpc->mpc_signature[2], mpc->mpc_signature[3]);
++              return 0;
++      }
++      if (mpf_checksum((unsigned char *)mpc, mpc->mpc_length)) {
++              printk(KERN_ERR "MPTABLE: checksum error!\n");
++              return 0;
++      }
++      if (mpc->mpc_spec != 0x01 && mpc->mpc_spec != 0x04) {
++              printk(KERN_ERR "MPTABLE: bad table version (%d)!!\n",
++                     mpc->mpc_spec);
++              return 0;
++      }
++      if (!mpc->mpc_lapic) {
++              printk(KERN_ERR "MPTABLE: null local APIC address!\n");
++              return 0;
++      }
++      memcpy(oem, mpc->mpc_oem, 8);
++      oem[8] = 0;
++      printk(KERN_INFO "MPTABLE: OEM ID: %s ", oem);
++
++      memcpy(str, mpc->mpc_productid, 12);
++      str[12] = 0;
++      printk("Product ID: %s ", str);
++
++#ifdef CONFIG_X86_32
++      mps_oem_check(mpc, oem, str);
++#endif
++      printk(KERN_INFO "MPTABLE: Product ID: %s ", str);
++
++      printk(KERN_INFO "MPTABLE: APIC at: 0x%X\n", mpc->mpc_lapic);
++
++      /* save the local APIC address, it might be non-default */
++      if (!acpi_lapic)
++              mp_lapic_addr = mpc->mpc_lapic;
++
++      if (early)
++              return 1;
++
++      /*
++       *      Now process the configuration blocks.
++       */
++#ifdef CONFIG_X86_NUMAQ
++      mpc_record = 0;
++#endif
++      while (count < mpc->mpc_length) {
++              switch (*mpt) {
++              case MP_PROCESSOR:
++                      {
++                              struct mpc_config_processor *m =
++                                  (struct mpc_config_processor *)mpt;
++                              /* ACPI may have already provided this data */
++                              if (!acpi_lapic)
++                                      MP_processor_info(m);
++                              mpt += sizeof(*m);
++                              count += sizeof(*m);
++                              break;
++                      }
++              case MP_BUS:
++                      {
++                              struct mpc_config_bus *m =
++                                  (struct mpc_config_bus *)mpt;
++                              MP_bus_info(m);
++                              mpt += sizeof(*m);
++                              count += sizeof(*m);
++                              break;
++                      }
++              case MP_IOAPIC:
++                      {
++#ifdef CONFIG_X86_IO_APIC
++                              struct mpc_config_ioapic *m =
++                                  (struct mpc_config_ioapic *)mpt;
++                              MP_ioapic_info(m);
++#endif
++                              mpt += sizeof(struct mpc_config_ioapic);
++                              count += sizeof(struct mpc_config_ioapic);
++                              break;
++                      }
++              case MP_INTSRC:
++                      {
++#ifdef CONFIG_X86_IO_APIC
++                              struct mpc_config_intsrc *m =
++                                  (struct mpc_config_intsrc *)mpt;
++
++                              MP_intsrc_info(m);
++#endif
++                              mpt += sizeof(struct mpc_config_intsrc);
++                              count += sizeof(struct mpc_config_intsrc);
++                              break;
++                      }
++              case MP_LINTSRC:
++                      {
++                              struct mpc_config_lintsrc *m =
++                                  (struct mpc_config_lintsrc *)mpt;
++                              MP_lintsrc_info(m);
++                              mpt += sizeof(*m);
++                              count += sizeof(*m);
++                              break;
++                      }
++              default:
++                      /* unknown entry type: dump the table and end the walk */
++                      printk(KERN_ERR "Your mptable is wrong, contact your HW vendor!\n");
++                      printk(KERN_ERR "type %x\n", *mpt);
++                      print_hex_dump(KERN_ERR, "  ", DUMP_PREFIX_ADDRESS, 16,
++                                      1, mpc, mpc->mpc_length, 1);
++                      count = mpc->mpc_length;
++                      break;
++              }
++#ifdef CONFIG_X86_NUMAQ
++              ++mpc_record;
++#endif
++      }
++      setup_apic_routing();
++      if (!num_processors)
++              printk(KERN_ERR "MPTABLE: no processors registered!\n");
++      return num_processors;
++}
++
++#ifdef CONFIG_X86_IO_APIC
++/* Return @irq's bit from the ELCR ports at 0x4d0 onward (8 IRQs per port). */
++static int __init ELCR_trigger(unsigned int irq)
++{
++      unsigned int port;
++
++      port = 0x4d0 + (irq >> 3);
++      return (inb(port) >> (irq & 7)) & 1;
++}
++/* Build default interrupt source entries for IRQs 0-15 plus one ExtINT entry. */
++static void __init construct_default_ioirq_mptable(int mpc_default_type)
++{
++      struct mpc_config_intsrc intsrc;
++      int i;
++      int ELCR_fallback = 0;
++
++      intsrc.mpc_type = MP_INTSRC;
++      intsrc.mpc_irqflag = 0; /* conforming */
++      intsrc.mpc_srcbus = 0;
++      intsrc.mpc_dstapic = mp_ioapics[0].mpc_apicid;
++
++      intsrc.mpc_irqtype = mp_INT;
++
++      /*
++       *  If true, we have an ISA/PCI system with no IRQ entries
++       *  in the MP table. To prevent the PCI interrupts from being set up
++       *  incorrectly, we try to use the ELCR. The sanity check to see if
++       *  there is good ELCR data is very simple - IRQ0, 1, 2 and 13 can
++       *  never be level sensitive, so we simply see if the ELCR agrees.
++       *  If it does, we assume it's valid.
++       */
++      if (mpc_default_type == 5) {
++              printk(KERN_INFO "ISA/PCI bus type with no IRQ information... "
++                     "falling back to ELCR\n");
++
++              if (ELCR_trigger(0) || ELCR_trigger(1) || ELCR_trigger(2) ||
++                  ELCR_trigger(13))
++                      printk(KERN_ERR "ELCR contains invalid data... "
++                             "not using ELCR\n");
++              else {
++                      printk(KERN_INFO
++                             "Using ELCR to identify PCI interrupts\n");
++                      ELCR_fallback = 1;
++              }
++      }
++
++      for (i = 0; i < 16; i++) {
++              switch (mpc_default_type) {
++              case 2:
++                      if (i == 0 || i == 13)
++                              continue;       /* IRQ0 & IRQ13 not connected */
++                      /* fall through */
++              default:
++                      if (i == 2)
++                              continue;       /* IRQ2 is never connected */
++              }
++
++              if (ELCR_fallback) {
++                      /*
++                       *  If the ELCR indicates a level-sensitive interrupt, we
++                       *  copy that information over to the MP table in the
++                       *  irqflag field (level sensitive, active high polarity).
++                       */
++                      if (ELCR_trigger(i))
++                              intsrc.mpc_irqflag = 13;
++                      else
++                              intsrc.mpc_irqflag = 0;
++              }
++
++              intsrc.mpc_srcbusirq = i;
++              intsrc.mpc_dstirq = i ? i : 2;  /* IRQ0 to INTIN2 */
++              MP_intsrc_info(&intsrc);
++      }
++
++      intsrc.mpc_irqtype = mp_ExtINT;
++      intsrc.mpc_srcbusirq = 0;
++      intsrc.mpc_dstirq = 0;  /* 8259A to INTIN0 */
++      MP_intsrc_info(&intsrc);
++}
++
++#endif
++/* Synthesize the MP entries for default configuration @mpc_default_type. */
++static inline void __init construct_default_ISA_mptable(int mpc_default_type)
++{
++      struct mpc_config_processor processor;
++      struct mpc_config_bus bus;
++#ifdef CONFIG_X86_IO_APIC
++      struct mpc_config_ioapic ioapic;
++#endif
++      struct mpc_config_lintsrc lintsrc;
++      int linttypes[2] = { mp_ExtINT, mp_NMI };
++      int i;
++
++      /*
++       * local APIC has default address
++       */
++      mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
++
++      /*
++       * 2 CPUs, numbered 0 & 1.
++       */
++      processor.mpc_type = MP_PROCESSOR;
++      /* Either an integrated APIC or a discrete 82489DX. */
++      processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
++      processor.mpc_cpuflag = CPU_ENABLED;
++      processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
++          (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask;
++      processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
++      processor.mpc_reserved[0] = 0;
++      processor.mpc_reserved[1] = 0;
++      for (i = 0; i < 2; i++) {
++              processor.mpc_apicid = i;
++              MP_processor_info(&processor);
++      }
++
++      bus.mpc_type = MP_BUS;
++      bus.mpc_busid = 0;
++      switch (mpc_default_type) {
++      default:
++              printk(KERN_ERR "???\nUnknown standard configuration %d\n",
++                     mpc_default_type);
++              /* fall through */
++      case 1:
++      case 5:
++              memcpy(bus.mpc_bustype, "ISA   ", 6);
++              break;
++      case 2:
++      case 6:
++      case 3:
++              memcpy(bus.mpc_bustype, "EISA  ", 6);
++              break;
++      case 4:
++      case 7:
++              memcpy(bus.mpc_bustype, "MCA   ", 6);
++      }
++      MP_bus_info(&bus);
++      if (mpc_default_type > 4) {     /* these types also get PCI as bus 1 */
++              bus.mpc_busid = 1;
++              memcpy(bus.mpc_bustype, "PCI   ", 6);
++              MP_bus_info(&bus);
++      }
++
++#ifdef CONFIG_X86_IO_APIC
++      ioapic.mpc_type = MP_IOAPIC;
++      ioapic.mpc_apicid = 2;
++      ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
++      ioapic.mpc_flags = MPC_APIC_USABLE;
++      ioapic.mpc_apicaddr = 0xFEC00000;
++      MP_ioapic_info(&ioapic);
++
++      /*
++       * We set up most of the low 16 IO-APIC pins according to MPS rules.
++       */
++      construct_default_ioirq_mptable(mpc_default_type);
++#endif
++      lintsrc.mpc_type = MP_LINTSRC;
++      lintsrc.mpc_irqflag = 0;        /* conforming */
++      lintsrc.mpc_srcbusid = 0;
++      lintsrc.mpc_srcbusirq = 0;
++      lintsrc.mpc_destapic = MP_APIC_ALL;
++      for (i = 0; i < 2; i++) {       /* ExtINT on LINT0, NMI on LINT1 */
++              lintsrc.mpc_irqtype = linttypes[i];
++              lintsrc.mpc_destapiclint = i;
++              MP_lintsrc_info(&lintsrc);
++      }
++}
++
++static struct intel_mp_floating *mpf_found;
++
  +/*
-+ *    Intel SMP support routines.
-+ *
-+ *    (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
-+ *    (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com>
-+ *      (c) 2002,2003 Andi Kleen, SuSE Labs.
-+ *
-+ *    i386 and x86_64 integration by Glauber Costa <gcosta@redhat.com>
-+ *
-+ *    This code is released under the GNU General Public License version 2 or
-+ *    later.
++ * Scan the memory blocks for an SMP configuration block.
  + */
++static void __init __get_smp_config(unsigned early)
++{
++      struct intel_mp_floating *mpf = mpf_found;
++
++      if (acpi_lapic && early)
++              return;
++      /*
++       * ACPI supports both logical (e.g. Hyper-Threading) and physical
++       * processors, where MPS only supports physical.
++       */
++      if (acpi_lapic && acpi_ioapic) {
++              printk(KERN_INFO "Using ACPI (MADT) for SMP configuration "
++                     "information\n");
++              return;
++      } else if (acpi_lapic)
++              printk(KERN_INFO "Using ACPI for processor (LAPIC) "
++                     "configuration information\n");
++
++      printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n",
++             mpf->mpf_specification);
++#ifdef CONFIG_X86_32
++      if (mpf->mpf_feature2 & (1 << 7)) {
++              printk(KERN_INFO "    IMCR and PIC compatibility mode.\n");
++              pic_mode = 1;
++      } else {
++              printk(KERN_INFO "    Virtual Wire compatibility mode.\n");
++              pic_mode = 0;
++      }
++#endif
++      /*
++       * Now see if we need to read further.
++       */
++      if (mpf->mpf_feature1 != 0) {
++              if (early) {
++                      /*
++                       * local APIC has default address
++                       */
++                      mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
++                      return;
++              }
++
++              printk(KERN_INFO "Default MP configuration #%d\n",
++                     mpf->mpf_feature1);
++              construct_default_ISA_mptable(mpf->mpf_feature1);
++
++      } else if (mpf->mpf_physptr) {
++
++              /*
++               * Read the physical hardware table.  Anything here will
++               * override the defaults.
++               */
++              if (!smp_read_mpc(isa_bus_to_virt(mpf->mpf_physptr), early)) {
++                      smp_found_config = 0;
++                      printk(KERN_ERR
++                             "BIOS bug, MP table errors detected!...\n");
++                      printk(KERN_ERR "... disabling SMP support. "
++                             "(tell your hw vendor)\n");
++                      return;
++              }
  +
-+#include <linux/init.h>
++              if (early)
++                      return;
++#ifdef CONFIG_X86_IO_APIC
++              /*
++               * If there are no explicit MP IRQ entries, then we are
++               * broken.  We set up most of the low 16 IO-APIC pins to
++               * ISA defaults and hope it will work.
++               */
++              if (!mp_irq_entries) {
++                      struct mpc_config_bus bus;
  +
-+#include <linux/mm.h>
-+#include <linux/delay.h>
-+#include <linux/spinlock.h>
-+#include <linux/kernel_stat.h>
-+#include <linux/mc146818rtc.h>
-+#include <linux/cache.h>
-+#include <linux/interrupt.h>
-+#include <linux/cpu.h>
++                      printk(KERN_ERR "BIOS bug, no explicit IRQ entries, "
++                             "using default mptable. "
++                             "(tell your hw vendor)\n");
  +
-+#include <asm/mtrr.h>
-+#include <asm/tlbflush.h>
-+#include <asm/mmu_context.h>
-+#include <asm/proto.h>
-+#include <mach_ipi.h>
-+#include <xen/evtchn.h>
-+/*
-+ *    Some notes on x86 processor bugs affecting SMP operation:
-+ *
-+ *    Pentium, Pentium Pro, II, III (and all CPUs) have bugs.
-+ *    The Linux implications for SMP are handled as follows:
-+ *
-+ *    Pentium III / [Xeon]
-+ *            None of the E1AP-E3AP errata are visible to the user.
-+ *
-+ *    E1AP.   see PII A1AP
-+ *    E2AP.   see PII A2AP
-+ *    E3AP.   see PII A3AP
-+ *
-+ *    Pentium II / [Xeon]
-+ *            None of the A1AP-A3AP errata are visible to the user.
-+ *
-+ *    A1AP.   see PPro 1AP
-+ *    A2AP.   see PPro 2AP
-+ *    A3AP.   see PPro 7AP
-+ *
-+ *    Pentium Pro
-+ *            None of 1AP-9AP errata are visible to the normal user,
-+ *    except occasional delivery of 'spurious interrupt' as trap #15.
-+ *    This is very rare and a non-problem.
-+ *
-+ *    1AP.    Linux maps APIC as non-cacheable
-+ *    2AP.    worked around in hardware
-+ *    3AP.    fixed in C0 and above steppings microcode update.
-+ *            Linux does not use excessive STARTUP_IPIs.
-+ *    4AP.    worked around in hardware
-+ *    5AP.    symmetric IO mode (normal Linux operation) not affected.
-+ *            'noapic' mode has vector 0xf filled out properly.
-+ *    6AP.    'noapic' mode might be affected - fixed in later steppings
-+ *    7AP.    We do not assume writes to the LVT deassering IRQs
-+ *    8AP.    We do not enable low power mode (deep sleep) during MP bootup
-+ *    9AP.    We do not use mixed mode
-+ *
-+ *    Pentium
-+ *            There is a marginal case where REP MOVS on 100MHz SMP
-+ *    machines with B stepping processors can fail. XXX should provide
-+ *    an L1cache=Writethrough or L1cache=off option.
-+ *
-+ *            B stepping CPUs may hang. There are hardware work arounds
-+ *    for this. We warn about it in case your board doesn't have the work
-+ *    arounds. Basically that's so I can tell anyone with a B stepping
-+ *    CPU and SMP problems "tough".
-+ *
-+ *    Specific items [From Pentium Processor Specification Update]
-+ *
-+ *    1AP.    Linux doesn't use remote read
-+ *    2AP.    Linux doesn't trust APIC errors
-+ *    3AP.    We work around this
-+ *    4AP.    Linux never generated 3 interrupts of the same priority
-+ *            to cause a lost local interrupt.
-+ *    5AP.    Remote read is never used
-+ *    6AP.    not affected - worked around in hardware
-+ *    7AP.    not affected - worked around in hardware
-+ *    8AP.    worked around in hardware - we get explicit CS errors if not
-+ *    9AP.    only 'noapic' mode affected. Might generate spurious
-+ *            interrupts, we log only the first one and count the
-+ *            rest silently.
-+ *    10AP.   not affected - worked around in hardware
-+ *    11AP.   Linux reads the APIC between writes to avoid this, as per
-+ *            the documentation. Make sure you preserve this as it affects
-+ *            the C stepping chips too.
-+ *    12AP.   not affected - worked around in hardware
-+ *    13AP.   not affected - worked around in hardware
-+ *    14AP.   we always deassert INIT during bootup
-+ *    15AP.   not affected - worked around in hardware
-+ *    16AP.   not affected - worked around in hardware
-+ *    17AP.   not affected - worked around in hardware
-+ *    18AP.   not affected - worked around in hardware
-+ *    19AP.   not affected - worked around in BIOS
-+ *
-+ *    If this sounds worrying believe me these bugs are either ___RARE___,
-+ *    or are signal timing bugs worked around in hardware and there's
-+ *    about nothing of note with C stepping upwards.
-+ */
++                      bus.mpc_type = MP_BUS;
++                      bus.mpc_busid = 0;
++                      memcpy(bus.mpc_bustype, "ISA   ", 6);
++                      MP_bus_info(&bus);
  +
-+/*
-+ * this function sends a 'reschedule' IPI to another CPU.
-+ * it goes straight through and wastes no time serializing
-+ * anything. Worst case is that we lose a reschedule ...
-+ */
-+void xen_smp_send_reschedule(int cpu)
++                      construct_default_ioirq_mptable(0);
++              }
++#endif
++      } else
++              BUG();
++
++      if (!early)
++              printk(KERN_INFO "Processors: %d\n", num_processors);
++      /*
++       * Only use the first configuration found.
++       */
++}
++/* Runs __get_smp_config() with its 'early' argument set to 1. */
++void __init early_get_smp_config(void)
++{
++      __get_smp_config(1);
++}
++/* Runs __get_smp_config() with its 'early' argument cleared (0). */
++void __init get_smp_config(void)
++{
++      __get_smp_config(0);
++}
++/* Scan 'length' bytes at ISA bus address 'base' for an MP floating pointer; returns 1 if found, else 0. */
++static int __init smp_scan_config(unsigned long base, unsigned long length,
++                                unsigned reserve)
++{
++      unsigned int *bp = isa_bus_to_virt(base);
++      struct intel_mp_floating *mpf;
++
++      Dprintk("Scan SMP from %p for %ld bytes.\n", bp, length);
++      BUILD_BUG_ON(sizeof(*mpf) != 16);
++      /* One candidate per step: the loop below does bp += 4 and length -= 16. */
++      while (length > 0) {
++              mpf = (struct intel_mp_floating *)bp;
++              if ((*bp == SMP_MAGIC_IDENT) &&
++                  (mpf->mpf_length == 1) &&
++                  !mpf_checksum((unsigned char *)bp, 16) &&
++                  ((mpf->mpf_specification == 1)
++                   || (mpf->mpf_specification == 4))) {
++                      /* Magic, length, checksum and spec revision (1 or 4) all match. */
++                      smp_found_config = 1;
++                      mpf_found = mpf;
++#ifdef CONFIG_X86_32
++#ifndef CONFIG_XEN
++                      printk(KERN_INFO "found SMP MP-table at [%p] %08lx\n",
++                             mpf, virt_to_phys(mpf));
++                      reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE,
++                                      BOOTMEM_DEFAULT);
++                      if (mpf->mpf_physptr) {
++                              /*
++                               * We cannot access the MPC table to compute
++                               * its size yet, as only a few megabytes at
++                               * the bottom are mapped now.
++                               * On PC-9800 the MPC table sits at the very end
++                               * of physical memory, so simply reserving
++                               * PAGE_SIZE from mpf->mpf_physptr yields BUG()
++                               * in reserve_bootmem; clamp to max_low_pfn.
++                               */
++                              unsigned long size = PAGE_SIZE;
++                              unsigned long end = max_low_pfn * PAGE_SIZE;
++                              if (mpf->mpf_physptr + size > end)
++                                      size = end - mpf->mpf_physptr;
++                              reserve_bootmem(mpf->mpf_physptr, size,
++                                              BOOTMEM_DEFAULT);
++                      }
++#else /* CONFIG_XEN */
++                      printk(KERN_INFO "found SMP MP-table at [%p] %08lx\n",
++                              mpf, ((void *)bp - isa_bus_to_virt(base)) + base);
++#endif /* CONFIG_XEN */
++#elif !defined(CONFIG_XEN)
++                      if (!reserve)   /* only this branch looks at 'reserve' */
++                              return 1;       /* found; skip the reservations below */
++
++                      reserve_bootmem_generic(virt_to_phys(mpf), PAGE_SIZE);
++                      if (mpf->mpf_physptr)
++                              reserve_bootmem_generic(mpf->mpf_physptr,
++                                                      PAGE_SIZE);
++#endif /* CONFIG_X86_32 */
++              return 1;
++              }
++              bp += 4;
++              length -= 16;
++      }
++      return 0;
++}
++/* Try the conventional MP floating pointer locations in order, stopping at the first hit. */
++static void __init __find_smp_config(unsigned reserve)
++{
++#ifndef CONFIG_XEN
++      unsigned int address;
++#endif /* !CONFIG_XEN */
++
++      /*
++       * FIXME: Linux assumes you have 640K of base RAM;
++       * this code keeps that assumption (639 * 0x400 below).
++       *
++       * 1) Scan the bottom 1K for a signature (0x0, 0x400 bytes)
++       * 2) Scan the top 1K of base RAM (639K, 0x400 bytes)
++       * 3) Scan the 64K of BIOS (0xF0000, 0x10000 bytes)
++       */
++      if (smp_scan_config(0x0, 0x400, reserve) ||
++          smp_scan_config(639 * 0x400, 0x400, reserve) ||
++          smp_scan_config(0xF0000, 0x10000, reserve))
++              return;
++      /*
++       * If it is an SMP machine we should know now, unless the
++       * configuration is in an EISA/MCA bus machine with an
++       * extended BIOS data area.
++       *
++       * There is a real-mode segmented pointer pointing to the
++       * 4K EBDA area at 0x40E; calculate and scan it here.
++       *
++       * NOTE! There are Linux loaders that will corrupt the EBDA
++       * area, and as such this kind of SMP config may be less
++       * trustworthy, simply because the SMP table may have been
++       * stomped on during early boot. These loaders are buggy and
++       * should be fixed.
++       *
++       * The MP 1.4 spec says to scan only the first 1K of the 4K EBDA.
++       */
++      /* Compiled out under CONFIG_XEN; the scan's return value is not checked. */
++#ifndef CONFIG_XEN
++      address = get_bios_ebda();
++      if (address)
++              smp_scan_config(address, 0x400, reserve);
++#endif /* !CONFIG_XEN */
++}
++
++void __init early_find_smp_config(void)
  +{
-+      if (unlikely(cpu_is_offline(cpu))) {
-+              WARN_ON(1);
-+              return;
-+      }
-+      send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
++      __find_smp_config(0);
++}
++
++void __init find_smp_config(void)
++{
++      __find_smp_config(1);
  +}
  +
++/* --------------------------------------------------------------------------
++                            ACPI-based MP Configuration
++   -------------------------------------------------------------------------- */
++
  +/*
-+ * Structure and data for smp_call_function(). This is designed to minimise
-+ * static memory requirements. It also looks cleaner.
++ * Keep this outside and initialized to 0, for !CONFIG_ACPI builds:
  + */
-+static DEFINE_SPINLOCK(call_lock);
++int es7000_plat;
  +
-+struct call_data_struct {
-+      void (*func) (void *info);
-+      void *info;
-+      atomic_t started;
-+      atomic_t finished;
-+      int wait;
-+};
++#ifdef CONFIG_ACPI
  +
-+void lock_ipi_call_lock(void)
++#ifdef        CONFIG_X86_IO_APIC
++
++#define MP_ISA_BUS            0
++
++extern struct mp_ioapic_routing mp_ioapic_routing[MAX_IO_APICS];
++
++static int mp_find_ioapic(int gsi)
  +{
-+      spin_lock_irq(&call_lock);
++      int i = 0;
++
++      /* Find the IOAPIC that manages this GSI. */
++      for (i = 0; i < nr_ioapics; i++) {
++              if ((gsi >= mp_ioapic_routing[i].gsi_base)
++                  && (gsi <= mp_ioapic_routing[i].gsi_end))
++                      return i;
++      }
++
++      printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi);
++      return -1;
  +}
  +
-+void unlock_ipi_call_lock(void)
++static u8 __init uniq_ioapic_id(u8 id)
  +{
-+      spin_unlock_irq(&call_lock);
++#ifdef CONFIG_X86_32
++      if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
++          !APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
++              return io_apic_get_unique_id(nr_ioapics, id);
++      else
++              return id;
++#else
++      int i;
++      DECLARE_BITMAP(used, 256);
++      bitmap_zero(used, 256);
++      for (i = 0; i < nr_ioapics; i++) {
++              struct mpc_config_ioapic *ia = &mp_ioapics[i];
++              __set_bit(ia->mpc_apicid, used);
++      }
++      if (!test_bit(id, used))
++              return id;
++      return find_first_zero_bit(used, 256);
++#endif
  +}
  +
-+static struct call_data_struct *call_data;
-+
-+static void __smp_call_function(void (*func) (void *info), void *info,
-+                              int nonatomic, int wait)
++void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
  +{
-+      struct call_data_struct data;
-+      int cpus = num_online_cpus() - 1;
++      int idx = 0;
  +
-+      if (!cpus)
++      if (bad_ioapic(address))
  +              return;
  +
-+      data.func = func;
-+      data.info = info;
-+      atomic_set(&data.started, 0);
-+      data.wait = wait;
-+      if (wait)
-+              atomic_set(&data.finished, 0);
++      idx = nr_ioapics;
  +
-+      call_data = &data;
-+      mb();
++      mp_ioapics[idx].mpc_type = MP_IOAPIC;
++      mp_ioapics[idx].mpc_flags = MPC_APIC_USABLE;
++      mp_ioapics[idx].mpc_apicaddr = address;
  +
-+      /* Send a message to all other CPUs and wait for them to respond */
-+      send_IPI_allbutself(CALL_FUNCTION_VECTOR);
++#ifndef CONFIG_XEN
++      set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
++#endif
++      mp_ioapics[idx].mpc_apicid = uniq_ioapic_id(id);
++#ifdef CONFIG_X86_32
++      mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx);
++#else
++      mp_ioapics[idx].mpc_apicver = 0;
++#endif
++      /*
++       * Build basic GSI lookup table to facilitate gsi->io_apic lookups
++       * and to prevent reprogramming of IOAPIC pins (PCI GSIs).
++       */
++      mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mpc_apicid;
++      mp_ioapic_routing[idx].gsi_base = gsi_base;
++      mp_ioapic_routing[idx].gsi_end = gsi_base +
++          io_apic_get_redir_entries(idx);
  +
-+      /* Wait for response */
-+      while (atomic_read(&data.started) != cpus)
-+              cpu_relax();
++      printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, "
++             "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid,
++             mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr,
++             mp_ioapic_routing[idx].gsi_base, mp_ioapic_routing[idx].gsi_end);
  +
-+      if (wait)
-+              while (atomic_read(&data.finished) != cpus)
-+                      cpu_relax();
++      nr_ioapics++;
  +}
  +
-+
-+/**
-+ * smp_call_function_mask(): Run a function on a set of other CPUs.
-+ * @mask: The set of cpus to run on.  Must not include the current cpu.
-+ * @func: The function to run. This must be fast and non-blocking.
-+ * @info: An arbitrary pointer to pass to the function.
-+ * @wait: If true, wait (atomically) until function has completed on other CPUs.
-+ *
-+  * Returns 0 on success, else a negative status code.
-+ *
-+ * If @wait is true, then returns once @func has returned; otherwise
-+ * it returns just before the target cpu calls @func.
-+ *
-+ * You must not call this function with disabled interrupts or from a
-+ * hardware interrupt handler or from a bottom half handler.
-+ */
-+int
-+xen_smp_call_function_mask(cpumask_t mask,
-+                            void (*func)(void *), void *info,
-+                            int wait)
++void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
  +{
-+      struct call_data_struct data;
-+      cpumask_t allbutself;
-+      int cpus;
++      struct mpc_config_intsrc intsrc;
++      int ioapic = -1;
++      int pin = -1;
  +
-+      /* Can deadlock when called with interrupts disabled */
-+      WARN_ON(irqs_disabled());
++      /*
++       * Convert 'gsi' to 'ioapic.pin'.
++       */
++      ioapic = mp_find_ioapic(gsi);
++      if (ioapic < 0)
++              return;
++      pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
  +
-+      /* Holding any lock stops cpus from going down. */
-+      spin_lock(&call_lock);
++      /*
++       * TBD: This check is for faulty timer entries, where the override
++       *      erroneously sets the trigger to level, resulting in a HUGE
++       *      increase of timer interrupts!
++       */
++      if ((bus_irq == 0) && (trigger == 3))
++              trigger = 1;
  +
-+      allbutself = cpu_online_map;
-+      cpu_clear(smp_processor_id(), allbutself);
++      intsrc.mpc_type = MP_INTSRC;
++      intsrc.mpc_irqtype = mp_INT;
++      intsrc.mpc_irqflag = (trigger << 2) | polarity;
++      intsrc.mpc_srcbus = MP_ISA_BUS;
++      intsrc.mpc_srcbusirq = bus_irq; /* IRQ */
++      intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid;     /* APIC ID */
++      intsrc.mpc_dstirq = pin;        /* INTIN# */
  +
-+      cpus_and(mask, mask, allbutself);
-+      cpus = cpus_weight(mask);
++      MP_intsrc_info(&intsrc);
++}
  +
-+      if (!cpus) {
-+              spin_unlock(&call_lock);
-+              return 0;
-+      }
++void __init mp_config_acpi_legacy_irqs(void)
++{
++      struct mpc_config_intsrc intsrc;
++      int i = 0;
++      int ioapic = -1;
  +
-+      data.func = func;
-+      data.info = info;
-+      atomic_set(&data.started, 0);
-+      data.wait = wait;
-+      if (wait)
-+              atomic_set(&data.finished, 0);
++#if defined (CONFIG_MCA) || defined (CONFIG_EISA)
++      /*
++       * Fabricate the legacy ISA bus (bus #0, i.e. MP_ISA_BUS).
++       */
++      mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA;
++#endif
++      set_bit(MP_ISA_BUS, mp_bus_not_pci);
++      Dprintk("Bus #%d is ISA\n", MP_ISA_BUS);
  +
-+      call_data = &data;
-+      wmb();
++      /*
++       * Older generations of ES7000 have no legacy identity mappings
++       */
++      if (es7000_plat == 1)
++              return;
  +
-+      /* Send a message to other CPUs */
-+      if (cpus_equal(mask, allbutself) &&
-+          cpus_equal(cpu_online_map, cpu_callout_map))
-+              send_IPI_allbutself(CALL_FUNCTION_VECTOR);
-+      else
-+              send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
++      /*
++       * Locate the IOAPIC that manages the ISA IRQs (0-15).
++       */
++      ioapic = mp_find_ioapic(0);
++      if (ioapic < 0)
++              return;
  +
-+      /* Wait for response */
-+      while (atomic_read(&data.started) != cpus)
-+              cpu_relax();
++      intsrc.mpc_type = MP_INTSRC;
++      intsrc.mpc_irqflag = 0; /* Conforming */
++      intsrc.mpc_srcbus = MP_ISA_BUS;
++#ifdef CONFIG_X86_IO_APIC
++      intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid;
++#endif
++      /*
++       * Use the default configuration for the IRQs 0-15.  Unless
++       * overridden by (MADT) interrupt source override entries.
++       */
++      for (i = 0; i < 16; i++) {
++              int idx;
  +
-+      if (wait)
-+              while (atomic_read(&data.finished) != cpus)
-+                      cpu_relax();
-+      spin_unlock(&call_lock);
++              for (idx = 0; idx < mp_irq_entries; idx++) {
++                      struct mpc_config_intsrc *irq = mp_irqs + idx;
++
++                      /* Do we already have a mapping for this ISA IRQ? */
++                      if (irq->mpc_srcbus == MP_ISA_BUS
++                          && irq->mpc_srcbusirq == i)
++                              break;
++
++                      /* Do we already have a mapping for this IOAPIC pin? */
++                      if ((irq->mpc_dstapic == intsrc.mpc_dstapic) &&
++                          (irq->mpc_dstirq == i))
++                              break;
++              }
++
++              if (idx != mp_irq_entries) {
++                      printk(KERN_DEBUG "ACPI: IRQ%d used by override.\n", i);
++                      continue;       /* IRQ already used */
++              }
  +
-+      return 0;
++              intsrc.mpc_irqtype = mp_INT;
++              intsrc.mpc_srcbusirq = i;       /* Identity mapped */
++              intsrc.mpc_dstirq = i;
++
++              MP_intsrc_info(&intsrc);
++      }
  +}
  +
-+static void stop_this_cpu(void *dummy)
++int mp_register_gsi(u32 gsi, int triggering, int polarity)
  +{
-+      local_irq_disable();
++      int ioapic;
++      int ioapic_pin;
++#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN)
++#define MAX_GSI_NUM   4096
++#define IRQ_COMPRESSION_START 64
++
++      static int pci_irq = IRQ_COMPRESSION_START;
  +      /*
-+       * Remove this CPU:
++       * Mapping between Global System Interrupts, which
++       * represent all possible interrupts, and IRQs
++       * assigned to actual devices.
  +       */
-+      cpu_clear(smp_processor_id(), cpu_online_map);
-+      disable_all_local_evtchn();
-+      if (hlt_works(smp_processor_id()))
-+              for (;;) halt();
-+      for (;;);
-+}
++      static int gsi_to_irq[MAX_GSI_NUM];
++#else
  +
-+/*
-+ * this function calls the 'stop' function on all other CPUs in the system.
-+ */
++      if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC)
++              return gsi;
++#endif
  +
-+void xen_smp_send_stop(void)
-+{
-+      int nolock;
-+      unsigned long flags;
++      /* Don't set up the ACPI SCI because it's already set up */
++      if (acpi_gbl_FADT.sci_interrupt == gsi)
++              return gsi;
  +
-+      /* Don't deadlock on the call lock in panic */
-+      nolock = !spin_trylock(&call_lock);
-+      local_irq_save(flags);
-+      __smp_call_function(stop_this_cpu, NULL, 0, 0);
-+      if (!nolock)
-+              spin_unlock(&call_lock);
-+      disable_all_local_evtchn();
-+      local_irq_restore(flags);
-+}
++      ioapic = mp_find_ioapic(gsi);
++      if (ioapic < 0) {
++              printk(KERN_WARNING "No IOAPIC for GSI %u\n", gsi);
++              return gsi;
++      }
  +
-+/*
-+ * Reschedule call back. Nothing to do,
-+ * all the work is done automatically when
-+ * we return from the interrupt.
-+ */
-+irqreturn_t smp_reschedule_interrupt(int irq, void *dev_id)
-+{
-+#ifdef CONFIG_X86_32
-+      __get_cpu_var(irq_stat).irq_resched_count++;
-+#else
-+      add_pda(irq_resched_count, 1);
-+#endif
-+      return IRQ_HANDLED;
-+}
++      ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
  +
-+irqreturn_t smp_call_function_interrupt(int irq, void *dev_id)
-+{
-+      void (*func) (void *info) = call_data->func;
-+      void *info = call_data->info;
-+      int wait = call_data->wait;
++#ifndef CONFIG_X86_32
++      if (ioapic_renumber_irq)
++              gsi = ioapic_renumber_irq(ioapic, gsi);
++#endif
  +
  +      /*
-+       * Notify initiating CPU that I've grabbed the data and am
-+       * about to execute the function
-+       */
-+      mb();
-+      atomic_inc(&call_data->started);
-+      /*
-+       * At this point the info structure may be out of scope unless wait==1
++       * Avoid pin reprogramming.  PRTs typically include entries
++       * with redundant pin->gsi mappings (but unique PCI devices);
++       * we only program the IOAPIC on the first.
  +       */
-+      irq_enter();
-+      (*func)(info);
-+#ifdef CONFIG_X86_32
-+      __get_cpu_var(irq_stat).irq_call_count++;
++      if (ioapic_pin > MP_MAX_IOAPIC_PIN) {
++              printk(KERN_ERR "Invalid reference to IOAPIC pin "
++                     "%d-%d\n", mp_ioapic_routing[ioapic].apic_id,
++                     ioapic_pin);
++              return gsi;
++      }
++      if (test_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed)) {
++              Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n",
++                      mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
++#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN)
++              return (gsi < IRQ_COMPRESSION_START ? gsi : gsi_to_irq[gsi]);
  +#else
-+      add_pda(irq_call_count, 1);
++              return gsi;
  +#endif
-+      irq_exit();
-+
-+      if (wait) {
-+              mb();
-+              atomic_inc(&call_data->finished);
  +      }
  +
-+      return IRQ_HANDLED;
++      set_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed);
++#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN)
++      /*
++       * For GSI >= 64, use IRQ compression
++       */
++      if ((gsi >= IRQ_COMPRESSION_START)
++          && (triggering == ACPI_LEVEL_SENSITIVE)) {
++              /*
++               * For PCI devices assign IRQs in order, avoiding gaps
++               * due to unused I/O APIC pins.
++               */
++              int irq = gsi;
++              if (gsi < MAX_GSI_NUM) {
++                      /*
++                       * Retain the VIA chipset work-around (gsi > 15), but
++                       * avoid a problem where the 8254 timer (IRQ0) is setup
++                       * via an override (so it's not on pin 0 of the ioapic),
++                       * and at the same time, the pin 0 interrupt is a PCI
++                       * type.  The gsi > 15 test could cause these two pins
++                       * to be shared as IRQ0, and they are not shareable.
++                       * So test for this condition, and if necessary, avoid
++                       * the pin collision.
++                       */
++                      gsi = pci_irq++;
++                      /*
++                       * Don't assign IRQ used by ACPI SCI
++                       */
++                      if (gsi == acpi_gbl_FADT.sci_interrupt)
++                              gsi = pci_irq++;
++                      gsi_to_irq[irq] = gsi;
++              } else {
++                      printk(KERN_ERR "GSI %u is too high\n", gsi);
++                      return gsi;
++              }
++      }
++#endif
++      io_apic_set_pci_routing(ioapic, ioapic_pin, gsi,
++                              triggering == ACPI_EDGE_SENSITIVE ? 0 : 1,
++                              polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
++      return gsi;
  +}
---- a/arch/x86/kernel/time_32-xen.c
-+++ b/arch/x86/kernel/time_32-xen.c
-@@ -701,8 +701,6 @@ int xen_update_persistent_clock(void)
-       return 0;
- }
- 
--extern void (*late_time_init)(void);
++
++#endif /* CONFIG_X86_IO_APIC */
++#endif /* CONFIG_ACPI */
+--- sle11-2009-05-14.orig/arch/x86/kernel/mpparse_32-xen.c     2009-03-16 16:33:40.000000000 +0100
++++ /dev/null  1970-01-01 00:00:00.000000000 +0000
+@@ -1,1161 +0,0 @@
+-/*
+- *    Intel Multiprocessor Specification 1.1 and 1.4
+- *    compliant MP-table parsing routines.
+- *
+- *    (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
+- *    (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
+- *
+- *    Fixes
+- *            Erich Boleyn    :       MP v1.4 and additional changes.
+- *            Alan Cox        :       Added EBDA scanning
+- *            Ingo Molnar     :       various cleanups and rewrites
+- *            Maciej W. Rozycki:      Bits for default MP configurations
+- *            Paul Diefenbaugh:       Added full ACPI support
+- */
+-
+-#include <linux/mm.h>
+-#include <linux/init.h>
+-#include <linux/acpi.h>
+-#include <linux/delay.h>
+-#include <linux/bootmem.h>
+-#include <linux/kernel_stat.h>
+-#include <linux/mc146818rtc.h>
+-#include <linux/bitops.h>
+-
+-#include <asm/smp.h>
+-#include <asm/acpi.h>
+-#include <asm/mtrr.h>
+-#include <asm/mpspec.h>
+-#include <asm/io_apic.h>
+-
+-#include <mach_apic.h>
+-#include <mach_apicdef.h>
+-#include <mach_mpparse.h>
+-#include <bios_ebda.h>
+-
+-/* Have we found an MP table */
+-int smp_found_config;
+-unsigned int __cpuinitdata maxcpus = NR_CPUS;
+-
+-/*
+- * Various Linux-internal data structures created from the
+- * MP-table.
+- */
+-int apic_version [MAX_APICS];
+-int mp_bus_id_to_type [MAX_MP_BUSSES];
+-int mp_bus_id_to_node [MAX_MP_BUSSES];
+-int mp_bus_id_to_local [MAX_MP_BUSSES];
+-int quad_local_to_mp_bus_id [NR_CPUS/4][4];
+-int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
+-static int mp_current_pci_id;
+-
+-/* I/O APIC entries */
+-struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
+-
+-/* # of MP IRQ source entries */
+-struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
+-
+-/* MP IRQ source entries */
+-int mp_irq_entries;
+-
+-int nr_ioapics;
+-
+-int pic_mode;
+-unsigned long mp_lapic_addr;
+-
+-unsigned int def_to_bigsmp = 0;
+-
+-/* Processor that is doing the boot up */
+-unsigned int boot_cpu_physical_apicid = -1U;
+-/* Internal processor count */
+-unsigned int num_processors;
+-
+-/* Bitmask of physically existing CPUs */
+-physid_mask_t phys_cpu_present_map;
+-
+-u8 bios_cpu_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
+-
+-/*
+- * Intel MP BIOS table parsing routines:
+- */
+-
+-
+-/*
+- * Checksum an MP configuration block.
+- */
+-
+-static int __init mpf_checksum(unsigned char *mp, int len)
+-{
+-      int sum = 0;
+-
+-      while (len--)
+-              sum += *mp++;
+-
+-      return sum & 0xFF;
+-}
+-
+-/*
+- * Have to match translation table entries to main table entries by counter
+- * hence the mpc_record variable .... can't see a less disgusting way of
+- * doing this ....
+- */
+-
+-static int mpc_record; 
+-static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] __cpuinitdata;
+-
+-#ifndef CONFIG_XEN
+-static void __cpuinit MP_processor_info (struct mpc_config_processor *m)
+-{
+-      int ver, apicid;
+-      physid_mask_t phys_cpu;
+-      
+-      if (!(m->mpc_cpuflag & CPU_ENABLED))
+-              return;
+-
+-      apicid = mpc_apic_id(m, translation_table[mpc_record]);
+-
+-      if (m->mpc_featureflag&(1<<0))
+-              Dprintk("    Floating point unit present.\n");
+-      if (m->mpc_featureflag&(1<<7))
+-              Dprintk("    Machine Exception supported.\n");
+-      if (m->mpc_featureflag&(1<<8))
+-              Dprintk("    64 bit compare & exchange supported.\n");
+-      if (m->mpc_featureflag&(1<<9))
+-              Dprintk("    Internal APIC present.\n");
+-      if (m->mpc_featureflag&(1<<11))
+-              Dprintk("    SEP present.\n");
+-      if (m->mpc_featureflag&(1<<12))
+-              Dprintk("    MTRR  present.\n");
+-      if (m->mpc_featureflag&(1<<13))
+-              Dprintk("    PGE  present.\n");
+-      if (m->mpc_featureflag&(1<<14))
+-              Dprintk("    MCA  present.\n");
+-      if (m->mpc_featureflag&(1<<15))
+-              Dprintk("    CMOV  present.\n");
+-      if (m->mpc_featureflag&(1<<16))
+-              Dprintk("    PAT  present.\n");
+-      if (m->mpc_featureflag&(1<<17))
+-              Dprintk("    PSE  present.\n");
+-      if (m->mpc_featureflag&(1<<18))
+-              Dprintk("    PSN  present.\n");
+-      if (m->mpc_featureflag&(1<<19))
+-              Dprintk("    Cache Line Flush Instruction present.\n");
+-      /* 20 Reserved */
+-      if (m->mpc_featureflag&(1<<21))
+-              Dprintk("    Debug Trace and EMON Store present.\n");
+-      if (m->mpc_featureflag&(1<<22))
+-              Dprintk("    ACPI Thermal Throttle Registers  present.\n");
+-      if (m->mpc_featureflag&(1<<23))
+-              Dprintk("    MMX  present.\n");
+-      if (m->mpc_featureflag&(1<<24))
+-              Dprintk("    FXSR  present.\n");
+-      if (m->mpc_featureflag&(1<<25))
+-              Dprintk("    XMM  present.\n");
+-      if (m->mpc_featureflag&(1<<26))
+-              Dprintk("    Willamette New Instructions  present.\n");
+-      if (m->mpc_featureflag&(1<<27))
+-              Dprintk("    Self Snoop  present.\n");
+-      if (m->mpc_featureflag&(1<<28))
+-              Dprintk("    HT  present.\n");
+-      if (m->mpc_featureflag&(1<<29))
+-              Dprintk("    Thermal Monitor present.\n");
+-      /* 30, 31 Reserved */
+-
+-
+-      if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
+-              Dprintk("    Bootup CPU\n");
+-              boot_cpu_physical_apicid = m->mpc_apicid;
+-      }
+-
+-      ver = m->mpc_apicver;
+-
+-      /*
+-       * Validate version
+-       */
+-      if (ver == 0x0) {
+-              printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! "
+-                              "fixing up to 0x10. (tell your hw vendor)\n",
+-                              m->mpc_apicid);
+-              ver = 0x10;
+-      }
+-      apic_version[m->mpc_apicid] = ver;
+-
+-      phys_cpu = apicid_to_cpu_present(apicid);
+-      physids_or(phys_cpu_present_map, phys_cpu_present_map, phys_cpu);
+-
+-      if (num_processors >= NR_CPUS) {
+-              printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
+-                      "  Processor ignored.\n", NR_CPUS);
+-              return;
+-      }
+-
+-      if (num_processors >= maxcpus) {
+-              printk(KERN_WARNING "WARNING: maxcpus limit of %i reached."
+-                      " Processor ignored.\n", maxcpus);
+-              return;
+-      }
+-
+-      cpu_set(num_processors, cpu_possible_map);
+-      num_processors++;
+-
+-      /*
+-       * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y
+-       * but we need to work other dependencies like SMP_SUSPEND etc
+-       * before this can be done without some confusion.
+-       * if (CPU_HOTPLUG_ENABLED || num_processors > 8)
+-       *       - Ashok Raj <ashok.raj@intel.com>
+-       */
+-      if (num_processors > 8) {
+-              switch (boot_cpu_data.x86_vendor) {
+-              case X86_VENDOR_INTEL:
+-                      if (!APIC_XAPIC(ver)) {
+-                              def_to_bigsmp = 0;
+-                              break;
+-                      }
+-                      /* If P4 and above fall through */
+-              case X86_VENDOR_AMD:
+-                      def_to_bigsmp = 1;
+-              }
+-      }
+-      bios_cpu_apicid[num_processors - 1] = m->mpc_apicid;
+-}
+-#else
+-static void __cpuinit MP_processor_info (struct mpc_config_processor *m)
+-{
+-      num_processors++;
+-}
+-#endif /* CONFIG_XEN */
+-
+-static void __init MP_bus_info (struct mpc_config_bus *m)
+-{
+-      char str[7];
+-
+-      memcpy(str, m->mpc_bustype, 6);
+-      str[6] = 0;
+-
+-      mpc_oem_bus_info(m, str, translation_table[mpc_record]);
+-
+-#if MAX_MP_BUSSES < 256
+-      if (m->mpc_busid >= MAX_MP_BUSSES) {
+-              printk(KERN_WARNING "MP table busid value (%d) for bustype %s "
+-                      " is too large, max. supported is %d\n",
+-                      m->mpc_busid, str, MAX_MP_BUSSES - 1);
+-              return;
+-      }
+-#endif
+-
+-      if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA)-1) == 0) {
+-              mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
+-      } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA)-1) == 0) {
+-              mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA;
+-      } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI)-1) == 0) {
+-              mpc_oem_pci_bus(m, translation_table[mpc_record]);
+-              mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI;
+-              mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id;
+-              mp_current_pci_id++;
+-      } else if (strncmp(str, BUSTYPE_MCA, sizeof(BUSTYPE_MCA)-1) == 0) {
+-              mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA;
+-      } else {
+-              printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str);
+-      }
+-}
+-
+-static void __init MP_ioapic_info (struct mpc_config_ioapic *m)
+-{
+-      if (!(m->mpc_flags & MPC_APIC_USABLE))
+-              return;
+-
+-      printk(KERN_INFO "I/O APIC #%d Version %d at 0x%X.\n",
+-              m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr);
+-      if (nr_ioapics >= MAX_IO_APICS) {
+-              printk(KERN_CRIT "Max # of I/O APICs (%d) exceeded (found %d).\n",
+-                      MAX_IO_APICS, nr_ioapics);
+-              panic("Recompile kernel with bigger MAX_IO_APICS!.\n");
+-      }
+-      if (!m->mpc_apicaddr) {
+-              printk(KERN_ERR "WARNING: bogus zero I/O APIC address"
+-                      " found in MP table, skipping!\n");
+-              return;
+-      }
+-      mp_ioapics[nr_ioapics] = *m;
+-      nr_ioapics++;
+-}
+-
+-static void __init MP_intsrc_info (struct mpc_config_intsrc *m)
+-{
+-      mp_irqs [mp_irq_entries] = *m;
+-      Dprintk("Int: type %d, pol %d, trig %d, bus %d,"
+-              " IRQ %02x, APIC ID %x, APIC INT %02x\n",
+-                      m->mpc_irqtype, m->mpc_irqflag & 3,
+-                      (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus,
+-                      m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq);
+-      if (++mp_irq_entries == MAX_IRQ_SOURCES)
+-              panic("Max # of irq sources exceeded!!\n");
+-}
+-
+-static void __init MP_lintsrc_info (struct mpc_config_lintsrc *m)
+-{
+-      Dprintk("Lint: type %d, pol %d, trig %d, bus %d,"
+-              " IRQ %02x, APIC ID %x, APIC LINT %02x\n",
+-                      m->mpc_irqtype, m->mpc_irqflag & 3,
+-                      (m->mpc_irqflag >> 2) &3, m->mpc_srcbusid,
+-                      m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
+-}
+-
+-#ifdef CONFIG_X86_NUMAQ
+-static void __init MP_translation_info (struct mpc_config_translation *m)
+-{
+-      printk(KERN_INFO "Translation: record %d, type %d, quad %d, global %d, local %d\n", mpc_record, m->trans_type, m->trans_quad, m->trans_global, m->trans_local);
+-
+-      if (mpc_record >= MAX_MPC_ENTRY) 
+-              printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n");
+-      else
+-              translation_table[mpc_record] = m; /* stash this for later */
+-      if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad))
+-              node_set_online(m->trans_quad);
+-}
+-
+-/*
+- * Read/parse the MPC oem tables
+- */
+-
+-static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable, \
+-      unsigned short oemsize)
+-{
+-      int count = sizeof (*oemtable); /* the header size */
+-      unsigned char *oemptr = ((unsigned char *)oemtable)+count;
+-      
+-      mpc_record = 0;
+-      printk(KERN_INFO "Found an OEM MPC table at %8p - parsing it ... \n", oemtable);
+-      if (memcmp(oemtable->oem_signature,MPC_OEM_SIGNATURE,4))
+-      {
+-              printk(KERN_WARNING "SMP mpc oemtable: bad signature [%c%c%c%c]!\n",
+-                      oemtable->oem_signature[0],
+-                      oemtable->oem_signature[1],
+-                      oemtable->oem_signature[2],
+-                      oemtable->oem_signature[3]);
+-              return;
+-      }
+-      if (mpf_checksum((unsigned char *)oemtable,oemtable->oem_length))
+-      {
+-              printk(KERN_WARNING "SMP oem mptable: checksum error!\n");
+-              return;
+-      }
+-      while (count < oemtable->oem_length) {
+-              switch (*oemptr) {
+-                      case MP_TRANSLATION:
+-                      {
+-                              struct mpc_config_translation *m=
+-                                      (struct mpc_config_translation *)oemptr;
+-                              MP_translation_info(m);
+-                              oemptr += sizeof(*m);
+-                              count += sizeof(*m);
+-                              ++mpc_record;
+-                              break;
+-                      }
+-                      default:
+-                      {
+-                              printk(KERN_WARNING "Unrecognised OEM table entry type! - %d\n", (int) *oemptr);
+-                              return;
+-                      }
+-              }
+-       }
+-}
  -
- /* Dynamically-mapped IRQ. */
- DEFINE_PER_CPU(int, timer_irq);
- 
---- a/arch/x86/kernel/traps_32-xen.c
-+++ b/arch/x86/kernel/traps_32-xen.c
-@@ -9,26 +9,28 @@
-  * 'Traps.c' handles hardware traps and faults after we have saved some
-  * state in 'asm.s'.
-  */
--#include <linux/sched.h>
-+#include <linux/interrupt.h>
-+#include <linux/kallsyms.h>
-+#include <linux/spinlock.h>
-+#include <linux/highmem.h>
-+#include <linux/kprobes.h>
-+#include <linux/uaccess.h>
-+#include <linux/utsname.h>
-+#include <linux/kdebug.h>
- #include <linux/kernel.h>
-+#include <linux/module.h>
-+#include <linux/ptrace.h>
- #include <linux/string.h>
-+#include <linux/unwind.h>
-+#include <linux/delay.h>
- #include <linux/errno.h>
-+#include <linux/kexec.h>
-+#include <linux/sched.h>
- #include <linux/timer.h>
--#include <linux/mm.h>
- #include <linux/init.h>
--#include <linux/delay.h>
--#include <linux/spinlock.h>
--#include <linux/interrupt.h>
--#include <linux/highmem.h>
--#include <linux/kallsyms.h>
--#include <linux/ptrace.h>
--#include <linux/utsname.h>
--#include <linux/kprobes.h>
--#include <linux/kexec.h>
--#include <linux/unwind.h>
--#include <linux/uaccess.h>
--#include <linux/nmi.h>
- #include <linux/bug.h>
-+#include <linux/nmi.h>
-+#include <linux/mm.h>
- 
- #ifdef CONFIG_EISA
- #include <linux/ioport.h>
-@@ -43,21 +45,18 @@
- #include <linux/edac.h>
- #endif
- 
-+#include <asm/arch_hooks.h>
-+#include <asm/stacktrace.h>
- #include <asm/processor.h>
--#include <asm/system.h>
--#include <asm/io.h>
--#include <asm/atomic.h>
- #include <asm/debugreg.h>
-+#include <asm/atomic.h>
-+#include <asm/system.h>
-+#include <asm/unwind.h>
- #include <asm/desc.h>
- #include <asm/i387.h>
- #include <asm/nmi.h>
--#include <asm/unwind.h>
- #include <asm/smp.h>
--#include <asm/arch_hooks.h>
--#include <linux/kdebug.h>
--#include <asm/stacktrace.h>
+-static inline void mps_oem_check(struct mp_config_table *mpc, char *oem,
+-              char *productid)
+-{
+-      if (strncmp(oem, "IBM NUMA", 8))
+-              printk("Warning!  May not be a NUMA-Q system!\n");
+-      if (mpc->mpc_oemptr)
+-              smp_read_mpc_oem((struct mp_config_oemtable *) mpc->mpc_oemptr,
+-                              mpc->mpc_oemsize);
+-}
+-#endif        /* CONFIG_X86_NUMAQ */
+-
+-/*
+- * Read/parse the MPC
+- */
+-
+-static int __init smp_read_mpc(struct mp_config_table *mpc)
+-{
+-      char str[16];
+-      char oem[10];
+-      int count=sizeof(*mpc);
+-      unsigned char *mpt=((unsigned char *)mpc)+count;
+-
+-      if (memcmp(mpc->mpc_signature,MPC_SIGNATURE,4)) {
+-              printk(KERN_ERR "SMP mptable: bad signature [0x%x]!\n",
+-                      *(u32 *)mpc->mpc_signature);
+-              return 0;
+-      }
+-      if (mpf_checksum((unsigned char *)mpc,mpc->mpc_length)) {
+-              printk(KERN_ERR "SMP mptable: checksum error!\n");
+-              return 0;
+-      }
+-      if (mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04) {
+-              printk(KERN_ERR "SMP mptable: bad table version (%d)!!\n",
+-                      mpc->mpc_spec);
+-              return 0;
+-      }
+-      if (!mpc->mpc_lapic) {
+-              printk(KERN_ERR "SMP mptable: null local APIC address!\n");
+-              return 0;
+-      }
+-      memcpy(oem,mpc->mpc_oem,8);
+-      oem[8]=0;
+-      printk(KERN_INFO "OEM ID: %s ",oem);
+-
+-      memcpy(str,mpc->mpc_productid,12);
+-      str[12]=0;
+-      printk("Product ID: %s ",str);
+-
+-      mps_oem_check(mpc, oem, str);
+-
+-      printk("APIC at: 0x%X\n", mpc->mpc_lapic);
+-
+-      /*
+-       * Save the local APIC address (it might be non-default) -- but only
+-       * if we're not using ACPI.
+-       */
+-      if (!acpi_lapic)
+-              mp_lapic_addr = mpc->mpc_lapic;
+-
+-      /*
+-       *      Now process the configuration blocks.
+-       */
+-      mpc_record = 0;
+-      while (count < mpc->mpc_length) {
+-              switch(*mpt) {
+-                      case MP_PROCESSOR:
+-                      {
+-                              struct mpc_config_processor *m=
+-                                      (struct mpc_config_processor *)mpt;
+-                              /* ACPI may have already provided this data */
+-                              if (!acpi_lapic)
+-                                      MP_processor_info(m);
+-                              mpt += sizeof(*m);
+-                              count += sizeof(*m);
+-                              break;
+-                      }
+-                      case MP_BUS:
+-                      {
+-                              struct mpc_config_bus *m=
+-                                      (struct mpc_config_bus *)mpt;
+-                              MP_bus_info(m);
+-                              mpt += sizeof(*m);
+-                              count += sizeof(*m);
+-                              break;
+-                      }
+-                      case MP_IOAPIC:
+-                      {
+-                              struct mpc_config_ioapic *m=
+-                                      (struct mpc_config_ioapic *)mpt;
+-                              MP_ioapic_info(m);
+-                              mpt+=sizeof(*m);
+-                              count+=sizeof(*m);
+-                              break;
+-                      }
+-                      case MP_INTSRC:
+-                      {
+-                              struct mpc_config_intsrc *m=
+-                                      (struct mpc_config_intsrc *)mpt;
+-
+-                              MP_intsrc_info(m);
+-                              mpt+=sizeof(*m);
+-                              count+=sizeof(*m);
+-                              break;
+-                      }
+-                      case MP_LINTSRC:
+-                      {
+-                              struct mpc_config_lintsrc *m=
+-                                      (struct mpc_config_lintsrc *)mpt;
+-                              MP_lintsrc_info(m);
+-                              mpt+=sizeof(*m);
+-                              count+=sizeof(*m);
+-                              break;
+-                      }
+-                      default:
+-                      {
+-                              count = mpc->mpc_length;
+-                              break;
+-                      }
+-              }
+-              ++mpc_record;
+-      }
+-      setup_apic_routing();
+-      if (!num_processors)
+-              printk(KERN_ERR "SMP mptable: no processors registered!\n");
+-      return num_processors;
+-}
+-
+-static int __init ELCR_trigger(unsigned int irq)
+-{
+-      unsigned int port;
+-
+-      port = 0x4d0 + (irq >> 3);
+-      return (inb(port) >> (irq & 7)) & 1;
+-}
+-
+-static void __init construct_default_ioirq_mptable(int mpc_default_type)
+-{
+-      struct mpc_config_intsrc intsrc;
+-      int i;
+-      int ELCR_fallback = 0;
+-
+-      intsrc.mpc_type = MP_INTSRC;
+-      intsrc.mpc_irqflag = 0;                 /* conforming */
+-      intsrc.mpc_srcbus = 0;
+-      intsrc.mpc_dstapic = mp_ioapics[0].mpc_apicid;
+-
+-      intsrc.mpc_irqtype = mp_INT;
+-
+-      /*
+-       *  If true, we have an ISA/PCI system with no IRQ entries
+-       *  in the MP table. To prevent the PCI interrupts from being set up
+-       *  incorrectly, we try to use the ELCR. The sanity check to see if
+-       *  there is good ELCR data is very simple - IRQ0, 1, 2 and 13 can
+-       *  never be level sensitive, so we simply see if the ELCR agrees.
+-       *  If it does, we assume it's valid.
+-       */
+-      if (mpc_default_type == 5) {
+-              printk(KERN_INFO "ISA/PCI bus type with no IRQ information... falling back to ELCR\n");
+-
+-              if (ELCR_trigger(0) || ELCR_trigger(1) || ELCR_trigger(2) || ELCR_trigger(13))
+-                      printk(KERN_WARNING "ELCR contains invalid data... not using ELCR\n");
+-              else {
+-                      printk(KERN_INFO "Using ELCR to identify PCI interrupts\n");
+-                      ELCR_fallback = 1;
+-              }
+-      }
+-
+-      for (i = 0; i < 16; i++) {
+-              switch (mpc_default_type) {
+-              case 2:
+-                      if (i == 0 || i == 13)
+-                              continue;       /* IRQ0 & IRQ13 not connected */
+-                      /* fall through */
+-              default:
+-                      if (i == 2)
+-                              continue;       /* IRQ2 is never connected */
+-              }
  -
--#include <linux/module.h>
-+#include <asm/io.h>
- 
- #include "mach_traps.h"
- 
-@@ -71,7 +70,7 @@ EXPORT_SYMBOL_GPL(used_vectors);
- asmlinkage int system_call(void);
- 
- /* Do we ignore FPU interrupts ? */
--char ignore_fpu_irq = 0;
-+char ignore_fpu_irq;
- 
- #ifndef CONFIG_X86_NO_IDT
- /*
-@@ -113,12 +112,13 @@ static unsigned int code_bytes = 64;
- void printk_address(unsigned long address, int reliable)
- {
- #ifdef CONFIG_KALLSYMS
--      unsigned long offset = 0, symsize;
-+      char namebuf[KSYM_NAME_LEN];
-+      unsigned long offset = 0;
-+      unsigned long symsize;
-       const char *symname;
--      char *modname;
--      char *delim = ":";
--      char namebuf[128];
-       char reliab[4] = "";
-+      char *delim = ":";
-+      char *modname;
- 
-       symname = kallsyms_lookup(address, &symsize, &offset,
-                                       &modname, namebuf);
-@@ -146,13 +146,14 @@ static inline int valid_stack_ptr(struct
- 
- /* The form of the top of the frame on the stack */
- struct stack_frame {
--      struct stack_frame *next_frame;
--      unsigned long return_address;
-+      struct stack_frame      *next_frame;
-+      unsigned long           return_address;
- };
- 
--static inline unsigned long print_context_stack(struct thread_info *tinfo,
--                              unsigned long *stack, unsigned long bp,
--                              const struct stacktrace_ops *ops, void *data)
-+static inline unsigned long
-+print_context_stack(struct thread_info *tinfo,
-+                  unsigned long *stack, unsigned long bp,
-+                  const struct stacktrace_ops *ops, void *data)
- {
-       struct stack_frame *frame = (struct stack_frame *)bp;
- 
-@@ -174,7 +175,7 @@ static inline unsigned long print_contex
-       return bp;
- }
- 
--#define MSG(msg) ops->warning(data, msg)
-+#define MSG(msg)              ops->warning(data, msg)
- 
- void dump_trace(struct task_struct *task, struct pt_regs *regs,
-               unsigned long *stack, unsigned long bp,
-@@ -185,6 +186,7 @@ void dump_trace(struct task_struct *task
- 
-       if (!stack) {
-               unsigned long dummy;
-+
-               stack = &dummy;
-               if (task != current)
-                       stack = (unsigned long *)task->thread.sp;
-@@ -194,7 +196,7 @@ void dump_trace(struct task_struct *task
-       if (!bp) {
-               if (task == current) {
-                       /* Grab bp right from our regs */
--                      asm ("movl %%ebp, %0" : "=r" (bp) : );
-+                      asm("movl %%ebp, %0" : "=r" (bp) :);
-               } else {
-                       /* bp is the last reg pushed by switch_to */
-                       bp = *(unsigned long *) task->thread.sp;
-@@ -204,15 +206,18 @@ void dump_trace(struct task_struct *task
- 
-       while (1) {
-               struct thread_info *context;
-+
-               context = (struct thread_info *)
-                       ((unsigned long)stack & (~(THREAD_SIZE - 1)));
-               bp = print_context_stack(context, stack, bp, ops, data);
--              /* Should be after the line below, but somewhere
--                 in early boot context comes out corrupted and we
--                 can't reference it -AK */
-+              /*
-+               * Should be after the line below, but somewhere
-+               * in early boot context comes out corrupted and we
-+               * can't reference it:
-+               */
-               if (ops->stack(data, "IRQ") < 0)
-                       break;
--              stack = (unsigned long*)context->previous_esp;
-+              stack = (unsigned long *)context->previous_esp;
-               if (!stack)
-                       break;
-               touch_nmi_watchdog();
-@@ -251,15 +256,15 @@ static void print_trace_address(void *da
- }
- 
- static const struct stacktrace_ops print_trace_ops = {
--      .warning = print_trace_warning,
--      .warning_symbol = print_trace_warning_symbol,
--      .stack = print_trace_stack,
--      .address = print_trace_address,
-+      .warning                = print_trace_warning,
-+      .warning_symbol         = print_trace_warning_symbol,
-+      .stack                  = print_trace_stack,
-+      .address                = print_trace_address,
- };
- 
- static void
- show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
--              unsigned long *stack, unsigned long bp, char *log_lvl)
-+                 unsigned long *stack, unsigned long bp, char *log_lvl)
- {
-       dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
-       printk("%s =======================\n", log_lvl);
-@@ -271,21 +276,22 @@ void show_trace(struct task_struct *task
-       show_trace_log_lvl(task, regs, stack, bp, "");
- }
- 
--static void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
--                     unsigned long *sp, unsigned long bp, char *log_lvl)
-+static void
-+show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
-+                 unsigned long *sp, unsigned long bp, char *log_lvl)
- {
-       unsigned long *stack;
-       int i;
- 
-       if (sp == NULL) {
-               if (task)
--                      sp = (unsigned long*)task->thread.sp;
-+                      sp = (unsigned long *)task->thread.sp;
-               else
-                       sp = (unsigned long *)&sp;
-       }
- 
-       stack = sp;
--      for(i = 0; i < kstack_depth_to_print; i++) {
-+      for (i = 0; i < kstack_depth_to_print; i++) {
-               if (kstack_end(stack))
-                       break;
-               if (i && ((i % 8) == 0))
-@@ -293,6 +299,7 @@ static void show_stack_log_lvl(struct ta
-               printk("%08lx ", *stack++);
-       }
-       printk("\n%sCall Trace:\n", log_lvl);
-+
-       show_trace_log_lvl(task, regs, sp, bp, log_lvl);
- }
- 
-@@ -307,8 +314,8 @@ void show_stack(struct task_struct *task
-  */
- void dump_stack(void)
- {
--      unsigned long stack;
-       unsigned long bp = 0;
-+      unsigned long stack;
- 
- #ifdef CONFIG_FRAME_POINTER
-       if (!bp)
-@@ -320,6 +327,7 @@ void dump_stack(void)
-               init_utsname()->release,
-               (int)strcspn(init_utsname()->version, " "),
-               init_utsname()->version);
-+
-       show_trace(current, NULL, &stack, bp);
- }
- 
-@@ -331,6 +339,7 @@ void show_registers(struct pt_regs *regs
- 
-       print_modules();
-       __show_registers(regs, 0);
-+
-       printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)",
-               TASK_COMM_LEN, current->comm, task_pid_nr(current),
-               current_thread_info(), current, task_thread_info(current));
-@@ -339,10 +348,10 @@ void show_registers(struct pt_regs *regs
-        * time of the fault..
-        */
-       if (!user_mode_vm(regs)) {
--              u8 *ip;
-               unsigned int code_prologue = code_bytes * 43 / 64;
-               unsigned int code_len = code_bytes;
-               unsigned char c;
-+              u8 *ip;
- 
-               printk("\n" KERN_EMERG "Stack: ");
-               show_stack_log_lvl(NULL, regs, &regs->sp, 0, KERN_EMERG);
-@@ -369,7 +378,7 @@ void show_registers(struct pt_regs *regs
-               }
-       }
-       printk("\n");
--}     
-+}
- 
- int is_valid_bugaddr(unsigned long ip)
- {
-@@ -385,10 +394,10 @@ int is_valid_bugaddr(unsigned long ip)
- 
- static int die_counter;
- 
--int __kprobes __die(const char * str, struct pt_regs * regs, long err)
-+int __kprobes __die(const char *str, struct pt_regs *regs, long err)
- {
--      unsigned long sp;
-       unsigned short ss;
-+      unsigned long sp;
- 
-       printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter);
- #ifdef CONFIG_PREEMPT
-@@ -403,8 +412,8 @@ int __kprobes __die(const char * str, st
-       printk("\n");
- 
-       if (notify_die(DIE_OOPS, str, regs, err,
--                              current->thread.trap_no, SIGSEGV) !=
--                      NOTIFY_STOP) {
-+                      current->thread.trap_no, SIGSEGV) != NOTIFY_STOP) {
-+
-               show_registers(regs);
-               /* Executive summary in case the oops scrolled away */
-               sp = (unsigned long) (&regs->sp);
-@@ -416,17 +425,18 @@ int __kprobes __die(const char * str, st
-               printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip);
-               print_symbol("%s", regs->ip);
-               printk(" SS:ESP %04x:%08lx\n", ss, sp);
-+
-               return 0;
+-              if (ELCR_fallback) {
+-                      /*
+-                       *  If the ELCR indicates a level-sensitive interrupt, we
+-                       *  copy that information over to the MP table in the
+-                       *  irqflag field (level sensitive, active high polarity).
+-                       */
+-                      if (ELCR_trigger(i))
+-                              intsrc.mpc_irqflag = 13;
+-                      else
+-                              intsrc.mpc_irqflag = 0;
+-              }
+-
+-              intsrc.mpc_srcbusirq = i;
+-              intsrc.mpc_dstirq = i ? i : 2;          /* IRQ0 to INTIN2 */
+-              MP_intsrc_info(&intsrc);
+-      }
+-
+-      intsrc.mpc_irqtype = mp_ExtINT;
+-      intsrc.mpc_srcbusirq = 0;
+-      intsrc.mpc_dstirq = 0;                          /* 8259A to INTIN0 */
+-      MP_intsrc_info(&intsrc);
+-}
+-
+-static inline void __init construct_default_ISA_mptable(int mpc_default_type)
+-{
+-      struct mpc_config_processor processor;
+-      struct mpc_config_bus bus;
+-      struct mpc_config_ioapic ioapic;
+-      struct mpc_config_lintsrc lintsrc;
+-      int linttypes[2] = { mp_ExtINT, mp_NMI };
+-      int i;
+-
+-      /*
+-       * local APIC has default address
+-       */
+-      mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
+-
+-      /*
+-       * 2 CPUs, numbered 0 & 1.
+-       */
+-      processor.mpc_type = MP_PROCESSOR;
+-      /* Either an integrated APIC or a discrete 82489DX. */
+-      processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
+-      processor.mpc_cpuflag = CPU_ENABLED;
+-      processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
+-                                 (boot_cpu_data.x86_model << 4) |
+-                                 boot_cpu_data.x86_mask;
+-      processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
+-      processor.mpc_reserved[0] = 0;
+-      processor.mpc_reserved[1] = 0;
+-      for (i = 0; i < 2; i++) {
+-              processor.mpc_apicid = i;
+-              MP_processor_info(&processor);
+-      }
+-
+-      bus.mpc_type = MP_BUS;
+-      bus.mpc_busid = 0;
+-      switch (mpc_default_type) {
+-              default:
+-                      printk("???\n");
+-                      printk(KERN_ERR "Unknown standard configuration %d\n",
+-                              mpc_default_type);
+-                      /* fall through */
+-              case 1:
+-              case 5:
+-                      memcpy(bus.mpc_bustype, "ISA   ", 6);
+-                      break;
+-              case 2:
+-              case 6:
+-              case 3:
+-                      memcpy(bus.mpc_bustype, "EISA  ", 6);
+-                      break;
+-              case 4:
+-              case 7:
+-                      memcpy(bus.mpc_bustype, "MCA   ", 6);
+-      }
+-      MP_bus_info(&bus);
+-      if (mpc_default_type > 4) {
+-              bus.mpc_busid = 1;
+-              memcpy(bus.mpc_bustype, "PCI   ", 6);
+-              MP_bus_info(&bus);
+-      }
+-
+-      ioapic.mpc_type = MP_IOAPIC;
+-      ioapic.mpc_apicid = 2;
+-      ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
+-      ioapic.mpc_flags = MPC_APIC_USABLE;
+-      ioapic.mpc_apicaddr = 0xFEC00000;
+-      MP_ioapic_info(&ioapic);
+-
+-      /*
+-       * We set up most of the low 16 IO-APIC pins according to MPS rules.
+-       */
+-      construct_default_ioirq_mptable(mpc_default_type);
+-
+-      lintsrc.mpc_type = MP_LINTSRC;
+-      lintsrc.mpc_irqflag = 0;                /* conforming */
+-      lintsrc.mpc_srcbusid = 0;
+-      lintsrc.mpc_srcbusirq = 0;
+-      lintsrc.mpc_destapic = MP_APIC_ALL;
+-      for (i = 0; i < 2; i++) {
+-              lintsrc.mpc_irqtype = linttypes[i];
+-              lintsrc.mpc_destapiclint = i;
+-              MP_lintsrc_info(&lintsrc);
+-      }
+-}
+-
+-static struct intel_mp_floating *mpf_found;
+-
+-/*
+- * Scan the memory blocks for an SMP configuration block.
+- */
+-void __init get_smp_config (void)
+-{
+-      struct intel_mp_floating *mpf = mpf_found;
+-
+-      /*
+-       * ACPI supports both logical (e.g. Hyper-Threading) and physical 
+-       * processors, where MPS only supports physical.
+-       */
+-      if (acpi_lapic && acpi_ioapic) {
+-              printk(KERN_INFO "Using ACPI (MADT) for SMP configuration information\n");
+-              return;
+-      }
+-      else if (acpi_lapic)
+-              printk(KERN_INFO "Using ACPI for processor (LAPIC) configuration information\n");
+-
+-      printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification);
+-      if (mpf->mpf_feature2 & (1<<7)) {
+-              printk(KERN_INFO "    IMCR and PIC compatibility mode.\n");
+-              pic_mode = 1;
  -      } else {
--              return 1;
-       }
-+
-+      return 1;
- }
- 
- /*
-- * This is gone through when something in the kernel has done something bad and
-- * is about to be terminated.
-+ * This is gone through when something in the kernel has done something bad
-+ * and is about to be terminated:
-  */
--void die(const char * str, struct pt_regs * regs, long err)
-+void die(const char *str, struct pt_regs *regs, long err)
- {
-       static struct {
-               raw_spinlock_t lock;
-@@ -448,8 +458,9 @@ void die(const char * str, struct pt_reg
-               die.lock_owner = smp_processor_id();
-               die.lock_owner_depth = 0;
-               bust_spinlocks(1);
--      } else
-+      } else {
-               raw_local_irq_save(flags);
-+      }
- 
-       if (++die.lock_owner_depth < 3) {
-               report_bug(regs->ip, regs);
-@@ -482,19 +493,20 @@ void die(const char * str, struct pt_reg
-       do_exit(SIGSEGV);
- }
- 
--static inline void die_if_kernel(const char * str, struct pt_regs * regs, long err)
-+static inline void
-+die_if_kernel(const char *str, struct pt_regs *regs, long err)
- {
-       if (!user_mode_vm(regs))
-               die(str, regs, err);
- }
- 
--static void __kprobes do_trap(int trapnr, int signr, char *str, int vm86,
--                            struct pt_regs * regs, long error_code,
--                            siginfo_t *info)
-+static void __kprobes
-+do_trap(int trapnr, int signr, char *str, int vm86, struct pt_regs *regs,
-+      long error_code, siginfo_t *info)
- {
-       struct task_struct *tsk = current;
- 
--      if (regs->flags & VM_MASK) {
-+      if (regs->flags & X86_VM_MASK) {
-               if (vm86)
-                       goto vm86_trap;
-               goto trap_signal;
-@@ -503,109 +515,112 @@ static void __kprobes do_trap(int trapnr
-       if (!user_mode(regs))
-               goto kernel_trap;
- 
--      trap_signal: {
+-              printk(KERN_INFO "    Virtual Wire compatibility mode.\n");
+-              pic_mode = 0;
+-      }
+-
+-      /*
+-       * Now see if we need to read further.
+-       */
+-      if (mpf->mpf_feature1 != 0) {
+-
+-              printk(KERN_INFO "Default MP configuration #%d\n", mpf->mpf_feature1);
+-              construct_default_ISA_mptable(mpf->mpf_feature1);
+-
+-      } else if (mpf->mpf_physptr) {
+-
  -              /*
--               * We want error_code and trap_no set for userspace faults and
--               * kernelspace faults which result in die(), but not
--               * kernelspace faults which are fixed up.  die() gives the
--               * process no chance to handle the signal and notice the
--               * kernel fault information, so that won't result in polluting
--               * the information about previously queued, but not yet
--               * delivered, faults.  See also do_general_protection below.
+-               * Read the physical hardware table.  Anything here will
+-               * override the defaults.
  -               */
--              tsk->thread.error_code = error_code;
--              tsk->thread.trap_no = trapnr;
-+trap_signal:
-+      /*
-+       * We want error_code and trap_no set for userspace faults and
-+       * kernelspace faults which result in die(), but not
-+       * kernelspace faults which are fixed up.  die() gives the
-+       * process no chance to handle the signal and notice the
-+       * kernel fault information, so that won't result in polluting
-+       * the information about previously queued, but not yet
-+       * delivered, faults.  See also do_general_protection below.
-+       */
-+      tsk->thread.error_code = error_code;
-+      tsk->thread.trap_no = trapnr;
- 
--              if (info)
--                      force_sig_info(signr, info, tsk);
--              else
--                      force_sig(signr, tsk);
+-              if (!smp_read_mpc(isa_bus_to_virt(mpf->mpf_physptr))) {
+-                      smp_found_config = 0;
+-                      printk(KERN_ERR "BIOS bug, MP table errors detected!...\n");
+-                      printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n");
+-                      return;
+-              }
+-              /*
+-               * If there are no explicit MP IRQ entries, then we are
+-               * broken.  We set up most of the low 16 IO-APIC pins to
+-               * ISA defaults and hope it will work.
+-               */
+-              if (!mp_irq_entries) {
+-                      struct mpc_config_bus bus;
+-
+-                      printk(KERN_ERR "BIOS bug, no explicit IRQ entries, using default mptable. (tell your hw vendor)\n");
+-
+-                      bus.mpc_type = MP_BUS;
+-                      bus.mpc_busid = 0;
+-                      memcpy(bus.mpc_bustype, "ISA   ", 6);
+-                      MP_bus_info(&bus);
+-
+-                      construct_default_ioirq_mptable(0);
+-              }
+-
+-      } else
+-              BUG();
+-
+-      printk(KERN_INFO "Processors: %d\n", num_processors);
+-      /*
+-       * Only use the first configuration found.
+-       */
+-}
+-
+-static int __init smp_scan_config (unsigned long base, unsigned long length)
+-{
+-      unsigned long *bp = isa_bus_to_virt(base);
+-      struct intel_mp_floating *mpf;
+-
+-      printk(KERN_INFO "Scan SMP from %p for %ld bytes.\n", bp,length);
+-      if (sizeof(*mpf) != 16)
+-              printk("Error: MPF size\n");
+-
+-      while (length > 0) {
+-              mpf = (struct intel_mp_floating *)bp;
+-              if ((*bp == SMP_MAGIC_IDENT) &&
+-                      (mpf->mpf_length == 1) &&
+-                      !mpf_checksum((unsigned char *)bp, 16) &&
+-                      ((mpf->mpf_specification == 1)
+-                              || (mpf->mpf_specification == 4)) ) {
+-
+-                      smp_found_config = 1;
+-#ifndef CONFIG_XEN
+-                      printk(KERN_INFO "found SMP MP-table at [%p] %08lx\n",
+-                              mpf, virt_to_phys(mpf));
+-                      reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE,
+-                                      BOOTMEM_DEFAULT);
+-                      if (mpf->mpf_physptr) {
+-                              /*
+-                               * We cannot access to MPC table to compute
+-                               * table size yet, as only few megabytes from
+-                               * the bottom is mapped now.
+-                               * PC-9800's MPC table places on the very last
+-                               * of physical memory; so that simply reserving
+-                               * PAGE_SIZE from mpg->mpf_physptr yields BUG()
+-                               * in reserve_bootmem.
+-                               */
+-                              unsigned long size = PAGE_SIZE;
+-                              unsigned long end = max_low_pfn * PAGE_SIZE;
+-                              if (mpf->mpf_physptr + size > end)
+-                                      size = end - mpf->mpf_physptr;
+-                              reserve_bootmem(mpf->mpf_physptr, size,
+-                                              BOOTMEM_DEFAULT);
+-                      }
+-#else
+-                      printk(KERN_INFO "found SMP MP-table at [%p] %08lx\n",
+-                              mpf, ((void *)bp - isa_bus_to_virt(base)) + base);
+-#endif
+-
+-                      mpf_found = mpf;
+-                      return 1;
+-              }
+-              bp += 4;
+-              length -= 16;
+-      }
+-      return 0;
+-}
+-
+-void __init find_smp_config (void)
+-{
+-#ifndef CONFIG_XEN
+-      unsigned int address;
+-#endif
+-
+-      /*
+-       * FIXME: Linux assumes you have 640K of base ram..
+-       * this continues the error...
+-       *
+-       * 1) Scan the bottom 1K for a signature
+-       * 2) Scan the top 1K of base RAM
+-       * 3) Scan the 64K of bios
+-       */
+-      if (smp_scan_config(0x0,0x400) ||
+-              smp_scan_config(639*0x400,0x400) ||
+-                      smp_scan_config(0xF0000,0x10000))
+-              return;
+-      /*
+-       * If it is an SMP machine we should know now, unless the
+-       * configuration is in an EISA/MCA bus machine with an
+-       * extended bios data area.
+-       *
+-       * there is a real-mode segmented pointer pointing to the
+-       * 4K EBDA area at 0x40E, calculate and scan it here.
+-       *
+-       * NOTE! There are Linux loaders that will corrupt the EBDA
+-       * area, and as such this kind of SMP config may be less
+-       * trustworthy, simply because the SMP table may have been
+-       * stomped on during early boot. These loaders are buggy and
+-       * should be fixed.
+-       *
+-       * MP1.4 SPEC states to only scan first 1K of 4K EBDA.
+-       */
+-
+-#ifndef CONFIG_XEN
+-      address = get_bios_ebda();
+-      if (address)
+-              smp_scan_config(address, 0x400);
+-#endif
+-}
+-
+-int es7000_plat;
+-
+-/* --------------------------------------------------------------------------
+-                            ACPI-based MP Configuration
+-   -------------------------------------------------------------------------- */
+-
+-#ifdef CONFIG_ACPI
+-
+-void __init mp_register_lapic_address(u64 address)
+-{
+-#ifndef CONFIG_XEN
+-      mp_lapic_addr = (unsigned long) address;
+-
+-      set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr);
+-
+-      if (boot_cpu_physical_apicid == -1U)
+-              boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
+-
+-      Dprintk("Boot CPU = %d\n", boot_cpu_physical_apicid);
+-#endif
+-}
+-
+-void __cpuinit mp_register_lapic (u8 id, u8 enabled)
+-{
+-      struct mpc_config_processor processor;
+-      int boot_cpu = 0;
+-      
+-      if (MAX_APICS - id <= 0) {
+-              printk(KERN_WARNING "Processor #%d invalid (max %d)\n",
+-                      id, MAX_APICS);
  -              return;
  -      }
-+      if (info)
-+              force_sig_info(signr, info, tsk);
-+      else
-+              force_sig(signr, tsk);
-+      return;
- 
--      kernel_trap: {
--              if (!fixup_exception(regs)) {
--                      tsk->thread.error_code = error_code;
--                      tsk->thread.trap_no = trapnr;
--                      die(str, regs, error_code);
--              }
+-
+-      if (id == boot_cpu_physical_apicid)
+-              boot_cpu = 1;
+-
+-#ifndef CONFIG_XEN
+-      processor.mpc_type = MP_PROCESSOR;
+-      processor.mpc_apicid = id;
+-      processor.mpc_apicver = GET_APIC_VERSION(apic_read(APIC_LVR));
+-      processor.mpc_cpuflag = (enabled ? CPU_ENABLED : 0);
+-      processor.mpc_cpuflag |= (boot_cpu ? CPU_BOOTPROCESSOR : 0);
+-      processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) | 
+-              (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask;
+-      processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
+-      processor.mpc_reserved[0] = 0;
+-      processor.mpc_reserved[1] = 0;
+-#endif
+-
+-      MP_processor_info(&processor);
+-}
+-
+-#ifdef        CONFIG_X86_IO_APIC
+-
+-#define MP_ISA_BUS            0
+-#define MP_MAX_IOAPIC_PIN     127
+-
+-static struct mp_ioapic_routing {
+-      int                     apic_id;
+-      int                     gsi_base;
+-      int                     gsi_end;
+-      u32                     pin_programmed[4];
+-} mp_ioapic_routing[MAX_IO_APICS];
+-
+-static int mp_find_ioapic (int gsi)
+-{
+-      int i = 0;
+-
+-      /* Find the IOAPIC that manages this GSI. */
+-      for (i = 0; i < nr_ioapics; i++) {
+-              if ((gsi >= mp_ioapic_routing[i].gsi_base)
+-                      && (gsi <= mp_ioapic_routing[i].gsi_end))
+-                      return i;
+-      }
+-
+-      printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi);
+-
+-      return -1;
+-}
+-
+-void __init mp_register_ioapic(u8 id, u32 address, u32 gsi_base)
+-{
+-      int idx = 0;
+-      int tmpid;
+-
+-      if (nr_ioapics >= MAX_IO_APICS) {
+-              printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
+-                      "(found %d)\n", MAX_IO_APICS, nr_ioapics);
+-              panic("Recompile kernel with bigger MAX_IO_APICS!\n");
+-      }
+-      if (!address) {
+-              printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address"
+-                      " found in MADT table, skipping!\n");
+-              return;
+-      }
+-
+-      idx = nr_ioapics++;
+-
+-      mp_ioapics[idx].mpc_type = MP_IOAPIC;
+-      mp_ioapics[idx].mpc_flags = MPC_APIC_USABLE;
+-      mp_ioapics[idx].mpc_apicaddr = address;
+-
+-#ifndef CONFIG_XEN
+-      set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
+-#endif
+-      if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+-              && !APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
+-              tmpid = io_apic_get_unique_id(idx, id);
+-      else
+-              tmpid = id;
+-      if (tmpid == -1) {
+-              nr_ioapics--;
+-              return;
+-      }
+-      mp_ioapics[idx].mpc_apicid = tmpid;
+-      mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx);
+-      
+-      /* 
+-       * Build basic GSI lookup table to facilitate gsi->io_apic lookups
+-       * and to prevent reprogramming of IOAPIC pins (PCI GSIs).
+-       */
+-      mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mpc_apicid;
+-      mp_ioapic_routing[idx].gsi_base = gsi_base;
+-      mp_ioapic_routing[idx].gsi_end = gsi_base +
+-              io_apic_get_redir_entries(idx);
+-
+-      printk("IOAPIC[%d]: apic_id %d, version %d, address 0x%x, "
+-             "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid,
+-             mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr,
+-             mp_ioapic_routing[idx].gsi_base,
+-             mp_ioapic_routing[idx].gsi_end);
+-}
+-
+-void __init
+-mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
+-{
+-      struct mpc_config_intsrc intsrc;
+-      int                     ioapic = -1;
+-      int                     pin = -1;
+-
+-      /* 
+-       * Convert 'gsi' to 'ioapic.pin'.
+-       */
+-      ioapic = mp_find_ioapic(gsi);
+-      if (ioapic < 0)
+-              return;
+-      pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
+-
+-      /*
+-       * TBD: This check is for faulty timer entries, where the override
+-       *      erroneously sets the trigger to level, resulting in a HUGE 
+-       *      increase of timer interrupts!
+-       */
+-      if ((bus_irq == 0) && (trigger == 3))
+-              trigger = 1;
+-
+-      intsrc.mpc_type = MP_INTSRC;
+-      intsrc.mpc_irqtype = mp_INT;
+-      intsrc.mpc_irqflag = (trigger << 2) | polarity;
+-      intsrc.mpc_srcbus = MP_ISA_BUS;
+-      intsrc.mpc_srcbusirq = bus_irq;                                /* IRQ */
+-      intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid;        /* APIC ID */
+-      intsrc.mpc_dstirq = pin;                                    /* INTIN# */
+-
+-      Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, %d-%d\n",
+-              intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3, 
+-              (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus, 
+-              intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, intsrc.mpc_dstirq);
+-
+-      mp_irqs[mp_irq_entries] = intsrc;
+-      if (++mp_irq_entries == MAX_IRQ_SOURCES)
+-              panic("Max # of irq sources exceeded!\n");
+-}
+-
+-void __init mp_config_acpi_legacy_irqs (void)
+-{
+-      struct mpc_config_intsrc intsrc;
+-      int i = 0;
+-      int ioapic = -1;
+-
+-      /* 
+-       * Fabricate the legacy ISA bus (bus #31).
+-       */
+-      mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA;
+-      Dprintk("Bus #%d is ISA\n", MP_ISA_BUS);
+-
+-      /*
+-       * Older generations of ES7000 have no legacy identity mappings
+-       */
+-      if (es7000_plat == 1)
  -              return;
-+kernel_trap:
-+      if (!fixup_exception(regs)) {
-+              tsk->thread.error_code = error_code;
-+              tsk->thread.trap_no = trapnr;
-+              die(str, regs, error_code);
-       }
-+      return;
- 
--      vm86_trap: {
--              int ret = handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, trapnr);
--              if (ret) goto trap_signal;
+-
+-      /* 
+-       * Locate the IOAPIC that manages the ISA IRQs (0-15). 
+-       */
+-      ioapic = mp_find_ioapic(0);
+-      if (ioapic < 0)
  -              return;
+-
+-      intsrc.mpc_type = MP_INTSRC;
+-      intsrc.mpc_irqflag = 0;                                 /* Conforming */
+-      intsrc.mpc_srcbus = MP_ISA_BUS;
+-      intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid;
+-
+-      /* 
+-       * Use the default configuration for the IRQs 0-15.  Unless
+-       * overridden by (MADT) interrupt source override entries.
+-       */
+-      for (i = 0; i < 16; i++) {
+-              int idx;
+-
+-              for (idx = 0; idx < mp_irq_entries; idx++) {
+-                      struct mpc_config_intsrc *irq = mp_irqs + idx;
+-
+-                      /* Do we already have a mapping for this ISA IRQ? */
+-                      if (irq->mpc_srcbus == MP_ISA_BUS && irq->mpc_srcbusirq == i)
+-                              break;
+-
+-                      /* Do we already have a mapping for this IOAPIC pin */
+-                      if ((irq->mpc_dstapic == intsrc.mpc_dstapic) &&
+-                              (irq->mpc_dstirq == i))
+-                              break;
+-              }
+-
+-              if (idx != mp_irq_entries) {
+-                      printk(KERN_DEBUG "ACPI: IRQ%d used by override.\n", i);
+-                      continue;                       /* IRQ already used */
+-              }
+-
+-              intsrc.mpc_irqtype = mp_INT;
+-              intsrc.mpc_srcbusirq = i;                  /* Identity mapped */
+-              intsrc.mpc_dstirq = i;
+-
+-              Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, "
+-                      "%d-%d\n", intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3, 
+-                      (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus, 
+-                      intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, 
+-                      intsrc.mpc_dstirq);
+-
+-              mp_irqs[mp_irq_entries] = intsrc;
+-              if (++mp_irq_entries == MAX_IRQ_SOURCES)
+-                      panic("Max # of irq sources exceeded!\n");
  -      }
-+vm86_trap:
-+      if (handle_vm86_trap((struct kernel_vm86_regs *) regs,
-+                                              error_code, trapnr))
-+              goto trap_signal;
-+      return;
- }
- 
--#define DO_ERROR(trapnr, signr, str, name) \
--void do_##name(struct pt_regs * regs, long error_code) \
--{ \
--      if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
--                                              == NOTIFY_STOP) \
--              return; \
--      do_trap(trapnr, signr, str, 0, regs, error_code, NULL); \
  -}
  -
--#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr, irq) \
--void do_##name(struct pt_regs * regs, long error_code) \
--{ \
--      siginfo_t info; \
--      if (irq) \
--              local_irq_enable(); \
--      info.si_signo = signr; \
--      info.si_errno = 0; \
--      info.si_code = sicode; \
--      info.si_addr = (void __user *)siaddr; \
--      if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
--                                              == NOTIFY_STOP) \
--              return; \
--      do_trap(trapnr, signr, str, 0, regs, error_code, &info); \
+-#define MAX_GSI_NUM   4096
+-#define IRQ_COMPRESSION_START 64
+-
+-int mp_register_gsi(u32 gsi, int triggering, int polarity)
+-{
+-      int ioapic = -1;
+-      int ioapic_pin = 0;
+-      int idx, bit = 0;
+-      static int pci_irq = IRQ_COMPRESSION_START;
+-      /*
+-       * Mapping between Global System Interrupts, which
+-       * represent all possible interrupts, and IRQs
+-       * assigned to actual devices.
+-       */
+-      static int              gsi_to_irq[MAX_GSI_NUM];
+-
+-      /* Don't set up the ACPI SCI because it's already set up */
+-      if (acpi_gbl_FADT.sci_interrupt == gsi)
+-              return gsi;
+-
+-      ioapic = mp_find_ioapic(gsi);
+-      if (ioapic < 0) {
+-              printk(KERN_WARNING "No IOAPIC for GSI %u\n", gsi);
+-              return gsi;
+-      }
+-
+-      ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
+-
+-      if (ioapic_renumber_irq)
+-              gsi = ioapic_renumber_irq(ioapic, gsi);
+-
+-      /* 
+-       * Avoid pin reprogramming.  PRTs typically include entries  
+-       * with redundant pin->gsi mappings (but unique PCI devices);
+-       * we only program the IOAPIC on the first.
+-       */
+-      bit = ioapic_pin % 32;
+-      idx = (ioapic_pin < 32) ? 0 : (ioapic_pin / 32);
+-      if (idx > 3) {
+-              printk(KERN_ERR "Invalid reference to IOAPIC pin "
+-                      "%d-%d\n", mp_ioapic_routing[ioapic].apic_id, 
+-                      ioapic_pin);
+-              return gsi;
+-      }
+-      if ((1<<bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) {
+-              Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n",
+-                      mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
+-              return (gsi < IRQ_COMPRESSION_START ? gsi : gsi_to_irq[gsi]);
+-      }
+-
+-      mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1<<bit);
+-
+-      /*
+-       * For GSI >= 64, use IRQ compression
+-       */
+-      if ((gsi >= IRQ_COMPRESSION_START)
+-              && (triggering == ACPI_LEVEL_SENSITIVE)) {
+-              /*
+-               * For PCI devices assign IRQs in order, avoiding gaps
+-               * due to unused I/O APIC pins.
+-               */
+-              int irq = gsi;
+-              if (gsi < MAX_GSI_NUM) {
+-                      /*
+-                       * Retain the VIA chipset work-around (gsi > 15), but
+-                       * avoid a problem where the 8254 timer (IRQ0) is setup
+-                       * via an override (so it's not on pin 0 of the ioapic),
+-                       * and at the same time, the pin 0 interrupt is a PCI
+-                       * type.  The gsi > 15 test could cause these two pins
+-                       * to be shared as IRQ0, and they are not shareable.
+-                       * So test for this condition, and if necessary, avoid
+-                       * the pin collision.
+-                       */
+-                      if (gsi > 15 || (gsi == 0 && !timer_uses_ioapic_pin_0))
+-                              gsi = pci_irq++;
+-                      /*
+-                       * Don't assign IRQ used by ACPI SCI
+-                       */
+-                      if (gsi == acpi_gbl_FADT.sci_interrupt)
+-                              gsi = pci_irq++;
+-                      gsi_to_irq[irq] = gsi;
+-              } else {
+-                      printk(KERN_ERR "GSI %u is too high\n", gsi);
+-                      return gsi;
+-              }
+-      }
+-
+-      io_apic_set_pci_routing(ioapic, ioapic_pin, gsi,
+-                  triggering == ACPI_EDGE_SENSITIVE ? 0 : 1,
+-                  polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
+-      return gsi;
  -}
  -
--#define DO_VM86_ERROR(trapnr, signr, str, name) \
--void do_##name(struct pt_regs * regs, long error_code) \
--{ \
--      if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
--                                              == NOTIFY_STOP) \
--              return; \
--      do_trap(trapnr, signr, str, 1, regs, error_code, NULL); \
--}
+-#endif /* CONFIG_X86_IO_APIC */
+-#endif /* CONFIG_ACPI */
+--- sle11-2009-05-14.orig/arch/x86/kernel/mpparse_64-xen.c     2009-03-16 16:33:40.000000000 +0100
++++ /dev/null  1970-01-01 00:00:00.000000000 +0000
+@@ -1,879 +0,0 @@
+-/*
+- *    Intel Multiprocessor Specification 1.1 and 1.4
+- *    compliant MP-table parsing routines.
+- *
+- *    (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
+- *    (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
+- *
+- *    Fixes
+- *            Erich Boleyn    :       MP v1.4 and additional changes.
+- *            Alan Cox        :       Added EBDA scanning
+- *            Ingo Molnar     :       various cleanups and rewrites
+- *            Maciej W. Rozycki:      Bits for default MP configurations
+- *            Paul Diefenbaugh:       Added full ACPI support
+- */
+-
+-#include <linux/mm.h>
+-#include <linux/init.h>
+-#include <linux/delay.h>
+-#include <linux/bootmem.h>
+-#include <linux/kernel_stat.h>
+-#include <linux/mc146818rtc.h>
+-#include <linux/acpi.h>
+-#include <linux/module.h>
+-
+-#include <asm/smp.h>
+-#include <asm/mtrr.h>
+-#include <asm/mpspec.h>
+-#include <asm/pgalloc.h>
+-#include <asm/io_apic.h>
+-#include <asm/proto.h>
+-#include <asm/acpi.h>
+-
+-/* Have we found an MP table */
+-int smp_found_config;
+-
+-/*
+- * Various Linux-internal data structures created from the
+- * MP-table.
+- */
+-DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
+-int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
+-
+-static int mp_current_pci_id = 0;
+-/* I/O APIC entries */
+-struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
+-
+-/* # of MP IRQ source entries */
+-struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
  -
--#define DO_VM86_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \
--void do_##name(struct pt_regs * regs, long error_code) \
--{ \
--      siginfo_t info; \
--      info.si_signo = signr; \
--      info.si_errno = 0; \
--      info.si_code = sicode; \
--      info.si_addr = (void __user *)siaddr; \
--      trace_hardirqs_fixup(); \
--      if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
--                                              == NOTIFY_STOP) \
--              return; \
--      do_trap(trapnr, signr, str, 1, regs, error_code, &info); \
-+#define DO_ERROR(trapnr, signr, str, name)                            \
-+void do_##name(struct pt_regs *regs, long error_code)                 \
-+{                                                                     \
-+      trace_hardirqs_fixup();                                         \
-+      if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr)  \
-+                                              == NOTIFY_STOP)         \
-+              return;                                                 \
-+      do_trap(trapnr, signr, str, 0, regs, error_code, NULL);         \
-+}
-+
-+#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr, irq)  \
-+void do_##name(struct pt_regs *regs, long error_code)                 \
-+{                                                                     \
-+      siginfo_t info;                                                 \
-+      if (irq)                                                        \
-+              local_irq_enable();                                     \
-+      info.si_signo = signr;                                          \
-+      info.si_errno = 0;                                              \
-+      info.si_code = sicode;                                          \
-+      info.si_addr = (void __user *)siaddr;                           \
-+      if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr)  \
-+                                              == NOTIFY_STOP)         \
-+              return;                                                 \
-+      do_trap(trapnr, signr, str, 0, regs, error_code, &info);        \
-+}
-+
-+#define DO_VM86_ERROR(trapnr, signr, str, name)                               \
-+void do_##name(struct pt_regs *regs, long error_code)                 \
-+{                                                                     \
-+      if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr)  \
-+                                              == NOTIFY_STOP)         \
-+              return;                                                 \
-+      do_trap(trapnr, signr, str, 1, regs, error_code, NULL);         \
-+}
-+
-+#define DO_VM86_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr)  \
-+void do_##name(struct pt_regs *regs, long error_code)                 \
-+{                                                                     \
-+      siginfo_t info;                                                 \
-+      info.si_signo = signr;                                          \
-+      info.si_errno = 0;                                              \
-+      info.si_code = sicode;                                          \
-+      info.si_addr = (void __user *)siaddr;                           \
-+      trace_hardirqs_fixup();                                         \
-+      if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr)  \
-+                                              == NOTIFY_STOP)         \
-+              return;                                                 \
-+      do_trap(trapnr, signr, str, 1, regs, error_code, &info);        \
- }
- 
--DO_VM86_ERROR_INFO( 0, SIGFPE,  "divide error", divide_error, FPE_INTDIV, regs->ip)
-+DO_VM86_ERROR_INFO(0, SIGFPE,  "divide error", divide_error, FPE_INTDIV, regs->ip)
- #ifndef CONFIG_KPROBES
--DO_VM86_ERROR( 3, SIGTRAP, "int3", int3)
-+DO_VM86_ERROR(3, SIGTRAP, "int3", int3)
- #endif
--DO_VM86_ERROR( 4, SIGSEGV, "overflow", overflow)
--DO_VM86_ERROR( 5, SIGSEGV, "bounds", bounds)
--DO_ERROR_INFO( 6, SIGILL,  "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip, 0)
--DO_ERROR( 9, SIGFPE,  "coprocessor segment overrun", coprocessor_segment_overrun)
-+DO_VM86_ERROR(4, SIGSEGV, "overflow", overflow)
-+DO_VM86_ERROR(5, SIGSEGV, "bounds", bounds)
-+DO_ERROR_INFO(6, SIGILL,  "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip, 0)
-+DO_ERROR(9, SIGFPE,  "coprocessor segment overrun", coprocessor_segment_overrun)
- DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
- DO_ERROR(11, SIGBUS,  "segment not present", segment_not_present)
- DO_ERROR(12, SIGBUS,  "stack segment", stack_segment)
- DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0, 0)
--DO_ERROR_INFO(32, SIGSEGV, "iret exception", iret_error, ILL_BADSTK, 0, 1)
-+DO_ERROR_INFO(32, SIGILL, "iret exception", iret_error, ILL_BADSTK, 0, 1)
- 
- void __kprobes do_general_protection(struct pt_regs * regs,
-                                             long error_code)
- {
--      if (regs->flags & VM_MASK)
-+      struct thread_struct *thread;
-+
-+      thread = &current->thread;
-+
-+      if (regs->flags & X86_VM_MASK)
-               goto gp_in_vm86;
- 
-       if (!user_mode(regs))
-@@ -613,6 +628,7 @@ void __kprobes do_general_protection(str
- 
-       current->thread.error_code = error_code;
-       current->thread.trap_no = 13;
-+
-       if (show_unhandled_signals && unhandled_signal(current, SIGSEGV) &&
-           printk_ratelimit()) {
-               printk(KERN_INFO
-@@ -642,22 +658,25 @@ gp_in_kernel:
-       }
- }
- 
--static __kprobes void
--mem_parity_error(unsigned char reason, struct pt_regs * regs)
-+static notrace __kprobes void
-+mem_parity_error(unsigned char reason, struct pt_regs *regs)
- {
--      printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x on "
--              "CPU %d.\n", reason, smp_processor_id());
--      printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n");
-+      printk(KERN_EMERG
-+              "Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
-+                      reason, smp_processor_id());
-+
-+      printk(KERN_EMERG
-+              "You have some hardware problem, likely on the PCI bus.\n");
- 
- #if defined(CONFIG_EDAC)
--      if(edac_handler_set()) {
-+      if (edac_handler_set()) {
-               edac_atomic_assert_error();
-               return;
-       }
- #endif
- 
-       if (panic_on_unrecovered_nmi)
--                panic("NMI: Not continuing");
-+              panic("NMI: Not continuing");
- 
-       printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
- 
-@@ -665,8 +684,8 @@ mem_parity_error(unsigned char reason, s
-       clear_mem_error(reason);
- }
- 
--static __kprobes void
--io_check_error(unsigned char reason, struct pt_regs * regs)
-+static notrace __kprobes void
-+io_check_error(unsigned char reason, struct pt_regs *regs)
- {
-       printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n");
-       show_registers(regs);
-@@ -675,38 +694,43 @@ io_check_error(unsigned char reason, str
-       clear_io_check_error(reason);
- }
- 
--static __kprobes void
--unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
-+static notrace __kprobes void
-+unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
- {
-+      if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
-+              return;
- #ifdef CONFIG_MCA
--      /* Might actually be able to figure out what the guilty party
--      * is. */
--      if( MCA_bus ) {
-+      /*
-+       * Might actually be able to figure out what the guilty party
-+       * is:
-+       */
-+      if (MCA_bus) {
-               mca_handle_nmi();
-               return;
-       }
- #endif
--      printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x on "
--              "CPU %d.\n", reason, smp_processor_id());
-+      printk(KERN_EMERG
-+              "Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
-+                      reason, smp_processor_id());
-+
-       printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n");
-       if (panic_on_unrecovered_nmi)
--                panic("NMI: Not continuing");
-+              panic("NMI: Not continuing");
- 
-       printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
- }
- 
- static DEFINE_SPINLOCK(nmi_print_lock);
- 
--void __kprobes die_nmi(struct pt_regs *regs, const char *msg)
-+void notrace __kprobes die_nmi(struct pt_regs *regs, const char *msg)
- {
--      if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 2, SIGINT) ==
--          NOTIFY_STOP)
-+      if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 2, SIGINT) == NOTIFY_STOP)
-               return;
- 
-       spin_lock(&nmi_print_lock);
-       /*
-       * We are in trouble anyway, lets at least try
--      * to get a message out.
-+      * to get a message out:
-       */
-       bust_spinlocks(1);
-       printk(KERN_EMERG "%s", msg);
-@@ -717,9 +741,10 @@ void __kprobes die_nmi(struct pt_regs *r
-       spin_unlock(&nmi_print_lock);
-       bust_spinlocks(0);
- 
--      /* If we are in kernel we are probably nested up pretty bad
--       * and might aswell get out now while we still can.
--      */
-+      /*
-+       * If we are in kernel we are probably nested up pretty bad
-+       * and might aswell get out now while we still can:
-+       */
-       if (!user_mode_vm(regs)) {
-               current->thread.trap_no = 2;
-               crash_kexec(regs);
-@@ -728,14 +753,14 @@ void __kprobes die_nmi(struct pt_regs *r
-       do_exit(SIGSEGV);
- }
- 
--static __kprobes void default_do_nmi(struct pt_regs * regs)
-+static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
- {
-       unsigned char reason = 0;
- 
--      /* Only the BSP gets external NMIs from the system.  */
-+      /* Only the BSP gets external NMIs from the system: */
-       if (!smp_processor_id())
-               reason = get_nmi_reason();
-- 
-+
-       if (!(reason & 0xc0)) {
-               if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT)
-                                                       == NOTIFY_STOP)
-@@ -748,8 +773,10 @@ static __kprobes void default_do_nmi(str
-               if (nmi_watchdog_tick(regs, reason))
-                       return;
-               if (!do_nmi_callback(regs, smp_processor_id()))
+-/* MP IRQ source entries */
+-int mp_irq_entries;
+-
+-int nr_ioapics;
+-unsigned long mp_lapic_addr = 0;
+-
+-
+-
+-/* Processor that is doing the boot up */
+-unsigned int boot_cpu_id = -1U;
+-EXPORT_SYMBOL(boot_cpu_id);
+-
+-/* Internal processor count */
+-unsigned int num_processors;
+-
+-unsigned disabled_cpus __cpuinitdata;
+-
+-/* Bitmask of physically existing CPUs */
+-physid_mask_t phys_cpu_present_map = PHYSID_MASK_NONE;
+-
+-#ifndef CONFIG_XEN
+-u16 x86_bios_cpu_apicid_init[NR_CPUS] __initdata
+-                              = { [0 ... NR_CPUS-1] = BAD_APICID };
+-void *x86_bios_cpu_apicid_early_ptr;
  -#endif
-                       unknown_nmi_error(reason, regs);
-+#else
-+              unknown_nmi_error(reason, regs);
-+#endif
- 
-               return;
-       }
-@@ -761,14 +788,14 @@ static __kprobes void default_do_nmi(str
-               io_check_error(reason, regs);
-       /*
-        * Reassert NMI in case it became active meanwhile
--       * as it's edge-triggered.
-+       * as it's edge-triggered:
-        */
-       reassert_nmi();
- }
- 
- static int ignore_nmis;
- 
--__kprobes void do_nmi(struct pt_regs * regs, long error_code)
-+notrace __kprobes void do_nmi(struct pt_regs *regs, long error_code)
- {
-       int cpu;
- 
-@@ -804,9 +831,12 @@ void __kprobes do_int3(struct pt_regs *r
-       if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP)
-                       == NOTIFY_STOP)
-               return;
--      /* This is an interrupt gate, because kprobes wants interrupts
--      disabled.  Normal trap handlers don't. */
-+      /*
-+       * This is an interrupt gate, because kprobes wants interrupts
-+       * disabled. Normal trap handlers don't.
-+       */
-       restore_interrupts(regs);
-+
-       do_trap(3, SIGTRAP, "int3", 1, regs, error_code, NULL);
- }
- #endif
-@@ -821,7 +851,7 @@ void __kprobes do_int3(struct pt_regs *r
-  * from user space. Such code must not hold kernel locks (since it
-  * can equally take a page fault), therefore it is safe to call
-  * force_sig_info even though that claims and releases locks.
-- * 
-+ *
-  * Code in ./signal.c ensures that the debug control register
-  * is restored before we deliver any signal, and therefore that
-  * user code runs with the correct debug control register even though
-@@ -833,10 +863,10 @@ void __kprobes do_int3(struct pt_regs *r
-  * find every occurrence of the TF bit that could be saved away even
-  * by user code)
-  */
--void __kprobes do_debug(struct pt_regs * regs, long error_code)
-+void __kprobes do_debug(struct pt_regs *regs, long error_code)
- {
--      unsigned int condition;
-       struct task_struct *tsk = current;
-+      unsigned int condition;
- 
-       trace_hardirqs_fixup();
- 
-@@ -861,7 +891,7 @@ void __kprobes do_debug(struct pt_regs *
-                       goto clear_dr7;
-       }
- 
--      if (regs->flags & VM_MASK)
-+      if (regs->flags & X86_VM_MASK)
-               goto debug_vm86;
- 
-       /* Save debug status register where ptrace can see it */
-@@ -884,7 +914,8 @@ void __kprobes do_debug(struct pt_regs *
-       /* Ok, finally something we can handle */
-       send_sigtrap(tsk, regs, error_code);
- 
--      /* Disable additional traps. They'll be re-enabled when
-+      /*
-+       * Disable additional traps. They'll be re-enabled when
-        * the signal is delivered.
-        */
- clear_dr7:
-@@ -897,7 +928,7 @@ debug_vm86:
- 
- clear_TF_reenable:
-       set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
--      regs->flags &= ~TF_MASK;
-+      regs->flags &= ~X86_EFLAGS_TF;
-       return;
- }
- 
-@@ -908,9 +939,10 @@ clear_TF_reenable:
-  */
- void math_error(void __user *ip)
- {
--      struct task_struct * task;
-+      struct task_struct *task;
-+      unsigned short cwd;
-+      unsigned short swd;
-       siginfo_t info;
--      unsigned short cwd, swd;
- 
-       /*
-        * Save the info for the exception handler and clear the error.
-@@ -936,36 +968,36 @@ void math_error(void __user *ip)
-       cwd = get_fpu_cwd(task);
-       swd = get_fpu_swd(task);
-       switch (swd & ~cwd & 0x3f) {
--              case 0x000: /* No unmasked exception */
--                      return;
--              default:    /* Multiple exceptions */
--                      break;
--              case 0x001: /* Invalid Op */
+-DEFINE_PER_CPU(u16, x86_bios_cpu_apicid) = BAD_APICID;
+-EXPORT_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
+-
+-
+-/*
+- * Intel MP BIOS table parsing routines:
+- */
+-
+-/*
+- * Checksum an MP configuration block.
+- */
+-
+-static int __init mpf_checksum(unsigned char *mp, int len)
+-{
+-      int sum = 0;
+-
+-      while (len--)
+-              sum += *mp++;
+-
+-      return sum & 0xFF;
+-}
+-
+-#ifndef CONFIG_XEN
+-static void __cpuinit MP_processor_info(struct mpc_config_processor *m)
+-{
+-      int cpu;
+-      cpumask_t tmp_map;
+-      char *bootup_cpu = "";
+-
+-      if (!(m->mpc_cpuflag & CPU_ENABLED)) {
+-              disabled_cpus++;
+-              return;
+-      }
+-      if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
+-              bootup_cpu = " (Bootup-CPU)";
+-              boot_cpu_id = m->mpc_apicid;
+-      }
+-
+-      printk(KERN_INFO "Processor #%d%s\n", m->mpc_apicid, bootup_cpu);
+-
+-      if (num_processors >= NR_CPUS) {
+-              printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
+-                      " Processor ignored.\n", NR_CPUS);
+-              return;
+-      }
+-
+-      num_processors++;
+-      cpus_complement(tmp_map, cpu_present_map);
+-      cpu = first_cpu(tmp_map);
+-
+-      physid_set(m->mpc_apicid, phys_cpu_present_map);
+-      if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
+-              /*
+-               * x86_bios_cpu_apicid is required to have processors listed
+-               * in same order as logical cpu numbers. Hence the first
+-               * entry is BSP, and so on.
+-               */
+-              cpu = 0;
+-      }
+-      /* are we being called early in kernel startup? */
+-      if (x86_cpu_to_apicid_early_ptr) {
+-              u16 *cpu_to_apicid = x86_cpu_to_apicid_early_ptr;
+-              u16 *bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr;
+-
+-              cpu_to_apicid[cpu] = m->mpc_apicid;
+-              bios_cpu_apicid[cpu] = m->mpc_apicid;
+-      } else {
+-              per_cpu(x86_cpu_to_apicid, cpu) = m->mpc_apicid;
+-              per_cpu(x86_bios_cpu_apicid, cpu) = m->mpc_apicid;
+-      }
+-
+-      cpu_set(cpu, cpu_possible_map);
+-      cpu_set(cpu, cpu_present_map);
+-}
+-#else
+-static void __cpuinit MP_processor_info(struct mpc_config_processor *m)
+-{
+-      num_processors++;
+-}
+-#endif /* CONFIG_XEN */
+-
+-static void __init MP_bus_info (struct mpc_config_bus *m)
+-{
+-      char str[7];
+-
+-      memcpy(str, m->mpc_bustype, 6);
+-      str[6] = 0;
+-      Dprintk("Bus #%d is %s\n", m->mpc_busid, str);
+-
+-      if (strncmp(str, "ISA", 3) == 0) {
+-              set_bit(m->mpc_busid, mp_bus_not_pci);
+-      } else if (strncmp(str, "PCI", 3) == 0) {
+-              clear_bit(m->mpc_busid, mp_bus_not_pci);
+-              mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id;
+-              mp_current_pci_id++;
+-      } else {
+-              printk(KERN_ERR "Unknown bustype %s\n", str);
+-      }
+-}
+-
+-static int bad_ioapic(unsigned long address)
+-{
+-      if (nr_ioapics >= MAX_IO_APICS) {
+-              printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
+-                      "(found %d)\n", MAX_IO_APICS, nr_ioapics);
+-              panic("Recompile kernel with bigger MAX_IO_APICS!\n");
+-      }
+-      if (!address) {
+-              printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address"
+-                      " found in table, skipping!\n");
+-              return 1;
+-      }
+-      return 0;
+-}
+-
+-static void __init MP_ioapic_info (struct mpc_config_ioapic *m)
+-{
+-      if (!(m->mpc_flags & MPC_APIC_USABLE))
+-              return;
+-
+-      printk("I/O APIC #%d at 0x%X.\n",
+-              m->mpc_apicid, m->mpc_apicaddr);
+-
+-      if (bad_ioapic(m->mpc_apicaddr))
+-              return;
+-
+-      mp_ioapics[nr_ioapics] = *m;
+-      nr_ioapics++;
+-}
+-
+-static void __init MP_intsrc_info (struct mpc_config_intsrc *m)
+-{
+-      mp_irqs [mp_irq_entries] = *m;
+-      Dprintk("Int: type %d, pol %d, trig %d, bus %d,"
+-              " IRQ %02x, APIC ID %x, APIC INT %02x\n",
+-                      m->mpc_irqtype, m->mpc_irqflag & 3,
+-                      (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus,
+-                      m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq);
+-      if (++mp_irq_entries >= MAX_IRQ_SOURCES)
+-              panic("Max # of irq sources exceeded!!\n");
+-}
+-
+-static void __init MP_lintsrc_info (struct mpc_config_lintsrc *m)
+-{
+-      Dprintk("Lint: type %d, pol %d, trig %d, bus %d,"
+-              " IRQ %02x, APIC ID %x, APIC LINT %02x\n",
+-                      m->mpc_irqtype, m->mpc_irqflag & 3,
+-                      (m->mpc_irqflag >> 2) &3, m->mpc_srcbusid,
+-                      m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
+-}
+-
+-/*
+- * Read/parse the MPC
+- */
+-
+-static int __init smp_read_mpc(struct mp_config_table *mpc)
+-{
+-      char str[16];
+-      int count=sizeof(*mpc);
+-      unsigned char *mpt=((unsigned char *)mpc)+count;
+-
+-      if (memcmp(mpc->mpc_signature,MPC_SIGNATURE,4)) {
+-              printk("MPTABLE: bad signature [%c%c%c%c]!\n",
+-                      mpc->mpc_signature[0],
+-                      mpc->mpc_signature[1],
+-                      mpc->mpc_signature[2],
+-                      mpc->mpc_signature[3]);
+-              return 0;
+-      }
+-      if (mpf_checksum((unsigned char *)mpc,mpc->mpc_length)) {
+-              printk("MPTABLE: checksum error!\n");
+-              return 0;
+-      }
+-      if (mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04) {
+-              printk(KERN_ERR "MPTABLE: bad table version (%d)!!\n",
+-                      mpc->mpc_spec);
+-              return 0;
+-      }
+-      if (!mpc->mpc_lapic) {
+-              printk(KERN_ERR "MPTABLE: null local APIC address!\n");
+-              return 0;
+-      }
+-      memcpy(str,mpc->mpc_oem,8);
+-      str[8] = 0;
+-      printk(KERN_INFO "MPTABLE: OEM ID: %s ",str);
+-
+-      memcpy(str,mpc->mpc_productid,12);
+-      str[12] = 0;
+-      printk("MPTABLE: Product ID: %s ",str);
+-
+-      printk("MPTABLE: APIC at: 0x%X\n",mpc->mpc_lapic);
+-
+-      /* save the local APIC address, it might be non-default */
+-      if (!acpi_lapic)
+-              mp_lapic_addr = mpc->mpc_lapic;
+-
+-      /*
+-       *      Now process the configuration blocks.
+-       */
+-      while (count < mpc->mpc_length) {
+-              switch(*mpt) {
+-                      case MP_PROCESSOR:
+-                      {
+-                              struct mpc_config_processor *m=
+-                                      (struct mpc_config_processor *)mpt;
+-                              if (!acpi_lapic)
+-                                      MP_processor_info(m);
+-                              mpt += sizeof(*m);
+-                              count += sizeof(*m);
+-                              break;
+-                      }
+-                      case MP_BUS:
+-                      {
+-                              struct mpc_config_bus *m=
+-                                      (struct mpc_config_bus *)mpt;
+-                              MP_bus_info(m);
+-                              mpt += sizeof(*m);
+-                              count += sizeof(*m);
+-                              break;
+-                      }
+-                      case MP_IOAPIC:
+-                      {
+-                              struct mpc_config_ioapic *m=
+-                                      (struct mpc_config_ioapic *)mpt;
+-                              MP_ioapic_info(m);
+-                              mpt += sizeof(*m);
+-                              count += sizeof(*m);
+-                              break;
+-                      }
+-                      case MP_INTSRC:
+-                      {
+-                              struct mpc_config_intsrc *m=
+-                                      (struct mpc_config_intsrc *)mpt;
+-
+-                              MP_intsrc_info(m);
+-                              mpt += sizeof(*m);
+-                              count += sizeof(*m);
+-                              break;
+-                      }
+-                      case MP_LINTSRC:
+-                      {
+-                              struct mpc_config_lintsrc *m=
+-                                      (struct mpc_config_lintsrc *)mpt;
+-                              MP_lintsrc_info(m);
+-                              mpt += sizeof(*m);
+-                              count += sizeof(*m);
+-                              break;
+-                      }
+-              }
+-      }
+-      setup_apic_routing();
+-      if (!num_processors)
+-              printk(KERN_ERR "MPTABLE: no processors registered!\n");
+-      return num_processors;
+-}
+-
+-static int __init ELCR_trigger(unsigned int irq)
+-{
+-      unsigned int port;
+-
+-      port = 0x4d0 + (irq >> 3);
+-      return (inb(port) >> (irq & 7)) & 1;
+-}
+-
+-static void __init construct_default_ioirq_mptable(int mpc_default_type)
+-{
+-      struct mpc_config_intsrc intsrc;
+-      int i;
+-      int ELCR_fallback = 0;
+-
+-      intsrc.mpc_type = MP_INTSRC;
+-      intsrc.mpc_irqflag = 0;                 /* conforming */
+-      intsrc.mpc_srcbus = 0;
+-      intsrc.mpc_dstapic = mp_ioapics[0].mpc_apicid;
+-
+-      intsrc.mpc_irqtype = mp_INT;
+-
+-      /*
+-       *  If true, we have an ISA/PCI system with no IRQ entries
+-       *  in the MP table. To prevent the PCI interrupts from being set up
+-       *  incorrectly, we try to use the ELCR. The sanity check to see if
+-       *  there is good ELCR data is very simple - IRQ0, 1, 2 and 13 can
+-       *  never be level sensitive, so we simply see if the ELCR agrees.
+-       *  If it does, we assume it's valid.
+-       */
+-      if (mpc_default_type == 5) {
+-              printk(KERN_INFO "ISA/PCI bus type with no IRQ information... falling back to ELCR\n");
+-
+-              if (ELCR_trigger(0) || ELCR_trigger(1) || ELCR_trigger(2) || ELCR_trigger(13))
+-                      printk(KERN_ERR "ELCR contains invalid data... not using ELCR\n");
+-              else {
+-                      printk(KERN_INFO "Using ELCR to identify PCI interrupts\n");
+-                      ELCR_fallback = 1;
+-              }
+-      }
+-
+-      for (i = 0; i < 16; i++) {
+-              switch (mpc_default_type) {
+-              case 2:
+-                      if (i == 0 || i == 13)
+-                              continue;       /* IRQ0 & IRQ13 not connected */
+-                      /* fall through */
+-              default:
+-                      if (i == 2)
+-                              continue;       /* IRQ2 is never connected */
+-              }
+-
+-              if (ELCR_fallback) {
  -                      /*
--                       * swd & 0x240 == 0x040: Stack Underflow
--                       * swd & 0x240 == 0x240: Stack Overflow
--                       * User must clear the SF bit (0x40) if set
+-                       *  If the ELCR indicates a level-sensitive interrupt, we
+-                       *  copy that information over to the MP table in the
+-                       *  irqflag field (level sensitive, active high polarity).
  -                       */
--                      info.si_code = FPE_FLTINV;
--                      break;
--              case 0x002: /* Denormalize */
--              case 0x010: /* Underflow */
--                      info.si_code = FPE_FLTUND;
--                      break;
--              case 0x004: /* Zero Divide */
--                      info.si_code = FPE_FLTDIV;
--                      break;
--              case 0x008: /* Overflow */
--                      info.si_code = FPE_FLTOVF;
--                      break;
--              case 0x020: /* Precision */
--                      info.si_code = FPE_FLTRES;
--                      break;
-+      case 0x000: /* No unmasked exception */
-+              return;
-+      default:    /* Multiple exceptions */
-+              break;
-+      case 0x001: /* Invalid Op */
-+              /*
-+               * swd & 0x240 == 0x040: Stack Underflow
-+               * swd & 0x240 == 0x240: Stack Overflow
-+               * User must clear the SF bit (0x40) if set
-+               */
-+              info.si_code = FPE_FLTINV;
-+              break;
-+      case 0x002: /* Denormalize */
-+      case 0x010: /* Underflow */
-+              info.si_code = FPE_FLTUND;
-+              break;
-+      case 0x004: /* Zero Divide */
-+              info.si_code = FPE_FLTDIV;
-+              break;
-+      case 0x008: /* Overflow */
-+              info.si_code = FPE_FLTOVF;
-+              break;
-+      case 0x020: /* Precision */
-+              info.si_code = FPE_FLTRES;
-+              break;
-       }
-       force_sig_info(SIGFPE, &info, task);
- }
- 
--void do_coprocessor_error(struct pt_regs * regs, long error_code)
-+void do_coprocessor_error(struct pt_regs *regs, long error_code)
- {
-       ignore_fpu_irq = 1;
-       math_error((void __user *)regs->ip);
-@@ -973,9 +1005,9 @@ void do_coprocessor_error(struct pt_regs
- 
- static void simd_math_error(void __user *ip)
- {
--      struct task_struct * task;
--      siginfo_t info;
-+      struct task_struct *task;
-       unsigned short mxcsr;
-+      siginfo_t info;
- 
-       /*
-        * Save the info for the exception handler and clear the error.
-@@ -996,84 +1028,82 @@ static void simd_math_error(void __user 
-        */
-       mxcsr = get_fpu_mxcsr(task);
-       switch (~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f)) {
--              case 0x000:
+-                      if (ELCR_trigger(i))
+-                              intsrc.mpc_irqflag = 13;
+-                      else
+-                              intsrc.mpc_irqflag = 0;
+-              }
+-
+-              intsrc.mpc_srcbusirq = i;
+-              intsrc.mpc_dstirq = i ? i : 2;          /* IRQ0 to INTIN2 */
+-              MP_intsrc_info(&intsrc);
+-      }
+-
+-      intsrc.mpc_irqtype = mp_ExtINT;
+-      intsrc.mpc_srcbusirq = 0;
+-      intsrc.mpc_dstirq = 0;                          /* 8259A to INTIN0 */
+-      MP_intsrc_info(&intsrc);
+-}
+-
+-static inline void __init construct_default_ISA_mptable(int mpc_default_type)
+-{
+-      struct mpc_config_processor processor;
+-      struct mpc_config_bus bus;
+-      struct mpc_config_ioapic ioapic;
+-      struct mpc_config_lintsrc lintsrc;
+-      int linttypes[2] = { mp_ExtINT, mp_NMI };
+-      int i;
+-
+-      /*
+-       * local APIC has default address
+-       */
+-      mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
+-
+-      /*
+-       * 2 CPUs, numbered 0 & 1.
+-       */
+-      processor.mpc_type = MP_PROCESSOR;
+-      processor.mpc_apicver = 0;
+-      processor.mpc_cpuflag = CPU_ENABLED;
+-      processor.mpc_cpufeature = 0;
+-      processor.mpc_featureflag = 0;
+-      processor.mpc_reserved[0] = 0;
+-      processor.mpc_reserved[1] = 0;
+-      for (i = 0; i < 2; i++) {
+-              processor.mpc_apicid = i;
+-              MP_processor_info(&processor);
+-      }
+-
+-      bus.mpc_type = MP_BUS;
+-      bus.mpc_busid = 0;
+-      switch (mpc_default_type) {
  -              default:
+-                      printk(KERN_ERR "???\nUnknown standard configuration %d\n",
+-                              mpc_default_type);
+-                      /* fall through */
+-              case 1:
+-              case 5:
+-                      memcpy(bus.mpc_bustype, "ISA   ", 6);
  -                      break;
--              case 0x001: /* Invalid Op */
--                      info.si_code = FPE_FLTINV;
--                      break;
--              case 0x002: /* Denormalize */
--              case 0x010: /* Underflow */
--                      info.si_code = FPE_FLTUND;
--                      break;
--              case 0x004: /* Zero Divide */
--                      info.si_code = FPE_FLTDIV;
--                      break;
--              case 0x008: /* Overflow */
--                      info.si_code = FPE_FLTOVF;
--                      break;
--              case 0x020: /* Precision */
--                      info.si_code = FPE_FLTRES;
--                      break;
-+      case 0x000:
-+      default:
-+              break;
-+      case 0x001: /* Invalid Op */
-+              info.si_code = FPE_FLTINV;
-+              break;
-+      case 0x002: /* Denormalize */
-+      case 0x010: /* Underflow */
-+              info.si_code = FPE_FLTUND;
-+              break;
-+      case 0x004: /* Zero Divide */
-+              info.si_code = FPE_FLTDIV;
-+              break;
-+      case 0x008: /* Overflow */
-+              info.si_code = FPE_FLTOVF;
-+              break;
-+      case 0x020: /* Precision */
-+              info.si_code = FPE_FLTRES;
-+              break;
-       }
-       force_sig_info(SIGFPE, &info, task);
- }
- 
--void do_simd_coprocessor_error(struct pt_regs * regs,
--                                        long error_code)
-+void do_simd_coprocessor_error(struct pt_regs *regs, long error_code)
- {
-       if (cpu_has_xmm) {
-               /* Handle SIMD FPU exceptions on PIII+ processors. */
-               ignore_fpu_irq = 1;
-               simd_math_error((void __user *)regs->ip);
--      } else {
--              /*
--               * Handle strange cache flush from user space exception
--               * in all other cases.  This is undocumented behaviour.
--               */
--              if (regs->flags & VM_MASK) {
--                      handle_vm86_fault((struct kernel_vm86_regs *)regs,
--                                        error_code);
--                      return;
--              }
--              current->thread.trap_no = 19;
--              current->thread.error_code = error_code;
--              die_if_kernel("cache flush denied", regs, error_code);
--              force_sig(SIGSEGV, current);
-+              return;
-+      }
-+      /*
-+       * Handle strange cache flush from user space exception
-+       * in all other cases.  This is undocumented behaviour.
-+       */
-+      if (regs->flags & X86_VM_MASK) {
-+              handle_vm86_fault((struct kernel_vm86_regs *)regs, error_code);
-+              return;
-       }
-+      current->thread.trap_no = 19;
-+      current->thread.error_code = error_code;
-+      die_if_kernel("cache flush denied", regs, error_code);
-+      force_sig(SIGSEGV, current);
- }
- 
- #ifndef CONFIG_XEN
--void do_spurious_interrupt_bug(struct pt_regs * regs,
--                                        long error_code)
-+void do_spurious_interrupt_bug(struct pt_regs *regs, long error_code)
- {
- #if 0
-       /* No need to warn about this any longer. */
--      printk("Ignoring P6 Local APIC Spurious Interrupt Bug...\n");
-+      printk(KERN_INFO "Ignoring P6 Local APIC Spurious Interrupt Bug...\n");
- #endif
- }
- 
--unsigned long patch_espfix_desc(unsigned long uesp,
--                                        unsigned long kesp)
-+unsigned long patch_espfix_desc(unsigned long uesp, unsigned long kesp)
- {
-       struct desc_struct *gdt = __get_cpu_var(gdt_page).gdt;
-       unsigned long base = (kesp - uesp) & -THREAD_SIZE;
-       unsigned long new_kesp = kesp - base;
-       unsigned long lim_pages = (new_kesp | (THREAD_SIZE - 1)) >> PAGE_SHIFT;
-       __u64 desc = *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS];
-+
-       /* Set up base for espfix segment */
--      desc &= 0x00f0ff0000000000ULL;
--      desc |= ((((__u64)base) << 16) & 0x000000ffffff0000ULL) |
-+      desc &= 0x00f0ff0000000000ULL;
-+      desc |= ((((__u64)base) << 16) & 0x000000ffffff0000ULL) |
-               ((((__u64)base) << 32) & 0xff00000000000000ULL) |
-               ((((__u64)lim_pages) << 32) & 0x000f000000000000ULL) |
-               (lim_pages & 0xffff);
-       *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS] = desc;
-+
-       return new_kesp;
- }
- #endif
- 
- /*
-- *  'math_state_restore()' saves the current math information in the
-+ * 'math_state_restore()' saves the current math information in the
-  * old math state array, and gets the new ones from the current task
-  *
-  * Careful.. There are problems with IBM-designed IRQ13 behaviour.
-@@ -1087,9 +1117,22 @@ asmlinkage void math_state_restore(void)
-       struct thread_info *thread = current_thread_info();
-       struct task_struct *tsk = thread->task;
- 
-+      if (!tsk_used_math(tsk)) {
-+              local_irq_enable();
-+              /*
-+               * does a slab alloc which can sleep
-+               */
-+              if (init_fpu(tsk)) {
-+                      /*
-+                       * ran out of memory!
-+                       */
-+                      do_group_exit(SIGKILL);
-+                      return;
-+              }
-+              local_irq_disable();
-+      }
-+
-       /* NB. 'clts' is done for us by Xen during virtual trap. */
--      if (!tsk_used_math(tsk))
--              init_fpu(tsk);
-       restore_fpu(tsk);
-       thread->status |= TS_USEDFPU;   /* So we fnsave on switch_to() */
-       tsk->fpu_counter++;
-@@ -1100,15 +1143,15 @@ EXPORT_SYMBOL_GPL(math_state_restore);
- 
- asmlinkage void math_emulate(long arg)
- {
--      printk(KERN_EMERG "math-emulation not enabled and no coprocessor found.\n");
--      printk(KERN_EMERG "killing %s.\n",current->comm);
--      force_sig(SIGFPE,current);
-+      printk(KERN_EMERG
-+              "math-emulation not enabled and no coprocessor found.\n");
-+      printk(KERN_EMERG "killing %s.\n", current->comm);
-+      force_sig(SIGFPE, current);
-       schedule();
- }
- 
- #endif /* CONFIG_MATH_EMULATION */
- 
+-      }
+-      MP_bus_info(&bus);
+-      if (mpc_default_type > 4) {
+-              bus.mpc_busid = 1;
+-              memcpy(bus.mpc_bustype, "PCI   ", 6);
+-              MP_bus_info(&bus);
+-      }
+-
+-      ioapic.mpc_type = MP_IOAPIC;
+-      ioapic.mpc_apicid = 2;
+-      ioapic.mpc_apicver = 0;
+-      ioapic.mpc_flags = MPC_APIC_USABLE;
+-      ioapic.mpc_apicaddr = 0xFEC00000;
+-      MP_ioapic_info(&ioapic);
+-
+-      /*
+-       * We set up most of the low 16 IO-APIC pins according to MPS rules.
+-       */
+-      construct_default_ioirq_mptable(mpc_default_type);
+-
+-      lintsrc.mpc_type = MP_LINTSRC;
+-      lintsrc.mpc_irqflag = 0;                /* conforming */
+-      lintsrc.mpc_srcbusid = 0;
+-      lintsrc.mpc_srcbusirq = 0;
+-      lintsrc.mpc_destapic = MP_APIC_ALL;
+-      for (i = 0; i < 2; i++) {
+-              lintsrc.mpc_irqtype = linttypes[i];
+-              lintsrc.mpc_destapiclint = i;
+-              MP_lintsrc_info(&lintsrc);
+-      }
+-}
+-
+-static struct intel_mp_floating *mpf_found;
+-
+-/*
+- * Scan the memory blocks for an SMP configuration block.
+- */
+-void __init get_smp_config (void)
+-{
+-      struct intel_mp_floating *mpf = mpf_found;
+-
+-      /*
+-       * ACPI supports both logical (e.g. Hyper-Threading) and physical 
+-       * processors, where MPS only supports physical.
+-       */
+-      if (acpi_lapic && acpi_ioapic) {
+-              printk(KERN_INFO "Using ACPI (MADT) for SMP configuration information\n");
+-              return;
+-      }
+-      else if (acpi_lapic)
+-              printk(KERN_INFO "Using ACPI for processor (LAPIC) configuration information\n");
+-
+-      printk("Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification);
+-
+-      /*
+-       * Now see if we need to read further.
+-       */
+-      if (mpf->mpf_feature1 != 0) {
+-
+-              printk(KERN_INFO "Default MP configuration #%d\n", mpf->mpf_feature1);
+-              construct_default_ISA_mptable(mpf->mpf_feature1);
+-
+-      } else if (mpf->mpf_physptr) {
+-
+-              /*
+-               * Read the physical hardware table.  Anything here will
+-               * override the defaults.
+-               */
+-              if (!smp_read_mpc(isa_bus_to_virt(mpf->mpf_physptr))) {
+-                      smp_found_config = 0;
+-                      printk(KERN_ERR "BIOS bug, MP table errors detected!...\n");
+-                      printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n");
+-                      return;
+-              }
+-              /*
+-               * If there are no explicit MP IRQ entries, then we are
+-               * broken.  We set up most of the low 16 IO-APIC pins to
+-               * ISA defaults and hope it will work.
+-               */
+-              if (!mp_irq_entries) {
+-                      struct mpc_config_bus bus;
+-
+-                      printk(KERN_ERR "BIOS bug, no explicit IRQ entries, using default mptable. (tell your hw vendor)\n");
+-
+-                      bus.mpc_type = MP_BUS;
+-                      bus.mpc_busid = 0;
+-                      memcpy(bus.mpc_bustype, "ISA   ", 6);
+-                      MP_bus_info(&bus);
+-
+-                      construct_default_ioirq_mptable(0);
+-              }
+-
+-      } else
+-              BUG();
+-
+-      printk(KERN_INFO "Processors: %d\n", num_processors);
+-      /*
+-       * Only use the first configuration found.
+-       */
+-}
+-
+-static int __init smp_scan_config (unsigned long base, unsigned long length)
+-{
+-      extern void __bad_mpf_size(void); 
+-      unsigned int *bp = isa_bus_to_virt(base);
+-      struct intel_mp_floating *mpf;
+-
+-      Dprintk("Scan SMP from %p for %ld bytes.\n", bp,length);
+-      if (sizeof(*mpf) != 16)
+-              __bad_mpf_size();
+-
+-      while (length > 0) {
+-              mpf = (struct intel_mp_floating *)bp;
+-              if ((*bp == SMP_MAGIC_IDENT) &&
+-                      (mpf->mpf_length == 1) &&
+-                      !mpf_checksum((unsigned char *)bp, 16) &&
+-                      ((mpf->mpf_specification == 1)
+-                              || (mpf->mpf_specification == 4)) ) {
+-
+-                      smp_found_config = 1;
+-                      mpf_found = mpf;
+-                      return 1;
+-              }
+-              bp += 4;
+-              length -= 16;
+-      }
+-      return 0;
+-}
+-
+-void __init find_smp_config(void)
+-{
+-      unsigned int address;
+-
+-      /*
+-       * FIXME: Linux assumes you have 640K of base ram..
+-       * this continues the error...
+-       *
+-       * 1) Scan the bottom 1K for a signature
+-       * 2) Scan the top 1K of base RAM
+-       * 3) Scan the 64K of bios
+-       */
+-      if (smp_scan_config(0x0,0x400) ||
+-              smp_scan_config(639*0x400,0x400) ||
+-                      smp_scan_config(0xF0000,0x10000))
+-              return;
+-      /*
+-       * If it is an SMP machine we should know now.
+-       *
+-       * there is a real-mode segmented pointer pointing to the
+-       * 4K EBDA area at 0x40E, calculate and scan it here.
+-       *
+-       * NOTE! There are Linux loaders that will corrupt the EBDA
+-       * area, and as such this kind of SMP config may be less
+-       * trustworthy, simply because the SMP table may have been
+-       * stomped on during early boot. These loaders are buggy and
+-       * should be fixed.
+-       */
+-
+-      address = *(unsigned short *)phys_to_virt(0x40E);
+-      address <<= 4;
+-      if (smp_scan_config(address, 0x1000))
+-              return;
+-
+-      /* If we have come this far, we did not find an MP table  */
+-       printk(KERN_INFO "No mptable found.\n");
+-}
+-
+-/* --------------------------------------------------------------------------
+-                            ACPI-based MP Configuration
+-   -------------------------------------------------------------------------- */
+-
+-#ifdef CONFIG_ACPI
+-
+-void __init mp_register_lapic_address(u64 address)
+-{
+-#ifndef CONFIG_XEN
+-      mp_lapic_addr = (unsigned long) address;
+-      set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr);
+-      if (boot_cpu_id == -1U)
+-              boot_cpu_id = GET_APIC_ID(apic_read(APIC_ID));
+-#endif
+-}
+-
+-void __cpuinit mp_register_lapic (u8 id, u8 enabled)
+-{
+-      struct mpc_config_processor processor;
+-      int                     boot_cpu = 0;
+-      
+-      if (id == boot_cpu_id)
+-              boot_cpu = 1;
+-
+-#ifndef CONFIG_XEN
+-      processor.mpc_type = MP_PROCESSOR;
+-      processor.mpc_apicid = id;
+-      processor.mpc_apicver = 0;
+-      processor.mpc_cpuflag = (enabled ? CPU_ENABLED : 0);
+-      processor.mpc_cpuflag |= (boot_cpu ? CPU_BOOTPROCESSOR : 0);
+-      processor.mpc_cpufeature = 0;
+-      processor.mpc_featureflag = 0;
+-      processor.mpc_reserved[0] = 0;
+-      processor.mpc_reserved[1] = 0;
+-#endif
+-
+-      MP_processor_info(&processor);
+-}
+-
+-#define MP_ISA_BUS            0
+-#define MP_MAX_IOAPIC_PIN     127
+-
+-static struct mp_ioapic_routing {
+-      int                     apic_id;
+-      int                     gsi_start;
+-      int                     gsi_end;
+-      u32                     pin_programmed[4];
+-} mp_ioapic_routing[MAX_IO_APICS];
+-
+-static int mp_find_ioapic(int gsi)
+-{
+-      int i = 0;
+-
+-      /* Find the IOAPIC that manages this GSI. */
+-      for (i = 0; i < nr_ioapics; i++) {
+-              if ((gsi >= mp_ioapic_routing[i].gsi_start)
+-                      && (gsi <= mp_ioapic_routing[i].gsi_end))
+-                      return i;
+-      }
+-
+-      printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi);
+-      return -1;
+-}
+-
+-static u8 uniq_ioapic_id(u8 id)
+-{
+-      int i;
+-      DECLARE_BITMAP(used, 256);
+-      bitmap_zero(used, 256);
+-      for (i = 0; i < nr_ioapics; i++) {
+-              struct mpc_config_ioapic *ia = &mp_ioapics[i];
+-              __set_bit(ia->mpc_apicid, used);
+-      }
+-      if (!test_bit(id, used))
+-              return id;
+-      return find_first_zero_bit(used, 256);
+-}
+-
+-void __init mp_register_ioapic(u8 id, u32 address, u32 gsi_base)
+-{
+-      int idx = 0;
+-
+-      if (bad_ioapic(address))
+-              return;
+-
+-      idx = nr_ioapics;
+-
+-      mp_ioapics[idx].mpc_type = MP_IOAPIC;
+-      mp_ioapics[idx].mpc_flags = MPC_APIC_USABLE;
+-      mp_ioapics[idx].mpc_apicaddr = address;
+-
+-#ifndef CONFIG_XEN
+-      set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
+-#endif
+-      mp_ioapics[idx].mpc_apicid = uniq_ioapic_id(id);
+-      mp_ioapics[idx].mpc_apicver = 0;
+-      
+-      /* 
+-       * Build basic IRQ lookup table to facilitate gsi->io_apic lookups
+-       * and to prevent reprogramming of IOAPIC pins (PCI IRQs).
+-       */
+-      mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mpc_apicid;
+-      mp_ioapic_routing[idx].gsi_start = gsi_base;
+-      mp_ioapic_routing[idx].gsi_end = gsi_base + 
+-              io_apic_get_redir_entries(idx);
+-
+-      printk(KERN_INFO "IOAPIC[%d]: apic_id %d, address 0x%x, "
+-              "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid, 
+-              mp_ioapics[idx].mpc_apicaddr,
+-              mp_ioapic_routing[idx].gsi_start,
+-              mp_ioapic_routing[idx].gsi_end);
+-
+-      nr_ioapics++;
+-}
+-
+-void __init
+-mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32       gsi)
+-{
+-      struct mpc_config_intsrc intsrc;
+-      int                     ioapic = -1;
+-      int                     pin = -1;
+-
+-      /* 
+-       * Convert 'gsi' to 'ioapic.pin'.
+-       */
+-      ioapic = mp_find_ioapic(gsi);
+-      if (ioapic < 0)
+-              return;
+-      pin = gsi - mp_ioapic_routing[ioapic].gsi_start;
  -
- /*
-  * NB. All these are "trap gates" (i.e. events_mask isn't set) except
-  * for those that specify <dpl>|4 in the second field.
-@@ -1146,25 +1189,21 @@ void __init trap_init(void)
-       if (ret)
-               printk("HYPERVISOR_set_trap_table failed: error %d\n", ret);
- 
  -      /*
--       * Verify that the FXSAVE/FXRSTOR data will be 16-byte aligned.
--       * Generate a build-time error if the alignment is wrong.
+-       * TBD: This check is for faulty timer entries, where the override
+-       *      erroneously sets the trigger to level, resulting in a HUGE 
+-       *      increase of timer interrupts!
  -       */
--      BUILD_BUG_ON(offsetof(struct task_struct, thread.i387.fxsave) & 15);
-       if (cpu_has_fxsr) {
-               printk(KERN_INFO "Enabling fast FPU save and restore... ");
-               set_in_cr4(X86_CR4_OSFXSR);
-               printk("done.\n");
-       }
-       if (cpu_has_xmm) {
--              printk(KERN_INFO "Enabling unmasked SIMD FPU exception "
--                              "support... ");
-+              printk(KERN_INFO
-+                      "Enabling unmasked SIMD FPU exception support... ");
-               set_in_cr4(X86_CR4_OSXMMEXCPT);
-               printk("done.\n");
-       }
- 
-+      init_thread_xstate();
-       /*
--       * Should be a barrier for any external CPU state.
-+       * Should be a barrier for any external CPU state:
-        */
-       cpu_init();
- }
-@@ -1183,6 +1222,7 @@ void __cpuinit smp_trap_init(trap_info_t
- static int __init kstack_setup(char *s)
- {
-       kstack_depth_to_print = simple_strtoul(s, NULL, 0);
-+
-       return 1;
- }
- __setup("kstack=", kstack_setup);
---- a/arch/x86/kernel/traps_64-xen.c
-+++ b/arch/x86/kernel/traps_64-xen.c
-@@ -33,6 +33,8 @@
- #include <linux/kdebug.h>
- #include <linux/utsname.h>
- 
-+#include <mach_traps.h>
-+
- #if defined(CONFIG_EDAC)
- #include <linux/edac.h>
- #endif
-@@ -601,10 +603,16 @@ void die(const char * str, struct pt_reg
- }
- 
- #if defined(CONFIG_X86_LOCAL_APIC) || defined(CONFIG_SYSCTL)
--void __kprobes die_nmi(char *str, struct pt_regs *regs, int do_panic)
-+notrace __kprobes void
-+die_nmi(char *str, struct pt_regs *regs, int do_panic)
- {
--      unsigned long flags = oops_begin();
-+      unsigned long flags;
-+
-+      if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) ==
-+          NOTIFY_STOP)
-+              return;
- 
-+      flags = oops_begin();
-       /*
-        * We are in trouble anyway, lets at least try
-        * to get a message out.
-@@ -769,7 +777,7 @@ asmlinkage void __kprobes do_general_pro
-       die("general protection fault", regs, error_code);
- }
- 
--static __kprobes void
-+static notrace __kprobes void
- mem_parity_error(unsigned char reason, struct pt_regs * regs)
- {
-       printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n",
-@@ -792,7 +800,7 @@ mem_parity_error(unsigned char reason, s
-       clear_mem_error(reason);
- }
- 
--static __kprobes void
-+static notrace __kprobes void
- io_check_error(unsigned char reason, struct pt_regs * regs)
- {
-       printk("NMI: IOCK error (debug interrupt?)\n");
-@@ -802,9 +810,11 @@ io_check_error(unsigned char reason, str
-       clear_io_check_error(reason);
- }
- 
--static __kprobes void
-+static notrace __kprobes void
- unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
- {
-+      if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
-+              return;
-       printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n",
-               reason);
-       printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n");
-@@ -817,7 +827,7 @@ unknown_nmi_error(unsigned char reason, 
+-      if ((bus_irq == 0) && (trigger == 3))
+-              trigger = 1;
+-
+-      intsrc.mpc_type = MP_INTSRC;
+-      intsrc.mpc_irqtype = mp_INT;
+-      intsrc.mpc_irqflag = (trigger << 2) | polarity;
+-      intsrc.mpc_srcbus = MP_ISA_BUS;
+-      intsrc.mpc_srcbusirq = bus_irq;                                /* IRQ */
+-      intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid;        /* APIC ID */
+-      intsrc.mpc_dstirq = pin;                                    /* INTIN# */
+-
+-      Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, %d-%d\n", 
+-              intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3, 
+-              (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus, 
+-              intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, intsrc.mpc_dstirq);
+-
+-      mp_irqs[mp_irq_entries] = intsrc;
+-      if (++mp_irq_entries == MAX_IRQ_SOURCES)
+-              panic("Max # of irq sources exceeded!\n");
+-}
+-
+-void __init mp_config_acpi_legacy_irqs(void)
+-{
+-      struct mpc_config_intsrc intsrc;
+-      int i = 0;
+-      int ioapic = -1;
+-
+-      /* 
+-       * Fabricate the legacy ISA bus (bus #31).
+-       */
+-      set_bit(MP_ISA_BUS, mp_bus_not_pci);
+-
+-      /* 
+-       * Locate the IOAPIC that manages the ISA IRQs (0-15). 
+-       */
+-      ioapic = mp_find_ioapic(0);
+-      if (ioapic < 0)
+-              return;
+-
+-      intsrc.mpc_type = MP_INTSRC;
+-      intsrc.mpc_irqflag = 0;                                 /* Conforming */
+-      intsrc.mpc_srcbus = MP_ISA_BUS;
+-      intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid;
+-
+-      /* 
+-       * Use the default configuration for the IRQs 0-15.  Unless
+-       * overridden by (MADT) interrupt source override entries.
+-       */
+-      for (i = 0; i < 16; i++) {
+-              int idx;
+-
+-              for (idx = 0; idx < mp_irq_entries; idx++) {
+-                      struct mpc_config_intsrc *irq = mp_irqs + idx;
+-
+-                      /* Do we already have a mapping for this ISA IRQ? */
+-                      if (irq->mpc_srcbus == MP_ISA_BUS && irq->mpc_srcbusirq == i)
+-                              break;
+-
+-                      /* Do we already have a mapping for this IOAPIC pin */
+-                      if ((irq->mpc_dstapic == intsrc.mpc_dstapic) &&
+-                              (irq->mpc_dstirq == i))
+-                              break;
+-              }
+-
+-              if (idx != mp_irq_entries) {
+-                      printk(KERN_DEBUG "ACPI: IRQ%d used by override.\n", i);
+-                      continue;                       /* IRQ already used */
+-              }
+-
+-              intsrc.mpc_irqtype = mp_INT;
+-              intsrc.mpc_srcbusirq = i;                  /* Identity mapped */
+-              intsrc.mpc_dstirq = i;
+-
+-              Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, "
+-                      "%d-%d\n", intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3, 
+-                      (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus, 
+-                      intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, 
+-                      intsrc.mpc_dstirq);
+-
+-              mp_irqs[mp_irq_entries] = intsrc;
+-              if (++mp_irq_entries == MAX_IRQ_SOURCES)
+-                      panic("Max # of irq sources exceeded!\n");
+-      }
+-}
+-
+-int mp_register_gsi(u32 gsi, int triggering, int polarity)
+-{
+-      int ioapic = -1;
+-      int ioapic_pin = 0;
+-      int idx, bit = 0;
+-
+-      if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC)
+-              return gsi;
+-
+-      /* Don't set up the ACPI SCI because it's already set up */
+-      if (acpi_gbl_FADT.sci_interrupt == gsi)
+-              return gsi;
+-
+-      ioapic = mp_find_ioapic(gsi);
+-      if (ioapic < 0) {
+-              printk(KERN_WARNING "No IOAPIC for GSI %u\n", gsi);
+-              return gsi;
+-      }
+-
+-      ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_start;
+-
+-      /* 
+-       * Avoid pin reprogramming.  PRTs typically include entries  
+-       * with redundant pin->gsi mappings (but unique PCI devices);
+-       * we only program the IOAPIC on the first.
+-       */
+-      bit = ioapic_pin % 32;
+-      idx = (ioapic_pin < 32) ? 0 : (ioapic_pin / 32);
+-      if (idx > 3) {
+-              printk(KERN_ERR "Invalid reference to IOAPIC pin "
+-                      "%d-%d\n", mp_ioapic_routing[ioapic].apic_id, 
+-                      ioapic_pin);
+-              return gsi;
+-      }
+-      if ((1<<bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) {
+-              Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n",
+-                      mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
+-              return gsi;
+-      }
+-
+-      mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1<<bit);
+-
+-      io_apic_set_pci_routing(ioapic, ioapic_pin, gsi,
+-              triggering == ACPI_EDGE_SENSITIVE ? 0 : 1,
+-              polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
+-      return gsi;
+-}
+-#endif /*CONFIG_ACPI*/
+--- sle11-2009-05-14.orig/arch/x86/kernel/pci-dma-xen.c        2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/arch/x86/kernel/pci-dma-xen.c     2009-03-16 16:38:05.000000000 +0100
+@@ -1,283 +1,251 @@
+-/*
+- * Dynamic DMA mapping support.
+- *
+- * On i386 there is no hardware dynamic DMA address translation,
+- * so consistent alloc/free are merely page allocation/freeing.
+- * The rest of the dynamic DMA mapping interface is implemented
+- * in asm/pci.h.
+- */
+-
+-#include <linux/types.h>
+-#include <linux/mm.h>
+-#include <linux/string.h>
++#include <linux/dma-mapping.h>
++#include <linux/dmar.h>
++#include <linux/bootmem.h>
+ #include <linux/pci.h>
+-#include <linux/module.h>
+-#include <linux/version.h>
+-#include <asm/io.h>
+-#include <xen/balloon.h>
+-#include <xen/gnttab.h>
+-#include <asm/swiotlb.h>
+-#include <asm/tlbflush.h>
+-#include <asm/swiotlb_32.h>
+-#include <asm/gnttab_dma.h>
+-#include <asm/bug.h>
   
- /* Runs on IST stack. This code must keep interrupts off all the time.
-    Nested NMIs are prevented by the CPU. */
--asmlinkage __kprobes void default_do_nmi(struct pt_regs *regs)
-+asmlinkage notrace  __kprobes void default_do_nmi(struct pt_regs *regs)
- {
-       unsigned char reason = 0;
-       int cpu;
-@@ -1117,11 +1127,25 @@ asmlinkage void __attribute__((weak)) mc
- asmlinkage void math_state_restore(void)
- {
-       struct task_struct *me = current;
-+
-+      if (!used_math()) {
-+              local_irq_enable();
-+              /*
-+               * does a slab alloc which can sleep
-+               */
-+              if (init_fpu(me)) {
-+                      /*
-+                       * ran out of memory!
-+                       */
-+                      do_group_exit(SIGKILL);
-+                      return;
-+              }
-+              local_irq_disable();
-+      }
+-#ifdef __x86_64__
+-#include <asm/iommu.h>
++#include <asm/proto.h>
++#include <asm/dma.h>
++#include <asm/gart.h>
++#include <asm/calgary.h>
  +
-         /* clts(); */ /* 'clts' is done for us by Xen during virtual trap. */
- 
--      if (!used_math())
--              init_fpu(me);
--      restore_fpu_checking(&me->thread.i387.fxsave);
-+      restore_fpu_checking(&me->thread.xstate->fxsave);
-       task_thread_info(me)->status |= TS_USEDFPU;
-       me->fpu_counter++;
- }
-@@ -1168,6 +1192,10 @@ void __init trap_init(void)
-               printk("HYPERVISOR_set_trap_table failed: error %d\n", ret);
- 
-       /*
-+       * initialize the per thread extended state:
-+       */
-+        init_thread_xstate();
-+      /*
-        * Should be a barrier for any external CPU state.
-        */
-       cpu_init();
---- a/arch/x86/kernel/vsyscall_64-xen.c
-+++ b/arch/x86/kernel/vsyscall_64-xen.c
-@@ -216,7 +216,7 @@ vgetcpu(unsigned *cpu, unsigned *node, s
-       return 0;
- }
- 
--long __vsyscall(3) venosys_1(void)
-+static long __vsyscall(3) venosys_1(void)
- {
-       return -ENOSYS;
- }
---- a/arch/x86/mm/fault-xen.c
-+++ b/arch/x86/mm/fault-xen.c
-@@ -510,6 +510,11 @@ static int vmalloc_fault(unsigned long a
-       unsigned long pgd_paddr;
-       pmd_t *pmd_k;
-       pte_t *pte_k;
++int forbid_dac __read_mostly;
++EXPORT_SYMBOL(forbid_dac);
  +
-+      /* Make sure we are in vmalloc area */
-+      if (!(address >= VMALLOC_START && address < VMALLOC_END))
-+              return -1;
++const struct dma_mapping_ops *dma_ops;
++EXPORT_SYMBOL(dma_ops);
  +
-       /*
-        * Synchronize this task's top level page-table
-        * with the 'reference' page table.
-@@ -671,7 +676,7 @@ void __kprobes do_page_fault(struct pt_r
- #ifdef CONFIG_X86_32
-       /* It's safe to allow irq's after cr2 has been saved and the vmalloc
-          fault has been handled. */
--      if (regs->flags & (X86_EFLAGS_IF|VM_MASK))
-+      if (regs->flags & (X86_EFLAGS_IF | X86_VM_MASK))
-               local_irq_enable();
- 
-       /*
-@@ -1018,9 +1023,5 @@ void vmalloc_sync_all(void)
-               if (address == start)
-                       start = address + PGDIR_SIZE;
-       }
--      /* Check that there is no need to do the same for the modules area. */
--      BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL));
--      BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
--                              (__START_KERNEL & PGDIR_MASK)));
- #endif
- }
---- a/arch/x86/mm/highmem_32-xen.c
-+++ b/arch/x86/mm/highmem_32-xen.c
-@@ -200,6 +200,5 @@ EXPORT_SYMBOL(kmap);
- EXPORT_SYMBOL(kunmap);
- EXPORT_SYMBOL(kmap_atomic);
- EXPORT_SYMBOL(kunmap_atomic);
--EXPORT_SYMBOL(kmap_atomic_to_page);
- EXPORT_SYMBOL(clear_highpage);
- EXPORT_SYMBOL(copy_highpage);
---- a/arch/x86/mm/init_32-xen.c
-+++ b/arch/x86/mm/init_32-xen.c
-@@ -1,5 +1,4 @@
- /*
-- *  linux/arch/i386/mm/init.c
-  *
-  *  Copyright (C) 1995  Linus Torvalds
-  *
-@@ -22,6 +21,7 @@
- #include <linux/init.h>
- #include <linux/highmem.h>
- #include <linux/pagemap.h>
-+#include <linux/pci.h>
- #include <linux/pfn.h>
- #include <linux/poison.h>
- #include <linux/bootmem.h>
-@@ -54,6 +54,8 @@
- 
- unsigned int __VMALLOC_RESERVE = 128 << 20;
- 
-+unsigned long max_pfn_mapped;
++static int iommu_sac_force __read_mostly;
  +
- DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
- unsigned long highstart_pfn, highend_pfn;
- 
-@@ -73,7 +75,7 @@ static pmd_t * __init one_md_table_init(
-       if (!(__pgd_val(*pgd) & _PAGE_PRESENT)) {
-               pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
- 
--              paravirt_alloc_pd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
-+              paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
-               make_lowmem_page_readonly(pmd_table, XENFEAT_writable_page_tables);
-               set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
-               pud = pud_offset(pgd, 0);
-@@ -107,7 +109,7 @@ static pte_t * __init one_page_table_ini
-                               (pte_t *)alloc_bootmem_low_pages(PAGE_SIZE);
-               }
- 
--              paravirt_alloc_pt(&init_mm, __pa(page_table) >> PAGE_SHIFT);
-+              paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT);
-               make_lowmem_page_readonly(page_table,
-                                         XENFEAT_writable_page_tables);
-               set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
-@@ -209,8 +211,13 @@ static void __init kernel_physical_mappi
-                       /*
-                        * Map with big pages if possible, otherwise
-                        * create normal page tables:
-+                       *
-+                       * Don't use a large page for the first 2/4MB of memory
-+                       * because there are often fixed size MTRRs in there
-+                       * and overlapping MTRRs into large pages can cause
-+                       * slowdowns.
-                        */
--                      if (cpu_has_pse) {
-+                      if (cpu_has_pse && !(pgd_idx == 0 && pmd_idx == 0)) {
-                               unsigned int addr2;
-                               pgprot_t prot = PAGE_KERNEL_LARGE;
- 
-@@ -224,6 +231,7 @@ static void __init kernel_physical_mappi
-                               set_pmd(pmd, pfn_pmd(pfn, prot));
++#ifdef CONFIG_IOMMU_DEBUG
++int panic_on_overflow __read_mostly = 1;
++int force_iommu __read_mostly = 1;
++#else
++int panic_on_overflow __read_mostly = 0;
++int force_iommu __read_mostly = 0;
++#endif
   
-                               pfn += PTRS_PER_PTE;
-+                              max_pfn_mapped = pfn;
-                               continue;
-                       }
-                       pte = one_page_table_init(pmd);
-@@ -241,6 +249,7 @@ static void __init kernel_physical_mappi
+ int iommu_merge __read_mostly = 0;
+-EXPORT_SYMBOL(iommu_merge);
   
-                               set_pte(pte, pfn_pte(pfn, prot));
-                       }
-+                      max_pfn_mapped = pfn;
-                       pte_ofs = 0;
-               }
-               pmd_idx = 0;
-@@ -262,6 +271,25 @@ static inline int page_kills_ppro(unsign
+-dma_addr_t bad_dma_address __read_mostly;
+-EXPORT_SYMBOL(bad_dma_address);
++int no_iommu __read_mostly;
++/* Set this to 1 if there is a HW IOMMU in the system */
++int iommu_detected __read_mostly = 0;
   
- #endif
+ /* This tells the BIO block layer to assume merging. Default to off
+    because we cannot guarantee merging later. */
+ int iommu_bio_merge __read_mostly = 0;
+ EXPORT_SYMBOL(iommu_bio_merge);
   
-+/*
-+ * devmem_is_allowed() checks to see if /dev/mem access to a certain address
-+ * is valid. The argument is a physical page number.
-+ *
-+ *
-+ * On x86, access has to be given to the first megabyte of ram because that area
-+ * contains bios code and data regions used by X and dosemu and similar apps.
-+ * Access has to be given to non-kernel-ram areas as well, these contain the PCI
-+ * mmio resources as well as potential bios/acpi data regions.
-+ */
-+int devmem_is_allowed(unsigned long pagenr)
-+{
-+      if (pagenr <= 256)
-+              return 1;
-+      if (mfn_to_local_pfn(pagenr) >= max_pfn)
-+              return 1;
-+      return 0;
-+}
-+
- #ifdef CONFIG_HIGHMEM
- pte_t *kmap_pte;
- pgprot_t kmap_prot;
-@@ -303,48 +331,18 @@ static void __init permanent_kmaps_init(
-       pkmap_page_table = pte;
- }
+-int force_iommu __read_mostly= 0;
++dma_addr_t bad_dma_address __read_mostly = 0;
++EXPORT_SYMBOL(bad_dma_address);
   
--static void __meminit free_new_highpage(struct page *page, int pfn)
+-__init int iommu_setup(char *p)
  -{
--      init_page_count(page);
--      if (pfn < xen_start_info->nr_pages)
--              __free_page(page);
--      totalhigh_pages++;
+-    return 1;
  -}
--
- void __init add_one_highpage_init(struct page *page, int pfn, int bad_ppro)
++/* Dummy device used for NULL arguments (normally ISA). Better would
++   be probably a smaller DMA mask, but this is bug-to-bug compatible
++   to older i386. */
++struct device fallback_dev = {
++      .bus_id = "fallback device",
++      .coherent_dma_mask = DMA_32BIT_MASK,
++      .dma_mask = &fallback_dev.coherent_dma_mask,
++};
+ 
+-void __init pci_iommu_alloc(void)
++int dma_set_mask(struct device *dev, u64 mask)
   {
-       if (page_is_ram(pfn) && !(bad_ppro && page_kills_ppro(pfn))) {
-               ClearPageReserved(page);
--              free_new_highpage(page, pfn);
-+              init_page_count(page);
-+              if (pfn < xen_start_info->nr_pages)
-+                      __free_page(page);
-+              totalhigh_pages++;
-       } else
-               SetPageReserved(page);
- }
+-#ifdef CONFIG_SWIOTLB
+-      pci_swiotlb_init();
+-#endif
+-}
++      if (!dev->dma_mask || !dma_supported(dev, mask))
++              return -EIO;
++
++      *dev->dma_mask = mask;
   
--static int __meminit
--add_one_highpage_hotplug(struct page *page, unsigned long pfn)
+-static int __init pci_iommu_init(void)
  -{
--      free_new_highpage(page, pfn);
--      totalram_pages++;
--#ifdef CONFIG_FLATMEM
--      max_mapnr = max(pfn, max_mapnr);
+-      no_iommu_init();
+       return 0;
+ }
++EXPORT_SYMBOL(dma_set_mask);
+ 
+-/* Must execute after PCI subsystem */
+-fs_initcall(pci_iommu_init);
  -#endif
--      num_physpages++;
--
--      return 0;
--}
  -
--/*
-- * Not currently handling the NUMA case.
-- * Assuming single node and all memory that
-- * has been added dynamically that would be
-- * onlined here is in HIGHMEM.
-- */
--void __meminit online_page(struct page *page)
--{
--      ClearPageReserved(page);
--      add_one_highpage_hotplug(page, page_to_pfn(page));
--}
+-struct dma_coherent_mem {
+-      void            *virt_base;
+-      u32             device_base;
+-      int             size;
+-      int             flags;
+-      unsigned long   *bitmap;
+-};
  -
- #ifndef CONFIG_NUMA
- static void __init set_highmem_pages_init(int bad_ppro)
+-#define IOMMU_BUG_ON(test)                            \
+-do {                                                  \
+-      if (unlikely(test)) {                           \
+-              printk(KERN_ALERT "Fatal DMA error! "   \
+-                     "Please use 'swiotlb=force'\n"); \
+-              BUG();                                  \
+-      }                                               \
+-} while (0)
++#if defined(CONFIG_X86_64) && !defined(CONFIG_XEN)
++static __initdata void *dma32_bootmem_ptr;
++static unsigned long dma32_bootmem_size __initdata = (128ULL<<20);
+ 
+-static int check_pages_physically_contiguous(unsigned long pfn, 
+-                                           unsigned int offset,
+-                                           size_t length)
++static int __init parse_dma32_size_opt(char *p)
   {
-@@ -459,15 +457,13 @@ void zap_low_mappings(void)
+-      unsigned long next_mfn;
+-      int i;
+-      int nr_pages;
+-      
+-      next_mfn = pfn_to_mfn(pfn);
+-      nr_pages = (offset + length + PAGE_SIZE-1) >> PAGE_SHIFT;
+-      
+-      for (i = 1; i < nr_pages; i++) {
+-              if (pfn_to_mfn(++pfn) != ++next_mfn) 
+-                      return 0;
+-      }
+-      return 1;
++      if (!p)
++              return -EINVAL;
++      dma32_bootmem_size = memparse(p, &p);
++      return 0;
+ }
++early_param("dma32_size", parse_dma32_size_opt);
+ 
+-int range_straddles_page_boundary(paddr_t p, size_t size)
++void __init dma32_reserve_bootmem(void)
   {
-       int i;
+-      unsigned long pfn = p >> PAGE_SHIFT;
+-      unsigned int offset = p & ~PAGE_MASK;
++      unsigned long size, align;
++      if (end_pfn <= MAX_DMA32_PFN)
++              return;
   
--      save_pg_dir();
+-      return ((offset + size > PAGE_SIZE) &&
+-              !check_pages_physically_contiguous(pfn, offset, size));
++      align = 64ULL<<20;
++      size = round_up(dma32_bootmem_size, align);
++      dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align,
++                               __pa(MAX_DMA_ADDRESS));
++      if (dma32_bootmem_ptr)
++              dma32_bootmem_size = size;
++      else
++              dma32_bootmem_size = 0;
+ }
  -
-       /*
-        * Zap initial low-memory mappings.
-        *
-        * Note that "pgd_clear()" doesn't do it for
-        * us, because pgd_clear() is a no-op on i386.
-        */
--      for (i = 0; i < USER_PTRS_PER_PGD; i++) {
-+      for (i = 0; i < KERNEL_PGD_BOUNDARY; i++) {
- #if defined(CONFIG_X86_PAE) && !defined(CONFIG_XEN)
-               set_pgd(swapper_pg_dir+i, __pgd(1 + __pa(empty_zero_page)));
- #else
-@@ -572,9 +568,9 @@ void __init paging_init(void)
- 
- /*
-  * Test if the WP bit works in supervisor mode. It isn't supported on 386's
-- * and also on some strange 486's (NexGen etc.). All 586+'s are OK. This
-- * used to involve black magic jumps to work around some nasty CPU bugs,
-- * but fortunately the switch to using exceptions got rid of all that.
-+ * and also on some strange 486's. All 586+'s are OK. This used to involve
-+ * black magic jumps to work around some nasty CPU bugs, but fortunately the
-+ * switch to using exceptions got rid of all that.
-  */
- static void __init test_wp_bit(void)
+-int
+-dma_map_sg(struct device *hwdev, struct scatterlist *sgl, int nents,
+-         enum dma_data_direction direction)
++static void __init dma32_free_bootmem(void)
   {
-@@ -605,9 +601,7 @@ void __init mem_init(void)
-       int tmp, bad_ppro;
-       unsigned long pfn;
- 
--#if defined(CONFIG_SWIOTLB)
--      swiotlb_init(); 
--#endif
-+      pci_iommu_alloc();
+-      int i, rc;
++      int node;
++
++      if (end_pfn <= MAX_DMA32_PFN)
++              return;
   
- #ifdef CONFIG_FLATMEM
-       BUG_ON(!mem_map);
-@@ -710,16 +704,8 @@ void __init mem_init(void)
-               test_wp_bit();
+-      BUG_ON(!valid_dma_direction(direction));
+-      WARN_ON(nents == 0 || sgl->length == 0);
++      if (!dma32_bootmem_ptr)
++              return;
   
-       cpa_init();
+-      if (swiotlb) {
+-              rc = swiotlb_map_sg(hwdev, sgl, nents, direction);
+-      } else {
+-              struct scatterlist *sg;
  -
--      /*
--       * Subtle. SMP is doing it's boot stuff late (because it has to
--       * fork idle threads) - but it also needs low mappings for the
--       * protected-mode entry to work. We zap these entries only after
--       * the WP-bit has been tested.
--       */
--#ifndef CONFIG_SMP
-+      save_pg_dir();
-       zap_low_mappings();
--#endif
+-              for_each_sg(sgl, sg, nents, i) {
+-                      BUG_ON(!sg_page(sg));
+-                      sg->dma_address =
+-                              gnttab_dma_map_page(sg_page(sg)) + sg->offset;
+-                      sg->dma_length  = sg->length;
+-                      IOMMU_BUG_ON(address_needs_mapping(
+-                              hwdev, sg->dma_address));
+-                      IOMMU_BUG_ON(range_straddles_page_boundary(
+-                              page_to_pseudophys(sg_page(sg)) + sg->offset,
+-                              sg->length));
+-              }
+-              rc = nents;
+-      }
++      for_each_online_node(node)
++              free_bootmem_node(NODE_DATA(node), __pa(dma32_bootmem_ptr),
++                                dma32_bootmem_size);
   
-       SetPagePinned(virt_to_page(init_mm.pgd));
+-      flush_write_buffers();
+-      return rc;
++      dma32_bootmem_ptr = NULL;
++      dma32_bootmem_size = 0;
   }
-@@ -769,25 +755,17 @@ void mark_rodata_ro(void)
-       unsigned long start = PFN_ALIGN(_text);
-       unsigned long size = PFN_ALIGN(_etext) - start;
+-EXPORT_SYMBOL(dma_map_sg);
++#else
++#define dma32_free_bootmem() ((void)0)
++#endif
   
--#ifndef CONFIG_KPROBES
--#ifdef CONFIG_HOTPLUG_CPU
--      /* It must still be possible to apply SMP alternatives. */
--      if (num_possible_cpus() <= 1)
--#endif
--      {
--              set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
--              printk(KERN_INFO "Write protecting the kernel text: %luk\n",
--                      size >> 10);
-+      set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
-+      printk(KERN_INFO "Write protecting the kernel text: %luk\n",
-+              size >> 10);
+-void
+-dma_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nents,
+-           enum dma_data_direction direction)
+-{
+-      int i;
++static const struct dma_mapping_ops swiotlb_dma_ops = {
++      .mapping_error = swiotlb_dma_mapping_error,
++      .map_single = swiotlb_map_single_phys,
++      .unmap_single = swiotlb_unmap_single,
++      .sync_single_for_cpu = swiotlb_sync_single_for_cpu,
++      .sync_single_for_device = swiotlb_sync_single_for_device,
++      .sync_single_range_for_cpu = swiotlb_sync_single_range_for_cpu,
++      .sync_single_range_for_device = swiotlb_sync_single_range_for_device,
++      .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
++      .sync_sg_for_device = swiotlb_sync_sg_for_device,
++      .map_sg = swiotlb_map_sg,
++      .unmap_sg = swiotlb_unmap_sg,
++      .dma_supported = swiotlb_dma_supported
++};
+ 
+-      BUG_ON(!valid_dma_direction(direction));
+-      if (swiotlb)
+-              swiotlb_unmap_sg(hwdev, sgl, nents, direction);
+-      else {
+-              struct scatterlist *sg;
++void __init pci_iommu_alloc(void)
++{
++      /* free the range so iommu could get some range less than 4G */
++      dma32_free_bootmem();
++      /*
++       * The order of these functions is important for
++       * fall-back/fail-over reasons
++       */
++#ifdef CONFIG_GART_IOMMU
++      gart_iommu_hole_init();
++#endif
+ 
+-              for_each_sg(sgl, sg, nents, i)
+-                      gnttab_dma_unmap_page(sg->dma_address);
+-      }
+-}
+-EXPORT_SYMBOL(dma_unmap_sg);
++#ifdef CONFIG_CALGARY_IOMMU
++      detect_calgary();
++#endif
+ 
+-#ifdef CONFIG_HIGHMEM
+-dma_addr_t
+-dma_map_page(struct device *dev, struct page *page, unsigned long offset,
+-           size_t size, enum dma_data_direction direction)
+-{
+-      dma_addr_t dma_addr;
++      detect_intel_iommu();
+ 
+-      BUG_ON(!valid_dma_direction(direction));
++#ifdef CONFIG_SWIOTLB
++      swiotlb_init();
+       if (swiotlb) {
+-              dma_addr = swiotlb_map_page(
+-                      dev, page, offset, size, direction);
+-      } else {
+-              dma_addr = gnttab_dma_map_page(page) + offset;
+-              IOMMU_BUG_ON(address_needs_mapping(dev, dma_addr));
++              printk(KERN_INFO "PCI-DMA: Using software bounce buffering for IO (SWIOTLB)\n");
++              dma_ops = &swiotlb_dma_ops;
+       }
+-
+-      return dma_addr;
++#endif
+ }
+-EXPORT_SYMBOL(dma_map_page);
   
- #ifdef CONFIG_CPA_DEBUG
--              printk(KERN_INFO "Testing CPA: Reverting %lx-%lx\n",
--                      start, start+size);
--              set_pages_rw(virt_to_page(start), size>>PAGE_SHIFT);
-+      printk(KERN_INFO "Testing CPA: Reverting %lx-%lx\n",
-+              start, start+size);
-+      set_pages_rw(virt_to_page(start), size>>PAGE_SHIFT);
+-void
+-dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
+-             enum dma_data_direction direction)
++/*
++ * See <Documentation/x86_64/boot-options.txt> for the iommu kernel parameter
++ * documentation.
++ */
++static __init int iommu_setup(char *p)
+ {
+-      BUG_ON(!valid_dma_direction(direction));
+-      if (swiotlb)
+-              swiotlb_unmap_page(dev, dma_address, size, direction);
+-      else
+-              gnttab_dma_unmap_page(dma_address);
+-}
+-EXPORT_SYMBOL(dma_unmap_page);
+-#endif /* CONFIG_HIGHMEM */
++      iommu_merge = 1;
   
--              printk(KERN_INFO "Testing CPA: write protecting again\n");
--              set_pages_ro(virt_to_page(start), size>>PAGE_SHIFT);
--#endif
--      }
-+      printk(KERN_INFO "Testing CPA: write protecting again\n");
-+      set_pages_ro(virt_to_page(start), size>>PAGE_SHIFT);
- #endif
-       start += size;
-       size = (unsigned long)__end_rodata - start;
---- a/arch/x86/mm/init_64-xen.c
-+++ b/arch/x86/mm/init_64-xen.c
-@@ -52,9 +52,6 @@
+-int
+-dma_mapping_error(dma_addr_t dma_addr)
+-{
+-      if (swiotlb)
+-              return swiotlb_dma_mapping_error(dma_addr);
+-      return 0;
+-}
+-EXPORT_SYMBOL(dma_mapping_error);
++      if (!p)
++              return -EINVAL;
   
- #include <xen/features.h>
+-int
+-dma_supported(struct device *dev, u64 mask)
+-{
+-      if (swiotlb)
+-              return swiotlb_dma_supported(dev, mask);
+-      /*
+-       * By default we'll BUG when an infeasible DMA is requested, and
+-       * request swiotlb=force (see IOMMU_BUG_ON).
+-       */
+-      return 1;
+-}
+-EXPORT_SYMBOL(dma_supported);
++      while (*p) {
++              if (!strncmp(p, "off", 3))
++                      no_iommu = 1;
++              /* gart_parse_options has more force support */
++              if (!strncmp(p, "force", 5))
++                      force_iommu = 1;
++              if (!strncmp(p, "noforce", 7)) {
++                      iommu_merge = 0;
++                      force_iommu = 0;
++              }
   
--const struct dma_mapping_ops *dma_ops;
--EXPORT_SYMBOL(dma_ops);
--
- #if CONFIG_XEN_COMPAT <= 0x030002
- unsigned int __kernel_page_user;
- EXPORT_SYMBOL(__kernel_page_user);
-@@ -68,6 +65,28 @@ extern unsigned long start_pfn;
- extern pmd_t level2_fixmap_pgt[PTRS_PER_PMD];
- extern pte_t level1_fixmap_pgt[PTRS_PER_PTE];
+-void *dma_alloc_coherent(struct device *dev, size_t size,
+-                         dma_addr_t *dma_handle, gfp_t gfp)
+-{
+-      void *ret;
+-      struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
+-      unsigned int order = get_order(size);
+-      unsigned long vstart;
+-      u64 mask;
++              if (!strncmp(p, "biomerge", 8)) {
++                      iommu_bio_merge = 4096;
++                      iommu_merge = 1;
++                      force_iommu = 1;
++              }
++              if (!strncmp(p, "panic", 5))
++                      panic_on_overflow = 1;
++              if (!strncmp(p, "nopanic", 7))
++                      panic_on_overflow = 0;
++              if (!strncmp(p, "merge", 5)) {
++                      iommu_merge = 1;
++                      force_iommu = 1;
++              }
++              if (!strncmp(p, "nomerge", 7))
++                      iommu_merge = 0;
++              if (!strncmp(p, "forcesac", 8))
++                      iommu_sac_force = 1;
++              if (!strncmp(p, "allowdac", 8))
++                      forbid_dac = 0;
++              if (!strncmp(p, "nodac", 5))
++                      forbid_dac = -1;
++              if (!strncmp(p, "usedac", 6)) {
++                      forbid_dac = -1;
++                      return 1;
++              }
++#ifdef CONFIG_SWIOTLB
++              if (!strncmp(p, "soft", 4))
++                      swiotlb = 1;
++#endif
   
-+int direct_gbpages __meminitdata
-+#ifdef CONFIG_DIRECT_GBPAGES
-+                              = 1
+-      /* ignore region specifiers */
+-      gfp &= ~(__GFP_DMA | __GFP_HIGHMEM);
++#ifdef CONFIG_GART_IOMMU
++              gart_parse_options(p);
  +#endif
-+;
-+
-+#ifndef CONFIG_XEN
-+static int __init parse_direct_gbpages_off(char *arg)
-+{
-+      direct_gbpages = 0;
-+      return 0;
-+}
-+early_param("nogbpages", parse_direct_gbpages_off);
+ 
+-      if (mem) {
+-              int page = bitmap_find_free_region(mem->bitmap, mem->size,
+-                                                   order);
+-              if (page >= 0) {
+-                      *dma_handle = mem->device_base + (page << PAGE_SHIFT);
+-                      ret = mem->virt_base + (page << PAGE_SHIFT);
+-                      memset(ret, 0, size);
+-                      return ret;
+-              }
+-              if (mem->flags & DMA_MEMORY_EXCLUSIVE)
+-                      return NULL;
++#ifdef CONFIG_CALGARY_IOMMU
++              if (!strncmp(p, "calgary", 7))
++                      use_calgary = 1;
++#endif /* CONFIG_CALGARY_IOMMU */
  +
-+static int __init parse_direct_gbpages_on(char *arg)
-+{
-+      direct_gbpages = 1;
++              p += strcspn(p, ",");
++              if (*p == ',')
++                      ++p;
+       }
  +      return 0;
  +}
-+early_param("gbpages", parse_direct_gbpages_on);
-+#endif
-+
- /*
-  * Use this until direct mapping is established, i.e. before __va() is 
-  * available in init_memory_mapping().
-@@ -135,9 +154,6 @@ void show_mem(void)
++early_param("iommu", iommu_setup);
   
-       printk(KERN_INFO "Mem-info:\n");
-       show_free_areas();
--      printk(KERN_INFO "Free swap:       %6ldkB\n",
--              nr_swap_pages << (PAGE_SHIFT-10));
+-      if (dev == NULL || (dev->coherent_dma_mask < 0xffffffff))
+-              gfp |= GFP_DMA;
  -
-       for_each_online_pgdat(pgdat) {
-               for (i = 0; i < pgdat->node_spanned_pages; ++i) {
-                       /*
-@@ -328,7 +344,7 @@ void __init cleanup_highmap(void)
-       pmd_t *last_pmd = pmd + PTRS_PER_PMD;
- 
-       for (; pmd < last_pmd; pmd++, vaddr += PMD_SIZE) {
--              if (!pmd_present(*pmd))
-+              if (pmd_none(*pmd))
-                       continue;
-               if (vaddr < (unsigned long) _text || vaddr > end)
-                       set_pmd(pmd, __pmd(0));
-@@ -337,8 +353,7 @@ void __init cleanup_highmap(void)
- #endif
- 
- /* NOTE: this is meant to be run only at boot */
--void __init
--__set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
-+void __init __set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
- {
-       unsigned long address = __fix_to_virt(idx);
+-      vstart = __get_free_pages(gfp, order);
+-      ret = (void *)vstart;
++static int check_pages_physically_contiguous(unsigned long pfn,
++                                           unsigned int offset,
++                                           size_t length)
++{
++      unsigned long next_mfn;
++      int i;
++      int nr_pages;
   
-@@ -463,7 +478,7 @@ __meminit void early_iounmap(void *addr,
- }
- #endif
+-      if (dev != NULL && dev->coherent_dma_mask)
+-              mask = dev->coherent_dma_mask;
+-      else
+-              mask = 0xffffffff;
++      next_mfn = pfn_to_mfn(pfn);
++      nr_pages = (offset + length + PAGE_SIZE-1) >> PAGE_SHIFT;
   
--static void __meminit
-+static unsigned long __meminit
- phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
- {
-       int i = pmd_index(address);
-@@ -503,21 +518,26 @@ phys_pmd_init(pmd_t *pmd_page, unsigned 
-                       set_pmd(pmd, __pmd(pte_phys | _KERNPG_TABLE));
-               }
+-      if (ret != NULL) {
+-              if (xen_create_contiguous_region(vstart, order,
+-                                               fls64(mask)) != 0) {
+-                      free_pages(vstart, order);
+-                      return NULL;
+-              }
+-              memset(ret, 0, size);
+-              *dma_handle = virt_to_bus(ret);
++      for (i = 1; i < nr_pages; i++) {
++              if (pfn_to_mfn(++pfn) != ++next_mfn)
++                      return 0;
         }
-+      return address;
- }
- 
--static void __meminit
-+static unsigned long __meminit
- phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end)
- {
-       pmd_t *pmd = pmd_offset(pud, 0);
-+      unsigned long last_map_addr;
-+
-       spin_lock(&init_mm.page_table_lock);
--      phys_pmd_init(pmd, address, end);
-+      last_map_addr = phys_pmd_init(pmd, address, end);
-       spin_unlock(&init_mm.page_table_lock);
-       __flush_tlb_all();
-+      return last_map_addr;
+-      return ret;
++      return 1;
   }
+-EXPORT_SYMBOL(dma_alloc_coherent);
   
--static void __meminit
-+static unsigned long __meminit
- phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end)
+-void dma_free_coherent(struct device *dev, size_t size,
+-                       void *vaddr, dma_addr_t dma_handle)
++int range_straddles_page_boundary(paddr_t p, size_t size)
   {
-+      unsigned long last_map_addr = end;
-       int i = pud_index(addr);
+-      struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
+-      int order = get_order(size);
+-
+-      WARN_ON(irqs_disabled());       /* for portability */
+-      if (mem && vaddr >= mem->virt_base && vaddr < (mem->virt_base + (mem->size << PAGE_SHIFT))) {
+-              int page = (vaddr - mem->virt_base) >> PAGE_SHIFT;
++      unsigned long pfn = p >> PAGE_SHIFT;
++      unsigned int offset = p & ~PAGE_MASK;
   
-       for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE) {
-@@ -529,7 +549,15 @@ phys_pud_init(pud_t *pud_page, unsigned 
-                       break;
+-              bitmap_release_region(mem->bitmap, page, order);
+-      } else {
+-              xen_destroy_contiguous_region((unsigned long)vaddr, order);
+-              free_pages((unsigned long)vaddr, order);
+-      }
++      return ((offset + size > PAGE_SIZE) &&
++              !check_pages_physically_contiguous(pfn, offset, size));
+ }
+-EXPORT_SYMBOL(dma_free_coherent);
   
-               if (__pud_val(*pud)) {
--                      phys_pmd_update(pud, addr, end);
-+                      if (!pud_large(*pud))
-+                              last_map_addr = phys_pmd_update(pud, addr, end);
-+                      continue;
-+              }
+-#ifdef ARCH_HAS_DMA_DECLARE_COHERENT_MEMORY
++#ifdef CONFIG_X86_32
+ int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
+                               dma_addr_t device_addr, size_t size, int flags)
+ {
+@@ -327,8 +295,8 @@ EXPORT_SYMBOL(dma_declare_coherent_memor
+ void dma_release_declared_memory(struct device *dev)
+ {
+       struct dma_coherent_mem *mem = dev->dma_mem;
+-      
+-      if(!mem)
  +
-+              if (direct_gbpages) {
-+                      set_pte((pte_t *)pud,
-+                              pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL_LARGE));
-+                      last_map_addr = (addr & PUD_MASK) + PUD_SIZE;
-                       continue;
-               }
- 
-@@ -537,12 +565,14 @@ phys_pud_init(pud_t *pud_page, unsigned 
- 
-               spin_lock(&init_mm.page_table_lock);
-               *pud = __pud(pmd_phys | _KERNPG_TABLE);
--              phys_pmd_init(pmd, addr, end);
-+              last_map_addr = phys_pmd_init(pmd, addr, end);
-               spin_unlock(&init_mm.page_table_lock);
- 
-               early_make_page_readonly(pmd, XENFEAT_writable_page_tables);
-       }
-       __flush_tlb_all();
++      if (!mem)
+               return;
+       dev->dma_mem = NULL;
+       iounmap(mem->virt_base);
+@@ -341,8 +309,10 @@ void *dma_mark_declared_memory_occupied(
+                                       dma_addr_t device_addr, size_t size)
+ {
+       struct dma_coherent_mem *mem = dev->dma_mem;
+-      int pages = (size + (device_addr & ~PAGE_MASK) + PAGE_SIZE - 1) >> PAGE_SHIFT;
+       int pos, err;
++      int pages = (size + (device_addr & ~PAGE_MASK) + PAGE_SIZE - 1);
  +
-+      return last_map_addr >> PAGE_SHIFT;
- }
++      pages >>= PAGE_SHIFT;
   
- void __init xen_init_pt(void)
-@@ -763,16 +793,136 @@ static void __init xen_finish_init_mappi
-       table_end = start_pfn;
+       if (!mem)
+               return ERR_PTR(-EINVAL);
+@@ -354,103 +324,270 @@ void *dma_mark_declared_memory_occupied(
+       return mem->virt_base + (pos << PAGE_SHIFT);
   }
+ EXPORT_SYMBOL(dma_mark_declared_memory_occupied);
+-#endif /* ARCH_HAS_DMA_DECLARE_COHERENT_MEMORY */
+-
+-#if defined(CONFIG_PCI) && !defined(CONFIG_XEN)
+-/* Many VIA bridges seem to corrupt data for DAC. Disable it here */
   
-+static void __init init_gbpages(void)
-+{
-+      if (direct_gbpages && cpu_has_gbpages)
-+              printk(KERN_INFO "Using GB pages for direct mapping\n");
-+      else
-+              direct_gbpages = 0;
-+}
-+
-+#ifdef CONFIG_MEMTEST_BOOTPARAM
-+
-+static void __init memtest(unsigned long start_phys, unsigned long size,
-+                               unsigned pattern)
-+{
-+      unsigned long i;
-+      unsigned long *start;
-+      unsigned long start_bad;
-+      unsigned long last_bad;
-+      unsigned long val;
-+      unsigned long start_phys_aligned;
-+      unsigned long count;
-+      unsigned long incr;
-+
-+      switch (pattern) {
-+      case 0:
-+              val = 0UL;
-+              break;
-+      case 1:
-+              val = -1UL;
-+              break;
-+      case 2:
-+              val = 0x5555555555555555UL;
-+              break;
-+      case 3:
-+              val = 0xaaaaaaaaaaaaaaaaUL;
-+              break;
-+      default:
-+              return;
-+      }
-+
-+      incr = sizeof(unsigned long);
-+      start_phys_aligned = ALIGN(start_phys, incr);
-+      count = (size - (start_phys_aligned - start_phys))/incr;
-+      start = __va(start_phys_aligned);
-+      start_bad = 0;
-+      last_bad = 0;
+-int forbid_dac;
+-EXPORT_SYMBOL(forbid_dac);
+-
+-static __devinit void via_no_dac(struct pci_dev *dev)
++static int dma_alloc_from_coherent_mem(struct device *dev, ssize_t size,
++                                     dma_addr_t *dma_handle, void **ret)
+ {
+-      if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) {
+-              printk(KERN_INFO "PCI: VIA PCI bridge detected. Disabling DAC.\n");
+-              forbid_dac = 1;
++      struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
++      int order = get_order(size);
  +
-+      for (i = 0; i < count; i++)
-+              start[i] = val;
-+      for (i = 0; i < count; i++, start++, start_phys_aligned += incr) {
-+              if (*start != val) {
-+                      if (start_phys_aligned == last_bad + incr) {
-+                              last_bad += incr;
-+                      } else {
-+                              if (start_bad) {
-+                                      printk(KERN_CONT "\n  %016lx bad mem addr %016lx - %016lx reserved",
-+                                              val, start_bad, last_bad + incr);
-+                                      reserve_early(start_bad, last_bad - start_bad, "BAD RAM");
-+                              }
-+                              start_bad = last_bad = start_phys_aligned;
-+                      }
++      if (mem) {
++              int page = bitmap_find_free_region(mem->bitmap, mem->size,
++                                                   order);
++              if (page >= 0) {
++                      *dma_handle = mem->device_base + (page << PAGE_SHIFT);
++                      *ret = mem->virt_base + (page << PAGE_SHIFT);
++                      memset(*ret, 0, size);
  +              }
-+      }
-+      if (start_bad) {
-+              printk(KERN_CONT "\n  %016lx bad mem addr %016lx - %016lx reserved",
-+                      val, start_bad, last_bad + incr);
-+              reserve_early(start_bad, last_bad - start_bad, "BAD RAM");
-+      }
-+
-+}
-+
-+static int memtest_pattern __initdata = CONFIG_MEMTEST_BOOTPARAM_VALUE;
++              if (mem->flags & DMA_MEMORY_EXCLUSIVE)
++                      *ret = NULL;
+       }
++      return (mem != NULL);
+ }
+-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID, via_no_dac);
+ 
+-static int check_iommu(char *s)
++static int dma_release_coherent(struct device *dev, int order, void *vaddr)
+ {
+-      if (!strcmp(s, "usedac")) {
+-              forbid_dac = -1;
++      struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
  +
-+static int __init parse_memtest(char *arg)
-+{
-+      if (arg)
-+              memtest_pattern = simple_strtoul(arg, NULL, 0);
-+      return 0;
-+}
++      if (mem && vaddr >= mem->virt_base && vaddr <
++                 (mem->virt_base + (mem->size << PAGE_SHIFT))) {
++              int page = (vaddr - mem->virt_base) >> PAGE_SHIFT;
  +
-+early_param("memtest", parse_memtest);
++              bitmap_release_region(mem->bitmap, page, order);
+               return 1;
+       }
+       return 0;
+ }
+-__setup("iommu=", check_iommu);
++#else
++#define dma_alloc_from_coherent_mem(dev, size, handle, ret) (0)
++#define dma_release_coherent(dev, order, vaddr) (0)
++#endif /* CONFIG_X86_32 */
  +
-+static void __init early_memtest(unsigned long start, unsigned long end)
++int dma_supported(struct device *dev, u64 mask)
  +{
-+      u64 t_start, t_size;
-+      unsigned pattern;
-+
-+      if (!memtest_pattern)
-+              return;
-+
-+      printk(KERN_INFO "early_memtest: pattern num %d", memtest_pattern);
-+      for (pattern = 0; pattern < memtest_pattern; pattern++) {
-+              t_start = start;
-+              t_size = 0;
-+              while (t_start < end) {
-+                      t_start = find_e820_area_size(t_start, &t_size, 1);
++#ifdef CONFIG_PCI
++      if (mask > 0xffffffff && forbid_dac > 0) {
++              printk(KERN_INFO "PCI: Disallowing DAC for device %s\n",
++                               dev->bus_id);
++              return 0;
++      }
+ #endif
+ 
+-dma_addr_t
+-dma_map_single(struct device *dev, void *ptr, size_t size,
+-             enum dma_data_direction direction)
++      if (dma_ops->dma_supported)
++              return dma_ops->dma_supported(dev, mask);
  +
-+                      /* done ? */
-+                      if (t_start >= end)
-+                              break;
-+                      if (t_start + t_size > end)
-+                              t_size = end - t_start;
++      /* Copied from i386. Doesn't make much sense, because it will
++         only work for pci_alloc_coherent.
++         The caller just has to use GFP_DMA in this case. */
++      if (mask < DMA_24BIT_MASK)
++              return 0;
  +
-+                      printk(KERN_CONT "\n  %016llx - %016llx pattern %d",
-+                              (unsigned long long)t_start,
-+                              (unsigned long long)t_start + t_size, pattern);
++      /* Tell the device to use SAC when IOMMU force is on.  This
++         allows the driver to use cheaper accesses in some cases.
  +
-+                      memtest(t_start, t_size, pattern);
++         Problem with this is that if we overflow the IOMMU area and
++         return DAC as fallback address the device may not handle it
++         correctly.
  +
-+                      t_start += t_size;
-+              }
++         As a special case some controllers have a 39bit address
++         mode that is as efficient as 32bit (aic79xx). Don't force
++         SAC for these.  Assume all masks <= 40 bits are of this
++         type. Normally this doesn't make any difference, but gives
++         more gentle handling of IOMMU overflow. */
++      if (iommu_sac_force && (mask >= DMA_40BIT_MASK)) {
++              printk(KERN_INFO "%s: Force SAC with mask %Lx\n",
++                               dev->bus_id, mask);
++              return 0;
  +      }
-+      printk(KERN_CONT "\n");
-+}
-+#else
-+static void __init early_memtest(unsigned long start, unsigned long end)
-+{
++
++      return 1;
  +}
-+#endif
++EXPORT_SYMBOL(dma_supported);
  +
- /*
-  * Setup the direct mapping of the physical memory at PAGE_OFFSET.
-  * This runs before bootmem is initialized and gets pages directly from
-  * the physical memory. To access them they are temporarily mapped.
-  */
--void __init_refok init_memory_mapping(unsigned long start, unsigned long end)
-+unsigned long __init_refok init_memory_mapping(unsigned long start, unsigned long end)
++/* Allocate DMA memory on node near device */
++static struct page *
++dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order)
   {
--      unsigned long next;
-+      unsigned long next, last_map_addr = end;
-+      unsigned long start_phys = start, end_phys = end;
- 
--      pr_debug("init_memory_mapping\n");
-+      printk(KERN_INFO "init_memory_mapping\n");
- 
-       /*
-        * Find space for the kernel direct mapping tables.
-@@ -781,8 +931,10 @@ void __init_refok init_memory_mapping(un
-        * memory mapped. Unfortunately this is done currently before the
-        * nodes are discovered.
-        */
--      if (!after_bootmem)
-+      if (!after_bootmem) {
-+              init_gbpages();
-               find_early_table_space(end);
-+      }
+-      dma_addr_t dma;
++      int node;
   
-       start = (unsigned long)__va(start);
-       end = (unsigned long)__va(end);
-@@ -799,7 +951,7 @@ void __init_refok init_memory_mapping(un
-               next = start + PGDIR_SIZE;
-               if (next > end)
-                       next = end;
--              phys_pud_init(pud, __pa(start), __pa(next));
-+              last_map_addr = phys_pud_init(pud, __pa(start), __pa(next));
-               if (!after_bootmem) {
-                       early_make_page_readonly(pud, XENFEAT_writable_page_tables);
-                       set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys));
-@@ -816,6 +968,11 @@ void __init_refok init_memory_mapping(un
-       if (!after_bootmem)
-               reserve_early(table_start << PAGE_SHIFT,
-                             table_end << PAGE_SHIFT, "PGTABLE");
-+
-+      if (!after_bootmem)
-+              early_memtest(start_phys, end_phys);
-+
-+      return last_map_addr;
- }
+-      BUG_ON(!valid_dma_direction(direction));
+-      WARN_ON(size == 0);
++      node = dev_to_node(dev);
   
- #ifndef CONFIG_NUMA
-@@ -839,15 +996,6 @@ void __init paging_init(void)
- /*
-  * Memory hotplug specific functions
-  */
--void online_page(struct page *page)
--{
--      ClearPageReserved(page);
--      init_page_count(page);
--      __free_page(page);
--      totalram_pages++;
--      num_physpages++;
+-      if (swiotlb) {
+-              dma = swiotlb_map_single(dev, ptr, size, direction);
+-      } else {
+-              dma = gnttab_dma_map_page(virt_to_page(ptr)) +
+-                    offset_in_page(ptr);
+-              IOMMU_BUG_ON(range_straddles_page_boundary(__pa(ptr), size));
+-              IOMMU_BUG_ON(address_needs_mapping(dev, dma));
+-      }
+-
+-      flush_write_buffers();
+-      return dma;
  -}
+-EXPORT_SYMBOL(dma_map_single);
  -
- #ifdef CONFIG_MEMORY_HOTPLUG
- /*
-  * Memory is added always to NORMAL zone. This means you will never get
-@@ -857,11 +1005,13 @@ int arch_add_memory(int nid, u64 start, 
- {
-       struct pglist_data *pgdat = NODE_DATA(nid);
-       struct zone *zone = pgdat->node_zones + ZONE_NORMAL;
--      unsigned long start_pfn = start >> PAGE_SHIFT;
-+      unsigned long last_mapped_pfn, start_pfn = start >> PAGE_SHIFT;
-       unsigned long nr_pages = size >> PAGE_SHIFT;
-       int ret;
- 
--      init_memory_mapping(start, start + size-1);
-+      last_mapped_pfn = init_memory_mapping(start, start + size-1);
-+      if (last_mapped_pfn > max_pfn_mapped)
-+              max_pfn_mapped = last_mapped_pfn;
- 
-       ret = __add_pages(zone, start_pfn, nr_pages);
-       WARN_ON(1);
-@@ -880,6 +1030,26 @@ EXPORT_SYMBOL_GPL(memory_add_physaddr_to
- 
- #endif /* CONFIG_MEMORY_HOTPLUG */
- 
+-void
+-dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
+-               enum dma_data_direction direction)
+-{
+-      BUG_ON(!valid_dma_direction(direction));
+-      if (swiotlb)
+-              swiotlb_unmap_single(dev, dma_addr, size, direction);
+-      else
+-              gnttab_dma_unmap_page(dma_addr);
++      return alloc_pages_node(node, gfp, order);
++}
++
  +/*
-+ * devmem_is_allowed() checks to see if /dev/mem access to a certain address
-+ * is valid. The argument is a physical page number.
-+ *
-+ *
-+ * On x86, access has to be given to the first megabyte of ram because that area
-+ * contains bios code and data regions used by X and dosemu and similar apps.
-+ * Access has to be given to non-kernel-ram areas as well, these contain the PCI
-+ * mmio resources as well as potential bios/acpi data regions.
++ * Allocate memory for a coherent mapping.
  + */
-+int devmem_is_allowed(unsigned long pagenr)
++void *
++dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
++                 gfp_t gfp)
  +{
-+      if (pagenr <= 256)
-+              return 1;
-+      if (mfn_to_local_pfn(pagenr) >= max_pfn)
-+              return 1;
-+      return 0;
-+}
++      void *memory = NULL;
++      struct page *page;
++      unsigned long dma_mask = 0;
++      int noretry = 0;
++      unsigned int order = get_order(size);
  +
++      /* ignore region specifiers */
++      gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
  +
- static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel,
-                        kcore_modules, kcore_vsyscall;
- 
-@@ -988,24 +1158,7 @@ EXPORT_SYMBOL_GPL(rodata_test_data);
- 
- void mark_rodata_ro(void)
- {
--      unsigned long start = (unsigned long)_stext, end;
--
--#ifdef CONFIG_HOTPLUG_CPU
--      /* It must still be possible to apply SMP alternatives. */
--      if (num_possible_cpus() > 1)
--              start = (unsigned long)_etext;
--#endif
--
--#ifdef CONFIG_KPROBES
--      start = (unsigned long)__start_rodata;
--#endif
--
--      end = (unsigned long)__end_rodata;
--      start = (start + PAGE_SIZE - 1) & PAGE_MASK;
--      end &= PAGE_MASK;
--      if (end <= start)
--              return;
--
-+      unsigned long start = PFN_ALIGN(_stext), end = PFN_ALIGN(__end_rodata);
- 
-       printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
-              (end - start) >> 10);
-@@ -1028,6 +1181,7 @@ void mark_rodata_ro(void)
-       set_memory_ro(start, (end-start) >> PAGE_SHIFT);
- #endif
- }
++      if (dma_alloc_from_coherent_mem(dev, size, dma_handle, &memory))
++              return memory;
  +
- #endif
- 
- #ifdef CONFIG_BLK_DEV_INITRD
-@@ -1040,7 +1194,7 @@ void free_initrd_mem(unsigned long start
- void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
- {
- #ifdef CONFIG_NUMA
--      int nid = phys_to_nid(phys);
-+      int nid, next_nid;
- #endif
-       unsigned long pfn = phys >> PAGE_SHIFT;
- 
-@@ -1049,7 +1203,7 @@ void __init reserve_bootmem_generic(unsi
-                * This can happen with kdump kernels when accessing
-                * firmware tables:
-                */
--              if (pfn < end_pfn_map)
-+              if (pfn < max_pfn_mapped)
-                       return;
- 
-               printk(KERN_ERR "reserve_bootmem: illegal reserve %lx %u\n",
-@@ -1059,10 +1213,16 @@ void __init reserve_bootmem_generic(unsi
- 
-       /* Should check here against the e820 map to avoid double free */
- #ifdef CONFIG_NUMA
--      reserve_bootmem_node(NODE_DATA(nid), phys, len, BOOTMEM_DEFAULT);
-+      nid = phys_to_nid(phys);
-+      next_nid = phys_to_nid(phys + len - 1);
-+      if (nid == next_nid)
-+              reserve_bootmem_node(NODE_DATA(nid), phys, len, BOOTMEM_DEFAULT);
-+      else
-+              reserve_bootmem(phys, len, BOOTMEM_DEFAULT);
- #else
-       reserve_bootmem(phys, len, BOOTMEM_DEFAULT);
- #endif
++      if (!dev) {
++              dev = &fallback_dev;
++              gfp |= GFP_DMA;
++      }
++      dma_mask = dev->coherent_dma_mask;
++      if (dma_mask == 0)
++              dma_mask = (gfp & GFP_DMA) ? DMA_24BIT_MASK : DMA_32BIT_MASK;
  +
- #ifndef CONFIG_XEN
-       if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) {
-               static unsigned long dma_reserve __initdata;
-@@ -1160,6 +1320,10 @@ const char *arch_vma_name(struct vm_area
- /*
-  * Initialise the sparsemem vmemmap using huge-pages at the PMD level.
-  */
-+static long __meminitdata addr_start, addr_end;
-+static void __meminitdata *p_start, *p_end;
-+static int __meminitdata node_start;
++      /* Device not DMA able */
++      if (dev->dma_mask == NULL)
++              return NULL;
++
++      /* Don't invoke OOM killer or retry in lower 16MB DMA zone */
++      if (gfp & __GFP_DMA)
++              noretry = 1;
++
++#ifdef CONFIG_XEN
++      gfp &= ~(__GFP_DMA | __GFP_DMA32);
++#else
++#ifdef CONFIG_X86_64
++      /* Why <=? Even when the mask is smaller than 4GB it is often
++         larger than 16MB and in this case we have a chance of
++         finding fitting memory in the next higher zone first. If
++         not retry with true GFP_DMA. -AK */
++      if (dma_mask <= DMA_32BIT_MASK && !(gfp & GFP_DMA)) {
++              gfp |= GFP_DMA32;
++#endif
++
++ again:
++#endif
++      page = dma_alloc_pages(dev,
++              noretry ? gfp | __GFP_NORETRY : gfp, order);
++      if (page == NULL)
++              return NULL;
  +
- int __meminit
- vmemmap_populate(struct page *start_page, unsigned long size, int node)
- {
-@@ -1194,12 +1358,32 @@ vmemmap_populate(struct page *start_page
-                                                       PAGE_KERNEL_LARGE);
-                       set_pmd(pmd, __pmd_ma(__pte_val(entry)));
- 
--                      printk(KERN_DEBUG " [%lx-%lx] PMD ->%p on node %d\n",
--                              addr, addr + PMD_SIZE - 1, p, node);
-+                      /* check to see if we have contiguous blocks */
-+                      if (p_end != p || node_start != node) {
-+                              if (p_start)
-+                                      printk(KERN_DEBUG " [%lx-%lx] PMD -> [%p-%p] on node %d\n",
-+                                              addr_start, addr_end-1, p_start, p_end-1, node_start);
-+                              addr_start = addr;
-+                              node_start = node;
-+                              p_start = p;
++#ifndef CONFIG_XEN
++      {
++              int high, mmu;
++              dma_addr_t bus = page_to_phys(page);
++              memory = page_address(page);
++              high = (bus + size) >= dma_mask;
++              mmu = high;
++              if (force_iommu && !(gfp & GFP_DMA))
++                      mmu = 1;
++              else if (high) {
++                      free_pages((unsigned long)memory, order);
++
++                      /* Don't use the 16MB ZONE_DMA unless absolutely
++                         needed. It's better to use remapping first. */
++                      if (dma_mask < DMA_32BIT_MASK && !(gfp & GFP_DMA)) {
++                              gfp = (gfp & ~GFP_DMA32) | GFP_DMA;
++                              goto again;
  +                      }
-+                      addr_end = addr + PMD_SIZE;
-+                      p_end = p + PMD_SIZE;
-               } else {
-                       vmemmap_verify((pte_t *)pmd, node, addr, next);
-               }
-       }
-       return 0;
- }
  +
-+void __meminit vmemmap_populate_print_last(void)
-+{
-+      if (p_start) {
-+              printk(KERN_DEBUG " [%lx-%lx] PMD -> [%p-%p] on node %d\n",
-+                      addr_start, addr_end-1, p_start, p_end-1, node_start);
-+              p_start = NULL;
-+              p_end = NULL;
-+              node_start = 0;
++                      /* Let low level make its own zone decisions */
++                      gfp &= ~(GFP_DMA32|GFP_DMA);
++
++                      if (dma_ops->alloc_coherent)
++                              return dma_ops->alloc_coherent(dev, size,
++                                                         dma_handle, gfp);
++                      return NULL;
++              }
++
++              memset(memory, 0, size);
++              if (!mmu) {
++                      *dma_handle = bus;
++                      return memory;
++              }
  +      }
-+}
- #endif
---- a/arch/x86/mm/ioremap-xen.c
-+++ b/arch/x86/mm/ioremap-xen.c
-@@ -20,14 +20,11 @@
- #include <asm/pgtable.h>
- #include <asm/tlbflush.h>
- #include <asm/pgalloc.h>
-+#include <asm/pat.h>
- 
--enum ioremap_mode {
--      IOR_MODE_UNCACHED,
--      IOR_MODE_CACHED,
--};
--
--#if defined(CONFIG_X86_64) && !defined(CONFIG_XEN)
-+#ifdef CONFIG_X86_64
++
++      if (dma_ops->alloc_coherent) {
++              free_pages((unsigned long)memory, order);
++              gfp &= ~(GFP_DMA|GFP_DMA32);
++              return dma_ops->alloc_coherent(dev, size, dma_handle, gfp);
++      }
++
++      if (dma_ops->map_simple) {
++              *dma_handle = dma_ops->map_simple(dev, virt_to_bus(memory),
++                                            size,
++                                            PCI_DMA_BIDIRECTIONAL);
++              if (*dma_handle != bad_dma_address)
++                      return memory;
++      }
++#else
++      memory = page_address(page);
++      if (xen_create_contiguous_region((unsigned long)memory, order,
++                                       fls64(dma_mask)) == 0) {
++              memset(memory, 0, size);
++              *dma_handle = virt_to_bus(memory);
++              return memory;
++      }
++#endif
++
++      if (panic_on_overflow)
++              panic("dma_alloc_coherent: IOMMU overflow by %lu bytes\n",
++                    (unsigned long)size);
++      free_pages((unsigned long)memory, order);
++      return NULL;
+ }
+-EXPORT_SYMBOL(dma_unmap_single);
++EXPORT_SYMBOL(dma_alloc_coherent);
   
-+#ifndef CONFIG_XEN
- unsigned long __phys_addr(unsigned long x)
+-void
+-dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size,
+-                      enum dma_data_direction direction)
++/*
++ * Unmap coherent memory.
++ * The caller must ensure that the device has finished accessing the mapping.
++ */
++void dma_free_coherent(struct device *dev, size_t size,
++                       void *vaddr, dma_addr_t bus)
   {
-       if (x >= __START_KERNEL_map)
-@@ -35,6 +32,19 @@ unsigned long __phys_addr(unsigned long 
-       return x - PAGE_OFFSET;
+-      if (swiotlb)
+-              swiotlb_sync_single_for_cpu(dev, dma_handle, size, direction);
++      int order = get_order(size);
++      WARN_ON(irqs_disabled());       /* for portability */
++      if (dma_release_coherent(dev, order, vaddr))
++              return;
++#ifndef CONFIG_XEN
++      if (dma_ops->unmap_single)
++              dma_ops->unmap_single(dev, bus, size, 0);
++#endif
++      xen_destroy_contiguous_region((unsigned long)vaddr, order);
++      free_pages((unsigned long)vaddr, order);
   }
- EXPORT_SYMBOL(__phys_addr);
+-EXPORT_SYMBOL(dma_sync_single_for_cpu);
++EXPORT_SYMBOL(dma_free_coherent);
+ 
+-void
+-dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, size_t size,
+-                           enum dma_data_direction direction)
++static int __init pci_iommu_init(void)
+ {
+-      if (swiotlb)
+-              swiotlb_sync_single_for_device(dev, dma_handle, size, direction);
++#ifdef CONFIG_CALGARY_IOMMU
++      calgary_iommu_init();
  +#endif
  +
-+static inline int phys_addr_valid(unsigned long addr)
-+{
-+      return addr < (1UL << boot_cpu_data.x86_phys_bits);
-+}
++      intel_iommu_init();
  +
-+#else
++#ifdef CONFIG_GART_IOMMU
++      gart_iommu_init();
++#endif
  +
-+static inline int phys_addr_valid(unsigned long addr)
-+{
-+      return 1;
-+}
- 
- #endif
- 
-@@ -92,7 +102,8 @@ static int __direct_remap_pfn_range(stru
-                * Fill in the machine address: PTE ptr is done later by
-                * apply_to_page_range().
-                */
--              v->val = __pte_val(pfn_pte_ma(mfn, prot)) | _PAGE_IO;
-+              v->val = __pte_val(pte_mkspecial(pfn_pte_ma(mfn, prot)))
-+                       | _PAGE_IO;
- 
-               mfn++;
-               address += PAGE_SIZE;
-@@ -189,10 +200,9 @@ int touch_pte_range(struct mm_struct *mm
- 
- EXPORT_SYMBOL(touch_pte_range);
++      no_iommu_init();
++      return 0;
+ }
+-EXPORT_SYMBOL(dma_sync_single_for_device);
   
--#ifdef CONFIG_X86_32
- int page_is_ram(unsigned long pagenr)
+-void
+-dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems,
+-                  enum dma_data_direction direction)
++void pci_iommu_shutdown(void)
   {
--      unsigned long addr, end;
-+      resource_size_t addr, end;
-       int i;
- 
- #ifndef CONFIG_XEN
-@@ -228,31 +238,51 @@ int page_is_ram(unsigned long pagenr)
-       }
-       return 0;
+-      if (swiotlb)
+-              swiotlb_sync_sg_for_cpu(dev,sg,nelems,direction);
+-      flush_write_buffers();
++      gart_iommu_shutdown();
   }
--#endif
+-EXPORT_SYMBOL(dma_sync_sg_for_cpu);
++/* Must execute after PCI subsystem */
++fs_initcall(pci_iommu_init);
++
++#ifdef CONFIG_PCI
++/* Many VIA bridges seem to corrupt data for DAC. Disable it here */
   
- /*
-  * Fix up the linear direct mapping of the kernel to avoid cache attribute
-  * conflicts.
-  */
- static int ioremap_change_attr(unsigned long vaddr, unsigned long size,
--                             enum ioremap_mode mode)
-+                             unsigned long prot_val)
+-void
+-dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems,
+-                  enum dma_data_direction direction)
++static __devinit void via_no_dac(struct pci_dev *dev)
   {
-       unsigned long nrpages = size >> PAGE_SHIFT;
-       int err;
- 
--      switch (mode) {
--      case IOR_MODE_UNCACHED:
-+      switch (prot_val) {
-+      case _PAGE_CACHE_UC:
-       default:
--              err = set_memory_uc(vaddr, nrpages);
-+              err = _set_memory_uc(vaddr, nrpages);
-+              break;
-+      case _PAGE_CACHE_WC:
-+              err = _set_memory_wc(vaddr, nrpages);
-               break;
--      case IOR_MODE_CACHED:
--              err = set_memory_wb(vaddr, nrpages);
-+      case _PAGE_CACHE_WB:
-+              err = _set_memory_wb(vaddr, nrpages);
-               break;
-       }
- 
-       return err;
+-      if (swiotlb)
+-              swiotlb_sync_sg_for_device(dev,sg,nelems,direction);
+-      flush_write_buffers();
++      /* Log and set forbid_dac once; later bridges see it already non-zero */
++      if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) {
++              printk(KERN_INFO "PCI: VIA PCI bridge detected. Disabling DAC.\n");
++              forbid_dac = 1;
++      }
   }
- 
-+int ioremap_check_change_attr(unsigned long mfn, unsigned long size,
-+                            unsigned long prot_val)
+-EXPORT_SYMBOL(dma_sync_sg_for_device);
++DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID, via_no_dac);
++#endif
+--- /dev/null  1970-01-01 00:00:00.000000000 +0000
++++ sle11-2009-05-14/arch/x86/kernel/pci-nommu-xen.c   2009-03-16 16:38:05.000000000 +0100
+@@ -0,0 +1,103 @@
++#include <linux/dma-mapping.h>
++#include <linux/dmar.h>
++#include <linux/bootmem.h>
++#include <linux/pci.h>
++
++#include <xen/gnttab.h>
++
++#include <asm/proto.h>
++#include <asm/dma.h>
++#include <asm/swiotlb.h>
++#include <asm/tlbflush.h>
++#include <asm/gnttab_dma.h>
++#include <asm/bug.h>
++
++#define IOMMU_BUG_ON(test)                            \
++do {                                                  \
++      if (unlikely(test)) {                           \
++              printk(KERN_ALERT "Fatal DMA error! "   \
++                     "Please use 'swiotlb=force'\n"); \
++              BUG();                                  \
++      }                                               \
++} while (0)
++
++static int
++gnttab_map_sg(struct device *hwdev, struct scatterlist *sgl, int nents,
++            int direction)
  +{
-+      unsigned long sz;
-+      int rc;
++      unsigned int i;
++      struct scatterlist *sg;
  +
-+      for (sz = rc = 0; sz < size && !rc; ++mfn, sz += PAGE_SIZE) {
-+              unsigned long pfn = mfn_to_local_pfn(mfn);
++      WARN_ON(nents == 0 || sgl->length == 0);
  +
-+              if (pfn >= max_pfn_mapped)
-+                      continue;
-+              rc = ioremap_change_attr((unsigned long)__va(pfn << PAGE_SHIFT),
-+                                       PAGE_SIZE, prot_val);
++      for_each_sg(sgl, sg, nents, i) {
++              BUG_ON(!sg_page(sg));
++              sg->dma_address =
++                      gnttab_dma_map_page(sg_page(sg)) + sg->offset;
++              sg->dma_length  = sg->length;
++              IOMMU_BUG_ON(address_needs_mapping(
++                      hwdev, sg->dma_address));
++              IOMMU_BUG_ON(range_straddles_page_boundary(
++                      page_to_pseudophys(sg_page(sg)) + sg->offset,
++                      sg->length));
  +      }
  +
-+      return rc;
++      return nents;
  +}
  +
- /*
-  * Remap an arbitrary physical address space into the kernel virtual
-  * address space. Needed when the kernel wants to access high addresses
-@@ -262,12 +292,15 @@ static int ioremap_change_attr(unsigned 
-  * have to convert them into an offset in a page-aligned mapping, but the
-  * caller shouldn't need to know that small detail.
-  */
--static void __iomem *__ioremap(resource_size_t phys_addr, unsigned long size,
--                             enum ioremap_mode mode)
-+static void __iomem *__ioremap_caller(resource_size_t phys_addr,
-+              unsigned long size, unsigned long prot_val, void *caller)
- {
--      unsigned long mfn, offset, last_addr, vaddr;
-+      unsigned long mfn, offset, vaddr;
-+      resource_size_t last_addr;
-       struct vm_struct *area;
-+      unsigned long new_prot_val;
-       pgprot_t prot;
-+      int retval;
-       domid_t domid = DOMID_IO;
- 
-       /* Don't allow wraparound or zero size */
-@@ -275,6 +308,13 @@ static void __iomem *__ioremap(resource_
-       if (!size || last_addr < phys_addr)
-               return NULL;
- 
-+      if (!phys_addr_valid(phys_addr)) {
-+              printk(KERN_WARNING "ioremap: invalid physical address %llx\n",
-+                     (unsigned long long)phys_addr);
-+              WARN_ON_ONCE(1);
-+              return NULL;
-+      }
++static void
++gnttab_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nents,
++              int direction)
++{
++      unsigned int i;
++      struct scatterlist *sg;
  +
-       /*
-        * Don't remap the low PCI/ISA area, it's always mapped..
-        */
-@@ -287,55 +327,86 @@ static void __iomem *__ioremap(resource_
-       for (mfn = PFN_DOWN(phys_addr); mfn < PFN_UP(last_addr); mfn++) {
-               unsigned long pfn = mfn_to_local_pfn(mfn);
- 
--              if (pfn >= max_pfn)
--                      continue;
-+              if (pfn_valid(pfn)) {
-+                      if (!PageReserved(pfn_to_page(pfn)))
-+                              return NULL;
-+                      domid = DOMID_SELF;
-+              }
-+      }
-+      WARN_ON_ONCE(domid == DOMID_SELF);
- 
--              domid = DOMID_SELF;
-+      /*
-+       * Mappings have to be page-aligned
-+       */
-+      offset = phys_addr & ~PAGE_MASK;
-+      phys_addr &= PAGE_MASK;
-+      size = PAGE_ALIGN(last_addr+1) - phys_addr;
- 
--              if (pfn >= max_pfn_mapped) /* bogus */
--                      continue;
-+      retval = reserve_memtype(phys_addr, phys_addr + size,
-+                                              prot_val, &new_prot_val);
-+      if (retval) {
-+              pr_debug("Warning: reserve_memtype returned %d\n", retval);
-+              return NULL;
-+      }
- 
--              if (pfn_valid(pfn) && !PageReserved(pfn_to_page(pfn)))
-+      if (prot_val != new_prot_val) {
-+              /*
-+               * Do not fallback to certain memory types with certain
-+               * requested type:
-+               * - request is uc-, return cannot be write-back
-+               * - request is uc-, return cannot be write-combine
-+               * - request is write-combine, return cannot be write-back
-+               */
-+              if ((prot_val == _PAGE_CACHE_UC_MINUS &&
-+                   (new_prot_val == _PAGE_CACHE_WB ||
-+                    new_prot_val == _PAGE_CACHE_WC)) ||
-+                  (prot_val == _PAGE_CACHE_WC &&
-+                   new_prot_val == _PAGE_CACHE_WB)) {
-+                      pr_debug(
-+              "ioremap error for 0x%llx-0x%llx, requested 0x%lx, got 0x%lx\n",
-+                              (unsigned long long)phys_addr,
-+                              (unsigned long long)(phys_addr + size),
-+                              prot_val, new_prot_val);
-+                      free_memtype(phys_addr, phys_addr + size);
-                       return NULL;
-+              }
-+              prot_val = new_prot_val;
-       }
- 
--      switch (mode) {
--      case IOR_MODE_UNCACHED:
-+      switch (prot_val) {
-+      case _PAGE_CACHE_UC:
-       default:
--              /*
--               * FIXME: we will use UC MINUS for now, as video fb drivers
--               * depend on it. Upcoming ioremap_wc() will fix this behavior.
--               */
-+              prot = PAGE_KERNEL_NOCACHE;
-+              break;
-+      case _PAGE_CACHE_UC_MINUS:
-               prot = PAGE_KERNEL_UC_MINUS;
-               break;
--      case IOR_MODE_CACHED:
-+      case _PAGE_CACHE_WC:
-+              prot = PAGE_KERNEL_WC;
-+              break;
-+      case _PAGE_CACHE_WB:
-               prot = PAGE_KERNEL;
-               break;
-       }
- 
-       /*
--       * Mappings have to be page-aligned
--       */
--      offset = phys_addr & ~PAGE_MASK;
--      phys_addr &= PAGE_MASK;
--      size = PAGE_ALIGN(last_addr+1) - phys_addr;
--
--      /*
-        * Ok, go for it..
-        */
--      area = get_vm_area(size, VM_IOREMAP | (mode << 20));
-+      area = get_vm_area_caller(size, VM_IOREMAP, caller);
-       if (!area)
-               return NULL;
-       area->phys_addr = phys_addr;
-       vaddr = (unsigned long) area->addr;
-       if (__direct_remap_pfn_range(&init_mm, vaddr, PFN_DOWN(phys_addr),
-                                    size, prot, domid)) {
-+              free_memtype(phys_addr, phys_addr + size);
-               free_vm_area(area);
-               return NULL;
-       }
- 
--      if (ioremap_change_attr(vaddr, size, mode) < 0) {
--              iounmap((void __iomem *) vaddr);
-+      if (ioremap_change_attr(vaddr, size, prot_val) < 0) {
-+              free_memtype(phys_addr, phys_addr + size);
-+              vunmap(area->addr);
-               return NULL;
-       }
- 
-@@ -365,16 +436,72 @@ static void __iomem *__ioremap(resource_
-  */
- void __iomem *ioremap_nocache(resource_size_t phys_addr, unsigned long size)
- {
--      return __ioremap(phys_addr, size, IOR_MODE_UNCACHED);
-+      /*
-+       * Ideally, this should be:
-+       *      pat_wc_enabled ? _PAGE_CACHE_UC : _PAGE_CACHE_UC_MINUS;
-+       *
-+       * Till we fix all X drivers to use ioremap_wc(), we will use
-+       * UC MINUS.
-+       */
-+      unsigned long val = _PAGE_CACHE_UC_MINUS;
++      for_each_sg(sgl, sg, nents, i)
++              gnttab_dma_unmap_page(sg->dma_address);
++}
++
++static dma_addr_t
++gnttab_map_single(struct device *dev, phys_addr_t paddr, size_t size,
++                int direction)
++{
++      dma_addr_t dma;
++
++      WARN_ON(size == 0);
++
++      dma = gnttab_dma_map_page(pfn_to_page(paddr >> PAGE_SHIFT)) +
++            offset_in_page(paddr);
++      IOMMU_BUG_ON(range_straddles_page_boundary(paddr, size));
++      IOMMU_BUG_ON(address_needs_mapping(dev, dma));
++
++      return dma;
++}
++
++static void
++gnttab_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
++                  int direction)
++{
++      gnttab_dma_unmap_page(dma_addr);
++}
++
++static int nommu_mapping_error(dma_addr_t dma_addr)
++{
++      return (dma_addr == bad_dma_address);
++}
++
++static const struct dma_mapping_ops nommu_dma_ops = {
++      .map_single = gnttab_map_single,
++      .unmap_single = gnttab_unmap_single,
++      .map_sg = gnttab_map_sg,
++      .unmap_sg = gnttab_unmap_sg,
++      .dma_supported = swiotlb_dma_supported,
++      .mapping_error = nommu_mapping_error
++};
++
++void __init no_iommu_init(void)
++{
++      if (dma_ops)
++              return;
  +
-+      return __ioremap_caller(phys_addr, size, val,
-+                              __builtin_return_address(0));
- }
- EXPORT_SYMBOL(ioremap_nocache);
- 
-+/**
-+ * ioremap_wc -       map memory into CPU space write combined
-+ * @offset:   bus address of the memory
-+ * @size:     size of the resource to map
-+ *
-+ * This version of ioremap ensures that the memory is marked write combining.
-+ * Write combining allows faster writes to some hardware devices.
++      force_iommu = 0; /* no HW IOMMU */
++      dma_ops = &nommu_dma_ops;
++}
+--- /dev/null  1970-01-01 00:00:00.000000000 +0000
++++ sle11-2009-05-14/arch/x86/kernel/process-xen.c     2009-03-16 16:38:05.000000000 +0100
+@@ -0,0 +1,188 @@
++#include <linux/errno.h>
++#include <linux/kernel.h>
++#include <linux/mm.h>
++#include <linux/smp.h>
++#include <linux/slab.h>
++#include <linux/sched.h>
++#include <linux/module.h>
++#include <linux/pm.h>
++
++struct kmem_cache *task_xstate_cachep;
++
++int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
++{
++      *dst = *src;
++      if (src->thread.xstate) {
++              dst->thread.xstate = kmem_cache_alloc(task_xstate_cachep,
++                                                    GFP_KERNEL);
++              if (!dst->thread.xstate)
++                      return -ENOMEM;
++              WARN_ON((unsigned long)dst->thread.xstate & 15);
++              memcpy(dst->thread.xstate, src->thread.xstate, xstate_size);
++      }
++      return 0;
++}
++
++void free_thread_xstate(struct task_struct *tsk)
++{
++      if (tsk->thread.xstate) {
++              kmem_cache_free(task_xstate_cachep, tsk->thread.xstate);
++              tsk->thread.xstate = NULL;
++      }
++}
++
++void free_thread_info(struct thread_info *ti)
++{
++      free_thread_xstate(ti->task);
++      free_pages((unsigned long)ti, get_order(THREAD_SIZE));
++}
++
++void arch_task_cache_init(void)
++{
++        task_xstate_cachep =
++              kmem_cache_create("task_xstate", xstate_size,
++                                __alignof__(union thread_xstate),
++                                SLAB_PANIC, NULL);
++}
++
++static void do_nothing(void *unused)
++{
++}
++
++/*
++ * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
++ * pm_idle and update to new pm_idle value. Required while changing pm_idle
++ * handler on SMP systems.
  + *
-+ * Must be freed with iounmap.
++ * Caller must have changed pm_idle to the new value before the call. Old
++ * pm_idle value will not be used by any CPU after the return of this function.
  + */
-+void __iomem *ioremap_wc(unsigned long phys_addr, unsigned long size)
++void cpu_idle_wait(void)
  +{
-+      if (pat_wc_enabled)
-+              return __ioremap_caller(phys_addr, size, _PAGE_CACHE_WC,
-+                                      __builtin_return_address(0));
-+      else
-+              return ioremap_nocache(phys_addr, size);
++      smp_mb();
++      /* kick all the CPUs so that they exit out of pm_idle */
++      smp_call_function(do_nothing, NULL, 0, 1);
  +}
-+EXPORT_SYMBOL(ioremap_wc);
++EXPORT_SYMBOL_GPL(cpu_idle_wait);
  +
- void __iomem *ioremap_cache(resource_size_t phys_addr, unsigned long size)
- {
--      return __ioremap(phys_addr, size, IOR_MODE_CACHED);
-+      return __ioremap_caller(phys_addr, size, _PAGE_CACHE_WB,
-+                              __builtin_return_address(0));
- }
- EXPORT_SYMBOL(ioremap_cache);
- 
  +#ifndef CONFIG_XEN
-+static void __iomem *ioremap_default(resource_size_t phys_addr,
-+                                      unsigned long size)
++/*
++ * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
++ * which can obviate IPI to trigger checking of need_resched.
++ * We execute MONITOR against need_resched and enter optimized wait state
++ * through MWAIT. Whenever someone changes need_resched, we would be woken
++ * up from MWAIT (without an IPI).
++ *
++ * New with Core Duo processors, MWAIT can take some hints based on CPU
++ * capability.
++ */
++void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
  +{
-+      unsigned long flags;
-+      void *ret;
-+      int err;
-+
-+      /*
-+       * - WB for WB-able memory and no other conflicting mappings
-+       * - UC_MINUS for non-WB-able memory with no other conflicting mappings
-+       * - Inherit from confliting mappings otherwise
-+       */
-+      err = reserve_memtype(phys_addr, phys_addr + size, -1, &flags);
-+      if (err < 0)
-+              return NULL;
-+
-+      ret = (void *) __ioremap_caller(phys_addr, size, flags,
-+                                      __builtin_return_address(0));
++      if (!need_resched()) {
++              __monitor((void *)&current_thread_info()->flags, 0, 0);
++              smp_mb();
++              if (!need_resched())
++                      __mwait(ax, cx);
++      }
++}
  +
-+      free_memtype(phys_addr, phys_addr + size);
-+      return (void __iomem *)ret;
++/* Default MONITOR/MWAIT with no hints, used for default C1 state */
++static void mwait_idle(void)
++{
++      if (!need_resched()) {
++              __monitor((void *)&current_thread_info()->flags, 0, 0);
++              smp_mb();
++              if (!need_resched())
++                      __sti_mwait(0, 0);
++              else
++                      local_irq_enable();
++      } else
++              local_irq_enable();
  +}
  +#endif
  +
- /**
-  * iounmap - Free a IO remapping
-  * @addr: virtual address from ioremap_*
-@@ -417,15 +544,7 @@ void iounmap(volatile void __iomem *addr
-               return;
-       }
- 
--      if ((p->flags >> 20) != IOR_MODE_CACHED) {
--              unsigned long n = get_vm_area_size(p) >> PAGE_SHIFT;
--              unsigned long mfn = p->phys_addr;
--              unsigned long va = (unsigned long)addr;
--
--              for (; n > 0; n--, mfn++, va += PAGE_SIZE)
--                      if (mfn_to_local_pfn(mfn) < max_pfn)
--                              set_memory_wb(va, 1);
--      }
-+      free_memtype(p->phys_addr, p->phys_addr + get_vm_area_size(p));
- 
-       /* Finally remove it */
-       o = remove_vm_area((void *)addr);
-@@ -434,6 +553,37 @@ void iounmap(volatile void __iomem *addr
- }
- EXPORT_SYMBOL(iounmap);
- 
++/*
++ * On SMP it's slightly faster (but much more power-consuming!)
++ * to poll the ->work.need_resched flag instead of waiting for the
++ * cross-CPU IPI to arrive. Use this option with caution.
++ */
++static void poll_idle(void)
++{
++      local_irq_enable();
++      cpu_relax();
++}
++
  +#ifndef CONFIG_XEN
  +/*
-+ * Convert a physical pointer to a virtual kernel pointer for /dev/mem
-+ * access
++ * mwait selection logic:
++ *
++ * It depends on the CPU. For AMD CPUs that support MWAIT this is
++ * wrong. Family 0x10 and 0x11 CPUs will enter C1 on HLT. Powersavings
++ * then depend on a clock divisor and current Pstate of the core. If
++ * all cores of a processor are in halt state (C1) the processor can
++ * enter the C1E (C1 enhanced) state. If mwait is used this will never
++ * happen.
++ *
++ * idle=mwait overrides this decision and forces the usage of mwait.
  + */
-+void *xlate_dev_mem_ptr(unsigned long phys)
++static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c)
  +{
-+      void *addr;
-+      unsigned long start = phys & PAGE_MASK;
++      if (force_mwait)
++              return 1;
  +
-+      /* If page is RAM, we can use __va. Otherwise ioremap and unmap. */
-+      if (page_is_ram(start >> PAGE_SHIFT))
-+              return __va(phys);
++      if (c->x86_vendor == X86_VENDOR_AMD) {
++              switch(c->x86) {
++              case 0x10:
++              case 0x11:
++                      return 0;
++              }
++      }
++      return 1;
++}
++#endif
  +
-+      addr = (void *)ioremap_default(start, PAGE_SIZE);
-+      if (addr)
-+              addr = (void *)((unsigned long)addr | (phys & ~PAGE_MASK));
++void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
++{
++#ifndef CONFIG_XEN
++      static int selected;
  +
-+      return addr;
++      if (selected)
++              return;
++#ifdef CONFIG_X86_SMP
++      if (pm_idle == poll_idle && smp_num_siblings > 1) {
++              printk(KERN_WARNING "WARNING: polling idle and HT enabled,"
++                      " performance may degrade.\n");
++      }
++#endif
++      if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) {
++              /*
++               * Skip, if setup has overridden idle.
++               * One CPU supports mwait => All CPUs supports mwait
++               */
++              if (!pm_idle) {
++                      printk(KERN_INFO "using mwait in idle threads.\n");
++                      pm_idle = mwait_idle;
++              }
++      }
++      selected = 1;
++#endif
  +}
  +
-+void unxlate_dev_mem_ptr(unsigned long phys, void *addr)
++static int __init idle_setup(char *str)
  +{
-+      if (page_is_ram(phys >> PAGE_SHIFT))
-+              return;
++      if (!strcmp(str, "poll")) {
++              printk("using polling idle threads.\n");
++              pm_idle = poll_idle;
++      }
++#ifndef CONFIG_XEN
++      else if (!strcmp(str, "mwait"))
++              force_mwait = 1;
++#endif
++      else
++              return -1;
  +
-+      iounmap((void __iomem *)((unsigned long)addr & PAGE_MASK));
-+      return;
++      boot_option_idle_override = 1;
++      return 0;
  +}
-+#endif
++early_param("idle", idle_setup);
  +
- int __initdata early_ioremap_debug;
- 
- static int __init early_ioremap_debug_setup(char *str)
-@@ -445,8 +595,8 @@ static int __init early_ioremap_debug_se
- early_param("early_ioremap_debug", early_ioremap_debug_setup);
- 
- static __initdata int after_paging_init;
--static __initdata pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)]
--                              __attribute__((aligned(PAGE_SIZE)));
-+static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)]
-+              __section(.bss.page_aligned);
+--- sle11-2009-05-14.orig/arch/x86/kernel/process_32-xen.c     2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/arch/x86/kernel/process_32-xen.c  2009-03-16 16:38:05.000000000 +0100
+@@ -36,6 +36,7 @@
+ #include <linux/personality.h>
+ #include <linux/tick.h>
+ #include <linux/percpu.h>
++#include <linux/prctl.h>
   
- #ifdef CONFIG_X86_32
- static inline pmd_t * __init early_ioremap_pmd(unsigned long addr)
-@@ -461,8 +611,8 @@ static inline pmd_t * __init early_iorem
- }
- #else
- #define early_ioremap_pmd early_get_pmd
-+#undef make_lowmem_page_readonly
- #define make_lowmem_page_readonly early_make_page_readonly
--#define make_lowmem_page_writable make_page_writable
+ #include <asm/uaccess.h>
+ #include <asm/pgtable.h>
+@@ -45,7 +46,6 @@
+ #include <asm/processor.h>
+ #include <asm/i387.h>
+ #include <asm/desc.h>
+-#include <asm/vm86.h>
+ #ifdef CONFIG_MATH_EMULATION
+ #include <asm/math_emu.h>
   #endif
+@@ -102,16 +102,6 @@ void enable_hlt(void)
   
- static inline pte_t * __init early_ioremap_pte(unsigned long addr)
-@@ -511,7 +661,7 @@ void __init early_ioremap_clear(void)
-       pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
-       pmd_clear(pmd);
-       make_lowmem_page_writable(bm_pte, XENFEAT_writable_page_tables);
--      /* paravirt_release_pt(__pa(bm_pte) >> PAGE_SHIFT); */
-+      /* paravirt_release_pte(__pa(bm_pte) >> PAGE_SHIFT); */
-       __flush_tlb_all();
- }
- 
-@@ -652,10 +802,11 @@ void __init early_iounmap(void *addr, un
-       unsigned long offset;
-       unsigned int nrpages;
-       enum fixed_addresses idx;
--      unsigned int nesting;
-+      int nesting;
- 
-       nesting = --early_ioremap_nested;
--      WARN_ON(nesting < 0);
-+      if (WARN_ON(nesting < 0))
-+              return;
- 
-       if (early_ioremap_debug) {
-               printk(KERN_INFO "early_iounmap(%p, %08lx) [%d]\n", addr,
---- a/arch/x86/mm/pageattr-xen.c
-+++ b/arch/x86/mm/pageattr-xen.c
-@@ -9,6 +9,8 @@
- #include <linux/slab.h>
- #include <linux/mm.h>
- #include <linux/interrupt.h>
-+#include <linux/seq_file.h>
-+#include <linux/debugfs.h>
+ EXPORT_SYMBOL(enable_hlt);
   
- #include <asm/e820.h>
- #include <asm/processor.h>
-@@ -17,370 +19,7 @@
- #include <asm/uaccess.h>
- #include <asm/pgalloc.h>
- #include <asm/proto.h>
--#include <asm/mmu_context.h>
--
--#ifndef CONFIG_X86_64
--#define TASK_SIZE64 TASK_SIZE
--#endif
--
--static void _pin_lock(struct mm_struct *mm, int lock) {
--      if (lock)
--              spin_lock(&mm->page_table_lock);
--#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
--      /* While mm->page_table_lock protects us against insertions and
--       * removals of higher level page table pages, it doesn't protect
--       * against updates of pte-s. Such updates, however, require the
--       * pte pages to be in consistent state (unpinned+writable or
--       * pinned+readonly). The pinning and attribute changes, however
--       * cannot be done atomically, which is why such updates must be
--       * prevented from happening concurrently.
--       * Note that no pte lock can ever elsewhere be acquired nesting
--       * with an already acquired one in the same mm, or with the mm's
--       * page_table_lock already acquired, as that would break in the
--       * non-split case (where all these are actually resolving to the
--       * one page_table_lock). Thus acquiring all of them here is not
--       * going to result in dead locks, and the order of acquires
--       * doesn't matter.
--       */
--      {
--              pgd_t *pgd = mm->pgd;
--              unsigned g;
--
--              for (g = 0; g <= ((TASK_SIZE64-1) / PGDIR_SIZE); g++, pgd++) {
--                      pud_t *pud;
--                      unsigned u;
--
--                      if (pgd_none(*pgd))
--                              continue;
--                      pud = pud_offset(pgd, 0);
--                      for (u = 0; u < PTRS_PER_PUD; u++, pud++) {
--                              pmd_t *pmd;
--                              unsigned m;
--
--                              if (pud_none(*pud))
--                                      continue;
--                              pmd = pmd_offset(pud, 0);
--                              for (m = 0; m < PTRS_PER_PMD; m++, pmd++) {
--                                      spinlock_t *ptl;
--
--                                      if (pmd_none(*pmd))
--                                              continue;
--                                      ptl = pte_lockptr(0, pmd);
--                                      if (lock)
--                                              spin_lock(ptl);
--                                      else
--                                              spin_unlock(ptl);
--                              }
--                      }
--              }
--      }
--#endif
--      if (!lock)
--              spin_unlock(&mm->page_table_lock);
--}
--#define pin_lock(mm) _pin_lock(mm, 1)
--#define pin_unlock(mm) _pin_lock(mm, 0)
--
--#define PIN_BATCH sizeof(void *)
--static DEFINE_PER_CPU(multicall_entry_t[PIN_BATCH], pb_mcl);
--
--static inline unsigned int pgd_walk_set_prot(struct page *page, pgprot_t flags,
--                                           unsigned int cpu, unsigned int seq)
+-/*
+- * On SMP it's slightly faster (but much more power-consuming!)
+- * to poll the ->work.need_resched flag instead of waiting for the
+- * cross-CPU IPI to arrive. Use this option with caution.
+- */
+-static void poll_idle(void)
  -{
--      unsigned long pfn = page_to_pfn(page);
--
--      if (PageHighMem(page)) {
--              if (pgprot_val(flags) & _PAGE_RW)
--                      ClearPagePinned(page);
--              else
--                      SetPagePinned(page);
--      } else {
--              MULTI_update_va_mapping(per_cpu(pb_mcl, cpu) + seq,
--                                      (unsigned long)__va(pfn << PAGE_SHIFT),
--                                      pfn_pte(pfn, flags), 0);
--              if (unlikely(++seq == PIN_BATCH)) {
--                      if (unlikely(HYPERVISOR_multicall_check(per_cpu(pb_mcl, cpu),
--                                                              PIN_BATCH, NULL)))
--                              BUG();
--                      seq = 0;
--              }
--      }
--
--      return seq;
+-      cpu_relax();
  -}
  -
--static void pgd_walk(pgd_t *pgd_base, pgprot_t flags)
--{
--      pgd_t       *pgd = pgd_base;
--      pud_t       *pud;
--      pmd_t       *pmd;
--      int          g,u,m;
--      unsigned int cpu, seq;
--      multicall_entry_t *mcl;
--
--      if (xen_feature(XENFEAT_auto_translated_physmap))
--              return;
--
--      cpu = get_cpu();
--
--      /*
--       * Cannot iterate up to USER_PTRS_PER_PGD on x86-64 as these pagetables
--       * may not be the 'current' task's pagetables (e.g., current may be
--       * 32-bit, but the pagetables may be for a 64-bit task).
--       * Subtracting 1 from TASK_SIZE64 means the loop limit is correct
--       * regardless of whether TASK_SIZE64 is a multiple of PGDIR_SIZE.
--       */
--      for (g = 0, seq = 0; g <= ((TASK_SIZE64-1) / PGDIR_SIZE); g++, pgd++) {
--              if (pgd_none(*pgd))
--                      continue;
--              pud = pud_offset(pgd, 0);
--              if (PTRS_PER_PUD > 1) /* not folded */
--                      seq = pgd_walk_set_prot(virt_to_page(pud),flags,cpu,seq);
--              for (u = 0; u < PTRS_PER_PUD; u++, pud++) {
--                      if (pud_none(*pud))
--                              continue;
--                      pmd = pmd_offset(pud, 0);
--                      if (PTRS_PER_PMD > 1) /* not folded */
--                              seq = pgd_walk_set_prot(virt_to_page(pmd),flags,cpu,seq);
--                      for (m = 0; m < PTRS_PER_PMD; m++, pmd++) {
--                              if (pmd_none(*pmd))
--                                      continue;
--                              seq = pgd_walk_set_prot(pmd_page(*pmd),flags,cpu,seq);
--                      }
--              }
--      }
+ static void xen_idle(void)
+ {
+       current_thread_info()->status &= ~TS_POLLING;
+@@ -121,20 +111,10 @@ static void xen_idle(void)
+        */
+       smp_mb();
+ 
+-      local_irq_disable();
+-      if (!need_resched()) {
+-              ktime_t t0, t1;
+-              u64 t0n, t1n;
  -
--      mcl = per_cpu(pb_mcl, cpu);
--#ifdef CONFIG_X86_64
--      if (unlikely(seq > PIN_BATCH - 2)) {
--              if (unlikely(HYPERVISOR_multicall_check(mcl, seq, NULL)))
--                      BUG();
--              seq = 0;
--      }
--      MULTI_update_va_mapping(mcl + seq,
--             (unsigned long)__user_pgd(pgd_base),
--             pfn_pte(virt_to_phys(__user_pgd(pgd_base))>>PAGE_SHIFT, flags),
--             0);
--      MULTI_update_va_mapping(mcl + seq + 1,
--             (unsigned long)pgd_base,
--             pfn_pte(virt_to_phys(pgd_base)>>PAGE_SHIFT, flags),
--             UVMF_TLB_FLUSH);
--      if (unlikely(HYPERVISOR_multicall_check(mcl, seq + 2, NULL)))
--              BUG();
--#else
--      if (likely(seq != 0)) {
--              MULTI_update_va_mapping(per_cpu(pb_mcl, cpu) + seq,
--                      (unsigned long)pgd_base,
--                      pfn_pte(virt_to_phys(pgd_base)>>PAGE_SHIFT, flags),
--                      UVMF_TLB_FLUSH);
--              if (unlikely(HYPERVISOR_multicall_check(per_cpu(pb_mcl, cpu),
--                                                      seq + 1, NULL)))
--                      BUG();
--      } else if(HYPERVISOR_update_va_mapping((unsigned long)pgd_base,
--                      pfn_pte(virt_to_phys(pgd_base)>>PAGE_SHIFT, flags),
--                      UVMF_TLB_FLUSH))
--              BUG();
--#endif
+-              t0 = ktime_get();
+-              t0n = ktime_to_ns(t0);
++      if (!need_resched())
+               safe_halt();    /* enables interrupts racelessly */
+-              local_irq_disable();
+-              t1 = ktime_get();
+-              t1n = ktime_to_ns(t1);
+-              sched_clock_idle_wakeup_event(t1n - t0n);
+-      }
+-      local_irq_enable();
++      else
++              local_irq_enable();
+       current_thread_info()->status |= TS_POLLING;
+ }
+ #ifdef CONFIG_APM_MODULE
+@@ -142,7 +122,6 @@ EXPORT_SYMBOL(default_idle);
+ #endif
+ 
+ #ifdef CONFIG_HOTPLUG_CPU
+-extern cpumask_t cpu_initialized;
+ static inline void play_dead(void)
+ {
+       idle_task_exit();
+@@ -187,6 +166,7 @@ void cpu_idle(void)
+                       if (cpu_is_offline(cpu))
+                               play_dead();
+ 
++                      local_irq_disable();
+                       __get_cpu_var(irq_stat).idle_timestamp = jiffies;
+                       idle();
+               }
+@@ -197,44 +177,6 @@ void cpu_idle(void)
+       }
+ }
+ 
+-static void do_nothing(void *unused)
+-{
+-}
  -
--      put_cpu();
+-/*
+- * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
+- * pm_idle and update to new pm_idle value. Required while changing pm_idle
+- * handler on SMP systems.
+- *
+- * Caller must have changed pm_idle to the new value before the call. Old
+- * pm_idle value will not be used by any CPU after the return of this function.
+- */
+-void cpu_idle_wait(void)
+-{
+-      smp_mb();
+-      /* kick all the CPUs so that they exit out of pm_idle */
+-      smp_call_function(do_nothing, NULL, 0, 1);
  -}
+-EXPORT_SYMBOL_GPL(cpu_idle_wait);
  -
--static void __pgd_pin(pgd_t *pgd)
+-void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
  -{
--      pgd_walk(pgd, PAGE_KERNEL_RO);
--      kmap_flush_unused();
--      xen_pgd_pin(__pa(pgd)); /* kernel */
--#ifdef CONFIG_X86_64
--      xen_pgd_pin(__pa(__user_pgd(pgd))); /* user */
--#endif
--      SetPagePinned(virt_to_page(pgd));
  -}
  -
--static void __pgd_unpin(pgd_t *pgd)
+-static int __init idle_setup(char *str)
  -{
--      xen_pgd_unpin(__pa(pgd));
--#ifdef CONFIG_X86_64
--      xen_pgd_unpin(__pa(__user_pgd(pgd)));
--#endif
--      pgd_walk(pgd, PAGE_KERNEL);
--      ClearPagePinned(virt_to_page(pgd));
+-      if (!strcmp(str, "poll")) {
+-              printk("using polling idle threads.\n");
+-              pm_idle = poll_idle;
+-      }
+-      else
+-              return -1;
+-
+-      boot_option_idle_override = 1;
+-      return 0;
  -}
+-early_param("idle", idle_setup);
  -
--void pgd_test_and_unpin(pgd_t *pgd)
+ void __show_registers(struct pt_regs *regs, int all)
+ {
+       unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
+@@ -260,7 +202,7 @@ void __show_registers(struct pt_regs *re
+                       init_utsname()->version);
+ 
+       printk("EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n",
+-                      0xffff & regs->cs, regs->ip, regs->flags,
++                      (u16)regs->cs, regs->ip, regs->flags,
+                       smp_processor_id());
+       print_symbol("EIP is at %s\n", regs->ip);
+ 
+@@ -269,8 +211,7 @@ void __show_registers(struct pt_regs *re
+       printk("ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n",
+               regs->si, regs->di, regs->bp, sp);
+       printk(" DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x\n",
+-             regs->ds & 0xffff, regs->es & 0xffff,
+-             regs->fs & 0xffff, gs, ss);
++             (u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, ss);
+ 
+       if (!all)
+               return;
+@@ -367,6 +308,7 @@ void flush_thread(void)
+       /*
+        * Forget coprocessor state..
+        */
++      tsk->fpu_counter = 0;
+       clear_fpu(tsk);
+       clear_used_math();
+ }
+@@ -437,11 +379,30 @@ int copy_thread(int nr, unsigned long cl
+       return err;
+ }
+ 
+-#ifdef CONFIG_SECCOMP
++void
++start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
++{
++      __asm__("movl %0, %%gs" :: "r"(0));
++      regs->fs                = 0;
++      set_fs(USER_DS);
++      regs->ds                = __USER_DS;
++      regs->es                = __USER_DS;
++      regs->ss                = __USER_DS;
++      regs->cs                = __USER_CS;
++      regs->ip                = new_ip;
++      regs->sp                = new_sp;
++      /*
++       * Free the old FP and other extended state
++       */
++      free_thread_xstate(current);
++}
++EXPORT_SYMBOL_GPL(start_thread);
++
+ static void hard_disable_TSC(void)
+ {
+       write_cr4(read_cr4() | X86_CR4_TSD);
+ }
++
+ void disable_TSC(void)
+ {
+       preempt_disable();
+@@ -453,11 +414,47 @@ void disable_TSC(void)
+               hard_disable_TSC();
+       preempt_enable();
+ }
++
+ static void hard_enable_TSC(void)
+ {
+       write_cr4(read_cr4() & ~X86_CR4_TSD);
+ }
+-#endif /* CONFIG_SECCOMP */
++
++static void enable_TSC(void)
++{
++      preempt_disable();
++      if (test_and_clear_thread_flag(TIF_NOTSC))
++              /*
++               * Must flip the CPU state synchronously with
++               * TIF_NOTSC in the current running context.
++               */
++              hard_enable_TSC();
++      preempt_enable();
++}
++
++int get_tsc_mode(unsigned long adr)
++{
++      unsigned int val;
++
++      if (test_thread_flag(TIF_NOTSC))
++              val = PR_TSC_SIGSEGV;
++      else
++              val = PR_TSC_ENABLE;
++
++      return put_user(val, (unsigned int __user *)adr);
++}
++
++int set_tsc_mode(unsigned int val)
++{
++      if (val == PR_TSC_SIGSEGV)
++              disable_TSC();
++      else if (val == PR_TSC_ENABLE)
++              enable_TSC();
++      else
++              return -EINVAL;
++
++      return 0;
++}
+ 
+ static noinline void
+ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p)
+@@ -473,12 +470,12 @@ __switch_to_xtra(struct task_struct *pre
+               /* we clear debugctl to make sure DS
+                * is not in use when we change it */
+               debugctl = 0;
+-              wrmsrl(MSR_IA32_DEBUGCTLMSR, 0);
++              update_debugctlmsr(0);
+               wrmsr(MSR_IA32_DS_AREA, next->ds_area_msr, 0);
+       }
+ 
+       if (next->debugctlmsr != debugctl)
+-              wrmsr(MSR_IA32_DEBUGCTLMSR, next->debugctlmsr, 0);
++              update_debugctlmsr(next->debugctlmsr);
+ 
+       if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
+               set_debugreg(next->debugreg0, 0);
+@@ -490,7 +487,6 @@ __switch_to_xtra(struct task_struct *pre
+               set_debugreg(next->debugreg7, 7);
+       }
+ 
+-#ifdef CONFIG_SECCOMP
+       if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
+           test_tsk_thread_flag(next_p, TIF_NOTSC)) {
+               /* prev and next are different */
+@@ -499,7 +495,6 @@ __switch_to_xtra(struct task_struct *pre
+               else
+                       hard_enable_TSC();
+       }
+-#endif
+ 
+ #ifdef X86_BTS
+       if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
+@@ -637,7 +632,7 @@ struct task_struct * __switch_to(struct 
+ 
+       /* we're going to use this soon, after a few expensive things */
+       if (next_p->fpu_counter > 5)
+-              prefetch(&next->i387.fxsave);
++              prefetch(next->xstate);
+ 
+       /*
+        * Now maybe handle debug registers
+@@ -658,8 +653,11 @@ struct task_struct * __switch_to(struct 
+       /* If the task has used fpu the last 5 timeslices, just do a full
+        * restore of the math state immediately to avoid the trap; the
+        * chances of needing FPU soon are obviously high now
++       *
++       * tsk_used_math() checks prevent calling math_state_restore(),
++       * which can sleep in the case of !tsk_used_math()
+        */
+-      if (next_p->fpu_counter > 5)
++      if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
+               math_state_restore();
+ 
+       /*
+--- sle11-2009-05-14.orig/arch/x86/kernel/process_64-xen.c     2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/arch/x86/kernel/process_64-xen.c  2009-03-16 16:38:05.000000000 +0100
+@@ -39,6 +39,7 @@
+ #include <linux/kprobes.h>
+ #include <linux/kdebug.h>
+ #include <linux/tick.h>
++#include <linux/prctl.h>
+ 
+ #include <asm/uaccess.h>
+ #include <asm/pgtable.h>
+@@ -102,17 +103,6 @@ void exit_idle(void)
+       __exit_idle();
+ }
+ 
+-/*
+- * On SMP it's slightly faster (but much more power-consuming!)
+- * to poll the ->need_resched flag instead of waiting for the
+- * cross-CPU IPI to arrive. Use this option with caution.
+- */
+-static void poll_idle(void)
  -{
--      if (PagePinned(virt_to_page(pgd)))
--              __pgd_unpin(pgd);
+-      local_irq_enable();
+-      cpu_relax();
  -}
  -
--void mm_pin(struct mm_struct *mm)
--{
--      if (xen_feature(XENFEAT_writable_page_tables))
--              return;
--
--      pin_lock(mm);
--      __pgd_pin(mm->pgd);
--      pin_unlock(mm);
--}
+ static void xen_idle(void)
+ {
+       current_thread_info()->status &= ~TS_POLLING;
+@@ -121,20 +111,10 @@ static void xen_idle(void)
+        * test NEED_RESCHED:
+        */
+       smp_mb();
+-      local_irq_disable();
+-      if (!need_resched()) {
+-              ktime_t t0, t1;
+-              u64 t0n, t1n;
  -
--void mm_unpin(struct mm_struct *mm)
+-              t0 = ktime_get();
+-              t0n = ktime_to_ns(t0);
++      if (!need_resched())
+               safe_halt();    /* enables interrupts racelessly */
+-              local_irq_disable();
+-              t1 = ktime_get();
+-              t1n = ktime_to_ns(t1);
+-              sched_clock_idle_wakeup_event(t1n - t0n);
+-      }
+-      local_irq_enable();
++      else
++              local_irq_enable();
+       current_thread_info()->status |= TS_POLLING;
+ }
+ 
+@@ -195,45 +175,6 @@ void cpu_idle(void)
+       }
+ }
+ 
+-static void do_nothing(void *unused)
  -{
--      if (xen_feature(XENFEAT_writable_page_tables))
--              return;
--
--      pin_lock(mm);
--      __pgd_unpin(mm->pgd);
--      pin_unlock(mm);
  -}
  -
--void mm_pin_all(void)
+-/*
+- * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
+- * pm_idle and update to new pm_idle value. Required while changing pm_idle
+- * handler on SMP systems.
+- *
+- * Caller must have changed pm_idle to the new value before the call. Old
+- * pm_idle value will not be used by any CPU after the return of this function.
+- */
+-void cpu_idle_wait(void)
  -{
--      struct page *page;
--      unsigned long flags;
--
--      if (xen_feature(XENFEAT_writable_page_tables))
--              return;
--
--      /*
--       * Allow uninterrupted access to the pgd_list. Also protects
--       * __pgd_pin() by disabling preemption.
--       * All other CPUs must be at a safe point (e.g., in stop_machine
--       * or offlined entirely).
--       */
--      spin_lock_irqsave(&pgd_lock, flags);
--      list_for_each_entry(page, &pgd_list, lru) {
--              if (!PagePinned(page))
--                      __pgd_pin((pgd_t *)page_address(page));
--      }
--      spin_unlock_irqrestore(&pgd_lock, flags);
+-      smp_mb();
+-      /* kick all the CPUs so that they exit out of pm_idle */
+-      smp_call_function(do_nothing, NULL, 0, 1);
  -}
+-EXPORT_SYMBOL_GPL(cpu_idle_wait);
  -
--void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
+-void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
  -{
--      if (!PagePinned(virt_to_page(mm->pgd)))
--              mm_pin(mm);
  -}
  -
--void arch_exit_mmap(struct mm_struct *mm)
+-static int __init idle_setup(char *str)
  -{
--      struct task_struct *tsk = current;
--
--      task_lock(tsk);
--
--      /*
--       * We aggressively remove defunct pgd from cr3. We execute unmap_vmas()
--       * *much* faster this way, as no tlb flushes means bigger wrpt batches.
--       */
--      if (tsk->active_mm == mm) {
--              tsk->active_mm = &init_mm;
--              atomic_inc(&init_mm.mm_count);
--
--              switch_mm(mm, &init_mm, tsk);
--
--              atomic_dec(&mm->mm_count);
--              BUG_ON(atomic_read(&mm->mm_count) == 0);
--      }
--
--      task_unlock(tsk);
--
--      if (PagePinned(virt_to_page(mm->pgd))
--          && atomic_read(&mm->mm_count) == 1
--          && !mm->context.has_foreign_mappings)
--              mm_unpin(mm);
--}
+-      if (!strcmp(str, "poll")) {
+-              printk("using polling idle threads.\n");
+-              pm_idle = poll_idle;
+-      } else if (!strcmp(str, "mwait"))
+-              force_mwait = 1;
+-      else
+-              return -1;
  -
--static void _pte_free(struct page *page, unsigned int order)
--{
--      BUG_ON(order);
--      __pte_free(page);
+-      boot_option_idle_override = 1;
+-      return 0;
  -}
+-early_param("idle", idle_setup);
  -
--pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
+ /* Prints also some state that isn't saved in the pt_regs */
+ void __show_regs(struct pt_regs * regs)
+ {
+@@ -360,6 +301,7 @@ void flush_thread(void)
+       /*
+        * Forget coprocessor state..
+        */
++      tsk->fpu_counter = 0;
+       clear_fpu(tsk);
+       clear_used_math();
+ }
+@@ -472,6 +414,83 @@ out:
+       return err;
+ }
+ 
++void
++start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
++{
++      asm volatile("movl %0, %%fs; movl %0, %%es; movl %0, %%ds" :: "r"(0));
++      load_gs_index(0);
++      regs->ip                = new_ip;
++      regs->sp                = new_sp;
++      write_pda(oldrsp, new_sp);
++      regs->cs                = __USER_CS;
++      regs->ss                = __USER_DS;
++      regs->flags             = 0x200;
++      set_fs(USER_DS);
++      /*
++       * Free the old FP and other extended state
++       */
++      free_thread_xstate(current);
++}
++EXPORT_SYMBOL_GPL(start_thread);
++
++static void hard_disable_TSC(void)
++{
++      write_cr4(read_cr4() | X86_CR4_TSD);
++}
++
++void disable_TSC(void)
++{
++      preempt_disable();
++      if (!test_and_set_thread_flag(TIF_NOTSC))
++              /*
++               * Must flip the CPU state synchronously with
++               * TIF_NOTSC in the current running context.
++               */
++              hard_disable_TSC();
++      preempt_enable();
++}
++
++static void hard_enable_TSC(void)
++{
++      write_cr4(read_cr4() & ~X86_CR4_TSD);
++}
++
++static void enable_TSC(void)
++{
++      preempt_disable();
++      if (test_and_clear_thread_flag(TIF_NOTSC))
++              /*
++               * Must flip the CPU state synchronously with
++               * TIF_NOTSC in the current running context.
++               */
++              hard_enable_TSC();
++      preempt_enable();
++}
++
++int get_tsc_mode(unsigned long adr)
++{
++      unsigned int val;
++
++      if (test_thread_flag(TIF_NOTSC))
++              val = PR_TSC_SIGSEGV;
++      else
++              val = PR_TSC_ENABLE;
++
++      return put_user(val, (unsigned int __user *)adr);
++}
++
++int set_tsc_mode(unsigned int val)
++{
++      if (val == PR_TSC_SIGSEGV)
++              disable_TSC();
++      else if (val == PR_TSC_ENABLE)
++              enable_TSC();
++      else
++              return -EINVAL;
++
++      return 0;
++}
++
+ /*
+  * This special macro can be used to load a debugging register
+  */
+@@ -491,12 +510,12 @@ static inline void __switch_to_xtra(stru
+               /* we clear debugctl to make sure DS
+                * is not in use when we change it */
+               debugctl = 0;
+-              wrmsrl(MSR_IA32_DEBUGCTLMSR, 0);
++              update_debugctlmsr(0);
+               wrmsrl(MSR_IA32_DS_AREA, next->ds_area_msr);
+       }
+ 
+       if (next->debugctlmsr != debugctl)
+-              wrmsrl(MSR_IA32_DEBUGCTLMSR, next->debugctlmsr);
++              update_debugctlmsr(next->debugctlmsr);
+ 
+       if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
+               loaddebug(next, 0);
+@@ -508,6 +527,15 @@ static inline void __switch_to_xtra(stru
+               loaddebug(next, 7);
+       }
+ 
++      if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
++          test_tsk_thread_flag(next_p, TIF_NOTSC)) {
++              /* prev and next are different */
++              if (test_tsk_thread_flag(next_p, TIF_NOTSC))
++                      hard_disable_TSC();
++              else
++                      hard_enable_TSC();
++      }
++
+ #ifdef X86_BTS
+       if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
+               ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);
+@@ -547,7 +575,7 @@ __switch_to(struct task_struct *prev_p, 
+ 
+       /* we're going to use this soon, after a few expensive things */
+       if (next_p->fpu_counter>5)
+-              prefetch(&next->i387.fxsave);
++              prefetch(next->xstate);
+ 
+       /*
+        * This is basically '__unlazy_fpu', except that we queue a
+@@ -680,8 +708,11 @@ __switch_to(struct task_struct *prev_p, 
+       /* If the task has used fpu the last 5 timeslices, just do a full
+        * restore of the math state immediately to avoid the trap; the
+        * chances of needing FPU soon are obviously high now
++       *
++       * tsk_used_math() checks prevent calling math_state_restore(),
++       * which can sleep in the case of !tsk_used_math()
+        */
+-      if (next_p->fpu_counter>5)
++      if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
+               math_state_restore();
+       return prev_p;
+ }
+--- /dev/null  1970-01-01 00:00:00.000000000 +0000
++++ sle11-2009-05-14/arch/x86/kernel/setup-xen.c       2009-03-16 16:38:05.000000000 +0100
+@@ -0,0 +1,141 @@
++#include <linux/kernel.h>
++#include <linux/module.h>
++#include <linux/init.h>
++#include <linux/bootmem.h>
++#include <linux/percpu.h>
++#include <asm/smp.h>
++#include <asm/percpu.h>
++#include <asm/sections.h>
++#include <asm/processor.h>
++#include <asm/setup.h>
++#include <asm/topology.h>
++#include <asm/mpspec.h>
++#include <asm/apicdef.h>
++
++#ifdef CONFIG_X86_LOCAL_APIC
++unsigned int num_processors;
++unsigned disabled_cpus __cpuinitdata;
++/* Processor that is doing the boot up */
++unsigned int boot_cpu_physical_apicid = -1U;
++EXPORT_SYMBOL(boot_cpu_physical_apicid);
++
++DEFINE_PER_CPU(u16, x86_cpu_to_apicid) = BAD_APICID;
++EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid);
++
++/* Bitmask of physically existing CPUs */
++physid_mask_t phys_cpu_present_map;
++#endif
++
++#if defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) && defined(CONFIG_X86_SMP)
++/*
++ * Copy data used in early init routines from the initial arrays to the
++ * per cpu data areas.  These arrays then become expendable and the
++ * *_early_ptr's are zeroed indicating that the static arrays are gone.
++ */
++static void __init setup_per_cpu_maps(void)
++{
++#ifndef CONFIG_XEN
++      int cpu;
++
++      for_each_possible_cpu(cpu) {
++              per_cpu(x86_cpu_to_apicid, cpu) = x86_cpu_to_apicid_init[cpu];
++              per_cpu(x86_bios_cpu_apicid, cpu) =
++                                              x86_bios_cpu_apicid_init[cpu];
++#ifdef CONFIG_NUMA
++              per_cpu(x86_cpu_to_node_map, cpu) =
++                                              x86_cpu_to_node_map_init[cpu];
++#endif
++      }
++
++      /* indicate the early static arrays will soon be gone */
++      x86_cpu_to_apicid_early_ptr = NULL;
++      x86_bios_cpu_apicid_early_ptr = NULL;
++#ifdef CONFIG_NUMA
++      x86_cpu_to_node_map_early_ptr = NULL;
++#endif
++#endif
++}
++
++#ifdef CONFIG_HAVE_CPUMASK_OF_CPU_MAP
++cpumask_t *cpumask_of_cpu_map __read_mostly;
++EXPORT_SYMBOL(cpumask_of_cpu_map);
++
++/* requires nr_cpu_ids to be initialized */
++static void __init setup_cpumask_of_cpu(void)
++{
++      int i;
++
++      /* alloc_bootmem zeroes memory */
++      cpumask_of_cpu_map = alloc_bootmem_low(sizeof(cpumask_t) * nr_cpu_ids);
++      for (i = 0; i < nr_cpu_ids; i++)
++              cpu_set(i, cpumask_of_cpu_map[i]);
++}
++#else
++static inline void setup_cpumask_of_cpu(void) { }
++#endif
++
++#ifdef CONFIG_X86_32
++/*
++ * Great future not-so-futuristic plan: make i386 and x86_64 do it
++ * the same way
++ */
++unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
++EXPORT_SYMBOL(__per_cpu_offset);
++#endif
++
++/*
++ * Great future plan:
++ * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data.
++ * Always point %gs to its beginning
++ */
++void __init setup_per_cpu_areas(void)
++{
++      int i, highest_cpu = 0;
++      unsigned long size;
++
++#ifdef CONFIG_HOTPLUG_CPU
++      prefill_possible_map();
++#endif
++
++      /* Copy section for each CPU (we discard the original) */
++      size = PERCPU_ENOUGH_ROOM;
++      printk(KERN_INFO "PERCPU: Allocating %lu bytes of per cpu data\n",
++                        size);
++
++      for_each_possible_cpu(i) {
++              char *ptr;
++#ifndef CONFIG_NEED_MULTIPLE_NODES
++              ptr = alloc_bootmem_pages(size);
++#else
++              int node = early_cpu_to_node(i);
++              if (!node_online(node) || !NODE_DATA(node)) {
++                      ptr = alloc_bootmem_pages(size);
++                      printk(KERN_INFO
++                             "cpu %d has no node or node-local memory\n", i);
++              }
++              else
++                      ptr = alloc_bootmem_pages_node(NODE_DATA(node), size);
++#endif
++              if (!ptr)
++                      panic("Cannot allocate cpu data for CPU %d\n", i);
++#ifdef CONFIG_X86_64
++              cpu_pda(i)->data_offset = ptr - __per_cpu_start;
++#else
++              __per_cpu_offset[i] = ptr - __per_cpu_start;
++#endif
++              memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
++
++              highest_cpu = i;
++      }
++
++      nr_cpu_ids = highest_cpu + 1;
++      printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d\n", NR_CPUS, nr_cpu_ids);
++
++      /* Setup percpu data maps */
++      setup_per_cpu_maps();
++
++      /* Setup cpumask_of_cpu map */
++      setup_cpumask_of_cpu();
++}
++
++#endif
+--- sle11-2009-05-14.orig/arch/x86/kernel/setup64-xen.c        2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/arch/x86/kernel/setup64-xen.c     2009-03-16 16:38:05.000000000 +0100
+@@ -15,6 +15,7 @@
+ #include <linux/bootmem.h>
+ #include <linux/bitops.h>
+ #include <linux/module.h>
++#include <linux/kgdb.h>
+ #include <asm/pda.h>
+ #include <asm/pgtable.h>
+ #include <asm/processor.h>
+@@ -27,6 +28,7 @@
+ #include <asm/proto.h>
+ #include <asm/sections.h>
+ #include <asm/setup.h>
++#include <asm/genapic.h>
+ #ifdef CONFIG_XEN
+ #include <asm/hypervisor.h>
+ #endif
+@@ -81,8 +83,8 @@ int force_personality32 = 0; 
+ Control non executable heap for 32bit processes.
+ To control the stack too use noexec=off
+ 
+-on    PROT_READ does not imply PROT_EXEC for 32bit processes
+-off   PROT_READ implies PROT_EXEC (default)
++on    PROT_READ does not imply PROT_EXEC for 32bit processes (default)
++off   PROT_READ implies PROT_EXEC
+ */
+ static int __init nonx32_setup(char *str)
+ {
+@@ -94,85 +96,6 @@ static int __init nonx32_setup(char *str
+ }
+ __setup("noexec32=", nonx32_setup);
+ 
+-/*
+- * Copy data used in early init routines from the initial arrays to the
+- * per cpu data areas.  These arrays then become expendable and the
+- * *_early_ptr's are zeroed indicating that the static arrays are gone.
+- */
+-static void __init setup_per_cpu_maps(void)
  -{
--      struct page *pte;
+-#ifndef CONFIG_XEN
+-      int cpu;
  -
--#ifdef CONFIG_HIGHPTE
--      pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT|__GFP_ZERO, 0);
--#else
--      pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
+-      for_each_possible_cpu(cpu) {
+-#ifdef CONFIG_SMP
+-              if (per_cpu_offset(cpu)) {
  -#endif
--      if (pte) {
--              pgtable_page_ctor(pte);
--              SetPageForeign(pte, _pte_free);
--              init_page_count(pte);
--      }
--      return pte;
--}
--
--void __pte_free(pgtable_t pte)
--{
--      if (!PageHighMem(pte)) {
--              unsigned long va = (unsigned long)page_address(pte);
--              unsigned int level;
--              pte_t *ptep = lookup_address(va, &level);
--
--              BUG_ON(!ptep || level != PG_LEVEL_4K || !pte_present(*ptep));
--              if (!pte_write(*ptep)
--                  && HYPERVISOR_update_va_mapping(va,
--                                                  mk_pte(pte, PAGE_KERNEL),
--                                                  0))
--                      BUG();
--      } else
--#ifdef CONFIG_HIGHPTE
--              ClearPagePinned(pte);
--#else
--              BUG();
+-                      per_cpu(x86_cpu_to_apicid, cpu) =
+-                                              x86_cpu_to_apicid_init[cpu];
+-                      per_cpu(x86_bios_cpu_apicid, cpu) =
+-                                              x86_bios_cpu_apicid_init[cpu];
+-#ifdef CONFIG_NUMA
+-                      per_cpu(x86_cpu_to_node_map, cpu) =
+-                                              x86_cpu_to_node_map_init[cpu];
  -#endif
+-#ifdef CONFIG_SMP
+-              }
+-              else
+-                      printk(KERN_NOTICE "per_cpu_offset zero for cpu %d\n",
+-                                                                      cpu);
+-#endif
+-      }
  -
--      ClearPageForeign(pte);
--      init_page_count(pte);
--      pgtable_page_dtor(pte);
--      __free_page(pte);
--}
--
--#if PAGETABLE_LEVELS >= 3
--static void _pmd_free(struct page *page, unsigned int order)
--{
--      BUG_ON(order);
--      __pmd_free(page);
+-      /* indicate the early static arrays will soon be gone */
+-      x86_cpu_to_apicid_early_ptr = NULL;
+-      x86_bios_cpu_apicid_early_ptr = NULL;
+-#ifdef CONFIG_NUMA
+-      x86_cpu_to_node_map_early_ptr = NULL;
+-#endif
+-#endif
  -}
  -
--pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
--{
--      struct page *pmd;
+-/*
+- * Great future plan:
+- * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data.
+- * Always point %gs to its beginning
+- */
+-void __init setup_per_cpu_areas(void)
+-{ 
+-      int i;
+-      unsigned long size;
  -
--      pmd = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
--      if (!pmd)
--              return NULL;
--      SetPageForeign(pmd, _pmd_free);
--      init_page_count(pmd);
--      return page_address(pmd);
--}
+-#ifdef CONFIG_HOTPLUG_CPU
+-      prefill_possible_map();
+-#endif
  -
--void __pmd_free(pgtable_t pmd)
--{
--      unsigned long va = (unsigned long)page_address(pmd);
--      unsigned int level;
--      pte_t *ptep = lookup_address(va, &level);
+-      /* Copy section for each CPU (we discard the original) */
+-      size = PERCPU_ENOUGH_ROOM;
  -
--      BUG_ON(!ptep || level != PG_LEVEL_4K || !pte_present(*ptep));
--      if (!pte_write(*ptep)
--          && HYPERVISOR_update_va_mapping(va, mk_pte(pmd, PAGE_KERNEL), 0))
--              BUG();
+-      printk(KERN_INFO "PERCPU: Allocating %lu bytes of per cpu data\n", size);
+-      for_each_cpu_mask (i, cpu_possible_map) {
+-              char *ptr;
+-#ifndef CONFIG_NEED_MULTIPLE_NODES
+-              ptr = alloc_bootmem_pages(size);
+-#else
+-              int node = early_cpu_to_node(i);
  -
--      ClearPageForeign(pmd);
--      init_page_count(pmd);
--      __free_page(pmd);
--}
+-              if (!node_online(node) || !NODE_DATA(node))
+-                      ptr = alloc_bootmem_pages(size);
+-              else
+-                      ptr = alloc_bootmem_pages_node(NODE_DATA(node), size);
  -#endif
+-              if (!ptr)
+-                      panic("Cannot allocate cpu data for CPU %d\n", i);
+-              cpu_pda(i)->data_offset = ptr - __per_cpu_start;
+-              memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
+-      }
+-
+-      /* setup percpu data maps early */
+-      setup_per_cpu_maps();
+-} 
+-
+ #ifdef CONFIG_XEN
+ static void __init_refok switch_pt(int cpu)
+ {
+@@ -410,6 +333,17 @@ void __cpuinit cpu_init (void)
+ #endif
+       load_LDT(&init_mm.context);
+ 
++#ifdef CONFIG_KGDB
++      /*
++       * If the kgdb is connected no debug regs should be altered.  This
++       * is only applicable when KGDB and a KGDB I/O module are built
++       * into the kernel and you are using early debugging with
++       * kgdbwait. KGDB will control the kernel HW breakpoint registers.
++       */
++      if (kgdb_connected && arch_kgdb_ops.correct_hw_break)
++              arch_kgdb_ops.correct_hw_break();
++      else {
++#endif
+       /*
+        * Clear all 6 debug registers:
+        */
+@@ -420,10 +354,17 @@ void __cpuinit cpu_init (void)
+       set_debugreg(0UL, 3);
+       set_debugreg(0UL, 6);
+       set_debugreg(0UL, 7);
++#ifdef CONFIG_KGDB
++      /* If the kgdb is connected no debug regs should be altered. */
++      }
++#endif
+ 
+       fpu_init(); 
+ 
+       asm ("pushfq; popq %0" : "=rm" (kernel_eflags));
+       if (raw_irqs_disabled())
+               kernel_eflags &= ~X86_EFLAGS_IF;
++
++      if (is_uv_system())
++              uv_cpu_init();
+ }
+--- sle11-2009-05-14.orig/arch/x86/kernel/setup_32-xen.c       2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/arch/x86/kernel/setup_32-xen.c    2009-03-16 16:38:05.000000000 +0100
+@@ -39,6 +39,7 @@
+ #include <linux/efi.h>
+ #include <linux/init.h>
+ #include <linux/edd.h>
++#include <linux/iscsi_ibft.h>
+ #include <linux/nodemask.h>
+ #include <linux/kernel.h>
+ #include <linux/percpu.h>
+@@ -49,6 +50,7 @@
+ #include <linux/pfn.h>
+ #include <linux/pci.h>
+ #include <linux/init_ohci1394_dma.h>
++#include <linux/kvm_para.h>
+ 
+ #include <video/edid.h>
+ 
+@@ -70,8 +72,9 @@
+ #include <xen/firmware.h>
+ #include <xen/xencons.h>
+ #include <setup_arch.h>
+-#include <bios_ebda.h>
++#include <asm/bios_ebda.h>
+ #include <asm/cacheflush.h>
++#include <asm/processor.h>
+ 
+ #ifdef CONFIG_XEN
+ #include <xen/interface/kexec.h>
+@@ -136,7 +139,12 @@ static struct resource standard_io_resou
+ }, {
+       .name   = "keyboard",
+       .start  = 0x0060,
+-      .end    = 0x006f,
++      .end    = 0x0060,
++      .flags  = IORESOURCE_BUSY | IORESOURCE_IO
++}, {
++      .name   = "keyboard",
++      .start  = 0x0064,
++      .end    = 0x0064,
+       .flags  = IORESOURCE_BUSY | IORESOURCE_IO
+ }, {
+       .name   = "dma page reg",
+@@ -166,6 +174,8 @@ struct cpuinfo_x86 new_cpu_data __cpuini
+ struct cpuinfo_x86 boot_cpu_data __read_mostly = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
+ EXPORT_SYMBOL(boot_cpu_data);
+ 
++unsigned int def_to_bigsmp;
++
+ #ifndef CONFIG_X86_PAE
+ unsigned long mmu_cr4_features;
+ #else
+@@ -204,7 +214,7 @@ EXPORT_SYMBOL(ist_info);
+ extern void early_cpu_init(void);
+ extern int root_mountflags;
+ 
+-unsigned long saved_videomode;
++unsigned long saved_video_mode;
+ 
+ #define RAMDISK_IMAGE_START_MASK      0x07FF
+ #define RAMDISK_PROMPT_FLAG           0x8000
+@@ -259,7 +269,7 @@ static inline void copy_edd(void)
+ }
+ #endif
+ 
+-int __initdata user_defined_memmap = 0;
++int __initdata user_defined_memmap;
+ 
+ /*
+  * "mem=nopentium" disables the 4MB page tables.
+@@ -420,20 +430,59 @@ unsigned long __init find_max_low_pfn(vo
+ }
+ 
+ #ifndef CONFIG_XEN
++#define BIOS_LOWMEM_KILOBYTES 0x413
++
+ /*
+- * workaround for Dell systems that neglect to reserve EBDA
++ * The BIOS places the EBDA/XBDA at the top of conventional
++ * memory, and usually decreases the reported amount of
++ * conventional memory (int 0x12) too. This also contains a
++ * workaround for Dell systems that neglect to reserve EBDA.
++ * The same workaround also avoids a problem with the AMD768MPX
++ * chipset: reserve a page before VGA to prevent PCI prefetch
++ * into it (errata #56). Usually the page is reserved anyways,
++ * unless you have no PS/2 mouse plugged in.
+  */
+ static void __init reserve_ebda_region(void)
+ {
+-      unsigned int addr;
+-      addr = get_bios_ebda();
+-      if (addr)
+-              reserve_bootmem(addr, PAGE_SIZE, BOOTMEM_DEFAULT);
++      unsigned int lowmem, ebda_addr;
++
++      /* To determine the position of the EBDA and the */
++      /* end of conventional memory, we need to look at */
++      /* the BIOS data area. In a paravirtual environment */
++      /* that area is absent. We'll just have to assume */
++      /* that the paravirt case can handle memory setup */
++      /* correctly, without our help. */
++      if (paravirt_enabled())
++              return;
++
++      /* end of low (conventional) memory */
++      lowmem = *(unsigned short *)__va(BIOS_LOWMEM_KILOBYTES);
++      lowmem <<= 10;
++
++      /* start of EBDA area */
++      ebda_addr = get_bios_ebda();
++
++      /* Fixup: bios puts an EBDA in the top 64K segment */
++      /* of conventional memory, but does not adjust lowmem. */
++      if ((lowmem - ebda_addr) <= 0x10000)
++              lowmem = ebda_addr;
++
++      /* Fixup: bios does not report an EBDA at all. */
++      /* Some old Dells seem to need 4k anyhow (bugzilla 2990) */
++      if ((ebda_addr == 0) && (lowmem >= 0x9f000))
++              lowmem = 0x9f000;
++
++      /* Paranoia: should never happen, but... */
++      if ((lowmem == 0) || (lowmem >= 0x100000))
++              lowmem = 0x9f000;
++
++      /* reserve all memory between lowmem and the 1MB mark */
++      reserve_bootmem(lowmem, 0x100000 - lowmem, BOOTMEM_DEFAULT);
+ }
+ #endif
+ 
+ #ifndef CONFIG_NEED_MULTIPLE_NODES
+-void __init setup_bootmem_allocator(void);
++static void __init setup_bootmem_allocator(void);
+ static unsigned long __init setup_memory(void)
+ {
+       /*
+@@ -469,7 +518,7 @@ static unsigned long __init setup_memory
+       return max_low_pfn;
+ }
+ 
+-void __init zone_sizes_init(void)
++static void __init zone_sizes_init(void)
+ {
+       unsigned long max_zone_pfns[MAX_NR_ZONES];
+       memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
+@@ -521,10 +570,16 @@ static void __init reserve_crashkernel(v
+                                       (unsigned long)(crash_size >> 20),
+                                       (unsigned long)(crash_base >> 20),
+                                       (unsigned long)(total_mem >> 20));
++
++                      if (reserve_bootmem(crash_base, crash_size,
++                                      BOOTMEM_EXCLUSIVE) < 0) {
++                              printk(KERN_INFO "crashkernel reservation "
++                                      "failed - memory is in use\n");
++                              return;
++                      }
++
+                       crashk_res.start = crash_base;
+                       crashk_res.end   = crash_base + crash_size - 1;
+-                      reserve_bootmem(crash_base, crash_size,
+-                                      BOOTMEM_DEFAULT);
+               } else
+                       printk(KERN_INFO "crashkernel reservation failed - "
+                                       "you have to specify a base address\n");
+@@ -658,16 +713,9 @@ void __init setup_bootmem_allocator(void
+        */
+       reserve_bootmem(0, PAGE_SIZE, BOOTMEM_DEFAULT);
+ 
+-      /* reserve EBDA region, it's a 4K region */
++      /* reserve EBDA region */
+       reserve_ebda_region();
+ 
+-    /* could be an AMD 768MPX chipset. Reserve a page  before VGA to prevent
+-       PCI prefetch into it (errata #56). Usually the page is reserved anyways,
+-       unless you have no PS/2 mouse plugged in. */
+-      if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
+-          boot_cpu_data.x86 == 6)
+-           reserve_bootmem(0xa0000 - 4096, 4096, BOOTMEM_DEFAULT);
  -
--/* blktap and gntdev need this, as otherwise they would implicitly (and
-- * needlessly, as they never use it) reference init_mm. */
--pte_t xen_ptep_get_and_clear_full(struct vm_area_struct *vma,
--                                unsigned long addr, pte_t *ptep, int full)
--{
--      return ptep_get_and_clear_full(vma->vm_mm, addr, ptep, full);
--}
--EXPORT_SYMBOL_GPL(xen_ptep_get_and_clear_full);
+ #ifdef CONFIG_SMP
+       /*
+        * But first pinch a few for the stack/trampoline stuff
+@@ -689,6 +737,8 @@ void __init setup_bootmem_allocator(void
+ #endif
+       numa_kva_reserve();
+       reserve_crashkernel();
++
++      reserve_ibft_region();
+ }
+ 
+ /*
+@@ -724,6 +774,18 @@ char * __init __attribute__((weak)) memo
+       return machine_specific_memory_setup();
+ }
+ 
++#ifdef CONFIG_NUMA
++/*
++ * In the golden day, when everything among i386 and x86_64 will be
++ * integrated, this will not live here
++ */
++void *x86_cpu_to_node_map_early_ptr;
++int x86_cpu_to_node_map_init[NR_CPUS] = {
++      [0 ... NR_CPUS-1] = NUMA_NO_NODE
++};
++DEFINE_PER_CPU(int, x86_cpu_to_node_map) = NUMA_NO_NODE;
++#endif
++
+ /*
+  * Determine if we were loaded by an EFI loader.  If so, then we have also been
+  * passed the efi memmap, systab, etc., so we should use these data structures
+@@ -773,7 +835,7 @@ void __init setup_arch(char **cmdline_p)
+       copy_edid();
+       apm_info.bios = boot_params.apm_bios_info;
+       ist_info = boot_params.ist_info;
+-      saved_videomode = boot_params.hdr.vid_mode;
++      saved_video_mode = boot_params.hdr.vid_mode;
+       if( boot_params.sys_desc_table.length != 0 ) {
+               set_mca_bus(boot_params.sys_desc_table.table[3] & 0x2);
+               machine_id = boot_params.sys_desc_table.table[0];
+@@ -840,15 +902,19 @@ void __init setup_arch(char **cmdline_p)
+               efi_init();
+ 
+       /* update e820 for memory not covered by WB MTRRs */
+-      find_max_pfn();
++      propagate_e820_map();
+       mtrr_bp_init();
+ #ifndef CONFIG_XEN
+       if (mtrr_trim_uncached_memory(max_pfn))
+-              find_max_pfn();
++              propagate_e820_map();
+ #endif
+ 
+       max_low_pfn = setup_memory();
+ 
++#ifdef CONFIG_KVM_CLOCK
++      kvmclock_init();
++#endif
++
+ #ifdef CONFIG_VMI
+       /*
+        * Must be after max_low_pfn is determined, and before kernel
+@@ -856,6 +922,7 @@ void __init setup_arch(char **cmdline_p)
+        */
+       vmi_init();
+ #endif
++      kvm_guest_init();
+ 
+       /*
+        * NOTE: before this point _nobody_ is allowed to allocate
+@@ -977,6 +1044,18 @@ void __init setup_arch(char **cmdline_p)
+ 
+       io_delay_init();
+ 
++#if defined(CONFIG_X86_SMP) && !defined(CONFIG_XEN)
++      /*
++       * setup to use the early static init tables during kernel startup
++       * X86_SMP will exclude sub-arches that don't deal well with it.
++       */
++      x86_cpu_to_apicid_early_ptr = (void *)x86_cpu_to_apicid_init;
++      x86_bios_cpu_apicid_early_ptr = (void *)x86_bios_cpu_apicid_init;
++#ifdef CONFIG_NUMA
++      x86_cpu_to_node_map_early_ptr = (void *)x86_cpu_to_node_map_init;
++#endif
++#endif
++
+ #ifdef CONFIG_X86_GENERICARCH
+       generic_apic_probe();
+ #endif
+--- sle11-2009-05-14.orig/arch/x86/kernel/setup_64-xen.c       2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/arch/x86/kernel/setup_64-xen.c    2009-03-16 16:38:05.000000000 +0100
+@@ -29,18 +29,22 @@
+ #include <linux/crash_dump.h>
+ #include <linux/root_dev.h>
+ #include <linux/pci.h>
++#include <asm/pci-direct.h>
+ #include <linux/efi.h>
+ #include <linux/acpi.h>
+ #include <linux/kallsyms.h>
+ #include <linux/edd.h>
++#include <linux/iscsi_ibft.h>
+ #include <linux/mmzone.h>
+ #include <linux/kexec.h>
+ #include <linux/cpufreq.h>
+ #include <linux/dmi.h>
+ #include <linux/dma-mapping.h>
+ #include <linux/ctype.h>
++#include <linux/sort.h>
+ #include <linux/uaccess.h>
+ #include <linux/init_ohci1394_dma.h>
++#include <linux/kvm_para.h>
+ 
+ #include <asm/mtrr.h>
+ #include <asm/uaccess.h>
+@@ -58,7 +62,6 @@
+ #include <asm/mmu_context.h>
+ #include <asm/proto.h>
+ #include <asm/setup.h>
+-#include <asm/mach_apic.h>
+ #include <asm/numa.h>
+ #include <asm/sections.h>
+ #include <asm/dmi.h>
+@@ -66,6 +69,9 @@
+ #include <asm/mce.h>
+ #include <asm/ds.h>
+ #include <asm/topology.h>
  +#include <asm/pat.h>
++
++#include <mach_apic.h>
+ #ifdef CONFIG_XEN
+ #include <linux/percpu.h>
+ #include <xen/interface/physdev.h>
+@@ -149,7 +155,7 @@ extern int root_mountflags;
+ 
+ char __initdata command_line[COMMAND_LINE_SIZE];
+ 
+-struct resource standard_io_resources[] = {
++static struct resource standard_io_resources[] = {
+       { .name = "dma1", .start = 0x00, .end = 0x1f,
+               .flags = IORESOURCE_BUSY | IORESOURCE_IO },
+       { .name = "pic1", .start = 0x20, .end = 0x21,
+@@ -158,7 +164,9 @@ struct resource standard_io_resources[] 
+               .flags = IORESOURCE_BUSY | IORESOURCE_IO },
+       { .name = "timer1", .start = 0x50, .end = 0x53,
+               .flags = IORESOURCE_BUSY | IORESOURCE_IO },
+-      { .name = "keyboard", .start = 0x60, .end = 0x6f,
++      { .name = "keyboard", .start = 0x60, .end = 0x60,
++              .flags = IORESOURCE_BUSY | IORESOURCE_IO },
++      { .name = "keyboard", .start = 0x64, .end = 0x64,
+               .flags = IORESOURCE_BUSY | IORESOURCE_IO },
+       { .name = "dma page reg", .start = 0x80, .end = 0x8f,
+               .flags = IORESOURCE_BUSY | IORESOURCE_IO },
+@@ -224,8 +232,10 @@ contig_initmem_init(unsigned long start_
+       e820_register_active_regions(0, start_pfn, end_pfn);
+ #ifdef CONFIG_XEN
+       free_bootmem_with_active_regions(0, xen_start_info->nr_pages);
++      early_res_to_bootmem(0, xen_start_info->nr_pages<<PAGE_SHIFT);
+ #else
+       free_bootmem_with_active_regions(0, end_pfn);
++      early_res_to_bootmem(0, end_pfn<<PAGE_SHIFT);
+ #endif
+       reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT);
+ }
+@@ -290,6 +300,7 @@ static void __init reserve_crashkernel(v
+                               (unsigned long)(total_mem >> 20));
+               crashk_res.start = crash_base;
+               crashk_res.end   = crash_base + crash_size - 1;
++              insert_resource(&iomem_resource, &crashk_res);
+       }
+ }
+ #else
+@@ -306,6 +317,40 @@ void __attribute__((weak)) __init memory
+        machine_specific_memory_setup();
+ }
+ 
++static void __init parse_setup_data(void)
++{
++      struct setup_data *data;
++      unsigned long pa_data;
++
++      if (boot_params.hdr.version < 0x0209)
++              return;
++      pa_data = boot_params.hdr.setup_data;
++      while (pa_data) {
++              data = early_ioremap(pa_data, PAGE_SIZE);
++              switch (data->type) {
++              default:
++                      break;
++              }
++#ifndef CONFIG_DEBUG_BOOT_PARAMS
++              free_early(pa_data, pa_data+sizeof(*data)+data->len);
++#endif
++              pa_data = data->next;
++              early_iounmap(data, PAGE_SIZE);
++      }
++}
++
++#ifdef CONFIG_PCI_MMCONFIG
++extern void __cpuinit fam10h_check_enable_mmcfg(void);
++extern void __init check_enable_amd_mmconf_dmi(void);
++#else
++void __cpuinit fam10h_check_enable_mmcfg(void)
++{
++}
++void __init check_enable_amd_mmconf_dmi(void)
++{
++}
++#endif
++
+ /*
+  * setup_arch - architecture-specific boot-time initializations
+  *
+@@ -389,6 +434,8 @@ void __init setup_arch(char **cmdline_p)
+       strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
+       *cmdline_p = command_line;
+ 
++      parse_setup_data();
++
+       parse_early_param();
   
- /*
-  * The current flushing context - we pass it instead of 5 arguments:
-@@ -392,6 +31,7 @@ struct cpa_data {
-       int             numpages;
-       int             flushtlb;
-       unsigned long   pfn;
-+      unsigned        force_split : 1;
- };
+ #ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
+@@ -398,6 +445,13 @@ void __init setup_arch(char **cmdline_p)
   
- #ifdef CONFIG_X86_64
-@@ -637,6 +277,9 @@ try_preserve_large_page(pte_t *kpte, uns
-       int i, do_split = 1;
-       unsigned int level;
+       finish_e820_parsing();
   
-+      if (cpa->force_split)
-+              return 1;
++#ifndef CONFIG_XEN
++      /* after parse_early_param, so could debug it */
++      insert_resource(&iomem_resource, &code_resource);
++      insert_resource(&iomem_resource, &data_resource);
++      insert_resource(&iomem_resource, &bss_resource);
++#endif
  +
-       spin_lock_irqsave(&pgd_lock, flags);
+       early_gart_iommu_check();
+ 
+       e820_register_active_regions(0, 0, -1UL);
+@@ -420,15 +474,23 @@ void __init setup_arch(char **cmdline_p)
+ 
+       check_efer();
+ 
+-      init_memory_mapping(0, (end_pfn_map << PAGE_SHIFT));
++      max_pfn_mapped = init_memory_mapping(0, (max_pfn_mapped << PAGE_SHIFT));
+       if (efi_enabled)
+               efi_init();
+ 
++#ifndef CONFIG_XEN
++      vsmp_init();
++#endif
++
+       if (is_initial_xendomain())
+               dmi_scan_machine();
+ 
+       io_delay_init();
+ 
++#ifdef CONFIG_KVM_CLOCK
++      kvmclock_init();
++#endif
++
+ #if defined(CONFIG_SMP) && !defined(CONFIG_XEN)
+       /* setup to use the early static init tables during kernel startup */
+       x86_cpu_to_apicid_early_ptr = (void *)x86_cpu_to_apicid_init;
+@@ -459,9 +521,9 @@ void __init setup_arch(char **cmdline_p)
+       contig_initmem_init(0, end_pfn);
+ #endif
+ 
+-      early_res_to_bootmem();
+-
+ #ifndef CONFIG_XEN
++      dma32_reserve_bootmem();
++
+ #ifdef CONFIG_ACPI_SLEEP
         /*
-        * Check for races, another CPU might have split this page
-@@ -856,9 +499,7 @@ static int split_large_page(pte_t *kpte,
-               goto out_unlock;
+        * Reserve low memory region for sleep support.
+@@ -487,16 +549,17 @@ void __init setup_arch(char **cmdline_p)
+               unsigned long end_of_mem    = end_pfn << PAGE_SHIFT;
   
-       pbase = (pte_t *)page_address(base);
--#ifdef CONFIG_X86_32
--      paravirt_alloc_pt(&init_mm, page_to_pfn(base));
+               if (ramdisk_end <= end_of_mem) {
+-#ifndef CONFIG_XEN
+-                      reserve_bootmem_generic(ramdisk_image, ramdisk_size);
  -#endif
-+      paravirt_alloc_pte(&init_mm, page_to_pfn(base));
-       ref_prot = pte_pgprot(pte_clrhuge(*kpte));
++                      /*
++                       * don't need to reserve again, already reserved early
++                       * in x86_64_start_kernel, and early_res_to_bootmem
++                       * convert that to reserved in bootmem
++                       */
+                       initrd_start = ramdisk_image + PAGE_OFFSET;
+                       initrd_end = initrd_start+ramdisk_size;
+ #ifdef CONFIG_XEN
+                       initrd_below_start_ok = 1;
+ #endif
+               } else {
+-                      /* Assumes everything on node 0 */
+                       free_bootmem(ramdisk_image, ramdisk_size);
+                       printk(KERN_ERR "initrd extends beyond end of memory "
+                              "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
+@@ -506,6 +569,9 @@ void __init setup_arch(char **cmdline_p)
+       }
+ #endif
+       reserve_crashkernel();
++
++      reserve_ibft_region();
++
+       paging_init();
+       map_vsyscall();
+ #ifdef CONFIG_X86_LOCAL_APIC
+@@ -633,16 +699,16 @@ void __init setup_arch(char **cmdline_p)
+       prefill_possible_map();
+ #endif
   
- #ifdef CONFIG_X86_64
-@@ -918,7 +559,7 @@ static int __change_page_attr(struct cpa
- repeat:
-       kpte = lookup_address(address, &level);
-       if (!kpte)
--              return primary ? -EINVAL : 0;
-+              return 0;
++      kvm_guest_init();
++
+       /*
+        * We trust e820 completely. No explicit ROM probing in memory.
+        */
+ #ifdef CONFIG_XEN
+       if (is_initial_xendomain())
+-              e820_reserve_resources(machine_e820.map, machine_e820.nr_map,
+-                                     &code_resource, &data_resource, &bss_resource);
++              e820_reserve_resources(machine_e820.map, machine_e820.nr_map);
+ #else
+-      e820_reserve_resources(e820.map, e820.nr_map,
+-                             &code_resource, &data_resource, &bss_resource);
++      e820_reserve_resources(e820.map, e820.nr_map);
+       e820_mark_nosave_regions();
+ #endif
   
-       old_pte = *kpte;
-       if (!__pte_val(old_pte)) {
-@@ -1077,7 +718,8 @@ static inline int cache_attr(pgprot_t at
+@@ -690,6 +756,9 @@ void __init setup_arch(char **cmdline_p)
+ #endif
+ 
+ #endif /* !CONFIG_XEN */
++
++      /* do this before identify_cpu for boot cpu */
++      check_enable_amd_mmconf_dmi();
   }
   
- static int change_page_attr_set_clr(unsigned long addr, int numpages,
--                                  pgprot_t mask_set, pgprot_t mask_clr)
-+                                  pgprot_t mask_set, pgprot_t mask_clr,
-+                                  int force_split)
- {
-       struct cpa_data cpa;
-       int ret, cache, checkalias;
-@@ -1088,7 +730,7 @@ static int change_page_attr_set_clr(unsi
-        */
-       mask_set = canon_pgprot(mask_set);
-       mask_clr = canon_pgprot(mask_clr);
--      if (!pgprot_val(mask_set) && !pgprot_val(mask_clr))
-+      if (!pgprot_val(mask_set) && !pgprot_val(mask_clr) && !force_split)
-               return 0;
+ #ifdef CONFIG_XEN
+@@ -786,9 +855,9 @@ static void __cpuinit amd_detect_cmp(str
+       bits = c->x86_coreid_bits;
   
-       /* Ensure we are PAGE_SIZE aligned */
-@@ -1105,6 +747,7 @@ static int change_page_attr_set_clr(unsi
-       cpa.mask_set = mask_set;
-       cpa.mask_clr = mask_clr;
-       cpa.flushtlb = 0;
-+      cpa.force_split = force_split;
+       /* Low order bits define the core id (index of core in socket) */
+-      c->cpu_core_id = c->phys_proc_id & ((1 << bits)-1);
+-      /* Convert the APIC ID into the socket ID */
+-      c->phys_proc_id = phys_pkg_id(bits);
++      c->cpu_core_id = c->initial_apicid & ((1 << bits)-1);
++      /* Convert the initial APIC ID into the socket ID */
++      c->phys_proc_id = c->initial_apicid >> bits;
   
-       /* No alias checking for _NX bit modifications */
-       checkalias = (pgprot_val(mask_set) | pgprot_val(mask_clr)) != _PAGE_NX;
-@@ -1143,26 +786,67 @@ out:
- static inline int change_page_attr_set(unsigned long addr, int numpages,
-                                      pgprot_t mask)
- {
--      return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0));
-+      return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0), 0);
+ #ifdef CONFIG_NUMA
+       node = c->phys_proc_id;
+@@ -805,7 +874,7 @@ static void __cpuinit amd_detect_cmp(str
+                  If that doesn't result in a usable node fall back to the
+                  path for the previous case.  */
+ 
+-              int ht_nodeid = apicid - (cpu_data(0).phys_proc_id << bits);
++              int ht_nodeid = c->initial_apicid;
+ 
+               if (ht_nodeid >= 0 &&
+                   apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
+@@ -913,7 +982,7 @@ static void __cpuinit init_amd(struct cp
+ 
+       /* Bit 31 in normal CPUID used for nonstandard 3DNow ID;
+          3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */
+-      clear_bit(0*32+31, (unsigned long *)&c->x86_capability);
++      clear_cpu_cap(c, 0*32+31);
+ 
+       /* On C+ stepping K8 rep microcode works well for copy/memset */
+       level = cpuid_eax(1);
+@@ -955,9 +1024,25 @@ static void __cpuinit init_amd(struct cp
+       /* MFENCE stops RDTSC speculation */
+       set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
+ 
++      if (c->x86 == 0x10)
++              fam10h_check_enable_mmcfg();
++
+ #ifndef CONFIG_XEN
+       if (amd_apic_timer_broken())
+               disable_apic_timer = 1;
++
++      if (c == &boot_cpu_data && c->x86 >= 0xf && c->x86 <= 0x11) {
++              unsigned long long tseg;
++
++              /*
++               * Split up direct mapping around the TSEG SMM area.
++               * Don't do it for gbpages because there seems very little
++               * benefit in doing so.
++               */
++              if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg) &&
++              (tseg >> PMD_SHIFT) < (max_pfn_mapped >> (PMD_SHIFT-PAGE_SHIFT)))
++                      set_memory_4k((unsigned long)__va(tseg), 1);
++      }
+ #endif
   }
   
- static inline int change_page_attr_clear(unsigned long addr, int numpages,
-                                        pgprot_t mask)
+@@ -1051,7 +1136,7 @@ static void __cpuinit early_init_intel(s
   {
--      return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask);
-+      return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask, 0);
+       if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
+           (c->x86 == 0x6 && c->x86_model >= 0x0e))
+-              set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
++              set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
   }
   
--int set_memory_uc(unsigned long addr, int numpages)
-+int _set_memory_uc(unsigned long addr, int numpages)
- {
-+      /*
-+       * for now UC MINUS. see comments in ioremap_nocache()
-+       */
-       return change_page_attr_set(addr, numpages,
--                                  __pgprot(_PAGE_PCD));
-+                                  __pgprot(_PAGE_CACHE_UC_MINUS));
-+}
-+
-+int set_memory_uc(unsigned long addr, int numpages)
-+{
-+      /*
-+       * for now UC MINUS. see comments in ioremap_nocache()
-+       */
-+      if (reserve_memtype(addr, addr + numpages * PAGE_SIZE,
-+                          _PAGE_CACHE_UC_MINUS, NULL))
-+              return -EINVAL;
-+
-+      return _set_memory_uc(addr, numpages);
+ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
+@@ -1094,9 +1179,6 @@ static void __cpuinit init_intel(struct 
+ 
+       if (c->x86 == 15)
+               c->x86_cache_alignment = c->x86_clflush_size * 2;
+-      if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
+-          (c->x86 == 0x6 && c->x86_model >= 0x0e))
+-              set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
+       if (c->x86 == 6)
+               set_cpu_cap(c, X86_FEATURE_REP_GOOD);
+       set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
+@@ -1105,6 +1187,32 @@ static void __cpuinit init_intel(struct 
+       srat_detect_node();
   }
- EXPORT_SYMBOL(set_memory_uc);
   
--int set_memory_wb(unsigned long addr, int numpages)
-+int _set_memory_wc(unsigned long addr, int numpages)
++static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c)
  +{
-+      return change_page_attr_set(addr, numpages,
-+                                  __pgprot(_PAGE_CACHE_WC));
++      if (c->x86 == 0x6 && c->x86_model >= 0xf)
++              set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
  +}
  +
-+int set_memory_wc(unsigned long addr, int numpages)
++static void __cpuinit init_centaur(struct cpuinfo_x86 *c)
  +{
-+      if (!pat_wc_enabled)
-+              return set_memory_uc(addr, numpages);
++      /* Cache sizes */
++      unsigned n;
  +
-+      if (reserve_memtype(addr, addr + numpages * PAGE_SIZE,
-+              _PAGE_CACHE_WC, NULL))
-+              return -EINVAL;
++      n = c->extended_cpuid_level;
++      if (n >= 0x80000008) {
++              unsigned eax = cpuid_eax(0x80000008);
++              c->x86_virt_bits = (eax >> 8) & 0xff;
++              c->x86_phys_bits = eax & 0xff;
++      }
  +
-+      return _set_memory_wc(addr, numpages);
++      if (c->x86 == 0x6 && c->x86_model >= 0xf) {
++              c->x86_cache_alignment = c->x86_clflush_size * 2;
++              set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
++              set_cpu_cap(c, X86_FEATURE_REP_GOOD);
++      }
++      set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
  +}
-+EXPORT_SYMBOL(set_memory_wc);
  +
-+int _set_memory_wb(unsigned long addr, int numpages)
+ static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
   {
-       return change_page_attr_clear(addr, numpages,
--                                    __pgprot(_PAGE_PCD | _PAGE_PWT));
-+                                    __pgprot(_PAGE_CACHE_MASK));
-+}
-+
-+int set_memory_wb(unsigned long addr, int numpages)
-+{
-+      free_memtype(addr, addr + numpages * PAGE_SIZE);
-+
-+      return _set_memory_wb(addr, numpages);
+       char *v = c->x86_vendor_id;
+@@ -1113,6 +1221,8 @@ static void __cpuinit get_cpu_vendor(str
+               c->x86_vendor = X86_VENDOR_AMD;
+       else if (!strcmp(v, "GenuineIntel"))
+               c->x86_vendor = X86_VENDOR_INTEL;
++      else if (!strcmp(v, "CentaurHauls"))
++              c->x86_vendor = X86_VENDOR_CENTAUR;
+       else
+               c->x86_vendor = X86_VENDOR_UNKNOWN;
   }
- EXPORT_SYMBOL(set_memory_wb);
+@@ -1160,15 +1270,16 @@ static void __cpuinit early_identify_cpu
+                       c->x86 += (tfms >> 20) & 0xff;
+               if (c->x86 >= 0x6)
+                       c->x86_model += ((tfms >> 16) & 0xF) << 4;
+-              if (c->x86_capability[0] & (1<<19))
++              if (test_cpu_cap(c, X86_FEATURE_CLFLSH))
+                       c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
+       } else {
+               /* Have CPUID level 0 only - unheard of */
+               c->x86 = 4;
+       }
   
-@@ -1193,6 +877,12 @@ int set_memory_np(unsigned long addr, in
-       return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_PRESENT));
++      c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xff;
+ #ifdef CONFIG_SMP
+-      c->phys_proc_id = (cpuid_ebx(1) >> 24) & 0xff;
++      c->phys_proc_id = c->initial_apicid;
+ #endif
+       /* AMD-defined flags: level 0x80000001 */
+       xlvl = cpuid_eax(0x80000000);
+@@ -1201,8 +1312,12 @@ static void __cpuinit early_identify_cpu
+       case X86_VENDOR_INTEL:
+               early_init_intel(c);
+               break;
++      case X86_VENDOR_CENTAUR:
++              early_init_centaur(c);
++              break;
+       }
+ 
++      validate_pat_support(c);
   }
   
-+int set_memory_4k(unsigned long addr, int numpages)
-+{
-+      return change_page_attr_set_clr(addr, numpages, __pgprot(0),
-+                                      __pgprot(0), 1);
-+}
+ /*
+@@ -1237,6 +1352,10 @@ void __cpuinit identify_cpu(struct cpuin
+               init_intel(c);
+               break;
+ 
++      case X86_VENDOR_CENTAUR:
++              init_centaur(c);
++              break;
  +
- int set_pages_uc(struct page *page, int numpages)
- {
-       unsigned long addr = (unsigned long)page_address(page);
-@@ -1302,6 +992,45 @@ void kernel_map_pages(struct page *page,
-       cpa_fill_pool(NULL);
+       case X86_VENDOR_UNKNOWN:
+       default:
+               display_cacheinfo(c);
+@@ -1266,14 +1385,24 @@ void __cpuinit identify_cpu(struct cpuin
+ #endif
+       select_idle_routine(c);
+ 
+-      if (c != &boot_cpu_data)
+-              mtrr_ap_init();
+ #ifdef CONFIG_NUMA
+       numa_add_cpu(smp_processor_id());
+ #endif
+ 
   }
   
-+#ifdef CONFIG_DEBUG_FS
-+static int dpa_show(struct seq_file *m, void *v)
-+{
-+      seq_puts(m, "DEBUG_PAGEALLOC\n");
-+      seq_printf(m, "pool_size     : %lu\n", pool_size);
-+      seq_printf(m, "pool_pages    : %lu\n", pool_pages);
-+      seq_printf(m, "pool_low      : %lu\n", pool_low);
-+      seq_printf(m, "pool_used     : %lu\n", pool_used);
-+      seq_printf(m, "pool_failed   : %lu\n", pool_failed);
-+
-+      return 0;
-+}
-+
-+static int dpa_open(struct inode *inode, struct file *filp)
++void __cpuinit identify_boot_cpu(void)
  +{
-+      return single_open(filp, dpa_show, NULL);
++      identify_cpu(&boot_cpu_data);
  +}
  +
-+static const struct file_operations dpa_fops = {
-+      .open           = dpa_open,
-+      .read           = seq_read,
-+      .llseek         = seq_lseek,
-+      .release        = single_release,
-+};
-+
-+static int __init debug_pagealloc_proc_init(void)
++void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
  +{
-+      struct dentry *de;
-+
-+      de = debugfs_create_file("debug_pagealloc", 0600, NULL, NULL,
-+                               &dpa_fops);
-+      if (!de)
-+              return -ENOMEM;
-+
-+      return 0;
++      BUG_ON(c == &boot_cpu_data);
++      identify_cpu(c);
++      mtrr_ap_init();
  +}
-+__initcall(debug_pagealloc_proc_init);
-+#endif
  +
- #ifdef CONFIG_HIBERNATION
- 
- bool kernel_page_present(struct page *page)
---- /dev/null
-+++ b/arch/x86/mm/pat-xen.c
-@@ -0,0 +1,602 @@
+ static __init int setup_noclflush(char *arg)
+ {
+       setup_clear_cpu_cap(X86_FEATURE_CLFLSH);
+@@ -1302,123 +1431,3 @@ static __init int setup_disablecpuid(cha
+       return 1;
+ }
+ __setup("clearcpuid=", setup_disablecpuid);
+-
+-/*
+- *    Get CPU information for use by the procfs.
+- */
+-
+-static int show_cpuinfo(struct seq_file *m, void *v)
+-{
+-      struct cpuinfo_x86 *c = v;
+-      int cpu = 0, i;
+-
+-#ifdef CONFIG_SMP
+-      cpu = c->cpu_index;
+-#endif
+-
+-      seq_printf(m, "processor\t: %u\n"
+-                 "vendor_id\t: %s\n"
+-                 "cpu family\t: %d\n"
+-                 "model\t\t: %d\n"
+-                 "model name\t: %s\n",
+-                 (unsigned)cpu,
+-                 c->x86_vendor_id[0] ? c->x86_vendor_id : "unknown",
+-                 c->x86,
+-                 (int)c->x86_model,
+-                 c->x86_model_id[0] ? c->x86_model_id : "unknown");
+-
+-      if (c->x86_mask || c->cpuid_level >= 0)
+-              seq_printf(m, "stepping\t: %d\n", c->x86_mask);
+-      else
+-              seq_printf(m, "stepping\t: unknown\n");
+-
+-      if (cpu_has(c, X86_FEATURE_TSC)) {
+-              unsigned int freq = cpufreq_quick_get((unsigned)cpu);
+-
+-              if (!freq)
+-                      freq = cpu_khz;
+-              seq_printf(m, "cpu MHz\t\t: %u.%03u\n",
+-                         freq / 1000, (freq % 1000));
+-      }
+-
+-      /* Cache size */
+-      if (c->x86_cache_size >= 0)
+-              seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size);
+-
+-#ifdef CONFIG_SMP
+-      if (smp_num_siblings * c->x86_max_cores > 1) {
+-              seq_printf(m, "physical id\t: %d\n", c->phys_proc_id);
+-              seq_printf(m, "siblings\t: %d\n",
+-                             cpus_weight(per_cpu(cpu_core_map, cpu)));
+-              seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id);
+-              seq_printf(m, "cpu cores\t: %d\n", c->booted_cores);
+-      }
+-#endif
+-
+-      seq_printf(m,
+-                 "fpu\t\t: yes\n"
+-                 "fpu_exception\t: yes\n"
+-                 "cpuid level\t: %d\n"
+-                 "wp\t\t: yes\n"
+-                 "flags\t\t:",
+-                 c->cpuid_level);
+-
+-      for (i = 0; i < 32*NCAPINTS; i++)
+-              if (cpu_has(c, i) && x86_cap_flags[i] != NULL)
+-                      seq_printf(m, " %s", x86_cap_flags[i]);
+-
+-      seq_printf(m, "\nbogomips\t: %lu.%02lu\n",
+-                 c->loops_per_jiffy/(500000/HZ),
+-                 (c->loops_per_jiffy/(5000/HZ)) % 100);
+-
+-      if (c->x86_tlbsize > 0)
+-              seq_printf(m, "TLB size\t: %d 4K pages\n", c->x86_tlbsize);
+-      seq_printf(m, "clflush size\t: %d\n", c->x86_clflush_size);
+-      seq_printf(m, "cache_alignment\t: %d\n", c->x86_cache_alignment);
+-
+-      seq_printf(m, "address sizes\t: %u bits physical, %u bits virtual\n",
+-                 c->x86_phys_bits, c->x86_virt_bits);
+-
+-      seq_printf(m, "power management:");
+-      for (i = 0; i < 32; i++) {
+-              if (c->x86_power & (1 << i)) {
+-                      if (i < ARRAY_SIZE(x86_power_flags) &&
+-                          x86_power_flags[i])
+-                              seq_printf(m, "%s%s",
+-                                         x86_power_flags[i][0]?" ":"",
+-                                         x86_power_flags[i]);
+-                      else
+-                              seq_printf(m, " [%d]", i);
+-              }
+-      }
+-
+-      seq_printf(m, "\n\n");
+-
+-      return 0;
+-}
+-
+-static void *c_start(struct seq_file *m, loff_t *pos)
+-{
+-      if (*pos == 0)  /* just in case, cpu 0 is not the first */
+-              *pos = first_cpu(cpu_online_map);
+-      if ((*pos) < NR_CPUS && cpu_online(*pos))
+-              return &cpu_data(*pos);
+-      return NULL;
+-}
+-
+-static void *c_next(struct seq_file *m, void *v, loff_t *pos)
+-{
+-      *pos = next_cpu(*pos, cpu_online_map);
+-      return c_start(m, pos);
+-}
+-
+-static void c_stop(struct seq_file *m, void *v)
+-{
+-}
+-
+-const struct seq_operations cpuinfo_op = {
+-      .start = c_start,
+-      .next = c_next,
+-      .stop = c_stop,
+-      .show = show_cpuinfo,
+-};
+--- /dev/null  1970-01-01 00:00:00.000000000 +0000
++++ sle11-2009-05-14/arch/x86/kernel/smp-xen.c 2009-03-16 16:38:05.000000000 +0100
+@@ -0,0 +1,329 @@
  +/*
-+ * Handle caching attributes in page tables (PAT)
++ *    Intel SMP support routines.
  + *
-+ * Authors: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
-+ *          Suresh B Siddha <suresh.b.siddha@intel.com>
++ *    (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
++ *    (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com>
++ *      (c) 2002,2003 Andi Kleen, SuSE Labs.
  + *
-+ * Loosely based on earlier PAT patchset from Eric Biederman and Andi Kleen.
++ *    i386 and x86_64 integration by Glauber Costa <gcosta@redhat.com>
++ *
++ *    This code is released under the GNU General Public License version 2 or
++ *    later.
  + */
  +
++#include <linux/init.h>
++
  +#include <linux/mm.h>
-+#include <linux/kernel.h>
-+#include <linux/gfp.h>
-+#include <linux/fs.h>
-+#include <linux/bootmem.h>
++#include <linux/delay.h>
++#include <linux/spinlock.h>
++#include <linux/kernel_stat.h>
++#include <linux/mc146818rtc.h>
++#include <linux/cache.h>
++#include <linux/interrupt.h>
++#include <linux/cpu.h>
  +
-+#include <asm/msr.h>
-+#include <asm/tlbflush.h>
-+#include <asm/processor.h>
-+#include <asm/page.h>
-+#include <asm/pgtable.h>
-+#include <asm/pat.h>
-+#include <asm/e820.h>
-+#include <asm/cacheflush.h>
-+#include <asm/fcntl.h>
  +#include <asm/mtrr.h>
-+#include <asm/io.h>
-+
-+#ifdef CONFIG_X86_PAT
-+int __read_mostly pat_wc_enabled = 1;
-+
-+void __cpuinit pat_disable(char *reason)
-+{
-+      pat_wc_enabled = 0;
-+      printk(KERN_INFO "%s\n", reason);
-+}
-+
-+static int __init nopat(char *str)
-+{
-+      pat_disable("PAT support disabled.");
-+      return 0;
-+}
-+early_param("nopat", nopat);
-+#endif
-+
-+static u64 __read_mostly boot_pat_state;
-+
-+enum {
-+      PAT_UC = 0,             /* uncached */
-+      PAT_WC = 1,             /* Write combining */
-+      PAT_WT = 4,             /* Write Through */
-+      PAT_WP = 5,             /* Write Protected */
-+      PAT_WB = 6,             /* Write Back (default) */
-+      PAT_UC_MINUS = 7,       /* UC, but can be overriden by MTRR */
-+};
-+
-+#define PAT(x,y)      ((u64)PAT_ ## y << ((x)*8))
-+
-+void pat_init(void)
-+{
-+      u64 pat;
-+
-+      if (!pat_wc_enabled)
-+              return;
-+
-+      /* Paranoia check. */
-+      if (!cpu_has_pat) {
-+              printk(KERN_ERR "PAT enabled, but CPU feature cleared\n");
-+              /*
-+               * Panic if this happens on the secondary CPU, and we
-+               * switched to PAT on the boot CPU. We have no way to
-+               * undo PAT.
-+              */
-+              BUG_ON(boot_pat_state);
-+      }
-+
-+#ifndef CONFIG_XEN
-+      /* Set PWT to Write-Combining. All other bits stay the same */
-+      /*
-+       * PTE encoding used in Linux:
-+       *      PAT
-+       *      |PCD
-+       *      ||PWT
-+       *      |||
-+       *      000 WB          _PAGE_CACHE_WB
-+       *      001 WC          _PAGE_CACHE_WC
-+       *      010 UC-         _PAGE_CACHE_UC_MINUS
-+       *      011 UC          _PAGE_CACHE_UC
-+       * PAT bit unused
-+       */
-+      pat = PAT(0,WB) | PAT(1,WC) | PAT(2,UC_MINUS) | PAT(3,UC) |
-+            PAT(4,WB) | PAT(5,WC) | PAT(6,UC_MINUS) | PAT(7,UC);
-+
-+      /* Boot CPU check */
-+      if (!boot_pat_state)
-+              rdmsrl(MSR_IA32_CR_PAT, boot_pat_state);
-+
-+      wrmsrl(MSR_IA32_CR_PAT, pat);
-+#else
-+      /*
-+       * PAT settings are part of the hypervisor interface, and their
-+       * assignment cannot be changed.
-+       */
-+      rdmsrl(MSR_IA32_CR_PAT, pat);
-+      if (!boot_pat_state)
-+              boot_pat_state = pat;
-+#endif
-+      printk(KERN_INFO "x86 PAT enabled: cpu %d, old 0x%Lx, new 0x%Lx\n",
-+             smp_processor_id(), boot_pat_state, pat);
-+}
-+
-+#undef PAT
-+
-+static char *cattr_name(unsigned long flags)
-+{
-+      switch (flags & _PAGE_CACHE_MASK) {
-+              case _PAGE_CACHE_UC:            return "uncached";
-+              case _PAGE_CACHE_UC_MINUS:      return "uncached-minus";
-+              case _PAGE_CACHE_WB:            return "write-back";
-+              case _PAGE_CACHE_WC:            return "write-combining";
-+              case _PAGE_CACHE_WP:            return "write-protected";
-+              case _PAGE_CACHE_WT:            return "write-through";
-+              default:                        return "broken";
-+      }
-+}
-+
++#include <asm/tlbflush.h>
++#include <asm/mmu_context.h>
++#include <asm/proto.h>
++#include <mach_ipi.h>
++#include <xen/evtchn.h>
  +/*
-+ * The global memtype list keeps track of memory type for specific
-+ * physical memory areas. Conflicting memory types in different
-+ * mappings can cause CPU cache corruption. To avoid this we keep track.
++ *    Some notes on x86 processor bugs affecting SMP operation:
  + *
-+ * The list is sorted based on starting address and can contain multiple
-+ * entries for each address (this allows reference counting for overlapping
-+ * areas). All the aliases have the same cache attributes of course.
-+ * Zero attributes are represented as holes.
++ *    Pentium, Pentium Pro, II, III (and all CPUs) have bugs.
++ *    The Linux implications for SMP are handled as follows:
  + *
-+ * Currently the data structure is a list because the number of mappings
-+ * are expected to be relatively small. If this should be a problem
-+ * it could be changed to a rbtree or similar.
++ *    Pentium III / [Xeon]
++ *            None of the E1AP-E3AP errata are visible to the user.
  + *
-+ * memtype_lock protects the whole list.
-+ */
-+
-+struct memtype {
-+      u64 start;
-+      u64 end;
-+      unsigned long type;
-+      struct list_head nd;
-+};
-+
-+static LIST_HEAD(memtype_list);
-+static DEFINE_SPINLOCK(memtype_lock);         /* protects memtype list */
++ *    E1AP.   see PII A1AP
++ *    E2AP.   see PII A2AP
++ *    E3AP.   see PII A3AP
++ *
++ *    Pentium II / [Xeon]
++ *            None of the A1AP-A3AP errata are visible to the user.
++ *
++ *    A1AP.   see PPro 1AP
++ *    A2AP.   see PPro 2AP
++ *    A3AP.   see PPro 7AP
++ *
++ *    Pentium Pro
++ *            None of 1AP-9AP errata are visible to the normal user,
++ *    except occasional delivery of 'spurious interrupt' as trap #15.
++ *    This is very rare and a non-problem.
++ *
++ *    1AP.    Linux maps APIC as non-cacheable
++ *    2AP.    worked around in hardware
++ *    3AP.    fixed in C0 and above steppings microcode update.
++ *            Linux does not use excessive STARTUP_IPIs.
++ *    4AP.    worked around in hardware
++ *    5AP.    symmetric IO mode (normal Linux operation) not affected.
++ *            'noapic' mode has vector 0xf filled out properly.
++ *    6AP.    'noapic' mode might be affected - fixed in later steppings
++ *    7AP.    We do not assume writes to the LVT deasserting IRQs
++ *    8AP.    We do not enable low power mode (deep sleep) during MP bootup
++ *    9AP.    We do not use mixed mode
++ *
++ *    Pentium
++ *            There is a marginal case where REP MOVS on 100MHz SMP
++ *    machines with B stepping processors can fail. XXX should provide
++ *    an L1cache=Writethrough or L1cache=off option.
++ *
++ *            B stepping CPUs may hang. There are hardware work arounds
++ *    for this. We warn about it in case your board doesn't have the work
++ *    arounds. Basically that's so I can tell anyone with a B stepping
++ *    CPU and SMP problems "tough".
++ *
++ *    Specific items [From Pentium Processor Specification Update]
++ *
++ *    1AP.    Linux doesn't use remote read
++ *    2AP.    Linux doesn't trust APIC errors
++ *    3AP.    We work around this
++ *    4AP.    Linux never generated 3 interrupts of the same priority
++ *            to cause a lost local interrupt.
++ *    5AP.    Remote read is never used
++ *    6AP.    not affected - worked around in hardware
++ *    7AP.    not affected - worked around in hardware
++ *    8AP.    worked around in hardware - we get explicit CS errors if not
++ *    9AP.    only 'noapic' mode affected. Might generate spurious
++ *            interrupts, we log only the first one and count the
++ *            rest silently.
++ *    10AP.   not affected - worked around in hardware
++ *    11AP.   Linux reads the APIC between writes to avoid this, as per
++ *            the documentation. Make sure you preserve this as it affects
++ *            the C stepping chips too.
++ *    12AP.   not affected - worked around in hardware
++ *    13AP.   not affected - worked around in hardware
++ *    14AP.   we always deassert INIT during bootup
++ *    15AP.   not affected - worked around in hardware
++ *    16AP.   not affected - worked around in hardware
++ *    17AP.   not affected - worked around in hardware
++ *    18AP.   not affected - worked around in hardware
++ *    19AP.   not affected - worked around in BIOS
++ *
++ *    If this sounds worrying believe me these bugs are either ___RARE___,
++ *    or are signal timing bugs worked around in hardware and there's
++ *    about nothing of note with C stepping upwards.
++ */
  +
  +/*
-+ * Does intersection of PAT memory type and MTRR memory type and returns
-+ * the resulting memory type as PAT understands it.
-+ * (Type in pat and mtrr will not have same value)
-+ * The intersection is based on "Effective Memory Type" tables in IA-32
-+ * SDM vol 3a
++ * this function sends a 'reschedule' IPI to another CPU.
++ * it goes straight through and wastes no time serializing
++ * anything. Worst case is that we lose a reschedule ...
  + */
-+static int pat_x_mtrr_type(u64 start, u64 end, unsigned long prot,
-+                              unsigned long *ret_prot)
++void xen_smp_send_reschedule(int cpu)
  +{
-+      unsigned long pat_type;
-+      u8 mtrr_type;
-+
-+      pat_type = prot & _PAGE_CACHE_MASK;
-+      prot &= (~_PAGE_CACHE_MASK);
-+
-+      /*
-+       * We return the PAT request directly for types where PAT takes
-+       * precedence with respect to MTRR and for UC_MINUS.
-+       * Consistency checks with other PAT requests is done later
-+       * while going through memtype list.
-+       */
-+      if (pat_type == _PAGE_CACHE_WC) {
-+              *ret_prot = prot | _PAGE_CACHE_WC;
-+              return 0;
-+      } else if (pat_type == _PAGE_CACHE_UC_MINUS) {
-+              *ret_prot = prot | _PAGE_CACHE_UC_MINUS;
-+              return 0;
-+      } else if (pat_type == _PAGE_CACHE_UC) {
-+              *ret_prot = prot | _PAGE_CACHE_UC;
-+              return 0;
-+      }
-+
-+      /*
-+       * Look for MTRR hint to get the effective type in case where PAT
-+       * request is for WB.
-+       */
-+      mtrr_type = mtrr_type_lookup(start, end);
-+
-+      if (mtrr_type == MTRR_TYPE_UNCACHABLE) {
-+              *ret_prot = prot | _PAGE_CACHE_UC;
-+      } else if (mtrr_type == MTRR_TYPE_WRCOMB) {
-+              *ret_prot = prot | _PAGE_CACHE_WC;
-+      } else {
-+              *ret_prot = prot | _PAGE_CACHE_WB;
++      if (unlikely(cpu_is_offline(cpu))) {
++              WARN_ON(1);
++              return;
  +      }
-+
-+      return 0;
++      send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
  +}
  +
  +/*
-+ * req_type typically has one of the:
-+ * - _PAGE_CACHE_WB
-+ * - _PAGE_CACHE_WC
-+ * - _PAGE_CACHE_UC_MINUS
-+ * - _PAGE_CACHE_UC
-+ *
-+ * req_type will have a special case value '-1', when requester want to inherit
-+ * the memory type from mtrr (if WB), existing PAT, defaulting to UC_MINUS.
-+ *
-+ * If ret_type is NULL, function will return an error if it cannot reserve the
-+ * region with req_type. If ret_type is non-null, function will return
-+ * available type in ret_type in case of no error. In case of any error
-+ * it will return a negative return value.
++ * Structure and data for smp_call_function(). This is designed to minimise
++ * static memory requirements. It also looks cleaner.
  + */
-+int reserve_memtype(u64 start, u64 end, unsigned long req_type,
-+                      unsigned long *ret_type)
-+{
-+      struct memtype *new_entry = NULL;
-+      struct memtype *parse;
-+      unsigned long actual_type;
-+      int err = 0;
-+
-+      /* Only track when pat_wc_enabled */
-+      if (!pat_wc_enabled) {
-+              /* This is identical to page table setting without PAT */
-+              if (ret_type) {
-+                      if (req_type == -1) {
-+                              *ret_type = _PAGE_CACHE_WB;
-+                      } else {
-+                              *ret_type = req_type;
-+                      }
-+              }
-+              return 0;
-+      }
-+
-+      /* Low ISA region is always mapped WB in page table. No need to track */
-+      if (start >= ISA_START_ADDRESS && (end - 1) <= ISA_END_ADDRESS) {
-+              if (ret_type)
-+                      *ret_type = _PAGE_CACHE_WB;
-+
-+              return 0;
-+      }
-+
-+      if (req_type == -1) {
-+              /*
-+               * Call mtrr_lookup to get the type hint. This is an
-+               * optimization for /dev/mem mmap'ers into WB memory (BIOS
-+               * tools and ACPI tools). Use WB request for WB memory and use
-+               * UC_MINUS otherwise.
-+               */
-+              u8 mtrr_type = mtrr_type_lookup(start, end);
-+
-+              if (mtrr_type == MTRR_TYPE_WRBACK) {
-+                      req_type = _PAGE_CACHE_WB;
-+                      actual_type = _PAGE_CACHE_WB;
-+              } else {
-+                      req_type = _PAGE_CACHE_UC_MINUS;
-+                      actual_type = _PAGE_CACHE_UC_MINUS;
-+              }
-+      } else {
-+              req_type &= _PAGE_CACHE_MASK;
-+              err = pat_x_mtrr_type(start, end, req_type, &actual_type);
-+      }
-+
-+      if (err) {
-+              if (ret_type)
-+                      *ret_type = actual_type;
-+
-+              return -EINVAL;
-+      }
-+
-+      new_entry  = kmalloc(sizeof(struct memtype), GFP_KERNEL);
-+      if (!new_entry)
-+              return -ENOMEM;
-+
-+      new_entry->start = start;
-+      new_entry->end = end;
-+      new_entry->type = actual_type;
-+
-+      if (ret_type)
-+              *ret_type = actual_type;
-+
-+      spin_lock(&memtype_lock);
-+
-+      /* Search for existing mapping that overlaps the current range */
-+      list_for_each_entry(parse, &memtype_list, nd) {
-+              struct memtype *saved_ptr;
-+
-+              if (parse->start >= end) {
-+                      pr_debug("New Entry\n");
-+                      list_add(&new_entry->nd, parse->nd.prev);
-+                      new_entry = NULL;
-+                      break;
-+              }
-+
-+              if (start <= parse->start && end >= parse->start) {
-+                      if (actual_type != parse->type && ret_type) {
-+                              actual_type = parse->type;
-+                              *ret_type = actual_type;
-+                              new_entry->type = actual_type;
-+                      }
-+
-+                      if (actual_type != parse->type) {
-+                              printk(
-+              KERN_INFO "%s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
-+                                      current->comm, current->pid,
-+                                      start, end,
-+                                      cattr_name(actual_type),
-+                                      cattr_name(parse->type));
-+                              err = -EBUSY;
-+                              break;
-+                      }
-+
-+                      saved_ptr = parse;
-+                      /*
-+                       * Check to see whether the request overlaps more
-+                       * than one entry in the list
-+                       */
-+                      list_for_each_entry_continue(parse, &memtype_list, nd) {
-+                              if (end <= parse->start) {
-+                                      break;
-+                              }
-+
-+                              if (actual_type != parse->type) {
-+                                      printk(
-+              KERN_INFO "%s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
-+                                              current->comm, current->pid,
-+                                              start, end,
-+                                              cattr_name(actual_type),
-+                                              cattr_name(parse->type));
-+                                      err = -EBUSY;
-+                                      break;
-+                              }
-+                      }
-+
-+                      if (err) {
-+                              break;
-+                      }
-+
-+                      pr_debug("Overlap at 0x%Lx-0x%Lx\n",
-+                             saved_ptr->start, saved_ptr->end);
-+                      /* No conflict. Go ahead and add this new entry */
-+                      list_add(&new_entry->nd, saved_ptr->nd.prev);
-+                      new_entry = NULL;
-+                      break;
-+              }
-+
-+              if (start < parse->end) {
-+                      if (actual_type != parse->type && ret_type) {
-+                              actual_type = parse->type;
-+                              *ret_type = actual_type;
-+                              new_entry->type = actual_type;
-+                      }
++static DEFINE_SPINLOCK(call_lock);
  +
-+                      if (actual_type != parse->type) {
-+                              printk(
-+              KERN_INFO "%s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
-+                                      current->comm, current->pid,
-+                                      start, end,
-+                                      cattr_name(actual_type),
-+                                      cattr_name(parse->type));
-+                              err = -EBUSY;
-+                              break;
-+                      }
++struct call_data_struct {
++      void (*func) (void *info);
++      void *info;
++      atomic_t started;
++      atomic_t finished;
++      int wait;
++};
  +
-+                      saved_ptr = parse;
-+                      /*
-+                       * Check to see whether the request overlaps more
-+                       * than one entry in the list
-+                       */
-+                      list_for_each_entry_continue(parse, &memtype_list, nd) {
-+                              if (end <= parse->start) {
-+                                      break;
-+                              }
++void lock_ipi_call_lock(void)
++{
++      spin_lock_irq(&call_lock);
++}
  +
-+                              if (actual_type != parse->type) {
-+                                      printk(
-+              KERN_INFO "%s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
-+                                              current->comm, current->pid,
-+                                              start, end,
-+                                              cattr_name(actual_type),
-+                                              cattr_name(parse->type));
-+                                      err = -EBUSY;
-+                                      break;
-+                              }
-+                      }
++void unlock_ipi_call_lock(void)
++{
++      spin_unlock_irq(&call_lock);
++}
  +
-+                      if (err) {
-+                              break;
-+                      }
++static struct call_data_struct *call_data;
  +
-+                      pr_debug(KERN_INFO "Overlap at 0x%Lx-0x%Lx\n",
-+                               saved_ptr->start, saved_ptr->end);
-+                      /* No conflict. Go ahead and add this new entry */
-+                      list_add(&new_entry->nd, &saved_ptr->nd);
-+                      new_entry = NULL;
-+                      break;
-+              }
-+      }
++static void __smp_call_function(void (*func) (void *info), void *info,
++                              int nonatomic, int wait)
++{
++      struct call_data_struct data;
++      int cpus = num_online_cpus() - 1;
  +
-+      if (err) {
-+              printk(KERN_INFO
-+      "reserve_memtype failed 0x%Lx-0x%Lx, track %s, req %s\n",
-+                      start, end, cattr_name(new_entry->type),
-+                      cattr_name(req_type));
-+              kfree(new_entry);
-+              spin_unlock(&memtype_lock);
-+              return err;
-+      }
++      if (!cpus)
++              return;
  +
-+      if (new_entry) {
-+              /* No conflict. Not yet added to the list. Add to the tail */
-+              list_add_tail(&new_entry->nd, &memtype_list);
-+              pr_debug("New Entry\n");
-+      }
++      data.func = func;
++      data.info = info;
++      atomic_set(&data.started, 0);
++      data.wait = wait;
++      if (wait)
++              atomic_set(&data.finished, 0);
  +
-+      if (ret_type) {
-+              pr_debug(
-+      "reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n",
-+                      start, end, cattr_name(actual_type),
-+                      cattr_name(req_type), cattr_name(*ret_type));
-+      } else {
-+              pr_debug(
-+      "reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s\n",
-+                      start, end, cattr_name(actual_type),
-+                      cattr_name(req_type));
-+      }
++      call_data = &data;
++      mb();
  +
-+      spin_unlock(&memtype_lock);
-+      return err;
++      /* Send a message to all other CPUs and wait for them to respond */
++      send_IPI_allbutself(CALL_FUNCTION_VECTOR);
++
++      /* Wait for response */
++      while (atomic_read(&data.started) != cpus)
++              cpu_relax();
++
++      if (wait)
++              while (atomic_read(&data.finished) != cpus)
++                      cpu_relax();
  +}
  +
-+int free_memtype(u64 start, u64 end)
++
++/**
++ * smp_call_function_mask(): Run a function on a set of other CPUs.
++ * @mask: The set of cpus to run on.  Must not include the current cpu.
++ * @func: The function to run. This must be fast and non-blocking.
++ * @info: An arbitrary pointer to pass to the function.
++ * @wait: If true, wait (atomically) until function has completed on other CPUs.
++ *
++ * Returns 0 on success, else a negative status code.
++ *
++ * If @wait is true, then returns once @func has returned; otherwise
++ * it returns just before the target cpu calls @func.
++ *
++ * You must not call this function with disabled interrupts or from a
++ * hardware interrupt handler or from a bottom half handler.
++ */
++int
++xen_smp_call_function_mask(cpumask_t mask,
++                            void (*func)(void *), void *info,
++                            int wait)
  +{
-+      struct memtype *ml;
-+      int err = -EINVAL;
++      struct call_data_struct data;
++      cpumask_t allbutself;
++      int cpus;
  +
-+      /* Only track when pat_wc_enabled */
-+      if (!pat_wc_enabled) {
-+              return 0;
-+      }
++      /* Can deadlock when called with interrupts disabled */
++      WARN_ON(irqs_disabled());
  +
-+      /* Low ISA region is always mapped WB. No need to track */
-+      if (start >= ISA_START_ADDRESS && end <= ISA_END_ADDRESS) {
++      /* Holding any lock stops cpus from going down. */
++      spin_lock(&call_lock);
++
++      allbutself = cpu_online_map;
++      cpu_clear(smp_processor_id(), allbutself);
++
++      cpus_and(mask, mask, allbutself);
++      cpus = cpus_weight(mask);
++
++      if (!cpus) {
++              spin_unlock(&call_lock);
  +              return 0;
  +      }
  +
-+      spin_lock(&memtype_lock);
-+      list_for_each_entry(ml, &memtype_list, nd) {
-+              if (ml->start == start && ml->end == end) {
-+                      list_del(&ml->nd);
-+                      kfree(ml);
-+                      err = 0;
-+                      break;
-+              }
-+      }
-+      spin_unlock(&memtype_lock);
++      data.func = func;
++      data.info = info;
++      atomic_set(&data.started, 0);
++      data.wait = wait;
++      if (wait)
++              atomic_set(&data.finished, 0);
  +
-+      if (err) {
-+              printk(KERN_INFO "%s:%d freeing invalid memtype %Lx-%Lx\n",
-+                      current->comm, current->pid, start, end);
-+      }
++      call_data = &data;
++      wmb();
  +
-+      pr_debug("free_memtype request 0x%Lx-0x%Lx\n", start, end);
-+      return err;
-+}
++      /* Send a message to other CPUs */
++      if (cpus_equal(mask, allbutself) &&
++          cpus_equal(cpu_online_map, cpu_callout_map))
++              send_IPI_allbutself(CALL_FUNCTION_VECTOR);
++      else
++              send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
  +
++      /* Wait for response */
++      while (atomic_read(&data.started) != cpus)
++              cpu_relax();
  +
-+/*
-+ * /dev/mem mmap interface. The memtype used for mapping varies:
-+ * - Use UC for mappings with O_SYNC flag
-+ * - Without O_SYNC flag, if there is any conflict in reserve_memtype,
-+ *   inherit the memtype from existing mapping.
-+ * - Else use UC_MINUS memtype (for backward compatibility with existing
-+ *   X drivers.
-+ */
-+pgprot_t phys_mem_access_prot(struct file *file, unsigned long mfn,
-+                              unsigned long size, pgprot_t vma_prot)
-+{
-+      return vma_prot;
++      if (wait)
++              while (atomic_read(&data.finished) != cpus)
++                      cpu_relax();
++      spin_unlock(&call_lock);
++
++      return 0;
  +}
  +
-+#ifdef CONFIG_NONPROMISC_DEVMEM
-+/* This check is done in drivers/char/mem.c in case of NONPROMISC_DEVMEM*/
-+static inline int range_is_allowed(unsigned long mfn, unsigned long size)
++static void stop_this_cpu(void *dummy)
  +{
-+      return 1;
++      local_irq_disable();
++      /*
++       * Remove this CPU:
++       */
++      cpu_clear(smp_processor_id(), cpu_online_map);
++      disable_all_local_evtchn();
++      if (hlt_works(smp_processor_id()))
++              for (;;) halt();
++      for (;;);
  +}
-+#else
-+static inline int range_is_allowed(unsigned long mfn, unsigned long size)
-+{
-+      u64 from = ((u64)mfn) << PAGE_SHIFT;
-+      u64 to = from + size;
-+      u64 cursor = from;
  +
-+      while (cursor < to) {
-+              if (!devmem_is_allowed(mfn)) {
-+                      printk(KERN_INFO
-+              "Program %s tried to access /dev/mem between %Lx->%Lx.\n",
-+                              current->comm, from, to);
-+                      return 0;
-+              }
-+              cursor += PAGE_SIZE;
-+              mfn++;
-+      }
-+      return 1;
-+}
-+#endif /* CONFIG_NONPROMISC_DEVMEM */
++/*
++ * this function calls the 'stop' function on all other CPUs in the system.
++ */
  +
-+int phys_mem_access_prot_allowed(struct file *file, unsigned long mfn,
-+                              unsigned long size, pgprot_t *vma_prot)
++void xen_smp_send_stop(void)
  +{
-+      u64 addr = (u64)mfn << PAGE_SHIFT;
-+      unsigned long flags = _PAGE_CACHE_UC_MINUS;
-+      int retval;
-+
-+      if (!range_is_allowed(mfn, size))
-+              return 0;
++      int nolock;
++      unsigned long flags;
  +
-+      if (file->f_flags & O_SYNC) {
-+              flags = _PAGE_CACHE_UC;
-+      }
++      /* Don't deadlock on the call lock in panic */
++      nolock = !spin_trylock(&call_lock);
++      local_irq_save(flags);
++      __smp_call_function(stop_this_cpu, NULL, 0, 0);
++      if (!nolock)
++              spin_unlock(&call_lock);
++      disable_all_local_evtchn();
++      local_irq_restore(flags);
++}
  +
-+#ifndef CONFIG_X86_32
-+#ifndef CONFIG_XEN /* Xen sets correct MTRR type on non-RAM for us. */
-+      /*
-+       * On the PPro and successors, the MTRRs are used to set
-+       * memory types for physical addresses outside main memory,
-+       * so blindly setting UC or PWT on those pages is wrong.
-+       * For Pentiums and earlier, the surround logic should disable
-+       * caching for the high addresses through the KEN pin, but
-+       * we maintain the tradition of paranoia in this code.
-+       */
-+      if (!pat_wc_enabled &&
-+          ! ( test_bit(X86_FEATURE_MTRR, boot_cpu_data.x86_capability) ||
-+              test_bit(X86_FEATURE_K6_MTRR, boot_cpu_data.x86_capability) ||
-+              test_bit(X86_FEATURE_CYRIX_ARR, boot_cpu_data.x86_capability) ||
-+              test_bit(X86_FEATURE_CENTAUR_MCR, boot_cpu_data.x86_capability)) &&
-+         (pfn << PAGE_SHIFT) >= __pa(high_memory)) {
-+              flags = _PAGE_CACHE_UC;
-+      }
-+#endif
++/*
++ * Reschedule call back. Nothing to do,
++ * all the work is done automatically when
++ * we return from the interrupt.
++ */
++irqreturn_t smp_reschedule_interrupt(int irq, void *dev_id)
++{
++#ifdef CONFIG_X86_32
++      __get_cpu_var(irq_stat).irq_resched_count++;
++#else
++      add_pda(irq_resched_count, 1);
  +#endif
++      return IRQ_HANDLED;
++}
++
++irqreturn_t smp_call_function_interrupt(int irq, void *dev_id)
++{
++      void (*func) (void *info) = call_data->func;
++      void *info = call_data->info;
++      int wait = call_data->wait;
  +
  +      /*
-+       * With O_SYNC, we can only take UC mapping. Fail if we cannot.
-+       * Without O_SYNC, we want to get
-+       * - WB for WB-able memory and no other conflicting mappings
-+       * - UC_MINUS for non-WB-able memory with no other conflicting mappings
-+       * - Inherit from confliting mappings otherwise
++       * Notify initiating CPU that I've grabbed the data and am
++       * about to execute the function
  +       */
-+      if (flags != _PAGE_CACHE_UC_MINUS) {
-+              retval = reserve_memtype(addr, addr + size, flags, NULL);
-+      } else {
-+              retval = reserve_memtype(addr, addr + size, -1, &flags);
++      mb();
++      atomic_inc(&call_data->started);
++      /*
++       * At this point the info structure may be out of scope unless wait==1
++       */
++      irq_enter();
++      (*func)(info);
++#ifdef CONFIG_X86_32
++      __get_cpu_var(irq_stat).irq_call_count++;
++#else
++      add_pda(irq_call_count, 1);
++#endif
++      irq_exit();
++
++      if (wait) {
++              mb();
++              atomic_inc(&call_data->finished);
  +      }
  +
-+      if (retval < 0)
-+              return 0;
++      return IRQ_HANDLED;
++}
+--- sle11-2009-05-14.orig/arch/x86/kernel/smp_32-xen.c 2009-03-16 16:33:40.000000000 +0100
++++ /dev/null  1970-01-01 00:00:00.000000000 +0000
+@@ -1,647 +0,0 @@
+-/*
+- *    Intel SMP support routines.
+- *
+- *    (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
+- *    (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com>
+- *
+- *    This code is released under the GNU General Public License version 2 or
+- *    later.
+- */
+-
+-#include <linux/init.h>
+-
+-#include <linux/mm.h>
+-#include <linux/delay.h>
+-#include <linux/spinlock.h>
+-#include <linux/kernel_stat.h>
+-#include <linux/mc146818rtc.h>
+-#include <linux/cache.h>
+-#include <linux/interrupt.h>
+-#include <linux/cpu.h>
+-#include <linux/module.h>
+-
+-#include <asm/mtrr.h>
+-#include <asm/tlbflush.h>
+-#include <asm/mmu_context.h>
+-#if 0
+-#include <mach_apic.h>
+-#endif
+-#include <xen/evtchn.h>
+-
+-/*
+- *    Some notes on x86 processor bugs affecting SMP operation:
+- *
+- *    Pentium, Pentium Pro, II, III (and all CPUs) have bugs.
+- *    The Linux implications for SMP are handled as follows:
+- *
+- *    Pentium III / [Xeon]
+- *            None of the E1AP-E3AP errata are visible to the user.
+- *
+- *    E1AP.   see PII A1AP
+- *    E2AP.   see PII A2AP
+- *    E3AP.   see PII A3AP
+- *
+- *    Pentium II / [Xeon]
+- *            None of the A1AP-A3AP errata are visible to the user.
+- *
+- *    A1AP.   see PPro 1AP
+- *    A2AP.   see PPro 2AP
+- *    A3AP.   see PPro 7AP
+- *
+- *    Pentium Pro
+- *            None of 1AP-9AP errata are visible to the normal user,
+- *    except occasional delivery of 'spurious interrupt' as trap #15.
+- *    This is very rare and a non-problem.
+- *
+- *    1AP.    Linux maps APIC as non-cacheable
+- *    2AP.    worked around in hardware
+- *    3AP.    fixed in C0 and above steppings microcode update.
+- *            Linux does not use excessive STARTUP_IPIs.
+- *    4AP.    worked around in hardware
+- *    5AP.    symmetric IO mode (normal Linux operation) not affected.
+- *            'noapic' mode has vector 0xf filled out properly.
+- *    6AP.    'noapic' mode might be affected - fixed in later steppings
+- *    7AP.    We do not assume writes to the LVT deassering IRQs
+- *    8AP.    We do not enable low power mode (deep sleep) during MP bootup
+- *    9AP.    We do not use mixed mode
+- *
+- *    Pentium
+- *            There is a marginal case where REP MOVS on 100MHz SMP
+- *    machines with B stepping processors can fail. XXX should provide
+- *    an L1cache=Writethrough or L1cache=off option.
+- *
+- *            B stepping CPUs may hang. There are hardware work arounds
+- *    for this. We warn about it in case your board doesn't have the work
+- *    arounds. Basically that's so I can tell anyone with a B stepping
+- *    CPU and SMP problems "tough".
+- *
+- *    Specific items [From Pentium Processor Specification Update]
+- *
+- *    1AP.    Linux doesn't use remote read
+- *    2AP.    Linux doesn't trust APIC errors
+- *    3AP.    We work around this
+- *    4AP.    Linux never generated 3 interrupts of the same priority
+- *            to cause a lost local interrupt.
+- *    5AP.    Remote read is never used
+- *    6AP.    not affected - worked around in hardware
+- *    7AP.    not affected - worked around in hardware
+- *    8AP.    worked around in hardware - we get explicit CS errors if not
+- *    9AP.    only 'noapic' mode affected. Might generate spurious
+- *            interrupts, we log only the first one and count the
+- *            rest silently.
+- *    10AP.   not affected - worked around in hardware
+- *    11AP.   Linux reads the APIC between writes to avoid this, as per
+- *            the documentation. Make sure you preserve this as it affects
+- *            the C stepping chips too.
+- *    12AP.   not affected - worked around in hardware
+- *    13AP.   not affected - worked around in hardware
+- *    14AP.   we always deassert INIT during bootup
+- *    15AP.   not affected - worked around in hardware
+- *    16AP.   not affected - worked around in hardware
+- *    17AP.   not affected - worked around in hardware
+- *    18AP.   not affected - worked around in hardware
+- *    19AP.   not affected - worked around in BIOS
+- *
+- *    If this sounds worrying believe me these bugs are either ___RARE___,
+- *    or are signal timing bugs worked around in hardware and there's
+- *    about nothing of note with C stepping upwards.
+- */
+-
+-DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate) ____cacheline_aligned = { &init_mm, 0, };
+-
+-/*
+- * the following functions deal with sending IPIs between CPUs.
+- *
+- * We use 'broadcast', CPU->CPU IPIs and self-IPIs too.
+- */
+-
+-static inline int __prepare_ICR (unsigned int shortcut, int vector)
+-{
+-      unsigned int icr = shortcut | APIC_DEST_LOGICAL;
+-
+-      switch (vector) {
+-      default:
+-              icr |= APIC_DM_FIXED | vector;
+-              break;
+-      case NMI_VECTOR:
+-              icr |= APIC_DM_NMI;
+-              break;
+-      }
+-      return icr;
+-}
+-
+-static inline int __prepare_ICR2 (unsigned int mask)
+-{
+-      return SET_APIC_DEST_FIELD(mask);
+-}
+-
+-DECLARE_PER_CPU(int, ipi_to_irq[NR_IPIS]);
+-
+-static inline void __send_IPI_one(unsigned int cpu, int vector)
+-{
+-      int irq = per_cpu(ipi_to_irq, cpu)[vector];
+-      BUG_ON(irq < 0);
+-      notify_remote_via_irq(irq);
+-}
+-
+-void __send_IPI_shortcut(unsigned int shortcut, int vector)
+-{
+-      int cpu;
+-
+-      switch (shortcut) {
+-      case APIC_DEST_SELF:
+-              __send_IPI_one(smp_processor_id(), vector);
+-              break;
+-      case APIC_DEST_ALLBUT:
+-              for (cpu = 0; cpu < NR_CPUS; ++cpu) {
+-                      if (cpu == smp_processor_id())
+-                              continue;
+-                      if (cpu_isset(cpu, cpu_online_map)) {
+-                              __send_IPI_one(cpu, vector);
+-                      }
+-              }
+-              break;
+-      default:
+-              printk("XXXXXX __send_IPI_shortcut %08x vector %d\n", shortcut,
+-                     vector);
+-              break;
+-      }
+-}
+-
+-void send_IPI_self(int vector)
+-{
+-      __send_IPI_shortcut(APIC_DEST_SELF, vector);
+-}
+-
+-/*
+- * This is only used on smaller machines.
+- */
+-void send_IPI_mask_bitmask(cpumask_t mask, int vector)
+-{
+-      unsigned long flags;
+-      unsigned int cpu;
+-
+-      local_irq_save(flags);
+-      WARN_ON(cpus_addr(mask)[0] & ~cpus_addr(cpu_online_map)[0]);
+-
+-      for (cpu = 0; cpu < NR_CPUS; ++cpu) {
+-              if (cpu_isset(cpu, mask)) {
+-                      __send_IPI_one(cpu, vector);
+-              }
+-      }
+-
+-      local_irq_restore(flags);
+-}
+-
+-void send_IPI_mask_sequence(cpumask_t mask, int vector)
+-{
+-
+-      send_IPI_mask_bitmask(mask, vector);
+-}
+-
+-#include <mach_ipi.h> /* must come after the send_IPI functions above for inlining */
+-
+-#if 0 /* XEN */
+-/*
+- *    Smarter SMP flushing macros. 
+- *            c/o Linus Torvalds.
+- *
+- *    These mean you can really definitely utterly forget about
+- *    writing to user space from interrupts. (Its not allowed anyway).
+- *
+- *    Optimizations Manfred Spraul <manfred@colorfullife.com>
+- */
+-
+-static cpumask_t flush_cpumask;
+-static struct mm_struct * flush_mm;
+-static unsigned long flush_va;
+-static DEFINE_SPINLOCK(tlbstate_lock);
+-
+-/*
+- * We cannot call mmdrop() because we are in interrupt context,
+- * instead update mm->cpu_vm_mask.
+- *
+- * We need to reload %cr3 since the page tables may be going
+- * away from under us..
+- */
+-void leave_mm(int cpu)
+-{
+-      if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK)
+-              BUG();
+-      cpu_clear(cpu, per_cpu(cpu_tlbstate, cpu).active_mm->cpu_vm_mask);
+-      load_cr3(swapper_pg_dir);
+-}
+-EXPORT_SYMBOL_GPL(leave_mm);
+-
+-/*
+- *
+- * The flush IPI assumes that a thread switch happens in this order:
+- * [cpu0: the cpu that switches]
+- * 1) switch_mm() either 1a) or 1b)
+- * 1a) thread switch to a different mm
+- * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask);
+- *    Stop ipi delivery for the old mm. This is not synchronized with
+- *    the other cpus, but smp_invalidate_interrupt ignore flush ipis
+- *    for the wrong mm, and in the worst case we perform a superfluous
+- *    tlb flush.
+- * 1a2) set cpu_tlbstate to TLBSTATE_OK
+- *    Now the smp_invalidate_interrupt won't call leave_mm if cpu0
+- *    was in lazy tlb mode.
+- * 1a3) update cpu_tlbstate[].active_mm
+- *    Now cpu0 accepts tlb flushes for the new mm.
+- * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask);
+- *    Now the other cpus will send tlb flush ipis.
+- * 1a4) change cr3.
+- * 1b) thread switch without mm change
+- *    cpu_tlbstate[].active_mm is correct, cpu0 already handles
+- *    flush ipis.
+- * 1b1) set cpu_tlbstate to TLBSTATE_OK
+- * 1b2) test_and_set the cpu bit in cpu_vm_mask.
+- *    Atomically set the bit [other cpus will start sending flush ipis],
+- *    and test the bit.
+- * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
+- * 2) switch %%esp, ie current
+- *
+- * The interrupt must handle 2 special cases:
+- * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm.
+- * - the cpu performs speculative tlb reads, i.e. even if the cpu only
+- *   runs in kernel space, the cpu could load tlb entries for user space
+- *   pages.
+- *
+- * The good news is that cpu_tlbstate is local to each cpu, no
+- * write/read ordering problems.
+- */
+-
+-/*
+- * TLB flush IPI:
+- *
+- * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
+- * 2) Leave the mm if we are in the lazy tlb mode.
+- */
+-
+-irqreturn_t smp_invalidate_interrupt(int irq, void *dev_id)
+-{
+-      unsigned long cpu;
+-
+-      cpu = get_cpu();
+-
+-      if (!cpu_isset(cpu, flush_cpumask))
+-              goto out;
+-              /* 
+-               * This was a BUG() but until someone can quote me the
+-               * line from the intel manual that guarantees an IPI to
+-               * multiple CPUs is retried _only_ on the erroring CPUs
+-               * its staying as a return
+-               *
+-               * BUG();
+-               */
+-               
+-      if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) {
+-              if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) {
+-                      if (flush_va == TLB_FLUSH_ALL)
+-                              local_flush_tlb();
+-                      else
+-                              __flush_tlb_one(flush_va);
+-              } else
+-                      leave_mm(cpu);
+-      }
+-      smp_mb__before_clear_bit();
+-      cpu_clear(cpu, flush_cpumask);
+-      smp_mb__after_clear_bit();
+-out:
+-      put_cpu_no_resched();
+-      __get_cpu_var(irq_stat).irq_tlb_count++;
+-
+-      return IRQ_HANDLED;
+-}
+-
+-void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
+-                           unsigned long va)
+-{
+-      cpumask_t cpumask = *cpumaskp;
+-
+-      /*
+-       * A couple of (to be removed) sanity checks:
+-       *
+-       * - current CPU must not be in mask
+-       * - mask must exist :)
+-       */
+-      BUG_ON(cpus_empty(cpumask));
+-      BUG_ON(cpu_isset(smp_processor_id(), cpumask));
+-      BUG_ON(!mm);
+-
+-#ifdef CONFIG_HOTPLUG_CPU
+-      /* If a CPU which we ran on has gone down, OK. */
+-      cpus_and(cpumask, cpumask, cpu_online_map);
+-      if (unlikely(cpus_empty(cpumask)))
+-              return;
+-#endif
+-
+-      /*
+-       * i'm not happy about this global shared spinlock in the
+-       * MM hot path, but we'll see how contended it is.
+-       * AK: x86-64 has a faster method that could be ported.
+-       */
+-      spin_lock(&tlbstate_lock);
+-      
+-      flush_mm = mm;
+-      flush_va = va;
+-      cpus_or(flush_cpumask, cpumask, flush_cpumask);
+-      /*
+-       * We have to send the IPI only to
+-       * CPUs affected.
+-       */
+-      send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR);
+-
+-      while (!cpus_empty(flush_cpumask))
+-              /* nothing. lockup detection does not belong here */
+-              cpu_relax();
+-
+-      flush_mm = NULL;
+-      flush_va = 0;
+-      spin_unlock(&tlbstate_lock);
+-}
+-      
+-void flush_tlb_current_task(void)
+-{
+-      struct mm_struct *mm = current->mm;
+-      cpumask_t cpu_mask;
+-
+-      preempt_disable();
+-      cpu_mask = mm->cpu_vm_mask;
+-      cpu_clear(smp_processor_id(), cpu_mask);
+-
+-      local_flush_tlb();
+-      if (!cpus_empty(cpu_mask))
+-              flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
+-      preempt_enable();
+-}
+-
+-void flush_tlb_mm (struct mm_struct * mm)
+-{
+-      cpumask_t cpu_mask;
+-
+-      preempt_disable();
+-      cpu_mask = mm->cpu_vm_mask;
+-      cpu_clear(smp_processor_id(), cpu_mask);
+-
+-      if (current->active_mm == mm) {
+-              if (current->mm)
+-                      local_flush_tlb();
+-              else
+-                      leave_mm(smp_processor_id());
+-      }
+-      if (!cpus_empty(cpu_mask))
+-              flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
+-
+-      preempt_enable();
+-}
+-
+-void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
+-{
+-      struct mm_struct *mm = vma->vm_mm;
+-      cpumask_t cpu_mask;
+-
+-      preempt_disable();
+-      cpu_mask = mm->cpu_vm_mask;
+-      cpu_clear(smp_processor_id(), cpu_mask);
+-
+-      if (current->active_mm == mm) {
+-              if(current->mm)
+-                      __flush_tlb_one(va);
+-              else
+-                      leave_mm(smp_processor_id());
+-      }
+-
+-      if (!cpus_empty(cpu_mask))
+-              flush_tlb_others(cpu_mask, mm, va);
+-
+-      preempt_enable();
+-}
+-EXPORT_SYMBOL(flush_tlb_page);
+-
+-static void do_flush_tlb_all(void* info)
+-{
+-      unsigned long cpu = smp_processor_id();
+-
+-      __flush_tlb_all();
+-      if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_LAZY)
+-              leave_mm(cpu);
+-}
+-
+-void flush_tlb_all(void)
+-{
+-      on_each_cpu(do_flush_tlb_all, NULL, 1, 1);
+-}
+-
+-#endif /* XEN */
+-
+-/*
+- * this function sends a 'reschedule' IPI to another CPU.
+- * it goes straight through and wastes no time serializing
+- * anything. Worst case is that we lose a reschedule ...
+- */
+-void xen_smp_send_reschedule(int cpu)
+-{
+-      WARN_ON(cpu_is_offline(cpu));
+-      send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
+-}
+-
+-/*
+- * Structure and data for smp_call_function(). This is designed to minimise
+- * static memory requirements. It also looks cleaner.
+- */
+-static DEFINE_SPINLOCK(call_lock);
+-
+-struct call_data_struct {
+-      void (*func) (void *info);
+-      void *info;
+-      atomic_t started;
+-      atomic_t finished;
+-      int wait;
+-};
+-
+-void lock_ipi_call_lock(void)
+-{
+-      spin_lock_irq(&call_lock);
+-}
+-
+-void unlock_ipi_call_lock(void)
+-{
+-      spin_unlock_irq(&call_lock);
+-}
+-
+-static struct call_data_struct *call_data;
+-
+-static void __smp_call_function(void (*func) (void *info), void *info,
+-                              int nonatomic, int wait)
+-{
+-      struct call_data_struct data;
+-      int cpus = num_online_cpus() - 1;
+-
+-      if (!cpus)
+-              return;
+-
+-      data.func = func;
+-      data.info = info;
+-      atomic_set(&data.started, 0);
+-      data.wait = wait;
+-      if (wait)
+-              atomic_set(&data.finished, 0);
+-
+-      call_data = &data;
+-      mb();
+-
+-      /* Send a message to all other CPUs and wait for them to respond */
+-      send_IPI_allbutself(CALL_FUNCTION_VECTOR);
+-
+-      /* Wait for response */
+-      while (atomic_read(&data.started) != cpus)
+-              cpu_relax();
+-
+-      if (wait)
+-              while (atomic_read(&data.finished) != cpus)
+-                      cpu_relax();
+-}
+-
+-
+-/**
+- * smp_call_function_mask(): Run a function on a set of other CPUs.
+- * @mask: The set of cpus to run on.  Must not include the current cpu.
+- * @func: The function to run. This must be fast and non-blocking.
+- * @info: An arbitrary pointer to pass to the function.
+- * @wait: If true, wait (atomically) until function has completed on other CPUs.
+- *
+-  * Returns 0 on success, else a negative status code.
+- *
+- * If @wait is true, then returns once @func has returned; otherwise
+- * it returns just before the target cpu calls @func.
+- *
+- * You must not call this function with disabled interrupts or from a
+- * hardware interrupt handler or from a bottom half handler.
+- */
+-int
+-xen_smp_call_function_mask(cpumask_t mask,
+-                            void (*func)(void *), void *info,
+-                            int wait)
+-{
+-      struct call_data_struct data;
+-      cpumask_t allbutself;
+-      int cpus;
+-
+-      /* Can deadlock when called with interrupts disabled */
+-      WARN_ON(irqs_disabled());
+-
+-      /* Holding any lock stops cpus from going down. */
+-      spin_lock(&call_lock);
+-
+-      allbutself = cpu_online_map;
+-      cpu_clear(smp_processor_id(), allbutself);
+-
+-      cpus_and(mask, mask, allbutself);
+-      cpus = cpus_weight(mask);
+-
+-      if (!cpus) {
+-              spin_unlock(&call_lock);
+-              return 0;
+-      }
+-
+-      data.func = func;
+-      data.info = info;
+-      atomic_set(&data.started, 0);
+-      data.wait = wait;
+-      if (wait)
+-              atomic_set(&data.finished, 0);
+-
+-      call_data = &data;
+-      mb();
+-
+-      /* Send a message to other CPUs */
+-      if (cpus_equal(mask, allbutself))
+-              send_IPI_allbutself(CALL_FUNCTION_VECTOR);
+-      else
+-              send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
+-
+-      /* Wait for response */
+-      while (atomic_read(&data.started) != cpus)
+-              cpu_relax();
+-
+-      if (wait)
+-              while (atomic_read(&data.finished) != cpus)
+-                      cpu_relax();
+-      spin_unlock(&call_lock);
+-
+-      return 0;
+-}
+-
+-static void stop_this_cpu (void * dummy)
+-{
+-      local_irq_disable();
+-      /*
+-       * Remove this CPU:
+-       */
+-      cpu_clear(smp_processor_id(), cpu_online_map);
+-      disable_all_local_evtchn();
+-      if (cpu_data(smp_processor_id()).hlt_works_ok)
+-              for(;;) halt();
+-      for (;;);
+-}
+-
+-/*
+- * this function calls the 'stop' function on all other CPUs in the system.
+- */
+-
+-void xen_smp_send_stop(void)
+-{
+-      /* Don't deadlock on the call lock in panic */
+-      int nolock = !spin_trylock(&call_lock);
+-      unsigned long flags;
+-
+-      local_irq_save(flags);
+-      __smp_call_function(stop_this_cpu, NULL, 0, 0);
+-      if (!nolock)
+-              spin_unlock(&call_lock);
+-      disable_all_local_evtchn();
+-      local_irq_restore(flags);
+-}
+-
+-/*
+- * Reschedule call back. Nothing to do,
+- * all the work is done automatically when
+- * we return from the interrupt.
+- */
+-irqreturn_t smp_reschedule_interrupt(int irq, void *dev_id)
+-{
+-      __get_cpu_var(irq_stat).irq_resched_count++;
+-
+-      return IRQ_HANDLED;
+-}
+-
+-#include <linux/kallsyms.h>
+-irqreturn_t smp_call_function_interrupt(int irq, void *dev_id)
+-{
+-      void (*func) (void *info) = call_data->func;
+-      void *info = call_data->info;
+-      int wait = call_data->wait;
+-
+-      /*
+-       * Notify initiating CPU that I've grabbed the data and am
+-       * about to execute the function
+-       */
+-      mb();
+-      atomic_inc(&call_data->started);
+-      /*
+-       * At this point the info structure may be out of scope unless wait==1
+-       */
+-      irq_enter();
+-      (*func)(info);
+-      __get_cpu_var(irq_stat).irq_call_count++;
+-      irq_exit();
+-
+-      if (wait) {
+-              mb();
+-              atomic_inc(&call_data->finished);
+-      }
+-
+-      return IRQ_HANDLED;
+-}
+--- sle11-2009-05-14.orig/arch/x86/kernel/smp_64-xen.c 2009-03-16 16:33:40.000000000 +0100
++++ /dev/null  1970-01-01 00:00:00.000000000 +0000
+@@ -1,554 +0,0 @@
+-/*
+- *    Intel SMP support routines.
+- *
+- *    (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
+- *    (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com>
+- *      (c) 2002,2003 Andi Kleen, SuSE Labs.
+- *
+- *    This code is released under the GNU General Public License version 2 or
+- *    later.
+- */
+-
+-#include <linux/init.h>
+-
+-#include <linux/mm.h>
+-#include <linux/delay.h>
+-#include <linux/spinlock.h>
+-#include <linux/smp.h>
+-#include <linux/kernel_stat.h>
+-#include <linux/mc146818rtc.h>
+-#include <linux/interrupt.h>
+-
+-#include <asm/mtrr.h>
+-#include <asm/pgalloc.h>
+-#include <asm/tlbflush.h>
+-#include <asm/mach_apic.h>
+-#include <asm/mmu_context.h>
+-#include <asm/proto.h>
+-#include <asm/apicdef.h>
+-#include <asm/idle.h>
+-#ifdef CONFIG_XEN
+-#include <xen/evtchn.h>
+-#endif
+-
+-#ifndef CONFIG_XEN
+-/*
+- *    Smarter SMP flushing macros.
+- *            c/o Linus Torvalds.
+- *
+- *    These mean you can really definitely utterly forget about
+- *    writing to user space from interrupts. (Its not allowed anyway).
+- *
+- *    Optimizations Manfred Spraul <manfred@colorfullife.com>
+- *
+- *    More scalable flush, from Andi Kleen
+- *
+- *    To avoid global state use 8 different call vectors.
+- *    Each CPU uses a specific vector to trigger flushes on other
+- *    CPUs. Depending on the received vector the target CPUs look into
+- *    the right per cpu variable for the flush data.
+- *
+- *    With more than 8 CPUs they are hashed to the 8 available
+- *    vectors. The limited global vector space forces us to this right now.
+- *    In future when interrupts are split into per CPU domains this could be
+- *    fixed, at the cost of triggering multiple IPIs in some cases.
+- */
+-
+-union smp_flush_state {
+-      struct {
+-              cpumask_t flush_cpumask;
+-              struct mm_struct *flush_mm;
+-              unsigned long flush_va;
+-              spinlock_t tlbstate_lock;
+-      };
+-      char pad[SMP_CACHE_BYTES];
+-} ____cacheline_aligned;
+-
+-/* State is put into the per CPU data section, but padded
+-   to a full cache line because other CPUs can access it and we don't
+-   want false sharing in the per cpu data segment. */
+-static DEFINE_PER_CPU(union smp_flush_state, flush_state);
+-
+-/*
+- * We cannot call mmdrop() because we are in interrupt context,
+- * instead update mm->cpu_vm_mask.
+- */
+-void leave_mm(int cpu)
+-{
+-      if (read_pda(mmu_state) == TLBSTATE_OK)
+-              BUG();
+-      cpu_clear(cpu, read_pda(active_mm)->cpu_vm_mask);
+-      load_cr3(swapper_pg_dir);
+-}
+-EXPORT_SYMBOL_GPL(leave_mm);
+-
+-/*
+- *
+- * The flush IPI assumes that a thread switch happens in this order:
+- * [cpu0: the cpu that switches]
+- * 1) switch_mm() either 1a) or 1b)
+- * 1a) thread switch to a different mm
+- * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask);
+- *    Stop ipi delivery for the old mm. This is not synchronized with
+- *    the other cpus, but smp_invalidate_interrupt ignore flush ipis
+- *    for the wrong mm, and in the worst case we perform a superfluous
+- *    tlb flush.
+- * 1a2) set cpu mmu_state to TLBSTATE_OK
+- *    Now the smp_invalidate_interrupt won't call leave_mm if cpu0
+- *    was in lazy tlb mode.
+- * 1a3) update cpu active_mm
+- *    Now cpu0 accepts tlb flushes for the new mm.
+- * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask);
+- *    Now the other cpus will send tlb flush ipis.
+- * 1a4) change cr3.
+- * 1b) thread switch without mm change
+- *    cpu active_mm is correct, cpu0 already handles
+- *    flush ipis.
+- * 1b1) set cpu mmu_state to TLBSTATE_OK
+- * 1b2) test_and_set the cpu bit in cpu_vm_mask.
+- *    Atomically set the bit [other cpus will start sending flush ipis],
+- *    and test the bit.
+- * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
+- * 2) switch %%esp, ie current
+- *
+- * The interrupt must handle 2 special cases:
+- * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm.
+- * - the cpu performs speculative tlb reads, i.e. even if the cpu only
+- *   runs in kernel space, the cpu could load tlb entries for user space
+- *   pages.
+- *
+- * The good news is that cpu mmu_state is local to each cpu, no
+- * write/read ordering problems.
+- */
+-
+-/*
+- * TLB flush IPI:
+- *
+- * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
+- * 2) Leave the mm if we are in the lazy tlb mode.
+- *
+- * Interrupts are disabled.
+- */
+-
+-asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs)
+-{
+-      int cpu;
+-      int sender;
+-      union smp_flush_state *f;
+-
+-      cpu = smp_processor_id();
+-      /*
+-       * orig_rax contains the negated interrupt vector.
+-       * Use that to determine where the sender put the data.
+-       */
+-      sender = ~regs->orig_ax - INVALIDATE_TLB_VECTOR_START;
+-      f = &per_cpu(flush_state, sender);
+-
+-      if (!cpu_isset(cpu, f->flush_cpumask))
+-              goto out;
+-              /*
+-               * This was a BUG() but until someone can quote me the
+-               * line from the intel manual that guarantees an IPI to
+-               * multiple CPUs is retried _only_ on the erroring CPUs
+-               * its staying as a return
+-               *
+-               * BUG();
+-               */
+-
+-      if (f->flush_mm == read_pda(active_mm)) {
+-              if (read_pda(mmu_state) == TLBSTATE_OK) {
+-                      if (f->flush_va == TLB_FLUSH_ALL)
+-                              local_flush_tlb();
+-                      else
+-                              __flush_tlb_one(f->flush_va);
+-              } else
+-                      leave_mm(cpu);
+-      }
+-out:
+-      ack_APIC_irq();
+-      cpu_clear(cpu, f->flush_cpumask);
+-      add_pda(irq_tlb_count, 1);
+-}
+-
+-void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
+-                           unsigned long va)
+-{
+-      int sender;
+-      union smp_flush_state *f;
+-      cpumask_t cpumask = *cpumaskp;
+-
+-      /* Caller has disabled preemption */
+-      sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS;
+-      f = &per_cpu(flush_state, sender);
+-
+-      /*
+-       * Could avoid this lock when
+-       * num_online_cpus() <= NUM_INVALIDATE_TLB_VECTORS, but it is
+-       * probably not worth checking this for a cache-hot lock.
+-       */
+-      spin_lock(&f->tlbstate_lock);
+-
+-      f->flush_mm = mm;
+-      f->flush_va = va;
+-      cpus_or(f->flush_cpumask, cpumask, f->flush_cpumask);
+-
+-      /*
+-       * We have to send the IPI only to
+-       * CPUs affected.
+-       */
+-      send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR_START + sender);
+-
+-      while (!cpus_empty(f->flush_cpumask))
+-              cpu_relax();
+-
+-      f->flush_mm = NULL;
+-      f->flush_va = 0;
+-      spin_unlock(&f->tlbstate_lock);
+-}
+-
+-int __cpuinit init_smp_flush(void)
+-{
+-      int i;
+-
+-      for_each_cpu_mask(i, cpu_possible_map) {
+-              spin_lock_init(&per_cpu(flush_state, i).tlbstate_lock);
+-      }
+-      return 0;
+-}
+-core_initcall(init_smp_flush);
+-
+-void flush_tlb_current_task(void)
+-{
+-      struct mm_struct *mm = current->mm;
+-      cpumask_t cpu_mask;
+-
+-      preempt_disable();
+-      cpu_mask = mm->cpu_vm_mask;
+-      cpu_clear(smp_processor_id(), cpu_mask);
+-
+-      local_flush_tlb();
+-      if (!cpus_empty(cpu_mask))
+-              flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
+-      preempt_enable();
+-}
+-
+-void flush_tlb_mm (struct mm_struct * mm)
+-{
+-      cpumask_t cpu_mask;
+-
+-      preempt_disable();
+-      cpu_mask = mm->cpu_vm_mask;
+-      cpu_clear(smp_processor_id(), cpu_mask);
+-
+-      if (current->active_mm == mm) {
+-              if (current->mm)
+-                      local_flush_tlb();
+-              else
+-                      leave_mm(smp_processor_id());
+-      }
+-      if (!cpus_empty(cpu_mask))
+-              flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
+-
+-      preempt_enable();
+-}
+-
+-void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
+-{
+-      struct mm_struct *mm = vma->vm_mm;
+-      cpumask_t cpu_mask;
+-
+-      preempt_disable();
+-      cpu_mask = mm->cpu_vm_mask;
+-      cpu_clear(smp_processor_id(), cpu_mask);
+-
+-      if (current->active_mm == mm) {
+-              if(current->mm)
+-                      __flush_tlb_one(va);
+-              else
+-                      leave_mm(smp_processor_id());
+-      }
+-
+-      if (!cpus_empty(cpu_mask))
+-              flush_tlb_others(cpu_mask, mm, va);
+-
+-      preempt_enable();
+-}
+-
+-static void do_flush_tlb_all(void* info)
+-{
+-      unsigned long cpu = smp_processor_id();
+-
+-      __flush_tlb_all();
+-      if (read_pda(mmu_state) == TLBSTATE_LAZY)
+-              leave_mm(cpu);
+-}
+-
+-void flush_tlb_all(void)
+-{
+-      on_each_cpu(do_flush_tlb_all, NULL, 1, 1);
+-}
+-#endif /* Xen */
+-
+-/*
+- * this function sends a 'reschedule' IPI to another CPU.
+- * it goes straight through and wastes no time serializing
+- * anything. Worst case is that we lose a reschedule ...
+- */
+-
+-void smp_send_reschedule(int cpu)
+-{
+-      send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
+-}
+-
+-/*
+- * Structure and data for smp_call_function(). This is designed to minimise
+- * static memory requirements. It also looks cleaner.
+- */
+-static DEFINE_SPINLOCK(call_lock);
+-
+-struct call_data_struct {
+-      void (*func) (void *info);
+-      void *info;
+-      atomic_t started;
+-      atomic_t finished;
+-      int wait;
+-};
+-
+-static struct call_data_struct * call_data;
+-
+-void lock_ipi_call_lock(void)
+-{
+-      spin_lock_irq(&call_lock);
+-}
+-
+-void unlock_ipi_call_lock(void)
+-{
+-      spin_unlock_irq(&call_lock);
+-}
+-
+-/*
+- * this function sends a 'generic call function' IPI to all other CPU
+- * of the system defined in the mask.
+- */
+-static int __smp_call_function_mask(cpumask_t mask,
+-                                  void (*func)(void *), void *info,
+-                                  int wait)
+-{
+-      struct call_data_struct data;
+-      cpumask_t allbutself;
+-      int cpus;
+-
+-      allbutself = cpu_online_map;
+-      cpu_clear(smp_processor_id(), allbutself);
+-
+-      cpus_and(mask, mask, allbutself);
+-      cpus = cpus_weight(mask);
+-
+-      if (!cpus)
+-              return 0;
+-
+-      data.func = func;
+-      data.info = info;
+-      atomic_set(&data.started, 0);
+-      data.wait = wait;
+-      if (wait)
+-              atomic_set(&data.finished, 0);
+-
+-      call_data = &data;
+-      wmb();
+-
+-      /* Send a message to other CPUs */
+-      if (cpus_equal(mask, allbutself))
+-              send_IPI_allbutself(CALL_FUNCTION_VECTOR);
+-      else
+-              send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
+-
+-      /* Wait for response */
+-      while (atomic_read(&data.started) != cpus)
+-              cpu_relax();
+-
+-      if (!wait)
+-              return 0;
+-
+-      while (atomic_read(&data.finished) != cpus)
+-              cpu_relax();
+-
+-      return 0;
+-}
+-/**
+- * smp_call_function_mask(): Run a function on a set of other CPUs.
+- * @mask: The set of cpus to run on.  Must not include the current cpu.
+- * @func: The function to run. This must be fast and non-blocking.
+- * @info: An arbitrary pointer to pass to the function.
+- * @wait: If true, wait (atomically) until function has completed on other CPUs.
+- *
+- * Returns 0 on success, else a negative status code.
+- *
+- * If @wait is true, then returns once @func has returned; otherwise
+- * it returns just before the target cpu calls @func.
+- *
+- * You must not call this function with disabled interrupts or from a
+- * hardware interrupt handler or from a bottom half handler.
+- */
+-int smp_call_function_mask(cpumask_t mask,
+-                         void (*func)(void *), void *info,
+-                         int wait)
+-{
+-      int ret;
+-
+-      /* Can deadlock when called with interrupts disabled */
+-      WARN_ON(irqs_disabled());
+-
+-      spin_lock(&call_lock);
+-      ret = __smp_call_function_mask(mask, func, info, wait);
+-      spin_unlock(&call_lock);
+-      return ret;
+-}
+-EXPORT_SYMBOL(smp_call_function_mask);
+-
+-/*
+- * smp_call_function_single - Run a function on a specific CPU
+- * @func: The function to run. This must be fast and non-blocking.
+- * @info: An arbitrary pointer to pass to the function.
+- * @nonatomic: Currently unused.
+- * @wait: If true, wait until function has completed on other CPUs.
+- *
+- * Retrurns 0 on success, else a negative status code.
+- *
+- * Does not return until the remote CPU is nearly ready to execute <func>
+- * or is or has executed.
+- */
+-
+-int smp_call_function_single (int cpu, void (*func) (void *info), void *info,
+-                            int nonatomic, int wait)
+-{
+-      /* prevent preemption and reschedule on another processor */
+-      int ret, me = get_cpu();
+-
+-      /* Can deadlock when called with interrupts disabled */
+-      WARN_ON(irqs_disabled());
+-
+-      if (cpu == me) {
+-              local_irq_disable();
+-              func(info);
+-              local_irq_enable();
+-              put_cpu();
+-              return 0;
+-      }
+-
+-      ret = smp_call_function_mask(cpumask_of_cpu(cpu), func, info, wait);
+-
+-      put_cpu();
+-      return ret;
+-}
+-EXPORT_SYMBOL(smp_call_function_single);
+-
+-/*
+- * smp_call_function - run a function on all other CPUs.
+- * @func: The function to run. This must be fast and non-blocking.
+- * @info: An arbitrary pointer to pass to the function.
+- * @nonatomic: currently unused.
+- * @wait: If true, wait (atomically) until function has completed on other
+- *        CPUs.
+- *
+- * Returns 0 on success, else a negative status code. Does not return until
+- * remote CPUs are nearly ready to execute func or are or have executed.
+- *
+- * You must not call this function with disabled interrupts or from a
+- * hardware interrupt handler or from a bottom half handler.
+- * Actually there are a few legal cases, like panic.
+- */
+-int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
+-                      int wait)
+-{
+-      return smp_call_function_mask(cpu_online_map, func, info, wait);
+-}
+-EXPORT_SYMBOL(smp_call_function);
+-
+-static void stop_this_cpu(void *dummy)
+-{
+-      local_irq_disable();
+-      /*
+-       * Remove this CPU:
+-       */
+-      cpu_clear(smp_processor_id(), cpu_online_map);
+-      disable_all_local_evtchn();
+-      for (;;)
+-              halt();
+-}
+-
+-void smp_send_stop(void)
+-{
+-      int nolock;
+-      unsigned long flags;
+-
+-#ifndef CONFIG_XEN
+-      if (reboot_force)
+-              return;
+-#endif
+-
+-      /* Don't deadlock on the call lock in panic */
+-      nolock = !spin_trylock(&call_lock);
+-      local_irq_save(flags);
+-      __smp_call_function_mask(cpu_online_map, stop_this_cpu, NULL, 0);
+-      if (!nolock)
+-              spin_unlock(&call_lock);
+-      disable_all_local_evtchn();
+-      local_irq_restore(flags);
+-}
+-
+-/*
+- * Reschedule call back. Nothing to do,
+- * all the work is done automatically when
+- * we return from the interrupt.
+- */
+-#ifndef CONFIG_XEN
+-asmlinkage void smp_reschedule_interrupt(void)
+-#else
+-asmlinkage irqreturn_t smp_reschedule_interrupt(int irq, void *ctx)
+-#endif
+-{
+-#ifndef CONFIG_XEN
+-      ack_APIC_irq();
+-#endif
+-      add_pda(irq_resched_count, 1);
+-#ifdef CONFIG_XEN
+-      return IRQ_HANDLED;
+-#endif
+-}
+-
+-#ifndef CONFIG_XEN
+-asmlinkage void smp_call_function_interrupt(void)
+-#else
+-asmlinkage irqreturn_t smp_call_function_interrupt(int irq, void *ctx)
+-#endif
+-{
+-      void (*func) (void *info) = call_data->func;
+-      void *info = call_data->info;
+-      int wait = call_data->wait;
+-
+-#ifndef CONFIG_XEN
+-      ack_APIC_irq();
+-#endif
+-      /*
+-       * Notify initiating CPU that I've grabbed the data and am
+-       * about to execute the function
+-       */
+-      mb();
+-      atomic_inc(&call_data->started);
+-      /*
+-       * At this point the info structure may be out of scope unless wait==1
+-       */
+-      exit_idle();
+-      irq_enter();
+-      (*func)(info);
+-      add_pda(irq_call_count, 1);
+-      irq_exit();
+-      if (wait) {
+-              mb();
+-              atomic_inc(&call_data->finished);
+-      }
+-#ifdef CONFIG_XEN
+-      return IRQ_HANDLED;
+-#endif
+-}
+--- sle11-2009-05-14.orig/arch/x86/kernel/time_32-xen.c        2009-03-24 10:12:48.000000000 +0100
++++ sle11-2009-05-14/arch/x86/kernel/time_32-xen.c     2009-03-24 10:13:09.000000000 +0100
+@@ -699,8 +699,6 @@ int xen_update_persistent_clock(void)
+       return 0;
+ }
+ 
+-extern void (*late_time_init)(void);
+-
+ /* Dynamically-mapped IRQ. */
+ DEFINE_PER_CPU(int, timer_irq);
+ 
+--- sle11-2009-05-14.orig/arch/x86/kernel/traps_32-xen.c       2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/arch/x86/kernel/traps_32-xen.c    2009-03-16 16:38:05.000000000 +0100
+@@ -9,26 +9,28 @@
+  * 'Traps.c' handles hardware traps and faults after we have saved some
+  * state in 'asm.s'.
+  */
+-#include <linux/sched.h>
++#include <linux/interrupt.h>
++#include <linux/kallsyms.h>
++#include <linux/spinlock.h>
++#include <linux/highmem.h>
++#include <linux/kprobes.h>
++#include <linux/uaccess.h>
++#include <linux/utsname.h>
++#include <linux/kdebug.h>
+ #include <linux/kernel.h>
++#include <linux/module.h>
++#include <linux/ptrace.h>
+ #include <linux/string.h>
++#include <linux/unwind.h>
++#include <linux/delay.h>
+ #include <linux/errno.h>
++#include <linux/kexec.h>
++#include <linux/sched.h>
+ #include <linux/timer.h>
+-#include <linux/mm.h>
+ #include <linux/init.h>
+-#include <linux/delay.h>
+-#include <linux/spinlock.h>
+-#include <linux/interrupt.h>
+-#include <linux/highmem.h>
+-#include <linux/kallsyms.h>
+-#include <linux/ptrace.h>
+-#include <linux/utsname.h>
+-#include <linux/kprobes.h>
+-#include <linux/kexec.h>
+-#include <linux/unwind.h>
+-#include <linux/uaccess.h>
+-#include <linux/nmi.h>
+ #include <linux/bug.h>
++#include <linux/nmi.h>
++#include <linux/mm.h>
+ 
+ #ifdef CONFIG_EISA
+ #include <linux/ioport.h>
+@@ -43,21 +45,18 @@
+ #include <linux/edac.h>
+ #endif
+ 
++#include <asm/arch_hooks.h>
++#include <asm/stacktrace.h>
+ #include <asm/processor.h>
+-#include <asm/system.h>
+-#include <asm/io.h>
+-#include <asm/atomic.h>
+ #include <asm/debugreg.h>
++#include <asm/atomic.h>
++#include <asm/system.h>
++#include <asm/unwind.h>
+ #include <asm/desc.h>
+ #include <asm/i387.h>
+ #include <asm/nmi.h>
+-#include <asm/unwind.h>
+ #include <asm/smp.h>
+-#include <asm/arch_hooks.h>
+-#include <linux/kdebug.h>
+-#include <asm/stacktrace.h>
+-
+-#include <linux/module.h>
++#include <asm/io.h>
+ 
+ #include "mach_traps.h"
+ 
+@@ -71,7 +70,7 @@ EXPORT_SYMBOL_GPL(used_vectors);
+ asmlinkage int system_call(void);
+ 
+ /* Do we ignore FPU interrupts ? */
+-char ignore_fpu_irq = 0;
++char ignore_fpu_irq;
+ 
+ #ifndef CONFIG_X86_NO_IDT
+ /*
+@@ -113,12 +112,13 @@ static unsigned int code_bytes = 64;
+ void printk_address(unsigned long address, int reliable)
+ {
+ #ifdef CONFIG_KALLSYMS
+-      unsigned long offset = 0, symsize;
++      char namebuf[KSYM_NAME_LEN];
++      unsigned long offset = 0;
++      unsigned long symsize;
+       const char *symname;
+-      char *modname;
+-      char *delim = ":";
+-      char namebuf[128];
+       char reliab[4] = "";
++      char *delim = ":";
++      char *modname;
+ 
+       symname = kallsyms_lookup(address, &symsize, &offset,
+                                       &modname, namebuf);
+@@ -146,13 +146,14 @@ static inline int valid_stack_ptr(struct
+ 
+ /* The form of the top of the frame on the stack */
+ struct stack_frame {
+-      struct stack_frame *next_frame;
+-      unsigned long return_address;
++      struct stack_frame      *next_frame;
++      unsigned long           return_address;
+ };
+ 
+-static inline unsigned long print_context_stack(struct thread_info *tinfo,
+-                              unsigned long *stack, unsigned long bp,
+-                              const struct stacktrace_ops *ops, void *data)
++static inline unsigned long
++print_context_stack(struct thread_info *tinfo,
++                  unsigned long *stack, unsigned long bp,
++                  const struct stacktrace_ops *ops, void *data)
+ {
+       struct stack_frame *frame = (struct stack_frame *)bp;
+ 
+@@ -174,7 +175,7 @@ static inline unsigned long print_contex
+       return bp;
+ }
+ 
+-#define MSG(msg) ops->warning(data, msg)
++#define MSG(msg)              ops->warning(data, msg)
+ 
+ void dump_trace(struct task_struct *task, struct pt_regs *regs,
+               unsigned long *stack, unsigned long bp,
+@@ -185,6 +186,7 @@ void dump_trace(struct task_struct *task
+ 
+       if (!stack) {
+               unsigned long dummy;
++
+               stack = &dummy;
+               if (task != current)
+                       stack = (unsigned long *)task->thread.sp;
+@@ -194,7 +196,7 @@ void dump_trace(struct task_struct *task
+       if (!bp) {
+               if (task == current) {
+                       /* Grab bp right from our regs */
+-                      asm ("movl %%ebp, %0" : "=r" (bp) : );
++                      asm("movl %%ebp, %0" : "=r" (bp) :);
+               } else {
+                       /* bp is the last reg pushed by switch_to */
+                       bp = *(unsigned long *) task->thread.sp;
+@@ -204,15 +206,18 @@ void dump_trace(struct task_struct *task
+ 
+       while (1) {
+               struct thread_info *context;
  +
-+      if (ioremap_check_change_attr(mfn, size, flags) < 0) {
-+              free_memtype(addr, addr + size);
-+              printk(KERN_INFO
-+              "%s:%d /dev/mem ioremap_change_attr failed %s for %Lx-%Lx\n",
-+                      current->comm, current->pid,
-+                      cattr_name(flags),
-+                      addr, addr + size);
-+              return 0;
-+      }
+               context = (struct thread_info *)
+                       ((unsigned long)stack & (~(THREAD_SIZE - 1)));
+               bp = print_context_stack(context, stack, bp, ops, data);
+-              /* Should be after the line below, but somewhere
+-                 in early boot context comes out corrupted and we
+-                 can't reference it -AK */
++              /*
++               * Should be after the line below, but somewhere
++               * in early boot context comes out corrupted and we
++               * can't reference it:
++               */
+               if (ops->stack(data, "IRQ") < 0)
+                       break;
+-              stack = (unsigned long*)context->previous_esp;
++              stack = (unsigned long *)context->previous_esp;
+               if (!stack)
+                       break;
+               touch_nmi_watchdog();
+@@ -251,15 +256,15 @@ static void print_trace_address(void *da
+ }
+ 
+ static const struct stacktrace_ops print_trace_ops = {
+-      .warning = print_trace_warning,
+-      .warning_symbol = print_trace_warning_symbol,
+-      .stack = print_trace_stack,
+-      .address = print_trace_address,
++      .warning                = print_trace_warning,
++      .warning_symbol         = print_trace_warning_symbol,
++      .stack                  = print_trace_stack,
++      .address                = print_trace_address,
+ };
+ 
+ static void
+ show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
+-              unsigned long *stack, unsigned long bp, char *log_lvl)
++                 unsigned long *stack, unsigned long bp, char *log_lvl)
+ {
+       dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
+       printk("%s =======================\n", log_lvl);
+@@ -271,21 +276,22 @@ void show_trace(struct task_struct *task
+       show_trace_log_lvl(task, regs, stack, bp, "");
+ }
+ 
+-static void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
+-                     unsigned long *sp, unsigned long bp, char *log_lvl)
++static void
++show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
++                 unsigned long *sp, unsigned long bp, char *log_lvl)
+ {
+       unsigned long *stack;
+       int i;
+ 
+       if (sp == NULL) {
+               if (task)
+-                      sp = (unsigned long*)task->thread.sp;
++                      sp = (unsigned long *)task->thread.sp;
+               else
+                       sp = (unsigned long *)&sp;
+       }
+ 
+       stack = sp;
+-      for(i = 0; i < kstack_depth_to_print; i++) {
++      for (i = 0; i < kstack_depth_to_print; i++) {
+               if (kstack_end(stack))
+                       break;
+               if (i && ((i % 8) == 0))
+@@ -293,6 +299,7 @@ static void show_stack_log_lvl(struct ta
+               printk("%08lx ", *stack++);
+       }
+       printk("\n%sCall Trace:\n", log_lvl);
  +
-+      *vma_prot = __pgprot((pgprot_val(*vma_prot) & ~_PAGE_CACHE_MASK) |
-+                           flags);
-+      return 1;
-+}
+       show_trace_log_lvl(task, regs, sp, bp, log_lvl);
+ }
+ 
+@@ -307,8 +314,8 @@ void show_stack(struct task_struct *task
+  */
+ void dump_stack(void)
+ {
+-      unsigned long stack;
+       unsigned long bp = 0;
++      unsigned long stack;
+ 
+ #ifdef CONFIG_FRAME_POINTER
+       if (!bp)
+@@ -320,6 +327,7 @@ void dump_stack(void)
+               init_utsname()->release,
+               (int)strcspn(init_utsname()->version, " "),
+               init_utsname()->version);
  +
-+void map_devmem(unsigned long mfn, unsigned long size, pgprot_t vma_prot)
-+{
-+      u64 addr = (u64)mfn << PAGE_SHIFT;
-+      unsigned long flags;
-+      unsigned long want_flags = (pgprot_val(vma_prot) & _PAGE_CACHE_MASK);
+       show_trace(current, NULL, &stack, bp);
+ }
+ 
+@@ -331,6 +339,7 @@ void show_registers(struct pt_regs *regs
+ 
+       print_modules();
+       __show_registers(regs, 0);
  +
-+      reserve_memtype(addr, addr + size, want_flags, &flags);
-+      if (flags != want_flags) {
-+              printk(KERN_INFO
-+              "%s:%d /dev/mem expected mapping type %s for %Lx-%Lx, got %s\n",
-+                      current->comm, current->pid,
-+                      cattr_name(want_flags),
-+                      addr, (unsigned long long)(addr + size),
-+                      cattr_name(flags));
-+      }
+       printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)",
+               TASK_COMM_LEN, current->comm, task_pid_nr(current),
+               current_thread_info(), current, task_thread_info(current));
+@@ -339,10 +348,10 @@ void show_registers(struct pt_regs *regs
+        * time of the fault..
+        */
+       if (!user_mode_vm(regs)) {
+-              u8 *ip;
+               unsigned int code_prologue = code_bytes * 43 / 64;
+               unsigned int code_len = code_bytes;
+               unsigned char c;
++              u8 *ip;
+ 
+               printk("\n" KERN_EMERG "Stack: ");
+               show_stack_log_lvl(NULL, regs, &regs->sp, 0, KERN_EMERG);
+@@ -369,7 +378,7 @@ void show_registers(struct pt_regs *regs
+               }
+       }
+       printk("\n");
+-}     
  +}
+ 
+ int is_valid_bugaddr(unsigned long ip)
+ {
+@@ -385,10 +394,10 @@ int is_valid_bugaddr(unsigned long ip)
+ 
+ static int die_counter;
+ 
+-int __kprobes __die(const char * str, struct pt_regs * regs, long err)
++int __kprobes __die(const char *str, struct pt_regs *regs, long err)
+ {
+-      unsigned long sp;
+       unsigned short ss;
++      unsigned long sp;
+ 
+       printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter);
+ #ifdef CONFIG_PREEMPT
+@@ -403,8 +412,8 @@ int __kprobes __die(const char * str, st
+       printk("\n");
+ 
+       if (notify_die(DIE_OOPS, str, regs, err,
+-                              current->thread.trap_no, SIGSEGV) !=
+-                      NOTIFY_STOP) {
++                      current->thread.trap_no, SIGSEGV) != NOTIFY_STOP) {
  +
-+void unmap_devmem(unsigned long mfn, unsigned long size, pgprot_t vma_prot)
-+{
-+      u64 addr = (u64)mfn << PAGE_SHIFT;
+               show_registers(regs);
+               /* Executive summary in case the oops scrolled away */
+               sp = (unsigned long) (&regs->sp);
+@@ -416,17 +425,18 @@ int __kprobes __die(const char * str, st
+               printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip);
+               print_symbol("%s", regs->ip);
+               printk(" SS:ESP %04x:%08lx\n", ss, sp);
  +
-+      free_memtype(addr, addr + size);
-+}
+               return 0;
+-      } else {
+-              return 1;
+       }
  +
---- a/arch/x86/mm/pgtable_32-xen.c
-+++ b/arch/x86/mm/pgtable_32-xen.c
-@@ -1,7 +1,3 @@
--/*
-- *  linux/arch/i386/mm/pgtable.c
-- */
--
- #include <linux/sched.h>
- #include <linux/kernel.h>
- #include <linux/errno.h>
-@@ -41,7 +37,6 @@ void show_mem(void)
++      return 1;
+ }
   
-       printk(KERN_INFO "Mem-info:\n");
-       show_free_areas();
--      printk(KERN_INFO "Free swap:       %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
-       for_each_online_pgdat(pgdat) {
-               pgdat_resize_lock(pgdat, &flags);
-               for (i = 0; i < pgdat->node_spanned_pages; ++i) {
-@@ -157,243 +152,6 @@ void __init reserve_top_address(unsigned
-       __VMALLOC_RESERVE += reserve;
+ /*
+- * This is gone through when something in the kernel has done something bad and
+- * is about to be terminated.
++ * This is gone through when something in the kernel has done something bad
++ * and is about to be terminated:
+  */
+-void die(const char * str, struct pt_regs * regs, long err)
++void die(const char *str, struct pt_regs *regs, long err)
+ {
+       static struct {
+               raw_spinlock_t lock;
+@@ -448,8 +458,9 @@ void die(const char * str, struct pt_reg
+               die.lock_owner = smp_processor_id();
+               die.lock_owner_depth = 0;
+               bust_spinlocks(1);
+-      } else
++      } else {
+               raw_local_irq_save(flags);
++      }
+ 
+       if (++die.lock_owner_depth < 3) {
+               report_bug(regs->ip, regs);
+@@ -482,19 +493,20 @@ void die(const char * str, struct pt_reg
+       do_exit(SIGSEGV);
   }
   
--pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
--{
--      pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
--      if (pte)
--              make_lowmem_page_readonly(pte, XENFEAT_writable_page_tables);
--      return pte;
--}
--
--/*
-- * List of all pgd's needed for non-PAE so it can invalidate entries
-- * in both cached and uncached pgd's; not needed for PAE since the
-- * kernel pmd is shared. If PAE were not to share the pmd a similar
-- * tactic would be needed. This is essentially codepath-based locking
-- * against pageattr.c; it is the unique case in which a valid change
-- * of kernel pagetables can't be lazily synchronized by vmalloc faults.
-- * vmalloc faults work because attached pagetables are never freed.
-- * -- wli
-- */
--static inline void pgd_list_add(pgd_t *pgd)
--{
--      struct page *page = virt_to_page(pgd);
--
--      list_add(&page->lru, &pgd_list);
--}
--
--static inline void pgd_list_del(pgd_t *pgd)
--{
--      struct page *page = virt_to_page(pgd);
--
--      list_del(&page->lru);
--}
--
--#define UNSHARED_PTRS_PER_PGD                         \
--      (SHARED_KERNEL_PMD ? USER_PTRS_PER_PGD : PTRS_PER_PGD)
--
--static void pgd_ctor(void *p)
--{
--      pgd_t *pgd = p;
--      unsigned long flags;
--
--      pgd_test_and_unpin(pgd);
--
--      /* Clear usermode parts of PGD */
--      memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
--
--      spin_lock_irqsave(&pgd_lock, flags);
--
--      /* If the pgd points to a shared pagetable level (either the
--         ptes in non-PAE, or shared PMD in PAE), then just copy the
--         references from swapper_pg_dir. */
--      if (PAGETABLE_LEVELS == 2 ||
--          (PAGETABLE_LEVELS == 3 && SHARED_KERNEL_PMD)) {
--              clone_pgd_range(pgd + USER_PTRS_PER_PGD,
--                              swapper_pg_dir + USER_PTRS_PER_PGD,
--                              KERNEL_PGD_PTRS);
--              paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT,
--                                      __pa(swapper_pg_dir) >> PAGE_SHIFT,
--                                      USER_PTRS_PER_PGD,
--                                      KERNEL_PGD_PTRS);
--      }
--
--      /* list required to sync kernel mapping updates */
--      if (PAGETABLE_LEVELS == 2)
--              pgd_list_add(pgd);
--
--      spin_unlock_irqrestore(&pgd_lock, flags);
--}
--
--static void pgd_dtor(void *pgd)
--{
--      unsigned long flags; /* can be called from interrupt context */
--
--      if (!SHARED_KERNEL_PMD) {
--              spin_lock_irqsave(&pgd_lock, flags);
--              pgd_list_del(pgd);
--              spin_unlock_irqrestore(&pgd_lock, flags);
--      }
--
--      pgd_test_and_unpin(pgd);
--}
--
--#ifdef CONFIG_X86_PAE
--/*
-- * Mop up any pmd pages which may still be attached to the pgd.
-- * Normally they will be freed by munmap/exit_mmap, but any pmd we
-- * preallocate which never got a corresponding vma will need to be
-- * freed manually.
-- */
--static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
--{
--      int i;
--
--      for(i = 0; i < UNSHARED_PTRS_PER_PGD; i++) {
--              pgd_t pgd = pgdp[i];
--
--              if (__pgd_val(pgd) != 0) {
--                      pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd);
--
--                      pgdp[i] = xen_make_pgd(0);
--
--                      paravirt_release_pd(pgd_val(pgd) >> PAGE_SHIFT);
--                      pmd_free(mm, pmd);
--              }
--      }
--}
--
--/*
-- * In PAE mode, we need to do a cr3 reload (=tlb flush) when
-- * updating the top-level pagetable entries to guarantee the
-- * processor notices the update.  Since this is expensive, and
-- * all 4 top-level entries are used almost immediately in a
-- * new process's life, we just pre-populate them here.
-- *
-- * Also, if we're in a paravirt environment where the kernel pmd is
-- * not shared between pagetables (!SHARED_KERNEL_PMDS), we allocate
-- * and initialize the kernel pmds here.
-- */
--static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
--{
--      pud_t *pud;
--      pmd_t *pmds[UNSHARED_PTRS_PER_PGD];
--      unsigned long addr, flags;
--      int i;
--
--      /*
--       * We can race save/restore (if we sleep during a GFP_KERNEL memory
--       * allocation). We therefore store virtual addresses of pmds as they
--       * do not change across save/restore, and poke the machine addresses
--       * into the pgdir under the pgd_lock.
--       */
--      for (addr = i = 0; i < UNSHARED_PTRS_PER_PGD; i++, addr += PUD_SIZE) {
--              pmds[i] = pmd_alloc_one(mm, addr);
--              if (!pmds[i])
--                      goto out_oom;
--      }
--
--      spin_lock_irqsave(&pgd_lock, flags);
--
--      /* Protect against save/restore: move below 4GB under pgd_lock. */
--      if (!xen_feature(XENFEAT_pae_pgdir_above_4gb)
--          && xen_create_contiguous_region((unsigned long)pgd, 0, 32)) {
--              spin_unlock_irqrestore(&pgd_lock, flags);
--out_oom:
--              while (i--)
--                      pmd_free(mm, pmds[i]);
--              return 0;
+-static inline void die_if_kernel(const char * str, struct pt_regs * regs, long err)
++static inline void
++die_if_kernel(const char *str, struct pt_regs *regs, long err)
+ {
+       if (!user_mode_vm(regs))
+               die(str, regs, err);
+ }
+ 
+-static void __kprobes do_trap(int trapnr, int signr, char *str, int vm86,
+-                            struct pt_regs * regs, long error_code,
+-                            siginfo_t *info)
++static void __kprobes
++do_trap(int trapnr, int signr, char *str, int vm86, struct pt_regs *regs,
++      long error_code, siginfo_t *info)
+ {
+       struct task_struct *tsk = current;
+ 
+-      if (regs->flags & VM_MASK) {
++      if (regs->flags & X86_VM_MASK) {
+               if (vm86)
+                       goto vm86_trap;
+               goto trap_signal;
+@@ -503,109 +515,112 @@ static void __kprobes do_trap(int trapnr
+       if (!user_mode(regs))
+               goto kernel_trap;
+ 
+-      trap_signal: {
+-              /*
+-               * We want error_code and trap_no set for userspace faults and
+-               * kernelspace faults which result in die(), but not
+-               * kernelspace faults which are fixed up.  die() gives the
+-               * process no chance to handle the signal and notice the
+-               * kernel fault information, so that won't result in polluting
+-               * the information about previously queued, but not yet
+-               * delivered, faults.  See also do_general_protection below.
+-               */
+-              tsk->thread.error_code = error_code;
+-              tsk->thread.trap_no = trapnr;
++trap_signal:
++      /*
++       * We want error_code and trap_no set for userspace faults and
++       * kernelspace faults which result in die(), but not
++       * kernelspace faults which are fixed up.  die() gives the
++       * process no chance to handle the signal and notice the
++       * kernel fault information, so that won't result in polluting
++       * the information about previously queued, but not yet
++       * delivered, faults.  See also do_general_protection below.
++       */
++      tsk->thread.error_code = error_code;
++      tsk->thread.trap_no = trapnr;
+ 
+-              if (info)
+-                      force_sig_info(signr, info, tsk);
+-              else
+-                      force_sig(signr, tsk);
+-              return;
  -      }
--
--      /* Copy kernel pmd contents and write-protect the new pmds. */
--      pud = pud_offset(pgd, 0);
--      for (addr = i = 0; i < UNSHARED_PTRS_PER_PGD;
--           i++, pud++, addr += PUD_SIZE) {
--              if (i >= USER_PTRS_PER_PGD) {
--                      memcpy(pmds[i],
--                             (pmd_t *)pgd_page_vaddr(swapper_pg_dir[i]),
--                             sizeof(pmd_t) * PTRS_PER_PMD);
--                      make_lowmem_page_readonly(
--                              pmds[i], XENFEAT_writable_page_tables);
++      if (info)
++              force_sig_info(signr, info, tsk);
++      else
++              force_sig(signr, tsk);
++      return;
+ 
+-      kernel_trap: {
+-              if (!fixup_exception(regs)) {
+-                      tsk->thread.error_code = error_code;
+-                      tsk->thread.trap_no = trapnr;
+-                      die(str, regs, error_code);
  -              }
--
--              /* It is safe to poke machine addresses of pmds under the pgd_lock. */
--              pud_populate(mm, pud, pmds[i]);
--      }
--
--      /* List required to sync kernel mapping updates and
--       * to pin/unpin on save/restore. */
--      pgd_list_add(pgd);
--
--      spin_unlock_irqrestore(&pgd_lock, flags);
--
--      return 1;
--}
--#else  /* !CONFIG_X86_PAE */
--/* No need to prepopulate any pagetable entries in non-PAE modes. */
--static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
--{
--      return 1;
--}
--
--static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
--{
--}
--#endif        /* CONFIG_X86_PAE */
--
--pgd_t *pgd_alloc(struct mm_struct *mm)
--{
--      pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
--
--      /* so that alloc_pd can use it */
--      mm->pgd = pgd;
--      if (pgd)
--              pgd_ctor(pgd);
--
--      if (pgd && !pgd_prepopulate_pmd(mm, pgd)) {
--              free_page((unsigned long)pgd);
--              pgd = NULL;
+-              return;
++kernel_trap:
++      if (!fixup_exception(regs)) {
++              tsk->thread.error_code = error_code;
++              tsk->thread.trap_no = trapnr;
++              die(str, regs, error_code);
+       }
++      return;
+ 
+-      vm86_trap: {
+-              int ret = handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, trapnr);
+-              if (ret) goto trap_signal;
+-              return;
  -      }
--
--      return pgd;
--}
--
--void pgd_free(struct mm_struct *mm, pgd_t *pgd)
--{
--      /*
--       * After this the pgd should not be pinned for the duration of this
--       * function's execution. We should never sleep and thus never race:
--       *  1. User pmds will not become write-protected under our feet due
--       *     to a concurrent mm_pin_all().
--       *  2. The machine addresses in PGD entries will not become invalid
--       *     due to a concurrent save/restore.
--       */
--      pgd_dtor(pgd);
--
--      if (PTRS_PER_PMD > 1 && !xen_feature(XENFEAT_pae_pgdir_above_4gb))
--              xen_destroy_contiguous_region((unsigned long)pgd, 0);
--
--      pgd_mop_up_pmds(mm, pgd);
--      free_page((unsigned long)pgd);
++vm86_trap:
++      if (handle_vm86_trap((struct kernel_vm86_regs *) regs,
++                                              error_code, trapnr))
++              goto trap_signal;
++      return;
+ }
+ 
+-#define DO_ERROR(trapnr, signr, str, name) \
+-void do_##name(struct pt_regs * regs, long error_code) \
+-{ \
+-      if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
+-                                              == NOTIFY_STOP) \
+-              return; \
+-      do_trap(trapnr, signr, str, 0, regs, error_code, NULL); \
  -}
  -
--void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
--{
--      pgtable_page_dtor(pte);
--      paravirt_release_pt(page_to_pfn(pte));
--      tlb_remove_page(tlb, pte);
+-#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr, irq) \
+-void do_##name(struct pt_regs * regs, long error_code) \
+-{ \
+-      siginfo_t info; \
+-      if (irq) \
+-              local_irq_enable(); \
+-      info.si_signo = signr; \
+-      info.si_errno = 0; \
+-      info.si_code = sicode; \
+-      info.si_addr = (void __user *)siaddr; \
+-      if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
+-                                              == NOTIFY_STOP) \
+-              return; \
+-      do_trap(trapnr, signr, str, 0, regs, error_code, &info); \
  -}
  -
--#ifdef CONFIG_X86_PAE
--
--void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
--{
--      paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
--      tlb_remove_page(tlb, virt_to_page(pmd));
+-#define DO_VM86_ERROR(trapnr, signr, str, name) \
+-void do_##name(struct pt_regs * regs, long error_code) \
+-{ \
+-      if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
+-                                              == NOTIFY_STOP) \
+-              return; \
+-      do_trap(trapnr, signr, str, 1, regs, error_code, NULL); \
  -}
  -
--#endif
--
- void make_lowmem_page_readonly(void *va, unsigned int feature)
- {
-       pte_t *pte;
---- /dev/null
-+++ b/arch/x86/mm/pgtable-xen.c
-@@ -0,0 +1,709 @@
-+#include <linux/mm.h>
-+#include <linux/module.h>
-+#include <xen/features.h>
-+#include <asm/pgalloc.h>
-+#include <asm/pgtable.h>
-+#include <asm/tlb.h>
-+#include <asm/hypervisor.h>
-+#include <asm/mmu_context.h>
-+
-+pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
-+{
-+      pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
-+      if (pte)
-+              make_lowmem_page_readonly(pte, XENFEAT_writable_page_tables);
-+      return pte;
-+}
-+
-+static void _pte_free(struct page *page, unsigned int order)
-+{
-+      BUG_ON(order);
-+      __pte_free(page);
-+}
-+
-+pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
-+{
-+      struct page *pte;
-+
-+#ifdef CONFIG_HIGHPTE
-+      pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT|__GFP_ZERO, 0);
-+#else
-+      pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
-+#endif
-+      if (pte) {
-+              pgtable_page_ctor(pte);
-+              SetPageForeign(pte, _pte_free);
-+              init_page_count(pte);
-+      }
-+      return pte;
-+}
-+
-+void __pte_free(pgtable_t pte)
-+{
-+      if (!PageHighMem(pte)) {
-+              unsigned long va = (unsigned long)page_address(pte);
-+              unsigned int level;
-+              pte_t *ptep = lookup_address(va, &level);
-+
-+              BUG_ON(!ptep || level != PG_LEVEL_4K || !pte_present(*ptep));
-+              if (!pte_write(*ptep)
-+                  && HYPERVISOR_update_va_mapping(va,
-+                                                  mk_pte(pte, PAGE_KERNEL),
-+                                                  0))
-+                      BUG();
-+      } else
-+#ifdef CONFIG_HIGHPTE
-+              ClearPagePinned(pte);
-+#else
-+              BUG();
-+#endif
-+
-+      ClearPageForeign(pte);
-+      init_page_count(pte);
-+      pgtable_page_dtor(pte);
-+      __free_page(pte);
-+}
-+
-+void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
-+{
-+      pgtable_page_dtor(pte);
-+      paravirt_release_pte(page_to_pfn(pte));
-+      tlb_remove_page(tlb, pte);
+-#define DO_VM86_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \
+-void do_##name(struct pt_regs * regs, long error_code) \
+-{ \
+-      siginfo_t info; \
+-      info.si_signo = signr; \
+-      info.si_errno = 0; \
+-      info.si_code = sicode; \
+-      info.si_addr = (void __user *)siaddr; \
+-      trace_hardirqs_fixup(); \
+-      if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
+-                                              == NOTIFY_STOP) \
+-              return; \
+-      do_trap(trapnr, signr, str, 1, regs, error_code, &info); \
++#define DO_ERROR(trapnr, signr, str, name)                            \
++void do_##name(struct pt_regs *regs, long error_code)                 \
++{                                                                     \
++      trace_hardirqs_fixup();                                         \
++      if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr)  \
++                                              == NOTIFY_STOP)         \
++              return;                                                 \
++      do_trap(trapnr, signr, str, 0, regs, error_code, NULL);         \
  +}
  +
-+#if PAGETABLE_LEVELS > 2
-+static void _pmd_free(struct page *page, unsigned int order)
-+{
-+      BUG_ON(order);
-+      __pmd_free(page);
++#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr, irq)  \
++void do_##name(struct pt_regs *regs, long error_code)                 \
++{                                                                     \
++      siginfo_t info;                                                 \
++      if (irq)                                                        \
++              local_irq_enable();                                     \
++      info.si_signo = signr;                                          \
++      info.si_errno = 0;                                              \
++      info.si_code = sicode;                                          \
++      info.si_addr = (void __user *)siaddr;                           \
++      if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr)  \
++                                              == NOTIFY_STOP)         \
++              return;                                                 \
++      do_trap(trapnr, signr, str, 0, regs, error_code, &info);        \
  +}
  +
-+pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
-+{
-+      struct page *pmd;
-+
-+      pmd = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
-+      if (!pmd)
-+              return NULL;
-+      SetPageForeign(pmd, _pmd_free);
-+      init_page_count(pmd);
-+      return page_address(pmd);
++#define DO_VM86_ERROR(trapnr, signr, str, name)                               \
++void do_##name(struct pt_regs *regs, long error_code)                 \
++{                                                                     \
++      if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr)  \
++                                              == NOTIFY_STOP)         \
++              return;                                                 \
++      do_trap(trapnr, signr, str, 1, regs, error_code, NULL);         \
  +}
  +
-+void __pmd_free(pgtable_t pmd)
-+{
-+      unsigned long va = (unsigned long)page_address(pmd);
-+      unsigned int level;
-+      pte_t *ptep = lookup_address(va, &level);
-+
-+      BUG_ON(!ptep || level != PG_LEVEL_4K || !pte_present(*ptep));
-+      if (!pte_write(*ptep)
-+          && HYPERVISOR_update_va_mapping(va, mk_pte(pmd, PAGE_KERNEL), 0))
-+              BUG();
-+
-+      ClearPageForeign(pmd);
-+      init_page_count(pmd);
-+      __free_page(pmd);
-+}
++#define DO_VM86_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr)  \
++void do_##name(struct pt_regs *regs, long error_code)                 \
++{                                                                     \
++      siginfo_t info;                                                 \
++      info.si_signo = signr;                                          \
++      info.si_errno = 0;                                              \
++      info.si_code = sicode;                                          \
++      info.si_addr = (void __user *)siaddr;                           \
++      trace_hardirqs_fixup();                                         \
++      if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr)  \
++                                              == NOTIFY_STOP)         \
++              return;                                                 \
++      do_trap(trapnr, signr, str, 1, regs, error_code, &info);        \
+ }
+ 
+-DO_VM86_ERROR_INFO( 0, SIGFPE,  "divide error", divide_error, FPE_INTDIV, regs->ip)
++DO_VM86_ERROR_INFO(0, SIGFPE,  "divide error", divide_error, FPE_INTDIV, regs->ip)
+ #ifndef CONFIG_KPROBES
+-DO_VM86_ERROR( 3, SIGTRAP, "int3", int3)
++DO_VM86_ERROR(3, SIGTRAP, "int3", int3)
+ #endif
+-DO_VM86_ERROR( 4, SIGSEGV, "overflow", overflow)
+-DO_VM86_ERROR( 5, SIGSEGV, "bounds", bounds)
+-DO_ERROR_INFO( 6, SIGILL,  "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip, 0)
+-DO_ERROR( 9, SIGFPE,  "coprocessor segment overrun", coprocessor_segment_overrun)
++DO_VM86_ERROR(4, SIGSEGV, "overflow", overflow)
++DO_VM86_ERROR(5, SIGSEGV, "bounds", bounds)
++DO_ERROR_INFO(6, SIGILL,  "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip, 0)
++DO_ERROR(9, SIGFPE,  "coprocessor segment overrun", coprocessor_segment_overrun)
+ DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
+ DO_ERROR(11, SIGBUS,  "segment not present", segment_not_present)
+ DO_ERROR(12, SIGBUS,  "stack segment", stack_segment)
+ DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0, 0)
+-DO_ERROR_INFO(32, SIGSEGV, "iret exception", iret_error, ILL_BADSTK, 0, 1)
++DO_ERROR_INFO(32, SIGILL, "iret exception", iret_error, ILL_BADSTK, 0, 1)
+ 
+ void __kprobes do_general_protection(struct pt_regs * regs,
+                                             long error_code)
+ {
+-      if (regs->flags & VM_MASK)
++      struct thread_struct *thread;
  +
-+void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
-+{
-+      paravirt_release_pmd(__pa(pmd) >> PAGE_SHIFT);
-+      tlb_remove_page(tlb, virt_to_page(pmd));
-+}
++      thread = &current->thread;
  +
-+#if PAGETABLE_LEVELS > 3
-+void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud)
-+{
-+      paravirt_release_pud(__pa(pud) >> PAGE_SHIFT);
-+      tlb_remove_page(tlb, virt_to_page(pud));
-+}
-+#endif        /* PAGETABLE_LEVELS > 3 */
-+#endif        /* PAGETABLE_LEVELS > 2 */
++      if (regs->flags & X86_VM_MASK)
+               goto gp_in_vm86;
+ 
+       if (!user_mode(regs))
+@@ -613,6 +628,7 @@ void __kprobes do_general_protection(str
+ 
+       current->thread.error_code = error_code;
+       current->thread.trap_no = 13;
  +
-+#ifndef CONFIG_X86_64
-+#define TASK_SIZE64 TASK_SIZE
-+#endif
+       if (show_unhandled_signals && unhandled_signal(current, SIGSEGV) &&
+           printk_ratelimit()) {
+               printk(KERN_INFO
+@@ -642,22 +658,25 @@ gp_in_kernel:
+       }
+ }
+ 
+-static __kprobes void
+-mem_parity_error(unsigned char reason, struct pt_regs * regs)
++static notrace __kprobes void
++mem_parity_error(unsigned char reason, struct pt_regs *regs)
+ {
+-      printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x on "
+-              "CPU %d.\n", reason, smp_processor_id());
+-      printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n");
++      printk(KERN_EMERG
++              "Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
++                      reason, smp_processor_id());
  +
-+static void _pin_lock(struct mm_struct *mm, int lock) {
-+      if (lock)
-+              spin_lock(&mm->page_table_lock);
-+#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
-+      /* While mm->page_table_lock protects us against insertions and
-+       * removals of higher level page table pages, it doesn't protect
-+       * against updates of pte-s. Such updates, however, require the
-+       * pte pages to be in consistent state (unpinned+writable or
-+       * pinned+readonly). The pinning and attribute changes, however
-+       * cannot be done atomically, which is why such updates must be
-+       * prevented from happening concurrently.
-+       * Note that no pte lock can ever elsewhere be acquired nesting
-+       * with an already acquired one in the same mm, or with the mm's
-+       * page_table_lock already acquired, as that would break in the
-+       * non-split case (where all these are actually resolving to the
-+       * one page_table_lock). Thus acquiring all of them here is not
-+       * going to result in dead locks, and the order of acquires
-+       * doesn't matter.
++      printk(KERN_EMERG
++              "You have some hardware problem, likely on the PCI bus.\n");
+ 
+ #if defined(CONFIG_EDAC)
+-      if(edac_handler_set()) {
++      if (edac_handler_set()) {
+               edac_atomic_assert_error();
+               return;
+       }
+ #endif
+ 
+       if (panic_on_unrecovered_nmi)
+-                panic("NMI: Not continuing");
++              panic("NMI: Not continuing");
+ 
+       printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
+ 
+@@ -665,8 +684,8 @@ mem_parity_error(unsigned char reason, s
+       clear_mem_error(reason);
+ }
+ 
+-static __kprobes void
+-io_check_error(unsigned char reason, struct pt_regs * regs)
++static notrace __kprobes void
++io_check_error(unsigned char reason, struct pt_regs *regs)
+ {
+       printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n");
+       show_registers(regs);
+@@ -675,38 +694,43 @@ io_check_error(unsigned char reason, str
+       clear_io_check_error(reason);
+ }
+ 
+-static __kprobes void
+-unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
++static notrace __kprobes void
++unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
+ {
++      if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
++              return;
+ #ifdef CONFIG_MCA
+-      /* Might actually be able to figure out what the guilty party
+-      * is. */
+-      if( MCA_bus ) {
++      /*
++       * Might actually be able to figure out what the guilty party
++       * is:
  +       */
-+      {
-+              pgd_t *pgd = mm->pgd;
-+              unsigned g;
-+
-+              for (g = 0; g <= ((TASK_SIZE64-1) / PGDIR_SIZE); g++, pgd++) {
-+                      pud_t *pud;
-+                      unsigned u;
-+
-+                      if (pgd_none(*pgd))
-+                              continue;
-+                      pud = pud_offset(pgd, 0);
-+                      for (u = 0; u < PTRS_PER_PUD; u++, pud++) {
-+                              pmd_t *pmd;
-+                              unsigned m;
++      if (MCA_bus) {
+               mca_handle_nmi();
+               return;
+       }
+ #endif
+-      printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x on "
+-              "CPU %d.\n", reason, smp_processor_id());
++      printk(KERN_EMERG
++              "Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
++                      reason, smp_processor_id());
  +
-+                              if (pud_none(*pud))
-+                                      continue;
-+                              pmd = pmd_offset(pud, 0);
-+                              for (m = 0; m < PTRS_PER_PMD; m++, pmd++) {
-+                                      spinlock_t *ptl;
+       printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n");
+       if (panic_on_unrecovered_nmi)
+-                panic("NMI: Not continuing");
++              panic("NMI: Not continuing");
+ 
+       printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
+ }
+ 
+ static DEFINE_SPINLOCK(nmi_print_lock);
+ 
+-void __kprobes die_nmi(struct pt_regs *regs, const char *msg)
++void notrace __kprobes die_nmi(struct pt_regs *regs, const char *msg)
+ {
+-      if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 2, SIGINT) ==
+-          NOTIFY_STOP)
++      if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 2, SIGINT) == NOTIFY_STOP)
+               return;
+ 
+       spin_lock(&nmi_print_lock);
+       /*
+       * We are in trouble anyway, lets at least try
+-      * to get a message out.
++      * to get a message out:
+       */
+       bust_spinlocks(1);
+       printk(KERN_EMERG "%s", msg);
+@@ -717,9 +741,10 @@ void __kprobes die_nmi(struct pt_regs *r
+       spin_unlock(&nmi_print_lock);
+       bust_spinlocks(0);
+ 
+-      /* If we are in kernel we are probably nested up pretty bad
+-       * and might aswell get out now while we still can.
+-      */
++      /*
++       * If we are in kernel we are probably nested up pretty bad
++       * and might aswell get out now while we still can:
++       */
+       if (!user_mode_vm(regs)) {
+               current->thread.trap_no = 2;
+               crash_kexec(regs);
+@@ -728,14 +753,14 @@ void __kprobes die_nmi(struct pt_regs *r
+       do_exit(SIGSEGV);
+ }
+ 
+-static __kprobes void default_do_nmi(struct pt_regs * regs)
++static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
+ {
+       unsigned char reason = 0;
+ 
+-      /* Only the BSP gets external NMIs from the system.  */
++      /* Only the BSP gets external NMIs from the system: */
+       if (!smp_processor_id())
+               reason = get_nmi_reason();
+- 
  +
-+                                      if (pmd_none(*pmd))
-+                                              continue;
-+                                      ptl = pte_lockptr(0, pmd);
-+                                      if (lock)
-+                                              spin_lock(ptl);
-+                                      else
-+                                              spin_unlock(ptl);
-+                              }
-+                      }
-+              }
-+      }
+       if (!(reason & 0xc0)) {
+               if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT)
+                                                       == NOTIFY_STOP)
+@@ -748,8 +773,10 @@ static __kprobes void default_do_nmi(str
+               if (nmi_watchdog_tick(regs, reason))
+                       return;
+               if (!do_nmi_callback(regs, smp_processor_id()))
+-#endif
+                       unknown_nmi_error(reason, regs);
++#else
++              unknown_nmi_error(reason, regs);
  +#endif
-+      if (!lock)
-+              spin_unlock(&mm->page_table_lock);
-+}
-+#define pin_lock(mm) _pin_lock(mm, 1)
-+#define pin_unlock(mm) _pin_lock(mm, 0)
-+
-+#define PIN_BATCH sizeof(void *)
-+static DEFINE_PER_CPU(multicall_entry_t[PIN_BATCH], pb_mcl);
-+
-+static inline unsigned int pgd_walk_set_prot(struct page *page, pgprot_t flags,
-+                                           unsigned int cpu, unsigned int seq)
-+{
-+      unsigned long pfn = page_to_pfn(page);
+ 
+               return;
+       }
+@@ -761,14 +788,14 @@ static __kprobes void default_do_nmi(str
+               io_check_error(reason, regs);
+       /*
+        * Reassert NMI in case it became active meanwhile
+-       * as it's edge-triggered.
++       * as it's edge-triggered:
+        */
+       reassert_nmi();
+ }
+ 
+ static int ignore_nmis;
+ 
+-__kprobes void do_nmi(struct pt_regs * regs, long error_code)
++notrace __kprobes void do_nmi(struct pt_regs *regs, long error_code)
+ {
+       int cpu;
+ 
+@@ -804,9 +831,12 @@ void __kprobes do_int3(struct pt_regs *r
+       if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP)
+                       == NOTIFY_STOP)
+               return;
+-      /* This is an interrupt gate, because kprobes wants interrupts
+-      disabled.  Normal trap handlers don't. */
++      /*
++       * This is an interrupt gate, because kprobes wants interrupts
++       * disabled. Normal trap handlers don't.
++       */
+       restore_interrupts(regs);
  +
-+      if (PageHighMem(page)) {
-+              if (pgprot_val(flags) & _PAGE_RW)
-+                      ClearPagePinned(page);
-+              else
-+                      SetPagePinned(page);
-+      } else {
-+              MULTI_update_va_mapping(per_cpu(pb_mcl, cpu) + seq,
-+                                      (unsigned long)__va(pfn << PAGE_SHIFT),
-+                                      pfn_pte(pfn, flags), 0);
-+              if (unlikely(++seq == PIN_BATCH)) {
-+                      if (unlikely(HYPERVISOR_multicall_check(per_cpu(pb_mcl, cpu),
-+                                                              PIN_BATCH, NULL)))
-+                              BUG();
-+                      seq = 0;
-+              }
+       do_trap(3, SIGTRAP, "int3", 1, regs, error_code, NULL);
+ }
+ #endif
+@@ -821,7 +851,7 @@ void __kprobes do_int3(struct pt_regs *r
+  * from user space. Such code must not hold kernel locks (since it
+  * can equally take a page fault), therefore it is safe to call
+  * force_sig_info even though that claims and releases locks.
+- * 
++ *
+  * Code in ./signal.c ensures that the debug control register
+  * is restored before we deliver any signal, and therefore that
+  * user code runs with the correct debug control register even though
+@@ -833,10 +863,10 @@ void __kprobes do_int3(struct pt_regs *r
+  * find every occurrence of the TF bit that could be saved away even
+  * by user code)
+  */
+-void __kprobes do_debug(struct pt_regs * regs, long error_code)
++void __kprobes do_debug(struct pt_regs *regs, long error_code)
+ {
+-      unsigned int condition;
+       struct task_struct *tsk = current;
++      unsigned int condition;
+ 
+       trace_hardirqs_fixup();
+ 
+@@ -861,7 +891,7 @@ void __kprobes do_debug(struct pt_regs *
+                       goto clear_dr7;
+       }
+ 
+-      if (regs->flags & VM_MASK)
++      if (regs->flags & X86_VM_MASK)
+               goto debug_vm86;
+ 
+       /* Save debug status register where ptrace can see it */
+@@ -884,7 +914,8 @@ void __kprobes do_debug(struct pt_regs *
+       /* Ok, finally something we can handle */
+       send_sigtrap(tsk, regs, error_code);
+ 
+-      /* Disable additional traps. They'll be re-enabled when
++      /*
++       * Disable additional traps. They'll be re-enabled when
+        * the signal is delivered.
+        */
+ clear_dr7:
+@@ -897,7 +928,7 @@ debug_vm86:
+ 
+ clear_TF_reenable:
+       set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
+-      regs->flags &= ~TF_MASK;
++      regs->flags &= ~X86_EFLAGS_TF;
+       return;
+ }
+ 
+@@ -908,9 +939,10 @@ clear_TF_reenable:
+  */
+ void math_error(void __user *ip)
+ {
+-      struct task_struct * task;
++      struct task_struct *task;
++      unsigned short cwd;
++      unsigned short swd;
+       siginfo_t info;
+-      unsigned short cwd, swd;
+ 
+       /*
+        * Save the info for the exception handler and clear the error.
+@@ -936,36 +968,36 @@ void math_error(void __user *ip)
+       cwd = get_fpu_cwd(task);
+       swd = get_fpu_swd(task);
+       switch (swd & ~cwd & 0x3f) {
+-              case 0x000: /* No unmasked exception */
+-                      return;
+-              default:    /* Multiple exceptions */
+-                      break;
+-              case 0x001: /* Invalid Op */
+-                      /*
+-                       * swd & 0x240 == 0x040: Stack Underflow
+-                       * swd & 0x240 == 0x240: Stack Overflow
+-                       * User must clear the SF bit (0x40) if set
+-                       */
+-                      info.si_code = FPE_FLTINV;
+-                      break;
+-              case 0x002: /* Denormalize */
+-              case 0x010: /* Underflow */
+-                      info.si_code = FPE_FLTUND;
+-                      break;
+-              case 0x004: /* Zero Divide */
+-                      info.si_code = FPE_FLTDIV;
+-                      break;
+-              case 0x008: /* Overflow */
+-                      info.si_code = FPE_FLTOVF;
+-                      break;
+-              case 0x020: /* Precision */
+-                      info.si_code = FPE_FLTRES;
+-                      break;
++      case 0x000: /* No unmasked exception */
++              return;
++      default:    /* Multiple exceptions */
++              break;
++      case 0x001: /* Invalid Op */
++              /*
++               * swd & 0x240 == 0x040: Stack Underflow
++               * swd & 0x240 == 0x240: Stack Overflow
++               * User must clear the SF bit (0x40) if set
++               */
++              info.si_code = FPE_FLTINV;
++              break;
++      case 0x002: /* Denormalize */
++      case 0x010: /* Underflow */
++              info.si_code = FPE_FLTUND;
++              break;
++      case 0x004: /* Zero Divide */
++              info.si_code = FPE_FLTDIV;
++              break;
++      case 0x008: /* Overflow */
++              info.si_code = FPE_FLTOVF;
++              break;
++      case 0x020: /* Precision */
++              info.si_code = FPE_FLTRES;
++              break;
+       }
+       force_sig_info(SIGFPE, &info, task);
+ }
+ 
+-void do_coprocessor_error(struct pt_regs * regs, long error_code)
++void do_coprocessor_error(struct pt_regs *regs, long error_code)
+ {
+       ignore_fpu_irq = 1;
+       math_error((void __user *)regs->ip);
+@@ -973,9 +1005,9 @@ void do_coprocessor_error(struct pt_regs
+ 
+ static void simd_math_error(void __user *ip)
+ {
+-      struct task_struct * task;
+-      siginfo_t info;
++      struct task_struct *task;
+       unsigned short mxcsr;
++      siginfo_t info;
+ 
+       /*
+        * Save the info for the exception handler and clear the error.
+@@ -996,84 +1028,82 @@ static void simd_math_error(void __user 
+        */
+       mxcsr = get_fpu_mxcsr(task);
+       switch (~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f)) {
+-              case 0x000:
+-              default:
+-                      break;
+-              case 0x001: /* Invalid Op */
+-                      info.si_code = FPE_FLTINV;
+-                      break;
+-              case 0x002: /* Denormalize */
+-              case 0x010: /* Underflow */
+-                      info.si_code = FPE_FLTUND;
+-                      break;
+-              case 0x004: /* Zero Divide */
+-                      info.si_code = FPE_FLTDIV;
+-                      break;
+-              case 0x008: /* Overflow */
+-                      info.si_code = FPE_FLTOVF;
+-                      break;
+-              case 0x020: /* Precision */
+-                      info.si_code = FPE_FLTRES;
+-                      break;
++      case 0x000:
++      default:
++              break;
++      case 0x001: /* Invalid Op */
++              info.si_code = FPE_FLTINV;
++              break;
++      case 0x002: /* Denormalize */
++      case 0x010: /* Underflow */
++              info.si_code = FPE_FLTUND;
++              break;
++      case 0x004: /* Zero Divide */
++              info.si_code = FPE_FLTDIV;
++              break;
++      case 0x008: /* Overflow */
++              info.si_code = FPE_FLTOVF;
++              break;
++      case 0x020: /* Precision */
++              info.si_code = FPE_FLTRES;
++              break;
+       }
+       force_sig_info(SIGFPE, &info, task);
+ }
+ 
+-void do_simd_coprocessor_error(struct pt_regs * regs,
+-                                        long error_code)
++void do_simd_coprocessor_error(struct pt_regs *regs, long error_code)
+ {
+       if (cpu_has_xmm) {
+               /* Handle SIMD FPU exceptions on PIII+ processors. */
+               ignore_fpu_irq = 1;
+               simd_math_error((void __user *)regs->ip);
+-      } else {
+-              /*
+-               * Handle strange cache flush from user space exception
+-               * in all other cases.  This is undocumented behaviour.
+-               */
+-              if (regs->flags & VM_MASK) {
+-                      handle_vm86_fault((struct kernel_vm86_regs *)regs,
+-                                        error_code);
+-                      return;
+-              }
+-              current->thread.trap_no = 19;
+-              current->thread.error_code = error_code;
+-              die_if_kernel("cache flush denied", regs, error_code);
+-              force_sig(SIGSEGV, current);
++              return;
  +      }
-+
-+      return seq;
-+}
-+
-+static void pgd_walk(pgd_t *pgd_base, pgprot_t flags)
-+{
-+      pgd_t       *pgd = pgd_base;
-+      pud_t       *pud;
-+      pmd_t       *pmd;
-+      int          g,u,m;
-+      unsigned int cpu, seq;
-+      multicall_entry_t *mcl;
-+
-+      if (xen_feature(XENFEAT_auto_translated_physmap))
++      /*
++       * Handle strange cache flush from user space exception
++       * in all other cases.  This is undocumented behaviour.
++       */
++      if (regs->flags & X86_VM_MASK) {
++              handle_vm86_fault((struct kernel_vm86_regs *)regs, error_code);
  +              return;
+       }
++      current->thread.trap_no = 19;
++      current->thread.error_code = error_code;
++      die_if_kernel("cache flush denied", regs, error_code);
++      force_sig(SIGSEGV, current);
+ }
+ 
+ #ifndef CONFIG_XEN
+-void do_spurious_interrupt_bug(struct pt_regs * regs,
+-                                        long error_code)
++void do_spurious_interrupt_bug(struct pt_regs *regs, long error_code)
+ {
+ #if 0
+       /* No need to warn about this any longer. */
+-      printk("Ignoring P6 Local APIC Spurious Interrupt Bug...\n");
++      printk(KERN_INFO "Ignoring P6 Local APIC Spurious Interrupt Bug...\n");
+ #endif
+ }
+ 
+-unsigned long patch_espfix_desc(unsigned long uesp,
+-                                        unsigned long kesp)
++unsigned long patch_espfix_desc(unsigned long uesp, unsigned long kesp)
+ {
+       struct desc_struct *gdt = __get_cpu_var(gdt_page).gdt;
+       unsigned long base = (kesp - uesp) & -THREAD_SIZE;
+       unsigned long new_kesp = kesp - base;
+       unsigned long lim_pages = (new_kesp | (THREAD_SIZE - 1)) >> PAGE_SHIFT;
+       __u64 desc = *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS];
  +
-+      cpu = get_cpu();
+       /* Set up base for espfix segment */
+-      desc &= 0x00f0ff0000000000ULL;
+-      desc |= ((((__u64)base) << 16) & 0x000000ffffff0000ULL) |
++      desc &= 0x00f0ff0000000000ULL;
++      desc |= ((((__u64)base) << 16) & 0x000000ffffff0000ULL) |
+               ((((__u64)base) << 32) & 0xff00000000000000ULL) |
+               ((((__u64)lim_pages) << 32) & 0x000f000000000000ULL) |
+               (lim_pages & 0xffff);
+       *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS] = desc;
  +
-+      /*
-+       * Cannot iterate up to USER_PTRS_PER_PGD on x86-64 as these pagetables
-+       * may not be the 'current' task's pagetables (e.g., current may be
-+       * 32-bit, but the pagetables may be for a 64-bit task).
-+       * Subtracting 1 from TASK_SIZE64 means the loop limit is correct
-+       * regardless of whether TASK_SIZE64 is a multiple of PGDIR_SIZE.
-+       */
-+      for (g = 0, seq = 0; g <= ((TASK_SIZE64-1) / PGDIR_SIZE); g++, pgd++) {
-+              if (pgd_none(*pgd))
-+                      continue;
-+              pud = pud_offset(pgd, 0);
-+              if (PTRS_PER_PUD > 1) /* not folded */
-+                      seq = pgd_walk_set_prot(virt_to_page(pud),flags,cpu,seq);
-+              for (u = 0; u < PTRS_PER_PUD; u++, pud++) {
-+                      if (pud_none(*pud))
-+                              continue;
-+                      pmd = pmd_offset(pud, 0);
-+                      if (PTRS_PER_PMD > 1) /* not folded */
-+                              seq = pgd_walk_set_prot(virt_to_page(pmd),flags,cpu,seq);
-+                      for (m = 0; m < PTRS_PER_PMD; m++, pmd++) {
-+                              if (pmd_none(*pmd))
-+                                      continue;
-+                              seq = pgd_walk_set_prot(pmd_page(*pmd),flags,cpu,seq);
-+                      }
+       return new_kesp;
+ }
+ #endif
+ 
+ /*
+- *  'math_state_restore()' saves the current math information in the
++ * 'math_state_restore()' saves the current math information in the
+  * old math state array, and gets the new ones from the current task
+  *
+  * Careful.. There are problems with IBM-designed IRQ13 behaviour.
+@@ -1087,9 +1117,22 @@ asmlinkage void math_state_restore(void)
+       struct thread_info *thread = current_thread_info();
+       struct task_struct *tsk = thread->task;
+ 
++      if (!tsk_used_math(tsk)) {
++              local_irq_enable();
++              /*
++               * does a slab alloc which can sleep
++               */
++              if (init_fpu(tsk)) {
++                      /*
++                       * ran out of memory!
++                       */
++                      do_group_exit(SIGKILL);
++                      return;
  +              }
++              local_irq_disable();
  +      }
  +
-+      mcl = per_cpu(pb_mcl, cpu);
-+#ifdef CONFIG_X86_64
-+      if (unlikely(seq > PIN_BATCH - 2)) {
-+              if (unlikely(HYPERVISOR_multicall_check(mcl, seq, NULL)))
-+                      BUG();
-+              seq = 0;
-+      }
-+      MULTI_update_va_mapping(mcl + seq,
-+             (unsigned long)__user_pgd(pgd_base),
-+             pfn_pte(virt_to_phys(__user_pgd(pgd_base))>>PAGE_SHIFT, flags),
-+             0);
-+      MULTI_update_va_mapping(mcl + seq + 1,
-+             (unsigned long)pgd_base,
-+             pfn_pte(virt_to_phys(pgd_base)>>PAGE_SHIFT, flags),
-+             UVMF_TLB_FLUSH);
-+      if (unlikely(HYPERVISOR_multicall_check(mcl, seq + 2, NULL)))
-+              BUG();
-+#else
-+      if (likely(seq != 0)) {
-+              MULTI_update_va_mapping(per_cpu(pb_mcl, cpu) + seq,
-+                      (unsigned long)pgd_base,
-+                      pfn_pte(virt_to_phys(pgd_base)>>PAGE_SHIFT, flags),
-+                      UVMF_TLB_FLUSH);
-+              if (unlikely(HYPERVISOR_multicall_check(per_cpu(pb_mcl, cpu),
-+                                                      seq + 1, NULL)))
-+                      BUG();
-+      } else if(HYPERVISOR_update_va_mapping((unsigned long)pgd_base,
-+                      pfn_pte(virt_to_phys(pgd_base)>>PAGE_SHIFT, flags),
-+                      UVMF_TLB_FLUSH))
-+              BUG();
-+#endif
-+
-+      put_cpu();
-+}
-+
-+static void __pgd_pin(pgd_t *pgd)
-+{
-+      pgd_walk(pgd, PAGE_KERNEL_RO);
-+      kmap_flush_unused();
-+      xen_pgd_pin(__pa(pgd)); /* kernel */
-+#ifdef CONFIG_X86_64
-+      xen_pgd_pin(__pa(__user_pgd(pgd))); /* user */
-+#endif
-+      SetPagePinned(virt_to_page(pgd));
-+}
-+
-+static void __pgd_unpin(pgd_t *pgd)
-+{
-+      xen_pgd_unpin(__pa(pgd));
-+#ifdef CONFIG_X86_64
-+      xen_pgd_unpin(__pa(__user_pgd(pgd)));
-+#endif
-+      pgd_walk(pgd, PAGE_KERNEL);
-+      ClearPagePinned(virt_to_page(pgd));
-+}
-+
-+static void pgd_test_and_unpin(pgd_t *pgd)
-+{
-+      if (PagePinned(virt_to_page(pgd)))
-+              __pgd_unpin(pgd);
-+}
-+
-+void mm_pin(struct mm_struct *mm)
-+{
-+      if (xen_feature(XENFEAT_writable_page_tables))
-+              return;
-+
-+      pin_lock(mm);
-+      __pgd_pin(mm->pgd);
-+      pin_unlock(mm);
-+}
-+
-+void mm_unpin(struct mm_struct *mm)
-+{
-+      if (xen_feature(XENFEAT_writable_page_tables))
-+              return;
+       /* NB. 'clts' is done for us by Xen during virtual trap. */
+-      if (!tsk_used_math(tsk))
+-              init_fpu(tsk);
+       restore_fpu(tsk);
+       thread->status |= TS_USEDFPU;   /* So we fnsave on switch_to() */
+       tsk->fpu_counter++;
+@@ -1100,15 +1143,15 @@ EXPORT_SYMBOL_GPL(math_state_restore);
+ 
+ asmlinkage void math_emulate(long arg)
+ {
+-      printk(KERN_EMERG "math-emulation not enabled and no coprocessor found.\n");
+-      printk(KERN_EMERG "killing %s.\n",current->comm);
+-      force_sig(SIGFPE,current);
++      printk(KERN_EMERG
++              "math-emulation not enabled and no coprocessor found.\n");
++      printk(KERN_EMERG "killing %s.\n", current->comm);
++      force_sig(SIGFPE, current);
+       schedule();
+ }
+ 
+ #endif /* CONFIG_MATH_EMULATION */
+ 
+-
+ /*
+  * NB. All these are "trap gates" (i.e. events_mask isn't set) except
+  * for those that specify <dpl>|4 in the second field.
+@@ -1146,25 +1189,21 @@ void __init trap_init(void)
+       if (ret)
+               printk("HYPERVISOR_set_trap_table failed: error %d\n", ret);
+ 
+-      /*
+-       * Verify that the FXSAVE/FXRSTOR data will be 16-byte aligned.
+-       * Generate a build-time error if the alignment is wrong.
+-       */
+-      BUILD_BUG_ON(offsetof(struct task_struct, thread.i387.fxsave) & 15);
+       if (cpu_has_fxsr) {
+               printk(KERN_INFO "Enabling fast FPU save and restore... ");
+               set_in_cr4(X86_CR4_OSFXSR);
+               printk("done.\n");
+       }
+       if (cpu_has_xmm) {
+-              printk(KERN_INFO "Enabling unmasked SIMD FPU exception "
+-                              "support... ");
++              printk(KERN_INFO
++                      "Enabling unmasked SIMD FPU exception support... ");
+               set_in_cr4(X86_CR4_OSXMMEXCPT);
+               printk("done.\n");
+       }
+ 
++      init_thread_xstate();
+       /*
+-       * Should be a barrier for any external CPU state.
++       * Should be a barrier for any external CPU state:
+        */
+       cpu_init();
+ }
+@@ -1183,6 +1222,7 @@ void __cpuinit smp_trap_init(trap_info_t
+ static int __init kstack_setup(char *s)
+ {
+       kstack_depth_to_print = simple_strtoul(s, NULL, 0);
  +
-+      pin_lock(mm);
-+      __pgd_unpin(mm->pgd);
-+      pin_unlock(mm);
-+}
+       return 1;
+ }
+ __setup("kstack=", kstack_setup);
+--- sle11-2009-05-14.orig/arch/x86/kernel/traps_64-xen.c       2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/arch/x86/kernel/traps_64-xen.c    2009-03-16 16:38:05.000000000 +0100
+@@ -33,6 +33,8 @@
+ #include <linux/kdebug.h>
+ #include <linux/utsname.h>
+ 
++#include <mach_traps.h>
  +
-+void mm_pin_all(void)
-+{
-+      struct page *page;
+ #if defined(CONFIG_EDAC)
+ #include <linux/edac.h>
+ #endif
+@@ -601,10 +603,16 @@ void die(const char * str, struct pt_reg
+ }
+ 
+ #if defined(CONFIG_X86_LOCAL_APIC) || defined(CONFIG_SYSCTL)
+-void __kprobes die_nmi(char *str, struct pt_regs *regs, int do_panic)
++notrace __kprobes void
++die_nmi(char *str, struct pt_regs *regs, int do_panic)
+ {
+-      unsigned long flags = oops_begin();
  +      unsigned long flags;
  +
-+      if (xen_feature(XENFEAT_writable_page_tables))
++      if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) ==
++          NOTIFY_STOP)
++              return;
+ 
++      flags = oops_begin();
+       /*
+        * We are in trouble anyway, lets at least try
+        * to get a message out.
+@@ -769,7 +777,7 @@ asmlinkage void __kprobes do_general_pro
+       die("general protection fault", regs, error_code);
+ }
+ 
+-static __kprobes void
++static notrace __kprobes void
+ mem_parity_error(unsigned char reason, struct pt_regs * regs)
+ {
+       printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n",
+@@ -792,7 +800,7 @@ mem_parity_error(unsigned char reason, s
+       clear_mem_error(reason);
+ }
+ 
+-static __kprobes void
++static notrace __kprobes void
+ io_check_error(unsigned char reason, struct pt_regs * regs)
+ {
+       printk("NMI: IOCK error (debug interrupt?)\n");
+@@ -802,9 +810,11 @@ io_check_error(unsigned char reason, str
+       clear_io_check_error(reason);
+ }
+ 
+-static __kprobes void
++static notrace __kprobes void
+ unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
+ {
++      if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
  +              return;
+       printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n",
+               reason);
+       printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n");
+@@ -817,7 +827,7 @@ unknown_nmi_error(unsigned char reason, 
+ 
+ /* Runs on IST stack. This code must keep interrupts off all the time.
+    Nested NMIs are prevented by the CPU. */
+-asmlinkage __kprobes void default_do_nmi(struct pt_regs *regs)
++asmlinkage notrace  __kprobes void default_do_nmi(struct pt_regs *regs)
+ {
+       unsigned char reason = 0;
+       int cpu;
+@@ -1117,11 +1127,25 @@ asmlinkage void __attribute__((weak)) mc
+ asmlinkage void math_state_restore(void)
+ {
+       struct task_struct *me = current;
  +
-+      /*
-+       * Allow uninterrupted access to the pgd_list. Also protects
-+       * __pgd_pin() by disabling preemption.
-+       * All other CPUs must be at a safe point (e.g., in stop_machine
-+       * or offlined entirely).
-+       */
-+      spin_lock_irqsave(&pgd_lock, flags);
-+      list_for_each_entry(page, &pgd_list, lru) {
-+              if (!PagePinned(page))
-+                      __pgd_pin((pgd_t *)page_address(page));
++      if (!used_math()) {
++              local_irq_enable();
++              /*
++               * does a slab alloc which can sleep
++               */
++              if (init_fpu(me)) {
++                      /*
++                       * ran out of memory!
++                       */
++                      do_group_exit(SIGKILL);
++                      return;
++              }
++              local_irq_disable();
  +      }
-+      spin_unlock_irqrestore(&pgd_lock, flags);
-+}
-+
-+void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
-+{
-+      if (!PagePinned(virt_to_page(mm->pgd)))
-+              mm_pin(mm);
-+}
-+
-+void arch_exit_mmap(struct mm_struct *mm)
-+{
-+      struct task_struct *tsk = current;
-+
-+      task_lock(tsk);
  +
-+      /*
-+       * We aggressively remove defunct pgd from cr3. We execute unmap_vmas()
-+       * *much* faster this way, as no tlb flushes means bigger wrpt batches.
+         /* clts(); */ /* 'clts' is done for us by Xen during virtual trap. */
+ 
+-      if (!used_math())
+-              init_fpu(me);
+-      restore_fpu_checking(&me->thread.i387.fxsave);
++      restore_fpu_checking(&me->thread.xstate->fxsave);
+       task_thread_info(me)->status |= TS_USEDFPU;
+       me->fpu_counter++;
+ }
+@@ -1168,6 +1192,10 @@ void __init trap_init(void)
+               printk("HYPERVISOR_set_trap_table failed: error %d\n", ret);
+ 
+       /*
++       * initialize the per thread extended state:
  +       */
-+      if (tsk->active_mm == mm) {
-+              tsk->active_mm = &init_mm;
-+              atomic_inc(&init_mm.mm_count);
-+
-+              switch_mm(mm, &init_mm, tsk);
-+
-+              atomic_dec(&mm->mm_count);
-+              BUG_ON(atomic_read(&mm->mm_count) == 0);
-+      }
-+
-+      task_unlock(tsk);
-+
-+      if (PagePinned(virt_to_page(mm->pgd))
-+          && atomic_read(&mm->mm_count) == 1
-+          && !mm->context.has_foreign_mappings)
-+              mm_unpin(mm);
-+}
++        init_thread_xstate();
++      /*
+        * Should be a barrier for any external CPU state.
+        */
+       cpu_init();
+--- sle11-2009-05-14.orig/arch/x86/kernel/vsyscall_64-xen.c    2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/arch/x86/kernel/vsyscall_64-xen.c 2009-03-16 16:38:05.000000000 +0100
+@@ -216,7 +216,7 @@ vgetcpu(unsigned *cpu, unsigned *node, s
+       return 0;
+ }
+ 
+-long __vsyscall(3) venosys_1(void)
++static long __vsyscall(3) venosys_1(void)
+ {
+       return -ENOSYS;
+ }
+--- sle11-2009-05-14.orig/arch/x86/mm/fault-xen.c      2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/arch/x86/mm/fault-xen.c   2009-03-16 16:38:05.000000000 +0100
+@@ -510,6 +510,11 @@ static int vmalloc_fault(unsigned long a
+       unsigned long pgd_paddr;
+       pmd_t *pmd_k;
+       pte_t *pte_k;
  +
-+static inline void pgd_list_add(pgd_t *pgd)
-+{
-+      struct page *page = virt_to_page(pgd);
++      /* Make sure we are in vmalloc area */
++      if (!(address >= VMALLOC_START && address < VMALLOC_END))
++              return -1;
  +
-+      list_add(&page->lru, &pgd_list);
-+}
+       /*
+        * Synchronize this task's top level page-table
+        * with the 'reference' page table.
+@@ -670,7 +675,7 @@ void __kprobes do_page_fault(struct pt_r
+ #ifdef CONFIG_X86_32
+       /* It's safe to allow irq's after cr2 has been saved and the vmalloc
+          fault has been handled. */
+-      if (regs->flags & (X86_EFLAGS_IF|VM_MASK))
++      if (regs->flags & (X86_EFLAGS_IF | X86_VM_MASK))
+               local_irq_enable();
+ 
+       /*
+@@ -1017,9 +1022,5 @@ void vmalloc_sync_all(void)
+               if (address == start)
+                       start = address + PGDIR_SIZE;
+       }
+-      /* Check that there is no need to do the same for the modules area. */
+-      BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL));
+-      BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
+-                              (__START_KERNEL & PGDIR_MASK)));
+ #endif
+ }
+--- sle11-2009-05-14.orig/arch/x86/mm/highmem_32-xen.c 2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/arch/x86/mm/highmem_32-xen.c      2009-03-16 16:38:05.000000000 +0100
+@@ -200,6 +200,8 @@ EXPORT_SYMBOL(kmap);
+ EXPORT_SYMBOL(kunmap);
+ EXPORT_SYMBOL(kmap_atomic);
+ EXPORT_SYMBOL(kunmap_atomic);
++#ifdef CONFIG_HIGHPTE
+ EXPORT_SYMBOL(kmap_atomic_to_page);
++#endif
+ EXPORT_SYMBOL(clear_highpage);
+ EXPORT_SYMBOL(copy_highpage);
+--- sle11-2009-05-14.orig/arch/x86/mm/init_32-xen.c    2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/arch/x86/mm/init_32-xen.c 2009-03-16 16:38:05.000000000 +0100
+@@ -1,5 +1,4 @@
+ /*
+- *  linux/arch/i386/mm/init.c
+  *
+  *  Copyright (C) 1995  Linus Torvalds
+  *
+@@ -22,6 +21,7 @@
+ #include <linux/init.h>
+ #include <linux/highmem.h>
+ #include <linux/pagemap.h>
++#include <linux/pci.h>
+ #include <linux/pfn.h>
+ #include <linux/poison.h>
+ #include <linux/bootmem.h>
+@@ -54,6 +54,8 @@
+ 
+ unsigned int __VMALLOC_RESERVE = 128 << 20;
+ 
++unsigned long max_pfn_mapped;
  +
-+static inline void pgd_list_del(pgd_t *pgd)
+ DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
+ unsigned long highstart_pfn, highend_pfn;
+ 
+@@ -73,7 +75,7 @@ static pmd_t * __init one_md_table_init(
+       if (!(__pgd_val(*pgd) & _PAGE_PRESENT)) {
+               pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
+ 
+-              paravirt_alloc_pd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
++              paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
+               make_lowmem_page_readonly(pmd_table, XENFEAT_writable_page_tables);
+               set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
+               pud = pud_offset(pgd, 0);
+@@ -107,7 +109,7 @@ static pte_t * __init one_page_table_ini
+                               (pte_t *)alloc_bootmem_low_pages(PAGE_SIZE);
+               }
+ 
+-              paravirt_alloc_pt(&init_mm, __pa(page_table) >> PAGE_SHIFT);
++              paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT);
+               make_lowmem_page_readonly(page_table,
+                                         XENFEAT_writable_page_tables);
+               set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
+@@ -209,8 +211,13 @@ static void __init kernel_physical_mappi
+                       /*
+                        * Map with big pages if possible, otherwise
+                        * create normal page tables:
++                       *
++                       * Don't use a large page for the first 2/4MB of memory
++                       * because there are often fixed size MTRRs in there
++                       * and overlapping MTRRs into large pages can cause
++                       * slowdowns.
+                        */
+-                      if (cpu_has_pse) {
++                      if (cpu_has_pse && !(pgd_idx == 0 && pmd_idx == 0)) {
+                               unsigned int addr2;
+                               pgprot_t prot = PAGE_KERNEL_LARGE;
+ 
+@@ -224,6 +231,7 @@ static void __init kernel_physical_mappi
+                               set_pmd(pmd, pfn_pmd(pfn, prot));
+ 
+                               pfn += PTRS_PER_PTE;
++                              max_pfn_mapped = pfn;
+                               continue;
+                       }
+                       pte = one_page_table_init(pmd);
+@@ -241,6 +249,7 @@ static void __init kernel_physical_mappi
+ 
+                               set_pte(pte, pfn_pte(pfn, prot));
+                       }
++                      max_pfn_mapped = pfn;
+                       pte_ofs = 0;
+               }
+               pmd_idx = 0;
+@@ -262,6 +271,25 @@ static inline int page_kills_ppro(unsign
+ 
+ #endif
+ 
++/*
++ * devmem_is_allowed() checks to see if /dev/mem access to a certain address
++ * is valid. The argument is a physical page number.
++ *
++ *
++ * On x86, access has to be given to the first megabyte of ram because that area
++ * contains bios code and data regions used by X and dosemu and similar apps.
++ * Access has to be given to non-kernel-ram areas as well, these contain the PCI
++ * mmio resources as well as potential bios/acpi data regions.
++ */
++int devmem_is_allowed(unsigned long pagenr)
  +{
-+      struct page *page = virt_to_page(pgd);
-+
-+      list_del(&page->lru);
++      if (pagenr <= 256)
++              return 1;
++      if (mfn_to_local_pfn(pagenr) >= max_pfn)
++              return 1;
++      return 0;
  +}
  +
-+#define UNSHARED_PTRS_PER_PGD                         \
-+      (SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD)
-+
-+static void pgd_ctor(void *p)
-+{
-+      pgd_t *pgd = p;
-+      unsigned long flags;
-+
-+      pgd_test_and_unpin(pgd);
-+
-+      /* Clear usermode parts of PGD */
-+      memset(pgd, 0, KERNEL_PGD_BOUNDARY*sizeof(pgd_t));
-+
-+      spin_lock_irqsave(&pgd_lock, flags);
-+
-+      /* If the pgd points to a shared pagetable level (either the
-+         ptes in non-PAE, or shared PMD in PAE), then just copy the
-+         references from swapper_pg_dir. */
-+      if (PAGETABLE_LEVELS == 2 ||
-+          (PAGETABLE_LEVELS == 3 && SHARED_KERNEL_PMD) ||
-+          PAGETABLE_LEVELS == 4) {
-+              clone_pgd_range(pgd + KERNEL_PGD_BOUNDARY,
-+                              swapper_pg_dir + KERNEL_PGD_BOUNDARY,
-+                              KERNEL_PGD_PTRS);
-+              paravirt_alloc_pmd_clone(__pa(pgd) >> PAGE_SHIFT,
-+                                       __pa(swapper_pg_dir) >> PAGE_SHIFT,
-+                                       KERNEL_PGD_BOUNDARY,
-+                                       KERNEL_PGD_PTRS);
-+      }
-+
-+#ifdef CONFIG_X86_64
-+      /* set level3_user_pgt for vsyscall area */
-+      __user_pgd(pgd)[pgd_index(VSYSCALL_START)] =
-+              __pgd(__pa_symbol(level3_user_pgt) | _PAGE_TABLE);
-+#endif
-+
-+#ifndef CONFIG_X86_PAE
-+      /* list required to sync kernel mapping updates */
-+      if (!SHARED_KERNEL_PMD)
-+              pgd_list_add(pgd);
+ #ifdef CONFIG_HIGHMEM
+ pte_t *kmap_pte;
+ pgprot_t kmap_prot;
+@@ -303,48 +331,18 @@ static void __init permanent_kmaps_init(
+       pkmap_page_table = pte;
+ }
+ 
+-static void __meminit free_new_highpage(struct page *page, int pfn)
+-{
+-      init_page_count(page);
+-      if (pfn < xen_start_info->nr_pages)
+-              __free_page(page);
+-      totalhigh_pages++;
+-}
+-
+ void __init add_one_highpage_init(struct page *page, int pfn, int bad_ppro)
+ {
+       if (page_is_ram(pfn) && !(bad_ppro && page_kills_ppro(pfn))) {
+               ClearPageReserved(page);
+-              free_new_highpage(page, pfn);
++              init_page_count(page);
++              if (pfn < xen_start_info->nr_pages)
++                      __free_page(page);
++              totalhigh_pages++;
+       } else
+               SetPageReserved(page);
+ }
+ 
+-static int __meminit
+-add_one_highpage_hotplug(struct page *page, unsigned long pfn)
+-{
+-      free_new_highpage(page, pfn);
+-      totalram_pages++;
+-#ifdef CONFIG_FLATMEM
+-      max_mapnr = max(pfn, max_mapnr);
+-#endif
+-      num_physpages++;
+-
+-      return 0;
+-}
+-
+-/*
+- * Not currently handling the NUMA case.
+- * Assuming single node and all memory that
+- * has been added dynamically that would be
+- * onlined here is in HIGHMEM.
+- */
+-void __meminit online_page(struct page *page)
+-{
+-      ClearPageReserved(page);
+-      add_one_highpage_hotplug(page, page_to_pfn(page));
+-}
+-
+ #ifndef CONFIG_NUMA
+ static void __init set_highmem_pages_init(int bad_ppro)
+ {
+@@ -459,15 +457,13 @@ void zap_low_mappings(void)
+ {
+       int i;
+ 
+-      save_pg_dir();
+-
+       /*
+        * Zap initial low-memory mappings.
+        *
+        * Note that "pgd_clear()" doesn't do it for
+        * us, because pgd_clear() is a no-op on i386.
+        */
+-      for (i = 0; i < USER_PTRS_PER_PGD; i++) {
++      for (i = 0; i < KERNEL_PGD_BOUNDARY; i++) {
+ #if defined(CONFIG_X86_PAE) && !defined(CONFIG_XEN)
+               set_pgd(swapper_pg_dir+i, __pgd(1 + __pa(empty_zero_page)));
+ #else
+@@ -572,9 +568,9 @@ void __init paging_init(void)
+ 
+ /*
+  * Test if the WP bit works in supervisor mode. It isn't supported on 386's
+- * and also on some strange 486's (NexGen etc.). All 586+'s are OK. This
+- * used to involve black magic jumps to work around some nasty CPU bugs,
+- * but fortunately the switch to using exceptions got rid of all that.
++ * and also on some strange 486's. All 586+'s are OK. This used to involve
++ * black magic jumps to work around some nasty CPU bugs, but fortunately the
++ * switch to using exceptions got rid of all that.
+  */
+ static void __init test_wp_bit(void)
+ {
+@@ -605,9 +601,7 @@ void __init mem_init(void)
+       int tmp, bad_ppro;
+       unsigned long pfn;
+ 
+-#if defined(CONFIG_SWIOTLB)
+-      swiotlb_init(); 
+-#endif
++      pci_iommu_alloc();
+ 
+ #ifdef CONFIG_FLATMEM
+       BUG_ON(!mem_map);
+@@ -710,16 +704,8 @@ void __init mem_init(void)
+               test_wp_bit();
+ 
+       cpa_init();
+-
+-      /*
+-       * Subtle. SMP is doing it's boot stuff late (because it has to
+-       * fork idle threads) - but it also needs low mappings for the
+-       * protected-mode entry to work. We zap these entries only after
+-       * the WP-bit has been tested.
+-       */
+-#ifndef CONFIG_SMP
++      save_pg_dir();
+       zap_low_mappings();
+-#endif
+ 
+       SetPagePinned(virt_to_page(init_mm.pgd));
+ }
+@@ -769,25 +755,17 @@ void mark_rodata_ro(void)
+       unsigned long start = PFN_ALIGN(_text);
+       unsigned long size = PFN_ALIGN(_etext) - start;
+ 
+-#ifndef CONFIG_KPROBES
+-#ifdef CONFIG_HOTPLUG_CPU
+-      /* It must still be possible to apply SMP alternatives. */
+-      if (num_possible_cpus() <= 1)
+-#endif
+-      {
+-              set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
+-              printk(KERN_INFO "Write protecting the kernel text: %luk\n",
+-                      size >> 10);
++      set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
++      printk(KERN_INFO "Write protecting the kernel text: %luk\n",
++              size >> 10);
+ 
+ #ifdef CONFIG_CPA_DEBUG
+-              printk(KERN_INFO "Testing CPA: Reverting %lx-%lx\n",
+-                      start, start+size);
+-              set_pages_rw(virt_to_page(start), size>>PAGE_SHIFT);
++      printk(KERN_INFO "Testing CPA: Reverting %lx-%lx\n",
++              start, start+size);
++      set_pages_rw(virt_to_page(start), size>>PAGE_SHIFT);
+ 
+-              printk(KERN_INFO "Testing CPA: write protecting again\n");
+-              set_pages_ro(virt_to_page(start), size>>PAGE_SHIFT);
+-#endif
+-      }
++      printk(KERN_INFO "Testing CPA: write protecting again\n");
++      set_pages_ro(virt_to_page(start), size>>PAGE_SHIFT);
+ #endif
+       start += size;
+       size = (unsigned long)__end_rodata - start;
+--- sle11-2009-05-14.orig/arch/x86/mm/init_64-xen.c    2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/arch/x86/mm/init_64-xen.c 2009-03-16 16:38:05.000000000 +0100
+@@ -52,9 +52,6 @@
+ 
+ #include <xen/features.h>
+ 
+-const struct dma_mapping_ops *dma_ops;
+-EXPORT_SYMBOL(dma_ops);
+-
+ #if CONFIG_XEN_COMPAT <= 0x030002
+ unsigned int __kernel_page_user;
+ EXPORT_SYMBOL(__kernel_page_user);
+@@ -68,6 +65,28 @@ extern unsigned long start_pfn;
+ extern pmd_t level2_fixmap_pgt[PTRS_PER_PMD];
+ extern pte_t level1_fixmap_pgt[PTRS_PER_PTE];
+ 
++#ifndef CONFIG_XEN
++int direct_gbpages __meminitdata
++#ifdef CONFIG_DIRECT_GBPAGES
++                              = 1
  +#endif
++;
  +
-+      spin_unlock_irqrestore(&pgd_lock, flags);
-+}
-+
-+static void pgd_dtor(void *pgd)
-+{
-+      unsigned long flags; /* can be called from interrupt context */
-+
-+      if (!SHARED_KERNEL_PMD) {
-+              spin_lock_irqsave(&pgd_lock, flags);
-+              pgd_list_del(pgd);
-+              spin_unlock_irqrestore(&pgd_lock, flags);
-+      }
-+
-+      pgd_test_and_unpin(pgd);
-+}
-+
-+/*
-+ * List of all pgd's needed for non-PAE so it can invalidate entries
-+ * in both cached and uncached pgd's; not needed for PAE since the
-+ * kernel pmd is shared. If PAE were not to share the pmd a similar
-+ * tactic would be needed. This is essentially codepath-based locking
-+ * against pageattr.c; it is the unique case in which a valid change
-+ * of kernel pagetables can't be lazily synchronized by vmalloc faults.
-+ * vmalloc faults work because attached pagetables are never freed.
-+ * -- wli
-+ */
-+
-+#ifdef CONFIG_X86_PAE
-+/*
-+ * Mop up any pmd pages which may still be attached to the pgd.
-+ * Normally they will be freed by munmap/exit_mmap, but any pmd we
-+ * preallocate which never got a corresponding vma will need to be
-+ * freed manually.
-+ */
-+static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
++static int __init parse_direct_gbpages_off(char *arg)
  +{
-+      int i;
-+
-+      for(i = 0; i < UNSHARED_PTRS_PER_PGD; i++) {
-+              pgd_t pgd = pgdp[i];
-+
-+              if (__pgd_val(pgd) != 0) {
-+                      pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd);
-+
-+                      pgdp[i] = xen_make_pgd(0);
-+
-+                      paravirt_release_pmd(pgd_val(pgd) >> PAGE_SHIFT);
-+                      pmd_free(mm, pmd);
-+              }
-+      }
-+
-+      if (!xen_feature(XENFEAT_pae_pgdir_above_4gb))
-+              xen_destroy_contiguous_region((unsigned long)pgdp, 0);
++      direct_gbpages = 0;
++      return 0;
  +}
++early_param("nogbpages", parse_direct_gbpages_off);
  +
-+/*
-+ * In PAE mode, we need to do a cr3 reload (=tlb flush) when
-+ * updating the top-level pagetable entries to guarantee the
-+ * processor notices the update.  Since this is expensive, and
-+ * all 4 top-level entries are used almost immediately in a
-+ * new process's life, we just pre-populate them here.
-+ *
-+ * Also, if we're in a paravirt environment where the kernel pmd is
-+ * not shared between pagetables (!SHARED_KERNEL_PMDS), we allocate
-+ * and initialize the kernel pmds here.
-+ */
-+static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
++static int __init parse_direct_gbpages_on(char *arg)
  +{
-+      pud_t *pud;
-+      pmd_t *pmds[UNSHARED_PTRS_PER_PGD];
-+      unsigned long addr, flags;
-+      int i;
-+
-+      /*
-+       * We can race save/restore (if we sleep during a GFP_KERNEL memory
-+       * allocation). We therefore store virtual addresses of pmds as they
-+       * do not change across save/restore, and poke the machine addresses
-+       * into the pgdir under the pgd_lock.
-+       */
-+      for (addr = i = 0; i < UNSHARED_PTRS_PER_PGD; i++, addr += PUD_SIZE) {
-+              pmds[i] = pmd_alloc_one(mm, addr);
-+              if (!pmds[i])
-+                      goto out_oom;
-+      }
-+
-+      spin_lock_irqsave(&pgd_lock, flags);
-+
-+      /* Protect against save/restore: move below 4GB under pgd_lock. */
-+      if (!xen_feature(XENFEAT_pae_pgdir_above_4gb)
-+          && xen_create_contiguous_region((unsigned long)pgd, 0, 32)) {
-+              spin_unlock_irqrestore(&pgd_lock, flags);
-+out_oom:
-+              while (i--)
-+                      pmd_free(mm, pmds[i]);
-+              return 0;
-+      }
-+
-+      /* Copy kernel pmd contents and write-protect the new pmds. */
-+      pud = pud_offset(pgd, 0);
-+      for (addr = i = 0; i < UNSHARED_PTRS_PER_PGD;
-+           i++, pud++, addr += PUD_SIZE) {
-+              if (i >= KERNEL_PGD_BOUNDARY) {
-+                      memcpy(pmds[i],
-+                             (pmd_t *)pgd_page_vaddr(swapper_pg_dir[i]),
-+                             sizeof(pmd_t) * PTRS_PER_PMD);
-+                      make_lowmem_page_readonly(
-+                              pmds[i], XENFEAT_writable_page_tables);
-+              }
-+
-+              /* It is safe to poke machine addresses of pmds under the pgd_lock. */
-+              pud_populate(mm, pud, pmds[i]);
-+      }
-+
-+      /* List required to sync kernel mapping updates and
-+       * to pin/unpin on save/restore. */
-+      pgd_list_add(pgd);
-+
-+      spin_unlock_irqrestore(&pgd_lock, flags);
-+
-+      return 1;
++      direct_gbpages = 1;
++      return 0;
  +}
++early_param("gbpages", parse_direct_gbpages_on);
++#endif
  +
-+void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd)
-+{
-+      struct page *page = virt_to_page(pmd);
-+      unsigned long pfn = page_to_pfn(page);
-+
-+      paravirt_alloc_pmd(mm, __pa(pmd) >> PAGE_SHIFT);
+ /*
+  * Use this until direct mapping is established, i.e. before __va() is 
+  * available in init_memory_mapping().
+@@ -135,9 +154,6 @@ void show_mem(void)
+ 
+       printk(KERN_INFO "Mem-info:\n");
+       show_free_areas();
+-      printk(KERN_INFO "Free swap:       %6ldkB\n",
+-              nr_swap_pages << (PAGE_SHIFT-10));
+-
+       for_each_online_pgdat(pgdat) {
+               for (i = 0; i < pgdat->node_spanned_pages; ++i) {
+                       /*
+@@ -328,7 +344,7 @@ void __init cleanup_highmap(void)
+       pmd_t *last_pmd = pmd + PTRS_PER_PMD;
+ 
+       for (; pmd < last_pmd; pmd++, vaddr += PMD_SIZE) {
+-              if (!pmd_present(*pmd))
++              if (pmd_none(*pmd))
+                       continue;
+               if (vaddr < (unsigned long) _text || vaddr > end)
+                       set_pmd(pmd, __pmd(0));
+@@ -337,8 +353,7 @@ void __init cleanup_highmap(void)
+ #endif
+ 
+ /* NOTE: this is meant to be run only at boot */
+-void __init
+-__set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
++void __init __set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
+ {
+       unsigned long address = __fix_to_virt(idx);
+ 
+@@ -463,7 +478,7 @@ __meminit void early_iounmap(void *addr,
+ }
+ #endif
+ 
+-static void __meminit
++static unsigned long __meminit
+ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
+ {
+       int i = pmd_index(address);
+@@ -503,21 +518,26 @@ phys_pmd_init(pmd_t *pmd_page, unsigned 
+                       set_pmd(pmd, __pmd(pte_phys | _KERNPG_TABLE));
+               }
+       }
++      return address;
+ }
+ 
+-static void __meminit
++static unsigned long __meminit
+ phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end)
+ {
+       pmd_t *pmd = pmd_offset(pud, 0);
++      unsigned long last_map_addr;
  +
-+      /* Note: almost everything apart from _PAGE_PRESENT is
-+         reserved at the pmd (PDPT) level. */
-+      if (PagePinned(virt_to_page(mm->pgd))) {
-+              BUG_ON(PageHighMem(page));
-+              BUG_ON(HYPERVISOR_update_va_mapping(
-+                        (unsigned long)__va(pfn << PAGE_SHIFT),
-+                        pfn_pte(pfn, PAGE_KERNEL_RO), 0));
-+              set_pud(pudp, __pud(__pa(pmd) | _PAGE_PRESENT));
-+      } else
-+              *pudp = __pud(__pa(pmd) | _PAGE_PRESENT);
+       spin_lock(&init_mm.page_table_lock);
+-      phys_pmd_init(pmd, address, end);
++      last_map_addr = phys_pmd_init(pmd, address, end);
+       spin_unlock(&init_mm.page_table_lock);
+       __flush_tlb_all();
++      return last_map_addr;
+ }
+ 
+-static void __meminit
++static unsigned long __meminit
+ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end)
+ {
++      unsigned long last_map_addr = end;
+       int i = pud_index(addr);
+ 
+       for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE) {
+@@ -529,7 +549,15 @@ phys_pud_init(pud_t *pud_page, unsigned 
+                       break;
+ 
+               if (__pud_val(*pud)) {
+-                      phys_pmd_update(pud, addr, end);
++                      if (!pud_large(*pud))
++                              last_map_addr = phys_pmd_update(pud, addr, end);
++                      continue;
++              }
  +
-+      /*
-+       * According to Intel App note "TLBs, Paging-Structure Caches,
-+       * and Their Invalidation", April 2007, document 317080-001,
-+       * section 8.1: in PAE mode we explicitly have to flush the
-+       * TLB via cr3 if the top-level pgd is changed...
-+       */
-+      if (mm == current->active_mm)
-+              xen_tlb_flush();
-+}
-+#else  /* !CONFIG_X86_PAE */
-+/* No need to prepopulate any pagetable entries in non-PAE modes. */
-+static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
-+{
-+      return 1;
-+}
++              if (direct_gbpages) {
++                      set_pte((pte_t *)pud,
++                              pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL_LARGE));
++                      last_map_addr = (addr & PUD_MASK) + PUD_SIZE;
+                       continue;
+               }
+ 
+@@ -537,12 +565,14 @@ phys_pud_init(pud_t *pud_page, unsigned 
+ 
+               spin_lock(&init_mm.page_table_lock);
+               *pud = __pud(pmd_phys | _KERNPG_TABLE);
+-              phys_pmd_init(pmd, addr, end);
++              last_map_addr = phys_pmd_init(pmd, addr, end);
+               spin_unlock(&init_mm.page_table_lock);
+ 
+               early_make_page_readonly(pmd, XENFEAT_writable_page_tables);
+       }
+       __flush_tlb_all();
  +
-+static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgd)
++      return last_map_addr >> PAGE_SHIFT;
+ }
+ 
+ void __init xen_init_pt(void)
+@@ -754,16 +784,138 @@ static void __init xen_finish_init_mappi
+       table_end = start_pfn;
+ }
+ 
++static void __init init_gbpages(void)
  +{
++#ifndef CONFIG_XEN
++      if (direct_gbpages && cpu_has_gbpages)
++              printk(KERN_INFO "Using GB pages for direct mapping\n");
++      else
++              direct_gbpages = 0;
++#endif
  +}
-+#endif        /* CONFIG_X86_PAE */
  +
-+#ifdef CONFIG_X86_64
-+/* We allocate two contiguous pages for kernel and user. */
-+#define PGD_ORDER 1
-+#else
-+#define PGD_ORDER 0
-+#endif
++#ifdef CONFIG_MEMTEST_BOOTPARAM
  +
-+pgd_t *pgd_alloc(struct mm_struct *mm)
++static void __init memtest(unsigned long start_phys, unsigned long size,
++                               unsigned pattern)
  +{
-+      pgd_t *pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, PGD_ORDER);
-+
-+      /* so that alloc_pd can use it */
-+      mm->pgd = pgd;
-+      if (pgd)
-+              pgd_ctor(pgd);
++      unsigned long i;
++      unsigned long *start;
++      unsigned long start_bad;
++      unsigned long last_bad;
++      unsigned long val;
++      unsigned long start_phys_aligned;
++      unsigned long count;
++      unsigned long incr;
  +
-+      if (pgd && !pgd_prepopulate_pmd(mm, pgd)) {
-+              free_pages((unsigned long)pgd, PGD_ORDER);
-+              pgd = NULL;
++      switch (pattern) {
++      case 0:
++              val = 0UL;
++              break;
++      case 1:
++              val = -1UL;
++              break;
++      case 2:
++              val = 0x5555555555555555UL;
++              break;
++      case 3:
++              val = 0xaaaaaaaaaaaaaaaaUL;
++              break;
++      default:
++              return;
  +      }
  +
-+      return pgd;
-+}
++      incr = sizeof(unsigned long);
++      start_phys_aligned = ALIGN(start_phys, incr);
++      count = (size - (start_phys_aligned - start_phys))/incr;
++      start = __va(start_phys_aligned);
++      start_bad = 0;
++      last_bad = 0;
  +
-+void pgd_free(struct mm_struct *mm, pgd_t *pgd)
-+{
-+      /*
-+       * After this the pgd should not be pinned for the duration of this
-+       * function's execution. We should never sleep and thus never race:
-+       *  1. User pmds will not become write-protected under our feet due
-+       *     to a concurrent mm_pin_all().
-+       *  2. The machine addresses in PGD entries will not become invalid
-+       *     due to a concurrent save/restore.
-+       */
-+      pgd_dtor(pgd);
++      for (i = 0; i < count; i++)
++              start[i] = val;
++      for (i = 0; i < count; i++, start++, start_phys_aligned += incr) {
++              if (*start != val) {
++                      if (start_phys_aligned == last_bad + incr) {
++                              last_bad += incr;
++                      } else {
++                              if (start_bad) {
++                                      printk(KERN_CONT "\n  %016lx bad mem addr %016lx - %016lx reserved",
++                                              val, start_bad, last_bad + incr);
++                                      reserve_early(start_bad, last_bad - start_bad, "BAD RAM");
++                              }
++                              start_bad = last_bad = start_phys_aligned;
++                      }
++              }
++      }
++      if (start_bad) {
++              printk(KERN_CONT "\n  %016lx bad mem addr %016lx - %016lx reserved",
++                      val, start_bad, last_bad + incr);
++              reserve_early(start_bad, last_bad - start_bad, "BAD RAM");
++      }
  +
-+      pgd_mop_up_pmds(mm, pgd);
-+      free_pages((unsigned long)pgd, PGD_ORDER);
  +}
  +
-+/* blktap and gntdev need this, as otherwise they would implicitly (and
-+ * needlessly, as they never use it) reference init_mm. */
-+pte_t xen_ptep_get_and_clear_full(struct vm_area_struct *vma,
-+                                unsigned long addr, pte_t *ptep, int full)
-+{
-+      return ptep_get_and_clear_full(vma->vm_mm, addr, ptep, full);
-+}
-+EXPORT_SYMBOL_GPL(xen_ptep_get_and_clear_full);
++static int memtest_pattern __initdata = CONFIG_MEMTEST_BOOTPARAM_VALUE;
  +
-+int ptep_set_access_flags(struct vm_area_struct *vma,
-+                        unsigned long address, pte_t *ptep,
-+                        pte_t entry, int dirty)
++static int __init parse_memtest(char *arg)
  +{
-+      int changed = !pte_same(*ptep, entry);
-+
-+      if (changed && dirty) {
-+              if (likely(vma->vm_mm == current->mm)) {
-+                      if (HYPERVISOR_update_va_mapping(address,
-+                              entry,
-+                              (unsigned long)vma->vm_mm->cpu_vm_mask.bits|
-+                                      UVMF_INVLPG|UVMF_MULTI))
-+                              BUG();
-+              } else {
-+                      xen_l1_entry_update(ptep, entry);
-+                      flush_tlb_page(vma, address);
-+              }
-+      }
-+
-+      return changed;
++      if (arg)
++              memtest_pattern = simple_strtoul(arg, NULL, 0);
++      return 0;
  +}
  +
-+int ptep_test_and_clear_young(struct vm_area_struct *vma,
-+                            unsigned long addr, pte_t *ptep)
++early_param("memtest", parse_memtest);
++
++static void __init early_memtest(unsigned long start, unsigned long end)
  +{
-+      int ret = 0;
++      u64 t_start, t_size;
++      unsigned pattern;
  +
-+      if (pte_young(*ptep))
-+              ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
-+                                       &ptep->pte);
++      if (!memtest_pattern)
++              return;
  +
-+      if (ret)
-+              pte_update(vma->vm_mm, addr, ptep);
++      printk(KERN_INFO "early_memtest: pattern num %d", memtest_pattern);
++      for (pattern = 0; pattern < memtest_pattern; pattern++) {
++              t_start = start;
++              t_size = 0;
++              while (t_start < end) {
++                      t_start = find_e820_area_size(t_start, &t_size, 1);
  +
-+      return ret;
-+}
++                      /* done ? */
++                      if (t_start >= end)
++                              break;
++                      if (t_start + t_size > end)
++                              t_size = end - t_start;
  +
-+int ptep_clear_flush_young(struct vm_area_struct *vma,
-+                         unsigned long address, pte_t *ptep)
-+{
-+      pte_t pte = *ptep;
-+      int young = pte_young(pte);
++                      printk(KERN_CONT "\n  %016llx - %016llx pattern %d",
++                              (unsigned long long)t_start,
++                              (unsigned long long)t_start + t_size, pattern);
  +
-+      pte = pte_mkold(pte);
-+      if (PagePinned(virt_to_page(vma->vm_mm->pgd)))
-+              ptep_set_access_flags(vma, address, ptep, pte, young);
-+      else if (young)
-+              ptep->pte_low = pte.pte_low;
++                      memtest(t_start, t_size, pattern);
  +
-+      return young;
-+}
---- a/arch/x86/pci/i386.c
-+++ b/arch/x86/pci/i386.c
-@@ -328,10 +328,14 @@ int pci_mmap_page_range(struct pci_dev *
-               flags = new_flags;
-       }
- 
-+#ifndef CONFIG_XEN
-       if (((vma->vm_pgoff < max_low_pfn_mapped) ||
-            (vma->vm_pgoff >= (1UL<<(32 - PAGE_SHIFT)) &&
-             vma->vm_pgoff < max_pfn_mapped)) &&
-           ioremap_change_attr((unsigned long)__va(addr), len, flags)) {
-+#else
-+      if (ioremap_check_change_attr(vma->vm_pgoff, len, flags)) {
-+#endif
-               free_memtype(addr, addr + len);
-               return -EINVAL;
-       }
---- a/arch/x86/pci/irq-xen.c
-+++ b/arch/x86/pci/irq-xen.c
-@@ -140,9 +140,11 @@ static void __init pirq_peer_trick(void)
-               busmap[e->bus] = 1;
-       }
-       for(i = 1; i < 256; i++) {
-+              int node;
-               if (!busmap[i] || pci_find_bus(0, i))
-                       continue;
--              if (pci_scan_bus_with_sysdata(i))
-+              node = get_mp_bus_to_node(i);
-+              if (pci_scan_bus_on_node(i, &pci_root_ops, node))
-                       printk(KERN_INFO "PCI: Discovered primary peer "
-                              "bus %02x [IRQ]\n", i);
-       }
-@@ -204,7 +206,7 @@ static int pirq_ali_get(struct pci_dev *
++                      t_start += t_size;
++              }
++      }
++      printk(KERN_CONT "\n");
++}
++#else
++static void __init early_memtest(unsigned long start, unsigned long end)
++{
++}
++#endif
++
+ /*
+  * Setup the direct mapping of the physical memory at PAGE_OFFSET.
+  * This runs before bootmem is initialized and gets pages directly from
+  * the physical memory. To access them they are temporarily mapped.
+  */
+-void __init_refok init_memory_mapping(unsigned long start, unsigned long end)
++unsigned long __init_refok init_memory_mapping(unsigned long start, unsigned long end)
   {
-       static const unsigned char irqmap[16] = { 0, 9, 3, 10, 4, 5, 7, 6, 1, 11, 0, 12, 0, 14, 0, 15 };
- 
--      WARN_ON_ONCE(pirq >= 16);
-+      WARN_ON_ONCE(pirq > 16);
-       return irqmap[read_config_nybble(router, 0x48, pirq-1)];
- }
+-      unsigned long next;
++      unsigned long next, last_map_addr = end;
++      unsigned long start_phys = start, end_phys = end;
   
-@@ -213,7 +215,7 @@ static int pirq_ali_set(struct pci_dev *
-       static const unsigned char irqmap[16] = { 0, 8, 0, 2, 4, 5, 7, 6, 0, 1, 3, 9, 11, 0, 13, 15 };
-       unsigned int val = irqmap[irq];
+-      pr_debug("init_memory_mapping\n");
++      printk(KERN_INFO "init_memory_mapping\n");
   
--      WARN_ON_ONCE(pirq >= 16);
-+      WARN_ON_ONCE(pirq > 16);
-       if (val) {
-               write_config_nybble(router, 0x48, pirq-1, val);
-               return 1;
-@@ -264,7 +266,7 @@ static int pirq_via586_get(struct pci_de
- {
-       static const unsigned int pirqmap[5] = { 3, 2, 5, 1, 1 };
+       /*
+        * Find space for the kernel direct mapping tables.
+@@ -772,8 +924,10 @@ void __init_refok init_memory_mapping(un
+        * memory mapped. Unfortunately this is done currently before the
+        * nodes are discovered.
+        */
+-      if (!after_bootmem)
++      if (!after_bootmem) {
++              init_gbpages();
+               find_early_table_space(end);
++      }
   
--      WARN_ON_ONCE(pirq >= 5);
-+      WARN_ON_ONCE(pirq > 5);
-       return read_config_nybble(router, 0x55, pirqmap[pirq-1]);
+       start = (unsigned long)__va(start);
+       end = (unsigned long)__va(end);
+@@ -790,7 +944,7 @@ void __init_refok init_memory_mapping(un
+               next = start + PGDIR_SIZE;
+               if (next > end)
+                       next = end;
+-              phys_pud_init(pud, __pa(start), __pa(next));
++              last_map_addr = phys_pud_init(pud, __pa(start), __pa(next));
+               if (!after_bootmem) {
+                       early_make_page_readonly(pud, XENFEAT_writable_page_tables);
+                       set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys));
+@@ -807,6 +961,11 @@ void __init_refok init_memory_mapping(un
+       if (!after_bootmem)
+               reserve_early(table_start << PAGE_SHIFT,
+                             table_end << PAGE_SHIFT, "PGTABLE");
++
++      if (!after_bootmem)
++              early_memtest(start_phys, end_phys);
++
++      return last_map_addr;
   }
   
-@@ -272,7 +274,7 @@ static int pirq_via586_set(struct pci_de
+ #ifndef CONFIG_NUMA
+@@ -830,15 +989,6 @@ void __init paging_init(void)
+ /*
+  * Memory hotplug specific functions
+  */
+-void online_page(struct page *page)
+-{
+-      ClearPageReserved(page);
+-      init_page_count(page);
+-      __free_page(page);
+-      totalram_pages++;
+-      num_physpages++;
+-}
+-
+ #ifdef CONFIG_MEMORY_HOTPLUG
+ /*
+  * Memory is added always to NORMAL zone. This means you will never get
+@@ -848,11 +998,13 @@ int arch_add_memory(int nid, u64 start, 
   {
-       static const unsigned int pirqmap[5] = { 3, 2, 5, 1, 1 };
+       struct pglist_data *pgdat = NODE_DATA(nid);
+       struct zone *zone = pgdat->node_zones + ZONE_NORMAL;
+-      unsigned long start_pfn = start >> PAGE_SHIFT;
++      unsigned long last_mapped_pfn, start_pfn = start >> PAGE_SHIFT;
+       unsigned long nr_pages = size >> PAGE_SHIFT;
+       int ret;
   
--      WARN_ON_ONCE(pirq >= 5);
-+      WARN_ON_ONCE(pirq > 5);
-       write_config_nybble(router, 0x55, pirqmap[pirq-1], irq);
-       return 1;
- }
-@@ -286,7 +288,7 @@ static int pirq_ite_get(struct pci_dev *
- {
-       static const unsigned char pirqmap[4] = { 1, 0, 2, 3 };
+-      init_memory_mapping(start, start + size-1);
++      last_mapped_pfn = init_memory_mapping(start, start + size-1);
++      if (last_mapped_pfn > max_pfn_mapped)
++              max_pfn_mapped = last_mapped_pfn;
   
--      WARN_ON_ONCE(pirq >= 4);
-+      WARN_ON_ONCE(pirq > 4);
-       return read_config_nybble(router,0x43, pirqmap[pirq-1]);
- }
+       ret = __add_pages(zone, start_pfn, nr_pages);
+       WARN_ON(1);
+@@ -871,6 +1023,26 @@ EXPORT_SYMBOL_GPL(memory_add_physaddr_to
   
-@@ -294,7 +296,7 @@ static int pirq_ite_set(struct pci_dev *
- {
-       static const unsigned char pirqmap[4] = { 1, 0, 2, 3 };
+ #endif /* CONFIG_MEMORY_HOTPLUG */
   
--      WARN_ON_ONCE(pirq >= 4);
-+      WARN_ON_ONCE(pirq > 4);
-       write_config_nybble(router, 0x43, pirqmap[pirq-1], irq);
-       return 1;
- }
-@@ -623,6 +625,13 @@ static __init int via_router_probe(struc
-                        */
-                       device = PCI_DEVICE_ID_VIA_8235;
-                       break;
-+              case PCI_DEVICE_ID_VIA_8237:
-+                      /**
-+                       * Asus a7v600 bios wrongly reports 8237
-+                       * as 586-compatible
-+                       */
-+                      device = PCI_DEVICE_ID_VIA_8237;
-+                      break;
-               }
-       }
++/*
++ * devmem_is_allowed() checks to see if /dev/mem access to a certain address
++ * is valid. The argument is a physical page number.
++ *
++ *
++ * On x86, access has to be given to the first megabyte of ram because that area
++ * contains bios code and data regions used by X and dosemu and similar apps.
++ * Access has to be given to non-kernel-ram areas as well, these contain the PCI
++ * mmio resources as well as potential bios/acpi data regions.
++ */
++int devmem_is_allowed(unsigned long pagenr)
++{
++      if (pagenr <= 256)
++              return 1;
++      if (mfn_to_local_pfn(pagenr) >= max_pfn)
++              return 1;
++      return 0;
++}
++
++
+ static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel,
+                        kcore_modules, kcore_vsyscall;
   
---- a/arch/x86/vdso/vdso32-setup-xen.c
-+++ b/arch/x86/vdso/vdso32-setup-xen.c
-@@ -164,7 +164,7 @@ static __init void relocate_vdso(Elf32_E
-       Elf32_Shdr *shdr;
-       int i;
+@@ -979,24 +1151,7 @@ EXPORT_SYMBOL_GPL(rodata_test_data);
   
--      BUG_ON(memcmp(ehdr->e_ident, ELFMAG, 4) != 0 ||
-+      BUG_ON(memcmp(ehdr->e_ident, ELFMAG, SELFMAG) != 0 ||
-              !elf_check_arch_ia32(ehdr) ||
-              ehdr->e_type != ET_DYN);
+ void mark_rodata_ro(void)
+ {
+-      unsigned long start = (unsigned long)_stext, end;
+-
+-#ifdef CONFIG_HOTPLUG_CPU
+-      /* It must still be possible to apply SMP alternatives. */
+-      if (num_possible_cpus() > 1)
+-              start = (unsigned long)_etext;
+-#endif
+-
+-#ifdef CONFIG_KPROBES
+-      start = (unsigned long)__start_rodata;
+-#endif
+-
+-      end = (unsigned long)__end_rodata;
+-      start = (start + PAGE_SIZE - 1) & PAGE_MASK;
+-      end &= PAGE_MASK;
+-      if (end <= start)
+-              return;
+-
++      unsigned long start = PFN_ALIGN(_stext), end = PFN_ALIGN(__end_rodata);
   
-@@ -233,8 +233,12 @@ void syscall32_cpu_init(void)
-               BUG();
+       printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
+              (end - start) >> 10);
+@@ -1019,6 +1174,7 @@ void mark_rodata_ro(void)
+       set_memory_ro(start, (end-start) >> PAGE_SHIFT);
   #endif
- 
--      if (use_sysenter < 0)
--              use_sysenter = (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL);
-+      if (use_sysenter < 0) {
-+              if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
-+                      use_sysenter = 1;
-+              if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR)
-+                      use_sysenter = 1;
-+      }
   }
- 
- #define compat_uses_vma               1
-@@ -337,8 +341,6 @@ int __init sysenter_setup(void)
- 
- #ifdef CONFIG_X86_32
-       gate_vma_init();
--
--      printk("Compat vDSO mapped to %08lx.\n", __fix_to_virt(FIX_VDSO));
- #endif
- 
- #if defined(CONFIG_X86_64) && CONFIG_XEN_COMPAT < 0x030200
-@@ -383,6 +385,9 @@ int arch_setup_additional_pages(struct l
-       int ret = 0;
-       bool compat;
- 
-+      if (vdso_enabled == VDSO_DISABLED)
-+              return 0;
  +
-       down_write(&mm->mmap_sem);
- 
-       /* Test compat mode once here, in case someone
---- a/drivers/acpi/processor_core.c
-+++ b/drivers/acpi/processor_core.c
-@@ -657,7 +657,7 @@ static int acpi_processor_get_info(struc
-        * of /proc/cpuinfo
-        */
-       status = acpi_evaluate_object(pr->handle, "_SUN", NULL, &buffer);
--      if (ACPI_SUCCESS(status))
-+      if (ACPI_SUCCESS(status) && pr->id != -1)
-               arch_fix_phys_package_id(pr->id, object.integer.value);
- 
-       return 0;
---- a/drivers/input/xen-kbdfront.c
-+++ b/drivers/input/xen-kbdfront.c
-@@ -325,7 +325,6 @@ static struct xenbus_device_id xenkbd_id
+ #endif
   
- static struct xenbus_driver xenkbd = {
-       .name = "vkbd",
--      .owner = THIS_MODULE,
-       .ids = xenkbd_ids,
-       .probe = xenkbd_probe,
-       .remove = xenkbd_remove,
---- a/drivers/oprofile/cpu_buffer.c
-+++ b/drivers/oprofile/cpu_buffer.c
-@@ -310,7 +310,7 @@ void oprofile_add_trace(unsigned long pc
- #ifdef CONFIG_XEN
- int oprofile_add_domain_switch(int32_t domain_id)
+ #ifdef CONFIG_BLK_DEV_INITRD
+@@ -1031,7 +1187,7 @@ void free_initrd_mem(unsigned long start
+ void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
   {
--      struct oprofile_cpu_buffer * cpu_buf = &cpu_buffer[smp_processor_id()];
-+      struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer);
+ #ifdef CONFIG_NUMA
+-      int nid = phys_to_nid(phys);
++      int nid, next_nid;
+ #endif
+       unsigned long pfn = phys >> PAGE_SHIFT;
   
-       /* should have space for switching into and out of domain
-          (2 slots each) plus one sample and one cpu mode switch */
---- a/drivers/pci/msi-xen.c
-+++ b/drivers/pci/msi-xen.c
-@@ -588,7 +588,7 @@ int pci_enable_msi(struct pci_dev* dev)
- EXPORT_SYMBOL(pci_enable_msi);
+@@ -1040,7 +1196,7 @@ void __init reserve_bootmem_generic(unsi
+                * This can happen with kdump kernels when accessing
+                * firmware tables:
+                */
+-              if (pfn < end_pfn_map)
++              if (pfn < max_pfn_mapped)
+                       return;
   
- extern void pci_frontend_disable_msi(struct pci_dev* dev);
--void pci_disable_msi(struct pci_dev* dev)
-+void pci_msi_shutdown(struct pci_dev* dev)
+               printk(KERN_ERR "reserve_bootmem: illegal reserve %lx %u\n",
+@@ -1050,10 +1206,16 @@ void __init reserve_bootmem_generic(unsi
+ 
+       /* Should check here against the e820 map to avoid double free */
+ #ifdef CONFIG_NUMA
+-      reserve_bootmem_node(NODE_DATA(nid), phys, len, BOOTMEM_DEFAULT);
++      nid = phys_to_nid(phys);
++      next_nid = phys_to_nid(phys + len - 1);
++      if (nid == next_nid)
++              reserve_bootmem_node(NODE_DATA(nid), phys, len, BOOTMEM_DEFAULT);
++      else
++              reserve_bootmem(phys, len, BOOTMEM_DEFAULT);
+ #else
+       reserve_bootmem(phys, len, BOOTMEM_DEFAULT);
+ #endif
++
+ #ifndef CONFIG_XEN
+       if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) {
+               dma_reserve += len / PAGE_SIZE;
+@@ -1149,6 +1311,10 @@ const char *arch_vma_name(struct vm_area
+ /*
+  * Initialise the sparsemem vmemmap using huge-pages at the PMD level.
+  */
++static long __meminitdata addr_start, addr_end;
++static void __meminitdata *p_start, *p_end;
++static int __meminitdata node_start;
++
+ int __meminit
+ vmemmap_populate(struct page *start_page, unsigned long size, int node)
   {
-       int pirq;
+@@ -1183,12 +1349,32 @@ vmemmap_populate(struct page *start_page
+                                                       PAGE_KERNEL_LARGE);
+                       set_pmd(pmd, __pmd_ma(__pte_val(entry)));
   
-@@ -617,6 +617,10 @@ void pci_disable_msi(struct pci_dev* dev
-       pci_intx_for_msi(dev, 1);
-       dev->msi_enabled = 0;
+-                      printk(KERN_DEBUG " [%lx-%lx] PMD ->%p on node %d\n",
+-                              addr, addr + PMD_SIZE - 1, p, node);
++                      /* check to see if we have contiguous blocks */
++                      if (p_end != p || node_start != node) {
++                              if (p_start)
++                                      printk(KERN_DEBUG " [%lx-%lx] PMD -> [%p-%p] on node %d\n",
++                                              addr_start, addr_end-1, p_start, p_end-1, node_start);
++                              addr_start = addr;
++                              node_start = node;
++                              p_start = p;
++                      }
++                      addr_end = addr + PMD_SIZE;
++                      p_end = p + PMD_SIZE;
+               } else {
+                       vmemmap_verify((pte_t *)pmd, node, addr, next);
+               }
+       }
+       return 0;
   }
-+void pci_disable_msi(struct pci_dev* dev)
++
++void __meminit vmemmap_populate_print_last(void)
  +{
-+      pci_msi_shutdown(dev);
++      if (p_start) {
++              printk(KERN_DEBUG " [%lx-%lx] PMD -> [%p-%p] on node %d\n",
++                      addr_start, addr_end-1, p_start, p_end-1, node_start);
++              p_start = NULL;
++              p_end = NULL;
++              node_start = 0;
++      }
  +}
- EXPORT_SYMBOL(pci_disable_msi);
+ #endif
+--- sle11-2009-05-14.orig/arch/x86/mm/ioremap-xen.c    2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/arch/x86/mm/ioremap-xen.c 2009-03-16 16:38:05.000000000 +0100
+@@ -20,14 +20,11 @@
+ #include <asm/pgtable.h>
+ #include <asm/tlbflush.h>
+ #include <asm/pgalloc.h>
++#include <asm/pat.h>
   
- /**
-@@ -719,7 +723,7 @@ int pci_enable_msix(struct pci_dev* dev,
- EXPORT_SYMBOL(pci_enable_msix);
+-enum ioremap_mode {
+-      IOR_MODE_UNCACHED,
+-      IOR_MODE_CACHED,
+-};
+-
+-#if defined(CONFIG_X86_64) && !defined(CONFIG_XEN)
++#ifdef CONFIG_X86_64
   
- extern void pci_frontend_disable_msix(struct pci_dev* dev);
--void pci_disable_msix(struct pci_dev* dev)
-+void pci_msix_shutdown(struct pci_dev* dev)
++#ifndef CONFIG_XEN
+ unsigned long __phys_addr(unsigned long x)
   {
-       if (!pci_msi_enable)
-               return;
-@@ -756,6 +760,10 @@ void pci_disable_msix(struct pci_dev* de
-       pci_intx_for_msi(dev, 1);
-       dev->msix_enabled = 0;
+       if (x >= __START_KERNEL_map)
+@@ -35,6 +32,19 @@ unsigned long __phys_addr(unsigned long 
+       return x - PAGE_OFFSET;
   }
-+void pci_disable_msix(struct pci_dev* dev)
+ EXPORT_SYMBOL(__phys_addr);
++#endif
++
++static inline int phys_addr_valid(unsigned long addr)
  +{
-+      pci_msix_shutdown(dev);
++      return addr < (1UL << boot_cpu_data.x86_phys_bits);
++}
++
++#else
++
++static inline int phys_addr_valid(unsigned long addr)
++{
++      return 1;
  +}
- EXPORT_SYMBOL(pci_disable_msix);
   
- /**
---- a/drivers/video/Kconfig
-+++ b/drivers/video/Kconfig
-@@ -2047,7 +2047,7 @@ config FB_VIRTUAL
+ #endif
   
- config XEN_FBDEV_FRONTEND
-       tristate "Xen virtual frame buffer support"
--      depends on FB && XEN
-+      depends on FB && PARAVIRT_XEN
-       select FB_SYS_FILLRECT
-       select FB_SYS_COPYAREA
-       select FB_SYS_IMAGEBLIT
---- a/drivers/video/xen-fbfront.c
-+++ b/drivers/video/xen-fbfront.c
-@@ -670,7 +670,6 @@ static struct xenbus_device_id xenfb_ids
+@@ -92,7 +102,8 @@ static int __direct_remap_pfn_range(stru
+                * Fill in the machine address: PTE ptr is done later by
+                * apply_to_page_range().
+                */
+-              v->val = __pte_val(pfn_pte_ma(mfn, prot)) | _PAGE_IO;
++              pgprot_val(prot) |= _PAGE_IO;
++              v->val = __pte_val(pte_mkspecial(pfn_pte_ma(mfn, prot)));
   
- static struct xenbus_driver xenfb = {
-       .name = "vfb",
--      .owner = THIS_MODULE,
-       .ids = xenfb_ids,
-       .probe = xenfb_probe,
-       .remove = xenfb_remove,
---- a/drivers/xen/blkfront/blkfront.c
-+++ b/drivers/xen/blkfront/blkfront.c
-@@ -282,7 +282,9 @@ static void backend_changed(struct xenbu
-               break;
+               mfn++;
+               address += PAGE_SIZE;
+@@ -189,10 +200,9 @@ int touch_pte_range(struct mm_struct *mm
   
-       case XenbusStateClosing:
--              bd = bdget(info->dev);
-+              if (!info->gd)
-+                      break;
-+              bd = bdget_disk(info->gd, 0);
-               if (bd == NULL)
-                       xenbus_dev_fatal(dev, -ENODEV, "bdget failed");
+ EXPORT_SYMBOL(touch_pte_range);
   
---- a/drivers/xen/blkfront/block.h
-+++ b/drivers/xen/blkfront/block.h
-@@ -96,7 +96,6 @@ struct blk_shadow {
- struct blkfront_info
+-#ifdef CONFIG_X86_32
+ int page_is_ram(unsigned long pagenr)
   {
-       struct xenbus_device *xbdev;
--      dev_t dev;
-       struct gendisk *gd;
-       int vdevice;
-       blkif_vdev_t handle;
---- a/drivers/xen/blkfront/vbd.c
-+++ b/drivers/xen/blkfront/vbd.c
-@@ -246,17 +246,32 @@ xlvbd_init_blk_queue(struct gendisk *gd,
+-      unsigned long addr, end;
++      resource_size_t addr, end;
+       int i;
+ 
+ #ifndef CONFIG_XEN
+@@ -228,31 +238,51 @@ int page_is_ram(unsigned long pagenr)
+       }
         return 0;
   }
+-#endif
   
--static int
--xlvbd_alloc_gendisk(int major, int minor, blkif_sector_t capacity, int vdevice,
--                  u16 vdisk_info, u16 sector_size,
--                  struct blkfront_info *info)
-+int
-+xlvbd_add(blkif_sector_t capacity, int vdevice, u16 vdisk_info,
-+        u16 sector_size, struct blkfront_info *info)
+ /*
+  * Fix up the linear direct mapping of the kernel to avoid cache attribute
+  * conflicts.
+  */
+ static int ioremap_change_attr(unsigned long vaddr, unsigned long size,
+-                             enum ioremap_mode mode)
++                             unsigned long prot_val)
   {
-+      int major, minor;
-       struct gendisk *gd;
-       struct xlbd_major_info *mi;
-       int nr_minors = 1;
-       int err = -ENODEV;
-       unsigned int offset;
- 
-+      if ((vdevice>>EXT_SHIFT) > 1) {
-+              /* this is above the extended range; something is wrong */
-+              printk(KERN_WARNING "blkfront: vdevice 0x%x is above the extended range; ignoring\n", vdevice);
-+              return -ENODEV;
-+      }
-+
-+      if (!VDEV_IS_EXTENDED(vdevice)) {
-+              major = BLKIF_MAJOR(vdevice);
-+              minor = BLKIF_MINOR(vdevice);
-+      }
-+      else {
-+              major = 202;
-+              minor = BLKIF_MINOR_EXT(vdevice);
-+      }
-+
-       BUG_ON(info->gd != NULL);
-       BUG_ON(info->mi != NULL);
-       BUG_ON(info->rq != NULL);
-@@ -337,41 +352,6 @@ xlvbd_alloc_gendisk(int major, int minor
-       return err;
- }
+       unsigned long nrpages = size >> PAGE_SHIFT;
+       int err;
   
--int
--xlvbd_add(blkif_sector_t capacity, int vdevice, u16 vdisk_info,
--        u16 sector_size, struct blkfront_info *info)
--{
--      struct block_device *bd;
--      int err = 0;
--      int major, minor;
--
--      if ((vdevice>>EXT_SHIFT) > 1) {
--              /* this is above the extended range; something is wrong */
--              printk(KERN_WARNING "blkfront: vdevice 0x%x is above the extended range; ignoring\n", vdevice);
--              return -ENODEV;
--      }
--
--      if (!VDEV_IS_EXTENDED(vdevice)) {
--              major = BLKIF_MAJOR(vdevice);
--              minor = BLKIF_MINOR(vdevice);
--      }
--      else {
--              major = 202;
--              minor = BLKIF_MINOR_EXT(vdevice);
--      }
--
--      info->dev = MKDEV(major, minor);
--      bd = bdget(info->dev);
--      if (bd == NULL)
--              return -ENODEV;
--
--      err = xlvbd_alloc_gendisk(major, minor, capacity, vdevice, vdisk_info,
--                                sector_size, info);
--
--      bdput(bd);
--      return err;
--}
--
- void
- xlvbd_del(struct blkfront_info *info)
- {
---- a/drivers/xen/blktap/blktap.c
-+++ b/drivers/xen/blktap/blktap.c
-@@ -111,6 +111,7 @@ typedef struct tap_blkif {
-       unsigned long mode;           /*current switching mode               */
-       int minor;                    /*Minor number for tapdisk device      */
-       pid_t pid;                    /*tapdisk process id                   */
-+      struct pid_namespace *pid_ns; /*... and its corresponding namespace  */
-       enum { RUNNING, CLEANSHUTDOWN } status; /*Detect a clean userspace 
-                                                 shutdown                   */
-       unsigned long *idx_map;       /*Record the user ring id to kern 
-@@ -295,16 +296,14 @@ static inline int OFFSET_TO_SEG(int offs
-  * BLKTAP VM OPS
-  */
+-      switch (mode) {
+-      case IOR_MODE_UNCACHED:
++      switch (prot_val) {
++      case _PAGE_CACHE_UC:
+       default:
+-              err = set_memory_uc(vaddr, nrpages);
++              err = _set_memory_uc(vaddr, nrpages);
++              break;
++      case _PAGE_CACHE_WC:
++              err = _set_memory_wc(vaddr, nrpages);
+               break;
+-      case IOR_MODE_CACHED:
+-              err = set_memory_wb(vaddr, nrpages);
++      case _PAGE_CACHE_WB:
++              err = _set_memory_wb(vaddr, nrpages);
+               break;
+       }
   
--static struct page *blktap_nopage(struct vm_area_struct *vma,
--                                unsigned long address,
--                                int *type)
-+static int blktap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+       return err;
+ }
+ 
++int ioremap_check_change_attr(unsigned long mfn, unsigned long size,
++                            unsigned long prot_val)
++{
++      unsigned long sz;
++      int rc;
++
++      for (sz = rc = 0; sz < size && !rc; ++mfn, sz += PAGE_SIZE) {
++              unsigned long pfn = mfn_to_local_pfn(mfn);
++
++              if (pfn >= max_pfn_mapped)
++                      continue;
++              rc = ioremap_change_attr((unsigned long)__va(pfn << PAGE_SHIFT),
++                                       PAGE_SIZE, prot_val);
++      }
++
++      return rc;
++}
++
+ /*
+  * Remap an arbitrary physical address space into the kernel virtual
+  * address space. Needed when the kernel wants to access high addresses
+@@ -262,12 +292,15 @@ static int ioremap_change_attr(unsigned 
+  * have to convert them into an offset in a page-aligned mapping, but the
+  * caller shouldn't need to know that small detail.
+  */
+-static void __iomem *__ioremap(resource_size_t phys_addr, unsigned long size,
+-                             enum ioremap_mode mode)
++static void __iomem *__ioremap_caller(resource_size_t phys_addr,
++              unsigned long size, unsigned long prot_val, void *caller)
   {
+-      unsigned long mfn, offset, last_addr, vaddr;
++      unsigned long mfn, offset, vaddr;
++      resource_size_t last_addr;
+       struct vm_struct *area;
++      unsigned long new_prot_val;
+       pgprot_t prot;
++      int retval;
+       domid_t domid = DOMID_IO;
+ 
+       /* Don't allow wraparound or zero size */
+@@ -275,6 +308,13 @@ static void __iomem *__ioremap(resource_
+       if (!size || last_addr < phys_addr)
+               return NULL;
+ 
++      if (!phys_addr_valid(phys_addr)) {
++              printk(KERN_WARNING "ioremap: invalid physical address %llx\n",
++                     (unsigned long long)phys_addr);
++              WARN_ON_ONCE(1);
++              return NULL;
++      }
++
         /*
-        * if the page has not been mapped in by the driver then return
--       * NOPAGE_SIGBUS to the domain.
-+       * VM_FAULT_SIGBUS to the domain.
+        * Don't remap the low PCI/ISA area, it's always mapped..
          */
+@@ -287,55 +327,86 @@ static void __iomem *__ioremap(resource_
+       for (mfn = PFN_DOWN(phys_addr); mfn < PFN_UP(last_addr); mfn++) {
+               unsigned long pfn = mfn_to_local_pfn(mfn);
   
--      return NOPAGE_SIGBUS;
-+      return VM_FAULT_SIGBUS;
- }
- 
- static pte_t blktap_clear_pte(struct vm_area_struct *vma,
-@@ -390,7 +389,7 @@ static pte_t blktap_clear_pte(struct vm_
- }
+-              if (pfn >= max_pfn)
+-                      continue;
++              if (pfn_valid(pfn)) {
++                      if (!PageReserved(pfn_to_page(pfn)))
++                              return NULL;
++                      domid = DOMID_SELF;
++              }
++      }
++      WARN_ON_ONCE(domid == DOMID_SELF);
   
- struct vm_operations_struct blktap_vm_ops = {
--      nopage:   blktap_nopage,
-+      fault:    blktap_fault,
-       zap_pte:  blktap_clear_pte,
- };
+-              domid = DOMID_SELF;
++      /*
++       * Mappings have to be page-aligned
++       */
++      offset = phys_addr & ~PAGE_MASK;
++      phys_addr &= PAGE_MASK;
++      size = PAGE_ALIGN(last_addr+1) - phys_addr;
   
-@@ -483,9 +482,8 @@ found:
-               tapfds[minor] = info;
+-              if (pfn >= max_pfn_mapped) /* bogus */
+-                      continue;
++      retval = reserve_memtype(phys_addr, phys_addr + size,
++                                              prot_val, &new_prot_val);
++      if (retval) {
++              pr_debug("Warning: reserve_memtype returned %d\n", retval);
++              return NULL;
++      }
   
-               if ((class = get_xen_class()) != NULL)
--                      class_device_create(class, NULL,
--                                          MKDEV(blktap_major, minor), NULL,
--                                          "blktap%d", minor);
-+                      device_create(class, NULL, MKDEV(blktap_major, minor),
-+                                    "blktap%d", minor);
+-              if (pfn_valid(pfn) && !PageReserved(pfn_to_page(pfn)))
++      if (prot_val != new_prot_val) {
++              /*
++               * Do not fallback to certain memory types with certain
++               * requested type:
++               * - request is uc-, return cannot be write-back
++               * - request is uc-, return cannot be write-combine
++               * - request is write-combine, return cannot be write-back
++               */
++              if ((prot_val == _PAGE_CACHE_UC_MINUS &&
++                   (new_prot_val == _PAGE_CACHE_WB ||
++                    new_prot_val == _PAGE_CACHE_WC)) ||
++                  (prot_val == _PAGE_CACHE_WC &&
++                   new_prot_val == _PAGE_CACHE_WB)) {
++                      pr_debug(
++              "ioremap error for 0x%llx-0x%llx, requested 0x%lx, got 0x%lx\n",
++                              (unsigned long long)phys_addr,
++                              (unsigned long long)(phys_addr + size),
++                              prot_val, new_prot_val);
++                      free_memtype(phys_addr, phys_addr + size);
+                       return NULL;
++              }
++              prot_val = new_prot_val;
         }
   
- out:
-@@ -527,7 +525,7 @@ void signal_tapdisk(int idx) 
-               return;
+-      switch (mode) {
+-      case IOR_MODE_UNCACHED:
++      switch (prot_val) {
++      case _PAGE_CACHE_UC:
+       default:
+-              /*
+-               * FIXME: we will use UC MINUS for now, as video fb drivers
+-               * depend on it. Upcoming ioremap_wc() will fix this behavior.
+-               */
++              prot = PAGE_KERNEL_NOCACHE;
++              break;
++      case _PAGE_CACHE_UC_MINUS:
+               prot = PAGE_KERNEL_UC_MINUS;
+               break;
+-      case IOR_MODE_CACHED:
++      case _PAGE_CACHE_WC:
++              prot = PAGE_KERNEL_WC;
++              break;
++      case _PAGE_CACHE_WB:
+               prot = PAGE_KERNEL;
+               break;
+       }
   
-       if (info->pid > 0) {
--              ptask = find_task_by_pid(info->pid);
-+              ptask = find_task_by_pid_ns(info->pid, info->pid_ns);
-               if (ptask)
-                       info->status = CLEANSHUTDOWN;
+       /*
+-       * Mappings have to be page-aligned
+-       */
+-      offset = phys_addr & ~PAGE_MASK;
+-      phys_addr &= PAGE_MASK;
+-      size = PAGE_ALIGN(last_addr+1) - phys_addr;
+-
+-      /*
+        * Ok, go for it..
+        */
+-      area = get_vm_area(size, VM_IOREMAP | (mode << 20));
++      area = get_vm_area_caller(size, VM_IOREMAP, caller);
+       if (!area)
+               return NULL;
+       area->phys_addr = phys_addr;
+       vaddr = (unsigned long) area->addr;
+       if (__direct_remap_pfn_range(&init_mm, vaddr, PFN_DOWN(phys_addr),
+                                    size, prot, domid)) {
++              free_memtype(phys_addr, phys_addr + size);
+               free_vm_area(area);
+               return NULL;
         }
-@@ -773,8 +771,9 @@ static int blktap_ioctl(struct inode *in
-       {
-               if (info) {
-                       info->pid = (pid_t)arg;
--                      DPRINTK("blktap: pid received %d\n", 
--                             info->pid);
-+                      info->pid_ns = current->nsproxy->pid_ns;
-+                      DPRINTK("blktap: pid received %p:%d\n",
-+                              info->pid_ns, info->pid);
-               }
-               return 0;
+ 
+-      if (ioremap_change_attr(vaddr, size, mode) < 0) {
+-              iounmap((void __iomem *) vaddr);
++      if (ioremap_change_attr(vaddr, size, prot_val) < 0) {
++              free_memtype(phys_addr, phys_addr + size);
++              vunmap(area->addr);
+               return NULL;
         }
-@@ -1687,9 +1686,7 @@ static int __init blkif_init(void)
-                * We only create the device when a request of a new device is
-                * made.
-                */
--              class_device_create(class, NULL,
--                                  MKDEV(blktap_major, 0), NULL,
--                                  "blktap0");
-+              device_create(class, NULL, MKDEV(blktap_major, 0), "blktap0");
-       } else {
-               /* this is bad, but not fatal */
-               WPRINTK("blktap: sysfs xen_class not created\n");
---- a/drivers/xen/char/mem.c
-+++ b/drivers/xen/char/mem.c
-@@ -33,6 +33,27 @@ static inline int uncached_access(struct
-       return 0;
+ 
+@@ -365,16 +436,72 @@ static void __iomem *__ioremap(resource_
+  */
+ void __iomem *ioremap_nocache(resource_size_t phys_addr, unsigned long size)
+ {
+-      return __ioremap(phys_addr, size, IOR_MODE_UNCACHED);
++      /*
++       * Ideally, this should be:
++       *      pat_wc_enabled ? _PAGE_CACHE_UC : _PAGE_CACHE_UC_MINUS;
++       *
++       * Till we fix all X drivers to use ioremap_wc(), we will use
++       * UC MINUS.
++       */
++      unsigned long val = _PAGE_CACHE_UC_MINUS;
++
++      return __ioremap_caller(phys_addr, size, val,
++                              __builtin_return_address(0));
+ }
+ EXPORT_SYMBOL(ioremap_nocache);
+ 
++/**
++ * ioremap_wc -       map memory into CPU space write combined
++ * @offset:   bus address of the memory
++ * @size:     size of the resource to map
++ *
++ * This version of ioremap ensures that the memory is marked write combining.
++ * Write combining allows faster writes to some hardware devices.
++ *
++ * Must be freed with iounmap.
++ */
++void __iomem *ioremap_wc(unsigned long phys_addr, unsigned long size)
++{
++      if (pat_wc_enabled)
++              return __ioremap_caller(phys_addr, size, _PAGE_CACHE_WC,
++                                      __builtin_return_address(0));
++      else
++              return ioremap_nocache(phys_addr, size);
++}
++EXPORT_SYMBOL(ioremap_wc);
++
+ void __iomem *ioremap_cache(resource_size_t phys_addr, unsigned long size)
+ {
+-      return __ioremap(phys_addr, size, IOR_MODE_CACHED);
++      return __ioremap_caller(phys_addr, size, _PAGE_CACHE_WB,
++                              __builtin_return_address(0));
   }
+ EXPORT_SYMBOL(ioremap_cache);
   
-+static inline int range_is_allowed(unsigned long pfn, unsigned long size)
++#ifndef CONFIG_XEN
++static void __iomem *ioremap_default(resource_size_t phys_addr,
++                                      unsigned long size)
  +{
-+#ifdef CONFIG_NONPROMISC_DEVMEM
-+      u64 from = ((u64)pfn) << PAGE_SHIFT;
-+      u64 to = from + size;
-+      u64 cursor = from;
++      unsigned long flags;
++      void *ret;
++      int err;
  +
-+      while (cursor < to) {
-+              if (!devmem_is_allowed(pfn)) {
-+                      printk(KERN_INFO
-+              "Program %s tried to access /dev/mem between %Lx->%Lx.\n",
-+                              current->comm, from, to);
-+                      return 0;
-+              }
-+              cursor += PAGE_SIZE;
-+              pfn++;
-+      }
-+#endif
-+      return 1;
-+}
++      /*
++       * - WB for WB-able memory and no other conflicting mappings
++       * - UC_MINUS for non-WB-able memory with no other conflicting mappings
++       * - Inherit from confliting mappings otherwise
++       */
++      err = reserve_memtype(phys_addr, phys_addr + size, -1, &flags);
++      if (err < 0)
++              return NULL;
  +
- /*
-  * This funcion reads the *physical* memory. The f_pos points directly to the 
-  * memory location. 
-@@ -55,6 +76,9 @@ static ssize_t read_mem(struct file * fi
- 
-               sz = min_t(unsigned long, sz, count);
- 
-+              if (!range_is_allowed(p >> PAGE_SHIFT, count))
-+                      return -EPERM;
++      ret = (void *) __ioremap_caller(phys_addr, size, flags,
++                                      __builtin_return_address(0));
  +
-               v = ioremap(p, sz);
-               if (IS_ERR(v) || v == NULL) {
-                       /*
-@@ -103,6 +127,9 @@ static ssize_t write_mem(struct file * f
++      free_memtype(phys_addr, phys_addr + size);
++      return (void __iomem *)ret;
++}
++#endif
++
+ /**
+  * iounmap - Free a IO remapping
+  * @addr: virtual address from ioremap_*
+@@ -417,15 +544,7 @@ void iounmap(volatile void __iomem *addr
+               return;
+       }
   
-               sz = min_t(unsigned long, sz, count);
+-      if ((p->flags >> 20) != IOR_MODE_CACHED) {
+-              unsigned long n = get_vm_area_size(p) >> PAGE_SHIFT;
+-              unsigned long mfn = p->phys_addr;
+-              unsigned long va = (unsigned long)addr;
+-
+-              for (; n > 0; n--, mfn++, va += PAGE_SIZE)
+-                      if (mfn_to_local_pfn(mfn) < max_pfn)
+-                              set_memory_wb(va, 1);
+-      }
++      free_memtype(p->phys_addr, p->phys_addr + get_vm_area_size(p));
   
-+              if (!range_is_allowed(p >> PAGE_SHIFT, sz))
-+                      return -EPERM;
-+
-               v = ioremap(p, sz);
-               if (v == NULL)
-                       break;
-@@ -131,6 +158,23 @@ static ssize_t write_mem(struct file * f
+       /* Finally remove it */
+       o = remove_vm_area((void *)addr);
+@@ -434,6 +553,37 @@ void iounmap(volatile void __iomem *addr
   }
+ EXPORT_SYMBOL(iounmap);
   
- #ifndef ARCH_HAS_DEV_MEM_MMAP_MEM
-+static void mmap_mem_open(struct vm_area_struct *vma)
-+{
-+      map_devmem(vma->vm_pgoff,  vma->vm_end - vma->vm_start,
-+                      vma->vm_page_prot);
-+}
-+
-+static void mmap_mem_close(struct vm_area_struct *vma)
++#ifndef CONFIG_XEN
++/*
++ * Convert a physical pointer to a virtual kernel pointer for /dev/mem
++ * access
++ */
++void *xlate_dev_mem_ptr(unsigned long phys)
  +{
-+      unmap_devmem(vma->vm_pgoff,  vma->vm_end - vma->vm_start,
-+                      vma->vm_page_prot);
-+}
++      void *addr;
++      unsigned long start = phys & PAGE_MASK;
  +
-+static struct vm_operations_struct mmap_mem_ops = {
-+      .open  = mmap_mem_open,
-+      .close = mmap_mem_close
-+};
++      /* If page is RAM, we can use __va. Otherwise ioremap and unmap. */
++      if (page_is_ram(start >> PAGE_SHIFT))
++              return __va(phys);
  +
- static int xen_mmap_mem(struct file * file, struct vm_area_struct * vma)
- {
-       size_t size = vma->vm_end - vma->vm_start;
-@@ -138,6 +182,15 @@ static int xen_mmap_mem(struct file * fi
-       if (uncached_access(file))
-               vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
- 
-+      if (!range_is_allowed(vma->vm_pgoff, size))
-+              return -EPERM;
++      addr = (void *)ioremap_default(start, PAGE_SIZE);
++      if (addr)
++              addr = (void *)((unsigned long)addr | (phys & ~PAGE_MASK));
  +
-+      if (!phys_mem_access_prot_allowed(file, vma->vm_pgoff, size,
-+                                              &vma->vm_page_prot))
-+              return -EINVAL;
++      return addr;
++}
  +
-+      vma->vm_ops = &mmap_mem_ops;
++void unxlate_dev_mem_ptr(unsigned long phys, void *addr)
++{
++      if (page_is_ram(phys >> PAGE_SHIFT))
++              return;
  +
-       /* We want to return the real error code, not EAGAIN. */
-       return direct_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
-                                     size, vma->vm_page_prot, DOMID_IO);
---- a/drivers/xen/console/console.c
-+++ b/drivers/xen/console/console.c
-@@ -536,16 +536,18 @@ static int xencons_write(
-       return i;
- }
- 
--static void xencons_put_char(struct tty_struct *tty, u_char ch)
-+static int xencons_put_char(struct tty_struct *tty, u_char ch)
- {
-       unsigned long flags;
-+      int ret;
- 
-       if (DUMMY_TTY(tty))
--              return;
-+              return 0;
- 
-       spin_lock_irqsave(&xencons_lock, flags);
--      (void)__xencons_put_char(ch);
-+      ret = __xencons_put_char(ch);
-       spin_unlock_irqrestore(&xencons_lock, flags);
-+      return ret;
- }
- 
- static void xencons_flush_chars(struct tty_struct *tty)
-@@ -567,7 +569,7 @@ static void xencons_wait_until_sent(stru
-       if (DUMMY_TTY(tty))
-               return;
- 
--      while (DRV(tty->driver)->chars_in_buffer(tty)) {
-+      while (tty_chars_in_buffer(tty)) {
-               set_current_state(TASK_INTERRUPTIBLE);
-               schedule_timeout(1);
-               if (signal_pending(current))
-@@ -616,8 +618,7 @@ static void xencons_close(struct tty_str
- 
-       tty->closing = 1;
-       tty_wait_until_sent(tty, 0);
--      if (DRV(tty->driver)->flush_buffer != NULL)
--              DRV(tty->driver)->flush_buffer(tty);
-+      tty_driver_flush_buffer(tty);
-       if (tty->ldisc.flush_buffer != NULL)
-               tty->ldisc.flush_buffer(tty);
-       tty->closing = 0;
---- a/drivers/xen/core/machine_kexec.c
-+++ b/drivers/xen/core/machine_kexec.c
-@@ -90,6 +90,9 @@ void __init xen_machine_kexec_setup_reso
-       xen_hypervisor_res.start = range.start;
-       xen_hypervisor_res.end = range.start + range.size - 1;
-       xen_hypervisor_res.flags = IORESOURCE_BUSY | IORESOURCE_MEM;
-+#ifdef CONFIG_X86_64
-+      insert_resource(&iomem_resource, &xen_hypervisor_res);
++      iounmap((void __iomem *)((unsigned long)addr & PAGE_MASK));
++      return;
++}
  +#endif
++
+ int __initdata early_ioremap_debug;
   
-       /* fill in crashk_res if range is reserved by hypervisor */
- 
-@@ -102,6 +105,9 @@ void __init xen_machine_kexec_setup_reso
-       if (range.size) {
-               crashk_res.start = range.start;
-               crashk_res.end = range.start + range.size - 1;
-+#ifdef CONFIG_X86_64
-+              insert_resource(&iomem_resource, &crashk_res);
-+#endif
-       }
+ static int __init early_ioremap_debug_setup(char *str)
+@@ -445,8 +595,8 @@ static int __init early_ioremap_debug_se
+ early_param("early_ioremap_debug", early_ioremap_debug_setup);
   
-       /* get physical address of vmcoreinfo */
-@@ -146,11 +152,13 @@ void __init xen_machine_kexec_setup_reso
-       return;
- }
+ static __initdata int after_paging_init;
+-static __initdata pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)]
+-                              __attribute__((aligned(PAGE_SIZE)));
++static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)]
++              __section(.bss.page_aligned);
   
-+#ifndef CONFIG_X86_64
- void __init xen_machine_kexec_register_resources(struct resource *res)
- {
-       request_resource(res, &xen_hypervisor_res);
-       machine_kexec_register_resources(res);
+ #ifdef CONFIG_X86_32
+ static inline pmd_t * __init early_ioremap_pmd(unsigned long addr)
+@@ -461,8 +611,8 @@ static inline pmd_t * __init early_iorem
   }
-+#endif
+ #else
+ #define early_ioremap_pmd early_get_pmd
++#undef make_lowmem_page_readonly
+ #define make_lowmem_page_readonly early_make_page_readonly
+-#define make_lowmem_page_writable make_page_writable
+ #endif
   
- static void setup_load_arg(xen_kexec_image_t *xki, struct kimage *image)
- {
---- a/drivers/xen/core/machine_reboot.c
-+++ b/drivers/xen/core/machine_reboot.c
-@@ -52,6 +52,14 @@ void machine_power_off(void)
-       HYPERVISOR_shutdown(SHUTDOWN_poweroff);
+ static inline pte_t * __init early_ioremap_pte(unsigned long addr)
+@@ -512,7 +662,7 @@ void __init early_ioremap_clear(void)
+       pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
+       pmd_clear(pmd);
+       make_lowmem_page_writable(bm_pte, XENFEAT_writable_page_tables);
+-      /* paravirt_release_pt(__pa(bm_pte) >> PAGE_SHIFT); */
++      /* paravirt_release_pte(__pa(bm_pte) >> PAGE_SHIFT); */
+       __flush_tlb_all();
   }
   
-+#ifdef CONFIG_KEXEC
-+#include <asm/reboot.h>
-+void machine_crash_shutdown(struct pt_regs *regs)
-+{
-+      native_machine_crash_shutdown(regs);
-+}
-+#endif
-+
- int reboot_thru_bios = 0;     /* for dmi_scan.c */
- EXPORT_SYMBOL(machine_restart);
- EXPORT_SYMBOL(machine_halt);
---- a/drivers/xen/core/smpboot.c
-+++ b/drivers/xen/core/smpboot.c
-@@ -57,17 +57,16 @@ static DEFINE_PER_CPU(int, callfunc_irq)
- static char resched_name[NR_CPUS][15];
- static char callfunc_name[NR_CPUS][15];
+@@ -654,10 +804,11 @@ void __init early_iounmap(void *addr, un
+       unsigned long offset;
+       unsigned int nrpages;
+       enum fixed_addresses idx;
+-      unsigned int nesting;
++      int nesting;
   
--u8 cpu_2_logical_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
-+#ifdef CONFIG_X86_LOCAL_APIC
-+#define set_cpu_to_apicid(cpu, apicid) (per_cpu(x86_cpu_to_apicid, cpu) = (apicid))
-+#else
-+#define set_cpu_to_apicid(cpu, apicid)
-+#endif
+       nesting = --early_ioremap_nested;
+-      WARN_ON(nesting < 0);
++      if (WARN_ON(nesting < 0))
++              return;
   
- DEFINE_PER_CPU(cpumask_t, cpu_sibling_map);
- DEFINE_PER_CPU(cpumask_t, cpu_core_map);
- EXPORT_PER_CPU_SYMBOL(cpu_core_map);
+       if (early_ioremap_debug) {
+               printk(KERN_INFO "early_iounmap(%p, %08lx) [%d]\n", addr,
+--- sle11-2009-05-14.orig/arch/x86/mm/pageattr-xen.c   2009-03-16 16:37:14.000000000 +0100
++++ sle11-2009-05-14/arch/x86/mm/pageattr-xen.c        2009-03-16 16:38:05.000000000 +0100
+@@ -9,6 +9,8 @@
+ #include <linux/slab.h>
+ #include <linux/mm.h>
+ #include <linux/interrupt.h>
++#include <linux/seq_file.h>
++#include <linux/debugfs.h>
   
--#if defined(__i386__)
--DEFINE_PER_CPU(u8, x86_cpu_to_apicid) = BAD_APICID;
--EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid);
+ #include <asm/e820.h>
+ #include <asm/processor.h>
+@@ -17,370 +19,7 @@
+ #include <asm/uaccess.h>
+ #include <asm/pgalloc.h>
+ #include <asm/proto.h>
+-#include <asm/mmu_context.h>
+-
+-#ifndef CONFIG_X86_64
+-#define TASK_SIZE64 TASK_SIZE
+-#endif
+-
+-static void _pin_lock(struct mm_struct *mm, int lock) {
+-      if (lock)
+-              spin_lock(&mm->page_table_lock);
+-#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
+-      /* While mm->page_table_lock protects us against insertions and
+-       * removals of higher level page table pages, it doesn't protect
+-       * against updates of pte-s. Such updates, however, require the
+-       * pte pages to be in consistent state (unpinned+writable or
+-       * pinned+readonly). The pinning and attribute changes, however
+-       * cannot be done atomically, which is why such updates must be
+-       * prevented from happening concurrently.
+-       * Note that no pte lock can ever elsewhere be acquired nesting
+-       * with an already acquired one in the same mm, or with the mm's
+-       * page_table_lock already acquired, as that would break in the
+-       * non-split case (where all these are actually resolving to the
+-       * one page_table_lock). Thus acquiring all of them here is not
+-       * going to result in dead locks, and the order of acquires
+-       * doesn't matter.
+-       */
+-      {
+-              pgd_t *pgd = mm->pgd;
+-              unsigned g;
+-
+-              for (g = 0; g <= ((TASK_SIZE64-1) / PGDIR_SIZE); g++, pgd++) {
+-                      pud_t *pud;
+-                      unsigned u;
+-
+-                      if (pgd_none(*pgd))
+-                              continue;
+-                      pud = pud_offset(pgd, 0);
+-                      for (u = 0; u < PTRS_PER_PUD; u++, pud++) {
+-                              pmd_t *pmd;
+-                              unsigned m;
+-
+-                              if (pud_none(*pud))
+-                                      continue;
+-                              pmd = pmd_offset(pud, 0);
+-                              for (m = 0; m < PTRS_PER_PMD; m++, pmd++) {
+-                                      spinlock_t *ptl;
+-
+-                                      if (pmd_none(*pmd))
+-                                              continue;
+-                                      ptl = pte_lockptr(0, pmd);
+-                                      if (lock)
+-                                              spin_lock(ptl);
+-                                      else
+-                                              spin_unlock(ptl);
+-                              }
+-                      }
+-              }
+-      }
+-#endif
+-      if (!lock)
+-              spin_unlock(&mm->page_table_lock);
+-}
+-#define pin_lock(mm) _pin_lock(mm, 1)
+-#define pin_unlock(mm) _pin_lock(mm, 0)
+-
+-#define PIN_BATCH sizeof(void *)
+-static DEFINE_PER_CPU(multicall_entry_t[PIN_BATCH], pb_mcl);
+-
+-static inline unsigned int pgd_walk_set_prot(struct page *page, pgprot_t flags,
+-                                           unsigned int cpu, unsigned int seq)
+-{
+-      unsigned long pfn = page_to_pfn(page);
+-
+-      if (PageHighMem(page)) {
+-              if (pgprot_val(flags) & _PAGE_RW)
+-                      ClearPagePinned(page);
+-              else
+-                      SetPagePinned(page);
+-      } else {
+-              MULTI_update_va_mapping(per_cpu(pb_mcl, cpu) + seq,
+-                                      (unsigned long)__va(pfn << PAGE_SHIFT),
+-                                      pfn_pte(pfn, flags), 0);
+-              if (unlikely(++seq == PIN_BATCH)) {
+-                      if (unlikely(HYPERVISOR_multicall_check(per_cpu(pb_mcl, cpu),
+-                                                              PIN_BATCH, NULL)))
+-                              BUG();
+-                      seq = 0;
+-              }
+-      }
+-
+-      return seq;
+-}
+-
+-static void pgd_walk(pgd_t *pgd_base, pgprot_t flags)
+-{
+-      pgd_t       *pgd = pgd_base;
+-      pud_t       *pud;
+-      pmd_t       *pmd;
+-      int          g,u,m;
+-      unsigned int cpu, seq;
+-      multicall_entry_t *mcl;
+-
+-      if (xen_feature(XENFEAT_auto_translated_physmap))
+-              return;
+-
+-      cpu = get_cpu();
+-
+-      /*
+-       * Cannot iterate up to USER_PTRS_PER_PGD on x86-64 as these pagetables
+-       * may not be the 'current' task's pagetables (e.g., current may be
+-       * 32-bit, but the pagetables may be for a 64-bit task).
+-       * Subtracting 1 from TASK_SIZE64 means the loop limit is correct
+-       * regardless of whether TASK_SIZE64 is a multiple of PGDIR_SIZE.
+-       */
+-      for (g = 0, seq = 0; g <= ((TASK_SIZE64-1) / PGDIR_SIZE); g++, pgd++) {
+-              if (pgd_none(*pgd))
+-                      continue;
+-              pud = pud_offset(pgd, 0);
+-              if (PTRS_PER_PUD > 1) /* not folded */
+-                      seq = pgd_walk_set_prot(virt_to_page(pud),flags,cpu,seq);
+-              for (u = 0; u < PTRS_PER_PUD; u++, pud++) {
+-                      if (pud_none(*pud))
+-                              continue;
+-                      pmd = pmd_offset(pud, 0);
+-                      if (PTRS_PER_PMD > 1) /* not folded */
+-                              seq = pgd_walk_set_prot(virt_to_page(pmd),flags,cpu,seq);
+-                      for (m = 0; m < PTRS_PER_PMD; m++, pmd++) {
+-                              if (pmd_none(*pmd))
+-                                      continue;
+-                              seq = pgd_walk_set_prot(pmd_page(*pmd),flags,cpu,seq);
+-                      }
+-              }
+-      }
+-
+-      mcl = per_cpu(pb_mcl, cpu);
+-#ifdef CONFIG_X86_64
+-      if (unlikely(seq > PIN_BATCH - 2)) {
+-              if (unlikely(HYPERVISOR_multicall_check(mcl, seq, NULL)))
+-                      BUG();
+-              seq = 0;
+-      }
+-      MULTI_update_va_mapping(mcl + seq,
+-             (unsigned long)__user_pgd(pgd_base),
+-             pfn_pte(virt_to_phys(__user_pgd(pgd_base))>>PAGE_SHIFT, flags),
+-             0);
+-      MULTI_update_va_mapping(mcl + seq + 1,
+-             (unsigned long)pgd_base,
+-             pfn_pte(virt_to_phys(pgd_base)>>PAGE_SHIFT, flags),
+-             UVMF_TLB_FLUSH);
+-      if (unlikely(HYPERVISOR_multicall_check(mcl, seq + 2, NULL)))
+-              BUG();
+-#else
+-      if (likely(seq != 0)) {
+-              MULTI_update_va_mapping(per_cpu(pb_mcl, cpu) + seq,
+-                      (unsigned long)pgd_base,
+-                      pfn_pte(virt_to_phys(pgd_base)>>PAGE_SHIFT, flags),
+-                      UVMF_TLB_FLUSH);
+-              if (unlikely(HYPERVISOR_multicall_check(per_cpu(pb_mcl, cpu),
+-                                                      seq + 1, NULL)))
+-                      BUG();
+-      } else if(HYPERVISOR_update_va_mapping((unsigned long)pgd_base,
+-                      pfn_pte(virt_to_phys(pgd_base)>>PAGE_SHIFT, flags),
+-                      UVMF_TLB_FLUSH))
+-              BUG();
+-#endif
+-
+-      put_cpu();
+-}
+-
+-static void __pgd_pin(pgd_t *pgd)
+-{
+-      pgd_walk(pgd, PAGE_KERNEL_RO);
+-      kmap_flush_unused();
+-      xen_pgd_pin(__pa(pgd)); /* kernel */
+-#ifdef CONFIG_X86_64
+-      xen_pgd_pin(__pa(__user_pgd(pgd))); /* user */
+-#endif
+-      SetPagePinned(virt_to_page(pgd));
+-}
+-
+-static void __pgd_unpin(pgd_t *pgd)
+-{
+-      xen_pgd_unpin(__pa(pgd));
+-#ifdef CONFIG_X86_64
+-      xen_pgd_unpin(__pa(__user_pgd(pgd)));
+-#endif
+-      pgd_walk(pgd, PAGE_KERNEL);
+-      ClearPagePinned(virt_to_page(pgd));
+-}
+-
+-void pgd_test_and_unpin(pgd_t *pgd)
+-{
+-      if (PagePinned(virt_to_page(pgd)))
+-              __pgd_unpin(pgd);
+-}
+-
+-void mm_pin(struct mm_struct *mm)
+-{
+-      if (xen_feature(XENFEAT_writable_page_tables))
+-              return;
+-
+-      pin_lock(mm);
+-      __pgd_pin(mm->pgd);
+-      pin_unlock(mm);
+-}
+-
+-void mm_unpin(struct mm_struct *mm)
+-{
+-      if (xen_feature(XENFEAT_writable_page_tables))
+-              return;
+-
+-      pin_lock(mm);
+-      __pgd_unpin(mm->pgd);
+-      pin_unlock(mm);
+-}
+-
+-void mm_pin_all(void)
+-{
+-      struct page *page;
+-      unsigned long flags;
+-
+-      if (xen_feature(XENFEAT_writable_page_tables))
+-              return;
+-
+-      /*
+-       * Allow uninterrupted access to the pgd_list. Also protects
+-       * __pgd_pin() by disabling preemption.
+-       * All other CPUs must be at a safe point (e.g., in stop_machine
+-       * or offlined entirely).
+-       */
+-      spin_lock_irqsave(&pgd_lock, flags);
+-      list_for_each_entry(page, &pgd_list, lru) {
+-              if (!PagePinned(page))
+-                      __pgd_pin((pgd_t *)page_address(page));
+-      }
+-      spin_unlock_irqrestore(&pgd_lock, flags);
+-}
+-
+-void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
+-{
+-      if (!PagePinned(virt_to_page(mm->pgd)))
+-              mm_pin(mm);
+-}
+-
+-void arch_exit_mmap(struct mm_struct *mm)
+-{
+-      struct task_struct *tsk = current;
+-
+-      task_lock(tsk);
+-
+-      /*
+-       * We aggressively remove defunct pgd from cr3. We execute unmap_vmas()
+-       * *much* faster this way, as no tlb flushes means bigger wrpt batches.
+-       */
+-      if (tsk->active_mm == mm) {
+-              tsk->active_mm = &init_mm;
+-              atomic_inc(&init_mm.mm_count);
+-
+-              switch_mm(mm, &init_mm, tsk);
+-
+-              atomic_dec(&mm->mm_count);
+-              BUG_ON(atomic_read(&mm->mm_count) == 0);
+-      }
+-
+-      task_unlock(tsk);
+-
+-      if (PagePinned(virt_to_page(mm->pgd))
+-          && atomic_read(&mm->mm_count) == 1
+-          && !mm->context.has_foreign_mappings)
+-              mm_unpin(mm);
+-}
+-
+-static void _pte_free(struct page *page, unsigned int order)
+-{
+-      BUG_ON(order);
+-      __pte_free(page);
+-}
+-
+-pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
+-{
+-      struct page *pte;
+-
+-#ifdef CONFIG_HIGHPTE
+-      pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT|__GFP_ZERO, 0);
+-#else
+-      pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
  -#endif
+-      if (pte) {
+-              pgtable_page_ctor(pte);
+-              SetPageForeign(pte, _pte_free);
+-              init_page_count(pte);
+-      }
+-      return pte;
+-}
  -
- void __init prefill_possible_map(void)
- {
-       int i, rc;
-@@ -158,7 +157,7 @@ static int __cpuinit xen_smp_intr_init(u
- }
- 
- #ifdef CONFIG_HOTPLUG_CPU
--static void xen_smp_intr_exit(unsigned int cpu)
-+static void __cpuexit xen_smp_intr_exit(unsigned int cpu)
- {
-       if (cpu != 0)
-               local_teardown_timer(cpu);
-@@ -267,8 +266,7 @@ void __init smp_prepare_cpus(unsigned in
-       boot_cpu_data.apicid = apicid;
-       cpu_data(0) = boot_cpu_data;
- 
--      cpu_2_logical_apicid[0] = apicid;
--      per_cpu(x86_cpu_to_apicid, 0) = apicid;
-+      set_cpu_to_apicid(0, apicid);
- 
-       current_thread_info()->cpu = 0;
- 
-@@ -323,8 +321,7 @@ void __init smp_prepare_cpus(unsigned in
-               cpu_data(cpu).cpu_index = cpu;
-               cpu_data(cpu).apicid = apicid;
- 
--              cpu_2_logical_apicid[cpu] = apicid;
--              per_cpu(x86_cpu_to_apicid, cpu) = apicid;
-+              set_cpu_to_apicid(cpu, apicid);
- 
- #ifdef __x86_64__
-               cpu_pda(cpu)->pcurrent = idle;
-@@ -379,7 +376,7 @@ static int __init initialize_cpu_present
- }
- core_initcall(initialize_cpu_present_map);
- 
--int __cpu_disable(void)
-+int __cpuexit __cpu_disable(void)
- {
-       cpumask_t map = cpu_online_map;
-       unsigned int cpu = smp_processor_id();
-@@ -396,7 +393,7 @@ int __cpu_disable(void)
-       return 0;
- }
- 
--void __cpu_die(unsigned int cpu)
-+void __cpuexit __cpu_die(unsigned int cpu)
- {
-       while (HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL)) {
-               current->state = TASK_UNINTERRUPTIBLE;
---- a/drivers/xen/core/xen_proc.c
-+++ b/drivers/xen/core/xen_proc.c
-@@ -8,7 +8,7 @@ static struct proc_dir_entry *xen_base;
- struct proc_dir_entry *create_xen_proc_entry(const char *name, mode_t mode)
- {
-       if ( xen_base == NULL )
--              if ( (xen_base = proc_mkdir("xen", &proc_root)) == NULL )
-+              if ( (xen_base = proc_mkdir("xen", NULL)) == NULL )
-                       panic("Couldn't create /proc/xen");
-       return create_proc_entry(name, mode, xen_base);
- }
---- a/drivers/xen/fbfront/xenfb.c
-+++ b/drivers/xen/fbfront/xenfb.c
-@@ -94,7 +94,7 @@ struct xenfb_info
-  *    only mappings.  The former creates unfaulted pages.  Preserves
-  *    invariant.  The latter removes pages.  Preserves invariant.
-  *
-- * 3. Holding both locks: xenfb_vm_nopage().  Extends the dirty
-+ * 3. Holding both locks: xenfb_vm_fault().  Extends the dirty
-  *    rectangle and updates mappings consistently.  Preserves
-  *    invariant.
-  *
-@@ -113,13 +113,13 @@ struct xenfb_info
-  *
-  * But FIXME: the invariant is too weak.  It misses that the fault
-  * record in mappings must be consistent with the mapping of pages in
-- * the associated address space!  do_no_page() updates the PTE after
-- * xenfb_vm_nopage() returns, i.e. outside the critical region.  This
-+ * the associated address space!  __do_fault() updates the PTE after
-+ * xenfb_vm_fault() returns, i.e. outside the critical region.  This
-  * allows the following race:
-  *
-  * X writes to some address in the Xen frame buffer
-- * Fault - call do_no_page()
-- *     call xenfb_vm_nopage()
-+ * Fault - call __do_fault()
-+ *     call xenfb_vm_fault()
-  *         grab mm_lock
-  *         map->faults++;
-  *         release mm_lock
-@@ -386,18 +386,17 @@ static void xenfb_vm_close(struct vm_are
-       mutex_unlock(&info->mm_lock);
- }
- 
--static struct page *xenfb_vm_nopage(struct vm_area_struct *vma,
--                                  unsigned long vaddr, int *type)
-+static int xenfb_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
- {
-       struct xenfb_mapping *map = vma->vm_private_data;
-       struct xenfb_info *info = map->info;
--      int pgnr = (vaddr - vma->vm_start) >> PAGE_SHIFT;
-+      int pgnr = ((long)vmf->virtual_address - vma->vm_start) >> PAGE_SHIFT;
-       unsigned long flags;
-       struct page *page;
-       int y1, y2;
- 
-       if (pgnr >= info->nr_pages)
--              return NOPAGE_SIGBUS;
-+              return VM_FAULT_SIGBUS;
- 
-       mutex_lock(&info->mm_lock);
-       spin_lock_irqsave(&info->dirty_lock, flags);
-@@ -413,16 +412,15 @@ static struct page *xenfb_vm_nopage(stru
-       spin_unlock_irqrestore(&info->dirty_lock, flags);
-       mutex_unlock(&info->mm_lock);
- 
--      if (type)
--              *type = VM_FAULT_MINOR;
-+      vmf->page = page;
- 
--      return page;
-+      return VM_FAULT_MINOR;
- }
- 
- static struct vm_operations_struct xenfb_vm_ops = {
-       .open   = xenfb_vm_open,
-       .close  = xenfb_vm_close,
--      .nopage = xenfb_vm_nopage,
-+      .fault  = xenfb_vm_fault,
- };
- 
- static int xenfb_mmap(struct fb_info *fb_info, struct vm_area_struct *vma)
---- a/drivers/xen/gntdev/gntdev.c
-+++ b/drivers/xen/gntdev/gntdev.c
-@@ -392,7 +392,7 @@ nomem_out:
- static int __init gntdev_init(void)
- {
-       struct class *class;
--      struct class_device *device;
-+      struct device *device;
- 
-       if (!is_running_on_xen()) {
-               printk(KERN_ERR "You must be running Xen to use gntdev\n");
-@@ -417,8 +417,8 @@ static int __init gntdev_init(void)
-               return 0;
-       }
- 
--      device = class_device_create(class, NULL, MKDEV(gntdev_major, 0),
--                                   NULL, GNTDEV_NAME);
-+      device = device_create(class, NULL, MKDEV(gntdev_major, 0),
-+                             GNTDEV_NAME);
-       if (IS_ERR(device)) {
-               printk(KERN_ERR "Error creating gntdev device in xen_class\n");
-               printk(KERN_ERR "gntdev created with major number = %d\n",
-@@ -435,7 +435,7 @@ static void __exit gntdev_exit(void)
- {
-       struct class *class;
-       if ((class = get_xen_class()) != NULL)
--              class_device_destroy(class, MKDEV(gntdev_major, 0));
-+              device_destroy(class, MKDEV(gntdev_major, 0));
-       unregister_chrdev(gntdev_major, GNTDEV_NAME);
- }
- 
---- a/drivers/xen/Kconfig
-+++ b/drivers/xen/Kconfig
-@@ -2,8 +2,6 @@
- # This Kconfig describe xen options
- #
- 
--mainmenu "Xen Configuration"
+-void __pte_free(pgtable_t pte)
+-{
+-      if (!PageHighMem(pte)) {
+-              unsigned long va = (unsigned long)page_address(pte);
+-              unsigned int level;
+-              pte_t *ptep = lookup_address(va, &level);
+-
+-              BUG_ON(!ptep || level != PG_LEVEL_4K || !pte_present(*ptep));
+-              if (!pte_write(*ptep)
+-                  && HYPERVISOR_update_va_mapping(va,
+-                                                  mk_pte(pte, PAGE_KERNEL),
+-                                                  0))
+-                      BUG();
+-      } else
+-#ifdef CONFIG_HIGHPTE
+-              ClearPagePinned(pte);
+-#else
+-              BUG();
+-#endif
+-
+-      ClearPageForeign(pte);
+-      init_page_count(pte);
+-      pgtable_page_dtor(pte);
+-      __free_page(pte);
+-}
+-
+-#if PAGETABLE_LEVELS >= 3
+-static void _pmd_free(struct page *page, unsigned int order)
+-{
+-      BUG_ON(order);
+-      __pmd_free(page);
+-}
+-
+-pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
+-{
+-      struct page *pmd;
+-
+-      pmd = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
+-      if (!pmd)
+-              return NULL;
+-      SetPageForeign(pmd, _pmd_free);
+-      init_page_count(pmd);
+-      return page_address(pmd);
+-}
+-
+-void __pmd_free(pgtable_t pmd)
+-{
+-      unsigned long va = (unsigned long)page_address(pmd);
+-      unsigned int level;
+-      pte_t *ptep = lookup_address(va, &level);
+-
+-      BUG_ON(!ptep || level != PG_LEVEL_4K || !pte_present(*ptep));
+-      if (!pte_write(*ptep)
+-          && HYPERVISOR_update_va_mapping(va, mk_pte(pmd, PAGE_KERNEL), 0))
+-              BUG();
+-
+-      ClearPageForeign(pmd);
+-      init_page_count(pmd);
+-      __free_page(pmd);
+-}
+-#endif
  -
- config XEN
-       bool
- 
---- a/drivers/xen/Makefile
-+++ b/drivers/xen/Makefile
-@@ -1,5 +1,8 @@
--obj-$(CONFIG_PARAVIRT_XEN)    += grant-table.o
-+obj-$(CONFIG_PARAVIRT_XEN)    += grant-table.o features.o events.o
-+xen-xencomm-$(CONFIG_PARAVIRT_XEN) := xencomm.o
-+xen-balloon-$(CONFIG_PARAVIRT_XEN) := balloon.o
+-/* blktap and gntdev need this, as otherwise they would implicitly (and
+- * needlessly, as they never use it) reference init_mm. */
+-pte_t xen_ptep_get_and_clear_full(struct vm_area_struct *vma,
+-                                unsigned long addr, pte_t *ptep, int full)
+-{
+-      return ptep_get_and_clear_full(vma->vm_mm, addr, ptep, full);
+-}
+-EXPORT_SYMBOL_GPL(xen_ptep_get_and_clear_full);
++#include <asm/pat.h>
   
-+xen-balloon-$(CONFIG_XEN)     := balloon/
- obj-$(CONFIG_XEN)             += core/
- obj-$(CONFIG_XEN)             += console/
- obj-$(CONFIG_XEN)             += evtchn/
-@@ -7,7 +10,8 @@ obj-y                         += xenbus/
- obj-$(CONFIG_XEN)             += char/
+ /*
+  * The current flushing context - we pass it instead of 5 arguments:
+@@ -392,6 +31,7 @@ struct cpa_data {
+       int             numpages;
+       int             flushtlb;
+       unsigned long   pfn;
++      unsigned        force_split : 1;
+ };
   
- obj-$(CONFIG_XEN)             += util.o
--obj-$(CONFIG_XEN_BALLOON)             += balloon/
-+obj-$(CONFIG_XEN_XENCOMM)     += $(xen-xencomm-y)
-+obj-$(CONFIG_XEN_BALLOON)             += $(xen-balloon-y)
- obj-$(CONFIG_XEN_BLKDEV_BACKEND)      += blkback/
- obj-$(CONFIG_XEN_BLKDEV_TAP)          += blktap/
- obj-$(CONFIG_XEN_NETDEV_BACKEND)      += netback/
---- a/drivers/xen/netfront/netfront.c
-+++ b/drivers/xen/netfront/netfront.c
-@@ -1464,8 +1464,7 @@ err:     
-               }
-       }
+ #ifdef CONFIG_X86_64
+@@ -637,6 +277,9 @@ try_preserve_large_page(pte_t *kpte, uns
+       int i, do_split = 1;
+       unsigned int level;
   
--      while ((skb = __skb_dequeue(&errq)))
--              kfree_skb(skb);
-+      __skb_queue_purge(&errq);
++      if (cpa->force_split)
++              return 1;
++
+       spin_lock_irqsave(&pgd_lock, flags);
+       /*
+        * Check for races, another CPU might have split this page
+@@ -856,9 +499,7 @@ static int split_large_page(pte_t *kpte,
+               goto out_unlock;
   
-       while ((skb = __skb_dequeue(&rxq)) != NULL) {
-               struct page *page = NETFRONT_SKB_CB(skb)->page;
-@@ -1630,8 +1629,7 @@ static void netif_release_rx_bufs_flip(s
-               }
-       }
+       pbase = (pte_t *)page_address(base);
+-#ifdef CONFIG_X86_32
+-      paravirt_alloc_pt(&init_mm, page_to_pfn(base));
+-#endif
++      paravirt_alloc_pte(&init_mm, page_to_pfn(base));
+       ref_prot = pte_pgprot(pte_clrhuge(*kpte));
   
--      while ((skb = __skb_dequeue(&free_list)) != NULL)
--              dev_kfree_skb(skb);
-+      __skb_queue_purge(&free_list);
+ #ifdef CONFIG_X86_64
+@@ -919,7 +560,7 @@ static int __change_page_attr(struct cpa
+ repeat:
+       kpte = lookup_address(address, &level);
+       if (!kpte)
+-              return primary ? -EINVAL : 0;
++              return 0;
   
-       spin_unlock_bh(&np->rx_lock);
- }
---- a/drivers/xen/privcmd/privcmd.c
-+++ b/drivers/xen/privcmd/privcmd.c
-@@ -261,15 +261,13 @@ static long privcmd_ioctl(struct file *f
+       old_pte = *kpte;
+       if (!__pte_val(old_pte)) {
+@@ -1078,7 +719,8 @@ static inline int cache_attr(pgprot_t at
   }
   
- #ifndef HAVE_ARCH_PRIVCMD_MMAP
--static struct page *privcmd_nopage(struct vm_area_struct *vma,
--                                 unsigned long address,
--                                 int *type)
-+static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+ static int change_page_attr_set_clr(unsigned long addr, int numpages,
+-                                  pgprot_t mask_set, pgprot_t mask_clr)
++                                  pgprot_t mask_set, pgprot_t mask_clr,
++                                  int force_split)
   {
--      return NOPAGE_SIGBUS;
-+      return VM_FAULT_SIGBUS;
- }
- 
- static struct vm_operations_struct privcmd_vm_ops = {
--      .nopage = privcmd_nopage
-+      .fault = privcmd_fault
- };
+       struct cpa_data cpa;
+       int ret, cache, checkalias;
+@@ -1089,7 +731,7 @@ static int change_page_attr_set_clr(unsi
+        */
+       mask_set = canon_pgprot(mask_set);
+       mask_clr = canon_pgprot(mask_clr);
+-      if (!pgprot_val(mask_set) && !pgprot_val(mask_clr))
++      if (!pgprot_val(mask_set) && !pgprot_val(mask_clr) && !force_split)
+               return 0;
   
- static int privcmd_mmap(struct file * file, struct vm_area_struct * vma)
---- a/drivers/xen/xenbus/xenbus_client.c
-+++ b/drivers/xen/xenbus/xenbus_client.c
-@@ -440,7 +440,7 @@ int xenbus_map_ring_valloc(struct xenbus
+       /* Ensure we are PAGE_SIZE aligned */
+@@ -1106,6 +748,7 @@ static int change_page_attr_set_clr(unsi
+       cpa.mask_set = mask_set;
+       cpa.mask_clr = mask_clr;
+       cpa.flushtlb = 0;
++      cpa.force_split = force_split;
   
-       *vaddr = NULL;
+       /* No alias checking for _NX bit modifications */
+       checkalias = (pgprot_val(mask_set) | pgprot_val(mask_clr)) != _PAGE_NX;
+@@ -1144,26 +787,67 @@ out:
+ static inline int change_page_attr_set(unsigned long addr, int numpages,
+                                      pgprot_t mask)
+ {
+-      return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0));
++      return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0), 0);
+ }
   
--      area = alloc_vm_area(PAGE_SIZE);
-+      area = xen_alloc_vm_area(PAGE_SIZE);
-       if (!area)
-               return -ENOMEM;
+ static inline int change_page_attr_clear(unsigned long addr, int numpages,
+                                        pgprot_t mask)
+ {
+-      return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask);
++      return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask, 0);
+ }
   
-@@ -450,7 +450,7 @@ int xenbus_map_ring_valloc(struct xenbus
-               BUG();
+-int set_memory_uc(unsigned long addr, int numpages)
++int _set_memory_uc(unsigned long addr, int numpages)
+ {
++      /*
++       * for now UC MINUS. see comments in ioremap_nocache()
++       */
+       return change_page_attr_set(addr, numpages,
+-                                  __pgprot(_PAGE_PCD));
++                                  __pgprot(_PAGE_CACHE_UC_MINUS));
++}
++
++int set_memory_uc(unsigned long addr, int numpages)
++{
++      /*
++       * for now UC MINUS. see comments in ioremap_nocache()
++       */
++      if (reserve_memtype(addr, addr + numpages * PAGE_SIZE,
++                          _PAGE_CACHE_UC_MINUS, NULL))
++              return -EINVAL;
++
++      return _set_memory_uc(addr, numpages);
+ }
+ EXPORT_SYMBOL(set_memory_uc);
   
-       if (op.status != GNTST_okay) {
--              free_vm_area(area);
-+              xen_free_vm_area(area);
-               xenbus_dev_fatal(dev, op.status,
-                                "mapping in shared page %d from domain %d",
-                                gnt_ref, dev->otherend_id);
-@@ -549,7 +549,7 @@ int xenbus_unmap_ring_vfree(struct xenbu
-               BUG();
+-int set_memory_wb(unsigned long addr, int numpages)
++int _set_memory_wc(unsigned long addr, int numpages)
++{
++      return change_page_attr_set(addr, numpages,
++                                  __pgprot(_PAGE_CACHE_WC));
++}
++
++int set_memory_wc(unsigned long addr, int numpages)
++{
++      if (!pat_wc_enabled)
++              return set_memory_uc(addr, numpages);
++
++      if (reserve_memtype(addr, addr + numpages * PAGE_SIZE,
++              _PAGE_CACHE_WC, NULL))
++              return -EINVAL;
++
++      return _set_memory_wc(addr, numpages);
++}
++EXPORT_SYMBOL(set_memory_wc);
++
++int _set_memory_wb(unsigned long addr, int numpages)
+ {
+       return change_page_attr_clear(addr, numpages,
+-                                    __pgprot(_PAGE_PCD | _PAGE_PWT));
++                                    __pgprot(_PAGE_CACHE_MASK));
++}
++
++int set_memory_wb(unsigned long addr, int numpages)
++{
++      free_memtype(addr, addr + numpages * PAGE_SIZE);
++
++      return _set_memory_wb(addr, numpages);
+ }
+ EXPORT_SYMBOL(set_memory_wb);
   
-       if (op.status == GNTST_okay)
--              free_vm_area(area);
-+              xen_free_vm_area(area);
-       else
-               xenbus_dev_error(dev, op.status,
-                                "unmapping page at handle %d error %d",
---- a/drivers/xen/xenbus/xenbus_probe.c
-+++ b/drivers/xen/xenbus/xenbus_probe.c
-@@ -173,7 +173,7 @@ static int read_backend_details(struct x
-       return read_otherend_details(xendev, "backend-id", "backend");
+@@ -1194,6 +878,12 @@ int set_memory_np(unsigned long addr, in
+       return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_PRESENT));
   }
   
--#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16) && (defined(CONFIG_XEN) || defined(MODULE))
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
- static int xenbus_uevent_frontend(struct device *dev, struct kobj_uevent_env *env)
++int set_memory_4k(unsigned long addr, int numpages)
++{
++      return change_page_attr_set_clr(addr, numpages, __pgprot(0),
++                                      __pgprot(0), 1);
++}
++
+ int set_pages_uc(struct page *page, int numpages)
   {
-       struct xenbus_device *xdev;
-@@ -185,8 +185,10 @@ static int xenbus_uevent_frontend(struct
-               return -ENODEV;
+       unsigned long addr = (unsigned long)page_address(page);
+@@ -1303,6 +993,45 @@ void kernel_map_pages(struct page *page,
+       cpa_fill_pool(NULL);
+ }
+ 
++#ifdef CONFIG_DEBUG_FS
++static int dpa_show(struct seq_file *m, void *v)
++{
++      seq_puts(m, "DEBUG_PAGEALLOC\n");
++      seq_printf(m, "pool_size     : %lu\n", pool_size);
++      seq_printf(m, "pool_pages    : %lu\n", pool_pages);
++      seq_printf(m, "pool_low      : %lu\n", pool_low);
++      seq_printf(m, "pool_used     : %lu\n", pool_used);
++      seq_printf(m, "pool_failed   : %lu\n", pool_failed);
++
++      return 0;
++}
++
++static int dpa_open(struct inode *inode, struct file *filp)
++{
++      return single_open(filp, dpa_show, NULL);
++}
++
++static const struct file_operations dpa_fops = {
++      .open           = dpa_open,
++      .read           = seq_read,
++      .llseek         = seq_lseek,
++      .release        = single_release,
++};
++
++static int __init debug_pagealloc_proc_init(void)
++{
++      struct dentry *de;
++
++      de = debugfs_create_file("debug_pagealloc", 0600, NULL, NULL,
++                               &dpa_fops);
++      if (!de)
++              return -ENOMEM;
++
++      return 0;
++}
++__initcall(debug_pagealloc_proc_init);
++#endif
++
+ #ifdef CONFIG_HIBERNATION
   
-       /* stuff we want to pass to /sbin/hotplug */
-+#if defined(CONFIG_XEN) || defined(MODULE)
-       add_uevent_var(env, "XENBUS_TYPE=%s", xdev->devicetype);
-       add_uevent_var(env, "XENBUS_PATH=%s", xdev->nodename);
+ bool kernel_page_present(struct page *page)
+--- /dev/null  1970-01-01 00:00:00.000000000 +0000
++++ sle11-2009-05-14/arch/x86/mm/pat-xen.c     2009-03-16 16:38:05.000000000 +0100
+@@ -0,0 +1,602 @@
++/*
++ * Handle caching attributes in page tables (PAT)
++ *
++ * Authors: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
++ *          Suresh B Siddha <suresh.b.siddha@intel.com>
++ *
++ * Loosely based on earlier PAT patchset from Eric Biederman and Andi Kleen.
++ */
++
++#include <linux/mm.h>
++#include <linux/kernel.h>
++#include <linux/gfp.h>
++#include <linux/fs.h>
++#include <linux/bootmem.h>
++
++#include <asm/msr.h>
++#include <asm/tlbflush.h>
++#include <asm/processor.h>
++#include <asm/page.h>
++#include <asm/pgtable.h>
++#include <asm/pat.h>
++#include <asm/e820.h>
++#include <asm/cacheflush.h>
++#include <asm/fcntl.h>
++#include <asm/mtrr.h>
++#include <asm/io.h>
++
++#ifdef CONFIG_X86_PAT
++int __read_mostly pat_wc_enabled = 1;
++
++void __cpuinit pat_disable(char *reason)
++{
++      pat_wc_enabled = 0;
++      printk(KERN_INFO "%s\n", reason);
++}
++
++static int __init nopat(char *str)
++{
++      pat_disable("PAT support disabled.");
++      return 0;
++}
++early_param("nopat", nopat);
++#endif
++
++static u64 __read_mostly boot_pat_state;
++
++enum {
++      PAT_UC = 0,             /* uncached */
++      PAT_WC = 1,             /* Write combining */
++      PAT_WT = 4,             /* Write Through */
++      PAT_WP = 5,             /* Write Protected */
++      PAT_WB = 6,             /* Write Back (default) */
++      PAT_UC_MINUS = 7,       /* UC, but can be overridden by MTRR */
++};
++
++#define PAT(x,y)      ((u64)PAT_ ## y << ((x)*8))
++
++void pat_init(void)
++{
++      u64 pat;
++
++      if (!pat_wc_enabled)
++              return;
++
++      /* Paranoia check. */
++      if (!cpu_has_pat) {
++              printk(KERN_ERR "PAT enabled, but CPU feature cleared\n");
++              /*
++               * Panic if this happens on the secondary CPU, and we
++               * switched to PAT on the boot CPU. We have no way to
++               * undo PAT.
++              */
++              BUG_ON(boot_pat_state);
++      }
++
++#ifndef CONFIG_XEN
++      /* Set PWT to Write-Combining. All other bits stay the same */
++      /*
++       * PTE encoding used in Linux:
++       *      PAT
++       *      |PCD
++       *      ||PWT
++       *      |||
++       *      000 WB          _PAGE_CACHE_WB
++       *      001 WC          _PAGE_CACHE_WC
++       *      010 UC-         _PAGE_CACHE_UC_MINUS
++       *      011 UC          _PAGE_CACHE_UC
++       * PAT bit unused
++       */
++      pat = PAT(0,WB) | PAT(1,WC) | PAT(2,UC_MINUS) | PAT(3,UC) |
++            PAT(4,WB) | PAT(5,WC) | PAT(6,UC_MINUS) | PAT(7,UC);
++
++      /* Boot CPU check */
++      if (!boot_pat_state)
++              rdmsrl(MSR_IA32_CR_PAT, boot_pat_state);
++
++      wrmsrl(MSR_IA32_CR_PAT, pat);
++#else
++      /*
++       * PAT settings are part of the hypervisor interface, and their
++       * assignment cannot be changed.
++       */
++      rdmsrl(MSR_IA32_CR_PAT, pat);
++      if (!boot_pat_state)
++              boot_pat_state = pat;
++#endif
++      printk(KERN_INFO "x86 PAT enabled: cpu %d, old 0x%Lx, new 0x%Lx\n",
++             smp_processor_id(), boot_pat_state, pat);
++}
++
++#undef PAT
++
++static char *cattr_name(unsigned long flags)
++{
++      switch (flags & _PAGE_CACHE_MASK) {
++              case _PAGE_CACHE_UC:            return "uncached";
++              case _PAGE_CACHE_UC_MINUS:      return "uncached-minus";
++              case _PAGE_CACHE_WB:            return "write-back";
++              case _PAGE_CACHE_WC:            return "write-combining";
++              case _PAGE_CACHE_WP:            return "write-protected";
++              case _PAGE_CACHE_WT:            return "write-through";
++              default:                        return "broken";
++      }
++}
++
++/*
++ * The global memtype list keeps track of memory type for specific
++ * physical memory areas. Conflicting memory types in different
++ * mappings can cause CPU cache corruption. To avoid this we keep track.
++ *
++ * The list is sorted based on starting address and can contain multiple
++ * entries for each address (this allows reference counting for overlapping
++ * areas). All the aliases have the same cache attributes of course.
++ * Zero attributes are represented as holes.
++ *
++ * Currently the data structure is a list because the number of mappings
++ * is expected to be relatively small. If this should be a problem
++ * it could be changed to an rbtree or similar.
++ *
++ * memtype_lock protects the whole list.
++ */
++
++struct memtype {
++      u64 start;
++      u64 end;
++      unsigned long type;
++      struct list_head nd;
++};
++
++static LIST_HEAD(memtype_list);
++static DEFINE_SPINLOCK(memtype_lock);         /* protects memtype list */
++
++/*
++ * Does intersection of PAT memory type and MTRR memory type and returns
++ * the resulting memory type as PAT understands it.
++ * (Type in pat and mtrr will not have same value)
++ * The intersection is based on "Effective Memory Type" tables in IA-32
++ * SDM vol 3a
++ */
++static int pat_x_mtrr_type(u64 start, u64 end, unsigned long prot,
++                              unsigned long *ret_prot)
++{
++      unsigned long pat_type;
++      u8 mtrr_type;
++
++      pat_type = prot & _PAGE_CACHE_MASK;
++      prot &= (~_PAGE_CACHE_MASK);
++
++      /*
++       * We return the PAT request directly for types where PAT takes
++       * precedence with respect to MTRR and for UC_MINUS.
++       * Consistency checking against other PAT requests is done later
++       * while going through memtype list.
++       */
++      if (pat_type == _PAGE_CACHE_WC) {
++              *ret_prot = prot | _PAGE_CACHE_WC;
++              return 0;
++      } else if (pat_type == _PAGE_CACHE_UC_MINUS) {
++              *ret_prot = prot | _PAGE_CACHE_UC_MINUS;
++              return 0;
++      } else if (pat_type == _PAGE_CACHE_UC) {
++              *ret_prot = prot | _PAGE_CACHE_UC;
++              return 0;
++      }
++
++      /*
++       * Look for MTRR hint to get the effective type in case where PAT
++       * request is for WB.
++       */
++      mtrr_type = mtrr_type_lookup(start, end);
++
++      if (mtrr_type == MTRR_TYPE_UNCACHABLE) {
++              *ret_prot = prot | _PAGE_CACHE_UC;
++      } else if (mtrr_type == MTRR_TYPE_WRCOMB) {
++              *ret_prot = prot | _PAGE_CACHE_WC;
++      } else {
++              *ret_prot = prot | _PAGE_CACHE_WB;
++      }
++
++      return 0;
++}
++
++/*
++ * req_type typically has one of these values:
++ * - _PAGE_CACHE_WB
++ * - _PAGE_CACHE_WC
++ * - _PAGE_CACHE_UC_MINUS
++ * - _PAGE_CACHE_UC
++ *
++ * req_type has the special-case value '-1' when the requester wants to inherit
++ * the memory type from mtrr (if WB), existing PAT, defaulting to UC_MINUS.
++ *
++ * If ret_type is NULL, function will return an error if it cannot reserve the
++ * region with req_type. If ret_type is non-null, function will return
++ * available type in ret_type in case of no error. In case of any error
++ * it will return a negative return value.
++ */
++int reserve_memtype(u64 start, u64 end, unsigned long req_type,
++                      unsigned long *ret_type)
++{
++      struct memtype *new_entry = NULL;
++      struct memtype *parse;
++      unsigned long actual_type;
++      int err = 0;
++
++      /* Only track when pat_wc_enabled */
++      if (!pat_wc_enabled) {
++              /* This is identical to page table setting without PAT */
++              if (ret_type) {
++                      if (req_type == -1) {
++                              *ret_type = _PAGE_CACHE_WB;
++                      } else {
++                              *ret_type = req_type;
++                      }
++              }
++              return 0;
++      }
++
++      /* Low ISA region is always mapped WB in page table. No need to track */
++      if (start >= ISA_START_ADDRESS && (end - 1) <= ISA_END_ADDRESS) {
++              if (ret_type)
++                      *ret_type = _PAGE_CACHE_WB;
++
++              return 0;
++      }
++
++      if (req_type == -1) {
++              /*
++               * Call mtrr_lookup to get the type hint. This is an
++               * optimization for /dev/mem mmap'ers into WB memory (BIOS
++               * tools and ACPI tools). Use WB request for WB memory and use
++               * UC_MINUS otherwise.
++               */
++              u8 mtrr_type = mtrr_type_lookup(start, end);
++
++              if (mtrr_type == MTRR_TYPE_WRBACK) {
++                      req_type = _PAGE_CACHE_WB;
++                      actual_type = _PAGE_CACHE_WB;
++              } else {
++                      req_type = _PAGE_CACHE_UC_MINUS;
++                      actual_type = _PAGE_CACHE_UC_MINUS;
++              }
++      } else {
++              req_type &= _PAGE_CACHE_MASK;
++              err = pat_x_mtrr_type(start, end, req_type, &actual_type);
++      }
++
++      if (err) {
++              if (ret_type)
++                      *ret_type = actual_type;
++
++              return -EINVAL;
++      }
++
++      new_entry  = kmalloc(sizeof(struct memtype), GFP_KERNEL);
++      if (!new_entry)
++              return -ENOMEM;
++
++      new_entry->start = start;
++      new_entry->end = end;
++      new_entry->type = actual_type;
++
++      if (ret_type)
++              *ret_type = actual_type;
++
++      spin_lock(&memtype_lock);
++
++      /* Search for existing mapping that overlaps the current range */
++      list_for_each_entry(parse, &memtype_list, nd) {
++              struct memtype *saved_ptr;
++
++              if (parse->start >= end) {
++                      pr_debug("New Entry\n");
++                      list_add(&new_entry->nd, parse->nd.prev);
++                      new_entry = NULL;
++                      break;
++              }
++
++              if (start <= parse->start && end >= parse->start) {
++                      if (actual_type != parse->type && ret_type) {
++                              actual_type = parse->type;
++                              *ret_type = actual_type;
++                              new_entry->type = actual_type;
++                      }
++
++                      if (actual_type != parse->type) {
++                              printk(
++              KERN_INFO "%s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
++                                      current->comm, current->pid,
++                                      start, end,
++                                      cattr_name(actual_type),
++                                      cattr_name(parse->type));
++                              err = -EBUSY;
++                              break;
++                      }
++
++                      saved_ptr = parse;
++                      /*
++                       * Check to see whether the request overlaps more
++                       * than one entry in the list
++                       */
++                      list_for_each_entry_continue(parse, &memtype_list, nd) {
++                              if (end <= parse->start) {
++                                      break;
++                              }
++
++                              if (actual_type != parse->type) {
++                                      printk(
++              KERN_INFO "%s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
++                                              current->comm, current->pid,
++                                              start, end,
++                                              cattr_name(actual_type),
++                                              cattr_name(parse->type));
++                                      err = -EBUSY;
++                                      break;
++                              }
++                      }
++
++                      if (err) {
++                              break;
++                      }
++
++                      pr_debug("Overlap at 0x%Lx-0x%Lx\n",
++                             saved_ptr->start, saved_ptr->end);
++                      /* No conflict. Go ahead and add this new entry */
++                      list_add(&new_entry->nd, saved_ptr->nd.prev);
++                      new_entry = NULL;
++                      break;
++              }
++
++              if (start < parse->end) {
++                      if (actual_type != parse->type && ret_type) {
++                              actual_type = parse->type;
++                              *ret_type = actual_type;
++                              new_entry->type = actual_type;
++                      }
++
++                      if (actual_type != parse->type) {
++                              printk(
++              KERN_INFO "%s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
++                                      current->comm, current->pid,
++                                      start, end,
++                                      cattr_name(actual_type),
++                                      cattr_name(parse->type));
++                              err = -EBUSY;
++                              break;
++                      }
++
++                      saved_ptr = parse;
++                      /*
++                       * Check to see whether the request overlaps more
++                       * than one entry in the list
++                       */
++                      list_for_each_entry_continue(parse, &memtype_list, nd) {
++                              if (end <= parse->start) {
++                                      break;
++                              }
++
++                              if (actual_type != parse->type) {
++                                      printk(
++              KERN_INFO "%s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
++                                              current->comm, current->pid,
++                                              start, end,
++                                              cattr_name(actual_type),
++                                              cattr_name(parse->type));
++                                      err = -EBUSY;
++                                      break;
++                              }
++                      }
++
++                      if (err) {
++                              break;
++                      }
++
++                      pr_debug(KERN_INFO "Overlap at 0x%Lx-0x%Lx\n",
++                               saved_ptr->start, saved_ptr->end);
++                      /* No conflict. Go ahead and add this new entry */
++                      list_add(&new_entry->nd, &saved_ptr->nd);
++                      new_entry = NULL;
++                      break;
++              }
++      }
++
++      if (err) {
++              printk(KERN_INFO
++      "reserve_memtype failed 0x%Lx-0x%Lx, track %s, req %s\n",
++                      start, end, cattr_name(new_entry->type),
++                      cattr_name(req_type));
++              kfree(new_entry);
++              spin_unlock(&memtype_lock);
++              return err;
++      }
++
++      if (new_entry) {
++              /* No conflict. Not yet added to the list. Add to the tail */
++              list_add_tail(&new_entry->nd, &memtype_list);
++              pr_debug("New Entry\n");
++      }
++
++      if (ret_type) {
++              pr_debug(
++      "reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n",
++                      start, end, cattr_name(actual_type),
++                      cattr_name(req_type), cattr_name(*ret_type));
++      } else {
++              pr_debug(
++      "reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s\n",
++                      start, end, cattr_name(actual_type),
++                      cattr_name(req_type));
++      }
++
++      spin_unlock(&memtype_lock);
++      return err;
++}
++
++/*
++ * Stop tracking the memtype entry whose start and end match the given
++ * range exactly.
++ *
++ * Returns 0 when nothing needed tracking (PAT WC disabled, or the range
++ * lies inside the low ISA region) or when a matching entry was removed,
++ * and -EINVAL when no entry with exactly these bounds is on the list.
++ */
++int free_memtype(u64 start, u64 end)
++{
++      struct memtype *entry;
++      int ret = -EINVAL;
++
++      /*
++       * Only track when pat_wc_enabled; the low ISA region is always
++       * mapped WB and is never tracked either.
++       */
++      if (!pat_wc_enabled ||
++          (start >= ISA_START_ADDRESS && end <= ISA_END_ADDRESS))
++              return 0;
++
++      spin_lock(&memtype_lock);
++      list_for_each_entry(entry, &memtype_list, nd) {
++              if (entry->start != start || entry->end != end)
++                      continue;
++
++              /* Exact match: unlink and release it, then stop walking. */
++              list_del(&entry->nd);
++              kfree(entry);
++              ret = 0;
++              break;
++      }
++      spin_unlock(&memtype_lock);
++
++      if (ret)
++              printk(KERN_INFO "%s:%d freeing invalid memtype %Lx-%Lx\n",
++                      current->comm, current->pid, start, end);
++
++      pr_debug("free_memtype request 0x%Lx-0x%Lx\n", start, end);
++      return ret;
++}
++
++
++/*
++ * /dev/mem mmap interface. The memtype used for mapping varies:
++ * - Use UC for mappings with O_SYNC flag
++ * - Without O_SYNC flag, if there is any conflict in reserve_memtype,
++ *   inherit the memtype from existing mapping.
++ * - Else use UC_MINUS memtype (for backward compatibility with existing
++ *   X drivers).
++ *
++ * This function itself hands vma_prot back unmodified; file, mfn and size
++ * are not examined here.
++ */
++pgprot_t phys_mem_access_prot(struct file *file, unsigned long mfn,
++                              unsigned long size, pgprot_t vma_prot)
++{
++      return vma_prot;
++}
++
++#ifdef CONFIG_NONPROMISC_DEVMEM
++/* This check is done in drivers/char/mem.c in case of NONPROMISC_DEVMEM*/
++static inline int range_is_allowed(unsigned long mfn, unsigned long size)
++{
++      return 1;
++}
++#else
++/*
++ * Check every page frame of the size-byte range starting at frame mfn with
++ * devmem_is_allowed(). Returns 1 if all frames pass, 0 (after logging the
++ * offending program and range) as soon as one is refused.
++ */
++static inline int range_is_allowed(unsigned long mfn, unsigned long size)
++{
++      const u64 first = (u64)mfn << PAGE_SHIFT;
++      const u64 limit = first + size;
++      u64 pos;
++
++      /* Step one page at a time, keeping the frame number in lock-step. */
++      for (pos = first; pos < limit; pos += PAGE_SIZE, mfn++) {
++              if (devmem_is_allowed(mfn))
++                      continue;
++
++              printk(KERN_INFO
++              "Program %s tried to access /dev/mem between %Lx->%Lx.\n",
++                      current->comm, first, limit);
++              return 0;
++      }
++      return 1;
++}
++#endif /* CONFIG_NONPROMISC_DEVMEM */
++
++/*
++ * Decide whether a /dev/mem mapping of size bytes starting at frame mfn is
++ * permitted and, if so, which cache attribute it gets.
++ *
++ * Returns 1 with the cache bits of *vma_prot replaced on success. Returns 0
++ * when range_is_allowed() refuses the range, when reserve_memtype() fails,
++ * or when ioremap_check_change_attr() fails (the reservation is released
++ * again in that last case).
++ */
++int phys_mem_access_prot_allowed(struct file *file, unsigned long mfn,
++                              unsigned long size, pgprot_t *vma_prot)
++{
++      u64 addr = (u64)mfn << PAGE_SHIFT;
++      unsigned long flags = _PAGE_CACHE_UC_MINUS;
++      int retval;
++
++      if (!range_is_allowed(mfn, size))
++              return 0;
++
++      if (file->f_flags & O_SYNC) {
++              flags = _PAGE_CACHE_UC;
++      }
++
++#ifndef CONFIG_X86_32
++#ifndef CONFIG_XEN /* Xen sets correct MTRR type on non-RAM for us. */
++      /*
++       * On the PPro and successors, the MTRRs are used to set
++       * memory types for physical addresses outside main memory,
++       * so blindly setting UC or PWT on those pages is wrong.
++       * For Pentiums and earlier, the surround logic should disable
++       * caching for the high addresses through the KEN pin, but
++       * we maintain the tradition of paranoia in this code.
++       *
++       * The frame number parameter of this function is named mfn, so
++       * that is what has to be used here for the branch to compile.
++       */
++      if (!pat_wc_enabled &&
++          ! ( test_bit(X86_FEATURE_MTRR, boot_cpu_data.x86_capability) ||
++              test_bit(X86_FEATURE_K6_MTRR, boot_cpu_data.x86_capability) ||
++              test_bit(X86_FEATURE_CYRIX_ARR, boot_cpu_data.x86_capability) ||
++              test_bit(X86_FEATURE_CENTAUR_MCR, boot_cpu_data.x86_capability)) &&
++         (mfn << PAGE_SHIFT) >= __pa(high_memory)) {
++              flags = _PAGE_CACHE_UC;
++      }
++#endif
++#endif
++
++      /*
++       * With O_SYNC, we can only take UC mapping. Fail if we cannot.
++       * Without O_SYNC, we want to get
++       * - WB for WB-able memory and no other conflicting mappings
++       * - UC_MINUS for non-WB-able memory with no other conflicting mappings
++       * - Inherit from conflicting mappings otherwise
++       */
++      if (flags != _PAGE_CACHE_UC_MINUS) {
++              /* Fixed request: no ret_type, so a conflict is an error. */
++              retval = reserve_memtype(addr, addr + size, flags, NULL);
++      } else {
++              /* Open request: the type actually granted lands in flags. */
++              retval = reserve_memtype(addr, addr + size, -1, &flags);
++      }
++
++      if (retval < 0)
++              return 0;
++
++      if (ioremap_check_change_attr(mfn, size, flags) < 0) {
++              /* Undo the reservation made above before reporting failure. */
++              free_memtype(addr, addr + size);
++              printk(KERN_INFO
++              "%s:%d /dev/mem ioremap_change_attr failed %s for %Lx-%Lx\n",
++                      current->comm, current->pid,
++                      cattr_name(flags),
++                      addr, addr + size);
++              return 0;
++      }
++
++      /* Swap only the cache-attribute bits; keep the rest of the prot. */
++      *vma_prot = __pgprot((pgprot_val(*vma_prot) & ~_PAGE_CACHE_MASK) |
++                           flags);
++      return 1;
++}
++
++/*
++ * Reserve the memtype for a /dev/mem mapping of size bytes starting at
++ * frame mfn, asking for the cache attribute carried in vma_prot, and log a
++ * message when reserve_memtype() reports a different attribute.
++ */
++void map_devmem(unsigned long mfn, unsigned long size, pgprot_t vma_prot)
++{
++      u64 addr = (u64)mfn << PAGE_SHIFT;
++      unsigned long want_flags = (pgprot_val(vma_prot) & _PAGE_CACHE_MASK);
++      /*
++       * reserve_memtype() can return (e.g. with -ENOMEM) before it stores
++       * anything through its ret_type pointer. Start from the requested
++       * type so the comparison below never reads an indeterminate value.
++       */
++      unsigned long flags = want_flags;
++
++      reserve_memtype(addr, addr + size, want_flags, &flags);
++      if (flags != want_flags) {
++              printk(KERN_INFO
++              "%s:%d /dev/mem expected mapping type %s for %Lx-%Lx, got %s\n",
++                      current->comm, current->pid,
++                      cattr_name(want_flags),
++                      addr, (unsigned long long)(addr + size),
++                      cattr_name(flags));
++      }
++}
++
++/*
++ * Release the memtype reservation for the size-byte range starting at
++ * frame mfn. vma_prot is unused; the result of free_memtype() is ignored.
++ */
++void unmap_devmem(unsigned long mfn, unsigned long size, pgprot_t vma_prot)
++{
++      u64 addr = (u64)mfn << PAGE_SHIFT;
++
++      free_memtype(addr, addr + size);
++}
++
+--- /dev/null  1970-01-01 00:00:00.000000000 +0000
++++ sle11-2009-05-14/arch/x86/mm/pgtable-xen.c 2009-03-16 16:38:05.000000000 +0100
+@@ -0,0 +1,709 @@
++#include <linux/mm.h>
++#include <linux/module.h>
++#include <xen/features.h>
++#include <asm/pgalloc.h>
++#include <asm/pgtable.h>
++#include <asm/tlb.h>
++#include <asm/hypervisor.h>
++#include <asm/mmu_context.h>
++
++/*
++ * Allocate one zeroed page for a kernel pte table and hand it to
++ * make_lowmem_page_readonly(). Returns NULL if the allocation fails.
++ */
++pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
++{
++      pte_t *table;
++
++      table = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
++      if (!table)
++              return NULL;
++
++      make_lowmem_page_readonly(table, XENFEAT_writable_page_tables);
++      return table;
++}
++
++/*
++ * Destructor installed on pte pages via SetPageForeign() in
++ * pte_alloc_one(). Only order-0 pages are expected; anything else is a BUG.
++ */
++static void _pte_free(struct page *page, unsigned int order)
++{
++      BUG_ON(order);
++      __pte_free(page);
++}
++
++/*
++ * Allocate one zeroed page for a user pte table (from highmem when
++ * CONFIG_HIGHPTE is set), run the page-table constructor on it, register
++ * _pte_free as its foreign-page destructor and reset its page count.
++ * Returns NULL if the allocation fails.
++ */
++pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
++{
++      gfp_t gfp = GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO;
++      struct page *ptpage;
++
++#ifdef CONFIG_HIGHPTE
++      gfp |= __GFP_HIGHMEM;
++#endif
++      ptpage = alloc_pages(gfp, 0);
++      if (!ptpage)
++              return NULL;
++
++      pgtable_page_ctor(ptpage);
++      SetPageForeign(ptpage, _pte_free);
++      init_page_count(ptpage);
++      return ptpage;
++}
++
++/*
++ * Release a pte page obtained from pte_alloc_one().
++ *
++ * For a lowmem page the kernel mapping of the page is looked up; if that
++ * mapping is not writable it is rewritten with PAGE_KERNEL through the
++ * hypervisor before the page is freed. A highmem page only has its Pinned
++ * flag cleared, and is a BUG when CONFIG_HIGHPTE is not set.
++ */
++void __pte_free(pgtable_t pte)
++{
++      if (!PageHighMem(pte)) {
++              unsigned long va = (unsigned long)page_address(pte);
++              unsigned int level;
++              pte_t *ptep = lookup_address(va, &level);
++
++              /* The page must be mapped by a present 4K kernel pte. */
++              BUG_ON(!ptep || level != PG_LEVEL_4K || !pte_present(*ptep));
++              if (!pte_write(*ptep)
++                  && HYPERVISOR_update_va_mapping(va,
++                                                  mk_pte(pte, PAGE_KERNEL),
++                                                  0))
++                      BUG();
++      } else
++#ifdef CONFIG_HIGHPTE
++              ClearPagePinned(pte);
++#else
++              BUG();
++#endif
++
++      /* Undo what pte_alloc_one() set up, then give the page back. */
++      ClearPageForeign(pte);
++      init_page_count(pte);
++      pgtable_page_dtor(pte);
++      __free_page(pte);
++}
++
++/*
++ * Free a pte page through the mmu_gather: run the page-table destructor,
++ * notify paravirt of the release, and queue the page on tlb.
++ */
++void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
++{
++      pgtable_page_dtor(pte);
++      paravirt_release_pte(page_to_pfn(pte));
++      tlb_remove_page(tlb, pte);
++}
++
++#if PAGETABLE_LEVELS > 2
++/*
++ * Destructor installed on pmd pages via SetPageForeign() in
++ * pmd_alloc_one(). Only order-0 pages are expected.
++ */
++static void _pmd_free(struct page *page, unsigned int order)
++{
++      BUG_ON(order);
++      __pmd_free(page);
++}
++
++/*
++ * Allocate one zeroed page for a pmd table, register _pmd_free as its
++ * foreign-page destructor and reset its page count. Returns the page's
++ * kernel address, or NULL if the allocation fails.
++ */
++pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
++{
++      struct page *pmd;
++
++      pmd = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
++      if (!pmd)
++              return NULL;
++      SetPageForeign(pmd, _pmd_free);
++      init_page_count(pmd);
++      return page_address(pmd);
++}
++
++/*
++ * Release a pmd page: if its kernel mapping is not writable, rewrite it
++ * with PAGE_KERNEL through the hypervisor, then clear the Foreign flag,
++ * reset the page count and free the page.
++ */
++void __pmd_free(pgtable_t pmd)
++{
++      unsigned long va = (unsigned long)page_address(pmd);
++      unsigned int level;
++      pte_t *ptep = lookup_address(va, &level);
++
++      /* The page must be mapped by a present 4K kernel pte. */
++      BUG_ON(!ptep || level != PG_LEVEL_4K || !pte_present(*ptep));
++      if (!pte_write(*ptep)
++          && HYPERVISOR_update_va_mapping(va, mk_pte(pmd, PAGE_KERNEL), 0))
++              BUG();
++
++      ClearPageForeign(pmd);
++      init_page_count(pmd);
++      __free_page(pmd);
++}
++
++/* Notify paravirt that the pmd is released and queue its page on tlb. */
++void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
++{
++      paravirt_release_pmd(__pa(pmd) >> PAGE_SHIFT);
++      tlb_remove_page(tlb, virt_to_page(pmd));
++}
++
++#if PAGETABLE_LEVELS > 3
++/* Notify paravirt that the pud is released and queue its page on tlb. */
++void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud)
++{
++      paravirt_release_pud(__pa(pud) >> PAGE_SHIFT);
++      tlb_remove_page(tlb, virt_to_page(pud));
++}
++#endif        /* PAGETABLE_LEVELS > 3 */
++#endif        /* PAGETABLE_LEVELS > 2 */
++
++#ifndef CONFIG_X86_64
++#define TASK_SIZE64 TASK_SIZE
++#endif
++
++/*
++ * Acquire (lock != 0) or release (lock == 0) the locks that guard an mm's
++ * page tables while they are being pinned or unpinned.
++ *
++ * On acquire, mm->page_table_lock is taken first and then, when split pte
++ * locks are in use, the lock of every populated pmd entry. On release the
++ * per-pmd locks are dropped first and mm->page_table_lock last.
++ */
++static void _pin_lock(struct mm_struct *mm, int lock) {
++      if (lock)
++              spin_lock(&mm->page_table_lock);
++#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
++      /* While mm->page_table_lock protects us against insertions and
++       * removals of higher level page table pages, it doesn't protect
++       * against updates of pte-s. Such updates, however, require the
++       * pte pages to be in consistent state (unpinned+writable or
++       * pinned+readonly). The pinning and attribute changes, however
++       * cannot be done atomically, which is why such updates must be
++       * prevented from happening concurrently.
++       * Note that no pte lock can ever elsewhere be acquired nesting
++       * with an already acquired one in the same mm, or with the mm's
++       * page_table_lock already acquired, as that would break in the
++       * non-split case (where all these are actually resolving to the
++       * one page_table_lock). Thus acquiring all of them here is not
++       * going to result in dead locks, and the order of acquires
++       * doesn't matter.
++       */
++      {
++              pgd_t *pgd = mm->pgd;
++              unsigned g;
++
++              /* Same pgd range as pgd_walk(): up to TASK_SIZE64 - 1. */
++              for (g = 0; g <= ((TASK_SIZE64-1) / PGDIR_SIZE); g++, pgd++) {
++                      pud_t *pud;
++                      unsigned u;
++
++                      if (pgd_none(*pgd))
++                              continue;
++                      pud = pud_offset(pgd, 0);
++                      for (u = 0; u < PTRS_PER_PUD; u++, pud++) {
++                              pmd_t *pmd;
++                              unsigned m;
++
++                              if (pud_none(*pud))
++                                      continue;
++                              pmd = pmd_offset(pud, 0);
++                              for (m = 0; m < PTRS_PER_PMD; m++, pmd++) {
++                                      spinlock_t *ptl;
++
++                                      if (pmd_none(*pmd))
++                                              continue;
++                                      /* The mm argument is passed as 0 here. */
++                                      ptl = pte_lockptr(0, pmd);
++                                      if (lock)
++                                              spin_lock(ptl);
++                                      else
++                                              spin_unlock(ptl);
++                              }
++                      }
++              }
++      }
++#endif
++      if (!lock)
++              spin_unlock(&mm->page_table_lock);
++}
++#define pin_lock(mm) _pin_lock(mm, 1)
++#define pin_unlock(mm) _pin_lock(mm, 0)
++
++#define PIN_BATCH sizeof(void *)
++static DEFINE_PER_CPU(multicall_entry_t[PIN_BATCH], pb_mcl);
++
++/*
++ * Apply flags to the kernel mapping of one page-table page.
++ *
++ * A highmem page only has its Pinned flag updated: cleared when flags
++ * contain _PAGE_RW, set otherwise. For any other page an update_va_mapping
++ * entry is written into slot seq of this cpu's pb_mcl batch; when the batch
++ * reaches PIN_BATCH entries it is submitted and seq restarts at 0.
++ *
++ * Returns the slot index to use for the next entry.
++ */
++static inline unsigned int pgd_walk_set_prot(struct page *page, pgprot_t flags,
++                                           unsigned int cpu, unsigned int seq)
++{
++      unsigned long pfn = page_to_pfn(page);
++
++      if (PageHighMem(page)) {
++              if (pgprot_val(flags) & _PAGE_RW)
++                      ClearPagePinned(page);
++              else
++                      SetPagePinned(page);
++      } else {
++              MULTI_update_va_mapping(per_cpu(pb_mcl, cpu) + seq,
++                                      (unsigned long)__va(pfn << PAGE_SHIFT),
++                                      pfn_pte(pfn, flags), 0);
++              /* Batch full: hand it to the hypervisor and start over. */
++              if (unlikely(++seq == PIN_BATCH)) {
++                      if (unlikely(HYPERVISOR_multicall_check(per_cpu(pb_mcl, cpu),
++                                                              PIN_BATCH, NULL)))
++                              BUG();
++                      seq = 0;
++              }
++      }
++
++      return seq;
++}
++
++/*
++ * Walk the page tables rooted at pgd_base, covering pgd slots up to
++ * (TASK_SIZE64-1) / PGDIR_SIZE, and apply flags to the kernel mapping of
++ * every pud, pmd and pte page found (via pgd_walk_set_prot()), then to the
++ * pgd page itself with a TLB flush. On x86-64 the user pgd returned by
++ * __user_pgd() is updated as well.
++ *
++ * Does nothing when the guest uses auto-translated physmap. Runs between
++ * get_cpu() and put_cpu() because it uses the per-cpu pb_mcl batch.
++ */
++static void pgd_walk(pgd_t *pgd_base, pgprot_t flags)
++{
++      pgd_t       *pgd = pgd_base;
++      pud_t       *pud;
++      pmd_t       *pmd;
++      int          g,u,m;
++      unsigned int cpu, seq;
++      multicall_entry_t *mcl;
++
++      if (xen_feature(XENFEAT_auto_translated_physmap))
++              return;
++
++      cpu = get_cpu();
++
++      /*
++       * Cannot iterate up to USER_PTRS_PER_PGD on x86-64 as these pagetables
++       * may not be the 'current' task's pagetables (e.g., current may be
++       * 32-bit, but the pagetables may be for a 64-bit task).
++       * Subtracting 1 from TASK_SIZE64 means the loop limit is correct
++       * regardless of whether TASK_SIZE64 is a multiple of PGDIR_SIZE.
++       */
++      for (g = 0, seq = 0; g <= ((TASK_SIZE64-1) / PGDIR_SIZE); g++, pgd++) {
++              if (pgd_none(*pgd))
++                      continue;
++              pud = pud_offset(pgd, 0);
++              if (PTRS_PER_PUD > 1) /* not folded */
++                      seq = pgd_walk_set_prot(virt_to_page(pud),flags,cpu,seq);
++              for (u = 0; u < PTRS_PER_PUD; u++, pud++) {
++                      if (pud_none(*pud))
++                              continue;
++                      pmd = pmd_offset(pud, 0);
++                      if (PTRS_PER_PMD > 1) /* not folded */
++                              seq = pgd_walk_set_prot(virt_to_page(pmd),flags,cpu,seq);
++                      for (m = 0; m < PTRS_PER_PMD; m++, pmd++) {
++                              if (pmd_none(*pmd))
++                                      continue;
++                              seq = pgd_walk_set_prot(pmd_page(*pmd),flags,cpu,seq);
++                      }
++              }
++      }
++
++      mcl = per_cpu(pb_mcl, cpu);
++#ifdef CONFIG_X86_64
++      /* Two more slots are needed below; flush first if they do not fit. */
++      if (unlikely(seq > PIN_BATCH - 2)) {
++              if (unlikely(HYPERVISOR_multicall_check(mcl, seq, NULL)))
++                      BUG();
++              seq = 0;
++      }
++      MULTI_update_va_mapping(mcl + seq,
++             (unsigned long)__user_pgd(pgd_base),
++             pfn_pte(virt_to_phys(__user_pgd(pgd_base))>>PAGE_SHIFT, flags),
++             0);
++      MULTI_update_va_mapping(mcl + seq + 1,
++             (unsigned long)pgd_base,
++             pfn_pte(virt_to_phys(pgd_base)>>PAGE_SHIFT, flags),
++             UVMF_TLB_FLUSH);
++      if (unlikely(HYPERVISOR_multicall_check(mcl, seq + 2, NULL)))
++              BUG();
++#else
++      /*
++       * With entries still pending, append the pgd update to the batch;
++       * with an empty batch a single direct hypercall is used instead.
++       */
++      if (likely(seq != 0)) {
++              MULTI_update_va_mapping(per_cpu(pb_mcl, cpu) + seq,
++                      (unsigned long)pgd_base,
++                      pfn_pte(virt_to_phys(pgd_base)>>PAGE_SHIFT, flags),
++                      UVMF_TLB_FLUSH);
++              if (unlikely(HYPERVISOR_multicall_check(per_cpu(pb_mcl, cpu),
++                                                      seq + 1, NULL)))
++                      BUG();
++      } else if(HYPERVISOR_update_va_mapping((unsigned long)pgd_base,
++                      pfn_pte(virt_to_phys(pgd_base)>>PAGE_SHIFT, flags),
++                      UVMF_TLB_FLUSH))
++              BUG();
++#endif
++
++      put_cpu();
++}
++
++/*
++ * Pin a pgd: remap its page-table pages with PAGE_KERNEL_RO via pgd_walk(),
++ * flush unused kmaps, issue the pin request(s) and finally mark the pgd
++ * page Pinned. The read-only walk happens before the pin request.
++ */
++static void __pgd_pin(pgd_t *pgd)
++{
++      pgd_walk(pgd, PAGE_KERNEL_RO);
++      kmap_flush_unused();
++      xen_pgd_pin(__pa(pgd)); /* kernel */
++#ifdef CONFIG_X86_64
++      xen_pgd_pin(__pa(__user_pgd(pgd))); /* user */
++#endif
++      SetPagePinned(virt_to_page(pgd));
++}
++
++/*
++ * Unpin a pgd: issue the unpin request(s) first, then remap the page-table
++ * pages with PAGE_KERNEL via pgd_walk() and clear the Pinned flag on the
++ * pgd page. This is the reverse order of __pgd_pin().
++ */
++static void __pgd_unpin(pgd_t *pgd)
++{
++      xen_pgd_unpin(__pa(pgd));
++#ifdef CONFIG_X86_64
++      xen_pgd_unpin(__pa(__user_pgd(pgd)));
++#endif
++      pgd_walk(pgd, PAGE_KERNEL);
++      ClearPagePinned(virt_to_page(pgd));
++}
++
++/* Unpin pgd only if its page is currently flagged Pinned. */
++static void pgd_test_and_unpin(pgd_t *pgd)
++{
++      if (PagePinned(virt_to_page(pgd)))
++              __pgd_unpin(pgd);
++}
++
++/*
++ * Pin the page tables of mm while holding its pin locks. Nothing is done
++ * when the hypervisor offers writable page tables.
++ */
++void mm_pin(struct mm_struct *mm)
++{
++      if (!xen_feature(XENFEAT_writable_page_tables)) {
++              pin_lock(mm);
++              __pgd_pin(mm->pgd);
++              pin_unlock(mm);
++      }
++}
++
++/*
++ * Unpin the page tables of mm while holding its pin locks. Nothing is done
++ * when the hypervisor offers writable page tables.
++ */
++void mm_unpin(struct mm_struct *mm)
++{
++      if (!xen_feature(XENFEAT_writable_page_tables)) {
++              pin_lock(mm);
++              __pgd_unpin(mm->pgd);
++              pin_unlock(mm);
++      }
++}
++
++/*
++ * Pin every pgd on pgd_list that is not pinned yet. Nothing is done when
++ * the hypervisor offers writable page tables.
++ */
++void mm_pin_all(void)
++{
++      struct page *pg;
++      unsigned long irqflags;
++
++      if (xen_feature(XENFEAT_writable_page_tables))
++              return;
++
++      /*
++       * Allow uninterrupted access to the pgd_list. Also protects
++       * __pgd_pin() by disabling preemption.
++       * All other CPUs must be at a safe point (e.g., in stop_machine
++       * or offlined entirely).
++       */
++      spin_lock_irqsave(&pgd_lock, irqflags);
++      list_for_each_entry(pg, &pgd_list, lru) {
++              if (PagePinned(pg))
++                      continue;
++              __pgd_pin((pgd_t *)page_address(pg));
++      }
++      spin_unlock_irqrestore(&pgd_lock, irqflags);
++}
++
++/* Pin the page tables of mm unless its pgd page is already Pinned. */
++void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
++{
++      if (!PagePinned(virt_to_page(mm->pgd)))
++              mm_pin(mm);
++}
++
++/*
++ * Called while mm is being torn down. If the current task still has mm as
++ * its active_mm it is switched over to init_mm first; afterwards mm is
++ * unpinned when it is pinned, this is the only remaining mm_count
++ * reference, and it has no foreign mappings.
++ */
++void arch_exit_mmap(struct mm_struct *mm)
++{
++      struct task_struct *tsk = current;
++
++      task_lock(tsk);
++
++      /*
++       * We aggressively remove defunct pgd from cr3. We execute unmap_vmas()
++       * *much* faster this way, as no tlb flushes means bigger wrpt batches.
++       */
++      if (tsk->active_mm == mm) {
++              tsk->active_mm = &init_mm;
++              atomic_inc(&init_mm.mm_count);
++
++              switch_mm(mm, &init_mm, tsk);
++
++              /* Drop the active_mm reference; another one must remain. */
++              atomic_dec(&mm->mm_count);
++              BUG_ON(atomic_read(&mm->mm_count) == 0);
++      }
++
++      task_unlock(tsk);
++
++      if (PagePinned(virt_to_page(mm->pgd))
++          && atomic_read(&mm->mm_count) == 1
++          && !mm->context.has_foreign_mappings)
++              mm_unpin(mm);
++}
++
++/* Link the page backing pgd onto pgd_list through its lru field. */
++static inline void pgd_list_add(pgd_t *pgd)
++{
++      list_add(&virt_to_page(pgd)->lru, &pgd_list);
++}
++
++/* Unlink the page backing pgd from the list it sits on via its lru field. */
++static inline void pgd_list_del(pgd_t *pgd)
++{
++      list_del(&virt_to_page(pgd)->lru);
++}
++
++#define UNSHARED_PTRS_PER_PGD                         \
++      (SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD)
++
++static void pgd_ctor(void *p)
++{
++      pgd_t *pgd = p;
++      unsigned long flags;
++
++      pgd_test_and_unpin(pgd);
++
++      /* Clear usermode parts of PGD */
++      memset(pgd, 0, KERNEL_PGD_BOUNDARY*sizeof(pgd_t));
++
++      spin_lock_irqsave(&pgd_lock, flags);
++
++      /* If the pgd points to a shared pagetable level (either the
++         ptes in non-PAE, or shared PMD in PAE), then just copy the
++         references from swapper_pg_dir. */
++      if (PAGETABLE_LEVELS == 2 ||
++          (PAGETABLE_LEVELS == 3 && SHARED_KERNEL_PMD) ||
++          PAGETABLE_LEVELS == 4) {
++              clone_pgd_range(pgd + KERNEL_PGD_BOUNDARY,
++                              swapper_pg_dir + KERNEL_PGD_BOUNDARY,
++                              KERNEL_PGD_PTRS);
++              paravirt_alloc_pmd_clone(__pa(pgd) >> PAGE_SHIFT,
++                                       __pa(swapper_pg_dir) >> PAGE_SHIFT,
++                                       KERNEL_PGD_BOUNDARY,
++                                       KERNEL_PGD_PTRS);
++      }
++
++#ifdef CONFIG_X86_64
++      /* set level3_user_pgt for vsyscall area */
++      __user_pgd(pgd)[pgd_index(VSYSCALL_START)] =
++              __pgd(__pa_symbol(level3_user_pgt) | _PAGE_TABLE);
  +#endif
-       add_uevent_var(env, "MODALIAS=xen:%s", xdev->devicetype);
- 
-       return 0;
-@@ -207,10 +209,8 @@ static struct xen_bus_type xenbus_fronte
-               .probe    = xenbus_dev_probe,
-               .remove   = xenbus_dev_remove,
-               .shutdown = xenbus_dev_shutdown,
--#if defined(CONFIG_XEN) || defined(MODULE)
-               .uevent   = xenbus_uevent_frontend,
- #endif
--#endif
-       },
- #if defined(CONFIG_XEN) || defined(MODULE)
-       .dev = {
-@@ -519,6 +519,15 @@ static ssize_t xendev_show_devtype(struc
- }
- DEVICE_ATTR(devtype, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_devtype, NULL);
- 
-+static ssize_t xendev_show_modalias(struct device *dev,
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,13)
-+                                  struct device_attribute *attr,
++
++#ifndef CONFIG_X86_PAE
++      /* list required to sync kernel mapping updates */
++      if (!SHARED_KERNEL_PMD)
++              pgd_list_add(pgd);
  +#endif
-+                                  char *buf)
++
++      spin_unlock_irqrestore(&pgd_lock, flags);
++}
++
++/*
++ * Tear-down counterpart for a pgd: when the kernel pmd is not shared, take
++ * the pgd off pgd_list under pgd_lock, then unpin it if it is still pinned.
++ */
++static void pgd_dtor(void *pgd)
++{
++      unsigned long flags; /* can be called from interrupt context */
++
++      if (!SHARED_KERNEL_PMD) {
++              spin_lock_irqsave(&pgd_lock, flags);
++              pgd_list_del(pgd);
++              spin_unlock_irqrestore(&pgd_lock, flags);
++      }
++
++      pgd_test_and_unpin(pgd);
++}
++
++/*
++ * List of all pgd's needed for non-PAE so it can invalidate entries
++ * in both cached and uncached pgd's; not needed for PAE since the
++ * kernel pmd is shared. If PAE were not to share the pmd a similar
++ * tactic would be needed. This is essentially codepath-based locking
++ * against pageattr.c; it is the unique case in which a valid change
++ * of kernel pagetables can't be lazily synchronized by vmalloc faults.
++ * vmalloc faults work because attached pagetables are never freed.
++ * -- wli
++ */
++
++#ifdef CONFIG_X86_PAE
++/*
++ * Mop up any pmd pages which may still be attached to the pgd.
++ * Normally they will be freed by munmap/exit_mmap, but any pmd we
++ * preallocate which never got a corresponding vma will need to be
++ * freed manually.
++ */
++static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
++{
++      int slot;
++
++      for (slot = 0; slot < UNSHARED_PTRS_PER_PGD; slot++) {
++              pgd_t entry = pgdp[slot];
++              pmd_t *pmd;
++
++              /* Empty slot: nothing attached here. */
++              if (__pgd_val(entry) == 0)
++                      continue;
++
++              /* Remember the pmd, clear the slot, then release the pmd. */
++              pmd = (pmd_t *)pgd_page_vaddr(entry);
++
++              pgdp[slot] = xen_make_pgd(0);
++
++              paravirt_release_pmd(pgd_val(entry) >> PAGE_SHIFT);
++              pmd_free(mm, pmd);
++      }
++
++      /* Matches the xen_create_contiguous_region() in pgd_prepopulate_pmd(). */
++      if (!xen_feature(XENFEAT_pae_pgdir_above_4gb))
++              xen_destroy_contiguous_region((unsigned long)pgdp, 0);
++}
++
++/*
++ * In PAE mode, we need to do a cr3 reload (=tlb flush) when
++ * updating the top-level pagetable entries to guarantee the
++ * processor notices the update.  Since this is expensive, and
++ * all 4 top-level entries are used almost immediately in a
++ * new process's life, we just pre-populate them here.
++ *
++ * Also, if we're in a paravirt environment where the kernel pmd is
++ * not shared between pagetables (!SHARED_KERNEL_PMDS), we allocate
++ * and initialize the kernel pmds here.
++ *
++ * Returns 1 on success. Returns 0, after freeing every pmd allocated so
++ * far, when a pmd allocation fails or when the pgd cannot be made
++ * machine-contiguous below 4GB.
++ */
++static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
++{
++      pud_t *pud;
++      pmd_t *pmds[UNSHARED_PTRS_PER_PGD];
++      unsigned long addr, flags;
++      int i;
++
++      /*
++       * We can race save/restore (if we sleep during a GFP_KERNEL memory
++       * allocation). We therefore store virtual addresses of pmds as they
++       * do not change across save/restore, and poke the machine addresses
++       * into the pgdir under the pgd_lock.
++       */
++      for (addr = i = 0; i < UNSHARED_PTRS_PER_PGD; i++, addr += PUD_SIZE) {
++              pmds[i] = pmd_alloc_one(mm, addr);
++              if (!pmds[i])
++                      goto out_oom;
++      }
++
++      spin_lock_irqsave(&pgd_lock, flags);
++
++      /* Protect against save/restore: move below 4GB under pgd_lock. */
++      if (!xen_feature(XENFEAT_pae_pgdir_above_4gb)
++          && xen_create_contiguous_region((unsigned long)pgd, 0, 32)) {
++              spin_unlock_irqrestore(&pgd_lock, flags);
++              /*
++               * Shared failure path. i is the number of pmds allocated so
++               * far: the full count when falling through from above, the
++               * index of the failed allocation when jumped to.
++               */
++out_oom:
++              while (i--)
++                      pmd_free(mm, pmds[i]);
++              return 0;
++      }
++
++      /* Copy kernel pmd contents and write-protect the new pmds. */
++      pud = pud_offset(pgd, 0);
++      for (addr = i = 0; i < UNSHARED_PTRS_PER_PGD;
++           i++, pud++, addr += PUD_SIZE) {
++              if (i >= KERNEL_PGD_BOUNDARY) {
++                      memcpy(pmds[i],
++                             (pmd_t *)pgd_page_vaddr(swapper_pg_dir[i]),
++                             sizeof(pmd_t) * PTRS_PER_PMD);
++                      make_lowmem_page_readonly(
++                              pmds[i], XENFEAT_writable_page_tables);
++              }
++
++              /* It is safe to poke machine addresses of pmds under the pgd_lock. */
++              pud_populate(mm, pud, pmds[i]);
++      }
++
++      /* List required to sync kernel mapping updates and
++       * to pin/unpin on save/restore. */
++      pgd_list_add(pgd);
++
++      spin_unlock_irqrestore(&pgd_lock, flags);
++
++      return 1;
++}
++
++/*
++ * Install pmd into the top-level slot *pudp of mm.
++ *
++ * When mm's pgd is pinned, the pmd page's kernel mapping is first made
++ * read-only through the hypervisor and the slot is written with set_pud();
++ * otherwise the slot is stored to directly. The TLB is flushed when mm is
++ * the current task's active_mm.
++ */
++void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd)
++{
++      struct page *page = virt_to_page(pmd);
++      unsigned long pfn = page_to_pfn(page);
++
++      paravirt_alloc_pmd(mm, __pa(pmd) >> PAGE_SHIFT);
++
++      /* Note: almost everything apart from _PAGE_PRESENT is
++         reserved at the pmd (PDPT) level. */
++      if (PagePinned(virt_to_page(mm->pgd))) {
++              BUG_ON(PageHighMem(page));
++              BUG_ON(HYPERVISOR_update_va_mapping(
++                        (unsigned long)__va(pfn << PAGE_SHIFT),
++                        pfn_pte(pfn, PAGE_KERNEL_RO), 0));
++              set_pud(pudp, __pud(__pa(pmd) | _PAGE_PRESENT));
++      } else
++              *pudp = __pud(__pa(pmd) | _PAGE_PRESENT);
++
++      /*
++       * According to Intel App note "TLBs, Paging-Structure Caches,
++       * and Their Invalidation", April 2007, document 317080-001,
++       * section 8.1: in PAE mode we explicitly have to flush the
++       * TLB via cr3 if the top-level pgd is changed...
++       */
++      if (mm == current->active_mm)
++              xen_tlb_flush();
++}
++#else  /* !CONFIG_X86_PAE */
++/* No need to prepopulate any pagetable entries in non-PAE modes. */
++static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
++{
++      return 1;
++}
++
++static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgd)
++{
++}
++#endif        /* CONFIG_X86_PAE */
++
++#ifdef CONFIG_X86_64
++/* We allocate two contiguous pages for kernel and user. */
++#define PGD_ORDER 1
++#else
++#define PGD_ORDER 0
++#endif
++
++pgd_t *pgd_alloc(struct mm_struct *mm)
++{
++      pgd_t *pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, PGD_ORDER);
++
++      /* so that alloc_pd can use it */
++      mm->pgd = pgd;
++      if (pgd)
++              pgd_ctor(pgd);
++
++      if (pgd && !pgd_prepopulate_pmd(mm, pgd)) {
++              free_pages((unsigned long)pgd, PGD_ORDER);
++              pgd = NULL;
++      }
++
++      return pgd;
++}
++
++void pgd_free(struct mm_struct *mm, pgd_t *pgd)
++{
++      /*
++       * After this the pgd should not be pinned for the duration of this
++       * function's execution. We should never sleep and thus never race:
++       *  1. User pmds will not become write-protected under our feet due
++       *     to a concurrent mm_pin_all().
++       *  2. The machine addresses in PGD entries will not become invalid
++       *     due to a concurrent save/restore.
++       */
++      pgd_dtor(pgd);
++
++      pgd_mop_up_pmds(mm, pgd);
++      free_pages((unsigned long)pgd, PGD_ORDER);
++}
++
++/* blktap and gntdev need this, as otherwise they would implicitly (and
++ * needlessly, as they never use it) reference init_mm. */
++pte_t xen_ptep_get_and_clear_full(struct vm_area_struct *vma,
++                                unsigned long addr, pte_t *ptep, int full)
  +{
-+      return sprintf(buf, "xen:%s\n", to_xenbus_device(dev)->devicetype);
++      return ptep_get_and_clear_full(vma->vm_mm, addr, ptep, full);
  +}
-+DEVICE_ATTR(modalias, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_modalias, NULL);
- 
- int xenbus_probe_node(struct xen_bus_type *bus,
-                     const char *type,
-@@ -579,10 +588,16 @@ int xenbus_probe_node(struct xen_bus_typ
- 
-       err = device_create_file(&xendev->dev, &dev_attr_devtype);
-       if (err)
--              goto fail_remove_file;
-+              goto fail_remove_nodename;
++EXPORT_SYMBOL_GPL(xen_ptep_get_and_clear_full);
  +
-+      err = device_create_file(&xendev->dev, &dev_attr_modalias);
-+      if (err)
-+              goto fail_remove_devtype;
- 
-       return 0;
--fail_remove_file:
-+fail_remove_devtype:
-+      device_remove_file(&xendev->dev, &dev_attr_devtype);
-+fail_remove_nodename:
-       device_remove_file(&xendev->dev, &dev_attr_nodename);
- fail_unregister:
-       device_unregister(&xendev->dev);
---- a/fs/aio.c
-+++ b/fs/aio.c
-@@ -1255,6 +1255,7 @@ static void io_destroy(struct kioctx *io
- #ifdef CONFIG_EPOLL
-       /* forget the poll file, but it's up to the user to close it */
-       if (ioctx->file) {
-+              fput(ioctx->file);
-               ioctx->file->private_data = 0;
-               ioctx->file = 0;
-       }
-@@ -1279,6 +1280,7 @@ static int aio_queue_fd_close(struct ino
-               spin_lock_irq(&ioctx->ctx_lock);
-               ioctx->file = 0;
-               spin_unlock_irq(&ioctx->ctx_lock);
-+              fput(file);
-       }
-       return 0;
- }
-@@ -1314,16 +1316,17 @@ static const struct file_operations aioq
- 
- static int make_aio_fd(struct kioctx *ioctx)
- {
--      int error, fd;
--      struct inode *inode;
-+      int fd;
-       struct file *file;
- 
--      error = anon_inode_getfd(&fd, &inode, &file, "[aioq]",
--                               &aioq_fops, ioctx);
--      if (error)
--              return error;
-+      fd = anon_inode_getfd("[aioq]", &aioq_fops, ioctx);
-+      if (fd < 0)
-+              return fd;
- 
-       /* associate the file with the IO context */
-+      file = fget(fd);
-+      if (!file)
-+              return -EBADF;
-       file->private_data = ioctx;
-       ioctx->file = file;
-       init_waitqueue_head(&ioctx->poll_wait);
---- a/include/asm-x86/dma-mapping.h
-+++ b/include/asm-x86/dma-mapping.h
-@@ -223,8 +223,13 @@ static inline dma_addr_t dma_map_page(st
-       struct dma_mapping_ops *ops = get_dma_ops(dev);
- 
-       BUG_ON(!valid_dma_direction(direction));
-+#ifndef CONFIG_XEN
-       return ops->map_single(dev, page_to_phys(page) + offset,
-                              size, direction);
-+#else
-+      return ops->map_single(dev, page_to_pseudophys(page) + offset,
-+                             size, direction);
-+#endif
- }
- 
- static inline void dma_unmap_page(struct device *dev, dma_addr_t addr,
---- a/include/asm-x86/genapic_64.h
-+++ b/include/asm-x86/genapic_64.h
-@@ -46,5 +46,6 @@ extern struct genapic apic_x2apic_phys;
- extern int acpi_madt_oem_check(char *, char *);
- 
-+#ifndef CONFIG_XEN
- enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC};
- extern enum uv_system_type get_uv_system_type(void);
- extern int is_uv_system(void);
-@@ -55,6 +56,10 @@ DECLARE_PER_CPU(int, x2apic_extra_bits);
- extern void uv_cpu_init(void);
- extern void uv_system_init(void);
- extern int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip);
-+#else
-+#define is_uv_system() 0
-+#define uv_cpu_init() ((void)0)
-+#endif
- 
- extern void setup_apic_routing(void);
- 
---- a/include/asm-x86/mach-xen/asm/desc.h
-+++ b/include/asm-x86/mach-xen/asm/desc.h
-@@ -64,8 +64,8 @@ static inline struct desc_struct *get_cp
- }
- 
- static inline void pack_gate(gate_desc *gate, unsigned char type,
--       unsigned long base, unsigned dpl, unsigned flags, unsigned short seg)
--
-+                           unsigned long base, unsigned dpl, unsigned flags,
-+                           unsigned short seg)
- {
-       gate->a = (seg << 16) | (base & 0xffff);
-       gate->b = (base & 0xffff0000) |
-@@ -84,22 +84,23 @@ static inline int desc_empty(const void 
- #define load_TR_desc() native_load_tr_desc()
- #define load_gdt(dtr) native_load_gdt(dtr)
- #define load_idt(dtr) native_load_idt(dtr)
--#define load_tr(tr) __asm__ __volatile("ltr %0"::"m" (tr))
--#define load_ldt(ldt) __asm__ __volatile("lldt %0"::"m" (ldt))
-+#define load_tr(tr) asm volatile("ltr %0"::"m" (tr))
-+#define load_ldt(ldt) asm volatile("lldt %0"::"m" (ldt))
- 
- #define store_gdt(dtr) native_store_gdt(dtr)
- #define store_idt(dtr) native_store_idt(dtr)
- #define store_tr(tr) (tr = native_store_tr())
--#define store_ldt(ldt) __asm__ ("sldt %0":"=m" (ldt))
-+#define store_ldt(ldt) asm("sldt %0":"=m" (ldt))
- 
- #define load_TLS(t, cpu) native_load_tls(t, cpu)
- #define set_ldt native_set_ldt
- 
--#define write_ldt_entry(dt, entry, desc) \
--                              native_write_ldt_entry(dt, entry, desc)
--#define write_gdt_entry(dt, entry, desc, type) \
--                              native_write_gdt_entry(dt, entry, desc, type)
--#define write_idt_entry(dt, entry, g) native_write_idt_entry(dt, entry, g)
-+#define write_ldt_entry(dt, entry, desc)      \
-+      native_write_ldt_entry(dt, entry, desc)
-+#define write_gdt_entry(dt, entry, desc, type)                \
-+      native_write_gdt_entry(dt, entry, desc, type)
-+#define write_idt_entry(dt, entry, g)         \
-+      native_write_idt_entry(dt, entry, g)
- 
- static inline void native_write_idt_entry(gate_desc *idt, int entry,
-                                         const gate_desc *gate)
-@@ -138,8 +139,8 @@ static inline void pack_descriptor(struc
- {
-       desc->a = ((base & 0xffff) << 16) | (limit & 0xffff);
-       desc->b = (base & 0xff000000) | ((base & 0xff0000) >> 16) |
--                (limit & 0x000f0000) | ((type & 0xff) << 8) |
--                ((flags & 0xf) << 20);
-+              (limit & 0x000f0000) | ((type & 0xff) << 8) |
-+              ((flags & 0xf) << 20);
-       desc->p = 1;
- }
- 
-@@ -160,7 +161,6 @@ static inline void set_tssldt_descriptor
-       desc->base2 = (PTR_MIDDLE(addr) >> 8) & 0xFF;
-       desc->base3 = PTR_HIGH(addr);
- #else
--
-       pack_descriptor((struct desc_struct *)d, addr, size, 0x80 | type, 0);
- #endif
- }
-@@ -178,7 +178,8 @@ static inline void __set_tss_desc(unsign
-        * last valid byte
-        */
-       set_tssldt_descriptor(&tss, (unsigned long)addr, DESC_TSS,
--              IO_BITMAP_OFFSET + IO_BITMAP_BYTES + sizeof(unsigned long) - 1);
-+                            IO_BITMAP_OFFSET + IO_BITMAP_BYTES +
-+                            sizeof(unsigned long) - 1);
-       write_gdt_entry(d, entry, &tss, DESC_TSS);
- }
- 
-@@ -187,16 +188,16 @@ static inline void __set_tss_desc(unsign
- static inline void native_set_ldt(const void *addr, unsigned int entries)
- {
-       if (likely(entries == 0))
--              __asm__ __volatile__("lldt %w0"::"q" (0));
-+              asm volatile("lldt %w0"::"q" (0));
-       else {
-               unsigned cpu = smp_processor_id();
-               ldt_desc ldt;
- 
--              set_tssldt_descriptor(&ldt, (unsigned long)addr,
--                                    DESC_LDT, entries * sizeof(ldt) - 1);
-+              set_tssldt_descriptor(&ldt, (unsigned long)addr, DESC_LDT,
-+                                    entries * LDT_ENTRY_SIZE - 1);
-               write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT,
-                               &ldt, DESC_LDT);
--              __asm__ __volatile__("lldt %w0"::"q" (GDT_ENTRY_LDT*8));
-+              asm volatile("lldt %w0"::"q" (GDT_ENTRY_LDT*8));
-       }
- }
- 
-@@ -261,15 +262,15 @@ static inline void xen_load_tls(struct t
- }
- #endif
- 
--#define _LDT_empty(info) (\
--      (info)->base_addr       == 0    && \
--      (info)->limit           == 0    && \
--      (info)->contents        == 0    && \
--      (info)->read_exec_only  == 1    && \
--      (info)->seg_32bit       == 0    && \
--      (info)->limit_in_pages  == 0    && \
--      (info)->seg_not_present == 1    && \
--      (info)->useable         == 0)
-+#define _LDT_empty(info)                              \
-+      ((info)->base_addr              == 0    &&      \
-+       (info)->limit                  == 0    &&      \
-+       (info)->contents               == 0    &&      \
-+       (info)->read_exec_only         == 1    &&      \
-+       (info)->seg_32bit              == 0    &&      \
-+       (info)->limit_in_pages         == 0    &&      \
-+       (info)->seg_not_present        == 1    &&      \
-+       (info)->useable                == 0)
- 
- #ifdef CONFIG_X86_64
- #define LDT_empty(info) (_LDT_empty(info) && ((info)->lm == 0))
-@@ -309,7 +310,7 @@ static inline unsigned long get_desc_lim
- 
- #ifndef CONFIG_X86_NO_IDT
- static inline void _set_gate(int gate, unsigned type, void *addr,
--                            unsigned dpl, unsigned ist, unsigned seg)
-+                           unsigned dpl, unsigned ist, unsigned seg)
- {
-       gate_desc s;
-       pack_gate(&s, type, (unsigned long)addr, dpl, ist, seg);
-@@ -393,10 +394,10 @@ static inline void set_system_gate_ist(i
-  *    Will read the base address of GDT_ENTRY_ESPFIX_SS and put it into %eax.
-  */
- #define GET_DESC_BASE(idx, gdt, base, lo_w, lo_b, hi_b) \
--      movb idx*8+4(gdt), lo_b; \
--      movb idx*8+7(gdt), hi_b; \
--      shll $16, base; \
--      movw idx*8+2(gdt), lo_w;
-+      movb idx * 8 + 4(gdt), lo_b;                    \
-+      movb idx * 8 + 7(gdt), hi_b;                    \
-+      shll $16, base;                                 \
-+      movw idx * 8 + 2(gdt), lo_w;
- 
- 
- #endif /* __ASSEMBLY__ */
---- a/include/asm-x86/mach-xen/asm/dma-mapping_32.h
-+++ /dev/null
-@@ -1,141 +0,0 @@
--#ifndef _ASM_I386_DMA_MAPPING_H
--#define _ASM_I386_DMA_MAPPING_H
--
++int ptep_set_access_flags(struct vm_area_struct *vma,
++                        unsigned long address, pte_t *ptep,
++                        pte_t entry, int dirty)
++{
++      int changed = !pte_same(*ptep, entry);
++
++      if (changed && dirty) {
++              if (likely(vma->vm_mm == current->mm)) {
++                      if (HYPERVISOR_update_va_mapping(address,
++                              entry,
++                              (unsigned long)vma->vm_mm->cpu_vm_mask.bits|
++                                      UVMF_INVLPG|UVMF_MULTI))
++                              BUG();
++              } else {
++                      xen_l1_entry_update(ptep, entry);
++                      flush_tlb_page(vma, address);
++              }
++      }
++
++      return changed;
++}
++
++int ptep_test_and_clear_young(struct vm_area_struct *vma,
++                            unsigned long addr, pte_t *ptep)
++{
++      int ret = 0;
++
++      if (pte_young(*ptep))
++              ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
++                                       &ptep->pte);
++
++      if (ret)
++              pte_update(vma->vm_mm, addr, ptep);
++
++      return ret;
++}
++
++int ptep_clear_flush_young(struct vm_area_struct *vma,
++                         unsigned long address, pte_t *ptep)
++{
++      pte_t pte = *ptep;
++      int young = pte_young(pte);
++
++      pte = pte_mkold(pte);
++      if (PagePinned(virt_to_page(vma->vm_mm->pgd)))
++              ptep_set_access_flags(vma, address, ptep, pte, young);
++      else if (young)
++              ptep->pte_low = pte.pte_low;
++
++      return young;
++}
+--- sle11-2009-05-14.orig/arch/x86/mm/pgtable_32-xen.c 2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/arch/x86/mm/pgtable_32-xen.c      2009-03-16 16:38:05.000000000 +0100
+@@ -1,7 +1,3 @@
  -/*
-- * IOMMU interface. See Documentation/DMA-mapping.txt and DMA-API.txt for
-- * documentation.
+- *  linux/arch/i386/mm/pgtable.c
  - */
  -
--#include <linux/mm.h>
--#include <linux/scatterlist.h>
--#include <asm/cache.h>
--#include <asm/io.h>
--#include <asm/swiotlb.h>
--
--static inline int
--address_needs_mapping(struct device *hwdev, dma_addr_t addr)
--{
--      dma_addr_t mask = 0xffffffff;
--      /* If the device has a mask, use it, otherwise default to 32 bits */
--      if (hwdev && hwdev->dma_mask)
--              mask = *hwdev->dma_mask;
--      return (addr & ~mask) != 0;
--}
--
--extern int range_straddles_page_boundary(paddr_t p, size_t size);
--
--#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
--#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
--
--void *dma_alloc_coherent(struct device *dev, size_t size,
--                         dma_addr_t *dma_handle, gfp_t flag);
--
--void dma_free_coherent(struct device *dev, size_t size,
--                       void *vaddr, dma_addr_t dma_handle);
--
--extern dma_addr_t
--dma_map_single(struct device *dev, void *ptr, size_t size,
--             enum dma_data_direction direction);
--
--extern void
--dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
--               enum dma_data_direction direction);
--
--extern int dma_map_sg(struct device *hwdev, struct scatterlist *sg,
--                    int nents, enum dma_data_direction direction);
--extern void dma_unmap_sg(struct device *hwdev, struct scatterlist *sg,
--                       int nents, enum dma_data_direction direction);
--
--#ifdef CONFIG_HIGHMEM
--extern dma_addr_t
--dma_map_page(struct device *dev, struct page *page, unsigned long offset,
--           size_t size, enum dma_data_direction direction);
--
--extern void
--dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
--             enum dma_data_direction direction);
--#else
--#define dma_map_page(dev, page, offset, size, dir) \
--      dma_map_single(dev, page_address(page) + (offset), (size), (dir))
--#define dma_unmap_page dma_unmap_single
--#endif
--
--extern void
--dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size,
--                      enum dma_data_direction direction);
--
--extern void
--dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, size_t size,
--                           enum dma_data_direction direction);
--
--static inline void
--dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t dma_handle,
--                            unsigned long offset, size_t size,
--                            enum dma_data_direction direction)
--{
--      dma_sync_single_for_cpu(dev, dma_handle+offset, size, direction);
--}
--
--static inline void
--dma_sync_single_range_for_device(struct device *dev, dma_addr_t dma_handle,
--                               unsigned long offset, size_t size,
--                               enum dma_data_direction direction)
--{
--      dma_sync_single_for_device(dev, dma_handle+offset, size, direction);
--}
--
--extern void
--dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems,
--                  enum dma_data_direction direction);
--
--extern void
--dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems,
--                  enum dma_data_direction direction);
--
--extern int
--dma_mapping_error(dma_addr_t dma_addr);
--
--extern int
--dma_supported(struct device *dev, u64 mask);
--
--static inline int
--dma_set_mask(struct device *dev, u64 mask)
--{
--      if(!dev->dma_mask || !dma_supported(dev, mask))
--              return -EIO;
--
--      *dev->dma_mask = mask;
--
--      return 0;
--}
--
--static inline int
--dma_get_cache_alignment(void)
--{
--      /* no easy way to get cache size on all x86, so return the
--       * maximum possible, to be safe */
--      return (1 << INTERNODE_CACHE_SHIFT);
--}
--
--#define dma_is_consistent(d, h)       (1)
--
--static inline void
--dma_cache_sync(struct device *dev, void *vaddr, size_t size,
--             enum dma_data_direction direction)
+ #include <linux/sched.h>
+ #include <linux/kernel.h>
+ #include <linux/errno.h>
+@@ -41,7 +37,6 @@ void show_mem(void)
+ 
+       printk(KERN_INFO "Mem-info:\n");
+       show_free_areas();
+-      printk(KERN_INFO "Free swap:       %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
+       for_each_online_pgdat(pgdat) {
+               pgdat_resize_lock(pgdat, &flags);
+               for (i = 0; i < pgdat->node_spanned_pages; ++i) {
+@@ -157,243 +152,6 @@ void __init reserve_top_address(unsigned
+       __VMALLOC_RESERVE += reserve;
+ }
+ 
+-pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
  -{
--      flush_write_buffers();
+-      pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
+-      if (pte)
+-              make_lowmem_page_readonly(pte, XENFEAT_writable_page_tables);
+-      return pte;
  -}
  -
--#define ARCH_HAS_DMA_DECLARE_COHERENT_MEMORY
--extern int
--dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
--                          dma_addr_t device_addr, size_t size, int flags);
--
--extern void
--dma_release_declared_memory(struct device *dev);
--
--extern void *
--dma_mark_declared_memory_occupied(struct device *dev,
--                                dma_addr_t device_addr, size_t size);
--
--#endif
---- a/include/asm-x86/mach-xen/asm/dma-mapping_64.h
-+++ /dev/null
-@@ -1,205 +0,0 @@
--#ifndef _X8664_DMA_MAPPING_H
--#define _X8664_DMA_MAPPING_H 1
--
  -/*
-- * IOMMU interface. See Documentation/DMA-mapping.txt and DMA-API.txt for
-- * documentation.
-- */
--
--#include <linux/scatterlist.h>
--#include <asm/io.h>
--
--struct dma_mapping_ops {
--      int             (*mapping_error)(dma_addr_t dma_addr);
--      void*           (*alloc_coherent)(struct device *dev, size_t size,
--                                dma_addr_t *dma_handle, gfp_t gfp);
--      void            (*free_coherent)(struct device *dev, size_t size,
--                                void *vaddr, dma_addr_t dma_handle);
--      dma_addr_t      (*map_single)(struct device *hwdev, void *ptr,
--                                size_t size, int direction);
--      /* like map_single, but doesn't check the device mask */
--      dma_addr_t      (*map_simple)(struct device *hwdev, char *ptr,
--                                size_t size, int direction);
--      void            (*unmap_single)(struct device *dev, dma_addr_t addr,
--                              size_t size, int direction);
--      void            (*sync_single_for_cpu)(struct device *hwdev,
--                              dma_addr_t dma_handle, size_t size,
--                              int direction);
--      void            (*sync_single_for_device)(struct device *hwdev,
--                                dma_addr_t dma_handle, size_t size,
--                              int direction);
--      void            (*sync_single_range_for_cpu)(struct device *hwdev,
--                                dma_addr_t dma_handle, unsigned long offset,
--                              size_t size, int direction);
--      void            (*sync_single_range_for_device)(struct device *hwdev,
--                              dma_addr_t dma_handle, unsigned long offset,
--                              size_t size, int direction);
--      void            (*sync_sg_for_cpu)(struct device *hwdev,
--                                struct scatterlist *sg, int nelems,
--                              int direction);
--      void            (*sync_sg_for_device)(struct device *hwdev,
--                              struct scatterlist *sg, int nelems,
--                              int direction);
--      int             (*map_sg)(struct device *hwdev, struct scatterlist *sg,
--                              int nents, int direction);
--      void            (*unmap_sg)(struct device *hwdev,
--                              struct scatterlist *sg, int nents,
--                              int direction);
--      int             (*dma_supported)(struct device *hwdev, u64 mask);
--      int             is_phys;
--};
+- * List of all pgd's needed for non-PAE so it can invalidate entries
+- * in both cached and uncached pgd's; not needed for PAE since the
+- * kernel pmd is shared. If PAE were not to share the pmd a similar
+- * tactic would be needed. This is essentially codepath-based locking
+- * against pageattr.c; it is the unique case in which a valid change
+- * of kernel pagetables can't be lazily synchronized by vmalloc faults.
+- * vmalloc faults work because attached pagetables are never freed.
+- * -- wli
+- */
+-static inline void pgd_list_add(pgd_t *pgd)
+-{
+-      struct page *page = virt_to_page(pgd);
  -
--extern dma_addr_t bad_dma_address;
--extern const struct dma_mapping_ops* dma_ops;
--extern int iommu_merge;
+-      list_add(&page->lru, &pgd_list);
+-}
  -
--#if 0
--static inline int dma_mapping_error(dma_addr_t dma_addr)
+-static inline void pgd_list_del(pgd_t *pgd)
  -{
--      if (dma_ops->mapping_error)
--              return dma_ops->mapping_error(dma_addr);
+-      struct page *page = virt_to_page(pgd);
  -
--      return (dma_addr == bad_dma_address);
+-      list_del(&page->lru);
  -}
  -
--#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
--#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
+-#define UNSHARED_PTRS_PER_PGD                         \
+-      (SHARED_KERNEL_PMD ? USER_PTRS_PER_PGD : PTRS_PER_PGD)
  -
--#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
--#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
+-static void pgd_ctor(void *p)
+-{
+-      pgd_t *pgd = p;
+-      unsigned long flags;
  -
--extern void *dma_alloc_coherent(struct device *dev, size_t size,
--                              dma_addr_t *dma_handle, gfp_t gfp);
--extern void dma_free_coherent(struct device *dev, size_t size, void *vaddr,
--                            dma_addr_t dma_handle);
+-      pgd_test_and_unpin(pgd);
  -
--static inline dma_addr_t
--dma_map_single(struct device *hwdev, void *ptr, size_t size,
--             int direction)
--{
--      BUG_ON(!valid_dma_direction(direction));
--      return dma_ops->map_single(hwdev, ptr, size, direction);
--}
+-      /* Clear usermode parts of PGD */
+-      memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
  -
--static inline void
--dma_unmap_single(struct device *dev, dma_addr_t addr,size_t size,
--               int direction)
--{
--      BUG_ON(!valid_dma_direction(direction));
--      dma_ops->unmap_single(dev, addr, size, direction);
--}
+-      spin_lock_irqsave(&pgd_lock, flags);
  -
--#define dma_map_page(dev,page,offset,size,dir) \
--      dma_map_single((dev), page_address(page)+(offset), (size), (dir))
+-      /* If the pgd points to a shared pagetable level (either the
+-         ptes in non-PAE, or shared PMD in PAE), then just copy the
+-         references from swapper_pg_dir. */
+-      if (PAGETABLE_LEVELS == 2 ||
+-          (PAGETABLE_LEVELS == 3 && SHARED_KERNEL_PMD)) {
+-              clone_pgd_range(pgd + USER_PTRS_PER_PGD,
+-                              swapper_pg_dir + USER_PTRS_PER_PGD,
+-                              KERNEL_PGD_PTRS);
+-              paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT,
+-                                      __pa(swapper_pg_dir) >> PAGE_SHIFT,
+-                                      USER_PTRS_PER_PGD,
+-                                      KERNEL_PGD_PTRS);
+-      }
  -
--#define dma_unmap_page dma_unmap_single
+-      /* list required to sync kernel mapping updates */
+-      if (PAGETABLE_LEVELS == 2)
+-              pgd_list_add(pgd);
  -
--static inline void
--dma_sync_single_for_cpu(struct device *hwdev, dma_addr_t dma_handle,
--                      size_t size, int direction)
--{
--      BUG_ON(!valid_dma_direction(direction));
--      if (dma_ops->sync_single_for_cpu)
--              dma_ops->sync_single_for_cpu(hwdev, dma_handle, size,
--                                           direction);
--      flush_write_buffers();
+-      spin_unlock_irqrestore(&pgd_lock, flags);
  -}
  -
--static inline void
--dma_sync_single_for_device(struct device *hwdev, dma_addr_t dma_handle,
--                         size_t size, int direction)
+-static void pgd_dtor(void *pgd)
  -{
--      BUG_ON(!valid_dma_direction(direction));
--      if (dma_ops->sync_single_for_device)
--              dma_ops->sync_single_for_device(hwdev, dma_handle, size,
--                                              direction);
--      flush_write_buffers();
--}
+-      unsigned long flags; /* can be called from interrupt context */
  -
--static inline void
--dma_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dma_handle,
--                            unsigned long offset, size_t size, int direction)
--{
--      BUG_ON(!valid_dma_direction(direction));
--      if (dma_ops->sync_single_range_for_cpu) {
--              dma_ops->sync_single_range_for_cpu(hwdev, dma_handle, offset, size, direction);
+-      if (!SHARED_KERNEL_PMD) {
+-              spin_lock_irqsave(&pgd_lock, flags);
+-              pgd_list_del(pgd);
+-              spin_unlock_irqrestore(&pgd_lock, flags);
  -      }
  -
--      flush_write_buffers();
+-      pgd_test_and_unpin(pgd);
  -}
  -
--static inline void
--dma_sync_single_range_for_device(struct device *hwdev, dma_addr_t dma_handle,
--                               unsigned long offset, size_t size, int direction)
+-#ifdef CONFIG_X86_PAE
+-/*
+- * Mop up any pmd pages which may still be attached to the pgd.
+- * Normally they will be freed by munmap/exit_mmap, but any pmd we
+- * preallocate which never got a corresponding vma will need to be
+- * freed manually.
+- */
+-static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
  -{
--      BUG_ON(!valid_dma_direction(direction));
--      if (dma_ops->sync_single_range_for_device)
--              dma_ops->sync_single_range_for_device(hwdev, dma_handle,
--                                                    offset, size, direction);
+-      int i;
  -
--      flush_write_buffers();
--}
+-      for(i = 0; i < UNSHARED_PTRS_PER_PGD; i++) {
+-              pgd_t pgd = pgdp[i];
  -
--static inline void
--dma_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg,
--                  int nelems, int direction)
--{
--      BUG_ON(!valid_dma_direction(direction));
--      if (dma_ops->sync_sg_for_cpu)
--              dma_ops->sync_sg_for_cpu(hwdev, sg, nelems, direction);
--      flush_write_buffers();
+-              if (__pgd_val(pgd) != 0) {
+-                      pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd);
+-
+-                      pgdp[i] = xen_make_pgd(0);
+-
+-                      paravirt_release_pd(pgd_val(pgd) >> PAGE_SHIFT);
+-                      pmd_free(mm, pmd);
+-              }
+-      }
  -}
  -
--static inline void
--dma_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
--                     int nelems, int direction)
+-/*
+- * In PAE mode, we need to do a cr3 reload (=tlb flush) when
+- * updating the top-level pagetable entries to guarantee the
+- * processor notices the update.  Since this is expensive, and
+- * all 4 top-level entries are used almost immediately in a
+- * new process's life, we just pre-populate them here.
+- *
+- * Also, if we're in a paravirt environment where the kernel pmd is
+- * not shared between pagetables (!SHARED_KERNEL_PMDS), we allocate
+- * and initialize the kernel pmds here.
+- */
+-static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
  -{
--      BUG_ON(!valid_dma_direction(direction));
--      if (dma_ops->sync_sg_for_device) {
--              dma_ops->sync_sg_for_device(hwdev, sg, nelems, direction);
+-      pud_t *pud;
+-      pmd_t *pmds[UNSHARED_PTRS_PER_PGD];
+-      unsigned long addr, flags;
+-      int i;
+-
+-      /*
+-       * We can race save/restore (if we sleep during a GFP_KERNEL memory
+-       * allocation). We therefore store virtual addresses of pmds as they
+-       * do not change across save/restore, and poke the machine addresses
+-       * into the pgdir under the pgd_lock.
+-       */
+-      for (addr = i = 0; i < UNSHARED_PTRS_PER_PGD; i++, addr += PUD_SIZE) {
+-              pmds[i] = pmd_alloc_one(mm, addr);
+-              if (!pmds[i])
+-                      goto out_oom;
  -      }
  -
--      flush_write_buffers();
+-      spin_lock_irqsave(&pgd_lock, flags);
+-
+-      /* Protect against save/restore: move below 4GB under pgd_lock. */
+-      if (!xen_feature(XENFEAT_pae_pgdir_above_4gb)
+-          && xen_create_contiguous_region((unsigned long)pgd, 0, 32)) {
+-              spin_unlock_irqrestore(&pgd_lock, flags);
+-out_oom:
+-              while (i--)
+-                      pmd_free(mm, pmds[i]);
+-              return 0;
+-      }
+-
+-      /* Copy kernel pmd contents and write-protect the new pmds. */
+-      pud = pud_offset(pgd, 0);
+-      for (addr = i = 0; i < UNSHARED_PTRS_PER_PGD;
+-           i++, pud++, addr += PUD_SIZE) {
+-              if (i >= USER_PTRS_PER_PGD) {
+-                      memcpy(pmds[i],
+-                             (pmd_t *)pgd_page_vaddr(swapper_pg_dir[i]),
+-                             sizeof(pmd_t) * PTRS_PER_PMD);
+-                      make_lowmem_page_readonly(
+-                              pmds[i], XENFEAT_writable_page_tables);
+-              }
+-
+-              /* It is safe to poke machine addresses of pmds under the pgd_lock. */
+-              pud_populate(mm, pud, pmds[i]);
+-      }
+-
+-      /* List required to sync kernel mapping updates and
+-       * to pin/unpin on save/restore. */
+-      pgd_list_add(pgd);
+-
+-      spin_unlock_irqrestore(&pgd_lock, flags);
+-
+-      return 1;
+-}
+-#else  /* !CONFIG_X86_PAE */
+-/* No need to prepopulate any pagetable entries in non-PAE modes. */
+-static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
+-{
+-      return 1;
  -}
  -
--static inline int
--dma_map_sg(struct device *hwdev, struct scatterlist *sg, int nents, int direction)
+-static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
  -{
--      BUG_ON(!valid_dma_direction(direction));
--      return dma_ops->map_sg(hwdev, sg, nents, direction);
  -}
+-#endif        /* CONFIG_X86_PAE */
  -
--static inline void
--dma_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents,
--           int direction)
+-pgd_t *pgd_alloc(struct mm_struct *mm)
  -{
--      BUG_ON(!valid_dma_direction(direction));
--      dma_ops->unmap_sg(hwdev, sg, nents, direction);
+-      pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
+-
+-      /* so that alloc_pd can use it */
+-      mm->pgd = pgd;
+-      if (pgd)
+-              pgd_ctor(pgd);
+-
+-      if (pgd && !pgd_prepopulate_pmd(mm, pgd)) {
+-              free_page((unsigned long)pgd);
+-              pgd = NULL;
+-      }
+-
+-      return pgd;
  -}
  -
--extern int dma_supported(struct device *hwdev, u64 mask);
+-void pgd_free(struct mm_struct *mm, pgd_t *pgd)
+-{
+-      /*
+-       * After this the pgd should not be pinned for the duration of this
+-       * function's execution. We should never sleep and thus never race:
+-       *  1. User pmds will not become write-protected under our feet due
+-       *     to a concurrent mm_pin_all().
+-       *  2. The machine addresses in PGD entries will not become invalid
+-       *     due to a concurrent save/restore.
+-       */
+-      pgd_dtor(pgd);
+-
+-      if (PTRS_PER_PMD > 1 && !xen_feature(XENFEAT_pae_pgdir_above_4gb))
+-              xen_destroy_contiguous_region((unsigned long)pgd, 0);
+-
+-      pgd_mop_up_pmds(mm, pgd);
+-      free_page((unsigned long)pgd);
+-}
  -
--/* same for gart, swiotlb, and nommu */
--static inline int dma_get_cache_alignment(void)
+-void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
  -{
--      return boot_cpu_data.x86_clflush_size;
+-      pgtable_page_dtor(pte);
+-      paravirt_release_pt(page_to_pfn(pte));
+-      tlb_remove_page(tlb, pte);
  -}
  -
--#define dma_is_consistent(d, h) 1
--
--extern int dma_set_mask(struct device *dev, u64 mask);
+-#ifdef CONFIG_X86_PAE
  -
--static inline void
--dma_cache_sync(struct device *dev, void *vaddr, size_t size,
--      enum dma_data_direction dir)
+-void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
  -{
--      flush_write_buffers();
+-      paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
+-      tlb_remove_page(tlb, virt_to_page(pmd));
  -}
  -
--extern struct device fallback_dev;
--extern int panic_on_overflow;
  -#endif
  -
--#endif /* _X8664_DMA_MAPPING_H */
+ void make_lowmem_page_readonly(void *va, unsigned int feature)
+ {
+       pte_t *pte;
+--- sle11-2009-05-14.orig/arch/x86/pci/i386.c  2009-05-14 10:56:29.000000000 +0200
++++ sle11-2009-05-14/arch/x86/pci/i386.c       2009-05-14 11:20:29.000000000 +0200
+@@ -331,10 +331,14 @@ int pci_mmap_page_range(struct pci_dev *
+                       flags);
+       }
+ 
++#ifndef CONFIG_XEN
+       if (((vma->vm_pgoff < max_low_pfn_mapped) ||
+            (vma->vm_pgoff >= (1UL<<(32 - PAGE_SHIFT)) &&
+             vma->vm_pgoff < max_pfn_mapped)) &&
+           ioremap_change_attr((unsigned long)__va(addr), len, flags)) {
++#else
++      if (ioremap_check_change_attr(vma->vm_pgoff, len, flags)) {
++#endif
+               free_memtype(addr, addr + len);
+               return -EINVAL;
+       }
+--- sle11-2009-05-14.orig/arch/x86/pci/irq-xen.c       2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/arch/x86/pci/irq-xen.c    2009-03-16 16:38:05.000000000 +0100
+@@ -140,9 +140,11 @@ static void __init pirq_peer_trick(void)
+               busmap[e->bus] = 1;
+       }
+       for(i = 1; i < 256; i++) {
++              int node;
+               if (!busmap[i] || pci_find_bus(0, i))
+                       continue;
+-              if (pci_scan_bus_with_sysdata(i))
++              node = get_mp_bus_to_node(i);
++              if (pci_scan_bus_on_node(i, &pci_root_ops, node))
+                       printk(KERN_INFO "PCI: Discovered primary peer "
+                              "bus %02x [IRQ]\n", i);
+       }
+@@ -204,7 +206,7 @@ static int pirq_ali_get(struct pci_dev *
+ {
+       static const unsigned char irqmap[16] = { 0, 9, 3, 10, 4, 5, 7, 6, 1, 11, 0, 12, 0, 14, 0, 15 };
+ 
+-      WARN_ON_ONCE(pirq >= 16);
++      WARN_ON_ONCE(pirq > 16);
+       return irqmap[read_config_nybble(router, 0x48, pirq-1)];
+ }
+ 
+@@ -213,7 +215,7 @@ static int pirq_ali_set(struct pci_dev *
+       static const unsigned char irqmap[16] = { 0, 8, 0, 2, 4, 5, 7, 6, 0, 1, 3, 9, 11, 0, 13, 15 };
+       unsigned int val = irqmap[irq];
+ 
+-      WARN_ON_ONCE(pirq >= 16);
++      WARN_ON_ONCE(pirq > 16);
+       if (val) {
+               write_config_nybble(router, 0x48, pirq-1, val);
+               return 1;
+@@ -264,7 +266,7 @@ static int pirq_via586_get(struct pci_de
+ {
+       static const unsigned int pirqmap[5] = { 3, 2, 5, 1, 1 };
+ 
+-      WARN_ON_ONCE(pirq >= 5);
++      WARN_ON_ONCE(pirq > 5);
+       return read_config_nybble(router, 0x55, pirqmap[pirq-1]);
+ }
+ 
+@@ -272,7 +274,7 @@ static int pirq_via586_set(struct pci_de
+ {
+       static const unsigned int pirqmap[5] = { 3, 2, 5, 1, 1 };
+ 
+-      WARN_ON_ONCE(pirq >= 5);
++      WARN_ON_ONCE(pirq > 5);
+       write_config_nybble(router, 0x55, pirqmap[pirq-1], irq);
+       return 1;
+ }
+@@ -286,7 +288,7 @@ static int pirq_ite_get(struct pci_dev *
+ {
+       static const unsigned char pirqmap[4] = { 1, 0, 2, 3 };
+ 
+-      WARN_ON_ONCE(pirq >= 4);
++      WARN_ON_ONCE(pirq > 4);
+       return read_config_nybble(router,0x43, pirqmap[pirq-1]);
+ }
+ 
+@@ -294,7 +296,7 @@ static int pirq_ite_set(struct pci_dev *
+ {
+       static const unsigned char pirqmap[4] = { 1, 0, 2, 3 };
+ 
+-      WARN_ON_ONCE(pirq >= 4);
++      WARN_ON_ONCE(pirq > 4);
+       write_config_nybble(router, 0x43, pirqmap[pirq-1], irq);
+       return 1;
+ }
+@@ -623,6 +625,13 @@ static __init int via_router_probe(struc
+                        */
+                       device = PCI_DEVICE_ID_VIA_8235;
+                       break;
++              case PCI_DEVICE_ID_VIA_8237:
++                      /**
++                       * Asus a7v600 bios wrongly reports 8237
++                       * as 586-compatible
++                       */
++                      device = PCI_DEVICE_ID_VIA_8237;
++                      break;
+               }
+       }
+ 
+--- sle11-2009-05-14.orig/arch/x86/vdso/vdso32-setup-xen.c     2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/arch/x86/vdso/vdso32-setup-xen.c  2009-03-16 16:38:05.000000000 +0100
+@@ -164,7 +164,7 @@ static __init void relocate_vdso(Elf32_E
+       Elf32_Shdr *shdr;
+       int i;
+ 
+-      BUG_ON(memcmp(ehdr->e_ident, ELFMAG, 4) != 0 ||
++      BUG_ON(memcmp(ehdr->e_ident, ELFMAG, SELFMAG) != 0 ||
+              !elf_check_arch_ia32(ehdr) ||
+              ehdr->e_type != ET_DYN);
+ 
+@@ -233,8 +233,12 @@ void syscall32_cpu_init(void)
+               BUG();
+ #endif
+ 
+-      if (use_sysenter < 0)
+-              use_sysenter = (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL);
++      if (use_sysenter < 0) {
++              if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
++                      use_sysenter = 1;
++              if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR)
++                      use_sysenter = 1;
++      }
+ }
+ 
+ #define compat_uses_vma               1
+@@ -337,8 +341,6 @@ int __init sysenter_setup(void)
+ 
+ #ifdef CONFIG_X86_32
+       gate_vma_init();
  -
--#include "dma-mapping_32.h"
---- a/include/asm-x86/mach-xen/asm/dma-mapping.h
-+++ b/include/asm-x86/mach-xen/asm/dma-mapping.h
-@@ -1,5 +1,17 @@
--#ifdef CONFIG_X86_32
--# include "dma-mapping_32.h"
--#else
--# include "dma-mapping_64.h"
--#endif
-+#ifndef _ASM_DMA_MAPPING_H_
-+
-+#include "../../dma-mapping.h"
+-      printk("Compat vDSO mapped to %08lx.\n", __fix_to_virt(FIX_VDSO));
+ #endif
+ 
+ #if defined(CONFIG_X86_64) && CONFIG_XEN_COMPAT < 0x030200
+@@ -383,6 +385,9 @@ int arch_setup_additional_pages(struct l
+       int ret = 0;
+       bool compat;
+ 
++      if (vdso_enabled == VDSO_DISABLED)
++              return 0;
  +
-+static inline int
-+address_needs_mapping(struct device *hwdev, dma_addr_t addr)
+       down_write(&mm->mmap_sem);
+ 
+       /* Test compat mode once here, in case someone
+--- sle11-2009-05-14.orig/drivers/acpi/processor_core.c        2009-02-16 15:58:14.000000000 +0100
++++ sle11-2009-05-14/drivers/acpi/processor_core.c     2009-03-16 16:38:05.000000000 +0100
+@@ -657,7 +657,7 @@ static int acpi_processor_get_info(struc
+        * of /proc/cpuinfo
+        */
+       status = acpi_evaluate_object(pr->handle, "_SUN", NULL, &buffer);
+-      if (ACPI_SUCCESS(status))
++      if (ACPI_SUCCESS(status) && pr->id != -1)
+               arch_fix_phys_package_id(pr->id, object.integer.value);
+ 
+       return 0;
+--- sle11-2009-05-14.orig/drivers/input/xen-kbdfront.c 2009-05-14 10:56:29.000000000 +0200
++++ sle11-2009-05-14/drivers/input/xen-kbdfront.c      2009-03-16 16:38:05.000000000 +0100
+@@ -325,7 +325,6 @@ static struct xenbus_device_id xenkbd_id
+ 
+ static struct xenbus_driver xenkbd = {
+       .name = "vkbd",
+-      .owner = THIS_MODULE,
+       .ids = xenkbd_ids,
+       .probe = xenkbd_probe,
+       .remove = xenkbd_remove,
+--- sle11-2009-05-14.orig/drivers/oprofile/cpu_buffer.c        2009-03-12 16:15:32.000000000 +0100
++++ sle11-2009-05-14/drivers/oprofile/cpu_buffer.c     2009-03-16 16:38:05.000000000 +0100
+@@ -341,7 +341,7 @@ void oprofile_add_mode(int cpu_mode)
+ 
+ int oprofile_add_domain_switch(int32_t domain_id)
+ {
+-      struct oprofile_cpu_buffer * cpu_buf = &cpu_buffer[smp_processor_id()];
++      struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer);
+ 
+       /* should have space for switching into and out of domain 
+          (2 slots each) plus one sample and one cpu mode switch */
+--- sle11-2009-05-14.orig/drivers/pci/msi-xen.c        2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/drivers/pci/msi-xen.c     2009-03-16 16:38:05.000000000 +0100
+@@ -583,7 +583,7 @@ int pci_enable_msi(struct pci_dev* dev)
+ EXPORT_SYMBOL(pci_enable_msi);
+ 
+ extern void pci_frontend_disable_msi(struct pci_dev* dev);
+-void pci_disable_msi(struct pci_dev* dev)
++void pci_msi_shutdown(struct pci_dev* dev)
+ {
+       int pirq;
+ 
+@@ -612,6 +612,10 @@ void pci_disable_msi(struct pci_dev* dev
+       pci_intx_for_msi(dev, 1);
+       dev->msi_enabled = 0;
+ }
++void pci_disable_msi(struct pci_dev* dev)
  +{
-+      dma_addr_t mask = 0xffffffff;
-+      /* If the device has a mask, use it, otherwise default to 32 bits */
-+      if (hwdev && hwdev->dma_mask)
-+              mask = *hwdev->dma_mask;
-+      return (addr & ~mask) != 0;
++      pci_msi_shutdown(dev);
  +}
-+
-+extern int range_straddles_page_boundary(paddr_t p, size_t size);
-+
-+#endif /* _ASM_DMA_MAPPING_H_ */
---- a/include/asm-x86/mach-xen/asm/fixmap_32.h
-+++ b/include/asm-x86/mach-xen/asm/fixmap_32.h
-@@ -10,8 +10,8 @@
-  * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
-  */
- 
--#ifndef _ASM_FIXMAP_H
--#define _ASM_FIXMAP_H
-+#ifndef _ASM_FIXMAP_32_H
-+#define _ASM_FIXMAP_32_H
+ EXPORT_SYMBOL(pci_disable_msi);
   
- /* used by vmalloc.c, vsyscall.lds.S.
-  *
-@@ -102,8 +102,7 @@ enum fixed_addresses {
-        */
- #define NR_FIX_BTMAPS         64
- #define FIX_BTMAPS_NESTING    4
--      FIX_BTMAP_END =
--              __end_of_permanent_fixed_addresses + 512 -
-+      FIX_BTMAP_END = __end_of_permanent_fixed_addresses + 512 -
-                       (__end_of_permanent_fixed_addresses & 511),
-       FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_NESTING - 1,
-       FIX_WP_TEST,
-@@ -114,19 +113,16 @@ enum fixed_addresses {
- };
+ /**
+@@ -714,7 +718,7 @@ int pci_enable_msix(struct pci_dev* dev,
+ EXPORT_SYMBOL(pci_enable_msix);
   
- extern void __set_fixmap(enum fixed_addresses idx,
--                                      maddr_t phys, pgprot_t flags);
-+                       maddr_t phys, pgprot_t flags);
- extern void reserve_top_address(unsigned long reserve);
+ extern void pci_frontend_disable_msix(struct pci_dev* dev);
+-void pci_disable_msix(struct pci_dev* dev)
++void pci_msix_shutdown(struct pci_dev* dev)
+ {
+       if (!pci_msi_enable)
+               return;
+@@ -751,6 +755,10 @@ void pci_disable_msix(struct pci_dev* de
+       pci_intx_for_msi(dev, 1);
+       dev->msix_enabled = 0;
+ }
++void pci_disable_msix(struct pci_dev* dev)
++{
++      pci_msix_shutdown(dev);
++}
+ EXPORT_SYMBOL(pci_disable_msix);
   
--#define set_fixmap(idx, phys) \
--              __set_fixmap(idx, phys, PAGE_KERNEL)
-+#define set_fixmap(idx, phys)                         \
-+      __set_fixmap(idx, phys, PAGE_KERNEL)
- /*
-  * Some hardware wants to get fixmapped without caching.
-  */
--#define set_fixmap_nocache(idx, phys) \
--              __set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE)
--
--#define clear_fixmap(idx) \
--              __set_fixmap(idx, 0, __pgprot(0))
-+#define set_fixmap_nocache(idx, phys)                 \
-+      __set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE)
+ /**
+--- sle11-2009-05-14.orig/drivers/video/Kconfig        2009-02-16 15:58:02.000000000 +0100
++++ sle11-2009-05-14/drivers/video/Kconfig     2009-03-16 16:38:05.000000000 +0100
+@@ -2029,7 +2029,7 @@ config FB_VIRTUAL
   
- #define FIXADDR_TOP   ((unsigned long)__FIXADDR_TOP)
+ config XEN_FBDEV_FRONTEND
+       tristate "Xen virtual frame buffer support"
+-      depends on FB && XEN
++      depends on FB && PARAVIRT_XEN
+       select FB_SYS_FILLRECT
+       select FB_SYS_COPYAREA
+       select FB_SYS_IMAGEBLIT
+--- sle11-2009-05-14.orig/drivers/video/xen-fbfront.c  2009-05-14 10:56:29.000000000 +0200
++++ sle11-2009-05-14/drivers/video/xen-fbfront.c       2009-03-16 16:38:05.000000000 +0100
+@@ -670,7 +670,6 @@ static struct xenbus_device_id xenfb_ids
   
-@@ -159,7 +155,7 @@ static __always_inline unsigned long fix
-       if (idx >= __end_of_fixed_addresses)
-               __this_fixmap_does_not_exist();
+ static struct xenbus_driver xenfb = {
+       .name = "vfb",
+-      .owner = THIS_MODULE,
+       .ids = xenfb_ids,
+       .probe = xenfb_probe,
+       .remove = xenfb_remove,
+--- sle11-2009-05-14.orig/drivers/xen/Kconfig  2009-03-04 11:28:34.000000000 +0100
++++ sle11-2009-05-14/drivers/xen/Kconfig       2009-03-16 16:38:05.000000000 +0100
+@@ -2,8 +2,6 @@
+ # This Kconfig describe xen options
+ #
   
--        return __fix_to_virt(idx);
-+      return __fix_to_virt(idx);
- }
+-mainmenu "Xen Configuration"
+-
+ config XEN
+       bool
   
- static inline unsigned long virt_to_fix(const unsigned long vaddr)
---- a/include/asm-x86/mach-xen/asm/fixmap_64.h
-+++ b/include/asm-x86/mach-xen/asm/fixmap_64.h
-@@ -8,8 +8,8 @@
-  * Copyright (C) 1998 Ingo Molnar
-  */
+--- sle11-2009-05-14.orig/drivers/xen/Makefile 2009-02-16 16:17:21.000000000 +0100
++++ sle11-2009-05-14/drivers/xen/Makefile      2009-03-16 16:38:05.000000000 +0100
+@@ -1,5 +1,8 @@
+-obj-$(CONFIG_PARAVIRT_XEN)    += grant-table.o
++obj-$(CONFIG_PARAVIRT_XEN)    += grant-table.o features.o events.o
++xen-xencomm-$(CONFIG_PARAVIRT_XEN) := xencomm.o
++xen-balloon-$(CONFIG_PARAVIRT_XEN) := balloon.o
   
--#ifndef _ASM_FIXMAP_H
--#define _ASM_FIXMAP_H
-+#ifndef _ASM_FIXMAP_64_H
-+#define _ASM_FIXMAP_64_H
++xen-balloon-$(CONFIG_XEN)     := balloon/
+ obj-$(CONFIG_XEN)             += core/
+ obj-$(CONFIG_XEN)             += console/
+ obj-$(CONFIG_XEN)             += evtchn/
+@@ -7,7 +10,8 @@ obj-y                         += xenbus/
+ obj-$(CONFIG_XEN)             += char/
   
- #include <linux/kernel.h>
- #include <asm/apicdef.h>
-@@ -35,7 +35,8 @@
+ obj-$(CONFIG_XEN)             += util.o
+-obj-$(CONFIG_XEN_BALLOON)             += balloon/
++obj-$(CONFIG_XEN_XENCOMM)     += $(xen-xencomm-y)
++obj-$(CONFIG_XEN_BALLOON)             += $(xen-balloon-y)
+ obj-$(CONFIG_XEN_BLKDEV_BACKEND)      += blkback/
+ obj-$(CONFIG_XEN_BLKDEV_TAP)          += blktap/
+ obj-$(CONFIG_XEN_NETDEV_BACKEND)      += netback/
+--- sle11-2009-05-14.orig/drivers/xen/blkfront/blkfront.c      2009-03-24 10:12:53.000000000 +0100
++++ sle11-2009-05-14/drivers/xen/blkfront/blkfront.c   2009-05-19 10:38:53.000000000 +0200
+@@ -285,7 +285,11 @@ static void backend_changed(struct xenbu
+               break;
   
- enum fixed_addresses {
-       VSYSCALL_LAST_PAGE,
--      VSYSCALL_FIRST_PAGE = VSYSCALL_LAST_PAGE + ((VSYSCALL_END-VSYSCALL_START) >> PAGE_SHIFT) - 1,
-+      VSYSCALL_FIRST_PAGE = VSYSCALL_LAST_PAGE
-+                          + ((VSYSCALL_END-VSYSCALL_START) >> PAGE_SHIFT) - 1,
-       VSYSCALL_HPET,
-       FIX_DBGP_BASE,
-       FIX_EARLYCON_MEM_BASE,
-@@ -45,11 +46,12 @@ enum fixed_addresses {
- #endif
- #ifndef CONFIG_XEN
-       FIX_IO_APIC_BASE_0,
--      FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS-1,
-+      FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1,
- #endif
- #ifdef CONFIG_EFI
-       FIX_EFI_IO_MAP_LAST_PAGE,
--      FIX_EFI_IO_MAP_FIRST_PAGE = FIX_EFI_IO_MAP_LAST_PAGE+MAX_EFI_IO_PAGES-1,
-+      FIX_EFI_IO_MAP_FIRST_PAGE = FIX_EFI_IO_MAP_LAST_PAGE
-+                                + MAX_EFI_IO_PAGES - 1,
- #endif
- #ifdef CONFIG_ACPI
-       FIX_ACPI_BEGIN,
-@@ -79,19 +81,16 @@ enum fixed_addresses {
-       __end_of_fixed_addresses
- };
+       case XenbusStateClosing:
+-              bd = bdget(info->dev);
++              if (!info->gd) {
++                      xenbus_frontend_closed(dev);
++                      break;
++              }
++              bd = bdget_disk(info->gd, 0);
+               if (bd == NULL)
+                       xenbus_dev_fatal(dev, -ENODEV, "bdget failed");
   
--extern void __set_fixmap (enum fixed_addresses idx,
--                                      unsigned long phys, pgprot_t flags);
-+extern void __set_fixmap(enum fixed_addresses idx,
-+                       unsigned long phys, pgprot_t flags);
+--- sle11-2009-05-14.orig/drivers/xen/blkfront/block.h 2009-03-24 10:11:58.000000000 +0100
++++ sle11-2009-05-14/drivers/xen/blkfront/block.h      2009-03-16 16:38:05.000000000 +0100
+@@ -96,7 +96,6 @@ struct blk_shadow {
+ struct blkfront_info
+ {
+       struct xenbus_device *xbdev;
+-      dev_t dev;
+       struct gendisk *gd;
+       int vdevice;
+       blkif_vdev_t handle;
+--- sle11-2009-05-14.orig/drivers/xen/blkfront/vbd.c   2009-02-16 16:17:21.000000000 +0100
++++ sle11-2009-05-14/drivers/xen/blkfront/vbd.c        2009-03-16 16:38:05.000000000 +0100
+@@ -246,17 +246,32 @@ xlvbd_init_blk_queue(struct gendisk *gd,
+       return 0;
+ }
   
--#define set_fixmap(idx, phys) \
--              __set_fixmap(idx, phys, PAGE_KERNEL)
-+#define set_fixmap(idx, phys)                 \
-+      __set_fixmap(idx, phys, PAGE_KERNEL)
- /*
-  * Some hardware wants to get fixmapped without caching.
-  */
--#define set_fixmap_nocache(idx, phys) \
--              __set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE)
--
--#define clear_fixmap(idx) \
--                __set_fixmap(idx, 0, __pgprot(0))
-+#define set_fixmap_nocache(idx, phys)                 \
-+      __set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE)
+-static int
+-xlvbd_alloc_gendisk(int major, int minor, blkif_sector_t capacity, int vdevice,
+-                  u16 vdisk_info, u16 sector_size,
+-                  struct blkfront_info *info)
++int
++xlvbd_add(blkif_sector_t capacity, int vdevice, u16 vdisk_info,
++        u16 sector_size, struct blkfront_info *info)
+ {
++      int major, minor;
+       struct gendisk *gd;
+       struct xlbd_major_info *mi;
+       int nr_minors = 1;
+       int err = -ENODEV;
+       unsigned int offset;
   
- #define FIXADDR_TOP   (VSYSCALL_END-PAGE_SIZE)
- #define FIXADDR_SIZE  (__end_of_fixed_addresses << PAGE_SHIFT)
---- a/include/asm-x86/mach-xen/asm/fixmap.h
-+++ b/include/asm-x86/mach-xen/asm/fixmap.h
-@@ -1,5 +1,13 @@
-+#ifndef _ASM_FIXMAP_H
-+#define _ASM_FIXMAP_H
-+
- #ifdef CONFIG_X86_32
- # include "fixmap_32.h"
- #else
- # include "fixmap_64.h"
- #endif
++      if ((vdevice>>EXT_SHIFT) > 1) {
++              /* this is above the extended range; something is wrong */
++              printk(KERN_WARNING "blkfront: vdevice 0x%x is above the extended range; ignoring\n", vdevice);
++              return -ENODEV;
++      }
  +
-+#define clear_fixmap(idx)                     \
-+      __set_fixmap(idx, 0, __pgprot(0))
++      if (!VDEV_IS_EXTENDED(vdevice)) {
++              major = BLKIF_MAJOR(vdevice);
++              minor = BLKIF_MINOR(vdevice);
++      }
++      else {
++              major = 202;
++              minor = BLKIF_MINOR_EXT(vdevice);
++      }
  +
-+#endif
---- a/include/asm-x86/mach-xen/asm/highmem.h
-+++ b/include/asm-x86/mach-xen/asm/highmem.h
-@@ -8,7 +8,7 @@
-  *                  Gerhard.Wichert@pdb.siemens.de
-  *
-  *
-- * Redesigned the x86 32-bit VM architecture to deal with 
-+ * Redesigned the x86 32-bit VM architecture to deal with
-  * up to 16 Terabyte physical memory. With current x86 CPUs
-  * we now support up to 64 Gigabytes physical RAM.
-  *
---- a/include/asm-x86/mach-xen/asm/io_32.h
-+++ b/include/asm-x86/mach-xen/asm/io_32.h
-@@ -50,12 +50,6 @@
- #include <asm/fixmap.h>
+       BUG_ON(info->gd != NULL);
+       BUG_ON(info->mi != NULL);
+       BUG_ON(info->rq != NULL);
+@@ -337,41 +352,6 @@ xlvbd_alloc_gendisk(int major, int minor
+       return err;
+ }
   
- /*
-- * Convert a physical pointer to a virtual kernel pointer for /dev/mem
-- * access
-- */
--#define xlate_dev_mem_ptr(p)  __va(p)
+-int
+-xlvbd_add(blkif_sector_t capacity, int vdevice, u16 vdisk_info,
+-        u16 sector_size, struct blkfront_info *info)
+-{
+-      struct block_device *bd;
+-      int err = 0;
+-      int major, minor;
  -
--/*
-  * Convert a virtual cached pointer to an uncached pointer
-  */
- #define xlate_dev_kmem_ptr(p) p
-@@ -66,14 +60,14 @@
-  *
-  *    The returned physical address is the physical (CPU) mapping for
-  *    the memory address given. It is only valid to use this function on
-- *    addresses directly mapped or allocated via kmalloc. 
-+ *    addresses directly mapped or allocated via kmalloc.
-  *
-  *    This function does not give bus mappings for DMA transfers. In
-  *    almost all conceivable cases a device driver should not be using
-  *    this function
-  */
-- 
--static inline unsigned long virt_to_phys(volatile void * address)
-+
-+static inline unsigned long virt_to_phys(volatile void *address)
+-      if ((vdevice>>EXT_SHIFT) > 1) {
+-              /* this is above the extended range; something is wrong */
+-              printk(KERN_WARNING "blkfront: vdevice 0x%x is above the extended range; ignoring\n", vdevice);
+-              return -ENODEV;
+-      }
+-
+-      if (!VDEV_IS_EXTENDED(vdevice)) {
+-              major = BLKIF_MAJOR(vdevice);
+-              minor = BLKIF_MINOR(vdevice);
+-      }
+-      else {
+-              major = 202;
+-              minor = BLKIF_MINOR_EXT(vdevice);
+-      }
+-
+-      info->dev = MKDEV(major, minor);
+-      bd = bdget(info->dev);
+-      if (bd == NULL)
+-              return -ENODEV;
+-
+-      err = xlvbd_alloc_gendisk(major, minor, capacity, vdevice, vdisk_info,
+-                                sector_size, info);
+-
+-      bdput(bd);
+-      return err;
+-}
+-
+ void
+ xlvbd_del(struct blkfront_info *info)
+ {
+--- sle11-2009-05-14.orig/drivers/xen/blktap/blktap.c  2009-04-20 11:38:54.000000000 +0200
++++ sle11-2009-05-14/drivers/xen/blktap/blktap.c       2009-04-20 11:40:14.000000000 +0200
+@@ -111,6 +111,7 @@ typedef struct tap_blkif {
+       unsigned long mode;           /*current switching mode               */
+       int minor;                    /*Minor number for tapdisk device      */
+       pid_t pid;                    /*tapdisk process id                   */
++      struct pid_namespace *pid_ns; /*... and its corresponding namespace  */
+       enum { RUNNING, CLEANSHUTDOWN } status; /*Detect a clean userspace 
+                                                 shutdown                   */
+       unsigned long *idx_map;       /*Record the user ring id to kern 
+@@ -299,16 +300,14 @@ struct tap_vma_priv {
+       struct page *map[];
+ };
+ 
+-static struct page *blktap_nopage(struct vm_area_struct *vma,
+-                                unsigned long address,
+-                                int *type)
++static int blktap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
   {
-       return __pa(address);
+       /*
+        * if the page has not been mapped in by the driver then return
+-       * NOPAGE_SIGBUS to the domain.
++       * VM_FAULT_SIGBUS to the domain.
+        */
+ 
+-      return NOPAGE_SIGBUS;
++      return VM_FAULT_SIGBUS;
   }
-@@ -91,7 +85,7 @@ static inline unsigned long virt_to_phys
-  *    this function
-  */
   
--static inline void * phys_to_virt(unsigned long address)
-+static inline void *phys_to_virt(unsigned long address)
- {
-       return __va(address);
+ static pte_t blktap_clear_pte(struct vm_area_struct *vma,
+@@ -404,7 +403,7 @@ static void blktap_vma_close(struct vm_a
   }
-@@ -152,11 +146,6 @@ extern void *early_ioremap(unsigned long
- extern void early_iounmap(void *addr, unsigned long size);
- extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys);
   
--/* Use early IO mappings for DMI because it's initialized early */
--#define dmi_ioremap early_ioremap
--#define dmi_iounmap early_iounmap
--#define dmi_alloc alloc_bootmem
--
- /*
-  * ISA I/O bus memory addresses are 1:1 with the physical address.
-  */
-@@ -182,16 +171,19 @@ extern void __iomem *fix_ioremap(unsigne
+ struct vm_operations_struct blktap_vm_ops = {
+-      nopage:   blktap_nopage,
++      fault:    blktap_fault,
+       zap_pte:  blktap_clear_pte,
+       close:    blktap_vma_close,
+ };
+@@ -498,9 +497,8 @@ found:
+               tapfds[minor] = info;
   
- static inline unsigned char readb(const volatile void __iomem *addr)
- {
--      return *(volatile unsigned char __force *) addr;
-+      return *(volatile unsigned char __force *)addr;
- }
-+
- static inline unsigned short readw(const volatile void __iomem *addr)
- {
--      return *(volatile unsigned short __force *) addr;
-+      return *(volatile unsigned short __force *)addr;
- }
-+
- static inline unsigned int readl(const volatile void __iomem *addr)
- {
-       return *(volatile unsigned int __force *) addr;
- }
-+
- #define readb_relaxed(addr) readb(addr)
- #define readw_relaxed(addr) readw(addr)
- #define readl_relaxed(addr) readl(addr)
-@@ -201,15 +193,17 @@ static inline unsigned int readl(const v
+               if ((class = get_xen_class()) != NULL)
+-                      class_device_create(class, NULL,
+-                                          MKDEV(blktap_major, minor), NULL,
+-                                          "blktap%d", minor);
++                      device_create(class, NULL, MKDEV(blktap_major, minor),
++                                    "blktap%d", minor);
+       }
   
- static inline void writeb(unsigned char b, volatile void __iomem *addr)
- {
--      *(volatile unsigned char __force *) addr = b;
-+      *(volatile unsigned char __force *)addr = b;
- }
-+
- static inline void writew(unsigned short b, volatile void __iomem *addr)
- {
--      *(volatile unsigned short __force *) addr = b;
-+      *(volatile unsigned short __force *)addr = b;
+ out:
+@@ -542,7 +540,7 @@ void signal_tapdisk(int idx) 
+               return;
+ 
+       if (info->pid > 0) {
+-              ptask = find_task_by_pid(info->pid);
++              ptask = find_task_by_pid_ns(info->pid, info->pid_ns);
+               if (ptask)
+                       info->status = CLEANSHUTDOWN;
+       }
+@@ -770,8 +768,9 @@ static int blktap_ioctl(struct inode *in
+       {
+               if (info) {
+                       info->pid = (pid_t)arg;
+-                      DPRINTK("blktap: pid received %d\n", 
+-                             info->pid);
++                      info->pid_ns = current->nsproxy->pid_ns;
++                      DPRINTK("blktap: pid received %p:%d\n",
++                              info->pid_ns, info->pid);
+               }
+               return 0;
+       }
+@@ -1684,9 +1683,7 @@ static int __init blkif_init(void)
+                * We only create the device when a request of a new device is
+                * made.
+                */
+-              class_device_create(class, NULL,
+-                                  MKDEV(blktap_major, 0), NULL,
+-                                  "blktap0");
++              device_create(class, NULL, MKDEV(blktap_major, 0), "blktap0");
+       } else {
+               /* this is bad, but not fatal */
+               WPRINTK("blktap: sysfs xen_class not created\n");
+--- sle11-2009-05-14.orig/drivers/xen/char/mem.c       2008-12-15 11:27:22.000000000 +0100
++++ sle11-2009-05-14/drivers/xen/char/mem.c    2009-03-16 16:38:05.000000000 +0100
+@@ -33,6 +33,27 @@ static inline int uncached_access(struct
+       return 0;
   }
+ 
++static inline int range_is_allowed(unsigned long pfn, unsigned long size)
++{
++#ifdef CONFIG_NONPROMISC_DEVMEM
++      u64 from = ((u64)pfn) << PAGE_SHIFT;
++      u64 to = from + size;
++      u64 cursor = from;
  +
- static inline void writel(unsigned int b, volatile void __iomem *addr)
- {
--      *(volatile unsigned int __force *) addr = b;
-+      *(volatile unsigned int __force *)addr = b;
- }
- #define __raw_writeb writeb
- #define __raw_writew writew
-@@ -252,12 +246,12 @@ memcpy_toio(volatile void __iomem *dst, 
-  *    1. Out of order aware processors
-  *    2. Accidentally out of order processors (PPro errata #51)
-  */
-- 
++      while (cursor < to) {
++              if (!devmem_is_allowed(pfn)) {
++                      printk(KERN_INFO
++              "Program %s tried to access /dev/mem between %Lx->%Lx.\n",
++                              current->comm, from, to);
++                      return 0;
++              }
++              cursor += PAGE_SIZE;
++              pfn++;
++      }
++#endif
++      return 1;
++}
  +
- #if defined(CONFIG_X86_OOSTORE) || defined(CONFIG_X86_PPRO_FENCE)
+ /*
+  * This funcion reads the *physical* memory. The f_pos points directly to the 
+  * memory location. 
+@@ -55,6 +76,9 @@ static ssize_t read_mem(struct file * fi
   
- static inline void flush_write_buffers(void)
- {
--      __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory");
-+      asm volatile("lock; addl $0,0(%%esp)": : :"memory");
- }
+               sz = min_t(unsigned long, sz, count);
   
- #else
-@@ -274,7 +268,8 @@ extern void xen_io_delay(void);
- extern int io_delay_type;
- extern void io_delay_init(void);
++              if (!range_is_allowed(p >> PAGE_SHIFT, count))
++                      return -EPERM;
++
+               v = ioremap(p, sz);
+               if (IS_ERR(v) || v == NULL) {
+                       /*
+@@ -103,6 +127,9 @@ static ssize_t write_mem(struct file * f
   
--static inline void slow_down_io(void) {
-+static inline void slow_down_io(void)
-+{
-       native_io_delay();
- #ifdef REALLY_SLOW_IO
-       native_io_delay();
-@@ -283,52 +278,75 @@ static inline void slow_down_io(void) {
- #endif
+               sz = min_t(unsigned long, sz, count);
+ 
++              if (!range_is_allowed(p >> PAGE_SHIFT, sz))
++                      return -EPERM;
++
+               v = ioremap(p, sz);
+               if (v == NULL)
+                       break;
+@@ -131,6 +158,23 @@ static ssize_t write_mem(struct file * f
   }
   
--#define __BUILDIO(bwl,bw,type) \
--static inline void out##bwl(unsigned type value, int port) { \
--      out##bwl##_local(value, port); \
--} \
--static inline unsigned type in##bwl(int port) { \
--      return in##bwl##_local(port); \
--}
--
--#define BUILDIO(bwl,bw,type) \
--static inline void out##bwl##_local(unsigned type value, int port) { \
--      __asm__ __volatile__("out" #bwl " %" #bw "0, %w1" : : "a"(value), "Nd"(port)); \
--} \
--static inline unsigned type in##bwl##_local(int port) { \
--      unsigned type value; \
--      __asm__ __volatile__("in" #bwl " %w1, %" #bw "0" : "=a"(value) : "Nd"(port)); \
--      return value; \
--} \
--static inline void out##bwl##_local_p(unsigned type value, int port) { \
--      out##bwl##_local(value, port); \
--      slow_down_io(); \
--} \
--static inline unsigned type in##bwl##_local_p(int port) { \
--      unsigned type value = in##bwl##_local(port); \
--      slow_down_io(); \
--      return value; \
--} \
--__BUILDIO(bwl,bw,type) \
--static inline void out##bwl##_p(unsigned type value, int port) { \
--      out##bwl(value, port); \
--      slow_down_io(); \
--} \
--static inline unsigned type in##bwl##_p(int port) { \
--      unsigned type value = in##bwl(port); \
--      slow_down_io(); \
--      return value; \
--} \
--static inline void outs##bwl(int port, const void *addr, unsigned long count) { \
--      __asm__ __volatile__("rep; outs" #bwl : "+S"(addr), "+c"(count) : "d"(port)); \
--} \
--static inline void ins##bwl(int port, void *addr, unsigned long count) { \
--      __asm__ __volatile__("rep; ins" #bwl : "+D"(addr), "+c"(count) : "d"(port)); \
--}
--
--BUILDIO(b,b,char)
--BUILDIO(w,w,short)
--BUILDIO(l,,int)
-+#define __BUILDIO(bwl, bw, type)                              \
-+static inline void out##bwl(unsigned type value, int port)    \
-+{                                                             \
-+      out##bwl##_local(value, port);                          \
-+}                                                             \
-+                                                              \
-+static inline unsigned type in##bwl(int port)                 \
-+{                                                             \
-+      return in##bwl##_local(port);                           \
+ #ifndef ARCH_HAS_DEV_MEM_MMAP_MEM
++static void mmap_mem_open(struct vm_area_struct *vma)
++{
++      map_devmem(vma->vm_pgoff,  vma->vm_end - vma->vm_start,
++                      vma->vm_page_prot);
  +}
  +
-+#define BUILDIO(bwl, bw, type)                                                \
-+static inline void out##bwl##_local(unsigned type value, int port)    \
-+{                                                                     \
-+      asm volatile("out" #bwl " %" #bw "0, %w1"               \
-+                   : : "a"(value), "Nd"(port));                       \
-+}                                                                     \
-+                                                                      \
-+static inline unsigned type in##bwl##_local(int port)                 \
-+{                                                                     \
-+      unsigned type value;                                            \
-+      asm volatile("in" #bwl " %w1, %" #bw "0"                \
-+                   : "=a"(value) : "Nd"(port));                       \
-+      return value;                                                   \
-+}                                                                     \
-+                                                                      \
-+static inline void out##bwl##_local_p(unsigned type value, int port)  \
-+{                                                                     \
-+      out##bwl##_local(value, port);                                  \
-+      slow_down_io();                                                 \
-+}                                                                     \
-+                                                                      \
-+static inline unsigned type in##bwl##_local_p(int port)                       \
-+{                                                                     \
-+      unsigned type value = in##bwl##_local(port);                    \
-+      slow_down_io();                                                 \
-+      return value;                                                   \
-+}                                                                     \
-+                                                                      \
-+__BUILDIO(bwl, bw, type)                                              \
-+                                                                      \
-+static inline void out##bwl##_p(unsigned type value, int port)                \
-+{                                                                     \
-+      out##bwl(value, port);                                          \
-+      slow_down_io();                                                 \
-+}                                                                     \
-+                                                                      \
-+static inline unsigned type in##bwl##_p(int port)                     \
-+{                                                                     \
-+      unsigned type value = in##bwl(port);                            \
-+      slow_down_io();                                                 \
-+      return value;                                                   \
-+}                                                                     \
-+                                                                      \
-+static inline void outs##bwl(int port, const void *addr, unsigned long count) \
-+{                                                                     \
-+      asm volatile("rep; outs" #bwl                                   \
-+                   : "+S"(addr), "+c"(count) : "d"(port));            \
-+}                                                                     \
-+                                                                      \
-+static inline void ins##bwl(int port, void *addr, unsigned long count)        \
-+{                                                                     \
-+      asm volatile("rep; ins" #bwl                                    \
-+                   : "+D"(addr), "+c"(count) : "d"(port));            \
++static void mmap_mem_close(struct vm_area_struct *vma)
++{
++      unmap_devmem(vma->vm_pgoff,  vma->vm_end - vma->vm_start,
++                      vma->vm_page_prot);
  +}
  +
-+BUILDIO(b, b, char)
-+BUILDIO(w, w, short)
-+BUILDIO(l, , int)
- 
- /* We will be supplying our own /dev/mem implementation */
- #define ARCH_HAS_DEV_MEM
---- a/include/asm-x86/mach-xen/asm/io_64.h
-+++ b/include/asm-x86/mach-xen/asm/io_64.h
-@@ -55,60 +55,75 @@ static inline void slow_down_io(void)
- /*
-  * Talk about misusing macros..
-  */
--#define __OUT1(s,x) \
-+#define __OUT1(s, x)                                                  \
- static inline void out##s(unsigned x value, unsigned short port) {
- 
--#define __OUT2(s,s1,s2) \
--__asm__ __volatile__ ("out" #s " %" s1 "0,%" s2 "1"
-+#define __OUT2(s, s1, s2)                             \
-+asm volatile ("out" #s " %" s1 "0,%" s2 "1"
- 
- #ifndef REALLY_SLOW_IO
- #define REALLY_SLOW_IO
- #define UNSET_REALLY_SLOW_IO
- #endif
++static struct vm_operations_struct mmap_mem_ops = {
++      .open  = mmap_mem_open,
++      .close = mmap_mem_close
++};
++
+ static int xen_mmap_mem(struct file * file, struct vm_area_struct * vma)
+ {
+       size_t size = vma->vm_end - vma->vm_start;
+@@ -138,6 +182,15 @@ static int xen_mmap_mem(struct file * fi
+       if (uncached_access(file))
+               vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
   
--#define __OUT(s,s1,x) \
--__OUT1(s,x) __OUT2(s,s1,"w") : : "a" (value), "Nd" (port)); } \
--__OUT1(s##_p, x) __OUT2(s, s1, "w") : : "a" (value), "Nd" (port)); \
--              slow_down_io(); }
--
--#define __IN1(s) \
--static inline RETURN_TYPE in##s(unsigned short port) { RETURN_TYPE _v;
--
--#define __IN2(s,s1,s2) \
--__asm__ __volatile__ ("in" #s " %" s2 "1,%" s1 "0"
--
--#define __IN(s,s1,i...) \
--__IN1(s) __IN2(s, s1, "w") : "=a" (_v) : "Nd" (port), ##i); return _v; } \
--__IN1(s##_p) __IN2(s, s1, "w") : "=a" (_v) : "Nd" (port), ##i);         \
--                              slow_down_io(); return _v; }
-+#define __OUT(s, s1, x)                                                       \
-+      __OUT1(s, x) __OUT2(s, s1, "w") : : "a" (value), "Nd" (port));  \
-+      }                                                               \
-+      __OUT1(s##_p, x) __OUT2(s, s1, "w") : : "a" (value), "Nd" (port)); \
-+      slow_down_io();                                                 \
-+}
++      if (!range_is_allowed(vma->vm_pgoff, size))
++              return -EPERM;
  +
-+#define __IN1(s)                                                      \
-+static inline RETURN_TYPE in##s(unsigned short port)                  \
-+{                                                                     \
-+      RETURN_TYPE _v;
++      if (!phys_mem_access_prot_allowed(file, vma->vm_pgoff, size,
++                                              &vma->vm_page_prot))
++              return -EINVAL;
  +
-+#define __IN2(s, s1, s2)                                              \
-+      asm volatile ("in" #s " %" s2 "1,%" s1 "0"
++      vma->vm_ops = &mmap_mem_ops;
  +
-+#define __IN(s, s1, i...)                                             \
-+      __IN1(s) __IN2(s, s1, "w") : "=a" (_v) : "Nd" (port), ##i);     \
-+      return _v;                                                      \
-+      }                                                               \
-+      __IN1(s##_p) __IN2(s, s1, "w") : "=a" (_v) : "Nd" (port), ##i); \
-+      slow_down_io(); \
-+      return _v; }
+       /* We want to return the real error code, not EAGAIN. */
+       return direct_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
+                                     size, vma->vm_page_prot, DOMID_IO);
+--- sle11-2009-05-14.orig/drivers/xen/console/console.c        2008-12-15 11:26:44.000000000 +0100
++++ sle11-2009-05-14/drivers/xen/console/console.c     2009-03-16 16:38:05.000000000 +0100
+@@ -552,16 +552,18 @@ static int xencons_write(
+       return i;
+ }
   
- #ifdef UNSET_REALLY_SLOW_IO
- #undef REALLY_SLOW_IO
- #endif
+-static void xencons_put_char(struct tty_struct *tty, u_char ch)
++static int xencons_put_char(struct tty_struct *tty, u_char ch)
+ {
+       unsigned long flags;
++      int ret;
   
--#define __INS(s) \
--static inline void ins##s(unsigned short port, void * addr, unsigned long count) \
--{ __asm__ __volatile__ ("rep ; ins" #s \
--: "=D" (addr), "=c" (count) : "d" (port),"0" (addr),"1" (count)); }
--
--#define __OUTS(s) \
--static inline void outs##s(unsigned short port, const void * addr, unsigned long count) \
--{ __asm__ __volatile__ ("rep ; outs" #s \
--: "=S" (addr), "=c" (count) : "d" (port),"0" (addr),"1" (count)); }
-+#define __INS(s)                                                      \
-+static inline void ins##s(unsigned short port, void *addr,            \
-+                        unsigned long count)                          \
-+{                                                                     \
-+      asm volatile ("rep ; ins" #s                                    \
-+                    : "=D" (addr), "=c" (count)                       \
-+                    : "d" (port), "0" (addr), "1" (count));           \
-+}
-+
-+#define __OUTS(s)                                                     \
-+static inline void outs##s(unsigned short port, const void *addr,     \
-+                         unsigned long count)                         \
-+{                                                                     \
-+      asm volatile ("rep ; outs" #s                                   \
-+                    : "=S" (addr), "=c" (count)                       \
-+                    : "d" (port), "0" (addr), "1" (count));           \
-+}
+       if (DUMMY_TTY(tty))
+-              return;
++              return 0;
   
- #define RETURN_TYPE unsigned char
--__IN(b,"")
-+__IN(b, "")
- #undef RETURN_TYPE
- #define RETURN_TYPE unsigned short
--__IN(w,"")
-+__IN(w, "")
- #undef RETURN_TYPE
- #define RETURN_TYPE unsigned int
--__IN(l,"")
-+__IN(l, "")
- #undef RETURN_TYPE
- 
--__OUT(b,"b",char)
--__OUT(w,"w",short)
--__OUT(l,,int)
-+__OUT(b, "b", char)
-+__OUT(w, "w", short)
-+__OUT(l, , int)
- 
- __INS(b)
- __INS(w)
-@@ -129,12 +144,12 @@ __OUTS(l)
-  * Change virtual addresses to physical addresses and vv.
-  * These are pretty trivial
-  */
--static inline unsigned long virt_to_phys(volatile void * address)
-+static inline unsigned long virt_to_phys(volatile void *address)
- {
-       return __pa(address);
+       spin_lock_irqsave(&xencons_lock, flags);
+-      (void)__xencons_put_char(ch);
++      ret = __xencons_put_char(ch);
+       spin_unlock_irqrestore(&xencons_lock, flags);
++      return ret;
   }
   
--static inline void * phys_to_virt(unsigned long address)
-+static inline void *phys_to_virt(unsigned long address)
- {
-       return __va(address);
- }
-@@ -216,18 +231,22 @@ static inline __u8 __readb(const volatil
- {
-       return *(__force volatile __u8 *)addr;
+ static void xencons_flush_chars(struct tty_struct *tty)
+@@ -583,7 +585,7 @@ static void xencons_wait_until_sent(stru
+       if (DUMMY_TTY(tty))
+               return;
+ 
+-      while (DRV(tty->driver)->chars_in_buffer(tty)) {
++      while (tty_chars_in_buffer(tty)) {
+               set_current_state(TASK_INTERRUPTIBLE);
+               schedule_timeout(1);
+               if (signal_pending(current))
+@@ -632,8 +634,7 @@ static void xencons_close(struct tty_str
+ 
+       tty->closing = 1;
+       tty_wait_until_sent(tty, 0);
+-      if (DRV(tty->driver)->flush_buffer != NULL)
+-              DRV(tty->driver)->flush_buffer(tty);
++      tty_driver_flush_buffer(tty);
+       if (tty->ldisc.flush_buffer != NULL)
+               tty->ldisc.flush_buffer(tty);
+       tty->closing = 0;
+--- sle11-2009-05-14.orig/drivers/xen/core/machine_kexec.c     2009-02-17 11:46:41.000000000 +0100
++++ sle11-2009-05-14/drivers/xen/core/machine_kexec.c  2009-03-16 16:38:05.000000000 +0100
+@@ -5,6 +5,7 @@
+ 
+ #include <linux/kexec.h>
+ #include <xen/interface/kexec.h>
++#include <linux/reboot.h>
+ #include <linux/mm.h>
+ #include <linux/bootmem.h>
+ 
+@@ -90,6 +91,9 @@ void __init xen_machine_kexec_setup_reso
+       xen_hypervisor_res.start = range.start;
+       xen_hypervisor_res.end = range.start + range.size - 1;
+       xen_hypervisor_res.flags = IORESOURCE_BUSY | IORESOURCE_MEM;
++#ifdef CONFIG_X86_64
++      insert_resource(&iomem_resource, &xen_hypervisor_res);
++#endif
+ 
+       /* fill in crashk_res if range is reserved by hypervisor */
+ 
+@@ -102,6 +106,9 @@ void __init xen_machine_kexec_setup_reso
+       if (range.size) {
+               crashk_res.start = range.start;
+               crashk_res.end = range.start + range.size - 1;
++#ifdef CONFIG_X86_64
++              insert_resource(&iomem_resource, &crashk_res);
++#endif
+       }
+ 
+       /* get physical address of vmcoreinfo */
+@@ -153,11 +160,13 @@ void __init xen_machine_kexec_setup_reso
+       return;
   }
-+
- static inline __u16 __readw(const volatile void __iomem *addr)
+ 
++#ifndef CONFIG_X86_64
+ void __init xen_machine_kexec_register_resources(struct resource *res)
   {
-       return *(__force volatile __u16 *)addr;
+       request_resource(res, &xen_hypervisor_res);
+       machine_kexec_register_resources(res);
   }
-+
- static __always_inline __u32 __readl(const volatile void __iomem *addr)
++#endif
+ 
+ static void setup_load_arg(xen_kexec_image_t *xki, struct kimage *image)
   {
-       return *(__force volatile __u32 *)addr;
+@@ -228,6 +237,11 @@ void machine_shutdown(void)
+       /* do nothing */
   }
-+
- static inline __u64 __readq(const volatile void __iomem *addr)
+ 
++void machine_crash_shutdown(struct pt_regs *regs)
++{
++      /* The kernel is broken so disable interrupts */
++      local_irq_disable();
++}
+ 
+ /*
+  * Local variables:
+--- sle11-2009-05-14.orig/drivers/xen/core/smpboot.c   2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/drivers/xen/core/smpboot.c        2009-03-16 16:38:05.000000000 +0100
+@@ -53,17 +53,16 @@ static DEFINE_PER_CPU(int, callfunc_irq)
+ static char resched_name[NR_CPUS][15];
+ static char callfunc_name[NR_CPUS][15];
+ 
+-u8 cpu_2_logical_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
++#ifdef CONFIG_X86_LOCAL_APIC
++#define set_cpu_to_apicid(cpu, apicid) (per_cpu(x86_cpu_to_apicid, cpu) = (apicid))
++#else
++#define set_cpu_to_apicid(cpu, apicid)
++#endif
+ 
+ DEFINE_PER_CPU(cpumask_t, cpu_sibling_map);
+ DEFINE_PER_CPU(cpumask_t, cpu_core_map);
+ EXPORT_PER_CPU_SYMBOL(cpu_core_map);
+ 
+-#if defined(__i386__)
+-DEFINE_PER_CPU(u8, x86_cpu_to_apicid) = BAD_APICID;
+-EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid);
+-#endif
+-
+ void __init prefill_possible_map(void)
   {
-       return *(__force volatile __u64 *)addr;
+       int i, rc;
+@@ -154,7 +153,7 @@ static int __cpuinit xen_smp_intr_init(u
   }
-+
- #define readb(x) __readb(x)
- #define readw(x) __readw(x)
- #define readl(x) __readl(x)
-@@ -247,37 +266,44 @@ static inline void __writel(__u32 b, vol
+ 
+ #ifdef CONFIG_HOTPLUG_CPU
+-static void xen_smp_intr_exit(unsigned int cpu)
++static void __cpuexit xen_smp_intr_exit(unsigned int cpu)
   {
-       *(__force volatile __u32 *)addr = b;
+       if (cpu != 0)
+               local_teardown_timer(cpu);
+@@ -263,8 +262,7 @@ void __init smp_prepare_cpus(unsigned in
+       boot_cpu_data.apicid = apicid;
+       cpu_data(0) = boot_cpu_data;
+ 
+-      cpu_2_logical_apicid[0] = apicid;
+-      per_cpu(x86_cpu_to_apicid, 0) = apicid;
++      set_cpu_to_apicid(0, apicid);
+ 
+       current_thread_info()->cpu = 0;
+ 
+@@ -319,8 +317,7 @@ void __init smp_prepare_cpus(unsigned in
+               cpu_data(cpu).cpu_index = cpu;
+               cpu_data(cpu).apicid = apicid;
+ 
+-              cpu_2_logical_apicid[cpu] = apicid;
+-              per_cpu(x86_cpu_to_apicid, cpu) = apicid;
++              set_cpu_to_apicid(cpu, apicid);
+ 
+ #ifdef __x86_64__
+               cpu_pda(cpu)->pcurrent = idle;
+@@ -375,7 +372,7 @@ static int __init initialize_cpu_present
   }
-+
- static inline void __writeq(__u64 b, volatile void __iomem *addr)
+ core_initcall(initialize_cpu_present_map);
+ 
+-int __cpu_disable(void)
++int __cpuexit __cpu_disable(void)
   {
-       *(__force volatile __u64 *)addr = b;
+       cpumask_t map = cpu_online_map;
+       unsigned int cpu = smp_processor_id();
+@@ -392,7 +389,7 @@ int __cpu_disable(void)
+       return 0;
   }
-+
- static inline void __writeb(__u8 b, volatile void __iomem *addr)
+ 
+-void __cpu_die(unsigned int cpu)
++void __cpuexit __cpu_die(unsigned int cpu)
   {
-       *(__force volatile __u8 *)addr = b;
- }
-+
- static inline void __writew(__u16 b, volatile void __iomem *addr)
+       while (HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL)) {
+               current->state = TASK_UNINTERRUPTIBLE;
+--- sle11-2009-05-14.orig/drivers/xen/core/xen_proc.c  2009-05-14 10:56:29.000000000 +0200
++++ sle11-2009-05-14/drivers/xen/core/xen_proc.c       2009-03-16 16:38:05.000000000 +0100
+@@ -8,7 +8,7 @@ static struct proc_dir_entry *xen_base;
+ struct proc_dir_entry *create_xen_proc_entry(const char *name, mode_t mode)
   {
-       *(__force volatile __u16 *)addr = b;
+       if ( xen_base == NULL )
+-              if ( (xen_base = proc_mkdir("xen", &proc_root)) == NULL )
++              if ( (xen_base = proc_mkdir("xen", NULL)) == NULL )
+                       panic("Couldn't create /proc/xen");
+       return create_proc_entry(name, mode, xen_base);
   }
--#define writeq(val,addr) __writeq((val),(addr))
--#define writel(val,addr) __writel((val),(addr))
--#define writew(val,addr) __writew((val),(addr))
--#define writeb(val,addr) __writeb((val),(addr))
-+
-+#define writeq(val, addr) __writeq((val), (addr))
-+#define writel(val, addr) __writel((val), (addr))
-+#define writew(val, addr) __writew((val), (addr))
-+#define writeb(val, addr) __writeb((val), (addr))
- #define __raw_writeb writeb
- #define __raw_writew writew
- #define __raw_writel writel
- #define __raw_writeq writeq
- 
--void __memcpy_fromio(void*,unsigned long,unsigned);
--void __memcpy_toio(unsigned long,const void*,unsigned);
-+void __memcpy_fromio(void *, unsigned long, unsigned);
-+void __memcpy_toio(unsigned long, const void *, unsigned);
- 
--static inline void memcpy_fromio(void *to, const volatile void __iomem *from, unsigned len)
-+static inline void memcpy_fromio(void *to, const volatile void __iomem *from,
-+                               unsigned len)
- {
--      __memcpy_fromio(to,(unsigned long)from,len);
-+      __memcpy_fromio(to, (unsigned long)from, len);
+--- sle11-2009-05-14.orig/drivers/xen/fbfront/xenfb.c  2009-03-04 11:25:55.000000000 +0100
++++ sle11-2009-05-14/drivers/xen/fbfront/xenfb.c       2009-03-16 16:38:05.000000000 +0100
+@@ -93,7 +93,7 @@ struct xenfb_info
+  *    only mappings.  The former creates unfaulted pages.  Preserves
+  *    invariant.  The latter removes pages.  Preserves invariant.
+  *
+- * 3. Holding both locks: xenfb_vm_nopage().  Extends the dirty
++ * 3. Holding both locks: xenfb_vm_fault().  Extends the dirty
+  *    rectangle and updates mappings consistently.  Preserves
+  *    invariant.
+  *
+@@ -112,13 +112,13 @@ struct xenfb_info
+  *
+  * But FIXME: the invariant is too weak.  It misses that the fault
+  * record in mappings must be consistent with the mapping of pages in
+- * the associated address space!  do_no_page() updates the PTE after
+- * xenfb_vm_nopage() returns, i.e. outside the critical region.  This
++ * the associated address space!  __do_fault() updates the PTE after
++ * xenfb_vm_fault() returns, i.e. outside the critical region.  This
+  * allows the following race:
+  *
+  * X writes to some address in the Xen frame buffer
+- * Fault - call do_no_page()
+- *     call xenfb_vm_nopage()
++ * Fault - call __do_fault()
++ *     call xenfb_vm_fault()
+  *         grab mm_lock
+  *         map->faults++;
+  *         release mm_lock
+@@ -387,18 +387,17 @@ static void xenfb_vm_close(struct vm_are
+       mutex_unlock(&info->mm_lock);
   }
--static inline void memcpy_toio(volatile void __iomem *to, const void *from, unsigned len)
-+
-+static inline void memcpy_toio(volatile void __iomem *to, const void *from,
-+                             unsigned len)
+ 
+-static struct page *xenfb_vm_nopage(struct vm_area_struct *vma,
+-                                  unsigned long vaddr, int *type)
++static int xenfb_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
   {
--      __memcpy_toio((unsigned long)to,from,len);
-+      __memcpy_toio((unsigned long)to, from, len);
- }
+       struct xenfb_mapping *map = vma->vm_private_data;
+       struct xenfb_info *info = map->info;
+-      int pgnr = (vaddr - vma->vm_start) >> PAGE_SHIFT;
++      int pgnr = ((long)vmf->virtual_address - vma->vm_start) >> PAGE_SHIFT;
+       unsigned long flags;
+       struct page *page;
+       int y1, y2;
   
- void memset_io(volatile void __iomem *a, int b, size_t c);
-@@ -292,18 +318,12 @@ void memset_io(volatile void __iomem *a,
-  */
- #define __ISA_IO_base ((char __iomem *)(fix_to_virt(FIX_ISAMAP_BEGIN)))
+       if (pgnr >= info->nr_pages)
+-              return NOPAGE_SIGBUS;
++              return VM_FAULT_SIGBUS;
+ 
+       mutex_lock(&info->mm_lock);
+       spin_lock_irqsave(&info->dirty_lock, flags);
+@@ -414,16 +413,15 @@ static struct page *xenfb_vm_nopage(stru
+       spin_unlock_irqrestore(&info->dirty_lock, flags);
+       mutex_unlock(&info->mm_lock);
   
--#define flush_write_buffers() 
-+#define flush_write_buffers()
+-      if (type)
+-              *type = VM_FAULT_MINOR;
++      vmf->page = page;
   
- extern int iommu_bio_merge;
- #define BIO_VMERGE_BOUNDARY iommu_bio_merge
+-      return page;
++      return VM_FAULT_MINOR;
+ }
   
- /*
-- * Convert a physical pointer to a virtual kernel pointer for /dev/mem
-- * access
-- */
--#define xlate_dev_mem_ptr(p)  __va(p)
--
--/*
-  * Convert a virtual cached pointer to an uncached pointer
-  */
- #define xlate_dev_kmem_ptr(p) p
---- a/include/asm-x86/mach-xen/asm/io.h
-+++ b/include/asm-x86/mach-xen/asm/io.h
-@@ -1,5 +1,22 @@
-+#ifndef _ASM_X86_IO_H
-+#define _ASM_X86_IO_H
-+
-+#define ARCH_HAS_IOREMAP_WC
-+
- #ifdef CONFIG_X86_32
- # include "io_32.h"
- #else
- # include "io_64.h"
- #endif
-+
-+extern void *xlate_dev_mem_ptr(unsigned long phys);
-+extern void unxlate_dev_mem_ptr(unsigned long phys, void *addr);
-+
-+extern void map_devmem(unsigned long pfn, unsigned long len, pgprot_t);
-+extern void unmap_devmem(unsigned long pfn, unsigned long len, pgprot_t);
-+
-+extern int ioremap_check_change_attr(unsigned long mfn, unsigned long size,
-+                                   unsigned long prot_val);
-+extern void __iomem *ioremap_wc(unsigned long offset, unsigned long size);
-+
-+#endif /* _ASM_X86_IO_H */
---- a/include/asm-x86/mach-xen/asm/irqflags.h
-+++ b/include/asm-x86/mach-xen/asm/irqflags.h
-@@ -137,11 +137,11 @@ sysexit_ecrit:   /**** END OF SYSEXIT CRIT
- #endif /* __ASSEMBLY__ */
+ static struct vm_operations_struct xenfb_vm_ops = {
+       .open   = xenfb_vm_open,
+       .close  = xenfb_vm_close,
+-      .nopage = xenfb_vm_nopage,
++      .fault  = xenfb_vm_fault,
+ };
   
- #ifndef __ASSEMBLY__
--#define raw_local_save_flags(flags) \
--              do { (flags) = __raw_local_save_flags(); } while (0)
-+#define raw_local_save_flags(flags)                           \
-+      do { (flags) = __raw_local_save_flags(); } while (0)
+ static int xenfb_mmap(struct fb_info *fb_info, struct vm_area_struct *vma)
+--- sle11-2009-05-14.orig/drivers/xen/gntdev/gntdev.c  2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/drivers/xen/gntdev/gntdev.c       2009-03-16 16:38:05.000000000 +0100
+@@ -392,7 +392,7 @@ nomem_out:
+ static int __init gntdev_init(void)
+ {
+       struct class *class;
+-      struct class_device *device;
++      struct device *device;
   
--#define raw_local_irq_save(flags) \
--              do { (flags) = __raw_local_irq_save(); } while (0)
-+#define raw_local_irq_save(flags)                             \
-+      do { (flags) = __raw_local_irq_save(); } while (0)
+       if (!is_running_on_xen()) {
+               printk(KERN_ERR "You must be running Xen to use gntdev\n");
+@@ -417,8 +417,8 @@ static int __init gntdev_init(void)
+               return 0;
+       }
   
- static inline int raw_irqs_disabled_flags(unsigned long flags)
+-      device = class_device_create(class, NULL, MKDEV(gntdev_major, 0),
+-                                   NULL, GNTDEV_NAME);
++      device = device_create(class, NULL, MKDEV(gntdev_major, 0),
++                             GNTDEV_NAME);
+       if (IS_ERR(device)) {
+               printk(KERN_ERR "Error creating gntdev device in xen_class\n");
+               printk(KERN_ERR "gntdev created with major number = %d\n",
+@@ -435,7 +435,7 @@ static void __exit gntdev_exit(void)
   {
---- a/include/asm-x86/mach-xen/asm/mmu_context_32.h
-+++ b/include/asm-x86/mach-xen/asm/mmu_context_32.h
-@@ -94,7 +94,7 @@ static inline void switch_mm(struct mm_s
-               BUG_ON(per_cpu(cpu_tlbstate, cpu).active_mm != next);
+       struct class *class;
+       if ((class = get_xen_class()) != NULL)
+-              class_device_destroy(class, MKDEV(gntdev_major, 0));
++              device_destroy(class, MKDEV(gntdev_major, 0));
+       unregister_chrdev(gntdev_major, GNTDEV_NAME);
+ }
   
-               if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) {
--                      /* We were in lazy tlb mode and leave_mm disabled 
-+                      /* We were in lazy tlb mode and leave_mm disabled
-                        * tlb flush IPI delivery. We must reload %cr3.
-                        */
-                       load_cr3(next->pgd);
-@@ -107,10 +107,10 @@ static inline void switch_mm(struct mm_s
- #define deactivate_mm(tsk, mm)                        \
-       asm("movl %0,%%gs": :"r" (0));
+--- sle11-2009-05-14.orig/drivers/xen/netfront/netfront.c      2009-03-30 16:39:44.000000000 +0200
++++ sle11-2009-05-14/drivers/xen/netfront/netfront.c   2009-03-30 16:40:17.000000000 +0200
+@@ -1464,8 +1464,7 @@ err:     
+               }
+       }
   
--#define activate_mm(prev, next)                               \
--      do {                                            \
--              xen_activate_mm(prev, next);            \
--              switch_mm((prev),(next),NULL);          \
--      } while(0)
-+#define activate_mm(prev, next)                       \
-+do {                                          \
-+      xen_activate_mm(prev, next);            \
-+      switch_mm((prev), (next), NULL);        \
-+} while (0)
+-      while ((skb = __skb_dequeue(&errq)))
+-              kfree_skb(skb);
++      __skb_queue_purge(&errq);
   
- #endif
---- a/include/asm-x86/mach-xen/asm/mmu_context_64.h
-+++ b/include/asm-x86/mach-xen/asm/mmu_context_64.h
-@@ -21,7 +21,7 @@ void destroy_context(struct mm_struct *m
- static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
- {
- #if defined(CONFIG_SMP) && !defined(CONFIG_XEN)
--      if (read_pda(mmu_state) == TLBSTATE_OK) 
-+      if (read_pda(mmu_state) == TLBSTATE_OK)
-               write_pda(mmu_state, TLBSTATE_LAZY);
- #endif
+       while ((skb = __skb_dequeue(&rxq)) != NULL) {
+               struct page *page = NETFRONT_SKB_CB(skb)->page;
+@@ -1630,8 +1629,7 @@ static void netif_release_rx_bufs_flip(s
+               }
+       }
+ 
+-      while ((skb = __skb_dequeue(&free_list)) != NULL)
+-              dev_kfree_skb(skb);
++      __skb_queue_purge(&free_list);
+ 
+       spin_unlock_bh(&np->rx_lock);
+ }
+--- sle11-2009-05-14.orig/drivers/xen/privcmd/privcmd.c        2009-03-04 11:28:34.000000000 +0100
++++ sle11-2009-05-14/drivers/xen/privcmd/privcmd.c     2009-03-16 16:38:05.000000000 +0100
+@@ -261,15 +261,13 @@ static long privcmd_ioctl(struct file *f
   }
-@@ -62,7 +62,7 @@ extern void mm_pin(struct mm_struct *mm)
- extern void mm_unpin(struct mm_struct *mm);
- void mm_pin_all(void);
   
--static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, 
-+static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
-                            struct task_struct *tsk)
+ #ifndef HAVE_ARCH_PRIVCMD_MMAP
+-static struct page *privcmd_nopage(struct vm_area_struct *vma,
+-                                 unsigned long address,
+-                                 int *type)
++static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
   {
-       unsigned cpu = smp_processor_id();
-@@ -106,7 +106,7 @@ static inline void switch_mm(struct mm_s
-               if (read_pda(active_mm) != next)
-                       BUG();
-               if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) {
--                      /* We were in lazy tlb mode and leave_mm disabled 
-+                      /* We were in lazy tlb mode and leave_mm disabled
-                        * tlb flush IPI delivery. We must reload CR3
-                        * to make sure to use no freed page tables.
-                        */
-@@ -118,10 +118,11 @@ static inline void switch_mm(struct mm_s
- #endif
+-      return NOPAGE_SIGBUS;
++      return VM_FAULT_SIGBUS;
   }
   
--#define deactivate_mm(tsk,mm) do { \
--      load_gs_index(0); \
--      asm volatile("movl %0,%%fs"::"r"(0));  \
--} while(0)
-+#define deactivate_mm(tsk, mm)                        \
-+do {                                          \
-+      load_gs_index(0);                       \
-+      asm volatile("movl %0,%%fs"::"r"(0));   \
-+} while (0)
+ static struct vm_operations_struct privcmd_vm_ops = {
+-      .nopage = privcmd_nopage
++      .fault = privcmd_fault
+ };
   
- static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next)
- {
---- a/include/asm-x86/mach-xen/asm/page_64.h
-+++ b/include/asm-x86/mach-xen/asm/page_64.h
-@@ -5,7 +5,7 @@
+ static int privcmd_mmap(struct file * file, struct vm_area_struct * vma)
+--- sle11-2009-05-14.orig/drivers/xen/xenbus/xenbus_client.c   2009-03-24 10:12:22.000000000 +0100
++++ sle11-2009-05-14/drivers/xen/xenbus/xenbus_client.c        2009-03-24 10:13:17.000000000 +0100
+@@ -442,7 +442,7 @@ int xenbus_map_ring_valloc(struct xenbus
   
- #define THREAD_ORDER  1
- #define THREAD_SIZE  (PAGE_SIZE << THREAD_ORDER)
--#define CURRENT_MASK (~(THREAD_SIZE-1))
-+#define CURRENT_MASK (~(THREAD_SIZE - 1))
+       *vaddr = NULL;
   
- #define EXCEPTION_STACK_ORDER 0
- #define EXCEPTION_STKSZ (PAGE_SIZE << EXCEPTION_STACK_ORDER)
-@@ -53,10 +53,10 @@
- #define __VIRTUAL_MASK_SHIFT  48
+-      area = alloc_vm_area(PAGE_SIZE);
++      area = xen_alloc_vm_area(PAGE_SIZE);
+       if (!area)
+               return -ENOMEM;
   
- /*
-- * Kernel image size is limited to 128 MB (see level2_kernel_pgt in
-+ * Kernel image size is limited to 512 MB (see level2_kernel_pgt in
-  * arch/x86/kernel/head_64.S), and it is mapped here:
-  */
--#define KERNEL_IMAGE_SIZE     (128*1024*1024)
-+#define KERNEL_IMAGE_SIZE     (512 * 1024 * 1024)
- #define KERNEL_IMAGE_START    _AC(0xffffffff80000000, UL)
+@@ -452,7 +452,7 @@ int xenbus_map_ring_valloc(struct xenbus
+               BUG();
   
- #ifndef __ASSEMBLY__
-@@ -64,7 +64,6 @@ void clear_page(void *page);
- void copy_page(void *to, void *from);
+       if (op.status != GNTST_okay) {
+-              free_vm_area(area);
++              xen_free_vm_area(area);
+               xenbus_dev_fatal(dev, op.status,
+                                "mapping in shared page %d from domain %d",
+                                gnt_ref, dev->otherend_id);
+@@ -551,7 +551,7 @@ int xenbus_unmap_ring_vfree(struct xenbu
+               BUG();
   
- extern unsigned long end_pfn;
--extern unsigned long end_pfn_map;
+       if (op.status == GNTST_okay)
+-              free_vm_area(area);
++              xen_free_vm_area(area);
+       else
+               xenbus_dev_error(dev, op.status,
+                                "unmapping page at handle %d error %d",
+--- sle11-2009-05-14.orig/drivers/xen/xenbus/xenbus_probe.c    2009-02-16 16:18:36.000000000 +0100
++++ sle11-2009-05-14/drivers/xen/xenbus/xenbus_probe.c 2009-03-16 16:38:05.000000000 +0100
+@@ -173,7 +173,7 @@ static int read_backend_details(struct x
+       return read_otherend_details(xendev, "backend-id", "backend");
+ }
   
- static inline unsigned long __phys_addr(unsigned long x)
+-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16) && (defined(CONFIG_XEN) || defined(MODULE))
++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
+ static int xenbus_uevent_frontend(struct device *dev, struct kobj_uevent_env *env)
   {
-@@ -89,6 +88,9 @@ typedef union { pteval_t pte; unsigned i
+       struct xenbus_device *xdev;
+@@ -185,8 +185,10 @@ static int xenbus_uevent_frontend(struct
+               return -ENODEV;
   
- #define vmemmap ((struct page *)VMEMMAP_START)
+       /* stuff we want to pass to /sbin/hotplug */
++#if defined(CONFIG_XEN) || defined(MODULE)
+       add_uevent_var(env, "XENBUS_TYPE=%s", xdev->devicetype);
+       add_uevent_var(env, "XENBUS_PATH=%s", xdev->nodename);
++#endif
+       add_uevent_var(env, "MODALIAS=xen:%s", xdev->devicetype);
   
-+extern unsigned long init_memory_mapping(unsigned long start,
-+                                       unsigned long end);
-+
- #endif        /* !__ASSEMBLY__ */
+       return 0;
+@@ -207,10 +209,8 @@ static struct xen_bus_type xenbus_fronte
+               .probe    = xenbus_dev_probe,
+               .remove   = xenbus_dev_remove,
+               .shutdown = xenbus_dev_shutdown,
+-#if defined(CONFIG_XEN) || defined(MODULE)
+               .uevent   = xenbus_uevent_frontend,
+ #endif
+-#endif
+       },
+ #if defined(CONFIG_XEN) || defined(MODULE)
+       .dev = {
+@@ -519,6 +519,15 @@ static ssize_t xendev_show_devtype(struc
+ }
+ DEVICE_ATTR(devtype, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_devtype, NULL);
   
- #ifdef CONFIG_FLATMEM
---- a/include/asm-x86/mach-xen/asm/page.h
-+++ b/include/asm-x86/mach-xen/asm/page.h
-@@ -20,8 +20,16 @@
- #define _PAGE_BIT_IO          9
- #define _PAGE_IO              (_AC(1, L)<<_PAGE_BIT_IO)
++static ssize_t xendev_show_modalias(struct device *dev,
++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,13)
++                                  struct device_attribute *attr,
++#endif
++                                  char *buf)
++{
++      return sprintf(buf, "xen:%s\n", to_xenbus_device(dev)->devicetype);
++}
++DEVICE_ATTR(modalias, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_modalias, NULL);
   
--#define PHYSICAL_PAGE_MASK    (~(_AT(phys_addr_t, PAGE_SIZE) - 1) & __PHYSICAL_MASK)
--#define PTE_MASK              _AT(pteval_t, PHYSICAL_PAGE_MASK)
-+#define __PHYSICAL_MASK               ((phys_addr_t)(1ULL << __PHYSICAL_MASK_SHIFT) - 1)
-+#define __VIRTUAL_MASK                ((1UL << __VIRTUAL_MASK_SHIFT) - 1)
-+
-+/* Cast PAGE_MASK to a signed type so that it is sign-extended if
-+   virtual addresses are 32-bits but physical addresses are larger
-+   (ie, 32-bit PAE). */
-+#define PHYSICAL_PAGE_MASK    (((signed long)PAGE_MASK) & __PHYSICAL_MASK)
+ int xenbus_probe_node(struct xen_bus_type *bus,
+                     const char *type,
+@@ -579,10 +588,16 @@ int xenbus_probe_node(struct xen_bus_typ
+ 
+       err = device_create_file(&xendev->dev, &dev_attr_devtype);
+       if (err)
+-              goto fail_remove_file;
++              goto fail_remove_nodename;
  +
-+/* PTE_MASK extracts the PFN from a (pte|pmd|pud|pgd)val_t */
-+#define PTE_MASK              ((pteval_t)PHYSICAL_PAGE_MASK)
++      err = device_create_file(&xendev->dev, &dev_attr_modalias);
++      if (err)
++              goto fail_remove_devtype;
   
- #define PMD_PAGE_SIZE         (_AC(1, UL) << PMD_SHIFT)
- #define PMD_PAGE_MASK         (~(PMD_PAGE_SIZE-1))
-@@ -34,19 +42,14 @@
- /* to align the pointer to the (next) page boundary */
- #define PAGE_ALIGN(addr)      (((addr)+PAGE_SIZE-1)&PAGE_MASK)
+       return 0;
+-fail_remove_file:
++fail_remove_devtype:
++      device_remove_file(&xendev->dev, &dev_attr_devtype);
++fail_remove_nodename:
+       device_remove_file(&xendev->dev, &dev_attr_nodename);
+ fail_unregister:
+       device_unregister(&xendev->dev);
+--- sle11-2009-05-14.orig/fs/aio.c     2009-03-24 10:11:37.000000000 +0100
++++ sle11-2009-05-14/fs/aio.c  2009-03-24 10:13:25.000000000 +0100
+@@ -1271,6 +1271,7 @@ static void io_destroy(struct kioctx *io
+ #ifdef CONFIG_EPOLL
+       /* forget the poll file, but it's up to the user to close it */
+       if (ioctx->file) {
++              fput(ioctx->file);
+               ioctx->file->private_data = 0;
+               ioctx->file = 0;
+       }
+@@ -1295,6 +1296,7 @@ static int aio_queue_fd_close(struct ino
+               spin_lock_irq(&ioctx->ctx_lock);
+               ioctx->file = 0;
+               spin_unlock_irq(&ioctx->ctx_lock);
++              fput(file);
+       }
+       return 0;
+ }
+@@ -1330,16 +1332,17 @@ static const struct file_operations aioq
   
--#define __PHYSICAL_MASK               _AT(phys_addr_t, (_AC(1,ULL) << __PHYSICAL_MASK_SHIFT) - 1)
--#define __VIRTUAL_MASK                ((_AC(1,UL) << __VIRTUAL_MASK_SHIFT) - 1)
--
- #ifndef __ASSEMBLY__
- #include <linux/types.h>
- #endif
+ static int make_aio_fd(struct kioctx *ioctx)
+ {
+-      int error, fd;
+-      struct inode *inode;
++      int fd;
+       struct file *file;
   
- #ifdef CONFIG_X86_64
- #include <asm/page_64.h>
--#define max_pfn_mapped                end_pfn_map
- #else
- #include <asm/page_32.h>
--#define max_pfn_mapped                max_low_pfn
- #endif        /* CONFIG_X86_64 */
+-      error = anon_inode_getfd(&fd, &inode, &file, "[aioq]",
+-                               &aioq_fops, ioctx);
+-      if (error)
+-              return error;
++      fd = anon_inode_getfd("[aioq]", &aioq_fops, ioctx);
++      if (fd < 0)
++              return fd;
   
- #define PAGE_OFFSET           ((unsigned long)__PAGE_OFFSET)
-@@ -59,6 +62,9 @@
- #ifndef __ASSEMBLY__
+       /* associate the file with the IO context */
++      file = fget(fd);
++      if (!file)
++              return -EBADF;
+       file->private_data = ioctx;
+       ioctx->file = file;
+       init_waitqueue_head(&ioctx->poll_wait);
+--- sle11-2009-05-14.orig/include/asm-x86/dma-mapping.h        2009-05-14 10:56:29.000000000 +0200
++++ sle11-2009-05-14/include/asm-x86/dma-mapping.h     2009-03-16 16:38:05.000000000 +0100
+@@ -223,8 +223,13 @@ static inline dma_addr_t dma_map_page(st
+       struct dma_mapping_ops *ops = get_dma_ops(dev);
   
- extern int page_is_ram(unsigned long pagenr);
-+extern int devmem_is_allowed(unsigned long pagenr);
-+
-+extern unsigned long max_pfn_mapped;
+       BUG_ON(!valid_dma_direction(direction));
++#ifndef CONFIG_XEN
+       return ops->map_single(dev, page_to_phys(page) + offset,
+                              size, direction);
++#else
++      return ops->map_single(dev, page_to_pseudophys(page) + offset,
++                             size, direction);
++#endif
+ }
   
- struct page;
+ static inline void dma_unmap_page(struct device *dev, dma_addr_t addr,
+--- sle11-2009-05-14.orig/include/asm-x86/genapic_64.h 2009-05-14 10:56:29.000000000 +0200
++++ sle11-2009-05-14/include/asm-x86/genapic_64.h      2009-03-16 16:38:05.000000000 +0100
+@@ -46,6 +46,7 @@ extern struct genapic apic_x2apic_phys;
+ extern int acpi_madt_oem_check(char *, char *);
+ 
+ extern void apic_send_IPI_self(int vector);
++#ifndef CONFIG_XEN
+ enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC};
+ extern enum uv_system_type get_uv_system_type(void);
+ extern int is_uv_system(void);
+@@ -55,6 +56,10 @@ DECLARE_PER_CPU(int, x2apic_extra_bits);
+ extern void uv_cpu_init(void);
+ extern void uv_system_init(void);
+ extern int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip);
++#else
++#define is_uv_system() 0
++#define uv_cpu_init() ((void)0)
++#endif
   
---- a/include/asm-x86/mach-xen/asm/pci_64.h
-+++ b/include/asm-x86/mach-xen/asm/pci_64.h
-@@ -1,12 +1,10 @@
- #ifndef __x8664_PCI_H
- #define __x8664_PCI_H
+ extern void setup_apic_routing(void);
   
--
- #ifdef __KERNEL__
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/desc.h  2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/desc.h       2009-03-16 16:38:05.000000000 +0100
+@@ -64,8 +64,8 @@ static inline struct desc_struct *get_cp
+ }
   
+ static inline void pack_gate(gate_desc *gate, unsigned char type,
+-       unsigned long base, unsigned dpl, unsigned flags, unsigned short seg)
  -
- #ifdef CONFIG_CALGARY_IOMMU
--static inline void* pci_iommu(struct pci_bus *bus)
-+static inline void *pci_iommu(struct pci_bus *bus)
++                           unsigned long base, unsigned dpl, unsigned flags,
++                           unsigned short seg)
   {
-       struct pci_sysdata *sd = bus->sysdata;
-       return sd->iommu;
-@@ -19,13 +17,12 @@ static inline void set_pci_iommu(struct 
- }
- #endif /* CONFIG_CALGARY_IOMMU */
- 
-+extern int (*pci_config_read)(int seg, int bus, int dev, int fn,
-+                            int reg, int len, u32 *value);
-+extern int (*pci_config_write)(int seg, int bus, int dev, int fn,
-+                             int reg, int len, u32 value);
+       gate->a = (seg << 16) | (base & 0xffff);
+       gate->b = (base & 0xffff0000) |
+@@ -84,22 +84,23 @@ static inline int desc_empty(const void 
+ #define load_TR_desc() native_load_tr_desc()
+ #define load_gdt(dtr) native_load_gdt(dtr)
+ #define load_idt(dtr) native_load_idt(dtr)
+-#define load_tr(tr) __asm__ __volatile("ltr %0"::"m" (tr))
+-#define load_ldt(ldt) __asm__ __volatile("lldt %0"::"m" (ldt))
++#define load_tr(tr) asm volatile("ltr %0"::"m" (tr))
++#define load_ldt(ldt) asm volatile("lldt %0"::"m" (ldt))
   
--extern int (*pci_config_read)(int seg, int bus, int dev, int fn, int reg, int len, u32 *value);
--extern int (*pci_config_write)(int seg, int bus, int dev, int fn, int reg, int len, u32 value);
--
--
--
--extern void pci_iommu_alloc(void);
-+extern void dma32_reserve_bootmem(void);
+ #define store_gdt(dtr) native_store_gdt(dtr)
+ #define store_idt(dtr) native_store_idt(dtr)
+ #define store_tr(tr) (tr = native_store_tr())
+-#define store_ldt(ldt) __asm__ ("sldt %0":"=m" (ldt))
++#define store_ldt(ldt) asm("sldt %0":"=m" (ldt))
   
- /* The PCI address space does equal the physical memory
-  * address space.  The networking and block device layers use
-@@ -82,5 +79,4 @@ extern void pci_iommu_alloc(void);
+ #define load_TLS(t, cpu) native_load_tls(t, cpu)
+ #define set_ldt native_set_ldt
   
- #endif /* __KERNEL__ */
+-#define write_ldt_entry(dt, entry, desc) \
+-                              native_write_ldt_entry(dt, entry, desc)
+-#define write_gdt_entry(dt, entry, desc, type) \
+-                              native_write_gdt_entry(dt, entry, desc, type)
+-#define write_idt_entry(dt, entry, g) native_write_idt_entry(dt, entry, g)
++#define write_ldt_entry(dt, entry, desc)      \
++      native_write_ldt_entry(dt, entry, desc)
++#define write_gdt_entry(dt, entry, desc, type)                \
++      native_write_gdt_entry(dt, entry, desc, type)
++#define write_idt_entry(dt, entry, g)         \
++      native_write_idt_entry(dt, entry, g)
   
--
- #endif /* __x8664_PCI_H */
---- a/include/asm-x86/mach-xen/asm/pci.h
-+++ b/include/asm-x86/mach-xen/asm/pci.h
-@@ -8,14 +8,13 @@
- #include <asm/scatterlist.h>
- #include <asm/io.h>
+ static inline void native_write_idt_entry(gate_desc *idt, int entry,
+                                         const gate_desc *gate)
+@@ -138,8 +139,8 @@ static inline void pack_descriptor(struc
+ {
+       desc->a = ((base & 0xffff) << 16) | (limit & 0xffff);
+       desc->b = (base & 0xff000000) | ((base & 0xff0000) >> 16) |
+-                (limit & 0x000f0000) | ((type & 0xff) << 8) |
+-                ((flags & 0xf) << 20);
++              (limit & 0x000f0000) | ((type & 0xff) << 8) |
++              ((flags & 0xf) << 20);
+       desc->p = 1;
+ }
   
+@@ -160,7 +161,6 @@ static inline void set_tssldt_descriptor
+       desc->base2 = (PTR_MIDDLE(addr) >> 8) & 0xFF;
+       desc->base3 = PTR_HIGH(addr);
+ #else
  -
- #ifdef __KERNEL__
- 
- struct pci_sysdata {
-       int             domain;         /* PCI domain */
-       int             node;           /* NUMA node */
- #ifdef CONFIG_X86_64
--      void*           iommu;          /* IOMMU private data */
-+      void            *iommu;         /* IOMMU private data */
+       pack_descriptor((struct desc_struct *)d, addr, size, 0x80 | type, 0);
   #endif
- #ifdef CONFIG_XEN_PCIDEV_FRONTEND
-       struct pcifront_device *pdev;
-@@ -23,6 +22,8 @@ struct pci_sysdata {
- };
+ }
+@@ -178,7 +178,8 @@ static inline void __set_tss_desc(unsign
+        * last valid byte
+        */
+       set_tssldt_descriptor(&tss, (unsigned long)addr, DESC_TSS,
+-              IO_BITMAP_OFFSET + IO_BITMAP_BYTES + sizeof(unsigned long) - 1);
++                            IO_BITMAP_OFFSET + IO_BITMAP_BYTES +
++                            sizeof(unsigned long) - 1);
+       write_gdt_entry(d, entry, &tss, DESC_TSS);
+ }
   
- /* scan a bus after allocating a pci_sysdata for it */
-+extern struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops,
-+                                          int node);
- extern struct pci_bus *pci_scan_bus_with_sysdata(int busno);
+@@ -187,16 +188,16 @@ static inline void __set_tss_desc(unsign
+ static inline void native_set_ldt(const void *addr, unsigned int entries)
+ {
+       if (likely(entries == 0))
+-              __asm__ __volatile__("lldt %w0"::"q" (0));
++              asm volatile("lldt %w0"::"q" (0));
+       else {
+               unsigned cpu = smp_processor_id();
+               ldt_desc ldt;
   
- static inline int pci_domain_nr(struct pci_bus *bus)
-@@ -36,6 +37,7 @@ static inline int pci_proc_domain(struct
-       return pci_domain_nr(bus);
+-              set_tssldt_descriptor(&ldt, (unsigned long)addr,
+-                                    DESC_LDT, entries * sizeof(ldt) - 1);
++              set_tssldt_descriptor(&ldt, (unsigned long)addr, DESC_LDT,
++                                    entries * LDT_ENTRY_SIZE - 1);
+               write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT,
+                               &ldt, DESC_LDT);
+-              __asm__ __volatile__("lldt %w0"::"q" (GDT_ENTRY_LDT*8));
++              asm volatile("lldt %w0"::"q" (GDT_ENTRY_LDT*8));
+       }
   }
   
-+extern void pci_iommu_alloc(void);
- 
- /* Can be used to override the logic in pci_scan_bus for skipping
-    already-configured bus numbers - to be used for buggy BIOSes
-@@ -57,7 +59,7 @@ extern unsigned long pci_mem_start;
- #define PCIBIOS_MIN_CARDBUS_IO        0x4000
+@@ -261,15 +262,15 @@ static inline void xen_load_tls(struct t
+ }
+ #endif
   
- void pcibios_config_init(void);
--struct pci_bus * pcibios_scan_root(int bus);
-+struct pci_bus *pcibios_scan_root(int bus);
+-#define _LDT_empty(info) (\
+-      (info)->base_addr       == 0    && \
+-      (info)->limit           == 0    && \
+-      (info)->contents        == 0    && \
+-      (info)->read_exec_only  == 1    && \
+-      (info)->seg_32bit       == 0    && \
+-      (info)->limit_in_pages  == 0    && \
+-      (info)->seg_not_present == 1    && \
+-      (info)->useable         == 0)
++#define _LDT_empty(info)                              \
++      ((info)->base_addr              == 0    &&      \
++       (info)->limit                  == 0    &&      \
++       (info)->contents               == 0    &&      \
++       (info)->read_exec_only         == 1    &&      \
++       (info)->seg_32bit              == 0    &&      \
++       (info)->limit_in_pages         == 0    &&      \
++       (info)->seg_not_present        == 1    &&      \
++       (info)->useable                == 0)
   
- void pcibios_set_master(struct pci_dev *dev);
- void pcibios_penalize_isa_irq(int irq, int active);
-@@ -67,7 +69,8 @@ int pcibios_set_irq_routing(struct pci_d
+ #ifdef CONFIG_X86_64
+ #define LDT_empty(info) (_LDT_empty(info) && ((info)->lm == 0))
+@@ -309,7 +310,7 @@ static inline unsigned long get_desc_lim
   
- #define HAVE_PCI_MMAP
- extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
--                             enum pci_mmap_state mmap_state, int write_combine);
-+                             enum pci_mmap_state mmap_state,
-+                             int write_combine);
+ #ifndef CONFIG_X86_NO_IDT
+ static inline void _set_gate(int gate, unsigned type, void *addr,
+-                            unsigned dpl, unsigned ist, unsigned seg)
++                           unsigned dpl, unsigned ist, unsigned seg)
+ {
+       gate_desc s;
+       pack_gate(&s, type, (unsigned long)addr, dpl, ist, seg);
+@@ -393,10 +394,10 @@ static inline void set_system_gate_ist(i
+  *    Will read the base address of GDT_ENTRY_ESPFIX_SS and put it into %eax.
+  */
+ #define GET_DESC_BASE(idx, gdt, base, lo_w, lo_b, hi_b) \
+-      movb idx*8+4(gdt), lo_b; \
+-      movb idx*8+7(gdt), hi_b; \
+-      shll $16, base; \
+-      movw idx*8+2(gdt), lo_w;
++      movb idx * 8 + 4(gdt), lo_b;                    \
++      movb idx * 8 + 7(gdt), hi_b;                    \
++      shll $16, base;                                 \
++      movw idx * 8 + 2(gdt), lo_w;
   
   
- #ifdef CONFIG_PCI
---- a/include/asm-x86/mach-xen/asm/pgalloc_32.h
-+++ /dev/null
-@@ -1,111 +0,0 @@
--#ifndef _I386_PGALLOC_H
--#define _I386_PGALLOC_H
--
--#include <linux/threads.h>
--#include <linux/mm.h>         /* for struct page */
--#include <linux/pagemap.h>
--#include <asm/tlb.h>
--#include <asm-generic/tlb.h>
--#include <asm/io.h>           /* for phys_to_virt and page_to_pseudophys */
--
--#define paravirt_alloc_pt(mm, pfn) do { } while (0)
--#define paravirt_alloc_pd(mm, pfn) do { } while (0)
--#define paravirt_alloc_pd_clone(pfn, clonepfn, start, count) do { } while (0)
--#define paravirt_release_pt(pfn) do { } while (0)
--#define paravirt_release_pd(pfn) do { } while (0)
--
--static inline void pmd_populate_kernel(struct mm_struct *mm,
--                                     pmd_t *pmd, pte_t *pte)
--{
--      paravirt_alloc_pt(mm, __pa(pte) >> PAGE_SHIFT);
--      set_pmd(pmd, __pmd(__pa(pte) | _PAGE_TABLE));
--}
--
--static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *pte)
--{
--      unsigned long pfn = page_to_pfn(pte);
--
--      paravirt_alloc_pt(mm, pfn);
--      if (PagePinned(virt_to_page(mm->pgd))) {
--              if (!PageHighMem(pte))
--                      BUG_ON(HYPERVISOR_update_va_mapping(
--                        (unsigned long)__va(pfn << PAGE_SHIFT),
--                        pfn_pte(pfn, PAGE_KERNEL_RO), 0));
--              else if (!test_and_set_bit(PG_pinned, &pte->flags))
--                      kmap_flush_unused();
--              set_pmd(pmd, __pmd(((pmdval_t)pfn << PAGE_SHIFT) | _PAGE_TABLE));
--      } else
--              *pmd = __pmd(((pmdval_t)pfn << PAGE_SHIFT) | _PAGE_TABLE);
--}
--#define pmd_pgtable(pmd) pmd_page(pmd)
--
--/*
-- * Allocate and free page tables.
-- */
--extern void pgd_test_and_unpin(pgd_t *);
--extern pgd_t *pgd_alloc(struct mm_struct *);
--extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
--
--extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);
--extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);
--
--static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
--{
--      make_lowmem_page_writable(pte, XENFEAT_writable_page_tables);
--      free_page((unsigned long)pte);
--}
--
--extern void __pte_free(pgtable_t);
--static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
--{
--      __pte_free(pte);
--}
--
--
--extern void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte);
+ #endif /* __ASSEMBLY__ */
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/dma-mapping.h   2009-02-16 16:18:36.000000000 +0100
++++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/dma-mapping.h        2009-03-16 16:38:05.000000000 +0100
+@@ -1,5 +1,17 @@
+-#ifdef CONFIG_X86_32
+-# include "dma-mapping_32.h"
+-#else
+-# include "dma-mapping_64.h"
+-#endif
++#ifndef _ASM_DMA_MAPPING_H_
++
++#include "../../dma-mapping.h"
++
++static inline int
++address_needs_mapping(struct device *hwdev, dma_addr_t addr)
++{
++      dma_addr_t mask = 0xffffffff;
++      /* If the device has a mask, use it, otherwise default to 32 bits */
++      if (hwdev && hwdev->dma_mask)
++              mask = *hwdev->dma_mask;
++      return (addr & ~mask) != 0;
++}
++
++extern int range_straddles_page_boundary(paddr_t p, size_t size);
++
++#endif /* _ASM_DMA_MAPPING_H_ */
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/dma-mapping_32.h        2009-03-16 16:33:40.000000000 +0100
++++ /dev/null  1970-01-01 00:00:00.000000000 +0000
+@@ -1,141 +0,0 @@
+-#ifndef _ASM_I386_DMA_MAPPING_H
+-#define _ASM_I386_DMA_MAPPING_H
  -
--#ifdef CONFIG_X86_PAE
  -/*
-- * In the PAE case we free the pmds as part of the pgd.
+- * IOMMU interface. See Documentation/DMA-mapping.txt and DMA-API.txt for
+- * documentation.
  - */
--extern pmd_t *pmd_alloc_one(struct mm_struct *, unsigned long);
  -
--extern void __pmd_free(pgtable_t);
--static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
+-#include <linux/mm.h>
+-#include <linux/scatterlist.h>
+-#include <asm/cache.h>
+-#include <asm/io.h>
+-#include <asm/swiotlb.h>
+-
+-static inline int
+-address_needs_mapping(struct device *hwdev, dma_addr_t addr)
  -{
--      BUG_ON((unsigned long)pmd & (PAGE_SIZE-1));
--      __pmd_free(virt_to_page(pmd));
+-      dma_addr_t mask = 0xffffffff;
+-      /* If the device has a mask, use it, otherwise default to 32 bits */
+-      if (hwdev && hwdev->dma_mask)
+-              mask = *hwdev->dma_mask;
+-      return (addr & ~mask) != 0;
  -}
  -
--extern void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd);
+-extern int range_straddles_page_boundary(paddr_t p, size_t size);
  -
--static inline void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd)
--{
--      struct page *page = virt_to_page(pmd);
--      unsigned long pfn = page_to_pfn(page);
+-#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
+-#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
  -
--      paravirt_alloc_pd(mm, pfn);
+-void *dma_alloc_coherent(struct device *dev, size_t size,
+-                         dma_addr_t *dma_handle, gfp_t flag);
  -
--      /* Note: almost everything apart from _PAGE_PRESENT is
--         reserved at the pmd (PDPT) level. */
--      if (PagePinned(virt_to_page(mm->pgd))) {
--              BUG_ON(PageHighMem(page));
--              BUG_ON(HYPERVISOR_update_va_mapping(
--                        (unsigned long)__va(pfn << PAGE_SHIFT),
--                        pfn_pte(pfn, PAGE_KERNEL_RO), 0));
--              set_pud(pudp, __pud(__pa(pmd) | _PAGE_PRESENT));
--      } else
--              *pudp = __pud(__pa(pmd) | _PAGE_PRESENT);
+-void dma_free_coherent(struct device *dev, size_t size,
+-                       void *vaddr, dma_addr_t dma_handle);
  -
--      /*
--       * According to Intel App note "TLBs, Paging-Structure Caches,
--       * and Their Invalidation", April 2007, document 317080-001,
--       * section 8.1: in PAE mode we explicitly have to flush the
--       * TLB via cr3 if the top-level pgd is changed...
--       */
--      if (mm == current->active_mm)
--              xen_tlb_flush();
--}
--#endif        /* CONFIG_X86_PAE */
+-extern dma_addr_t
+-dma_map_single(struct device *dev, void *ptr, size_t size,
+-             enum dma_data_direction direction);
  -
--#endif /* _I386_PGALLOC_H */
---- a/include/asm-x86/mach-xen/asm/pgalloc_64.h
-+++ /dev/null
-@@ -1,179 +0,0 @@
--#ifndef _X86_64_PGALLOC_H
--#define _X86_64_PGALLOC_H
+-extern void
+-dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
+-               enum dma_data_direction direction);
  -
--#include <asm/pda.h>
--#include <linux/threads.h>
--#include <linux/mm.h>
--#include <asm/io.h>           /* for phys_to_virt and page_to_pseudophys */
+-extern int dma_map_sg(struct device *hwdev, struct scatterlist *sg,
+-                    int nents, enum dma_data_direction direction);
+-extern void dma_unmap_sg(struct device *hwdev, struct scatterlist *sg,
+-                       int nents, enum dma_data_direction direction);
  -
--pmd_t *early_get_pmd(unsigned long va);
--void early_make_page_readonly(void *va, unsigned int feature);
+-#ifdef CONFIG_HIGHMEM
+-extern dma_addr_t
+-dma_map_page(struct device *dev, struct page *page, unsigned long offset,
+-           size_t size, enum dma_data_direction direction);
  -
--#define __user_pgd(pgd) ((pgd) + PTRS_PER_PGD)
+-extern void
+-dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
+-             enum dma_data_direction direction);
+-#else
+-#define dma_map_page(dev, page, offset, size, dir) \
+-      dma_map_single(dev, page_address(page) + (offset), (size), (dir))
+-#define dma_unmap_page dma_unmap_single
+-#endif
  -
--#define pmd_populate_kernel(mm, pmd, pte) \
--              set_pmd(pmd, __pmd(_PAGE_TABLE | __pa(pte)))
+-extern void
+-dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size,
+-                      enum dma_data_direction direction);
  -
--static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
+-extern void
+-dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, size_t size,
+-                           enum dma_data_direction direction);
+-
+-static inline void
+-dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t dma_handle,
+-                            unsigned long offset, size_t size,
+-                            enum dma_data_direction direction)
  -{
--      if (unlikely(PagePinned(virt_to_page((mm)->pgd)))) {
--              BUG_ON(HYPERVISOR_update_va_mapping(
--                             (unsigned long)pmd,
--                             pfn_pte(virt_to_phys(pmd)>>PAGE_SHIFT, 
--                                     PAGE_KERNEL_RO), 0));
--              set_pud(pud, __pud(_PAGE_TABLE | __pa(pmd)));
--      } else {
--              *(pud) =  __pud(_PAGE_TABLE | __pa(pmd));
--      }
+-      dma_sync_single_for_cpu(dev, dma_handle+offset, size, direction);
  -}
  -
--/*
-- * We need to use the batch mode here, but pgd_pupulate() won't be
-- * be called frequently.
-- */
--static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
+-static inline void
+-dma_sync_single_range_for_device(struct device *dev, dma_addr_t dma_handle,
+-                               unsigned long offset, size_t size,
+-                               enum dma_data_direction direction)
  -{
--      if (unlikely(PagePinned(virt_to_page((mm)->pgd)))) {
--              BUG_ON(HYPERVISOR_update_va_mapping(
--                             (unsigned long)pud,
--                             pfn_pte(virt_to_phys(pud)>>PAGE_SHIFT, 
--                                     PAGE_KERNEL_RO), 0));
--              set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(pud)));
--              set_pgd(__user_pgd(pgd), __pgd(_PAGE_TABLE | __pa(pud)));
--      } else {
--              *(pgd) =  __pgd(_PAGE_TABLE | __pa(pud));
--              *(__user_pgd(pgd)) = *(pgd);
--      }
+-      dma_sync_single_for_device(dev, dma_handle+offset, size, direction);
  -}
  -
--#define pmd_pgtable(pmd) pmd_page(pmd)
+-extern void
+-dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems,
+-                  enum dma_data_direction direction);
  -
--static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *pte)
--{
--      if (unlikely(PagePinned(virt_to_page((mm)->pgd)))) {
--              BUG_ON(HYPERVISOR_update_va_mapping(
--                             (unsigned long)__va(page_to_pfn(pte) << PAGE_SHIFT),
--                             pfn_pte(page_to_pfn(pte), PAGE_KERNEL_RO), 0));
--              set_pmd(pmd, __pmd(_PAGE_TABLE | (page_to_pfn(pte) << PAGE_SHIFT)));
--      } else {
--              *(pmd) = __pmd(_PAGE_TABLE | (page_to_pfn(pte) << PAGE_SHIFT));
--      }
--}
+-extern void
+-dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems,
+-                  enum dma_data_direction direction);
  -
--extern void __pmd_free(pgtable_t);
--static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
--{
--      BUG_ON((unsigned long)pmd & (PAGE_SIZE-1));
--      __pmd_free(virt_to_page(pmd));
--}
+-extern int
+-dma_mapping_error(dma_addr_t dma_addr);
  -
--extern pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr);
+-extern int
+-dma_supported(struct device *dev, u64 mask);
  -
--static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
+-static inline int
+-dma_set_mask(struct device *dev, u64 mask)
  -{
--      return (pud_t *)pmd_alloc_one(mm, addr);
--}
+-      if(!dev->dma_mask || !dma_supported(dev, mask))
+-              return -EIO;
  -
--static inline void pud_free(struct mm_struct *mm, pud_t *pud)
--{
--      BUG_ON((unsigned long)pud & (PAGE_SIZE-1));
--      __pmd_free(virt_to_page(pud));
+-      *dev->dma_mask = mask;
+-
+-      return 0;
  -}
  -
--static inline void pgd_list_add(pgd_t *pgd)
+-static inline int
+-dma_get_cache_alignment(void)
  -{
--      struct page *page = virt_to_page(pgd);
--      unsigned long flags;
--
--      spin_lock_irqsave(&pgd_lock, flags);
--      list_add(&page->lru, &pgd_list);
--      spin_unlock_irqrestore(&pgd_lock, flags);
+-      /* no easy way to get cache size on all x86, so return the
+-       * maximum possible, to be safe */
+-      return (1 << INTERNODE_CACHE_SHIFT);
  -}
  -
--static inline void pgd_list_del(pgd_t *pgd)
--{
--      struct page *page = virt_to_page(pgd);
--      unsigned long flags;
+-#define dma_is_consistent(d, h)       (1)
  -
--      spin_lock_irqsave(&pgd_lock, flags);
--      list_del(&page->lru);
--      spin_unlock_irqrestore(&pgd_lock, flags);
+-static inline void
+-dma_cache_sync(struct device *dev, void *vaddr, size_t size,
+-             enum dma_data_direction direction)
+-{
+-      flush_write_buffers();
  -}
  -
--extern void pgd_test_and_unpin(pgd_t *);
+-#define ARCH_HAS_DMA_DECLARE_COHERENT_MEMORY
+-extern int
+-dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
+-                          dma_addr_t device_addr, size_t size, int flags);
  -
--static inline pgd_t *pgd_alloc(struct mm_struct *mm)
--{
--      /*
--       * We allocate two contiguous pages for kernel and user.
--       */
--      unsigned boundary;
--      pgd_t *pgd = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_REPEAT, 1);
--      if (!pgd)
--              return NULL;
--      pgd_list_add(pgd);
--      pgd_test_and_unpin(pgd);
--      /*
--       * Copy kernel pointers in from init.
--       * Could keep a freelist or slab cache of those because the kernel
--       * part never changes.
--       */
--      boundary = pgd_index(__PAGE_OFFSET);
--      memset(pgd, 0, boundary * sizeof(pgd_t));
--      memcpy(pgd + boundary,
--             init_level4_pgt + boundary,
--             (PTRS_PER_PGD - boundary) * sizeof(pgd_t));
+-extern void
+-dma_release_declared_memory(struct device *dev);
  -
--      memset(__user_pgd(pgd), 0, PAGE_SIZE); /* clean up user pgd */
--      /*
--       * Set level3_user_pgt for vsyscall area
--       */
--      __user_pgd(pgd)[pgd_index(VSYSCALL_START)] =
--              __pgd(__pa_symbol(level3_user_pgt) | _PAGE_TABLE);
--      return pgd;
--}
+-extern void *
+-dma_mark_declared_memory_occupied(struct device *dev,
+-                                dma_addr_t device_addr, size_t size);
  -
--static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
--{
--      pgd_test_and_unpin(pgd);
--      pgd_list_del(pgd);
--      free_pages((unsigned long)pgd, 1);
--}
+-#endif
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/dma-mapping_64.h        2009-02-16 16:18:36.000000000 +0100
++++ /dev/null  1970-01-01 00:00:00.000000000 +0000
+@@ -1,205 +0,0 @@
+-#ifndef _X8664_DMA_MAPPING_H
+-#define _X8664_DMA_MAPPING_H 1
  -
--static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
--{
--      pte_t *pte = (pte_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
--      if (pte)
--              make_page_readonly(pte, XENFEAT_writable_page_tables);
+-/*
+- * IOMMU interface. See Documentation/DMA-mapping.txt and DMA-API.txt for
+- * documentation.
+- */
  -
--      return pte;
--}
+-#include <linux/scatterlist.h>
+-#include <asm/io.h>
  -
--extern pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr);
+-struct dma_mapping_ops {
+-      int             (*mapping_error)(dma_addr_t dma_addr);
+-      void*           (*alloc_coherent)(struct device *dev, size_t size,
+-                                dma_addr_t *dma_handle, gfp_t gfp);
+-      void            (*free_coherent)(struct device *dev, size_t size,
+-                                void *vaddr, dma_addr_t dma_handle);
+-      dma_addr_t      (*map_single)(struct device *hwdev, void *ptr,
+-                                size_t size, int direction);
+-      /* like map_single, but doesn't check the device mask */
+-      dma_addr_t      (*map_simple)(struct device *hwdev, char *ptr,
+-                                size_t size, int direction);
+-      void            (*unmap_single)(struct device *dev, dma_addr_t addr,
+-                              size_t size, int direction);
+-      void            (*sync_single_for_cpu)(struct device *hwdev,
+-                              dma_addr_t dma_handle, size_t size,
+-                              int direction);
+-      void            (*sync_single_for_device)(struct device *hwdev,
+-                                dma_addr_t dma_handle, size_t size,
+-                              int direction);
+-      void            (*sync_single_range_for_cpu)(struct device *hwdev,
+-                                dma_addr_t dma_handle, unsigned long offset,
+-                              size_t size, int direction);
+-      void            (*sync_single_range_for_device)(struct device *hwdev,
+-                              dma_addr_t dma_handle, unsigned long offset,
+-                              size_t size, int direction);
+-      void            (*sync_sg_for_cpu)(struct device *hwdev,
+-                                struct scatterlist *sg, int nelems,
+-                              int direction);
+-      void            (*sync_sg_for_device)(struct device *hwdev,
+-                              struct scatterlist *sg, int nelems,
+-                              int direction);
+-      int             (*map_sg)(struct device *hwdev, struct scatterlist *sg,
+-                              int nents, int direction);
+-      void            (*unmap_sg)(struct device *hwdev,
+-                              struct scatterlist *sg, int nents,
+-                              int direction);
+-      int             (*dma_supported)(struct device *hwdev, u64 mask);
+-      int             is_phys;
+-};
  -
--/* Should really implement gc for free page table pages. This could be
--   done with a reference count in struct page. */
+-extern dma_addr_t bad_dma_address;
+-extern const struct dma_mapping_ops* dma_ops;
+-extern int iommu_merge;
  -
--static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
+-#if 0
+-static inline int dma_mapping_error(dma_addr_t dma_addr)
  -{
--      BUG_ON((unsigned long)pte & (PAGE_SIZE-1));
--      make_page_writable(pte, XENFEAT_writable_page_tables);
--      free_page((unsigned long)pte); 
--}
+-      if (dma_ops->mapping_error)
+-              return dma_ops->mapping_error(dma_addr);
  -
--extern void __pte_free(pgtable_t);
--static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
--{
--      __pte_free(pte);
+-      return (dma_addr == bad_dma_address);
  -}
  -
--#define __pte_free_tlb(tlb,pte)                               \
--do {                                                  \
--      pgtable_page_dtor((pte));                               \
--      tlb_remove_page((tlb), (pte));                  \
--} while (0)
+-#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
+-#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
  -
--#define __pmd_free_tlb(tlb,x)   tlb_remove_page((tlb),virt_to_page(x))
--#define __pud_free_tlb(tlb,x)   tlb_remove_page((tlb),virt_to_page(x))
+-#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
+-#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
  -
--#endif /* _X86_64_PGALLOC_H */
---- a/include/asm-x86/mach-xen/asm/pgalloc.h
-+++ b/include/asm-x86/mach-xen/asm/pgalloc.h
-@@ -1,5 +1,149 @@
--#ifdef CONFIG_X86_32
--# include "pgalloc_32.h"
--#else
--# include "pgalloc_64.h"
-+#ifndef _ASM_X86_PGALLOC_H
-+#define _ASM_X86_PGALLOC_H
-+
-+#include <linux/threads.h>
-+#include <linux/mm.h>         /* for struct page */
-+#include <linux/pagemap.h>
-+
-+#include <asm/io.h>           /* for phys_to_virt and page_to_pseudophys */
-+
-+static inline void paravirt_alloc_pte(struct mm_struct *mm, unsigned long pfn)        {}
-+static inline void paravirt_alloc_pmd(struct mm_struct *mm, unsigned long pfn)        {}
-+static inline void paravirt_alloc_pmd_clone(unsigned long pfn, unsigned long clonepfn,
-+                                          unsigned long start, unsigned long count) {}
-+static inline void paravirt_alloc_pud(struct mm_struct *mm, unsigned long pfn)        {}
-+static inline void paravirt_release_pte(unsigned long pfn) {}
-+static inline void paravirt_release_pmd(unsigned long pfn) {}
-+static inline void paravirt_release_pud(unsigned long pfn) {}
-+
-+#ifdef CONFIG_X86_64
-+void early_make_page_readonly(void *va, unsigned int feature);
-+pmd_t *early_get_pmd(unsigned long va);
-+#define make_lowmem_page_readonly make_page_readonly
-+#define make_lowmem_page_writable make_page_writable
- #endif
-+
-+/*
-+ * Allocate and free page tables.
-+ */
-+extern pgd_t *pgd_alloc(struct mm_struct *);
-+extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
-+
-+extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);
-+extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);
-+
-+/* Should really implement gc for free page table pages. This could be
-+   done with a reference count in struct page. */
-+
-+static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-+{
-+      BUG_ON((unsigned long)pte & (PAGE_SIZE-1));
-+      make_lowmem_page_writable(pte, XENFEAT_writable_page_tables);
-+      free_page((unsigned long)pte);
-+}
-+
-+extern void __pte_free(pgtable_t);
-+static inline void pte_free(struct mm_struct *mm, struct page *pte)
-+{
-+      __pte_free(pte);
-+}
-+
-+extern void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte);
-+
-+static inline void pmd_populate_kernel(struct mm_struct *mm,
-+                                     pmd_t *pmd, pte_t *pte)
-+{
-+      paravirt_alloc_pte(mm, __pa(pte) >> PAGE_SHIFT);
-+      set_pmd(pmd, __pmd(__pa(pte) | _PAGE_TABLE));
-+}
-+
-+static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
-+                              struct page *pte)
-+{
-+      unsigned long pfn = page_to_pfn(pte);
-+
-+      paravirt_alloc_pte(mm, pfn);
-+      if (PagePinned(virt_to_page(mm->pgd))) {
-+              if (!PageHighMem(pte))
-+                      BUG_ON(HYPERVISOR_update_va_mapping(
-+                        (unsigned long)__va(pfn << PAGE_SHIFT),
-+                        pfn_pte(pfn, PAGE_KERNEL_RO), 0));
-+#ifndef CONFIG_X86_64
-+              else if (!TestSetPagePinned(pte))
-+                      kmap_flush_unused();
-+#endif
-+              set_pmd(pmd, __pmd(((pmdval_t)pfn << PAGE_SHIFT) | _PAGE_TABLE));
-+      } else
-+              *pmd = __pmd(((pmdval_t)pfn << PAGE_SHIFT) | _PAGE_TABLE);
-+}
-+
-+#define pmd_pgtable(pmd) pmd_page(pmd)
-+
-+#if PAGETABLE_LEVELS > 2
-+extern pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr);
-+extern void __pmd_free(pgtable_t);
-+
-+static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
-+{
-+      BUG_ON((unsigned long)pmd & (PAGE_SIZE-1));
-+      __pmd_free(virt_to_page(pmd));
-+}
-+
-+extern void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd);
-+
-+#ifdef CONFIG_X86_PAE
-+extern void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd);
-+#else /* !CONFIG_X86_PAE */
-+static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
-+{
-+      paravirt_alloc_pmd(mm, __pa(pmd) >> PAGE_SHIFT);
-+      if (unlikely(PagePinned(virt_to_page((mm)->pgd)))) {
-+              BUG_ON(HYPERVISOR_update_va_mapping(
-+                             (unsigned long)pmd,
-+                             pfn_pte(virt_to_phys(pmd)>>PAGE_SHIFT,
-+                                     PAGE_KERNEL_RO), 0));
-+              set_pud(pud, __pud(_PAGE_TABLE | __pa(pmd)));
-+      } else
-+              *pud =  __pud(_PAGE_TABLE | __pa(pmd));
-+}
-+#endif        /* CONFIG_X86_PAE */
-+
-+#if PAGETABLE_LEVELS > 3
-+#define __user_pgd(pgd) ((pgd) + PTRS_PER_PGD)
-+
-+/*
-+ * We need to use the batch mode here, but pgd_pupulate() won't be
-+ * be called frequently.
-+ */
-+static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
-+{
-+      paravirt_alloc_pud(mm, __pa(pud) >> PAGE_SHIFT);
-+      if (unlikely(PagePinned(virt_to_page((mm)->pgd)))) {
-+              BUG_ON(HYPERVISOR_update_va_mapping(
-+                             (unsigned long)pud,
-+                             pfn_pte(virt_to_phys(pud)>>PAGE_SHIFT,
-+                                     PAGE_KERNEL_RO), 0));
-+              set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(pud)));
-+              set_pgd(__user_pgd(pgd), __pgd(_PAGE_TABLE | __pa(pud)));
-+      } else {
-+              *(pgd) =  __pgd(_PAGE_TABLE | __pa(pud));
-+              *__user_pgd(pgd) = *(pgd);
-+      }
-+}
-+
-+static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
-+{
-+      return (pud_t *)pmd_alloc_one(mm, addr);
-+}
-+
-+static inline void pud_free(struct mm_struct *mm, pud_t *pud)
-+{
-+      BUG_ON((unsigned long)pud & (PAGE_SIZE-1));
-+      __pmd_free(virt_to_page(pud));
-+}
-+
-+extern void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud);
-+#endif        /* PAGETABLE_LEVELS > 3 */
-+#endif        /* PAGETABLE_LEVELS > 2 */
-+
-+#endif        /* _ASM_X86_PGALLOC_H */
---- a/include/asm-x86/mach-xen/asm/pgtable_32.h
-+++ b/include/asm-x86/mach-xen/asm/pgtable_32.h
-@@ -38,16 +38,13 @@ void paging_init(void);
- #ifdef CONFIG_X86_PAE
- # include <asm/pgtable-3level-defs.h>
- # define PMD_SIZE     (1UL << PMD_SHIFT)
--# define PMD_MASK     (~(PMD_SIZE-1))
-+# define PMD_MASK     (~(PMD_SIZE - 1))
- #else
- # include <asm/pgtable-2level-defs.h>
- #endif
- 
- #define PGDIR_SIZE    (1UL << PGDIR_SHIFT)
--#define PGDIR_MASK    (~(PGDIR_SIZE-1))
+-extern void *dma_alloc_coherent(struct device *dev, size_t size,
+-                              dma_addr_t *dma_handle, gfp_t gfp);
+-extern void dma_free_coherent(struct device *dev, size_t size, void *vaddr,
+-                            dma_addr_t dma_handle);
+-
+-static inline dma_addr_t
+-dma_map_single(struct device *hwdev, void *ptr, size_t size,
+-             int direction)
+-{
+-      BUG_ON(!valid_dma_direction(direction));
+-      return dma_ops->map_single(hwdev, ptr, size, direction);
+-}
  -
--#define USER_PGD_PTRS (PAGE_OFFSET >> PGDIR_SHIFT)
--#define KERNEL_PGD_PTRS (PTRS_PER_PGD-USER_PGD_PTRS)
-+#define PGDIR_MASK    (~(PGDIR_SIZE - 1))
- 
- /* Just any arbitrary offset to the start of the vmalloc VM area: the
-  * current 8MB value just means that there will be a 8MB "hole" after the
-@@ -56,21 +53,22 @@ void paging_init(void);
-  * The vmalloc() routines leaves a hole of 4kB between each vmalloced
-  * area for the same reason. ;)
-  */
--#define VMALLOC_OFFSET        (8*1024*1024)
--#define VMALLOC_START (((unsigned long) high_memory + \
--                      2*VMALLOC_OFFSET-1) & ~(VMALLOC_OFFSET-1))
-+#define VMALLOC_OFFSET        (8 * 1024 * 1024)
-+#define VMALLOC_START (((unsigned long)high_memory + 2 * VMALLOC_OFFSET - 1) \
-+                       & ~(VMALLOC_OFFSET - 1))
- #ifdef CONFIG_X86_PAE
- #define LAST_PKMAP 512
- #else
- #define LAST_PKMAP 1024
- #endif
- 
--#define PKMAP_BASE ((FIXADDR_BOOT_START - PAGE_SIZE*(LAST_PKMAP + 1)) & PMD_MASK)
-+#define PKMAP_BASE ((FIXADDR_BOOT_START - PAGE_SIZE * (LAST_PKMAP + 1))       \
-+                  & PMD_MASK)
- 
- #ifdef CONFIG_HIGHMEM
--# define VMALLOC_END  (PKMAP_BASE-2*PAGE_SIZE)
-+# define VMALLOC_END  (PKMAP_BASE - 2 * PAGE_SIZE)
- #else
--# define VMALLOC_END  (FIXADDR_START-2*PAGE_SIZE)
-+# define VMALLOC_END  (FIXADDR_START - 2 * PAGE_SIZE)
- #endif
- 
- /*
-@@ -91,10 +89,10 @@ extern unsigned long pg0[];
- /* pmd_present doesn't just test the _PAGE_PRESENT bit since wr.p.t.
-    can temporarily clear it. */
- #define pmd_present(x)        (__pmd_val(x))
--#define pmd_bad(x)    ((__pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER & ~_PAGE_PRESENT)) != (_KERNPG_TABLE & ~_PAGE_PRESENT))
-+#define pmd_bad(x)    ((__pmd_val(x) & (~PTE_MASK & ~_PAGE_USER & ~_PAGE_PRESENT)) != (_KERNPG_TABLE & ~_PAGE_PRESENT))
- #else
- #define pmd_present(x)        (__pmd_val(x) & _PAGE_PRESENT)
--#define pmd_bad(x)    ((__pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE)
-+#define pmd_bad(x)    ((__pmd_val(x) & (~PTE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE)
- #endif
- 
- 
-@@ -107,32 +105,18 @@ extern unsigned long pg0[];
- #endif
- 
- /*
-- * clone_pgd_range(pgd_t *dst, pgd_t *src, int count);
-- *
-- *  dst - pointer to pgd range anwhere on a pgd page
-- *  src - ""
-- *  count - the number of pgds to copy.
-- *
-- * dst and src can be on the same page, but the range must not overlap,
-- * and must not cross a page boundary.
-+ * Macro to mark a page protection value as "uncacheable".
-+ * On processors which do not support it, this is a no-op.
-  */
--static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
+-static inline void
+-dma_unmap_single(struct device *dev, dma_addr_t addr,size_t size,
+-               int direction)
  -{
--       memcpy(dst, src, count * sizeof(pgd_t));
+-      BUG_ON(!valid_dma_direction(direction));
+-      dma_ops->unmap_single(dev, addr, size, direction);
  -}
  -
--/*
-- * Macro to mark a page protection value as "uncacheable".  On processors which do not support
-- * it, this is a no-op.
-- */
--#define pgprot_noncached(prot)        ((boot_cpu_data.x86 > 3)                                          \
--                               ? (__pgprot(pgprot_val(prot) | _PAGE_PCD | _PAGE_PWT)) : (prot))
-+#define pgprot_noncached(prot)                                        \
-+      ((boot_cpu_data.x86 > 3)                                \
-+       ? (__pgprot(pgprot_val(prot) | _PAGE_PCD | _PAGE_PWT)) \
-+       : (prot))
- 
- /*
-  * Conversion functions: convert a page and protection to a page entry,
-  * and a page entry and page directory to the page they refer to.
-  */
+-#define dma_map_page(dev,page,offset,size,dir) \
+-      dma_map_single((dev), page_address(page)+(offset), (size), (dir))
  -
- #define mk_pte(page, pgprot)  pfn_pte(page_to_pfn(page), (pgprot))
- 
- /*
-@@ -141,20 +125,20 @@ static inline void clone_pgd_range(pgd_t
-  * this macro returns the index of the entry in the pgd page which would
-  * control the given virtual address
-  */
--#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
--#define pgd_index_k(addr) pgd_index(addr)
-+#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
-+#define pgd_index_k(addr) pgd_index((addr))
- 
- /*
-  * pgd_offset() returns a (pgd_t *)
-  * pgd_index() is used get the offset into the pgd page's array of pgd_t's;
-  */
--#define pgd_offset(mm, address) ((mm)->pgd+pgd_index(address))
-+#define pgd_offset(mm, address) ((mm)->pgd + pgd_index((address)))
- 
- /*
-  * a shortcut which implies the use of the kernel's pgd, instead
-  * of a process's
-  */
--#define pgd_offset_k(address) pgd_offset(&init_mm, address)
-+#define pgd_offset_k(address) pgd_offset(&init_mm, (address))
- 
- static inline int pud_large(pud_t pud) { return 0; }
- 
-@@ -164,8 +148,8 @@ static inline int pud_large(pud_t pud) {
-  * this macro returns the index of the entry in the pmd page which would
-  * control the given virtual address
-  */
--#define pmd_index(address) \
--              (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
-+#define pmd_index(address)                            \
-+      (((address) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))
- 
- /*
-  * the pte page can be thought of an array like this: pte_t[PTRS_PER_PTE]
-@@ -173,33 +157,36 @@ static inline int pud_large(pud_t pud) {
-  * this macro returns the index of the entry in the pte page which would
-  * control the given virtual address
-  */
--#define pte_index(address) \
--              (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
--#define pte_offset_kernel(dir, address) \
--      ((pte_t *) pmd_page_vaddr(*(dir)) +  pte_index(address))
-+#define pte_index(address)                                    \
-+      (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
-+#define pte_offset_kernel(dir, address)                               \
-+      ((pte_t *)pmd_page_vaddr(*(dir)) +  pte_index((address)))
- 
--#define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT))
-+#define pmd_page(pmd) (pfn_to_page(pmd_val((pmd)) >> PAGE_SHIFT))
- 
--#define pmd_page_vaddr(pmd) \
--              ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
-+#define pmd_page_vaddr(pmd)                                   \
-+      ((unsigned long)__va(pmd_val((pmd)) & PTE_MASK))
- 
- #if defined(CONFIG_HIGHPTE)
--#define pte_offset_map(dir, address) \
--      ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE0) + pte_index(address))
--#define pte_offset_map_nested(dir, address) \
--      ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE1) + pte_index(address))
--#define pte_unmap(pte) kunmap_atomic(pte, KM_PTE0)
--#define pte_unmap_nested(pte) kunmap_atomic(pte, KM_PTE1)
--#else
--#define pte_offset_map(dir, address) \
--      ((pte_t *)page_address(pmd_page(*(dir))) + pte_index(address))
--#define pte_offset_map_nested(dir, address) pte_offset_map(dir, address)
-+#define pte_offset_map(dir, address)                                  \
-+      ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE0) +          \
-+       pte_index((address)))
-+#define pte_offset_map_nested(dir, address)                           \
-+      ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE1) +          \
-+       pte_index((address)))
-+#define pte_unmap(pte) kunmap_atomic((pte), KM_PTE0)
-+#define pte_unmap_nested(pte) kunmap_atomic((pte), KM_PTE1)
-+#else
-+#define pte_offset_map(dir, address)                                  \
-+      ((pte_t *)page_address(pmd_page(*(dir))) + pte_index((address)))
-+#define pte_offset_map_nested(dir, address) pte_offset_map((dir), (address))
- #define pte_unmap(pte) do { } while (0)
- #define pte_unmap_nested(pte) do { } while (0)
- #endif
- 
- /* Clear a kernel PTE and flush it from the TLB */
--#define kpte_clear_flush(ptep, vaddr) do { \
-+#define kpte_clear_flush(ptep, vaddr)                                 \
-+do {                                                                  \
-       if (HYPERVISOR_update_va_mapping(vaddr, __pte(0), UVMF_INVLPG)) \
-               BUG(); \
- } while (0)
-@@ -208,7 +195,7 @@ static inline int pud_large(pud_t pud) {
-  * The i386 doesn't have any external MMU info: the kernel page
-  * tables contain all the necessary information.
-  */
--#define update_mmu_cache(vma,address,pte) do { } while (0)
-+#define update_mmu_cache(vma, address, pte) do { } while (0)
- 
- void make_lowmem_page_readonly(void *va, unsigned int feature);
- void make_lowmem_page_writable(void *va, unsigned int feature);
-@@ -225,7 +212,7 @@ void make_lowmem_page_writable(void *va,
- #define kern_addr_valid(kaddr)        (0)
- #endif
- 
--#define io_remap_pfn_range(vma,from,pfn,size,prot) \
--direct_remap_pfn_range(vma,from,pfn,size,prot,DOMID_IO)
-+#define io_remap_pfn_range(vma, from, pfn, size, prot)                        \
-+      direct_remap_pfn_range(vma, from, pfn, size, prot, DOMID_IO)
- 
- #endif /* _I386_PGTABLE_H */
---- a/include/asm-x86/mach-xen/asm/pgtable-3level.h
-+++ b/include/asm-x86/mach-xen/asm/pgtable-3level.h
-@@ -8,25 +8,28 @@
-  * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
-  */
- 
--#define pte_ERROR(e) \
--      printk("%s:%d: bad pte %p(%016Lx pfn %08lx).\n", __FILE__, __LINE__, \
--             &(e), __pte_val(e), pte_pfn(e))
--#define pmd_ERROR(e) \
--      printk("%s:%d: bad pmd %p(%016Lx pfn %08Lx).\n", __FILE__, __LINE__, \
--             &(e), __pmd_val(e), (pmd_val(e) & PTE_MASK) >> PAGE_SHIFT)
--#define pgd_ERROR(e) \
--      printk("%s:%d: bad pgd %p(%016Lx pfn %08Lx).\n", __FILE__, __LINE__, \
--             &(e), __pgd_val(e), (pgd_val(e) & PTE_MASK) >> PAGE_SHIFT)
+-#define dma_unmap_page dma_unmap_single
+-
+-static inline void
+-dma_sync_single_for_cpu(struct device *hwdev, dma_addr_t dma_handle,
+-                      size_t size, int direction)
+-{
+-      BUG_ON(!valid_dma_direction(direction));
+-      if (dma_ops->sync_single_for_cpu)
+-              dma_ops->sync_single_for_cpu(hwdev, dma_handle, size,
+-                                           direction);
+-      flush_write_buffers();
+-}
+-
+-static inline void
+-dma_sync_single_for_device(struct device *hwdev, dma_addr_t dma_handle,
+-                         size_t size, int direction)
+-{
+-      BUG_ON(!valid_dma_direction(direction));
+-      if (dma_ops->sync_single_for_device)
+-              dma_ops->sync_single_for_device(hwdev, dma_handle, size,
+-                                              direction);
+-      flush_write_buffers();
+-}
+-
+-static inline void
+-dma_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dma_handle,
+-                            unsigned long offset, size_t size, int direction)
+-{
+-      BUG_ON(!valid_dma_direction(direction));
+-      if (dma_ops->sync_single_range_for_cpu) {
+-              dma_ops->sync_single_range_for_cpu(hwdev, dma_handle, offset, size, direction);
+-      }
+-
+-      flush_write_buffers();
+-}
+-
+-static inline void
+-dma_sync_single_range_for_device(struct device *hwdev, dma_addr_t dma_handle,
+-                               unsigned long offset, size_t size, int direction)
+-{
+-      BUG_ON(!valid_dma_direction(direction));
+-      if (dma_ops->sync_single_range_for_device)
+-              dma_ops->sync_single_range_for_device(hwdev, dma_handle,
+-                                                    offset, size, direction);
  -
-+#define pte_ERROR(e)                                                  \
-+      printk("%s:%d: bad pte %p(%016Lx pfn %08lx).\n",                \
-+              __FILE__, __LINE__, &(e), __pte_val(e), pte_pfn(e))
-+#define pmd_ERROR(e)                                                  \
-+      printk("%s:%d: bad pmd %p(%016Lx pfn %08Lx).\n",                \
-+             __FILE__, __LINE__, &(e), __pmd_val(e),                  \
-+             (pmd_val(e) & PTE_MASK) >> PAGE_SHIFT)
-+#define pgd_ERROR(e)                                                  \
-+      printk("%s:%d: bad pgd %p(%016Lx pfn %08Lx).\n",                \
-+             __FILE__, __LINE__, &(e), __pgd_val(e),                  \
-+             (pgd_val(e) & PTE_MASK) >> PAGE_SHIFT)
- 
- static inline int pud_none(pud_t pud)
- {
-       return __pud_val(pud) == 0;
-+
- }
- static inline int pud_bad(pud_t pud)
- {
-       return (__pud_val(pud) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER)) != 0;
- }
+-      flush_write_buffers();
+-}
+-
+-static inline void
+-dma_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg,
+-                  int nelems, int direction)
+-{
+-      BUG_ON(!valid_dma_direction(direction));
+-      if (dma_ops->sync_sg_for_cpu)
+-              dma_ops->sync_sg_for_cpu(hwdev, sg, nelems, direction);
+-      flush_write_buffers();
+-}
+-
+-static inline void
+-dma_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
+-                     int nelems, int direction)
+-{
+-      BUG_ON(!valid_dma_direction(direction));
+-      if (dma_ops->sync_sg_for_device) {
+-              dma_ops->sync_sg_for_device(hwdev, sg, nelems, direction);
+-      }
+-
+-      flush_write_buffers();
+-}
+-
+-static inline int
+-dma_map_sg(struct device *hwdev, struct scatterlist *sg, int nents, int direction)
+-{
+-      BUG_ON(!valid_dma_direction(direction));
+-      return dma_ops->map_sg(hwdev, sg, nents, direction);
+-}
+-
+-static inline void
+-dma_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents,
+-           int direction)
+-{
+-      BUG_ON(!valid_dma_direction(direction));
+-      dma_ops->unmap_sg(hwdev, sg, nents, direction);
+-}
+-
+-extern int dma_supported(struct device *hwdev, u64 mask);
+-
+-/* same for gart, swiotlb, and nommu */
+-static inline int dma_get_cache_alignment(void)
+-{
+-      return boot_cpu_data.x86_clflush_size;
+-}
+-
+-#define dma_is_consistent(d, h) 1
+-
+-extern int dma_set_mask(struct device *dev, u64 mask);
+-
+-static inline void
+-dma_cache_sync(struct device *dev, void *vaddr, size_t size,
+-      enum dma_data_direction dir)
+-{
+-      flush_write_buffers();
+-}
+-
+-extern struct device fallback_dev;
+-extern int panic_on_overflow;
+-#endif
+-
+-#endif /* _X8664_DMA_MAPPING_H */
+-
+-#include "dma-mapping_32.h"
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/fixmap.h        2009-02-16 16:18:36.000000000 +0100
++++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/fixmap.h     2009-03-16 16:38:05.000000000 +0100
+@@ -1,5 +1,13 @@
++#ifndef _ASM_FIXMAP_H
++#define _ASM_FIXMAP_H
  +
- static inline int pud_present(pud_t pud)
- {
-       return __pud_val(pud) & _PAGE_PRESENT;
-@@ -48,12 +51,14 @@ static inline void xen_set_pte(pte_t *pt
- 
- static inline void xen_set_pte_atomic(pte_t *ptep, pte_t pte)
- {
--      set_64bit((unsigned long long *)(ptep),__pte_val(pte));
-+      set_64bit((unsigned long long *)(ptep), __pte_val(pte));
- }
+ #ifdef CONFIG_X86_32
+ # include "fixmap_32.h"
+ #else
+ # include "fixmap_64.h"
+ #endif
  +
- static inline void xen_set_pmd(pmd_t *pmdp, pmd_t pmd)
- {
-       xen_l2_entry_update(pmdp, pmd);
- }
++#define clear_fixmap(idx)                     \
++      __set_fixmap(idx, 0, __pgprot(0))
  +
- static inline void xen_set_pud(pud_t *pudp, pud_t pud)
- {
-       xen_l3_entry_update(pudp, pud);
-@@ -92,20 +97,19 @@ static inline void pud_clear(pud_t *pudp
-        * current pgd to avoid unnecessary TLB flushes.
-        */
-       pgd = read_cr3();
--      if (__pa(pudp) >= pgd && __pa(pudp) < (pgd + sizeof(pgd_t)*PTRS_PER_PGD))
-+      if (__pa(pudp) >= pgd && __pa(pudp) <
-+          (pgd + sizeof(pgd_t)*PTRS_PER_PGD))
-               xen_tlb_flush();
- }
- 
--#define pud_page(pud) \
--((struct page *) __va(pud_val(pud) & PAGE_MASK))
-+#define pud_page(pud) ((struct page *) __va(pud_val(pud) & PTE_MASK))
- 
--#define pud_page_vaddr(pud) \
--((unsigned long) __va(pud_val(pud) & PAGE_MASK))
-+#define pud_page_vaddr(pud) ((unsigned long) __va(pud_val(pud) & PTE_MASK))
- 
- 
- /* Find an entry in the second-level page table.. */
--#define pmd_offset(pud, address) ((pmd_t *) pud_page(*(pud)) + \
--                      pmd_index(address))
-+#define pmd_offset(pud, address) ((pmd_t *)pud_page(*(pud)) + \
-+                                pmd_index(address))
- 
- #ifdef CONFIG_SMP
- static inline pte_t xen_ptep_get_and_clear(pte_t *ptep, pte_t res)
-@@ -150,7 +154,8 @@ static inline int pte_none(pte_t pte)
-  * put the 32 bits of offset into the high part.
++#endif
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/fixmap_32.h     2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/fixmap_32.h  2009-03-16 16:38:05.000000000 +0100
+@@ -10,8 +10,8 @@
+  * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
    */
- #define pte_to_pgoff(pte) ((pte).pte_high)
--#define pgoff_to_pte(off) ((pte_t) { { .pte_low = _PAGE_FILE, .pte_high = (off) } })
-+#define pgoff_to_pte(off)                                             \
-+      ((pte_t) { { .pte_low = _PAGE_FILE, .pte_high = (off) } })
- #define PTE_FILE_MAX_BITS       32
   
- /* Encode and de-code a swap entry */
---- a/include/asm-x86/mach-xen/asm/pgtable_64.h
-+++ b/include/asm-x86/mach-xen/asm/pgtable_64.h
-@@ -31,7 +31,7 @@ extern void paging_init(void);
+-#ifndef _ASM_FIXMAP_H
+-#define _ASM_FIXMAP_H
++#ifndef _ASM_FIXMAP_32_H
++#define _ASM_FIXMAP_32_H
   
- #endif /* !__ASSEMBLY__ */
+ /* used by vmalloc.c, vsyscall.lds.S.
+  *
+@@ -102,8 +102,7 @@ enum fixed_addresses {
+        */
+ #define NR_FIX_BTMAPS         64
+ #define FIX_BTMAPS_NESTING    4
+-      FIX_BTMAP_END =
+-              __end_of_permanent_fixed_addresses + 512 -
++      FIX_BTMAP_END = __end_of_permanent_fixed_addresses + 512 -
+                       (__end_of_permanent_fixed_addresses & 511),
+       FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_NESTING - 1,
+       FIX_WP_TEST,
+@@ -114,19 +113,16 @@ enum fixed_addresses {
+ };
   
--#define SHARED_KERNEL_PMD     1
-+#define SHARED_KERNEL_PMD     0
+ extern void __set_fixmap(enum fixed_addresses idx,
+-                                      maddr_t phys, pgprot_t flags);
++                       maddr_t phys, pgprot_t flags);
+ extern void reserve_top_address(unsigned long reserve);
   
+-#define set_fixmap(idx, phys) \
+-              __set_fixmap(idx, phys, PAGE_KERNEL)
++#define set_fixmap(idx, phys)                         \
++      __set_fixmap(idx, phys, PAGE_KERNEL)
   /*
-  * PGDIR_SHIFT determines what a top-level page table entry can map
-@@ -59,18 +59,20 @@ extern void paging_init(void);
- 
- #ifndef __ASSEMBLY__
- 
--#define pte_ERROR(e) \
--      printk("%s:%d: bad pte %p(%016lx pfn %010lx).\n", __FILE__, __LINE__, \
--             &(e), __pte_val(e), pte_pfn(e))
--#define pmd_ERROR(e) \
--      printk("%s:%d: bad pmd %p(%016lx pfn %010lx).\n", __FILE__, __LINE__, \
--             &(e), __pmd_val(e), pmd_pfn(e))
--#define pud_ERROR(e) \
--      printk("%s:%d: bad pud %p(%016lx pfn %010lx).\n", __FILE__, __LINE__, \
--             &(e), __pud_val(e), (pud_val(e) & __PHYSICAL_MASK) >> PAGE_SHIFT)
--#define pgd_ERROR(e) \
--      printk("%s:%d: bad pgd %p(%016lx pfn %010lx).\n", __FILE__, __LINE__, \
--             &(e), __pgd_val(e), (pgd_val(e) & __PHYSICAL_MASK) >> PAGE_SHIFT)
-+#define pte_ERROR(e)                                                  \
-+      printk("%s:%d: bad pte %p(%016lx pfn %010lx).\n",               \
-+             __FILE__, __LINE__, &(e), __pte_val(e), pte_pfn(e))
-+#define pmd_ERROR(e)                                                  \
-+      printk("%s:%d: bad pmd %p(%016lx pfn %010lx).\n",               \
-+             __FILE__, __LINE__, &(e), __pmd_val(e), pmd_pfn(e))
-+#define pud_ERROR(e)                                                  \
-+      printk("%s:%d: bad pud %p(%016lx pfn %010lx).\n",               \
-+             __FILE__, __LINE__, &(e), __pud_val(e),                  \
-+             (pud_val(e) & __PHYSICAL_MASK) >> PAGE_SHIFT)
-+#define pgd_ERROR(e)                                                  \
-+      printk("%s:%d: bad pgd %p(%016lx pfn %010lx).\n",               \
-+             __FILE__, __LINE__, &(e), __pgd_val(e),                  \
-+             (pgd_val(e) & __PHYSICAL_MASK) >> PAGE_SHIFT)
- 
- #define pgd_none(x)   (!__pgd_val(x))
- #define pud_none(x)   (!__pud_val(x))
-@@ -125,7 +127,7 @@ static inline void xen_set_pgd(pgd_t *pg
-       xen_l4_entry_update(pgdp, pgd);
- }
- 
--static inline void xen_pgd_clear(pgd_t * pgd)
-+static inline void xen_pgd_clear(pgd_t *pgd)
- {
-       xen_set_pgd(pgd, xen_make_pgd(0));
-       xen_set_pgd(__user_pgd(pgd), xen_make_pgd(0));
-@@ -135,43 +137,43 @@ static inline void xen_pgd_clear(pgd_t *
- 
- #endif /* !__ASSEMBLY__ */
- 
--#define PMD_SIZE      (_AC(1,UL) << PMD_SHIFT)
--#define PMD_MASK      (~(PMD_SIZE-1))
--#define PUD_SIZE      (_AC(1,UL) << PUD_SHIFT)
--#define PUD_MASK      (~(PUD_SIZE-1))
--#define PGDIR_SIZE    (_AC(1,UL) << PGDIR_SHIFT)
--#define PGDIR_MASK    (~(PGDIR_SIZE-1))
-+#define PMD_SIZE      (_AC(1, UL) << PMD_SHIFT)
-+#define PMD_MASK      (~(PMD_SIZE - 1))
-+#define PUD_SIZE      (_AC(1, UL) << PUD_SHIFT)
-+#define PUD_MASK      (~(PUD_SIZE - 1))
-+#define PGDIR_SIZE    (_AC(1, UL) << PGDIR_SHIFT)
-+#define PGDIR_MASK    (~(PGDIR_SIZE - 1))
- 
- 
--#define MAXMEM                 _AC(0x3fffffffffff, UL)
-+#define MAXMEM                 _AC(0x00003fffffffffff, UL)
- #define VMALLOC_START    _AC(0xffffc20000000000, UL)
- #define VMALLOC_END      _AC(0xffffe1ffffffffff, UL)
- #define VMEMMAP_START  _AC(0xffffe20000000000, UL)
--#define MODULES_VADDR    _AC(0xffffffff88000000, UL)
-+#define MODULES_VADDR    _AC(0xffffffffa0000000, UL)
- #define MODULES_END      _AC(0xfffffffffff00000, UL)
- #define MODULES_LEN   (MODULES_END - MODULES_VADDR)
+  * Some hardware wants to get fixmapped without caching.
+  */
+-#define set_fixmap_nocache(idx, phys) \
+-              __set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE)
+-
+-#define clear_fixmap(idx) \
+-              __set_fixmap(idx, 0, __pgprot(0))
++#define set_fixmap_nocache(idx, phys)                 \
++      __set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE)
   
- #ifndef __ASSEMBLY__
+ #define FIXADDR_TOP   ((unsigned long)__FIXADDR_TOP)
   
--static inline unsigned long pgd_bad(pgd_t pgd)
-+static inline int pgd_bad(pgd_t pgd)
- {
--      return __pgd_val(pgd) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER);
-+      return (__pgd_val(pgd) & ~(PTE_MASK | _PAGE_USER)) != _KERNPG_TABLE;
- }
+@@ -159,7 +155,7 @@ static __always_inline unsigned long fix
+       if (idx >= __end_of_fixed_addresses)
+               __this_fixmap_does_not_exist();
   
--static inline unsigned long pud_bad(pud_t pud)
-+static inline int pud_bad(pud_t pud)
- {
--      return __pud_val(pud) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER);
-+      return (__pud_val(pud) & ~(PTE_MASK | _PAGE_USER)) != _KERNPG_TABLE;
+-        return __fix_to_virt(idx);
++      return __fix_to_virt(idx);
   }
   
--static inline unsigned long pmd_bad(pmd_t pmd)
-+static inline int pmd_bad(pmd_t pmd)
- {
--      return __pmd_val(pmd) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER);
-+      return (__pmd_val(pmd) & ~(PTE_MASK | _PAGE_USER)) != _KERNPG_TABLE;
- }
+ static inline unsigned long virt_to_fix(const unsigned long vaddr)
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/fixmap_64.h     2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/fixmap_64.h  2009-03-16 16:38:05.000000000 +0100
+@@ -8,8 +8,8 @@
+  * Copyright (C) 1998 Ingo Molnar
+  */
   
- #define pte_none(x)   (!(x).pte)
- #define pte_present(x)        ((x).pte & (_PAGE_PRESENT | _PAGE_PROTNONE))
+-#ifndef _ASM_FIXMAP_H
+-#define _ASM_FIXMAP_H
++#ifndef _ASM_FIXMAP_64_H
++#define _ASM_FIXMAP_64_H
   
--#define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT))       /* FIXME: is this right? */
-+#define pages_to_mb(x)        ((x) >> (20 - PAGE_SHIFT))   /* FIXME: is this right? */
+ #include <linux/kernel.h>
+ #include <asm/apicdef.h>
+@@ -35,7 +35,8 @@
   
- #define __pte_mfn(_pte) (((_pte).pte & PTE_MASK) >> PAGE_SHIFT)
- #define pte_mfn(_pte) ((_pte).pte & _PAGE_PRESENT ? \
-@@ -181,13 +183,13 @@ static inline unsigned long pmd_bad(pmd_
-                      mfn_to_local_pfn(__pte_mfn(_pte)) :      \
-                      __pte_mfn(_pte))
+ enum fixed_addresses {
+       VSYSCALL_LAST_PAGE,
+-      VSYSCALL_FIRST_PAGE = VSYSCALL_LAST_PAGE + ((VSYSCALL_END-VSYSCALL_START) >> PAGE_SHIFT) - 1,
++      VSYSCALL_FIRST_PAGE = VSYSCALL_LAST_PAGE
++                          + ((VSYSCALL_END-VSYSCALL_START) >> PAGE_SHIFT) - 1,
+       VSYSCALL_HPET,
+       FIX_DBGP_BASE,
+       FIX_EARLYCON_MEM_BASE,
+@@ -45,11 +46,12 @@ enum fixed_addresses {
+ #endif
+ #ifndef CONFIG_XEN
+       FIX_IO_APIC_BASE_0,
+-      FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS-1,
++      FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1,
+ #endif
+ #ifdef CONFIG_EFI
+       FIX_EFI_IO_MAP_LAST_PAGE,
+-      FIX_EFI_IO_MAP_FIRST_PAGE = FIX_EFI_IO_MAP_LAST_PAGE+MAX_EFI_IO_PAGES-1,
++      FIX_EFI_IO_MAP_FIRST_PAGE = FIX_EFI_IO_MAP_LAST_PAGE
++                                + MAX_EFI_IO_PAGES - 1,
+ #endif
+ #ifdef CONFIG_ACPI
+       FIX_ACPI_BEGIN,
+@@ -79,19 +81,16 @@ enum fixed_addresses {
+       __end_of_fixed_addresses
+ };
   
--#define pte_page(x)   pfn_to_page(pte_pfn(x))
-+#define pte_page(x)   pfn_to_page(pte_pfn((x)))
+-extern void __set_fixmap (enum fixed_addresses idx,
+-                                      unsigned long phys, pgprot_t flags);
++extern void __set_fixmap(enum fixed_addresses idx,
++                       unsigned long phys, pgprot_t flags);
   
+-#define set_fixmap(idx, phys) \
+-              __set_fixmap(idx, phys, PAGE_KERNEL)
++#define set_fixmap(idx, phys)                 \
++      __set_fixmap(idx, phys, PAGE_KERNEL)
   /*
-  * Macro to mark a page protection value as "uncacheable".
+  * Some hardware wants to get fixmapped without caching.
    */
--#define pgprot_noncached(prot)        (__pgprot(pgprot_val(prot) | _PAGE_PCD | _PAGE_PWT))
+-#define set_fixmap_nocache(idx, phys) \
+-              __set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE)
  -
-+#define pgprot_noncached(prot)                                        \
-+      (__pgprot(pgprot_val((prot)) | _PAGE_PCD | _PAGE_PWT))
+-#define clear_fixmap(idx) \
+-                __set_fixmap(idx, 0, __pgprot(0))
++#define set_fixmap_nocache(idx, phys)                 \
++      __set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE)
   
- /*
-  * Conversion functions: convert a page and protection to a page entry,
-@@ -197,36 +199,39 @@ static inline unsigned long pmd_bad(pmd_
- /*
-  * Level 4 access.
-  */
--#define pgd_page_vaddr(pgd) ((unsigned long) __va(pgd_val(pgd) & PTE_MASK))
--#define pgd_page(pgd)         (pfn_to_page(pgd_val(pgd) >> PAGE_SHIFT))
--#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
--#define pgd_offset(mm, addr) ((mm)->pgd + pgd_index(addr))
--#define pgd_offset_k(address) (init_level4_pgt + pgd_index(address))
-+#define pgd_page_vaddr(pgd)                                           \
-+      ((unsigned long)__va((unsigned long)pgd_val((pgd)) & PTE_MASK))
-+#define pgd_page(pgd)         (pfn_to_page(pgd_val((pgd)) >> PAGE_SHIFT))
-+#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
-+#define pgd_offset(mm, address)       ((mm)->pgd + pgd_index((address)))
-+#define pgd_offset_k(address) (init_level4_pgt + pgd_index((address)))
- #define pgd_present(pgd) (__pgd_val(pgd) & _PAGE_PRESENT)
- static inline int pgd_large(pgd_t pgd) { return 0; }
- #define mk_kernel_pgd(address) __pgd((address) | _KERNPG_TABLE)
+ #define FIXADDR_TOP   (VSYSCALL_END-PAGE_SIZE)
+ #define FIXADDR_SIZE  (__end_of_fixed_addresses << PAGE_SHIFT)
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/highmem.h       2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/highmem.h    2009-03-16 16:38:05.000000000 +0100
+@@ -8,7 +8,7 @@
+  *                  Gerhard.Wichert@pdb.siemens.de
+  *
+  *
+- * Redesigned the x86 32-bit VM architecture to deal with 
++ * Redesigned the x86 32-bit VM architecture to deal with
+  * up to 16 Terabyte physical memory. With current x86 CPUs
+  * we now support up to 64 Gigabytes physical RAM.
+  *
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/io.h    2009-02-16 16:18:36.000000000 +0100
++++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/io.h 2009-03-16 16:38:05.000000000 +0100
+@@ -1,5 +1,22 @@
++#ifndef _ASM_X86_IO_H
++#define _ASM_X86_IO_H
++
++#define ARCH_HAS_IOREMAP_WC
++
+ #ifdef CONFIG_X86_32
+ # include "io_32.h"
+ #else
+ # include "io_64.h"
+ #endif
++
++extern void *xlate_dev_mem_ptr(unsigned long phys);
++extern void unxlate_dev_mem_ptr(unsigned long phys, void *addr);
++
++extern void map_devmem(unsigned long pfn, unsigned long len, pgprot_t);
++extern void unmap_devmem(unsigned long pfn, unsigned long len, pgprot_t);
++
++extern int ioremap_check_change_attr(unsigned long mfn, unsigned long size,
++                                   unsigned long prot_val);
++extern void __iomem *ioremap_wc(unsigned long offset, unsigned long size);
++
++#endif /* _ASM_X86_IO_H */
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/irqflags.h      2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/irqflags.h   2009-03-16 16:38:05.000000000 +0100
+@@ -137,11 +137,11 @@ sysexit_ecrit:   /**** END OF SYSEXIT CRIT
+ #endif /* __ASSEMBLY__ */
   
- /* PUD - Level3 access */
- /* to find an entry in a page-table-directory. */
--#define pud_page_vaddr(pud) ((unsigned long) __va(pud_val(pud) & PHYSICAL_PAGE_MASK))
--#define pud_page(pud)         (pfn_to_page(pud_val(pud) >> PAGE_SHIFT))
--#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
--#define pud_offset(pgd, address) ((pud_t *) pgd_page_vaddr(*(pgd)) + pud_index(address))
-+#define pud_page_vaddr(pud)                                           \
-+      ((unsigned long)__va(pud_val((pud)) & PHYSICAL_PAGE_MASK))
-+#define pud_page(pud) (pfn_to_page(pud_val((pud)) >> PAGE_SHIFT))
-+#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
-+#define pud_offset(pgd, address)                                      \
-+      ((pud_t *)pgd_page_vaddr(*(pgd)) + pud_index((address)))
- #define pud_present(pud) (__pud_val(pud) & _PAGE_PRESENT)
+ #ifndef __ASSEMBLY__
+-#define raw_local_save_flags(flags) \
+-              do { (flags) = __raw_local_save_flags(); } while (0)
++#define raw_local_save_flags(flags)                           \
++      do { (flags) = __raw_local_save_flags(); } while (0)
   
- static inline int pud_large(pud_t pte)
+-#define raw_local_irq_save(flags) \
+-              do { (flags) = __raw_local_irq_save(); } while (0)
++#define raw_local_irq_save(flags)                             \
++      do { (flags) = __raw_local_irq_save(); } while (0)
+ 
+ static inline int raw_irqs_disabled_flags(unsigned long flags)
   {
--      return (__pud_val(pte) & (_PAGE_PSE|_PAGE_PRESENT)) ==
--              (_PAGE_PSE|_PAGE_PRESENT);
-+      return (__pud_val(pte) & (_PAGE_PSE | _PAGE_PRESENT)) ==
-+              (_PAGE_PSE | _PAGE_PRESENT);
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/mmu_context_32.h        2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/mmu_context_32.h     2009-03-16 16:38:05.000000000 +0100
+@@ -94,7 +94,7 @@ static inline void switch_mm(struct mm_s
+               BUG_ON(per_cpu(cpu_tlbstate, cpu).active_mm != next);
+ 
+               if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) {
+-                      /* We were in lazy tlb mode and leave_mm disabled 
++                      /* We were in lazy tlb mode and leave_mm disabled
+                        * tlb flush IPI delivery. We must reload %cr3.
+                        */
+                       load_cr3(next->pgd);
+@@ -107,10 +107,10 @@ static inline void switch_mm(struct mm_s
+ #define deactivate_mm(tsk, mm)                        \
+       asm("movl %0,%%gs": :"r" (0));
+ 
+-#define activate_mm(prev, next)                               \
+-      do {                                            \
+-              xen_activate_mm(prev, next);            \
+-              switch_mm((prev),(next),NULL);          \
+-      } while(0)
++#define activate_mm(prev, next)                       \
++do {                                          \
++      xen_activate_mm(prev, next);            \
++      switch_mm((prev), (next), NULL);        \
++} while (0)
+ 
+ #endif
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/mmu_context_64.h        2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/mmu_context_64.h     2009-03-16 16:38:05.000000000 +0100
+@@ -21,7 +21,7 @@ void destroy_context(struct mm_struct *m
+ static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
+ {
+ #if defined(CONFIG_SMP) && !defined(CONFIG_XEN)
+-      if (read_pda(mmu_state) == TLBSTATE_OK) 
++      if (read_pda(mmu_state) == TLBSTATE_OK)
+               write_pda(mmu_state, TLBSTATE_LAZY);
+ #endif
+ }
+@@ -62,7 +62,7 @@ extern void mm_pin(struct mm_struct *mm)
+ extern void mm_unpin(struct mm_struct *mm);
+ void mm_pin_all(void);
+ 
+-static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, 
++static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
+                            struct task_struct *tsk)
+ {
+       unsigned cpu = smp_processor_id();
+@@ -106,7 +106,7 @@ static inline void switch_mm(struct mm_s
+               if (read_pda(active_mm) != next)
+                       BUG();
+               if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) {
+-                      /* We were in lazy tlb mode and leave_mm disabled 
++                      /* We were in lazy tlb mode and leave_mm disabled
+                        * tlb flush IPI delivery. We must reload CR3
+                        * to make sure to use no freed page tables.
+                        */
+@@ -118,10 +118,11 @@ static inline void switch_mm(struct mm_s
+ #endif
   }
   
- /* PMD  - Level 2 access */
--#define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_val(pmd) & PTE_MASK))
--#define pmd_page(pmd)         (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT))
-+#define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_val((pmd)) & PTE_MASK))
-+#define pmd_page(pmd)         (pfn_to_page(pmd_val((pmd)) >> PAGE_SHIFT))
+-#define deactivate_mm(tsk,mm) do { \
+-      load_gs_index(0); \
+-      asm volatile("movl %0,%%fs"::"r"(0));  \
+-} while(0)
++#define deactivate_mm(tsk, mm)                        \
++do {                                          \
++      load_gs_index(0);                       \
++      asm volatile("movl %0,%%fs"::"r"(0));   \
++} while (0)
+ 
+ static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next)
+ {
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/page.h  2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/page.h       2009-03-16 16:38:05.000000000 +0100
+@@ -20,8 +20,16 @@
+ #define _PAGE_BIT_IO          9
+ #define _PAGE_IO              (_AC(1, L)<<_PAGE_BIT_IO)
+ 
+-#define PHYSICAL_PAGE_MASK    (~(_AT(phys_addr_t, PAGE_SIZE) - 1) & __PHYSICAL_MASK)
+-#define PTE_MASK              _AT(pteval_t, PHYSICAL_PAGE_MASK)
++#define __PHYSICAL_MASK               ((phys_addr_t)(1ULL << __PHYSICAL_MASK_SHIFT) - 1)
++#define __VIRTUAL_MASK                ((1UL << __VIRTUAL_MASK_SHIFT) - 1)
++
++/* Cast PAGE_MASK to a signed type so that it is sign-extended if
++   virtual addresses are 32-bits but physical addresses are larger
++   (ie, 32-bit PAE). */
++#define PHYSICAL_PAGE_MASK    (((signed long)PAGE_MASK) & __PHYSICAL_MASK)
++
++/* PTE_MASK extracts the PFN from a (pte|pmd|pud|pgd)val_t */
++#define PTE_MASK              ((pteval_t)PHYSICAL_PAGE_MASK)
+ 
+ #define PMD_PAGE_SIZE         (_AC(1, UL) << PMD_SHIFT)
+ #define PMD_PAGE_MASK         (~(PMD_PAGE_SIZE-1))
+@@ -34,19 +42,14 @@
+ /* to align the pointer to the (next) page boundary */
+ #define PAGE_ALIGN(addr)      (((addr)+PAGE_SIZE-1)&PAGE_MASK)
   
--#define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
--#define pmd_offset(dir, address) ((pmd_t *) pud_page_vaddr(*(dir)) + \
--                                  pmd_index(address))
-+#define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))
-+#define pmd_offset(dir, address) ((pmd_t *)pud_page_vaddr(*(dir)) + \
-+                                pmd_index(address))
- #define pmd_none(x)   (!__pmd_val(x))
- #if CONFIG_XEN_COMPAT <= 0x030002
- /* pmd_present doesn't just test the _PAGE_PRESENT bit since wr.p.t.
-@@ -235,43 +240,56 @@ static inline int pud_large(pud_t pte)
- #else
- #define pmd_present(x)        (__pmd_val(x) & _PAGE_PRESENT)
+-#define __PHYSICAL_MASK               _AT(phys_addr_t, (_AC(1,ULL) << __PHYSICAL_MASK_SHIFT) - 1)
+-#define __VIRTUAL_MASK                ((_AC(1,UL) << __VIRTUAL_MASK_SHIFT) - 1)
+-
+ #ifndef __ASSEMBLY__
+ #include <linux/types.h>
   #endif
--#define pfn_pmd(nr,prot) (__pmd(((nr) << PAGE_SHIFT) | pgprot_val(prot)))
--#define pmd_pfn(x)  ((pmd_val(x) & __PHYSICAL_MASK) >> PAGE_SHIFT)
-+#define pfn_pmd(nr, prot) (__pmd(((nr) << PAGE_SHIFT) | pgprot_val((prot))))
-+#define pmd_pfn(x)  ((pmd_val((x)) & __PHYSICAL_MASK) >> PAGE_SHIFT)
- 
- #define pte_to_pgoff(pte) ((__pte_val(pte) & PHYSICAL_PAGE_MASK) >> PAGE_SHIFT)
--#define pgoff_to_pte(off) ((pte_t) { .pte = ((off) << PAGE_SHIFT) | _PAGE_FILE })
-+#define pgoff_to_pte(off) ((pte_t) { .pte = ((off) << PAGE_SHIFT) |   \
-+                                          _PAGE_FILE })
- #define PTE_FILE_MAX_BITS __PHYSICAL_MASK_SHIFT
   
- /* PTE - Level 1 access. */
+ #ifdef CONFIG_X86_64
+ #include <asm/page_64.h>
+-#define max_pfn_mapped                end_pfn_map
+ #else
+ #include <asm/page_32.h>
+-#define max_pfn_mapped                max_low_pfn
+ #endif        /* CONFIG_X86_64 */
   
- /* page, protection -> pte */
--#define mk_pte(page, pgprot)  pfn_pte(page_to_pfn(page), (pgprot))
-- 
--#define pte_index(address) \
--              (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
-+#define mk_pte(page, pgprot)  pfn_pte(page_to_pfn((page)), (pgprot))
-+
-+#define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
- #define pte_offset_kernel(dir, address) ((pte_t *) pmd_page_vaddr(*(dir)) + \
--                      pte_index(address))
-+                                       pte_index((address)))
+ #define PAGE_OFFSET           ((unsigned long)__PAGE_OFFSET)
+@@ -59,6 +62,9 @@
+ #ifndef __ASSEMBLY__
   
- /* x86-64 always has all page tables mapped. */
--#define pte_offset_map(dir,address) pte_offset_kernel(dir,address)
--#define pte_offset_map_nested(dir,address) pte_offset_kernel(dir,address)
-+#define pte_offset_map(dir, address) pte_offset_kernel((dir), (address))
-+#define pte_offset_map_nested(dir, address) pte_offset_kernel((dir), (address))
- #define pte_unmap(pte) /* NOP */
--#define pte_unmap_nested(pte) /* NOP */ 
-+#define pte_unmap_nested(pte) /* NOP */
+ extern int page_is_ram(unsigned long pagenr);
++extern int devmem_is_allowed(unsigned long pagenr);
  +
-+#define update_mmu_cache(vma, address, pte) do { } while (0)
++extern unsigned long max_pfn_mapped;
   
--#define update_mmu_cache(vma,address,pte) do { } while (0)
-+extern int direct_gbpages;
+ struct page;
   
- /* Encode and de-code a swap entry */
--#define __swp_type(x)                 (((x).val >> 1) & 0x3f)
--#define __swp_offset(x)                       ((x).val >> 8)
--#define __swp_entry(type, offset)     ((swp_entry_t) { ((type) << 1) | ((offset) << 8) })
-+#if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE
-+#define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1)
-+#define SWP_OFFSET_SHIFT (_PAGE_BIT_PROTNONE + 1)
-+#else
-+#define SWP_TYPE_BITS (_PAGE_BIT_PROTNONE - _PAGE_BIT_PRESENT - 1)
-+#define SWP_OFFSET_SHIFT (_PAGE_BIT_FILE + 1)
-+#endif
-+
-+#define __swp_type(x)                 (((x).val >> (_PAGE_BIT_PRESENT + 1)) \
-+                                       & ((1U << SWP_TYPE_BITS) - 1))
-+#define __swp_offset(x)                       ((x).val >> SWP_OFFSET_SHIFT)
-+#define __swp_entry(type, offset)     ((swp_entry_t) { \
-+                                       ((type) << (_PAGE_BIT_PRESENT + 1)) \
-+                                       | ((offset) << SWP_OFFSET_SHIFT) })
- #define __pte_to_swp_entry(pte)               ((swp_entry_t) { __pte_val(pte) })
- #define __swp_entry_to_pte(x)         ((pte_t) { .pte = (x).val })
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/page_64.h       2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/page_64.h    2009-03-16 16:38:05.000000000 +0100
+@@ -5,7 +5,7 @@
   
--extern int kern_addr_valid(unsigned long addr); 
-+extern int kern_addr_valid(unsigned long addr);
- extern void cleanup_highmap(void);
+ #define THREAD_ORDER  1
+ #define THREAD_SIZE  (PAGE_SIZE << THREAD_ORDER)
+-#define CURRENT_MASK (~(THREAD_SIZE-1))
++#define CURRENT_MASK (~(THREAD_SIZE - 1))
   
--#define io_remap_pfn_range(vma, vaddr, pfn, size, prot)               \
--              direct_remap_pfn_range(vma,vaddr,pfn,size,prot,DOMID_IO)
-+#define io_remap_pfn_range(vma, vaddr, pfn, size, prot)       \
-+      direct_remap_pfn_range(vma, vaddr, pfn, size, prot, DOMID_IO)
+ #define EXCEPTION_STACK_ORDER 0
+ #define EXCEPTION_STKSZ (PAGE_SIZE << EXCEPTION_STACK_ORDER)
+@@ -53,10 +53,10 @@
+ #define __VIRTUAL_MASK_SHIFT  48
   
- #define HAVE_ARCH_UNMAPPED_AREA
- #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
-@@ -284,8 +302,10 @@ extern void cleanup_highmap(void);
+ /*
+- * Kernel image size is limited to 128 MB (see level2_kernel_pgt in
++ * Kernel image size is limited to 512 MB (see level2_kernel_pgt in
+  * arch/x86/kernel/head_64.S), and it is mapped here:
+  */
+-#define KERNEL_IMAGE_SIZE     (128*1024*1024)
++#define KERNEL_IMAGE_SIZE     (512 * 1024 * 1024)
+ #define KERNEL_IMAGE_START    _AC(0xffffffff80000000, UL)
   
- /* fs/proc/kcore.c */
- #define       kc_vaddr_to_offset(v) ((v) & __VIRTUAL_MASK)
--#define       kc_offset_to_vaddr(o) \
--   (((o) & (1UL << (__VIRTUAL_MASK_SHIFT-1))) ? ((o) | (~__VIRTUAL_MASK)) : (o))
-+#define       kc_offset_to_vaddr(o)                           \
-+      (((o) & (1UL << (__VIRTUAL_MASK_SHIFT - 1)))    \
-+       ? ((o) | ~__VIRTUAL_MASK)                      \
-+       : (o))
+ #ifndef __ASSEMBLY__
+@@ -64,7 +64,6 @@ void clear_page(void *page);
+ void copy_page(void *to, void *from);
   
- #define __HAVE_ARCH_PTE_SAME
- #endif /* !__ASSEMBLY__ */
---- a/include/asm-x86/mach-xen/asm/pgtable.h
-+++ b/include/asm-x86/mach-xen/asm/pgtable.h
-@@ -1,17 +1,15 @@
- #ifndef _ASM_X86_PGTABLE_H
- #define _ASM_X86_PGTABLE_H
+ extern unsigned long end_pfn;
+-extern unsigned long end_pfn_map;
   
--#define USER_PTRS_PER_PGD     ((TASK_SIZE-1)/PGDIR_SIZE+1)
- #define FIRST_USER_ADDRESS    0
+ static inline unsigned long __phys_addr(unsigned long x)
+ {
+@@ -89,6 +88,9 @@ typedef union { pteval_t pte; unsigned i
   
--#define _PAGE_BIT_PRESENT     0
--#define _PAGE_BIT_RW          1
--#define _PAGE_BIT_USER                2
--#define _PAGE_BIT_PWT         3
--#define _PAGE_BIT_PCD         4
--#define _PAGE_BIT_ACCESSED    5
--#define _PAGE_BIT_DIRTY               6
--#define _PAGE_BIT_FILE                6
-+#define _PAGE_BIT_PRESENT     0       /* is present */
-+#define _PAGE_BIT_RW          1       /* writeable */
-+#define _PAGE_BIT_USER                2       /* userspace addressable */
-+#define _PAGE_BIT_PWT         3       /* page write through */
-+#define _PAGE_BIT_PCD         4       /* page cache disabled */
-+#define _PAGE_BIT_ACCESSED    5       /* was accessed (raised by CPU) */
-+#define _PAGE_BIT_DIRTY               6       /* was written to (raised by CPU) */
- #define _PAGE_BIT_PSE         7       /* 4 MB (or 2MB) page */
- #define _PAGE_BIT_PAT         7       /* on 4KB pages */
- #define _PAGE_BIT_GLOBAL      8       /* Global TLB entry PPro+ */
-@@ -22,6 +20,14 @@
- #define _PAGE_BIT_PAT_LARGE   12      /* On 2MB or 1GB pages */
- #define _PAGE_BIT_NX           63       /* No execute: only valid after cpuid check */
+ #define vmemmap ((struct page *)VMEMMAP_START)
   
-+/* If _PAGE_BIT_PRESENT is clear, we use these: */
-+
-+/* set: nonlinear file mapping, saved PTE; unset:swap */
-+#define _PAGE_BIT_FILE                _PAGE_BIT_DIRTY
-+
-+/* if the user mapped it with PROT_NONE; pte_present gives true */
-+#define _PAGE_BIT_PROTNONE    _PAGE_BIT_GLOBAL
++extern unsigned long init_memory_mapping(unsigned long start,
++                                       unsigned long end);
  +
- /*
-  * Note: we use _AC(1, L) instead of _AC(1, UL) so that we get a
-  * sign-extended value on 32-bit with all 1's in the upper word,
-@@ -48,10 +54,8 @@
- #define _PAGE_NX      0
- #endif
+ #endif        /* !__ASSEMBLY__ */
   
--/* If _PAGE_PRESENT is clear, we use these: */
--#define _PAGE_FILE    _PAGE_DIRTY     /* nonlinear file mapping, saved PTE; unset:swap */
--#define _PAGE_PROTNONE        _PAGE_PSE       /* if the user mapped it with PROT_NONE;
--                                         pte_present gives true */
-+#define _PAGE_FILE    (_AC(1, L)<<_PAGE_BIT_FILE)
-+#define _PAGE_PROTNONE        (_AC(1, L)<<_PAGE_BIT_PROTNONE)
+ #ifdef CONFIG_FLATMEM
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/pci.h   2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/pci.h        2009-03-16 16:38:05.000000000 +0100
+@@ -8,14 +8,13 @@
+ #include <asm/scatterlist.h>
+ #include <asm/io.h>
   
- #ifndef __ASSEMBLY__
- #if defined(CONFIG_X86_64) && CONFIG_XEN_COMPAT <= 0x030002
-@@ -61,20 +65,42 @@ extern unsigned int __kernel_page_user;
- #endif
+-
+ #ifdef __KERNEL__
+ 
+ struct pci_sysdata {
+       int             domain;         /* PCI domain */
+       int             node;           /* NUMA node */
+ #ifdef CONFIG_X86_64
+-      void*           iommu;          /* IOMMU private data */
++      void            *iommu;         /* IOMMU private data */
   #endif
+ #ifdef CONFIG_XEN_PCIDEV_FRONTEND
+       struct pcifront_device *pdev;
+@@ -23,6 +22,8 @@ struct pci_sysdata {
+ };
   
--#define _PAGE_TABLE   (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY)
--#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY | __kernel_page_user)
-+#define _PAGE_TABLE   (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER |        \
-+                       _PAGE_ACCESSED | _PAGE_DIRTY)
-+#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED |    \
-+                       _PAGE_DIRTY | __kernel_page_user)
-+
-+/* Set of bits not changed in pte_modify */
-+#define _PAGE_CHG_MASK        (PTE_MASK | _PAGE_CACHE_MASK | _PAGE_IO |       \
-+                       _PAGE_ACCESSED | _PAGE_DIRTY)
+ /* scan a bus after allocating a pci_sysdata for it */
++extern struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops,
++                                          int node);
+ extern struct pci_bus *pci_scan_bus_with_sysdata(int busno);
   
--#define _PAGE_CHG_MASK        (PTE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_IO)
-+/*
-+ * PAT settings are part of the hypervisor interface, which sets the
-+ * MSR to 0x050100070406 (i.e. WB, WT, UC-, UC, WC, WP [, UC, UC]).
-+ */
-+#define _PAGE_CACHE_MASK      (_PAGE_PCD | _PAGE_PWT | _PAGE_PAT)
-+#define _PAGE_CACHE_WB                (0)
-+#define _PAGE_CACHE_WT                (_PAGE_PWT)
-+#define _PAGE_CACHE_WC                (_PAGE_PAT)
-+#define _PAGE_CACHE_WP                (_PAGE_PAT | _PAGE_PWT)
-+#define _PAGE_CACHE_UC_MINUS  (_PAGE_PCD)
-+#define _PAGE_CACHE_UC                (_PAGE_PCD | _PAGE_PWT)
+ static inline int pci_domain_nr(struct pci_bus *bus)
+@@ -36,6 +37,7 @@ static inline int pci_proc_domain(struct
+       return pci_domain_nr(bus);
+ }
+ 
++extern void pci_iommu_alloc(void);
+ 
+ /* Can be used to override the logic in pci_scan_bus for skipping
+    already-configured bus numbers - to be used for buggy BIOSes
+@@ -57,7 +59,7 @@ extern unsigned long pci_mem_start;
+ #define PCIBIOS_MIN_CARDBUS_IO        0x4000
+ 
+ void pcibios_config_init(void);
+-struct pci_bus * pcibios_scan_root(int bus);
++struct pci_bus *pcibios_scan_root(int bus);
   
- #define PAGE_NONE     __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED)
--#define PAGE_SHARED   __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX)
-+#define PAGE_SHARED   __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \
-+                               _PAGE_ACCESSED | _PAGE_NX)
+ void pcibios_set_master(struct pci_dev *dev);
+ void pcibios_penalize_isa_irq(int irq, int active);
+@@ -67,7 +69,8 @@ int pcibios_set_irq_routing(struct pci_d
   
--#define PAGE_SHARED_EXEC      __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED)
--#define PAGE_COPY_NOEXEC      __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX)
--#define PAGE_COPY_EXEC                __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
-+#define PAGE_SHARED_EXEC      __pgprot(_PAGE_PRESENT | _PAGE_RW |     \
-+                                       _PAGE_USER | _PAGE_ACCESSED)
-+#define PAGE_COPY_NOEXEC      __pgprot(_PAGE_PRESENT | _PAGE_USER |   \
-+                                       _PAGE_ACCESSED | _PAGE_NX)
-+#define PAGE_COPY_EXEC                __pgprot(_PAGE_PRESENT | _PAGE_USER |   \
-+                                       _PAGE_ACCESSED)
- #define PAGE_COPY             PAGE_COPY_NOEXEC
--#define PAGE_READONLY         __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX)
--#define PAGE_READONLY_EXEC    __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
-+#define PAGE_READONLY         __pgprot(_PAGE_PRESENT | _PAGE_USER |   \
-+                                       _PAGE_ACCESSED | _PAGE_NX)
-+#define PAGE_READONLY_EXEC    __pgprot(_PAGE_PRESENT | _PAGE_USER |   \
-+                                       _PAGE_ACCESSED)
+ #define HAVE_PCI_MMAP
+ extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
+-                             enum pci_mmap_state mmap_state, int write_combine);
++                             enum pci_mmap_state mmap_state,
++                             int write_combine);
   
- #ifdef CONFIG_X86_32
- #define _PAGE_KERNEL_EXEC \
-@@ -93,6 +119,7 @@ extern pteval_t __PAGE_KERNEL, __PAGE_KE
- #define __PAGE_KERNEL_RO              (__PAGE_KERNEL & ~_PAGE_RW)
- #define __PAGE_KERNEL_RX              (__PAGE_KERNEL_EXEC & ~_PAGE_RW)
- #define __PAGE_KERNEL_EXEC_NOCACHE    (__PAGE_KERNEL_EXEC | _PAGE_PCD | _PAGE_PWT)
-+#define __PAGE_KERNEL_WC              (__PAGE_KERNEL | _PAGE_CACHE_WC)
- #define __PAGE_KERNEL_NOCACHE         (__PAGE_KERNEL | _PAGE_PCD | _PAGE_PWT)
- #define __PAGE_KERNEL_UC_MINUS                (__PAGE_KERNEL | _PAGE_PCD)
- #define __PAGE_KERNEL_VSYSCALL                (__PAGE_KERNEL_RX | _PAGE_USER)
-@@ -109,6 +136,7 @@ extern pteval_t __PAGE_KERNEL, __PAGE_KE
- #define PAGE_KERNEL_RO                        MAKE_GLOBAL(__PAGE_KERNEL_RO)
- #define PAGE_KERNEL_EXEC              MAKE_GLOBAL(__PAGE_KERNEL_EXEC)
- #define PAGE_KERNEL_RX                        MAKE_GLOBAL(__PAGE_KERNEL_RX)
-+#define PAGE_KERNEL_WC                        MAKE_GLOBAL(__PAGE_KERNEL_WC)
- #define PAGE_KERNEL_NOCACHE           MAKE_GLOBAL(__PAGE_KERNEL_NOCACHE)
- #define PAGE_KERNEL_UC_MINUS          MAKE_GLOBAL(__PAGE_KERNEL_UC_MINUS)
- #define PAGE_KERNEL_EXEC_NOCACHE      MAKE_GLOBAL(__PAGE_KERNEL_EXEC_NOCACHE)
-@@ -142,7 +170,7 @@ extern pteval_t __PAGE_KERNEL, __PAGE_KE
-  * ZERO_PAGE is a global shared page that is always zero: used
-  * for zero-mapped memory areas etc..
-  */
--extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)];
-+extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
- #define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
   
- extern spinlock_t pgd_lock;
-@@ -152,30 +180,111 @@ extern struct list_head pgd_list;
-  * The following only work if pte_present() is true.
-  * Undefined behaviour if not..
-  */
--static inline int pte_dirty(pte_t pte)                { return __pte_val(pte) & _PAGE_DIRTY; }
--static inline int pte_young(pte_t pte)                { return __pte_val(pte) & _PAGE_ACCESSED; }
--static inline int pte_write(pte_t pte)                { return __pte_val(pte) & _PAGE_RW; }
--static inline int pte_file(pte_t pte)         { return __pte_val(pte) & _PAGE_FILE; }
--static inline int pte_huge(pte_t pte)         { return __pte_val(pte) & _PAGE_PSE; }
--static inline int pte_global(pte_t pte)       { return 0; }
--static inline int pte_exec(pte_t pte)         { return !(__pte_val(pte) & _PAGE_NX); }
--
--static inline int pmd_large(pmd_t pte) {
--      return (__pmd_val(pte) & (_PAGE_PSE|_PAGE_PRESENT)) ==
--              (_PAGE_PSE|_PAGE_PRESENT);
--}
--
--static inline pte_t pte_mkclean(pte_t pte)    { return __pte_ma(__pte_val(pte) & ~(pteval_t)_PAGE_DIRTY); }
--static inline pte_t pte_mkold(pte_t pte)      { return __pte_ma(__pte_val(pte) & ~(pteval_t)_PAGE_ACCESSED); }
--static inline pte_t pte_wrprotect(pte_t pte)  { return __pte_ma(__pte_val(pte) & ~(pteval_t)_PAGE_RW); }
--static inline pte_t pte_mkexec(pte_t pte)     { return __pte_ma(__pte_val(pte) & ~(pteval_t)_PAGE_NX); }
--static inline pte_t pte_mkdirty(pte_t pte)    { return __pte_ma(__pte_val(pte) | _PAGE_DIRTY); }
--static inline pte_t pte_mkyoung(pte_t pte)    { return __pte_ma(__pte_val(pte) | _PAGE_ACCESSED); }
--static inline pte_t pte_mkwrite(pte_t pte)    { return __pte_ma(__pte_val(pte) | _PAGE_RW); }
--static inline pte_t pte_mkhuge(pte_t pte)     { return __pte_ma(__pte_val(pte) | _PAGE_PSE); }
--static inline pte_t pte_clrhuge(pte_t pte)    { return __pte_ma(__pte_val(pte) & ~(pteval_t)_PAGE_PSE); }
--static inline pte_t pte_mkglobal(pte_t pte)   { return pte; }
--static inline pte_t pte_clrglobal(pte_t pte)  { return pte; }
-+static inline int pte_dirty(pte_t pte)
-+{
-+      return __pte_val(pte) & _PAGE_DIRTY;
-+}
+ #ifdef CONFIG_PCI
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/pgalloc.h       2009-02-16 16:18:36.000000000 +0100
++++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/pgalloc.h    2009-03-16 16:38:05.000000000 +0100
+@@ -1,5 +1,149 @@
+-#ifdef CONFIG_X86_32
+-# include "pgalloc_32.h"
+-#else
+-# include "pgalloc_64.h"
++#ifndef _ASM_X86_PGALLOC_H
++#define _ASM_X86_PGALLOC_H
  +
-+static inline int pte_young(pte_t pte)
-+{
-+      return __pte_val(pte) & _PAGE_ACCESSED;
-+}
++#include <linux/threads.h>
++#include <linux/mm.h>         /* for struct page */
++#include <linux/pagemap.h>
  +
-+static inline int pte_write(pte_t pte)
-+{
-+      return __pte_val(pte) & _PAGE_RW;
-+}
++#include <asm/io.h>           /* for phys_to_virt and page_to_pseudophys */
  +
-+static inline int pte_file(pte_t pte)
-+{
-+      return __pte_val(pte) & _PAGE_FILE;
-+}
++static inline void paravirt_alloc_pte(struct mm_struct *mm, unsigned long pfn)        {}
++static inline void paravirt_alloc_pmd(struct mm_struct *mm, unsigned long pfn)        {}
++static inline void paravirt_alloc_pmd_clone(unsigned long pfn, unsigned long clonepfn,
++                                          unsigned long start, unsigned long count) {}
++static inline void paravirt_alloc_pud(struct mm_struct *mm, unsigned long pfn)        {}
++static inline void paravirt_release_pte(unsigned long pfn) {}
++static inline void paravirt_release_pmd(unsigned long pfn) {}
++static inline void paravirt_release_pud(unsigned long pfn) {}
  +
-+static inline int pte_huge(pte_t pte)
-+{
-+      return __pte_val(pte) & _PAGE_PSE;
-+}
++#ifdef CONFIG_X86_64
++void early_make_page_readonly(void *va, unsigned int feature);
++pmd_t *early_get_pmd(unsigned long va);
++#define make_lowmem_page_readonly make_page_readonly
++#define make_lowmem_page_writable make_page_writable
+ #endif
  +
-+static inline int pte_global(pte_t pte)
-+{
-+      return 0;
-+}
++/*
++ * Allocate and free page tables.
++ */
++extern pgd_t *pgd_alloc(struct mm_struct *);
++extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
  +
-+static inline int pte_exec(pte_t pte)
-+{
-+      return !(__pte_val(pte) & _PAGE_NX);
-+}
++extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);
++extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);
  +
-+static inline int pte_special(pte_t pte)
-+{
-+      return 0;
-+}
++/* Should really implement gc for free page table pages. This could be
++   done with a reference count in struct page. */
  +
-+static inline int pmd_large(pmd_t pte)
++static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
  +{
-+      return (__pmd_val(pte) & (_PAGE_PSE | _PAGE_PRESENT)) ==
-+              (_PAGE_PSE | _PAGE_PRESENT);
++      BUG_ON((unsigned long)pte & (PAGE_SIZE-1));
++      make_lowmem_page_writable(pte, XENFEAT_writable_page_tables);
++      free_page((unsigned long)pte);
  +}
  +
-+static inline pte_t pte_mkclean(pte_t pte)
++extern void __pte_free(pgtable_t);
++static inline void pte_free(struct mm_struct *mm, struct page *pte)
  +{
-+      return __pte_ma(__pte_val(pte) & ~(pteval_t)_PAGE_DIRTY);
++      __pte_free(pte);
  +}
  +
-+static inline pte_t pte_mkold(pte_t pte)
-+{
-+      return __pte_ma(__pte_val(pte) & ~(pteval_t)_PAGE_ACCESSED);
-+}
++extern void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte);
  +
-+static inline pte_t pte_wrprotect(pte_t pte)
++static inline void pmd_populate_kernel(struct mm_struct *mm,
++                                     pmd_t *pmd, pte_t *pte)
  +{
-+      return __pte_ma(__pte_val(pte) & ~(pteval_t)_PAGE_RW);
++      paravirt_alloc_pte(mm, __pa(pte) >> PAGE_SHIFT);
++      set_pmd(pmd, __pmd(__pa(pte) | _PAGE_TABLE));
  +}
  +
-+static inline pte_t pte_mkexec(pte_t pte)
++static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
++                              struct page *pte)
  +{
-+      return __pte_ma(__pte_val(pte) & ~(pteval_t)_PAGE_NX);
-+}
++      unsigned long pfn = page_to_pfn(pte);
  +
-+static inline pte_t pte_mkdirty(pte_t pte)
-+{
-+      return __pte_ma(__pte_val(pte) | _PAGE_DIRTY);
++      paravirt_alloc_pte(mm, pfn);
++      if (PagePinned(virt_to_page(mm->pgd))) {
++              if (!PageHighMem(pte))
++                      BUG_ON(HYPERVISOR_update_va_mapping(
++                        (unsigned long)__va(pfn << PAGE_SHIFT),
++                        pfn_pte(pfn, PAGE_KERNEL_RO), 0));
++#ifndef CONFIG_X86_64
++              else if (!TestSetPagePinned(pte))
++                      kmap_flush_unused();
++#endif
++              set_pmd(pmd, __pmd(((pmdval_t)pfn << PAGE_SHIFT) | _PAGE_TABLE));
++      } else
++              *pmd = __pmd(((pmdval_t)pfn << PAGE_SHIFT) | _PAGE_TABLE);
  +}
  +
-+static inline pte_t pte_mkyoung(pte_t pte)
-+{
-+      return __pte_ma(__pte_val(pte) | _PAGE_ACCESSED);
-+}
++#define pmd_pgtable(pmd) pmd_page(pmd)
  +
-+static inline pte_t pte_mkwrite(pte_t pte)
-+{
-+      return __pte_ma(__pte_val(pte) | _PAGE_RW);
-+}
++#if PAGETABLE_LEVELS > 2
++extern pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr);
++extern void __pmd_free(pgtable_t);
  +
-+static inline pte_t pte_mkhuge(pte_t pte)
++static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
  +{
-+      return __pte_ma(__pte_val(pte) | _PAGE_PSE);
++      BUG_ON((unsigned long)pmd & (PAGE_SIZE-1));
++      __pmd_free(virt_to_page(pmd));
  +}
  +
-+static inline pte_t pte_clrhuge(pte_t pte)
++extern void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd);
++
++#ifdef CONFIG_X86_PAE
++extern void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd);
++#else /* !CONFIG_X86_PAE */
++static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
  +{
-+      return __pte_ma(__pte_val(pte) & ~(pteval_t)_PAGE_PSE);
++      paravirt_alloc_pmd(mm, __pa(pmd) >> PAGE_SHIFT);
++      if (unlikely(PagePinned(virt_to_page((mm)->pgd)))) {
++              BUG_ON(HYPERVISOR_update_va_mapping(
++                             (unsigned long)pmd,
++                             pfn_pte(virt_to_phys(pmd)>>PAGE_SHIFT,
++                                     PAGE_KERNEL_RO), 0));
++              set_pud(pud, __pud(_PAGE_TABLE | __pa(pmd)));
++      } else
++              *pud =  __pud(_PAGE_TABLE | __pa(pmd));
  +}
++#endif        /* CONFIG_X86_PAE */
  +
-+static inline pte_t pte_mkglobal(pte_t pte)
++#if PAGETABLE_LEVELS > 3
++#define __user_pgd(pgd) ((pgd) + PTRS_PER_PGD)
++
++/*
++ * We need to use the batch mode here, but pgd_pupulate() won't be
++ * be called frequently.
++ */
++static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
  +{
-+      return pte;
++      paravirt_alloc_pud(mm, __pa(pud) >> PAGE_SHIFT);
++      if (unlikely(PagePinned(virt_to_page((mm)->pgd)))) {
++              BUG_ON(HYPERVISOR_update_va_mapping(
++                             (unsigned long)pud,
++                             pfn_pte(virt_to_phys(pud)>>PAGE_SHIFT,
++                                     PAGE_KERNEL_RO), 0));
++              set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(pud)));
++              set_pgd(__user_pgd(pgd), __pgd(_PAGE_TABLE | __pa(pud)));
++      } else {
++              *(pgd) =  __pgd(_PAGE_TABLE | __pa(pud));
++              *__user_pgd(pgd) = *(pgd);
++      }
  +}
  +
-+static inline pte_t pte_clrglobal(pte_t pte)
++static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
  +{
-+      return pte;
++      return (pud_t *)pmd_alloc_one(mm, addr);
  +}
  +
-+static inline pte_t pte_mkspecial(pte_t pte)
++static inline void pud_free(struct mm_struct *mm, pud_t *pud)
  +{
-+      return pte;
++      BUG_ON((unsigned long)pud & (PAGE_SIZE-1));
++      __pmd_free(virt_to_page(pud));
  +}
++
++extern void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud);
++#endif        /* PAGETABLE_LEVELS > 3 */
++#endif        /* PAGETABLE_LEVELS > 2 */
++
++#endif        /* _ASM_X86_PGALLOC_H */
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/pgalloc_32.h    2009-03-16 16:33:40.000000000 +0100
++++ /dev/null  1970-01-01 00:00:00.000000000 +0000
+@@ -1,111 +0,0 @@
+-#ifndef _I386_PGALLOC_H
+-#define _I386_PGALLOC_H
+-
+-#include <linux/threads.h>
+-#include <linux/mm.h>         /* for struct page */
+-#include <linux/pagemap.h>
+-#include <asm/tlb.h>
+-#include <asm-generic/tlb.h>
+-#include <asm/io.h>           /* for phys_to_virt and page_to_pseudophys */
+-
+-#define paravirt_alloc_pt(mm, pfn) do { } while (0)
+-#define paravirt_alloc_pd(mm, pfn) do { } while (0)
+-#define paravirt_alloc_pd_clone(pfn, clonepfn, start, count) do { } while (0)
+-#define paravirt_release_pt(pfn) do { } while (0)
+-#define paravirt_release_pd(pfn) do { } while (0)
+-
+-static inline void pmd_populate_kernel(struct mm_struct *mm,
+-                                     pmd_t *pmd, pte_t *pte)
+-{
+-      paravirt_alloc_pt(mm, __pa(pte) >> PAGE_SHIFT);
+-      set_pmd(pmd, __pmd(__pa(pte) | _PAGE_TABLE));
+-}
+-
+-static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *pte)
+-{
+-      unsigned long pfn = page_to_pfn(pte);
+-
+-      paravirt_alloc_pt(mm, pfn);
+-      if (PagePinned(virt_to_page(mm->pgd))) {
+-              if (!PageHighMem(pte))
+-                      BUG_ON(HYPERVISOR_update_va_mapping(
+-                        (unsigned long)__va(pfn << PAGE_SHIFT),
+-                        pfn_pte(pfn, PAGE_KERNEL_RO), 0));
+-              else if (!test_and_set_bit(PG_pinned, &pte->flags))
+-                      kmap_flush_unused();
+-              set_pmd(pmd, __pmd(((pmdval_t)pfn << PAGE_SHIFT) | _PAGE_TABLE));
+-      } else
+-              *pmd = __pmd(((pmdval_t)pfn << PAGE_SHIFT) | _PAGE_TABLE);
+-}
+-#define pmd_pgtable(pmd) pmd_page(pmd)
+-
+-/*
+- * Allocate and free page tables.
+- */
+-extern void pgd_test_and_unpin(pgd_t *);
+-extern pgd_t *pgd_alloc(struct mm_struct *);
+-extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
+-
+-extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);
+-extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);
+-
+-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
+-{
+-      make_lowmem_page_writable(pte, XENFEAT_writable_page_tables);
+-      free_page((unsigned long)pte);
+-}
+-
+-extern void __pte_free(pgtable_t);
+-static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
+-{
+-      __pte_free(pte);
+-}
+-
+-
+-extern void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte);
+-
+-#ifdef CONFIG_X86_PAE
+-/*
+- * In the PAE case we free the pmds as part of the pgd.
+- */
+-extern pmd_t *pmd_alloc_one(struct mm_struct *, unsigned long);
+-
+-extern void __pmd_free(pgtable_t);
+-static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
+-{
+-      BUG_ON((unsigned long)pmd & (PAGE_SIZE-1));
+-      __pmd_free(virt_to_page(pmd));
+-}
+-
+-extern void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd);
+-
+-static inline void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd)
+-{
+-      struct page *page = virt_to_page(pmd);
+-      unsigned long pfn = page_to_pfn(page);
+-
+-      paravirt_alloc_pd(mm, pfn);
+-
+-      /* Note: almost everything apart from _PAGE_PRESENT is
+-         reserved at the pmd (PDPT) level. */
+-      if (PagePinned(virt_to_page(mm->pgd))) {
+-              BUG_ON(PageHighMem(page));
+-              BUG_ON(HYPERVISOR_update_va_mapping(
+-                        (unsigned long)__va(pfn << PAGE_SHIFT),
+-                        pfn_pte(pfn, PAGE_KERNEL_RO), 0));
+-              set_pud(pudp, __pud(__pa(pmd) | _PAGE_PRESENT));
+-      } else
+-              *pudp = __pud(__pa(pmd) | _PAGE_PRESENT);
+-
+-      /*
+-       * According to Intel App note "TLBs, Paging-Structure Caches,
+-       * and Their Invalidation", April 2007, document 317080-001,
+-       * section 8.1: in PAE mode we explicitly have to flush the
+-       * TLB via cr3 if the top-level pgd is changed...
+-       */
+-      if (mm == current->active_mm)
+-              xen_tlb_flush();
+-}
+-#endif        /* CONFIG_X86_PAE */
+-
+-#endif /* _I386_PGALLOC_H */
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/pgalloc_64.h    2009-03-16 16:33:40.000000000 +0100
++++ /dev/null  1970-01-01 00:00:00.000000000 +0000
+@@ -1,179 +0,0 @@
+-#ifndef _X86_64_PGALLOC_H
+-#define _X86_64_PGALLOC_H
+-
+-#include <asm/pda.h>
+-#include <linux/threads.h>
+-#include <linux/mm.h>
+-#include <asm/io.h>           /* for phys_to_virt and page_to_pseudophys */
+-
+-pmd_t *early_get_pmd(unsigned long va);
+-void early_make_page_readonly(void *va, unsigned int feature);
+-
+-#define __user_pgd(pgd) ((pgd) + PTRS_PER_PGD)
+-
+-#define pmd_populate_kernel(mm, pmd, pte) \
+-              set_pmd(pmd, __pmd(_PAGE_TABLE | __pa(pte)))
+-
+-static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
+-{
+-      if (unlikely(PagePinned(virt_to_page((mm)->pgd)))) {
+-              BUG_ON(HYPERVISOR_update_va_mapping(
+-                             (unsigned long)pmd,
+-                             pfn_pte(virt_to_phys(pmd)>>PAGE_SHIFT, 
+-                                     PAGE_KERNEL_RO), 0));
+-              set_pud(pud, __pud(_PAGE_TABLE | __pa(pmd)));
+-      } else {
+-              *(pud) =  __pud(_PAGE_TABLE | __pa(pmd));
+-      }
+-}
+-
+-/*
+- * We need to use the batch mode here, but pgd_pupulate() won't be
+- * be called frequently.
+- */
+-static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
+-{
+-      if (unlikely(PagePinned(virt_to_page((mm)->pgd)))) {
+-              BUG_ON(HYPERVISOR_update_va_mapping(
+-                             (unsigned long)pud,
+-                             pfn_pte(virt_to_phys(pud)>>PAGE_SHIFT, 
+-                                     PAGE_KERNEL_RO), 0));
+-              set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(pud)));
+-              set_pgd(__user_pgd(pgd), __pgd(_PAGE_TABLE | __pa(pud)));
+-      } else {
+-              *(pgd) =  __pgd(_PAGE_TABLE | __pa(pud));
+-              *(__user_pgd(pgd)) = *(pgd);
+-      }
+-}
+-
+-#define pmd_pgtable(pmd) pmd_page(pmd)
+-
+-static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *pte)
+-{
+-      if (unlikely(PagePinned(virt_to_page((mm)->pgd)))) {
+-              BUG_ON(HYPERVISOR_update_va_mapping(
+-                             (unsigned long)__va(page_to_pfn(pte) << PAGE_SHIFT),
+-                             pfn_pte(page_to_pfn(pte), PAGE_KERNEL_RO), 0));
+-              set_pmd(pmd, __pmd(_PAGE_TABLE | (page_to_pfn(pte) << PAGE_SHIFT)));
+-      } else {
+-              *(pmd) = __pmd(_PAGE_TABLE | (page_to_pfn(pte) << PAGE_SHIFT));
+-      }
+-}
+-
+-extern void __pmd_free(pgtable_t);
+-static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
+-{
+-      BUG_ON((unsigned long)pmd & (PAGE_SIZE-1));
+-      __pmd_free(virt_to_page(pmd));
+-}
+-
+-extern pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr);
+-
+-static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
+-{
+-      return (pud_t *)pmd_alloc_one(mm, addr);
+-}
+-
+-static inline void pud_free(struct mm_struct *mm, pud_t *pud)
+-{
+-      BUG_ON((unsigned long)pud & (PAGE_SIZE-1));
+-      __pmd_free(virt_to_page(pud));
+-}
+-
+-static inline void pgd_list_add(pgd_t *pgd)
+-{
+-      struct page *page = virt_to_page(pgd);
+-      unsigned long flags;
+-
+-      spin_lock_irqsave(&pgd_lock, flags);
+-      list_add(&page->lru, &pgd_list);
+-      spin_unlock_irqrestore(&pgd_lock, flags);
+-}
+-
+-static inline void pgd_list_del(pgd_t *pgd)
+-{
+-      struct page *page = virt_to_page(pgd);
+-      unsigned long flags;
+-
+-      spin_lock_irqsave(&pgd_lock, flags);
+-      list_del(&page->lru);
+-      spin_unlock_irqrestore(&pgd_lock, flags);
+-}
+-
+-extern void pgd_test_and_unpin(pgd_t *);
+-
+-static inline pgd_t *pgd_alloc(struct mm_struct *mm)
+-{
+-      /*
+-       * We allocate two contiguous pages for kernel and user.
+-       */
+-      unsigned boundary;
+-      pgd_t *pgd = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_REPEAT, 1);
+-      if (!pgd)
+-              return NULL;
+-      pgd_list_add(pgd);
+-      pgd_test_and_unpin(pgd);
+-      /*
+-       * Copy kernel pointers in from init.
+-       * Could keep a freelist or slab cache of those because the kernel
+-       * part never changes.
+-       */
+-      boundary = pgd_index(__PAGE_OFFSET);
+-      memset(pgd, 0, boundary * sizeof(pgd_t));
+-      memcpy(pgd + boundary,
+-             init_level4_pgt + boundary,
+-             (PTRS_PER_PGD - boundary) * sizeof(pgd_t));
+-
+-      memset(__user_pgd(pgd), 0, PAGE_SIZE); /* clean up user pgd */
+-      /*
+-       * Set level3_user_pgt for vsyscall area
+-       */
+-      __user_pgd(pgd)[pgd_index(VSYSCALL_START)] =
+-              __pgd(__pa_symbol(level3_user_pgt) | _PAGE_TABLE);
+-      return pgd;
+-}
+-
+-static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
+-{
+-      pgd_test_and_unpin(pgd);
+-      pgd_list_del(pgd);
+-      free_pages((unsigned long)pgd, 1);
+-}
+-
+-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
+-{
+-      pte_t *pte = (pte_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
+-      if (pte)
+-              make_page_readonly(pte, XENFEAT_writable_page_tables);
+-
+-      return pte;
+-}
+-
+-extern pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr);
+-
+-/* Should really implement gc for free page table pages. This could be
+-   done with a reference count in struct page. */
+-
+-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
+-{
+-      BUG_ON((unsigned long)pte & (PAGE_SIZE-1));
+-      make_page_writable(pte, XENFEAT_writable_page_tables);
+-      free_page((unsigned long)pte); 
+-}
+-
+-extern void __pte_free(pgtable_t);
+-static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
+-{
+-      __pte_free(pte);
+-}
+-
+-#define __pte_free_tlb(tlb,pte)                               \
+-do {                                                  \
+-      pgtable_page_dtor((pte));                               \
+-      tlb_remove_page((tlb), (pte));                  \
+-} while (0)
+-
+-#define __pmd_free_tlb(tlb,x)   tlb_remove_page((tlb),virt_to_page(x))
+-#define __pud_free_tlb(tlb,x)   tlb_remove_page((tlb),virt_to_page(x))
+-
+-#endif /* _X86_64_PGALLOC_H */
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/pgtable.h       2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/pgtable.h    2009-03-16 16:38:05.000000000 +0100
+@@ -1,17 +1,15 @@
+ #ifndef _ASM_X86_PGTABLE_H
+ #define _ASM_X86_PGTABLE_H
   
- extern pteval_t __supported_pte_mask;
- 
-@@ -202,15 +311,33 @@ static inline pte_t pte_modify(pte_t pte
-       pteval_t val = pte_val(pte);
- 
-       val &= _PAGE_CHG_MASK;
--      val |= pgprot_val(newprot) & __supported_pte_mask;
-+      val |= pgprot_val(newprot) & (~_PAGE_CHG_MASK) & __supported_pte_mask;
+-#define USER_PTRS_PER_PGD     ((TASK_SIZE-1)/PGDIR_SIZE+1)
+ #define FIRST_USER_ADDRESS    0
   
-       return __pte(val);
- }
+-#define _PAGE_BIT_PRESENT     0
+-#define _PAGE_BIT_RW          1
+-#define _PAGE_BIT_USER                2
+-#define _PAGE_BIT_PWT         3
+-#define _PAGE_BIT_PCD         4
+-#define _PAGE_BIT_ACCESSED    5
+-#define _PAGE_BIT_DIRTY               6
+-#define _PAGE_BIT_FILE                6
++#define _PAGE_BIT_PRESENT     0       /* is present */
++#define _PAGE_BIT_RW          1       /* writeable */
++#define _PAGE_BIT_USER                2       /* userspace addressable */
++#define _PAGE_BIT_PWT         3       /* page write through */
++#define _PAGE_BIT_PCD         4       /* page cache disabled */
++#define _PAGE_BIT_ACCESSED    5       /* was accessed (raised by CPU) */
++#define _PAGE_BIT_DIRTY               6       /* was written to (raised by CPU) */
+ #define _PAGE_BIT_PSE         7       /* 4 MB (or 2MB) page */
+ #define _PAGE_BIT_PAT         7       /* on 4KB pages */
+ #define _PAGE_BIT_GLOBAL      8       /* Global TLB entry PPro+ */
+@@ -22,6 +20,14 @@
+ #define _PAGE_BIT_PAT_LARGE   12      /* On 2MB or 1GB pages */
+ #define _PAGE_BIT_NX           63       /* No execute: only valid after cpuid check */
   
--#define pte_pgprot(x) __pgprot(pte_val(x) & (0xfff | _PAGE_NX))
-+/* mprotect needs to preserve PAT bits when updating vm_page_prot */
-+#define pgprot_modify pgprot_modify
-+static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
-+{
-+      pgprotval_t preservebits = pgprot_val(oldprot) & _PAGE_CHG_MASK;
-+      pgprotval_t addbits = pgprot_val(newprot);
-+      return __pgprot(preservebits | addbits);
-+}
++/* If _PAGE_BIT_PRESENT is clear, we use these: */
  +
-+#define pte_pgprot(x) __pgprot(__pte_val(x) & ~PTE_MASK)
- 
- #define canon_pgprot(p) __pgprot(pgprot_val(p) & __supported_pte_mask)
- 
-+#ifndef __ASSEMBLY__
-+#define __HAVE_PHYS_MEM_ACCESS_PROT
-+struct file;
-+pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
-+                              unsigned long size, pgprot_t vma_prot);
-+int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
-+                              unsigned long size, pgprot_t *vma_prot);
-+#endif
++/* set: nonlinear file mapping, saved PTE; unset:swap */
++#define _PAGE_BIT_FILE                _PAGE_BIT_DIRTY
  +
- #define set_pte(ptep, pte)            xen_set_pte(ptep, pte)
- #define set_pte_at(mm, addr, ptep, pte)       xen_set_pte_at(mm, addr, ptep, pte)
- 
-@@ -246,6 +373,9 @@ static inline pte_t pte_modify(pte_t pte
- # include "pgtable_64.h"
++/* if the user mapped it with PROT_NONE; pte_present gives true */
++#define _PAGE_BIT_PROTNONE    _PAGE_BIT_GLOBAL
++
+ /*
+  * Note: we use _AC(1, L) instead of _AC(1, UL) so that we get a
+  * sign-extended value on 32-bit with all 1's in the upper word,
+@@ -48,10 +54,8 @@
+ #define _PAGE_NX      0
   #endif
   
-+#define KERNEL_PGD_BOUNDARY   pgd_index(PAGE_OFFSET)
-+#define KERNEL_PGD_PTRS               (PTRS_PER_PGD - KERNEL_PGD_BOUNDARY)
-+
- #ifndef __ASSEMBLY__
+-/* If _PAGE_PRESENT is clear, we use these: */
+-#define _PAGE_FILE    _PAGE_DIRTY     /* nonlinear file mapping, saved PTE; unset:swap */
+-#define _PAGE_PROTNONE        _PAGE_PSE       /* if the user mapped it with PROT_NONE;
+-                                         pte_present gives true */
++#define _PAGE_FILE    (_AC(1, L)<<_PAGE_BIT_FILE)
++#define _PAGE_PROTNONE        (_AC(1, L)<<_PAGE_BIT_PROTNONE)
   
- enum {
-@@ -312,46 +442,17 @@ static inline void xen_pte_clear(struct 
-  * bit at the same time.
-  */
- #define  __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
--#define ptep_set_access_flags(vma, address, ptep, entry, dirty)               \
--({                                                                    \
--      int __changed = !pte_same(*(ptep), entry);                      \
--      if (__changed && (dirty)) {                                     \
--              if ( likely((vma)->vm_mm == current->mm) ) {            \
--                      BUG_ON(HYPERVISOR_update_va_mapping(address,    \
--                              entry,                                  \
--                              (unsigned long)(vma)->vm_mm->cpu_vm_mask.bits| \
--                                      UVMF_INVLPG|UVMF_MULTI));       \
--              } else {                                                \
--                      xen_l1_entry_update(ptep, entry);               \
--                      flush_tlb_page(vma, address);                   \
--              }                                                       \
--      }                                                               \
--      __changed;                                                      \
--})
-+extern int ptep_set_access_flags(struct vm_area_struct *vma,
-+                               unsigned long address, pte_t *ptep,
-+                               pte_t entry, int dirty);
+ #ifndef __ASSEMBLY__
+ #if defined(CONFIG_X86_64) && CONFIG_XEN_COMPAT <= 0x030002
+@@ -61,20 +65,42 @@ extern unsigned int __kernel_page_user;
+ #endif
+ #endif
   
- #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
--#define ptep_test_and_clear_young(vma, addr, ptep) ({                 \
--      int __ret = 0;                                                  \
--      if (pte_young(*(ptep)))                                         \
--              __ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,          \
--                                         &(ptep)->pte);               \
--      if (__ret)                                                      \
--              pte_update((vma)->vm_mm, addr, ptep);                   \
--      __ret;                                                          \
--})
-+extern int ptep_test_and_clear_young(struct vm_area_struct *vma,
-+                                   unsigned long addr, pte_t *ptep);
+-#define _PAGE_TABLE   (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY)
+-#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY | __kernel_page_user)
++#define _PAGE_TABLE   (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER |        \
++                       _PAGE_ACCESSED | _PAGE_DIRTY)
++#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED |    \
++                       _PAGE_DIRTY | __kernel_page_user)
++
++/* Set of bits not changed in pte_modify */
++#define _PAGE_CHG_MASK        (PTE_MASK | _PAGE_CACHE_MASK | _PAGE_IO |       \
++                       _PAGE_ACCESSED | _PAGE_DIRTY)
   
- #define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
--#define ptep_clear_flush_young(vma, address, ptep)                    \
--({                                                                    \
--      pte_t __pte = *(ptep);                                          \
--      int __young = pte_young(__pte);                                 \
--      __pte = pte_mkold(__pte);                                       \
--      if (PagePinned(virt_to_page((vma)->vm_mm->pgd)))                \
--              (void)ptep_set_access_flags(vma, address, ptep, __pte, __young); \
--      else if (__young)                                               \
--              (ptep)->pte_low = __pte.pte_low;                        \
--      __young;                                                        \
--})
-+extern int ptep_clear_flush_young(struct vm_area_struct *vma,
-+                                unsigned long address, pte_t *ptep);
+-#define _PAGE_CHG_MASK        (PTE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_IO)
++/*
++ * PAT settings are part of the hypervisor interface, which sets the
++ * MSR to 0x050100070406 (i.e. WB, WT, UC-, UC, WC, WP [, UC, UC]).
++ */
++#define _PAGE_CACHE_MASK      (_PAGE_PCD | _PAGE_PWT | _PAGE_PAT)
++#define _PAGE_CACHE_WB                (0)
++#define _PAGE_CACHE_WT                (_PAGE_PWT)
++#define _PAGE_CACHE_WC                (_PAGE_PAT)
++#define _PAGE_CACHE_WP                (_PAGE_PAT | _PAGE_PWT)
++#define _PAGE_CACHE_UC_MINUS  (_PAGE_PCD)
++#define _PAGE_CACHE_UC                (_PAGE_PCD | _PAGE_PWT)
   
- #define __HAVE_ARCH_PTEP_CLEAR_FLUSH
- #define ptep_clear_flush(vma, addr, ptep)                     \
-@@ -370,7 +471,8 @@ static inline void xen_pte_clear(struct 
- })
+ #define PAGE_NONE     __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED)
+-#define PAGE_SHARED   __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX)
++#define PAGE_SHARED   __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \
++                               _PAGE_ACCESSED | _PAGE_NX)
   
- #define __HAVE_ARCH_PTEP_GET_AND_CLEAR
--static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
-+static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
-+                                     pte_t *ptep)
- {
-       pte_t pte = *ptep;
-       if (!pte_none(pte)
-@@ -398,13 +500,29 @@ static inline pte_t ptep_get_and_clear(s
- pte_t xen_ptep_get_and_clear_full(struct vm_area_struct *, unsigned long, pte_t *, int);
+-#define PAGE_SHARED_EXEC      __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED)
+-#define PAGE_COPY_NOEXEC      __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX)
+-#define PAGE_COPY_EXEC                __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
++#define PAGE_SHARED_EXEC      __pgprot(_PAGE_PRESENT | _PAGE_RW |     \
++                                       _PAGE_USER | _PAGE_ACCESSED)
++#define PAGE_COPY_NOEXEC      __pgprot(_PAGE_PRESENT | _PAGE_USER |   \
++                                       _PAGE_ACCESSED | _PAGE_NX)
++#define PAGE_COPY_EXEC                __pgprot(_PAGE_PRESENT | _PAGE_USER |   \
++                                       _PAGE_ACCESSED)
+ #define PAGE_COPY             PAGE_COPY_NOEXEC
+-#define PAGE_READONLY         __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX)
+-#define PAGE_READONLY_EXEC    __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
++#define PAGE_READONLY         __pgprot(_PAGE_PRESENT | _PAGE_USER |   \
++                                       _PAGE_ACCESSED | _PAGE_NX)
++#define PAGE_READONLY_EXEC    __pgprot(_PAGE_PRESENT | _PAGE_USER |   \
++                                       _PAGE_ACCESSED)
   
- #define __HAVE_ARCH_PTEP_SET_WRPROTECT
--static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
-+static inline void ptep_set_wrprotect(struct mm_struct *mm,
-+                                    unsigned long addr, pte_t *ptep)
- {
-       pte_t pte = *ptep;
-       if (pte_write(pte))
-               set_pte_at(mm, addr, ptep, pte_wrprotect(pte));
- }
+ #ifdef CONFIG_X86_32
+ #define _PAGE_KERNEL_EXEC \
+@@ -93,6 +119,7 @@ extern pteval_t __PAGE_KERNEL, __PAGE_KE
+ #define __PAGE_KERNEL_RO              (__PAGE_KERNEL & ~_PAGE_RW)
+ #define __PAGE_KERNEL_RX              (__PAGE_KERNEL_EXEC & ~_PAGE_RW)
+ #define __PAGE_KERNEL_EXEC_NOCACHE    (__PAGE_KERNEL_EXEC | _PAGE_PCD | _PAGE_PWT)
++#define __PAGE_KERNEL_WC              (__PAGE_KERNEL | _PAGE_CACHE_WC)
+ #define __PAGE_KERNEL_NOCACHE         (__PAGE_KERNEL | _PAGE_PCD | _PAGE_PWT)
+ #define __PAGE_KERNEL_UC_MINUS                (__PAGE_KERNEL | _PAGE_PCD)
+ #define __PAGE_KERNEL_VSYSCALL                (__PAGE_KERNEL_RX | _PAGE_USER)
+@@ -109,6 +136,7 @@ extern pteval_t __PAGE_KERNEL, __PAGE_KE
+ #define PAGE_KERNEL_RO                        MAKE_GLOBAL(__PAGE_KERNEL_RO)
+ #define PAGE_KERNEL_EXEC              MAKE_GLOBAL(__PAGE_KERNEL_EXEC)
+ #define PAGE_KERNEL_RX                        MAKE_GLOBAL(__PAGE_KERNEL_RX)
++#define PAGE_KERNEL_WC                        MAKE_GLOBAL(__PAGE_KERNEL_WC)
+ #define PAGE_KERNEL_NOCACHE           MAKE_GLOBAL(__PAGE_KERNEL_NOCACHE)
+ #define PAGE_KERNEL_UC_MINUS          MAKE_GLOBAL(__PAGE_KERNEL_UC_MINUS)
+ #define PAGE_KERNEL_EXEC_NOCACHE      MAKE_GLOBAL(__PAGE_KERNEL_EXEC_NOCACHE)
+@@ -142,7 +170,7 @@ extern pteval_t __PAGE_KERNEL, __PAGE_KE
+  * ZERO_PAGE is a global shared page that is always zero: used
+  * for zero-mapped memory areas etc..
+  */
+-extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)];
++extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
+ #define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
   
-+/*
-+ * clone_pgd_range(pgd_t *dst, pgd_t *src, int count);
-+ *
-+ *  dst - pointer to pgd range anwhere on a pgd page
-+ *  src - ""
-+ *  count - the number of pgds to copy.
-+ *
-+ * dst and src can be on the same page, but the range must not overlap,
-+ * and must not cross a page boundary.
-+ */
-+static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
+ extern spinlock_t pgd_lock;
+@@ -152,30 +180,111 @@ extern struct list_head pgd_list;
+  * The following only work if pte_present() is true.
+  * Undefined behaviour if not..
+  */
+-static inline int pte_dirty(pte_t pte)                { return __pte_val(pte) & _PAGE_DIRTY; }
+-static inline int pte_young(pte_t pte)                { return __pte_val(pte) & _PAGE_ACCESSED; }
+-static inline int pte_write(pte_t pte)                { return __pte_val(pte) & _PAGE_RW; }
+-static inline int pte_file(pte_t pte)         { return __pte_val(pte) & _PAGE_FILE; }
+-static inline int pte_huge(pte_t pte)         { return __pte_val(pte) & _PAGE_PSE; }
+-static inline int pte_global(pte_t pte)       { return 0; }
+-static inline int pte_exec(pte_t pte)         { return !(__pte_val(pte) & _PAGE_NX); }
+-
+-static inline int pmd_large(pmd_t pte) {
+-      return (__pmd_val(pte) & (_PAGE_PSE|_PAGE_PRESENT)) ==
+-              (_PAGE_PSE|_PAGE_PRESENT);
+-}
+-
+-static inline pte_t pte_mkclean(pte_t pte)    { return __pte_ma(__pte_val(pte) & ~(pteval_t)_PAGE_DIRTY); }
+-static inline pte_t pte_mkold(pte_t pte)      { return __pte_ma(__pte_val(pte) & ~(pteval_t)_PAGE_ACCESSED); }
+-static inline pte_t pte_wrprotect(pte_t pte)  { return __pte_ma(__pte_val(pte) & ~(pteval_t)_PAGE_RW); }
+-static inline pte_t pte_mkexec(pte_t pte)     { return __pte_ma(__pte_val(pte) & ~(pteval_t)_PAGE_NX); }
+-static inline pte_t pte_mkdirty(pte_t pte)    { return __pte_ma(__pte_val(pte) | _PAGE_DIRTY); }
+-static inline pte_t pte_mkyoung(pte_t pte)    { return __pte_ma(__pte_val(pte) | _PAGE_ACCESSED); }
+-static inline pte_t pte_mkwrite(pte_t pte)    { return __pte_ma(__pte_val(pte) | _PAGE_RW); }
+-static inline pte_t pte_mkhuge(pte_t pte)     { return __pte_ma(__pte_val(pte) | _PAGE_PSE); }
+-static inline pte_t pte_clrhuge(pte_t pte)    { return __pte_ma(__pte_val(pte) & ~(pteval_t)_PAGE_PSE); }
+-static inline pte_t pte_mkglobal(pte_t pte)   { return pte; }
+-static inline pte_t pte_clrglobal(pte_t pte)  { return pte; }
++static inline int pte_dirty(pte_t pte)
  +{
-+       memcpy(dst, src, count * sizeof(pgd_t));
++      return __pte_val(pte) & _PAGE_DIRTY;
  +}
  +
- #define arch_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable) \
-       xen_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable)
- 
---- a/include/asm-x86/mach-xen/asm/processor.h
-+++ b/include/asm-x86/mach-xen/asm/processor.h
-@@ -3,10 +3,6 @@
- 
- #include <asm/processor-flags.h>
- 
--/* migration helpers, for KVM - will be removed in 2.6.25: */
--#include <asm/vm86.h>
--#define Xgt_desc_struct       desc_ptr
--
- /* Forward declaration, a strange C thing */
- struct task_struct;
- struct mm_struct;
-@@ -24,6 +20,7 @@ struct mm_struct;
- #include <asm/msr.h>
- #include <asm/desc_defs.h>
- #include <asm/nops.h>
++static inline int pte_young(pte_t pte)
++{
++      return __pte_val(pte) & _PAGE_ACCESSED;
++}
  +
- #include <linux/personality.h>
- #include <linux/cpumask.h>
- #include <linux/cache.h>
-@@ -38,16 +35,18 @@ struct mm_struct;
- static inline void *current_text_addr(void)
- {
-       void *pc;
--      asm volatile("mov $1f,%0\n1:":"=r" (pc));
++static inline int pte_write(pte_t pte)
++{
++      return __pte_val(pte) & _PAGE_RW;
++}
  +
-+      asm volatile("mov $1f, %0; 1:":"=r" (pc));
++static inline int pte_file(pte_t pte)
++{
++      return __pte_val(pte) & _PAGE_FILE;
++}
  +
-       return pc;
- }
- 
- #ifdef CONFIG_X86_VSMP
--#define ARCH_MIN_TASKALIGN    (1 << INTERNODE_CACHE_SHIFT)
--#define ARCH_MIN_MMSTRUCT_ALIGN       (1 << INTERNODE_CACHE_SHIFT)
-+# define ARCH_MIN_TASKALIGN           (1 << INTERNODE_CACHE_SHIFT)
-+# define ARCH_MIN_MMSTRUCT_ALIGN      (1 << INTERNODE_CACHE_SHIFT)
- #else
--#define ARCH_MIN_TASKALIGN    16
--#define ARCH_MIN_MMSTRUCT_ALIGN       0
-+# define ARCH_MIN_TASKALIGN           16
-+# define ARCH_MIN_MMSTRUCT_ALIGN      0
- #endif
- 
- /*
-@@ -57,68 +56,80 @@ static inline void *current_text_addr(vo
-  */
- 
- struct cpuinfo_x86 {
--      __u8    x86;            /* CPU family */
--      __u8    x86_vendor;     /* CPU vendor */
--      __u8    x86_model;
--      __u8    x86_mask;
-+      __u8                    x86;            /* CPU family */
-+      __u8                    x86_vendor;     /* CPU vendor */
-+      __u8                    x86_model;
-+      __u8                    x86_mask;
- #ifdef CONFIG_X86_32
--      char    wp_works_ok;    /* It doesn't on 386's */
--      char    hlt_works_ok;   /* Problems on some 486Dx4's and old 386's */
--      char    hard_math;
--      char    rfu;
--      char    fdiv_bug;
--      char    f00f_bug;
--      char    coma_bug;
--      char    pad0;
-+      char                    wp_works_ok;    /* It doesn't on 386's */
++static inline int pte_huge(pte_t pte)
++{
++      return __pte_val(pte) & _PAGE_PSE;
++}
  +
-+      /* Problems on some 486Dx4's and old 386's: */
-+      char                    hlt_works_ok;
-+      char                    hard_math;
-+      char                    rfu;
-+      char                    fdiv_bug;
-+      char                    f00f_bug;
-+      char                    coma_bug;
-+      char                    pad0;
- #else
--      /* number of 4K pages in DTLB/ITLB combined(in pages)*/
--      int     x86_tlbsize;
--      __u8    x86_virt_bits, x86_phys_bits;
--      /* cpuid returned core id bits */
--      __u8    x86_coreid_bits;
--      /* Max extended CPUID function supported */
--      __u32   extended_cpuid_level;
--#endif
--      int     cpuid_level;    /* Maximum supported CPUID level, -1=no CPUID */
--      __u32   x86_capability[NCAPINTS];
--      char    x86_vendor_id[16];
--      char    x86_model_id[64];
--      int     x86_cache_size;  /* in KB - valid for CPUS which support this
--                                  call  */
--      int     x86_cache_alignment;    /* In bytes */
--      int     x86_power;
--      unsigned long loops_per_jiffy;
-+      /* Number of 4K pages in DTLB/ITLB combined(in pages): */
-+      int                      x86_tlbsize;
-+      __u8                    x86_virt_bits;
-+      __u8                    x86_phys_bits;
-+      /* CPUID returned core id bits: */
-+      __u8                    x86_coreid_bits;
-+      /* Max extended CPUID function supported: */
-+      __u32                   extended_cpuid_level;
-+#endif
-+      /* Maximum supported CPUID level, -1=no CPUID: */
-+      int                     cpuid_level;
-+      __u32                   x86_capability[NCAPINTS];
-+      char                    x86_vendor_id[16];
-+      char                    x86_model_id[64];
-+      /* in KB - valid for CPUS which support this call: */
-+      int                     x86_cache_size;
-+      int                     x86_cache_alignment;    /* In bytes */
-+      int                     x86_power;
-+      unsigned long           loops_per_jiffy;
- #ifdef CONFIG_SMP
--      cpumask_t llc_shared_map;       /* cpus sharing the last level cache */
-+      /* cpus sharing the last level cache: */
-+      cpumask_t               llc_shared_map;
- #endif
--      u16 x86_max_cores;              /* cpuid returned max cores value */
--      u16 apicid;
--      u16 x86_clflush_size;
-+      /* cpuid returned max cores value: */
-+      u16                      x86_max_cores;
-+      u16                     apicid;
-+      u16                     initial_apicid;
-+      u16                     x86_clflush_size;
- #ifdef CONFIG_SMP
--      u16 booted_cores;               /* number of cores as seen by OS */
--      u16 phys_proc_id;               /* Physical processor id. */
--      u16 cpu_core_id;                /* Core id */
--      u16 cpu_index;                  /* index into per_cpu list */
-+      /* number of cores as seen by the OS: */
-+      u16                     booted_cores;
-+      /* Physical processor id: */
-+      u16                     phys_proc_id;
-+      /* Core id: */
-+      u16                     cpu_core_id;
-+      /* Index into per_cpu list: */
-+      u16                     cpu_index;
- #endif
- } __attribute__((__aligned__(SMP_CACHE_BYTES)));
- 
--#define X86_VENDOR_INTEL 0
--#define X86_VENDOR_CYRIX 1
--#define X86_VENDOR_AMD 2
--#define X86_VENDOR_UMC 3
--#define X86_VENDOR_NEXGEN 4
--#define X86_VENDOR_CENTAUR 5
--#define X86_VENDOR_TRANSMETA 7
--#define X86_VENDOR_NSC 8
--#define X86_VENDOR_NUM 9
--#define X86_VENDOR_UNKNOWN 0xff
-+#define X86_VENDOR_INTEL      0
-+#define X86_VENDOR_CYRIX      1
-+#define X86_VENDOR_AMD                2
-+#define X86_VENDOR_UMC                3
-+#define X86_VENDOR_CENTAUR    5
-+#define X86_VENDOR_TRANSMETA  7
-+#define X86_VENDOR_NSC                8
-+#define X86_VENDOR_NUM                9
++static inline int pte_global(pte_t pte)
++{
++      return 0;
++}
++
++static inline int pte_exec(pte_t pte)
++{
++      return !(__pte_val(pte) & _PAGE_NX);
++}
++
++static inline int pte_special(pte_t pte)
++{
++      return 0;
++}
++
++static inline int pmd_large(pmd_t pte)
++{
++      return (__pmd_val(pte) & (_PAGE_PSE | _PAGE_PRESENT)) ==
++              (_PAGE_PSE | _PAGE_PRESENT);
++}
  +
-+#define X86_VENDOR_UNKNOWN    0xff
- 
- /*
-  * capabilities of CPUs
-  */
--extern struct cpuinfo_x86 boot_cpu_data;
--extern struct cpuinfo_x86 new_cpu_data;
--extern __u32 cleared_cpu_caps[NCAPINTS];
-+extern struct cpuinfo_x86     boot_cpu_data;
-+extern struct cpuinfo_x86     new_cpu_data;
++static inline pte_t pte_mkclean(pte_t pte)
++{
++      return __pte_ma(__pte_val(pte) & ~(pteval_t)_PAGE_DIRTY);
++}
  +
-+extern __u32                  cleared_cpu_caps[NCAPINTS];
- 
- #ifdef CONFIG_SMP
- DECLARE_PER_CPU(struct cpuinfo_x86, cpu_info);
-@@ -129,7 +140,18 @@ DECLARE_PER_CPU(struct cpuinfo_x86, cpu_
- #define current_cpu_data      boot_cpu_data
- #endif
- 
--void cpu_detect(struct cpuinfo_x86 *c);
-+static inline int hlt_works(int cpu)
++static inline pte_t pte_mkold(pte_t pte)
  +{
-+#ifdef CONFIG_X86_32
-+      return cpu_data(cpu).hlt_works_ok;
-+#else
-+      return 1;
-+#endif
++      return __pte_ma(__pte_val(pte) & ~(pteval_t)_PAGE_ACCESSED);
  +}
  +
-+#define cache_line_size()     (boot_cpu_data.x86_cache_alignment)
++static inline pte_t pte_wrprotect(pte_t pte)
++{
++      return __pte_ma(__pte_val(pte) & ~(pteval_t)_PAGE_RW);
++}
  +
-+extern void cpu_detect(struct cpuinfo_x86 *c);
++static inline pte_t pte_mkexec(pte_t pte)
++{
++      return __pte_ma(__pte_val(pte) & ~(pteval_t)_PAGE_NX);
++}
++
++static inline pte_t pte_mkdirty(pte_t pte)
++{
++      return __pte_ma(__pte_val(pte) | _PAGE_DIRTY);
++}
++
++static inline pte_t pte_mkyoung(pte_t pte)
++{
++      return __pte_ma(__pte_val(pte) | _PAGE_ACCESSED);
++}
++
++static inline pte_t pte_mkwrite(pte_t pte)
++{
++      return __pte_ma(__pte_val(pte) | _PAGE_RW);
++}
++
++static inline pte_t pte_mkhuge(pte_t pte)
++{
++      return __pte_ma(__pte_val(pte) | _PAGE_PSE);
++}
++
++static inline pte_t pte_clrhuge(pte_t pte)
++{
++      return __pte_ma(__pte_val(pte) & ~(pteval_t)_PAGE_PSE);
++}
++
++static inline pte_t pte_mkglobal(pte_t pte)
++{
++      return pte;
++}
++
++static inline pte_t pte_clrglobal(pte_t pte)
++{
++      return pte;
++}
++
++static inline pte_t pte_mkspecial(pte_t pte)
++{
++      return pte;
++}
   
- extern void identify_cpu(struct cpuinfo_x86 *);
- extern void identify_boot_cpu(void);
-@@ -149,12 +171,12 @@ static inline void xen_cpuid(unsigned in
-                            unsigned int *ecx, unsigned int *edx)
- {
-       /* ecx is often an input as well as an output. */
--      __asm__(XEN_CPUID
--              : "=a" (*eax),
--                "=b" (*ebx),
--                "=c" (*ecx),
--                "=d" (*edx)
--              : "0" (*eax), "2" (*ecx));
-+      asm(XEN_CPUID
-+          : "=a" (*eax),
-+            "=b" (*ebx),
-+            "=c" (*ecx),
-+            "=d" (*edx)
-+          : "0" (*eax), "2" (*ecx));
+ extern pteval_t __supported_pte_mask;
+ 
+@@ -202,15 +311,33 @@ static inline pte_t pte_modify(pte_t pte
+       pteval_t val = pte_val(pte);
+ 
+       val &= _PAGE_CHG_MASK;
+-      val |= pgprot_val(newprot) & __supported_pte_mask;
++      val |= pgprot_val(newprot) & (~_PAGE_CHG_MASK) & __supported_pte_mask;
+ 
+       return __pte(val);
   }
   
- static inline void load_cr3(pgd_t *pgdir)
-@@ -166,57 +188,70 @@ static inline void load_cr3(pgd_t *pgdir
- #ifdef CONFIG_X86_32
- /* This is the TSS defined by the hardware. */
- struct x86_hw_tss {
--      unsigned short  back_link, __blh;
--      unsigned long   sp0;
--      unsigned short  ss0, __ss0h;
--      unsigned long   sp1;
--      unsigned short  ss1, __ss1h;    /* ss1 caches MSR_IA32_SYSENTER_CS */
--      unsigned long   sp2;
--      unsigned short  ss2, __ss2h;
--      unsigned long   __cr3;
--      unsigned long   ip;
--      unsigned long   flags;
--      unsigned long   ax, cx, dx, bx;
--      unsigned long   sp, bp, si, di;
--      unsigned short  es, __esh;
--      unsigned short  cs, __csh;
--      unsigned short  ss, __ssh;
--      unsigned short  ds, __dsh;
--      unsigned short  fs, __fsh;
--      unsigned short  gs, __gsh;
--      unsigned short  ldt, __ldth;
--      unsigned short  trace, io_bitmap_base;
-+      unsigned short          back_link, __blh;
-+      unsigned long           sp0;
-+      unsigned short          ss0, __ss0h;
-+      unsigned long           sp1;
-+      /* ss1 caches MSR_IA32_SYSENTER_CS: */
-+      unsigned short          ss1, __ss1h;
-+      unsigned long           sp2;
-+      unsigned short          ss2, __ss2h;
-+      unsigned long           __cr3;
-+      unsigned long           ip;
-+      unsigned long           flags;
-+      unsigned long           ax;
-+      unsigned long           cx;
-+      unsigned long           dx;
-+      unsigned long           bx;
-+      unsigned long           sp;
-+      unsigned long           bp;
-+      unsigned long           si;
-+      unsigned long           di;
-+      unsigned short          es, __esh;
-+      unsigned short          cs, __csh;
-+      unsigned short          ss, __ssh;
-+      unsigned short          ds, __dsh;
-+      unsigned short          fs, __fsh;
-+      unsigned short          gs, __gsh;
-+      unsigned short          ldt, __ldth;
-+      unsigned short          trace;
-+      unsigned short          io_bitmap_base;
+-#define pte_pgprot(x) __pgprot(pte_val(x) & (0xfff | _PAGE_NX))
++/* mprotect needs to preserve PAT bits when updating vm_page_prot */
++#define pgprot_modify pgprot_modify
++static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
++{
++      pgprotval_t preservebits = pgprot_val(oldprot) & _PAGE_CHG_MASK;
++      pgprotval_t addbits = pgprot_val(newprot);
++      return __pgprot(preservebits | addbits);
++}
  +
- } __attribute__((packed));
- extern struct tss_struct doublefault_tss;
- #else
- struct x86_hw_tss {
--      u32 reserved1;
--      u64 sp0;
--      u64 sp1;
--      u64 sp2;
--      u64 reserved2;
--      u64 ist[7];
--      u32 reserved3;
--      u32 reserved4;
--      u16 reserved5;
--      u16 io_bitmap_base;
-+      u32                     reserved1;
-+      u64                     sp0;
-+      u64                     sp1;
-+      u64                     sp2;
-+      u64                     reserved2;
-+      u64                     ist[7];
-+      u32                     reserved3;
-+      u32                     reserved4;
-+      u16                     reserved5;
-+      u16                     io_bitmap_base;
++#define pte_pgprot(x) __pgprot(__pte_val(x) & ~PTE_MASK)
+ 
+ #define canon_pgprot(p) __pgprot(pgprot_val(p) & __supported_pte_mask)
+ 
++#ifndef __ASSEMBLY__
++#define __HAVE_PHYS_MEM_ACCESS_PROT
++struct file;
++pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
++                              unsigned long size, pgprot_t vma_prot);
++int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
++                              unsigned long size, pgprot_t *vma_prot);
++#endif
  +
- } __attribute__((packed)) ____cacheline_aligned;
+ #define set_pte(ptep, pte)            xen_set_pte(ptep, pte)
+ #define set_pte_at(mm, addr, ptep, pte)       xen_set_pte_at(mm, addr, ptep, pte)
+ 
+@@ -246,6 +373,9 @@ static inline pte_t pte_modify(pte_t pte
+ # include "pgtable_64.h"
   #endif
- #endif /* CONFIG_X86_NO_TSS */
   
- /*
-- * Size of io_bitmap.
-+ * IO-bitmap sizes:
-  */
--#define IO_BITMAP_BITS  65536
--#define IO_BITMAP_BYTES (IO_BITMAP_BITS/8)
--#define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long))
--#define IO_BITMAP_OFFSET offsetof(struct tss_struct, io_bitmap)
--#define INVALID_IO_BITMAP_OFFSET 0x8000
--#define INVALID_IO_BITMAP_OFFSET_LAZY 0x9000
-+#define IO_BITMAP_BITS                        65536
-+#define IO_BITMAP_BYTES                       (IO_BITMAP_BITS/8)
-+#define IO_BITMAP_LONGS                       (IO_BITMAP_BYTES/sizeof(long))
-+#define IO_BITMAP_OFFSET              offsetof(struct tss_struct, io_bitmap)
-+#define INVALID_IO_BITMAP_OFFSET      0x8000
-+#define INVALID_IO_BITMAP_OFFSET_LAZY 0x9000
++#define KERNEL_PGD_BOUNDARY   pgd_index(PAGE_OFFSET)
++#define KERNEL_PGD_PTRS               (PTRS_PER_PGD - KERNEL_PGD_BOUNDARY)
++
+ #ifndef __ASSEMBLY__
+ 
+ enum {
+@@ -312,46 +442,17 @@ static inline void xen_pte_clear(struct 
+  * bit at the same time.
+  */
+ #define  __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
+-#define ptep_set_access_flags(vma, address, ptep, entry, dirty)               \
+-({                                                                    \
+-      int __changed = !pte_same(*(ptep), entry);                      \
+-      if (__changed && (dirty)) {                                     \
+-              if ( likely((vma)->vm_mm == current->mm) ) {            \
+-                      BUG_ON(HYPERVISOR_update_va_mapping(address,    \
+-                              entry,                                  \
+-                              (unsigned long)(vma)->vm_mm->cpu_vm_mask.bits| \
+-                                      UVMF_INVLPG|UVMF_MULTI));       \
+-              } else {                                                \
+-                      xen_l1_entry_update(ptep, entry);               \
+-                      flush_tlb_page(vma, address);                   \
+-              }                                                       \
+-      }                                                               \
+-      __changed;                                                      \
+-})
++extern int ptep_set_access_flags(struct vm_area_struct *vma,
++                               unsigned long address, pte_t *ptep,
++                               pte_t entry, int dirty);
+ 
+ #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
+-#define ptep_test_and_clear_young(vma, addr, ptep) ({                 \
+-      int __ret = 0;                                                  \
+-      if (pte_young(*(ptep)))                                         \
+-              __ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,          \
+-                                         &(ptep)->pte);               \
+-      if (__ret)                                                      \
+-              pte_update((vma)->vm_mm, addr, ptep);                   \
+-      __ret;                                                          \
+-})
++extern int ptep_test_and_clear_young(struct vm_area_struct *vma,
++                                   unsigned long addr, pte_t *ptep);
+ 
+ #define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
+-#define ptep_clear_flush_young(vma, address, ptep)                    \
+-({                                                                    \
+-      pte_t __pte = *(ptep);                                          \
+-      int __young = pte_young(__pte);                                 \
+-      __pte = pte_mkold(__pte);                                       \
+-      if (PagePinned(virt_to_page((vma)->vm_mm->pgd)))                \
+-              (void)ptep_set_access_flags(vma, address, ptep, __pte, __young); \
+-      else if (__young)                                               \
+-              (ptep)->pte_low = __pte.pte_low;                        \
+-      __young;                                                        \
+-})
++extern int ptep_clear_flush_young(struct vm_area_struct *vma,
++                                unsigned long address, pte_t *ptep);
   
- #ifndef CONFIG_X86_NO_TSS
- struct tss_struct {
--      struct x86_hw_tss x86_tss;
-+      /*
-+       * The hardware state:
-+       */
-+      struct x86_hw_tss       x86_tss;
+ #define __HAVE_ARCH_PTEP_CLEAR_FLUSH
+ #define ptep_clear_flush(vma, addr, ptep)                     \
+@@ -370,7 +471,8 @@ static inline void xen_pte_clear(struct 
+ })
   
-       /*
-        * The extra 1 is there because the CPU will access an
-@@ -224,136 +259,162 @@ struct tss_struct {
-        * bitmap. The extra byte must be all 1 bits, and must
-        * be within the limit.
-        */
--      unsigned long   io_bitmap[IO_BITMAP_LONGS + 1];
-+      unsigned long           io_bitmap[IO_BITMAP_LONGS + 1];
-       /*
-        * Cache the current maximum and the last task that used the bitmap:
-        */
--      unsigned long io_bitmap_max;
--      struct thread_struct *io_bitmap_owner;
-+      unsigned long           io_bitmap_max;
-+      struct thread_struct    *io_bitmap_owner;
-+
-       /*
--       * pads the TSS to be cacheline-aligned (size is 0x100)
-+       * Pad the TSS to be cacheline-aligned (size is 0x100):
-        */
--      unsigned long __cacheline_filler[35];
-+      unsigned long           __cacheline_filler[35];
-       /*
--       * .. and then another 0x100 bytes for emergency kernel stack
-+       * .. and then another 0x100 bytes for the emergency kernel stack:
-        */
--      unsigned long stack[64];
-+      unsigned long           stack[64];
-+
- } __attribute__((packed));
+ #define __HAVE_ARCH_PTEP_GET_AND_CLEAR
+-static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
++static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
++                                     pte_t *ptep)
+ {
+       pte_t pte = *ptep;
+       if (!pte_none(pte)
+@@ -398,13 +500,29 @@ static inline pte_t ptep_get_and_clear(s
+ pte_t xen_ptep_get_and_clear_full(struct vm_area_struct *, unsigned long, pte_t *, int);
   
- DECLARE_PER_CPU(struct tss_struct, init_tss);
+ #define __HAVE_ARCH_PTEP_SET_WRPROTECT
+-static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
++static inline void ptep_set_wrprotect(struct mm_struct *mm,
++                                    unsigned long addr, pte_t *ptep)
+ {
+       pte_t pte = *ptep;
+       if (pte_write(pte))
+               set_pte_at(mm, addr, ptep, pte_wrprotect(pte));
+ }
   
--/* Save the original ist values for checking stack pointers during debugging */
  +/*
-+ * Save the original ist values for checking stack pointers during debugging
++ * clone_pgd_range(pgd_t *dst, pgd_t *src, int count);
++ *
++ *  dst - pointer to pgd range anwhere on a pgd page
++ *  src - ""
++ *  count - the number of pgds to copy.
++ *
++ * dst and src can be on the same page, but the range must not overlap,
++ * and must not cross a page boundary.
  + */
- struct orig_ist {
--      unsigned long ist[7];
-+      unsigned long           ist[7];
- };
- #endif /* CONFIG_X86_NO_TSS */
++static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
++{
++       memcpy(dst, src, count * sizeof(pgd_t));
++}
++
+ #define arch_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable) \
+       xen_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable)
   
- #define       MXCSR_DEFAULT           0x1f80
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/pgtable-3level.h        2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/pgtable-3level.h     2009-03-16 16:38:05.000000000 +0100
+@@ -8,25 +8,28 @@
+  * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
+  */
   
- struct i387_fsave_struct {
--      u32     cwd;
--      u32     swd;
--      u32     twd;
--      u32     fip;
--      u32     fcs;
--      u32     foo;
--      u32     fos;
--      u32     st_space[20];   /* 8*10 bytes for each FP-reg = 80 bytes */
--      u32     status;         /* software status information */
-+      u32                     cwd;    /* FPU Control Word             */
-+      u32                     swd;    /* FPU Status Word              */
-+      u32                     twd;    /* FPU Tag Word                 */
-+      u32                     fip;    /* FPU IP Offset                */
-+      u32                     fcs;    /* FPU IP Selector              */
-+      u32                     foo;    /* FPU Operand Pointer Offset   */
-+      u32                     fos;    /* FPU Operand Pointer Selector */
-+
-+      /* 8*10 bytes for each FP-reg = 80 bytes:                       */
-+      u32                     st_space[20];
-+
-+      /* Software status information [not touched by FSAVE ]:         */
-+      u32                     status;
- };
+-#define pte_ERROR(e) \
+-      printk("%s:%d: bad pte %p(%016Lx pfn %08lx).\n", __FILE__, __LINE__, \
+-             &(e), __pte_val(e), pte_pfn(e))
+-#define pmd_ERROR(e) \
+-      printk("%s:%d: bad pmd %p(%016Lx pfn %08Lx).\n", __FILE__, __LINE__, \
+-             &(e), __pmd_val(e), (pmd_val(e) & PTE_MASK) >> PAGE_SHIFT)
+-#define pgd_ERROR(e) \
+-      printk("%s:%d: bad pgd %p(%016Lx pfn %08Lx).\n", __FILE__, __LINE__, \
+-             &(e), __pgd_val(e), (pgd_val(e) & PTE_MASK) >> PAGE_SHIFT)
+-
++#define pte_ERROR(e)                                                  \
++      printk("%s:%d: bad pte %p(%016Lx pfn %08lx).\n",                \
++              __FILE__, __LINE__, &(e), __pte_val(e), pte_pfn(e))
++#define pmd_ERROR(e)                                                  \
++      printk("%s:%d: bad pmd %p(%016Lx pfn %08Lx).\n",                \
++             __FILE__, __LINE__, &(e), __pmd_val(e),                  \
++             (pmd_val(e) & PTE_MASK) >> PAGE_SHIFT)
++#define pgd_ERROR(e)                                                  \
++      printk("%s:%d: bad pgd %p(%016Lx pfn %08Lx).\n",                \
++             __FILE__, __LINE__, &(e), __pgd_val(e),                  \
++             (pgd_val(e) & PTE_MASK) >> PAGE_SHIFT)
   
- struct i387_fxsave_struct {
--      u16     cwd;
--      u16     swd;
--      u16     twd;
--      u16     fop;
-+      u16                     cwd; /* Control Word                    */
-+      u16                     swd; /* Status Word                     */
-+      u16                     twd; /* Tag Word                        */
-+      u16                     fop; /* Last Instruction Opcode         */
-       union {
-               struct {
--                      u64     rip;
--                      u64     rdp;
-+                      u64     rip; /* Instruction Pointer             */
-+                      u64     rdp; /* Data Pointer                    */
-               };
-               struct {
--                      u32     fip;
--                      u32     fcs;
--                      u32     foo;
--                      u32     fos;
-+                      u32     fip; /* FPU IP Offset                   */
-+                      u32     fcs; /* FPU IP Selector                 */
-+                      u32     foo; /* FPU Operand Offset              */
-+                      u32     fos; /* FPU Operand Selector            */
-               };
-       };
--      u32     mxcsr;
--      u32     mxcsr_mask;
--      u32     st_space[32];   /* 8*16 bytes for each FP-reg = 128 bytes */
--      u32     xmm_space[64];  /* 16*16 bytes for each XMM-reg = 256 bytes */
--      u32     padding[24];
-+      u32                     mxcsr;          /* MXCSR Register State */
-+      u32                     mxcsr_mask;     /* MXCSR Mask           */
+ static inline int pud_none(pud_t pud)
+ {
+       return __pud_val(pud) == 0;
  +
-+      /* 8*16 bytes for each FP-reg = 128 bytes:                      */
-+      u32                     st_space[32];
+ }
+ static inline int pud_bad(pud_t pud)
+ {
+       return (__pud_val(pud) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER)) != 0;
+ }
  +
-+      /* 16*16 bytes for each XMM-reg = 256 bytes:                    */
-+      u32                     xmm_space[64];
+ static inline int pud_present(pud_t pud)
+ {
+       return __pud_val(pud) & _PAGE_PRESENT;
+@@ -48,12 +51,14 @@ static inline void xen_set_pte(pte_t *pt
+ 
+ static inline void xen_set_pte_atomic(pte_t *ptep, pte_t pte)
+ {
+-      set_64bit((unsigned long long *)(ptep),__pte_val(pte));
++      set_64bit((unsigned long long *)(ptep), __pte_val(pte));
+ }
  +
-+      u32                     padding[24];
+ static inline void xen_set_pmd(pmd_t *pmdp, pmd_t pmd)
+ {
+       xen_l2_entry_update(pmdp, pmd);
+ }
  +
- } __attribute__((aligned(16)));
+ static inline void xen_set_pud(pud_t *pudp, pud_t pud)
+ {
+       xen_l3_entry_update(pudp, pud);
+@@ -92,20 +97,19 @@ static inline void pud_clear(pud_t *pudp
+        * current pgd to avoid unnecessary TLB flushes.
+        */
+       pgd = read_cr3();
+-      if (__pa(pudp) >= pgd && __pa(pudp) < (pgd + sizeof(pgd_t)*PTRS_PER_PGD))
++      if (__pa(pudp) >= pgd && __pa(pudp) <
++          (pgd + sizeof(pgd_t)*PTRS_PER_PGD))
+               xen_tlb_flush();
+ }
+ 
+-#define pud_page(pud) \
+-((struct page *) __va(pud_val(pud) & PAGE_MASK))
++#define pud_page(pud) ((struct page *) __va(pud_val(pud) & PTE_MASK))
+ 
+-#define pud_page_vaddr(pud) \
+-((unsigned long) __va(pud_val(pud) & PAGE_MASK))
++#define pud_page_vaddr(pud) ((unsigned long) __va(pud_val(pud) & PTE_MASK))
+ 
+ 
+ /* Find an entry in the second-level page table.. */
+-#define pmd_offset(pud, address) ((pmd_t *) pud_page(*(pud)) + \
+-                      pmd_index(address))
++#define pmd_offset(pud, address) ((pmd_t *)pud_page(*(pud)) + \
++                                pmd_index(address))
+ 
+ #ifdef CONFIG_SMP
+ static inline pte_t xen_ptep_get_and_clear(pte_t *ptep, pte_t res)
+@@ -150,7 +154,8 @@ static inline int pte_none(pte_t pte)
+  * put the 32 bits of offset into the high part.
+  */
+ #define pte_to_pgoff(pte) ((pte).pte_high)
+-#define pgoff_to_pte(off) ((pte_t) { { .pte_low = _PAGE_FILE, .pte_high = (off) } })
++#define pgoff_to_pte(off)                                             \
++      ((pte_t) { { .pte_low = _PAGE_FILE, .pte_high = (off) } })
+ #define PTE_FILE_MAX_BITS       32
   
- struct i387_soft_struct {
--      u32     cwd;
--      u32     swd;
--      u32     twd;
--      u32     fip;
--      u32     fcs;
--      u32     foo;
--      u32     fos;
--      u32     st_space[20];   /* 8*10 bytes for each FP-reg = 80 bytes */
--      u8      ftop, changed, lookahead, no_update, rm, alimit;
--      struct info     *info;
--      u32     entry_eip;
-+      u32                     cwd;
-+      u32                     swd;
-+      u32                     twd;
-+      u32                     fip;
-+      u32                     fcs;
-+      u32                     foo;
-+      u32                     fos;
-+      /* 8*10 bytes for each FP-reg = 80 bytes: */
-+      u32                     st_space[20];
-+      u8                      ftop;
-+      u8                      changed;
-+      u8                      lookahead;
-+      u8                      no_update;
-+      u8                      rm;
-+      u8                      alimit;
-+      struct info             *info;
-+      u32                     entry_eip;
- };
+ /* Encode and de-code a swap entry */
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/pgtable_32.h    2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/pgtable_32.h 2009-03-16 16:38:05.000000000 +0100
+@@ -38,16 +38,13 @@ void paging_init(void);
+ #ifdef CONFIG_X86_PAE
+ # include <asm/pgtable-3level-defs.h>
+ # define PMD_SIZE     (1UL << PMD_SHIFT)
+-# define PMD_MASK     (~(PMD_SIZE-1))
++# define PMD_MASK     (~(PMD_SIZE - 1))
+ #else
+ # include <asm/pgtable-2level-defs.h>
+ #endif
   
--union i387_union {
-+union thread_xstate {
-       struct i387_fsave_struct        fsave;
-       struct i387_fxsave_struct       fxsave;
--      struct i387_soft_struct         soft;
-+      struct i387_soft_struct         soft;
- };
+ #define PGDIR_SIZE    (1UL << PGDIR_SHIFT)
+-#define PGDIR_MASK    (~(PGDIR_SIZE-1))
+-
+-#define USER_PGD_PTRS (PAGE_OFFSET >> PGDIR_SHIFT)
+-#define KERNEL_PGD_PTRS (PTRS_PER_PGD-USER_PGD_PTRS)
++#define PGDIR_MASK    (~(PGDIR_SIZE - 1))
   
--#ifdef CONFIG_X86_32
--DECLARE_PER_CPU(u8, cpu_llc_id);
--#elif !defined(CONFIG_X86_NO_TSS)
-+#if defined(CONFIG_X86_64) && !defined(CONFIG_X86_NO_TSS)
- DECLARE_PER_CPU(struct orig_ist, orig_ist);
+ /* Just any arbitrary offset to the start of the vmalloc VM area: the
+  * current 8MB value just means that there will be a 8MB "hole" after the
+@@ -56,21 +53,22 @@ void paging_init(void);
+  * The vmalloc() routines leaves a hole of 4kB between each vmalloced
+  * area for the same reason. ;)
+  */
+-#define VMALLOC_OFFSET        (8*1024*1024)
+-#define VMALLOC_START (((unsigned long) high_memory + \
+-                      2*VMALLOC_OFFSET-1) & ~(VMALLOC_OFFSET-1))
++#define VMALLOC_OFFSET        (8 * 1024 * 1024)
++#define VMALLOC_START (((unsigned long)high_memory + 2 * VMALLOC_OFFSET - 1) \
++                       & ~(VMALLOC_OFFSET - 1))
+ #ifdef CONFIG_X86_PAE
+ #define LAST_PKMAP 512
+ #else
+ #define LAST_PKMAP 1024
   #endif
   
- extern void print_cpu_info(struct cpuinfo_x86 *);
-+extern unsigned int xstate_size;
-+extern void free_thread_xstate(struct task_struct *);
-+extern struct kmem_cache *task_xstate_cachep;
- extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
- extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
- extern unsigned short num_cache_leaves;
+-#define PKMAP_BASE ((FIXADDR_BOOT_START - PAGE_SIZE*(LAST_PKMAP + 1)) & PMD_MASK)
++#define PKMAP_BASE ((FIXADDR_BOOT_START - PAGE_SIZE * (LAST_PKMAP + 1))       \
++                  & PMD_MASK)
   
- struct thread_struct {
--/* cached TLS descriptors. */
--      struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES];
--      unsigned long   sp0;
--      unsigned long   sp;
-+      /* Cached TLS descriptors: */
-+      struct desc_struct      tls_array[GDT_ENTRY_TLS_ENTRIES];
-+      unsigned long           sp0;
-+      unsigned long           sp;
- #ifdef CONFIG_X86_32
--      unsigned long   sysenter_cs;
-+      unsigned long           sysenter_cs;
+ #ifdef CONFIG_HIGHMEM
+-# define VMALLOC_END  (PKMAP_BASE-2*PAGE_SIZE)
++# define VMALLOC_END  (PKMAP_BASE - 2 * PAGE_SIZE)
   #else
--      unsigned long   usersp; /* Copy from PDA */
--      unsigned short  es, ds, fsindex, gsindex;
--#endif
--      unsigned long   ip;
--      unsigned long   fs;
--      unsigned long   gs;
--/* Hardware debugging registers */
--      unsigned long   debugreg0;
--      unsigned long   debugreg1;
--      unsigned long   debugreg2;
--      unsigned long   debugreg3;
--      unsigned long   debugreg6;
--      unsigned long   debugreg7;
--/* fault info */
--      unsigned long   cr2, trap_no, error_code;
--/* floating point info */
--      union i387_union        i387 __attribute__((aligned(16)));;
-+      unsigned long           usersp; /* Copy from PDA */
-+      unsigned short          es;
-+      unsigned short          ds;
-+      unsigned short          fsindex;
-+      unsigned short          gsindex;
-+#endif
-+      unsigned long           ip;
-+      unsigned long           fs;
-+      unsigned long           gs;
-+      /* Hardware debugging registers: */
-+      unsigned long           debugreg0;
-+      unsigned long           debugreg1;
-+      unsigned long           debugreg2;
-+      unsigned long           debugreg3;
-+      unsigned long           debugreg6;
-+      unsigned long           debugreg7;
-+      /* Fault info: */
-+      unsigned long           cr2;
-+      unsigned long           trap_no;
-+      unsigned long           error_code;
-+      /* floating point and extended processor state */
-+      union thread_xstate     *xstate;
- #ifdef CONFIG_X86_32
--/* virtual 86 mode info */
-+      /* Virtual 86 mode info */
-       struct vm86_struct __user *vm86_info;
-       unsigned long           screen_bitmap;
-       unsigned long           v86flags, v86mask, saved_sp0;
-       unsigned int            saved_fs, saved_gs;
+-# define VMALLOC_END  (FIXADDR_START-2*PAGE_SIZE)
++# define VMALLOC_END  (FIXADDR_START - 2 * PAGE_SIZE)
   #endif
--/* IO permissions */
--      unsigned long   *io_bitmap_ptr;
--      unsigned long   iopl;
--/* max allowed port in the bitmap, in bytes: */
--      unsigned io_bitmap_max;
-+      /* IO permissions: */
-+      unsigned long           *io_bitmap_ptr;
-+      unsigned long           iopl;
-+      /* Max allowed port in the bitmap, in bytes: */
-+      unsigned                io_bitmap_max;
- /* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set.  */
-       unsigned long   debugctlmsr;
- /* Debug Store - if not 0 points to a DS Save Area configuration;
-@@ -384,12 +445,12 @@ static inline void xen_set_iopl_mask(uns
- }
   
- #ifndef CONFIG_X86_NO_TSS
--static inline void native_load_sp0(struct tss_struct *tss,
--                                 struct thread_struct *thread)
-+static inline void
-+native_load_sp0(struct tss_struct *tss, struct thread_struct *thread)
- {
-       tss->x86_tss.sp0 = thread->sp0;
- #ifdef CONFIG_X86_32
--      /* Only happens when SEP is enabled, no need to test "SEP"arately */
-+      /* Only happens when SEP is enabled, no need to test "SEP"arately: */
-       if (unlikely(tss->x86_tss.ss1 != thread->sysenter_cs)) {
-               tss->x86_tss.ss1 = thread->sysenter_cs;
-               wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
-@@ -403,8 +464,8 @@ static inline void native_load_sp0(struc
- } while (0)
+ /*
+@@ -91,10 +89,10 @@ extern unsigned long pg0[];
+ /* pmd_present doesn't just test the _PAGE_PRESENT bit since wr.p.t.
+    can temporarily clear it. */
+ #define pmd_present(x)        (__pmd_val(x))
+-#define pmd_bad(x)    ((__pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER & ~_PAGE_PRESENT)) != (_KERNPG_TABLE & ~_PAGE_PRESENT))
++#define pmd_bad(x)    ((__pmd_val(x) & (~PTE_MASK & ~_PAGE_USER & ~_PAGE_PRESENT)) != (_KERNPG_TABLE & ~_PAGE_PRESENT))
+ #else
+ #define pmd_present(x)        (__pmd_val(x) & _PAGE_PRESENT)
+-#define pmd_bad(x)    ((__pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE)
++#define pmd_bad(x)    ((__pmd_val(x) & (~PTE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE)
+ #endif
+ 
+ 
+@@ -107,32 +105,18 @@ extern unsigned long pg0[];
   #endif
   
--#define __cpuid xen_cpuid
--#define paravirt_enabled() 0
-+#define __cpuid                       xen_cpuid
-+#define paravirt_enabled()    0
+ /*
+- * clone_pgd_range(pgd_t *dst, pgd_t *src, int count);
+- *
+- *  dst - pointer to pgd range anwhere on a pgd page
+- *  src - ""
+- *  count - the number of pgds to copy.
+- *
+- * dst and src can be on the same page, but the range must not overlap,
+- * and must not cross a page boundary.
++ * Macro to mark a page protection value as "uncacheable".
++ * On processors which do not support it, this is a no-op.
+  */
+-static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
+-{
+-       memcpy(dst, src, count * sizeof(pgd_t));
+-}
+-
+-/*
+- * Macro to mark a page protection value as "uncacheable".  On processors which do not support
+- * it, this is a no-op.
+- */
+-#define pgprot_noncached(prot)        ((boot_cpu_data.x86 > 3)                                          \
+-                               ? (__pgprot(pgprot_val(prot) | _PAGE_PCD | _PAGE_PWT)) : (prot))
++#define pgprot_noncached(prot)                                        \
++      ((boot_cpu_data.x86 > 3)                                \
++       ? (__pgprot(pgprot_val(prot) | _PAGE_PCD | _PAGE_PWT)) \
++       : (prot))
+ 
+ /*
+  * Conversion functions: convert a page and protection to a page entry,
+  * and a page entry and page directory to the page they refer to.
+  */
+-
+ #define mk_pte(page, pgprot)  pfn_pte(page_to_pfn(page), (pgprot))
+ 
+ /*
+@@ -141,20 +125,20 @@ static inline void clone_pgd_range(pgd_t
+  * this macro returns the index of the entry in the pgd page which would
+  * control the given virtual address
+  */
+-#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
+-#define pgd_index_k(addr) pgd_index(addr)
++#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
++#define pgd_index_k(addr) pgd_index((addr))
+ 
+ /*
+  * pgd_offset() returns a (pgd_t *)
+  * pgd_index() is used get the offset into the pgd page's array of pgd_t's;
+  */
+-#define pgd_offset(mm, address) ((mm)->pgd+pgd_index(address))
++#define pgd_offset(mm, address) ((mm)->pgd + pgd_index((address)))
+ 
+ /*
+  * a shortcut which implies the use of the kernel's pgd, instead
+  * of a process's
+  */
+-#define pgd_offset_k(address) pgd_offset(&init_mm, address)
++#define pgd_offset_k(address) pgd_offset(&init_mm, (address))
+ 
+ static inline int pud_large(pud_t pud) { return 0; }
+ 
+@@ -164,8 +148,8 @@ static inline int pud_large(pud_t pud) {
+  * this macro returns the index of the entry in the pmd page which would
+  * control the given virtual address
+  */
+-#define pmd_index(address) \
+-              (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
++#define pmd_index(address)                            \
++      (((address) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))
   
   /*
-  * These special macros can be used to get or set a debugging register
-@@ -424,11 +485,12 @@ static inline void native_load_sp0(struc
-  * enable), so that any CPU's that boot up
-  * after us can get the correct flags.
+  * the pte page can be thought of an array like this: pte_t[PTRS_PER_PTE]
+@@ -173,33 +157,36 @@ static inline int pud_large(pud_t pud) {
+  * this macro returns the index of the entry in the pte page which would
+  * control the given virtual address
    */
--extern unsigned long mmu_cr4_features;
-+extern unsigned long          mmu_cr4_features;
+-#define pte_index(address) \
+-              (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
+-#define pte_offset_kernel(dir, address) \
+-      ((pte_t *) pmd_page_vaddr(*(dir)) +  pte_index(address))
++#define pte_index(address)                                    \
++      (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
++#define pte_offset_kernel(dir, address)                               \
++      ((pte_t *)pmd_page_vaddr(*(dir)) +  pte_index((address)))
   
- static inline void set_in_cr4(unsigned long mask)
- {
-       unsigned cr4;
-+
-       mmu_cr4_features |= mask;
-       cr4 = read_cr4();
-       cr4 |= mask;
-@@ -438,6 +500,7 @@ static inline void set_in_cr4(unsigned l
- static inline void clear_in_cr4(unsigned long mask)
- {
-       unsigned cr4;
-+
-       mmu_cr4_features &= ~mask;
-       cr4 = read_cr4();
-       cr4 &= ~mask;
-@@ -445,42 +508,42 @@ static inline void clear_in_cr4(unsigned
- }
+-#define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT))
++#define pmd_page(pmd) (pfn_to_page(pmd_val((pmd)) >> PAGE_SHIFT))
   
- struct microcode_header {
--      unsigned int hdrver;
--      unsigned int rev;
--      unsigned int date;
--      unsigned int sig;
--      unsigned int cksum;
--      unsigned int ldrver;
--      unsigned int pf;
--      unsigned int datasize;
--      unsigned int totalsize;
--      unsigned int reserved[3];
-+      unsigned int            hdrver;
-+      unsigned int            rev;
-+      unsigned int            date;
-+      unsigned int            sig;
-+      unsigned int            cksum;
-+      unsigned int            ldrver;
-+      unsigned int            pf;
-+      unsigned int            datasize;
-+      unsigned int            totalsize;
-+      unsigned int            reserved[3];
- };
+-#define pmd_page_vaddr(pmd) \
+-              ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
++#define pmd_page_vaddr(pmd)                                   \
++      ((unsigned long)__va(pmd_val((pmd)) & PTE_MASK))
   
- struct microcode {
--      struct microcode_header hdr;
--      unsigned int bits[0];
-+      struct microcode_header hdr;
-+      unsigned int            bits[0];
- };
+ #if defined(CONFIG_HIGHPTE)
+-#define pte_offset_map(dir, address) \
+-      ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE0) + pte_index(address))
+-#define pte_offset_map_nested(dir, address) \
+-      ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE1) + pte_index(address))
+-#define pte_unmap(pte) kunmap_atomic(pte, KM_PTE0)
+-#define pte_unmap_nested(pte) kunmap_atomic(pte, KM_PTE1)
+-#else
+-#define pte_offset_map(dir, address) \
+-      ((pte_t *)page_address(pmd_page(*(dir))) + pte_index(address))
+-#define pte_offset_map_nested(dir, address) pte_offset_map(dir, address)
++#define pte_offset_map(dir, address)                                  \
++      ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE0) +          \
++       pte_index((address)))
++#define pte_offset_map_nested(dir, address)                           \
++      ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE1) +          \
++       pte_index((address)))
++#define pte_unmap(pte) kunmap_atomic((pte), KM_PTE0)
++#define pte_unmap_nested(pte) kunmap_atomic((pte), KM_PTE1)
++#else
++#define pte_offset_map(dir, address)                                  \
++      ((pte_t *)page_address(pmd_page(*(dir))) + pte_index((address)))
++#define pte_offset_map_nested(dir, address) pte_offset_map((dir), (address))
+ #define pte_unmap(pte) do { } while (0)
+ #define pte_unmap_nested(pte) do { } while (0)
+ #endif
   
--typedef struct microcode microcode_t;
--typedef struct microcode_header microcode_header_t;
-+typedef struct microcode      microcode_t;
-+typedef struct microcode_header       microcode_header_t;
+ /* Clear a kernel PTE and flush it from the TLB */
+-#define kpte_clear_flush(ptep, vaddr) do { \
++#define kpte_clear_flush(ptep, vaddr)                                 \
++do {                                                                  \
+       if (HYPERVISOR_update_va_mapping(vaddr, __pte(0), UVMF_INVLPG)) \
+               BUG(); \
+ } while (0)
+@@ -208,7 +195,7 @@ static inline int pud_large(pud_t pud) {
+  * The i386 doesn't have any external MMU info: the kernel page
+  * tables contain all the necessary information.
+  */
+-#define update_mmu_cache(vma,address,pte) do { } while (0)
++#define update_mmu_cache(vma, address, pte) do { } while (0)
   
- /* microcode format is extended from prescott processors */
- struct extended_signature {
--      unsigned int sig;
--      unsigned int pf;
--      unsigned int cksum;
-+      unsigned int            sig;
-+      unsigned int            pf;
-+      unsigned int            cksum;
- };
+ void make_lowmem_page_readonly(void *va, unsigned int feature);
+ void make_lowmem_page_writable(void *va, unsigned int feature);
+@@ -225,7 +212,7 @@ void make_lowmem_page_writable(void *va,
+ #define kern_addr_valid(kaddr)        (0)
+ #endif
   
- struct extended_sigtable {
--      unsigned int count;
--      unsigned int cksum;
--      unsigned int reserved[3];
-+      unsigned int            count;
-+      unsigned int            cksum;
-+      unsigned int            reserved[3];
-       struct extended_signature sigs[0];
- };
+-#define io_remap_pfn_range(vma,from,pfn,size,prot) \
+-direct_remap_pfn_range(vma,from,pfn,size,prot,DOMID_IO)
++#define io_remap_pfn_range(vma, from, pfn, size, prot)                        \
++      direct_remap_pfn_range(vma, from, pfn, size, prot, DOMID_IO)
   
- typedef struct {
--      unsigned long seg;
-+      unsigned long           seg;
- } mm_segment_t;
+ #endif /* _I386_PGTABLE_H */
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/pgtable_64.h    2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/pgtable_64.h 2009-03-16 16:38:05.000000000 +0100
+@@ -31,7 +31,7 @@ extern void paging_init(void);
   
+ #endif /* !__ASSEMBLY__ */
   
-@@ -492,7 +555,7 @@ extern int kernel_thread(int (*fn)(void 
- /* Free all resources held by a thread. */
- extern void release_thread(struct task_struct *);
+-#define SHARED_KERNEL_PMD     1
++#define SHARED_KERNEL_PMD     0
   
--/* Prepare to copy thread state - unlazy all lazy status */
-+/* Prepare to copy thread state - unlazy all lazy state */
- extern void prepare_to_copy(struct task_struct *tsk);
+ /*
+  * PGDIR_SHIFT determines what a top-level page table entry can map
+@@ -59,18 +59,20 @@ extern void paging_init(void);
   
- unsigned long get_wchan(struct task_struct *p);
-@@ -529,118 +592,138 @@ static inline unsigned int cpuid_eax(uns
-       unsigned int eax, ebx, ecx, edx;
+ #ifndef __ASSEMBLY__
   
-       cpuid(op, &eax, &ebx, &ecx, &edx);
-+
-       return eax;
- }
-+
- static inline unsigned int cpuid_ebx(unsigned int op)
- {
-       unsigned int eax, ebx, ecx, edx;
+-#define pte_ERROR(e) \
+-      printk("%s:%d: bad pte %p(%016lx pfn %010lx).\n", __FILE__, __LINE__, \
+-             &(e), __pte_val(e), pte_pfn(e))
+-#define pmd_ERROR(e) \
+-      printk("%s:%d: bad pmd %p(%016lx pfn %010lx).\n", __FILE__, __LINE__, \
+-             &(e), __pmd_val(e), pmd_pfn(e))
+-#define pud_ERROR(e) \
+-      printk("%s:%d: bad pud %p(%016lx pfn %010lx).\n", __FILE__, __LINE__, \
+-             &(e), __pud_val(e), (pud_val(e) & __PHYSICAL_MASK) >> PAGE_SHIFT)
+-#define pgd_ERROR(e) \
+-      printk("%s:%d: bad pgd %p(%016lx pfn %010lx).\n", __FILE__, __LINE__, \
+-             &(e), __pgd_val(e), (pgd_val(e) & __PHYSICAL_MASK) >> PAGE_SHIFT)
++#define pte_ERROR(e)                                                  \
++      printk("%s:%d: bad pte %p(%016lx pfn %010lx).\n",               \
++             __FILE__, __LINE__, &(e), __pte_val(e), pte_pfn(e))
++#define pmd_ERROR(e)                                                  \
++      printk("%s:%d: bad pmd %p(%016lx pfn %010lx).\n",               \
++             __FILE__, __LINE__, &(e), __pmd_val(e), pmd_pfn(e))
++#define pud_ERROR(e)                                                  \
++      printk("%s:%d: bad pud %p(%016lx pfn %010lx).\n",               \
++             __FILE__, __LINE__, &(e), __pud_val(e),                  \
++             (pud_val(e) & __PHYSICAL_MASK) >> PAGE_SHIFT)
++#define pgd_ERROR(e)                                                  \
++      printk("%s:%d: bad pgd %p(%016lx pfn %010lx).\n",               \
++             __FILE__, __LINE__, &(e), __pgd_val(e),                  \
++             (pgd_val(e) & __PHYSICAL_MASK) >> PAGE_SHIFT)
   
-       cpuid(op, &eax, &ebx, &ecx, &edx);
-+
-       return ebx;
+ #define pgd_none(x)   (!__pgd_val(x))
+ #define pud_none(x)   (!__pud_val(x))
+@@ -125,7 +127,7 @@ static inline void xen_set_pgd(pgd_t *pg
+       xen_l4_entry_update(pgdp, pgd);
   }
-+
- static inline unsigned int cpuid_ecx(unsigned int op)
- {
-       unsigned int eax, ebx, ecx, edx;
   
-       cpuid(op, &eax, &ebx, &ecx, &edx);
-+
-       return ecx;
- }
-+
- static inline unsigned int cpuid_edx(unsigned int op)
+-static inline void xen_pgd_clear(pgd_t * pgd)
++static inline void xen_pgd_clear(pgd_t *pgd)
   {
-       unsigned int eax, ebx, ecx, edx;
+       xen_set_pgd(pgd, xen_make_pgd(0));
+       xen_set_pgd(__user_pgd(pgd), xen_make_pgd(0));
+@@ -135,43 +137,43 @@ static inline void xen_pgd_clear(pgd_t *
+ 
+ #endif /* !__ASSEMBLY__ */
+ 
+-#define PMD_SIZE      (_AC(1,UL) << PMD_SHIFT)
+-#define PMD_MASK      (~(PMD_SIZE-1))
+-#define PUD_SIZE      (_AC(1,UL) << PUD_SHIFT)
+-#define PUD_MASK      (~(PUD_SIZE-1))
+-#define PGDIR_SIZE    (_AC(1,UL) << PGDIR_SHIFT)
+-#define PGDIR_MASK    (~(PGDIR_SIZE-1))
++#define PMD_SIZE      (_AC(1, UL) << PMD_SHIFT)
++#define PMD_MASK      (~(PMD_SIZE - 1))
++#define PUD_SIZE      (_AC(1, UL) << PUD_SHIFT)
++#define PUD_MASK      (~(PUD_SIZE - 1))
++#define PGDIR_SIZE    (_AC(1, UL) << PGDIR_SHIFT)
++#define PGDIR_MASK    (~(PGDIR_SIZE - 1))
+ 
+ 
+-#define MAXMEM                 _AC(0x3fffffffffff, UL)
++#define MAXMEM                 _AC(0x00003fffffffffff, UL)
+ #define VMALLOC_START    _AC(0xffffc20000000000, UL)
+ #define VMALLOC_END      _AC(0xffffe1ffffffffff, UL)
+ #define VMEMMAP_START  _AC(0xffffe20000000000, UL)
+-#define MODULES_VADDR    _AC(0xffffffff88000000, UL)
++#define MODULES_VADDR    _AC(0xffffffffa0000000, UL)
+ #define MODULES_END      _AC(0xfffffffffff00000, UL)
+ #define MODULES_LEN   (MODULES_END - MODULES_VADDR)
   
-       cpuid(op, &eax, &ebx, &ecx, &edx);
-+
-       return edx;
- }
+ #ifndef __ASSEMBLY__
   
- /* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
- static inline void rep_nop(void)
+-static inline unsigned long pgd_bad(pgd_t pgd)
++static inline int pgd_bad(pgd_t pgd)
   {
--      __asm__ __volatile__("rep;nop": : :"memory");
-+      asm volatile("rep; nop" ::: "memory");
+-      return __pgd_val(pgd) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER);
++      return (__pgd_val(pgd) & ~(PTE_MASK | _PAGE_USER)) != _KERNPG_TABLE;
   }
   
--/* Stop speculative execution */
-+static inline void cpu_relax(void)
-+{
-+      rep_nop();
-+}
-+
-+/* Stop speculative execution: */
- static inline void sync_core(void)
+-static inline unsigned long pud_bad(pud_t pud)
++static inline int pud_bad(pud_t pud)
   {
-       int tmp;
-+
-       asm volatile("cpuid" : "=a" (tmp) : "0" (1)
--                                        : "ebx", "ecx", "edx", "memory");
-+                   : "ebx", "ecx", "edx", "memory");
+-      return __pud_val(pud) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER);
++      return (__pud_val(pud) & ~(PTE_MASK | _PAGE_USER)) != _KERNPG_TABLE;
   }
   
--#define cpu_relax()   rep_nop()
--
- static inline void __monitor(const void *eax, unsigned long ecx,
--              unsigned long edx)
-+                           unsigned long edx)
+-static inline unsigned long pmd_bad(pmd_t pmd)
++static inline int pmd_bad(pmd_t pmd)
   {
--      /* "monitor %eax,%ecx,%edx;" */
--      asm volatile(
--              ".byte 0x0f,0x01,0xc8;"
--              : :"a" (eax), "c" (ecx), "d"(edx));
-+      /* "monitor %eax, %ecx, %edx;" */
-+      asm volatile(".byte 0x0f, 0x01, 0xc8;"
-+                   :: "a" (eax), "c" (ecx), "d"(edx));
+-      return __pmd_val(pmd) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER);
++      return (__pmd_val(pmd) & ~(PTE_MASK | _PAGE_USER)) != _KERNPG_TABLE;
   }
   
- static inline void __mwait(unsigned long eax, unsigned long ecx)
- {
--      /* "mwait %eax,%ecx;" */
--      asm volatile(
--              ".byte 0x0f,0x01,0xc9;"
--              : :"a" (eax), "c" (ecx));
-+      /* "mwait %eax, %ecx;" */
-+      asm volatile(".byte 0x0f, 0x01, 0xc9;"
-+                   :: "a" (eax), "c" (ecx));
- }
+ #define pte_none(x)   (!(x).pte)
+ #define pte_present(x)        ((x).pte & (_PAGE_PRESENT | _PAGE_PROTNONE))
   
- static inline void __sti_mwait(unsigned long eax, unsigned long ecx)
+-#define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT))       /* FIXME: is this right? */
++#define pages_to_mb(x)        ((x) >> (20 - PAGE_SHIFT))   /* FIXME: is this right? */
+ 
+ #define __pte_mfn(_pte) (((_pte).pte & PTE_MASK) >> PAGE_SHIFT)
+ #define pte_mfn(_pte) ((_pte).pte & _PAGE_PRESENT ? \
+@@ -181,13 +183,13 @@ static inline unsigned long pmd_bad(pmd_
+                      mfn_to_local_pfn(__pte_mfn(_pte)) :      \
+                      __pte_mfn(_pte))
+ 
+-#define pte_page(x)   pfn_to_page(pte_pfn(x))
++#define pte_page(x)   pfn_to_page(pte_pfn((x)))
+ 
+ /*
+  * Macro to mark a page protection value as "uncacheable".
+  */
+-#define pgprot_noncached(prot)        (__pgprot(pgprot_val(prot) | _PAGE_PCD | _PAGE_PWT))
+-
++#define pgprot_noncached(prot)                                        \
++      (__pgprot(pgprot_val((prot)) | _PAGE_PCD | _PAGE_PWT))
+ 
+ /*
+  * Conversion functions: convert a page and protection to a page entry,
+@@ -197,36 +199,39 @@ static inline unsigned long pmd_bad(pmd_
+ /*
+  * Level 4 access.
+  */
+-#define pgd_page_vaddr(pgd) ((unsigned long) __va(pgd_val(pgd) & PTE_MASK))
+-#define pgd_page(pgd)         (pfn_to_page(pgd_val(pgd) >> PAGE_SHIFT))
+-#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
+-#define pgd_offset(mm, addr) ((mm)->pgd + pgd_index(addr))
+-#define pgd_offset_k(address) (init_level4_pgt + pgd_index(address))
++#define pgd_page_vaddr(pgd)                                           \
++      ((unsigned long)__va((unsigned long)pgd_val((pgd)) & PTE_MASK))
++#define pgd_page(pgd)         (pfn_to_page(pgd_val((pgd)) >> PAGE_SHIFT))
++#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
++#define pgd_offset(mm, address)       ((mm)->pgd + pgd_index((address)))
++#define pgd_offset_k(address) (init_level4_pgt + pgd_index((address)))
+ #define pgd_present(pgd) (__pgd_val(pgd) & _PAGE_PRESENT)
+ static inline int pgd_large(pgd_t pgd) { return 0; }
+ #define mk_kernel_pgd(address) __pgd((address) | _KERNPG_TABLE)
+ 
+ /* PUD - Level3 access */
+ /* to find an entry in a page-table-directory. */
+-#define pud_page_vaddr(pud) ((unsigned long) __va(pud_val(pud) & PHYSICAL_PAGE_MASK))
+-#define pud_page(pud)         (pfn_to_page(pud_val(pud) >> PAGE_SHIFT))
+-#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
+-#define pud_offset(pgd, address) ((pud_t *) pgd_page_vaddr(*(pgd)) + pud_index(address))
++#define pud_page_vaddr(pud)                                           \
++      ((unsigned long)__va(pud_val((pud)) & PHYSICAL_PAGE_MASK))
++#define pud_page(pud) (pfn_to_page(pud_val((pud)) >> PAGE_SHIFT))
++#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
++#define pud_offset(pgd, address)                                      \
++      ((pud_t *)pgd_page_vaddr(*(pgd)) + pud_index((address)))
+ #define pud_present(pud) (__pud_val(pud) & _PAGE_PRESENT)
+ 
+ static inline int pud_large(pud_t pte)
   {
--      /* "mwait %eax,%ecx;" */
--      asm volatile(
--              "sti; .byte 0x0f,0x01,0xc9;"
--              : :"a" (eax), "c" (ecx));
-+      trace_hardirqs_on();
-+      /* "mwait %eax, %ecx;" */
-+      asm volatile("sti; .byte 0x0f, 0x01, 0xc9;"
-+                   :: "a" (eax), "c" (ecx));
+-      return (__pud_val(pte) & (_PAGE_PSE|_PAGE_PRESENT)) ==
+-              (_PAGE_PSE|_PAGE_PRESENT);
++      return (__pud_val(pte) & (_PAGE_PSE | _PAGE_PRESENT)) ==
++              (_PAGE_PSE | _PAGE_PRESENT);
   }
   
- extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx);
+ /* PMD  - Level 2 access */
+-#define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_val(pmd) & PTE_MASK))
+-#define pmd_page(pmd)         (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT))
++#define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_val((pmd)) & PTE_MASK))
++#define pmd_page(pmd)         (pfn_to_page(pmd_val((pmd)) >> PAGE_SHIFT))
   
--extern int force_mwait;
-+extern int                    force_mwait;
+-#define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
+-#define pmd_offset(dir, address) ((pmd_t *) pud_page_vaddr(*(dir)) + \
+-                                  pmd_index(address))
++#define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))
++#define pmd_offset(dir, address) ((pmd_t *)pud_page_vaddr(*(dir)) + \
++                                pmd_index(address))
+ #define pmd_none(x)   (!__pmd_val(x))
+ #if CONFIG_XEN_COMPAT <= 0x030002
+ /* pmd_present doesn't just test the _PAGE_PRESENT bit since wr.p.t.
+@@ -235,43 +240,56 @@ static inline int pud_large(pud_t pte)
+ #else
+ #define pmd_present(x)        (__pmd_val(x) & _PAGE_PRESENT)
+ #endif
+-#define pfn_pmd(nr,prot) (__pmd(((nr) << PAGE_SHIFT) | pgprot_val(prot)))
+-#define pmd_pfn(x)  ((pmd_val(x) & __PHYSICAL_MASK) >> PAGE_SHIFT)
++#define pfn_pmd(nr, prot) (__pmd(((nr) << PAGE_SHIFT) | pgprot_val((prot))))
++#define pmd_pfn(x)  ((pmd_val((x)) & __PHYSICAL_MASK) >> PAGE_SHIFT)
   
- extern void select_idle_routine(const struct cpuinfo_x86 *c);
+ #define pte_to_pgoff(pte) ((__pte_val(pte) & PHYSICAL_PAGE_MASK) >> PAGE_SHIFT)
+-#define pgoff_to_pte(off) ((pte_t) { .pte = ((off) << PAGE_SHIFT) | _PAGE_FILE })
++#define pgoff_to_pte(off) ((pte_t) { .pte = ((off) << PAGE_SHIFT) |   \
++                                          _PAGE_FILE })
+ #define PTE_FILE_MAX_BITS __PHYSICAL_MASK_SHIFT
   
--extern unsigned long boot_option_idle_override;
-+extern unsigned long          boot_option_idle_override;
+ /* PTE - Level 1 access. */
   
- extern void enable_sep_cpu(void);
- extern int sysenter_setup(void);
+ /* page, protection -> pte */
+-#define mk_pte(page, pgprot)  pfn_pte(page_to_pfn(page), (pgprot))
+- 
+-#define pte_index(address) \
+-              (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
++#define mk_pte(page, pgprot)  pfn_pte(page_to_pfn((page)), (pgprot))
++
++#define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
+ #define pte_offset_kernel(dir, address) ((pte_t *) pmd_page_vaddr(*(dir)) + \
+-                      pte_index(address))
++                                       pte_index((address)))
   
- /* Defined in head.S */
--extern struct desc_ptr early_gdt_descr;
-+extern struct desc_ptr                early_gdt_descr;
+ /* x86-64 always has all page tables mapped. */
+-#define pte_offset_map(dir,address) pte_offset_kernel(dir,address)
+-#define pte_offset_map_nested(dir,address) pte_offset_kernel(dir,address)
++#define pte_offset_map(dir, address) pte_offset_kernel((dir), (address))
++#define pte_offset_map_nested(dir, address) pte_offset_kernel((dir), (address))
+ #define pte_unmap(pte) /* NOP */
+-#define pte_unmap_nested(pte) /* NOP */ 
++#define pte_unmap_nested(pte) /* NOP */
++
++#define update_mmu_cache(vma, address, pte) do { } while (0)
   
- extern void cpu_set_gdt(int);
- extern void switch_to_new_gdt(void);
- extern void cpu_init(void);
- extern void init_gdt(int cpu);
+-#define update_mmu_cache(vma,address,pte) do { } while (0)
++#define direct_gbpages 0
   
--/* from system description table in BIOS.  Mostly for MCA use, but
-- * others may find it useful. */
--extern unsigned int machine_id;
--extern unsigned int machine_submodel_id;
--extern unsigned int BIOS_revision;
-+static inline void update_debugctlmsr(unsigned long debugctlmsr)
-+{
-+#ifndef CONFIG_X86_DEBUGCTLMSR
-+      if (boot_cpu_data.x86 < 6)
-+              return;
+ /* Encode and de-code a swap entry */
+-#define __swp_type(x)                 (((x).val >> 1) & 0x3f)
+-#define __swp_offset(x)                       ((x).val >> 8)
+-#define __swp_entry(type, offset)     ((swp_entry_t) { ((type) << 1) | ((offset) << 8) })
++#if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE
++#define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1)
++#define SWP_OFFSET_SHIFT (_PAGE_BIT_PROTNONE + 1)
++#else
++#define SWP_TYPE_BITS (_PAGE_BIT_PROTNONE - _PAGE_BIT_PRESENT - 1)
++#define SWP_OFFSET_SHIFT (_PAGE_BIT_FILE + 1)
  +#endif
-+      wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr);
-+}
- 
--/* Boot loader type from the setup header */
--extern int bootloader_type;
-+/*
-+ * from system description table in BIOS. Mostly for MCA use, but
-+ * others may find it useful:
-+ */
-+extern unsigned int           machine_id;
-+extern unsigned int           machine_submodel_id;
-+extern unsigned int           BIOS_revision;
  +
-+/* Boot loader type from the setup header: */
-+extern int                    bootloader_type;
++#define __swp_type(x)                 (((x).val >> (_PAGE_BIT_PRESENT + 1)) \
++                                       & ((1U << SWP_TYPE_BITS) - 1))
++#define __swp_offset(x)                       ((x).val >> SWP_OFFSET_SHIFT)
++#define __swp_entry(type, offset)     ((swp_entry_t) { \
++                                       ((type) << (_PAGE_BIT_PRESENT + 1)) \
++                                       | ((offset) << SWP_OFFSET_SHIFT) })
+ #define __pte_to_swp_entry(pte)               ((swp_entry_t) { __pte_val(pte) })
+ #define __swp_entry_to_pte(x)         ((pte_t) { .pte = (x).val })
   
--extern char ignore_fpu_irq;
--#define cache_line_size() (boot_cpu_data.x86_cache_alignment)
-+extern char                   ignore_fpu_irq;
+-extern int kern_addr_valid(unsigned long addr); 
++extern int kern_addr_valid(unsigned long addr);
+ extern void cleanup_highmap(void);
   
- #define HAVE_ARCH_PICK_MMAP_LAYOUT 1
- #define ARCH_HAS_PREFETCHW
- #define ARCH_HAS_SPINLOCK_PREFETCH
+-#define io_remap_pfn_range(vma, vaddr, pfn, size, prot)               \
+-              direct_remap_pfn_range(vma,vaddr,pfn,size,prot,DOMID_IO)
++#define io_remap_pfn_range(vma, vaddr, pfn, size, prot)       \
++      direct_remap_pfn_range(vma, vaddr, pfn, size, prot, DOMID_IO)
+ 
+ #define HAVE_ARCH_UNMAPPED_AREA
+ #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
+@@ -284,8 +302,10 @@ extern void cleanup_highmap(void);
+ 
+ /* fs/proc/kcore.c */
+ #define       kc_vaddr_to_offset(v) ((v) & __VIRTUAL_MASK)
+-#define       kc_offset_to_vaddr(o) \
+-   (((o) & (1UL << (__VIRTUAL_MASK_SHIFT-1))) ? ((o) | (~__VIRTUAL_MASK)) : (o))
++#define       kc_offset_to_vaddr(o)                           \
++      (((o) & (1UL << (__VIRTUAL_MASK_SHIFT - 1)))    \
++       ? ((o) | ~__VIRTUAL_MASK)                      \
++       : (o))
   
- #ifdef CONFIG_X86_32
--#define BASE_PREFETCH ASM_NOP4
--#define ARCH_HAS_PREFETCH
-+# define BASE_PREFETCH                ASM_NOP4
-+# define ARCH_HAS_PREFETCH
- #else
--#define BASE_PREFETCH "prefetcht0 (%1)"
-+# define BASE_PREFETCH                "prefetcht0 (%1)"
- #endif
+ #define __HAVE_ARCH_PTE_SAME
+ #endif /* !__ASSEMBLY__ */
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/processor.h     2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/processor.h  2009-03-16 16:38:05.000000000 +0100
+@@ -3,10 +3,6 @@
   
--/* Prefetch instructions for Pentium III and AMD Athlon */
--/* It's not worth to care about 3dnow! prefetches for the K6
--   because they are microcoded there and very slow.
--   However we don't do prefetches for pre XP Athlons currently
--   That should be fixed. */
-+/*
-+ * Prefetch instructions for Pentium III (+) and AMD Athlon (+)
-+ *
-+ * It's not worth to care about 3dnow prefetches for the K6
-+ * because they are microcoded there and very slow.
-+ */
- static inline void prefetch(const void *x)
- {
-       alternative_input(BASE_PREFETCH,
-@@ -649,8 +732,11 @@ static inline void prefetch(const void *
-                         "r" (x));
- }
+ #include <asm/processor-flags.h>
   
--/* 3dnow! prefetch to get an exclusive cache line. Useful for
--   spinlocks to avoid one state transition in the cache coherency protocol. */
-+/*
-+ * 3dnow prefetch to get an exclusive cache line.
-+ * Useful for spinlocks to avoid one state transition in the
-+ * cache coherency protocol:
-+ */
- static inline void prefetchw(const void *x)
+-/* migration helpers, for KVM - will be removed in 2.6.25: */
+-#include <asm/vm86.h>
+-#define Xgt_desc_struct       desc_ptr
+-
+ /* Forward declaration, a strange C thing */
+ struct task_struct;
+ struct mm_struct;
+@@ -24,6 +20,7 @@ struct mm_struct;
+ #include <asm/msr.h>
+ #include <asm/desc_defs.h>
+ #include <asm/nops.h>
++
+ #include <linux/personality.h>
+ #include <linux/cpumask.h>
+ #include <linux/cache.h>
+@@ -38,16 +35,18 @@ struct mm_struct;
+ static inline void *current_text_addr(void)
   {
-       alternative_input(BASE_PREFETCH,
-@@ -659,21 +745,25 @@ static inline void prefetchw(const void 
-                         "r" (x));
- }
- 
--#define spin_lock_prefetch(x) prefetchw(x)
-+static inline void spin_lock_prefetch(const void *x)
-+{
-+      prefetchw(x);
-+}
+       void *pc;
+-      asm volatile("mov $1f,%0\n1:":"=r" (pc));
  +
- #ifdef CONFIG_X86_32
- /*
-  * User space process size: 3GB (default).
-  */
--#define TASK_SIZE     (PAGE_OFFSET)
--#define STACK_TOP     TASK_SIZE
--#define STACK_TOP_MAX STACK_TOP
--
--#define INIT_THREAD  {                                                        \
--      .sp0 = sizeof(init_stack) + (long)&init_stack,                  \
--      .vm86_info = NULL,                                              \
--      .sysenter_cs = __KERNEL_CS,                                     \
--      .io_bitmap_ptr = NULL,                                          \
--      .fs = __KERNEL_PERCPU,                                          \
-+#define TASK_SIZE             PAGE_OFFSET
-+#define STACK_TOP             TASK_SIZE
-+#define STACK_TOP_MAX         STACK_TOP
++      asm volatile("mov $1f, %0; 1:":"=r" (pc));
  +
-+#define INIT_THREAD  {                                                          \
-+      .sp0                    = sizeof(init_stack) + (long)&init_stack, \
-+      .vm86_info              = NULL,                                   \
-+      .sysenter_cs            = __KERNEL_CS,                            \
-+      .io_bitmap_ptr          = NULL,                                   \
-+      .fs                     = __KERNEL_PERCPU,                        \
+       return pc;
   }
   
+ #ifdef CONFIG_X86_VSMP
+-#define ARCH_MIN_TASKALIGN    (1 << INTERNODE_CACHE_SHIFT)
+-#define ARCH_MIN_MMSTRUCT_ALIGN       (1 << INTERNODE_CACHE_SHIFT)
++# define ARCH_MIN_TASKALIGN           (1 << INTERNODE_CACHE_SHIFT)
++# define ARCH_MIN_MMSTRUCT_ALIGN      (1 << INTERNODE_CACHE_SHIFT)
+ #else
+-#define ARCH_MIN_TASKALIGN    16
+-#define ARCH_MIN_MMSTRUCT_ALIGN       0
++# define ARCH_MIN_TASKALIGN           16
++# define ARCH_MIN_MMSTRUCT_ALIGN      0
+ #endif
+ 
   /*
-@@ -682,28 +772,15 @@ static inline void prefetchw(const void 
-  * permission bitmap. The extra byte must be all 1 bits, and must
-  * be within the limit.
+@@ -57,68 +56,80 @@ static inline void *current_text_addr(vo
    */
--#define INIT_TSS  {                                                   \
--      .x86_tss = {                                                    \
-+#define INIT_TSS  {                                                     \
-+      .x86_tss = {                                                      \
-               .sp0            = sizeof(init_stack) + (long)&init_stack, \
--              .ss0            = __KERNEL_DS,                          \
--              .ss1            = __KERNEL_CS,                          \
--              .io_bitmap_base = INVALID_IO_BITMAP_OFFSET,             \
--       },                                                             \
--      .io_bitmap      = { [0 ... IO_BITMAP_LONGS] = ~0 },             \
--}
--
--#define start_thread(regs, new_eip, new_esp) do {             \
--      __asm__("movl %0,%%gs": :"r" (0));                      \
--      regs->fs = 0;                                           \
--      set_fs(USER_DS);                                        \
--      regs->ds = __USER_DS;                                   \
--      regs->es = __USER_DS;                                   \
--      regs->ss = __USER_DS;                                   \
--      regs->cs = __USER_CS;                                   \
--      regs->ip = new_eip;                                     \
--      regs->sp = new_esp;                                     \
--} while (0)
--
-+              .ss0            = __KERNEL_DS,                            \
-+              .ss1            = __KERNEL_CS,                            \
-+              .io_bitmap_base = INVALID_IO_BITMAP_OFFSET,               \
-+       },                                                               \
-+      .io_bitmap              = { [0 ... IO_BITMAP_LONGS] = ~0 },       \
-+}
- 
- extern unsigned long thread_saved_pc(struct task_struct *tsk);
   
-@@ -731,24 +808,24 @@ extern unsigned long thread_saved_pc(str
-        __regs__ - 1;                                                   \
- })
+ struct cpuinfo_x86 {
+-      __u8    x86;            /* CPU family */
+-      __u8    x86_vendor;     /* CPU vendor */
+-      __u8    x86_model;
+-      __u8    x86_mask;
++      __u8                    x86;            /* CPU family */
++      __u8                    x86_vendor;     /* CPU vendor */
++      __u8                    x86_model;
++      __u8                    x86_mask;
+ #ifdef CONFIG_X86_32
+-      char    wp_works_ok;    /* It doesn't on 386's */
+-      char    hlt_works_ok;   /* Problems on some 486Dx4's and old 386's */
+-      char    hard_math;
+-      char    rfu;
+-      char    fdiv_bug;
+-      char    f00f_bug;
+-      char    coma_bug;
+-      char    pad0;
++      char                    wp_works_ok;    /* It doesn't on 386's */
++
++      /* Problems on some 486Dx4's and old 386's: */
++      char                    hlt_works_ok;
++      char                    hard_math;
++      char                    rfu;
++      char                    fdiv_bug;
++      char                    f00f_bug;
++      char                    coma_bug;
++      char                    pad0;
+ #else
+-      /* number of 4K pages in DTLB/ITLB combined(in pages)*/
+-      int     x86_tlbsize;
+-      __u8    x86_virt_bits, x86_phys_bits;
+-      /* cpuid returned core id bits */
+-      __u8    x86_coreid_bits;
+-      /* Max extended CPUID function supported */
+-      __u32   extended_cpuid_level;
+-#endif
+-      int     cpuid_level;    /* Maximum supported CPUID level, -1=no CPUID */
+-      __u32   x86_capability[NCAPINTS];
+-      char    x86_vendor_id[16];
+-      char    x86_model_id[64];
+-      int     x86_cache_size;  /* in KB - valid for CPUS which support this
+-                                  call  */
+-      int     x86_cache_alignment;    /* In bytes */
+-      int     x86_power;
+-      unsigned long loops_per_jiffy;
++      /* Number of 4K pages in DTLB/ITLB combined(in pages): */
++      int                      x86_tlbsize;
++      __u8                    x86_virt_bits;
++      __u8                    x86_phys_bits;
++      /* CPUID returned core id bits: */
++      __u8                    x86_coreid_bits;
++      /* Max extended CPUID function supported: */
++      __u32                   extended_cpuid_level;
++#endif
++      /* Maximum supported CPUID level, -1=no CPUID: */
++      int                     cpuid_level;
++      __u32                   x86_capability[NCAPINTS];
++      char                    x86_vendor_id[16];
++      char                    x86_model_id[64];
++      /* in KB - valid for CPUS which support this call: */
++      int                     x86_cache_size;
++      int                     x86_cache_alignment;    /* In bytes */
++      int                     x86_power;
++      unsigned long           loops_per_jiffy;
+ #ifdef CONFIG_SMP
+-      cpumask_t llc_shared_map;       /* cpus sharing the last level cache */
++      /* cpus sharing the last level cache: */
++      cpumask_t               llc_shared_map;
+ #endif
+-      u16 x86_max_cores;              /* cpuid returned max cores value */
+-      u16 apicid;
+-      u16 x86_clflush_size;
++      /* cpuid returned max cores value: */
++      u16                      x86_max_cores;
++      u16                     apicid;
++      u16                     initial_apicid;
++      u16                     x86_clflush_size;
+ #ifdef CONFIG_SMP
+-      u16 booted_cores;               /* number of cores as seen by OS */
+-      u16 phys_proc_id;               /* Physical processor id. */
+-      u16 cpu_core_id;                /* Core id */
+-      u16 cpu_index;                  /* index into per_cpu list */
++      /* number of cores as seen by the OS: */
++      u16                     booted_cores;
++      /* Physical processor id: */
++      u16                     phys_proc_id;
++      /* Core id: */
++      u16                     cpu_core_id;
++      /* Index into per_cpu list: */
++      u16                     cpu_index;
+ #endif
+ } __attribute__((__aligned__(SMP_CACHE_BYTES)));
   
--#define KSTK_ESP(task) (task_pt_regs(task)->sp)
-+#define KSTK_ESP(task)                (task_pt_regs(task)->sp)
+-#define X86_VENDOR_INTEL 0
+-#define X86_VENDOR_CYRIX 1
+-#define X86_VENDOR_AMD 2
+-#define X86_VENDOR_UMC 3
+-#define X86_VENDOR_NEXGEN 4
+-#define X86_VENDOR_CENTAUR 5
+-#define X86_VENDOR_TRANSMETA 7
+-#define X86_VENDOR_NSC 8
+-#define X86_VENDOR_NUM 9
+-#define X86_VENDOR_UNKNOWN 0xff
++#define X86_VENDOR_INTEL      0
++#define X86_VENDOR_CYRIX      1
++#define X86_VENDOR_AMD                2
++#define X86_VENDOR_UMC                3
++#define X86_VENDOR_CENTAUR    5
++#define X86_VENDOR_TRANSMETA  7
++#define X86_VENDOR_NSC                8
++#define X86_VENDOR_NUM                9
++
++#define X86_VENDOR_UNKNOWN    0xff
   
- #else
   /*
-  * User space process size. 47bits minus one guard page.
+  * capabilities of CPUs
    */
--#define TASK_SIZE64   (0x800000000000UL - 4096)
-+#define TASK_SIZE64   ((1UL << 47) - PAGE_SIZE)
+-extern struct cpuinfo_x86 boot_cpu_data;
+-extern struct cpuinfo_x86 new_cpu_data;
+-extern __u32 cleared_cpu_caps[NCAPINTS];
++extern struct cpuinfo_x86     boot_cpu_data;
++extern struct cpuinfo_x86     new_cpu_data;
++
++extern __u32                  cleared_cpu_caps[NCAPINTS];
   
- /* This decides where the kernel will search for a free chunk of vm
-  * space during mmap's.
-  */
--#define IA32_PAGE_OFFSET ((current->personality & ADDR_LIMIT_3GB) ? \
--                         0xc0000000 : 0xFFFFe000)
-+#define IA32_PAGE_OFFSET      ((current->personality & ADDR_LIMIT_3GB) ? \
-+                                      0xc0000000 : 0xFFFFe000)
+ #ifdef CONFIG_SMP
+ DECLARE_PER_CPU(struct cpuinfo_x86, cpu_info);
+@@ -129,7 +140,18 @@ DECLARE_PER_CPU(struct cpuinfo_x86, cpu_
+ #define current_cpu_data      boot_cpu_data
+ #endif
   
--#define TASK_SIZE             (test_thread_flag(TIF_IA32) ? \
--                               IA32_PAGE_OFFSET : TASK_SIZE64)
--#define TASK_SIZE_OF(child)   ((test_tsk_thread_flag(child, TIF_IA32)) ? \
--                                IA32_PAGE_OFFSET : TASK_SIZE64)
-+#define TASK_SIZE             (test_thread_flag(TIF_IA32) ? \
-+                                      IA32_PAGE_OFFSET : TASK_SIZE64)
-+#define TASK_SIZE_OF(child)   ((test_tsk_thread_flag(child, TIF_IA32)) ? \
-+                                      IA32_PAGE_OFFSET : TASK_SIZE64)
+-void cpu_detect(struct cpuinfo_x86 *c);
++static inline int hlt_works(int cpu)
++{
++#ifdef CONFIG_X86_32
++      return cpu_data(cpu).hlt_works_ok;
++#else
++      return 1;
++#endif
++}
++
++#define cache_line_size()     (boot_cpu_data.x86_cache_alignment)
++
++extern void cpu_detect(struct cpuinfo_x86 *c);
   
- #define STACK_TOP             TASK_SIZE
- #define STACK_TOP_MAX         TASK_SIZE64
-@@ -761,33 +838,32 @@ extern unsigned long thread_saved_pc(str
-       .x86_tss.sp0 = (unsigned long)&init_stack + sizeof(init_stack) \
+ extern void identify_cpu(struct cpuinfo_x86 *);
+ extern void identify_boot_cpu(void);
+@@ -149,12 +171,12 @@ static inline void xen_cpuid(unsigned in
+                            unsigned int *ecx, unsigned int *edx)
+ {
+       /* ecx is often an input as well as an output. */
+-      __asm__(XEN_CPUID
+-              : "=a" (*eax),
+-                "=b" (*ebx),
+-                "=c" (*ecx),
+-                "=d" (*edx)
+-              : "0" (*eax), "2" (*ecx));
++      asm(XEN_CPUID
++          : "=a" (*eax),
++            "=b" (*ebx),
++            "=c" (*ecx),
++            "=d" (*edx)
++          : "0" (*eax), "2" (*ecx));
   }
   
--#define start_thread(regs, new_rip, new_rsp) do {                          \
--      asm volatile("movl %0,%%fs; movl %0,%%es; movl %0,%%ds": :"r" (0));  \
--      load_gs_index(0);                                                    \
--      (regs)->ip = (new_rip);                                              \
--      (regs)->sp = (new_rsp);                                              \
--      write_pda(oldrsp, (new_rsp));                                        \
--      (regs)->cs = __USER_CS;                                              \
--      (regs)->ss = __USER_DS;                                              \
--      (regs)->flags = 0x200;                                               \
--      set_fs(USER_DS);                                                     \
--} while (0)
--
+ static inline void load_cr3(pgd_t *pgdir)
+@@ -166,57 +188,70 @@ static inline void load_cr3(pgd_t *pgdir
+ #ifdef CONFIG_X86_32
+ /* This is the TSS defined by the hardware. */
+ struct x86_hw_tss {
+-      unsigned short  back_link, __blh;
+-      unsigned long   sp0;
+-      unsigned short  ss0, __ss0h;
+-      unsigned long   sp1;
+-      unsigned short  ss1, __ss1h;    /* ss1 caches MSR_IA32_SYSENTER_CS */
+-      unsigned long   sp2;
+-      unsigned short  ss2, __ss2h;
+-      unsigned long   __cr3;
+-      unsigned long   ip;
+-      unsigned long   flags;
+-      unsigned long   ax, cx, dx, bx;
+-      unsigned long   sp, bp, si, di;
+-      unsigned short  es, __esh;
+-      unsigned short  cs, __csh;
+-      unsigned short  ss, __ssh;
+-      unsigned short  ds, __dsh;
+-      unsigned short  fs, __fsh;
+-      unsigned short  gs, __gsh;
+-      unsigned short  ldt, __ldth;
+-      unsigned short  trace, io_bitmap_base;
++      unsigned short          back_link, __blh;
++      unsigned long           sp0;
++      unsigned short          ss0, __ss0h;
++      unsigned long           sp1;
++      /* ss1 caches MSR_IA32_SYSENTER_CS: */
++      unsigned short          ss1, __ss1h;
++      unsigned long           sp2;
++      unsigned short          ss2, __ss2h;
++      unsigned long           __cr3;
++      unsigned long           ip;
++      unsigned long           flags;
++      unsigned long           ax;
++      unsigned long           cx;
++      unsigned long           dx;
++      unsigned long           bx;
++      unsigned long           sp;
++      unsigned long           bp;
++      unsigned long           si;
++      unsigned long           di;
++      unsigned short          es, __esh;
++      unsigned short          cs, __csh;
++      unsigned short          ss, __ssh;
++      unsigned short          ds, __dsh;
++      unsigned short          fs, __fsh;
++      unsigned short          gs, __gsh;
++      unsigned short          ldt, __ldth;
++      unsigned short          trace;
++      unsigned short          io_bitmap_base;
++
+ } __attribute__((packed));
+ extern struct tss_struct doublefault_tss;
+ #else
+ struct x86_hw_tss {
+-      u32 reserved1;
+-      u64 sp0;
+-      u64 sp1;
+-      u64 sp2;
+-      u64 reserved2;
+-      u64 ist[7];
+-      u32 reserved3;
+-      u32 reserved4;
+-      u16 reserved5;
+-      u16 io_bitmap_base;
++      u32                     reserved1;
++      u64                     sp0;
++      u64                     sp1;
++      u64                     sp2;
++      u64                     reserved2;
++      u64                     ist[7];
++      u32                     reserved3;
++      u32                     reserved4;
++      u16                     reserved5;
++      u16                     io_bitmap_base;
++
+ } __attribute__((packed)) ____cacheline_aligned;
+ #endif
+ #endif /* CONFIG_X86_NO_TSS */
+ 
   /*
-  * Return saved PC of a blocked thread.
-  * What is this good for? it will be always the scheduler or ret_from_fork.
+- * Size of io_bitmap.
++ * IO-bitmap sizes:
    */
--#define thread_saved_pc(t) (*(unsigned long *)((t)->thread.sp - 8))
-+#define thread_saved_pc(t)    (*(unsigned long *)((t)->thread.sp - 8))
- 
--#define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1)
--#define KSTK_ESP(tsk) -1 /* sorry. doesn't work for syscall. */
-+#define task_pt_regs(tsk)     ((struct pt_regs *)(tsk)->thread.sp0 - 1)
-+#define KSTK_ESP(tsk)         -1 /* sorry. doesn't work for syscall. */
- #endif /* CONFIG_X86_64 */
+-#define IO_BITMAP_BITS  65536
+-#define IO_BITMAP_BYTES (IO_BITMAP_BITS/8)
+-#define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long))
+-#define IO_BITMAP_OFFSET offsetof(struct tss_struct, io_bitmap)
+-#define INVALID_IO_BITMAP_OFFSET 0x8000
+-#define INVALID_IO_BITMAP_OFFSET_LAZY 0x9000
++#define IO_BITMAP_BITS                        65536
++#define IO_BITMAP_BYTES                       (IO_BITMAP_BITS/8)
++#define IO_BITMAP_LONGS                       (IO_BITMAP_BYTES/sizeof(long))
++#define IO_BITMAP_OFFSET              offsetof(struct tss_struct, io_bitmap)
++#define INVALID_IO_BITMAP_OFFSET      0x8000
++#define INVALID_IO_BITMAP_OFFSET_LAZY 0x9000
   
--/* This decides where the kernel will search for a free chunk of vm
-+extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
-+                                             unsigned long new_sp);
-+
-+/*
-+ * This decides where the kernel will search for a free chunk of vm
-  * space during mmap's.
-  */
- #define TASK_UNMAPPED_BASE    (PAGE_ALIGN(TASK_SIZE / 3))
+ #ifndef CONFIG_X86_NO_TSS
+ struct tss_struct {
+-      struct x86_hw_tss x86_tss;
++      /*
++       * The hardware state:
++       */
++      struct x86_hw_tss       x86_tss;
   
--#define KSTK_EIP(task) (task_pt_regs(task)->ip)
-+#define KSTK_EIP(task)                (task_pt_regs(task)->ip)
+       /*
+        * The extra 1 is there because the CPU will access an
+@@ -224,136 +259,162 @@ struct tss_struct {
+        * bitmap. The extra byte must be all 1 bits, and must
+        * be within the limit.
+        */
+-      unsigned long   io_bitmap[IO_BITMAP_LONGS + 1];
++      unsigned long           io_bitmap[IO_BITMAP_LONGS + 1];
+       /*
+        * Cache the current maximum and the last task that used the bitmap:
+        */
+-      unsigned long io_bitmap_max;
+-      struct thread_struct *io_bitmap_owner;
++      unsigned long           io_bitmap_max;
++      struct thread_struct    *io_bitmap_owner;
  +
-+/* Get/set a process' ability to use the timestamp counter instruction */
-+#define GET_TSC_CTL(adr)      get_tsc_mode((adr))
-+#define SET_TSC_CTL(val)      set_tsc_mode((val))
+       /*
+-       * pads the TSS to be cacheline-aligned (size is 0x100)
++       * Pad the TSS to be cacheline-aligned (size is 0x100):
+        */
+-      unsigned long __cacheline_filler[35];
++      unsigned long           __cacheline_filler[35];
+       /*
+-       * .. and then another 0x100 bytes for emergency kernel stack
++       * .. and then another 0x100 bytes for the emergency kernel stack:
+        */
+-      unsigned long stack[64];
++      unsigned long           stack[64];
  +
-+extern int get_tsc_mode(unsigned long adr);
-+extern int set_tsc_mode(unsigned int val);
- 
- #endif
---- a/include/asm-x86/mach-xen/asm/segment.h
-+++ b/include/asm-x86/mach-xen/asm/segment.h
-@@ -191,13 +191,14 @@
- #define SEGMENT_TI_MASK               0x4
+ } __attribute__((packed));
   
- #define IDT_ENTRIES 256
-+#define NUM_EXCEPTION_VECTORS 32
- #define GDT_SIZE (GDT_ENTRIES * 8)
- #define GDT_ENTRY_TLS_ENTRIES 3
- #define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES * 8)
+ DECLARE_PER_CPU(struct tss_struct, init_tss);
   
- #ifdef __KERNEL__
- #ifndef __ASSEMBLY__
--extern const char early_idt_handlers[IDT_ENTRIES][10];
-+extern const char early_idt_handlers[NUM_EXCEPTION_VECTORS][10];
- #endif
- #endif
+-/* Save the original ist values for checking stack pointers during debugging */
++/*
++ * Save the original ist values for checking stack pointers during debugging
++ */
+ struct orig_ist {
+-      unsigned long ist[7];
++      unsigned long           ist[7];
+ };
+ #endif /* CONFIG_X86_NO_TSS */
   
---- a/include/asm-x86/mach-xen/asm/smp_32.h
-+++ /dev/null
-@@ -1,178 +0,0 @@
--#ifndef __ASM_SMP_H
--#define __ASM_SMP_H
--
--#ifndef __ASSEMBLY__
--#include <linux/cpumask.h>
--#include <linux/init.h>
--
--/*
-- * We need the APIC definitions automatically as part of 'smp.h'
-- */
--#ifdef CONFIG_X86_LOCAL_APIC
--# include <asm/mpspec.h>
--# include <asm/apic.h>
--# ifdef CONFIG_X86_IO_APIC
--#  include <asm/io_apic.h>
--# endif
--#endif
--
--#define cpu_callout_map cpu_possible_map
--#define cpu_callin_map cpu_possible_map
--
--extern int smp_num_siblings;
--extern unsigned int num_processors;
--
--extern void smp_alloc_memory(void);
--extern void lock_ipi_call_lock(void);
--extern void unlock_ipi_call_lock(void);
--
--extern void (*mtrr_hook) (void);
--extern void zap_low_mappings (void);
--
--DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);
--DECLARE_PER_CPU(cpumask_t, cpu_core_map);
--DECLARE_PER_CPU(u8, cpu_llc_id);
--DECLARE_PER_CPU(u8, x86_cpu_to_apicid);
--
--#ifdef CONFIG_HOTPLUG_CPU
--extern void cpu_exit_clear(void);
--extern void cpu_uninit(void);
--#endif
--
--#ifdef CONFIG_SMP
--
--#ifndef CONFIG_XEN
--
--/* Globals due to paravirt */
--extern void set_cpu_sibling_map(int cpu);
--
--struct smp_ops
--{
--      void (*smp_prepare_boot_cpu)(void);
--      void (*smp_prepare_cpus)(unsigned max_cpus);
--      int (*cpu_up)(unsigned cpu);
--      void (*smp_cpus_done)(unsigned max_cpus);
--
--      void (*smp_send_stop)(void);
--      void (*smp_send_reschedule)(int cpu);
--      int (*smp_call_function_mask)(cpumask_t mask,
--                                    void (*func)(void *info), void *info,
--                                    int wait);
--};
--
--extern struct smp_ops smp_ops;
--
--static inline void smp_prepare_boot_cpu(void)
--{
--      smp_ops.smp_prepare_boot_cpu();
--}
--static inline void smp_prepare_cpus(unsigned int max_cpus)
--{
--      smp_ops.smp_prepare_cpus(max_cpus);
--}
--static inline int __cpu_up(unsigned int cpu)
--{
--      return smp_ops.cpu_up(cpu);
--}
--static inline void smp_cpus_done(unsigned int max_cpus)
--{
--      smp_ops.smp_cpus_done(max_cpus);
--}
--
--static inline void smp_send_stop(void)
--{
--      smp_ops.smp_send_stop();
--}
--static inline void smp_send_reschedule(int cpu)
--{
--      smp_ops.smp_send_reschedule(cpu);
--}
--static inline int smp_call_function_mask(cpumask_t mask,
--                                       void (*func) (void *info), void *info,
--                                       int wait)
--{
--      return smp_ops.smp_call_function_mask(mask, func, info, wait);
--}
--
--void native_smp_prepare_boot_cpu(void);
--void native_smp_prepare_cpus(unsigned int max_cpus);
--int native_cpu_up(unsigned int cpunum);
--void native_smp_cpus_done(unsigned int max_cpus);
--
--#ifndef CONFIG_PARAVIRT
--#define startup_ipi_hook(phys_apicid, start_eip, start_esp) do { } while (0)
--#endif
--
--#else /* CONFIG_XEN */
--
--void xen_smp_send_stop(void);
--void xen_smp_send_reschedule(int cpu);
--int xen_smp_call_function_mask(cpumask_t mask,
--                             void (*func) (void *info), void *info,
--                             int wait);
--
--#define smp_send_stop         xen_smp_send_stop
--#define smp_send_reschedule   xen_smp_send_reschedule
--#define smp_call_function_mask        xen_smp_call_function_mask
--
--extern void prefill_possible_map(void);
--
--#endif /* CONFIG_XEN */
--
--extern int __cpu_disable(void);
--extern void __cpu_die(unsigned int cpu);
--
--/*
-- * This function is needed by all SMP systems. It must _always_ be valid
-- * from the initial startup. We map APIC_BASE very early in page_setup(),
-- * so this is correct in the x86 case.
-- */
--DECLARE_PER_CPU(int, cpu_number);
--#define raw_smp_processor_id() (x86_read_percpu(cpu_number))
--
--#define cpu_physical_id(cpu)  per_cpu(x86_cpu_to_apicid, cpu)
--
--#define safe_smp_processor_id() smp_processor_id()
--
--/* We don't mark CPUs online until __cpu_up(), so we need another measure */
--static inline int num_booting_cpus(void)
--{
--      return cpus_weight(cpu_callout_map);
--}
--
--#else /* CONFIG_SMP */
--
--#define safe_smp_processor_id()               0
--#define cpu_physical_id(cpu)          boot_cpu_physical_apicid
--
--#endif /* !CONFIG_SMP */
--
--#ifdef CONFIG_X86_LOCAL_APIC
--
--static __inline int logical_smp_processor_id(void)
--{
--      /* we don't want to mark this access volatile - bad code generation */
--      return GET_APIC_LOGICAL_ID(*(u32 *)(APIC_BASE + APIC_LDR));
--}
--
--# ifdef APIC_DEFINITION
--extern int hard_smp_processor_id(void);
--# else
--#  include <mach_apicdef.h>
--static inline int hard_smp_processor_id(void)
--{
--      /* we don't want to mark this access volatile - bad code generation */
--      return GET_APIC_ID(*(u32 *)(APIC_BASE + APIC_ID));
--}
--# endif /* APIC_DEFINITION */
--
--#else /* CONFIG_X86_LOCAL_APIC */
--
--# ifndef CONFIG_SMP
--#  define hard_smp_processor_id()     0
--# endif
--
--#endif /* CONFIG_X86_LOCAL_APIC */
--
--#endif /* !ASSEMBLY */
--#endif
---- a/include/asm-x86/mach-xen/asm/smp_64.h
-+++ /dev/null
-@@ -1,103 +0,0 @@
--#ifndef __ASM_SMP_H
--#define __ASM_SMP_H
--
--#include <linux/cpumask.h>
--#include <linux/init.h>
--
--#ifdef CONFIG_X86_LOCAL_APIC
--/*
-- * We need the APIC definitions automatically as part of 'smp.h'
-- */
--#include <asm/apic.h>
--#ifdef CONFIG_X86_IO_APIC
--#include <asm/io_apic.h>
--#endif
--#include <asm/mpspec.h>
--#endif
--#include <asm/pda.h>
--#include <asm/thread_info.h>
--
--extern cpumask_t cpu_initialized;
--
--extern int smp_num_siblings;
--extern unsigned int num_processors;
--
--extern void smp_alloc_memory(void);
--extern void lock_ipi_call_lock(void);
--extern void unlock_ipi_call_lock(void);
--
--extern int smp_call_function_mask(cpumask_t mask, void (*func)(void *),
--                                void *info, int wait);
--
--DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);
--DECLARE_PER_CPU(cpumask_t, cpu_core_map);
--DECLARE_PER_CPU(u16, cpu_llc_id);
--DECLARE_PER_CPU(u16, x86_cpu_to_apicid);
--DECLARE_PER_CPU(u16, x86_bios_cpu_apicid);
--
--#ifdef CONFIG_X86_LOCAL_APIC
--static inline int cpu_present_to_apicid(int mps_cpu)
--{
--      if (cpu_present(mps_cpu))
--              return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu);
--      else
--              return BAD_APICID;
--}
+ #define       MXCSR_DEFAULT           0x1f80
+ 
+ struct i387_fsave_struct {
+-      u32     cwd;
+-      u32     swd;
+-      u32     twd;
+-      u32     fip;
+-      u32     fcs;
+-      u32     foo;
+-      u32     fos;
+-      u32     st_space[20];   /* 8*10 bytes for each FP-reg = 80 bytes */
+-      u32     status;         /* software status information */
++      u32                     cwd;    /* FPU Control Word             */
++      u32                     swd;    /* FPU Status Word              */
++      u32                     twd;    /* FPU Tag Word                 */
++      u32                     fip;    /* FPU IP Offset                */
++      u32                     fcs;    /* FPU IP Selector              */
++      u32                     foo;    /* FPU Operand Pointer Offset   */
++      u32                     fos;    /* FPU Operand Pointer Selector */
++
++      /* 8*10 bytes for each FP-reg = 80 bytes:                       */
++      u32                     st_space[20];
++
++      /* Software status information [not touched by FSAVE ]:         */
++      u32                     status;
+ };
+ 
+ struct i387_fxsave_struct {
+-      u16     cwd;
+-      u16     swd;
+-      u16     twd;
+-      u16     fop;
++      u16                     cwd; /* Control Word                    */
++      u16                     swd; /* Status Word                     */
++      u16                     twd; /* Tag Word                        */
++      u16                     fop; /* Last Instruction Opcode         */
+       union {
+               struct {
+-                      u64     rip;
+-                      u64     rdp;
++                      u64     rip; /* Instruction Pointer             */
++                      u64     rdp; /* Data Pointer                    */
+               };
+               struct {
+-                      u32     fip;
+-                      u32     fcs;
+-                      u32     foo;
+-                      u32     fos;
++                      u32     fip; /* FPU IP Offset                   */
++                      u32     fcs; /* FPU IP Selector                 */
++                      u32     foo; /* FPU Operand Offset              */
++                      u32     fos; /* FPU Operand Selector            */
+               };
+       };
+-      u32     mxcsr;
+-      u32     mxcsr_mask;
+-      u32     st_space[32];   /* 8*16 bytes for each FP-reg = 128 bytes */
+-      u32     xmm_space[64];  /* 16*16 bytes for each XMM-reg = 256 bytes */
+-      u32     padding[24];
++      u32                     mxcsr;          /* MXCSR Register State */
++      u32                     mxcsr_mask;     /* MXCSR Mask           */
++
++      /* 8*16 bytes for each FP-reg = 128 bytes:                      */
++      u32                     st_space[32];
++
++      /* 16*16 bytes for each XMM-reg = 256 bytes:                    */
++      u32                     xmm_space[64];
++
++      u32                     padding[24];
++
+ } __attribute__((aligned(16)));
+ 
+ struct i387_soft_struct {
+-      u32     cwd;
+-      u32     swd;
+-      u32     twd;
+-      u32     fip;
+-      u32     fcs;
+-      u32     foo;
+-      u32     fos;
+-      u32     st_space[20];   /* 8*10 bytes for each FP-reg = 80 bytes */
+-      u8      ftop, changed, lookahead, no_update, rm, alimit;
+-      struct info     *info;
+-      u32     entry_eip;
++      u32                     cwd;
++      u32                     swd;
++      u32                     twd;
++      u32                     fip;
++      u32                     fcs;
++      u32                     foo;
++      u32                     fos;
++      /* 8*10 bytes for each FP-reg = 80 bytes: */
++      u32                     st_space[20];
++      u8                      ftop;
++      u8                      changed;
++      u8                      lookahead;
++      u8                      no_update;
++      u8                      rm;
++      u8                      alimit;
++      struct info             *info;
++      u32                     entry_eip;
+ };
+ 
+-union i387_union {
++union thread_xstate {
+       struct i387_fsave_struct        fsave;
+       struct i387_fxsave_struct       fxsave;
+-      struct i387_soft_struct         soft;
++      struct i387_soft_struct         soft;
+ };
+ 
+-#ifdef CONFIG_X86_32
+-DECLARE_PER_CPU(u8, cpu_llc_id);
+-#elif !defined(CONFIG_X86_NO_TSS)
++#if defined(CONFIG_X86_64) && !defined(CONFIG_X86_NO_TSS)
+ DECLARE_PER_CPU(struct orig_ist, orig_ist);
+ #endif
+ 
+ extern void print_cpu_info(struct cpuinfo_x86 *);
++extern unsigned int xstate_size;
++extern void free_thread_xstate(struct task_struct *);
++extern struct kmem_cache *task_xstate_cachep;
+ extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
+ extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
+ extern unsigned short num_cache_leaves;
+ 
+ struct thread_struct {
+-/* cached TLS descriptors. */
+-      struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES];
+-      unsigned long   sp0;
+-      unsigned long   sp;
++      /* Cached TLS descriptors: */
++      struct desc_struct      tls_array[GDT_ENTRY_TLS_ENTRIES];
++      unsigned long           sp0;
++      unsigned long           sp;
+ #ifdef CONFIG_X86_32
+-      unsigned long   sysenter_cs;
++      unsigned long           sysenter_cs;
+ #else
+-      unsigned long   usersp; /* Copy from PDA */
+-      unsigned short  es, ds, fsindex, gsindex;
  -#endif
+-      unsigned long   ip;
+-      unsigned long   fs;
+-      unsigned long   gs;
+-/* Hardware debugging registers */
+-      unsigned long   debugreg0;
+-      unsigned long   debugreg1;
+-      unsigned long   debugreg2;
+-      unsigned long   debugreg3;
+-      unsigned long   debugreg6;
+-      unsigned long   debugreg7;
+-/* fault info */
+-      unsigned long   cr2, trap_no, error_code;
+-/* floating point info */
+-      union i387_union        i387 __attribute__((aligned(16)));;
++      unsigned long           usersp; /* Copy from PDA */
++      unsigned short          es;
++      unsigned short          ds;
++      unsigned short          fsindex;
++      unsigned short          gsindex;
++#endif
++      unsigned long           ip;
++      unsigned long           fs;
++      unsigned long           gs;
++      /* Hardware debugging registers: */
++      unsigned long           debugreg0;
++      unsigned long           debugreg1;
++      unsigned long           debugreg2;
++      unsigned long           debugreg3;
++      unsigned long           debugreg6;
++      unsigned long           debugreg7;
++      /* Fault info: */
++      unsigned long           cr2;
++      unsigned long           trap_no;
++      unsigned long           error_code;
++      /* floating point and extended processor state */
++      union thread_xstate     *xstate;
+ #ifdef CONFIG_X86_32
+-/* virtual 86 mode info */
++      /* Virtual 86 mode info */
+       struct vm86_struct __user *vm86_info;
+       unsigned long           screen_bitmap;
+       unsigned long           v86flags, v86mask, saved_sp0;
+       unsigned int            saved_fs, saved_gs;
+ #endif
+-/* IO permissions */
+-      unsigned long   *io_bitmap_ptr;
+-      unsigned long   iopl;
+-/* max allowed port in the bitmap, in bytes: */
+-      unsigned io_bitmap_max;
++      /* IO permissions: */
++      unsigned long           *io_bitmap_ptr;
++      unsigned long           iopl;
++      /* Max allowed port in the bitmap, in bytes: */
++      unsigned                io_bitmap_max;
+ /* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set.  */
+       unsigned long   debugctlmsr;
+ /* Debug Store - if not 0 points to a DS Save Area configuration;
+@@ -384,12 +445,12 @@ static inline void xen_set_iopl_mask(uns
+ }
+ 
+ #ifndef CONFIG_X86_NO_TSS
+-static inline void native_load_sp0(struct tss_struct *tss,
+-                                 struct thread_struct *thread)
++static inline void
++native_load_sp0(struct tss_struct *tss, struct thread_struct *thread)
+ {
+       tss->x86_tss.sp0 = thread->sp0;
+ #ifdef CONFIG_X86_32
+-      /* Only happens when SEP is enabled, no need to test "SEP"arately */
++      /* Only happens when SEP is enabled, no need to test "SEP"arately: */
+       if (unlikely(tss->x86_tss.ss1 != thread->sysenter_cs)) {
+               tss->x86_tss.ss1 = thread->sysenter_cs;
+               wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
+@@ -403,8 +464,8 @@ static inline void native_load_sp0(struc
+ } while (0)
+ #endif
+ 
+-#define __cpuid xen_cpuid
+-#define paravirt_enabled() 0
++#define __cpuid                       xen_cpuid
++#define paravirt_enabled()    0
+ 
+ /*
+  * These special macros can be used to get or set a debugging register
+@@ -424,11 +485,12 @@ static inline void native_load_sp0(struc
+  * enable), so that any CPU's that boot up
+  * after us can get the correct flags.
+  */
+-extern unsigned long mmu_cr4_features;
++extern unsigned long          mmu_cr4_features;
+ 
+ static inline void set_in_cr4(unsigned long mask)
+ {
+       unsigned cr4;
++
+       mmu_cr4_features |= mask;
+       cr4 = read_cr4();
+       cr4 |= mask;
+@@ -438,6 +500,7 @@ static inline void set_in_cr4(unsigned l
+ static inline void clear_in_cr4(unsigned long mask)
+ {
+       unsigned cr4;
++
+       mmu_cr4_features &= ~mask;
+       cr4 = read_cr4();
+       cr4 &= ~mask;
+@@ -445,42 +508,42 @@ static inline void clear_in_cr4(unsigned
+ }
+ 
+ struct microcode_header {
+-      unsigned int hdrver;
+-      unsigned int rev;
+-      unsigned int date;
+-      unsigned int sig;
+-      unsigned int cksum;
+-      unsigned int ldrver;
+-      unsigned int pf;
+-      unsigned int datasize;
+-      unsigned int totalsize;
+-      unsigned int reserved[3];
++      unsigned int            hdrver;
++      unsigned int            rev;
++      unsigned int            date;
++      unsigned int            sig;
++      unsigned int            cksum;
++      unsigned int            ldrver;
++      unsigned int            pf;
++      unsigned int            datasize;
++      unsigned int            totalsize;
++      unsigned int            reserved[3];
+ };
+ 
+ struct microcode {
+-      struct microcode_header hdr;
+-      unsigned int bits[0];
++      struct microcode_header hdr;
++      unsigned int            bits[0];
+ };
+ 
+-typedef struct microcode microcode_t;
+-typedef struct microcode_header microcode_header_t;
++typedef struct microcode      microcode_t;
++typedef struct microcode_header       microcode_header_t;
+ 
+ /* microcode format is extended from prescott processors */
+ struct extended_signature {
+-      unsigned int sig;
+-      unsigned int pf;
+-      unsigned int cksum;
++      unsigned int            sig;
++      unsigned int            pf;
++      unsigned int            cksum;
+ };
+ 
+ struct extended_sigtable {
+-      unsigned int count;
+-      unsigned int cksum;
+-      unsigned int reserved[3];
++      unsigned int            count;
++      unsigned int            cksum;
++      unsigned int            reserved[3];
+       struct extended_signature sigs[0];
+ };
+ 
+ typedef struct {
+-      unsigned long seg;
++      unsigned long           seg;
+ } mm_segment_t;
+ 
+ 
+@@ -492,7 +555,7 @@ extern int kernel_thread(int (*fn)(void 
+ /* Free all resources held by a thread. */
+ extern void release_thread(struct task_struct *);
+ 
+-/* Prepare to copy thread state - unlazy all lazy status */
++/* Prepare to copy thread state - unlazy all lazy state */
+ extern void prepare_to_copy(struct task_struct *tsk);
+ 
+ unsigned long get_wchan(struct task_struct *p);
+@@ -529,118 +592,138 @@ static inline unsigned int cpuid_eax(uns
+       unsigned int eax, ebx, ecx, edx;
+ 
+       cpuid(op, &eax, &ebx, &ecx, &edx);
++
+       return eax;
+ }
++
+ static inline unsigned int cpuid_ebx(unsigned int op)
+ {
+       unsigned int eax, ebx, ecx, edx;
+ 
+       cpuid(op, &eax, &ebx, &ecx, &edx);
++
+       return ebx;
+ }
++
+ static inline unsigned int cpuid_ecx(unsigned int op)
+ {
+       unsigned int eax, ebx, ecx, edx;
+ 
+       cpuid(op, &eax, &ebx, &ecx, &edx);
++
+       return ecx;
+ }
++
+ static inline unsigned int cpuid_edx(unsigned int op)
+ {
+       unsigned int eax, ebx, ecx, edx;
+ 
+       cpuid(op, &eax, &ebx, &ecx, &edx);
++
+       return edx;
+ }
+ 
+ /* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
+ static inline void rep_nop(void)
+ {
+-      __asm__ __volatile__("rep;nop": : :"memory");
++      asm volatile("rep; nop" ::: "memory");
+ }
+ 
+-/* Stop speculative execution */
++static inline void cpu_relax(void)
++{
++      rep_nop();
++}
++
++/* Stop speculative execution: */
+ static inline void sync_core(void)
+ {
+       int tmp;
++
+       asm volatile("cpuid" : "=a" (tmp) : "0" (1)
+-                                        : "ebx", "ecx", "edx", "memory");
++                   : "ebx", "ecx", "edx", "memory");
+ }
+ 
+-#define cpu_relax()   rep_nop()
  -
--#ifdef CONFIG_SMP
--
--#define SMP_TRAMPOLINE_BASE 0x6000
--
--extern int __cpu_disable(void);
--extern void __cpu_die(unsigned int cpu);
--extern void prefill_possible_map(void);
--extern unsigned __cpuinitdata disabled_cpus;
--
--#define raw_smp_processor_id()        read_pda(cpunumber)
--#define cpu_physical_id(cpu)  per_cpu(x86_cpu_to_apicid, cpu)
--
--#define stack_smp_processor_id()                                      \
--      ({                                                              \
--      struct thread_info *ti;                                         \
--      __asm__("andq %%rsp,%0; ":"=r" (ti) : "0" (CURRENT_MASK));      \
--      ti->cpu;                                                        \
--})
--
--/*
-- * On x86 all CPUs are mapped 1:1 to the APIC space. This simplifies
-- * scheduling and IPI sending and compresses data structures.
-- */
--static inline int num_booting_cpus(void)
--{
--      return cpus_weight(cpu_possible_map);
--}
--
--extern void smp_send_reschedule(int cpu);
--
--#else /* CONFIG_SMP */
--
--extern unsigned int boot_cpu_id;
--#define cpu_physical_id(cpu)  boot_cpu_id
--#define stack_smp_processor_id() 0
--
--#endif /* !CONFIG_SMP */
--
--#define safe_smp_processor_id()               smp_processor_id()
+ static inline void __monitor(const void *eax, unsigned long ecx,
+-              unsigned long edx)
++                           unsigned long edx)
+ {
+-      /* "monitor %eax,%ecx,%edx;" */
+-      asm volatile(
+-              ".byte 0x0f,0x01,0xc8;"
+-              : :"a" (eax), "c" (ecx), "d"(edx));
++      /* "monitor %eax, %ecx, %edx;" */
++      asm volatile(".byte 0x0f, 0x01, 0xc8;"
++                   :: "a" (eax), "c" (ecx), "d"(edx));
+ }
+ 
+ static inline void __mwait(unsigned long eax, unsigned long ecx)
+ {
+-      /* "mwait %eax,%ecx;" */
+-      asm volatile(
+-              ".byte 0x0f,0x01,0xc9;"
+-              : :"a" (eax), "c" (ecx));
++      /* "mwait %eax, %ecx;" */
++      asm volatile(".byte 0x0f, 0x01, 0xc9;"
++                   :: "a" (eax), "c" (ecx));
+ }
+ 
+ static inline void __sti_mwait(unsigned long eax, unsigned long ecx)
+ {
+-      /* "mwait %eax,%ecx;" */
+-      asm volatile(
+-              "sti; .byte 0x0f,0x01,0xc9;"
+-              : :"a" (eax), "c" (ecx));
++      trace_hardirqs_on();
++      /* "mwait %eax, %ecx;" */
++      asm volatile("sti; .byte 0x0f, 0x01, 0xc9;"
++                   :: "a" (eax), "c" (ecx));
+ }
+ 
+ extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx);
+ 
+-extern int force_mwait;
++extern int                    force_mwait;
+ 
+ extern void select_idle_routine(const struct cpuinfo_x86 *c);
+ 
+-extern unsigned long boot_option_idle_override;
++extern unsigned long          boot_option_idle_override;
+ 
+ extern void enable_sep_cpu(void);
+ extern int sysenter_setup(void);
+ 
+ /* Defined in head.S */
+-extern struct desc_ptr early_gdt_descr;
++extern struct desc_ptr                early_gdt_descr;
+ 
+ extern void cpu_set_gdt(int);
+ extern void switch_to_new_gdt(void);
+ extern void cpu_init(void);
+ extern void init_gdt(int cpu);
+ 
+-/* from system description table in BIOS.  Mostly for MCA use, but
+- * others may find it useful. */
+-extern unsigned int machine_id;
+-extern unsigned int machine_submodel_id;
+-extern unsigned int BIOS_revision;
++static inline void update_debugctlmsr(unsigned long debugctlmsr)
++{
++#ifndef CONFIG_X86_DEBUGCTLMSR
++      if (boot_cpu_data.x86 < 6)
++              return;
++#endif
++      wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr);
++}
+ 
+-/* Boot loader type from the setup header */
+-extern int bootloader_type;
++/*
++ * from system description table in BIOS. Mostly for MCA use, but
++ * others may find it useful:
++ */
++extern unsigned int           machine_id;
++extern unsigned int           machine_submodel_id;
++extern unsigned int           BIOS_revision;
++
++/* Boot loader type from the setup header: */
++extern int                    bootloader_type;
+ 
+-extern char ignore_fpu_irq;
+-#define cache_line_size() (boot_cpu_data.x86_cache_alignment)
++extern char                   ignore_fpu_irq;
+ 
+ #define HAVE_ARCH_PICK_MMAP_LAYOUT 1
+ #define ARCH_HAS_PREFETCHW
+ #define ARCH_HAS_SPINLOCK_PREFETCH
+ 
+ #ifdef CONFIG_X86_32
+-#define BASE_PREFETCH ASM_NOP4
+-#define ARCH_HAS_PREFETCH
++# define BASE_PREFETCH                ASM_NOP4
++# define ARCH_HAS_PREFETCH
+ #else
+-#define BASE_PREFETCH "prefetcht0 (%1)"
++# define BASE_PREFETCH                "prefetcht0 (%1)"
+ #endif
+ 
+-/* Prefetch instructions for Pentium III and AMD Athlon */
+-/* It's not worth to care about 3dnow! prefetches for the K6
+-   because they are microcoded there and very slow.
+-   However we don't do prefetches for pre XP Athlons currently
+-   That should be fixed. */
++/*
++ * Prefetch instructions for Pentium III (+) and AMD Athlon (+)
++ *
++ * It's not worth to care about 3dnow prefetches for the K6
++ * because they are microcoded there and very slow.
++ */
+ static inline void prefetch(const void *x)
+ {
+       alternative_input(BASE_PREFETCH,
+@@ -649,8 +732,11 @@ static inline void prefetch(const void *
+                         "r" (x));
+ }
+ 
+-/* 3dnow! prefetch to get an exclusive cache line. Useful for
+-   spinlocks to avoid one state transition in the cache coherency protocol. */
++/*
++ * 3dnow prefetch to get an exclusive cache line.
++ * Useful for spinlocks to avoid one state transition in the
++ * cache coherency protocol:
++ */
+ static inline void prefetchw(const void *x)
+ {
+       alternative_input(BASE_PREFETCH,
+@@ -659,21 +745,25 @@ static inline void prefetchw(const void 
+                         "r" (x));
+ }
+ 
+-#define spin_lock_prefetch(x) prefetchw(x)
++static inline void spin_lock_prefetch(const void *x)
++{
++      prefetchw(x);
++}
++
+ #ifdef CONFIG_X86_32
+ /*
+  * User space process size: 3GB (default).
+  */
+-#define TASK_SIZE     (PAGE_OFFSET)
+-#define STACK_TOP     TASK_SIZE
+-#define STACK_TOP_MAX STACK_TOP
  -
--#ifdef CONFIG_X86_LOCAL_APIC
--static __inline int logical_smp_processor_id(void)
--{
--      /* we don't want to mark this access volatile - bad code generation */
--      return GET_APIC_LOGICAL_ID(*(u32 *)(APIC_BASE + APIC_LDR));
+-#define INIT_THREAD  {                                                        \
+-      .sp0 = sizeof(init_stack) + (long)&init_stack,                  \
+-      .vm86_info = NULL,                                              \
+-      .sysenter_cs = __KERNEL_CS,                                     \
+-      .io_bitmap_ptr = NULL,                                          \
+-      .fs = __KERNEL_PERCPU,                                          \
++#define TASK_SIZE             PAGE_OFFSET
++#define STACK_TOP             TASK_SIZE
++#define STACK_TOP_MAX         STACK_TOP
++
++#define INIT_THREAD  {                                                          \
++      .sp0                    = sizeof(init_stack) + (long)&init_stack, \
++      .vm86_info              = NULL,                                   \
++      .sysenter_cs            = __KERNEL_CS,                            \
++      .io_bitmap_ptr          = NULL,                                   \
++      .fs                     = __KERNEL_PERCPU,                        \
+ }
+ 
+ /*
+@@ -682,28 +772,15 @@ static inline void prefetchw(const void 
+  * permission bitmap. The extra byte must be all 1 bits, and must
+  * be within the limit.
+  */
+-#define INIT_TSS  {                                                   \
+-      .x86_tss = {                                                    \
++#define INIT_TSS  {                                                     \
++      .x86_tss = {                                                      \
+               .sp0            = sizeof(init_stack) + (long)&init_stack, \
+-              .ss0            = __KERNEL_DS,                          \
+-              .ss1            = __KERNEL_CS,                          \
+-              .io_bitmap_base = INVALID_IO_BITMAP_OFFSET,             \
+-       },                                                             \
+-      .io_bitmap      = { [0 ... IO_BITMAP_LONGS] = ~0 },             \
  -}
  -
--static inline int hard_smp_processor_id(void)
--{
--      /* we don't want to mark this access volatile - bad code generation */
--      return GET_APIC_ID(*(u32 *)(APIC_BASE + APIC_ID));
--}
--#endif
+-#define start_thread(regs, new_eip, new_esp) do {             \
+-      __asm__("movl %0,%%gs": :"r" (0));                      \
+-      regs->fs = 0;                                           \
+-      set_fs(USER_DS);                                        \
+-      regs->ds = __USER_DS;                                   \
+-      regs->es = __USER_DS;                                   \
+-      regs->ss = __USER_DS;                                   \
+-      regs->cs = __USER_CS;                                   \
+-      regs->ip = new_eip;                                     \
+-      regs->sp = new_esp;                                     \
+-} while (0)
  -
--#endif
++              .ss0            = __KERNEL_DS,                            \
++              .ss1            = __KERNEL_CS,                            \
++              .io_bitmap_base = INVALID_IO_BITMAP_OFFSET,               \
++       },                                                               \
++      .io_bitmap              = { [0 ... IO_BITMAP_LONGS] = ~0 },       \
++}
+ 
+ extern unsigned long thread_saved_pc(struct task_struct *tsk);
+ 
+@@ -731,24 +808,24 @@ extern unsigned long thread_saved_pc(str
+        __regs__ - 1;                                                   \
+ })
+ 
+-#define KSTK_ESP(task) (task_pt_regs(task)->sp)
++#define KSTK_ESP(task)                (task_pt_regs(task)->sp)
+ 
+ #else
+ /*
+  * User space process size. 47bits minus one guard page.
+  */
+-#define TASK_SIZE64   (0x800000000000UL - 4096)
++#define TASK_SIZE64   ((1UL << 47) - PAGE_SIZE)
+ 
+ /* This decides where the kernel will search for a free chunk of vm
+  * space during mmap's.
+  */
+-#define IA32_PAGE_OFFSET ((current->personality & ADDR_LIMIT_3GB) ? \
+-                         0xc0000000 : 0xFFFFe000)
++#define IA32_PAGE_OFFSET      ((current->personality & ADDR_LIMIT_3GB) ? \
++                                      0xc0000000 : 0xFFFFe000)
+ 
+-#define TASK_SIZE             (test_thread_flag(TIF_IA32) ? \
+-                               IA32_PAGE_OFFSET : TASK_SIZE64)
+-#define TASK_SIZE_OF(child)   ((test_tsk_thread_flag(child, TIF_IA32)) ? \
+-                                IA32_PAGE_OFFSET : TASK_SIZE64)
++#define TASK_SIZE             (test_thread_flag(TIF_IA32) ? \
++                                      IA32_PAGE_OFFSET : TASK_SIZE64)
++#define TASK_SIZE_OF(child)   ((test_tsk_thread_flag(child, TIF_IA32)) ? \
++                                      IA32_PAGE_OFFSET : TASK_SIZE64)
+ 
+ #define STACK_TOP             TASK_SIZE
+ #define STACK_TOP_MAX         TASK_SIZE64
+@@ -761,33 +838,32 @@ extern unsigned long thread_saved_pc(str
+       .x86_tss.sp0 = (unsigned long)&init_stack + sizeof(init_stack) \
+ }
+ 
+-#define start_thread(regs, new_rip, new_rsp) do {                          \
+-      asm volatile("movl %0,%%fs; movl %0,%%es; movl %0,%%ds": :"r" (0));  \
+-      load_gs_index(0);                                                    \
+-      (regs)->ip = (new_rip);                                              \
+-      (regs)->sp = (new_rsp);                                              \
+-      write_pda(oldrsp, (new_rsp));                                        \
+-      (regs)->cs = __USER_CS;                                              \
+-      (regs)->ss = __USER_DS;                                              \
+-      (regs)->flags = 0x200;                                               \
+-      set_fs(USER_DS);                                                     \
+-} while (0)
  -
---- a/include/asm-x86/mach-xen/asm/smp.h
-+++ b/include/asm-x86/mach-xen/asm/smp.h
+ /*
+  * Return saved PC of a blocked thread.
+  * What is this good for? it will be always the scheduler or ret_from_fork.
+  */
+-#define thread_saved_pc(t) (*(unsigned long *)((t)->thread.sp - 8))
++#define thread_saved_pc(t)    (*(unsigned long *)((t)->thread.sp - 8))
+ 
+-#define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1)
+-#define KSTK_ESP(tsk) -1 /* sorry. doesn't work for syscall. */
++#define task_pt_regs(tsk)     ((struct pt_regs *)(tsk)->thread.sp0 - 1)
++#define KSTK_ESP(tsk)         -1 /* sorry. doesn't work for syscall. */
+ #endif /* CONFIG_X86_64 */
+ 
+-/* This decides where the kernel will search for a free chunk of vm
++extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
++                                             unsigned long new_sp);
++
++/*
++ * This decides where the kernel will search for a free chunk of vm
+  * space during mmap's.
+  */
+ #define TASK_UNMAPPED_BASE    (PAGE_ALIGN(TASK_SIZE / 3))
+ 
+-#define KSTK_EIP(task) (task_pt_regs(task)->ip)
++#define KSTK_EIP(task)                (task_pt_regs(task)->ip)
++
++/* Get/set a process' ability to use the timestamp counter instruction */
++#define GET_TSC_CTL(adr)      get_tsc_mode((adr))
++#define SET_TSC_CTL(val)      set_tsc_mode((val))
++
++extern int get_tsc_mode(unsigned long adr);
++extern int set_tsc_mode(unsigned int val);
+ 
+ #endif
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/segment.h       2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/segment.h    2009-03-16 16:38:05.000000000 +0100
+@@ -191,13 +191,14 @@
+ #define SEGMENT_TI_MASK               0x4
+ 
+ #define IDT_ENTRIES 256
++#define NUM_EXCEPTION_VECTORS 32
+ #define GDT_SIZE (GDT_ENTRIES * 8)
+ #define GDT_ENTRY_TLS_ENTRIES 3
+ #define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES * 8)
+ 
+ #ifdef __KERNEL__
+ #ifndef __ASSEMBLY__
+-extern const char early_idt_handlers[IDT_ENTRIES][10];
++extern const char early_idt_handlers[NUM_EXCEPTION_VECTORS][10];
+ #endif
+ #endif
+ 
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/smp.h   2009-02-16 16:18:36.000000000 +0100
++++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/smp.h        2009-03-16 16:38:05.000000000 +0100
  @@ -1,5 +1,227 @@
  -#ifdef CONFIG_X86_32
  -# include "smp_32.h"
@@ -20309,8 +19399,295 @@ Automatically created from "patches.kernel.org/patch-2.6.26" by xen-port-patches
  +extern void unlock_ipi_call_lock(void);
  +#endif /* __ASSEMBLY__ */
   #endif
---- a/include/asm-x86/mach-xen/asm/spinlock.h
-+++ b/include/asm-x86/mach-xen/asm/spinlock.h
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/smp_32.h        2009-03-16 16:33:40.000000000 +0100
++++ /dev/null  1970-01-01 00:00:00.000000000 +0000
+@@ -1,178 +0,0 @@
+-#ifndef __ASM_SMP_H
+-#define __ASM_SMP_H
+-
+-#ifndef __ASSEMBLY__
+-#include <linux/cpumask.h>
+-#include <linux/init.h>
+-
+-/*
+- * We need the APIC definitions automatically as part of 'smp.h'
+- */
+-#ifdef CONFIG_X86_LOCAL_APIC
+-# include <asm/mpspec.h>
+-# include <asm/apic.h>
+-# ifdef CONFIG_X86_IO_APIC
+-#  include <asm/io_apic.h>
+-# endif
+-#endif
+-
+-#define cpu_callout_map cpu_possible_map
+-#define cpu_callin_map cpu_possible_map
+-
+-extern int smp_num_siblings;
+-extern unsigned int num_processors;
+-
+-extern void smp_alloc_memory(void);
+-extern void lock_ipi_call_lock(void);
+-extern void unlock_ipi_call_lock(void);
+-
+-extern void (*mtrr_hook) (void);
+-extern void zap_low_mappings (void);
+-
+-DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);
+-DECLARE_PER_CPU(cpumask_t, cpu_core_map);
+-DECLARE_PER_CPU(u8, cpu_llc_id);
+-DECLARE_PER_CPU(u8, x86_cpu_to_apicid);
+-
+-#ifdef CONFIG_HOTPLUG_CPU
+-extern void cpu_exit_clear(void);
+-extern void cpu_uninit(void);
+-#endif
+-
+-#ifdef CONFIG_SMP
+-
+-#ifndef CONFIG_XEN
+-
+-/* Globals due to paravirt */
+-extern void set_cpu_sibling_map(int cpu);
+-
+-struct smp_ops
+-{
+-      void (*smp_prepare_boot_cpu)(void);
+-      void (*smp_prepare_cpus)(unsigned max_cpus);
+-      int (*cpu_up)(unsigned cpu);
+-      void (*smp_cpus_done)(unsigned max_cpus);
+-
+-      void (*smp_send_stop)(void);
+-      void (*smp_send_reschedule)(int cpu);
+-      int (*smp_call_function_mask)(cpumask_t mask,
+-                                    void (*func)(void *info), void *info,
+-                                    int wait);
+-};
+-
+-extern struct smp_ops smp_ops;
+-
+-static inline void smp_prepare_boot_cpu(void)
+-{
+-      smp_ops.smp_prepare_boot_cpu();
+-}
+-static inline void smp_prepare_cpus(unsigned int max_cpus)
+-{
+-      smp_ops.smp_prepare_cpus(max_cpus);
+-}
+-static inline int __cpu_up(unsigned int cpu)
+-{
+-      return smp_ops.cpu_up(cpu);
+-}
+-static inline void smp_cpus_done(unsigned int max_cpus)
+-{
+-      smp_ops.smp_cpus_done(max_cpus);
+-}
+-
+-static inline void smp_send_stop(void)
+-{
+-      smp_ops.smp_send_stop();
+-}
+-static inline void smp_send_reschedule(int cpu)
+-{
+-      smp_ops.smp_send_reschedule(cpu);
+-}
+-static inline int smp_call_function_mask(cpumask_t mask,
+-                                       void (*func) (void *info), void *info,
+-                                       int wait)
+-{
+-      return smp_ops.smp_call_function_mask(mask, func, info, wait);
+-}
+-
+-void native_smp_prepare_boot_cpu(void);
+-void native_smp_prepare_cpus(unsigned int max_cpus);
+-int native_cpu_up(unsigned int cpunum);
+-void native_smp_cpus_done(unsigned int max_cpus);
+-
+-#ifndef CONFIG_PARAVIRT
+-#define startup_ipi_hook(phys_apicid, start_eip, start_esp) do { } while (0)
+-#endif
+-
+-#else /* CONFIG_XEN */
+-
+-void xen_smp_send_stop(void);
+-void xen_smp_send_reschedule(int cpu);
+-int xen_smp_call_function_mask(cpumask_t mask,
+-                             void (*func) (void *info), void *info,
+-                             int wait);
+-
+-#define smp_send_stop         xen_smp_send_stop
+-#define smp_send_reschedule   xen_smp_send_reschedule
+-#define smp_call_function_mask        xen_smp_call_function_mask
+-
+-extern void prefill_possible_map(void);
+-
+-#endif /* CONFIG_XEN */
+-
+-extern int __cpu_disable(void);
+-extern void __cpu_die(unsigned int cpu);
+-
+-/*
+- * This function is needed by all SMP systems. It must _always_ be valid
+- * from the initial startup. We map APIC_BASE very early in page_setup(),
+- * so this is correct in the x86 case.
+- */
+-DECLARE_PER_CPU(int, cpu_number);
+-#define raw_smp_processor_id() (x86_read_percpu(cpu_number))
+-
+-#define cpu_physical_id(cpu)  per_cpu(x86_cpu_to_apicid, cpu)
+-
+-#define safe_smp_processor_id() smp_processor_id()
+-
+-/* We don't mark CPUs online until __cpu_up(), so we need another measure */
+-static inline int num_booting_cpus(void)
+-{
+-      return cpus_weight(cpu_callout_map);
+-}
+-
+-#else /* CONFIG_SMP */
+-
+-#define safe_smp_processor_id()               0
+-#define cpu_physical_id(cpu)          boot_cpu_physical_apicid
+-
+-#endif /* !CONFIG_SMP */
+-
+-#ifdef CONFIG_X86_LOCAL_APIC
+-
+-static __inline int logical_smp_processor_id(void)
+-{
+-      /* we don't want to mark this access volatile - bad code generation */
+-      return GET_APIC_LOGICAL_ID(*(u32 *)(APIC_BASE + APIC_LDR));
+-}
+-
+-# ifdef APIC_DEFINITION
+-extern int hard_smp_processor_id(void);
+-# else
+-#  include <mach_apicdef.h>
+-static inline int hard_smp_processor_id(void)
+-{
+-      /* we don't want to mark this access volatile - bad code generation */
+-      return GET_APIC_ID(*(u32 *)(APIC_BASE + APIC_ID));
+-}
+-# endif /* APIC_DEFINITION */
+-
+-#else /* CONFIG_X86_LOCAL_APIC */
+-
+-# ifndef CONFIG_SMP
+-#  define hard_smp_processor_id()     0
+-# endif
+-
+-#endif /* CONFIG_X86_LOCAL_APIC */
+-
+-#endif /* !ASSEMBLY */
+-#endif
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/smp_64.h        2009-03-16 16:33:40.000000000 +0100
++++ /dev/null  1970-01-01 00:00:00.000000000 +0000
+@@ -1,103 +0,0 @@
+-#ifndef __ASM_SMP_H
+-#define __ASM_SMP_H
+-
+-#include <linux/cpumask.h>
+-#include <linux/init.h>
+-
+-#ifdef CONFIG_X86_LOCAL_APIC
+-/*
+- * We need the APIC definitions automatically as part of 'smp.h'
+- */
+-#include <asm/apic.h>
+-#ifdef CONFIG_X86_IO_APIC
+-#include <asm/io_apic.h>
+-#endif
+-#include <asm/mpspec.h>
+-#endif
+-#include <asm/pda.h>
+-#include <asm/thread_info.h>
+-
+-extern cpumask_t cpu_initialized;
+-
+-extern int smp_num_siblings;
+-extern unsigned int num_processors;
+-
+-extern void smp_alloc_memory(void);
+-extern void lock_ipi_call_lock(void);
+-extern void unlock_ipi_call_lock(void);
+-
+-extern int smp_call_function_mask(cpumask_t mask, void (*func)(void *),
+-                                void *info, int wait);
+-
+-DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);
+-DECLARE_PER_CPU(cpumask_t, cpu_core_map);
+-DECLARE_PER_CPU(u16, cpu_llc_id);
+-DECLARE_PER_CPU(u16, x86_cpu_to_apicid);
+-DECLARE_PER_CPU(u16, x86_bios_cpu_apicid);
+-
+-#ifdef CONFIG_X86_LOCAL_APIC
+-static inline int cpu_present_to_apicid(int mps_cpu)
+-{
+-      if (cpu_present(mps_cpu))
+-              return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu);
+-      else
+-              return BAD_APICID;
+-}
+-#endif
+-
+-#ifdef CONFIG_SMP
+-
+-#define SMP_TRAMPOLINE_BASE 0x6000
+-
+-extern int __cpu_disable(void);
+-extern void __cpu_die(unsigned int cpu);
+-extern void prefill_possible_map(void);
+-extern unsigned __cpuinitdata disabled_cpus;
+-
+-#define raw_smp_processor_id()        read_pda(cpunumber)
+-#define cpu_physical_id(cpu)  per_cpu(x86_cpu_to_apicid, cpu)
+-
+-#define stack_smp_processor_id()                                      \
+-      ({                                                              \
+-      struct thread_info *ti;                                         \
+-      __asm__("andq %%rsp,%0; ":"=r" (ti) : "0" (CURRENT_MASK));      \
+-      ti->cpu;                                                        \
+-})
+-
+-/*
+- * On x86 all CPUs are mapped 1:1 to the APIC space. This simplifies
+- * scheduling and IPI sending and compresses data structures.
+- */
+-static inline int num_booting_cpus(void)
+-{
+-      return cpus_weight(cpu_possible_map);
+-}
+-
+-extern void smp_send_reschedule(int cpu);
+-
+-#else /* CONFIG_SMP */
+-
+-extern unsigned int boot_cpu_id;
+-#define cpu_physical_id(cpu)  boot_cpu_id
+-#define stack_smp_processor_id() 0
+-
+-#endif /* !CONFIG_SMP */
+-
+-#define safe_smp_processor_id()               smp_processor_id()
+-
+-#ifdef CONFIG_X86_LOCAL_APIC
+-static __inline int logical_smp_processor_id(void)
+-{
+-      /* we don't want to mark this access volatile - bad code generation */
+-      return GET_APIC_LOGICAL_ID(*(u32 *)(APIC_BASE + APIC_LDR));
+-}
+-
+-static inline int hard_smp_processor_id(void)
+-{
+-      /* we don't want to mark this access volatile - bad code generation */
+-      return GET_APIC_ID(*(u32 *)(APIC_BASE + APIC_ID));
+-}
+-#endif
+-
+-#endif
+-
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/spinlock.h      2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/spinlock.h   2009-03-16 16:38:05.000000000 +0100
  @@ -88,7 +88,7 @@ extern void xen_spin_kick(raw_spinlock_t
             : "memory", "cc")
   
@@ -20381,8 +19758,24 @@ Automatically created from "patches.kernel.org/patch-2.6.26" by xen-port-patches
   {
         unsigned int token, count;
         bool free;
---- a/include/asm-x86/mach-xen/asm/swiotlb_32.h
-+++ /dev/null
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/swiotlb.h       2009-02-16 16:18:36.000000000 +0100
++++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/swiotlb.h    2009-03-16 16:38:05.000000000 +0100
+@@ -1,5 +1,8 @@
+-#ifdef CONFIG_X86_32
+-# include "swiotlb_32.h"
+-#else
+-# include "../../swiotlb.h"
+-#endif
++#ifndef _ASM_SWIOTLB_H
++
++#include "../../swiotlb.h"
++
++dma_addr_t swiotlb_map_single_phys(struct device *, phys_addr_t, size_t size,
++                                 int dir);
++
++#endif /* _ASM_SWIOTLB_H */
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/swiotlb_32.h    2009-05-14 10:56:29.000000000 +0200
++++ /dev/null  1970-01-01 00:00:00.000000000 +0000
  @@ -1,43 +0,0 @@
  -#ifndef _ASM_SWIOTLB_H
  -#define _ASM_SWIOTLB_H 1
@@ -20427,24 +19820,8 @@ Automatically created from "patches.kernel.org/patch-2.6.26" by xen-port-patches
  -#endif
  -
  -#endif
---- a/include/asm-x86/mach-xen/asm/swiotlb.h
-+++ b/include/asm-x86/mach-xen/asm/swiotlb.h
-@@ -1,5 +1,8 @@
--#ifdef CONFIG_X86_32
--# include "swiotlb_32.h"
--#else
--# include "../../swiotlb.h"
--#endif
-+#ifndef _ASM_SWIOTLB_H
-+
-+#include "../../swiotlb.h"
-+
-+dma_addr_t swiotlb_map_single_phys(struct device *, phys_addr_t, size_t size,
-+                                 int dir);
-+
-+#endif /* _ASM_SWIOTLB_H */
---- a/include/asm-x86/mach-xen/asm/system.h
-+++ b/include/asm-x86/mach-xen/asm/system.h
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/system.h        2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/system.h     2009-03-16 16:38:05.000000000 +0100
  @@ -28,22 +28,44 @@ struct task_struct *__switch_to(struct t
    * Saving eflags is important. It switches not only IOPL between tasks,
    * it also protects other tasks from NT leaking through sysenter etc.
@@ -20643,8 +20020,8 @@ Automatically created from "patches.kernel.org/patch-2.6.26" by xen-port-patches
   #else
   #define smp_mb()      barrier()
   #define smp_rmb()     barrier()
---- a/include/asm-x86/mach-xen/asm/tlbflush.h
-+++ b/include/asm-x86/mach-xen/asm/tlbflush.h
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/tlbflush.h      2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/tlbflush.h   2009-03-16 16:38:05.000000000 +0100
  @@ -86,8 +86,7 @@ static inline void flush_tlb_range(struc
   #define TLBSTATE_LAZY 2
   
@@ -20655,8 +20032,8 @@ Automatically created from "patches.kernel.org/patch-2.6.26" by xen-port-patches
         struct mm_struct *active_mm;
         int state;
         char __cacheline_padding[L1_CACHE_BYTES-8];
---- a/include/asm-x86/mach-xen/asm/vga.h
-+++ b/include/asm-x86/mach-xen/asm/vga.h
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/vga.h   2009-05-14 10:56:29.000000000 +0200
++++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/vga.h        2009-03-16 16:38:05.000000000 +0100
  @@ -12,9 +12,9 @@
    *    access the videoram directly without any black magic.
    */
@@ -20669,8 +20046,8 @@ Automatically created from "patches.kernel.org/patch-2.6.26" by xen-port-patches
  +#define vga_writeb(x, y) (*(y) = (x))
   
   #endif
---- a/include/asm-x86/mach-xen/asm/xor_64.h
-+++ b/include/asm-x86/mach-xen/asm/xor_64.h
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/xor_64.h        2009-05-14 10:56:29.000000000 +0200
++++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/xor_64.h     2009-03-16 16:38:05.000000000 +0100
  @@ -1,20 +1,23 @@
   /*
  - * x86-64 changes / gcc fixes from Andi Kleen. 
@@ -21133,8 +20510,8 @@ Automatically created from "patches.kernel.org/patch-2.6.26" by xen-port-patches
   
   /* We force the use of the SSE xor block because it can write around L2.
      We may also be able to load into the L1 only depending on how the cpu
---- a/include/asm-x86/scatterlist.h
-+++ b/include/asm-x86/scatterlist.h
+--- sle11-2009-05-14.orig/include/asm-x86/scatterlist.h        2009-05-14 10:56:29.000000000 +0200
++++ sle11-2009-05-14/include/asm-x86/scatterlist.h     2009-03-16 16:38:05.000000000 +0100
  @@ -24,7 +24,7 @@ struct scatterlist {
    * returns.
    */
@@ -21144,9 +20521,9 @@ Automatically created from "patches.kernel.org/patch-2.6.26" by xen-port-patches
   # define sg_dma_len(sg)               ((sg)->length)
   #else
   # define sg_dma_len(sg)               ((sg)->dma_length)
---- a/include/linux/page-flags.h
-+++ b/include/linux/page-flags.h
-@@ -276,18 +276,25 @@ static inline void SetPageUptodate(struc
+--- sle11-2009-05-14.orig/include/linux/page-flags.h   2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/include/linux/page-flags.h        2009-03-16 16:38:05.000000000 +0100
+@@ -278,18 +278,25 @@ static inline void SetPageUptodate(struc
   
   CLEARPAGEFLAG(Uptodate, uptodate)
   
@@ -21184,8 +20561,8 @@ Automatically created from "patches.kernel.org/patch-2.6.26" by xen-port-patches
   
   extern void cancel_dirty_page(struct page *page, unsigned int account_size);
   
---- a/include/xen/balloon.h
-+++ b/include/xen/balloon.h
+--- sle11-2009-05-14.orig/include/xen/balloon.h        2008-11-25 12:35:56.000000000 +0100
++++ sle11-2009-05-14/include/xen/balloon.h     2009-03-16 16:38:05.000000000 +0100
  @@ -31,9 +31,12 @@
    * IN THE SOFTWARE.
    */
@@ -21209,8 +20586,8 @@ Automatically created from "patches.kernel.org/patch-2.6.26" by xen-port-patches
   
  -#endif /* __ASM_BALLOON_H__ */
  +#endif /* __XEN_BALLOON_H__ */
---- a/include/xen/interface/grant_table.h
-+++ b/include/xen/interface/grant_table.h
+--- sle11-2009-05-14.orig/include/xen/interface/grant_table.h  2008-11-25 12:22:34.000000000 +0100
++++ sle11-2009-05-14/include/xen/interface/grant_table.h       2009-03-16 16:38:05.000000000 +0100
  @@ -193,6 +193,7 @@ struct gnttab_map_grant_ref {
       grant_handle_t handle;
       uint64_t dev_bus_addr;
@@ -21267,8 +20644,8 @@ Automatically created from "patches.kernel.org/patch-2.6.26" by xen-port-patches
   typedef struct gnttab_query_size gnttab_query_size_t;
   DEFINE_XEN_GUEST_HANDLE(gnttab_query_size_t);
   
---- a/include/xen/interface/io/fbif.h
-+++ b/include/xen/interface/io/fbif.h
+--- sle11-2009-05-14.orig/include/xen/interface/io/fbif.h      2008-11-25 12:35:56.000000000 +0100
++++ sle11-2009-05-14/include/xen/interface/io/fbif.h   2009-03-16 16:38:05.000000000 +0100
  @@ -150,7 +150,12 @@ struct xenfb_page
        * framebuffer with a max resolution of 12,800x10,240.  Should
        * be enough for a while with room leftover for expansion.
@@ -21282,8 +20659,8 @@ Automatically created from "patches.kernel.org/patch-2.6.26" by xen-port-patches
   };
   
   /*
---- a/include/xen/interface/memory.h
-+++ b/include/xen/interface/memory.h
+--- sle11-2009-05-14.orig/include/xen/interface/memory.h       2009-02-16 16:17:21.000000000 +0100
++++ sle11-2009-05-14/include/xen/interface/memory.h    2009-03-16 16:38:05.000000000 +0100
  @@ -62,7 +62,7 @@ struct xen_memory_reservation {
        *   OUT: GMFN bases of extents that were allocated
        *   (NB. This command also updates the mach_to_phys translation table)
@@ -21351,8 +20728,8 @@ Automatically created from "patches.kernel.org/patch-2.6.26" by xen-port-patches
   };
   DEFINE_XEN_GUEST_HANDLE_STRUCT(xen_translate_gpfn_list);
   typedef struct xen_translate_gpfn_list xen_translate_gpfn_list_t;
---- a/include/xen/interface/vcpu.h
-+++ b/include/xen/interface/vcpu.h
+--- sle11-2009-05-14.orig/include/xen/interface/vcpu.h 2008-11-25 12:35:56.000000000 +0100
++++ sle11-2009-05-14/include/xen/interface/vcpu.h      2009-03-16 16:38:05.000000000 +0100
  @@ -85,6 +85,7 @@ struct vcpu_runstate_info {
        */
       uint64_t time[4];
@@ -21385,8 +20762,8 @@ Automatically created from "patches.kernel.org/patch-2.6.26" by xen-port-patches
   typedef struct vcpu_register_vcpu_info vcpu_register_vcpu_info_t;
   DEFINE_XEN_GUEST_HANDLE(vcpu_register_vcpu_info_t);
   
---- a/lib/swiotlb-xen.c
-+++ b/lib/swiotlb-xen.c
+--- sle11-2009-05-14.orig/lib/swiotlb-xen.c    2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/lib/swiotlb-xen.c 2009-03-16 16:38:05.000000000 +0100
  @@ -20,6 +20,7 @@
   #include <linux/ctype.h>
   #include <linux/init.h>