--- /dev/null
+From: kernel.org
+Subject: 2.6.26
+Patch-mainline: 2.6.26
+
+Acked-by: Jeff Mahoney <jeffm@suse.com>
+Automatically created from "patches.kernel.org/patch-2.6.26" by xen-port-patches.py
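+
+Xen adaptation of the 2.6.26 base kernel changes. Among other things this
+updates the Kconfig dependencies (HAVE_ARCH_KGDB, MEMTEST, DIRECT_GBPAGES
+and KEXEC become Xen-aware; drivers/xen/Kconfig is no longer sourced from
+arch/x86/Kconfig), follows the ia32/64-bit syscall-tracing rework and the
+switch to compat_sys_ptrace, adapts the ACPI sleep/wakeup rewrite, the
+early_res/e820 changes and the cpu/common.c cleanups, and adds an MTRR type
+lookup that queries the hypervisor via XENPF_read_memtype.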
+
+--- sle11-2009-05-14.orig/arch/x86/Kconfig 2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/arch/x86/Kconfig 2009-03-16 16:38:05.000000000 +0100
+@@ -28,7 +28,7 @@ config X86
+ select HAVE_DYNAMIC_FTRACE
+ select HAVE_FTRACE
+ select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64) && !XEN
+- select HAVE_ARCH_KGDB if !X86_VOYAGER
++ select HAVE_ARCH_KGDB if !X86_VOYAGER && !XEN
+ select HAVE_ARCH_TRACEHOOK
+ select HAVE_GENERIC_DMA_COHERENT if X86_32
+ select HAVE_EFFICIENT_UNALIGNED_ACCESS
+@@ -486,6 +486,7 @@ config PARAVIRT_DEBUG
+
+ config MEMTEST
+ bool "Memtest"
++ depends on !XEN
+ help
+ This option adds a kernel parameter 'memtest', which allows memtest
+ to be set.
+@@ -1007,7 +1008,7 @@ config X86_PAE
+ config DIRECT_GBPAGES
+ bool "Enable 1GB pages for kernel pagetables" if EMBEDDED
+ default y
+- depends on X86_64
++ depends on X86_64 && !XEN
+ help
+ Allow the kernel linear mapping to use 1GB pages on CPUs that
+ support it. This can improve the kernel's performance a tiny bit by
+@@ -1349,8 +1350,7 @@ source kernel/Kconfig.hz
+
+ config KEXEC
+ bool "kexec system call"
+- depends on X86_BIOS_REBOOT
+- depends on !XEN_UNPRIVILEGED_GUEST
++ depends on X86_BIOS_REBOOT || (XEN && !XEN_UNPRIVILEGED_GUEST)
+ help
+ kexec is a system call that implements the ability to shutdown your
+ current kernel, and to start another kernel. It is like a reboot
+@@ -1948,6 +1948,4 @@ source "crypto/Kconfig"
+
+ source "arch/x86/kvm/Kconfig"
+
+-source "drivers/xen/Kconfig"
+-
+ source "lib/Kconfig"
+--- sle11-2009-05-14.orig/arch/x86/ia32/ia32entry-xen.S 2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/arch/x86/ia32/ia32entry-xen.S 2009-03-16 16:38:05.000000000 +0100
+@@ -129,12 +129,14 @@ sysenter_tracesys:
+ SAVE_REST
+ CLEAR_RREGS
+ movq %r9,R9(%rsp)
+- movq $-ENOSYS,RAX(%rsp) /* really needed? */
++ movq $-ENOSYS,RAX(%rsp)/* ptrace can change this for a bad syscall */
+ movq %rsp,%rdi /* &pt_regs -> arg1 */
+ call syscall_trace_enter
+ LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */
+ RESTORE_REST
+ xchgl %ebp,%r9d
++ cmpl $(IA32_NR_syscalls-1),%eax
++ ja int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */
+ jmp sysenter_do_call
+ CFI_ENDPROC
+ ENDPROC(ia32_sysenter_target)
+@@ -200,13 +202,15 @@ cstar_tracesys:
+ SAVE_REST
+ CLEAR_RREGS
+ movq %r9,R9(%rsp)
+- movq $-ENOSYS,RAX(%rsp) /* really needed? */
++ movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
+ movq %rsp,%rdi /* &pt_regs -> arg1 */
+ call syscall_trace_enter
+ LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */
+ RESTORE_REST
+ xchgl %ebp,%r9d
+ movl RSP-ARGOFFSET(%rsp), %r8d
++ cmpl $(IA32_NR_syscalls-1),%eax
++ ja int_ret_from_sys_call /* cstar_tracesys has set RAX(%rsp) */
+ jmp cstar_do_call
+ END(ia32_cstar_target)
+
+@@ -264,7 +268,7 @@ ENTRY(ia32_syscall)
+ jnz ia32_tracesys
+ ia32_do_syscall:
+ cmpl $(IA32_NR_syscalls-1),%eax
+- ja ia32_badsys
++ ja int_ret_from_sys_call /* ia32_tracesys has set RAX(%rsp) */
+ IA32_ARG_FIXUP
+ call *ia32_sys_call_table(,%rax,8) # xxx: rip relative
+ ia32_sysret:
+@@ -274,7 +278,7 @@ ia32_sysret:
+ ia32_tracesys:
+ SAVE_REST
+ CLEAR_RREGS
+- movq $-ENOSYS,RAX(%rsp) /* really needed? */
++ movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
+ movq %rsp,%rdi /* &pt_regs -> arg1 */
+ call syscall_trace_enter
+ LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */
+@@ -365,7 +369,7 @@ ia32_sys_call_table:
+ .quad sys_setuid16
+ .quad sys_getuid16
+ .quad compat_sys_stime /* stime */ /* 25 */
+- .quad sys32_ptrace /* ptrace */
++ .quad compat_sys_ptrace /* ptrace */
+ .quad sys_alarm
+ .quad sys_fstat /* (old)fstat */
+ .quad sys_pause
+--- sle11-2009-05-14.orig/arch/x86/kernel/Makefile 2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/arch/x86/kernel/Makefile 2009-03-16 16:38:05.000000000 +0100
+@@ -122,8 +122,7 @@ ifeq ($(CONFIG_X86_64),y)
+
+ obj-$(CONFIG_XEN) += nmi_64.o
+ time_64-$(CONFIG_XEN) += time_32.o
+- pci-dma_64-$(CONFIG_XEN) += pci-dma_32.o
+ endif
+
+-disabled-obj-$(CONFIG_XEN) := early-quirks.o hpet.o i8253.o i8259_$(BITS).o reboot.o \
+- smpboot_$(BITS).o tsc_$(BITS).o tsc_sync.o
++disabled-obj-$(CONFIG_XEN) := crash.o early-quirks.o hpet.o i8253.o i8259_$(BITS).o \
++ pci-swiotlb_64.o reboot.o smpboot.o tlb_$(BITS).o tsc_$(BITS).o tsc_sync.o vsmp_64.o
+--- sle11-2009-05-14.orig/arch/x86/kernel/acpi/Makefile 2008-12-01 11:11:08.000000000 +0100
++++ sle11-2009-05-14/arch/x86/kernel/acpi/Makefile 2009-03-16 16:38:05.000000000 +0100
+@@ -15,4 +15,4 @@ $(obj)/wakeup_rm.o: $(obj)/realmode/w
+ $(obj)/realmode/wakeup.bin: FORCE
+ $(Q)$(MAKE) $(build)=$(obj)/realmode
+
+-disabled-obj-$(CONFIG_XEN) := cstate.o wakeup_$(BITS).o
++disabled-obj-$(CONFIG_XEN) := cstate.o wakeup_%.o
+--- sle11-2009-05-14.orig/arch/x86/kernel/acpi/boot.c 2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/arch/x86/kernel/acpi/boot.c 2009-03-16 16:38:05.000000000 +0100
+@@ -251,19 +251,23 @@ static int __init acpi_parse_madt(struct
+
+ static void __cpuinit acpi_register_lapic(int id, u8 enabled)
+ {
++#ifndef CONFIG_XEN
+ unsigned int ver = 0;
++#endif
+
+ if (!enabled) {
+ ++disabled_cpus;
+ return;
+ }
+
++#ifndef CONFIG_XEN
+ #ifdef CONFIG_X86_32
+ if (boot_cpu_physical_apicid != -1U)
+ ver = apic_version[boot_cpu_physical_apicid];
+ #endif
+
+ generic_processor_info(id, ver);
++#endif
+ }
+
+ static int __init
+@@ -774,6 +778,7 @@ static int __init acpi_parse_fadt(struct
+ * returns 0 on success, < 0 on error
+ */
+
++#ifndef CONFIG_XEN
+ static void __init acpi_register_lapic_address(unsigned long address)
+ {
+ mp_lapic_addr = address;
+@@ -787,6 +792,9 @@ static void __init acpi_register_lapic_a
+ #endif
+ }
+ }
++#else
++#define acpi_register_lapic_address(address)
++#endif
+
+ static int __init early_acpi_parse_madt_lapic_addr_ovr(void)
+ {
+--- sle11-2009-05-14.orig/arch/x86/kernel/acpi/sleep-xen.c 2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/arch/x86/kernel/acpi/sleep-xen.c 2009-03-16 16:38:05.000000000 +0100
+@@ -10,15 +10,19 @@
+ #include <linux/dmi.h>
+ #include <linux/cpumask.h>
+
+-#include <asm/smp.h>
++#include "realmode/wakeup.h"
++#include "sleep.h"
+
+ #ifndef CONFIG_ACPI_PV_SLEEP
+-/* address in low memory of the wakeup routine. */
+-unsigned long acpi_wakeup_address = 0;
++unsigned long acpi_wakeup_address;
+ unsigned long acpi_realmode_flags;
+-extern char wakeup_start, wakeup_end;
+
+-extern unsigned long acpi_copy_wakeup_routine(unsigned long);
++/* address in low memory of the wakeup routine. */
++static unsigned long acpi_realmode;
++
++#ifdef CONFIG_64BIT
++static char temp_stack[10240];
++#endif
+ #endif
+
+ /**
+@@ -26,17 +30,69 @@ extern unsigned long acpi_copy_wakeup_ro
+ *
+ * Create an identity mapped page table and copy the wakeup routine to
+ * low memory.
++ *
++ * Note that this is too late to change acpi_wakeup_address.
+ */
+ int acpi_save_state_mem(void)
+ {
+ #ifndef CONFIG_ACPI_PV_SLEEP
+- if (!acpi_wakeup_address) {
+- printk(KERN_ERR "Could not allocate memory during boot, S3 disabled\n");
++ struct wakeup_header *header;
++
++ if (!acpi_realmode) {
++ printk(KERN_ERR "Could not allocate memory during boot, "
++ "S3 disabled\n");
+ return -ENOMEM;
+ }
+- memcpy((void *)acpi_wakeup_address, &wakeup_start,
+- &wakeup_end - &wakeup_start);
+- acpi_copy_wakeup_routine(acpi_wakeup_address);
++ memcpy((void *)acpi_realmode, &wakeup_code_start, WAKEUP_SIZE);
++
++ header = (struct wakeup_header *)(acpi_realmode + HEADER_OFFSET);
++ if (header->signature != 0x51ee1111) {
++ printk(KERN_ERR "wakeup header does not match\n");
++ return -EINVAL;
++ }
++
++ header->video_mode = saved_video_mode;
++
++ header->wakeup_jmp_seg = acpi_wakeup_address >> 4;
++ /* GDT[0]: GDT self-pointer */
++ header->wakeup_gdt[0] =
++ (u64)(sizeof(header->wakeup_gdt) - 1) +
++ ((u64)(acpi_wakeup_address +
++ ((char *)&header->wakeup_gdt - (char *)acpi_realmode))
++ << 16);
++ /* GDT[1]: real-mode-like code segment */
++ header->wakeup_gdt[1] = (0x009bULL << 40) +
++ ((u64)acpi_wakeup_address << 16) + 0xffff;
++ /* GDT[2]: real-mode-like data segment */
++ header->wakeup_gdt[2] = (0x0093ULL << 40) +
++ ((u64)acpi_wakeup_address << 16) + 0xffff;
++
++#ifndef CONFIG_64BIT
++ store_gdt((struct desc_ptr *)&header->pmode_gdt);
++
++ header->pmode_efer_low = nx_enabled;
++ if (header->pmode_efer_low & 1) {
++ /* This is strange, why not save efer, always? */
++ rdmsr(MSR_EFER, header->pmode_efer_low,
++ header->pmode_efer_high);
++ }
++#endif /* !CONFIG_64BIT */
++
++ header->pmode_cr0 = read_cr0();
++ header->pmode_cr4 = read_cr4();
++ header->realmode_flags = acpi_realmode_flags;
++ header->real_magic = 0x12345678;
++
++#ifndef CONFIG_64BIT
++ header->pmode_entry = (u32)&wakeup_pmode_return;
++ header->pmode_cr3 = (u32)(swsusp_pg_dir - __PAGE_OFFSET);
++ saved_magic = 0x12345678;
++#else /* CONFIG_64BIT */
++ header->trampoline_segment = setup_trampoline() >> 4;
++ init_rsp = (unsigned long)temp_stack + 4096;
++ initial_code = (unsigned long)wakeup_long64;
++ saved_magic = 0x123456789abcdef0;
++#endif /* CONFIG_64BIT */
+ #endif
+
+ return 0;
+@@ -61,15 +117,20 @@ void acpi_restore_state_mem(void)
+ void __init acpi_reserve_bootmem(void)
+ {
+ #ifndef CONFIG_ACPI_PV_SLEEP
+- if ((&wakeup_end - &wakeup_start) > PAGE_SIZE*2) {
++ if ((&wakeup_code_end - &wakeup_code_start) > WAKEUP_SIZE) {
+ printk(KERN_ERR
+ "ACPI: Wakeup code way too big, S3 disabled.\n");
+ return;
+ }
+
+- acpi_wakeup_address = (unsigned long)alloc_bootmem_low(PAGE_SIZE*2);
+- if (!acpi_wakeup_address)
++ acpi_realmode = (unsigned long)alloc_bootmem_low(WAKEUP_SIZE);
++
++ if (!acpi_realmode) {
+ printk(KERN_ERR "ACPI: Cannot allocate lowmem, S3 disabled.\n");
++ return;
++ }
++
++ acpi_wakeup_address = virt_to_phys((void *)acpi_realmode);
+ #endif
+ }
+
+--- sle11-2009-05-14.orig/arch/x86/kernel/cpu/common-xen.c 2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/arch/x86/kernel/cpu/common-xen.c 2009-03-16 16:38:05.000000000 +0100
+@@ -5,7 +5,6 @@
+ #include <linux/module.h>
+ #include <linux/percpu.h>
+ #include <linux/bootmem.h>
+-#include <asm/semaphore.h>
+ #include <asm/processor.h>
+ #include <asm/i387.h>
+ #include <asm/msr.h>
+@@ -13,6 +12,7 @@
+ #include <asm/mmu_context.h>
+ #include <asm/mtrr.h>
+ #include <asm/mce.h>
++#include <asm/pat.h>
+ #ifdef CONFIG_X86_LOCAL_APIC
+ #include <asm/mpspec.h>
+ #include <asm/apic.h>
+@@ -69,9 +69,9 @@ __u32 cleared_cpu_caps[NCAPINTS] __cpuin
+ static int cachesize_override __cpuinitdata = -1;
+ static int disable_x86_serial_nr __cpuinitdata = 1;
+
+-struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {};
++struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {};
+
+-static void __cpuinit default_init(struct cpuinfo_x86 * c)
++static void __cpuinit default_init(struct cpuinfo_x86 *c)
+ {
+ /* Not much we can do here... */
+ /* Check if at least it has cpuid */
+@@ -88,11 +88,11 @@ static struct cpu_dev __cpuinitdata defa
+ .c_init = default_init,
+ .c_vendor = "Unknown",
+ };
+-static struct cpu_dev * this_cpu __cpuinitdata = &default_cpu;
++static struct cpu_dev *this_cpu __cpuinitdata = &default_cpu;
+
+ static int __init cachesize_setup(char *str)
+ {
+- get_option (&str, &cachesize_override);
++ get_option(&str, &cachesize_override);
+ return 1;
+ }
+ __setup("cachesize=", cachesize_setup);
+@@ -114,12 +114,12 @@ int __cpuinit get_model_name(struct cpui
+ /* Intel chips right-justify this string for some dumb reason;
+ undo that brain damage */
+ p = q = &c->x86_model_id[0];
+- while ( *p == ' ' )
++ while (*p == ' ')
+ p++;
+- if ( p != q ) {
+- while ( *p )
++ if (p != q) {
++ while (*p)
+ *q++ = *p++;
+- while ( q <= &c->x86_model_id[48] )
++ while (q <= &c->x86_model_id[48])
+ *q++ = '\0'; /* Zero-pad the rest */
+ }
+
+@@ -137,7 +137,7 @@ void __cpuinit display_cacheinfo(struct
+ cpuid(0x80000005, &dummy, &dummy, &ecx, &edx);
+ printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n",
+ edx>>24, edx&0xFF, ecx>>24, ecx&0xFF);
+- c->x86_cache_size=(ecx>>24)+(edx>>24);
++ c->x86_cache_size = (ecx>>24)+(edx>>24);
+ }
+
+ if (n < 0x80000006) /* Some chips just has a large L1. */
+@@ -145,16 +145,16 @@ void __cpuinit display_cacheinfo(struct
+
+ ecx = cpuid_ecx(0x80000006);
+ l2size = ecx >> 16;
+-
++
+ /* do processor-specific cache resizing */
+ if (this_cpu->c_size_cache)
+- l2size = this_cpu->c_size_cache(c,l2size);
++ l2size = this_cpu->c_size_cache(c, l2size);
+
+ /* Allow user to override all this if necessary. */
+ if (cachesize_override != -1)
+ l2size = cachesize_override;
+
+- if ( l2size == 0 )
++ if (l2size == 0)
+ return; /* Again, no L2 cache is possible */
+
+ c->x86_cache_size = l2size;
+@@ -163,16 +163,19 @@ void __cpuinit display_cacheinfo(struct
+ l2size, ecx & 0xFF);
+ }
+
+-/* Naming convention should be: <Name> [(<Codename>)] */
+-/* This table only is used unless init_<vendor>() below doesn't set it; */
+-/* in particular, if CPUID levels 0x80000002..4 are supported, this isn't used */
++/*
++ * Naming convention should be: <Name> [(<Codename>)]
++ * This table only is used unless init_<vendor>() below doesn't set it;
++ * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used
++ *
++ */
+
+ /* Look up CPU names by table lookup. */
+ static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c)
+ {
+ struct cpu_model_info *info;
+
+- if ( c->x86_model >= 16 )
++ if (c->x86_model >= 16)
+ return NULL; /* Range check */
+
+ if (!this_cpu)
+@@ -197,9 +200,9 @@ static void __cpuinit get_cpu_vendor(str
+
+ for (i = 0; i < X86_VENDOR_NUM; i++) {
+ if (cpu_devs[i]) {
+- if (!strcmp(v,cpu_devs[i]->c_ident[0]) ||
+- (cpu_devs[i]->c_ident[1] &&
+- !strcmp(v,cpu_devs[i]->c_ident[1]))) {
++ if (!strcmp(v, cpu_devs[i]->c_ident[0]) ||
++ (cpu_devs[i]->c_ident[1] &&
++ !strcmp(v, cpu_devs[i]->c_ident[1]))) {
+ c->x86_vendor = i;
+ if (!early)
+ this_cpu = cpu_devs[i];
+@@ -217,7 +220,7 @@ static void __cpuinit get_cpu_vendor(str
+ }
+
+
+-static int __init x86_fxsr_setup(char * s)
++static int __init x86_fxsr_setup(char *s)
+ {
+ setup_clear_cpu_cap(X86_FEATURE_FXSR);
+ setup_clear_cpu_cap(X86_FEATURE_XMM);
+@@ -226,7 +229,7 @@ static int __init x86_fxsr_setup(char *
+ __setup("nofxsr", x86_fxsr_setup);
+
+
+-static int __init x86_sep_setup(char * s)
++static int __init x86_sep_setup(char *s)
+ {
+ setup_clear_cpu_cap(X86_FEATURE_SEP);
+ return 1;
+@@ -315,12 +318,15 @@ static void __cpuinit early_get_cap(stru
+
+ }
+
+-/* Do minimum CPU detection early.
+- Fields really needed: vendor, cpuid_level, family, model, mask, cache alignment.
+- The others are not touched to avoid unwanted side effects.
+-
+- WARNING: this function is only called on the BP. Don't add code here
+- that is supposed to run on all CPUs. */
++/*
++ * Do minimum CPU detection early.
++ * Fields really needed: vendor, cpuid_level, family, model, mask,
++ * cache alignment.
++ * The others are not touched to avoid unwanted side effects.
++ *
++ * WARNING: this function is only called on the BP. Don't add code here
++ * that is supposed to run on all CPUs.
++ */
+ static void __init early_cpu_detect(void)
+ {
+ struct cpuinfo_x86 *c = &boot_cpu_data;
+@@ -335,19 +341,14 @@ static void __init early_cpu_detect(void
+
+ get_cpu_vendor(c, 1);
+
+- switch (c->x86_vendor) {
+- case X86_VENDOR_AMD:
+- early_init_amd(c);
+- break;
+- case X86_VENDOR_INTEL:
+- early_init_intel(c);
+- break;
+- }
++ if (c->x86_vendor != X86_VENDOR_UNKNOWN &&
++ cpu_devs[c->x86_vendor]->c_early_init)
++ cpu_devs[c->x86_vendor]->c_early_init(c);
+
+ early_get_cap(c);
+ }
+
+-static void __cpuinit generic_identify(struct cpuinfo_x86 * c)
++static void __cpuinit generic_identify(struct cpuinfo_x86 *c)
+ {
+ u32 tfms, xlvl;
+ unsigned int ebx;
+@@ -358,13 +359,12 @@ static void __cpuinit generic_identify(s
+ (unsigned int *)&c->x86_vendor_id[0],
+ (unsigned int *)&c->x86_vendor_id[8],
+ (unsigned int *)&c->x86_vendor_id[4]);
+-
++
+ get_cpu_vendor(c, 0);
+ /* Initialize the standard set of capabilities */
+ /* Note that the vendor-specific code below might override */
+-
+ /* Intel-defined flags: level 0x00000001 */
+- if ( c->cpuid_level >= 0x00000001 ) {
++ if (c->cpuid_level >= 0x00000001) {
+ u32 capability, excap;
+ cpuid(0x00000001, &tfms, &ebx, &excap, &capability);
+ c->x86_capability[0] = capability;
+@@ -376,12 +376,14 @@ static void __cpuinit generic_identify(s
+ if (c->x86 >= 0x6)
+ c->x86_model += ((tfms >> 16) & 0xF) << 4;
+ c->x86_mask = tfms & 15;
++ c->initial_apicid = (ebx >> 24) & 0xFF;
+ #ifdef CONFIG_X86_HT
+- c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0);
++ c->apicid = phys_pkg_id(c->initial_apicid, 0);
++ c->phys_proc_id = c->initial_apicid;
+ #else
+- c->apicid = (ebx >> 24) & 0xFF;
++ c->apicid = c->initial_apicid;
+ #endif
+- if (c->x86_capability[0] & (1<<19))
++ if (test_cpu_cap(c, X86_FEATURE_CLFLSH))
+ c->x86_clflush_size = ((ebx >> 8) & 0xff) * 8;
+ } else {
+ /* Have CPUID level 0 only - unheard of */
+@@ -390,33 +392,30 @@ static void __cpuinit generic_identify(s
+
+ /* AMD-defined flags: level 0x80000001 */
+ xlvl = cpuid_eax(0x80000000);
+- if ( (xlvl & 0xffff0000) == 0x80000000 ) {
+- if ( xlvl >= 0x80000001 ) {
++ if ((xlvl & 0xffff0000) == 0x80000000) {
++ if (xlvl >= 0x80000001) {
+ c->x86_capability[1] = cpuid_edx(0x80000001);
+ c->x86_capability[6] = cpuid_ecx(0x80000001);
+ }
+- if ( xlvl >= 0x80000004 )
++ if (xlvl >= 0x80000004)
+ get_model_name(c); /* Default name */
+ }
+
+ init_scattered_cpuid_features(c);
+ }
+
+-#ifdef CONFIG_X86_HT
+- c->phys_proc_id = (cpuid_ebx(1) >> 24) & 0xff;
+-#endif
+ }
+
+ static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
+ {
+- if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr ) {
++ if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr) {
+ /* Disable processor serial number */
+- unsigned long lo,hi;
+- rdmsr(MSR_IA32_BBL_CR_CTL,lo,hi);
++ unsigned long lo, hi;
++ rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi);
+ lo |= 0x200000;
+- wrmsr(MSR_IA32_BBL_CR_CTL,lo,hi);
++ wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi);
+ printk(KERN_NOTICE "CPU serial number disabled.\n");
+- clear_bit(X86_FEATURE_PN, c->x86_capability);
++ clear_cpu_cap(c, X86_FEATURE_PN);
+
+ /* Disabling the serial number may affect the cpuid level */
+ c->cpuid_level = cpuid_eax(0);
+@@ -451,9 +450,11 @@ void __cpuinit identify_cpu(struct cpuin
+ memset(&c->x86_capability, 0, sizeof c->x86_capability);
+
+ if (!have_cpuid_p()) {
+- /* First of all, decide if this is a 486 or higher */
+- /* It's a 486 if we can modify the AC flag */
+- if ( flag_is_changeable_p(X86_EFLAGS_AC) )
++ /*
++ * First of all, decide if this is a 486 or higher
++ * It's a 486 if we can modify the AC flag
++ */
++ if (flag_is_changeable_p(X86_EFLAGS_AC))
+ c->x86 = 4;
+ else
+ c->x86 = 3;
+@@ -486,10 +487,10 @@ void __cpuinit identify_cpu(struct cpuin
+ */
+
+ /* If the model name is still unset, do table lookup. */
+- if ( !c->x86_model_id[0] ) {
++ if (!c->x86_model_id[0]) {
+ char *p;
+ p = table_lookup_model(c);
+- if ( p )
++ if (p)
+ strcpy(c->x86_model_id, p);
+ else
+ /* Last resort... */
+@@ -503,9 +504,9 @@ void __cpuinit identify_cpu(struct cpuin
+ * common between the CPUs. The first time this routine gets
+ * executed, c == &boot_cpu_data.
+ */
+- if ( c != &boot_cpu_data ) {
++ if (c != &boot_cpu_data) {
+ /* AND the already accumulated flags with these */
+- for ( i = 0 ; i < NCAPINTS ; i++ )
++ for (i = 0 ; i < NCAPINTS ; i++)
+ boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
+ }
+
+@@ -549,7 +550,7 @@ void __cpuinit detect_ht(struct cpuinfo_
+
+ if (smp_num_siblings == 1) {
+ printk(KERN_INFO "CPU: Hyper-Threading is disabled\n");
+- } else if (smp_num_siblings > 1 ) {
++ } else if (smp_num_siblings > 1) {
+
+ if (smp_num_siblings > NR_CPUS) {
+ printk(KERN_WARNING "CPU: Unsupported number of the "
+@@ -559,7 +560,7 @@ void __cpuinit detect_ht(struct cpuinfo_
+ }
+
+ index_msb = get_count_order(smp_num_siblings);
+- c->phys_proc_id = phys_pkg_id((ebx >> 24) & 0xFF, index_msb);
++ c->phys_proc_id = phys_pkg_id(c->initial_apicid, index_msb);
+
+ printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
+ c->phys_proc_id);
+@@ -570,7 +571,7 @@ void __cpuinit detect_ht(struct cpuinfo_
+
+ core_bits = get_count_order(c->x86_max_cores);
+
+- c->cpu_core_id = phys_pkg_id((ebx >> 24) & 0xFF, index_msb) &
++ c->cpu_core_id = phys_pkg_id(c->initial_apicid, index_msb) &
+ ((1 << core_bits) - 1);
+
+ if (c->x86_max_cores > 1)
+@@ -604,7 +605,7 @@ void __cpuinit print_cpu_info(struct cpu
+ else
+ printk("%s", c->x86_model_id);
+
+- if (c->x86_mask || c->cpuid_level >= 0)
++ if (c->x86_mask || c->cpuid_level >= 0)
+ printk(" stepping %02x\n", c->x86_mask);
+ else
+ printk("\n");
+@@ -623,24 +624,17 @@ __setup("clearcpuid=", setup_disablecpui
+
+ cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
+
+-/* This is hacky. :)
+- * We're emulating future behavior.
+- * In the future, the cpu-specific init functions will be called implicitly
+- * via the magic of initcalls.
+- * They will insert themselves into the cpu_devs structure.
+- * Then, when cpu_init() is called, we can just iterate over that array.
+- */
+ void __init early_cpu_init(void)
+ {
+- intel_cpu_init();
+- cyrix_init_cpu();
+- nsc_init_cpu();
+- amd_init_cpu();
+- centaur_init_cpu();
+- transmeta_init_cpu();
+- nexgen_init_cpu();
+- umc_init_cpu();
++ struct cpu_vendor_dev *cvdev;
++
++ for (cvdev = __x86cpuvendor_start ;
++ cvdev < __x86cpuvendor_end ;
++ cvdev++)
++ cpu_devs[cvdev->vendor] = cvdev->cpu_dev;
++
+ early_cpu_detect();
++ validate_pat_support(&boot_cpu_data);
+ }
+
+ /* Make sure %fs is initialized properly in idle threads */
+@@ -685,7 +679,7 @@ void __cpuinit cpu_init(void)
+ int cpu = smp_processor_id();
+ struct task_struct *curr = current;
+ #ifndef CONFIG_X86_NO_TSS
+- struct tss_struct * t = &per_cpu(init_tss, cpu);
++ struct tss_struct *t = &per_cpu(init_tss, cpu);
+ #endif
+ struct thread_struct *thread = &curr->thread;
+
+@@ -738,7 +732,7 @@ void __cpuinit cpu_init(void)
+ mxcsr_feature_mask_init();
+ }
+
+-#ifdef CONFIG_HOTPLUG_CPU
++#if defined(CONFIG_HOTPLUG_CPU) && !defined(CONFIG_XEN)
+ void __cpuinit cpu_uninit(void)
+ {
+ int cpu = raw_smp_processor_id();
+--- sle11-2009-05-14.orig/arch/x86/kernel/cpu/mtrr/main-xen.c 2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/arch/x86/kernel/cpu/mtrr/main-xen.c 2009-03-16 16:38:05.000000000 +0100
+@@ -35,6 +35,8 @@ struct mtrr_ops *mtrr_if = &generic_mtrr
+ unsigned int num_var_ranges;
+ unsigned int mtrr_usage_table[MAX_VAR_RANGES];
+
++static u64 tom2;
++
+ static void __init set_num_var_ranges(void)
+ {
+ struct xen_platform_op op;
+@@ -162,8 +164,144 @@ mtrr_del(int reg, unsigned long base, un
+ EXPORT_SYMBOL(mtrr_add);
+ EXPORT_SYMBOL(mtrr_del);
+
++/*
++ * Returns the effective MTRR type for the region
++ * Error returns:
++ * - 0xFE - when the range is "not entirely covered" by _any_ var range MTRR
++ * - 0xFF - when MTRR is not enabled
++ */
++u8 mtrr_type_lookup(u64 start, u64 end)
++{
++ int i, error;
++ u64 start_mfn, end_mfn, base_mfn, top_mfn;
++ u8 prev_match, curr_match;
++ struct xen_platform_op op;
++
++ if (!is_initial_xendomain())
++ return MTRR_TYPE_WRBACK;
++
++ if (!num_var_ranges)
++ return 0xFF;
++
++ start_mfn = start >> PAGE_SHIFT;
++ /* Make end inclusive end, instead of exclusive */
++ end_mfn = --end >> PAGE_SHIFT;
++
++ /* Look in fixed ranges. Just return the type as per start */
++ if (start_mfn < 0x100) {
++#if 0 /* todo */
++ op.cmd = XENPF_read_memtype;
++ op.u.read_memtype.reg = ???;
++ error = HYPERVISOR_platform_op(&op);
++ if (!error)
++ return op.u.read_memtype.type;
++#endif
++ return MTRR_TYPE_UNCACHABLE;
++ }
++
++ /*
++ * Look in variable ranges
++ * Look for multiple ranges matching this address and pick type
++ * as per MTRR precedence
++ */
++ prev_match = 0xFF;
++ for (i = 0; i < num_var_ranges; ++i) {
++ op.cmd = XENPF_read_memtype;
++ op.u.read_memtype.reg = i;
++ error = HYPERVISOR_platform_op(&op);
++
++ if (error || !op.u.read_memtype.nr_mfns)
++ continue;
++
++ base_mfn = op.u.read_memtype.mfn;
++ top_mfn = base_mfn + op.u.read_memtype.nr_mfns - 1;
++
++ if (base_mfn > end_mfn || start_mfn > top_mfn) {
++ continue;
++ }
++
++ if (base_mfn > start_mfn || end_mfn > top_mfn) {
++ return 0xFE;
++ }
++
++ curr_match = op.u.read_memtype.type;
++ if (prev_match == 0xFF) {
++ prev_match = curr_match;
++ continue;
++ }
++
++ if (prev_match == MTRR_TYPE_UNCACHABLE ||
++ curr_match == MTRR_TYPE_UNCACHABLE) {
++ return MTRR_TYPE_UNCACHABLE;
++ }
++
++ if ((prev_match == MTRR_TYPE_WRBACK &&
++ curr_match == MTRR_TYPE_WRTHROUGH) ||
++ (prev_match == MTRR_TYPE_WRTHROUGH &&
++ curr_match == MTRR_TYPE_WRBACK)) {
++ prev_match = MTRR_TYPE_WRTHROUGH;
++ curr_match = MTRR_TYPE_WRTHROUGH;
++ }
++
++ if (prev_match != curr_match) {
++ return MTRR_TYPE_UNCACHABLE;
++ }
++ }
++
++ if (tom2) {
++ if (start >= (1ULL<<32) && (end < tom2))
++ return MTRR_TYPE_WRBACK;
++ }
++
++ if (prev_match != 0xFF)
++ return prev_match;
++
++#if 0 /* todo */
++ op.cmd = XENPF_read_def_memtype;
++ error = HYPERVISOR_platform_op(&op);
++ if (!error)
++ return op.u.read_def_memtype.type;
++#endif
++ return MTRR_TYPE_UNCACHABLE;
++}
++
++/*
++ * Newer AMD K8s and later CPUs have a special magic MSR way to force WB
++ * for memory >4GB. Check for that here.
++ * Note this won't check if the MTRRs < 4GB where the magic bit doesn't
++ * apply to are wrong, but so far we don't know of any such case in the wild.
++ */
++#define Tom2Enabled (1U << 21)
++#define Tom2ForceMemTypeWB (1U << 22)
++
++int __init amd_special_default_mtrr(void)
++{
++ u32 l, h;
++
++ if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
++ return 0;
++ if (boot_cpu_data.x86 < 0xf || boot_cpu_data.x86 > 0x11)
++ return 0;
++ /* In case some hypervisor doesn't pass SYSCFG through */
++ if (rdmsr_safe(MSR_K8_SYSCFG, &l, &h) < 0)
++ return 0;
++ /*
++ * Memory between 4GB and top of mem is forced WB by this magic bit.
++ * Reserved before K8RevF, but should be zero there.
++ */
++ if ((l & (Tom2Enabled | Tom2ForceMemTypeWB)) ==
++ (Tom2Enabled | Tom2ForceMemTypeWB))
++ return 1;
++ return 0;
++}
++
+ void __init mtrr_bp_init(void)
+ {
++ if (amd_special_default_mtrr()) {
++ /* TOP_MEM2 */
++ rdmsrl(MSR_K8_TOP_MEM2, tom2);
++ tom2 &= 0xffffff8000000ULL;
++ }
+ }
+
+ void mtrr_ap_init(void)
+--- sle11-2009-05-14.orig/arch/x86/kernel/e820_32-xen.c 2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/arch/x86/kernel/e820_32-xen.c 2009-03-16 16:38:05.000000000 +0100
+@@ -469,7 +469,7 @@ int __init sanitize_e820_map(struct e820
+ * thinkpad 560x, for example, does not cooperate with the memory
+ * detection code.)
+ */
+-int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
++int __init copy_e820_map(struct e820entry *biosmap, int nr_map)
+ {
+ #ifndef CONFIG_XEN
+ /* Only one memory region (or negative)? Ignore it */
+@@ -480,33 +480,17 @@ int __init copy_e820_map(struct e820entr
+ #endif
+
+ do {
+- unsigned long long start = biosmap->addr;
+- unsigned long long size = biosmap->size;
+- unsigned long long end = start + size;
+- unsigned long type = biosmap->type;
++ u64 start = biosmap->addr;
++ u64 size = biosmap->size;
++ u64 end = start + size;
++ u32 type = biosmap->type;
+
+ /* Overflow in 64 bits? Ignore the memory map. */
+ if (start > end)
+ return -1;
+
+-#ifndef CONFIG_XEN
+- /*
+- * Some BIOSes claim RAM in the 640k - 1M region.
+- * Not right. Fix it up.
+- */
+- if (type == E820_RAM) {
+- if (start < 0x100000ULL && end > 0xA0000ULL) {
+- if (start < 0xA0000ULL)
+- add_memory_region(start, 0xA0000ULL-start, type);
+- if (end <= 0x100000ULL)
+- continue;
+- start = 0x100000ULL;
+- size = end - start;
+- }
+- }
+-#endif
+ add_memory_region(start, size, type);
+- } while (biosmap++,--nr_map);
++ } while (biosmap++, --nr_map);
+
+ #ifdef CONFIG_XEN
+ if (is_initial_xendomain()) {
+@@ -528,7 +512,7 @@ int __init copy_e820_map(struct e820entr
+ /*
+ * Find the highest page frame number we have available
+ */
+-void __init find_max_pfn(void)
++void __init propagate_e820_map(void)
+ {
+ int i;
+
+@@ -814,7 +798,7 @@ static int __init parse_memmap(char *arg
+ * size before original memory map is
+ * reset.
+ */
+- find_max_pfn();
++ propagate_e820_map();
+ saved_max_pfn = max_pfn;
+ #endif
+ e820.nr_map = 0;
+--- sle11-2009-05-14.orig/arch/x86/kernel/e820_64-xen.c 2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/arch/x86/kernel/e820_64-xen.c 2009-03-16 16:38:05.000000000 +0100
+@@ -40,11 +40,11 @@ struct e820map machine_e820;
+ unsigned long end_pfn;
+
+ /*
+- * end_pfn only includes RAM, while end_pfn_map includes all e820 entries.
+- * The direct mapping extends to end_pfn_map, so that we can directly access
++ * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
++ * The direct mapping extends to max_pfn_mapped, so that we can directly access
+ * apertures, ACPI and other tables without having to play with fixmaps.
+ */
+-unsigned long end_pfn_map;
++unsigned long max_pfn_mapped;
+
+ /*
+ * Last pfn which the user wants to use.
+@@ -63,8 +63,8 @@ struct early_res {
+ static struct early_res early_res[MAX_EARLY_RES] __initdata = {
+ #ifndef CONFIG_XEN
+ { 0, PAGE_SIZE, "BIOS data page" }, /* BIOS data page */
+-#ifdef CONFIG_SMP
+- { SMP_TRAMPOLINE_BASE, SMP_TRAMPOLINE_BASE + 2*PAGE_SIZE, "SMP_TRAMPOLINE" },
++#ifdef CONFIG_X86_TRAMPOLINE
++ { TRAMPOLINE_BASE, TRAMPOLINE_BASE + 2 * PAGE_SIZE, "TRAMPOLINE" },
+ #endif
+ #endif
+ {}
+@@ -89,19 +89,47 @@ void __init reserve_early(unsigned long
+ strncpy(r->name, name, sizeof(r->name) - 1);
+ }
+
+-void __init early_res_to_bootmem(void)
++void __init free_early(unsigned long start, unsigned long end)
++{
++ struct early_res *r;
++ int i, j;
++
++ for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
++ r = &early_res[i];
++ if (start == r->start && end == r->end)
++ break;
++ }
++ if (i >= MAX_EARLY_RES || !early_res[i].end)
++ panic("free_early on not reserved area: %lx-%lx!", start, end);
++
++ for (j = i + 1; j < MAX_EARLY_RES && early_res[j].end; j++)
++ ;
++
++ memmove(&early_res[i], &early_res[i + 1],
++ (j - 1 - i) * sizeof(struct early_res));
++
++ early_res[j - 1].end = 0;
++}
++
++void __init early_res_to_bootmem(unsigned long start, unsigned long end)
+ {
+ int i;
++ unsigned long final_start, final_end;
+ for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
+ struct early_res *r = &early_res[i];
+- printk(KERN_INFO "early res: %d [%lx-%lx] %s\n", i,
+- r->start, r->end - 1, r->name);
+- reserve_bootmem_generic(r->start, r->end - r->start);
++ final_start = max(start, r->start);
++ final_end = min(end, r->end);
++ if (final_start >= final_end)
++ continue;
++ printk(KERN_INFO " early res: %d [%lx-%lx] %s\n", i,
++ final_start, final_end - 1, r->name);
++ reserve_bootmem_generic(final_start, final_end - final_start);
+ }
+ }
+
+ /* Check for already reserved areas */
+-static inline int bad_addr(unsigned long *addrp, unsigned long size)
++static inline int __init
++bad_addr(unsigned long *addrp, unsigned long size, unsigned long align)
+ {
+ int i;
+ unsigned long addr = *addrp, last;
+@@ -111,7 +139,7 @@ again:
+ for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
+ struct early_res *r = &early_res[i];
+ if (last >= r->start && addr < r->end) {
+- *addrp = addr = r->end;
++ *addrp = addr = round_up(r->end, align);
+ changed = 1;
+ goto again;
+ }
+@@ -119,6 +147,40 @@ again:
+ return changed;
+ }
+
++/* Check for already reserved areas */
++static inline int __init
++bad_addr_size(unsigned long *addrp, unsigned long *sizep, unsigned long align)
++{
++ int i;
++ unsigned long addr = *addrp, last;
++ unsigned long size = *sizep;
++ int changed = 0;
++again:
++ last = addr + size;
++ for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
++ struct early_res *r = &early_res[i];
++ if (last > r->start && addr < r->start) {
++ size = r->start - addr;
++ changed = 1;
++ goto again;
++ }
++ if (last > r->end && addr < r->end) {
++ addr = round_up(r->end, align);
++ size = last - addr;
++ changed = 1;
++ goto again;
++ }
++ if (last <= r->end && addr >= r->start) {
++ (*sizep)++;
++ return 0;
++ }
++ }
++ if (changed) {
++ *addrp = addr;
++ *sizep = size;
++ }
++ return changed;
++}
+ /*
+ * This function checks if any part of the range <start,end> is mapped
+ * with type.
+@@ -194,26 +256,27 @@ int __init e820_all_mapped(unsigned long
+ * Find a free area with specified alignment in a specific range.
+ */
+ unsigned long __init find_e820_area(unsigned long start, unsigned long end,
+- unsigned size, unsigned long align)
++ unsigned long size, unsigned long align)
+ {
+ int i;
+- unsigned long mask = ~(align - 1);
+
+ for (i = 0; i < e820.nr_map; i++) {
+ struct e820entry *ei = &e820.map[i];
+- unsigned long addr = ei->addr, last;
++ unsigned long addr, last;
++ unsigned long ei_last;
+
+ if (ei->type != E820_RAM)
+ continue;
++ addr = round_up(ei->addr, align);
++ ei_last = ei->addr + ei->size;
+ if (addr < start)
+- addr = start;
+- if (addr > ei->addr + ei->size)
++ addr = round_up(start, align);
++ if (addr >= ei_last)
+ continue;
+- while (bad_addr(&addr, size) && addr+size <= ei->addr+ei->size)
++ while (bad_addr(&addr, size, align) && addr+size <= ei_last)
+ ;
+- addr = (addr + align - 1) & mask;
+ last = addr + size;
+- if (last > ei->addr + ei->size)
++ if (last > ei_last)
+ continue;
+ if (last > end)
+ continue;
+@@ -223,6 +286,40 @@ unsigned long __init find_e820_area(unsi
+ }
+
+ /*
++ * Find next free range after *start
++ */
++unsigned long __init find_e820_area_size(unsigned long start,
++ unsigned long *sizep,
++ unsigned long align)
++{
++ int i;
++
++ for (i = 0; i < e820.nr_map; i++) {
++ struct e820entry *ei = &e820.map[i];
++ unsigned long addr, last;
++ unsigned long ei_last;
++
++ if (ei->type != E820_RAM)
++ continue;
++ addr = round_up(ei->addr, align);
++ ei_last = ei->addr + ei->size;
++ if (addr < start)
++ addr = round_up(start, align);
++ if (addr >= ei_last)
++ continue;
++ *sizep = ei_last - addr;
++ while (bad_addr_size(&addr, sizep, align) &&
++ addr + *sizep <= ei_last)
++ ;
++ last = addr + *sizep;
++ if (last > ei_last)
++ continue;
++ return addr;
++ }
++ return -1UL;
++
++}
++/*
+ * Find the highest page frame number we have available
+ */
+ unsigned long __init e820_end_of_ram(void)
+@@ -231,31 +328,29 @@ unsigned long __init e820_end_of_ram(voi
+
+ end_pfn = find_max_pfn_with_active_regions();
+
+- if (end_pfn > end_pfn_map)
+- end_pfn_map = end_pfn;
+- if (end_pfn_map > MAXMEM>>PAGE_SHIFT)
+- end_pfn_map = MAXMEM>>PAGE_SHIFT;
++ if (end_pfn > max_pfn_mapped)
++ max_pfn_mapped = end_pfn;
++ if (max_pfn_mapped > MAXMEM>>PAGE_SHIFT)
++ max_pfn_mapped = MAXMEM>>PAGE_SHIFT;
+ if (end_pfn > end_user_pfn)
+ end_pfn = end_user_pfn;
+- if (end_pfn > end_pfn_map)
+- end_pfn = end_pfn_map;
++ if (end_pfn > max_pfn_mapped)
++ end_pfn = max_pfn_mapped;
+
+- printk(KERN_INFO "end_pfn_map = %lu\n", end_pfn_map);
++ printk(KERN_INFO "max_pfn_mapped = %lu\n", max_pfn_mapped);
+ return end_pfn;
+ }
+
+ /*
+ * Mark e820 reserved areas as busy for the resource manager.
+ */
+-void __init e820_reserve_resources(struct e820entry *e820, int nr_map,
+- struct resource *code_resource,
+- struct resource *data_resource,
+- struct resource *bss_resource)
++void __init e820_reserve_resources(struct e820entry *e820, int nr_map)
+ {
+ int i;
++ struct resource *res;
++
++ res = alloc_bootmem_low(sizeof(struct resource) * nr_map);
+ for (i = 0; i < nr_map; i++) {
+- struct resource *res;
+- res = alloc_bootmem_low(sizeof(struct resource));
+ switch (e820[i].type) {
+ case E820_RAM: res->name = "System RAM"; break;
+ case E820_ACPI: res->name = "ACPI Tables"; break;
+@@ -265,26 +360,8 @@ void __init e820_reserve_resources(struc
+ res->start = e820[i].addr;
+ res->end = res->start + e820[i].size - 1;
+ res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+- request_resource(&iomem_resource, res);
+- if (e820[i].type == E820_RAM) {
+- /*
+- * We don't know which RAM region contains kernel data,
+- * so we try it repeatedly and let the resource manager
+- * test it.
+- */
+-#ifndef CONFIG_XEN
+- request_resource(res, code_resource);
+- request_resource(res, data_resource);
+- request_resource(res, bss_resource);
+-#endif
+-#ifdef CONFIG_KEXEC
+- if (crashk_res.start != crashk_res.end)
+- request_resource(res, &crashk_res);
+-#ifdef CONFIG_XEN
+- xen_machine_kexec_register_resources(res);
+-#endif
+-#endif
+- }
++ insert_resource(&iomem_resource, res);
++ res++;
+ }
+ }
+
+@@ -338,9 +415,9 @@ static int __init e820_find_active_regio
+ if (*ei_startpfn >= *ei_endpfn)
+ return 0;
+
+- /* Check if end_pfn_map should be updated */
+- if (ei->type != E820_RAM && *ei_endpfn > end_pfn_map)
+- end_pfn_map = *ei_endpfn;
++ /* Check if max_pfn_mapped should be updated */
++ if (ei->type != E820_RAM && *ei_endpfn > max_pfn_mapped)
++ max_pfn_mapped = *ei_endpfn;
+
+ /* Skip if map is outside the node */
+ if (ei->type != E820_RAM || *ei_endpfn <= start_pfn ||
+@@ -667,10 +744,10 @@ static int __init copy_e820_map(struct e
+ #endif
+
+ do {
+- unsigned long start = biosmap->addr;
+- unsigned long size = biosmap->size;
+- unsigned long end = start + size;
+- unsigned long type = biosmap->type;
++ u64 start = biosmap->addr;
++ u64 size = biosmap->size;
++ u64 end = start + size;
++ u32 type = biosmap->type;
+
+ /* Overflow in 64 bits? Ignore the memory map. */
+ if (start > end)
+@@ -801,7 +878,7 @@ static int __init parse_memmap_opt(char
+ saved_max_pfn = e820_end_of_ram();
+ remove_all_active_ranges();
+ #endif
+- end_pfn_map = 0;
++ max_pfn_mapped = 0;
+ e820.nr_map = 0;
+ userdef = 1;
+ return 0;
+--- sle11-2009-05-14.orig/arch/x86/kernel/early_printk-xen.c 2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/arch/x86/kernel/early_printk-xen.c 2009-03-16 16:38:05.000000000 +0100
+@@ -13,7 +13,7 @@
+
+ #ifndef CONFIG_XEN
+ static int max_ypos = 25, max_xpos = 80;
+-static int current_ypos = 25, current_xpos = 0;
++static int current_ypos = 25, current_xpos;
+
+ static void early_vga_write(struct console *con, const char *str, unsigned n)
+ {
+@@ -108,12 +108,12 @@ static __init void early_serial_init(cha
+
+ if (*s) {
+ unsigned port;
+- if (!strncmp(s,"0x",2)) {
++ if (!strncmp(s, "0x", 2)) {
+ early_serial_base = simple_strtoul(s, &e, 16);
+ } else {
+ static int bases[] = { 0x3f8, 0x2f8 };
+
+- if (!strncmp(s,"ttyS",4))
++ if (!strncmp(s, "ttyS", 4))
+ s += 4;
+ port = simple_strtoul(s, &e, 10);
+ if (port > 1 || s == e)
+@@ -223,7 +223,7 @@ static struct console simnow_console = {
+
+ /* Direct interface for emergencies */
+ static struct console *early_console = &early_vga_console;
+-static int early_console_initialized = 0;
++static int early_console_initialized;
+
+ void early_printk(const char *fmt, ...)
+ {
+@@ -231,9 +231,9 @@ void early_printk(const char *fmt, ...)
+ int n;
+ va_list ap;
+
+- va_start(ap,fmt);
+- n = vscnprintf(buf,512,fmt,ap);
+- early_console->write(early_console,buf,n);
++ va_start(ap, fmt);
++ n = vscnprintf(buf, 512, fmt, ap);
++ early_console->write(early_console, buf, n);
+ va_end(ap);
+ }
+
+@@ -259,16 +259,16 @@ static int __init setup_early_printk(cha
+ early_console = &early_serial_console;
+ } else if (!strncmp(buf, "vga", 3)) {
+ #ifndef CONFIG_XEN
+- && boot_params.screen_info.orig_video_isVGA == 1) {
++ && boot_params.screen_info.orig_video_isVGA == 1) {
+ max_xpos = boot_params.screen_info.orig_video_cols;
+ max_ypos = boot_params.screen_info.orig_video_lines;
+ current_ypos = boot_params.screen_info.orig_y;
+ #endif
+ early_console = &early_vga_console;
+- } else if (!strncmp(buf, "simnow", 6)) {
+- simnow_init(buf + 6);
+- early_console = &simnow_console;
+- keep_early = 1;
++ } else if (!strncmp(buf, "simnow", 6)) {
++ simnow_init(buf + 6);
++ early_console = &simnow_console;
++ keep_early = 1;
+ #ifdef CONFIG_XEN
+ } else if (!strncmp(buf, "xen", 3)) {
+ early_console = &xenboot_console;
+--- sle11-2009-05-14.orig/arch/x86/kernel/entry_32-xen.S 2009-05-14 11:18:32.000000000 +0200
++++ sle11-2009-05-14/arch/x86/kernel/entry_32-xen.S 2009-03-16 16:38:05.000000000 +0100
+@@ -1,5 +1,4 @@
+ /*
+- * linux/arch/i386/entry.S
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+@@ -51,6 +50,7 @@
+ #include <asm/desc.h>
+ #include <asm/percpu.h>
+ #include <asm/dwarf2.h>
++#include <asm/processor-flags.h>
+ #include "irq_vectors.h"
+ #include <xen/interface/xen.h>
+
+@@ -69,12 +69,6 @@
+
+ #define nr_syscalls ((syscall_table_size)/4)
+
+-CF_MASK = 0x00000001
+-TF_MASK = 0x00000100
+-IF_MASK = 0x00000200
+-DF_MASK = 0x00000400
+-NT_MASK = 0x00004000
+-VM_MASK = 0x00020000
+ /* Pseudo-eflags. */
+ NMI_MASK = 0x80000000
+
+@@ -87,7 +81,7 @@ NMI_MASK = 0x80000000
+
+ .macro TRACE_IRQS_IRET
+ #ifdef CONFIG_TRACE_IRQFLAGS
+- testl $IF_MASK,PT_EFLAGS(%esp) # interrupts off?
++ testl $X86_EFLAGS_IF,PT_EFLAGS(%esp) # interrupts off?
+ jz 1f
+ TRACE_IRQS_ON
+ 1:
+@@ -249,7 +243,7 @@ ret_from_intr:
+ check_userspace:
+ movl PT_EFLAGS(%esp), %eax # mix EFLAGS and CS
+ movb PT_CS(%esp), %al
+- andl $(VM_MASK | SEGMENT_RPL_MASK), %eax
++ andl $(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax
+ cmpl $USER_RPL, %eax
+ jb resume_kernel # not returning to v8086 or userspace
+
+@@ -258,6 +252,7 @@ ENTRY(resume_userspace)
+ DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt
+ # setting need_resched or sigpending
+ # between sampling and the iret
++ TRACE_IRQS_OFF
+ movl TI_flags(%ebp), %ecx
+ andl $_TIF_WORK_MASK, %ecx # is there any work to be done on
+ # int/exception return?
+@@ -274,7 +269,7 @@ need_resched:
+ movl TI_flags(%ebp), %ecx # need_resched set ?
+ testb $_TIF_NEED_RESCHED, %cl
+ jz restore_all
+- testl $IF_MASK,PT_EFLAGS(%esp) # interrupts off (exception path) ?
++ testl $X86_EFLAGS_IF,PT_EFLAGS(%esp) # interrupts off (exception path) ?
+ jz restore_all
+ call preempt_schedule_irq
+ jmp need_resched
+@@ -299,10 +294,10 @@ ENTRY(ia32_sysenter_target)
+ movl SYSENTER_stack_sp0(%esp),%esp
+ sysenter_past_esp:
+ /*
+- * No need to follow this irqs on/off section: the syscall
+- * disabled irqs and here we enable it straight after entry:
++ * Interrupts are disabled here, but we can't trace it until
++ * enough kernel state to call TRACE_IRQS_OFF can be called - but
++ * we immediately enable interrupts at that point anyway.
+ */
+- ENABLE_INTERRUPTS(CLBR_NONE)
+ pushl $(__USER_DS)
+ CFI_ADJUST_CFA_OFFSET 4
+ /*CFI_REL_OFFSET ss, 0*/
+@@ -310,6 +305,7 @@ sysenter_past_esp:
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET esp, 0
+ pushfl
++ orl $X86_EFLAGS_IF, (%esp)
+ CFI_ADJUST_CFA_OFFSET 4
+ pushl $(__USER_CS)
+ CFI_ADJUST_CFA_OFFSET 4
+@@ -323,6 +319,11 @@ sysenter_past_esp:
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET eip, 0
+
++ pushl %eax
++ CFI_ADJUST_CFA_OFFSET 4
++ SAVE_ALL
++ ENABLE_INTERRUPTS(CLBR_NONE)
++
+ /*
+ * Load the potential sixth argument from user stack.
+ * Careful about security.
+@@ -330,14 +331,12 @@ sysenter_past_esp:
+ cmpl $__PAGE_OFFSET-3,%ebp
+ jae syscall_fault
+ 1: movl (%ebp),%ebp
++ movl %ebp,PT_EBP(%esp)
+ .section __ex_table,"a"
+ .align 4
+ .long 1b,syscall_fault
+ .previous
+
+- pushl %eax
+- CFI_ADJUST_CFA_OFFSET 4
+- SAVE_ALL
+ GET_THREAD_INFO(%ebp)
+ test_tif %ebp
+ jnz syscall_trace_entry
+@@ -414,7 +413,7 @@ syscall_exit:
+ # setting need_resched or sigpending
+ # between sampling and the iret
+ TRACE_IRQS_OFF
+- testl $TF_MASK,PT_EFLAGS(%esp) # If tracing set singlestep flag on exit
++ testl $X86_EFLAGS_TF,PT_EFLAGS(%esp) # If tracing set singlestep flag on exit
+ jz no_singlestep
+ orl $_TIF_SINGLESTEP,TI_flags(%ebp)
+ no_singlestep:
+@@ -430,7 +429,7 @@ restore_all:
+ # See comments in process.c:copy_thread() for details.
+ movb PT_OLDSS(%esp), %ah
+ movb PT_CS(%esp), %al
+- andl $(VM_MASK | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
++ andl $(X86_EFLAGS_VM | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
+ cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax
+ CFI_REMEMBER_STATE
+ je ldt_ss # returning to user-space with LDT SS
+@@ -438,7 +437,7 @@ restore_nocheck:
+ #else
+ restore_nocheck:
+ movl PT_EFLAGS(%esp), %eax
+- testl $(VM_MASK|NMI_MASK), %eax
++ testl $(X86_EFLAGS_VM|NMI_MASK), %eax
+ CFI_REMEMBER_STATE
+ jnz hypervisor_iret
+ shr $9, %eax # EAX[0] == IRET_EFLAGS.IF
+@@ -456,7 +455,7 @@ restore_nocheck_notrace:
+ irq_return:
+ INTERRUPT_RETURN
+ .section .fixup,"ax"
+-iret_exc:
++ENTRY(iret_exc)
+ pushl $0 # no error code
+ pushl $do_iret_error
+ jmp error_code
+@@ -560,7 +559,7 @@ work_resched:
+ work_notifysig: # deal with pending signals and
+ # notify-resume requests
+ #ifdef CONFIG_VM86
+- testl $VM_MASK, PT_EFLAGS(%esp)
++ testl $X86_EFLAGS_VM, PT_EFLAGS(%esp)
+ movl %esp, %eax
+ jne work_notifysig_v86 # returning to kernel-space or
+ # vm86-space
+@@ -617,9 +616,6 @@ END(syscall_exit_work)
+
+ RING0_INT_FRAME # can't unwind into user space anyway
+ syscall_fault:
+- pushl %eax # save orig_eax
+- CFI_ADJUST_CFA_OFFSET 4
+- SAVE_ALL
+ GET_THREAD_INFO(%ebp)
+ movl $-EFAULT,PT_EAX(%esp)
+ jmp resume_userspace
+--- sle11-2009-05-14.orig/arch/x86/kernel/entry_64-xen.S 2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/arch/x86/kernel/entry_64-xen.S 2009-03-16 16:38:05.000000000 +0100
+@@ -338,19 +338,17 @@ badsys:
+ /* Do syscall tracing */
+ tracesys:
+ SAVE_REST
+- movq $-ENOSYS,RAX(%rsp)
++ movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
+ FIXUP_TOP_OF_STACK %rdi
+ movq %rsp,%rdi
+ call syscall_trace_enter
+ LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */
+ RESTORE_REST
+ cmpq $__NR_syscall_max,%rax
+- movq $-ENOSYS,%rcx
+- cmova %rcx,%rax
+- ja 1f
++ ja int_ret_from_sys_call /* RAX(%rsp) set to -ENOSYS above */
+ movq %r10,%rcx /* fixup for C */
+ call *sys_call_table(,%rax,8)
+-1: movq %rax,RAX-ARGOFFSET(%rsp)
++ movq %rax,RAX-ARGOFFSET(%rsp)
+ /* Use IRET because user could have changed frame */
+
+ /*
+--- sle11-2009-05-14.orig/arch/x86/kernel/genapic_64-xen.c 2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/arch/x86/kernel/genapic_64-xen.c 2009-03-16 16:38:05.000000000 +0100
+@@ -15,6 +15,7 @@
+ #include <linux/kernel.h>
+ #include <linux/ctype.h>
+ #include <linux/init.h>
++#include <linux/hardirq.h>
+
+ #include <asm/smp.h>
+ #include <asm/ipi.h>
+@@ -24,17 +25,12 @@
+ #include <acpi/acpi_bus.h>
+ #endif
+
+-/* which logical CPU number maps to which CPU (physical APIC ID) */
+ #ifndef CONFIG_XEN
+-u16 x86_cpu_to_apicid_init[NR_CPUS] __initdata
+- = { [0 ... NR_CPUS-1] = BAD_APICID };
+-void *x86_cpu_to_apicid_early_ptr;
+-#endif
+-DEFINE_PER_CPU(u16, x86_cpu_to_apicid) = BAD_APICID;
+-EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid);
++DEFINE_PER_CPU(int, x2apic_extra_bits);
+
+-#ifndef CONFIG_XEN
+ struct genapic __read_mostly *genapic = &apic_flat;
++
++static enum uv_system_type uv_system_type;
+ #else
+ extern struct genapic apic_xen;
+ struct genapic __read_mostly *genapic = &apic_xen;
+@@ -47,6 +43,9 @@ struct genapic __read_mostly *genapic =
+ void __init setup_apic_routing(void)
+ {
+ #ifndef CONFIG_XEN
++ if (uv_system_type == UV_NON_UNIQUE_APIC)
++ genapic = &apic_x2apic_uv_x;
++ else
+ #ifdef CONFIG_ACPI
+ /*
+ * Quirk: some x86_64 machines can only use physical APIC mode
+@@ -59,7 +58,7 @@ void __init setup_apic_routing(void)
+ else
+ #endif
+
+- if (cpus_weight(cpu_possible_map) <= 8)
++ if (num_possible_cpus() <= 8)
+ genapic = &apic_flat;
+ else
+ genapic = &apic_physflat;
+@@ -85,3 +84,41 @@ void send_IPI_self(int vector)
+ xen_send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL);
+ #endif
+ }
++
++int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
++{
++#ifndef CONFIG_XEN
++ if (!strcmp(oem_id, "SGI")) {
++ if (!strcmp(oem_table_id, "UVL"))
++ uv_system_type = UV_LEGACY_APIC;
++ else if (!strcmp(oem_table_id, "UVX"))
++ uv_system_type = UV_X2APIC;
++ else if (!strcmp(oem_table_id, "UVH"))
++ uv_system_type = UV_NON_UNIQUE_APIC;
++ }
++#endif
++ return 0;
++}
++
++#ifndef CONFIG_XEN
++unsigned int read_apic_id(void)
++{
++ unsigned int id;
++
++ WARN_ON(preemptible() && num_online_cpus() > 1);
++ id = apic_read(APIC_ID);
++ if (uv_system_type >= UV_X2APIC)
++ id |= __get_cpu_var(x2apic_extra_bits);
++ return id;
++}
++
++enum uv_system_type get_uv_system_type(void)
++{
++ return uv_system_type;
++}
++
++int is_uv_system(void)
++{
++ return uv_system_type != UV_NONE;
++}
++#endif
+--- sle11-2009-05-14.orig/arch/x86/kernel/genapic_xen_64.c 2008-12-15 11:27:22.000000000 +0100
++++ sle11-2009-05-14/arch/x86/kernel/genapic_xen_64.c 2009-03-16 16:38:05.000000000 +0100
+@@ -72,9 +72,7 @@ static cpumask_t xen_target_cpus(void)
+
+ static cpumask_t xen_vector_allocation_domain(int cpu)
+ {
+- cpumask_t domain = CPU_MASK_NONE;
+- cpu_set(cpu, domain);
+- return domain;
++ return cpumask_of_cpu(cpu);
+ }
+
+ /*
+--- sle11-2009-05-14.orig/arch/x86/kernel/head64-xen.c 2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/arch/x86/kernel/head64-xen.c 2009-03-16 16:38:05.000000000 +0100
+@@ -17,6 +17,7 @@
+ #include <linux/string.h>
+ #include <linux/percpu.h>
+ #include <linux/start_kernel.h>
++#include <linux/io.h>
+ #include <linux/module.h>
+
+ #include <asm/processor.h>
+@@ -29,6 +30,7 @@
+ #include <asm/sections.h>
+ #include <asm/kdebug.h>
+ #include <asm/e820.h>
++#include <asm/bios_ebda.h>
+
+ unsigned long start_pfn;
+
+@@ -75,34 +77,75 @@ EXPORT_SYMBOL(machine_to_phys_mapping);
+ unsigned int machine_to_phys_order;
+ EXPORT_SYMBOL(machine_to_phys_order);
+
+-#define EBDA_ADDR_POINTER 0x40E
++#define BIOS_LOWMEM_KILOBYTES 0x413
+
+-static __init void reserve_ebda(void)
++/*
++ * The BIOS places the EBDA/XBDA at the top of conventional
++ * memory, and usually decreases the reported amount of
++ * conventional memory (int 0x12) too. This also contains a
++ * workaround for Dell systems that neglect to reserve EBDA.
++ * The same workaround also avoids a problem with the AMD768MPX
++ * chipset: reserve a page before VGA to prevent PCI prefetch
++ * into it (errata #56). Usually the page is reserved anyways,
++ * unless you have no PS/2 mouse plugged in.
++ */
++static void __init reserve_ebda_region(void)
+ {
+ #ifndef CONFIG_XEN
+- unsigned ebda_addr, ebda_size;
++ unsigned int lowmem, ebda_addr;
+
+- /*
+- * there is a real-mode segmented pointer pointing to the
+- * 4K EBDA area at 0x40E
+- */
+- ebda_addr = *(unsigned short *)__va(EBDA_ADDR_POINTER);
+- ebda_addr <<= 4;
+-
+- if (!ebda_addr)
++ /* To determine the position of the EBDA and the */
++ /* end of conventional memory, we need to look at */
++ /* the BIOS data area. In a paravirtual environment */
++ /* that area is absent. We'll just have to assume */
++ /* that the paravirt case can handle memory setup */
++ /* correctly, without our help. */
++ if (paravirt_enabled())
+ return;
+
+- ebda_size = *(unsigned short *)__va(ebda_addr);
++ /* end of low (conventional) memory */
++ lowmem = *(unsigned short *)__va(BIOS_LOWMEM_KILOBYTES);
++ lowmem <<= 10;
++
++ /* start of EBDA area */
++ ebda_addr = get_bios_ebda();
++
++ /* Fixup: bios puts an EBDA in the top 64K segment */
++ /* of conventional memory, but does not adjust lowmem. */
++ if ((lowmem - ebda_addr) <= 0x10000)
++ lowmem = ebda_addr;
++
++ /* Fixup: bios does not report an EBDA at all. */
++ /* Some old Dells seem to need 4k anyhow (bugzilla 2990) */
++ if ((ebda_addr == 0) && (lowmem >= 0x9f000))
++ lowmem = 0x9f000;
++
++ /* Paranoia: should never happen, but... */
++ if ((lowmem == 0) || (lowmem >= 0x100000))
++ lowmem = 0x9f000;
+
+- /* Round EBDA up to pages */
+- if (ebda_size == 0)
+- ebda_size = 1;
+- ebda_size <<= 10;
+- ebda_size = round_up(ebda_size + (ebda_addr & ~PAGE_MASK), PAGE_SIZE);
+- if (ebda_size > 64*1024)
+- ebda_size = 64*1024;
++ /* reserve all memory between lowmem and the 1MB mark */
++ reserve_early(lowmem, 0x100000, "BIOS reserved");
++#endif
++}
+
+- reserve_early(ebda_addr, ebda_addr + ebda_size, "EBDA");
++static void __init reserve_setup_data(void)
++{
++#ifndef CONFIG_XEN
++ struct setup_data *data;
++ unsigned long pa_data;
++ char buf[32];
++
++ if (boot_params.hdr.version < 0x0209)
++ return;
++ pa_data = boot_params.hdr.setup_data;
++ while (pa_data) {
++ data = early_ioremap(pa_data, sizeof(*data));
++ sprintf(buf, "setup data %x", data->type);
++ reserve_early(pa_data, pa_data+sizeof(*data)+data->len, buf);
++ pa_data = data->next;
++ early_iounmap(data, sizeof(*data));
++ }
+ #endif
+ }
+
+@@ -112,6 +155,19 @@ void __init x86_64_start_kernel(char * r
+ unsigned long machine_to_phys_nr_ents;
+ int i;
+
++ /*
++ * Build-time sanity checks on the kernel image and module
++ * area mappings. (these are purely build-time and produce no code)
++ */
++ BUILD_BUG_ON(MODULES_VADDR < KERNEL_IMAGE_START);
++ BUILD_BUG_ON(MODULES_VADDR-KERNEL_IMAGE_START < KERNEL_IMAGE_SIZE);
++ BUILD_BUG_ON(MODULES_LEN + KERNEL_IMAGE_SIZE > 2*PUD_SIZE);
++ BUILD_BUG_ON((KERNEL_IMAGE_START & ~PMD_MASK) != 0);
++ BUILD_BUG_ON((MODULES_VADDR & ~PMD_MASK) != 0);
++ BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL));
++ BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
++ (__START_KERNEL & PGDIR_MASK)));
++
+ xen_setup_features();
+
+ xen_start_info = (struct start_info *)real_mode_data;
+@@ -140,7 +196,7 @@ void __init x86_64_start_kernel(char * r
+ /* Cleanup the over mapped high alias */
+ cleanup_highmap();
+
+- for (i = 0; i < IDT_ENTRIES; i++) {
++ for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) {
+ #ifdef CONFIG_EARLY_PRINTK
+ set_intr_gate(i, &early_idt_handlers[i]);
+ #else
+@@ -163,7 +219,8 @@ void __init x86_64_start_kernel(char * r
+ reserve_early(round_up(__pa_symbol(&_end), PAGE_SIZE),
+ start_pfn << PAGE_SHIFT, "Xen provided");
+
+- reserve_ebda();
++ reserve_ebda_region();
++ reserve_setup_data();
+
+ /*
+ * At this point everything still needed from the boot loader
+--- sle11-2009-05-14.orig/arch/x86/kernel/head_32-xen.S 2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/arch/x86/kernel/head_32-xen.S 2009-03-16 16:38:05.000000000 +0100
+@@ -69,7 +69,7 @@ ENTRY(startup_32)
+ cld # gcc2 wants the direction flag cleared at all times
+
+ pushl $0 # fake return address for unwinder
+- jmp start_kernel
++ jmp i386_start_kernel
+
+ #define HYPERCALL_PAGE_OFFSET 0x1000
+ .org HYPERCALL_PAGE_OFFSET
+--- sle11-2009-05-14.orig/arch/x86/kernel/init_task-xen.c 2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/arch/x86/kernel/init_task-xen.c 2009-03-16 16:38:05.000000000 +0100
+@@ -11,7 +11,6 @@
+ #include <asm/desc.h>
+
+ static struct fs_struct init_fs = INIT_FS;
+-static struct files_struct init_files = INIT_FILES;
+ static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
+ static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
+ #ifdef CONFIG_X86_XEN
+--- sle11-2009-05-14.orig/arch/x86/kernel/io_apic_32-xen.c 2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/arch/x86/kernel/io_apic_32-xen.c 2009-03-16 16:38:05.000000000 +0100
+@@ -88,6 +88,16 @@ int sis_apic_bug = -1;
+ */
+ int nr_ioapic_registers[MAX_IO_APICS];
+
++/* I/O APIC entries */
++struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
++int nr_ioapics;
++
++/* MP IRQ source entries */
++struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
++
++/* # of MP IRQ source entries */
++int mp_irq_entries;
++
+ static int disable_timer_pin_1 __initdata;
+
+ /*
+@@ -863,10 +873,7 @@ static int __init find_isa_irq_pin(int i
+ for (i = 0; i < mp_irq_entries; i++) {
+ int lbus = mp_irqs[i].mpc_srcbus;
+
+- if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
+- mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
+- mp_bus_id_to_type[lbus] == MP_BUS_MCA
+- ) &&
++ if (test_bit(lbus, mp_bus_not_pci) &&
+ (mp_irqs[i].mpc_irqtype == type) &&
+ (mp_irqs[i].mpc_srcbusirq == irq))
+
+@@ -882,10 +889,7 @@ static int __init find_isa_irq_apic(int
+ for (i = 0; i < mp_irq_entries; i++) {
+ int lbus = mp_irqs[i].mpc_srcbus;
+
+- if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
+- mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
+- mp_bus_id_to_type[lbus] == MP_BUS_MCA
+- ) &&
++ if (test_bit(lbus, mp_bus_not_pci) &&
+ (mp_irqs[i].mpc_irqtype == type) &&
+ (mp_irqs[i].mpc_srcbusirq == irq))
+ break;
+@@ -926,7 +930,7 @@ int IO_APIC_get_PCI_irq_vector(int bus,
+ mp_irqs[i].mpc_dstapic == MP_APIC_ALL)
+ break;
+
+- if ((mp_bus_id_to_type[lbus] == MP_BUS_PCI) &&
++ if (!test_bit(lbus, mp_bus_not_pci) &&
+ !mp_irqs[i].mpc_irqtype &&
+ (bus == lbus) &&
+ (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) {
+@@ -977,6 +981,7 @@ void __init setup_ioapic_dest(void)
+ #endif /* !CONFIG_XEN */
+ #endif
+
++#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
+ /*
+ * EISA Edge/Level control register, ELCR
+ */
+@@ -990,6 +995,13 @@ static int EISA_ELCR(unsigned int irq)
+ "Broken MPtable reports ISA irq %d\n", irq);
+ return 0;
+ }
++#endif
++
++/* ISA interrupts are always polarity zero edge triggered,
++ * when listed as conforming in the MP table. */
++
++#define default_ISA_trigger(idx) (0)
++#define default_ISA_polarity(idx) (0)
+
+ /* EISA interrupts are always polarity zero and can be edge or level
+ * trigger depending on the ELCR value. If an interrupt is listed as
+@@ -997,13 +1009,7 @@ static int EISA_ELCR(unsigned int irq)
+ * be read in from the ELCR */
+
+ #define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mpc_srcbusirq))
+-#define default_EISA_polarity(idx) (0)
+-
+-/* ISA interrupts are always polarity zero edge triggered,
+- * when listed as conforming in the MP table. */
+-
+-#define default_ISA_trigger(idx) (0)
+-#define default_ISA_polarity(idx) (0)
++#define default_EISA_polarity(idx) default_ISA_polarity(idx)
+
+ /* PCI interrupts are always polarity one level triggered,
+ * when listed as conforming in the MP table. */
+@@ -1015,7 +1021,7 @@ static int EISA_ELCR(unsigned int irq)
+ * when listed as conforming in the MP table. */
+
+ #define default_MCA_trigger(idx) (1)
+-#define default_MCA_polarity(idx) (0)
++#define default_MCA_polarity(idx) default_ISA_polarity(idx)
+
+ static int MPBIOS_polarity(int idx)
+ {
+@@ -1029,35 +1035,9 @@ static int MPBIOS_polarity(int idx)
+ {
+ case 0: /* conforms, ie. bus-type dependent polarity */
+ {
+- switch (mp_bus_id_to_type[bus])
+- {
+- case MP_BUS_ISA: /* ISA pin */
+- {
+- polarity = default_ISA_polarity(idx);
+- break;
+- }
+- case MP_BUS_EISA: /* EISA pin */
+- {
+- polarity = default_EISA_polarity(idx);
+- break;
+- }
+- case MP_BUS_PCI: /* PCI pin */
+- {
+- polarity = default_PCI_polarity(idx);
+- break;
+- }
+- case MP_BUS_MCA: /* MCA pin */
+- {
+- polarity = default_MCA_polarity(idx);
+- break;
+- }
+- default:
+- {
+- printk(KERN_WARNING "broken BIOS!!\n");
+- polarity = 1;
+- break;
+- }
+- }
++ polarity = test_bit(bus, mp_bus_not_pci)?
++ default_ISA_polarity(idx):
++ default_PCI_polarity(idx);
+ break;
+ }
+ case 1: /* high active */
+@@ -1098,11 +1078,15 @@ static int MPBIOS_trigger(int idx)
+ {
+ case 0: /* conforms, ie. bus-type dependent */
+ {
++ trigger = test_bit(bus, mp_bus_not_pci)?
++ default_ISA_trigger(idx):
++ default_PCI_trigger(idx);
++#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
+ switch (mp_bus_id_to_type[bus])
+ {
+ case MP_BUS_ISA: /* ISA pin */
+ {
+- trigger = default_ISA_trigger(idx);
++ /* set before the switch */
+ break;
+ }
+ case MP_BUS_EISA: /* EISA pin */
+@@ -1112,7 +1096,7 @@ static int MPBIOS_trigger(int idx)
+ }
+ case MP_BUS_PCI: /* PCI pin */
+ {
+- trigger = default_PCI_trigger(idx);
++ /* set before the switch */
+ break;
+ }
+ case MP_BUS_MCA: /* MCA pin */
+@@ -1127,6 +1111,7 @@ static int MPBIOS_trigger(int idx)
+ break;
+ }
+ }
++#endif
+ break;
+ }
+ case 1: /* edge */
+@@ -1176,39 +1161,22 @@ static int pin_2_irq(int idx, int apic,
+ if (mp_irqs[idx].mpc_dstirq != pin)
+ printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
+
+- switch (mp_bus_id_to_type[bus])
+- {
+- case MP_BUS_ISA: /* ISA pin */
+- case MP_BUS_EISA:
+- case MP_BUS_MCA:
+- {
+- irq = mp_irqs[idx].mpc_srcbusirq;
+- break;
+- }
+- case MP_BUS_PCI: /* PCI pin */
+- {
+- /*
+- * PCI IRQs are mapped in order
+- */
+- i = irq = 0;
+- while (i < apic)
+- irq += nr_ioapic_registers[i++];
+- irq += pin;
+-
+- /*
+- * For MPS mode, so far only needed by ES7000 platform
+- */
+- if (ioapic_renumber_irq)
+- irq = ioapic_renumber_irq(apic, irq);
++ if (test_bit(bus, mp_bus_not_pci))
++ irq = mp_irqs[idx].mpc_srcbusirq;
++ else {
++ /*
++ * PCI IRQs are mapped in order
++ */
++ i = irq = 0;
++ while (i < apic)
++ irq += nr_ioapic_registers[i++];
++ irq += pin;
+
+- break;
+- }
+- default:
+- {
+- printk(KERN_ERR "unknown bus type %d.\n",bus);
+- irq = 0;
+- break;
+- }
++ /*
++ * For MPS mode, so far only needed by ES7000 platform
++ */
++ if (ioapic_renumber_irq)
++ irq = ioapic_renumber_irq(apic, irq);
+ }
+
+ /*
+@@ -1314,7 +1282,6 @@ static void __init setup_IO_APIC_irqs(vo
+ {
+ struct IO_APIC_route_entry entry;
+ int apic, pin, idx, irq, first_notcon = 1, vector;
+- unsigned long flags;
+
+ apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
+
+@@ -1380,9 +1347,7 @@ static void __init setup_IO_APIC_irqs(vo
+ if (!apic && (irq < 16))
+ disable_8259A_irq(irq);
+ }
+- spin_lock_irqsave(&ioapic_lock, flags);
+- __ioapic_write_entry(apic, pin, entry);
+- spin_unlock_irqrestore(&ioapic_lock, flags);
++ ioapic_write_entry(apic, pin, entry);
+ }
+ }
+
+@@ -1577,8 +1542,8 @@ void /*__init*/ print_local_APIC(void *
+
+ printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
+ smp_processor_id(), hard_smp_processor_id());
+- v = apic_read(APIC_ID);
+- printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, GET_APIC_ID(v));
++ printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v,
++ GET_APIC_ID(read_apic_id()));
+ v = apic_read(APIC_LVR);
+ printk(KERN_INFO "... APIC VERSION: %08x\n", v);
+ ver = GET_APIC_VERSION(v);
+@@ -1791,7 +1756,7 @@ void disable_IO_APIC(void)
+ entry.delivery_mode = dest_ExtINT; /* ExtInt */
+ entry.vector = 0;
+ entry.dest.physical.physical_dest =
+- GET_APIC_ID(apic_read(APIC_ID));
++ GET_APIC_ID(read_apic_id());
+
+ /*
+ * Add it to the IO-APIC irq-routing table:
+@@ -2090,8 +2055,7 @@ static inline void init_IO_APIC_traps(vo
+ * 0x80, because int 0x80 is hm, kind of importantish. ;)
+ */
+ for (irq = 0; irq < NR_IRQS ; irq++) {
+- int tmp = irq;
+- if (IO_APIC_IRQ(tmp) && !irq_vector[tmp]) {
++ if (IO_APIC_IRQ(irq) && !irq_vector[irq]) {
+ /*
+ * Hmm.. We don't have an entry for this,
+ * so default to an old-fashioned 8259
+@@ -2166,7 +2130,7 @@ static void __init setup_nmi(void)
+ * cycles as some i82489DX-based boards have glue logic that keeps the
+ * 8259A interrupt line asserted until INTA. --macro
+ */
+-static inline void unlock_ExtINT_logic(void)
++static inline void __init unlock_ExtINT_logic(void)
+ {
+ int apic, pin, i;
+ struct IO_APIC_route_entry entry0, entry1;
+@@ -2218,8 +2182,6 @@ static inline void unlock_ExtINT_logic(v
+ ioapic_write_entry(apic, pin, entry0);
+ }
+
+-int timer_uses_ioapic_pin_0;
+-
+ /*
+ * This code may look a bit paranoid, but it's supposed to cooperate with
+ * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ
+@@ -2259,9 +2221,6 @@ static inline void __init check_timer(vo
+ pin2 = ioapic_i8259.pin;
+ apic2 = ioapic_i8259.apic;
+
+- if (pin1 == 0)
+- timer_uses_ioapic_pin_0 = 1;
+-
+ printk(KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n",
+ vector, apic1, pin1, apic2, pin2);
+
+@@ -2555,6 +2514,7 @@ void destroy_irq(unsigned int irq)
+ dynamic_irq_cleanup(irq);
+
+ spin_lock_irqsave(&vector_lock, flags);
++ clear_bit(irq_vector[irq], used_vectors);
+ irq_vector[irq] = 0;
+ spin_unlock_irqrestore(&vector_lock, flags);
+ }
+@@ -2871,7 +2831,6 @@ int __init io_apic_get_redir_entries (in
+ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int active_high_low)
+ {
+ struct IO_APIC_route_entry entry;
+- unsigned long flags;
+
+ if (!IO_APIC_IRQ(irq)) {
+ printk(KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
+@@ -2912,9 +2871,7 @@ int io_apic_set_pci_routing (int ioapic,
+ if (!ioapic && (irq < 16))
+ disable_8259A_irq(irq);
+
+- spin_lock_irqsave(&ioapic_lock, flags);
+- __ioapic_write_entry(ioapic, pin, entry);
+- spin_unlock_irqrestore(&ioapic_lock, flags);
++ ioapic_write_entry(ioapic, pin, entry);
+
+ return 0;
+ }
+--- sle11-2009-05-14.orig/arch/x86/kernel/io_apic_64-xen.c 2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/arch/x86/kernel/io_apic_64-xen.c 2009-03-16 16:38:05.000000000 +0100
+@@ -43,13 +43,15 @@
+ #include <asm/smp.h>
+ #include <asm/desc.h>
+ #include <asm/proto.h>
+-#include <asm/mach_apic.h>
+ #include <asm/acpi.h>
+ #include <asm/dma.h>
+ #include <asm/nmi.h>
+ #include <asm/msidef.h>
+ #include <asm/hypertransport.h>
+
++#include <mach_ipi.h>
++#include <mach_apic.h>
++
+ struct irq_cfg {
+ #ifndef CONFIG_XEN
+ cpumask_t domain;
+@@ -101,6 +103,16 @@ DEFINE_SPINLOCK(vector_lock);
+ */
+ int nr_ioapic_registers[MAX_IO_APICS];
+
++/* I/O APIC entries */
++struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
++int nr_ioapics;
++
++/* MP IRQ source entries */
++struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
++
++/* # of MP IRQ source entries */
++int mp_irq_entries;
++
+ /*
+ * Rough estimation of how many shared IRQs there are, can
+ * be changed anytime.
+@@ -181,11 +193,10 @@ static inline void io_apic_modify(unsign
+ writel(value, &io_apic->data);
+ }
+
+-static int io_apic_level_ack_pending(unsigned int irq)
++static bool io_apic_level_ack_pending(unsigned int irq)
+ {
+ struct irq_pin_list *entry;
+ unsigned long flags;
+- int pending = 0;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ entry = irq_2_pin + irq;
+@@ -198,13 +209,17 @@ static int io_apic_level_ack_pending(uns
+ break;
+ reg = io_apic_read(entry->apic, 0x10 + pin*2);
+ /* Is the remote IRR bit set? */
+- pending |= (reg >> 14) & 1;
++ if ((reg >> 14) & 1) {
++ spin_unlock_irqrestore(&ioapic_lock, flags);
++ return true;
++ }
+ if (!entry->next)
+ break;
+ entry = irq_2_pin + entry->next;
+ }
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+- return pending;
++
++ return false;
+ }
+ #endif
+
+@@ -762,7 +777,7 @@ static void __clear_irq_vector(int irq)
+ per_cpu(vector_irq, cpu)[vector] = -1;
+
+ cfg->vector = 0;
+- cfg->domain = CPU_MASK_NONE;
++ cpus_clear(cfg->domain);
+ }
+
+ void __setup_vector_irq(int cpu)
+@@ -899,9 +914,8 @@ static void __init setup_IO_APIC_irqs(vo
+ static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, int vector)
+ {
+ struct IO_APIC_route_entry entry;
+- unsigned long flags;
+
+- memset(&entry,0,sizeof(entry));
++ memset(&entry, 0, sizeof(entry));
+
+ disable_8259A_irq(0);
+
+@@ -929,10 +943,7 @@ static void __init setup_ExtINT_IRQ0_pin
+ /*
+ * Add it to the IO-APIC irq-routing table:
+ */
+- spin_lock_irqsave(&ioapic_lock, flags);
+- io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
+- io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
+- spin_unlock_irqrestore(&ioapic_lock, flags);
++ ioapic_write_entry(apic, pin, entry);
+
+ enable_8259A_irq(0);
+ }
+@@ -1061,8 +1072,7 @@ void __apicdebuginit print_local_APIC(vo
+
+ printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
+ smp_processor_id(), hard_smp_processor_id());
+- v = apic_read(APIC_ID);
+- printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, GET_APIC_ID(v));
++ printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, GET_APIC_ID(read_apic_id()));
+ v = apic_read(APIC_LVR);
+ printk(KERN_INFO "... APIC VERSION: %08x\n", v);
+ ver = GET_APIC_VERSION(v);
+@@ -1260,7 +1270,7 @@ void disable_IO_APIC(void)
+ entry.dest_mode = 0; /* Physical */
+ entry.delivery_mode = dest_ExtINT; /* ExtInt */
+ entry.vector = 0;
+- entry.dest = GET_APIC_ID(apic_read(APIC_ID));
++ entry.dest = GET_APIC_ID(read_apic_id());
+
+ /*
+ * Add it to the IO-APIC irq-routing table:
+@@ -1353,9 +1363,7 @@ static int ioapic_retrigger_irq(unsigned
+ unsigned long flags;
+
+ spin_lock_irqsave(&vector_lock, flags);
+- cpus_clear(mask);
+- cpu_set(first_cpu(cfg->domain), mask);
+-
++ mask = cpumask_of_cpu(first_cpu(cfg->domain));
+ send_IPI_mask(mask, cfg->vector);
+ spin_unlock_irqrestore(&vector_lock, flags);
+
+@@ -1519,8 +1527,7 @@ static inline void init_IO_APIC_traps(vo
+ * 0x80, because int 0x80 is hm, kind of importantish. ;)
+ */
+ for (irq = 0; irq < NR_IRQS ; irq++) {
+- int tmp = irq;
+- if (IO_APIC_IRQ(tmp) && !irq_cfg[tmp].vector) {
++ if (IO_APIC_IRQ(irq) && !irq_cfg[irq].vector) {
+ /*
+ * Hmm.. We don't have an entry for this,
+ * so default to an old-fashioned 8259
+@@ -1597,22 +1604,19 @@ static void __init setup_nmi(void)
+ * cycles as some i82489DX-based boards have glue logic that keeps the
+ * 8259A interrupt line asserted until INTA. --macro
+ */
+-static inline void unlock_ExtINT_logic(void)
++static inline void __init unlock_ExtINT_logic(void)
+ {
+ int apic, pin, i;
+ struct IO_APIC_route_entry entry0, entry1;
+ unsigned char save_control, save_freq_select;
+- unsigned long flags;
+
+ pin = find_isa_irq_pin(8, mp_INT);
+ apic = find_isa_irq_apic(8, mp_INT);
+ if (pin == -1)
+ return;
+
+- spin_lock_irqsave(&ioapic_lock, flags);
+- *(((int *)&entry0) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
+- *(((int *)&entry0) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
+- spin_unlock_irqrestore(&ioapic_lock, flags);
++ entry0 = ioapic_read_entry(apic, pin);
++
+ clear_IO_APIC_pin(apic, pin);
+
+ memset(&entry1, 0, sizeof(entry1));
+@@ -1625,10 +1629,7 @@ static inline void unlock_ExtINT_logic(v
+ entry1.trigger = 0;
+ entry1.vector = 0;
+
+- spin_lock_irqsave(&ioapic_lock, flags);
+- io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry1) + 1));
+- io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry1) + 0));
+- spin_unlock_irqrestore(&ioapic_lock, flags);
++ ioapic_write_entry(apic, pin, entry1);
+
+ save_control = CMOS_READ(RTC_CONTROL);
+ save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
+@@ -1647,10 +1648,7 @@ static inline void unlock_ExtINT_logic(v
+ CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
+ clear_IO_APIC_pin(apic, pin);
+
+- spin_lock_irqsave(&ioapic_lock, flags);
+- io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry0) + 1));
+- io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry0) + 0));
+- spin_unlock_irqrestore(&ioapic_lock, flags);
++ ioapic_write_entry(apic, pin, entry0);
+ }
+
+ /*
+@@ -2327,7 +2325,6 @@ static struct resource * __init ioapic_s
+ res = (void *)mem;
+
+ if (mem != NULL) {
+- memset(mem, 0, n);
+ mem += sizeof(struct resource) * nr_ioapics;
+
+ for (i = 0; i < nr_ioapics; i++) {
+--- /dev/null 1970-01-01 00:00:00.000000000 +0000
++++ sle11-2009-05-14/arch/x86/kernel/ipi-xen.c 2009-03-16 16:38:05.000000000 +0100
+@@ -0,0 +1,232 @@
++#include <linux/cpumask.h>
++#include <linux/interrupt.h>
++#include <linux/init.h>
++
++#include <linux/mm.h>
++#include <linux/delay.h>
++#include <linux/spinlock.h>
++#include <linux/kernel_stat.h>
++#include <linux/mc146818rtc.h>
++#include <linux/cache.h>
++#include <linux/interrupt.h>
++#include <linux/cpu.h>
++#include <linux/module.h>
++
++#include <asm/smp.h>
++#include <asm/mtrr.h>
++#include <asm/tlbflush.h>
++#include <asm/mmu_context.h>
++#include <asm/apic.h>
++#include <asm/proto.h>
++
++#ifdef CONFIG_X86_32
++#ifndef CONFIG_XEN
++#include <mach_apic.h>
++/*
++ * the following functions deal with sending IPIs between CPUs.
++ *
++ * We use 'broadcast', CPU->CPU IPIs and self-IPIs too.
++ */
++
++static inline int __prepare_ICR(unsigned int shortcut, int vector)
++{
++ unsigned int icr = shortcut | APIC_DEST_LOGICAL;
++
++ switch (vector) {
++ default:
++ icr |= APIC_DM_FIXED | vector;
++ break;
++ case NMI_VECTOR:
++ icr |= APIC_DM_NMI;
++ break;
++ }
++ return icr;
++}
++
++static inline int __prepare_ICR2(unsigned int mask)
++{
++ return SET_APIC_DEST_FIELD(mask);
++}
++#else
++#include <xen/evtchn.h>
++
++DECLARE_PER_CPU(int, ipi_to_irq[NR_IPIS]);
++
++static inline void __send_IPI_one(unsigned int cpu, int vector)
++{
++ int irq = per_cpu(ipi_to_irq, cpu)[vector];
++ BUG_ON(irq < 0);
++ notify_remote_via_irq(irq);
++}
++#endif
++
++void __send_IPI_shortcut(unsigned int shortcut, int vector)
++{
++#ifndef CONFIG_XEN
++ /*
++ * Subtle. In the case of the 'never do double writes' workaround
++ * we have to lock out interrupts to be safe. As we don't care
++ * of the value read we use an atomic rmw access to avoid costly
++ * cli/sti. Otherwise we use an even cheaper single atomic write
++ * to the APIC.
++ */
++ unsigned int cfg;
++
++ /*
++ * Wait for idle.
++ */
++ apic_wait_icr_idle();
++
++ /*
++ * No need to touch the target chip field
++ */
++ cfg = __prepare_ICR(shortcut, vector);
++
++ /*
++ * Send the IPI. The write to APIC_ICR fires this off.
++ */
++ apic_write_around(APIC_ICR, cfg);
++#else
++ int cpu;
++
++ switch (shortcut) {
++ case APIC_DEST_SELF:
++ __send_IPI_one(smp_processor_id(), vector);
++ break;
++ case APIC_DEST_ALLBUT:
++ for_each_online_cpu(cpu)
++ if (cpu != smp_processor_id())
++ __send_IPI_one(cpu, vector);
++ break;
++ default:
++ printk("XXXXXX __send_IPI_shortcut %08x vector %d\n", shortcut,
++ vector);
++ break;
++ }
++#endif
++}
++
++void send_IPI_self(int vector)
++{
++ __send_IPI_shortcut(APIC_DEST_SELF, vector);
++}
++
++#ifndef CONFIG_XEN
++/*
++ * This is used to send an IPI with no shorthand notation (the destination is
++ * specified in bits 56 to 63 of the ICR).
++ */
++static inline void __send_IPI_dest_field(unsigned long mask, int vector)
++{
++ unsigned long cfg;
++
++ /*
++ * Wait for idle.
++ */
++ if (unlikely(vector == NMI_VECTOR))
++ safe_apic_wait_icr_idle();
++ else
++ apic_wait_icr_idle();
++
++ /*
++ * prepare target chip field
++ */
++ cfg = __prepare_ICR2(mask);
++ apic_write_around(APIC_ICR2, cfg);
++
++ /*
++ * program the ICR
++ */
++ cfg = __prepare_ICR(0, vector);
++
++ /*
++ * Send the IPI. The write to APIC_ICR fires this off.
++ */
++ apic_write_around(APIC_ICR, cfg);
++}
++#endif
++
++/*
++ * This is only used on smaller machines.
++ */
++void send_IPI_mask_bitmask(cpumask_t cpumask, int vector)
++{
++#ifndef CONFIG_XEN
++ unsigned long mask = cpus_addr(cpumask)[0];
++#else
++ cpumask_t mask;
++ unsigned int cpu;
++#endif
++ unsigned long flags;
++
++ local_irq_save(flags);
++#ifndef CONFIG_XEN
++ WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]);
++ __send_IPI_dest_field(mask, vector);
++#else
++ cpus_andnot(mask, cpumask, cpu_online_map);
++ WARN_ON(!cpus_empty(mask));
++ for_each_online_cpu(cpu)
++ if (cpu_isset(cpu, cpumask))
++ __send_IPI_one(cpu, vector);
++#endif
++ local_irq_restore(flags);
++}
++
++void send_IPI_mask_sequence(cpumask_t mask, int vector)
++{
++#ifndef CONFIG_XEN
++ unsigned long flags;
++ unsigned int query_cpu;
++
++ /*
++ * Hack. The clustered APIC addressing mode doesn't allow us to send
++ * to an arbitrary mask, so I do a unicasts to each CPU instead. This
++ * should be modified to do 1 message per cluster ID - mbligh
++ */
++
++ local_irq_save(flags);
++ for_each_possible_cpu(query_cpu) {
++ if (cpu_isset(query_cpu, mask)) {
++ __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu),
++ vector);
++ }
++ }
++ local_irq_restore(flags);
++#else
++ send_IPI_mask_bitmask(mask, vector);
++#endif
++}
++
++/* must come after the send_IPI functions above for inlining */
++#include <mach_ipi.h>
++
++#ifndef CONFIG_XEN
++static int convert_apicid_to_cpu(int apic_id)
++{
++ int i;
++
++ for_each_possible_cpu(i) {
++ if (per_cpu(x86_cpu_to_apicid, i) == apic_id)
++ return i;
++ }
++ return -1;
++}
++
++int safe_smp_processor_id(void)
++{
++ int apicid, cpuid;
++
++ if (!boot_cpu_has(X86_FEATURE_APIC))
++ return 0;
++
++ apicid = hard_smp_processor_id();
++ if (apicid == BAD_APICID)
++ return 0;
++
++ cpuid = convert_apicid_to_cpu(apicid);
++
++ return cpuid >= 0 ? cpuid : 0;
++}
++#endif
++#endif
+--- sle11-2009-05-14.orig/arch/x86/kernel/irq_32-xen.c 2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/arch/x86/kernel/irq_32-xen.c 2009-03-16 16:38:05.000000000 +0100
+@@ -79,7 +79,7 @@ unsigned int do_IRQ(struct pt_regs *regs
+
+ if (unlikely((unsigned)irq >= NR_IRQS)) {
+ printk(KERN_EMERG "%s: cannot handle IRQ %d\n",
+- __FUNCTION__, irq);
++ __func__, irq);
+ BUG();
+ }
+
+@@ -134,7 +134,7 @@ unsigned int do_IRQ(struct pt_regs *regs
+ : "=a" (arg1), "=d" (arg2), "=b" (bx)
+ : "0" (irq), "1" (desc), "2" (isp),
+ "D" (desc->handle_irq)
+- : "memory", "cc"
++ : "memory", "cc", "ecx"
+ );
+ } else
+ #endif
+@@ -190,8 +190,6 @@ void irq_ctx_exit(int cpu)
+ hardirq_ctx[cpu] = NULL;
+ }
+
+-extern asmlinkage void __do_softirq(void);
+-
+ asmlinkage void do_softirq(void)
+ {
+ unsigned long flags;
+--- sle11-2009-05-14.orig/arch/x86/kernel/machine_kexec_64.c 2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/arch/x86/kernel/machine_kexec_64.c 2009-03-16 16:38:05.000000000 +0100
+@@ -120,8 +120,6 @@ int __init machine_kexec_setup_resources
+ return 0;
+ }
+
+-void machine_kexec_register_resources(struct resource *res) { ; }
+-
+ #else /* CONFIG_XEN */
+
+ #define x__pmd(x) __pmd(x)
+--- sle11-2009-05-14.orig/arch/x86/kernel/microcode-xen.c 2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/arch/x86/kernel/microcode-xen.c 2009-03-16 16:38:05.000000000 +0100
+@@ -162,7 +162,7 @@ static int request_microcode(void)
+ c->x86, c->x86_model, c->x86_mask);
+ error = request_firmware(&firmware, name, &microcode_pdev->dev);
+ if (error) {
+- pr_debug("ucode data file %s load failed\n", name);
++ pr_debug("microcode: ucode data file %s load failed\n", name);
+ return error;
+ }
+
+--- sle11-2009-05-14.orig/arch/x86/kernel/mmconf-fam10h_64.c 2009-05-14 10:56:29.000000000 +0200
++++ sle11-2009-05-14/arch/x86/kernel/mmconf-fam10h_64.c 2009-03-16 16:38:05.000000000 +0100
+@@ -219,6 +219,16 @@ void __cpuinit fam10h_check_enable_mmcfg
+ val |= fam10h_pci_mmconf_base | (8 << FAM10H_MMIO_CONF_BUSRANGE_SHIFT) |
+ FAM10H_MMIO_CONF_ENABLE;
+ wrmsrl(address, val);
++
++#ifdef CONFIG_XEN
++ {
++ u64 val2;
++
++ rdmsrl(address, val2);
++ if (val2 != val)
++ pci_probe &= ~PCI_CHECK_ENABLE_AMD_MMCONF;
++ }
++#endif
+ }
+
+ static int __devinit set_check_enable_amd_mmconf(const struct dmi_system_id *d)
+--- /dev/null 1970-01-01 00:00:00.000000000 +0000
++++ sle11-2009-05-14/arch/x86/kernel/mpparse-xen.c 2009-03-16 16:38:05.000000000 +0100
+@@ -0,0 +1,1101 @@
++/*
++ * Intel Multiprocessor Specification 1.1 and 1.4
++ * compliant MP-table parsing routines.
++ *
++ * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
++ * (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
++ * (c) 2008 Alexey Starikovskiy <astarikovskiy@suse.de>
++ */
++
++#include <linux/mm.h>
++#include <linux/init.h>
++#include <linux/delay.h>
++#include <linux/bootmem.h>
++#include <linux/kernel_stat.h>
++#include <linux/mc146818rtc.h>
++#include <linux/bitops.h>
++#include <linux/acpi.h>
++#include <linux/module.h>
++
++#include <asm/smp.h>
++#include <asm/mtrr.h>
++#include <asm/mpspec.h>
++#include <asm/pgalloc.h>
++#include <asm/io_apic.h>
++#include <asm/proto.h>
++#include <asm/acpi.h>
++#include <asm/bios_ebda.h>
++
++#include <mach_apic.h>
++#ifdef CONFIG_X86_32
++#include <mach_apicdef.h>
++#include <mach_mpparse.h>
++#endif
++
++/* Have we found an MP table */
++int smp_found_config;
++
++/*
++ * Various Linux-internal data structures created from the
++ * MP-table.
++ */
++#if defined (CONFIG_MCA) || defined (CONFIG_EISA)
++int mp_bus_id_to_type[MAX_MP_BUSSES];
++#endif
++
++DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
++int mp_bus_id_to_pci_bus[MAX_MP_BUSSES] = {[0 ... MAX_MP_BUSSES - 1] = -1 };
++
++static int mp_current_pci_id;
++
++int pic_mode;
++
++/*
++ * Intel MP BIOS table parsing routines:
++ */
++
++/*
++ * Checksum an MP configuration block.
++ */
++
++static int __init mpf_checksum(unsigned char *mp, int len)
++{
++ int sum = 0;
++
++ while (len--)
++ sum += *mp++;
++
++ return sum & 0xFF;
++}
++
++#ifdef CONFIG_X86_NUMAQ
++/*
++ * Have to match translation table entries to main table entries by counter
++ * hence the mpc_record variable .... can't see a less disgusting way of
++ * doing this ....
++ */
++
++static int mpc_record;
++static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY]
++ __cpuinitdata;
++#endif
++
++static void __cpuinit MP_processor_info(struct mpc_config_processor *m)
++{
++#ifndef CONFIG_XEN
++ int apicid;
++ char *bootup_cpu = "";
++
++ if (!(m->mpc_cpuflag & CPU_ENABLED)) {
++ disabled_cpus++;
++ return;
++ }
++#ifdef CONFIG_X86_NUMAQ
++ apicid = mpc_apic_id(m, translation_table[mpc_record]);
++#else
++ apicid = m->mpc_apicid;
++#endif
++ if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
++ bootup_cpu = " (Bootup-CPU)";
++ boot_cpu_physical_apicid = m->mpc_apicid;
++ }
++
++ printk(KERN_INFO "Processor #%d%s\n", m->mpc_apicid, bootup_cpu);
++ generic_processor_info(apicid, m->mpc_apicver);
++#else /* CONFIG_XEN */
++ num_processors++;
++#endif
++}
++
++static void __init MP_bus_info(struct mpc_config_bus *m)
++{
++ char str[7];
++
++ memcpy(str, m->mpc_bustype, 6);
++ str[6] = 0;
++
++#ifdef CONFIG_X86_NUMAQ
++ mpc_oem_bus_info(m, str, translation_table[mpc_record]);
++#else
++ Dprintk("Bus #%d is %s\n", m->mpc_busid, str);
++#endif
++
++#if MAX_MP_BUSSES < 256
++ if (m->mpc_busid >= MAX_MP_BUSSES) {
++ printk(KERN_WARNING "MP table busid value (%d) for bustype %s "
++ " is too large, max. supported is %d\n",
++ m->mpc_busid, str, MAX_MP_BUSSES - 1);
++ return;
++ }
++#endif
++
++ if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA) - 1) == 0) {
++ set_bit(m->mpc_busid, mp_bus_not_pci);
++#if defined(CONFIG_EISA) || defined (CONFIG_MCA)
++ mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
++#endif
++ } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) {
++#ifdef CONFIG_X86_NUMAQ
++ mpc_oem_pci_bus(m, translation_table[mpc_record]);
++#endif
++ clear_bit(m->mpc_busid, mp_bus_not_pci);
++ mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id;
++ mp_current_pci_id++;
++#if defined(CONFIG_EISA) || defined (CONFIG_MCA)
++ mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI;
++ } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA) - 1) == 0) {
++ mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA;
++ } else if (strncmp(str, BUSTYPE_MCA, sizeof(BUSTYPE_MCA) - 1) == 0) {
++ mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA;
++#endif
++ } else
++ printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str);
++}
++
++#ifdef CONFIG_X86_IO_APIC
++
++static int bad_ioapic(unsigned long address)
++{
++ if (nr_ioapics >= MAX_IO_APICS) {
++ printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
++ "(found %d)\n", MAX_IO_APICS, nr_ioapics);
++ panic("Recompile kernel with bigger MAX_IO_APICS!\n");
++ }
++ if (!address) {
++ printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address"
++ " found in table, skipping!\n");
++ return 1;
++ }
++ return 0;
++}
++
++static void __init MP_ioapic_info(struct mpc_config_ioapic *m)
++{
++ if (!(m->mpc_flags & MPC_APIC_USABLE))
++ return;
++
++ printk(KERN_INFO "I/O APIC #%d Version %d at 0x%X.\n",
++ m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr);
++
++ if (bad_ioapic(m->mpc_apicaddr))
++ return;
++
++ mp_ioapics[nr_ioapics] = *m;
++ nr_ioapics++;
++}
++
++static void __init MP_intsrc_info(struct mpc_config_intsrc *m)
++{
++ mp_irqs[mp_irq_entries] = *m;
++ Dprintk("Int: type %d, pol %d, trig %d, bus %d,"
++ " IRQ %02x, APIC ID %x, APIC INT %02x\n",
++ m->mpc_irqtype, m->mpc_irqflag & 3,
++ (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus,
++ m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq);
++ if (++mp_irq_entries == MAX_IRQ_SOURCES)
++ panic("Max # of irq sources exceeded!!\n");
++}
++
++#endif
++
++static void __init MP_lintsrc_info(struct mpc_config_lintsrc *m)
++{
++ Dprintk("Lint: type %d, pol %d, trig %d, bus %d,"
++ " IRQ %02x, APIC ID %x, APIC LINT %02x\n",
++ m->mpc_irqtype, m->mpc_irqflag & 3,
++ (m->mpc_irqflag >> 2) & 3, m->mpc_srcbusid,
++ m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
++}
++
++#ifdef CONFIG_X86_NUMAQ
++static void __init MP_translation_info(struct mpc_config_translation *m)
++{
++ printk(KERN_INFO
++ "Translation: record %d, type %d, quad %d, global %d, local %d\n",
++ mpc_record, m->trans_type, m->trans_quad, m->trans_global,
++ m->trans_local);
++
++ if (mpc_record >= MAX_MPC_ENTRY)
++ printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n");
++ else
++ translation_table[mpc_record] = m; /* stash this for later */
++ if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad))
++ node_set_online(m->trans_quad);
++}
++
++/*
++ * Read/parse the MPC oem tables
++ */
++
++static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable,
++ unsigned short oemsize)
++{
++ int count = sizeof(*oemtable); /* the header size */
++ unsigned char *oemptr = ((unsigned char *)oemtable) + count;
++
++ mpc_record = 0;
++ printk(KERN_INFO "Found an OEM MPC table at %8p - parsing it ... \n",
++ oemtable);
++ if (memcmp(oemtable->oem_signature, MPC_OEM_SIGNATURE, 4)) {
++ printk(KERN_WARNING
++ "SMP mpc oemtable: bad signature [%c%c%c%c]!\n",
++ oemtable->oem_signature[0], oemtable->oem_signature[1],
++ oemtable->oem_signature[2], oemtable->oem_signature[3]);
++ return;
++ }
++ if (mpf_checksum((unsigned char *)oemtable, oemtable->oem_length)) {
++ printk(KERN_WARNING "SMP oem mptable: checksum error!\n");
++ return;
++ }
++ while (count < oemtable->oem_length) {
++ switch (*oemptr) {
++ case MP_TRANSLATION:
++ {
++ struct mpc_config_translation *m =
++ (struct mpc_config_translation *)oemptr;
++ MP_translation_info(m);
++ oemptr += sizeof(*m);
++ count += sizeof(*m);
++ ++mpc_record;
++ break;
++ }
++ default:
++ {
++ printk(KERN_WARNING
++ "Unrecognised OEM table entry type! - %d\n",
++ (int)*oemptr);
++ return;
++ }
++ }
++ }
++}
++
++static inline void mps_oem_check(struct mp_config_table *mpc, char *oem,
++ char *productid)
++{
++ if (strncmp(oem, "IBM NUMA", 8))
++ printk("Warning! May not be a NUMA-Q system!\n");
++ if (mpc->mpc_oemptr)
++ smp_read_mpc_oem((struct mp_config_oemtable *)mpc->mpc_oemptr,
++ mpc->mpc_oemsize);
++}
++#endif /* CONFIG_X86_NUMAQ */
++
++/*
++ * Read/parse the MPC
++ */
++
++static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early)
++{
++ char str[16];
++ char oem[10];
++ int count = sizeof(*mpc);
++ unsigned char *mpt = ((unsigned char *)mpc) + count;
++
++ if (memcmp(mpc->mpc_signature, MPC_SIGNATURE, 4)) {
++ printk(KERN_ERR "MPTABLE: bad signature [%c%c%c%c]!\n",
++ mpc->mpc_signature[0], mpc->mpc_signature[1],
++ mpc->mpc_signature[2], mpc->mpc_signature[3]);
++ return 0;
++ }
++ if (mpf_checksum((unsigned char *)mpc, mpc->mpc_length)) {
++ printk(KERN_ERR "MPTABLE: checksum error!\n");
++ return 0;
++ }
++ if (mpc->mpc_spec != 0x01 && mpc->mpc_spec != 0x04) {
++ printk(KERN_ERR "MPTABLE: bad table version (%d)!!\n",
++ mpc->mpc_spec);
++ return 0;
++ }
++ if (!mpc->mpc_lapic) {
++ printk(KERN_ERR "MPTABLE: null local APIC address!\n");
++ return 0;
++ }
++ memcpy(oem, mpc->mpc_oem, 8);
++ oem[8] = 0;
++ printk(KERN_INFO "MPTABLE: OEM ID: %s ", oem);
++
++ memcpy(str, mpc->mpc_productid, 12);
++ str[12] = 0;
++ printk("Product ID: %s ", str);
++
++#ifdef CONFIG_X86_32
++ mps_oem_check(mpc, oem, str);
++#endif
++ printk(KERN_INFO "MPTABLE: Product ID: %s ", str);
++
++ printk(KERN_INFO "MPTABLE: APIC at: 0x%X\n", mpc->mpc_lapic);
++
++ /* save the local APIC address, it might be non-default */
++ if (!acpi_lapic)
++ mp_lapic_addr = mpc->mpc_lapic;
++
++ if (early)
++ return 1;
++
++ /*
++ * Now process the configuration blocks.
++ */
++#ifdef CONFIG_X86_NUMAQ
++ mpc_record = 0;
++#endif
++ while (count < mpc->mpc_length) {
++ switch (*mpt) {
++ case MP_PROCESSOR:
++ {
++ struct mpc_config_processor *m =
++ (struct mpc_config_processor *)mpt;
++ /* ACPI may have already provided this data */
++ if (!acpi_lapic)
++ MP_processor_info(m);
++ mpt += sizeof(*m);
++ count += sizeof(*m);
++ break;
++ }
++ case MP_BUS:
++ {
++ struct mpc_config_bus *m =
++ (struct mpc_config_bus *)mpt;
++ MP_bus_info(m);
++ mpt += sizeof(*m);
++ count += sizeof(*m);
++ break;
++ }
++ case MP_IOAPIC:
++ {
++#ifdef CONFIG_X86_IO_APIC
++ struct mpc_config_ioapic *m =
++ (struct mpc_config_ioapic *)mpt;
++ MP_ioapic_info(m);
++#endif
++ mpt += sizeof(struct mpc_config_ioapic);
++ count += sizeof(struct mpc_config_ioapic);
++ break;
++ }
++ case MP_INTSRC:
++ {
++#ifdef CONFIG_X86_IO_APIC
++ struct mpc_config_intsrc *m =
++ (struct mpc_config_intsrc *)mpt;
++
++ MP_intsrc_info(m);
++#endif
++ mpt += sizeof(struct mpc_config_intsrc);
++ count += sizeof(struct mpc_config_intsrc);
++ break;
++ }
++ case MP_LINTSRC:
++ {
++ struct mpc_config_lintsrc *m =
++ (struct mpc_config_lintsrc *)mpt;
++ MP_lintsrc_info(m);
++ mpt += sizeof(*m);
++ count += sizeof(*m);
++ break;
++ }
++ default:
++ /* wrong mptable */
++ printk(KERN_ERR "Your mptable is wrong, contact your HW vendor!\n");
++ printk(KERN_ERR "type %x\n", *mpt);
++ print_hex_dump(KERN_ERR, " ", DUMP_PREFIX_ADDRESS, 16,
++ 1, mpc, mpc->mpc_length, 1);
++ count = mpc->mpc_length;
++ break;
++ }
++#ifdef CONFIG_X86_NUMAQ
++ ++mpc_record;
++#endif
++ }
++ setup_apic_routing();
++ if (!num_processors)
++ printk(KERN_ERR "MPTABLE: no processors registered!\n");
++ return num_processors;
++}
++
++#ifdef CONFIG_X86_IO_APIC
++
++static int __init ELCR_trigger(unsigned int irq)
++{
++ unsigned int port;
++
++ port = 0x4d0 + (irq >> 3);
++ return (inb(port) >> (irq & 7)) & 1;
++}
++
++static void __init construct_default_ioirq_mptable(int mpc_default_type)
++{
++ struct mpc_config_intsrc intsrc;
++ int i;
++ int ELCR_fallback = 0;
++
++ intsrc.mpc_type = MP_INTSRC;
++ intsrc.mpc_irqflag = 0; /* conforming */
++ intsrc.mpc_srcbus = 0;
++ intsrc.mpc_dstapic = mp_ioapics[0].mpc_apicid;
++
++ intsrc.mpc_irqtype = mp_INT;
++
++ /*
++ * If true, we have an ISA/PCI system with no IRQ entries
++ * in the MP table. To prevent the PCI interrupts from being set up
++ * incorrectly, we try to use the ELCR. The sanity check to see if
++ * there is good ELCR data is very simple - IRQ0, 1, 2 and 13 can
++ * never be level sensitive, so we simply see if the ELCR agrees.
++ * If it does, we assume it's valid.
++ */
++ if (mpc_default_type == 5) {
++ printk(KERN_INFO "ISA/PCI bus type with no IRQ information... "
++ "falling back to ELCR\n");
++
++ if (ELCR_trigger(0) || ELCR_trigger(1) || ELCR_trigger(2) ||
++ ELCR_trigger(13))
++ printk(KERN_ERR "ELCR contains invalid data... "
++ "not using ELCR\n");
++ else {
++ printk(KERN_INFO
++ "Using ELCR to identify PCI interrupts\n");
++ ELCR_fallback = 1;
++ }
++ }
++
++ for (i = 0; i < 16; i++) {
++ switch (mpc_default_type) {
++ case 2:
++ if (i == 0 || i == 13)
++ continue; /* IRQ0 & IRQ13 not connected */
++ /* fall through */
++ default:
++ if (i == 2)
++ continue; /* IRQ2 is never connected */
++ }
++
++ if (ELCR_fallback) {
++ /*
++ * If the ELCR indicates a level-sensitive interrupt, we
++ * copy that information over to the MP table in the
++ * irqflag field (level sensitive, active high polarity).
++ */
++ if (ELCR_trigger(i))
++ intsrc.mpc_irqflag = 13;
++ else
++ intsrc.mpc_irqflag = 0;
++ }
++
++ intsrc.mpc_srcbusirq = i;
++ intsrc.mpc_dstirq = i ? i : 2; /* IRQ0 to INTIN2 */
++ MP_intsrc_info(&intsrc);
++ }
++
++ intsrc.mpc_irqtype = mp_ExtINT;
++ intsrc.mpc_srcbusirq = 0;
++ intsrc.mpc_dstirq = 0; /* 8259A to INTIN0 */
++ MP_intsrc_info(&intsrc);
++}
++
++#endif
++
++static inline void __init construct_default_ISA_mptable(int mpc_default_type)
++{
++ struct mpc_config_processor processor;
++ struct mpc_config_bus bus;
++#ifdef CONFIG_X86_IO_APIC
++ struct mpc_config_ioapic ioapic;
++#endif
++ struct mpc_config_lintsrc lintsrc;
++ int linttypes[2] = { mp_ExtINT, mp_NMI };
++ int i;
++
++ /*
++ * local APIC has default address
++ */
++ mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
++
++ /*
++ * 2 CPUs, numbered 0 & 1.
++ */
++ processor.mpc_type = MP_PROCESSOR;
++ /* Either an integrated APIC or a discrete 82489DX. */
++ processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
++ processor.mpc_cpuflag = CPU_ENABLED;
++ processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
++ (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask;
++ processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
++ processor.mpc_reserved[0] = 0;
++ processor.mpc_reserved[1] = 0;
++ for (i = 0; i < 2; i++) {
++ processor.mpc_apicid = i;
++ MP_processor_info(&processor);
++ }
++
++ bus.mpc_type = MP_BUS;
++ bus.mpc_busid = 0;
++ switch (mpc_default_type) {
++ default:
++ printk(KERN_ERR "???\nUnknown standard configuration %d\n",
++ mpc_default_type);
++ /* fall through */
++ case 1:
++ case 5:
++ memcpy(bus.mpc_bustype, "ISA ", 6);
++ break;
++ case 2:
++ case 6:
++ case 3:
++ memcpy(bus.mpc_bustype, "EISA ", 6);
++ break;
++ case 4:
++ case 7:
++ memcpy(bus.mpc_bustype, "MCA ", 6);
++ }
++ MP_bus_info(&bus);
++ if (mpc_default_type > 4) {
++ bus.mpc_busid = 1;
++ memcpy(bus.mpc_bustype, "PCI ", 6);
++ MP_bus_info(&bus);
++ }
++
++#ifdef CONFIG_X86_IO_APIC
++ ioapic.mpc_type = MP_IOAPIC;
++ ioapic.mpc_apicid = 2;
++ ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
++ ioapic.mpc_flags = MPC_APIC_USABLE;
++ ioapic.mpc_apicaddr = 0xFEC00000;
++ MP_ioapic_info(&ioapic);
++
++ /*
++ * We set up most of the low 16 IO-APIC pins according to MPS rules.
++ */
++ construct_default_ioirq_mptable(mpc_default_type);
++#endif
++ lintsrc.mpc_type = MP_LINTSRC;
++ lintsrc.mpc_irqflag = 0; /* conforming */
++ lintsrc.mpc_srcbusid = 0;
++ lintsrc.mpc_srcbusirq = 0;
++ lintsrc.mpc_destapic = MP_APIC_ALL;
++ for (i = 0; i < 2; i++) {
++ lintsrc.mpc_irqtype = linttypes[i];
++ lintsrc.mpc_destapiclint = i;
++ MP_lintsrc_info(&lintsrc);
++ }
++}
++
++static struct intel_mp_floating *mpf_found;
++
++/*
++ * Scan the memory blocks for an SMP configuration block.
++ */
++static void __init __get_smp_config(unsigned early)
++{
++ struct intel_mp_floating *mpf = mpf_found;
++
++ if (acpi_lapic && early)
++ return;
++ /*
++ * ACPI supports both logical (e.g. Hyper-Threading) and physical
++ * processors, where MPS only supports physical.
++ */
++ if (acpi_lapic && acpi_ioapic) {
++ printk(KERN_INFO "Using ACPI (MADT) for SMP configuration "
++ "information\n");
++ return;
++ } else if (acpi_lapic)
++ printk(KERN_INFO "Using ACPI for processor (LAPIC) "
++ "configuration information\n");
++
++ printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n",
++ mpf->mpf_specification);
++#ifdef CONFIG_X86_32
++ if (mpf->mpf_feature2 & (1 << 7)) {
++ printk(KERN_INFO " IMCR and PIC compatibility mode.\n");
++ pic_mode = 1;
++ } else {
++ printk(KERN_INFO " Virtual Wire compatibility mode.\n");
++ pic_mode = 0;
++ }
++#endif
++ /*
++ * Now see if we need to read further.
++ */
++ if (mpf->mpf_feature1 != 0) {
++ if (early) {
++ /*
++ * local APIC has default address
++ */
++ mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
++ return;
++ }
++
++ printk(KERN_INFO "Default MP configuration #%d\n",
++ mpf->mpf_feature1);
++ construct_default_ISA_mptable(mpf->mpf_feature1);
++
++ } else if (mpf->mpf_physptr) {
++
++ /*
++ * Read the physical hardware table. Anything here will
++ * override the defaults.
++ */
++ if (!smp_read_mpc(isa_bus_to_virt(mpf->mpf_physptr), early)) {
++ smp_found_config = 0;
++ printk(KERN_ERR
++ "BIOS bug, MP table errors detected!...\n");
++ printk(KERN_ERR "... disabling SMP support. "
++ "(tell your hw vendor)\n");
++ return;
++ }
++
++ if (early)
++ return;
++#ifdef CONFIG_X86_IO_APIC
++ /*
++ * If there are no explicit MP IRQ entries, then we are
++ * broken. We set up most of the low 16 IO-APIC pins to
++ * ISA defaults and hope it will work.
++ */
++ if (!mp_irq_entries) {
++ struct mpc_config_bus bus;
++
++ printk(KERN_ERR "BIOS bug, no explicit IRQ entries, "
++ "using default mptable. "
++ "(tell your hw vendor)\n");
++
++ bus.mpc_type = MP_BUS;
++ bus.mpc_busid = 0;
++ memcpy(bus.mpc_bustype, "ISA ", 6);
++ MP_bus_info(&bus);
++
++ construct_default_ioirq_mptable(0);
++ }
++#endif
++ } else
++ BUG();
++
++ if (!early)
++ printk(KERN_INFO "Processors: %d\n", num_processors);
++ /*
++ * Only use the first configuration found.
++ */
++}
++
++void __init early_get_smp_config(void)
++{
++ __get_smp_config(1);
++}
++
++void __init get_smp_config(void)
++{
++ __get_smp_config(0);
++}
++
++static int __init smp_scan_config(unsigned long base, unsigned long length,
++ unsigned reserve)
++{
++ unsigned int *bp = isa_bus_to_virt(base);
++ struct intel_mp_floating *mpf;
++
++ Dprintk("Scan SMP from %p for %ld bytes.\n", bp, length);
++ BUILD_BUG_ON(sizeof(*mpf) != 16);
++
++ while (length > 0) {
++ mpf = (struct intel_mp_floating *)bp;
++ if ((*bp == SMP_MAGIC_IDENT) &&
++ (mpf->mpf_length == 1) &&
++ !mpf_checksum((unsigned char *)bp, 16) &&
++ ((mpf->mpf_specification == 1)
++ || (mpf->mpf_specification == 4))) {
++
++ smp_found_config = 1;
++ mpf_found = mpf;
++#ifdef CONFIG_X86_32
++#ifndef CONFIG_XEN
++ printk(KERN_INFO "found SMP MP-table at [%p] %08lx\n",
++ mpf, virt_to_phys(mpf));
++ reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE,
++ BOOTMEM_DEFAULT);
++ if (mpf->mpf_physptr) {
++ /*
++ * We cannot access to MPC table to compute
++ * table size yet, as only few megabytes from
++ * the bottom is mapped now.
++ * PC-9800's MPC table places on the very last
++ * of physical memory; so that simply reserving
++ * PAGE_SIZE from mpg->mpf_physptr yields BUG()
++ * in reserve_bootmem.
++ */
++ unsigned long size = PAGE_SIZE;
++ unsigned long end = max_low_pfn * PAGE_SIZE;
++ if (mpf->mpf_physptr + size > end)
++ size = end - mpf->mpf_physptr;
++ reserve_bootmem(mpf->mpf_physptr, size,
++ BOOTMEM_DEFAULT);
++ }
++#else
++ printk(KERN_INFO "found SMP MP-table at [%p] %08lx\n",
++ mpf, ((void *)bp - isa_bus_to_virt(base)) + base);
++#endif
++#elif !defined(CONFIG_XEN)
++ if (!reserve)
++ return 1;
++
++ reserve_bootmem_generic(virt_to_phys(mpf), PAGE_SIZE);
++ if (mpf->mpf_physptr)
++ reserve_bootmem_generic(mpf->mpf_physptr,
++ PAGE_SIZE);
++#endif
++ return 1;
++ }
++ bp += 4;
++ length -= 16;
++ }
++ return 0;
++}
++
++static void __init __find_smp_config(unsigned reserve)
++{
++#ifndef CONFIG_XEN
++ unsigned int address;
++#endif
++
++ /*
++ * FIXME: Linux assumes you have 640K of base ram..
++ * this continues the error...
++ *
++ * 1) Scan the bottom 1K for a signature
++ * 2) Scan the top 1K of base RAM
++ * 3) Scan the 64K of bios
++ */
++ if (smp_scan_config(0x0, 0x400, reserve) ||
++ smp_scan_config(639 * 0x400, 0x400, reserve) ||
++ smp_scan_config(0xF0000, 0x10000, reserve))
++ return;
++ /*
++ * If it is an SMP machine we should know now, unless the
++ * configuration is in an EISA/MCA bus machine with an
++ * extended bios data area.
++ *
++ * there is a real-mode segmented pointer pointing to the
++ * 4K EBDA area at 0x40E, calculate and scan it here.
++ *
++ * NOTE! There are Linux loaders that will corrupt the EBDA
++ * area, and as such this kind of SMP config may be less
++ * trustworthy, simply because the SMP table may have been
++ * stomped on during early boot. These loaders are buggy and
++ * should be fixed.
++ *
++ * MP1.4 SPEC states to only scan first 1K of 4K EBDA.
++ */
++
++#ifndef CONFIG_XEN
++ address = get_bios_ebda();
++ if (address)
++ smp_scan_config(address, 0x400, reserve);
++#endif
++}
++
++void __init early_find_smp_config(void)
++{
++ __find_smp_config(0);
++}
++
++void __init find_smp_config(void)
++{
++ __find_smp_config(1);
++}
++
++/* --------------------------------------------------------------------------
++ ACPI-based MP Configuration
++ -------------------------------------------------------------------------- */
++
++/*
++ * Keep this outside and initialized to 0, for !CONFIG_ACPI builds:
++ */
++int es7000_plat;
++
++#ifdef CONFIG_ACPI
++
++#ifdef CONFIG_X86_IO_APIC
++
++#define MP_ISA_BUS 0
++
++extern struct mp_ioapic_routing mp_ioapic_routing[MAX_IO_APICS];
++
++static int mp_find_ioapic(int gsi)
++{
++ int i = 0;
++
++ /* Find the IOAPIC that manages this GSI. */
++ for (i = 0; i < nr_ioapics; i++) {
++ if ((gsi >= mp_ioapic_routing[i].gsi_base)
++ && (gsi <= mp_ioapic_routing[i].gsi_end))
++ return i;
++ }
++
++ printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi);
++ return -1;
++}
++
++static u8 __init uniq_ioapic_id(u8 id)
++{
++#ifdef CONFIG_X86_32
++ if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
++ !APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
++ return io_apic_get_unique_id(nr_ioapics, id);
++ else
++ return id;
++#else
++ int i;
++ DECLARE_BITMAP(used, 256);
++ bitmap_zero(used, 256);
++ for (i = 0; i < nr_ioapics; i++) {
++ struct mpc_config_ioapic *ia = &mp_ioapics[i];
++ __set_bit(ia->mpc_apicid, used);
++ }
++ if (!test_bit(id, used))
++ return id;
++ return find_first_zero_bit(used, 256);
++#endif
++}
++
++void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
++{
++ int idx = 0;
++
++ if (bad_ioapic(address))
++ return;
++
++ idx = nr_ioapics;
++
++ mp_ioapics[idx].mpc_type = MP_IOAPIC;
++ mp_ioapics[idx].mpc_flags = MPC_APIC_USABLE;
++ mp_ioapics[idx].mpc_apicaddr = address;
++
++#ifndef CONFIG_XEN
++ set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
++#endif
++ mp_ioapics[idx].mpc_apicid = uniq_ioapic_id(id);
++#ifdef CONFIG_X86_32
++ mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx);
++#else
++ mp_ioapics[idx].mpc_apicver = 0;
++#endif
++ /*
++ * Build basic GSI lookup table to facilitate gsi->io_apic lookups
++ * and to prevent reprogramming of IOAPIC pins (PCI GSIs).
++ */
++ mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mpc_apicid;
++ mp_ioapic_routing[idx].gsi_base = gsi_base;
++ mp_ioapic_routing[idx].gsi_end = gsi_base +
++ io_apic_get_redir_entries(idx);
++
++ printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, "
++ "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid,
++ mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr,
++ mp_ioapic_routing[idx].gsi_base, mp_ioapic_routing[idx].gsi_end);
++
++ nr_ioapics++;
++}
++
++void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
++{
++ struct mpc_config_intsrc intsrc;
++ int ioapic = -1;
++ int pin = -1;
++
++ /*
++ * Convert 'gsi' to 'ioapic.pin'.
++ */
++ ioapic = mp_find_ioapic(gsi);
++ if (ioapic < 0)
++ return;
++ pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
++
++ /*
++ * TBD: This check is for faulty timer entries, where the override
++ * erroneously sets the trigger to level, resulting in a HUGE
++ * increase of timer interrupts!
++ */
++ if ((bus_irq == 0) && (trigger == 3))
++ trigger = 1;
++
++ intsrc.mpc_type = MP_INTSRC;
++ intsrc.mpc_irqtype = mp_INT;
++ intsrc.mpc_irqflag = (trigger << 2) | polarity;
++ intsrc.mpc_srcbus = MP_ISA_BUS;
++ intsrc.mpc_srcbusirq = bus_irq; /* IRQ */
++ intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; /* APIC ID */
++ intsrc.mpc_dstirq = pin; /* INTIN# */
++
++ MP_intsrc_info(&intsrc);
++}
++
++void __init mp_config_acpi_legacy_irqs(void)
++{
++ struct mpc_config_intsrc intsrc;
++ int i = 0;
++ int ioapic = -1;
++
++#if defined (CONFIG_MCA) || defined (CONFIG_EISA)
++ /*
++ * Fabricate the legacy ISA bus (bus #31).
++ */
++ mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA;
++#endif
++ set_bit(MP_ISA_BUS, mp_bus_not_pci);
++ Dprintk("Bus #%d is ISA\n", MP_ISA_BUS);
++
++ /*
++ * Older generations of ES7000 have no legacy identity mappings
++ */
++ if (es7000_plat == 1)
++ return;
++
++ /*
++ * Locate the IOAPIC that manages the ISA IRQs (0-15).
++ */
++ ioapic = mp_find_ioapic(0);
++ if (ioapic < 0)
++ return;
++
++ intsrc.mpc_type = MP_INTSRC;
++ intsrc.mpc_irqflag = 0; /* Conforming */
++ intsrc.mpc_srcbus = MP_ISA_BUS;
++#ifdef CONFIG_X86_IO_APIC
++ intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid;
++#endif
++ /*
++ * Use the default configuration for the IRQs 0-15. Unless
++ * overridden by (MADT) interrupt source override entries.
++ */
++ for (i = 0; i < 16; i++) {
++ int idx;
++
++ for (idx = 0; idx < mp_irq_entries; idx++) {
++ struct mpc_config_intsrc *irq = mp_irqs + idx;
++
++ /* Do we already have a mapping for this ISA IRQ? */
++ if (irq->mpc_srcbus == MP_ISA_BUS
++ && irq->mpc_srcbusirq == i)
++ break;
++
++ /* Do we already have a mapping for this IOAPIC pin */
++ if ((irq->mpc_dstapic == intsrc.mpc_dstapic) &&
++ (irq->mpc_dstirq == i))
++ break;
++ }
++
++ if (idx != mp_irq_entries) {
++ printk(KERN_DEBUG "ACPI: IRQ%d used by override.\n", i);
++ continue; /* IRQ already used */
++ }
++
++ intsrc.mpc_irqtype = mp_INT;
++ intsrc.mpc_srcbusirq = i; /* Identity mapped */
++ intsrc.mpc_dstirq = i;
++
++ MP_intsrc_info(&intsrc);
++ }
++}
++
++int mp_register_gsi(u32 gsi, int triggering, int polarity)
++{
++ int ioapic;
++ int ioapic_pin;
++#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN)
++#define MAX_GSI_NUM 4096
++#define IRQ_COMPRESSION_START 64
++
++ static int pci_irq = IRQ_COMPRESSION_START;
++ /*
++ * Mapping between Global System Interrupts, which
++ * represent all possible interrupts, and IRQs
++ * assigned to actual devices.
++ */
++ static int gsi_to_irq[MAX_GSI_NUM];
++#else
++
++ if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC)
++ return gsi;
++#endif
++
++ /* Don't set up the ACPI SCI because it's already set up */
++ if (acpi_gbl_FADT.sci_interrupt == gsi)
++ return gsi;
++
++ ioapic = mp_find_ioapic(gsi);
++ if (ioapic < 0) {
++ printk(KERN_WARNING "No IOAPIC for GSI %u\n", gsi);
++ return gsi;
++ }
++
++ ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
++
++#ifndef CONFIG_X86_32
++ if (ioapic_renumber_irq)
++ gsi = ioapic_renumber_irq(ioapic, gsi);
++#endif
++
++ /*
++ * Avoid pin reprogramming. PRTs typically include entries
++ * with redundant pin->gsi mappings (but unique PCI devices);
++ * we only program the IOAPIC on the first.
++ */
++ if (ioapic_pin > MP_MAX_IOAPIC_PIN) {
++ printk(KERN_ERR "Invalid reference to IOAPIC pin "
++ "%d-%d\n", mp_ioapic_routing[ioapic].apic_id,
++ ioapic_pin);
++ return gsi;
++ }
++ if (test_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed)) {
++ Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n",
++ mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
++#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN)
++ return (gsi < IRQ_COMPRESSION_START ? gsi : gsi_to_irq[gsi]);
++#else
++ return gsi;
++#endif
++ }
++
++ set_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed);
++#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN)
++ /*
++ * For GSI >= 64, use IRQ compression
++ */
++ if ((gsi >= IRQ_COMPRESSION_START)
++ && (triggering == ACPI_LEVEL_SENSITIVE)) {
++ /*
++ * For PCI devices assign IRQs in order, avoiding gaps
++ * due to unused I/O APIC pins.
++ */
++ int irq = gsi;
++ if (gsi < MAX_GSI_NUM) {
++ /*
++ * Retain the VIA chipset work-around (gsi > 15), but
++ * avoid a problem where the 8254 timer (IRQ0) is setup
++ * via an override (so it's not on pin 0 of the ioapic),
++ * and at the same time, the pin 0 interrupt is a PCI
++ * type. The gsi > 15 test could cause these two pins
++ * to be shared as IRQ0, and they are not shareable.
++ * So test for this condition, and if necessary, avoid
++ * the pin collision.
++ */
++ gsi = pci_irq++;
++ /*
++ * Don't assign IRQ used by ACPI SCI
++ */
++ if (gsi == acpi_gbl_FADT.sci_interrupt)
++ gsi = pci_irq++;
++ gsi_to_irq[irq] = gsi;
++ } else {
++ printk(KERN_ERR "GSI %u is too high\n", gsi);
++ return gsi;
++ }
++ }
++#endif
++ io_apic_set_pci_routing(ioapic, ioapic_pin, gsi,
++ triggering == ACPI_EDGE_SENSITIVE ? 0 : 1,
++ polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
++ return gsi;
++}
++
++#endif /* CONFIG_X86_IO_APIC */
++#endif /* CONFIG_ACPI */
+--- sle11-2009-05-14.orig/arch/x86/kernel/mpparse_32-xen.c 2009-03-16 16:33:40.000000000 +0100
++++ /dev/null 1970-01-01 00:00:00.000000000 +0000
+@@ -1,1161 +0,0 @@
+-/*
+- * Intel Multiprocessor Specification 1.1 and 1.4
+- * compliant MP-table parsing routines.
+- *
+- * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
+- * (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
+- *
+- * Fixes
+- * Erich Boleyn : MP v1.4 and additional changes.
+- * Alan Cox : Added EBDA scanning
+- * Ingo Molnar : various cleanups and rewrites
+- * Maciej W. Rozycki: Bits for default MP configurations
+- * Paul Diefenbaugh: Added full ACPI support
+- */
+-
+-#include <linux/mm.h>
+-#include <linux/init.h>
+-#include <linux/acpi.h>
+-#include <linux/delay.h>
+-#include <linux/bootmem.h>
+-#include <linux/kernel_stat.h>
+-#include <linux/mc146818rtc.h>
+-#include <linux/bitops.h>
+-
+-#include <asm/smp.h>
+-#include <asm/acpi.h>
+-#include <asm/mtrr.h>
+-#include <asm/mpspec.h>
+-#include <asm/io_apic.h>
+-
+-#include <mach_apic.h>
+-#include <mach_apicdef.h>
+-#include <mach_mpparse.h>
+-#include <bios_ebda.h>
+-
+-/* Have we found an MP table */
+-int smp_found_config;
+-unsigned int __cpuinitdata maxcpus = NR_CPUS;
+-
+-/*
+- * Various Linux-internal data structures created from the
+- * MP-table.
+- */
+-int apic_version [MAX_APICS];
+-int mp_bus_id_to_type [MAX_MP_BUSSES];
+-int mp_bus_id_to_node [MAX_MP_BUSSES];
+-int mp_bus_id_to_local [MAX_MP_BUSSES];
+-int quad_local_to_mp_bus_id [NR_CPUS/4][4];
+-int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
+-static int mp_current_pci_id;
+-
+-/* I/O APIC entries */
+-struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
+-
+-/* # of MP IRQ source entries */
+-struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
+-
+-/* MP IRQ source entries */
+-int mp_irq_entries;
+-
+-int nr_ioapics;
+-
+-int pic_mode;
+-unsigned long mp_lapic_addr;
+-
+-unsigned int def_to_bigsmp = 0;
+-
+-/* Processor that is doing the boot up */
+-unsigned int boot_cpu_physical_apicid = -1U;
+-/* Internal processor count */
+-unsigned int num_processors;
+-
+-/* Bitmask of physically existing CPUs */
+-physid_mask_t phys_cpu_present_map;
+-
+-u8 bios_cpu_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
+-
+-/*
+- * Intel MP BIOS table parsing routines:
+- */
+-
+-
+-/*
+- * Checksum an MP configuration block.
+- */
+-
+-static int __init mpf_checksum(unsigned char *mp, int len)
+-{
+- int sum = 0;
+-
+- while (len--)
+- sum += *mp++;
+-
+- return sum & 0xFF;
+-}
+-
+-/*
+- * Have to match translation table entries to main table entries by counter
+- * hence the mpc_record variable .... can't see a less disgusting way of
+- * doing this ....
+- */
+-
+-static int mpc_record;
+-static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] __cpuinitdata;
+-
+-#ifndef CONFIG_XEN
+-static void __cpuinit MP_processor_info (struct mpc_config_processor *m)
+-{
+- int ver, apicid;
+- physid_mask_t phys_cpu;
+-
+- if (!(m->mpc_cpuflag & CPU_ENABLED))
+- return;
+-
+- apicid = mpc_apic_id(m, translation_table[mpc_record]);
+-
+- if (m->mpc_featureflag&(1<<0))
+- Dprintk(" Floating point unit present.\n");
+- if (m->mpc_featureflag&(1<<7))
+- Dprintk(" Machine Exception supported.\n");
+- if (m->mpc_featureflag&(1<<8))
+- Dprintk(" 64 bit compare & exchange supported.\n");
+- if (m->mpc_featureflag&(1<<9))
+- Dprintk(" Internal APIC present.\n");
+- if (m->mpc_featureflag&(1<<11))
+- Dprintk(" SEP present.\n");
+- if (m->mpc_featureflag&(1<<12))
+- Dprintk(" MTRR present.\n");
+- if (m->mpc_featureflag&(1<<13))
+- Dprintk(" PGE present.\n");
+- if (m->mpc_featureflag&(1<<14))
+- Dprintk(" MCA present.\n");
+- if (m->mpc_featureflag&(1<<15))
+- Dprintk(" CMOV present.\n");
+- if (m->mpc_featureflag&(1<<16))
+- Dprintk(" PAT present.\n");
+- if (m->mpc_featureflag&(1<<17))
+- Dprintk(" PSE present.\n");
+- if (m->mpc_featureflag&(1<<18))
+- Dprintk(" PSN present.\n");
+- if (m->mpc_featureflag&(1<<19))
+- Dprintk(" Cache Line Flush Instruction present.\n");
+- /* 20 Reserved */
+- if (m->mpc_featureflag&(1<<21))
+- Dprintk(" Debug Trace and EMON Store present.\n");
+- if (m->mpc_featureflag&(1<<22))
+- Dprintk(" ACPI Thermal Throttle Registers present.\n");
+- if (m->mpc_featureflag&(1<<23))
+- Dprintk(" MMX present.\n");
+- if (m->mpc_featureflag&(1<<24))
+- Dprintk(" FXSR present.\n");
+- if (m->mpc_featureflag&(1<<25))
+- Dprintk(" XMM present.\n");
+- if (m->mpc_featureflag&(1<<26))
+- Dprintk(" Willamette New Instructions present.\n");
+- if (m->mpc_featureflag&(1<<27))
+- Dprintk(" Self Snoop present.\n");
+- if (m->mpc_featureflag&(1<<28))
+- Dprintk(" HT present.\n");
+- if (m->mpc_featureflag&(1<<29))
+- Dprintk(" Thermal Monitor present.\n");
+- /* 30, 31 Reserved */
+-
+-
+- if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
+- Dprintk(" Bootup CPU\n");
+- boot_cpu_physical_apicid = m->mpc_apicid;
+- }
+-
+- ver = m->mpc_apicver;
+-
+- /*
+- * Validate version
+- */
+- if (ver == 0x0) {
+- printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! "
+- "fixing up to 0x10. (tell your hw vendor)\n",
+- m->mpc_apicid);
+- ver = 0x10;
+- }
+- apic_version[m->mpc_apicid] = ver;
+-
+- phys_cpu = apicid_to_cpu_present(apicid);
+- physids_or(phys_cpu_present_map, phys_cpu_present_map, phys_cpu);
+-
+- if (num_processors >= NR_CPUS) {
+- printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
+- " Processor ignored.\n", NR_CPUS);
+- return;
+- }
+-
+- if (num_processors >= maxcpus) {
+- printk(KERN_WARNING "WARNING: maxcpus limit of %i reached."
+- " Processor ignored.\n", maxcpus);
+- return;
+- }
+-
+- cpu_set(num_processors, cpu_possible_map);
+- num_processors++;
+-
+- /*
+- * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y
+- * but we need to work other dependencies like SMP_SUSPEND etc
+- * before this can be done without some confusion.
+- * if (CPU_HOTPLUG_ENABLED || num_processors > 8)
+- * - Ashok Raj <ashok.raj@intel.com>
+- */
+- if (num_processors > 8) {
+- switch (boot_cpu_data.x86_vendor) {
+- case X86_VENDOR_INTEL:
+- if (!APIC_XAPIC(ver)) {
+- def_to_bigsmp = 0;
+- break;
+- }
+- /* If P4 and above fall through */
+- case X86_VENDOR_AMD:
+- def_to_bigsmp = 1;
+- }
+- }
+- bios_cpu_apicid[num_processors - 1] = m->mpc_apicid;
+-}
+-#else
+-static void __cpuinit MP_processor_info (struct mpc_config_processor *m)
+-{
+- num_processors++;
+-}
+-#endif /* CONFIG_XEN */
+-
+-static void __init MP_bus_info (struct mpc_config_bus *m)
+-{
+- char str[7];
+-
+- memcpy(str, m->mpc_bustype, 6);
+- str[6] = 0;
+-
+- mpc_oem_bus_info(m, str, translation_table[mpc_record]);
+-
+-#if MAX_MP_BUSSES < 256
+- if (m->mpc_busid >= MAX_MP_BUSSES) {
+- printk(KERN_WARNING "MP table busid value (%d) for bustype %s "
+- " is too large, max. supported is %d\n",
+- m->mpc_busid, str, MAX_MP_BUSSES - 1);
+- return;
+- }
+-#endif
+-
+- if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA)-1) == 0) {
+- mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
+- } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA)-1) == 0) {
+- mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA;
+- } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI)-1) == 0) {
+- mpc_oem_pci_bus(m, translation_table[mpc_record]);
+- mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI;
+- mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id;
+- mp_current_pci_id++;
+- } else if (strncmp(str, BUSTYPE_MCA, sizeof(BUSTYPE_MCA)-1) == 0) {
+- mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA;
+- } else {
+- printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str);
+- }
+-}
+-
+-static void __init MP_ioapic_info (struct mpc_config_ioapic *m)
+-{
+- if (!(m->mpc_flags & MPC_APIC_USABLE))
+- return;
+-
+- printk(KERN_INFO "I/O APIC #%d Version %d at 0x%X.\n",
+- m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr);
+- if (nr_ioapics >= MAX_IO_APICS) {
+- printk(KERN_CRIT "Max # of I/O APICs (%d) exceeded (found %d).\n",
+- MAX_IO_APICS, nr_ioapics);
+- panic("Recompile kernel with bigger MAX_IO_APICS!.\n");
+- }
+- if (!m->mpc_apicaddr) {
+- printk(KERN_ERR "WARNING: bogus zero I/O APIC address"
+- " found in MP table, skipping!\n");
+- return;
+- }
+- mp_ioapics[nr_ioapics] = *m;
+- nr_ioapics++;
+-}
+-
+-static void __init MP_intsrc_info (struct mpc_config_intsrc *m)
+-{
+- mp_irqs [mp_irq_entries] = *m;
+- Dprintk("Int: type %d, pol %d, trig %d, bus %d,"
+- " IRQ %02x, APIC ID %x, APIC INT %02x\n",
+- m->mpc_irqtype, m->mpc_irqflag & 3,
+- (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus,
+- m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq);
+- if (++mp_irq_entries == MAX_IRQ_SOURCES)
+- panic("Max # of irq sources exceeded!!\n");
+-}
+-
+-static void __init MP_lintsrc_info (struct mpc_config_lintsrc *m)
+-{
+- Dprintk("Lint: type %d, pol %d, trig %d, bus %d,"
+- " IRQ %02x, APIC ID %x, APIC LINT %02x\n",
+- m->mpc_irqtype, m->mpc_irqflag & 3,
+- (m->mpc_irqflag >> 2) &3, m->mpc_srcbusid,
+- m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
+-}
+-
+-#ifdef CONFIG_X86_NUMAQ
+-static void __init MP_translation_info (struct mpc_config_translation *m)
+-{
+- printk(KERN_INFO "Translation: record %d, type %d, quad %d, global %d, local %d\n", mpc_record, m->trans_type, m->trans_quad, m->trans_global, m->trans_local);
+-
+- if (mpc_record >= MAX_MPC_ENTRY)
+- printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n");
+- else
+- translation_table[mpc_record] = m; /* stash this for later */
+- if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad))
+- node_set_online(m->trans_quad);
+-}
+-
+-/*
+- * Read/parse the MPC oem tables
+- */
+-
+-static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable, \
+- unsigned short oemsize)
+-{
+- int count = sizeof (*oemtable); /* the header size */
+- unsigned char *oemptr = ((unsigned char *)oemtable)+count;
+-
+- mpc_record = 0;
+- printk(KERN_INFO "Found an OEM MPC table at %8p - parsing it ... \n", oemtable);
+- if (memcmp(oemtable->oem_signature,MPC_OEM_SIGNATURE,4))
+- {
+- printk(KERN_WARNING "SMP mpc oemtable: bad signature [%c%c%c%c]!\n",
+- oemtable->oem_signature[0],
+- oemtable->oem_signature[1],
+- oemtable->oem_signature[2],
+- oemtable->oem_signature[3]);
+- return;
+- }
+- if (mpf_checksum((unsigned char *)oemtable,oemtable->oem_length))
+- {
+- printk(KERN_WARNING "SMP oem mptable: checksum error!\n");
+- return;
+- }
+- while (count < oemtable->oem_length) {
+- switch (*oemptr) {
+- case MP_TRANSLATION:
+- {
+- struct mpc_config_translation *m=
+- (struct mpc_config_translation *)oemptr;
+- MP_translation_info(m);
+- oemptr += sizeof(*m);
+- count += sizeof(*m);
+- ++mpc_record;
+- break;
+- }
+- default:
+- {
+- printk(KERN_WARNING "Unrecognised OEM table entry type! - %d\n", (int) *oemptr);
+- return;
+- }
+- }
+- }
+-}
+-
+-static inline void mps_oem_check(struct mp_config_table *mpc, char *oem,
+- char *productid)
+-{
+- if (strncmp(oem, "IBM NUMA", 8))
+- printk("Warning! May not be a NUMA-Q system!\n");
+- if (mpc->mpc_oemptr)
+- smp_read_mpc_oem((struct mp_config_oemtable *) mpc->mpc_oemptr,
+- mpc->mpc_oemsize);
+-}
+-#endif /* CONFIG_X86_NUMAQ */
+-
+-/*
+- * Read/parse the MPC
+- */
+-
+-static int __init smp_read_mpc(struct mp_config_table *mpc)
+-{
+- char str[16];
+- char oem[10];
+- int count=sizeof(*mpc);
+- unsigned char *mpt=((unsigned char *)mpc)+count;
+-
+- if (memcmp(mpc->mpc_signature,MPC_SIGNATURE,4)) {
+- printk(KERN_ERR "SMP mptable: bad signature [0x%x]!\n",
+- *(u32 *)mpc->mpc_signature);
+- return 0;
+- }
+- if (mpf_checksum((unsigned char *)mpc,mpc->mpc_length)) {
+- printk(KERN_ERR "SMP mptable: checksum error!\n");
+- return 0;
+- }
+- if (mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04) {
+- printk(KERN_ERR "SMP mptable: bad table version (%d)!!\n",
+- mpc->mpc_spec);
+- return 0;
+- }
+- if (!mpc->mpc_lapic) {
+- printk(KERN_ERR "SMP mptable: null local APIC address!\n");
+- return 0;
+- }
+- memcpy(oem,mpc->mpc_oem,8);
+- oem[8]=0;
+- printk(KERN_INFO "OEM ID: %s ",oem);
+-
+- memcpy(str,mpc->mpc_productid,12);
+- str[12]=0;
+- printk("Product ID: %s ",str);
+-
+- mps_oem_check(mpc, oem, str);
+-
+- printk("APIC at: 0x%X\n", mpc->mpc_lapic);
+-
+- /*
+- * Save the local APIC address (it might be non-default) -- but only
+- * if we're not using ACPI.
+- */
+- if (!acpi_lapic)
+- mp_lapic_addr = mpc->mpc_lapic;
+-
+- /*
+- * Now process the configuration blocks.
+- */
+- mpc_record = 0;
+- while (count < mpc->mpc_length) {
+- switch(*mpt) {
+- case MP_PROCESSOR:
+- {
+- struct mpc_config_processor *m=
+- (struct mpc_config_processor *)mpt;
+- /* ACPI may have already provided this data */
+- if (!acpi_lapic)
+- MP_processor_info(m);
+- mpt += sizeof(*m);
+- count += sizeof(*m);
+- break;
+- }
+- case MP_BUS:
+- {
+- struct mpc_config_bus *m=
+- (struct mpc_config_bus *)mpt;
+- MP_bus_info(m);
+- mpt += sizeof(*m);
+- count += sizeof(*m);
+- break;
+- }
+- case MP_IOAPIC:
+- {
+- struct mpc_config_ioapic *m=
+- (struct mpc_config_ioapic *)mpt;
+- MP_ioapic_info(m);
+- mpt+=sizeof(*m);
+- count+=sizeof(*m);
+- break;
+- }
+- case MP_INTSRC:
+- {
+- struct mpc_config_intsrc *m=
+- (struct mpc_config_intsrc *)mpt;
+-
+- MP_intsrc_info(m);
+- mpt+=sizeof(*m);
+- count+=sizeof(*m);
+- break;
+- }
+- case MP_LINTSRC:
+- {
+- struct mpc_config_lintsrc *m=
+- (struct mpc_config_lintsrc *)mpt;
+- MP_lintsrc_info(m);
+- mpt+=sizeof(*m);
+- count+=sizeof(*m);
+- break;
+- }
+- default:
+- {
+- count = mpc->mpc_length;
+- break;
+- }
+- }
+- ++mpc_record;
+- }
+- setup_apic_routing();
+- if (!num_processors)
+- printk(KERN_ERR "SMP mptable: no processors registered!\n");
+- return num_processors;
+-}
+-
+-static int __init ELCR_trigger(unsigned int irq)
+-{
+- unsigned int port;
+-
+- port = 0x4d0 + (irq >> 3);
+- return (inb(port) >> (irq & 7)) & 1;
+-}
+-
+-static void __init construct_default_ioirq_mptable(int mpc_default_type)
+-{
+- struct mpc_config_intsrc intsrc;
+- int i;
+- int ELCR_fallback = 0;
+-
+- intsrc.mpc_type = MP_INTSRC;
+- intsrc.mpc_irqflag = 0; /* conforming */
+- intsrc.mpc_srcbus = 0;
+- intsrc.mpc_dstapic = mp_ioapics[0].mpc_apicid;
+-
+- intsrc.mpc_irqtype = mp_INT;
+-
+- /*
+- * If true, we have an ISA/PCI system with no IRQ entries
+- * in the MP table. To prevent the PCI interrupts from being set up
+- * incorrectly, we try to use the ELCR. The sanity check to see if
+- * there is good ELCR data is very simple - IRQ0, 1, 2 and 13 can
+- * never be level sensitive, so we simply see if the ELCR agrees.
+- * If it does, we assume it's valid.
+- */
+- if (mpc_default_type == 5) {
+- printk(KERN_INFO "ISA/PCI bus type with no IRQ information... falling back to ELCR\n");
+-
+- if (ELCR_trigger(0) || ELCR_trigger(1) || ELCR_trigger(2) || ELCR_trigger(13))
+- printk(KERN_WARNING "ELCR contains invalid data... not using ELCR\n");
+- else {
+- printk(KERN_INFO "Using ELCR to identify PCI interrupts\n");
+- ELCR_fallback = 1;
+- }
+- }
+-
+- for (i = 0; i < 16; i++) {
+- switch (mpc_default_type) {
+- case 2:
+- if (i == 0 || i == 13)
+- continue; /* IRQ0 & IRQ13 not connected */
+- /* fall through */
+- default:
+- if (i == 2)
+- continue; /* IRQ2 is never connected */
+- }
+-
+- if (ELCR_fallback) {
+- /*
+- * If the ELCR indicates a level-sensitive interrupt, we
+- * copy that information over to the MP table in the
+- * irqflag field (level sensitive, active high polarity).
+- */
+- if (ELCR_trigger(i))
+- intsrc.mpc_irqflag = 13;
+- else
+- intsrc.mpc_irqflag = 0;
+- }
+-
+- intsrc.mpc_srcbusirq = i;
+- intsrc.mpc_dstirq = i ? i : 2; /* IRQ0 to INTIN2 */
+- MP_intsrc_info(&intsrc);
+- }
+-
+- intsrc.mpc_irqtype = mp_ExtINT;
+- intsrc.mpc_srcbusirq = 0;
+- intsrc.mpc_dstirq = 0; /* 8259A to INTIN0 */
+- MP_intsrc_info(&intsrc);
+-}
+-
+-static inline void __init construct_default_ISA_mptable(int mpc_default_type)
+-{
+- struct mpc_config_processor processor;
+- struct mpc_config_bus bus;
+- struct mpc_config_ioapic ioapic;
+- struct mpc_config_lintsrc lintsrc;
+- int linttypes[2] = { mp_ExtINT, mp_NMI };
+- int i;
+-
+- /*
+- * local APIC has default address
+- */
+- mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
+-
+- /*
+- * 2 CPUs, numbered 0 & 1.
+- */
+- processor.mpc_type = MP_PROCESSOR;
+- /* Either an integrated APIC or a discrete 82489DX. */
+- processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
+- processor.mpc_cpuflag = CPU_ENABLED;
+- processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
+- (boot_cpu_data.x86_model << 4) |
+- boot_cpu_data.x86_mask;
+- processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
+- processor.mpc_reserved[0] = 0;
+- processor.mpc_reserved[1] = 0;
+- for (i = 0; i < 2; i++) {
+- processor.mpc_apicid = i;
+- MP_processor_info(&processor);
+- }
+-
+- bus.mpc_type = MP_BUS;
+- bus.mpc_busid = 0;
+- switch (mpc_default_type) {
+- default:
+- printk("???\n");
+- printk(KERN_ERR "Unknown standard configuration %d\n",
+- mpc_default_type);
+- /* fall through */
+- case 1:
+- case 5:
+- memcpy(bus.mpc_bustype, "ISA ", 6);
+- break;
+- case 2:
+- case 6:
+- case 3:
+- memcpy(bus.mpc_bustype, "EISA ", 6);
+- break;
+- case 4:
+- case 7:
+- memcpy(bus.mpc_bustype, "MCA ", 6);
+- }
+- MP_bus_info(&bus);
+- if (mpc_default_type > 4) {
+- bus.mpc_busid = 1;
+- memcpy(bus.mpc_bustype, "PCI ", 6);
+- MP_bus_info(&bus);
+- }
+-
+- ioapic.mpc_type = MP_IOAPIC;
+- ioapic.mpc_apicid = 2;
+- ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
+- ioapic.mpc_flags = MPC_APIC_USABLE;
+- ioapic.mpc_apicaddr = 0xFEC00000;
+- MP_ioapic_info(&ioapic);
+-
+- /*
+- * We set up most of the low 16 IO-APIC pins according to MPS rules.
+- */
+- construct_default_ioirq_mptable(mpc_default_type);
+-
+- lintsrc.mpc_type = MP_LINTSRC;
+- lintsrc.mpc_irqflag = 0; /* conforming */
+- lintsrc.mpc_srcbusid = 0;
+- lintsrc.mpc_srcbusirq = 0;
+- lintsrc.mpc_destapic = MP_APIC_ALL;
+- for (i = 0; i < 2; i++) {
+- lintsrc.mpc_irqtype = linttypes[i];
+- lintsrc.mpc_destapiclint = i;
+- MP_lintsrc_info(&lintsrc);
+- }
+-}
+-
+-static struct intel_mp_floating *mpf_found;
+-
+-/*
+- * Scan the memory blocks for an SMP configuration block.
+- */
+-void __init get_smp_config (void)
+-{
+- struct intel_mp_floating *mpf = mpf_found;
+-
+- /*
+- * ACPI supports both logical (e.g. Hyper-Threading) and physical
+- * processors, where MPS only supports physical.
+- */
+- if (acpi_lapic && acpi_ioapic) {
+- printk(KERN_INFO "Using ACPI (MADT) for SMP configuration information\n");
+- return;
+- }
+- else if (acpi_lapic)
+- printk(KERN_INFO "Using ACPI for processor (LAPIC) configuration information\n");
+-
+- printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification);
+- if (mpf->mpf_feature2 & (1<<7)) {
+- printk(KERN_INFO " IMCR and PIC compatibility mode.\n");
+- pic_mode = 1;
+- } else {
+- printk(KERN_INFO " Virtual Wire compatibility mode.\n");
+- pic_mode = 0;
+- }
+-
+- /*
+- * Now see if we need to read further.
+- */
+- if (mpf->mpf_feature1 != 0) {
+-
+- printk(KERN_INFO "Default MP configuration #%d\n", mpf->mpf_feature1);
+- construct_default_ISA_mptable(mpf->mpf_feature1);
+-
+- } else if (mpf->mpf_physptr) {
+-
+- /*
+- * Read the physical hardware table. Anything here will
+- * override the defaults.
+- */
+- if (!smp_read_mpc(isa_bus_to_virt(mpf->mpf_physptr))) {
+- smp_found_config = 0;
+- printk(KERN_ERR "BIOS bug, MP table errors detected!...\n");
+- printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n");
+- return;
+- }
+- /*
+- * If there are no explicit MP IRQ entries, then we are
+- * broken. We set up most of the low 16 IO-APIC pins to
+- * ISA defaults and hope it will work.
+- */
+- if (!mp_irq_entries) {
+- struct mpc_config_bus bus;
+-
+- printk(KERN_ERR "BIOS bug, no explicit IRQ entries, using default mptable. (tell your hw vendor)\n");
+-
+- bus.mpc_type = MP_BUS;
+- bus.mpc_busid = 0;
+- memcpy(bus.mpc_bustype, "ISA ", 6);
+- MP_bus_info(&bus);
+-
+- construct_default_ioirq_mptable(0);
+- }
+-
+- } else
+- BUG();
+-
+- printk(KERN_INFO "Processors: %d\n", num_processors);
+- /*
+- * Only use the first configuration found.
+- */
+-}
+-
+-static int __init smp_scan_config (unsigned long base, unsigned long length)
+-{
+- unsigned long *bp = isa_bus_to_virt(base);
+- struct intel_mp_floating *mpf;
+-
+- printk(KERN_INFO "Scan SMP from %p for %ld bytes.\n", bp,length);
+- if (sizeof(*mpf) != 16)
+- printk("Error: MPF size\n");
+-
+- while (length > 0) {
+- mpf = (struct intel_mp_floating *)bp;
+- if ((*bp == SMP_MAGIC_IDENT) &&
+- (mpf->mpf_length == 1) &&
+- !mpf_checksum((unsigned char *)bp, 16) &&
+- ((mpf->mpf_specification == 1)
+- || (mpf->mpf_specification == 4)) ) {
+-
+- smp_found_config = 1;
+-#ifndef CONFIG_XEN
+- printk(KERN_INFO "found SMP MP-table at [%p] %08lx\n",
+- mpf, virt_to_phys(mpf));
+- reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE,
+- BOOTMEM_DEFAULT);
+- if (mpf->mpf_physptr) {
+- /*
+- * We cannot access to MPC table to compute
+- * table size yet, as only few megabytes from
+- * the bottom is mapped now.
+- * PC-9800's MPC table places on the very last
+- * of physical memory; so that simply reserving
+- * PAGE_SIZE from mpg->mpf_physptr yields BUG()
+- * in reserve_bootmem.
+- */
+- unsigned long size = PAGE_SIZE;
+- unsigned long end = max_low_pfn * PAGE_SIZE;
+- if (mpf->mpf_physptr + size > end)
+- size = end - mpf->mpf_physptr;
+- reserve_bootmem(mpf->mpf_physptr, size,
+- BOOTMEM_DEFAULT);
+- }
+-#else
+- printk(KERN_INFO "found SMP MP-table at [%p] %08lx\n",
+- mpf, ((void *)bp - isa_bus_to_virt(base)) + base);
+-#endif
+-
+- mpf_found = mpf;
+- return 1;
+- }
+- bp += 4;
+- length -= 16;
+- }
+- return 0;
+-}
+-
+-void __init find_smp_config (void)
+-{
+-#ifndef CONFIG_XEN
+- unsigned int address;
+-#endif
+-
+- /*
+- * FIXME: Linux assumes you have 640K of base ram..
+- * this continues the error...
+- *
+- * 1) Scan the bottom 1K for a signature
+- * 2) Scan the top 1K of base RAM
+- * 3) Scan the 64K of bios
+- */
+- if (smp_scan_config(0x0,0x400) ||
+- smp_scan_config(639*0x400,0x400) ||
+- smp_scan_config(0xF0000,0x10000))
+- return;
+- /*
+- * If it is an SMP machine we should know now, unless the
+- * configuration is in an EISA/MCA bus machine with an
+- * extended bios data area.
+- *
+- * there is a real-mode segmented pointer pointing to the
+- * 4K EBDA area at 0x40E, calculate and scan it here.
+- *
+- * NOTE! There are Linux loaders that will corrupt the EBDA
+- * area, and as such this kind of SMP config may be less
+- * trustworthy, simply because the SMP table may have been
+- * stomped on during early boot. These loaders are buggy and
+- * should be fixed.
+- *
+- * MP1.4 SPEC states to only scan first 1K of 4K EBDA.
+- */
+-
+-#ifndef CONFIG_XEN
+- address = get_bios_ebda();
+- if (address)
+- smp_scan_config(address, 0x400);
+-#endif
+-}
+-
+-int es7000_plat;
+-
+-/* --------------------------------------------------------------------------
+- ACPI-based MP Configuration
+- -------------------------------------------------------------------------- */
+-
+-#ifdef CONFIG_ACPI
+-
+-void __init mp_register_lapic_address(u64 address)
+-{
+-#ifndef CONFIG_XEN
+- mp_lapic_addr = (unsigned long) address;
+-
+- set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr);
+-
+- if (boot_cpu_physical_apicid == -1U)
+- boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
+-
+- Dprintk("Boot CPU = %d\n", boot_cpu_physical_apicid);
+-#endif
+-}
+-
+-void __cpuinit mp_register_lapic (u8 id, u8 enabled)
+-{
+- struct mpc_config_processor processor;
+- int boot_cpu = 0;
+-
+- if (MAX_APICS - id <= 0) {
+- printk(KERN_WARNING "Processor #%d invalid (max %d)\n",
+- id, MAX_APICS);
+- return;
+- }
+-
+- if (id == boot_cpu_physical_apicid)
+- boot_cpu = 1;
+-
+-#ifndef CONFIG_XEN
+- processor.mpc_type = MP_PROCESSOR;
+- processor.mpc_apicid = id;
+- processor.mpc_apicver = GET_APIC_VERSION(apic_read(APIC_LVR));
+- processor.mpc_cpuflag = (enabled ? CPU_ENABLED : 0);
+- processor.mpc_cpuflag |= (boot_cpu ? CPU_BOOTPROCESSOR : 0);
+- processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
+- (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask;
+- processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
+- processor.mpc_reserved[0] = 0;
+- processor.mpc_reserved[1] = 0;
+-#endif
+-
+- MP_processor_info(&processor);
+-}
+-
+-#ifdef CONFIG_X86_IO_APIC
+-
+-#define MP_ISA_BUS 0
+-#define MP_MAX_IOAPIC_PIN 127
+-
+-static struct mp_ioapic_routing {
+- int apic_id;
+- int gsi_base;
+- int gsi_end;
+- u32 pin_programmed[4];
+-} mp_ioapic_routing[MAX_IO_APICS];
+-
+-static int mp_find_ioapic (int gsi)
+-{
+- int i = 0;
+-
+- /* Find the IOAPIC that manages this GSI. */
+- for (i = 0; i < nr_ioapics; i++) {
+- if ((gsi >= mp_ioapic_routing[i].gsi_base)
+- && (gsi <= mp_ioapic_routing[i].gsi_end))
+- return i;
+- }
+-
+- printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi);
+-
+- return -1;
+-}
+-
+-void __init mp_register_ioapic(u8 id, u32 address, u32 gsi_base)
+-{
+- int idx = 0;
+- int tmpid;
+-
+- if (nr_ioapics >= MAX_IO_APICS) {
+- printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
+- "(found %d)\n", MAX_IO_APICS, nr_ioapics);
+- panic("Recompile kernel with bigger MAX_IO_APICS!\n");
+- }
+- if (!address) {
+- printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address"
+- " found in MADT table, skipping!\n");
+- return;
+- }
+-
+- idx = nr_ioapics++;
+-
+- mp_ioapics[idx].mpc_type = MP_IOAPIC;
+- mp_ioapics[idx].mpc_flags = MPC_APIC_USABLE;
+- mp_ioapics[idx].mpc_apicaddr = address;
+-
+-#ifndef CONFIG_XEN
+- set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
+-#endif
+- if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+- && !APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
+- tmpid = io_apic_get_unique_id(idx, id);
+- else
+- tmpid = id;
+- if (tmpid == -1) {
+- nr_ioapics--;
+- return;
+- }
+- mp_ioapics[idx].mpc_apicid = tmpid;
+- mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx);
+-
+- /*
+- * Build basic GSI lookup table to facilitate gsi->io_apic lookups
+- * and to prevent reprogramming of IOAPIC pins (PCI GSIs).
+- */
+- mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mpc_apicid;
+- mp_ioapic_routing[idx].gsi_base = gsi_base;
+- mp_ioapic_routing[idx].gsi_end = gsi_base +
+- io_apic_get_redir_entries(idx);
+-
+- printk("IOAPIC[%d]: apic_id %d, version %d, address 0x%x, "
+- "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid,
+- mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr,
+- mp_ioapic_routing[idx].gsi_base,
+- mp_ioapic_routing[idx].gsi_end);
+-}
+-
+-void __init
+-mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
+-{
+- struct mpc_config_intsrc intsrc;
+- int ioapic = -1;
+- int pin = -1;
+-
+- /*
+- * Convert 'gsi' to 'ioapic.pin'.
+- */
+- ioapic = mp_find_ioapic(gsi);
+- if (ioapic < 0)
+- return;
+- pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
+-
+- /*
+- * TBD: This check is for faulty timer entries, where the override
+- * erroneously sets the trigger to level, resulting in a HUGE
+- * increase of timer interrupts!
+- */
+- if ((bus_irq == 0) && (trigger == 3))
+- trigger = 1;
+-
+- intsrc.mpc_type = MP_INTSRC;
+- intsrc.mpc_irqtype = mp_INT;
+- intsrc.mpc_irqflag = (trigger << 2) | polarity;
+- intsrc.mpc_srcbus = MP_ISA_BUS;
+- intsrc.mpc_srcbusirq = bus_irq; /* IRQ */
+- intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; /* APIC ID */
+- intsrc.mpc_dstirq = pin; /* INTIN# */
+-
+- Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, %d-%d\n",
+- intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3,
+- (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus,
+- intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, intsrc.mpc_dstirq);
+-
+- mp_irqs[mp_irq_entries] = intsrc;
+- if (++mp_irq_entries == MAX_IRQ_SOURCES)
+- panic("Max # of irq sources exceeded!\n");
+-}
+-
+-void __init mp_config_acpi_legacy_irqs (void)
+-{
+- struct mpc_config_intsrc intsrc;
+- int i = 0;
+- int ioapic = -1;
+-
+- /*
+- * Fabricate the legacy ISA bus (bus #31).
+- */
+- mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA;
+- Dprintk("Bus #%d is ISA\n", MP_ISA_BUS);
+-
+- /*
+- * Older generations of ES7000 have no legacy identity mappings
+- */
+- if (es7000_plat == 1)
+- return;
+-
+- /*
+- * Locate the IOAPIC that manages the ISA IRQs (0-15).
+- */
+- ioapic = mp_find_ioapic(0);
+- if (ioapic < 0)
+- return;
+-
+- intsrc.mpc_type = MP_INTSRC;
+- intsrc.mpc_irqflag = 0; /* Conforming */
+- intsrc.mpc_srcbus = MP_ISA_BUS;
+- intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid;
+-
+- /*
+- * Use the default configuration for the IRQs 0-15. Unless
+- * overridden by (MADT) interrupt source override entries.
+- */
+- for (i = 0; i < 16; i++) {
+- int idx;
+-
+- for (idx = 0; idx < mp_irq_entries; idx++) {
+- struct mpc_config_intsrc *irq = mp_irqs + idx;
+-
+- /* Do we already have a mapping for this ISA IRQ? */
+- if (irq->mpc_srcbus == MP_ISA_BUS && irq->mpc_srcbusirq == i)
+- break;
+-
+- /* Do we already have a mapping for this IOAPIC pin */
+- if ((irq->mpc_dstapic == intsrc.mpc_dstapic) &&
+- (irq->mpc_dstirq == i))
+- break;
+- }
+-
+- if (idx != mp_irq_entries) {
+- printk(KERN_DEBUG "ACPI: IRQ%d used by override.\n", i);
+- continue; /* IRQ already used */
+- }
+-
+- intsrc.mpc_irqtype = mp_INT;
+- intsrc.mpc_srcbusirq = i; /* Identity mapped */
+- intsrc.mpc_dstirq = i;
+-
+- Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, "
+- "%d-%d\n", intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3,
+- (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus,
+- intsrc.mpc_srcbusirq, intsrc.mpc_dstapic,
+- intsrc.mpc_dstirq);
+-
+- mp_irqs[mp_irq_entries] = intsrc;
+- if (++mp_irq_entries == MAX_IRQ_SOURCES)
+- panic("Max # of irq sources exceeded!\n");
+- }
+-}
+-
+-#define MAX_GSI_NUM 4096
+-#define IRQ_COMPRESSION_START 64
+-
+-int mp_register_gsi(u32 gsi, int triggering, int polarity)
+-{
+- int ioapic = -1;
+- int ioapic_pin = 0;
+- int idx, bit = 0;
+- static int pci_irq = IRQ_COMPRESSION_START;
+- /*
+- * Mapping between Global System Interrupts, which
+- * represent all possible interrupts, and IRQs
+- * assigned to actual devices.
+- */
+- static int gsi_to_irq[MAX_GSI_NUM];
+-
+- /* Don't set up the ACPI SCI because it's already set up */
+- if (acpi_gbl_FADT.sci_interrupt == gsi)
+- return gsi;
+-
+- ioapic = mp_find_ioapic(gsi);
+- if (ioapic < 0) {
+- printk(KERN_WARNING "No IOAPIC for GSI %u\n", gsi);
+- return gsi;
+- }
+-
+- ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
+-
+- if (ioapic_renumber_irq)
+- gsi = ioapic_renumber_irq(ioapic, gsi);
+-
+- /*
+- * Avoid pin reprogramming. PRTs typically include entries
+- * with redundant pin->gsi mappings (but unique PCI devices);
+- * we only program the IOAPIC on the first.
+- */
+- bit = ioapic_pin % 32;
+- idx = (ioapic_pin < 32) ? 0 : (ioapic_pin / 32);
+- if (idx > 3) {
+- printk(KERN_ERR "Invalid reference to IOAPIC pin "
+- "%d-%d\n", mp_ioapic_routing[ioapic].apic_id,
+- ioapic_pin);
+- return gsi;
+- }
+- if ((1<<bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) {
+- Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n",
+- mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
+- return (gsi < IRQ_COMPRESSION_START ? gsi : gsi_to_irq[gsi]);
+- }
+-
+- mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1<<bit);
+-
+- /*
+- * For GSI >= 64, use IRQ compression
+- */
+- if ((gsi >= IRQ_COMPRESSION_START)
+- && (triggering == ACPI_LEVEL_SENSITIVE)) {
+- /*
+- * For PCI devices assign IRQs in order, avoiding gaps
+- * due to unused I/O APIC pins.
+- */
+- int irq = gsi;
+- if (gsi < MAX_GSI_NUM) {
+- /*
+- * Retain the VIA chipset work-around (gsi > 15), but
+- * avoid a problem where the 8254 timer (IRQ0) is setup
+- * via an override (so it's not on pin 0 of the ioapic),
+- * and at the same time, the pin 0 interrupt is a PCI
+- * type. The gsi > 15 test could cause these two pins
+- * to be shared as IRQ0, and they are not shareable.
+- * So test for this condition, and if necessary, avoid
+- * the pin collision.
+- */
+- if (gsi > 15 || (gsi == 0 && !timer_uses_ioapic_pin_0))
+- gsi = pci_irq++;
+- /*
+- * Don't assign IRQ used by ACPI SCI
+- */
+- if (gsi == acpi_gbl_FADT.sci_interrupt)
+- gsi = pci_irq++;
+- gsi_to_irq[irq] = gsi;
+- } else {
+- printk(KERN_ERR "GSI %u is too high\n", gsi);
+- return gsi;
+- }
+- }
+-
+- io_apic_set_pci_routing(ioapic, ioapic_pin, gsi,
+- triggering == ACPI_EDGE_SENSITIVE ? 0 : 1,
+- polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
+- return gsi;
+-}
+-
+-#endif /* CONFIG_X86_IO_APIC */
+-#endif /* CONFIG_ACPI */
+--- sle11-2009-05-14.orig/arch/x86/kernel/mpparse_64-xen.c 2009-03-16 16:33:40.000000000 +0100
++++ /dev/null 1970-01-01 00:00:00.000000000 +0000
+@@ -1,879 +0,0 @@
+-/*
+- * Intel Multiprocessor Specification 1.1 and 1.4
+- * compliant MP-table parsing routines.
+- *
+- * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
+- * (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
+- *
+- * Fixes
+- * Erich Boleyn : MP v1.4 and additional changes.
+- * Alan Cox : Added EBDA scanning
+- * Ingo Molnar : various cleanups and rewrites
+- * Maciej W. Rozycki: Bits for default MP configurations
+- * Paul Diefenbaugh: Added full ACPI support
+- */
+-
+-#include <linux/mm.h>
+-#include <linux/init.h>
+-#include <linux/delay.h>
+-#include <linux/bootmem.h>
+-#include <linux/kernel_stat.h>
+-#include <linux/mc146818rtc.h>
+-#include <linux/acpi.h>
+-#include <linux/module.h>
+-
+-#include <asm/smp.h>
+-#include <asm/mtrr.h>
+-#include <asm/mpspec.h>
+-#include <asm/pgalloc.h>
+-#include <asm/io_apic.h>
+-#include <asm/proto.h>
+-#include <asm/acpi.h>
+-
+-/* Have we found an MP table */
+-int smp_found_config;
+-
+-/*
+- * Various Linux-internal data structures created from the
+- * MP-table.
+- */
+-DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
+-int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
+-
+-static int mp_current_pci_id = 0;
+-/* I/O APIC entries */
+-struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
+-
+-/* # of MP IRQ source entries */
+-struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
+-
+-/* MP IRQ source entries */
+-int mp_irq_entries;
+-
+-int nr_ioapics;
+-unsigned long mp_lapic_addr = 0;
+-
+-
+-
+-/* Processor that is doing the boot up */
+-unsigned int boot_cpu_id = -1U;
+-EXPORT_SYMBOL(boot_cpu_id);
+-
+-/* Internal processor count */
+-unsigned int num_processors;
+-
+-unsigned disabled_cpus __cpuinitdata;
+-
+-/* Bitmask of physically existing CPUs */
+-physid_mask_t phys_cpu_present_map = PHYSID_MASK_NONE;
+-
+-#ifndef CONFIG_XEN
+-u16 x86_bios_cpu_apicid_init[NR_CPUS] __initdata
+- = { [0 ... NR_CPUS-1] = BAD_APICID };
+-void *x86_bios_cpu_apicid_early_ptr;
+-#endif
+-DEFINE_PER_CPU(u16, x86_bios_cpu_apicid) = BAD_APICID;
+-EXPORT_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
+-
+-
+-/*
+- * Intel MP BIOS table parsing routines:
+- */
+-
+-/*
+- * Checksum an MP configuration block.
+- */
+-
+-static int __init mpf_checksum(unsigned char *mp, int len)
+-{
+- int sum = 0;
+-
+- while (len--)
+- sum += *mp++;
+-
+- return sum & 0xFF;
+-}
+-
+-#ifndef CONFIG_XEN
+-static void __cpuinit MP_processor_info(struct mpc_config_processor *m)
+-{
+- int cpu;
+- cpumask_t tmp_map;
+- char *bootup_cpu = "";
+-
+- if (!(m->mpc_cpuflag & CPU_ENABLED)) {
+- disabled_cpus++;
+- return;
+- }
+- if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
+- bootup_cpu = " (Bootup-CPU)";
+- boot_cpu_id = m->mpc_apicid;
+- }
+-
+- printk(KERN_INFO "Processor #%d%s\n", m->mpc_apicid, bootup_cpu);
+-
+- if (num_processors >= NR_CPUS) {
+- printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
+- " Processor ignored.\n", NR_CPUS);
+- return;
+- }
+-
+- num_processors++;
+- cpus_complement(tmp_map, cpu_present_map);
+- cpu = first_cpu(tmp_map);
+-
+- physid_set(m->mpc_apicid, phys_cpu_present_map);
+- if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
+- /*
+- * x86_bios_cpu_apicid is required to have processors listed
+- * in same order as logical cpu numbers. Hence the first
+- * entry is BSP, and so on.
+- */
+- cpu = 0;
+- }
+- /* are we being called early in kernel startup? */
+- if (x86_cpu_to_apicid_early_ptr) {
+- u16 *cpu_to_apicid = x86_cpu_to_apicid_early_ptr;
+- u16 *bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr;
+-
+- cpu_to_apicid[cpu] = m->mpc_apicid;
+- bios_cpu_apicid[cpu] = m->mpc_apicid;
+- } else {
+- per_cpu(x86_cpu_to_apicid, cpu) = m->mpc_apicid;
+- per_cpu(x86_bios_cpu_apicid, cpu) = m->mpc_apicid;
+- }
+-
+- cpu_set(cpu, cpu_possible_map);
+- cpu_set(cpu, cpu_present_map);
+-}
+-#else
+-static void __cpuinit MP_processor_info(struct mpc_config_processor *m)
+-{
+- num_processors++;
+-}
+-#endif /* CONFIG_XEN */
+-
+-static void __init MP_bus_info (struct mpc_config_bus *m)
+-{
+- char str[7];
+-
+- memcpy(str, m->mpc_bustype, 6);
+- str[6] = 0;
+- Dprintk("Bus #%d is %s\n", m->mpc_busid, str);
+-
+- if (strncmp(str, "ISA", 3) == 0) {
+- set_bit(m->mpc_busid, mp_bus_not_pci);
+- } else if (strncmp(str, "PCI", 3) == 0) {
+- clear_bit(m->mpc_busid, mp_bus_not_pci);
+- mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id;
+- mp_current_pci_id++;
+- } else {
+- printk(KERN_ERR "Unknown bustype %s\n", str);
+- }
+-}
+-
+-static int bad_ioapic(unsigned long address)
+-{
+- if (nr_ioapics >= MAX_IO_APICS) {
+- printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
+- "(found %d)\n", MAX_IO_APICS, nr_ioapics);
+- panic("Recompile kernel with bigger MAX_IO_APICS!\n");
+- }
+- if (!address) {
+- printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address"
+- " found in table, skipping!\n");
+- return 1;
+- }
+- return 0;
+-}
+-
+-static void __init MP_ioapic_info (struct mpc_config_ioapic *m)
+-{
+- if (!(m->mpc_flags & MPC_APIC_USABLE))
+- return;
+-
+- printk("I/O APIC #%d at 0x%X.\n",
+- m->mpc_apicid, m->mpc_apicaddr);
+-
+- if (bad_ioapic(m->mpc_apicaddr))
+- return;
+-
+- mp_ioapics[nr_ioapics] = *m;
+- nr_ioapics++;
+-}
+-
+-static void __init MP_intsrc_info (struct mpc_config_intsrc *m)
+-{
+- mp_irqs [mp_irq_entries] = *m;
+- Dprintk("Int: type %d, pol %d, trig %d, bus %d,"
+- " IRQ %02x, APIC ID %x, APIC INT %02x\n",
+- m->mpc_irqtype, m->mpc_irqflag & 3,
+- (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus,
+- m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq);
+- if (++mp_irq_entries >= MAX_IRQ_SOURCES)
+- panic("Max # of irq sources exceeded!!\n");
+-}
+-
+-static void __init MP_lintsrc_info (struct mpc_config_lintsrc *m)
+-{
+- Dprintk("Lint: type %d, pol %d, trig %d, bus %d,"
+- " IRQ %02x, APIC ID %x, APIC LINT %02x\n",
+- m->mpc_irqtype, m->mpc_irqflag & 3,
+- (m->mpc_irqflag >> 2) &3, m->mpc_srcbusid,
+- m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
+-}
+-
+-/*
+- * Read/parse the MPC
+- */
+-
+-static int __init smp_read_mpc(struct mp_config_table *mpc)
+-{
+- char str[16];
+- int count=sizeof(*mpc);
+- unsigned char *mpt=((unsigned char *)mpc)+count;
+-
+- if (memcmp(mpc->mpc_signature,MPC_SIGNATURE,4)) {
+- printk("MPTABLE: bad signature [%c%c%c%c]!\n",
+- mpc->mpc_signature[0],
+- mpc->mpc_signature[1],
+- mpc->mpc_signature[2],
+- mpc->mpc_signature[3]);
+- return 0;
+- }
+- if (mpf_checksum((unsigned char *)mpc,mpc->mpc_length)) {
+- printk("MPTABLE: checksum error!\n");
+- return 0;
+- }
+- if (mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04) {
+- printk(KERN_ERR "MPTABLE: bad table version (%d)!!\n",
+- mpc->mpc_spec);
+- return 0;
+- }
+- if (!mpc->mpc_lapic) {
+- printk(KERN_ERR "MPTABLE: null local APIC address!\n");
+- return 0;
+- }
+- memcpy(str,mpc->mpc_oem,8);
+- str[8] = 0;
+- printk(KERN_INFO "MPTABLE: OEM ID: %s ",str);
+-
+- memcpy(str,mpc->mpc_productid,12);
+- str[12] = 0;
+- printk("MPTABLE: Product ID: %s ",str);
+-
+- printk("MPTABLE: APIC at: 0x%X\n",mpc->mpc_lapic);
+-
+- /* save the local APIC address, it might be non-default */
+- if (!acpi_lapic)
+- mp_lapic_addr = mpc->mpc_lapic;
+-
+- /*
+- * Now process the configuration blocks.
+- */
+- while (count < mpc->mpc_length) {
+- switch(*mpt) {
+- case MP_PROCESSOR:
+- {
+- struct mpc_config_processor *m=
+- (struct mpc_config_processor *)mpt;
+- if (!acpi_lapic)
+- MP_processor_info(m);
+- mpt += sizeof(*m);
+- count += sizeof(*m);
+- break;
+- }
+- case MP_BUS:
+- {
+- struct mpc_config_bus *m=
+- (struct mpc_config_bus *)mpt;
+- MP_bus_info(m);
+- mpt += sizeof(*m);
+- count += sizeof(*m);
+- break;
+- }
+- case MP_IOAPIC:
+- {
+- struct mpc_config_ioapic *m=
+- (struct mpc_config_ioapic *)mpt;
+- MP_ioapic_info(m);
+- mpt += sizeof(*m);
+- count += sizeof(*m);
+- break;
+- }
+- case MP_INTSRC:
+- {
+- struct mpc_config_intsrc *m=
+- (struct mpc_config_intsrc *)mpt;
+-
+- MP_intsrc_info(m);
+- mpt += sizeof(*m);
+- count += sizeof(*m);
+- break;
+- }
+- case MP_LINTSRC:
+- {
+- struct mpc_config_lintsrc *m=
+- (struct mpc_config_lintsrc *)mpt;
+- MP_lintsrc_info(m);
+- mpt += sizeof(*m);
+- count += sizeof(*m);
+- break;
+- }
+- }
+- }
+- setup_apic_routing();
+- if (!num_processors)
+- printk(KERN_ERR "MPTABLE: no processors registered!\n");
+- return num_processors;
+-}
+-
+-static int __init ELCR_trigger(unsigned int irq)
+-{
+- unsigned int port;
+-
+- port = 0x4d0 + (irq >> 3);
+- return (inb(port) >> (irq & 7)) & 1;
+-}
+-
+-static void __init construct_default_ioirq_mptable(int mpc_default_type)
+-{
+- struct mpc_config_intsrc intsrc;
+- int i;
+- int ELCR_fallback = 0;
+-
+- intsrc.mpc_type = MP_INTSRC;
+- intsrc.mpc_irqflag = 0; /* conforming */
+- intsrc.mpc_srcbus = 0;
+- intsrc.mpc_dstapic = mp_ioapics[0].mpc_apicid;
+-
+- intsrc.mpc_irqtype = mp_INT;
+-
+- /*
+- * If true, we have an ISA/PCI system with no IRQ entries
+- * in the MP table. To prevent the PCI interrupts from being set up
+- * incorrectly, we try to use the ELCR. The sanity check to see if
+- * there is good ELCR data is very simple - IRQ0, 1, 2 and 13 can
+- * never be level sensitive, so we simply see if the ELCR agrees.
+- * If it does, we assume it's valid.
+- */
+- if (mpc_default_type == 5) {
+- printk(KERN_INFO "ISA/PCI bus type with no IRQ information... falling back to ELCR\n");
+-
+- if (ELCR_trigger(0) || ELCR_trigger(1) || ELCR_trigger(2) || ELCR_trigger(13))
+- printk(KERN_ERR "ELCR contains invalid data... not using ELCR\n");
+- else {
+- printk(KERN_INFO "Using ELCR to identify PCI interrupts\n");
+- ELCR_fallback = 1;
+- }
+- }
+-
+- for (i = 0; i < 16; i++) {
+- switch (mpc_default_type) {
+- case 2:
+- if (i == 0 || i == 13)
+- continue; /* IRQ0 & IRQ13 not connected */
+- /* fall through */
+- default:
+- if (i == 2)
+- continue; /* IRQ2 is never connected */
+- }
+-
+- if (ELCR_fallback) {
+- /*
+- * If the ELCR indicates a level-sensitive interrupt, we
+- * copy that information over to the MP table in the
+- * irqflag field (level sensitive, active high polarity).
+- */
+- if (ELCR_trigger(i))
+- intsrc.mpc_irqflag = 13;
+- else
+- intsrc.mpc_irqflag = 0;
+- }
+-
+- intsrc.mpc_srcbusirq = i;
+- intsrc.mpc_dstirq = i ? i : 2; /* IRQ0 to INTIN2 */
+- MP_intsrc_info(&intsrc);
+- }
+-
+- intsrc.mpc_irqtype = mp_ExtINT;
+- intsrc.mpc_srcbusirq = 0;
+- intsrc.mpc_dstirq = 0; /* 8259A to INTIN0 */
+- MP_intsrc_info(&intsrc);
+-}
+-
+-static inline void __init construct_default_ISA_mptable(int mpc_default_type)
+-{
+- struct mpc_config_processor processor;
+- struct mpc_config_bus bus;
+- struct mpc_config_ioapic ioapic;
+- struct mpc_config_lintsrc lintsrc;
+- int linttypes[2] = { mp_ExtINT, mp_NMI };
+- int i;
+-
+- /*
+- * local APIC has default address
+- */
+- mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
+-
+- /*
+- * 2 CPUs, numbered 0 & 1.
+- */
+- processor.mpc_type = MP_PROCESSOR;
+- processor.mpc_apicver = 0;
+- processor.mpc_cpuflag = CPU_ENABLED;
+- processor.mpc_cpufeature = 0;
+- processor.mpc_featureflag = 0;
+- processor.mpc_reserved[0] = 0;
+- processor.mpc_reserved[1] = 0;
+- for (i = 0; i < 2; i++) {
+- processor.mpc_apicid = i;
+- MP_processor_info(&processor);
+- }
+-
+- bus.mpc_type = MP_BUS;
+- bus.mpc_busid = 0;
+- switch (mpc_default_type) {
+- default:
+- printk(KERN_ERR "???\nUnknown standard configuration %d\n",
+- mpc_default_type);
+- /* fall through */
+- case 1:
+- case 5:
+- memcpy(bus.mpc_bustype, "ISA ", 6);
+- break;
+- }
+- MP_bus_info(&bus);
+- if (mpc_default_type > 4) {
+- bus.mpc_busid = 1;
+- memcpy(bus.mpc_bustype, "PCI ", 6);
+- MP_bus_info(&bus);
+- }
+-
+- ioapic.mpc_type = MP_IOAPIC;
+- ioapic.mpc_apicid = 2;
+- ioapic.mpc_apicver = 0;
+- ioapic.mpc_flags = MPC_APIC_USABLE;
+- ioapic.mpc_apicaddr = 0xFEC00000;
+- MP_ioapic_info(&ioapic);
+-
+- /*
+- * We set up most of the low 16 IO-APIC pins according to MPS rules.
+- */
+- construct_default_ioirq_mptable(mpc_default_type);
+-
+- lintsrc.mpc_type = MP_LINTSRC;
+- lintsrc.mpc_irqflag = 0; /* conforming */
+- lintsrc.mpc_srcbusid = 0;
+- lintsrc.mpc_srcbusirq = 0;
+- lintsrc.mpc_destapic = MP_APIC_ALL;
+- for (i = 0; i < 2; i++) {
+- lintsrc.mpc_irqtype = linttypes[i];
+- lintsrc.mpc_destapiclint = i;
+- MP_lintsrc_info(&lintsrc);
+- }
+-}
+-
+-static struct intel_mp_floating *mpf_found;
+-
+-/*
+- * Scan the memory blocks for an SMP configuration block.
+- */
+-void __init get_smp_config (void)
+-{
+- struct intel_mp_floating *mpf = mpf_found;
+-
+- /*
+- * ACPI supports both logical (e.g. Hyper-Threading) and physical
+- * processors, where MPS only supports physical.
+- */
+- if (acpi_lapic && acpi_ioapic) {
+- printk(KERN_INFO "Using ACPI (MADT) for SMP configuration information\n");
+- return;
+- }
+- else if (acpi_lapic)
+- printk(KERN_INFO "Using ACPI for processor (LAPIC) configuration information\n");
+-
+- printk("Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification);
+-
+- /*
+- * Now see if we need to read further.
+- */
+- if (mpf->mpf_feature1 != 0) {
+-
+- printk(KERN_INFO "Default MP configuration #%d\n", mpf->mpf_feature1);
+- construct_default_ISA_mptable(mpf->mpf_feature1);
+-
+- } else if (mpf->mpf_physptr) {
+-
+- /*
+- * Read the physical hardware table. Anything here will
+- * override the defaults.
+- */
+- if (!smp_read_mpc(isa_bus_to_virt(mpf->mpf_physptr))) {
+- smp_found_config = 0;
+- printk(KERN_ERR "BIOS bug, MP table errors detected!...\n");
+- printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n");
+- return;
+- }
+- /*
+- * If there are no explicit MP IRQ entries, then we are
+- * broken. We set up most of the low 16 IO-APIC pins to
+- * ISA defaults and hope it will work.
+- */
+- if (!mp_irq_entries) {
+- struct mpc_config_bus bus;
+-
+- printk(KERN_ERR "BIOS bug, no explicit IRQ entries, using default mptable. (tell your hw vendor)\n");
+-
+- bus.mpc_type = MP_BUS;
+- bus.mpc_busid = 0;
+- memcpy(bus.mpc_bustype, "ISA ", 6);
+- MP_bus_info(&bus);
+-
+- construct_default_ioirq_mptable(0);
+- }
+-
+- } else
+- BUG();
+-
+- printk(KERN_INFO "Processors: %d\n", num_processors);
+- /*
+- * Only use the first configuration found.
+- */
+-}
+-
+-static int __init smp_scan_config (unsigned long base, unsigned long length)
+-{
+- extern void __bad_mpf_size(void);
+- unsigned int *bp = isa_bus_to_virt(base);
+- struct intel_mp_floating *mpf;
+-
+- Dprintk("Scan SMP from %p for %ld bytes.\n", bp,length);
+- if (sizeof(*mpf) != 16)
+- __bad_mpf_size();
+-
+- while (length > 0) {
+- mpf = (struct intel_mp_floating *)bp;
+- if ((*bp == SMP_MAGIC_IDENT) &&
+- (mpf->mpf_length == 1) &&
+- !mpf_checksum((unsigned char *)bp, 16) &&
+- ((mpf->mpf_specification == 1)
+- || (mpf->mpf_specification == 4)) ) {
+-
+- smp_found_config = 1;
+- mpf_found = mpf;
+- return 1;
+- }
+- bp += 4;
+- length -= 16;
+- }
+- return 0;
+-}
+-
+-void __init find_smp_config(void)
+-{
+- unsigned int address;
+-
+- /*
+- * FIXME: Linux assumes you have 640K of base ram..
+- * this continues the error...
+- *
+- * 1) Scan the bottom 1K for a signature
+- * 2) Scan the top 1K of base RAM
+- * 3) Scan the 64K of bios
+- */
+- if (smp_scan_config(0x0,0x400) ||
+- smp_scan_config(639*0x400,0x400) ||
+- smp_scan_config(0xF0000,0x10000))
+- return;
+- /*
+- * If it is an SMP machine we should know now.
+- *
+- * there is a real-mode segmented pointer pointing to the
+- * 4K EBDA area at 0x40E, calculate and scan it here.
+- *
+- * NOTE! There are Linux loaders that will corrupt the EBDA
+- * area, and as such this kind of SMP config may be less
+- * trustworthy, simply because the SMP table may have been
+- * stomped on during early boot. These loaders are buggy and
+- * should be fixed.
+- */
+-
+- address = *(unsigned short *)phys_to_virt(0x40E);
+- address <<= 4;
+- if (smp_scan_config(address, 0x1000))
+- return;
+-
+- /* If we have come this far, we did not find an MP table */
+- printk(KERN_INFO "No mptable found.\n");
+-}
+-
+-/* --------------------------------------------------------------------------
+- ACPI-based MP Configuration
+- -------------------------------------------------------------------------- */
+-
+-#ifdef CONFIG_ACPI
+-
+-void __init mp_register_lapic_address(u64 address)
+-{
+-#ifndef CONFIG_XEN
+- mp_lapic_addr = (unsigned long) address;
+- set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr);
+- if (boot_cpu_id == -1U)
+- boot_cpu_id = GET_APIC_ID(apic_read(APIC_ID));
+-#endif
+-}
+-
+-void __cpuinit mp_register_lapic (u8 id, u8 enabled)
+-{
+- struct mpc_config_processor processor;
+- int boot_cpu = 0;
+-
+- if (id == boot_cpu_id)
+- boot_cpu = 1;
+-
+-#ifndef CONFIG_XEN
+- processor.mpc_type = MP_PROCESSOR;
+- processor.mpc_apicid = id;
+- processor.mpc_apicver = 0;
+- processor.mpc_cpuflag = (enabled ? CPU_ENABLED : 0);
+- processor.mpc_cpuflag |= (boot_cpu ? CPU_BOOTPROCESSOR : 0);
+- processor.mpc_cpufeature = 0;
+- processor.mpc_featureflag = 0;
+- processor.mpc_reserved[0] = 0;
+- processor.mpc_reserved[1] = 0;
+-#endif
+-
+- MP_processor_info(&processor);
+-}
+-
+-#define MP_ISA_BUS 0
+-#define MP_MAX_IOAPIC_PIN 127
+-
+-static struct mp_ioapic_routing {
+- int apic_id;
+- int gsi_start;
+- int gsi_end;
+- u32 pin_programmed[4];
+-} mp_ioapic_routing[MAX_IO_APICS];
+-
+-static int mp_find_ioapic(int gsi)
+-{
+- int i = 0;
+-
+- /* Find the IOAPIC that manages this GSI. */
+- for (i = 0; i < nr_ioapics; i++) {
+- if ((gsi >= mp_ioapic_routing[i].gsi_start)
+- && (gsi <= mp_ioapic_routing[i].gsi_end))
+- return i;
+- }
+-
+- printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi);
+- return -1;
+-}
+-
+-static u8 uniq_ioapic_id(u8 id)
+-{
+- int i;
+- DECLARE_BITMAP(used, 256);
+- bitmap_zero(used, 256);
+- for (i = 0; i < nr_ioapics; i++) {
+- struct mpc_config_ioapic *ia = &mp_ioapics[i];
+- __set_bit(ia->mpc_apicid, used);
+- }
+- if (!test_bit(id, used))
+- return id;
+- return find_first_zero_bit(used, 256);
+-}
+-
+-void __init mp_register_ioapic(u8 id, u32 address, u32 gsi_base)
+-{
+- int idx = 0;
+-
+- if (bad_ioapic(address))
+- return;
+-
+- idx = nr_ioapics;
+-
+- mp_ioapics[idx].mpc_type = MP_IOAPIC;
+- mp_ioapics[idx].mpc_flags = MPC_APIC_USABLE;
+- mp_ioapics[idx].mpc_apicaddr = address;
+-
+-#ifndef CONFIG_XEN
+- set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
+-#endif
+- mp_ioapics[idx].mpc_apicid = uniq_ioapic_id(id);
+- mp_ioapics[idx].mpc_apicver = 0;
+-
+- /*
+- * Build basic IRQ lookup table to facilitate gsi->io_apic lookups
+- * and to prevent reprogramming of IOAPIC pins (PCI IRQs).
+- */
+- mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mpc_apicid;
+- mp_ioapic_routing[idx].gsi_start = gsi_base;
+- mp_ioapic_routing[idx].gsi_end = gsi_base +
+- io_apic_get_redir_entries(idx);
+-
+- printk(KERN_INFO "IOAPIC[%d]: apic_id %d, address 0x%x, "
+- "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid,
+- mp_ioapics[idx].mpc_apicaddr,
+- mp_ioapic_routing[idx].gsi_start,
+- mp_ioapic_routing[idx].gsi_end);
+-
+- nr_ioapics++;
+-}
+-
+-void __init
+-mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
+-{
+- struct mpc_config_intsrc intsrc;
+- int ioapic = -1;
+- int pin = -1;
+-
+- /*
+- * Convert 'gsi' to 'ioapic.pin'.
+- */
+- ioapic = mp_find_ioapic(gsi);
+- if (ioapic < 0)
+- return;
+- pin = gsi - mp_ioapic_routing[ioapic].gsi_start;
+-
+- /*
+- * TBD: This check is for faulty timer entries, where the override
+- * erroneously sets the trigger to level, resulting in a HUGE
+- * increase of timer interrupts!
+- */
+- if ((bus_irq == 0) && (trigger == 3))
+- trigger = 1;
+-
+- intsrc.mpc_type = MP_INTSRC;
+- intsrc.mpc_irqtype = mp_INT;
+- intsrc.mpc_irqflag = (trigger << 2) | polarity;
+- intsrc.mpc_srcbus = MP_ISA_BUS;
+- intsrc.mpc_srcbusirq = bus_irq; /* IRQ */
+- intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; /* APIC ID */
+- intsrc.mpc_dstirq = pin; /* INTIN# */
+-
+- Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, %d-%d\n",
+- intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3,
+- (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus,
+- intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, intsrc.mpc_dstirq);
+-
+- mp_irqs[mp_irq_entries] = intsrc;
+- if (++mp_irq_entries == MAX_IRQ_SOURCES)
+- panic("Max # of irq sources exceeded!\n");
+-}
+-
+-void __init mp_config_acpi_legacy_irqs(void)
+-{
+- struct mpc_config_intsrc intsrc;
+- int i = 0;
+- int ioapic = -1;
+-
+- /*
+- * Fabricate the legacy ISA bus (bus #31).
+- */
+- set_bit(MP_ISA_BUS, mp_bus_not_pci);
+-
+- /*
+- * Locate the IOAPIC that manages the ISA IRQs (0-15).
+- */
+- ioapic = mp_find_ioapic(0);
+- if (ioapic < 0)
+- return;
+-
+- intsrc.mpc_type = MP_INTSRC;
+- intsrc.mpc_irqflag = 0; /* Conforming */
+- intsrc.mpc_srcbus = MP_ISA_BUS;
+- intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid;
+-
+- /*
+- * Use the default configuration for the IRQs 0-15. Unless
+- * overridden by (MADT) interrupt source override entries.
+- */
+- for (i = 0; i < 16; i++) {
+- int idx;
+-
+- for (idx = 0; idx < mp_irq_entries; idx++) {
+- struct mpc_config_intsrc *irq = mp_irqs + idx;
+-
+- /* Do we already have a mapping for this ISA IRQ? */
+- if (irq->mpc_srcbus == MP_ISA_BUS && irq->mpc_srcbusirq == i)
+- break;
+-
+- /* Do we already have a mapping for this IOAPIC pin */
+- if ((irq->mpc_dstapic == intsrc.mpc_dstapic) &&
+- (irq->mpc_dstirq == i))
+- break;
+- }
+-
+- if (idx != mp_irq_entries) {
+- printk(KERN_DEBUG "ACPI: IRQ%d used by override.\n", i);
+- continue; /* IRQ already used */
+- }
+-
+- intsrc.mpc_irqtype = mp_INT;
+- intsrc.mpc_srcbusirq = i; /* Identity mapped */
+- intsrc.mpc_dstirq = i;
+-
+- Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, "
+- "%d-%d\n", intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3,
+- (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus,
+- intsrc.mpc_srcbusirq, intsrc.mpc_dstapic,
+- intsrc.mpc_dstirq);
+-
+- mp_irqs[mp_irq_entries] = intsrc;
+- if (++mp_irq_entries == MAX_IRQ_SOURCES)
+- panic("Max # of irq sources exceeded!\n");
+- }
+-}
+-
+-int mp_register_gsi(u32 gsi, int triggering, int polarity)
+-{
+- int ioapic = -1;
+- int ioapic_pin = 0;
+- int idx, bit = 0;
+-
+- if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC)
+- return gsi;
+-
+- /* Don't set up the ACPI SCI because it's already set up */
+- if (acpi_gbl_FADT.sci_interrupt == gsi)
+- return gsi;
+-
+- ioapic = mp_find_ioapic(gsi);
+- if (ioapic < 0) {
+- printk(KERN_WARNING "No IOAPIC for GSI %u\n", gsi);
+- return gsi;
+- }
+-
+- ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_start;
+-
+- /*
+- * Avoid pin reprogramming. PRTs typically include entries
+- * with redundant pin->gsi mappings (but unique PCI devices);
+- * we only program the IOAPIC on the first.
+- */
+- bit = ioapic_pin % 32;
+- idx = (ioapic_pin < 32) ? 0 : (ioapic_pin / 32);
+- if (idx > 3) {
+- printk(KERN_ERR "Invalid reference to IOAPIC pin "
+- "%d-%d\n", mp_ioapic_routing[ioapic].apic_id,
+- ioapic_pin);
+- return gsi;
+- }
+- if ((1<<bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) {
+- Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n",
+- mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
+- return gsi;
+- }
+-
+- mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1<<bit);
+-
+- io_apic_set_pci_routing(ioapic, ioapic_pin, gsi,
+- triggering == ACPI_EDGE_SENSITIVE ? 0 : 1,
+- polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
+- return gsi;
+-}
+-#endif /*CONFIG_ACPI*/
+--- sle11-2009-05-14.orig/arch/x86/kernel/pci-dma-xen.c 2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/arch/x86/kernel/pci-dma-xen.c 2009-03-16 16:38:05.000000000 +0100
+@@ -1,283 +1,251 @@
+-/*
+- * Dynamic DMA mapping support.
+- *
+- * On i386 there is no hardware dynamic DMA address translation,
+- * so consistent alloc/free are merely page allocation/freeing.
+- * The rest of the dynamic DMA mapping interface is implemented
+- * in asm/pci.h.
+- */
+-
+-#include <linux/types.h>
+-#include <linux/mm.h>
+-#include <linux/string.h>
++#include <linux/dma-mapping.h>
++#include <linux/dmar.h>
++#include <linux/bootmem.h>
+ #include <linux/pci.h>
+-#include <linux/module.h>
+-#include <linux/version.h>
+-#include <asm/io.h>
+-#include <xen/balloon.h>
+-#include <xen/gnttab.h>
+-#include <asm/swiotlb.h>
+-#include <asm/tlbflush.h>
+-#include <asm/swiotlb_32.h>
+-#include <asm/gnttab_dma.h>
+-#include <asm/bug.h>
+
+-#ifdef __x86_64__
+-#include <asm/iommu.h>
++#include <asm/proto.h>
++#include <asm/dma.h>
++#include <asm/gart.h>
++#include <asm/calgary.h>
++
++int forbid_dac __read_mostly;
++EXPORT_SYMBOL(forbid_dac);
++
++const struct dma_mapping_ops *dma_ops;
++EXPORT_SYMBOL(dma_ops);
++
++static int iommu_sac_force __read_mostly;
++
++#ifdef CONFIG_IOMMU_DEBUG
++int panic_on_overflow __read_mostly = 1;
++int force_iommu __read_mostly = 1;
++#else
++int panic_on_overflow __read_mostly = 0;
++int force_iommu __read_mostly = 0;
++#endif
+
+ int iommu_merge __read_mostly = 0;
+-EXPORT_SYMBOL(iommu_merge);
+
+-dma_addr_t bad_dma_address __read_mostly;
+-EXPORT_SYMBOL(bad_dma_address);
++int no_iommu __read_mostly;
++/* Set this to 1 if there is a HW IOMMU in the system */
++int iommu_detected __read_mostly = 0;
+
+ /* This tells the BIO block layer to assume merging. Default to off
+ because we cannot guarantee merging later. */
+ int iommu_bio_merge __read_mostly = 0;
+ EXPORT_SYMBOL(iommu_bio_merge);
+
+-int force_iommu __read_mostly= 0;
++dma_addr_t bad_dma_address __read_mostly = 0;
++EXPORT_SYMBOL(bad_dma_address);
+
+-__init int iommu_setup(char *p)
+-{
+- return 1;
+-}
++/* Dummy device used for NULL arguments (normally ISA). Better would
++ be probably a smaller DMA mask, but this is bug-to-bug compatible
++ to older i386. */
++struct device fallback_dev = {
++ .bus_id = "fallback device",
++ .coherent_dma_mask = DMA_32BIT_MASK,
++ .dma_mask = &fallback_dev.coherent_dma_mask,
++};
+
+-void __init pci_iommu_alloc(void)
++int dma_set_mask(struct device *dev, u64 mask)
+ {
+-#ifdef CONFIG_SWIOTLB
+- pci_swiotlb_init();
+-#endif
+-}
++ if (!dev->dma_mask || !dma_supported(dev, mask))
++ return -EIO;
++
++ *dev->dma_mask = mask;
+
+-static int __init pci_iommu_init(void)
+-{
+- no_iommu_init();
+ return 0;
+ }
++EXPORT_SYMBOL(dma_set_mask);
+
+-/* Must execute after PCI subsystem */
+-fs_initcall(pci_iommu_init);
+-#endif
+-
+-struct dma_coherent_mem {
+- void *virt_base;
+- u32 device_base;
+- int size;
+- int flags;
+- unsigned long *bitmap;
+-};
+-
+-#define IOMMU_BUG_ON(test) \
+-do { \
+- if (unlikely(test)) { \
+- printk(KERN_ALERT "Fatal DMA error! " \
+- "Please use 'swiotlb=force'\n"); \
+- BUG(); \
+- } \
+-} while (0)
++#if defined(CONFIG_X86_64) && !defined(CONFIG_XEN)
++static __initdata void *dma32_bootmem_ptr;
++static unsigned long dma32_bootmem_size __initdata = (128ULL<<20);
+
+-static int check_pages_physically_contiguous(unsigned long pfn,
+- unsigned int offset,
+- size_t length)
++static int __init parse_dma32_size_opt(char *p)
+ {
+- unsigned long next_mfn;
+- int i;
+- int nr_pages;
+-
+- next_mfn = pfn_to_mfn(pfn);
+- nr_pages = (offset + length + PAGE_SIZE-1) >> PAGE_SHIFT;
+-
+- for (i = 1; i < nr_pages; i++) {
+- if (pfn_to_mfn(++pfn) != ++next_mfn)
+- return 0;
+- }
+- return 1;
++ if (!p)
++ return -EINVAL;
++ dma32_bootmem_size = memparse(p, &p);
++ return 0;
+ }
++early_param("dma32_size", parse_dma32_size_opt);
+
+-int range_straddles_page_boundary(paddr_t p, size_t size)
++void __init dma32_reserve_bootmem(void)
+ {
+- unsigned long pfn = p >> PAGE_SHIFT;
+- unsigned int offset = p & ~PAGE_MASK;
++ unsigned long size, align;
++ if (end_pfn <= MAX_DMA32_PFN)
++ return;
+
+- return ((offset + size > PAGE_SIZE) &&
+- !check_pages_physically_contiguous(pfn, offset, size));
++ align = 64ULL<<20;
++ size = round_up(dma32_bootmem_size, align);
++ dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align,
++ __pa(MAX_DMA_ADDRESS));
++ if (dma32_bootmem_ptr)
++ dma32_bootmem_size = size;
++ else
++ dma32_bootmem_size = 0;
+ }
+-
+-int
+-dma_map_sg(struct device *hwdev, struct scatterlist *sgl, int nents,
+- enum dma_data_direction direction)
++static void __init dma32_free_bootmem(void)
+ {
+- int i, rc;
++ int node;
++
++ if (end_pfn <= MAX_DMA32_PFN)
++ return;
+
+- BUG_ON(!valid_dma_direction(direction));
+- WARN_ON(nents == 0 || sgl->length == 0);
++ if (!dma32_bootmem_ptr)
++ return;
+
+- if (swiotlb) {
+- rc = swiotlb_map_sg(hwdev, sgl, nents, direction);
+- } else {
+- struct scatterlist *sg;
+-
+- for_each_sg(sgl, sg, nents, i) {
+- BUG_ON(!sg_page(sg));
+- sg->dma_address =
+- gnttab_dma_map_page(sg_page(sg)) + sg->offset;
+- sg->dma_length = sg->length;
+- IOMMU_BUG_ON(address_needs_mapping(
+- hwdev, sg->dma_address));
+- IOMMU_BUG_ON(range_straddles_page_boundary(
+- page_to_pseudophys(sg_page(sg)) + sg->offset,
+- sg->length));
+- }
+- rc = nents;
+- }
++ for_each_online_node(node)
++ free_bootmem_node(NODE_DATA(node), __pa(dma32_bootmem_ptr),
++ dma32_bootmem_size);
+
+- flush_write_buffers();
+- return rc;
++ dma32_bootmem_ptr = NULL;
++ dma32_bootmem_size = 0;
+ }
+-EXPORT_SYMBOL(dma_map_sg);
++#else
++#define dma32_free_bootmem() ((void)0)
++#endif
+
+-void
+-dma_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nents,
+- enum dma_data_direction direction)
+-{
+- int i;
++static const struct dma_mapping_ops swiotlb_dma_ops = {
++ .mapping_error = swiotlb_dma_mapping_error,
++ .map_single = swiotlb_map_single_phys,
++ .unmap_single = swiotlb_unmap_single,
++ .sync_single_for_cpu = swiotlb_sync_single_for_cpu,
++ .sync_single_for_device = swiotlb_sync_single_for_device,
++ .sync_single_range_for_cpu = swiotlb_sync_single_range_for_cpu,
++ .sync_single_range_for_device = swiotlb_sync_single_range_for_device,
++ .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
++ .sync_sg_for_device = swiotlb_sync_sg_for_device,
++ .map_sg = swiotlb_map_sg,
++ .unmap_sg = swiotlb_unmap_sg,
++ .dma_supported = swiotlb_dma_supported
++};
+
+- BUG_ON(!valid_dma_direction(direction));
+- if (swiotlb)
+- swiotlb_unmap_sg(hwdev, sgl, nents, direction);
+- else {
+- struct scatterlist *sg;
++void __init pci_iommu_alloc(void)
++{
++ /* free the range so iommu could get some range less than 4G */
++ dma32_free_bootmem();
++ /*
++ * The order of these functions is important for
++ * fall-back/fail-over reasons
++ */
++#ifdef CONFIG_GART_IOMMU
++ gart_iommu_hole_init();
++#endif
+
+- for_each_sg(sgl, sg, nents, i)
+- gnttab_dma_unmap_page(sg->dma_address);
+- }
+-}
+-EXPORT_SYMBOL(dma_unmap_sg);
++#ifdef CONFIG_CALGARY_IOMMU
++ detect_calgary();
++#endif
+
+-#ifdef CONFIG_HIGHMEM
+-dma_addr_t
+-dma_map_page(struct device *dev, struct page *page, unsigned long offset,
+- size_t size, enum dma_data_direction direction)
+-{
+- dma_addr_t dma_addr;
++ detect_intel_iommu();
+
+- BUG_ON(!valid_dma_direction(direction));
++#ifdef CONFIG_SWIOTLB
++ swiotlb_init();
+ if (swiotlb) {
+- dma_addr = swiotlb_map_page(
+- dev, page, offset, size, direction);
+- } else {
+- dma_addr = gnttab_dma_map_page(page) + offset;
+- IOMMU_BUG_ON(address_needs_mapping(dev, dma_addr));
++ printk(KERN_INFO "PCI-DMA: Using software bounce buffering for IO (SWIOTLB)\n");
++ dma_ops = &swiotlb_dma_ops;
+ }
+-
+- return dma_addr;
++#endif
+ }
+-EXPORT_SYMBOL(dma_map_page);
+
+-void
+-dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
+- enum dma_data_direction direction)
++/*
++ * See <Documentation/x86_64/boot-options.txt> for the iommu kernel parameter
++ * documentation.
++ */
++static __init int iommu_setup(char *p)
+ {
+- BUG_ON(!valid_dma_direction(direction));
+- if (swiotlb)
+- swiotlb_unmap_page(dev, dma_address, size, direction);
+- else
+- gnttab_dma_unmap_page(dma_address);
+-}
+-EXPORT_SYMBOL(dma_unmap_page);
+-#endif /* CONFIG_HIGHMEM */
++ iommu_merge = 1;
+
+-int
+-dma_mapping_error(dma_addr_t dma_addr)
+-{
+- if (swiotlb)
+- return swiotlb_dma_mapping_error(dma_addr);
+- return 0;
+-}
+-EXPORT_SYMBOL(dma_mapping_error);
++ if (!p)
++ return -EINVAL;
+
+-int
+-dma_supported(struct device *dev, u64 mask)
+-{
+- if (swiotlb)
+- return swiotlb_dma_supported(dev, mask);
+- /*
+- * By default we'll BUG when an infeasible DMA is requested, and
+- * request swiotlb=force (see IOMMU_BUG_ON).
+- */
+- return 1;
+-}
+-EXPORT_SYMBOL(dma_supported);
++ while (*p) {
++ if (!strncmp(p, "off", 3))
++ no_iommu = 1;
++ /* gart_parse_options has more force support */
++ if (!strncmp(p, "force", 5))
++ force_iommu = 1;
++ if (!strncmp(p, "noforce", 7)) {
++ iommu_merge = 0;
++ force_iommu = 0;
++ }
+
+-void *dma_alloc_coherent(struct device *dev, size_t size,
+- dma_addr_t *dma_handle, gfp_t gfp)
+-{
+- void *ret;
+- struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
+- unsigned int order = get_order(size);
+- unsigned long vstart;
+- u64 mask;
++ if (!strncmp(p, "biomerge", 8)) {
++ iommu_bio_merge = 4096;
++ iommu_merge = 1;
++ force_iommu = 1;
++ }
++ if (!strncmp(p, "panic", 5))
++ panic_on_overflow = 1;
++ if (!strncmp(p, "nopanic", 7))
++ panic_on_overflow = 0;
++ if (!strncmp(p, "merge", 5)) {
++ iommu_merge = 1;
++ force_iommu = 1;
++ }
++ if (!strncmp(p, "nomerge", 7))
++ iommu_merge = 0;
++ if (!strncmp(p, "forcesac", 8))
++ iommu_sac_force = 1;
++ if (!strncmp(p, "allowdac", 8))
++ forbid_dac = 0;
++ if (!strncmp(p, "nodac", 5))
++ forbid_dac = -1;
++ if (!strncmp(p, "usedac", 6)) {
++ forbid_dac = -1;
++ return 1;
++ }
++#ifdef CONFIG_SWIOTLB
++ if (!strncmp(p, "soft", 4))
++ swiotlb = 1;
++#endif
+
+- /* ignore region specifiers */
+- gfp &= ~(__GFP_DMA | __GFP_HIGHMEM);
++#ifdef CONFIG_GART_IOMMU
++ gart_parse_options(p);
++#endif
+
+- if (mem) {
+- int page = bitmap_find_free_region(mem->bitmap, mem->size,
+- order);
+- if (page >= 0) {
+- *dma_handle = mem->device_base + (page << PAGE_SHIFT);
+- ret = mem->virt_base + (page << PAGE_SHIFT);
+- memset(ret, 0, size);
+- return ret;
+- }
+- if (mem->flags & DMA_MEMORY_EXCLUSIVE)
+- return NULL;
++#ifdef CONFIG_CALGARY_IOMMU
++ if (!strncmp(p, "calgary", 7))
++ use_calgary = 1;
++#endif /* CONFIG_CALGARY_IOMMU */
++
++ p += strcspn(p, ",");
++ if (*p == ',')
++ ++p;
+ }
++ return 0;
++}
++early_param("iommu", iommu_setup);
+
+- if (dev == NULL || (dev->coherent_dma_mask < 0xffffffff))
+- gfp |= GFP_DMA;
+-
+- vstart = __get_free_pages(gfp, order);
+- ret = (void *)vstart;
++static int check_pages_physically_contiguous(unsigned long pfn,
++ unsigned int offset,
++ size_t length)
++{
++ unsigned long next_mfn;
++ int i;
++ int nr_pages;
+
+- if (dev != NULL && dev->coherent_dma_mask)
+- mask = dev->coherent_dma_mask;
+- else
+- mask = 0xffffffff;
++ next_mfn = pfn_to_mfn(pfn);
++ nr_pages = (offset + length + PAGE_SIZE-1) >> PAGE_SHIFT;
+
+- if (ret != NULL) {
+- if (xen_create_contiguous_region(vstart, order,
+- fls64(mask)) != 0) {
+- free_pages(vstart, order);
+- return NULL;
+- }
+- memset(ret, 0, size);
+- *dma_handle = virt_to_bus(ret);
++ for (i = 1; i < nr_pages; i++) {
++ if (pfn_to_mfn(++pfn) != ++next_mfn)
++ return 0;
+ }
+- return ret;
++ return 1;
+ }
+-EXPORT_SYMBOL(dma_alloc_coherent);
+
+-void dma_free_coherent(struct device *dev, size_t size,
+- void *vaddr, dma_addr_t dma_handle)
++int range_straddles_page_boundary(paddr_t p, size_t size)
+ {
+- struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
+- int order = get_order(size);
+-
+- WARN_ON(irqs_disabled()); /* for portability */
+- if (mem && vaddr >= mem->virt_base && vaddr < (mem->virt_base + (mem->size << PAGE_SHIFT))) {
+- int page = (vaddr - mem->virt_base) >> PAGE_SHIFT;
++ unsigned long pfn = p >> PAGE_SHIFT;
++ unsigned int offset = p & ~PAGE_MASK;
+
+- bitmap_release_region(mem->bitmap, page, order);
+- } else {
+- xen_destroy_contiguous_region((unsigned long)vaddr, order);
+- free_pages((unsigned long)vaddr, order);
+- }
++ return ((offset + size > PAGE_SIZE) &&
++ !check_pages_physically_contiguous(pfn, offset, size));
+ }
+-EXPORT_SYMBOL(dma_free_coherent);
+
+-#ifdef ARCH_HAS_DMA_DECLARE_COHERENT_MEMORY
++#ifdef CONFIG_X86_32
+ int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
+ dma_addr_t device_addr, size_t size, int flags)
+ {
+@@ -327,8 +295,8 @@ EXPORT_SYMBOL(dma_declare_coherent_memor
+ void dma_release_declared_memory(struct device *dev)
+ {
+ struct dma_coherent_mem *mem = dev->dma_mem;
+-
+- if(!mem)
++
++ if (!mem)
+ return;
+ dev->dma_mem = NULL;
+ iounmap(mem->virt_base);
+@@ -341,8 +309,10 @@ void *dma_mark_declared_memory_occupied(
+ dma_addr_t device_addr, size_t size)
+ {
+ struct dma_coherent_mem *mem = dev->dma_mem;
+- int pages = (size + (device_addr & ~PAGE_MASK) + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ int pos, err;
++ int pages = (size + (device_addr & ~PAGE_MASK) + PAGE_SIZE - 1);
++
++ pages >>= PAGE_SHIFT;
+
+ if (!mem)
+ return ERR_PTR(-EINVAL);
+@@ -354,103 +324,270 @@ void *dma_mark_declared_memory_occupied(
+ return mem->virt_base + (pos << PAGE_SHIFT);
+ }
+ EXPORT_SYMBOL(dma_mark_declared_memory_occupied);
+-#endif /* ARCH_HAS_DMA_DECLARE_COHERENT_MEMORY */
+-
+-#if defined(CONFIG_PCI) && !defined(CONFIG_XEN)
+-/* Many VIA bridges seem to corrupt data for DAC. Disable it here */
+
+-int forbid_dac;
+-EXPORT_SYMBOL(forbid_dac);
+-
+-static __devinit void via_no_dac(struct pci_dev *dev)
++static int dma_alloc_from_coherent_mem(struct device *dev, ssize_t size,
++ dma_addr_t *dma_handle, void **ret)
+ {
+- if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) {
+- printk(KERN_INFO "PCI: VIA PCI bridge detected. Disabling DAC.\n");
+- forbid_dac = 1;
++ struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
++ int order = get_order(size);
++
++ if (mem) {
++ int page = bitmap_find_free_region(mem->bitmap, mem->size,
++ order);
++ if (page >= 0) {
++ *dma_handle = mem->device_base + (page << PAGE_SHIFT);
++ *ret = mem->virt_base + (page << PAGE_SHIFT);
++ memset(*ret, 0, size);
++ }
++ if (mem->flags & DMA_MEMORY_EXCLUSIVE)
++ *ret = NULL;
+ }
++ return (mem != NULL);
+ }
+-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID, via_no_dac);
+
+-static int check_iommu(char *s)
++static int dma_release_coherent(struct device *dev, int order, void *vaddr)
+ {
+- if (!strcmp(s, "usedac")) {
+- forbid_dac = -1;
++ struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
++
++ if (mem && vaddr >= mem->virt_base && vaddr <
++ (mem->virt_base + (mem->size << PAGE_SHIFT))) {
++ int page = (vaddr - mem->virt_base) >> PAGE_SHIFT;
++
++ bitmap_release_region(mem->bitmap, page, order);
+ return 1;
+ }
+ return 0;
+ }
+-__setup("iommu=", check_iommu);
++#else
++#define dma_alloc_from_coherent_mem(dev, size, handle, ret) (0)
++#define dma_release_coherent(dev, order, vaddr) (0)
++#endif /* CONFIG_X86_32 */
++
++int dma_supported(struct device *dev, u64 mask)
++{
++#ifdef CONFIG_PCI
++ if (mask > 0xffffffff && forbid_dac > 0) {
++ printk(KERN_INFO "PCI: Disallowing DAC for device %s\n",
++ dev->bus_id);
++ return 0;
++ }
+ #endif
+
+-dma_addr_t
+-dma_map_single(struct device *dev, void *ptr, size_t size,
+- enum dma_data_direction direction)
++ if (dma_ops->dma_supported)
++ return dma_ops->dma_supported(dev, mask);
++
++ /* Copied from i386. Doesn't make much sense, because it will
++ only work for pci_alloc_coherent.
++ The caller just has to use GFP_DMA in this case. */
++ if (mask < DMA_24BIT_MASK)
++ return 0;
++
++ /* Tell the device to use SAC when IOMMU force is on. This
++ allows the driver to use cheaper accesses in some cases.
++
++ Problem with this is that if we overflow the IOMMU area and
++ return DAC as fallback address the device may not handle it
++ correctly.
++
++ As a special case some controllers have a 39bit address
++ mode that is as efficient as 32bit (aic79xx). Don't force
++ SAC for these. Assume all masks <= 40 bits are of this
++ type. Normally this doesn't make any difference, but gives
++ more gentle handling of IOMMU overflow. */
++ if (iommu_sac_force && (mask >= DMA_40BIT_MASK)) {
++ printk(KERN_INFO "%s: Force SAC with mask %Lx\n",
++ dev->bus_id, mask);
++ return 0;
++ }
++
++ return 1;
++}
++EXPORT_SYMBOL(dma_supported);
++
++/* Allocate DMA memory on node near device */
++static struct page *
++dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order)
+ {
+- dma_addr_t dma;
++ int node;
+
+- BUG_ON(!valid_dma_direction(direction));
+- WARN_ON(size == 0);
++ node = dev_to_node(dev);
+
+- if (swiotlb) {
+- dma = swiotlb_map_single(dev, ptr, size, direction);
+- } else {
+- dma = gnttab_dma_map_page(virt_to_page(ptr)) +
+- offset_in_page(ptr);
+- IOMMU_BUG_ON(range_straddles_page_boundary(__pa(ptr), size));
+- IOMMU_BUG_ON(address_needs_mapping(dev, dma));
+- }
+-
+- flush_write_buffers();
+- return dma;
+-}
+-EXPORT_SYMBOL(dma_map_single);
+-
+-void
+-dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
+- enum dma_data_direction direction)
+-{
+- BUG_ON(!valid_dma_direction(direction));
+- if (swiotlb)
+- swiotlb_unmap_single(dev, dma_addr, size, direction);
+- else
+- gnttab_dma_unmap_page(dma_addr);
++ return alloc_pages_node(node, gfp, order);
++}
++
++/*
++ * Allocate memory for a coherent mapping.
++ */
++void *
++dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
++ gfp_t gfp)
++{
++ void *memory = NULL;
++ struct page *page;
++ unsigned long dma_mask = 0;
++ int noretry = 0;
++ unsigned int order = get_order(size);
++
++ /* ignore region specifiers */
++ gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
++
++ if (dma_alloc_from_coherent_mem(dev, size, dma_handle, &memory))
++ return memory;
++
++ if (!dev) {
++ dev = &fallback_dev;
++ gfp |= GFP_DMA;
++ }
++ dma_mask = dev->coherent_dma_mask;
++ if (dma_mask == 0)
++ dma_mask = (gfp & GFP_DMA) ? DMA_24BIT_MASK : DMA_32BIT_MASK;
++
++ /* Device not DMA able */
++ if (dev->dma_mask == NULL)
++ return NULL;
++
++ /* Don't invoke OOM killer or retry in lower 16MB DMA zone */
++ if (gfp & __GFP_DMA)
++ noretry = 1;
++
++#ifdef CONFIG_XEN
++ gfp &= ~(__GFP_DMA | __GFP_DMA32);
++#else
++#ifdef CONFIG_X86_64
++ /* Why <=? Even when the mask is smaller than 4GB it is often
++ larger than 16MB and in this case we have a chance of
++ finding fitting memory in the next higher zone first. If
++ not retry with true GFP_DMA. -AK */
++ if (dma_mask <= DMA_32BIT_MASK && !(gfp & GFP_DMA)) {
++ gfp |= GFP_DMA32;
++#endif
++
++ again:
++#endif
++ page = dma_alloc_pages(dev,
++ noretry ? gfp | __GFP_NORETRY : gfp, order);
++ if (page == NULL)
++ return NULL;
++
++#ifndef CONFIG_XEN
++ {
++ int high, mmu;
++ dma_addr_t bus = page_to_phys(page);
++ memory = page_address(page);
++ high = (bus + size) >= dma_mask;
++ mmu = high;
++ if (force_iommu && !(gfp & GFP_DMA))
++ mmu = 1;
++ else if (high) {
++ free_pages((unsigned long)memory, order);
++
++ /* Don't use the 16MB ZONE_DMA unless absolutely
++ needed. It's better to use remapping first. */
++ if (dma_mask < DMA_32BIT_MASK && !(gfp & GFP_DMA)) {
++ gfp = (gfp & ~GFP_DMA32) | GFP_DMA;
++ goto again;
++ }
++
++ /* Let low level make its own zone decisions */
++ gfp &= ~(GFP_DMA32|GFP_DMA);
++
++ if (dma_ops->alloc_coherent)
++ return dma_ops->alloc_coherent(dev, size,
++ dma_handle, gfp);
++ return NULL;
++ }
++
++ memset(memory, 0, size);
++ if (!mmu) {
++ *dma_handle = bus;
++ return memory;
++ }
++ }
++
++ if (dma_ops->alloc_coherent) {
++ free_pages((unsigned long)memory, order);
++ gfp &= ~(GFP_DMA|GFP_DMA32);
++ return dma_ops->alloc_coherent(dev, size, dma_handle, gfp);
++ }
++
++ if (dma_ops->map_simple) {
++ *dma_handle = dma_ops->map_simple(dev, virt_to_bus(memory),
++ size,
++ PCI_DMA_BIDIRECTIONAL);
++ if (*dma_handle != bad_dma_address)
++ return memory;
++ }
++#else
++ memory = page_address(page);
++ if (xen_create_contiguous_region((unsigned long)memory, order,
++ fls64(dma_mask)) == 0) {
++ memset(memory, 0, size);
++ *dma_handle = virt_to_bus(memory);
++ return memory;
++ }
++#endif
++
++ if (panic_on_overflow)
++ panic("dma_alloc_coherent: IOMMU overflow by %lu bytes\n",
++ (unsigned long)size);
++ free_pages((unsigned long)memory, order);
++ return NULL;
+ }
+-EXPORT_SYMBOL(dma_unmap_single);
++EXPORT_SYMBOL(dma_alloc_coherent);
+
+-void
+-dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size,
+- enum dma_data_direction direction)
++/*
++ * Unmap coherent memory.
++ * The caller must ensure that the device has finished accessing the mapping.
++ */
++void dma_free_coherent(struct device *dev, size_t size,
++ void *vaddr, dma_addr_t bus)
+ {
+- if (swiotlb)
+- swiotlb_sync_single_for_cpu(dev, dma_handle, size, direction);
++ int order = get_order(size);
++ WARN_ON(irqs_disabled()); /* for portability */
++ if (dma_release_coherent(dev, order, vaddr))
++ return;
++#ifndef CONFIG_XEN
++ if (dma_ops->unmap_single)
++ dma_ops->unmap_single(dev, bus, size, 0);
++#endif
++ xen_destroy_contiguous_region((unsigned long)vaddr, order);
++ free_pages((unsigned long)vaddr, order);
+ }
+-EXPORT_SYMBOL(dma_sync_single_for_cpu);
++EXPORT_SYMBOL(dma_free_coherent);
+
+-void
+-dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, size_t size,
+- enum dma_data_direction direction)
++static int __init pci_iommu_init(void)
+ {
+- if (swiotlb)
+- swiotlb_sync_single_for_device(dev, dma_handle, size, direction);
++#ifdef CONFIG_CALGARY_IOMMU
++ calgary_iommu_init();
++#endif
++
++ intel_iommu_init();
++
++#ifdef CONFIG_GART_IOMMU
++ gart_iommu_init();
++#endif
++
++ no_iommu_init();
++ return 0;
+ }
+-EXPORT_SYMBOL(dma_sync_single_for_device);
+
+-void
+-dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems,
+- enum dma_data_direction direction)
++void pci_iommu_shutdown(void)
+ {
+- if (swiotlb)
+- swiotlb_sync_sg_for_cpu(dev,sg,nelems,direction);
+- flush_write_buffers();
++ gart_iommu_shutdown();
+ }
+-EXPORT_SYMBOL(dma_sync_sg_for_cpu);
++/* Must execute after PCI subsystem */
++fs_initcall(pci_iommu_init);
++
++#ifdef CONFIG_PCI
++/* Many VIA bridges seem to corrupt data for DAC. Disable it here */
+
+-void
+-dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems,
+- enum dma_data_direction direction)
++static __devinit void via_no_dac(struct pci_dev *dev)
+ {
+- if (swiotlb)
+- swiotlb_sync_sg_for_device(dev,sg,nelems,direction);
+- flush_write_buffers();
++ if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) {
++ printk(KERN_INFO "PCI: VIA PCI bridge detected."
++ "Disabling DAC.\n");
++ forbid_dac = 1;
++ }
+ }
+-EXPORT_SYMBOL(dma_sync_sg_for_device);
++DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID, via_no_dac);
++#endif
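
The pci-dma rework above funnels every DMA operation through a single dma_mapping_ops table (swiotlb_dma_ops here, nommu_dma_ops in the new pci-nommu-xen.c that follows), and pci_iommu_alloc() picks the backend once at boot. A minimal standalone sketch of that dispatch pattern, using toy types and backends rather than the kernel's real structures:

    #include <stdio.h>
    #include <stddef.h>

    /* Toy stand-in for the kernel's dma_mapping_ops dispatch (illustration only). */
    struct dma_mapping_ops {
        const char *name;
        unsigned long (*map_single)(void *ptr, size_t size);
        void (*unmap_single)(unsigned long handle, size_t size);
    };

    static unsigned long nommu_map(void *ptr, size_t size)   { (void)size; return (unsigned long)ptr; }
    static void nommu_unmap(unsigned long h, size_t s)        { (void)h; (void)s; }
    static unsigned long swiotlb_map(void *ptr, size_t size)  { (void)size; return (unsigned long)ptr ^ 0x1000; }
    static void swiotlb_unmap(unsigned long h, size_t s)      { (void)h; (void)s; }

    static const struct dma_mapping_ops nommu_ops   = { "nommu",   nommu_map,   nommu_unmap };
    static const struct dma_mapping_ops swiotlb_ops = { "swiotlb", swiotlb_map, swiotlb_unmap };

    /* Chosen once at boot; the generic wrappers only ever call through this. */
    static const struct dma_mapping_ops *dma_ops;

    static void pci_iommu_alloc_toy(int want_swiotlb)
    {
        if (want_swiotlb)          /* a preferred backend claims dma_ops first */
            dma_ops = &swiotlb_ops;
        if (!dma_ops)              /* nommu stays the last-resort fallback */
            dma_ops = &nommu_ops;
    }

    int main(void)
    {
        char buf[64];
        unsigned long handle;

        pci_iommu_alloc_toy(1);
        handle = dma_ops->map_single(buf, sizeof(buf));
        printf("mapped via %s backend, handle=%#lx\n", dma_ops->name, handle);
        dma_ops->unmap_single(handle, sizeof(buf));
        return 0;
    }

no_iommu_init() in pci-nommu-xen.c below follows the same rule as the toy's fallback test: it only installs its table if nothing else has already claimed dma_ops.
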
+--- /dev/null 1970-01-01 00:00:00.000000000 +0000
++++ sle11-2009-05-14/arch/x86/kernel/pci-nommu-xen.c 2009-03-16 16:38:05.000000000 +0100
+@@ -0,0 +1,103 @@
++#include <linux/dma-mapping.h>
++#include <linux/dmar.h>
++#include <linux/bootmem.h>
++#include <linux/pci.h>
++
++#include <xen/gnttab.h>
++
++#include <asm/proto.h>
++#include <asm/dma.h>
++#include <asm/swiotlb.h>
++#include <asm/tlbflush.h>
++#include <asm/gnttab_dma.h>
++#include <asm/bug.h>
++
++#define IOMMU_BUG_ON(test) \
++do { \
++ if (unlikely(test)) { \
++ printk(KERN_ALERT "Fatal DMA error! " \
++ "Please use 'swiotlb=force'\n"); \
++ BUG(); \
++ } \
++} while (0)
++
++static int
++gnttab_map_sg(struct device *hwdev, struct scatterlist *sgl, int nents,
++ int direction)
++{
++ unsigned int i;
++ struct scatterlist *sg;
++
++ WARN_ON(nents == 0 || sgl->length == 0);
++
++ for_each_sg(sgl, sg, nents, i) {
++ BUG_ON(!sg_page(sg));
++ sg->dma_address =
++ gnttab_dma_map_page(sg_page(sg)) + sg->offset;
++ sg->dma_length = sg->length;
++ IOMMU_BUG_ON(address_needs_mapping(
++ hwdev, sg->dma_address));
++ IOMMU_BUG_ON(range_straddles_page_boundary(
++ page_to_pseudophys(sg_page(sg)) + sg->offset,
++ sg->length));
++ }
++
++ return nents;
++}
++
++static void
++gnttab_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nents,
++ int direction)
++{
++ unsigned int i;
++ struct scatterlist *sg;
++
++ for_each_sg(sgl, sg, nents, i)
++ gnttab_dma_unmap_page(sg->dma_address);
++}
++
++static dma_addr_t
++gnttab_map_single(struct device *dev, phys_addr_t paddr, size_t size,
++ int direction)
++{
++ dma_addr_t dma;
++
++ WARN_ON(size == 0);
++
++ dma = gnttab_dma_map_page(pfn_to_page(paddr >> PAGE_SHIFT)) +
++ offset_in_page(paddr);
++ IOMMU_BUG_ON(range_straddles_page_boundary(paddr, size));
++ IOMMU_BUG_ON(address_needs_mapping(dev, dma));
++
++ return dma;
++}
++
++static void
++gnttab_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
++ int direction)
++{
++ gnttab_dma_unmap_page(dma_addr);
++}
++
++static int nommu_mapping_error(dma_addr_t dma_addr)
++{
++ return (dma_addr == bad_dma_address);
++}
++
++static const struct dma_mapping_ops nommu_dma_ops = {
++ .map_single = gnttab_map_single,
++ .unmap_single = gnttab_unmap_single,
++ .map_sg = gnttab_map_sg,
++ .unmap_sg = gnttab_unmap_sg,
++ .dma_supported = swiotlb_dma_supported,
++ .mapping_error = nommu_mapping_error
++};
++
++void __init no_iommu_init(void)
++{
++ if (dma_ops)
++ return;
++
++ force_iommu = 0; /* no HW IOMMU */
++ dma_ops = &nommu_dma_ops;
++}
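
gnttab_map_single() and gnttab_map_sg() above refuse any mapping whose pseudo-physical range is not machine-contiguous; that is the range_straddles_page_boundary() check kept from the old pci-dma-xen.c earlier in this patch. A self-contained sketch of the check, with a made-up pfn-to-mfn table standing in for the real p2m lookup:

    #include <stdio.h>

    #define PAGE_SHIFT 12
    #define PAGE_SIZE  (1UL << PAGE_SHIFT)
    #define PAGE_MASK  (~(PAGE_SIZE - 1))

    /* Toy p2m table: pseudo-physical frame -> machine frame (illustration only). */
    static const unsigned long p2m[] = { 100, 101, 250, 251 };
    static unsigned long pfn_to_mfn(unsigned long pfn) { return p2m[pfn]; }

    static int pages_machine_contiguous(unsigned long pfn, unsigned int offset, size_t len)
    {
        unsigned long next_mfn = pfn_to_mfn(pfn);
        unsigned long nr_pages = (offset + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
        unsigned long i;

        for (i = 1; i < nr_pages; i++)
            if (pfn_to_mfn(++pfn) != ++next_mfn)
                return 0;
        return 1;
    }

    static int range_straddles_page_boundary(unsigned long paddr, size_t size)
    {
        unsigned long pfn = paddr >> PAGE_SHIFT;
        unsigned int offset = paddr & ~PAGE_MASK;

        return (offset + size > PAGE_SIZE) &&
               !pages_machine_contiguous(pfn, offset, size);
    }

    int main(void)
    {
        /* pfns 0 and 1 map to mfns 100 and 101: crossing that boundary is fine */
        printf("%d\n", range_straddles_page_boundary(0x0800, 0x1000));  /* prints 0 */
        /* pfns 1 and 2 map to mfns 101 and 250: this mapping must be rejected */
        printf("%d\n", range_straddles_page_boundary(0x1800, 0x1000));  /* prints 1 */
        return 0;
    }
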
+--- /dev/null 1970-01-01 00:00:00.000000000 +0000
++++ sle11-2009-05-14/arch/x86/kernel/process-xen.c 2009-03-16 16:38:05.000000000 +0100
+@@ -0,0 +1,188 @@
++#include <linux/errno.h>
++#include <linux/kernel.h>
++#include <linux/mm.h>
++#include <linux/smp.h>
++#include <linux/slab.h>
++#include <linux/sched.h>
++#include <linux/module.h>
++#include <linux/pm.h>
++
++struct kmem_cache *task_xstate_cachep;
++
++int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
++{
++ *dst = *src;
++ if (src->thread.xstate) {
++ dst->thread.xstate = kmem_cache_alloc(task_xstate_cachep,
++ GFP_KERNEL);
++ if (!dst->thread.xstate)
++ return -ENOMEM;
++ WARN_ON((unsigned long)dst->thread.xstate & 15);
++ memcpy(dst->thread.xstate, src->thread.xstate, xstate_size);
++ }
++ return 0;
++}
++
++void free_thread_xstate(struct task_struct *tsk)
++{
++ if (tsk->thread.xstate) {
++ kmem_cache_free(task_xstate_cachep, tsk->thread.xstate);
++ tsk->thread.xstate = NULL;
++ }
++}
++
++void free_thread_info(struct thread_info *ti)
++{
++ free_thread_xstate(ti->task);
++ free_pages((unsigned long)ti, get_order(THREAD_SIZE));
++}
++
++void arch_task_cache_init(void)
++{
++ task_xstate_cachep =
++ kmem_cache_create("task_xstate", xstate_size,
++ __alignof__(union thread_xstate),
++ SLAB_PANIC, NULL);
++}
++
++static void do_nothing(void *unused)
++{
++}
++
++/*
++ * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
++ * pm_idle and update to new pm_idle value. Required while changing pm_idle
++ * handler on SMP systems.
++ *
++ * Caller must have changed pm_idle to the new value before the call. Old
++ * pm_idle value will not be used by any CPU after the return of this function.
++ */
++void cpu_idle_wait(void)
++{
++ smp_mb();
++ /* kick all the CPUs so that they exit out of pm_idle */
++ smp_call_function(do_nothing, NULL, 0, 1);
++}
++EXPORT_SYMBOL_GPL(cpu_idle_wait);
++
++#ifndef CONFIG_XEN
++/*
++ * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
++ * which can obviate IPI to trigger checking of need_resched.
++ * We execute MONITOR against need_resched and enter optimized wait state
++ * through MWAIT. Whenever someone changes need_resched, we would be woken
++ * up from MWAIT (without an IPI).
++ *
++ * New with Core Duo processors, MWAIT can take some hints based on CPU
++ * capability.
++ */
++void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
++{
++ if (!need_resched()) {
++ __monitor((void *)&current_thread_info()->flags, 0, 0);
++ smp_mb();
++ if (!need_resched())
++ __mwait(ax, cx);
++ }
++}
++
++/* Default MONITOR/MWAIT with no hints, used for default C1 state */
++static void mwait_idle(void)
++{
++ if (!need_resched()) {
++ __monitor((void *)&current_thread_info()->flags, 0, 0);
++ smp_mb();
++ if (!need_resched())
++ __sti_mwait(0, 0);
++ else
++ local_irq_enable();
++ } else
++ local_irq_enable();
++}
++#endif
++
++/*
++ * On SMP it's slightly faster (but much more power-consuming!)
++ * to poll the ->work.need_resched flag instead of waiting for the
++ * cross-CPU IPI to arrive. Use this option with caution.
++ */
++static void poll_idle(void)
++{
++ local_irq_enable();
++ cpu_relax();
++}
++
++#ifndef CONFIG_XEN
++/*
++ * mwait selection logic:
++ *
++ * It depends on the CPU. For AMD CPUs that support MWAIT this is
++ * wrong. Family 0x10 and 0x11 CPUs will enter C1 on HLT. Powersavings
++ * then depend on a clock divisor and current Pstate of the core. If
++ * all cores of a processor are in halt state (C1) the processor can
++ * enter the C1E (C1 enhanced) state. If mwait is used this will never
++ * happen.
++ *
++ * idle=mwait overrides this decision and forces the usage of mwait.
++ */
++static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c)
++{
++ if (force_mwait)
++ return 1;
++
++ if (c->x86_vendor == X86_VENDOR_AMD) {
++ switch(c->x86) {
++ case 0x10:
++ case 0x11:
++ return 0;
++ }
++ }
++ return 1;
++}
++#endif
++
++void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
++{
++#ifndef CONFIG_XEN
++ static int selected;
++
++ if (selected)
++ return;
++#ifdef CONFIG_X86_SMP
++ if (pm_idle == poll_idle && smp_num_siblings > 1) {
++ printk(KERN_WARNING "WARNING: polling idle and HT enabled,"
++ " performance may degrade.\n");
++ }
++#endif
++ if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) {
++ /*
++ * Skip if setup has overridden idle.
++ * One CPU supports mwait => all CPUs support mwait
++ */
++ if (!pm_idle) {
++ printk(KERN_INFO "using mwait in idle threads.\n");
++ pm_idle = mwait_idle;
++ }
++ }
++ selected = 1;
++#endif
++}
++
++static int __init idle_setup(char *str)
++{
++ if (!strcmp(str, "poll")) {
++ printk("using polling idle threads.\n");
++ pm_idle = poll_idle;
++ }
++#ifndef CONFIG_XEN
++ else if (!strcmp(str, "mwait"))
++ force_mwait = 1;
++#endif
++ else
++ return -1;
++
++ boot_option_idle_override = 1;
++ return 0;
++}
++early_param("idle", idle_setup);
++
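
The new process-xen.c gathers the idle-loop plumbing shared by 32-bit and 64-bit: cpu_idle_wait(), the MWAIT heuristics, and the "idle=" early parameter that picks pm_idle before select_idle_routine() gets a say. A small standalone model of that selection order (toy idle routines, and omitting the AMD-family check done by mwait_usable()):

    #include <stdio.h>
    #include <string.h>

    static void poll_idle(void)    { /* spin, re-checking need_resched */ }
    static void mwait_idle(void)   { /* MONITOR/MWAIT until woken */ }
    static void default_idle(void) { /* halt until the next interrupt */ }

    static void (*pm_idle)(void);
    static int boot_option_idle_override;
    static int force_mwait;

    /* Models the "idle=" early parameter handler. */
    static int idle_setup(const char *str)
    {
        if (!strcmp(str, "poll"))
            pm_idle = poll_idle;
        else if (!strcmp(str, "mwait"))
            force_mwait = 1;
        else
            return -1;

        boot_option_idle_override = 1;
        return 0;
    }

    /* Models select_idle_routine(): a command-line choice wins over detection. */
    static void select_idle_routine(int cpu_has_mwait)
    {
        if (!pm_idle)
            pm_idle = (cpu_has_mwait || force_mwait) ? mwait_idle : default_idle;
    }

    int main(void)
    {
        idle_setup("poll");
        select_idle_routine(1);
        printf("pm_idle=%s, override=%d\n",
               pm_idle == poll_idle ? "poll_idle" : "other",
               boot_option_idle_override);
        return 0;
    }
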
+--- sle11-2009-05-14.orig/arch/x86/kernel/process_32-xen.c 2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/arch/x86/kernel/process_32-xen.c 2009-03-16 16:38:05.000000000 +0100
+@@ -36,6 +36,7 @@
+ #include <linux/personality.h>
+ #include <linux/tick.h>
+ #include <linux/percpu.h>
++#include <linux/prctl.h>
+
+ #include <asm/uaccess.h>
+ #include <asm/pgtable.h>
+@@ -45,7 +46,6 @@
+ #include <asm/processor.h>
+ #include <asm/i387.h>
+ #include <asm/desc.h>
+-#include <asm/vm86.h>
+ #ifdef CONFIG_MATH_EMULATION
+ #include <asm/math_emu.h>
+ #endif
+@@ -102,16 +102,6 @@ void enable_hlt(void)
+
+ EXPORT_SYMBOL(enable_hlt);
+
+-/*
+- * On SMP it's slightly faster (but much more power-consuming!)
+- * to poll the ->work.need_resched flag instead of waiting for the
+- * cross-CPU IPI to arrive. Use this option with caution.
+- */
+-static void poll_idle(void)
+-{
+- cpu_relax();
+-}
+-
+ static void xen_idle(void)
+ {
+ current_thread_info()->status &= ~TS_POLLING;
+@@ -121,20 +111,10 @@ static void xen_idle(void)
+ */
+ smp_mb();
+
+- local_irq_disable();
+- if (!need_resched()) {
+- ktime_t t0, t1;
+- u64 t0n, t1n;
+-
+- t0 = ktime_get();
+- t0n = ktime_to_ns(t0);
++ if (!need_resched())
+ safe_halt(); /* enables interrupts racelessly */
+- local_irq_disable();
+- t1 = ktime_get();
+- t1n = ktime_to_ns(t1);
+- sched_clock_idle_wakeup_event(t1n - t0n);
+- }
+- local_irq_enable();
++ else
++ local_irq_enable();
+ current_thread_info()->status |= TS_POLLING;
+ }
+ #ifdef CONFIG_APM_MODULE
+@@ -142,7 +122,6 @@ EXPORT_SYMBOL(default_idle);
+ #endif
+
+ #ifdef CONFIG_HOTPLUG_CPU
+-extern cpumask_t cpu_initialized;
+ static inline void play_dead(void)
+ {
+ idle_task_exit();
+@@ -187,6 +166,7 @@ void cpu_idle(void)
+ if (cpu_is_offline(cpu))
+ play_dead();
+
++ local_irq_disable();
+ __get_cpu_var(irq_stat).idle_timestamp = jiffies;
+ idle();
+ }
+@@ -197,44 +177,6 @@ void cpu_idle(void)
+ }
+ }
+
+-static void do_nothing(void *unused)
+-{
+-}
+-
+-/*
+- * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
+- * pm_idle and update to new pm_idle value. Required while changing pm_idle
+- * handler on SMP systems.
+- *
+- * Caller must have changed pm_idle to the new value before the call. Old
+- * pm_idle value will not be used by any CPU after the return of this function.
+- */
+-void cpu_idle_wait(void)
+-{
+- smp_mb();
+- /* kick all the CPUs so that they exit out of pm_idle */
+- smp_call_function(do_nothing, NULL, 0, 1);
+-}
+-EXPORT_SYMBOL_GPL(cpu_idle_wait);
+-
+-void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
+-{
+-}
+-
+-static int __init idle_setup(char *str)
+-{
+- if (!strcmp(str, "poll")) {
+- printk("using polling idle threads.\n");
+- pm_idle = poll_idle;
+- }
+- else
+- return -1;
+-
+- boot_option_idle_override = 1;
+- return 0;
+-}
+-early_param("idle", idle_setup);
+-
+ void __show_registers(struct pt_regs *regs, int all)
+ {
+ unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
+@@ -260,7 +202,7 @@ void __show_registers(struct pt_regs *re
+ init_utsname()->version);
+
+ printk("EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n",
+- 0xffff & regs->cs, regs->ip, regs->flags,
++ (u16)regs->cs, regs->ip, regs->flags,
+ smp_processor_id());
+ print_symbol("EIP is at %s\n", regs->ip);
+
+@@ -269,8 +211,7 @@ void __show_registers(struct pt_regs *re
+ printk("ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n",
+ regs->si, regs->di, regs->bp, sp);
+ printk(" DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x\n",
+- regs->ds & 0xffff, regs->es & 0xffff,
+- regs->fs & 0xffff, gs, ss);
++ (u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, ss);
+
+ if (!all)
+ return;
+@@ -367,6 +308,7 @@ void flush_thread(void)
+ /*
+ * Forget coprocessor state..
+ */
++ tsk->fpu_counter = 0;
+ clear_fpu(tsk);
+ clear_used_math();
+ }
+@@ -437,11 +379,30 @@ int copy_thread(int nr, unsigned long cl
+ return err;
+ }
+
+-#ifdef CONFIG_SECCOMP
++void
++start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
++{
++ __asm__("movl %0, %%gs" :: "r"(0));
++ regs->fs = 0;
++ set_fs(USER_DS);
++ regs->ds = __USER_DS;
++ regs->es = __USER_DS;
++ regs->ss = __USER_DS;
++ regs->cs = __USER_CS;
++ regs->ip = new_ip;
++ regs->sp = new_sp;
++ /*
++ * Free the old FP and other extended state
++ */
++ free_thread_xstate(current);
++}
++EXPORT_SYMBOL_GPL(start_thread);
++
+ static void hard_disable_TSC(void)
+ {
+ write_cr4(read_cr4() | X86_CR4_TSD);
+ }
++
+ void disable_TSC(void)
+ {
+ preempt_disable();
+@@ -453,11 +414,47 @@ void disable_TSC(void)
+ hard_disable_TSC();
+ preempt_enable();
+ }
++
+ static void hard_enable_TSC(void)
+ {
+ write_cr4(read_cr4() & ~X86_CR4_TSD);
+ }
+-#endif /* CONFIG_SECCOMP */
++
++static void enable_TSC(void)
++{
++ preempt_disable();
++ if (test_and_clear_thread_flag(TIF_NOTSC))
++ /*
++ * Must flip the CPU state synchronously with
++ * TIF_NOTSC in the current running context.
++ */
++ hard_enable_TSC();
++ preempt_enable();
++}
++
++int get_tsc_mode(unsigned long adr)
++{
++ unsigned int val;
++
++ if (test_thread_flag(TIF_NOTSC))
++ val = PR_TSC_SIGSEGV;
++ else
++ val = PR_TSC_ENABLE;
++
++ return put_user(val, (unsigned int __user *)adr);
++}
++
++int set_tsc_mode(unsigned int val)
++{
++ if (val == PR_TSC_SIGSEGV)
++ disable_TSC();
++ else if (val == PR_TSC_ENABLE)
++ enable_TSC();
++ else
++ return -EINVAL;
++
++ return 0;
++}
+
+ static noinline void
+ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p)
+@@ -473,12 +470,12 @@ __switch_to_xtra(struct task_struct *pre
+ /* we clear debugctl to make sure DS
+ * is not in use when we change it */
+ debugctl = 0;
+- wrmsrl(MSR_IA32_DEBUGCTLMSR, 0);
++ update_debugctlmsr(0);
+ wrmsr(MSR_IA32_DS_AREA, next->ds_area_msr, 0);
+ }
+
+ if (next->debugctlmsr != debugctl)
+- wrmsr(MSR_IA32_DEBUGCTLMSR, next->debugctlmsr, 0);
++ update_debugctlmsr(next->debugctlmsr);
+
+ if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
+ set_debugreg(next->debugreg0, 0);
+@@ -490,7 +487,6 @@ __switch_to_xtra(struct task_struct *pre
+ set_debugreg(next->debugreg7, 7);
+ }
+
+-#ifdef CONFIG_SECCOMP
+ if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
+ test_tsk_thread_flag(next_p, TIF_NOTSC)) {
+ /* prev and next are different */
+@@ -499,7 +495,6 @@ __switch_to_xtra(struct task_struct *pre
+ else
+ hard_enable_TSC();
+ }
+-#endif
+
+ #ifdef X86_BTS
+ if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
+@@ -637,7 +632,7 @@ struct task_struct * __switch_to(struct
+
+ /* we're going to use this soon, after a few expensive things */
+ if (next_p->fpu_counter > 5)
+- prefetch(&next->i387.fxsave);
++ prefetch(next->xstate);
+
+ /*
+ * Now maybe handle debug registers
+@@ -658,8 +653,11 @@ struct task_struct * __switch_to(struct
+ /* If the task has used fpu the last 5 timeslices, just do a full
+ * restore of the math state immediately to avoid the trap; the
+ * chances of needing FPU soon are obviously high now
++ *
++ * tsk_used_math() checks prevent calling math_state_restore(),
++ * which can sleep in the case of !tsk_used_math()
+ */
+- if (next_p->fpu_counter > 5)
++ if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
+ math_state_restore();
+
+ /*
+--- sle11-2009-05-14.orig/arch/x86/kernel/process_64-xen.c 2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/arch/x86/kernel/process_64-xen.c 2009-03-16 16:38:05.000000000 +0100
+@@ -39,6 +39,7 @@
+ #include <linux/kprobes.h>
+ #include <linux/kdebug.h>
+ #include <linux/tick.h>
++#include <linux/prctl.h>
+
+ #include <asm/uaccess.h>
+ #include <asm/pgtable.h>
+@@ -102,17 +103,6 @@ void exit_idle(void)
+ __exit_idle();
+ }
+
+-/*
+- * On SMP it's slightly faster (but much more power-consuming!)
+- * to poll the ->need_resched flag instead of waiting for the
+- * cross-CPU IPI to arrive. Use this option with caution.
+- */
+-static void poll_idle(void)
+-{
+- local_irq_enable();
+- cpu_relax();
+-}
+-
+ static void xen_idle(void)
+ {
+ current_thread_info()->status &= ~TS_POLLING;
+@@ -121,20 +111,10 @@ static void xen_idle(void)
+ * test NEED_RESCHED:
+ */
+ smp_mb();
+- local_irq_disable();
+- if (!need_resched()) {
+- ktime_t t0, t1;
+- u64 t0n, t1n;
+-
+- t0 = ktime_get();
+- t0n = ktime_to_ns(t0);
++ if (!need_resched())
+ safe_halt(); /* enables interrupts racelessly */
+- local_irq_disable();
+- t1 = ktime_get();
+- t1n = ktime_to_ns(t1);
+- sched_clock_idle_wakeup_event(t1n - t0n);
+- }
+- local_irq_enable();
++ else
++ local_irq_enable();
+ current_thread_info()->status |= TS_POLLING;
+ }
+
+@@ -195,45 +175,6 @@ void cpu_idle(void)
+ }
+ }
+
+-static void do_nothing(void *unused)
+-{
+-}
+-
+-/*
+- * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
+- * pm_idle and update to new pm_idle value. Required while changing pm_idle
+- * handler on SMP systems.
+- *
+- * Caller must have changed pm_idle to the new value before the call. Old
+- * pm_idle value will not be used by any CPU after the return of this function.
+- */
+-void cpu_idle_wait(void)
+-{
+- smp_mb();
+- /* kick all the CPUs so that they exit out of pm_idle */
+- smp_call_function(do_nothing, NULL, 0, 1);
+-}
+-EXPORT_SYMBOL_GPL(cpu_idle_wait);
+-
+-void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
+-{
+-}
+-
+-static int __init idle_setup(char *str)
+-{
+- if (!strcmp(str, "poll")) {
+- printk("using polling idle threads.\n");
+- pm_idle = poll_idle;
+- } else if (!strcmp(str, "mwait"))
+- force_mwait = 1;
+- else
+- return -1;
+-
+- boot_option_idle_override = 1;
+- return 0;
+-}
+-early_param("idle", idle_setup);
+-
+ /* Prints also some state that isn't saved in the pt_regs */
+ void __show_regs(struct pt_regs * regs)
+ {
+@@ -360,6 +301,7 @@ void flush_thread(void)
+ /*
+ * Forget coprocessor state..
+ */
++ tsk->fpu_counter = 0;
+ clear_fpu(tsk);
+ clear_used_math();
+ }
+@@ -472,6 +414,83 @@ out:
+ return err;
+ }
+
++void
++start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
++{
++ asm volatile("movl %0, %%fs; movl %0, %%es; movl %0, %%ds" :: "r"(0));
++ load_gs_index(0);
++ regs->ip = new_ip;
++ regs->sp = new_sp;
++ write_pda(oldrsp, new_sp);
++ regs->cs = __USER_CS;
++ regs->ss = __USER_DS;
++ regs->flags = 0x200;
++ set_fs(USER_DS);
++ /*
++ * Free the old FP and other extended state
++ */
++ free_thread_xstate(current);
++}
++EXPORT_SYMBOL_GPL(start_thread);
++
++static void hard_disable_TSC(void)
++{
++ write_cr4(read_cr4() | X86_CR4_TSD);
++}
++
++void disable_TSC(void)
++{
++ preempt_disable();
++ if (!test_and_set_thread_flag(TIF_NOTSC))
++ /*
++ * Must flip the CPU state synchronously with
++ * TIF_NOTSC in the current running context.
++ */
++ hard_disable_TSC();
++ preempt_enable();
++}
++
++static void hard_enable_TSC(void)
++{
++ write_cr4(read_cr4() & ~X86_CR4_TSD);
++}
++
++static void enable_TSC(void)
++{
++ preempt_disable();
++ if (test_and_clear_thread_flag(TIF_NOTSC))
++ /*
++ * Must flip the CPU state synchronously with
++ * TIF_NOTSC in the current running context.
++ */
++ hard_enable_TSC();
++ preempt_enable();
++}
++
++int get_tsc_mode(unsigned long adr)
++{
++ unsigned int val;
++
++ if (test_thread_flag(TIF_NOTSC))
++ val = PR_TSC_SIGSEGV;
++ else
++ val = PR_TSC_ENABLE;
++
++ return put_user(val, (unsigned int __user *)adr);
++}
++
++int set_tsc_mode(unsigned int val)
++{
++ if (val == PR_TSC_SIGSEGV)
++ disable_TSC();
++ else if (val == PR_TSC_ENABLE)
++ enable_TSC();
++ else
++ return -EINVAL;
++
++ return 0;
++}
++
+ /*
+ * This special macro can be used to load a debugging register
+ */
+@@ -491,12 +510,12 @@ static inline void __switch_to_xtra(stru
+ /* we clear debugctl to make sure DS
+ * is not in use when we change it */
+ debugctl = 0;
+- wrmsrl(MSR_IA32_DEBUGCTLMSR, 0);
++ update_debugctlmsr(0);
+ wrmsrl(MSR_IA32_DS_AREA, next->ds_area_msr);
+ }
+
+ if (next->debugctlmsr != debugctl)
+- wrmsrl(MSR_IA32_DEBUGCTLMSR, next->debugctlmsr);
++ update_debugctlmsr(next->debugctlmsr);
+
+ if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
+ loaddebug(next, 0);
+@@ -508,6 +527,15 @@ static inline void __switch_to_xtra(stru
+ loaddebug(next, 7);
+ }
+
++ if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
++ test_tsk_thread_flag(next_p, TIF_NOTSC)) {
++ /* prev and next are different */
++ if (test_tsk_thread_flag(next_p, TIF_NOTSC))
++ hard_disable_TSC();
++ else
++ hard_enable_TSC();
++ }
++
+ #ifdef X86_BTS
+ if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
+ ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);
+@@ -547,7 +575,7 @@ __switch_to(struct task_struct *prev_p,
+
+ /* we're going to use this soon, after a few expensive things */
+ if (next_p->fpu_counter>5)
+- prefetch(&next->i387.fxsave);
++ prefetch(next->xstate);
+
+ /*
+ * This is basically '__unlazy_fpu', except that we queue a
+@@ -680,8 +708,11 @@ __switch_to(struct task_struct *prev_p,
+ /* If the task has used fpu the last 5 timeslices, just do a full
+ * restore of the math state immediately to avoid the trap; the
+ * chances of needing FPU soon are obviously high now
++ *
++ * tsk_used_math() checks prevent calling math_state_restore(),
++ * which can sleep in the case of !tsk_used_math()
+ */
+- if (next_p->fpu_counter>5)
++ if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
+ math_state_restore();
+ return prev_p;
+ }
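
Both process_32-xen.c and process_64-xen.c gain get_tsc_mode()/set_tsc_mode(): the prctl value is mirrored into the per-thread TIF_NOTSC flag, and __switch_to_xtra() flips CR4.TSD whenever that flag differs between the outgoing and incoming task. A toy model of the mode-to-flag mapping (constants invented for illustration; the real helpers also toggle CR4):

    #include <stdio.h>

    /* Toy equivalents of PR_TSC_ENABLE / PR_TSC_SIGSEGV and TIF_NOTSC. */
    enum { TSC_ENABLE = 1, TSC_SIGSEGV = 2 };
    static int tif_notsc;                /* per-thread flag in the real kernel */

    static int get_tsc_mode(void)
    {
        return tif_notsc ? TSC_SIGSEGV : TSC_ENABLE;
    }

    static int set_tsc_mode(int val)
    {
        if (val == TSC_SIGSEGV)
            tif_notsc = 1;               /* kernel also sets CR4.TSD here */
        else if (val == TSC_ENABLE)
            tif_notsc = 0;               /* ...and clears CR4.TSD */
        else
            return -1;                   /* -EINVAL in the kernel */
        return 0;
    }

    int main(void)
    {
        set_tsc_mode(TSC_SIGSEGV);
        printf("mode=%d (expect %d)\n", get_tsc_mode(), TSC_SIGSEGV);
        set_tsc_mode(TSC_ENABLE);
        printf("mode=%d (expect %d)\n", get_tsc_mode(), TSC_ENABLE);
        return 0;
    }
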
+--- /dev/null 1970-01-01 00:00:00.000000000 +0000
++++ sle11-2009-05-14/arch/x86/kernel/setup-xen.c 2009-03-16 16:38:05.000000000 +0100
+@@ -0,0 +1,141 @@
++#include <linux/kernel.h>
++#include <linux/module.h>
++#include <linux/init.h>
++#include <linux/bootmem.h>
++#include <linux/percpu.h>
++#include <asm/smp.h>
++#include <asm/percpu.h>
++#include <asm/sections.h>
++#include <asm/processor.h>
++#include <asm/setup.h>
++#include <asm/topology.h>
++#include <asm/mpspec.h>
++#include <asm/apicdef.h>
++
++#ifdef CONFIG_X86_LOCAL_APIC
++unsigned int num_processors;
++unsigned disabled_cpus __cpuinitdata;
++/* Processor that is doing the boot up */
++unsigned int boot_cpu_physical_apicid = -1U;
++EXPORT_SYMBOL(boot_cpu_physical_apicid);
++
++DEFINE_PER_CPU(u16, x86_cpu_to_apicid) = BAD_APICID;
++EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid);
++
++/* Bitmask of physically existing CPUs */
++physid_mask_t phys_cpu_present_map;
++#endif
++
++#if defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) && defined(CONFIG_X86_SMP)
++/*
++ * Copy data used in early init routines from the initial arrays to the
++ * per cpu data areas. These arrays then become expendable and the
++ * *_early_ptr's are zeroed indicating that the static arrays are gone.
++ */
++static void __init setup_per_cpu_maps(void)
++{
++#ifndef CONFIG_XEN
++ int cpu;
++
++ for_each_possible_cpu(cpu) {
++ per_cpu(x86_cpu_to_apicid, cpu) = x86_cpu_to_apicid_init[cpu];
++ per_cpu(x86_bios_cpu_apicid, cpu) =
++ x86_bios_cpu_apicid_init[cpu];
++#ifdef CONFIG_NUMA
++ per_cpu(x86_cpu_to_node_map, cpu) =
++ x86_cpu_to_node_map_init[cpu];
++#endif
++ }
++
++ /* indicate the early static arrays will soon be gone */
++ x86_cpu_to_apicid_early_ptr = NULL;
++ x86_bios_cpu_apicid_early_ptr = NULL;
++#ifdef CONFIG_NUMA
++ x86_cpu_to_node_map_early_ptr = NULL;
++#endif
++#endif
++}
++
++#ifdef CONFIG_HAVE_CPUMASK_OF_CPU_MAP
++cpumask_t *cpumask_of_cpu_map __read_mostly;
++EXPORT_SYMBOL(cpumask_of_cpu_map);
++
++/* requires nr_cpu_ids to be initialized */
++static void __init setup_cpumask_of_cpu(void)
++{
++ int i;
++
++ /* alloc_bootmem zeroes memory */
++ cpumask_of_cpu_map = alloc_bootmem_low(sizeof(cpumask_t) * nr_cpu_ids);
++ for (i = 0; i < nr_cpu_ids; i++)
++ cpu_set(i, cpumask_of_cpu_map[i]);
++}
++#else
++static inline void setup_cpumask_of_cpu(void) { }
++#endif
++
++#ifdef CONFIG_X86_32
++/*
++ * Great future not-so-futuristic plan: make i386 and x86_64 do it
++ * the same way
++ */
++unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
++EXPORT_SYMBOL(__per_cpu_offset);
++#endif
++
++/*
++ * Great future plan:
++ * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data.
++ * Always point %gs to its beginning
++ */
++void __init setup_per_cpu_areas(void)
++{
++ int i, highest_cpu = 0;
++ unsigned long size;
++
++#ifdef CONFIG_HOTPLUG_CPU
++ prefill_possible_map();
++#endif
++
++ /* Copy section for each CPU (we discard the original) */
++ size = PERCPU_ENOUGH_ROOM;
++ printk(KERN_INFO "PERCPU: Allocating %lu bytes of per cpu data\n",
++ size);
++
++ for_each_possible_cpu(i) {
++ char *ptr;
++#ifndef CONFIG_NEED_MULTIPLE_NODES
++ ptr = alloc_bootmem_pages(size);
++#else
++ int node = early_cpu_to_node(i);
++ if (!node_online(node) || !NODE_DATA(node)) {
++ ptr = alloc_bootmem_pages(size);
++ printk(KERN_INFO
++ "cpu %d has no node or node-local memory\n", i);
++ }
++ else
++ ptr = alloc_bootmem_pages_node(NODE_DATA(node), size);
++#endif
++ if (!ptr)
++ panic("Cannot allocate cpu data for CPU %d\n", i);
++#ifdef CONFIG_X86_64
++ cpu_pda(i)->data_offset = ptr - __per_cpu_start;
++#else
++ __per_cpu_offset[i] = ptr - __per_cpu_start;
++#endif
++ memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
++
++ highest_cpu = i;
++ }
++
++ nr_cpu_ids = highest_cpu + 1;
++ printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d\n", NR_CPUS, nr_cpu_ids);
++
++ /* Setup percpu data maps */
++ setup_per_cpu_maps();
++
++ /* Setup cpumask_of_cpu map */
++ setup_cpumask_of_cpu();
++}
++
++#endif
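
setup-xen.c now hosts the unified setup_per_cpu_areas(): one PERCPU_ENOUGH_ROOM block per possible CPU, a copy of the per-CPU template into each block, and the resulting offset recorded in __per_cpu_offset (or the PDA on 64-bit). A userspace sketch of that bookkeeping, with malloc standing in for alloc_bootmem_pages():

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <stdint.h>

    #define NR_CPUS 4

    /* Template every CPU gets a private copy of (stands in for .data.percpu). */
    static char percpu_template[64] = "per-cpu defaults";
    static intptr_t per_cpu_offset[NR_CPUS];

    static void setup_per_cpu_areas_toy(void)
    {
        int cpu;

        for (cpu = 0; cpu < NR_CPUS; cpu++) {
            char *ptr = malloc(sizeof(percpu_template));  /* alloc_bootmem_pages() */

            if (!ptr) {
                fprintf(stderr, "Cannot allocate cpu data for CPU %d\n", cpu);
                exit(1);
            }
            memcpy(ptr, percpu_template, sizeof(percpu_template));
            /* record how far this CPU's copy sits from the template */
            per_cpu_offset[cpu] = (intptr_t)ptr - (intptr_t)percpu_template;
        }
    }

    /* per_cpu(var, cpu) boils down to "address of var + that CPU's offset". */
    static char *per_cpu_ptr_toy(char *var, int cpu)
    {
        return var + per_cpu_offset[cpu];
    }

    int main(void)
    {
        setup_per_cpu_areas_toy();
        strcpy(per_cpu_ptr_toy(percpu_template, 2), "cpu2 private");
        printf("cpu0: %s\n", per_cpu_ptr_toy(percpu_template, 0));
        printf("cpu2: %s\n", per_cpu_ptr_toy(percpu_template, 2));
        return 0;
    }
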
+--- sle11-2009-05-14.orig/arch/x86/kernel/setup64-xen.c 2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/arch/x86/kernel/setup64-xen.c 2009-03-16 16:38:05.000000000 +0100
+@@ -15,6 +15,7 @@
+ #include <linux/bootmem.h>
+ #include <linux/bitops.h>
+ #include <linux/module.h>
++#include <linux/kgdb.h>
+ #include <asm/pda.h>
+ #include <asm/pgtable.h>
+ #include <asm/processor.h>
+@@ -27,6 +28,7 @@
+ #include <asm/proto.h>
+ #include <asm/sections.h>
+ #include <asm/setup.h>
++#include <asm/genapic.h>
+ #ifdef CONFIG_XEN
+ #include <asm/hypervisor.h>
+ #endif
+@@ -81,8 +83,8 @@ int force_personality32 = 0;
+ Control non executable heap for 32bit processes.
+ To control the stack too use noexec=off
+
+-on PROT_READ does not imply PROT_EXEC for 32bit processes
+-off PROT_READ implies PROT_EXEC (default)
++on PROT_READ does not imply PROT_EXEC for 32bit processes (default)
++off PROT_READ implies PROT_EXEC
+ */
+ static int __init nonx32_setup(char *str)
+ {
+@@ -94,85 +96,6 @@ static int __init nonx32_setup(char *str
+ }
+ __setup("noexec32=", nonx32_setup);
+
+-/*
+- * Copy data used in early init routines from the initial arrays to the
+- * per cpu data areas. These arrays then become expendable and the
+- * *_early_ptr's are zeroed indicating that the static arrays are gone.
+- */
+-static void __init setup_per_cpu_maps(void)
+-{
+-#ifndef CONFIG_XEN
+- int cpu;
+-
+- for_each_possible_cpu(cpu) {
+-#ifdef CONFIG_SMP
+- if (per_cpu_offset(cpu)) {
+-#endif
+- per_cpu(x86_cpu_to_apicid, cpu) =
+- x86_cpu_to_apicid_init[cpu];
+- per_cpu(x86_bios_cpu_apicid, cpu) =
+- x86_bios_cpu_apicid_init[cpu];
+-#ifdef CONFIG_NUMA
+- per_cpu(x86_cpu_to_node_map, cpu) =
+- x86_cpu_to_node_map_init[cpu];
+-#endif
+-#ifdef CONFIG_SMP
+- }
+- else
+- printk(KERN_NOTICE "per_cpu_offset zero for cpu %d\n",
+- cpu);
+-#endif
+- }
+-
+- /* indicate the early static arrays will soon be gone */
+- x86_cpu_to_apicid_early_ptr = NULL;
+- x86_bios_cpu_apicid_early_ptr = NULL;
+-#ifdef CONFIG_NUMA
+- x86_cpu_to_node_map_early_ptr = NULL;
+-#endif
+-#endif
+-}
+-
+-/*
+- * Great future plan:
+- * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data.
+- * Always point %gs to its beginning
+- */
+-void __init setup_per_cpu_areas(void)
+-{
+- int i;
+- unsigned long size;
+-
+-#ifdef CONFIG_HOTPLUG_CPU
+- prefill_possible_map();
+-#endif
+-
+- /* Copy section for each CPU (we discard the original) */
+- size = PERCPU_ENOUGH_ROOM;
+-
+- printk(KERN_INFO "PERCPU: Allocating %lu bytes of per cpu data\n", size);
+- for_each_cpu_mask (i, cpu_possible_map) {
+- char *ptr;
+-#ifndef CONFIG_NEED_MULTIPLE_NODES
+- ptr = alloc_bootmem_pages(size);
+-#else
+- int node = early_cpu_to_node(i);
+-
+- if (!node_online(node) || !NODE_DATA(node))
+- ptr = alloc_bootmem_pages(size);
+- else
+- ptr = alloc_bootmem_pages_node(NODE_DATA(node), size);
+-#endif
+- if (!ptr)
+- panic("Cannot allocate cpu data for CPU %d\n", i);
+- cpu_pda(i)->data_offset = ptr - __per_cpu_start;
+- memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
+- }
+-
+- /* setup percpu data maps early */
+- setup_per_cpu_maps();
+-}
+-
+ #ifdef CONFIG_XEN
+ static void __init_refok switch_pt(int cpu)
+ {
+@@ -410,6 +333,17 @@ void __cpuinit cpu_init (void)
+ #endif
+ load_LDT(&init_mm.context);
+
++#ifdef CONFIG_KGDB
++ /*
++ * If the kgdb is connected no debug regs should be altered. This
++ * is only applicable when KGDB and a KGDB I/O module are built
++ * into the kernel and you are using early debugging with
++ * kgdbwait. KGDB will control the kernel HW breakpoint registers.
++ */
++ if (kgdb_connected && arch_kgdb_ops.correct_hw_break)
++ arch_kgdb_ops.correct_hw_break();
++ else {
++#endif
+ /*
+ * Clear all 6 debug registers:
+ */
+@@ -420,10 +354,17 @@ void __cpuinit cpu_init (void)
+ set_debugreg(0UL, 3);
+ set_debugreg(0UL, 6);
+ set_debugreg(0UL, 7);
++#ifdef CONFIG_KGDB
++ /* If the kgdb is connected no debug regs should be altered. */
++ }
++#endif
+
+ fpu_init();
+
+ asm ("pushfq; popq %0" : "=rm" (kernel_eflags));
+ if (raw_irqs_disabled())
+ kernel_eflags &= ~X86_EFLAGS_IF;
++
++ if (is_uv_system())
++ uv_cpu_init();
+ }
+--- sle11-2009-05-14.orig/arch/x86/kernel/setup_32-xen.c 2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/arch/x86/kernel/setup_32-xen.c 2009-03-16 16:38:05.000000000 +0100
+@@ -39,6 +39,7 @@
+ #include <linux/efi.h>
+ #include <linux/init.h>
+ #include <linux/edd.h>
++#include <linux/iscsi_ibft.h>
+ #include <linux/nodemask.h>
+ #include <linux/kernel.h>
+ #include <linux/percpu.h>
+@@ -49,6 +50,7 @@
+ #include <linux/pfn.h>
+ #include <linux/pci.h>
+ #include <linux/init_ohci1394_dma.h>
++#include <linux/kvm_para.h>
+
+ #include <video/edid.h>
+
+@@ -70,8 +72,9 @@
+ #include <xen/firmware.h>
+ #include <xen/xencons.h>
+ #include <setup_arch.h>
+-#include <bios_ebda.h>
++#include <asm/bios_ebda.h>
+ #include <asm/cacheflush.h>
++#include <asm/processor.h>
+
+ #ifdef CONFIG_XEN
+ #include <xen/interface/kexec.h>
+@@ -136,7 +139,12 @@ static struct resource standard_io_resou
+ }, {
+ .name = "keyboard",
+ .start = 0x0060,
+- .end = 0x006f,
++ .end = 0x0060,
++ .flags = IORESOURCE_BUSY | IORESOURCE_IO
++}, {
++ .name = "keyboard",
++ .start = 0x0064,
++ .end = 0x0064,
+ .flags = IORESOURCE_BUSY | IORESOURCE_IO
+ }, {
+ .name = "dma page reg",
+@@ -166,6 +174,8 @@ struct cpuinfo_x86 new_cpu_data __cpuini
+ struct cpuinfo_x86 boot_cpu_data __read_mostly = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
+ EXPORT_SYMBOL(boot_cpu_data);
+
++unsigned int def_to_bigsmp;
++
+ #ifndef CONFIG_X86_PAE
+ unsigned long mmu_cr4_features;
+ #else
+@@ -204,7 +214,7 @@ EXPORT_SYMBOL(ist_info);
+ extern void early_cpu_init(void);
+ extern int root_mountflags;
+
+-unsigned long saved_videomode;
++unsigned long saved_video_mode;
+
+ #define RAMDISK_IMAGE_START_MASK 0x07FF
+ #define RAMDISK_PROMPT_FLAG 0x8000
+@@ -259,7 +269,7 @@ static inline void copy_edd(void)
+ }
+ #endif
+
+-int __initdata user_defined_memmap = 0;
++int __initdata user_defined_memmap;
+
+ /*
+ * "mem=nopentium" disables the 4MB page tables.
+@@ -420,20 +430,59 @@ unsigned long __init find_max_low_pfn(vo
+ }
+
+ #ifndef CONFIG_XEN
++#define BIOS_LOWMEM_KILOBYTES 0x413
++
+ /*
+- * workaround for Dell systems that neglect to reserve EBDA
++ * The BIOS places the EBDA/XBDA at the top of conventional
++ * memory, and usually decreases the reported amount of
++ * conventional memory (int 0x12) too. This also contains a
++ * workaround for Dell systems that neglect to reserve EBDA.
++ * The same workaround also avoids a problem with the AMD768MPX
++ * chipset: reserve a page before VGA to prevent PCI prefetch
++ * into it (errata #56). Usually the page is reserved anyways,
++ * unless you have no PS/2 mouse plugged in.
+ */
+ static void __init reserve_ebda_region(void)
+ {
+- unsigned int addr;
+- addr = get_bios_ebda();
+- if (addr)
+- reserve_bootmem(addr, PAGE_SIZE, BOOTMEM_DEFAULT);
++ unsigned int lowmem, ebda_addr;
++
++ /* To determine the position of the EBDA and the */
++ /* end of conventional memory, we need to look at */
++ /* the BIOS data area. In a paravirtual environment */
++ /* that area is absent. We'll just have to assume */
++ /* that the paravirt case can handle memory setup */
++ /* correctly, without our help. */
++ if (paravirt_enabled())
++ return;
++
++ /* end of low (conventional) memory */
++ lowmem = *(unsigned short *)__va(BIOS_LOWMEM_KILOBYTES);
++ lowmem <<= 10;
++
++ /* start of EBDA area */
++ ebda_addr = get_bios_ebda();
++
++ /* Fixup: bios puts an EBDA in the top 64K segment */
++ /* of conventional memory, but does not adjust lowmem. */
++ if ((lowmem - ebda_addr) <= 0x10000)
++ lowmem = ebda_addr;
++
++ /* Fixup: bios does not report an EBDA at all. */
++ /* Some old Dells seem to need 4k anyhow (bugzilla 2990) */
++ if ((ebda_addr == 0) && (lowmem >= 0x9f000))
++ lowmem = 0x9f000;
++
++ /* Paranoia: should never happen, but... */
++ if ((lowmem == 0) || (lowmem >= 0x100000))
++ lowmem = 0x9f000;
++
++ /* reserve all memory between lowmem and the 1MB mark */
++ reserve_bootmem(lowmem, 0x100000 - lowmem, BOOTMEM_DEFAULT);
+ }
+ #endif
+
+ #ifndef CONFIG_NEED_MULTIPLE_NODES
+-void __init setup_bootmem_allocator(void);
++static void __init setup_bootmem_allocator(void);
+ static unsigned long __init setup_memory(void)
+ {
+ /*
+@@ -469,7 +518,7 @@ static unsigned long __init setup_memory
+ return max_low_pfn;
+ }
+
+-void __init zone_sizes_init(void)
++static void __init zone_sizes_init(void)
+ {
+ unsigned long max_zone_pfns[MAX_NR_ZONES];
+ memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
+@@ -521,10 +570,16 @@ static void __init reserve_crashkernel(v
+ (unsigned long)(crash_size >> 20),
+ (unsigned long)(crash_base >> 20),
+ (unsigned long)(total_mem >> 20));
++
++ if (reserve_bootmem(crash_base, crash_size,
++ BOOTMEM_EXCLUSIVE) < 0) {
++ printk(KERN_INFO "crashkernel reservation "
++ "failed - memory is in use\n");
++ return;
++ }
++
+ crashk_res.start = crash_base;
+ crashk_res.end = crash_base + crash_size - 1;
+- reserve_bootmem(crash_base, crash_size,
+- BOOTMEM_DEFAULT);
+ } else
+ printk(KERN_INFO "crashkernel reservation failed - "
+ "you have to specify a base address\n");
+@@ -658,16 +713,9 @@ void __init setup_bootmem_allocator(void
+ */
+ reserve_bootmem(0, PAGE_SIZE, BOOTMEM_DEFAULT);
+
+- /* reserve EBDA region, it's a 4K region */
++ /* reserve EBDA region */
+ reserve_ebda_region();
+
+- /* could be an AMD 768MPX chipset. Reserve a page before VGA to prevent
+- PCI prefetch into it (errata #56). Usually the page is reserved anyways,
+- unless you have no PS/2 mouse plugged in. */
+- if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
+- boot_cpu_data.x86 == 6)
+- reserve_bootmem(0xa0000 - 4096, 4096, BOOTMEM_DEFAULT);
+-
+ #ifdef CONFIG_SMP
+ /*
+ * But first pinch a few for the stack/trampoline stuff
+@@ -689,6 +737,8 @@ void __init setup_bootmem_allocator(void
+ #endif
+ numa_kva_reserve();
+ reserve_crashkernel();
++
++ reserve_ibft_region();
+ }
+
+ /*
+@@ -724,6 +774,18 @@ char * __init __attribute__((weak)) memo
+ return machine_specific_memory_setup();
+ }
+
++#ifdef CONFIG_NUMA
++/*
++ * In the golden day, when everything among i386 and x86_64 will be
++ * integrated, this will not live here
++ */
++void *x86_cpu_to_node_map_early_ptr;
++int x86_cpu_to_node_map_init[NR_CPUS] = {
++ [0 ... NR_CPUS-1] = NUMA_NO_NODE
++};
++DEFINE_PER_CPU(int, x86_cpu_to_node_map) = NUMA_NO_NODE;
++#endif
++
+ /*
+ * Determine if we were loaded by an EFI loader. If so, then we have also been
+ * passed the efi memmap, systab, etc., so we should use these data structures
+@@ -773,7 +835,7 @@ void __init setup_arch(char **cmdline_p)
+ copy_edid();
+ apm_info.bios = boot_params.apm_bios_info;
+ ist_info = boot_params.ist_info;
+- saved_videomode = boot_params.hdr.vid_mode;
++ saved_video_mode = boot_params.hdr.vid_mode;
+ if( boot_params.sys_desc_table.length != 0 ) {
+ set_mca_bus(boot_params.sys_desc_table.table[3] & 0x2);
+ machine_id = boot_params.sys_desc_table.table[0];
+@@ -840,15 +902,19 @@ void __init setup_arch(char **cmdline_p)
+ efi_init();
+
+ /* update e820 for memory not covered by WB MTRRs */
+- find_max_pfn();
++ propagate_e820_map();
+ mtrr_bp_init();
+ #ifndef CONFIG_XEN
+ if (mtrr_trim_uncached_memory(max_pfn))
+- find_max_pfn();
++ propagate_e820_map();
+ #endif
+
+ max_low_pfn = setup_memory();
+
++#ifdef CONFIG_KVM_CLOCK
++ kvmclock_init();
++#endif
++
+ #ifdef CONFIG_VMI
+ /*
+ * Must be after max_low_pfn is determined, and before kernel
+@@ -856,6 +922,7 @@ void __init setup_arch(char **cmdline_p)
+ */
+ vmi_init();
+ #endif
++ kvm_guest_init();
+
+ /*
+ * NOTE: before this point _nobody_ is allowed to allocate
+@@ -977,6 +1044,18 @@ void __init setup_arch(char **cmdline_p)
+
+ io_delay_init();
+
++#if defined(CONFIG_X86_SMP) && !defined(CONFIG_XEN)
++ /*
++ * setup to use the early static init tables during kernel startup
++ * X86_SMP will exclude sub-arches that don't deal well with it.
++ */
++ x86_cpu_to_apicid_early_ptr = (void *)x86_cpu_to_apicid_init;
++ x86_bios_cpu_apicid_early_ptr = (void *)x86_bios_cpu_apicid_init;
++#ifdef CONFIG_NUMA
++ x86_cpu_to_node_map_early_ptr = (void *)x86_cpu_to_node_map_init;
++#endif
++#endif
++
+ #ifdef CONFIG_X86_GENERICARCH
+ generic_apic_probe();
+ #endif
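
The rewritten reserve_ebda_region() in setup_32-xen.c derives the end of conventional memory from the BIOS data area, clamps it against the reported EBDA start, and then reserves everything from that point up to the 1MB mark. The clamping itself is plain arithmetic and easy to model standalone (the inputs below are example values, not real firmware data):

    #include <stdio.h>

    /* Returns the start of the region to reserve, given the BIOS-reported values. */
    static unsigned int ebda_reserve_start(unsigned int lowmem_kb, unsigned int ebda_addr)
    {
        unsigned int lowmem = lowmem_kb << 10;   /* the int 0x12 value is in KB */

        /* BIOS put an EBDA in the top 64K but did not shrink lowmem accordingly */
        if (lowmem - ebda_addr <= 0x10000)
            lowmem = ebda_addr;

        /* no EBDA reported at all: old Dells still want 4K reserved (bugzilla 2990) */
        if (ebda_addr == 0 && lowmem >= 0x9f000)
            lowmem = 0x9f000;

        /* paranoia: should never happen, but... */
        if (lowmem == 0 || lowmem >= 0x100000)
            lowmem = 0x9f000;

        return lowmem;                           /* reserve [lowmem, 1MB) */
    }

    int main(void)
    {
        printf("%#x\n", ebda_reserve_start(639, 0x9fc00));  /* typical: 0x9fc00 */
        printf("%#x\n", ebda_reserve_start(640, 0));        /* no EBDA: 0x9f000 */
        return 0;
    }
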
+--- sle11-2009-05-14.orig/arch/x86/kernel/setup_64-xen.c 2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/arch/x86/kernel/setup_64-xen.c 2009-03-16 16:38:05.000000000 +0100
+@@ -29,18 +29,22 @@
+ #include <linux/crash_dump.h>
+ #include <linux/root_dev.h>
+ #include <linux/pci.h>
++#include <asm/pci-direct.h>
+ #include <linux/efi.h>
+ #include <linux/acpi.h>
+ #include <linux/kallsyms.h>
+ #include <linux/edd.h>
++#include <linux/iscsi_ibft.h>
+ #include <linux/mmzone.h>
+ #include <linux/kexec.h>
+ #include <linux/cpufreq.h>
+ #include <linux/dmi.h>
+ #include <linux/dma-mapping.h>
+ #include <linux/ctype.h>
++#include <linux/sort.h>
+ #include <linux/uaccess.h>
+ #include <linux/init_ohci1394_dma.h>
++#include <linux/kvm_para.h>
+
+ #include <asm/mtrr.h>
+ #include <asm/uaccess.h>
+@@ -58,7 +62,6 @@
+ #include <asm/mmu_context.h>
+ #include <asm/proto.h>
+ #include <asm/setup.h>
+-#include <asm/mach_apic.h>
+ #include <asm/numa.h>
+ #include <asm/sections.h>
+ #include <asm/dmi.h>
+@@ -66,6 +69,9 @@
+ #include <asm/mce.h>
+ #include <asm/ds.h>
+ #include <asm/topology.h>
++#include <asm/pat.h>
++
++#include <mach_apic.h>
+ #ifdef CONFIG_XEN
+ #include <linux/percpu.h>
+ #include <xen/interface/physdev.h>
+@@ -149,7 +155,7 @@ extern int root_mountflags;
+
+ char __initdata command_line[COMMAND_LINE_SIZE];
+
+-struct resource standard_io_resources[] = {
++static struct resource standard_io_resources[] = {
+ { .name = "dma1", .start = 0x00, .end = 0x1f,
+ .flags = IORESOURCE_BUSY | IORESOURCE_IO },
+ { .name = "pic1", .start = 0x20, .end = 0x21,
+@@ -158,7 +164,9 @@ struct resource standard_io_resources[]
+ .flags = IORESOURCE_BUSY | IORESOURCE_IO },
+ { .name = "timer1", .start = 0x50, .end = 0x53,
+ .flags = IORESOURCE_BUSY | IORESOURCE_IO },
+- { .name = "keyboard", .start = 0x60, .end = 0x6f,
++ { .name = "keyboard", .start = 0x60, .end = 0x60,
++ .flags = IORESOURCE_BUSY | IORESOURCE_IO },
++ { .name = "keyboard", .start = 0x64, .end = 0x64,
+ .flags = IORESOURCE_BUSY | IORESOURCE_IO },
+ { .name = "dma page reg", .start = 0x80, .end = 0x8f,
+ .flags = IORESOURCE_BUSY | IORESOURCE_IO },
+@@ -224,8 +232,10 @@ contig_initmem_init(unsigned long start_
+ e820_register_active_regions(0, start_pfn, end_pfn);
+ #ifdef CONFIG_XEN
+ free_bootmem_with_active_regions(0, xen_start_info->nr_pages);
++ early_res_to_bootmem(0, xen_start_info->nr_pages<<PAGE_SHIFT);
+ #else
+ free_bootmem_with_active_regions(0, end_pfn);
++ early_res_to_bootmem(0, end_pfn<<PAGE_SHIFT);
+ #endif
+ reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT);
+ }
+@@ -290,6 +300,7 @@ static void __init reserve_crashkernel(v
+ (unsigned long)(total_mem >> 20));
+ crashk_res.start = crash_base;
+ crashk_res.end = crash_base + crash_size - 1;
++ insert_resource(&iomem_resource, &crashk_res);
+ }
+ }
+ #else
+@@ -306,6 +317,40 @@ void __attribute__((weak)) __init memory
+ machine_specific_memory_setup();
+ }
+
++static void __init parse_setup_data(void)
++{
++ struct setup_data *data;
++ unsigned long pa_data;
++
++ if (boot_params.hdr.version < 0x0209)
++ return;
++ pa_data = boot_params.hdr.setup_data;
++ while (pa_data) {
++ data = early_ioremap(pa_data, PAGE_SIZE);
++ switch (data->type) {
++ default:
++ break;
++ }
++#ifndef CONFIG_DEBUG_BOOT_PARAMS
++ free_early(pa_data, pa_data+sizeof(*data)+data->len);
++#endif
++ pa_data = data->next;
++ early_iounmap(data, PAGE_SIZE);
++ }
++}
++
++#ifdef CONFIG_PCI_MMCONFIG
++extern void __cpuinit fam10h_check_enable_mmcfg(void);
++extern void __init check_enable_amd_mmconf_dmi(void);
++#else
++void __cpuinit fam10h_check_enable_mmcfg(void)
++{
++}
++void __init check_enable_amd_mmconf_dmi(void)
++{
++}
++#endif
++
+ /*
+ * setup_arch - architecture-specific boot-time initializations
+ *
+@@ -389,6 +434,8 @@ void __init setup_arch(char **cmdline_p)
+ strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
+ *cmdline_p = command_line;
+
++ parse_setup_data();
++
+ parse_early_param();
+
+ #ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
+@@ -398,6 +445,13 @@ void __init setup_arch(char **cmdline_p)
+
+ finish_e820_parsing();
+
++#ifndef CONFIG_XEN
++	/* after parse_early_param, so we can debug it */
++ insert_resource(&iomem_resource, &code_resource);
++ insert_resource(&iomem_resource, &data_resource);
++ insert_resource(&iomem_resource, &bss_resource);
++#endif
++
+ early_gart_iommu_check();
+
+ e820_register_active_regions(0, 0, -1UL);
+@@ -420,15 +474,23 @@ void __init setup_arch(char **cmdline_p)
+
+ check_efer();
+
+- init_memory_mapping(0, (end_pfn_map << PAGE_SHIFT));
++ max_pfn_mapped = init_memory_mapping(0, (max_pfn_mapped << PAGE_SHIFT));
+ if (efi_enabled)
+ efi_init();
+
++#ifndef CONFIG_XEN
++ vsmp_init();
++#endif
++
+ if (is_initial_xendomain())
+ dmi_scan_machine();
+
+ io_delay_init();
+
++#ifdef CONFIG_KVM_CLOCK
++ kvmclock_init();
++#endif
++
+ #if defined(CONFIG_SMP) && !defined(CONFIG_XEN)
+ /* setup to use the early static init tables during kernel startup */
+ x86_cpu_to_apicid_early_ptr = (void *)x86_cpu_to_apicid_init;
+@@ -459,9 +521,9 @@ void __init setup_arch(char **cmdline_p)
+ contig_initmem_init(0, end_pfn);
+ #endif
+
+- early_res_to_bootmem();
+-
+ #ifndef CONFIG_XEN
++ dma32_reserve_bootmem();
++
+ #ifdef CONFIG_ACPI_SLEEP
+ /*
+ * Reserve low memory region for sleep support.
+@@ -487,16 +549,17 @@ void __init setup_arch(char **cmdline_p)
+ unsigned long end_of_mem = end_pfn << PAGE_SHIFT;
+
+ if (ramdisk_end <= end_of_mem) {
+-#ifndef CONFIG_XEN
+- reserve_bootmem_generic(ramdisk_image, ramdisk_size);
+-#endif
++ /*
++			 * No need to reserve the ramdisk again: it was already
++			 * reserved early in x86_64_start_kernel, and
++			 * early_res_to_bootmem converts that into a bootmem reservation
++ */
+ initrd_start = ramdisk_image + PAGE_OFFSET;
+ initrd_end = initrd_start+ramdisk_size;
+ #ifdef CONFIG_XEN
+ initrd_below_start_ok = 1;
+ #endif
+ } else {
+- /* Assumes everything on node 0 */
+ free_bootmem(ramdisk_image, ramdisk_size);
+ printk(KERN_ERR "initrd extends beyond end of memory "
+ "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
+@@ -506,6 +569,9 @@ void __init setup_arch(char **cmdline_p)
+ }
+ #endif
+ reserve_crashkernel();
++
++ reserve_ibft_region();
++
+ paging_init();
+ map_vsyscall();
+ #ifdef CONFIG_X86_LOCAL_APIC
+@@ -633,16 +699,16 @@ void __init setup_arch(char **cmdline_p)
+ prefill_possible_map();
+ #endif
+
++ kvm_guest_init();
++
+ /*
+ * We trust e820 completely. No explicit ROM probing in memory.
+ */
+ #ifdef CONFIG_XEN
+ if (is_initial_xendomain())
+- e820_reserve_resources(machine_e820.map, machine_e820.nr_map,
+- &code_resource, &data_resource, &bss_resource);
++ e820_reserve_resources(machine_e820.map, machine_e820.nr_map);
+ #else
+- e820_reserve_resources(e820.map, e820.nr_map,
+- &code_resource, &data_resource, &bss_resource);
++ e820_reserve_resources(e820.map, e820.nr_map);
+ e820_mark_nosave_regions();
+ #endif
+
+@@ -690,6 +756,9 @@ void __init setup_arch(char **cmdline_p)
+ #endif
+
+ #endif /* !CONFIG_XEN */
++
++ /* do this before identify_cpu for boot cpu */
++ check_enable_amd_mmconf_dmi();
+ }
+
+ #ifdef CONFIG_XEN
+@@ -786,9 +855,9 @@ static void __cpuinit amd_detect_cmp(str
+ bits = c->x86_coreid_bits;
+
+ /* Low order bits define the core id (index of core in socket) */
+- c->cpu_core_id = c->phys_proc_id & ((1 << bits)-1);
+- /* Convert the APIC ID into the socket ID */
+- c->phys_proc_id = phys_pkg_id(bits);
++ c->cpu_core_id = c->initial_apicid & ((1 << bits)-1);
++ /* Convert the initial APIC ID into the socket ID */
++ c->phys_proc_id = c->initial_apicid >> bits;
+
+ #ifdef CONFIG_NUMA
+ node = c->phys_proc_id;
+@@ -805,7 +874,7 @@ static void __cpuinit amd_detect_cmp(str
+ If that doesn't result in a usable node fall back to the
+ path for the previous case. */
+
+- int ht_nodeid = apicid - (cpu_data(0).phys_proc_id << bits);
++ int ht_nodeid = c->initial_apicid;
+
+ if (ht_nodeid >= 0 &&
+ apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
+@@ -913,7 +982,7 @@ static void __cpuinit init_amd(struct cp
+
+ /* Bit 31 in normal CPUID used for nonstandard 3DNow ID;
+ 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */
+- clear_bit(0*32+31, (unsigned long *)&c->x86_capability);
++ clear_cpu_cap(c, 0*32+31);
+
+ /* On C+ stepping K8 rep microcode works well for copy/memset */
+ level = cpuid_eax(1);
+@@ -955,9 +1024,25 @@ static void __cpuinit init_amd(struct cp
+ /* MFENCE stops RDTSC speculation */
+ set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
+
++ if (c->x86 == 0x10)
++ fam10h_check_enable_mmcfg();
++
+ #ifndef CONFIG_XEN
+ if (amd_apic_timer_broken())
+ disable_apic_timer = 1;
++
++ if (c == &boot_cpu_data && c->x86 >= 0xf && c->x86 <= 0x11) {
++ unsigned long long tseg;
++
++ /*
++ * Split up direct mapping around the TSEG SMM area.
++ * Don't do it for gbpages because there seems very little
++ * benefit in doing so.
++ */
++ if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg) &&
++ (tseg >> PMD_SHIFT) < (max_pfn_mapped >> (PMD_SHIFT-PAGE_SHIFT)))
++ set_memory_4k((unsigned long)__va(tseg), 1);
++ }
+ #endif
+ }
+
+@@ -1051,7 +1136,7 @@ static void __cpuinit early_init_intel(s
+ {
+ if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
+ (c->x86 == 0x6 && c->x86_model >= 0x0e))
+- set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
++ set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
+ }
+
+ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
+@@ -1094,9 +1179,6 @@ static void __cpuinit init_intel(struct
+
+ if (c->x86 == 15)
+ c->x86_cache_alignment = c->x86_clflush_size * 2;
+- if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
+- (c->x86 == 0x6 && c->x86_model >= 0x0e))
+- set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
+ if (c->x86 == 6)
+ set_cpu_cap(c, X86_FEATURE_REP_GOOD);
+ set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
+@@ -1105,6 +1187,32 @@ static void __cpuinit init_intel(struct
+ srat_detect_node();
+ }
+
++static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c)
++{
++ if (c->x86 == 0x6 && c->x86_model >= 0xf)
++ set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
++}
++
++static void __cpuinit init_centaur(struct cpuinfo_x86 *c)
++{
++ /* Cache sizes */
++ unsigned n;
++
++ n = c->extended_cpuid_level;
++ if (n >= 0x80000008) {
++ unsigned eax = cpuid_eax(0x80000008);
++ c->x86_virt_bits = (eax >> 8) & 0xff;
++ c->x86_phys_bits = eax & 0xff;
++ }
++
++ if (c->x86 == 0x6 && c->x86_model >= 0xf) {
++ c->x86_cache_alignment = c->x86_clflush_size * 2;
++ set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
++ set_cpu_cap(c, X86_FEATURE_REP_GOOD);
++ }
++ set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
++}
++
+ static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
+ {
+ char *v = c->x86_vendor_id;
+@@ -1113,6 +1221,8 @@ static void __cpuinit get_cpu_vendor(str
+ c->x86_vendor = X86_VENDOR_AMD;
+ else if (!strcmp(v, "GenuineIntel"))
+ c->x86_vendor = X86_VENDOR_INTEL;
++ else if (!strcmp(v, "CentaurHauls"))
++ c->x86_vendor = X86_VENDOR_CENTAUR;
+ else
+ c->x86_vendor = X86_VENDOR_UNKNOWN;
+ }
+@@ -1160,15 +1270,16 @@ static void __cpuinit early_identify_cpu
+ c->x86 += (tfms >> 20) & 0xff;
+ if (c->x86 >= 0x6)
+ c->x86_model += ((tfms >> 16) & 0xF) << 4;
+- if (c->x86_capability[0] & (1<<19))
++ if (test_cpu_cap(c, X86_FEATURE_CLFLSH))
+ c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
+ } else {
+ /* Have CPUID level 0 only - unheard of */
+ c->x86 = 4;
+ }
+
++ c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xff;
+ #ifdef CONFIG_SMP
+- c->phys_proc_id = (cpuid_ebx(1) >> 24) & 0xff;
++ c->phys_proc_id = c->initial_apicid;
+ #endif
+ /* AMD-defined flags: level 0x80000001 */
+ xlvl = cpuid_eax(0x80000000);
+@@ -1201,8 +1312,12 @@ static void __cpuinit early_identify_cpu
+ case X86_VENDOR_INTEL:
+ early_init_intel(c);
+ break;
++ case X86_VENDOR_CENTAUR:
++ early_init_centaur(c);
++ break;
+ }
+
++ validate_pat_support(c);
+ }
+
+ /*
+@@ -1237,6 +1352,10 @@ void __cpuinit identify_cpu(struct cpuin
+ init_intel(c);
+ break;
+
++ case X86_VENDOR_CENTAUR:
++ init_centaur(c);
++ break;
++
+ case X86_VENDOR_UNKNOWN:
+ default:
+ display_cacheinfo(c);
+@@ -1266,14 +1385,24 @@ void __cpuinit identify_cpu(struct cpuin
+ #endif
+ select_idle_routine(c);
+
+- if (c != &boot_cpu_data)
+- mtrr_ap_init();
+ #ifdef CONFIG_NUMA
+ numa_add_cpu(smp_processor_id());
+ #endif
+
+ }
+
++void __cpuinit identify_boot_cpu(void)
++{
++ identify_cpu(&boot_cpu_data);
++}
++
++void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
++{
++ BUG_ON(c == &boot_cpu_data);
++ identify_cpu(c);
++ mtrr_ap_init();
++}
++
+ static __init int setup_noclflush(char *arg)
+ {
+ setup_clear_cpu_cap(X86_FEATURE_CLFLSH);
+@@ -1302,123 +1431,3 @@ static __init int setup_disablecpuid(cha
+ return 1;
+ }
+ __setup("clearcpuid=", setup_disablecpuid);
+-
+-/*
+- * Get CPU information for use by the procfs.
+- */
+-
+-static int show_cpuinfo(struct seq_file *m, void *v)
+-{
+- struct cpuinfo_x86 *c = v;
+- int cpu = 0, i;
+-
+-#ifdef CONFIG_SMP
+- cpu = c->cpu_index;
+-#endif
+-
+- seq_printf(m, "processor\t: %u\n"
+- "vendor_id\t: %s\n"
+- "cpu family\t: %d\n"
+- "model\t\t: %d\n"
+- "model name\t: %s\n",
+- (unsigned)cpu,
+- c->x86_vendor_id[0] ? c->x86_vendor_id : "unknown",
+- c->x86,
+- (int)c->x86_model,
+- c->x86_model_id[0] ? c->x86_model_id : "unknown");
+-
+- if (c->x86_mask || c->cpuid_level >= 0)
+- seq_printf(m, "stepping\t: %d\n", c->x86_mask);
+- else
+- seq_printf(m, "stepping\t: unknown\n");
+-
+- if (cpu_has(c, X86_FEATURE_TSC)) {
+- unsigned int freq = cpufreq_quick_get((unsigned)cpu);
+-
+- if (!freq)
+- freq = cpu_khz;
+- seq_printf(m, "cpu MHz\t\t: %u.%03u\n",
+- freq / 1000, (freq % 1000));
+- }
+-
+- /* Cache size */
+- if (c->x86_cache_size >= 0)
+- seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size);
+-
+-#ifdef CONFIG_SMP
+- if (smp_num_siblings * c->x86_max_cores > 1) {
+- seq_printf(m, "physical id\t: %d\n", c->phys_proc_id);
+- seq_printf(m, "siblings\t: %d\n",
+- cpus_weight(per_cpu(cpu_core_map, cpu)));
+- seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id);
+- seq_printf(m, "cpu cores\t: %d\n", c->booted_cores);
+- }
+-#endif
+-
+- seq_printf(m,
+- "fpu\t\t: yes\n"
+- "fpu_exception\t: yes\n"
+- "cpuid level\t: %d\n"
+- "wp\t\t: yes\n"
+- "flags\t\t:",
+- c->cpuid_level);
+-
+- for (i = 0; i < 32*NCAPINTS; i++)
+- if (cpu_has(c, i) && x86_cap_flags[i] != NULL)
+- seq_printf(m, " %s", x86_cap_flags[i]);
+-
+- seq_printf(m, "\nbogomips\t: %lu.%02lu\n",
+- c->loops_per_jiffy/(500000/HZ),
+- (c->loops_per_jiffy/(5000/HZ)) % 100);
+-
+- if (c->x86_tlbsize > 0)
+- seq_printf(m, "TLB size\t: %d 4K pages\n", c->x86_tlbsize);
+- seq_printf(m, "clflush size\t: %d\n", c->x86_clflush_size);
+- seq_printf(m, "cache_alignment\t: %d\n", c->x86_cache_alignment);
+-
+- seq_printf(m, "address sizes\t: %u bits physical, %u bits virtual\n",
+- c->x86_phys_bits, c->x86_virt_bits);
+-
+- seq_printf(m, "power management:");
+- for (i = 0; i < 32; i++) {
+- if (c->x86_power & (1 << i)) {
+- if (i < ARRAY_SIZE(x86_power_flags) &&
+- x86_power_flags[i])
+- seq_printf(m, "%s%s",
+- x86_power_flags[i][0]?" ":"",
+- x86_power_flags[i]);
+- else
+- seq_printf(m, " [%d]", i);
+- }
+- }
+-
+- seq_printf(m, "\n\n");
+-
+- return 0;
+-}
+-
+-static void *c_start(struct seq_file *m, loff_t *pos)
+-{
+- if (*pos == 0) /* just in case, cpu 0 is not the first */
+- *pos = first_cpu(cpu_online_map);
+- if ((*pos) < NR_CPUS && cpu_online(*pos))
+- return &cpu_data(*pos);
+- return NULL;
+-}
+-
+-static void *c_next(struct seq_file *m, void *v, loff_t *pos)
+-{
+- *pos = next_cpu(*pos, cpu_online_map);
+- return c_start(m, pos);
+-}
+-
+-static void c_stop(struct seq_file *m, void *v)
+-{
+-}
+-
+-const struct seq_operations cpuinfo_op = {
+- .start = c_start,
+- .next = c_next,
+- .stop = c_stop,
+- .show = show_cpuinfo,
+-};
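+
+The amd_detect_cmp() hunk earlier in this file's diff derives both topology
+identifiers from the untranslated initial APIC ID: the low c->x86_coreid_bits
+bits index the core within its package, and the remaining high bits give the
+package (socket) number. As a worked example with assumed values (not taken
+from the patch), a part reporting x86_coreid_bits == 2 and initial_apicid == 0x7
+decomposes as follows:
+
+	unsigned int bits   = 2;	/* assumed c->x86_coreid_bits */
+	unsigned int apicid = 0x7;	/* assumed c->initial_apicid  */
+	unsigned int core   = apicid & ((1 << bits) - 1);	/* 0x7 & 0x3 = 3 */
+	unsigned int pkg    = apicid >> bits;			/* 0x7 >> 2 = 1 */
+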
+--- /dev/null 1970-01-01 00:00:00.000000000 +0000
++++ sle11-2009-05-14/arch/x86/kernel/smp-xen.c 2009-03-16 16:38:05.000000000 +0100
+@@ -0,0 +1,329 @@
++/*
++ * Intel SMP support routines.
++ *
++ * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
++ * (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com>
++ * (c) 2002,2003 Andi Kleen, SuSE Labs.
++ *
++ * i386 and x86_64 integration by Glauber Costa <gcosta@redhat.com>
++ *
++ * This code is released under the GNU General Public License version 2 or
++ * later.
++ */
++
++#include <linux/init.h>
++
++#include <linux/mm.h>
++#include <linux/delay.h>
++#include <linux/spinlock.h>
++#include <linux/kernel_stat.h>
++#include <linux/mc146818rtc.h>
++#include <linux/cache.h>
++#include <linux/interrupt.h>
++#include <linux/cpu.h>
++
++#include <asm/mtrr.h>
++#include <asm/tlbflush.h>
++#include <asm/mmu_context.h>
++#include <asm/proto.h>
++#include <mach_ipi.h>
++#include <xen/evtchn.h>
++/*
++ * Some notes on x86 processor bugs affecting SMP operation:
++ *
++ * Pentium, Pentium Pro, II, III (and all CPUs) have bugs.
++ * The Linux implications for SMP are handled as follows:
++ *
++ * Pentium III / [Xeon]
++ * None of the E1AP-E3AP errata are visible to the user.
++ *
++ * E1AP. see PII A1AP
++ * E2AP. see PII A2AP
++ * E3AP. see PII A3AP
++ *
++ * Pentium II / [Xeon]
++ * None of the A1AP-A3AP errata are visible to the user.
++ *
++ * A1AP. see PPro 1AP
++ * A2AP. see PPro 2AP
++ * A3AP. see PPro 7AP
++ *
++ * Pentium Pro
++ * None of 1AP-9AP errata are visible to the normal user,
++ * except occasional delivery of 'spurious interrupt' as trap #15.
++ * This is very rare and a non-problem.
++ *
++ * 1AP. Linux maps APIC as non-cacheable
++ * 2AP. worked around in hardware
++ * 3AP. fixed in C0 and above steppings microcode update.
++ * Linux does not use excessive STARTUP_IPIs.
++ * 4AP. worked around in hardware
++ * 5AP. symmetric IO mode (normal Linux operation) not affected.
++ * 'noapic' mode has vector 0xf filled out properly.
++ * 6AP. 'noapic' mode might be affected - fixed in later steppings
++ * 7AP. We do not assume writes to the LVT deasserting IRQs
++ * 8AP. We do not enable low power mode (deep sleep) during MP bootup
++ * 9AP. We do not use mixed mode
++ *
++ * Pentium
++ * There is a marginal case where REP MOVS on 100MHz SMP
++ * machines with B stepping processors can fail. XXX should provide
++ * an L1cache=Writethrough or L1cache=off option.
++ *
++ * B stepping CPUs may hang. There are hardware work arounds
++ * for this. We warn about it in case your board doesn't have the work
++ * arounds. Basically that's so I can tell anyone with a B stepping
++ * CPU and SMP problems "tough".
++ *
++ * Specific items [From Pentium Processor Specification Update]
++ *
++ * 1AP. Linux doesn't use remote read
++ * 2AP. Linux doesn't trust APIC errors
++ * 3AP. We work around this
++ * 4AP. Linux never generated 3 interrupts of the same priority
++ * to cause a lost local interrupt.
++ * 5AP. Remote read is never used
++ * 6AP. not affected - worked around in hardware
++ * 7AP. not affected - worked around in hardware
++ * 8AP. worked around in hardware - we get explicit CS errors if not
++ * 9AP. only 'noapic' mode affected. Might generate spurious
++ * interrupts, we log only the first one and count the
++ * rest silently.
++ * 10AP. not affected - worked around in hardware
++ * 11AP. Linux reads the APIC between writes to avoid this, as per
++ * the documentation. Make sure you preserve this as it affects
++ * the C stepping chips too.
++ * 12AP. not affected - worked around in hardware
++ * 13AP. not affected - worked around in hardware
++ * 14AP. we always deassert INIT during bootup
++ * 15AP. not affected - worked around in hardware
++ * 16AP. not affected - worked around in hardware
++ * 17AP. not affected - worked around in hardware
++ * 18AP. not affected - worked around in hardware
++ * 19AP. not affected - worked around in BIOS
++ *
++ * If this sounds worrying believe me these bugs are either ___RARE___,
++ * or are signal timing bugs worked around in hardware and there's
++ * about nothing of note with C stepping upwards.
++ */
++
++/*
++ * this function sends a 'reschedule' IPI to another CPU.
++ * it goes straight through and wastes no time serializing
++ * anything. Worst case is that we lose a reschedule ...
++ */
++void xen_smp_send_reschedule(int cpu)
++{
++ if (unlikely(cpu_is_offline(cpu))) {
++ WARN_ON(1);
++ return;
++ }
++ send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
++}
++
++/*
++ * Structure and data for smp_call_function(). This is designed to minimise
++ * static memory requirements. It also looks cleaner.
++ */
++static DEFINE_SPINLOCK(call_lock);
++
++struct call_data_struct {
++ void (*func) (void *info);
++ void *info;
++ atomic_t started;
++ atomic_t finished;
++ int wait;
++};
++
++void lock_ipi_call_lock(void)
++{
++ spin_lock_irq(&call_lock);
++}
++
++void unlock_ipi_call_lock(void)
++{
++ spin_unlock_irq(&call_lock);
++}
++
++static struct call_data_struct *call_data;
++
++static void __smp_call_function(void (*func) (void *info), void *info,
++ int nonatomic, int wait)
++{
++ struct call_data_struct data;
++ int cpus = num_online_cpus() - 1;
++
++ if (!cpus)
++ return;
++
++ data.func = func;
++ data.info = info;
++ atomic_set(&data.started, 0);
++ data.wait = wait;
++ if (wait)
++ atomic_set(&data.finished, 0);
++
++ call_data = &data;
++ mb();
++
++ /* Send a message to all other CPUs and wait for them to respond */
++ send_IPI_allbutself(CALL_FUNCTION_VECTOR);
++
++ /* Wait for response */
++ while (atomic_read(&data.started) != cpus)
++ cpu_relax();
++
++ if (wait)
++ while (atomic_read(&data.finished) != cpus)
++ cpu_relax();
++}
++
++
++/**
++ * smp_call_function_mask(): Run a function on a set of other CPUs.
++ * @mask: The set of cpus to run on. Must not include the current cpu.
++ * @func: The function to run. This must be fast and non-blocking.
++ * @info: An arbitrary pointer to pass to the function.
++ * @wait: If true, wait (atomically) until function has completed on other CPUs.
++ *
++ * Returns 0 on success, else a negative status code.
++ *
++ * If @wait is true, then returns once @func has returned; otherwise
++ * it returns just before the target cpu calls @func.
++ *
++ * You must not call this function with disabled interrupts or from a
++ * hardware interrupt handler or from a bottom half handler.
++ */
++int
++xen_smp_call_function_mask(cpumask_t mask,
++ void (*func)(void *), void *info,
++ int wait)
++{
++ struct call_data_struct data;
++ cpumask_t allbutself;
++ int cpus;
++
++ /* Can deadlock when called with interrupts disabled */
++ WARN_ON(irqs_disabled());
++
++ /* Holding any lock stops cpus from going down. */
++ spin_lock(&call_lock);
++
++ allbutself = cpu_online_map;
++ cpu_clear(smp_processor_id(), allbutself);
++
++ cpus_and(mask, mask, allbutself);
++ cpus = cpus_weight(mask);
++
++ if (!cpus) {
++ spin_unlock(&call_lock);
++ return 0;
++ }
++
++ data.func = func;
++ data.info = info;
++ atomic_set(&data.started, 0);
++ data.wait = wait;
++ if (wait)
++ atomic_set(&data.finished, 0);
++
++ call_data = &data;
++ wmb();
++
++ /* Send a message to other CPUs */
++ if (cpus_equal(mask, allbutself) &&
++ cpus_equal(cpu_online_map, cpu_callout_map))
++ send_IPI_allbutself(CALL_FUNCTION_VECTOR);
++ else
++ send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
++
++ /* Wait for response */
++ while (atomic_read(&data.started) != cpus)
++ cpu_relax();
++
++ if (wait)
++ while (atomic_read(&data.finished) != cpus)
++ cpu_relax();
++ spin_unlock(&call_lock);
++
++ return 0;
++}
++
++static void stop_this_cpu(void *dummy)
++{
++ local_irq_disable();
++ /*
++ * Remove this CPU:
++ */
++ cpu_clear(smp_processor_id(), cpu_online_map);
++ disable_all_local_evtchn();
++ if (hlt_works(smp_processor_id()))
++ for (;;) halt();
++ for (;;);
++}
++
++/*
++ * this function calls the 'stop' function on all other CPUs in the system.
++ */
++
++void xen_smp_send_stop(void)
++{
++ int nolock;
++ unsigned long flags;
++
++ /* Don't deadlock on the call lock in panic */
++ nolock = !spin_trylock(&call_lock);
++ local_irq_save(flags);
++ __smp_call_function(stop_this_cpu, NULL, 0, 0);
++ if (!nolock)
++ spin_unlock(&call_lock);
++ disable_all_local_evtchn();
++ local_irq_restore(flags);
++}
++
++/*
++ * Reschedule call back. Nothing to do,
++ * all the work is done automatically when
++ * we return from the interrupt.
++ */
++irqreturn_t smp_reschedule_interrupt(int irq, void *dev_id)
++{
++#ifdef CONFIG_X86_32
++ __get_cpu_var(irq_stat).irq_resched_count++;
++#else
++ add_pda(irq_resched_count, 1);
++#endif
++ return IRQ_HANDLED;
++}
++
++irqreturn_t smp_call_function_interrupt(int irq, void *dev_id)
++{
++ void (*func) (void *info) = call_data->func;
++ void *info = call_data->info;
++ int wait = call_data->wait;
++
++ /*
++ * Notify initiating CPU that I've grabbed the data and am
++ * about to execute the function
++ */
++ mb();
++ atomic_inc(&call_data->started);
++ /*
++ * At this point the info structure may be out of scope unless wait==1
++ */
++ irq_enter();
++ (*func)(info);
++#ifdef CONFIG_X86_32
++ __get_cpu_var(irq_stat).irq_call_count++;
++#else
++ add_pda(irq_call_count, 1);
++#endif
++ irq_exit();
++
++ if (wait) {
++ mb();
++ atomic_inc(&call_data->finished);
++ }
++
++ return IRQ_HANDLED;
++}
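+
+The kernel-doc comment on xen_smp_call_function_mask() above pins down the
+calling convention this new file expects: the cpu mask must not include the
+current CPU, interrupts must be enabled, and wait != 0 only returns once every
+targeted CPU has finished running the callback. A minimal caller is sketched
+below for illustration; count_cpu(), seen and ping_other_cpus() are made-up
+names, not part of the patch:
+
+	static atomic_t seen;
+
+	static void count_cpu(void *info)	/* runs on each remote CPU */
+	{
+		atomic_inc((atomic_t *)info);	/* must be fast and non-blocking */
+	}
+
+	static void ping_other_cpus(void)
+	{
+		cpumask_t mask;
+		int me = get_cpu();	/* disable preemption while we build the mask */
+
+		mask = cpu_online_map;
+		cpu_clear(me, mask);	/* contract: the mask must not include us */
+		/* wait == 1: returns only after every CPU in mask ran count_cpu() */
+		xen_smp_call_function_mask(mask, count_cpu, &seen, 1);
+		put_cpu();
+	}
+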
+--- sle11-2009-05-14.orig/arch/x86/kernel/smp_32-xen.c 2009-03-16 16:33:40.000000000 +0100
++++ /dev/null 1970-01-01 00:00:00.000000000 +0000
+@@ -1,647 +0,0 @@
+-/*
+- * Intel SMP support routines.
+- *
+- * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
+- * (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com>
+- *
+- * This code is released under the GNU General Public License version 2 or
+- * later.
+- */
+-
+-#include <linux/init.h>
+-
+-#include <linux/mm.h>
+-#include <linux/delay.h>
+-#include <linux/spinlock.h>
+-#include <linux/kernel_stat.h>
+-#include <linux/mc146818rtc.h>
+-#include <linux/cache.h>
+-#include <linux/interrupt.h>
+-#include <linux/cpu.h>
+-#include <linux/module.h>
+-
+-#include <asm/mtrr.h>
+-#include <asm/tlbflush.h>
+-#include <asm/mmu_context.h>
+-#if 0
+-#include <mach_apic.h>
+-#endif
+-#include <xen/evtchn.h>
+-
+-/*
+- * Some notes on x86 processor bugs affecting SMP operation:
+- *
+- * Pentium, Pentium Pro, II, III (and all CPUs) have bugs.
+- * The Linux implications for SMP are handled as follows:
+- *
+- * Pentium III / [Xeon]
+- * None of the E1AP-E3AP errata are visible to the user.
+- *
+- * E1AP. see PII A1AP
+- * E2AP. see PII A2AP
+- * E3AP. see PII A3AP
+- *
+- * Pentium II / [Xeon]
+- * None of the A1AP-A3AP errata are visible to the user.
+- *
+- * A1AP. see PPro 1AP
+- * A2AP. see PPro 2AP
+- * A3AP. see PPro 7AP
+- *
+- * Pentium Pro
+- * None of 1AP-9AP errata are visible to the normal user,
+- * except occasional delivery of 'spurious interrupt' as trap #15.
+- * This is very rare and a non-problem.
+- *
+- * 1AP. Linux maps APIC as non-cacheable
+- * 2AP. worked around in hardware
+- * 3AP. fixed in C0 and above steppings microcode update.
+- * Linux does not use excessive STARTUP_IPIs.
+- * 4AP. worked around in hardware
+- * 5AP. symmetric IO mode (normal Linux operation) not affected.
+- * 'noapic' mode has vector 0xf filled out properly.
+- * 6AP. 'noapic' mode might be affected - fixed in later steppings
+- * 7AP. We do not assume writes to the LVT deassering IRQs
+- * 8AP. We do not enable low power mode (deep sleep) during MP bootup
+- * 9AP. We do not use mixed mode
+- *
+- * Pentium
+- * There is a marginal case where REP MOVS on 100MHz SMP
+- * machines with B stepping processors can fail. XXX should provide
+- * an L1cache=Writethrough or L1cache=off option.
+- *
+- * B stepping CPUs may hang. There are hardware work arounds
+- * for this. We warn about it in case your board doesn't have the work
+- * arounds. Basically that's so I can tell anyone with a B stepping
+- * CPU and SMP problems "tough".
+- *
+- * Specific items [From Pentium Processor Specification Update]
+- *
+- * 1AP. Linux doesn't use remote read
+- * 2AP. Linux doesn't trust APIC errors
+- * 3AP. We work around this
+- * 4AP. Linux never generated 3 interrupts of the same priority
+- * to cause a lost local interrupt.
+- * 5AP. Remote read is never used
+- * 6AP. not affected - worked around in hardware
+- * 7AP. not affected - worked around in hardware
+- * 8AP. worked around in hardware - we get explicit CS errors if not
+- * 9AP. only 'noapic' mode affected. Might generate spurious
+- * interrupts, we log only the first one and count the
+- * rest silently.
+- * 10AP. not affected - worked around in hardware
+- * 11AP. Linux reads the APIC between writes to avoid this, as per
+- * the documentation. Make sure you preserve this as it affects
+- * the C stepping chips too.
+- * 12AP. not affected - worked around in hardware
+- * 13AP. not affected - worked around in hardware
+- * 14AP. we always deassert INIT during bootup
+- * 15AP. not affected - worked around in hardware
+- * 16AP. not affected - worked around in hardware
+- * 17AP. not affected - worked around in hardware
+- * 18AP. not affected - worked around in hardware
+- * 19AP. not affected - worked around in BIOS
+- *
+- * If this sounds worrying believe me these bugs are either ___RARE___,
+- * or are signal timing bugs worked around in hardware and there's
+- * about nothing of note with C stepping upwards.
+- */
+-
+-DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate) ____cacheline_aligned = { &init_mm, 0, };
+-
+-/*
+- * the following functions deal with sending IPIs between CPUs.
+- *
+- * We use 'broadcast', CPU->CPU IPIs and self-IPIs too.
+- */
+-
+-static inline int __prepare_ICR (unsigned int shortcut, int vector)
+-{
+- unsigned int icr = shortcut | APIC_DEST_LOGICAL;
+-
+- switch (vector) {
+- default:
+- icr |= APIC_DM_FIXED | vector;
+- break;
+- case NMI_VECTOR:
+- icr |= APIC_DM_NMI;
+- break;
+- }
+- return icr;
+-}
+-
+-static inline int __prepare_ICR2 (unsigned int mask)
+-{
+- return SET_APIC_DEST_FIELD(mask);
+-}
+-
+-DECLARE_PER_CPU(int, ipi_to_irq[NR_IPIS]);
+-
+-static inline void __send_IPI_one(unsigned int cpu, int vector)
+-{
+- int irq = per_cpu(ipi_to_irq, cpu)[vector];
+- BUG_ON(irq < 0);
+- notify_remote_via_irq(irq);
+-}
+-
+-void __send_IPI_shortcut(unsigned int shortcut, int vector)
+-{
+- int cpu;
+-
+- switch (shortcut) {
+- case APIC_DEST_SELF:
+- __send_IPI_one(smp_processor_id(), vector);
+- break;
+- case APIC_DEST_ALLBUT:
+- for (cpu = 0; cpu < NR_CPUS; ++cpu) {
+- if (cpu == smp_processor_id())
+- continue;
+- if (cpu_isset(cpu, cpu_online_map)) {
+- __send_IPI_one(cpu, vector);
+- }
+- }
+- break;
+- default:
+- printk("XXXXXX __send_IPI_shortcut %08x vector %d\n", shortcut,
+- vector);
+- break;
+- }
+-}
+-
+-void send_IPI_self(int vector)
+-{
+- __send_IPI_shortcut(APIC_DEST_SELF, vector);
+-}
+-
+-/*
+- * This is only used on smaller machines.
+- */
+-void send_IPI_mask_bitmask(cpumask_t mask, int vector)
+-{
+- unsigned long flags;
+- unsigned int cpu;
+-
+- local_irq_save(flags);
+- WARN_ON(cpus_addr(mask)[0] & ~cpus_addr(cpu_online_map)[0]);
+-
+- for (cpu = 0; cpu < NR_CPUS; ++cpu) {
+- if (cpu_isset(cpu, mask)) {
+- __send_IPI_one(cpu, vector);
+- }
+- }
+-
+- local_irq_restore(flags);
+-}
+-
+-void send_IPI_mask_sequence(cpumask_t mask, int vector)
+-{
+-
+- send_IPI_mask_bitmask(mask, vector);
+-}
+-
+-#include <mach_ipi.h> /* must come after the send_IPI functions above for inlining */
+-
+-#if 0 /* XEN */
+-/*
+- * Smarter SMP flushing macros.
+- * c/o Linus Torvalds.
+- *
+- * These mean you can really definitely utterly forget about
+- * writing to user space from interrupts. (Its not allowed anyway).
+- *
+- * Optimizations Manfred Spraul <manfred@colorfullife.com>
+- */
+-
+-static cpumask_t flush_cpumask;
+-static struct mm_struct * flush_mm;
+-static unsigned long flush_va;
+-static DEFINE_SPINLOCK(tlbstate_lock);
+-
+-/*
+- * We cannot call mmdrop() because we are in interrupt context,
+- * instead update mm->cpu_vm_mask.
+- *
+- * We need to reload %cr3 since the page tables may be going
+- * away from under us..
+- */
+-void leave_mm(int cpu)
+-{
+- if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK)
+- BUG();
+- cpu_clear(cpu, per_cpu(cpu_tlbstate, cpu).active_mm->cpu_vm_mask);
+- load_cr3(swapper_pg_dir);
+-}
+-EXPORT_SYMBOL_GPL(leave_mm);
+-
+-/*
+- *
+- * The flush IPI assumes that a thread switch happens in this order:
+- * [cpu0: the cpu that switches]
+- * 1) switch_mm() either 1a) or 1b)
+- * 1a) thread switch to a different mm
+- * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask);
+- * Stop ipi delivery for the old mm. This is not synchronized with
+- * the other cpus, but smp_invalidate_interrupt ignore flush ipis
+- * for the wrong mm, and in the worst case we perform a superfluous
+- * tlb flush.
+- * 1a2) set cpu_tlbstate to TLBSTATE_OK
+- * Now the smp_invalidate_interrupt won't call leave_mm if cpu0
+- * was in lazy tlb mode.
+- * 1a3) update cpu_tlbstate[].active_mm
+- * Now cpu0 accepts tlb flushes for the new mm.
+- * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask);
+- * Now the other cpus will send tlb flush ipis.
+- * 1a4) change cr3.
+- * 1b) thread switch without mm change
+- * cpu_tlbstate[].active_mm is correct, cpu0 already handles
+- * flush ipis.
+- * 1b1) set cpu_tlbstate to TLBSTATE_OK
+- * 1b2) test_and_set the cpu bit in cpu_vm_mask.
+- * Atomically set the bit [other cpus will start sending flush ipis],
+- * and test the bit.
+- * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
+- * 2) switch %%esp, ie current
+- *
+- * The interrupt must handle 2 special cases:
+- * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm.
+- * - the cpu performs speculative tlb reads, i.e. even if the cpu only
+- * runs in kernel space, the cpu could load tlb entries for user space
+- * pages.
+- *
+- * The good news is that cpu_tlbstate is local to each cpu, no
+- * write/read ordering problems.
+- */
+-
+-/*
+- * TLB flush IPI:
+- *
+- * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
+- * 2) Leave the mm if we are in the lazy tlb mode.
+- */
+-
+-irqreturn_t smp_invalidate_interrupt(int irq, void *dev_id)
+-{
+- unsigned long cpu;
+-
+- cpu = get_cpu();
+-
+- if (!cpu_isset(cpu, flush_cpumask))
+- goto out;
+- /*
+- * This was a BUG() but until someone can quote me the
+- * line from the intel manual that guarantees an IPI to
+- * multiple CPUs is retried _only_ on the erroring CPUs
+- * its staying as a return
+- *
+- * BUG();
+- */
+-
+- if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) {
+- if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) {
+- if (flush_va == TLB_FLUSH_ALL)
+- local_flush_tlb();
+- else
+- __flush_tlb_one(flush_va);
+- } else
+- leave_mm(cpu);
+- }
+- smp_mb__before_clear_bit();
+- cpu_clear(cpu, flush_cpumask);
+- smp_mb__after_clear_bit();
+-out:
+- put_cpu_no_resched();
+- __get_cpu_var(irq_stat).irq_tlb_count++;
+-
+- return IRQ_HANDLED;
+-}
+-
+-void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
+- unsigned long va)
+-{
+- cpumask_t cpumask = *cpumaskp;
+-
+- /*
+- * A couple of (to be removed) sanity checks:
+- *
+- * - current CPU must not be in mask
+- * - mask must exist :)
+- */
+- BUG_ON(cpus_empty(cpumask));
+- BUG_ON(cpu_isset(smp_processor_id(), cpumask));
+- BUG_ON(!mm);
+-
+-#ifdef CONFIG_HOTPLUG_CPU
+- /* If a CPU which we ran on has gone down, OK. */
+- cpus_and(cpumask, cpumask, cpu_online_map);
+- if (unlikely(cpus_empty(cpumask)))
+- return;
+-#endif
+-
+- /*
+- * i'm not happy about this global shared spinlock in the
+- * MM hot path, but we'll see how contended it is.
+- * AK: x86-64 has a faster method that could be ported.
+- */
+- spin_lock(&tlbstate_lock);
+-
+- flush_mm = mm;
+- flush_va = va;
+- cpus_or(flush_cpumask, cpumask, flush_cpumask);
+- /*
+- * We have to send the IPI only to
+- * CPUs affected.
+- */
+- send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR);
+-
+- while (!cpus_empty(flush_cpumask))
+- /* nothing. lockup detection does not belong here */
+- cpu_relax();
+-
+- flush_mm = NULL;
+- flush_va = 0;
+- spin_unlock(&tlbstate_lock);
+-}
+-
+-void flush_tlb_current_task(void)
+-{
+- struct mm_struct *mm = current->mm;
+- cpumask_t cpu_mask;
+-
+- preempt_disable();
+- cpu_mask = mm->cpu_vm_mask;
+- cpu_clear(smp_processor_id(), cpu_mask);
+-
+- local_flush_tlb();
+- if (!cpus_empty(cpu_mask))
+- flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
+- preempt_enable();
+-}
+-
+-void flush_tlb_mm (struct mm_struct * mm)
+-{
+- cpumask_t cpu_mask;
+-
+- preempt_disable();
+- cpu_mask = mm->cpu_vm_mask;
+- cpu_clear(smp_processor_id(), cpu_mask);
+-
+- if (current->active_mm == mm) {
+- if (current->mm)
+- local_flush_tlb();
+- else
+- leave_mm(smp_processor_id());
+- }
+- if (!cpus_empty(cpu_mask))
+- flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
+-
+- preempt_enable();
+-}
+-
+-void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
+-{
+- struct mm_struct *mm = vma->vm_mm;
+- cpumask_t cpu_mask;
+-
+- preempt_disable();
+- cpu_mask = mm->cpu_vm_mask;
+- cpu_clear(smp_processor_id(), cpu_mask);
+-
+- if (current->active_mm == mm) {
+- if(current->mm)
+- __flush_tlb_one(va);
+- else
+- leave_mm(smp_processor_id());
+- }
+-
+- if (!cpus_empty(cpu_mask))
+- flush_tlb_others(cpu_mask, mm, va);
+-
+- preempt_enable();
+-}
+-EXPORT_SYMBOL(flush_tlb_page);
+-
+-static void do_flush_tlb_all(void* info)
+-{
+- unsigned long cpu = smp_processor_id();
+-
+- __flush_tlb_all();
+- if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_LAZY)
+- leave_mm(cpu);
+-}
+-
+-void flush_tlb_all(void)
+-{
+- on_each_cpu(do_flush_tlb_all, NULL, 1, 1);
+-}
+-
+-#endif /* XEN */
+-
+-/*
+- * this function sends a 'reschedule' IPI to another CPU.
+- * it goes straight through and wastes no time serializing
+- * anything. Worst case is that we lose a reschedule ...
+- */
+-void xen_smp_send_reschedule(int cpu)
+-{
+- WARN_ON(cpu_is_offline(cpu));
+- send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
+-}
+-
+-/*
+- * Structure and data for smp_call_function(). This is designed to minimise
+- * static memory requirements. It also looks cleaner.
+- */
+-static DEFINE_SPINLOCK(call_lock);
+-
+-struct call_data_struct {
+- void (*func) (void *info);
+- void *info;
+- atomic_t started;
+- atomic_t finished;
+- int wait;
+-};
+-
+-void lock_ipi_call_lock(void)
+-{
+- spin_lock_irq(&call_lock);
+-}
+-
+-void unlock_ipi_call_lock(void)
+-{
+- spin_unlock_irq(&call_lock);
+-}
+-
+-static struct call_data_struct *call_data;
+-
+-static void __smp_call_function(void (*func) (void *info), void *info,
+- int nonatomic, int wait)
+-{
+- struct call_data_struct data;
+- int cpus = num_online_cpus() - 1;
+-
+- if (!cpus)
+- return;
+-
+- data.func = func;
+- data.info = info;
+- atomic_set(&data.started, 0);
+- data.wait = wait;
+- if (wait)
+- atomic_set(&data.finished, 0);
+-
+- call_data = &data;
+- mb();
+-
+- /* Send a message to all other CPUs and wait for them to respond */
+- send_IPI_allbutself(CALL_FUNCTION_VECTOR);
+-
+- /* Wait for response */
+- while (atomic_read(&data.started) != cpus)
+- cpu_relax();
+-
+- if (wait)
+- while (atomic_read(&data.finished) != cpus)
+- cpu_relax();
+-}
+-
+-
+-/**
+- * smp_call_function_mask(): Run a function on a set of other CPUs.
+- * @mask: The set of cpus to run on. Must not include the current cpu.
+- * @func: The function to run. This must be fast and non-blocking.
+- * @info: An arbitrary pointer to pass to the function.
+- * @wait: If true, wait (atomically) until function has completed on other CPUs.
+- *
+- * Returns 0 on success, else a negative status code.
+- *
+- * If @wait is true, then returns once @func has returned; otherwise
+- * it returns just before the target cpu calls @func.
+- *
+- * You must not call this function with disabled interrupts or from a
+- * hardware interrupt handler or from a bottom half handler.
+- */
+-int
+-xen_smp_call_function_mask(cpumask_t mask,
+- void (*func)(void *), void *info,
+- int wait)
+-{
+- struct call_data_struct data;
+- cpumask_t allbutself;
+- int cpus;
+-
+- /* Can deadlock when called with interrupts disabled */
+- WARN_ON(irqs_disabled());
+-
+- /* Holding any lock stops cpus from going down. */
+- spin_lock(&call_lock);
+-
+- allbutself = cpu_online_map;
+- cpu_clear(smp_processor_id(), allbutself);
+-
+- cpus_and(mask, mask, allbutself);
+- cpus = cpus_weight(mask);
+-
+- if (!cpus) {
+- spin_unlock(&call_lock);
+- return 0;
+- }
+-
+- data.func = func;
+- data.info = info;
+- atomic_set(&data.started, 0);
+- data.wait = wait;
+- if (wait)
+- atomic_set(&data.finished, 0);
+-
+- call_data = &data;
+- mb();
+-
+- /* Send a message to other CPUs */
+- if (cpus_equal(mask, allbutself))
+- send_IPI_allbutself(CALL_FUNCTION_VECTOR);
+- else
+- send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
+-
+- /* Wait for response */
+- while (atomic_read(&data.started) != cpus)
+- cpu_relax();
+-
+- if (wait)
+- while (atomic_read(&data.finished) != cpus)
+- cpu_relax();
+- spin_unlock(&call_lock);
+-
+- return 0;
+-}
+-
+-static void stop_this_cpu (void * dummy)
+-{
+- local_irq_disable();
+- /*
+- * Remove this CPU:
+- */
+- cpu_clear(smp_processor_id(), cpu_online_map);
+- disable_all_local_evtchn();
+- if (cpu_data(smp_processor_id()).hlt_works_ok)
+- for(;;) halt();
+- for (;;);
+-}
+-
+-/*
+- * this function calls the 'stop' function on all other CPUs in the system.
+- */
+-
+-void xen_smp_send_stop(void)
+-{
+- /* Don't deadlock on the call lock in panic */
+- int nolock = !spin_trylock(&call_lock);
+- unsigned long flags;
+-
+- local_irq_save(flags);
+- __smp_call_function(stop_this_cpu, NULL, 0, 0);
+- if (!nolock)
+- spin_unlock(&call_lock);
+- disable_all_local_evtchn();
+- local_irq_restore(flags);
+-}
+-
+-/*
+- * Reschedule call back. Nothing to do,
+- * all the work is done automatically when
+- * we return from the interrupt.
+- */
+-irqreturn_t smp_reschedule_interrupt(int irq, void *dev_id)
+-{
+- __get_cpu_var(irq_stat).irq_resched_count++;
+-
+- return IRQ_HANDLED;
+-}
+-
+-#include <linux/kallsyms.h>
+-irqreturn_t smp_call_function_interrupt(int irq, void *dev_id)
+-{
+- void (*func) (void *info) = call_data->func;
+- void *info = call_data->info;
+- int wait = call_data->wait;
+-
+- /*
+- * Notify initiating CPU that I've grabbed the data and am
+- * about to execute the function
+- */
+- mb();
+- atomic_inc(&call_data->started);
+- /*
+- * At this point the info structure may be out of scope unless wait==1
+- */
+- irq_enter();
+- (*func)(info);
+- __get_cpu_var(irq_stat).irq_call_count++;
+- irq_exit();
+-
+- if (wait) {
+- mb();
+- atomic_inc(&call_data->finished);
+- }
+-
+- return IRQ_HANDLED;
+-}
+--- sle11-2009-05-14.orig/arch/x86/kernel/smp_64-xen.c 2009-03-16 16:33:40.000000000 +0100
++++ /dev/null 1970-01-01 00:00:00.000000000 +0000
+@@ -1,554 +0,0 @@
+-/*
+- * Intel SMP support routines.
+- *
+- * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
+- * (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com>
+- * (c) 2002,2003 Andi Kleen, SuSE Labs.
+- *
+- * This code is released under the GNU General Public License version 2 or
+- * later.
+- */
+-
+-#include <linux/init.h>
+-
+-#include <linux/mm.h>
+-#include <linux/delay.h>
+-#include <linux/spinlock.h>
+-#include <linux/smp.h>
+-#include <linux/kernel_stat.h>
+-#include <linux/mc146818rtc.h>
+-#include <linux/interrupt.h>
+-
+-#include <asm/mtrr.h>
+-#include <asm/pgalloc.h>
+-#include <asm/tlbflush.h>
+-#include <asm/mach_apic.h>
+-#include <asm/mmu_context.h>
+-#include <asm/proto.h>
+-#include <asm/apicdef.h>
+-#include <asm/idle.h>
+-#ifdef CONFIG_XEN
+-#include <xen/evtchn.h>
+-#endif
+-
+-#ifndef CONFIG_XEN
+-/*
+- * Smarter SMP flushing macros.
+- * c/o Linus Torvalds.
+- *
+- * These mean you can really definitely utterly forget about
+- * writing to user space from interrupts. (Its not allowed anyway).
+- *
+- * Optimizations Manfred Spraul <manfred@colorfullife.com>
+- *
+- * More scalable flush, from Andi Kleen
+- *
+- * To avoid global state use 8 different call vectors.
+- * Each CPU uses a specific vector to trigger flushes on other
+- * CPUs. Depending on the received vector the target CPUs look into
+- * the right per cpu variable for the flush data.
+- *
+- * With more than 8 CPUs they are hashed to the 8 available
+- * vectors. The limited global vector space forces us to this right now.
+- * In future when interrupts are split into per CPU domains this could be
+- * fixed, at the cost of triggering multiple IPIs in some cases.
+- */
+-
+-union smp_flush_state {
+- struct {
+- cpumask_t flush_cpumask;
+- struct mm_struct *flush_mm;
+- unsigned long flush_va;
+- spinlock_t tlbstate_lock;
+- };
+- char pad[SMP_CACHE_BYTES];
+-} ____cacheline_aligned;
+-
+-/* State is put into the per CPU data section, but padded
+- to a full cache line because other CPUs can access it and we don't
+- want false sharing in the per cpu data segment. */
+-static DEFINE_PER_CPU(union smp_flush_state, flush_state);
+-
+-/*
+- * We cannot call mmdrop() because we are in interrupt context,
+- * instead update mm->cpu_vm_mask.
+- */
+-void leave_mm(int cpu)
+-{
+- if (read_pda(mmu_state) == TLBSTATE_OK)
+- BUG();
+- cpu_clear(cpu, read_pda(active_mm)->cpu_vm_mask);
+- load_cr3(swapper_pg_dir);
+-}
+-EXPORT_SYMBOL_GPL(leave_mm);
+-
+-/*
+- *
+- * The flush IPI assumes that a thread switch happens in this order:
+- * [cpu0: the cpu that switches]
+- * 1) switch_mm() either 1a) or 1b)
+- * 1a) thread switch to a different mm
+- * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask);
+- * Stop ipi delivery for the old mm. This is not synchronized with
+- * the other cpus, but smp_invalidate_interrupt ignore flush ipis
+- * for the wrong mm, and in the worst case we perform a superfluous
+- * tlb flush.
+- * 1a2) set cpu mmu_state to TLBSTATE_OK
+- * Now the smp_invalidate_interrupt won't call leave_mm if cpu0
+- * was in lazy tlb mode.
+- * 1a3) update cpu active_mm
+- * Now cpu0 accepts tlb flushes for the new mm.
+- * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask);
+- * Now the other cpus will send tlb flush ipis.
+- * 1a4) change cr3.
+- * 1b) thread switch without mm change
+- * cpu active_mm is correct, cpu0 already handles
+- * flush ipis.
+- * 1b1) set cpu mmu_state to TLBSTATE_OK
+- * 1b2) test_and_set the cpu bit in cpu_vm_mask.
+- * Atomically set the bit [other cpus will start sending flush ipis],
+- * and test the bit.
+- * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
+- * 2) switch %%esp, ie current
+- *
+- * The interrupt must handle 2 special cases:
+- * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm.
+- * - the cpu performs speculative tlb reads, i.e. even if the cpu only
+- * runs in kernel space, the cpu could load tlb entries for user space
+- * pages.
+- *
+- * The good news is that cpu mmu_state is local to each cpu, no
+- * write/read ordering problems.
+- */
+-
+-/*
+- * TLB flush IPI:
+- *
+- * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
+- * 2) Leave the mm if we are in the lazy tlb mode.
+- *
+- * Interrupts are disabled.
+- */
+-
+-asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs)
+-{
+- int cpu;
+- int sender;
+- union smp_flush_state *f;
+-
+- cpu = smp_processor_id();
+- /*
+- * orig_rax contains the negated interrupt vector.
+- * Use that to determine where the sender put the data.
+- */
+- sender = ~regs->orig_ax - INVALIDATE_TLB_VECTOR_START;
+- f = &per_cpu(flush_state, sender);
+-
+- if (!cpu_isset(cpu, f->flush_cpumask))
+- goto out;
+- /*
+- * This was a BUG() but until someone can quote me the
+- * line from the intel manual that guarantees an IPI to
+- * multiple CPUs is retried _only_ on the erroring CPUs
+- * its staying as a return
+- *
+- * BUG();
+- */
+-
+- if (f->flush_mm == read_pda(active_mm)) {
+- if (read_pda(mmu_state) == TLBSTATE_OK) {
+- if (f->flush_va == TLB_FLUSH_ALL)
+- local_flush_tlb();
+- else
+- __flush_tlb_one(f->flush_va);
+- } else
+- leave_mm(cpu);
+- }
+-out:
+- ack_APIC_irq();
+- cpu_clear(cpu, f->flush_cpumask);
+- add_pda(irq_tlb_count, 1);
+-}
+-
+-void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
+- unsigned long va)
+-{
+- int sender;
+- union smp_flush_state *f;
+- cpumask_t cpumask = *cpumaskp;
+-
+- /* Caller has disabled preemption */
+- sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS;
+- f = &per_cpu(flush_state, sender);
+-
+- /*
+- * Could avoid this lock when
+- * num_online_cpus() <= NUM_INVALIDATE_TLB_VECTORS, but it is
+- * probably not worth checking this for a cache-hot lock.
+- */
+- spin_lock(&f->tlbstate_lock);
+-
+- f->flush_mm = mm;
+- f->flush_va = va;
+- cpus_or(f->flush_cpumask, cpumask, f->flush_cpumask);
+-
+- /*
+- * We have to send the IPI only to
+- * CPUs affected.
+- */
+- send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR_START + sender);
+-
+- while (!cpus_empty(f->flush_cpumask))
+- cpu_relax();
+-
+- f->flush_mm = NULL;
+- f->flush_va = 0;
+- spin_unlock(&f->tlbstate_lock);
+-}
+-
+-int __cpuinit init_smp_flush(void)
+-{
+- int i;
+-
+- for_each_cpu_mask(i, cpu_possible_map) {
+- spin_lock_init(&per_cpu(flush_state, i).tlbstate_lock);
+- }
+- return 0;
+-}
+-core_initcall(init_smp_flush);
+-
+-void flush_tlb_current_task(void)
+-{
+- struct mm_struct *mm = current->mm;
+- cpumask_t cpu_mask;
+-
+- preempt_disable();
+- cpu_mask = mm->cpu_vm_mask;
+- cpu_clear(smp_processor_id(), cpu_mask);
+-
+- local_flush_tlb();
+- if (!cpus_empty(cpu_mask))
+- flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
+- preempt_enable();
+-}
+-
+-void flush_tlb_mm (struct mm_struct * mm)
+-{
+- cpumask_t cpu_mask;
+-
+- preempt_disable();
+- cpu_mask = mm->cpu_vm_mask;
+- cpu_clear(smp_processor_id(), cpu_mask);
+-
+- if (current->active_mm == mm) {
+- if (current->mm)
+- local_flush_tlb();
+- else
+- leave_mm(smp_processor_id());
+- }
+- if (!cpus_empty(cpu_mask))
+- flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
+-
+- preempt_enable();
+-}
+-
+-void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
+-{
+- struct mm_struct *mm = vma->vm_mm;
+- cpumask_t cpu_mask;
+-
+- preempt_disable();
+- cpu_mask = mm->cpu_vm_mask;
+- cpu_clear(smp_processor_id(), cpu_mask);
+-
+- if (current->active_mm == mm) {
+- if(current->mm)
+- __flush_tlb_one(va);
+- else
+- leave_mm(smp_processor_id());
+- }
+-
+- if (!cpus_empty(cpu_mask))
+- flush_tlb_others(cpu_mask, mm, va);
+-
+- preempt_enable();
+-}
+-
+-static void do_flush_tlb_all(void* info)
+-{
+- unsigned long cpu = smp_processor_id();
+-
+- __flush_tlb_all();
+- if (read_pda(mmu_state) == TLBSTATE_LAZY)
+- leave_mm(cpu);
+-}
+-
+-void flush_tlb_all(void)
+-{
+- on_each_cpu(do_flush_tlb_all, NULL, 1, 1);
+-}
+-#endif /* Xen */
+-
+-/*
+- * this function sends a 'reschedule' IPI to another CPU.
+- * it goes straight through and wastes no time serializing
+- * anything. Worst case is that we lose a reschedule ...
+- */
+-
+-void smp_send_reschedule(int cpu)
+-{
+- send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
+-}
+-
+-/*
+- * Structure and data for smp_call_function(). This is designed to minimise
+- * static memory requirements. It also looks cleaner.
+- */
+-static DEFINE_SPINLOCK(call_lock);
+-
+-struct call_data_struct {
+- void (*func) (void *info);
+- void *info;
+- atomic_t started;
+- atomic_t finished;
+- int wait;
+-};
+-
+-static struct call_data_struct * call_data;
+-
+-void lock_ipi_call_lock(void)
+-{
+- spin_lock_irq(&call_lock);
+-}
+-
+-void unlock_ipi_call_lock(void)
+-{
+- spin_unlock_irq(&call_lock);
+-}
+-
+-/*
+- * this function sends a 'generic call function' IPI to all other CPU
+- * of the system defined in the mask.
+- */
+-static int __smp_call_function_mask(cpumask_t mask,
+- void (*func)(void *), void *info,
+- int wait)
+-{
+- struct call_data_struct data;
+- cpumask_t allbutself;
+- int cpus;
+-
+- allbutself = cpu_online_map;
+- cpu_clear(smp_processor_id(), allbutself);
+-
+- cpus_and(mask, mask, allbutself);
+- cpus = cpus_weight(mask);
+-
+- if (!cpus)
+- return 0;
+-
+- data.func = func;
+- data.info = info;
+- atomic_set(&data.started, 0);
+- data.wait = wait;
+- if (wait)
+- atomic_set(&data.finished, 0);
+-
+- call_data = &data;
+- wmb();
+-
+- /* Send a message to other CPUs */
+- if (cpus_equal(mask, allbutself))
+- send_IPI_allbutself(CALL_FUNCTION_VECTOR);
+- else
+- send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
+-
+- /* Wait for response */
+- while (atomic_read(&data.started) != cpus)
+- cpu_relax();
+-
+- if (!wait)
+- return 0;
+-
+- while (atomic_read(&data.finished) != cpus)
+- cpu_relax();
+-
+- return 0;
+-}
+-/**
+- * smp_call_function_mask(): Run a function on a set of other CPUs.
+- * @mask: The set of cpus to run on. Must not include the current cpu.
+- * @func: The function to run. This must be fast and non-blocking.
+- * @info: An arbitrary pointer to pass to the function.
+- * @wait: If true, wait (atomically) until function has completed on other CPUs.
+- *
+- * Returns 0 on success, else a negative status code.
+- *
+- * If @wait is true, then returns once @func has returned; otherwise
+- * it returns just before the target cpu calls @func.
+- *
+- * You must not call this function with disabled interrupts or from a
+- * hardware interrupt handler or from a bottom half handler.
+- */
+-int smp_call_function_mask(cpumask_t mask,
+- void (*func)(void *), void *info,
+- int wait)
+-{
+- int ret;
+-
+- /* Can deadlock when called with interrupts disabled */
+- WARN_ON(irqs_disabled());
+-
+- spin_lock(&call_lock);
+- ret = __smp_call_function_mask(mask, func, info, wait);
+- spin_unlock(&call_lock);
+- return ret;
+-}
+-EXPORT_SYMBOL(smp_call_function_mask);
+-
+-/*
+- * smp_call_function_single - Run a function on a specific CPU
+- * @func: The function to run. This must be fast and non-blocking.
+- * @info: An arbitrary pointer to pass to the function.
+- * @nonatomic: Currently unused.
+- * @wait: If true, wait until function has completed on other CPUs.
+- *
+- * Retrurns 0 on success, else a negative status code.
+- *
+- * Does not return until the remote CPU is nearly ready to execute <func>
+- * or is or has executed.
+- */
+-
+-int smp_call_function_single (int cpu, void (*func) (void *info), void *info,
+- int nonatomic, int wait)
+-{
+- /* prevent preemption and reschedule on another processor */
+- int ret, me = get_cpu();
+-
+- /* Can deadlock when called with interrupts disabled */
+- WARN_ON(irqs_disabled());
+-
+- if (cpu == me) {
+- local_irq_disable();
+- func(info);
+- local_irq_enable();
+- put_cpu();
+- return 0;
+- }
+-
+- ret = smp_call_function_mask(cpumask_of_cpu(cpu), func, info, wait);
+-
+- put_cpu();
+- return ret;
+-}
+-EXPORT_SYMBOL(smp_call_function_single);
+-
+-/*
+- * smp_call_function - run a function on all other CPUs.
+- * @func: The function to run. This must be fast and non-blocking.
+- * @info: An arbitrary pointer to pass to the function.
+- * @nonatomic: currently unused.
+- * @wait: If true, wait (atomically) until function has completed on other
+- * CPUs.
+- *
+- * Returns 0 on success, else a negative status code. Does not return until
+- * remote CPUs are nearly ready to execute func or are or have executed.
+- *
+- * You must not call this function with disabled interrupts or from a
+- * hardware interrupt handler or from a bottom half handler.
+- * Actually there are a few legal cases, like panic.
+- */
+-int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
+- int wait)
+-{
+- return smp_call_function_mask(cpu_online_map, func, info, wait);
+-}
+-EXPORT_SYMBOL(smp_call_function);
+-
+-static void stop_this_cpu(void *dummy)
+-{
+- local_irq_disable();
+- /*
+- * Remove this CPU:
+- */
+- cpu_clear(smp_processor_id(), cpu_online_map);
+- disable_all_local_evtchn();
+- for (;;)
+- halt();
+-}
+-
+-void smp_send_stop(void)
+-{
+- int nolock;
+- unsigned long flags;
+-
+-#ifndef CONFIG_XEN
+- if (reboot_force)
+- return;
+-#endif
+-
+- /* Don't deadlock on the call lock in panic */
+- nolock = !spin_trylock(&call_lock);
+- local_irq_save(flags);
+- __smp_call_function_mask(cpu_online_map, stop_this_cpu, NULL, 0);
+- if (!nolock)
+- spin_unlock(&call_lock);
+- disable_all_local_evtchn();
+- local_irq_restore(flags);
+-}
+-
+-/*
+- * Reschedule call back. Nothing to do,
+- * all the work is done automatically when
+- * we return from the interrupt.
+- */
+-#ifndef CONFIG_XEN
+-asmlinkage void smp_reschedule_interrupt(void)
+-#else
+-asmlinkage irqreturn_t smp_reschedule_interrupt(int irq, void *ctx)
+-#endif
+-{
+-#ifndef CONFIG_XEN
+- ack_APIC_irq();
+-#endif
+- add_pda(irq_resched_count, 1);
+-#ifdef CONFIG_XEN
+- return IRQ_HANDLED;
+-#endif
+-}
+-
+-#ifndef CONFIG_XEN
+-asmlinkage void smp_call_function_interrupt(void)
+-#else
+-asmlinkage irqreturn_t smp_call_function_interrupt(int irq, void *ctx)
+-#endif
+-{
+- void (*func) (void *info) = call_data->func;
+- void *info = call_data->info;
+- int wait = call_data->wait;
+-
+-#ifndef CONFIG_XEN
+- ack_APIC_irq();
+-#endif
+- /*
+- * Notify initiating CPU that I've grabbed the data and am
+- * about to execute the function
+- */
+- mb();
+- atomic_inc(&call_data->started);
+- /*
+- * At this point the info structure may be out of scope unless wait==1
+- */
+- exit_idle();
+- irq_enter();
+- (*func)(info);
+- add_pda(irq_call_count, 1);
+- irq_exit();
+- if (wait) {
+- mb();
+- atomic_inc(&call_data->finished);
+- }
+-#ifdef CONFIG_XEN
+- return IRQ_HANDLED;
+-#endif
+-}
+--- sle11-2009-05-14.orig/arch/x86/kernel/time_32-xen.c 2009-03-24 10:12:48.000000000 +0100
++++ sle11-2009-05-14/arch/x86/kernel/time_32-xen.c 2009-03-24 10:13:09.000000000 +0100
+@@ -699,8 +699,6 @@ int xen_update_persistent_clock(void)
+ return 0;
+ }
+
+-extern void (*late_time_init)(void);
+-
+ /* Dynamically-mapped IRQ. */
+ DEFINE_PER_CPU(int, timer_irq);
+
+--- sle11-2009-05-14.orig/arch/x86/kernel/traps_32-xen.c 2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/arch/x86/kernel/traps_32-xen.c 2009-03-16 16:38:05.000000000 +0100
+@@ -9,26 +9,28 @@
+ * 'Traps.c' handles hardware traps and faults after we have saved some
+ * state in 'asm.s'.
+ */
+-#include <linux/sched.h>
++#include <linux/interrupt.h>
++#include <linux/kallsyms.h>
++#include <linux/spinlock.h>
++#include <linux/highmem.h>
++#include <linux/kprobes.h>
++#include <linux/uaccess.h>
++#include <linux/utsname.h>
++#include <linux/kdebug.h>
+ #include <linux/kernel.h>
++#include <linux/module.h>
++#include <linux/ptrace.h>
+ #include <linux/string.h>
++#include <linux/unwind.h>
++#include <linux/delay.h>
+ #include <linux/errno.h>
++#include <linux/kexec.h>
++#include <linux/sched.h>
+ #include <linux/timer.h>
+-#include <linux/mm.h>
+ #include <linux/init.h>
+-#include <linux/delay.h>
+-#include <linux/spinlock.h>
+-#include <linux/interrupt.h>
+-#include <linux/highmem.h>
+-#include <linux/kallsyms.h>
+-#include <linux/ptrace.h>
+-#include <linux/utsname.h>
+-#include <linux/kprobes.h>
+-#include <linux/kexec.h>
+-#include <linux/unwind.h>
+-#include <linux/uaccess.h>
+-#include <linux/nmi.h>
+ #include <linux/bug.h>
++#include <linux/nmi.h>
++#include <linux/mm.h>
+
+ #ifdef CONFIG_EISA
+ #include <linux/ioport.h>
+@@ -43,21 +45,18 @@
+ #include <linux/edac.h>
+ #endif
+
++#include <asm/arch_hooks.h>
++#include <asm/stacktrace.h>
+ #include <asm/processor.h>
+-#include <asm/system.h>
+-#include <asm/io.h>
+-#include <asm/atomic.h>
+ #include <asm/debugreg.h>
++#include <asm/atomic.h>
++#include <asm/system.h>
++#include <asm/unwind.h>
+ #include <asm/desc.h>
+ #include <asm/i387.h>
+ #include <asm/nmi.h>
+-#include <asm/unwind.h>
+ #include <asm/smp.h>
+-#include <asm/arch_hooks.h>
+-#include <linux/kdebug.h>
+-#include <asm/stacktrace.h>
+-
+-#include <linux/module.h>
++#include <asm/io.h>
+
+ #include "mach_traps.h"
+
+@@ -71,7 +70,7 @@ EXPORT_SYMBOL_GPL(used_vectors);
+ asmlinkage int system_call(void);
+
+ /* Do we ignore FPU interrupts ? */
+-char ignore_fpu_irq = 0;
++char ignore_fpu_irq;
+
+ #ifndef CONFIG_X86_NO_IDT
+ /*
+@@ -113,12 +112,13 @@ static unsigned int code_bytes = 64;
+ void printk_address(unsigned long address, int reliable)
+ {
+ #ifdef CONFIG_KALLSYMS
+- unsigned long offset = 0, symsize;
++ char namebuf[KSYM_NAME_LEN];
++ unsigned long offset = 0;
++ unsigned long symsize;
+ const char *symname;
+- char *modname;
+- char *delim = ":";
+- char namebuf[128];
+ char reliab[4] = "";
++ char *delim = ":";
++ char *modname;
+
+ symname = kallsyms_lookup(address, &symsize, &offset,
+ &modname, namebuf);
+@@ -146,13 +146,14 @@ static inline int valid_stack_ptr(struct
+
+ /* The form of the top of the frame on the stack */
+ struct stack_frame {
+- struct stack_frame *next_frame;
+- unsigned long return_address;
++ struct stack_frame *next_frame;
++ unsigned long return_address;
+ };
+
+-static inline unsigned long print_context_stack(struct thread_info *tinfo,
+- unsigned long *stack, unsigned long bp,
+- const struct stacktrace_ops *ops, void *data)
++static inline unsigned long
++print_context_stack(struct thread_info *tinfo,
++ unsigned long *stack, unsigned long bp,
++ const struct stacktrace_ops *ops, void *data)
+ {
+ struct stack_frame *frame = (struct stack_frame *)bp;
+
+@@ -174,7 +175,7 @@ static inline unsigned long print_contex
+ return bp;
+ }
+
+-#define MSG(msg) ops->warning(data, msg)
++#define MSG(msg) ops->warning(data, msg)
+
+ void dump_trace(struct task_struct *task, struct pt_regs *regs,
+ unsigned long *stack, unsigned long bp,
+@@ -185,6 +186,7 @@ void dump_trace(struct task_struct *task
+
+ if (!stack) {
+ unsigned long dummy;
++
+ stack = &dummy;
+ if (task != current)
+ stack = (unsigned long *)task->thread.sp;
+@@ -194,7 +196,7 @@ void dump_trace(struct task_struct *task
+ if (!bp) {
+ if (task == current) {
+ /* Grab bp right from our regs */
+- asm ("movl %%ebp, %0" : "=r" (bp) : );
++ asm("movl %%ebp, %0" : "=r" (bp) :);
+ } else {
+ /* bp is the last reg pushed by switch_to */
+ bp = *(unsigned long *) task->thread.sp;
+@@ -204,15 +206,18 @@ void dump_trace(struct task_struct *task
+
+ while (1) {
+ struct thread_info *context;
++
+ context = (struct thread_info *)
+ ((unsigned long)stack & (~(THREAD_SIZE - 1)));
+ bp = print_context_stack(context, stack, bp, ops, data);
+- /* Should be after the line below, but somewhere
+- in early boot context comes out corrupted and we
+- can't reference it -AK */
++ /*
++ * Should be after the line below, but somewhere
++ * in early boot context comes out corrupted and we
++ * can't reference it:
++ */
+ if (ops->stack(data, "IRQ") < 0)
+ break;
+- stack = (unsigned long*)context->previous_esp;
++ stack = (unsigned long *)context->previous_esp;
+ if (!stack)
+ break;
+ touch_nmi_watchdog();
+@@ -251,15 +256,15 @@ static void print_trace_address(void *da
+ }
+
+ static const struct stacktrace_ops print_trace_ops = {
+- .warning = print_trace_warning,
+- .warning_symbol = print_trace_warning_symbol,
+- .stack = print_trace_stack,
+- .address = print_trace_address,
++ .warning = print_trace_warning,
++ .warning_symbol = print_trace_warning_symbol,
++ .stack = print_trace_stack,
++ .address = print_trace_address,
+ };
+
+ static void
+ show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
+- unsigned long *stack, unsigned long bp, char *log_lvl)
++ unsigned long *stack, unsigned long bp, char *log_lvl)
+ {
+ dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
+ printk("%s =======================\n", log_lvl);
+@@ -271,21 +276,22 @@ void show_trace(struct task_struct *task
+ show_trace_log_lvl(task, regs, stack, bp, "");
+ }
+
+-static void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
+- unsigned long *sp, unsigned long bp, char *log_lvl)
++static void
++show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
++ unsigned long *sp, unsigned long bp, char *log_lvl)
+ {
+ unsigned long *stack;
+ int i;
+
+ if (sp == NULL) {
+ if (task)
+- sp = (unsigned long*)task->thread.sp;
++ sp = (unsigned long *)task->thread.sp;
+ else
+ sp = (unsigned long *)&sp;
+ }
+
+ stack = sp;
+- for(i = 0; i < kstack_depth_to_print; i++) {
++ for (i = 0; i < kstack_depth_to_print; i++) {
+ if (kstack_end(stack))
+ break;
+ if (i && ((i % 8) == 0))
+@@ -293,6 +299,7 @@ static void show_stack_log_lvl(struct ta
+ printk("%08lx ", *stack++);
+ }
+ printk("\n%sCall Trace:\n", log_lvl);
++
+ show_trace_log_lvl(task, regs, sp, bp, log_lvl);
+ }
+
+@@ -307,8 +314,8 @@ void show_stack(struct task_struct *task
+ */
+ void dump_stack(void)
+ {
+- unsigned long stack;
+ unsigned long bp = 0;
++ unsigned long stack;
+
+ #ifdef CONFIG_FRAME_POINTER
+ if (!bp)
+@@ -320,6 +327,7 @@ void dump_stack(void)
+ init_utsname()->release,
+ (int)strcspn(init_utsname()->version, " "),
+ init_utsname()->version);
++
+ show_trace(current, NULL, &stack, bp);
+ }
+
+@@ -331,6 +339,7 @@ void show_registers(struct pt_regs *regs
+
+ print_modules();
+ __show_registers(regs, 0);
++
+ printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)",
+ TASK_COMM_LEN, current->comm, task_pid_nr(current),
+ current_thread_info(), current, task_thread_info(current));
+@@ -339,10 +348,10 @@ void show_registers(struct pt_regs *regs
+ * time of the fault..
+ */
+ if (!user_mode_vm(regs)) {
+- u8 *ip;
+ unsigned int code_prologue = code_bytes * 43 / 64;
+ unsigned int code_len = code_bytes;
+ unsigned char c;
++ u8 *ip;
+
+ printk("\n" KERN_EMERG "Stack: ");
+ show_stack_log_lvl(NULL, regs, &regs->sp, 0, KERN_EMERG);
+@@ -369,7 +378,7 @@ void show_registers(struct pt_regs *regs
+ }
+ }
+ printk("\n");
+-}
++}
+
+ int is_valid_bugaddr(unsigned long ip)
+ {
+@@ -385,10 +394,10 @@ int is_valid_bugaddr(unsigned long ip)
+
+ static int die_counter;
+
+-int __kprobes __die(const char * str, struct pt_regs * regs, long err)
++int __kprobes __die(const char *str, struct pt_regs *regs, long err)
+ {
+- unsigned long sp;
+ unsigned short ss;
++ unsigned long sp;
+
+ printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter);
+ #ifdef CONFIG_PREEMPT
+@@ -403,8 +412,8 @@ int __kprobes __die(const char * str, st
+ printk("\n");
+
+ if (notify_die(DIE_OOPS, str, regs, err,
+- current->thread.trap_no, SIGSEGV) !=
+- NOTIFY_STOP) {
++ current->thread.trap_no, SIGSEGV) != NOTIFY_STOP) {
++
+ show_registers(regs);
+ /* Executive summary in case the oops scrolled away */
+ sp = (unsigned long) (&regs->sp);
+@@ -416,17 +425,18 @@ int __kprobes __die(const char * str, st
+ printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip);
+ print_symbol("%s", regs->ip);
+ printk(" SS:ESP %04x:%08lx\n", ss, sp);
++
+ return 0;
+- } else {
+- return 1;
+ }
++
++ return 1;
+ }
+
+ /*
+- * This is gone through when something in the kernel has done something bad and
+- * is about to be terminated.
++ * This is gone through when something in the kernel has done something bad
++ * and is about to be terminated:
+ */
+-void die(const char * str, struct pt_regs * regs, long err)
++void die(const char *str, struct pt_regs *regs, long err)
+ {
+ static struct {
+ raw_spinlock_t lock;
+@@ -448,8 +458,9 @@ void die(const char * str, struct pt_reg
+ die.lock_owner = smp_processor_id();
+ die.lock_owner_depth = 0;
+ bust_spinlocks(1);
+- } else
++ } else {
+ raw_local_irq_save(flags);
++ }
+
+ if (++die.lock_owner_depth < 3) {
+ report_bug(regs->ip, regs);
+@@ -482,19 +493,20 @@ void die(const char * str, struct pt_reg
+ do_exit(SIGSEGV);
+ }
+
+-static inline void die_if_kernel(const char * str, struct pt_regs * regs, long err)
++static inline void
++die_if_kernel(const char *str, struct pt_regs *regs, long err)
+ {
+ if (!user_mode_vm(regs))
+ die(str, regs, err);
+ }
+
+-static void __kprobes do_trap(int trapnr, int signr, char *str, int vm86,
+- struct pt_regs * regs, long error_code,
+- siginfo_t *info)
++static void __kprobes
++do_trap(int trapnr, int signr, char *str, int vm86, struct pt_regs *regs,
++ long error_code, siginfo_t *info)
+ {
+ struct task_struct *tsk = current;
+
+- if (regs->flags & VM_MASK) {
++ if (regs->flags & X86_VM_MASK) {
+ if (vm86)
+ goto vm86_trap;
+ goto trap_signal;
+@@ -503,109 +515,112 @@ static void __kprobes do_trap(int trapnr
+ if (!user_mode(regs))
+ goto kernel_trap;
+
+- trap_signal: {
+- /*
+- * We want error_code and trap_no set for userspace faults and
+- * kernelspace faults which result in die(), but not
+- * kernelspace faults which are fixed up. die() gives the
+- * process no chance to handle the signal and notice the
+- * kernel fault information, so that won't result in polluting
+- * the information about previously queued, but not yet
+- * delivered, faults. See also do_general_protection below.
+- */
+- tsk->thread.error_code = error_code;
+- tsk->thread.trap_no = trapnr;
++trap_signal:
++ /*
++ * We want error_code and trap_no set for userspace faults and
++ * kernelspace faults which result in die(), but not
++ * kernelspace faults which are fixed up. die() gives the
++ * process no chance to handle the signal and notice the
++ * kernel fault information, so that won't result in polluting
++ * the information about previously queued, but not yet
++ * delivered, faults. See also do_general_protection below.
++ */
++ tsk->thread.error_code = error_code;
++ tsk->thread.trap_no = trapnr;
+
+- if (info)
+- force_sig_info(signr, info, tsk);
+- else
+- force_sig(signr, tsk);
+- return;
+- }
++ if (info)
++ force_sig_info(signr, info, tsk);
++ else
++ force_sig(signr, tsk);
++ return;
+
+- kernel_trap: {
+- if (!fixup_exception(regs)) {
+- tsk->thread.error_code = error_code;
+- tsk->thread.trap_no = trapnr;
+- die(str, regs, error_code);
+- }
+- return;
++kernel_trap:
++ if (!fixup_exception(regs)) {
++ tsk->thread.error_code = error_code;
++ tsk->thread.trap_no = trapnr;
++ die(str, regs, error_code);
+ }
++ return;
+
+- vm86_trap: {
+- int ret = handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, trapnr);
+- if (ret) goto trap_signal;
+- return;
+- }
++vm86_trap:
++ if (handle_vm86_trap((struct kernel_vm86_regs *) regs,
++ error_code, trapnr))
++ goto trap_signal;
++ return;
+ }
+
+-#define DO_ERROR(trapnr, signr, str, name) \
+-void do_##name(struct pt_regs * regs, long error_code) \
+-{ \
+- if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
+- == NOTIFY_STOP) \
+- return; \
+- do_trap(trapnr, signr, str, 0, regs, error_code, NULL); \
+-}
+-
+-#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr, irq) \
+-void do_##name(struct pt_regs * regs, long error_code) \
+-{ \
+- siginfo_t info; \
+- if (irq) \
+- local_irq_enable(); \
+- info.si_signo = signr; \
+- info.si_errno = 0; \
+- info.si_code = sicode; \
+- info.si_addr = (void __user *)siaddr; \
+- if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
+- == NOTIFY_STOP) \
+- return; \
+- do_trap(trapnr, signr, str, 0, regs, error_code, &info); \
+-}
+-
+-#define DO_VM86_ERROR(trapnr, signr, str, name) \
+-void do_##name(struct pt_regs * regs, long error_code) \
+-{ \
+- if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
+- == NOTIFY_STOP) \
+- return; \
+- do_trap(trapnr, signr, str, 1, regs, error_code, NULL); \
+-}
+-
+-#define DO_VM86_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \
+-void do_##name(struct pt_regs * regs, long error_code) \
+-{ \
+- siginfo_t info; \
+- info.si_signo = signr; \
+- info.si_errno = 0; \
+- info.si_code = sicode; \
+- info.si_addr = (void __user *)siaddr; \
+- trace_hardirqs_fixup(); \
+- if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
+- == NOTIFY_STOP) \
+- return; \
+- do_trap(trapnr, signr, str, 1, regs, error_code, &info); \
++#define DO_ERROR(trapnr, signr, str, name) \
++void do_##name(struct pt_regs *regs, long error_code) \
++{ \
++ trace_hardirqs_fixup(); \
++ if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
++ == NOTIFY_STOP) \
++ return; \
++ do_trap(trapnr, signr, str, 0, regs, error_code, NULL); \
++}
++
++#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr, irq) \
++void do_##name(struct pt_regs *regs, long error_code) \
++{ \
++ siginfo_t info; \
++ if (irq) \
++ local_irq_enable(); \
++ info.si_signo = signr; \
++ info.si_errno = 0; \
++ info.si_code = sicode; \
++ info.si_addr = (void __user *)siaddr; \
++ if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
++ == NOTIFY_STOP) \
++ return; \
++ do_trap(trapnr, signr, str, 0, regs, error_code, &info); \
++}
++
++#define DO_VM86_ERROR(trapnr, signr, str, name) \
++void do_##name(struct pt_regs *regs, long error_code) \
++{ \
++ if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
++ == NOTIFY_STOP) \
++ return; \
++ do_trap(trapnr, signr, str, 1, regs, error_code, NULL); \
++}
++
++#define DO_VM86_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \
++void do_##name(struct pt_regs *regs, long error_code) \
++{ \
++ siginfo_t info; \
++ info.si_signo = signr; \
++ info.si_errno = 0; \
++ info.si_code = sicode; \
++ info.si_addr = (void __user *)siaddr; \
++ trace_hardirqs_fixup(); \
++ if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
++ == NOTIFY_STOP) \
++ return; \
++ do_trap(trapnr, signr, str, 1, regs, error_code, &info); \
+ }
+
+-DO_VM86_ERROR_INFO( 0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip)
++DO_VM86_ERROR_INFO(0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip)
+ #ifndef CONFIG_KPROBES
+-DO_VM86_ERROR( 3, SIGTRAP, "int3", int3)
++DO_VM86_ERROR(3, SIGTRAP, "int3", int3)
+ #endif
+-DO_VM86_ERROR( 4, SIGSEGV, "overflow", overflow)
+-DO_VM86_ERROR( 5, SIGSEGV, "bounds", bounds)
+-DO_ERROR_INFO( 6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip, 0)
+-DO_ERROR( 9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun)
++DO_VM86_ERROR(4, SIGSEGV, "overflow", overflow)
++DO_VM86_ERROR(5, SIGSEGV, "bounds", bounds)
++DO_ERROR_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip, 0)
++DO_ERROR(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun)
+ DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
+ DO_ERROR(11, SIGBUS, "segment not present", segment_not_present)
+ DO_ERROR(12, SIGBUS, "stack segment", stack_segment)
+ DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0, 0)
+-DO_ERROR_INFO(32, SIGSEGV, "iret exception", iret_error, ILL_BADSTK, 0, 1)
++DO_ERROR_INFO(32, SIGILL, "iret exception", iret_error, ILL_BADSTK, 0, 1)
+
+ void __kprobes do_general_protection(struct pt_regs * regs,
+ long error_code)
+ {
+- if (regs->flags & VM_MASK)
++ struct thread_struct *thread;
++
++ thread = &current->thread;
++
++ if (regs->flags & X86_VM_MASK)
+ goto gp_in_vm86;
+
+ if (!user_mode(regs))
+@@ -613,6 +628,7 @@ void __kprobes do_general_protection(str
+
+ current->thread.error_code = error_code;
+ current->thread.trap_no = 13;
++
+ if (show_unhandled_signals && unhandled_signal(current, SIGSEGV) &&
+ printk_ratelimit()) {
+ printk(KERN_INFO
+@@ -642,22 +658,25 @@ gp_in_kernel:
+ }
+ }
+
+-static __kprobes void
+-mem_parity_error(unsigned char reason, struct pt_regs * regs)
++static notrace __kprobes void
++mem_parity_error(unsigned char reason, struct pt_regs *regs)
+ {
+- printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x on "
+- "CPU %d.\n", reason, smp_processor_id());
+- printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n");
++ printk(KERN_EMERG
++ "Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
++ reason, smp_processor_id());
++
++ printk(KERN_EMERG
++ "You have some hardware problem, likely on the PCI bus.\n");
+
+ #if defined(CONFIG_EDAC)
+- if(edac_handler_set()) {
++ if (edac_handler_set()) {
+ edac_atomic_assert_error();
+ return;
+ }
+ #endif
+
+ if (panic_on_unrecovered_nmi)
+- panic("NMI: Not continuing");
++ panic("NMI: Not continuing");
+
+ printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
+
+@@ -665,8 +684,8 @@ mem_parity_error(unsigned char reason, s
+ clear_mem_error(reason);
+ }
+
+-static __kprobes void
+-io_check_error(unsigned char reason, struct pt_regs * regs)
++static notrace __kprobes void
++io_check_error(unsigned char reason, struct pt_regs *regs)
+ {
+ printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n");
+ show_registers(regs);
+@@ -675,38 +694,43 @@ io_check_error(unsigned char reason, str
+ clear_io_check_error(reason);
+ }
+
+-static __kprobes void
+-unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
++static notrace __kprobes void
++unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
+ {
++ if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
++ return;
+ #ifdef CONFIG_MCA
+- /* Might actually be able to figure out what the guilty party
+- * is. */
+- if( MCA_bus ) {
++ /*
++ * Might actually be able to figure out what the guilty party
++ * is:
++ */
++ if (MCA_bus) {
+ mca_handle_nmi();
+ return;
+ }
+ #endif
+- printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x on "
+- "CPU %d.\n", reason, smp_processor_id());
++ printk(KERN_EMERG
++ "Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
++ reason, smp_processor_id());
++
+ printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n");
+ if (panic_on_unrecovered_nmi)
+- panic("NMI: Not continuing");
++ panic("NMI: Not continuing");
+
+ printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
+ }
+
+ static DEFINE_SPINLOCK(nmi_print_lock);
+
+-void __kprobes die_nmi(struct pt_regs *regs, const char *msg)
++void notrace __kprobes die_nmi(struct pt_regs *regs, const char *msg)
+ {
+- if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 2, SIGINT) ==
+- NOTIFY_STOP)
++ if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 2, SIGINT) == NOTIFY_STOP)
+ return;
+
+ spin_lock(&nmi_print_lock);
+ /*
+ * We are in trouble anyway, lets at least try
+- * to get a message out.
++ * to get a message out:
+ */
+ bust_spinlocks(1);
+ printk(KERN_EMERG "%s", msg);
+@@ -717,9 +741,10 @@ void __kprobes die_nmi(struct pt_regs *r
+ spin_unlock(&nmi_print_lock);
+ bust_spinlocks(0);
+
+- /* If we are in kernel we are probably nested up pretty bad
+- * and might aswell get out now while we still can.
+- */
++ /*
++ * If we are in kernel we are probably nested up pretty bad
++ * and might aswell get out now while we still can:
++ */
+ if (!user_mode_vm(regs)) {
+ current->thread.trap_no = 2;
+ crash_kexec(regs);
+@@ -728,14 +753,14 @@ void __kprobes die_nmi(struct pt_regs *r
+ do_exit(SIGSEGV);
+ }
+
+-static __kprobes void default_do_nmi(struct pt_regs * regs)
++static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
+ {
+ unsigned char reason = 0;
+
+- /* Only the BSP gets external NMIs from the system. */
++ /* Only the BSP gets external NMIs from the system: */
+ if (!smp_processor_id())
+ reason = get_nmi_reason();
+-
++
+ if (!(reason & 0xc0)) {
+ if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT)
+ == NOTIFY_STOP)
+@@ -748,8 +773,10 @@ static __kprobes void default_do_nmi(str
+ if (nmi_watchdog_tick(regs, reason))
+ return;
+ if (!do_nmi_callback(regs, smp_processor_id()))
+-#endif
+ unknown_nmi_error(reason, regs);
++#else
++ unknown_nmi_error(reason, regs);
++#endif
+
+ return;
+ }
+@@ -761,14 +788,14 @@ static __kprobes void default_do_nmi(str
+ io_check_error(reason, regs);
+ /*
+ * Reassert NMI in case it became active meanwhile
+- * as it's edge-triggered.
++ * as it's edge-triggered:
+ */
+ reassert_nmi();
+ }
+
+ static int ignore_nmis;
+
+-__kprobes void do_nmi(struct pt_regs * regs, long error_code)
++notrace __kprobes void do_nmi(struct pt_regs *regs, long error_code)
+ {
+ int cpu;
+
+@@ -804,9 +831,12 @@ void __kprobes do_int3(struct pt_regs *r
+ if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP)
+ == NOTIFY_STOP)
+ return;
+- /* This is an interrupt gate, because kprobes wants interrupts
+- disabled. Normal trap handlers don't. */
++ /*
++ * This is an interrupt gate, because kprobes wants interrupts
++ * disabled. Normal trap handlers don't.
++ */
+ restore_interrupts(regs);
++
+ do_trap(3, SIGTRAP, "int3", 1, regs, error_code, NULL);
+ }
+ #endif
+@@ -821,7 +851,7 @@ void __kprobes do_int3(struct pt_regs *r
+ * from user space. Such code must not hold kernel locks (since it
+ * can equally take a page fault), therefore it is safe to call
+ * force_sig_info even though that claims and releases locks.
+- *
++ *
+ * Code in ./signal.c ensures that the debug control register
+ * is restored before we deliver any signal, and therefore that
+ * user code runs with the correct debug control register even though
+@@ -833,10 +863,10 @@ void __kprobes do_int3(struct pt_regs *r
+ * find every occurrence of the TF bit that could be saved away even
+ * by user code)
+ */
+-void __kprobes do_debug(struct pt_regs * regs, long error_code)
++void __kprobes do_debug(struct pt_regs *regs, long error_code)
+ {
+- unsigned int condition;
+ struct task_struct *tsk = current;
++ unsigned int condition;
+
+ trace_hardirqs_fixup();
+
+@@ -861,7 +891,7 @@ void __kprobes do_debug(struct pt_regs *
+ goto clear_dr7;
+ }
+
+- if (regs->flags & VM_MASK)
++ if (regs->flags & X86_VM_MASK)
+ goto debug_vm86;
+
+ /* Save debug status register where ptrace can see it */
+@@ -884,7 +914,8 @@ void __kprobes do_debug(struct pt_regs *
+ /* Ok, finally something we can handle */
+ send_sigtrap(tsk, regs, error_code);
+
+- /* Disable additional traps. They'll be re-enabled when
++ /*
++ * Disable additional traps. They'll be re-enabled when
+ * the signal is delivered.
+ */
+ clear_dr7:
+@@ -897,7 +928,7 @@ debug_vm86:
+
+ clear_TF_reenable:
+ set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
+- regs->flags &= ~TF_MASK;
++ regs->flags &= ~X86_EFLAGS_TF;
+ return;
+ }
+
+@@ -908,9 +939,10 @@ clear_TF_reenable:
+ */
+ void math_error(void __user *ip)
+ {
+- struct task_struct * task;
++ struct task_struct *task;
++ unsigned short cwd;
++ unsigned short swd;
+ siginfo_t info;
+- unsigned short cwd, swd;
+
+ /*
+ * Save the info for the exception handler and clear the error.
+@@ -936,36 +968,36 @@ void math_error(void __user *ip)
+ cwd = get_fpu_cwd(task);
+ swd = get_fpu_swd(task);
+ switch (swd & ~cwd & 0x3f) {
+- case 0x000: /* No unmasked exception */
+- return;
+- default: /* Multiple exceptions */
+- break;
+- case 0x001: /* Invalid Op */
+- /*
+- * swd & 0x240 == 0x040: Stack Underflow
+- * swd & 0x240 == 0x240: Stack Overflow
+- * User must clear the SF bit (0x40) if set
+- */
+- info.si_code = FPE_FLTINV;
+- break;
+- case 0x002: /* Denormalize */
+- case 0x010: /* Underflow */
+- info.si_code = FPE_FLTUND;
+- break;
+- case 0x004: /* Zero Divide */
+- info.si_code = FPE_FLTDIV;
+- break;
+- case 0x008: /* Overflow */
+- info.si_code = FPE_FLTOVF;
+- break;
+- case 0x020: /* Precision */
+- info.si_code = FPE_FLTRES;
+- break;
++ case 0x000: /* No unmasked exception */
++ return;
++ default: /* Multiple exceptions */
++ break;
++ case 0x001: /* Invalid Op */
++ /*
++ * swd & 0x240 == 0x040: Stack Underflow
++ * swd & 0x240 == 0x240: Stack Overflow
++ * User must clear the SF bit (0x40) if set
++ */
++ info.si_code = FPE_FLTINV;
++ break;
++ case 0x002: /* Denormalize */
++ case 0x010: /* Underflow */
++ info.si_code = FPE_FLTUND;
++ break;
++ case 0x004: /* Zero Divide */
++ info.si_code = FPE_FLTDIV;
++ break;
++ case 0x008: /* Overflow */
++ info.si_code = FPE_FLTOVF;
++ break;
++ case 0x020: /* Precision */
++ info.si_code = FPE_FLTRES;
++ break;
+ }
+ force_sig_info(SIGFPE, &info, task);
+ }
+
+-void do_coprocessor_error(struct pt_regs * regs, long error_code)
++void do_coprocessor_error(struct pt_regs *regs, long error_code)
+ {
+ ignore_fpu_irq = 1;
+ math_error((void __user *)regs->ip);
+@@ -973,9 +1005,9 @@ void do_coprocessor_error(struct pt_regs
+
+ static void simd_math_error(void __user *ip)
+ {
+- struct task_struct * task;
+- siginfo_t info;
++ struct task_struct *task;
+ unsigned short mxcsr;
++ siginfo_t info;
+
+ /*
+ * Save the info for the exception handler and clear the error.
+@@ -996,84 +1028,82 @@ static void simd_math_error(void __user
+ */
+ mxcsr = get_fpu_mxcsr(task);
+ switch (~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f)) {
+- case 0x000:
+- default:
+- break;
+- case 0x001: /* Invalid Op */
+- info.si_code = FPE_FLTINV;
+- break;
+- case 0x002: /* Denormalize */
+- case 0x010: /* Underflow */
+- info.si_code = FPE_FLTUND;
+- break;
+- case 0x004: /* Zero Divide */
+- info.si_code = FPE_FLTDIV;
+- break;
+- case 0x008: /* Overflow */
+- info.si_code = FPE_FLTOVF;
+- break;
+- case 0x020: /* Precision */
+- info.si_code = FPE_FLTRES;
+- break;
++ case 0x000:
++ default:
++ break;
++ case 0x001: /* Invalid Op */
++ info.si_code = FPE_FLTINV;
++ break;
++ case 0x002: /* Denormalize */
++ case 0x010: /* Underflow */
++ info.si_code = FPE_FLTUND;
++ break;
++ case 0x004: /* Zero Divide */
++ info.si_code = FPE_FLTDIV;
++ break;
++ case 0x008: /* Overflow */
++ info.si_code = FPE_FLTOVF;
++ break;
++ case 0x020: /* Precision */
++ info.si_code = FPE_FLTRES;
++ break;
+ }
+ force_sig_info(SIGFPE, &info, task);
+ }
+
+-void do_simd_coprocessor_error(struct pt_regs * regs,
+- long error_code)
++void do_simd_coprocessor_error(struct pt_regs *regs, long error_code)
+ {
+ if (cpu_has_xmm) {
+ /* Handle SIMD FPU exceptions on PIII+ processors. */
+ ignore_fpu_irq = 1;
+ simd_math_error((void __user *)regs->ip);
+- } else {
+- /*
+- * Handle strange cache flush from user space exception
+- * in all other cases. This is undocumented behaviour.
+- */
+- if (regs->flags & VM_MASK) {
+- handle_vm86_fault((struct kernel_vm86_regs *)regs,
+- error_code);
+- return;
+- }
+- current->thread.trap_no = 19;
+- current->thread.error_code = error_code;
+- die_if_kernel("cache flush denied", regs, error_code);
+- force_sig(SIGSEGV, current);
++ return;
++ }
++ /*
++ * Handle strange cache flush from user space exception
++ * in all other cases. This is undocumented behaviour.
++ */
++ if (regs->flags & X86_VM_MASK) {
++ handle_vm86_fault((struct kernel_vm86_regs *)regs, error_code);
++ return;
+ }
++ current->thread.trap_no = 19;
++ current->thread.error_code = error_code;
++ die_if_kernel("cache flush denied", regs, error_code);
++ force_sig(SIGSEGV, current);
+ }
+
+ #ifndef CONFIG_XEN
+-void do_spurious_interrupt_bug(struct pt_regs * regs,
+- long error_code)
++void do_spurious_interrupt_bug(struct pt_regs *regs, long error_code)
+ {
+ #if 0
+ /* No need to warn about this any longer. */
+- printk("Ignoring P6 Local APIC Spurious Interrupt Bug...\n");
++ printk(KERN_INFO "Ignoring P6 Local APIC Spurious Interrupt Bug...\n");
+ #endif
+ }
+
+-unsigned long patch_espfix_desc(unsigned long uesp,
+- unsigned long kesp)
++unsigned long patch_espfix_desc(unsigned long uesp, unsigned long kesp)
+ {
+ struct desc_struct *gdt = __get_cpu_var(gdt_page).gdt;
+ unsigned long base = (kesp - uesp) & -THREAD_SIZE;
+ unsigned long new_kesp = kesp - base;
+ unsigned long lim_pages = (new_kesp | (THREAD_SIZE - 1)) >> PAGE_SHIFT;
+ __u64 desc = *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS];
++
+ /* Set up base for espfix segment */
+- desc &= 0x00f0ff0000000000ULL;
+- desc |= ((((__u64)base) << 16) & 0x000000ffffff0000ULL) |
++ desc &= 0x00f0ff0000000000ULL;
++ desc |= ((((__u64)base) << 16) & 0x000000ffffff0000ULL) |
+ ((((__u64)base) << 32) & 0xff00000000000000ULL) |
+ ((((__u64)lim_pages) << 32) & 0x000f000000000000ULL) |
+ (lim_pages & 0xffff);
+ *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS] = desc;
++
+ return new_kesp;
+ }
+ #endif
+
+ /*
+- * 'math_state_restore()' saves the current math information in the
++ * 'math_state_restore()' saves the current math information in the
+ * old math state array, and gets the new ones from the current task
+ *
+ * Careful.. There are problems with IBM-designed IRQ13 behaviour.
+@@ -1087,9 +1117,22 @@ asmlinkage void math_state_restore(void)
+ struct thread_info *thread = current_thread_info();
+ struct task_struct *tsk = thread->task;
+
++ if (!tsk_used_math(tsk)) {
++ local_irq_enable();
++ /*
++ * does a slab alloc which can sleep
++ */
++ if (init_fpu(tsk)) {
++ /*
++ * ran out of memory!
++ */
++ do_group_exit(SIGKILL);
++ return;
++ }
++ local_irq_disable();
++ }
++
+ /* NB. 'clts' is done for us by Xen during virtual trap. */
+- if (!tsk_used_math(tsk))
+- init_fpu(tsk);
+ restore_fpu(tsk);
+ thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */
+ tsk->fpu_counter++;
+@@ -1100,15 +1143,15 @@ EXPORT_SYMBOL_GPL(math_state_restore);
+
+ asmlinkage void math_emulate(long arg)
+ {
+- printk(KERN_EMERG "math-emulation not enabled and no coprocessor found.\n");
+- printk(KERN_EMERG "killing %s.\n",current->comm);
+- force_sig(SIGFPE,current);
++ printk(KERN_EMERG
++ "math-emulation not enabled and no coprocessor found.\n");
++ printk(KERN_EMERG "killing %s.\n", current->comm);
++ force_sig(SIGFPE, current);
+ schedule();
+ }
+
+ #endif /* CONFIG_MATH_EMULATION */
+
+-
+ /*
+ * NB. All these are "trap gates" (i.e. events_mask isn't set) except
+ * for those that specify <dpl>|4 in the second field.
+@@ -1146,25 +1189,21 @@ void __init trap_init(void)
+ if (ret)
+ printk("HYPERVISOR_set_trap_table failed: error %d\n", ret);
+
+- /*
+- * Verify that the FXSAVE/FXRSTOR data will be 16-byte aligned.
+- * Generate a build-time error if the alignment is wrong.
+- */
+- BUILD_BUG_ON(offsetof(struct task_struct, thread.i387.fxsave) & 15);
+ if (cpu_has_fxsr) {
+ printk(KERN_INFO "Enabling fast FPU save and restore... ");
+ set_in_cr4(X86_CR4_OSFXSR);
+ printk("done.\n");
+ }
+ if (cpu_has_xmm) {
+- printk(KERN_INFO "Enabling unmasked SIMD FPU exception "
+- "support... ");
++ printk(KERN_INFO
++ "Enabling unmasked SIMD FPU exception support... ");
+ set_in_cr4(X86_CR4_OSXMMEXCPT);
+ printk("done.\n");
+ }
+
++ init_thread_xstate();
+ /*
+- * Should be a barrier for any external CPU state.
++ * Should be a barrier for any external CPU state:
+ */
+ cpu_init();
+ }
+@@ -1183,6 +1222,7 @@ void __cpuinit smp_trap_init(trap_info_t
+ static int __init kstack_setup(char *s)
+ {
+ kstack_depth_to_print = simple_strtoul(s, NULL, 0);
++
+ return 1;
+ }
+ __setup("kstack=", kstack_setup);
+--- sle11-2009-05-14.orig/arch/x86/kernel/traps_64-xen.c 2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/arch/x86/kernel/traps_64-xen.c 2009-03-16 16:38:05.000000000 +0100
+@@ -33,6 +33,8 @@
+ #include <linux/kdebug.h>
+ #include <linux/utsname.h>
+
++#include <mach_traps.h>
++
+ #if defined(CONFIG_EDAC)
+ #include <linux/edac.h>
+ #endif
+@@ -601,10 +603,16 @@ void die(const char * str, struct pt_reg
+ }
+
+ #if defined(CONFIG_X86_LOCAL_APIC) || defined(CONFIG_SYSCTL)
+-void __kprobes die_nmi(char *str, struct pt_regs *regs, int do_panic)
++notrace __kprobes void
++die_nmi(char *str, struct pt_regs *regs, int do_panic)
+ {
+- unsigned long flags = oops_begin();
++ unsigned long flags;
++
++ if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) ==
++ NOTIFY_STOP)
++ return;
+
++ flags = oops_begin();
+ /*
+ * We are in trouble anyway, lets at least try
+ * to get a message out.
+@@ -769,7 +777,7 @@ asmlinkage void __kprobes do_general_pro
+ die("general protection fault", regs, error_code);
+ }
+
+-static __kprobes void
++static notrace __kprobes void
+ mem_parity_error(unsigned char reason, struct pt_regs * regs)
+ {
+ printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n",
+@@ -792,7 +800,7 @@ mem_parity_error(unsigned char reason, s
+ clear_mem_error(reason);
+ }
+
+-static __kprobes void
++static notrace __kprobes void
+ io_check_error(unsigned char reason, struct pt_regs * regs)
+ {
+ printk("NMI: IOCK error (debug interrupt?)\n");
+@@ -802,9 +810,11 @@ io_check_error(unsigned char reason, str
+ clear_io_check_error(reason);
+ }
+
+-static __kprobes void
++static notrace __kprobes void
+ unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
+ {
++ if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
++ return;
+ printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n",
+ reason);
+ printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n");
+@@ -817,7 +827,7 @@ unknown_nmi_error(unsigned char reason,
+
+ /* Runs on IST stack. This code must keep interrupts off all the time.
+ Nested NMIs are prevented by the CPU. */
+-asmlinkage __kprobes void default_do_nmi(struct pt_regs *regs)
++asmlinkage notrace __kprobes void default_do_nmi(struct pt_regs *regs)
+ {
+ unsigned char reason = 0;
+ int cpu;
+@@ -1117,11 +1127,25 @@ asmlinkage void __attribute__((weak)) mc
+ asmlinkage void math_state_restore(void)
+ {
+ struct task_struct *me = current;
++
++ if (!used_math()) {
++ local_irq_enable();
++ /*
++ * does a slab alloc which can sleep
++ */
++ if (init_fpu(me)) {
++ /*
++ * ran out of memory!
++ */
++ do_group_exit(SIGKILL);
++ return;
++ }
++ local_irq_disable();
++ }
++
+ /* clts(); */ /* 'clts' is done for us by Xen during virtual trap. */
+
+- if (!used_math())
+- init_fpu(me);
+- restore_fpu_checking(&me->thread.i387.fxsave);
++ restore_fpu_checking(&me->thread.xstate->fxsave);
+ task_thread_info(me)->status |= TS_USEDFPU;
+ me->fpu_counter++;
+ }
+@@ -1168,6 +1192,10 @@ void __init trap_init(void)
+ printk("HYPERVISOR_set_trap_table failed: error %d\n", ret);
+
+ /*
++ * initialize the per thread extended state:
++ */
++ init_thread_xstate();
++ /*
+ * Should be a barrier for any external CPU state.
+ */
+ cpu_init();
+--- sle11-2009-05-14.orig/arch/x86/kernel/vsyscall_64-xen.c 2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/arch/x86/kernel/vsyscall_64-xen.c 2009-03-16 16:38:05.000000000 +0100
+@@ -216,7 +216,7 @@ vgetcpu(unsigned *cpu, unsigned *node, s
+ return 0;
+ }
+
+-long __vsyscall(3) venosys_1(void)
++static long __vsyscall(3) venosys_1(void)
+ {
+ return -ENOSYS;
+ }
+--- sle11-2009-05-14.orig/arch/x86/mm/fault-xen.c 2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/arch/x86/mm/fault-xen.c 2009-03-16 16:38:05.000000000 +0100
+@@ -510,6 +510,11 @@ static int vmalloc_fault(unsigned long a
+ unsigned long pgd_paddr;
+ pmd_t *pmd_k;
+ pte_t *pte_k;
++
++ /* Make sure we are in vmalloc area */
++ if (!(address >= VMALLOC_START && address < VMALLOC_END))
++ return -1;
++
+ /*
+ * Synchronize this task's top level page-table
+ * with the 'reference' page table.
+@@ -670,7 +675,7 @@ void __kprobes do_page_fault(struct pt_r
+ #ifdef CONFIG_X86_32
+ /* It's safe to allow irq's after cr2 has been saved and the vmalloc
+ fault has been handled. */
+- if (regs->flags & (X86_EFLAGS_IF|VM_MASK))
++ if (regs->flags & (X86_EFLAGS_IF | X86_VM_MASK))
+ local_irq_enable();
+
+ /*
+@@ -1017,9 +1022,5 @@ void vmalloc_sync_all(void)
+ if (address == start)
+ start = address + PGDIR_SIZE;
+ }
+- /* Check that there is no need to do the same for the modules area. */
+- BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL));
+- BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
+- (__START_KERNEL & PGDIR_MASK)));
+ #endif
+ }
+--- sle11-2009-05-14.orig/arch/x86/mm/highmem_32-xen.c 2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/arch/x86/mm/highmem_32-xen.c 2009-03-16 16:38:05.000000000 +0100
+@@ -200,6 +200,8 @@ EXPORT_SYMBOL(kmap);
+ EXPORT_SYMBOL(kunmap);
+ EXPORT_SYMBOL(kmap_atomic);
+ EXPORT_SYMBOL(kunmap_atomic);
++#ifdef CONFIG_HIGHPTE
+ EXPORT_SYMBOL(kmap_atomic_to_page);
++#endif
+ EXPORT_SYMBOL(clear_highpage);
+ EXPORT_SYMBOL(copy_highpage);
+--- sle11-2009-05-14.orig/arch/x86/mm/init_32-xen.c 2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/arch/x86/mm/init_32-xen.c 2009-03-16 16:38:05.000000000 +0100
+@@ -1,5 +1,4 @@
+ /*
+- * linux/arch/i386/mm/init.c
+ *
+ * Copyright (C) 1995 Linus Torvalds
+ *
+@@ -22,6 +21,7 @@
+ #include <linux/init.h>
+ #include <linux/highmem.h>
+ #include <linux/pagemap.h>
++#include <linux/pci.h>
+ #include <linux/pfn.h>
+ #include <linux/poison.h>
+ #include <linux/bootmem.h>
+@@ -54,6 +54,8 @@
+
+ unsigned int __VMALLOC_RESERVE = 128 << 20;
+
++unsigned long max_pfn_mapped;
++
+ DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
+ unsigned long highstart_pfn, highend_pfn;
+
+@@ -73,7 +75,7 @@ static pmd_t * __init one_md_table_init(
+ if (!(__pgd_val(*pgd) & _PAGE_PRESENT)) {
+ pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
+
+- paravirt_alloc_pd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
++ paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
+ make_lowmem_page_readonly(pmd_table, XENFEAT_writable_page_tables);
+ set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
+ pud = pud_offset(pgd, 0);
+@@ -107,7 +109,7 @@ static pte_t * __init one_page_table_ini
+ (pte_t *)alloc_bootmem_low_pages(PAGE_SIZE);
+ }
+
+- paravirt_alloc_pt(&init_mm, __pa(page_table) >> PAGE_SHIFT);
++ paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT);
+ make_lowmem_page_readonly(page_table,
+ XENFEAT_writable_page_tables);
+ set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
+@@ -209,8 +211,13 @@ static void __init kernel_physical_mappi
+ /*
+ * Map with big pages if possible, otherwise
+ * create normal page tables:
++ *
++ * Don't use a large page for the first 2/4MB of memory
++ * because there are often fixed size MTRRs in there
++ * and overlapping MTRRs into large pages can cause
++ * slowdowns.
+ */
+- if (cpu_has_pse) {
++ if (cpu_has_pse && !(pgd_idx == 0 && pmd_idx == 0)) {
+ unsigned int addr2;
+ pgprot_t prot = PAGE_KERNEL_LARGE;
+
+@@ -224,6 +231,7 @@ static void __init kernel_physical_mappi
+ set_pmd(pmd, pfn_pmd(pfn, prot));
+
+ pfn += PTRS_PER_PTE;
++ max_pfn_mapped = pfn;
+ continue;
+ }
+ pte = one_page_table_init(pmd);
+@@ -241,6 +249,7 @@ static void __init kernel_physical_mappi
+
+ set_pte(pte, pfn_pte(pfn, prot));
+ }
++ max_pfn_mapped = pfn;
+ pte_ofs = 0;
+ }
+ pmd_idx = 0;
+@@ -262,6 +271,25 @@ static inline int page_kills_ppro(unsign
+
+ #endif
+
++/*
++ * devmem_is_allowed() checks to see if /dev/mem access to a certain address
++ * is valid. The argument is a physical page number.
++ *
++ *
++ * On x86, access has to be given to the first megabyte of ram because that area
++ * contains bios code and data regions used by X and dosemu and similar apps.
++ * Access has to be given to non-kernel-ram areas as well, these contain the PCI
++ * mmio resources as well as potential bios/acpi data regions.
++ */
++int devmem_is_allowed(unsigned long pagenr)
++{
++ if (pagenr <= 256)
++ return 1;
++ if (mfn_to_local_pfn(pagenr) >= max_pfn)
++ return 1;
++ return 0;
++}
++
+ #ifdef CONFIG_HIGHMEM
+ pte_t *kmap_pte;
+ pgprot_t kmap_prot;
+@@ -303,48 +331,18 @@ static void __init permanent_kmaps_init(
+ pkmap_page_table = pte;
+ }
+
+-static void __meminit free_new_highpage(struct page *page, int pfn)
+-{
+- init_page_count(page);
+- if (pfn < xen_start_info->nr_pages)
+- __free_page(page);
+- totalhigh_pages++;
+-}
+-
+ void __init add_one_highpage_init(struct page *page, int pfn, int bad_ppro)
+ {
+ if (page_is_ram(pfn) && !(bad_ppro && page_kills_ppro(pfn))) {
+ ClearPageReserved(page);
+- free_new_highpage(page, pfn);
++ init_page_count(page);
++ if (pfn < xen_start_info->nr_pages)
++ __free_page(page);
++ totalhigh_pages++;
+ } else
+ SetPageReserved(page);
+ }
+
+-static int __meminit
+-add_one_highpage_hotplug(struct page *page, unsigned long pfn)
+-{
+- free_new_highpage(page, pfn);
+- totalram_pages++;
+-#ifdef CONFIG_FLATMEM
+- max_mapnr = max(pfn, max_mapnr);
+-#endif
+- num_physpages++;
+-
+- return 0;
+-}
+-
+-/*
+- * Not currently handling the NUMA case.
+- * Assuming single node and all memory that
+- * has been added dynamically that would be
+- * onlined here is in HIGHMEM.
+- */
+-void __meminit online_page(struct page *page)
+-{
+- ClearPageReserved(page);
+- add_one_highpage_hotplug(page, page_to_pfn(page));
+-}
+-
+ #ifndef CONFIG_NUMA
+ static void __init set_highmem_pages_init(int bad_ppro)
+ {
+@@ -459,15 +457,13 @@ void zap_low_mappings(void)
+ {
+ int i;
+
+- save_pg_dir();
+-
+ /*
+ * Zap initial low-memory mappings.
+ *
+ * Note that "pgd_clear()" doesn't do it for
+ * us, because pgd_clear() is a no-op on i386.
+ */
+- for (i = 0; i < USER_PTRS_PER_PGD; i++) {
++ for (i = 0; i < KERNEL_PGD_BOUNDARY; i++) {
+ #if defined(CONFIG_X86_PAE) && !defined(CONFIG_XEN)
+ set_pgd(swapper_pg_dir+i, __pgd(1 + __pa(empty_zero_page)));
+ #else
+@@ -572,9 +568,9 @@ void __init paging_init(void)
+
+ /*
+ * Test if the WP bit works in supervisor mode. It isn't supported on 386's
+- * and also on some strange 486's (NexGen etc.). All 586+'s are OK. This
+- * used to involve black magic jumps to work around some nasty CPU bugs,
+- * but fortunately the switch to using exceptions got rid of all that.
++ * and also on some strange 486's. All 586+'s are OK. This used to involve
++ * black magic jumps to work around some nasty CPU bugs, but fortunately the
++ * switch to using exceptions got rid of all that.
+ */
+ static void __init test_wp_bit(void)
+ {
+@@ -605,9 +601,7 @@ void __init mem_init(void)
+ int tmp, bad_ppro;
+ unsigned long pfn;
+
+-#if defined(CONFIG_SWIOTLB)
+- swiotlb_init();
+-#endif
++ pci_iommu_alloc();
+
+ #ifdef CONFIG_FLATMEM
+ BUG_ON(!mem_map);
+@@ -710,16 +704,8 @@ void __init mem_init(void)
+ test_wp_bit();
+
+ cpa_init();
+-
+- /*
+- * Subtle. SMP is doing it's boot stuff late (because it has to
+- * fork idle threads) - but it also needs low mappings for the
+- * protected-mode entry to work. We zap these entries only after
+- * the WP-bit has been tested.
+- */
+-#ifndef CONFIG_SMP
++ save_pg_dir();
+ zap_low_mappings();
+-#endif
+
+ SetPagePinned(virt_to_page(init_mm.pgd));
+ }
+@@ -769,25 +755,17 @@ void mark_rodata_ro(void)
+ unsigned long start = PFN_ALIGN(_text);
+ unsigned long size = PFN_ALIGN(_etext) - start;
+
+-#ifndef CONFIG_KPROBES
+-#ifdef CONFIG_HOTPLUG_CPU
+- /* It must still be possible to apply SMP alternatives. */
+- if (num_possible_cpus() <= 1)
+-#endif
+- {
+- set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
+- printk(KERN_INFO "Write protecting the kernel text: %luk\n",
+- size >> 10);
++ set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
++ printk(KERN_INFO "Write protecting the kernel text: %luk\n",
++ size >> 10);
+
+ #ifdef CONFIG_CPA_DEBUG
+- printk(KERN_INFO "Testing CPA: Reverting %lx-%lx\n",
+- start, start+size);
+- set_pages_rw(virt_to_page(start), size>>PAGE_SHIFT);
++ printk(KERN_INFO "Testing CPA: Reverting %lx-%lx\n",
++ start, start+size);
++ set_pages_rw(virt_to_page(start), size>>PAGE_SHIFT);
+
+- printk(KERN_INFO "Testing CPA: write protecting again\n");
+- set_pages_ro(virt_to_page(start), size>>PAGE_SHIFT);
+-#endif
+- }
++ printk(KERN_INFO "Testing CPA: write protecting again\n");
++ set_pages_ro(virt_to_page(start), size>>PAGE_SHIFT);
+ #endif
+ start += size;
+ size = (unsigned long)__end_rodata - start;
+--- sle11-2009-05-14.orig/arch/x86/mm/init_64-xen.c 2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/arch/x86/mm/init_64-xen.c 2009-03-16 16:38:05.000000000 +0100
+@@ -52,9 +52,6 @@
+
+ #include <xen/features.h>
+
+-const struct dma_mapping_ops *dma_ops;
+-EXPORT_SYMBOL(dma_ops);
+-
+ #if CONFIG_XEN_COMPAT <= 0x030002
+ unsigned int __kernel_page_user;
+ EXPORT_SYMBOL(__kernel_page_user);
+@@ -68,6 +65,28 @@ extern unsigned long start_pfn;
+ extern pmd_t level2_fixmap_pgt[PTRS_PER_PMD];
+ extern pte_t level1_fixmap_pgt[PTRS_PER_PTE];
+
++#ifndef CONFIG_XEN
++int direct_gbpages __meminitdata
++#ifdef CONFIG_DIRECT_GBPAGES
++ = 1
++#endif
++;
++
++static int __init parse_direct_gbpages_off(char *arg)
++{
++ direct_gbpages = 0;
++ return 0;
++}
++early_param("nogbpages", parse_direct_gbpages_off);
++
++static int __init parse_direct_gbpages_on(char *arg)
++{
++ direct_gbpages = 1;
++ return 0;
++}
++early_param("gbpages", parse_direct_gbpages_on);
++#endif
++
+ /*
+ * Use this until direct mapping is established, i.e. before __va() is
+ * available in init_memory_mapping().
+@@ -135,9 +154,6 @@ void show_mem(void)
+
+ printk(KERN_INFO "Mem-info:\n");
+ show_free_areas();
+- printk(KERN_INFO "Free swap: %6ldkB\n",
+- nr_swap_pages << (PAGE_SHIFT-10));
+-
+ for_each_online_pgdat(pgdat) {
+ for (i = 0; i < pgdat->node_spanned_pages; ++i) {
+ /*
+@@ -328,7 +344,7 @@ void __init cleanup_highmap(void)
+ pmd_t *last_pmd = pmd + PTRS_PER_PMD;
+
+ for (; pmd < last_pmd; pmd++, vaddr += PMD_SIZE) {
+- if (!pmd_present(*pmd))
++ if (pmd_none(*pmd))
+ continue;
+ if (vaddr < (unsigned long) _text || vaddr > end)
+ set_pmd(pmd, __pmd(0));
+@@ -337,8 +353,7 @@ void __init cleanup_highmap(void)
+ #endif
+
+ /* NOTE: this is meant to be run only at boot */
+-void __init
+-__set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
++void __init __set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
+ {
+ unsigned long address = __fix_to_virt(idx);
+
+@@ -463,7 +478,7 @@ __meminit void early_iounmap(void *addr,
+ }
+ #endif
+
+-static void __meminit
++static unsigned long __meminit
+ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
+ {
+ int i = pmd_index(address);
+@@ -503,21 +518,26 @@ phys_pmd_init(pmd_t *pmd_page, unsigned
+ set_pmd(pmd, __pmd(pte_phys | _KERNPG_TABLE));
+ }
+ }
++ return address;
+ }
+
+-static void __meminit
++static unsigned long __meminit
+ phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end)
+ {
+ pmd_t *pmd = pmd_offset(pud, 0);
++ unsigned long last_map_addr;
++
+ spin_lock(&init_mm.page_table_lock);
+- phys_pmd_init(pmd, address, end);
++ last_map_addr = phys_pmd_init(pmd, address, end);
+ spin_unlock(&init_mm.page_table_lock);
+ __flush_tlb_all();
++ return last_map_addr;
+ }
+
+-static void __meminit
++static unsigned long __meminit
+ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end)
+ {
++ unsigned long last_map_addr = end;
+ int i = pud_index(addr);
+
+ for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE) {
+@@ -529,7 +549,15 @@ phys_pud_init(pud_t *pud_page, unsigned
+ break;
+
+ if (__pud_val(*pud)) {
+- phys_pmd_update(pud, addr, end);
++ if (!pud_large(*pud))
++ last_map_addr = phys_pmd_update(pud, addr, end);
++ continue;
++ }
++
++ if (direct_gbpages) {
++ set_pte((pte_t *)pud,
++ pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL_LARGE));
++ last_map_addr = (addr & PUD_MASK) + PUD_SIZE;
+ continue;
+ }
+
+@@ -537,12 +565,14 @@ phys_pud_init(pud_t *pud_page, unsigned
+
+ spin_lock(&init_mm.page_table_lock);
+ *pud = __pud(pmd_phys | _KERNPG_TABLE);
+- phys_pmd_init(pmd, addr, end);
++ last_map_addr = phys_pmd_init(pmd, addr, end);
+ spin_unlock(&init_mm.page_table_lock);
+
+ early_make_page_readonly(pmd, XENFEAT_writable_page_tables);
+ }
+ __flush_tlb_all();
++
++ return last_map_addr >> PAGE_SHIFT;
+ }
+
+ void __init xen_init_pt(void)
+@@ -754,16 +784,138 @@ static void __init xen_finish_init_mappi
+ table_end = start_pfn;
+ }
+
++static void __init init_gbpages(void)
++{
++#ifndef CONFIG_XEN
++ if (direct_gbpages && cpu_has_gbpages)
++ printk(KERN_INFO "Using GB pages for direct mapping\n");
++ else
++ direct_gbpages = 0;
++#endif
++}
++
++#ifdef CONFIG_MEMTEST_BOOTPARAM
++
++static void __init memtest(unsigned long start_phys, unsigned long size,
++ unsigned pattern)
++{
++ unsigned long i;
++ unsigned long *start;
++ unsigned long start_bad;
++ unsigned long last_bad;
++ unsigned long val;
++ unsigned long start_phys_aligned;
++ unsigned long count;
++ unsigned long incr;
++
++ switch (pattern) {
++ case 0:
++ val = 0UL;
++ break;
++ case 1:
++ val = -1UL;
++ break;
++ case 2:
++ val = 0x5555555555555555UL;
++ break;
++ case 3:
++ val = 0xaaaaaaaaaaaaaaaaUL;
++ break;
++ default:
++ return;
++ }
++
++ incr = sizeof(unsigned long);
++ start_phys_aligned = ALIGN(start_phys, incr);
++ count = (size - (start_phys_aligned - start_phys))/incr;
++ start = __va(start_phys_aligned);
++ start_bad = 0;
++ last_bad = 0;
++
++ for (i = 0; i < count; i++)
++ start[i] = val;
++ for (i = 0; i < count; i++, start++, start_phys_aligned += incr) {
++ if (*start != val) {
++ if (start_phys_aligned == last_bad + incr) {
++ last_bad += incr;
++ } else {
++ if (start_bad) {
++ printk(KERN_CONT "\n %016lx bad mem addr %016lx - %016lx reserved",
++ val, start_bad, last_bad + incr);
++ reserve_early(start_bad, last_bad - start_bad, "BAD RAM");
++ }
++ start_bad = last_bad = start_phys_aligned;
++ }
++ }
++ }
++ if (start_bad) {
++ printk(KERN_CONT "\n %016lx bad mem addr %016lx - %016lx reserved",
++ val, start_bad, last_bad + incr);
++ reserve_early(start_bad, last_bad - start_bad, "BAD RAM");
++ }
++
++}
++
++static int memtest_pattern __initdata = CONFIG_MEMTEST_BOOTPARAM_VALUE;
++
++static int __init parse_memtest(char *arg)
++{
++ if (arg)
++ memtest_pattern = simple_strtoul(arg, NULL, 0);
++ return 0;
++}
++
++early_param("memtest", parse_memtest);
++
++static void __init early_memtest(unsigned long start, unsigned long end)
++{
++ u64 t_start, t_size;
++ unsigned pattern;
++
++ if (!memtest_pattern)
++ return;
++
++ printk(KERN_INFO "early_memtest: pattern num %d", memtest_pattern);
++ for (pattern = 0; pattern < memtest_pattern; pattern++) {
++ t_start = start;
++ t_size = 0;
++ while (t_start < end) {
++ t_start = find_e820_area_size(t_start, &t_size, 1);
++
++ /* done ? */
++ if (t_start >= end)
++ break;
++ if (t_start + t_size > end)
++ t_size = end - t_start;
++
++ printk(KERN_CONT "\n %016llx - %016llx pattern %d",
++ (unsigned long long)t_start,
++ (unsigned long long)t_start + t_size, pattern);
++
++ memtest(t_start, t_size, pattern);
++
++ t_start += t_size;
++ }
++ }
++ printk(KERN_CONT "\n");
++}
++#else
++static void __init early_memtest(unsigned long start, unsigned long end)
++{
++}
++#endif
++
+ /*
+ * Setup the direct mapping of the physical memory at PAGE_OFFSET.
+ * This runs before bootmem is initialized and gets pages directly from
+ * the physical memory. To access them they are temporarily mapped.
+ */
+-void __init_refok init_memory_mapping(unsigned long start, unsigned long end)
++unsigned long __init_refok init_memory_mapping(unsigned long start, unsigned long end)
+ {
+- unsigned long next;
++ unsigned long next, last_map_addr = end;
++ unsigned long start_phys = start, end_phys = end;
+
+- pr_debug("init_memory_mapping\n");
++ printk(KERN_INFO "init_memory_mapping\n");
+
+ /*
+ * Find space for the kernel direct mapping tables.
+@@ -772,8 +924,10 @@ void __init_refok init_memory_mapping(un
+ * memory mapped. Unfortunately this is done currently before the
+ * nodes are discovered.
+ */
+- if (!after_bootmem)
++ if (!after_bootmem) {
++ init_gbpages();
+ find_early_table_space(end);
++ }
+
+ start = (unsigned long)__va(start);
+ end = (unsigned long)__va(end);
+@@ -790,7 +944,7 @@ void __init_refok init_memory_mapping(un
+ next = start + PGDIR_SIZE;
+ if (next > end)
+ next = end;
+- phys_pud_init(pud, __pa(start), __pa(next));
++ last_map_addr = phys_pud_init(pud, __pa(start), __pa(next));
+ if (!after_bootmem) {
+ early_make_page_readonly(pud, XENFEAT_writable_page_tables);
+ set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys));
+@@ -807,6 +961,11 @@ void __init_refok init_memory_mapping(un
+ if (!after_bootmem)
+ reserve_early(table_start << PAGE_SHIFT,
+ table_end << PAGE_SHIFT, "PGTABLE");
++
++ if (!after_bootmem)
++ early_memtest(start_phys, end_phys);
++
++ return last_map_addr;
+ }
+
+ #ifndef CONFIG_NUMA
+@@ -830,15 +989,6 @@ void __init paging_init(void)
+ /*
+ * Memory hotplug specific functions
+ */
+-void online_page(struct page *page)
+-{
+- ClearPageReserved(page);
+- init_page_count(page);
+- __free_page(page);
+- totalram_pages++;
+- num_physpages++;
+-}
+-
+ #ifdef CONFIG_MEMORY_HOTPLUG
+ /*
+ * Memory is added always to NORMAL zone. This means you will never get
+@@ -848,11 +998,13 @@ int arch_add_memory(int nid, u64 start,
+ {
+ struct pglist_data *pgdat = NODE_DATA(nid);
+ struct zone *zone = pgdat->node_zones + ZONE_NORMAL;
+- unsigned long start_pfn = start >> PAGE_SHIFT;
++ unsigned long last_mapped_pfn, start_pfn = start >> PAGE_SHIFT;
+ unsigned long nr_pages = size >> PAGE_SHIFT;
+ int ret;
+
+- init_memory_mapping(start, start + size-1);
++ last_mapped_pfn = init_memory_mapping(start, start + size-1);
++ if (last_mapped_pfn > max_pfn_mapped)
++ max_pfn_mapped = last_mapped_pfn;
+
+ ret = __add_pages(zone, start_pfn, nr_pages);
+ WARN_ON(1);
+@@ -871,6 +1023,26 @@ EXPORT_SYMBOL_GPL(memory_add_physaddr_to
+
+ #endif /* CONFIG_MEMORY_HOTPLUG */
+
++/*
++ * devmem_is_allowed() checks to see if /dev/mem access to a certain address
++ * is valid. The argument is a physical page number.
++ *
++ *
++ * On x86, access has to be given to the first megabyte of ram because that area
++ * contains bios code and data regions used by X and dosemu and similar apps.
++ * Access has to be given to non-kernel-ram areas as well, these contain the PCI
++ * mmio resources as well as potential bios/acpi data regions.
++ */
++int devmem_is_allowed(unsigned long pagenr)
++{
++ if (pagenr <= 256)
++ return 1;
++ if (mfn_to_local_pfn(pagenr) >= max_pfn)
++ return 1;
++ return 0;
++}
++
++
+ static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel,
+ kcore_modules, kcore_vsyscall;
+
+@@ -979,24 +1151,7 @@ EXPORT_SYMBOL_GPL(rodata_test_data);
+
+ void mark_rodata_ro(void)
+ {
+- unsigned long start = (unsigned long)_stext, end;
+-
+-#ifdef CONFIG_HOTPLUG_CPU
+- /* It must still be possible to apply SMP alternatives. */
+- if (num_possible_cpus() > 1)
+- start = (unsigned long)_etext;
+-#endif
+-
+-#ifdef CONFIG_KPROBES
+- start = (unsigned long)__start_rodata;
+-#endif
+-
+- end = (unsigned long)__end_rodata;
+- start = (start + PAGE_SIZE - 1) & PAGE_MASK;
+- end &= PAGE_MASK;
+- if (end <= start)
+- return;
+-
++ unsigned long start = PFN_ALIGN(_stext), end = PFN_ALIGN(__end_rodata);
+
+ printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
+ (end - start) >> 10);
+@@ -1019,6 +1174,7 @@ void mark_rodata_ro(void)
+ set_memory_ro(start, (end-start) >> PAGE_SHIFT);
+ #endif
+ }
++
+ #endif
+
+ #ifdef CONFIG_BLK_DEV_INITRD
+@@ -1031,7 +1187,7 @@ void free_initrd_mem(unsigned long start
+ void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
+ {
+ #ifdef CONFIG_NUMA
+- int nid = phys_to_nid(phys);
++ int nid, next_nid;
+ #endif
+ unsigned long pfn = phys >> PAGE_SHIFT;
+
+@@ -1040,7 +1196,7 @@ void __init reserve_bootmem_generic(unsi
+ * This can happen with kdump kernels when accessing
+ * firmware tables:
+ */
+- if (pfn < end_pfn_map)
++ if (pfn < max_pfn_mapped)
+ return;
+
+ printk(KERN_ERR "reserve_bootmem: illegal reserve %lx %u\n",
+@@ -1050,10 +1206,16 @@ void __init reserve_bootmem_generic(unsi
+
+ /* Should check here against the e820 map to avoid double free */
+ #ifdef CONFIG_NUMA
+- reserve_bootmem_node(NODE_DATA(nid), phys, len, BOOTMEM_DEFAULT);
++ nid = phys_to_nid(phys);
++ next_nid = phys_to_nid(phys + len - 1);
++ if (nid == next_nid)
++ reserve_bootmem_node(NODE_DATA(nid), phys, len, BOOTMEM_DEFAULT);
++ else
++ reserve_bootmem(phys, len, BOOTMEM_DEFAULT);
+ #else
+ reserve_bootmem(phys, len, BOOTMEM_DEFAULT);
+ #endif
++
+ #ifndef CONFIG_XEN
+ if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) {
+ dma_reserve += len / PAGE_SIZE;
+@@ -1149,6 +1311,10 @@ const char *arch_vma_name(struct vm_area
+ /*
+ * Initialise the sparsemem vmemmap using huge-pages at the PMD level.
+ */
++static long __meminitdata addr_start, addr_end;
++static void __meminitdata *p_start, *p_end;
++static int __meminitdata node_start;
++
+ int __meminit
+ vmemmap_populate(struct page *start_page, unsigned long size, int node)
+ {
+@@ -1183,12 +1349,32 @@ vmemmap_populate(struct page *start_page
+ PAGE_KERNEL_LARGE);
+ set_pmd(pmd, __pmd_ma(__pte_val(entry)));
+
+- printk(KERN_DEBUG " [%lx-%lx] PMD ->%p on node %d\n",
+- addr, addr + PMD_SIZE - 1, p, node);
++ /* check to see if we have contiguous blocks */
++ if (p_end != p || node_start != node) {
++ if (p_start)
++ printk(KERN_DEBUG " [%lx-%lx] PMD -> [%p-%p] on node %d\n",
++ addr_start, addr_end-1, p_start, p_end-1, node_start);
++ addr_start = addr;
++ node_start = node;
++ p_start = p;
++ }
++ addr_end = addr + PMD_SIZE;
++ p_end = p + PMD_SIZE;
+ } else {
+ vmemmap_verify((pte_t *)pmd, node, addr, next);
+ }
+ }
+ return 0;
+ }
++
++void __meminit vmemmap_populate_print_last(void)
++{
++ if (p_start) {
++ printk(KERN_DEBUG " [%lx-%lx] PMD -> [%p-%p] on node %d\n",
++ addr_start, addr_end-1, p_start, p_end-1, node_start);
++ p_start = NULL;
++ p_end = NULL;
++ node_start = 0;
++ }
++}
+ #endif
+--- sle11-2009-05-14.orig/arch/x86/mm/ioremap-xen.c 2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/arch/x86/mm/ioremap-xen.c 2009-03-16 16:38:05.000000000 +0100
+@@ -20,14 +20,11 @@
+ #include <asm/pgtable.h>
+ #include <asm/tlbflush.h>
+ #include <asm/pgalloc.h>
++#include <asm/pat.h>
+
+-enum ioremap_mode {
+- IOR_MODE_UNCACHED,
+- IOR_MODE_CACHED,
+-};
+-
+-#if defined(CONFIG_X86_64) && !defined(CONFIG_XEN)
++#ifdef CONFIG_X86_64
+
++#ifndef CONFIG_XEN
+ unsigned long __phys_addr(unsigned long x)
+ {
+ if (x >= __START_KERNEL_map)
+@@ -35,6 +32,19 @@ unsigned long __phys_addr(unsigned long
+ return x - PAGE_OFFSET;
+ }
+ EXPORT_SYMBOL(__phys_addr);
++#endif
++
++static inline int phys_addr_valid(unsigned long addr)
++{
++ return addr < (1UL << boot_cpu_data.x86_phys_bits);
++}
++
++#else
++
++static inline int phys_addr_valid(unsigned long addr)
++{
++ return 1;
++}
+
+ #endif
+
+@@ -92,7 +102,8 @@ static int __direct_remap_pfn_range(stru
+ * Fill in the machine address: PTE ptr is done later by
+ * apply_to_page_range().
+ */
+- v->val = __pte_val(pfn_pte_ma(mfn, prot)) | _PAGE_IO;
++ pgprot_val(prot) |= _PAGE_IO;
++ v->val = __pte_val(pte_mkspecial(pfn_pte_ma(mfn, prot)));
+
+ mfn++;
+ address += PAGE_SIZE;
+@@ -189,10 +200,9 @@ int touch_pte_range(struct mm_struct *mm
+
+ EXPORT_SYMBOL(touch_pte_range);
+
+-#ifdef CONFIG_X86_32
+ int page_is_ram(unsigned long pagenr)
+ {
+- unsigned long addr, end;
++ resource_size_t addr, end;
+ int i;
+
+ #ifndef CONFIG_XEN
+@@ -228,31 +238,51 @@ int page_is_ram(unsigned long pagenr)
+ }
+ return 0;
+ }
+-#endif
+
+ /*
+ * Fix up the linear direct mapping of the kernel to avoid cache attribute
+ * conflicts.
+ */
+ static int ioremap_change_attr(unsigned long vaddr, unsigned long size,
+- enum ioremap_mode mode)
++ unsigned long prot_val)
+ {
+ unsigned long nrpages = size >> PAGE_SHIFT;
+ int err;
+
+- switch (mode) {
+- case IOR_MODE_UNCACHED:
++ switch (prot_val) {
++ case _PAGE_CACHE_UC:
+ default:
+- err = set_memory_uc(vaddr, nrpages);
++ err = _set_memory_uc(vaddr, nrpages);
++ break;
++ case _PAGE_CACHE_WC:
++ err = _set_memory_wc(vaddr, nrpages);
+ break;
+- case IOR_MODE_CACHED:
+- err = set_memory_wb(vaddr, nrpages);
++ case _PAGE_CACHE_WB:
++ err = _set_memory_wb(vaddr, nrpages);
+ break;
+ }
+
+ return err;
+ }
+
++int ioremap_check_change_attr(unsigned long mfn, unsigned long size,
++ unsigned long prot_val)
++{
++ unsigned long sz;
++ int rc;
++
++ for (sz = rc = 0; sz < size && !rc; ++mfn, sz += PAGE_SIZE) {
++ unsigned long pfn = mfn_to_local_pfn(mfn);
++
++ if (pfn >= max_pfn_mapped)
++ continue;
++ rc = ioremap_change_attr((unsigned long)__va(pfn << PAGE_SHIFT),
++ PAGE_SIZE, prot_val);
++ }
++
++ return rc;
++}
++
+ /*
+ * Remap an arbitrary physical address space into the kernel virtual
+ * address space. Needed when the kernel wants to access high addresses
+@@ -262,12 +292,15 @@ static int ioremap_change_attr(unsigned
+ * have to convert them into an offset in a page-aligned mapping, but the
+ * caller shouldn't need to know that small detail.
+ */
+-static void __iomem *__ioremap(resource_size_t phys_addr, unsigned long size,
+- enum ioremap_mode mode)
++static void __iomem *__ioremap_caller(resource_size_t phys_addr,
++ unsigned long size, unsigned long prot_val, void *caller)
+ {
+- unsigned long mfn, offset, last_addr, vaddr;
++ unsigned long mfn, offset, vaddr;
++ resource_size_t last_addr;
+ struct vm_struct *area;
++ unsigned long new_prot_val;
+ pgprot_t prot;
++ int retval;
+ domid_t domid = DOMID_IO;
+
+ /* Don't allow wraparound or zero size */
+@@ -275,6 +308,13 @@ static void __iomem *__ioremap(resource_
+ if (!size || last_addr < phys_addr)
+ return NULL;
+
++ if (!phys_addr_valid(phys_addr)) {
++ printk(KERN_WARNING "ioremap: invalid physical address %llx\n",
++ (unsigned long long)phys_addr);
++ WARN_ON_ONCE(1);
++ return NULL;
++ }
++
+ /*
+ * Don't remap the low PCI/ISA area, it's always mapped..
+ */
+@@ -287,55 +327,86 @@ static void __iomem *__ioremap(resource_
+ for (mfn = PFN_DOWN(phys_addr); mfn < PFN_UP(last_addr); mfn++) {
+ unsigned long pfn = mfn_to_local_pfn(mfn);
+
+- if (pfn >= max_pfn)
+- continue;
++ if (pfn_valid(pfn)) {
++ if (!PageReserved(pfn_to_page(pfn)))
++ return NULL;
++ domid = DOMID_SELF;
++ }
++ }
++ WARN_ON_ONCE(domid == DOMID_SELF);
+
+- domid = DOMID_SELF;
++ /*
++ * Mappings have to be page-aligned
++ */
++ offset = phys_addr & ~PAGE_MASK;
++ phys_addr &= PAGE_MASK;
++ size = PAGE_ALIGN(last_addr+1) - phys_addr;
+
+- if (pfn >= max_pfn_mapped) /* bogus */
+- continue;
++ retval = reserve_memtype(phys_addr, phys_addr + size,
++ prot_val, &new_prot_val);
++ if (retval) {
++ pr_debug("Warning: reserve_memtype returned %d\n", retval);
++ return NULL;
++ }
+
+- if (pfn_valid(pfn) && !PageReserved(pfn_to_page(pfn)))
++ if (prot_val != new_prot_val) {
++ /*
++ * Do not fallback to certain memory types with certain
++ * requested type:
++ * - request is uc-, return cannot be write-back
++ * - request is uc-, return cannot be write-combine
++ * - request is write-combine, return cannot be write-back
++ */
++ if ((prot_val == _PAGE_CACHE_UC_MINUS &&
++ (new_prot_val == _PAGE_CACHE_WB ||
++ new_prot_val == _PAGE_CACHE_WC)) ||
++ (prot_val == _PAGE_CACHE_WC &&
++ new_prot_val == _PAGE_CACHE_WB)) {
++ pr_debug(
++ "ioremap error for 0x%llx-0x%llx, requested 0x%lx, got 0x%lx\n",
++ (unsigned long long)phys_addr,
++ (unsigned long long)(phys_addr + size),
++ prot_val, new_prot_val);
++ free_memtype(phys_addr, phys_addr + size);
+ return NULL;
++ }
++ prot_val = new_prot_val;
+ }
+
+- switch (mode) {
+- case IOR_MODE_UNCACHED:
++ switch (prot_val) {
++ case _PAGE_CACHE_UC:
+ default:
+- /*
+- * FIXME: we will use UC MINUS for now, as video fb drivers
+- * depend on it. Upcoming ioremap_wc() will fix this behavior.
+- */
++ prot = PAGE_KERNEL_NOCACHE;
++ break;
++ case _PAGE_CACHE_UC_MINUS:
+ prot = PAGE_KERNEL_UC_MINUS;
+ break;
+- case IOR_MODE_CACHED:
++ case _PAGE_CACHE_WC:
++ prot = PAGE_KERNEL_WC;
++ break;
++ case _PAGE_CACHE_WB:
+ prot = PAGE_KERNEL;
+ break;
+ }
+
+ /*
+- * Mappings have to be page-aligned
+- */
+- offset = phys_addr & ~PAGE_MASK;
+- phys_addr &= PAGE_MASK;
+- size = PAGE_ALIGN(last_addr+1) - phys_addr;
+-
+- /*
+ * Ok, go for it..
+ */
+- area = get_vm_area(size, VM_IOREMAP | (mode << 20));
++ area = get_vm_area_caller(size, VM_IOREMAP, caller);
+ if (!area)
+ return NULL;
+ area->phys_addr = phys_addr;
+ vaddr = (unsigned long) area->addr;
+ if (__direct_remap_pfn_range(&init_mm, vaddr, PFN_DOWN(phys_addr),
+ size, prot, domid)) {
++ free_memtype(phys_addr, phys_addr + size);
+ free_vm_area(area);
+ return NULL;
+ }
+
+- if (ioremap_change_attr(vaddr, size, mode) < 0) {
+- iounmap((void __iomem *) vaddr);
++ if (ioremap_change_attr(vaddr, size, prot_val) < 0) {
++ free_memtype(phys_addr, phys_addr + size);
++ vunmap(area->addr);
+ return NULL;
+ }
+
+@@ -365,16 +436,72 @@ static void __iomem *__ioremap(resource_
+ */
+ void __iomem *ioremap_nocache(resource_size_t phys_addr, unsigned long size)
+ {
+- return __ioremap(phys_addr, size, IOR_MODE_UNCACHED);
++ /*
++ * Ideally, this should be:
++ * pat_wc_enabled ? _PAGE_CACHE_UC : _PAGE_CACHE_UC_MINUS;
++ *
++ * Till we fix all X drivers to use ioremap_wc(), we will use
++ * UC MINUS.
++ */
++ unsigned long val = _PAGE_CACHE_UC_MINUS;
++
++ return __ioremap_caller(phys_addr, size, val,
++ __builtin_return_address(0));
+ }
+ EXPORT_SYMBOL(ioremap_nocache);
+
++/**
++ * ioremap_wc - map memory into CPU space write combined
++ * @offset: bus address of the memory
++ * @size: size of the resource to map
++ *
++ * This version of ioremap ensures that the memory is marked write combining.
++ * Write combining allows faster writes to some hardware devices.
++ *
++ * Must be freed with iounmap.
++ */
++void __iomem *ioremap_wc(unsigned long phys_addr, unsigned long size)
++{
++ if (pat_wc_enabled)
++ return __ioremap_caller(phys_addr, size, _PAGE_CACHE_WC,
++ __builtin_return_address(0));
++ else
++ return ioremap_nocache(phys_addr, size);
++}
++EXPORT_SYMBOL(ioremap_wc);
++
+ void __iomem *ioremap_cache(resource_size_t phys_addr, unsigned long size)
+ {
+- return __ioremap(phys_addr, size, IOR_MODE_CACHED);
++ return __ioremap_caller(phys_addr, size, _PAGE_CACHE_WB,
++ __builtin_return_address(0));
+ }
+ EXPORT_SYMBOL(ioremap_cache);
+
++#ifndef CONFIG_XEN
++static void __iomem *ioremap_default(resource_size_t phys_addr,
++ unsigned long size)
++{
++ unsigned long flags;
++ void *ret;
++ int err;
++
++ /*
++ * - WB for WB-able memory and no other conflicting mappings
++ * - UC_MINUS for non-WB-able memory with no other conflicting mappings
++ * - Inherit from conflicting mappings otherwise
++ */
++ err = reserve_memtype(phys_addr, phys_addr + size, -1, &flags);
++ if (err < 0)
++ return NULL;
++
++ ret = (void *) __ioremap_caller(phys_addr, size, flags,
++ __builtin_return_address(0));
++
++ free_memtype(phys_addr, phys_addr + size);
++ return (void __iomem *)ret;
++}
++#endif
++
+ /**
+ * iounmap - Free a IO remapping
+ * @addr: virtual address from ioremap_*
+@@ -417,15 +544,7 @@ void iounmap(volatile void __iomem *addr
+ return;
+ }
+
+- if ((p->flags >> 20) != IOR_MODE_CACHED) {
+- unsigned long n = get_vm_area_size(p) >> PAGE_SHIFT;
+- unsigned long mfn = p->phys_addr;
+- unsigned long va = (unsigned long)addr;
+-
+- for (; n > 0; n--, mfn++, va += PAGE_SIZE)
+- if (mfn_to_local_pfn(mfn) < max_pfn)
+- set_memory_wb(va, 1);
+- }
++ free_memtype(p->phys_addr, p->phys_addr + get_vm_area_size(p));
+
+ /* Finally remove it */
+ o = remove_vm_area((void *)addr);
+@@ -434,6 +553,37 @@ void iounmap(volatile void __iomem *addr
+ }
+ EXPORT_SYMBOL(iounmap);
+
++#ifndef CONFIG_XEN
++/*
++ * Convert a physical pointer to a virtual kernel pointer for /dev/mem
++ * access
++ */
++void *xlate_dev_mem_ptr(unsigned long phys)
++{
++ void *addr;
++ unsigned long start = phys & PAGE_MASK;
++
++ /* If page is RAM, we can use __va. Otherwise ioremap and unmap. */
++ if (page_is_ram(start >> PAGE_SHIFT))
++ return __va(phys);
++
++ addr = (void *)ioremap_default(start, PAGE_SIZE);
++ if (addr)
++ addr = (void *)((unsigned long)addr | (phys & ~PAGE_MASK));
++
++ return addr;
++}
++
++void unxlate_dev_mem_ptr(unsigned long phys, void *addr)
++{
++ if (page_is_ram(phys >> PAGE_SHIFT))
++ return;
++
++ iounmap((void __iomem *)((unsigned long)addr & PAGE_MASK));
++ return;
++}
++#endif
++
+ int __initdata early_ioremap_debug;
+
+ static int __init early_ioremap_debug_setup(char *str)
+@@ -445,8 +595,8 @@ static int __init early_ioremap_debug_se
+ early_param("early_ioremap_debug", early_ioremap_debug_setup);
+
+ static __initdata int after_paging_init;
+-static __initdata pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)]
+- __attribute__((aligned(PAGE_SIZE)));
++static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)]
++ __section(.bss.page_aligned);
+
+ #ifdef CONFIG_X86_32
+ static inline pmd_t * __init early_ioremap_pmd(unsigned long addr)
+@@ -461,8 +611,8 @@ static inline pmd_t * __init early_iorem
+ }
+ #else
+ #define early_ioremap_pmd early_get_pmd
++#undef make_lowmem_page_readonly
+ #define make_lowmem_page_readonly early_make_page_readonly
+-#define make_lowmem_page_writable make_page_writable
+ #endif
+
+ static inline pte_t * __init early_ioremap_pte(unsigned long addr)
+@@ -512,7 +662,7 @@ void __init early_ioremap_clear(void)
+ pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
+ pmd_clear(pmd);
+ make_lowmem_page_writable(bm_pte, XENFEAT_writable_page_tables);
+- /* paravirt_release_pt(__pa(bm_pte) >> PAGE_SHIFT); */
++ /* paravirt_release_pte(__pa(bm_pte) >> PAGE_SHIFT); */
+ __flush_tlb_all();
+ }
+
+@@ -654,10 +804,11 @@ void __init early_iounmap(void *addr, un
+ unsigned long offset;
+ unsigned int nrpages;
+ enum fixed_addresses idx;
+- unsigned int nesting;
++ int nesting;
+
+ nesting = --early_ioremap_nested;
+- WARN_ON(nesting < 0);
++ if (WARN_ON(nesting < 0))
++ return;
+
+ if (early_ioremap_debug) {
+ printk(KERN_INFO "early_iounmap(%p, %08lx) [%d]\n", addr,
+--- sle11-2009-05-14.orig/arch/x86/mm/pageattr-xen.c 2009-03-16 16:37:14.000000000 +0100
++++ sle11-2009-05-14/arch/x86/mm/pageattr-xen.c 2009-03-16 16:38:05.000000000 +0100
+@@ -9,6 +9,8 @@
+ #include <linux/slab.h>
+ #include <linux/mm.h>
+ #include <linux/interrupt.h>
++#include <linux/seq_file.h>
++#include <linux/debugfs.h>
+
+ #include <asm/e820.h>
+ #include <asm/processor.h>
+@@ -17,370 +19,7 @@
+ #include <asm/uaccess.h>
+ #include <asm/pgalloc.h>
+ #include <asm/proto.h>
+-#include <asm/mmu_context.h>
+-
+-#ifndef CONFIG_X86_64
+-#define TASK_SIZE64 TASK_SIZE
+-#endif
+-
+-static void _pin_lock(struct mm_struct *mm, int lock) {
+- if (lock)
+- spin_lock(&mm->page_table_lock);
+-#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
+- /* While mm->page_table_lock protects us against insertions and
+- * removals of higher level page table pages, it doesn't protect
+- * against updates of pte-s. Such updates, however, require the
+- * pte pages to be in consistent state (unpinned+writable or
+- * pinned+readonly). The pinning and attribute changes, however
+- * cannot be done atomically, which is why such updates must be
+- * prevented from happening concurrently.
+- * Note that no pte lock can ever elsewhere be acquired nesting
+- * with an already acquired one in the same mm, or with the mm's
+- * page_table_lock already acquired, as that would break in the
+- * non-split case (where all these are actually resolving to the
+- * one page_table_lock). Thus acquiring all of them here is not
+- * going to result in dead locks, and the order of acquires
+- * doesn't matter.
+- */
+- {
+- pgd_t *pgd = mm->pgd;
+- unsigned g;
+-
+- for (g = 0; g <= ((TASK_SIZE64-1) / PGDIR_SIZE); g++, pgd++) {
+- pud_t *pud;
+- unsigned u;
+-
+- if (pgd_none(*pgd))
+- continue;
+- pud = pud_offset(pgd, 0);
+- for (u = 0; u < PTRS_PER_PUD; u++, pud++) {
+- pmd_t *pmd;
+- unsigned m;
+-
+- if (pud_none(*pud))
+- continue;
+- pmd = pmd_offset(pud, 0);
+- for (m = 0; m < PTRS_PER_PMD; m++, pmd++) {
+- spinlock_t *ptl;
+-
+- if (pmd_none(*pmd))
+- continue;
+- ptl = pte_lockptr(0, pmd);
+- if (lock)
+- spin_lock(ptl);
+- else
+- spin_unlock(ptl);
+- }
+- }
+- }
+- }
+-#endif
+- if (!lock)
+- spin_unlock(&mm->page_table_lock);
+-}
+-#define pin_lock(mm) _pin_lock(mm, 1)
+-#define pin_unlock(mm) _pin_lock(mm, 0)
+-
+-#define PIN_BATCH sizeof(void *)
+-static DEFINE_PER_CPU(multicall_entry_t[PIN_BATCH], pb_mcl);
+-
+-static inline unsigned int pgd_walk_set_prot(struct page *page, pgprot_t flags,
+- unsigned int cpu, unsigned int seq)
+-{
+- unsigned long pfn = page_to_pfn(page);
+-
+- if (PageHighMem(page)) {
+- if (pgprot_val(flags) & _PAGE_RW)
+- ClearPagePinned(page);
+- else
+- SetPagePinned(page);
+- } else {
+- MULTI_update_va_mapping(per_cpu(pb_mcl, cpu) + seq,
+- (unsigned long)__va(pfn << PAGE_SHIFT),
+- pfn_pte(pfn, flags), 0);
+- if (unlikely(++seq == PIN_BATCH)) {
+- if (unlikely(HYPERVISOR_multicall_check(per_cpu(pb_mcl, cpu),
+- PIN_BATCH, NULL)))
+- BUG();
+- seq = 0;
+- }
+- }
+-
+- return seq;
+-}
+-
+-static void pgd_walk(pgd_t *pgd_base, pgprot_t flags)
+-{
+- pgd_t *pgd = pgd_base;
+- pud_t *pud;
+- pmd_t *pmd;
+- int g,u,m;
+- unsigned int cpu, seq;
+- multicall_entry_t *mcl;
+-
+- if (xen_feature(XENFEAT_auto_translated_physmap))
+- return;
+-
+- cpu = get_cpu();
+-
+- /*
+- * Cannot iterate up to USER_PTRS_PER_PGD on x86-64 as these pagetables
+- * may not be the 'current' task's pagetables (e.g., current may be
+- * 32-bit, but the pagetables may be for a 64-bit task).
+- * Subtracting 1 from TASK_SIZE64 means the loop limit is correct
+- * regardless of whether TASK_SIZE64 is a multiple of PGDIR_SIZE.
+- */
+- for (g = 0, seq = 0; g <= ((TASK_SIZE64-1) / PGDIR_SIZE); g++, pgd++) {
+- if (pgd_none(*pgd))
+- continue;
+- pud = pud_offset(pgd, 0);
+- if (PTRS_PER_PUD > 1) /* not folded */
+- seq = pgd_walk_set_prot(virt_to_page(pud),flags,cpu,seq);
+- for (u = 0; u < PTRS_PER_PUD; u++, pud++) {
+- if (pud_none(*pud))
+- continue;
+- pmd = pmd_offset(pud, 0);
+- if (PTRS_PER_PMD > 1) /* not folded */
+- seq = pgd_walk_set_prot(virt_to_page(pmd),flags,cpu,seq);
+- for (m = 0; m < PTRS_PER_PMD; m++, pmd++) {
+- if (pmd_none(*pmd))
+- continue;
+- seq = pgd_walk_set_prot(pmd_page(*pmd),flags,cpu,seq);
+- }
+- }
+- }
+-
+- mcl = per_cpu(pb_mcl, cpu);
+-#ifdef CONFIG_X86_64
+- if (unlikely(seq > PIN_BATCH - 2)) {
+- if (unlikely(HYPERVISOR_multicall_check(mcl, seq, NULL)))
+- BUG();
+- seq = 0;
+- }
+- MULTI_update_va_mapping(mcl + seq,
+- (unsigned long)__user_pgd(pgd_base),
+- pfn_pte(virt_to_phys(__user_pgd(pgd_base))>>PAGE_SHIFT, flags),
+- 0);
+- MULTI_update_va_mapping(mcl + seq + 1,
+- (unsigned long)pgd_base,
+- pfn_pte(virt_to_phys(pgd_base)>>PAGE_SHIFT, flags),
+- UVMF_TLB_FLUSH);
+- if (unlikely(HYPERVISOR_multicall_check(mcl, seq + 2, NULL)))
+- BUG();
+-#else
+- if (likely(seq != 0)) {
+- MULTI_update_va_mapping(per_cpu(pb_mcl, cpu) + seq,
+- (unsigned long)pgd_base,
+- pfn_pte(virt_to_phys(pgd_base)>>PAGE_SHIFT, flags),
+- UVMF_TLB_FLUSH);
+- if (unlikely(HYPERVISOR_multicall_check(per_cpu(pb_mcl, cpu),
+- seq + 1, NULL)))
+- BUG();
+- } else if(HYPERVISOR_update_va_mapping((unsigned long)pgd_base,
+- pfn_pte(virt_to_phys(pgd_base)>>PAGE_SHIFT, flags),
+- UVMF_TLB_FLUSH))
+- BUG();
+-#endif
+-
+- put_cpu();
+-}
+-
+-static void __pgd_pin(pgd_t *pgd)
+-{
+- pgd_walk(pgd, PAGE_KERNEL_RO);
+- kmap_flush_unused();
+- xen_pgd_pin(__pa(pgd)); /* kernel */
+-#ifdef CONFIG_X86_64
+- xen_pgd_pin(__pa(__user_pgd(pgd))); /* user */
+-#endif
+- SetPagePinned(virt_to_page(pgd));
+-}
+-
+-static void __pgd_unpin(pgd_t *pgd)
+-{
+- xen_pgd_unpin(__pa(pgd));
+-#ifdef CONFIG_X86_64
+- xen_pgd_unpin(__pa(__user_pgd(pgd)));
+-#endif
+- pgd_walk(pgd, PAGE_KERNEL);
+- ClearPagePinned(virt_to_page(pgd));
+-}
+-
+-void pgd_test_and_unpin(pgd_t *pgd)
+-{
+- if (PagePinned(virt_to_page(pgd)))
+- __pgd_unpin(pgd);
+-}
+-
+-void mm_pin(struct mm_struct *mm)
+-{
+- if (xen_feature(XENFEAT_writable_page_tables))
+- return;
+-
+- pin_lock(mm);
+- __pgd_pin(mm->pgd);
+- pin_unlock(mm);
+-}
+-
+-void mm_unpin(struct mm_struct *mm)
+-{
+- if (xen_feature(XENFEAT_writable_page_tables))
+- return;
+-
+- pin_lock(mm);
+- __pgd_unpin(mm->pgd);
+- pin_unlock(mm);
+-}
+-
+-void mm_pin_all(void)
+-{
+- struct page *page;
+- unsigned long flags;
+-
+- if (xen_feature(XENFEAT_writable_page_tables))
+- return;
+-
+- /*
+- * Allow uninterrupted access to the pgd_list. Also protects
+- * __pgd_pin() by disabling preemption.
+- * All other CPUs must be at a safe point (e.g., in stop_machine
+- * or offlined entirely).
+- */
+- spin_lock_irqsave(&pgd_lock, flags);
+- list_for_each_entry(page, &pgd_list, lru) {
+- if (!PagePinned(page))
+- __pgd_pin((pgd_t *)page_address(page));
+- }
+- spin_unlock_irqrestore(&pgd_lock, flags);
+-}
+-
+-void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
+-{
+- if (!PagePinned(virt_to_page(mm->pgd)))
+- mm_pin(mm);
+-}
+-
+-void arch_exit_mmap(struct mm_struct *mm)
+-{
+- struct task_struct *tsk = current;
+-
+- task_lock(tsk);
+-
+- /*
+- * We aggressively remove defunct pgd from cr3. We execute unmap_vmas()
+- * *much* faster this way, as no tlb flushes means bigger wrpt batches.
+- */
+- if (tsk->active_mm == mm) {
+- tsk->active_mm = &init_mm;
+- atomic_inc(&init_mm.mm_count);
+-
+- switch_mm(mm, &init_mm, tsk);
+-
+- atomic_dec(&mm->mm_count);
+- BUG_ON(atomic_read(&mm->mm_count) == 0);
+- }
+-
+- task_unlock(tsk);
+-
+- if (PagePinned(virt_to_page(mm->pgd))
+- && atomic_read(&mm->mm_count) == 1
+- && !mm->context.has_foreign_mappings)
+- mm_unpin(mm);
+-}
+-
+-static void _pte_free(struct page *page, unsigned int order)
+-{
+- BUG_ON(order);
+- __pte_free(page);
+-}
+-
+-pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
+-{
+- struct page *pte;
+-
+-#ifdef CONFIG_HIGHPTE
+- pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT|__GFP_ZERO, 0);
+-#else
+- pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
+-#endif
+- if (pte) {
+- pgtable_page_ctor(pte);
+- SetPageForeign(pte, _pte_free);
+- init_page_count(pte);
+- }
+- return pte;
+-}
+-
+-void __pte_free(pgtable_t pte)
+-{
+- if (!PageHighMem(pte)) {
+- unsigned long va = (unsigned long)page_address(pte);
+- unsigned int level;
+- pte_t *ptep = lookup_address(va, &level);
+-
+- BUG_ON(!ptep || level != PG_LEVEL_4K || !pte_present(*ptep));
+- if (!pte_write(*ptep)
+- && HYPERVISOR_update_va_mapping(va,
+- mk_pte(pte, PAGE_KERNEL),
+- 0))
+- BUG();
+- } else
+-#ifdef CONFIG_HIGHPTE
+- ClearPagePinned(pte);
+-#else
+- BUG();
+-#endif
+-
+- ClearPageForeign(pte);
+- init_page_count(pte);
+- pgtable_page_dtor(pte);
+- __free_page(pte);
+-}
+-
+-#if PAGETABLE_LEVELS >= 3
+-static void _pmd_free(struct page *page, unsigned int order)
+-{
+- BUG_ON(order);
+- __pmd_free(page);
+-}
+-
+-pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
+-{
+- struct page *pmd;
+-
+- pmd = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
+- if (!pmd)
+- return NULL;
+- SetPageForeign(pmd, _pmd_free);
+- init_page_count(pmd);
+- return page_address(pmd);
+-}
+-
+-void __pmd_free(pgtable_t pmd)
+-{
+- unsigned long va = (unsigned long)page_address(pmd);
+- unsigned int level;
+- pte_t *ptep = lookup_address(va, &level);
+-
+- BUG_ON(!ptep || level != PG_LEVEL_4K || !pte_present(*ptep));
+- if (!pte_write(*ptep)
+- && HYPERVISOR_update_va_mapping(va, mk_pte(pmd, PAGE_KERNEL), 0))
+- BUG();
+-
+- ClearPageForeign(pmd);
+- init_page_count(pmd);
+- __free_page(pmd);
+-}
+-#endif
+-
+-/* blktap and gntdev need this, as otherwise they would implicitly (and
+- * needlessly, as they never use it) reference init_mm. */
+-pte_t xen_ptep_get_and_clear_full(struct vm_area_struct *vma,
+- unsigned long addr, pte_t *ptep, int full)
+-{
+- return ptep_get_and_clear_full(vma->vm_mm, addr, ptep, full);
+-}
+-EXPORT_SYMBOL_GPL(xen_ptep_get_and_clear_full);
++#include <asm/pat.h>
+
+ /*
+ * The current flushing context - we pass it instead of 5 arguments:
+@@ -392,6 +31,7 @@ struct cpa_data {
+ int numpages;
+ int flushtlb;
+ unsigned long pfn;
++ unsigned force_split : 1;
+ };
+
+ #ifdef CONFIG_X86_64
+@@ -637,6 +277,9 @@ try_preserve_large_page(pte_t *kpte, uns
+ int i, do_split = 1;
+ unsigned int level;
+
++ if (cpa->force_split)
++ return 1;
++
+ spin_lock_irqsave(&pgd_lock, flags);
+ /*
+ * Check for races, another CPU might have split this page
+@@ -856,9 +499,7 @@ static int split_large_page(pte_t *kpte,
+ goto out_unlock;
+
+ pbase = (pte_t *)page_address(base);
+-#ifdef CONFIG_X86_32
+- paravirt_alloc_pt(&init_mm, page_to_pfn(base));
+-#endif
++ paravirt_alloc_pte(&init_mm, page_to_pfn(base));
+ ref_prot = pte_pgprot(pte_clrhuge(*kpte));
+
+ #ifdef CONFIG_X86_64
+@@ -919,7 +560,7 @@ static int __change_page_attr(struct cpa
+ repeat:
+ kpte = lookup_address(address, &level);
+ if (!kpte)
+- return primary ? -EINVAL : 0;
++ return 0;
+
+ old_pte = *kpte;
+ if (!__pte_val(old_pte)) {
+@@ -1078,7 +719,8 @@ static inline int cache_attr(pgprot_t at
+ }
+
+ static int change_page_attr_set_clr(unsigned long addr, int numpages,
+- pgprot_t mask_set, pgprot_t mask_clr)
++ pgprot_t mask_set, pgprot_t mask_clr,
++ int force_split)
+ {
+ struct cpa_data cpa;
+ int ret, cache, checkalias;
+@@ -1089,7 +731,7 @@ static int change_page_attr_set_clr(unsi
+ */
+ mask_set = canon_pgprot(mask_set);
+ mask_clr = canon_pgprot(mask_clr);
+- if (!pgprot_val(mask_set) && !pgprot_val(mask_clr))
++ if (!pgprot_val(mask_set) && !pgprot_val(mask_clr) && !force_split)
+ return 0;
+
+ /* Ensure we are PAGE_SIZE aligned */
+@@ -1106,6 +748,7 @@ static int change_page_attr_set_clr(unsi
+ cpa.mask_set = mask_set;
+ cpa.mask_clr = mask_clr;
+ cpa.flushtlb = 0;
++ cpa.force_split = force_split;
+
+ /* No alias checking for _NX bit modifications */
+ checkalias = (pgprot_val(mask_set) | pgprot_val(mask_clr)) != _PAGE_NX;
+@@ -1144,26 +787,67 @@ out:
+ static inline int change_page_attr_set(unsigned long addr, int numpages,
+ pgprot_t mask)
+ {
+- return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0));
++ return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0), 0);
+ }
+
+ static inline int change_page_attr_clear(unsigned long addr, int numpages,
+ pgprot_t mask)
+ {
+- return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask);
++ return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask, 0);
+ }
+
+-int set_memory_uc(unsigned long addr, int numpages)
++int _set_memory_uc(unsigned long addr, int numpages)
+ {
++ /*
++ * for now UC MINUS. see comments in ioremap_nocache()
++ */
+ return change_page_attr_set(addr, numpages,
+- __pgprot(_PAGE_PCD));
++ __pgprot(_PAGE_CACHE_UC_MINUS));
++}
++
++int set_memory_uc(unsigned long addr, int numpages)
++{
++ /*
++ * for now UC MINUS. see comments in ioremap_nocache()
++ */
++ if (reserve_memtype(addr, addr + numpages * PAGE_SIZE,
++ _PAGE_CACHE_UC_MINUS, NULL))
++ return -EINVAL;
++
++ return _set_memory_uc(addr, numpages);
+ }
+ EXPORT_SYMBOL(set_memory_uc);
+
+-int set_memory_wb(unsigned long addr, int numpages)
++int _set_memory_wc(unsigned long addr, int numpages)
++{
++ return change_page_attr_set(addr, numpages,
++ __pgprot(_PAGE_CACHE_WC));
++}
++
++int set_memory_wc(unsigned long addr, int numpages)
++{
++ if (!pat_wc_enabled)
++ return set_memory_uc(addr, numpages);
++
++ if (reserve_memtype(addr, addr + numpages * PAGE_SIZE,
++ _PAGE_CACHE_WC, NULL))
++ return -EINVAL;
++
++ return _set_memory_wc(addr, numpages);
++}
++EXPORT_SYMBOL(set_memory_wc);
++
++int _set_memory_wb(unsigned long addr, int numpages)
+ {
+ return change_page_attr_clear(addr, numpages,
+- __pgprot(_PAGE_PCD | _PAGE_PWT));
++ __pgprot(_PAGE_CACHE_MASK));
++}
++
++int set_memory_wb(unsigned long addr, int numpages)
++{
++ free_memtype(addr, addr + numpages * PAGE_SIZE);
++
++ return _set_memory_wb(addr, numpages);
+ }
+ EXPORT_SYMBOL(set_memory_wb);
+
+@@ -1194,6 +878,12 @@ int set_memory_np(unsigned long addr, in
+ return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_PRESENT));
+ }
+
++int set_memory_4k(unsigned long addr, int numpages)
++{
++ return change_page_attr_set_clr(addr, numpages, __pgprot(0),
++ __pgprot(0), 1);
++}
++
+ int set_pages_uc(struct page *page, int numpages)
+ {
+ unsigned long addr = (unsigned long)page_address(page);
+@@ -1303,6 +993,45 @@ void kernel_map_pages(struct page *page,
+ cpa_fill_pool(NULL);
+ }
+
++#ifdef CONFIG_DEBUG_FS
++static int dpa_show(struct seq_file *m, void *v)
++{
++ seq_puts(m, "DEBUG_PAGEALLOC\n");
++ seq_printf(m, "pool_size : %lu\n", pool_size);
++ seq_printf(m, "pool_pages : %lu\n", pool_pages);
++ seq_printf(m, "pool_low : %lu\n", pool_low);
++ seq_printf(m, "pool_used : %lu\n", pool_used);
++ seq_printf(m, "pool_failed : %lu\n", pool_failed);
++
++ return 0;
++}
++
++static int dpa_open(struct inode *inode, struct file *filp)
++{
++ return single_open(filp, dpa_show, NULL);
++}
++
++static const struct file_operations dpa_fops = {
++ .open = dpa_open,
++ .read = seq_read,
++ .llseek = seq_lseek,
++ .release = single_release,
++};
++
++static int __init debug_pagealloc_proc_init(void)
++{
++ struct dentry *de;
++
++ de = debugfs_create_file("debug_pagealloc", 0600, NULL, NULL,
++ &dpa_fops);
++ if (!de)
++ return -ENOMEM;
++
++ return 0;
++}
++__initcall(debug_pagealloc_proc_init);
++#endif
++
+ #ifdef CONFIG_HIBERNATION
+
+ bool kernel_page_present(struct page *page)
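
The pageattr-xen.c rework above splits each set_memory_*() helper into a memtype-reserving wrapper and a bare _set_memory_*() worker, and adds set_memory_wc(). A hedged sketch of the intended calling pattern on a kernel-owned buffer (buffer size and error handling are illustrative only):

#include <linux/errno.h>
#include <linux/gfp.h>
#include <asm/cacheflush.h>

static int example_make_buffer_wc(void)
{
        unsigned long addr = __get_free_pages(GFP_KERNEL, 2);  /* 4 pages */

        if (!addr)
                return -ENOMEM;

        /* Reserves the memtype, then changes the linear mapping; without
         * PAT this degrades to set_memory_uc(). */
        if (set_memory_wc(addr, 4)) {
                free_pages(addr, 2);
                return -EINVAL;
        }

        /* ... fill the write-combining buffer here ... */

        set_memory_wb(addr, 4);         /* frees the memtype, restores WB */
        free_pages(addr, 2);
        return 0;
}
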
+--- /dev/null 1970-01-01 00:00:00.000000000 +0000
++++ sle11-2009-05-14/arch/x86/mm/pat-xen.c 2009-03-16 16:38:05.000000000 +0100
+@@ -0,0 +1,602 @@
++/*
++ * Handle caching attributes in page tables (PAT)
++ *
++ * Authors: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
++ * Suresh B Siddha <suresh.b.siddha@intel.com>
++ *
++ * Loosely based on earlier PAT patchset from Eric Biederman and Andi Kleen.
++ */
++
++#include <linux/mm.h>
++#include <linux/kernel.h>
++#include <linux/gfp.h>
++#include <linux/fs.h>
++#include <linux/bootmem.h>
++
++#include <asm/msr.h>
++#include <asm/tlbflush.h>
++#include <asm/processor.h>
++#include <asm/page.h>
++#include <asm/pgtable.h>
++#include <asm/pat.h>
++#include <asm/e820.h>
++#include <asm/cacheflush.h>
++#include <asm/fcntl.h>
++#include <asm/mtrr.h>
++#include <asm/io.h>
++
++#ifdef CONFIG_X86_PAT
++int __read_mostly pat_wc_enabled = 1;
++
++void __cpuinit pat_disable(char *reason)
++{
++ pat_wc_enabled = 0;
++ printk(KERN_INFO "%s\n", reason);
++}
++
++static int __init nopat(char *str)
++{
++ pat_disable("PAT support disabled.");
++ return 0;
++}
++early_param("nopat", nopat);
++#endif
++
++static u64 __read_mostly boot_pat_state;
++
++enum {
++ PAT_UC = 0, /* uncached */
++ PAT_WC = 1, /* Write combining */
++ PAT_WT = 4, /* Write Through */
++ PAT_WP = 5, /* Write Protected */
++ PAT_WB = 6, /* Write Back (default) */
++ PAT_UC_MINUS = 7, /* UC, but can be overridden by MTRR */
++};
++
++#define PAT(x,y) ((u64)PAT_ ## y << ((x)*8))
++
++void pat_init(void)
++{
++ u64 pat;
++
++ if (!pat_wc_enabled)
++ return;
++
++ /* Paranoia check. */
++ if (!cpu_has_pat) {
++ printk(KERN_ERR "PAT enabled, but CPU feature cleared\n");
++ /*
++ * Panic if this happens on the secondary CPU, and we
++ * switched to PAT on the boot CPU. We have no way to
++ * undo PAT.
++ */
++ BUG_ON(boot_pat_state);
++ }
++
++#ifndef CONFIG_XEN
++ /* Set PWT to Write-Combining. All other bits stay the same */
++ /*
++ * PTE encoding used in Linux:
++ * PAT
++ * |PCD
++ * ||PWT
++ * |||
++ * 000 WB _PAGE_CACHE_WB
++ * 001 WC _PAGE_CACHE_WC
++ * 010 UC- _PAGE_CACHE_UC_MINUS
++ * 011 UC _PAGE_CACHE_UC
++ * PAT bit unused
++ */
++ pat = PAT(0,WB) | PAT(1,WC) | PAT(2,UC_MINUS) | PAT(3,UC) |
++ PAT(4,WB) | PAT(5,WC) | PAT(6,UC_MINUS) | PAT(7,UC);
++
++ /* Boot CPU check */
++ if (!boot_pat_state)
++ rdmsrl(MSR_IA32_CR_PAT, boot_pat_state);
++
++ wrmsrl(MSR_IA32_CR_PAT, pat);
++#else
++ /*
++ * PAT settings are part of the hypervisor interface, and their
++ * assignment cannot be changed.
++ */
++ rdmsrl(MSR_IA32_CR_PAT, pat);
++ if (!boot_pat_state)
++ boot_pat_state = pat;
++#endif
++ printk(KERN_INFO "x86 PAT enabled: cpu %d, old 0x%Lx, new 0x%Lx\n",
++ smp_processor_id(), boot_pat_state, pat);
++}
++
++#undef PAT
++
++static char *cattr_name(unsigned long flags)
++{
++ switch (flags & _PAGE_CACHE_MASK) {
++ case _PAGE_CACHE_UC: return "uncached";
++ case _PAGE_CACHE_UC_MINUS: return "uncached-minus";
++ case _PAGE_CACHE_WB: return "write-back";
++ case _PAGE_CACHE_WC: return "write-combining";
++ case _PAGE_CACHE_WP: return "write-protected";
++ case _PAGE_CACHE_WT: return "write-through";
++ default: return "broken";
++ }
++}
++
++/*
++ * The global memtype list keeps track of memory type for specific
++ * physical memory areas. Conflicting memory types in different
++ * mappings can cause CPU cache corruption. To avoid this we keep track.
++ *
++ * The list is sorted based on starting address and can contain multiple
++ * entries for each address (this allows reference counting for overlapping
++ * areas). All the aliases have the same cache attributes of course.
++ * Zero attributes are represented as holes.
++ *
++ * Currently the data structure is a list because the number of mappings
++ * is expected to be relatively small. If this should be a problem
++ * it could be changed to a rbtree or similar.
++ *
++ * memtype_lock protects the whole list.
++ */
++
++struct memtype {
++ u64 start;
++ u64 end;
++ unsigned long type;
++ struct list_head nd;
++};
++
++static LIST_HEAD(memtype_list);
++static DEFINE_SPINLOCK(memtype_lock); /* protects memtype list */
++
++/*
++ * Does intersection of PAT memory type and MTRR memory type and returns
++ * the resulting memory type as PAT understands it.
++ * (Type in pat and mtrr will not have same value)
++ * The intersection is based on "Effective Memory Type" tables in IA-32
++ * SDM vol 3a
++ */
++static int pat_x_mtrr_type(u64 start, u64 end, unsigned long prot,
++ unsigned long *ret_prot)
++{
++ unsigned long pat_type;
++ u8 mtrr_type;
++
++ pat_type = prot & _PAGE_CACHE_MASK;
++ prot &= (~_PAGE_CACHE_MASK);
++
++ /*
++ * We return the PAT request directly for types where PAT takes
++ * precedence with respect to MTRR and for UC_MINUS.
++ * Consistency checks with other PAT requests are done later
++ * while going through the memtype list.
++ */
++ if (pat_type == _PAGE_CACHE_WC) {
++ *ret_prot = prot | _PAGE_CACHE_WC;
++ return 0;
++ } else if (pat_type == _PAGE_CACHE_UC_MINUS) {
++ *ret_prot = prot | _PAGE_CACHE_UC_MINUS;
++ return 0;
++ } else if (pat_type == _PAGE_CACHE_UC) {
++ *ret_prot = prot | _PAGE_CACHE_UC;
++ return 0;
++ }
++
++ /*
++ * Look for MTRR hint to get the effective type in case where PAT
++ * request is for WB.
++ */
++ mtrr_type = mtrr_type_lookup(start, end);
++
++ if (mtrr_type == MTRR_TYPE_UNCACHABLE) {
++ *ret_prot = prot | _PAGE_CACHE_UC;
++ } else if (mtrr_type == MTRR_TYPE_WRCOMB) {
++ *ret_prot = prot | _PAGE_CACHE_WC;
++ } else {
++ *ret_prot = prot | _PAGE_CACHE_WB;
++ }
++
++ return 0;
++}
++
++/*
++ * req_type typically has one of the:
++ * - _PAGE_CACHE_WB
++ * - _PAGE_CACHE_WC
++ * - _PAGE_CACHE_UC_MINUS
++ * - _PAGE_CACHE_UC
++ *
++ * req_type will have a special case value '-1', when the requester wants to inherit
++ * the memory type from mtrr (if WB), existing PAT, defaulting to UC_MINUS.
++ *
++ * If ret_type is NULL, function will return an error if it cannot reserve the
++ * region with req_type. If ret_type is non-null, function will return
++ * available type in ret_type in case of no error. In case of any error
++ * it will return a negative return value.
++ */
++int reserve_memtype(u64 start, u64 end, unsigned long req_type,
++ unsigned long *ret_type)
++{
++ struct memtype *new_entry = NULL;
++ struct memtype *parse;
++ unsigned long actual_type;
++ int err = 0;
++
++ /* Only track when pat_wc_enabled */
++ if (!pat_wc_enabled) {
++ /* This is identical to page table setting without PAT */
++ if (ret_type) {
++ if (req_type == -1) {
++ *ret_type = _PAGE_CACHE_WB;
++ } else {
++ *ret_type = req_type;
++ }
++ }
++ return 0;
++ }
++
++ /* Low ISA region is always mapped WB in page table. No need to track */
++ if (start >= ISA_START_ADDRESS && (end - 1) <= ISA_END_ADDRESS) {
++ if (ret_type)
++ *ret_type = _PAGE_CACHE_WB;
++
++ return 0;
++ }
++
++ if (req_type == -1) {
++ /*
++ * Call mtrr_lookup to get the type hint. This is an
++ * optimization for /dev/mem mmap'ers into WB memory (BIOS
++ * tools and ACPI tools). Use WB request for WB memory and use
++ * UC_MINUS otherwise.
++ */
++ u8 mtrr_type = mtrr_type_lookup(start, end);
++
++ if (mtrr_type == MTRR_TYPE_WRBACK) {
++ req_type = _PAGE_CACHE_WB;
++ actual_type = _PAGE_CACHE_WB;
++ } else {
++ req_type = _PAGE_CACHE_UC_MINUS;
++ actual_type = _PAGE_CACHE_UC_MINUS;
++ }
++ } else {
++ req_type &= _PAGE_CACHE_MASK;
++ err = pat_x_mtrr_type(start, end, req_type, &actual_type);
++ }
++
++ if (err) {
++ if (ret_type)
++ *ret_type = actual_type;
++
++ return -EINVAL;
++ }
++
++ new_entry = kmalloc(sizeof(struct memtype), GFP_KERNEL);
++ if (!new_entry)
++ return -ENOMEM;
++
++ new_entry->start = start;
++ new_entry->end = end;
++ new_entry->type = actual_type;
++
++ if (ret_type)
++ *ret_type = actual_type;
++
++ spin_lock(&memtype_lock);
++
++ /* Search for existing mapping that overlaps the current range */
++ list_for_each_entry(parse, &memtype_list, nd) {
++ struct memtype *saved_ptr;
++
++ if (parse->start >= end) {
++ pr_debug("New Entry\n");
++ list_add(&new_entry->nd, parse->nd.prev);
++ new_entry = NULL;
++ break;
++ }
++
++ if (start <= parse->start && end >= parse->start) {
++ if (actual_type != parse->type && ret_type) {
++ actual_type = parse->type;
++ *ret_type = actual_type;
++ new_entry->type = actual_type;
++ }
++
++ if (actual_type != parse->type) {
++ printk(
++ KERN_INFO "%s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
++ current->comm, current->pid,
++ start, end,
++ cattr_name(actual_type),
++ cattr_name(parse->type));
++ err = -EBUSY;
++ break;
++ }
++
++ saved_ptr = parse;
++ /*
++ * Check to see whether the request overlaps more
++ * than one entry in the list
++ */
++ list_for_each_entry_continue(parse, &memtype_list, nd) {
++ if (end <= parse->start) {
++ break;
++ }
++
++ if (actual_type != parse->type) {
++ printk(
++ KERN_INFO "%s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
++ current->comm, current->pid,
++ start, end,
++ cattr_name(actual_type),
++ cattr_name(parse->type));
++ err = -EBUSY;
++ break;
++ }
++ }
++
++ if (err) {
++ break;
++ }
++
++ pr_debug("Overlap at 0x%Lx-0x%Lx\n",
++ saved_ptr->start, saved_ptr->end);
++ /* No conflict. Go ahead and add this new entry */
++ list_add(&new_entry->nd, saved_ptr->nd.prev);
++ new_entry = NULL;
++ break;
++ }
++
++ if (start < parse->end) {
++ if (actual_type != parse->type && ret_type) {
++ actual_type = parse->type;
++ *ret_type = actual_type;
++ new_entry->type = actual_type;
++ }
++
++ if (actual_type != parse->type) {
++ printk(
++ KERN_INFO "%s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
++ current->comm, current->pid,
++ start, end,
++ cattr_name(actual_type),
++ cattr_name(parse->type));
++ err = -EBUSY;
++ break;
++ }
++
++ saved_ptr = parse;
++ /*
++ * Check to see whether the request overlaps more
++ * than one entry in the list
++ */
++ list_for_each_entry_continue(parse, &memtype_list, nd) {
++ if (end <= parse->start) {
++ break;
++ }
++
++ if (actual_type != parse->type) {
++ printk(
++ KERN_INFO "%s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
++ current->comm, current->pid,
++ start, end,
++ cattr_name(actual_type),
++ cattr_name(parse->type));
++ err = -EBUSY;
++ break;
++ }
++ }
++
++ if (err) {
++ break;
++ }
++
++ pr_debug(KERN_INFO "Overlap at 0x%Lx-0x%Lx\n",
++ saved_ptr->start, saved_ptr->end);
++ /* No conflict. Go ahead and add this new entry */
++ list_add(&new_entry->nd, &saved_ptr->nd);
++ new_entry = NULL;
++ break;
++ }
++ }
++
++ if (err) {
++ printk(KERN_INFO
++ "reserve_memtype failed 0x%Lx-0x%Lx, track %s, req %s\n",
++ start, end, cattr_name(new_entry->type),
++ cattr_name(req_type));
++ kfree(new_entry);
++ spin_unlock(&memtype_lock);
++ return err;
++ }
++
++ if (new_entry) {
++ /* No conflict. Not yet added to the list. Add to the tail */
++ list_add_tail(&new_entry->nd, &memtype_list);
++ pr_debug("New Entry\n");
++ }
++
++ if (ret_type) {
++ pr_debug(
++ "reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n",
++ start, end, cattr_name(actual_type),
++ cattr_name(req_type), cattr_name(*ret_type));
++ } else {
++ pr_debug(
++ "reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s\n",
++ start, end, cattr_name(actual_type),
++ cattr_name(req_type));
++ }
++
++ spin_unlock(&memtype_lock);
++ return err;
++}
++
++int free_memtype(u64 start, u64 end)
++{
++ struct memtype *ml;
++ int err = -EINVAL;
++
++ /* Only track when pat_wc_enabled */
++ if (!pat_wc_enabled) {
++ return 0;
++ }
++
++ /* Low ISA region is always mapped WB. No need to track */
++ if (start >= ISA_START_ADDRESS && end <= ISA_END_ADDRESS) {
++ return 0;
++ }
++
++ spin_lock(&memtype_lock);
++ list_for_each_entry(ml, &memtype_list, nd) {
++ if (ml->start == start && ml->end == end) {
++ list_del(&ml->nd);
++ kfree(ml);
++ err = 0;
++ break;
++ }
++ }
++ spin_unlock(&memtype_lock);
++
++ if (err) {
++ printk(KERN_INFO "%s:%d freeing invalid memtype %Lx-%Lx\n",
++ current->comm, current->pid, start, end);
++ }
++
++ pr_debug("free_memtype request 0x%Lx-0x%Lx\n", start, end);
++ return err;
++}
++
++
++/*
++ * /dev/mem mmap interface. The memtype used for mapping varies:
++ * - Use UC for mappings with O_SYNC flag
++ * - Without O_SYNC flag, if there is any conflict in reserve_memtype,
++ * inherit the memtype from existing mapping.
++ * - Else use UC_MINUS memtype (for backward compatibility with existing
++ * X drivers.
++ */
++pgprot_t phys_mem_access_prot(struct file *file, unsigned long mfn,
++ unsigned long size, pgprot_t vma_prot)
++{
++ return vma_prot;
++}
++
++#ifdef CONFIG_NONPROMISC_DEVMEM
++/* This check is done in drivers/char/mem.c in case of NONPROMISC_DEVMEM */
++static inline int range_is_allowed(unsigned long mfn, unsigned long size)
++{
++ return 1;
++}
++#else
++static inline int range_is_allowed(unsigned long mfn, unsigned long size)
++{
++ u64 from = ((u64)mfn) << PAGE_SHIFT;
++ u64 to = from + size;
++ u64 cursor = from;
++
++ while (cursor < to) {
++ if (!devmem_is_allowed(mfn)) {
++ printk(KERN_INFO
++ "Program %s tried to access /dev/mem between %Lx->%Lx.\n",
++ current->comm, from, to);
++ return 0;
++ }
++ cursor += PAGE_SIZE;
++ mfn++;
++ }
++ return 1;
++}
++#endif /* CONFIG_NONPROMISC_DEVMEM */
++
++int phys_mem_access_prot_allowed(struct file *file, unsigned long mfn,
++ unsigned long size, pgprot_t *vma_prot)
++{
++ u64 addr = (u64)mfn << PAGE_SHIFT;
++ unsigned long flags = _PAGE_CACHE_UC_MINUS;
++ int retval;
++
++ if (!range_is_allowed(mfn, size))
++ return 0;
++
++ if (file->f_flags & O_SYNC) {
++ flags = _PAGE_CACHE_UC;
++ }
++
++#ifndef CONFIG_X86_32
++#ifndef CONFIG_XEN /* Xen sets correct MTRR type on non-RAM for us. */
++ /*
++ * On the PPro and successors, the MTRRs are used to set
++ * memory types for physical addresses outside main memory,
++ * so blindly setting UC or PWT on those pages is wrong.
++ * For Pentiums and earlier, the surround logic should disable
++ * caching for the high addresses through the KEN pin, but
++ * we maintain the tradition of paranoia in this code.
++ */
++ if (!pat_wc_enabled &&
++ ! ( test_bit(X86_FEATURE_MTRR, boot_cpu_data.x86_capability) ||
++ test_bit(X86_FEATURE_K6_MTRR, boot_cpu_data.x86_capability) ||
++ test_bit(X86_FEATURE_CYRIX_ARR, boot_cpu_data.x86_capability) ||
++ test_bit(X86_FEATURE_CENTAUR_MCR, boot_cpu_data.x86_capability)) &&
++ (pfn << PAGE_SHIFT) >= __pa(high_memory)) {
++ flags = _PAGE_CACHE_UC;
++ }
++#endif
++#endif
++
++ /*
++ * With O_SYNC, we can only take UC mapping. Fail if we cannot.
++ * Without O_SYNC, we want to get
++ * - WB for WB-able memory and no other conflicting mappings
++ * - UC_MINUS for non-WB-able memory with no other conflicting mappings
++ * - Inherit from conflicting mappings otherwise
++ */
++ if (flags != _PAGE_CACHE_UC_MINUS) {
++ retval = reserve_memtype(addr, addr + size, flags, NULL);
++ } else {
++ retval = reserve_memtype(addr, addr + size, -1, &flags);
++ }
++
++ if (retval < 0)
++ return 0;
++
++ if (ioremap_check_change_attr(mfn, size, flags) < 0) {
++ free_memtype(addr, addr + size);
++ printk(KERN_INFO
++ "%s:%d /dev/mem ioremap_change_attr failed %s for %Lx-%Lx\n",
++ current->comm, current->pid,
++ cattr_name(flags),
++ addr, addr + size);
++ return 0;
++ }
++
++ *vma_prot = __pgprot((pgprot_val(*vma_prot) & ~_PAGE_CACHE_MASK) |
++ flags);
++ return 1;
++}
++
++void map_devmem(unsigned long mfn, unsigned long size, pgprot_t vma_prot)
++{
++ u64 addr = (u64)mfn << PAGE_SHIFT;
++ unsigned long flags;
++ unsigned long want_flags = (pgprot_val(vma_prot) & _PAGE_CACHE_MASK);
++
++ reserve_memtype(addr, addr + size, want_flags, &flags);
++ if (flags != want_flags) {
++ printk(KERN_INFO
++ "%s:%d /dev/mem expected mapping type %s for %Lx-%Lx, got %s\n",
++ current->comm, current->pid,
++ cattr_name(want_flags),
++ addr, (unsigned long long)(addr + size),
++ cattr_name(flags));
++ }
++}
++
++void unmap_devmem(unsigned long mfn, unsigned long size, pgprot_t vma_prot)
++{
++ u64 addr = (u64)mfn << PAGE_SHIFT;
++
++ free_memtype(addr, addr + size);
++}
++
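
pat-xen.c above introduces the reserve_memtype()/free_memtype() pair that the ioremap and set_memory_*() paths rely on. A hedged sketch of the expected usage, showing how a caller learns the cache type it was actually granted (the region and function name are placeholders, not part of the patch):

#include <linux/errno.h>
#include <linux/types.h>
#include <asm/pat.h>
#include <asm/pgtable.h>

static int example_claim_region(u64 start, u64 size)
{
        unsigned long got;
        int err;

        /* Request write-combining; 'got' reports the granted type, which
         * may be inherited from an overlapping existing reservation. */
        err = reserve_memtype(start, start + size, _PAGE_CACHE_WC, &got);
        if (err)
                return err;

        if (got != _PAGE_CACHE_WC) {
                free_memtype(start, start + size);
                return -EBUSY;
        }

        /* ... establish mappings with the granted cache attribute ... */

        free_memtype(start, start + size);
        return 0;
}
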
+--- /dev/null 1970-01-01 00:00:00.000000000 +0000
++++ sle11-2009-05-14/arch/x86/mm/pgtable-xen.c 2009-03-16 16:38:05.000000000 +0100
+@@ -0,0 +1,709 @@
++#include <linux/mm.h>
++#include <linux/module.h>
++#include <xen/features.h>
++#include <asm/pgalloc.h>
++#include <asm/pgtable.h>
++#include <asm/tlb.h>
++#include <asm/hypervisor.h>
++#include <asm/mmu_context.h>
++
++pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
++{
++ pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
++ if (pte)
++ make_lowmem_page_readonly(pte, XENFEAT_writable_page_tables);
++ return pte;
++}
++
++static void _pte_free(struct page *page, unsigned int order)
++{
++ BUG_ON(order);
++ __pte_free(page);
++}
++
++pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
++{
++ struct page *pte;
++
++#ifdef CONFIG_HIGHPTE
++ pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT|__GFP_ZERO, 0);
++#else
++ pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
++#endif
++ if (pte) {
++ pgtable_page_ctor(pte);
++ SetPageForeign(pte, _pte_free);
++ init_page_count(pte);
++ }
++ return pte;
++}
++
++void __pte_free(pgtable_t pte)
++{
++ if (!PageHighMem(pte)) {
++ unsigned long va = (unsigned long)page_address(pte);
++ unsigned int level;
++ pte_t *ptep = lookup_address(va, &level);
++
++ BUG_ON(!ptep || level != PG_LEVEL_4K || !pte_present(*ptep));
++ if (!pte_write(*ptep)
++ && HYPERVISOR_update_va_mapping(va,
++ mk_pte(pte, PAGE_KERNEL),
++ 0))
++ BUG();
++ } else
++#ifdef CONFIG_HIGHPTE
++ ClearPagePinned(pte);
++#else
++ BUG();
++#endif
++
++ ClearPageForeign(pte);
++ init_page_count(pte);
++ pgtable_page_dtor(pte);
++ __free_page(pte);
++}
++
++void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
++{
++ pgtable_page_dtor(pte);
++ paravirt_release_pte(page_to_pfn(pte));
++ tlb_remove_page(tlb, pte);
++}
++
++#if PAGETABLE_LEVELS > 2
++static void _pmd_free(struct page *page, unsigned int order)
++{
++ BUG_ON(order);
++ __pmd_free(page);
++}
++
++pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
++{
++ struct page *pmd;
++
++ pmd = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
++ if (!pmd)
++ return NULL;
++ SetPageForeign(pmd, _pmd_free);
++ init_page_count(pmd);
++ return page_address(pmd);
++}
++
++void __pmd_free(pgtable_t pmd)
++{
++ unsigned long va = (unsigned long)page_address(pmd);
++ unsigned int level;
++ pte_t *ptep = lookup_address(va, &level);
++
++ BUG_ON(!ptep || level != PG_LEVEL_4K || !pte_present(*ptep));
++ if (!pte_write(*ptep)
++ && HYPERVISOR_update_va_mapping(va, mk_pte(pmd, PAGE_KERNEL), 0))
++ BUG();
++
++ ClearPageForeign(pmd);
++ init_page_count(pmd);
++ __free_page(pmd);
++}
++
++void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
++{
++ paravirt_release_pmd(__pa(pmd) >> PAGE_SHIFT);
++ tlb_remove_page(tlb, virt_to_page(pmd));
++}
++
++#if PAGETABLE_LEVELS > 3
++void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud)
++{
++ paravirt_release_pud(__pa(pud) >> PAGE_SHIFT);
++ tlb_remove_page(tlb, virt_to_page(pud));
++}
++#endif /* PAGETABLE_LEVELS > 3 */
++#endif /* PAGETABLE_LEVELS > 2 */
++
++#ifndef CONFIG_X86_64
++#define TASK_SIZE64 TASK_SIZE
++#endif
++
++static void _pin_lock(struct mm_struct *mm, int lock) {
++ if (lock)
++ spin_lock(&mm->page_table_lock);
++#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
++ /* While mm->page_table_lock protects us against insertions and
++ * removals of higher level page table pages, it doesn't protect
++ * against updates of pte-s. Such updates, however, require the
++ * pte pages to be in consistent state (unpinned+writable or
++ * pinned+readonly). The pinning and attribute changes, however
++ * cannot be done atomically, which is why such updates must be
++ * prevented from happening concurrently.
++ * Note that no pte lock can ever elsewhere be acquired nesting
++ * with an already acquired one in the same mm, or with the mm's
++ * page_table_lock already acquired, as that would break in the
++ * non-split case (where all these are actually resolving to the
++ * one page_table_lock). Thus acquiring all of them here is not
++ * going to result in deadlocks, and the order of acquires
++ * doesn't matter.
++ */
++ {
++ pgd_t *pgd = mm->pgd;
++ unsigned g;
++
++ for (g = 0; g <= ((TASK_SIZE64-1) / PGDIR_SIZE); g++, pgd++) {
++ pud_t *pud;
++ unsigned u;
++
++ if (pgd_none(*pgd))
++ continue;
++ pud = pud_offset(pgd, 0);
++ for (u = 0; u < PTRS_PER_PUD; u++, pud++) {
++ pmd_t *pmd;
++ unsigned m;
++
++ if (pud_none(*pud))
++ continue;
++ pmd = pmd_offset(pud, 0);
++ for (m = 0; m < PTRS_PER_PMD; m++, pmd++) {
++ spinlock_t *ptl;
++
++ if (pmd_none(*pmd))
++ continue;
++ ptl = pte_lockptr(0, pmd);
++ if (lock)
++ spin_lock(ptl);
++ else
++ spin_unlock(ptl);
++ }
++ }
++ }
++ }
++#endif
++ if (!lock)
++ spin_unlock(&mm->page_table_lock);
++}
++#define pin_lock(mm) _pin_lock(mm, 1)
++#define pin_unlock(mm) _pin_lock(mm, 0)
++
++#define PIN_BATCH sizeof(void *)
++static DEFINE_PER_CPU(multicall_entry_t[PIN_BATCH], pb_mcl);
++
++static inline unsigned int pgd_walk_set_prot(struct page *page, pgprot_t flags,
++ unsigned int cpu, unsigned int seq)
++{
++ unsigned long pfn = page_to_pfn(page);
++
++ if (PageHighMem(page)) {
++ if (pgprot_val(flags) & _PAGE_RW)
++ ClearPagePinned(page);
++ else
++ SetPagePinned(page);
++ } else {
++ MULTI_update_va_mapping(per_cpu(pb_mcl, cpu) + seq,
++ (unsigned long)__va(pfn << PAGE_SHIFT),
++ pfn_pte(pfn, flags), 0);
++ if (unlikely(++seq == PIN_BATCH)) {
++ if (unlikely(HYPERVISOR_multicall_check(per_cpu(pb_mcl, cpu),
++ PIN_BATCH, NULL)))
++ BUG();
++ seq = 0;
++ }
++ }
++
++ return seq;
++}
++
++static void pgd_walk(pgd_t *pgd_base, pgprot_t flags)
++{
++ pgd_t *pgd = pgd_base;
++ pud_t *pud;
++ pmd_t *pmd;
++ int g,u,m;
++ unsigned int cpu, seq;
++ multicall_entry_t *mcl;
++
++ if (xen_feature(XENFEAT_auto_translated_physmap))
++ return;
++
++ cpu = get_cpu();
++
++ /*
++ * Cannot iterate up to USER_PTRS_PER_PGD on x86-64 as these pagetables
++ * may not be the 'current' task's pagetables (e.g., current may be
++ * 32-bit, but the pagetables may be for a 64-bit task).
++ * Subtracting 1 from TASK_SIZE64 means the loop limit is correct
++ * regardless of whether TASK_SIZE64 is a multiple of PGDIR_SIZE.
++ */
++ for (g = 0, seq = 0; g <= ((TASK_SIZE64-1) / PGDIR_SIZE); g++, pgd++) {
++ if (pgd_none(*pgd))
++ continue;
++ pud = pud_offset(pgd, 0);
++ if (PTRS_PER_PUD > 1) /* not folded */
++ seq = pgd_walk_set_prot(virt_to_page(pud),flags,cpu,seq);
++ for (u = 0; u < PTRS_PER_PUD; u++, pud++) {
++ if (pud_none(*pud))
++ continue;
++ pmd = pmd_offset(pud, 0);
++ if (PTRS_PER_PMD > 1) /* not folded */
++ seq = pgd_walk_set_prot(virt_to_page(pmd),flags,cpu,seq);
++ for (m = 0; m < PTRS_PER_PMD; m++, pmd++) {
++ if (pmd_none(*pmd))
++ continue;
++ seq = pgd_walk_set_prot(pmd_page(*pmd),flags,cpu,seq);
++ }
++ }
++ }
++
++ mcl = per_cpu(pb_mcl, cpu);
++#ifdef CONFIG_X86_64
++ if (unlikely(seq > PIN_BATCH - 2)) {
++ if (unlikely(HYPERVISOR_multicall_check(mcl, seq, NULL)))
++ BUG();
++ seq = 0;
++ }
++ MULTI_update_va_mapping(mcl + seq,
++ (unsigned long)__user_pgd(pgd_base),
++ pfn_pte(virt_to_phys(__user_pgd(pgd_base))>>PAGE_SHIFT, flags),
++ 0);
++ MULTI_update_va_mapping(mcl + seq + 1,
++ (unsigned long)pgd_base,
++ pfn_pte(virt_to_phys(pgd_base)>>PAGE_SHIFT, flags),
++ UVMF_TLB_FLUSH);
++ if (unlikely(HYPERVISOR_multicall_check(mcl, seq + 2, NULL)))
++ BUG();
++#else
++ if (likely(seq != 0)) {
++ MULTI_update_va_mapping(per_cpu(pb_mcl, cpu) + seq,
++ (unsigned long)pgd_base,
++ pfn_pte(virt_to_phys(pgd_base)>>PAGE_SHIFT, flags),
++ UVMF_TLB_FLUSH);
++ if (unlikely(HYPERVISOR_multicall_check(per_cpu(pb_mcl, cpu),
++ seq + 1, NULL)))
++ BUG();
++ } else if(HYPERVISOR_update_va_mapping((unsigned long)pgd_base,
++ pfn_pte(virt_to_phys(pgd_base)>>PAGE_SHIFT, flags),
++ UVMF_TLB_FLUSH))
++ BUG();
++#endif
++
++ put_cpu();
++}
++
++static void __pgd_pin(pgd_t *pgd)
++{
++ pgd_walk(pgd, PAGE_KERNEL_RO);
++ kmap_flush_unused();
++ xen_pgd_pin(__pa(pgd)); /* kernel */
++#ifdef CONFIG_X86_64
++ xen_pgd_pin(__pa(__user_pgd(pgd))); /* user */
++#endif
++ SetPagePinned(virt_to_page(pgd));
++}
++
++static void __pgd_unpin(pgd_t *pgd)
++{
++ xen_pgd_unpin(__pa(pgd));
++#ifdef CONFIG_X86_64
++ xen_pgd_unpin(__pa(__user_pgd(pgd)));
++#endif
++ pgd_walk(pgd, PAGE_KERNEL);
++ ClearPagePinned(virt_to_page(pgd));
++}
++
++static void pgd_test_and_unpin(pgd_t *pgd)
++{
++ if (PagePinned(virt_to_page(pgd)))
++ __pgd_unpin(pgd);
++}
++
++void mm_pin(struct mm_struct *mm)
++{
++ if (xen_feature(XENFEAT_writable_page_tables))
++ return;
++
++ pin_lock(mm);
++ __pgd_pin(mm->pgd);
++ pin_unlock(mm);
++}
++
++void mm_unpin(struct mm_struct *mm)
++{
++ if (xen_feature(XENFEAT_writable_page_tables))
++ return;
++
++ pin_lock(mm);
++ __pgd_unpin(mm->pgd);
++ pin_unlock(mm);
++}
++
++void mm_pin_all(void)
++{
++ struct page *page;
++ unsigned long flags;
++
++ if (xen_feature(XENFEAT_writable_page_tables))
++ return;
++
++ /*
++ * Allow uninterrupted access to the pgd_list. Also protects
++ * __pgd_pin() by disabling preemption.
++ * All other CPUs must be at a safe point (e.g., in stop_machine
++ * or offlined entirely).
++ */
++ spin_lock_irqsave(&pgd_lock, flags);
++ list_for_each_entry(page, &pgd_list, lru) {
++ if (!PagePinned(page))
++ __pgd_pin((pgd_t *)page_address(page));
++ }
++ spin_unlock_irqrestore(&pgd_lock, flags);
++}
++
++void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
++{
++ if (!PagePinned(virt_to_page(mm->pgd)))
++ mm_pin(mm);
++}
++
++void arch_exit_mmap(struct mm_struct *mm)
++{
++ struct task_struct *tsk = current;
++
++ task_lock(tsk);
++
++ /*
++ * We aggressively remove defunct pgd from cr3. We execute unmap_vmas()
++ * *much* faster this way, as no tlb flushes means bigger wrpt batches.
++ */
++ if (tsk->active_mm == mm) {
++ tsk->active_mm = &init_mm;
++ atomic_inc(&init_mm.mm_count);
++
++ switch_mm(mm, &init_mm, tsk);
++
++ atomic_dec(&mm->mm_count);
++ BUG_ON(atomic_read(&mm->mm_count) == 0);
++ }
++
++ task_unlock(tsk);
++
++ if (PagePinned(virt_to_page(mm->pgd))
++ && atomic_read(&mm->mm_count) == 1
++ && !mm->context.has_foreign_mappings)
++ mm_unpin(mm);
++}
++
++static inline void pgd_list_add(pgd_t *pgd)
++{
++ struct page *page = virt_to_page(pgd);
++
++ list_add(&page->lru, &pgd_list);
++}
++
++static inline void pgd_list_del(pgd_t *pgd)
++{
++ struct page *page = virt_to_page(pgd);
++
++ list_del(&page->lru);
++}
++
++#define UNSHARED_PTRS_PER_PGD \
++ (SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD)
++
++static void pgd_ctor(void *p)
++{
++ pgd_t *pgd = p;
++ unsigned long flags;
++
++ pgd_test_and_unpin(pgd);
++
++ /* Clear usermode parts of PGD */
++ memset(pgd, 0, KERNEL_PGD_BOUNDARY*sizeof(pgd_t));
++
++ spin_lock_irqsave(&pgd_lock, flags);
++
++ /* If the pgd points to a shared pagetable level (either the
++ ptes in non-PAE, or shared PMD in PAE), then just copy the
++ references from swapper_pg_dir. */
++ if (PAGETABLE_LEVELS == 2 ||
++ (PAGETABLE_LEVELS == 3 && SHARED_KERNEL_PMD) ||
++ PAGETABLE_LEVELS == 4) {
++ clone_pgd_range(pgd + KERNEL_PGD_BOUNDARY,
++ swapper_pg_dir + KERNEL_PGD_BOUNDARY,
++ KERNEL_PGD_PTRS);
++ paravirt_alloc_pmd_clone(__pa(pgd) >> PAGE_SHIFT,
++ __pa(swapper_pg_dir) >> PAGE_SHIFT,
++ KERNEL_PGD_BOUNDARY,
++ KERNEL_PGD_PTRS);
++ }
++
++#ifdef CONFIG_X86_64
++ /* set level3_user_pgt for vsyscall area */
++ __user_pgd(pgd)[pgd_index(VSYSCALL_START)] =
++ __pgd(__pa_symbol(level3_user_pgt) | _PAGE_TABLE);
++#endif
++
++#ifndef CONFIG_X86_PAE
++ /* list required to sync kernel mapping updates */
++ if (!SHARED_KERNEL_PMD)
++ pgd_list_add(pgd);
++#endif
++
++ spin_unlock_irqrestore(&pgd_lock, flags);
++}
++
++static void pgd_dtor(void *pgd)
++{
++ unsigned long flags; /* can be called from interrupt context */
++
++ if (!SHARED_KERNEL_PMD) {
++ spin_lock_irqsave(&pgd_lock, flags);
++ pgd_list_del(pgd);
++ spin_unlock_irqrestore(&pgd_lock, flags);
++ }
++
++ pgd_test_and_unpin(pgd);
++}
++
++/*
++ * List of all pgd's needed for non-PAE so it can invalidate entries
++ * in both cached and uncached pgd's; not needed for PAE since the
++ * kernel pmd is shared. If PAE were not to share the pmd a similar
++ * tactic would be needed. This is essentially codepath-based locking
++ * against pageattr.c; it is the unique case in which a valid change
++ * of kernel pagetables can't be lazily synchronized by vmalloc faults.
++ * vmalloc faults work because attached pagetables are never freed.
++ * -- wli
++ */
++
++#ifdef CONFIG_X86_PAE
++/*
++ * Mop up any pmd pages which may still be attached to the pgd.
++ * Normally they will be freed by munmap/exit_mmap, but any pmd we
++ * preallocate which never got a corresponding vma will need to be
++ * freed manually.
++ */
++static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
++{
++ int i;
++
++ for(i = 0; i < UNSHARED_PTRS_PER_PGD; i++) {
++ pgd_t pgd = pgdp[i];
++
++ if (__pgd_val(pgd) != 0) {
++ pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd);
++
++ pgdp[i] = xen_make_pgd(0);
++
++ paravirt_release_pmd(pgd_val(pgd) >> PAGE_SHIFT);
++ pmd_free(mm, pmd);
++ }
++ }
++
++ if (!xen_feature(XENFEAT_pae_pgdir_above_4gb))
++ xen_destroy_contiguous_region((unsigned long)pgdp, 0);
++}
++
++/*
++ * In PAE mode, we need to do a cr3 reload (=tlb flush) when
++ * updating the top-level pagetable entries to guarantee the
++ * processor notices the update. Since this is expensive, and
++ * all 4 top-level entries are used almost immediately in a
++ * new process's life, we just pre-populate them here.
++ *
++ * Also, if we're in a paravirt environment where the kernel pmd is
++ * not shared between pagetables (!SHARED_KERNEL_PMDS), we allocate
++ * and initialize the kernel pmds here.
++ */
++static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
++{
++ pud_t *pud;
++ pmd_t *pmds[UNSHARED_PTRS_PER_PGD];
++ unsigned long addr, flags;
++ int i;
++
++ /*
++ * We can race save/restore (if we sleep during a GFP_KERNEL memory
++ * allocation). We therefore store virtual addresses of pmds as they
++ * do not change across save/restore, and poke the machine addresses
++ * into the pgdir under the pgd_lock.
++ */
++ for (addr = i = 0; i < UNSHARED_PTRS_PER_PGD; i++, addr += PUD_SIZE) {
++ pmds[i] = pmd_alloc_one(mm, addr);
++ if (!pmds[i])
++ goto out_oom;
++ }
++
++ spin_lock_irqsave(&pgd_lock, flags);
++
++ /* Protect against save/restore: move below 4GB under pgd_lock. */
++ if (!xen_feature(XENFEAT_pae_pgdir_above_4gb)
++ && xen_create_contiguous_region((unsigned long)pgd, 0, 32)) {
++ spin_unlock_irqrestore(&pgd_lock, flags);
++out_oom:
++ while (i--)
++ pmd_free(mm, pmds[i]);
++ return 0;
++ }
++
++ /* Copy kernel pmd contents and write-protect the new pmds. */
++ pud = pud_offset(pgd, 0);
++ for (addr = i = 0; i < UNSHARED_PTRS_PER_PGD;
++ i++, pud++, addr += PUD_SIZE) {
++ if (i >= KERNEL_PGD_BOUNDARY) {
++ memcpy(pmds[i],
++ (pmd_t *)pgd_page_vaddr(swapper_pg_dir[i]),
++ sizeof(pmd_t) * PTRS_PER_PMD);
++ make_lowmem_page_readonly(
++ pmds[i], XENFEAT_writable_page_tables);
++ }
++
++ /* It is safe to poke machine addresses of pmds under the pgd_lock. */
++ pud_populate(mm, pud, pmds[i]);
++ }
++
++ /* List required to sync kernel mapping updates and
++ * to pin/unpin on save/restore. */
++ pgd_list_add(pgd);
++
++ spin_unlock_irqrestore(&pgd_lock, flags);
++
++ return 1;
++}
++
++void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd)
++{
++ struct page *page = virt_to_page(pmd);
++ unsigned long pfn = page_to_pfn(page);
++
++ paravirt_alloc_pmd(mm, __pa(pmd) >> PAGE_SHIFT);
++
++ /* Note: almost everything apart from _PAGE_PRESENT is
++ reserved at the pmd (PDPT) level. */
++ if (PagePinned(virt_to_page(mm->pgd))) {
++ BUG_ON(PageHighMem(page));
++ BUG_ON(HYPERVISOR_update_va_mapping(
++ (unsigned long)__va(pfn << PAGE_SHIFT),
++ pfn_pte(pfn, PAGE_KERNEL_RO), 0));
++ set_pud(pudp, __pud(__pa(pmd) | _PAGE_PRESENT));
++ } else
++ *pudp = __pud(__pa(pmd) | _PAGE_PRESENT);
++
++ /*
++ * According to Intel App note "TLBs, Paging-Structure Caches,
++ * and Their Invalidation", April 2007, document 317080-001,
++ * section 8.1: in PAE mode we explicitly have to flush the
++ * TLB via cr3 if the top-level pgd is changed...
++ */
++ if (mm == current->active_mm)
++ xen_tlb_flush();
++}
++#else /* !CONFIG_X86_PAE */
++/* No need to prepopulate any pagetable entries in non-PAE modes. */
++static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
++{
++ return 1;
++}
++
++static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgd)
++{
++}
++#endif /* CONFIG_X86_PAE */
++
++#ifdef CONFIG_X86_64
++/* We allocate two contiguous pages for kernel and user. */
++#define PGD_ORDER 1
++#else
++#define PGD_ORDER 0
++#endif
++
++pgd_t *pgd_alloc(struct mm_struct *mm)
++{
++ pgd_t *pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, PGD_ORDER);
++
++ /* so that alloc_pd can use it */
++ mm->pgd = pgd;
++ if (pgd)
++ pgd_ctor(pgd);
++
++ if (pgd && !pgd_prepopulate_pmd(mm, pgd)) {
++ free_pages((unsigned long)pgd, PGD_ORDER);
++ pgd = NULL;
++ }
++
++ return pgd;
++}
++
++void pgd_free(struct mm_struct *mm, pgd_t *pgd)
++{
++ /*
++ * After this the pgd should not be pinned for the duration of this
++ * function's execution. We should never sleep and thus never race:
++ * 1. User pmds will not become write-protected under our feet due
++ * to a concurrent mm_pin_all().
++ * 2. The machine addresses in PGD entries will not become invalid
++ * due to a concurrent save/restore.
++ */
++ pgd_dtor(pgd);
++
++ pgd_mop_up_pmds(mm, pgd);
++ free_pages((unsigned long)pgd, PGD_ORDER);
++}
++
++/* blktap and gntdev need this, as otherwise they would implicitly (and
++ * needlessly, as they never use it) reference init_mm. */
++pte_t xen_ptep_get_and_clear_full(struct vm_area_struct *vma,
++ unsigned long addr, pte_t *ptep, int full)
++{
++ return ptep_get_and_clear_full(vma->vm_mm, addr, ptep, full);
++}
++EXPORT_SYMBOL_GPL(xen_ptep_get_and_clear_full);
++
++int ptep_set_access_flags(struct vm_area_struct *vma,
++ unsigned long address, pte_t *ptep,
++ pte_t entry, int dirty)
++{
++ int changed = !pte_same(*ptep, entry);
++
++ if (changed && dirty) {
++ if (likely(vma->vm_mm == current->mm)) {
++ if (HYPERVISOR_update_va_mapping(address,
++ entry,
++ (unsigned long)vma->vm_mm->cpu_vm_mask.bits|
++ UVMF_INVLPG|UVMF_MULTI))
++ BUG();
++ } else {
++ xen_l1_entry_update(ptep, entry);
++ flush_tlb_page(vma, address);
++ }
++ }
++
++ return changed;
++}
++
++int ptep_test_and_clear_young(struct vm_area_struct *vma,
++ unsigned long addr, pte_t *ptep)
++{
++ int ret = 0;
++
++ if (pte_young(*ptep))
++ ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
++ &ptep->pte);
++
++ if (ret)
++ pte_update(vma->vm_mm, addr, ptep);
++
++ return ret;
++}
++
++int ptep_clear_flush_young(struct vm_area_struct *vma,
++ unsigned long address, pte_t *ptep)
++{
++ pte_t pte = *ptep;
++ int young = pte_young(pte);
++
++ pte = pte_mkold(pte);
++ if (PagePinned(virt_to_page(vma->vm_mm->pgd)))
++ ptep_set_access_flags(vma, address, ptep, pte, young);
++ else if (young)
++ ptep->pte_low = pte.pte_low;
++
++ return young;
++}
+--- sle11-2009-05-14.orig/arch/x86/mm/pgtable_32-xen.c 2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/arch/x86/mm/pgtable_32-xen.c 2009-03-16 16:38:05.000000000 +0100
+@@ -1,7 +1,3 @@
+-/*
+- * linux/arch/i386/mm/pgtable.c
+- */
+-
+ #include <linux/sched.h>
+ #include <linux/kernel.h>
+ #include <linux/errno.h>
+@@ -41,7 +37,6 @@ void show_mem(void)
+
+ printk(KERN_INFO "Mem-info:\n");
+ show_free_areas();
+- printk(KERN_INFO "Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
+ for_each_online_pgdat(pgdat) {
+ pgdat_resize_lock(pgdat, &flags);
+ for (i = 0; i < pgdat->node_spanned_pages; ++i) {
+@@ -157,243 +152,6 @@ void __init reserve_top_address(unsigned
+ __VMALLOC_RESERVE += reserve;
+ }
+
+-pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
+-{
+- pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
+- if (pte)
+- make_lowmem_page_readonly(pte, XENFEAT_writable_page_tables);
+- return pte;
+-}
+-
+-/*
+- * List of all pgd's needed for non-PAE so it can invalidate entries
+- * in both cached and uncached pgd's; not needed for PAE since the
+- * kernel pmd is shared. If PAE were not to share the pmd a similar
+- * tactic would be needed. This is essentially codepath-based locking
+- * against pageattr.c; it is the unique case in which a valid change
+- * of kernel pagetables can't be lazily synchronized by vmalloc faults.
+- * vmalloc faults work because attached pagetables are never freed.
+- * -- wli
+- */
+-static inline void pgd_list_add(pgd_t *pgd)
+-{
+- struct page *page = virt_to_page(pgd);
+-
+- list_add(&page->lru, &pgd_list);
+-}
+-
+-static inline void pgd_list_del(pgd_t *pgd)
+-{
+- struct page *page = virt_to_page(pgd);
+-
+- list_del(&page->lru);
+-}
+-
+-#define UNSHARED_PTRS_PER_PGD \
+- (SHARED_KERNEL_PMD ? USER_PTRS_PER_PGD : PTRS_PER_PGD)
+-
+-static void pgd_ctor(void *p)
+-{
+- pgd_t *pgd = p;
+- unsigned long flags;
+-
+- pgd_test_and_unpin(pgd);
+-
+- /* Clear usermode parts of PGD */
+- memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
+-
+- spin_lock_irqsave(&pgd_lock, flags);
+-
+- /* If the pgd points to a shared pagetable level (either the
+- ptes in non-PAE, or shared PMD in PAE), then just copy the
+- references from swapper_pg_dir. */
+- if (PAGETABLE_LEVELS == 2 ||
+- (PAGETABLE_LEVELS == 3 && SHARED_KERNEL_PMD)) {
+- clone_pgd_range(pgd + USER_PTRS_PER_PGD,
+- swapper_pg_dir + USER_PTRS_PER_PGD,
+- KERNEL_PGD_PTRS);
+- paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT,
+- __pa(swapper_pg_dir) >> PAGE_SHIFT,
+- USER_PTRS_PER_PGD,
+- KERNEL_PGD_PTRS);
+- }
+-
+- /* list required to sync kernel mapping updates */
+- if (PAGETABLE_LEVELS == 2)
+- pgd_list_add(pgd);
+-
+- spin_unlock_irqrestore(&pgd_lock, flags);
+-}
+-
+-static void pgd_dtor(void *pgd)
+-{
+- unsigned long flags; /* can be called from interrupt context */
+-
+- if (!SHARED_KERNEL_PMD) {
+- spin_lock_irqsave(&pgd_lock, flags);
+- pgd_list_del(pgd);
+- spin_unlock_irqrestore(&pgd_lock, flags);
+- }
+-
+- pgd_test_and_unpin(pgd);
+-}
+-
+-#ifdef CONFIG_X86_PAE
+-/*
+- * Mop up any pmd pages which may still be attached to the pgd.
+- * Normally they will be freed by munmap/exit_mmap, but any pmd we
+- * preallocate which never got a corresponding vma will need to be
+- * freed manually.
+- */
+-static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
+-{
+- int i;
+-
+- for(i = 0; i < UNSHARED_PTRS_PER_PGD; i++) {
+- pgd_t pgd = pgdp[i];
+-
+- if (__pgd_val(pgd) != 0) {
+- pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd);
+-
+- pgdp[i] = xen_make_pgd(0);
+-
+- paravirt_release_pd(pgd_val(pgd) >> PAGE_SHIFT);
+- pmd_free(mm, pmd);
+- }
+- }
+-}
+-
+-/*
+- * In PAE mode, we need to do a cr3 reload (=tlb flush) when
+- * updating the top-level pagetable entries to guarantee the
+- * processor notices the update. Since this is expensive, and
+- * all 4 top-level entries are used almost immediately in a
+- * new process's life, we just pre-populate them here.
+- *
+- * Also, if we're in a paravirt environment where the kernel pmd is
+- * not shared between pagetables (!SHARED_KERNEL_PMDS), we allocate
+- * and initialize the kernel pmds here.
+- */
+-static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
+-{
+- pud_t *pud;
+- pmd_t *pmds[UNSHARED_PTRS_PER_PGD];
+- unsigned long addr, flags;
+- int i;
+-
+- /*
+- * We can race save/restore (if we sleep during a GFP_KERNEL memory
+- * allocation). We therefore store virtual addresses of pmds as they
+- * do not change across save/restore, and poke the machine addresses
+- * into the pgdir under the pgd_lock.
+- */
+- for (addr = i = 0; i < UNSHARED_PTRS_PER_PGD; i++, addr += PUD_SIZE) {
+- pmds[i] = pmd_alloc_one(mm, addr);
+- if (!pmds[i])
+- goto out_oom;
+- }
+-
+- spin_lock_irqsave(&pgd_lock, flags);
+-
+- /* Protect against save/restore: move below 4GB under pgd_lock. */
+- if (!xen_feature(XENFEAT_pae_pgdir_above_4gb)
+- && xen_create_contiguous_region((unsigned long)pgd, 0, 32)) {
+- spin_unlock_irqrestore(&pgd_lock, flags);
+-out_oom:
+- while (i--)
+- pmd_free(mm, pmds[i]);
+- return 0;
+- }
+-
+- /* Copy kernel pmd contents and write-protect the new pmds. */
+- pud = pud_offset(pgd, 0);
+- for (addr = i = 0; i < UNSHARED_PTRS_PER_PGD;
+- i++, pud++, addr += PUD_SIZE) {
+- if (i >= USER_PTRS_PER_PGD) {
+- memcpy(pmds[i],
+- (pmd_t *)pgd_page_vaddr(swapper_pg_dir[i]),
+- sizeof(pmd_t) * PTRS_PER_PMD);
+- make_lowmem_page_readonly(
+- pmds[i], XENFEAT_writable_page_tables);
+- }
+-
+- /* It is safe to poke machine addresses of pmds under the pgd_lock. */
+- pud_populate(mm, pud, pmds[i]);
+- }
+-
+- /* List required to sync kernel mapping updates and
+- * to pin/unpin on save/restore. */
+- pgd_list_add(pgd);
+-
+- spin_unlock_irqrestore(&pgd_lock, flags);
+-
+- return 1;
+-}
+-#else /* !CONFIG_X86_PAE */
+-/* No need to prepopulate any pagetable entries in non-PAE modes. */
+-static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
+-{
+- return 1;
+-}
+-
+-static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
+-{
+-}
+-#endif /* CONFIG_X86_PAE */
+-
+-pgd_t *pgd_alloc(struct mm_struct *mm)
+-{
+- pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
+-
+- /* so that alloc_pd can use it */
+- mm->pgd = pgd;
+- if (pgd)
+- pgd_ctor(pgd);
+-
+- if (pgd && !pgd_prepopulate_pmd(mm, pgd)) {
+- free_page((unsigned long)pgd);
+- pgd = NULL;
+- }
+-
+- return pgd;
+-}
+-
+-void pgd_free(struct mm_struct *mm, pgd_t *pgd)
+-{
+- /*
+- * After this the pgd should not be pinned for the duration of this
+- * function's execution. We should never sleep and thus never race:
+- * 1. User pmds will not become write-protected under our feet due
+- * to a concurrent mm_pin_all().
+- * 2. The machine addresses in PGD entries will not become invalid
+- * due to a concurrent save/restore.
+- */
+- pgd_dtor(pgd);
+-
+- if (PTRS_PER_PMD > 1 && !xen_feature(XENFEAT_pae_pgdir_above_4gb))
+- xen_destroy_contiguous_region((unsigned long)pgd, 0);
+-
+- pgd_mop_up_pmds(mm, pgd);
+- free_page((unsigned long)pgd);
+-}
+-
+-void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
+-{
+- pgtable_page_dtor(pte);
+- paravirt_release_pt(page_to_pfn(pte));
+- tlb_remove_page(tlb, pte);
+-}
+-
+-#ifdef CONFIG_X86_PAE
+-
+-void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
+-{
+- paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
+- tlb_remove_page(tlb, virt_to_page(pmd));
+-}
+-
+-#endif
+-
+ void make_lowmem_page_readonly(void *va, unsigned int feature)
+ {
+ pte_t *pte;
+--- sle11-2009-05-14.orig/arch/x86/pci/i386.c 2009-05-14 10:56:29.000000000 +0200
++++ sle11-2009-05-14/arch/x86/pci/i386.c 2009-05-14 11:20:29.000000000 +0200
+@@ -331,10 +331,14 @@ int pci_mmap_page_range(struct pci_dev *
+ flags);
+ }
+
++#ifndef CONFIG_XEN
+ if (((vma->vm_pgoff < max_low_pfn_mapped) ||
+ (vma->vm_pgoff >= (1UL<<(32 - PAGE_SHIFT)) &&
+ vma->vm_pgoff < max_pfn_mapped)) &&
+ ioremap_change_attr((unsigned long)__va(addr), len, flags)) {
++#else
++ if (ioremap_check_change_attr(vma->vm_pgoff, len, flags)) {
++#endif
+ free_memtype(addr, addr + len);
+ return -EINVAL;
+ }
+--- sle11-2009-05-14.orig/arch/x86/pci/irq-xen.c 2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/arch/x86/pci/irq-xen.c 2009-03-16 16:38:05.000000000 +0100
+@@ -140,9 +140,11 @@ static void __init pirq_peer_trick(void)
+ busmap[e->bus] = 1;
+ }
+ for(i = 1; i < 256; i++) {
++ int node;
+ if (!busmap[i] || pci_find_bus(0, i))
+ continue;
+- if (pci_scan_bus_with_sysdata(i))
++ node = get_mp_bus_to_node(i);
++ if (pci_scan_bus_on_node(i, &pci_root_ops, node))
+ printk(KERN_INFO "PCI: Discovered primary peer "
+ "bus %02x [IRQ]\n", i);
+ }
+@@ -204,7 +206,7 @@ static int pirq_ali_get(struct pci_dev *
+ {
+ static const unsigned char irqmap[16] = { 0, 9, 3, 10, 4, 5, 7, 6, 1, 11, 0, 12, 0, 14, 0, 15 };
+
+- WARN_ON_ONCE(pirq >= 16);
++ WARN_ON_ONCE(pirq > 16);
+ return irqmap[read_config_nybble(router, 0x48, pirq-1)];
+ }
+
+@@ -213,7 +215,7 @@ static int pirq_ali_set(struct pci_dev *
+ static const unsigned char irqmap[16] = { 0, 8, 0, 2, 4, 5, 7, 6, 0, 1, 3, 9, 11, 0, 13, 15 };
+ unsigned int val = irqmap[irq];
+
+- WARN_ON_ONCE(pirq >= 16);
++ WARN_ON_ONCE(pirq > 16);
+ if (val) {
+ write_config_nybble(router, 0x48, pirq-1, val);
+ return 1;
+@@ -264,7 +266,7 @@ static int pirq_via586_get(struct pci_de
+ {
+ static const unsigned int pirqmap[5] = { 3, 2, 5, 1, 1 };
+
+- WARN_ON_ONCE(pirq >= 5);
++ WARN_ON_ONCE(pirq > 5);
+ return read_config_nybble(router, 0x55, pirqmap[pirq-1]);
+ }
+
+@@ -272,7 +274,7 @@ static int pirq_via586_set(struct pci_de
+ {
+ static const unsigned int pirqmap[5] = { 3, 2, 5, 1, 1 };
+
+- WARN_ON_ONCE(pirq >= 5);
++ WARN_ON_ONCE(pirq > 5);
+ write_config_nybble(router, 0x55, pirqmap[pirq-1], irq);
+ return 1;
+ }
+@@ -286,7 +288,7 @@ static int pirq_ite_get(struct pci_dev *
+ {
+ static const unsigned char pirqmap[4] = { 1, 0, 2, 3 };
+
+- WARN_ON_ONCE(pirq >= 4);
++ WARN_ON_ONCE(pirq > 4);
+ return read_config_nybble(router,0x43, pirqmap[pirq-1]);
+ }
+
+@@ -294,7 +296,7 @@ static int pirq_ite_set(struct pci_dev *
+ {
+ static const unsigned char pirqmap[4] = { 1, 0, 2, 3 };
+
+- WARN_ON_ONCE(pirq >= 4);
++ WARN_ON_ONCE(pirq > 4);
+ write_config_nybble(router, 0x43, pirqmap[pirq-1], irq);
+ return 1;
+ }
+@@ -623,6 +625,13 @@ static __init int via_router_probe(struc
+ */
+ device = PCI_DEVICE_ID_VIA_8235;
+ break;
++ case PCI_DEVICE_ID_VIA_8237:
++ /**
++ * Asus a7v600 bios wrongly reports 8237
++ * as 586-compatible
++ */
++ device = PCI_DEVICE_ID_VIA_8237;
++ break;
+ }
+ }
+
+--- sle11-2009-05-14.orig/arch/x86/vdso/vdso32-setup-xen.c 2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/arch/x86/vdso/vdso32-setup-xen.c 2009-03-16 16:38:05.000000000 +0100
+@@ -164,7 +164,7 @@ static __init void relocate_vdso(Elf32_E
+ Elf32_Shdr *shdr;
+ int i;
+
+- BUG_ON(memcmp(ehdr->e_ident, ELFMAG, 4) != 0 ||
++ BUG_ON(memcmp(ehdr->e_ident, ELFMAG, SELFMAG) != 0 ||
+ !elf_check_arch_ia32(ehdr) ||
+ ehdr->e_type != ET_DYN);
+
+@@ -233,8 +233,12 @@ void syscall32_cpu_init(void)
+ BUG();
+ #endif
+
+- if (use_sysenter < 0)
+- use_sysenter = (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL);
++ if (use_sysenter < 0) {
++ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
++ use_sysenter = 1;
++ if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR)
++ use_sysenter = 1;
++ }
+ }
+
+ #define compat_uses_vma 1
+@@ -337,8 +341,6 @@ int __init sysenter_setup(void)
+
+ #ifdef CONFIG_X86_32
+ gate_vma_init();
+-
+- printk("Compat vDSO mapped to %08lx.\n", __fix_to_virt(FIX_VDSO));
+ #endif
+
+ #if defined(CONFIG_X86_64) && CONFIG_XEN_COMPAT < 0x030200
+@@ -383,6 +385,9 @@ int arch_setup_additional_pages(struct l
+ int ret = 0;
+ bool compat;
+
++ if (vdso_enabled == VDSO_DISABLED)
++ return 0;
++
+ down_write(&mm->mmap_sem);
+
+ /* Test compat mode once here, in case someone
+--- sle11-2009-05-14.orig/drivers/acpi/processor_core.c 2009-02-16 15:58:14.000000000 +0100
++++ sle11-2009-05-14/drivers/acpi/processor_core.c 2009-03-16 16:38:05.000000000 +0100
+@@ -657,7 +657,7 @@ static int acpi_processor_get_info(struc
+ * of /proc/cpuinfo
+ */
+ status = acpi_evaluate_object(pr->handle, "_SUN", NULL, &buffer);
+- if (ACPI_SUCCESS(status))
++ if (ACPI_SUCCESS(status) && pr->id != -1)
+ arch_fix_phys_package_id(pr->id, object.integer.value);
+
+ return 0;
+--- sle11-2009-05-14.orig/drivers/input/xen-kbdfront.c 2009-05-14 10:56:29.000000000 +0200
++++ sle11-2009-05-14/drivers/input/xen-kbdfront.c 2009-03-16 16:38:05.000000000 +0100
+@@ -325,7 +325,6 @@ static struct xenbus_device_id xenkbd_id
+
+ static struct xenbus_driver xenkbd = {
+ .name = "vkbd",
+- .owner = THIS_MODULE,
+ .ids = xenkbd_ids,
+ .probe = xenkbd_probe,
+ .remove = xenkbd_remove,
+--- sle11-2009-05-14.orig/drivers/oprofile/cpu_buffer.c 2009-03-12 16:15:32.000000000 +0100
++++ sle11-2009-05-14/drivers/oprofile/cpu_buffer.c 2009-03-16 16:38:05.000000000 +0100
+@@ -341,7 +341,7 @@ void oprofile_add_mode(int cpu_mode)
+
+ int oprofile_add_domain_switch(int32_t domain_id)
+ {
+- struct oprofile_cpu_buffer * cpu_buf = &cpu_buffer[smp_processor_id()];
++ struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer);
+
+ /* should have space for switching into and out of domain
+ (2 slots each) plus one sample and one cpu mode switch */
+--- sle11-2009-05-14.orig/drivers/pci/msi-xen.c 2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/drivers/pci/msi-xen.c 2009-03-16 16:38:05.000000000 +0100
+@@ -583,7 +583,7 @@ int pci_enable_msi(struct pci_dev* dev)
+ EXPORT_SYMBOL(pci_enable_msi);
+
+ extern void pci_frontend_disable_msi(struct pci_dev* dev);
+-void pci_disable_msi(struct pci_dev* dev)
++void pci_msi_shutdown(struct pci_dev* dev)
+ {
+ int pirq;
+
+@@ -612,6 +612,10 @@ void pci_disable_msi(struct pci_dev* dev
+ pci_intx_for_msi(dev, 1);
+ dev->msi_enabled = 0;
+ }
++void pci_disable_msi(struct pci_dev* dev)
++{
++ pci_msi_shutdown(dev);
++}
+ EXPORT_SYMBOL(pci_disable_msi);
+
+ /**
+@@ -714,7 +718,7 @@ int pci_enable_msix(struct pci_dev* dev,
+ EXPORT_SYMBOL(pci_enable_msix);
+
+ extern void pci_frontend_disable_msix(struct pci_dev* dev);
+-void pci_disable_msix(struct pci_dev* dev)
++void pci_msix_shutdown(struct pci_dev* dev)
+ {
+ if (!pci_msi_enable)
+ return;
+@@ -751,6 +755,10 @@ void pci_disable_msix(struct pci_dev* de
+ pci_intx_for_msi(dev, 1);
+ dev->msix_enabled = 0;
+ }
++void pci_disable_msix(struct pci_dev* dev)
++{
++ pci_msix_shutdown(dev);
++}
+ EXPORT_SYMBOL(pci_disable_msix);
+
+ /**
+--- sle11-2009-05-14.orig/drivers/video/Kconfig 2009-02-16 15:58:02.000000000 +0100
++++ sle11-2009-05-14/drivers/video/Kconfig 2009-03-16 16:38:05.000000000 +0100
+@@ -2029,7 +2029,7 @@ config FB_VIRTUAL
+
+ config XEN_FBDEV_FRONTEND
+ tristate "Xen virtual frame buffer support"
+- depends on FB && XEN
++ depends on FB && PARAVIRT_XEN
+ select FB_SYS_FILLRECT
+ select FB_SYS_COPYAREA
+ select FB_SYS_IMAGEBLIT
+--- sle11-2009-05-14.orig/drivers/video/xen-fbfront.c 2009-05-14 10:56:29.000000000 +0200
++++ sle11-2009-05-14/drivers/video/xen-fbfront.c 2009-03-16 16:38:05.000000000 +0100
+@@ -670,7 +670,6 @@ static struct xenbus_device_id xenfb_ids
+
+ static struct xenbus_driver xenfb = {
+ .name = "vfb",
+- .owner = THIS_MODULE,
+ .ids = xenfb_ids,
+ .probe = xenfb_probe,
+ .remove = xenfb_remove,
+--- sle11-2009-05-14.orig/drivers/xen/Kconfig 2009-03-04 11:28:34.000000000 +0100
++++ sle11-2009-05-14/drivers/xen/Kconfig 2009-03-16 16:38:05.000000000 +0100
+@@ -2,8 +2,6 @@
+ # This Kconfig describe xen options
+ #
+
+-mainmenu "Xen Configuration"
+-
+ config XEN
+ bool
+
+--- sle11-2009-05-14.orig/drivers/xen/Makefile 2009-02-16 16:17:21.000000000 +0100
++++ sle11-2009-05-14/drivers/xen/Makefile 2009-03-16 16:38:05.000000000 +0100
+@@ -1,5 +1,8 @@
+-obj-$(CONFIG_PARAVIRT_XEN) += grant-table.o
++obj-$(CONFIG_PARAVIRT_XEN) += grant-table.o features.o events.o
++xen-xencomm-$(CONFIG_PARAVIRT_XEN) := xencomm.o
++xen-balloon-$(CONFIG_PARAVIRT_XEN) := balloon.o
+
++xen-balloon-$(CONFIG_XEN) := balloon/
+ obj-$(CONFIG_XEN) += core/
+ obj-$(CONFIG_XEN) += console/
+ obj-$(CONFIG_XEN) += evtchn/
+@@ -7,7 +10,8 @@ obj-y += xenbus/
+ obj-$(CONFIG_XEN) += char/
+
+ obj-$(CONFIG_XEN) += util.o
+-obj-$(CONFIG_XEN_BALLOON) += balloon/
++obj-$(CONFIG_XEN_XENCOMM) += $(xen-xencomm-y)
++obj-$(CONFIG_XEN_BALLOON) += $(xen-balloon-y)
+ obj-$(CONFIG_XEN_BLKDEV_BACKEND) += blkback/
+ obj-$(CONFIG_XEN_BLKDEV_TAP) += blktap/
+ obj-$(CONFIG_XEN_NETDEV_BACKEND) += netback/
+--- sle11-2009-05-14.orig/drivers/xen/blkfront/blkfront.c 2009-03-24 10:12:53.000000000 +0100
++++ sle11-2009-05-14/drivers/xen/blkfront/blkfront.c 2009-05-19 10:38:53.000000000 +0200
+@@ -285,7 +285,11 @@ static void backend_changed(struct xenbu
+ break;
+
+ case XenbusStateClosing:
+- bd = bdget(info->dev);
++ if (!info->gd) {
++ xenbus_frontend_closed(dev);
++ break;
++ }
++ bd = bdget_disk(info->gd, 0);
+ if (bd == NULL)
+ xenbus_dev_fatal(dev, -ENODEV, "bdget failed");
+
+--- sle11-2009-05-14.orig/drivers/xen/blkfront/block.h 2009-03-24 10:11:58.000000000 +0100
++++ sle11-2009-05-14/drivers/xen/blkfront/block.h 2009-03-16 16:38:05.000000000 +0100
+@@ -96,7 +96,6 @@ struct blk_shadow {
+ struct blkfront_info
+ {
+ struct xenbus_device *xbdev;
+- dev_t dev;
+ struct gendisk *gd;
+ int vdevice;
+ blkif_vdev_t handle;
+--- sle11-2009-05-14.orig/drivers/xen/blkfront/vbd.c 2009-02-16 16:17:21.000000000 +0100
++++ sle11-2009-05-14/drivers/xen/blkfront/vbd.c 2009-03-16 16:38:05.000000000 +0100
+@@ -246,17 +246,32 @@ xlvbd_init_blk_queue(struct gendisk *gd,
+ return 0;
+ }
+
+-static int
+-xlvbd_alloc_gendisk(int major, int minor, blkif_sector_t capacity, int vdevice,
+- u16 vdisk_info, u16 sector_size,
+- struct blkfront_info *info)
++int
++xlvbd_add(blkif_sector_t capacity, int vdevice, u16 vdisk_info,
++ u16 sector_size, struct blkfront_info *info)
+ {
++ int major, minor;
+ struct gendisk *gd;
+ struct xlbd_major_info *mi;
+ int nr_minors = 1;
+ int err = -ENODEV;
+ unsigned int offset;
+
++ if ((vdevice>>EXT_SHIFT) > 1) {
++ /* this is above the extended range; something is wrong */
++ printk(KERN_WARNING "blkfront: vdevice 0x%x is above the extended range; ignoring\n", vdevice);
++ return -ENODEV;
++ }
++
++ if (!VDEV_IS_EXTENDED(vdevice)) {
++ major = BLKIF_MAJOR(vdevice);
++ minor = BLKIF_MINOR(vdevice);
++ }
++ else {
++ major = 202;
++ minor = BLKIF_MINOR_EXT(vdevice);
++ }
++
+ BUG_ON(info->gd != NULL);
+ BUG_ON(info->mi != NULL);
+ BUG_ON(info->rq != NULL);
+@@ -337,41 +352,6 @@ xlvbd_alloc_gendisk(int major, int minor
+ return err;
+ }
+
+-int
+-xlvbd_add(blkif_sector_t capacity, int vdevice, u16 vdisk_info,
+- u16 sector_size, struct blkfront_info *info)
+-{
+- struct block_device *bd;
+- int err = 0;
+- int major, minor;
+-
+- if ((vdevice>>EXT_SHIFT) > 1) {
+- /* this is above the extended range; something is wrong */
+- printk(KERN_WARNING "blkfront: vdevice 0x%x is above the extended range; ignoring\n", vdevice);
+- return -ENODEV;
+- }
+-
+- if (!VDEV_IS_EXTENDED(vdevice)) {
+- major = BLKIF_MAJOR(vdevice);
+- minor = BLKIF_MINOR(vdevice);
+- }
+- else {
+- major = 202;
+- minor = BLKIF_MINOR_EXT(vdevice);
+- }
+-
+- info->dev = MKDEV(major, minor);
+- bd = bdget(info->dev);
+- if (bd == NULL)
+- return -ENODEV;
+-
+- err = xlvbd_alloc_gendisk(major, minor, capacity, vdevice, vdisk_info,
+- sector_size, info);
+-
+- bdput(bd);
+- return err;
+-}
+-
+ void
+ xlvbd_del(struct blkfront_info *info)
+ {
+--- sle11-2009-05-14.orig/drivers/xen/blktap/blktap.c 2009-04-20 11:38:54.000000000 +0200
++++ sle11-2009-05-14/drivers/xen/blktap/blktap.c 2009-04-20 11:40:14.000000000 +0200
+@@ -111,6 +111,7 @@ typedef struct tap_blkif {
+ unsigned long mode; /*current switching mode */
+ int minor; /*Minor number for tapdisk device */
+ pid_t pid; /*tapdisk process id */
++ struct pid_namespace *pid_ns; /*... and its corresponding namespace */
+ enum { RUNNING, CLEANSHUTDOWN } status; /*Detect a clean userspace
+ shutdown */
+ unsigned long *idx_map; /*Record the user ring id to kern
+@@ -299,16 +300,14 @@ struct tap_vma_priv {
+ struct page *map[];
+ };
+
+-static struct page *blktap_nopage(struct vm_area_struct *vma,
+- unsigned long address,
+- int *type)
++static int blktap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+ {
+ /*
+ * if the page has not been mapped in by the driver then return
+- * NOPAGE_SIGBUS to the domain.
++ * VM_FAULT_SIGBUS to the domain.
+ */
+
+- return NOPAGE_SIGBUS;
++ return VM_FAULT_SIGBUS;
+ }
+
+ static pte_t blktap_clear_pte(struct vm_area_struct *vma,
+@@ -404,7 +403,7 @@ static void blktap_vma_close(struct vm_a
+ }
+
+ struct vm_operations_struct blktap_vm_ops = {
+- nopage: blktap_nopage,
++ fault: blktap_fault,
+ zap_pte: blktap_clear_pte,
+ close: blktap_vma_close,
+ };
+@@ -498,9 +497,8 @@ found:
+ tapfds[minor] = info;
+
+ if ((class = get_xen_class()) != NULL)
+- class_device_create(class, NULL,
+- MKDEV(blktap_major, minor), NULL,
+- "blktap%d", minor);
++ device_create(class, NULL, MKDEV(blktap_major, minor),
++ "blktap%d", minor);
+ }
+
+ out:
+@@ -542,7 +540,7 @@ void signal_tapdisk(int idx)
+ return;
+
+ if (info->pid > 0) {
+- ptask = find_task_by_pid(info->pid);
++ ptask = find_task_by_pid_ns(info->pid, info->pid_ns);
+ if (ptask)
+ info->status = CLEANSHUTDOWN;
+ }
+@@ -770,8 +768,9 @@ static int blktap_ioctl(struct inode *in
+ {
+ if (info) {
+ info->pid = (pid_t)arg;
+- DPRINTK("blktap: pid received %d\n",
+- info->pid);
++ info->pid_ns = current->nsproxy->pid_ns;
++ DPRINTK("blktap: pid received %p:%d\n",
++ info->pid_ns, info->pid);
+ }
+ return 0;
+ }
+@@ -1684,9 +1683,7 @@ static int __init blkif_init(void)
+ * We only create the device when a request of a new device is
+ * made.
+ */
+- class_device_create(class, NULL,
+- MKDEV(blktap_major, 0), NULL,
+- "blktap0");
++ device_create(class, NULL, MKDEV(blktap_major, 0), "blktap0");
+ } else {
+ /* this is bad, but not fatal */
+ WPRINTK("blktap: sysfs xen_class not created\n");
+--- sle11-2009-05-14.orig/drivers/xen/char/mem.c 2008-12-15 11:27:22.000000000 +0100
++++ sle11-2009-05-14/drivers/xen/char/mem.c 2009-03-16 16:38:05.000000000 +0100
+@@ -33,6 +33,27 @@ static inline int uncached_access(struct
+ return 0;
+ }
+
++static inline int range_is_allowed(unsigned long pfn, unsigned long size)
++{
++#ifdef CONFIG_NONPROMISC_DEVMEM
++ u64 from = ((u64)pfn) << PAGE_SHIFT;
++ u64 to = from + size;
++ u64 cursor = from;
++
++ while (cursor < to) {
++ if (!devmem_is_allowed(pfn)) {
++ printk(KERN_INFO
++ "Program %s tried to access /dev/mem between %Lx->%Lx.\n",
++ current->comm, from, to);
++ return 0;
++ }
++ cursor += PAGE_SIZE;
++ pfn++;
++ }
++#endif
++ return 1;
++}
++
+ /*
+ * This function reads the *physical* memory. The f_pos points directly to the
+ * memory location.
+@@ -55,6 +76,9 @@ static ssize_t read_mem(struct file * fi
+
+ sz = min_t(unsigned long, sz, count);
+
++ if (!range_is_allowed(p >> PAGE_SHIFT, count))
++ return -EPERM;
++
+ v = ioremap(p, sz);
+ if (IS_ERR(v) || v == NULL) {
+ /*
+@@ -103,6 +127,9 @@ static ssize_t write_mem(struct file * f
+
+ sz = min_t(unsigned long, sz, count);
+
++ if (!range_is_allowed(p >> PAGE_SHIFT, sz))
++ return -EPERM;
++
+ v = ioremap(p, sz);
+ if (v == NULL)
+ break;
+@@ -131,6 +158,23 @@ static ssize_t write_mem(struct file * f
+ }
+
+ #ifndef ARCH_HAS_DEV_MEM_MMAP_MEM
++static void mmap_mem_open(struct vm_area_struct *vma)
++{
++ map_devmem(vma->vm_pgoff, vma->vm_end - vma->vm_start,
++ vma->vm_page_prot);
++}
++
++static void mmap_mem_close(struct vm_area_struct *vma)
++{
++ unmap_devmem(vma->vm_pgoff, vma->vm_end - vma->vm_start,
++ vma->vm_page_prot);
++}
++
++static struct vm_operations_struct mmap_mem_ops = {
++ .open = mmap_mem_open,
++ .close = mmap_mem_close
++};
++
+ static int xen_mmap_mem(struct file * file, struct vm_area_struct * vma)
+ {
+ size_t size = vma->vm_end - vma->vm_start;
+@@ -138,6 +182,15 @@ static int xen_mmap_mem(struct file * fi
+ if (uncached_access(file))
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
++ if (!range_is_allowed(vma->vm_pgoff, size))
++ return -EPERM;
++
++ if (!phys_mem_access_prot_allowed(file, vma->vm_pgoff, size,
++ &vma->vm_page_prot))
++ return -EINVAL;
++
++ vma->vm_ops = &mmap_mem_ops;
++
+ /* We want to return the real error code, not EAGAIN. */
+ return direct_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
+ size, vma->vm_page_prot, DOMID_IO);
+--- sle11-2009-05-14.orig/drivers/xen/console/console.c 2008-12-15 11:26:44.000000000 +0100
++++ sle11-2009-05-14/drivers/xen/console/console.c 2009-03-16 16:38:05.000000000 +0100
+@@ -552,16 +552,18 @@ static int xencons_write(
+ return i;
+ }
+
+-static void xencons_put_char(struct tty_struct *tty, u_char ch)
++static int xencons_put_char(struct tty_struct *tty, u_char ch)
+ {
+ unsigned long flags;
++ int ret;
+
+ if (DUMMY_TTY(tty))
+- return;
++ return 0;
+
+ spin_lock_irqsave(&xencons_lock, flags);
+- (void)__xencons_put_char(ch);
++ ret = __xencons_put_char(ch);
+ spin_unlock_irqrestore(&xencons_lock, flags);
++ return ret;
+ }
+
+ static void xencons_flush_chars(struct tty_struct *tty)
+@@ -583,7 +585,7 @@ static void xencons_wait_until_sent(stru
+ if (DUMMY_TTY(tty))
+ return;
+
+- while (DRV(tty->driver)->chars_in_buffer(tty)) {
++ while (tty_chars_in_buffer(tty)) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout(1);
+ if (signal_pending(current))
+@@ -632,8 +634,7 @@ static void xencons_close(struct tty_str
+
+ tty->closing = 1;
+ tty_wait_until_sent(tty, 0);
+- if (DRV(tty->driver)->flush_buffer != NULL)
+- DRV(tty->driver)->flush_buffer(tty);
++ tty_driver_flush_buffer(tty);
+ if (tty->ldisc.flush_buffer != NULL)
+ tty->ldisc.flush_buffer(tty);
+ tty->closing = 0;
+--- sle11-2009-05-14.orig/drivers/xen/core/machine_kexec.c 2009-02-17 11:46:41.000000000 +0100
++++ sle11-2009-05-14/drivers/xen/core/machine_kexec.c 2009-03-16 16:38:05.000000000 +0100
+@@ -5,6 +5,7 @@
+
+ #include <linux/kexec.h>
+ #include <xen/interface/kexec.h>
++#include <linux/reboot.h>
+ #include <linux/mm.h>
+ #include <linux/bootmem.h>
+
+@@ -90,6 +91,9 @@ void __init xen_machine_kexec_setup_reso
+ xen_hypervisor_res.start = range.start;
+ xen_hypervisor_res.end = range.start + range.size - 1;
+ xen_hypervisor_res.flags = IORESOURCE_BUSY | IORESOURCE_MEM;
++#ifdef CONFIG_X86_64
++ insert_resource(&iomem_resource, &xen_hypervisor_res);
++#endif
+
+ /* fill in crashk_res if range is reserved by hypervisor */
+
+@@ -102,6 +106,9 @@ void __init xen_machine_kexec_setup_reso
+ if (range.size) {
+ crashk_res.start = range.start;
+ crashk_res.end = range.start + range.size - 1;
++#ifdef CONFIG_X86_64
++ insert_resource(&iomem_resource, &crashk_res);
++#endif
+ }
+
+ /* get physical address of vmcoreinfo */
+@@ -153,11 +160,13 @@ void __init xen_machine_kexec_setup_reso
+ return;
+ }
+
++#ifndef CONFIG_X86_64
+ void __init xen_machine_kexec_register_resources(struct resource *res)
+ {
+ request_resource(res, &xen_hypervisor_res);
+ machine_kexec_register_resources(res);
+ }
++#endif
+
+ static void setup_load_arg(xen_kexec_image_t *xki, struct kimage *image)
+ {
+@@ -228,6 +237,11 @@ void machine_shutdown(void)
+ /* do nothing */
+ }
+
++void machine_crash_shutdown(struct pt_regs *regs)
++{
++ /* The kernel is broken so disable interrupts */
++ local_irq_disable();
++}
+
+ /*
+ * Local variables:
+--- sle11-2009-05-14.orig/drivers/xen/core/smpboot.c 2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/drivers/xen/core/smpboot.c 2009-03-16 16:38:05.000000000 +0100
+@@ -53,17 +53,16 @@ static DEFINE_PER_CPU(int, callfunc_irq)
+ static char resched_name[NR_CPUS][15];
+ static char callfunc_name[NR_CPUS][15];
+
+-u8 cpu_2_logical_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
++#ifdef CONFIG_X86_LOCAL_APIC
++#define set_cpu_to_apicid(cpu, apicid) (per_cpu(x86_cpu_to_apicid, cpu) = (apicid))
++#else
++#define set_cpu_to_apicid(cpu, apicid)
++#endif
+
+ DEFINE_PER_CPU(cpumask_t, cpu_sibling_map);
+ DEFINE_PER_CPU(cpumask_t, cpu_core_map);
+ EXPORT_PER_CPU_SYMBOL(cpu_core_map);
+
+-#if defined(__i386__)
+-DEFINE_PER_CPU(u8, x86_cpu_to_apicid) = BAD_APICID;
+-EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid);
+-#endif
+-
+ void __init prefill_possible_map(void)
+ {
+ int i, rc;
+@@ -154,7 +153,7 @@ static int __cpuinit xen_smp_intr_init(u
+ }
+
+ #ifdef CONFIG_HOTPLUG_CPU
+-static void xen_smp_intr_exit(unsigned int cpu)
++static void __cpuexit xen_smp_intr_exit(unsigned int cpu)
+ {
+ if (cpu != 0)
+ local_teardown_timer(cpu);
+@@ -263,8 +262,7 @@ void __init smp_prepare_cpus(unsigned in
+ boot_cpu_data.apicid = apicid;
+ cpu_data(0) = boot_cpu_data;
+
+- cpu_2_logical_apicid[0] = apicid;
+- per_cpu(x86_cpu_to_apicid, 0) = apicid;
++ set_cpu_to_apicid(0, apicid);
+
+ current_thread_info()->cpu = 0;
+
+@@ -319,8 +317,7 @@ void __init smp_prepare_cpus(unsigned in
+ cpu_data(cpu).cpu_index = cpu;
+ cpu_data(cpu).apicid = apicid;
+
+- cpu_2_logical_apicid[cpu] = apicid;
+- per_cpu(x86_cpu_to_apicid, cpu) = apicid;
++ set_cpu_to_apicid(cpu, apicid);
+
+ #ifdef __x86_64__
+ cpu_pda(cpu)->pcurrent = idle;
+@@ -375,7 +372,7 @@ static int __init initialize_cpu_present
+ }
+ core_initcall(initialize_cpu_present_map);
+
+-int __cpu_disable(void)
++int __cpuexit __cpu_disable(void)
+ {
+ cpumask_t map = cpu_online_map;
+ unsigned int cpu = smp_processor_id();
+@@ -392,7 +389,7 @@ int __cpu_disable(void)
+ return 0;
+ }
+
+-void __cpu_die(unsigned int cpu)
++void __cpuexit __cpu_die(unsigned int cpu)
+ {
+ while (HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL)) {
+ current->state = TASK_UNINTERRUPTIBLE;
+--- sle11-2009-05-14.orig/drivers/xen/core/xen_proc.c 2009-05-14 10:56:29.000000000 +0200
++++ sle11-2009-05-14/drivers/xen/core/xen_proc.c 2009-03-16 16:38:05.000000000 +0100
+@@ -8,7 +8,7 @@ static struct proc_dir_entry *xen_base;
+ struct proc_dir_entry *create_xen_proc_entry(const char *name, mode_t mode)
+ {
+ if ( xen_base == NULL )
+- if ( (xen_base = proc_mkdir("xen", &proc_root)) == NULL )
++ if ( (xen_base = proc_mkdir("xen", NULL)) == NULL )
+ panic("Couldn't create /proc/xen");
+ return create_proc_entry(name, mode, xen_base);
+ }
+--- sle11-2009-05-14.orig/drivers/xen/fbfront/xenfb.c 2009-03-04 11:25:55.000000000 +0100
++++ sle11-2009-05-14/drivers/xen/fbfront/xenfb.c 2009-03-16 16:38:05.000000000 +0100
+@@ -93,7 +93,7 @@ struct xenfb_info
+ * only mappings. The former creates unfaulted pages. Preserves
+ * invariant. The latter removes pages. Preserves invariant.
+ *
+- * 3. Holding both locks: xenfb_vm_nopage(). Extends the dirty
++ * 3. Holding both locks: xenfb_vm_fault(). Extends the dirty
+ * rectangle and updates mappings consistently. Preserves
+ * invariant.
+ *
+@@ -112,13 +112,13 @@ struct xenfb_info
+ *
+ * But FIXME: the invariant is too weak. It misses that the fault
+ * record in mappings must be consistent with the mapping of pages in
+- * the associated address space! do_no_page() updates the PTE after
+- * xenfb_vm_nopage() returns, i.e. outside the critical region. This
++ * the associated address space! __do_fault() updates the PTE after
++ * xenfb_vm_fault() returns, i.e. outside the critical region. This
+ * allows the following race:
+ *
+ * X writes to some address in the Xen frame buffer
+- * Fault - call do_no_page()
+- * call xenfb_vm_nopage()
++ * Fault - call __do_fault()
++ * call xenfb_vm_fault()
+ * grab mm_lock
+ * map->faults++;
+ * release mm_lock
+@@ -387,18 +387,17 @@ static void xenfb_vm_close(struct vm_are
+ mutex_unlock(&info->mm_lock);
+ }
+
+-static struct page *xenfb_vm_nopage(struct vm_area_struct *vma,
+- unsigned long vaddr, int *type)
++static int xenfb_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+ {
+ struct xenfb_mapping *map = vma->vm_private_data;
+ struct xenfb_info *info = map->info;
+- int pgnr = (vaddr - vma->vm_start) >> PAGE_SHIFT;
++ int pgnr = ((long)vmf->virtual_address - vma->vm_start) >> PAGE_SHIFT;
+ unsigned long flags;
+ struct page *page;
+ int y1, y2;
+
+ if (pgnr >= info->nr_pages)
+- return NOPAGE_SIGBUS;
++ return VM_FAULT_SIGBUS;
+
+ mutex_lock(&info->mm_lock);
+ spin_lock_irqsave(&info->dirty_lock, flags);
+@@ -414,16 +413,15 @@ static struct page *xenfb_vm_nopage(stru
+ spin_unlock_irqrestore(&info->dirty_lock, flags);
+ mutex_unlock(&info->mm_lock);
+
+- if (type)
+- *type = VM_FAULT_MINOR;
++ vmf->page = page;
+
+- return page;
++ return VM_FAULT_MINOR;
+ }
+
+ static struct vm_operations_struct xenfb_vm_ops = {
+ .open = xenfb_vm_open,
+ .close = xenfb_vm_close,
+- .nopage = xenfb_vm_nopage,
++ .fault = xenfb_vm_fault,
+ };
+
+ static int xenfb_mmap(struct fb_info *fb_info, struct vm_area_struct *vma)
+--- sle11-2009-05-14.orig/drivers/xen/gntdev/gntdev.c 2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/drivers/xen/gntdev/gntdev.c 2009-03-16 16:38:05.000000000 +0100
+@@ -392,7 +392,7 @@ nomem_out:
+ static int __init gntdev_init(void)
+ {
+ struct class *class;
+- struct class_device *device;
++ struct device *device;
+
+ if (!is_running_on_xen()) {
+ printk(KERN_ERR "You must be running Xen to use gntdev\n");
+@@ -417,8 +417,8 @@ static int __init gntdev_init(void)
+ return 0;
+ }
+
+- device = class_device_create(class, NULL, MKDEV(gntdev_major, 0),
+- NULL, GNTDEV_NAME);
++ device = device_create(class, NULL, MKDEV(gntdev_major, 0),
++ GNTDEV_NAME);
+ if (IS_ERR(device)) {
+ printk(KERN_ERR "Error creating gntdev device in xen_class\n");
+ printk(KERN_ERR "gntdev created with major number = %d\n",
+@@ -435,7 +435,7 @@ static void __exit gntdev_exit(void)
+ {
+ struct class *class;
+ if ((class = get_xen_class()) != NULL)
+- class_device_destroy(class, MKDEV(gntdev_major, 0));
++ device_destroy(class, MKDEV(gntdev_major, 0));
+ unregister_chrdev(gntdev_major, GNTDEV_NAME);
+ }
+
+--- sle11-2009-05-14.orig/drivers/xen/netfront/netfront.c 2009-03-30 16:39:44.000000000 +0200
++++ sle11-2009-05-14/drivers/xen/netfront/netfront.c 2009-03-30 16:40:17.000000000 +0200
+@@ -1464,8 +1464,7 @@ err:
+ }
+ }
+
+- while ((skb = __skb_dequeue(&errq)))
+- kfree_skb(skb);
++ __skb_queue_purge(&errq);
+
+ while ((skb = __skb_dequeue(&rxq)) != NULL) {
+ struct page *page = NETFRONT_SKB_CB(skb)->page;
+@@ -1630,8 +1629,7 @@ static void netif_release_rx_bufs_flip(s
+ }
+ }
+
+- while ((skb = __skb_dequeue(&free_list)) != NULL)
+- dev_kfree_skb(skb);
++ __skb_queue_purge(&free_list);
+
+ spin_unlock_bh(&np->rx_lock);
+ }
+--- sle11-2009-05-14.orig/drivers/xen/privcmd/privcmd.c 2009-03-04 11:28:34.000000000 +0100
++++ sle11-2009-05-14/drivers/xen/privcmd/privcmd.c 2009-03-16 16:38:05.000000000 +0100
+@@ -261,15 +261,13 @@ static long privcmd_ioctl(struct file *f
+ }
+
+ #ifndef HAVE_ARCH_PRIVCMD_MMAP
+-static struct page *privcmd_nopage(struct vm_area_struct *vma,
+- unsigned long address,
+- int *type)
++static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+ {
+- return NOPAGE_SIGBUS;
++ return VM_FAULT_SIGBUS;
+ }
+
+ static struct vm_operations_struct privcmd_vm_ops = {
+- .nopage = privcmd_nopage
++ .fault = privcmd_fault
+ };
+
+ static int privcmd_mmap(struct file * file, struct vm_area_struct * vma)
+--- sle11-2009-05-14.orig/drivers/xen/xenbus/xenbus_client.c 2009-03-24 10:12:22.000000000 +0100
++++ sle11-2009-05-14/drivers/xen/xenbus/xenbus_client.c 2009-03-24 10:13:17.000000000 +0100
+@@ -442,7 +442,7 @@ int xenbus_map_ring_valloc(struct xenbus
+
+ *vaddr = NULL;
+
+- area = alloc_vm_area(PAGE_SIZE);
++ area = xen_alloc_vm_area(PAGE_SIZE);
+ if (!area)
+ return -ENOMEM;
+
+@@ -452,7 +452,7 @@ int xenbus_map_ring_valloc(struct xenbus
+ BUG();
+
+ if (op.status != GNTST_okay) {
+- free_vm_area(area);
++ xen_free_vm_area(area);
+ xenbus_dev_fatal(dev, op.status,
+ "mapping in shared page %d from domain %d",
+ gnt_ref, dev->otherend_id);
+@@ -551,7 +551,7 @@ int xenbus_unmap_ring_vfree(struct xenbu
+ BUG();
+
+ if (op.status == GNTST_okay)
+- free_vm_area(area);
++ xen_free_vm_area(area);
+ else
+ xenbus_dev_error(dev, op.status,
+ "unmapping page at handle %d error %d",
+--- sle11-2009-05-14.orig/drivers/xen/xenbus/xenbus_probe.c 2009-02-16 16:18:36.000000000 +0100
++++ sle11-2009-05-14/drivers/xen/xenbus/xenbus_probe.c 2009-03-16 16:38:05.000000000 +0100
+@@ -173,7 +173,7 @@ static int read_backend_details(struct x
+ return read_otherend_details(xendev, "backend-id", "backend");
+ }
+
+-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16) && (defined(CONFIG_XEN) || defined(MODULE))
++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
+ static int xenbus_uevent_frontend(struct device *dev, struct kobj_uevent_env *env)
+ {
+ struct xenbus_device *xdev;
+@@ -185,8 +185,10 @@ static int xenbus_uevent_frontend(struct
+ return -ENODEV;
+
+ /* stuff we want to pass to /sbin/hotplug */
++#if defined(CONFIG_XEN) || defined(MODULE)
+ add_uevent_var(env, "XENBUS_TYPE=%s", xdev->devicetype);
+ add_uevent_var(env, "XENBUS_PATH=%s", xdev->nodename);
++#endif
+ add_uevent_var(env, "MODALIAS=xen:%s", xdev->devicetype);
+
+ return 0;
+@@ -207,10 +209,8 @@ static struct xen_bus_type xenbus_fronte
+ .probe = xenbus_dev_probe,
+ .remove = xenbus_dev_remove,
+ .shutdown = xenbus_dev_shutdown,
+-#if defined(CONFIG_XEN) || defined(MODULE)
+ .uevent = xenbus_uevent_frontend,
+ #endif
+-#endif
+ },
+ #if defined(CONFIG_XEN) || defined(MODULE)
+ .dev = {
+@@ -519,6 +519,15 @@ static ssize_t xendev_show_devtype(struc
+ }
+ DEVICE_ATTR(devtype, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_devtype, NULL);
+
++static ssize_t xendev_show_modalias(struct device *dev,
++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,13)
++ struct device_attribute *attr,
++#endif
++ char *buf)
++{
++ return sprintf(buf, "xen:%s\n", to_xenbus_device(dev)->devicetype);
++}
++DEVICE_ATTR(modalias, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_modalias, NULL);
+
+ int xenbus_probe_node(struct xen_bus_type *bus,
+ const char *type,
+@@ -579,10 +588,16 @@ int xenbus_probe_node(struct xen_bus_typ
+
+ err = device_create_file(&xendev->dev, &dev_attr_devtype);
+ if (err)
+- goto fail_remove_file;
++ goto fail_remove_nodename;
++
++ err = device_create_file(&xendev->dev, &dev_attr_modalias);
++ if (err)
++ goto fail_remove_devtype;
+
+ return 0;
+-fail_remove_file:
++fail_remove_devtype:
++ device_remove_file(&xendev->dev, &dev_attr_devtype);
++fail_remove_nodename:
+ device_remove_file(&xendev->dev, &dev_attr_nodename);
+ fail_unregister:
+ device_unregister(&xendev->dev);
+--- sle11-2009-05-14.orig/fs/aio.c 2009-03-24 10:11:37.000000000 +0100
++++ sle11-2009-05-14/fs/aio.c 2009-03-24 10:13:25.000000000 +0100
+@@ -1271,6 +1271,7 @@ static void io_destroy(struct kioctx *io
+ #ifdef CONFIG_EPOLL
+ /* forget the poll file, but it's up to the user to close it */
+ if (ioctx->file) {
++ fput(ioctx->file);
+ ioctx->file->private_data = 0;
+ ioctx->file = 0;
+ }
+@@ -1295,6 +1296,7 @@ static int aio_queue_fd_close(struct ino
+ spin_lock_irq(&ioctx->ctx_lock);
+ ioctx->file = 0;
+ spin_unlock_irq(&ioctx->ctx_lock);
++ fput(file);
+ }
+ return 0;
+ }
+@@ -1330,16 +1332,17 @@ static const struct file_operations aioq
+
+ static int make_aio_fd(struct kioctx *ioctx)
+ {
+- int error, fd;
+- struct inode *inode;
++ int fd;
+ struct file *file;
+
+- error = anon_inode_getfd(&fd, &inode, &file, "[aioq]",
+- &aioq_fops, ioctx);
+- if (error)
+- return error;
++ fd = anon_inode_getfd("[aioq]", &aioq_fops, ioctx);
++ if (fd < 0)
++ return fd;
+
+ /* associate the file with the IO context */
++ file = fget(fd);
++ if (!file)
++ return -EBADF;
+ file->private_data = ioctx;
+ ioctx->file = file;
+ init_waitqueue_head(&ioctx->poll_wait);
+--- sle11-2009-05-14.orig/include/asm-x86/dma-mapping.h 2009-05-14 10:56:29.000000000 +0200
++++ sle11-2009-05-14/include/asm-x86/dma-mapping.h 2009-03-16 16:38:05.000000000 +0100
+@@ -223,8 +223,13 @@ static inline dma_addr_t dma_map_page(st
+ struct dma_mapping_ops *ops = get_dma_ops(dev);
+
+ BUG_ON(!valid_dma_direction(direction));
++#ifndef CONFIG_XEN
+ return ops->map_single(dev, page_to_phys(page) + offset,
+ size, direction);
++#else
++ return ops->map_single(dev, page_to_pseudophys(page) + offset,
++ size, direction);
++#endif
+ }
+
+ static inline void dma_unmap_page(struct device *dev, dma_addr_t addr,
+--- sle11-2009-05-14.orig/include/asm-x86/genapic_64.h 2009-05-14 10:56:29.000000000 +0200
++++ sle11-2009-05-14/include/asm-x86/genapic_64.h 2009-03-16 16:38:05.000000000 +0100
+@@ -46,6 +46,7 @@ extern struct genapic apic_x2apic_phys;
+ extern int acpi_madt_oem_check(char *, char *);
+
+ extern void apic_send_IPI_self(int vector);
++#ifndef CONFIG_XEN
+ enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC};
+ extern enum uv_system_type get_uv_system_type(void);
+ extern int is_uv_system(void);
+@@ -55,6 +56,10 @@ DECLARE_PER_CPU(int, x2apic_extra_bits);
+ extern void uv_cpu_init(void);
+ extern void uv_system_init(void);
+ extern int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip);
++#else
++#define is_uv_system() 0
++#define uv_cpu_init() ((void)0)
++#endif
+
+ extern void setup_apic_routing(void);
+
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/desc.h 2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/desc.h 2009-03-16 16:38:05.000000000 +0100
+@@ -64,8 +64,8 @@ static inline struct desc_struct *get_cp
+ }
+
+ static inline void pack_gate(gate_desc *gate, unsigned char type,
+- unsigned long base, unsigned dpl, unsigned flags, unsigned short seg)
+-
++ unsigned long base, unsigned dpl, unsigned flags,
++ unsigned short seg)
+ {
+ gate->a = (seg << 16) | (base & 0xffff);
+ gate->b = (base & 0xffff0000) |
+@@ -84,22 +84,23 @@ static inline int desc_empty(const void
+ #define load_TR_desc() native_load_tr_desc()
+ #define load_gdt(dtr) native_load_gdt(dtr)
+ #define load_idt(dtr) native_load_idt(dtr)
+-#define load_tr(tr) __asm__ __volatile("ltr %0"::"m" (tr))
+-#define load_ldt(ldt) __asm__ __volatile("lldt %0"::"m" (ldt))
++#define load_tr(tr) asm volatile("ltr %0"::"m" (tr))
++#define load_ldt(ldt) asm volatile("lldt %0"::"m" (ldt))
+
+ #define store_gdt(dtr) native_store_gdt(dtr)
+ #define store_idt(dtr) native_store_idt(dtr)
+ #define store_tr(tr) (tr = native_store_tr())
+-#define store_ldt(ldt) __asm__ ("sldt %0":"=m" (ldt))
++#define store_ldt(ldt) asm("sldt %0":"=m" (ldt))
+
+ #define load_TLS(t, cpu) native_load_tls(t, cpu)
+ #define set_ldt native_set_ldt
+
+-#define write_ldt_entry(dt, entry, desc) \
+- native_write_ldt_entry(dt, entry, desc)
+-#define write_gdt_entry(dt, entry, desc, type) \
+- native_write_gdt_entry(dt, entry, desc, type)
+-#define write_idt_entry(dt, entry, g) native_write_idt_entry(dt, entry, g)
++#define write_ldt_entry(dt, entry, desc) \
++ native_write_ldt_entry(dt, entry, desc)
++#define write_gdt_entry(dt, entry, desc, type) \
++ native_write_gdt_entry(dt, entry, desc, type)
++#define write_idt_entry(dt, entry, g) \
++ native_write_idt_entry(dt, entry, g)
+
+ static inline void native_write_idt_entry(gate_desc *idt, int entry,
+ const gate_desc *gate)
+@@ -138,8 +139,8 @@ static inline void pack_descriptor(struc
+ {
+ desc->a = ((base & 0xffff) << 16) | (limit & 0xffff);
+ desc->b = (base & 0xff000000) | ((base & 0xff0000) >> 16) |
+- (limit & 0x000f0000) | ((type & 0xff) << 8) |
+- ((flags & 0xf) << 20);
++ (limit & 0x000f0000) | ((type & 0xff) << 8) |
++ ((flags & 0xf) << 20);
+ desc->p = 1;
+ }
+
+@@ -160,7 +161,6 @@ static inline void set_tssldt_descriptor
+ desc->base2 = (PTR_MIDDLE(addr) >> 8) & 0xFF;
+ desc->base3 = PTR_HIGH(addr);
+ #else
+-
+ pack_descriptor((struct desc_struct *)d, addr, size, 0x80 | type, 0);
+ #endif
+ }
+@@ -178,7 +178,8 @@ static inline void __set_tss_desc(unsign
+ * last valid byte
+ */
+ set_tssldt_descriptor(&tss, (unsigned long)addr, DESC_TSS,
+- IO_BITMAP_OFFSET + IO_BITMAP_BYTES + sizeof(unsigned long) - 1);
++ IO_BITMAP_OFFSET + IO_BITMAP_BYTES +
++ sizeof(unsigned long) - 1);
+ write_gdt_entry(d, entry, &tss, DESC_TSS);
+ }
+
+@@ -187,16 +188,16 @@ static inline void __set_tss_desc(unsign
+ static inline void native_set_ldt(const void *addr, unsigned int entries)
+ {
+ if (likely(entries == 0))
+- __asm__ __volatile__("lldt %w0"::"q" (0));
++ asm volatile("lldt %w0"::"q" (0));
+ else {
+ unsigned cpu = smp_processor_id();
+ ldt_desc ldt;
+
+- set_tssldt_descriptor(&ldt, (unsigned long)addr,
+- DESC_LDT, entries * sizeof(ldt) - 1);
++ set_tssldt_descriptor(&ldt, (unsigned long)addr, DESC_LDT,
++ entries * LDT_ENTRY_SIZE - 1);
+ write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT,
+ &ldt, DESC_LDT);
+- __asm__ __volatile__("lldt %w0"::"q" (GDT_ENTRY_LDT*8));
++ asm volatile("lldt %w0"::"q" (GDT_ENTRY_LDT*8));
+ }
+ }
+
+@@ -261,15 +262,15 @@ static inline void xen_load_tls(struct t
+ }
+ #endif
+
+-#define _LDT_empty(info) (\
+- (info)->base_addr == 0 && \
+- (info)->limit == 0 && \
+- (info)->contents == 0 && \
+- (info)->read_exec_only == 1 && \
+- (info)->seg_32bit == 0 && \
+- (info)->limit_in_pages == 0 && \
+- (info)->seg_not_present == 1 && \
+- (info)->useable == 0)
++#define _LDT_empty(info) \
++ ((info)->base_addr == 0 && \
++ (info)->limit == 0 && \
++ (info)->contents == 0 && \
++ (info)->read_exec_only == 1 && \
++ (info)->seg_32bit == 0 && \
++ (info)->limit_in_pages == 0 && \
++ (info)->seg_not_present == 1 && \
++ (info)->useable == 0)
+
+ #ifdef CONFIG_X86_64
+ #define LDT_empty(info) (_LDT_empty(info) && ((info)->lm == 0))
+@@ -309,7 +310,7 @@ static inline unsigned long get_desc_lim
+
+ #ifndef CONFIG_X86_NO_IDT
+ static inline void _set_gate(int gate, unsigned type, void *addr,
+- unsigned dpl, unsigned ist, unsigned seg)
++ unsigned dpl, unsigned ist, unsigned seg)
+ {
+ gate_desc s;
+ pack_gate(&s, type, (unsigned long)addr, dpl, ist, seg);
+@@ -393,10 +394,10 @@ static inline void set_system_gate_ist(i
+ * Will read the base address of GDT_ENTRY_ESPFIX_SS and put it into %eax.
+ */
+ #define GET_DESC_BASE(idx, gdt, base, lo_w, lo_b, hi_b) \
+- movb idx*8+4(gdt), lo_b; \
+- movb idx*8+7(gdt), hi_b; \
+- shll $16, base; \
+- movw idx*8+2(gdt), lo_w;
++ movb idx * 8 + 4(gdt), lo_b; \
++ movb idx * 8 + 7(gdt), hi_b; \
++ shll $16, base; \
++ movw idx * 8 + 2(gdt), lo_w;
+
+
+ #endif /* __ASSEMBLY__ */
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/dma-mapping.h 2009-02-16 16:18:36.000000000 +0100
++++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/dma-mapping.h 2009-03-16 16:38:05.000000000 +0100
+@@ -1,5 +1,17 @@
+-#ifdef CONFIG_X86_32
+-# include "dma-mapping_32.h"
+-#else
+-# include "dma-mapping_64.h"
+-#endif
++#ifndef _ASM_DMA_MAPPING_H_
++
++#include "../../dma-mapping.h"
++
++static inline int
++address_needs_mapping(struct device *hwdev, dma_addr_t addr)
++{
++ dma_addr_t mask = 0xffffffff;
++ /* If the device has a mask, use it, otherwise default to 32 bits */
++ if (hwdev && hwdev->dma_mask)
++ mask = *hwdev->dma_mask;
++ return (addr & ~mask) != 0;
++}
++
++extern int range_straddles_page_boundary(paddr_t p, size_t size);
++
++#endif /* _ASM_DMA_MAPPING_H_ */
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/dma-mapping_32.h 2009-03-16 16:33:40.000000000 +0100
++++ /dev/null 1970-01-01 00:00:00.000000000 +0000
+@@ -1,141 +0,0 @@
+-#ifndef _ASM_I386_DMA_MAPPING_H
+-#define _ASM_I386_DMA_MAPPING_H
+-
+-/*
+- * IOMMU interface. See Documentation/DMA-mapping.txt and DMA-API.txt for
+- * documentation.
+- */
+-
+-#include <linux/mm.h>
+-#include <linux/scatterlist.h>
+-#include <asm/cache.h>
+-#include <asm/io.h>
+-#include <asm/swiotlb.h>
+-
+-static inline int
+-address_needs_mapping(struct device *hwdev, dma_addr_t addr)
+-{
+- dma_addr_t mask = 0xffffffff;
+- /* If the device has a mask, use it, otherwise default to 32 bits */
+- if (hwdev && hwdev->dma_mask)
+- mask = *hwdev->dma_mask;
+- return (addr & ~mask) != 0;
+-}
+-
+-extern int range_straddles_page_boundary(paddr_t p, size_t size);
+-
+-#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
+-#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
+-
+-void *dma_alloc_coherent(struct device *dev, size_t size,
+- dma_addr_t *dma_handle, gfp_t flag);
+-
+-void dma_free_coherent(struct device *dev, size_t size,
+- void *vaddr, dma_addr_t dma_handle);
+-
+-extern dma_addr_t
+-dma_map_single(struct device *dev, void *ptr, size_t size,
+- enum dma_data_direction direction);
+-
+-extern void
+-dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
+- enum dma_data_direction direction);
+-
+-extern int dma_map_sg(struct device *hwdev, struct scatterlist *sg,
+- int nents, enum dma_data_direction direction);
+-extern void dma_unmap_sg(struct device *hwdev, struct scatterlist *sg,
+- int nents, enum dma_data_direction direction);
+-
+-#ifdef CONFIG_HIGHMEM
+-extern dma_addr_t
+-dma_map_page(struct device *dev, struct page *page, unsigned long offset,
+- size_t size, enum dma_data_direction direction);
+-
+-extern void
+-dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
+- enum dma_data_direction direction);
+-#else
+-#define dma_map_page(dev, page, offset, size, dir) \
+- dma_map_single(dev, page_address(page) + (offset), (size), (dir))
+-#define dma_unmap_page dma_unmap_single
+-#endif
+-
+-extern void
+-dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size,
+- enum dma_data_direction direction);
+-
+-extern void
+-dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, size_t size,
+- enum dma_data_direction direction);
+-
+-static inline void
+-dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t dma_handle,
+- unsigned long offset, size_t size,
+- enum dma_data_direction direction)
+-{
+- dma_sync_single_for_cpu(dev, dma_handle+offset, size, direction);
+-}
+-
+-static inline void
+-dma_sync_single_range_for_device(struct device *dev, dma_addr_t dma_handle,
+- unsigned long offset, size_t size,
+- enum dma_data_direction direction)
+-{
+- dma_sync_single_for_device(dev, dma_handle+offset, size, direction);
+-}
+-
+-extern void
+-dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems,
+- enum dma_data_direction direction);
+-
+-extern void
+-dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems,
+- enum dma_data_direction direction);
+-
+-extern int
+-dma_mapping_error(dma_addr_t dma_addr);
+-
+-extern int
+-dma_supported(struct device *dev, u64 mask);
+-
+-static inline int
+-dma_set_mask(struct device *dev, u64 mask)
+-{
+- if(!dev->dma_mask || !dma_supported(dev, mask))
+- return -EIO;
+-
+- *dev->dma_mask = mask;
+-
+- return 0;
+-}
+-
+-static inline int
+-dma_get_cache_alignment(void)
+-{
+- /* no easy way to get cache size on all x86, so return the
+- * maximum possible, to be safe */
+- return (1 << INTERNODE_CACHE_SHIFT);
+-}
+-
+-#define dma_is_consistent(d, h) (1)
+-
+-static inline void
+-dma_cache_sync(struct device *dev, void *vaddr, size_t size,
+- enum dma_data_direction direction)
+-{
+- flush_write_buffers();
+-}
+-
+-#define ARCH_HAS_DMA_DECLARE_COHERENT_MEMORY
+-extern int
+-dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
+- dma_addr_t device_addr, size_t size, int flags);
+-
+-extern void
+-dma_release_declared_memory(struct device *dev);
+-
+-extern void *
+-dma_mark_declared_memory_occupied(struct device *dev,
+- dma_addr_t device_addr, size_t size);
+-
+-#endif
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/dma-mapping_64.h 2009-02-16 16:18:36.000000000 +0100
++++ /dev/null 1970-01-01 00:00:00.000000000 +0000
+@@ -1,205 +0,0 @@
+-#ifndef _X8664_DMA_MAPPING_H
+-#define _X8664_DMA_MAPPING_H 1
+-
+-/*
+- * IOMMU interface. See Documentation/DMA-mapping.txt and DMA-API.txt for
+- * documentation.
+- */
+-
+-#include <linux/scatterlist.h>
+-#include <asm/io.h>
+-
+-struct dma_mapping_ops {
+- int (*mapping_error)(dma_addr_t dma_addr);
+- void* (*alloc_coherent)(struct device *dev, size_t size,
+- dma_addr_t *dma_handle, gfp_t gfp);
+- void (*free_coherent)(struct device *dev, size_t size,
+- void *vaddr, dma_addr_t dma_handle);
+- dma_addr_t (*map_single)(struct device *hwdev, void *ptr,
+- size_t size, int direction);
+- /* like map_single, but doesn't check the device mask */
+- dma_addr_t (*map_simple)(struct device *hwdev, char *ptr,
+- size_t size, int direction);
+- void (*unmap_single)(struct device *dev, dma_addr_t addr,
+- size_t size, int direction);
+- void (*sync_single_for_cpu)(struct device *hwdev,
+- dma_addr_t dma_handle, size_t size,
+- int direction);
+- void (*sync_single_for_device)(struct device *hwdev,
+- dma_addr_t dma_handle, size_t size,
+- int direction);
+- void (*sync_single_range_for_cpu)(struct device *hwdev,
+- dma_addr_t dma_handle, unsigned long offset,
+- size_t size, int direction);
+- void (*sync_single_range_for_device)(struct device *hwdev,
+- dma_addr_t dma_handle, unsigned long offset,
+- size_t size, int direction);
+- void (*sync_sg_for_cpu)(struct device *hwdev,
+- struct scatterlist *sg, int nelems,
+- int direction);
+- void (*sync_sg_for_device)(struct device *hwdev,
+- struct scatterlist *sg, int nelems,
+- int direction);
+- int (*map_sg)(struct device *hwdev, struct scatterlist *sg,
+- int nents, int direction);
+- void (*unmap_sg)(struct device *hwdev,
+- struct scatterlist *sg, int nents,
+- int direction);
+- int (*dma_supported)(struct device *hwdev, u64 mask);
+- int is_phys;
+-};
+-
+-extern dma_addr_t bad_dma_address;
+-extern const struct dma_mapping_ops* dma_ops;
+-extern int iommu_merge;
+-
+-#if 0
+-static inline int dma_mapping_error(dma_addr_t dma_addr)
+-{
+- if (dma_ops->mapping_error)
+- return dma_ops->mapping_error(dma_addr);
+-
+- return (dma_addr == bad_dma_address);
+-}
+-
+-#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
+-#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
+-
+-#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
+-#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
+-
+-extern void *dma_alloc_coherent(struct device *dev, size_t size,
+- dma_addr_t *dma_handle, gfp_t gfp);
+-extern void dma_free_coherent(struct device *dev, size_t size, void *vaddr,
+- dma_addr_t dma_handle);
+-
+-static inline dma_addr_t
+-dma_map_single(struct device *hwdev, void *ptr, size_t size,
+- int direction)
+-{
+- BUG_ON(!valid_dma_direction(direction));
+- return dma_ops->map_single(hwdev, ptr, size, direction);
+-}
+-
+-static inline void
+-dma_unmap_single(struct device *dev, dma_addr_t addr,size_t size,
+- int direction)
+-{
+- BUG_ON(!valid_dma_direction(direction));
+- dma_ops->unmap_single(dev, addr, size, direction);
+-}
+-
+-#define dma_map_page(dev,page,offset,size,dir) \
+- dma_map_single((dev), page_address(page)+(offset), (size), (dir))
+-
+-#define dma_unmap_page dma_unmap_single
+-
+-static inline void
+-dma_sync_single_for_cpu(struct device *hwdev, dma_addr_t dma_handle,
+- size_t size, int direction)
+-{
+- BUG_ON(!valid_dma_direction(direction));
+- if (dma_ops->sync_single_for_cpu)
+- dma_ops->sync_single_for_cpu(hwdev, dma_handle, size,
+- direction);
+- flush_write_buffers();
+-}
+-
+-static inline void
+-dma_sync_single_for_device(struct device *hwdev, dma_addr_t dma_handle,
+- size_t size, int direction)
+-{
+- BUG_ON(!valid_dma_direction(direction));
+- if (dma_ops->sync_single_for_device)
+- dma_ops->sync_single_for_device(hwdev, dma_handle, size,
+- direction);
+- flush_write_buffers();
+-}
+-
+-static inline void
+-dma_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dma_handle,
+- unsigned long offset, size_t size, int direction)
+-{
+- BUG_ON(!valid_dma_direction(direction));
+- if (dma_ops->sync_single_range_for_cpu) {
+- dma_ops->sync_single_range_for_cpu(hwdev, dma_handle, offset, size, direction);
+- }
+-
+- flush_write_buffers();
+-}
+-
+-static inline void
+-dma_sync_single_range_for_device(struct device *hwdev, dma_addr_t dma_handle,
+- unsigned long offset, size_t size, int direction)
+-{
+- BUG_ON(!valid_dma_direction(direction));
+- if (dma_ops->sync_single_range_for_device)
+- dma_ops->sync_single_range_for_device(hwdev, dma_handle,
+- offset, size, direction);
+-
+- flush_write_buffers();
+-}
+-
+-static inline void
+-dma_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg,
+- int nelems, int direction)
+-{
+- BUG_ON(!valid_dma_direction(direction));
+- if (dma_ops->sync_sg_for_cpu)
+- dma_ops->sync_sg_for_cpu(hwdev, sg, nelems, direction);
+- flush_write_buffers();
+-}
+-
+-static inline void
+-dma_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
+- int nelems, int direction)
+-{
+- BUG_ON(!valid_dma_direction(direction));
+- if (dma_ops->sync_sg_for_device) {
+- dma_ops->sync_sg_for_device(hwdev, sg, nelems, direction);
+- }
+-
+- flush_write_buffers();
+-}
+-
+-static inline int
+-dma_map_sg(struct device *hwdev, struct scatterlist *sg, int nents, int direction)
+-{
+- BUG_ON(!valid_dma_direction(direction));
+- return dma_ops->map_sg(hwdev, sg, nents, direction);
+-}
+-
+-static inline void
+-dma_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents,
+- int direction)
+-{
+- BUG_ON(!valid_dma_direction(direction));
+- dma_ops->unmap_sg(hwdev, sg, nents, direction);
+-}
+-
+-extern int dma_supported(struct device *hwdev, u64 mask);
+-
+-/* same for gart, swiotlb, and nommu */
+-static inline int dma_get_cache_alignment(void)
+-{
+- return boot_cpu_data.x86_clflush_size;
+-}
+-
+-#define dma_is_consistent(d, h) 1
+-
+-extern int dma_set_mask(struct device *dev, u64 mask);
+-
+-static inline void
+-dma_cache_sync(struct device *dev, void *vaddr, size_t size,
+- enum dma_data_direction dir)
+-{
+- flush_write_buffers();
+-}
+-
+-extern struct device fallback_dev;
+-extern int panic_on_overflow;
+-#endif
+-
+-#endif /* _X8664_DMA_MAPPING_H */
+-
+-#include "dma-mapping_32.h"
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/fixmap.h 2009-02-16 16:18:36.000000000 +0100
++++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/fixmap.h 2009-03-16 16:38:05.000000000 +0100
+@@ -1,5 +1,13 @@
++#ifndef _ASM_FIXMAP_H
++#define _ASM_FIXMAP_H
++
+ #ifdef CONFIG_X86_32
+ # include "fixmap_32.h"
+ #else
+ # include "fixmap_64.h"
+ #endif
++
++#define clear_fixmap(idx) \
++ __set_fixmap(idx, 0, __pgprot(0))
++
++#endif
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/fixmap_32.h 2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/fixmap_32.h 2009-03-16 16:38:05.000000000 +0100
+@@ -10,8 +10,8 @@
+ * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
+ */
+
+-#ifndef _ASM_FIXMAP_H
+-#define _ASM_FIXMAP_H
++#ifndef _ASM_FIXMAP_32_H
++#define _ASM_FIXMAP_32_H
+
+ /* used by vmalloc.c, vsyscall.lds.S.
+ *
+@@ -102,8 +102,7 @@ enum fixed_addresses {
+ */
+ #define NR_FIX_BTMAPS 64
+ #define FIX_BTMAPS_NESTING 4
+- FIX_BTMAP_END =
+- __end_of_permanent_fixed_addresses + 512 -
++ FIX_BTMAP_END = __end_of_permanent_fixed_addresses + 512 -
+ (__end_of_permanent_fixed_addresses & 511),
+ FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_NESTING - 1,
+ FIX_WP_TEST,
+@@ -114,19 +113,16 @@ enum fixed_addresses {
+ };
+
+ extern void __set_fixmap(enum fixed_addresses idx,
+- maddr_t phys, pgprot_t flags);
++ maddr_t phys, pgprot_t flags);
+ extern void reserve_top_address(unsigned long reserve);
+
+-#define set_fixmap(idx, phys) \
+- __set_fixmap(idx, phys, PAGE_KERNEL)
++#define set_fixmap(idx, phys) \
++ __set_fixmap(idx, phys, PAGE_KERNEL)
+ /*
+ * Some hardware wants to get fixmapped without caching.
+ */
+-#define set_fixmap_nocache(idx, phys) \
+- __set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE)
+-
+-#define clear_fixmap(idx) \
+- __set_fixmap(idx, 0, __pgprot(0))
++#define set_fixmap_nocache(idx, phys) \
++ __set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE)
+
+ #define FIXADDR_TOP ((unsigned long)__FIXADDR_TOP)
+
+@@ -159,7 +155,7 @@ static __always_inline unsigned long fix
+ if (idx >= __end_of_fixed_addresses)
+ __this_fixmap_does_not_exist();
+
+- return __fix_to_virt(idx);
++ return __fix_to_virt(idx);
+ }
+
+ static inline unsigned long virt_to_fix(const unsigned long vaddr)
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/fixmap_64.h 2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/fixmap_64.h 2009-03-16 16:38:05.000000000 +0100
+@@ -8,8 +8,8 @@
+ * Copyright (C) 1998 Ingo Molnar
+ */
+
+-#ifndef _ASM_FIXMAP_H
+-#define _ASM_FIXMAP_H
++#ifndef _ASM_FIXMAP_64_H
++#define _ASM_FIXMAP_64_H
+
+ #include <linux/kernel.h>
+ #include <asm/apicdef.h>
+@@ -35,7 +35,8 @@
+
+ enum fixed_addresses {
+ VSYSCALL_LAST_PAGE,
+- VSYSCALL_FIRST_PAGE = VSYSCALL_LAST_PAGE + ((VSYSCALL_END-VSYSCALL_START) >> PAGE_SHIFT) - 1,
++ VSYSCALL_FIRST_PAGE = VSYSCALL_LAST_PAGE
++ + ((VSYSCALL_END-VSYSCALL_START) >> PAGE_SHIFT) - 1,
+ VSYSCALL_HPET,
+ FIX_DBGP_BASE,
+ FIX_EARLYCON_MEM_BASE,
+@@ -45,11 +46,12 @@ enum fixed_addresses {
+ #endif
+ #ifndef CONFIG_XEN
+ FIX_IO_APIC_BASE_0,
+- FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS-1,
++ FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1,
+ #endif
+ #ifdef CONFIG_EFI
+ FIX_EFI_IO_MAP_LAST_PAGE,
+- FIX_EFI_IO_MAP_FIRST_PAGE = FIX_EFI_IO_MAP_LAST_PAGE+MAX_EFI_IO_PAGES-1,
++ FIX_EFI_IO_MAP_FIRST_PAGE = FIX_EFI_IO_MAP_LAST_PAGE
++ + MAX_EFI_IO_PAGES - 1,
+ #endif
+ #ifdef CONFIG_ACPI
+ FIX_ACPI_BEGIN,
+@@ -79,19 +81,16 @@ enum fixed_addresses {
+ __end_of_fixed_addresses
+ };
+
+-extern void __set_fixmap (enum fixed_addresses idx,
+- unsigned long phys, pgprot_t flags);
++extern void __set_fixmap(enum fixed_addresses idx,
++ unsigned long phys, pgprot_t flags);
+
+-#define set_fixmap(idx, phys) \
+- __set_fixmap(idx, phys, PAGE_KERNEL)
++#define set_fixmap(idx, phys) \
++ __set_fixmap(idx, phys, PAGE_KERNEL)
+ /*
+ * Some hardware wants to get fixmapped without caching.
+ */
+-#define set_fixmap_nocache(idx, phys) \
+- __set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE)
+-
+-#define clear_fixmap(idx) \
+- __set_fixmap(idx, 0, __pgprot(0))
++#define set_fixmap_nocache(idx, phys) \
++ __set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE)
+
+ #define FIXADDR_TOP (VSYSCALL_END-PAGE_SIZE)
+ #define FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT)
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/highmem.h 2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/highmem.h 2009-03-16 16:38:05.000000000 +0100
+@@ -8,7 +8,7 @@
+ * Gerhard.Wichert@pdb.siemens.de
+ *
+ *
+- * Redesigned the x86 32-bit VM architecture to deal with
++ * Redesigned the x86 32-bit VM architecture to deal with
+ * up to 16 Terabyte physical memory. With current x86 CPUs
+ * we now support up to 64 Gigabytes physical RAM.
+ *
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/io.h 2009-02-16 16:18:36.000000000 +0100
++++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/io.h 2009-03-16 16:38:05.000000000 +0100
+@@ -1,5 +1,22 @@
++#ifndef _ASM_X86_IO_H
++#define _ASM_X86_IO_H
++
++#define ARCH_HAS_IOREMAP_WC
++
+ #ifdef CONFIG_X86_32
+ # include "io_32.h"
+ #else
+ # include "io_64.h"
+ #endif
++
++extern void *xlate_dev_mem_ptr(unsigned long phys);
++extern void unxlate_dev_mem_ptr(unsigned long phys, void *addr);
++
++extern void map_devmem(unsigned long pfn, unsigned long len, pgprot_t);
++extern void unmap_devmem(unsigned long pfn, unsigned long len, pgprot_t);
++
++extern int ioremap_check_change_attr(unsigned long mfn, unsigned long size,
++ unsigned long prot_val);
++extern void __iomem *ioremap_wc(unsigned long offset, unsigned long size);
++
++#endif /* _ASM_X86_IO_H */
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/irqflags.h 2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/irqflags.h 2009-03-16 16:38:05.000000000 +0100
+@@ -137,11 +137,11 @@ sysexit_ecrit: /**** END OF SYSEXIT CRIT
+ #endif /* __ASSEMBLY__ */
+
+ #ifndef __ASSEMBLY__
+-#define raw_local_save_flags(flags) \
+- do { (flags) = __raw_local_save_flags(); } while (0)
++#define raw_local_save_flags(flags) \
++ do { (flags) = __raw_local_save_flags(); } while (0)
+
+-#define raw_local_irq_save(flags) \
+- do { (flags) = __raw_local_irq_save(); } while (0)
++#define raw_local_irq_save(flags) \
++ do { (flags) = __raw_local_irq_save(); } while (0)
+
+ static inline int raw_irqs_disabled_flags(unsigned long flags)
+ {
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/mmu_context_32.h 2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/mmu_context_32.h 2009-03-16 16:38:05.000000000 +0100
+@@ -94,7 +94,7 @@ static inline void switch_mm(struct mm_s
+ BUG_ON(per_cpu(cpu_tlbstate, cpu).active_mm != next);
+
+ if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) {
+- /* We were in lazy tlb mode and leave_mm disabled
++ /* We were in lazy tlb mode and leave_mm disabled
+ * tlb flush IPI delivery. We must reload %cr3.
+ */
+ load_cr3(next->pgd);
+@@ -107,10 +107,10 @@ static inline void switch_mm(struct mm_s
+ #define deactivate_mm(tsk, mm) \
+ asm("movl %0,%%gs": :"r" (0));
+
+-#define activate_mm(prev, next) \
+- do { \
+- xen_activate_mm(prev, next); \
+- switch_mm((prev),(next),NULL); \
+- } while(0)
++#define activate_mm(prev, next) \
++do { \
++ xen_activate_mm(prev, next); \
++ switch_mm((prev), (next), NULL); \
++} while (0)
+
+ #endif
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/mmu_context_64.h 2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/mmu_context_64.h 2009-03-16 16:38:05.000000000 +0100
+@@ -21,7 +21,7 @@ void destroy_context(struct mm_struct *m
+ static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
+ {
+ #if defined(CONFIG_SMP) && !defined(CONFIG_XEN)
+- if (read_pda(mmu_state) == TLBSTATE_OK)
++ if (read_pda(mmu_state) == TLBSTATE_OK)
+ write_pda(mmu_state, TLBSTATE_LAZY);
+ #endif
+ }
+@@ -62,7 +62,7 @@ extern void mm_pin(struct mm_struct *mm)
+ extern void mm_unpin(struct mm_struct *mm);
+ void mm_pin_all(void);
+
+-static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
++static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
+ struct task_struct *tsk)
+ {
+ unsigned cpu = smp_processor_id();
+@@ -106,7 +106,7 @@ static inline void switch_mm(struct mm_s
+ if (read_pda(active_mm) != next)
+ BUG();
+ if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) {
+- /* We were in lazy tlb mode and leave_mm disabled
++ /* We were in lazy tlb mode and leave_mm disabled
+ * tlb flush IPI delivery. We must reload CR3
+ * to make sure to use no freed page tables.
+ */
+@@ -118,10 +118,11 @@ static inline void switch_mm(struct mm_s
+ #endif
+ }
+
+-#define deactivate_mm(tsk,mm) do { \
+- load_gs_index(0); \
+- asm volatile("movl %0,%%fs"::"r"(0)); \
+-} while(0)
++#define deactivate_mm(tsk, mm) \
++do { \
++ load_gs_index(0); \
++ asm volatile("movl %0,%%fs"::"r"(0)); \
++} while (0)
+
+ static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next)
+ {
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/page.h 2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/page.h 2009-03-16 16:38:05.000000000 +0100
+@@ -20,8 +20,16 @@
+ #define _PAGE_BIT_IO 9
+ #define _PAGE_IO (_AC(1, L)<<_PAGE_BIT_IO)
+
+-#define PHYSICAL_PAGE_MASK (~(_AT(phys_addr_t, PAGE_SIZE) - 1) & __PHYSICAL_MASK)
+-#define PTE_MASK _AT(pteval_t, PHYSICAL_PAGE_MASK)
++#define __PHYSICAL_MASK ((phys_addr_t)(1ULL << __PHYSICAL_MASK_SHIFT) - 1)
++#define __VIRTUAL_MASK ((1UL << __VIRTUAL_MASK_SHIFT) - 1)
++
++/* Cast PAGE_MASK to a signed type so that it is sign-extended if
++ virtual addresses are 32-bits but physical addresses are larger
++ (ie, 32-bit PAE). */
++#define PHYSICAL_PAGE_MASK (((signed long)PAGE_MASK) & __PHYSICAL_MASK)
++
++/* PTE_MASK extracts the PFN from a (pte|pmd|pud|pgd)val_t */
++#define PTE_MASK ((pteval_t)PHYSICAL_PAGE_MASK)
+
+ #define PMD_PAGE_SIZE (_AC(1, UL) << PMD_SHIFT)
+ #define PMD_PAGE_MASK (~(PMD_PAGE_SIZE-1))
+@@ -34,19 +42,14 @@
+ /* to align the pointer to the (next) page boundary */
+ #define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK)
+
+-#define __PHYSICAL_MASK _AT(phys_addr_t, (_AC(1,ULL) << __PHYSICAL_MASK_SHIFT) - 1)
+-#define __VIRTUAL_MASK ((_AC(1,UL) << __VIRTUAL_MASK_SHIFT) - 1)
+-
+ #ifndef __ASSEMBLY__
+ #include <linux/types.h>
+ #endif
+
+ #ifdef CONFIG_X86_64
+ #include <asm/page_64.h>
+-#define max_pfn_mapped end_pfn_map
+ #else
+ #include <asm/page_32.h>
+-#define max_pfn_mapped max_low_pfn
+ #endif /* CONFIG_X86_64 */
+
+ #define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET)
+@@ -59,6 +62,9 @@
+ #ifndef __ASSEMBLY__
+
+ extern int page_is_ram(unsigned long pagenr);
++extern int devmem_is_allowed(unsigned long pagenr);
++
++extern unsigned long max_pfn_mapped;
+
+ struct page;
+
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/page_64.h 2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/page_64.h 2009-03-16 16:38:05.000000000 +0100
+@@ -5,7 +5,7 @@
+
+ #define THREAD_ORDER 1
+ #define THREAD_SIZE (PAGE_SIZE << THREAD_ORDER)
+-#define CURRENT_MASK (~(THREAD_SIZE-1))
++#define CURRENT_MASK (~(THREAD_SIZE - 1))
+
+ #define EXCEPTION_STACK_ORDER 0
+ #define EXCEPTION_STKSZ (PAGE_SIZE << EXCEPTION_STACK_ORDER)
+@@ -53,10 +53,10 @@
+ #define __VIRTUAL_MASK_SHIFT 48
+
+ /*
+- * Kernel image size is limited to 128 MB (see level2_kernel_pgt in
++ * Kernel image size is limited to 512 MB (see level2_kernel_pgt in
+ * arch/x86/kernel/head_64.S), and it is mapped here:
+ */
+-#define KERNEL_IMAGE_SIZE (128*1024*1024)
++#define KERNEL_IMAGE_SIZE (512 * 1024 * 1024)
+ #define KERNEL_IMAGE_START _AC(0xffffffff80000000, UL)
+
+ #ifndef __ASSEMBLY__
+@@ -64,7 +64,6 @@ void clear_page(void *page);
+ void copy_page(void *to, void *from);
+
+ extern unsigned long end_pfn;
+-extern unsigned long end_pfn_map;
+
+ static inline unsigned long __phys_addr(unsigned long x)
+ {
+@@ -89,6 +88,9 @@ typedef union { pteval_t pte; unsigned i
+
+ #define vmemmap ((struct page *)VMEMMAP_START)
+
++extern unsigned long init_memory_mapping(unsigned long start,
++ unsigned long end);
++
+ #endif /* !__ASSEMBLY__ */
+
+ #ifdef CONFIG_FLATMEM
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/pci.h 2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/pci.h 2009-03-16 16:38:05.000000000 +0100
+@@ -8,14 +8,13 @@
+ #include <asm/scatterlist.h>
+ #include <asm/io.h>
+
+-
+ #ifdef __KERNEL__
+
+ struct pci_sysdata {
+ int domain; /* PCI domain */
+ int node; /* NUMA node */
+ #ifdef CONFIG_X86_64
+- void* iommu; /* IOMMU private data */
++ void *iommu; /* IOMMU private data */
+ #endif
+ #ifdef CONFIG_XEN_PCIDEV_FRONTEND
+ struct pcifront_device *pdev;
+@@ -23,6 +22,8 @@ struct pci_sysdata {
+ };
+
+ /* scan a bus after allocating a pci_sysdata for it */
++extern struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops,
++ int node);
+ extern struct pci_bus *pci_scan_bus_with_sysdata(int busno);
+
+ static inline int pci_domain_nr(struct pci_bus *bus)
+@@ -36,6 +37,7 @@ static inline int pci_proc_domain(struct
+ return pci_domain_nr(bus);
+ }
+
++extern void pci_iommu_alloc(void);
+
+ /* Can be used to override the logic in pci_scan_bus for skipping
+ already-configured bus numbers - to be used for buggy BIOSes
+@@ -57,7 +59,7 @@ extern unsigned long pci_mem_start;
+ #define PCIBIOS_MIN_CARDBUS_IO 0x4000
+
+ void pcibios_config_init(void);
+-struct pci_bus * pcibios_scan_root(int bus);
++struct pci_bus *pcibios_scan_root(int bus);
+
+ void pcibios_set_master(struct pci_dev *dev);
+ void pcibios_penalize_isa_irq(int irq, int active);
+@@ -67,7 +69,8 @@ int pcibios_set_irq_routing(struct pci_d
+
+ #define HAVE_PCI_MMAP
+ extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
+- enum pci_mmap_state mmap_state, int write_combine);
++ enum pci_mmap_state mmap_state,
++ int write_combine);
+
+
+ #ifdef CONFIG_PCI
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/pgalloc.h 2009-02-16 16:18:36.000000000 +0100
++++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/pgalloc.h 2009-03-16 16:38:05.000000000 +0100
+@@ -1,5 +1,149 @@
+-#ifdef CONFIG_X86_32
+-# include "pgalloc_32.h"
+-#else
+-# include "pgalloc_64.h"
++#ifndef _ASM_X86_PGALLOC_H
++#define _ASM_X86_PGALLOC_H
++
++#include <linux/threads.h>
++#include <linux/mm.h> /* for struct page */
++#include <linux/pagemap.h>
++
++#include <asm/io.h> /* for phys_to_virt and page_to_pseudophys */
++
++static inline void paravirt_alloc_pte(struct mm_struct *mm, unsigned long pfn) {}
++static inline void paravirt_alloc_pmd(struct mm_struct *mm, unsigned long pfn) {}
++static inline void paravirt_alloc_pmd_clone(unsigned long pfn, unsigned long clonepfn,
++ unsigned long start, unsigned long count) {}
++static inline void paravirt_alloc_pud(struct mm_struct *mm, unsigned long pfn) {}
++static inline void paravirt_release_pte(unsigned long pfn) {}
++static inline void paravirt_release_pmd(unsigned long pfn) {}
++static inline void paravirt_release_pud(unsigned long pfn) {}
++
++#ifdef CONFIG_X86_64
++void early_make_page_readonly(void *va, unsigned int feature);
++pmd_t *early_get_pmd(unsigned long va);
++#define make_lowmem_page_readonly make_page_readonly
++#define make_lowmem_page_writable make_page_writable
+ #endif
++
++/*
++ * Allocate and free page tables.
++ */
++extern pgd_t *pgd_alloc(struct mm_struct *);
++extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
++
++extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);
++extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);
++
++/* Should really implement gc for free page table pages. This could be
++ done with a reference count in struct page. */
++
++static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
++{
++ BUG_ON((unsigned long)pte & (PAGE_SIZE-1));
++ make_lowmem_page_writable(pte, XENFEAT_writable_page_tables);
++ free_page((unsigned long)pte);
++}
++
++extern void __pte_free(pgtable_t);
++static inline void pte_free(struct mm_struct *mm, struct page *pte)
++{
++ __pte_free(pte);
++}
++
++extern void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte);
++
++static inline void pmd_populate_kernel(struct mm_struct *mm,
++ pmd_t *pmd, pte_t *pte)
++{
++ paravirt_alloc_pte(mm, __pa(pte) >> PAGE_SHIFT);
++ set_pmd(pmd, __pmd(__pa(pte) | _PAGE_TABLE));
++}
++
++static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
++ struct page *pte)
++{
++ unsigned long pfn = page_to_pfn(pte);
++
++ paravirt_alloc_pte(mm, pfn);
++ if (PagePinned(virt_to_page(mm->pgd))) {
++ if (!PageHighMem(pte))
++ BUG_ON(HYPERVISOR_update_va_mapping(
++ (unsigned long)__va(pfn << PAGE_SHIFT),
++ pfn_pte(pfn, PAGE_KERNEL_RO), 0));
++#ifndef CONFIG_X86_64
++ else if (!TestSetPagePinned(pte))
++ kmap_flush_unused();
++#endif
++ set_pmd(pmd, __pmd(((pmdval_t)pfn << PAGE_SHIFT) | _PAGE_TABLE));
++ } else
++ *pmd = __pmd(((pmdval_t)pfn << PAGE_SHIFT) | _PAGE_TABLE);
++}
++
++#define pmd_pgtable(pmd) pmd_page(pmd)
++
++#if PAGETABLE_LEVELS > 2
++extern pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr);
++extern void __pmd_free(pgtable_t);
++
++static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
++{
++ BUG_ON((unsigned long)pmd & (PAGE_SIZE-1));
++ __pmd_free(virt_to_page(pmd));
++}
++
++extern void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd);
++
++#ifdef CONFIG_X86_PAE
++extern void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd);
++#else /* !CONFIG_X86_PAE */
++static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
++{
++ paravirt_alloc_pmd(mm, __pa(pmd) >> PAGE_SHIFT);
++ if (unlikely(PagePinned(virt_to_page((mm)->pgd)))) {
++ BUG_ON(HYPERVISOR_update_va_mapping(
++ (unsigned long)pmd,
++ pfn_pte(virt_to_phys(pmd)>>PAGE_SHIFT,
++ PAGE_KERNEL_RO), 0));
++ set_pud(pud, __pud(_PAGE_TABLE | __pa(pmd)));
++ } else
++ *pud = __pud(_PAGE_TABLE | __pa(pmd));
++}
++#endif /* CONFIG_X86_PAE */
++
++#if PAGETABLE_LEVELS > 3
++#define __user_pgd(pgd) ((pgd) + PTRS_PER_PGD)
++
++/*
++ * We need to use the batch mode here, but pgd_populate() won't
++ * be called frequently.
++ */
++static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
++{
++ paravirt_alloc_pud(mm, __pa(pud) >> PAGE_SHIFT);
++ if (unlikely(PagePinned(virt_to_page((mm)->pgd)))) {
++ BUG_ON(HYPERVISOR_update_va_mapping(
++ (unsigned long)pud,
++ pfn_pte(virt_to_phys(pud)>>PAGE_SHIFT,
++ PAGE_KERNEL_RO), 0));
++ set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(pud)));
++ set_pgd(__user_pgd(pgd), __pgd(_PAGE_TABLE | __pa(pud)));
++ } else {
++ *(pgd) = __pgd(_PAGE_TABLE | __pa(pud));
++ *__user_pgd(pgd) = *(pgd);
++ }
++}
++
++static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
++{
++ return (pud_t *)pmd_alloc_one(mm, addr);
++}
++
++static inline void pud_free(struct mm_struct *mm, pud_t *pud)
++{
++ BUG_ON((unsigned long)pud & (PAGE_SIZE-1));
++ __pmd_free(virt_to_page(pud));
++}
++
++extern void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud);
++#endif /* PAGETABLE_LEVELS > 3 */
++#endif /* PAGETABLE_LEVELS > 2 */
++
++#endif /* _ASM_X86_PGALLOC_H */
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/pgalloc_32.h 2009-03-16 16:33:40.000000000 +0100
++++ /dev/null 1970-01-01 00:00:00.000000000 +0000
+@@ -1,111 +0,0 @@
+-#ifndef _I386_PGALLOC_H
+-#define _I386_PGALLOC_H
+-
+-#include <linux/threads.h>
+-#include <linux/mm.h> /* for struct page */
+-#include <linux/pagemap.h>
+-#include <asm/tlb.h>
+-#include <asm-generic/tlb.h>
+-#include <asm/io.h> /* for phys_to_virt and page_to_pseudophys */
+-
+-#define paravirt_alloc_pt(mm, pfn) do { } while (0)
+-#define paravirt_alloc_pd(mm, pfn) do { } while (0)
+-#define paravirt_alloc_pd_clone(pfn, clonepfn, start, count) do { } while (0)
+-#define paravirt_release_pt(pfn) do { } while (0)
+-#define paravirt_release_pd(pfn) do { } while (0)
+-
+-static inline void pmd_populate_kernel(struct mm_struct *mm,
+- pmd_t *pmd, pte_t *pte)
+-{
+- paravirt_alloc_pt(mm, __pa(pte) >> PAGE_SHIFT);
+- set_pmd(pmd, __pmd(__pa(pte) | _PAGE_TABLE));
+-}
+-
+-static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *pte)
+-{
+- unsigned long pfn = page_to_pfn(pte);
+-
+- paravirt_alloc_pt(mm, pfn);
+- if (PagePinned(virt_to_page(mm->pgd))) {
+- if (!PageHighMem(pte))
+- BUG_ON(HYPERVISOR_update_va_mapping(
+- (unsigned long)__va(pfn << PAGE_SHIFT),
+- pfn_pte(pfn, PAGE_KERNEL_RO), 0));
+- else if (!test_and_set_bit(PG_pinned, &pte->flags))
+- kmap_flush_unused();
+- set_pmd(pmd, __pmd(((pmdval_t)pfn << PAGE_SHIFT) | _PAGE_TABLE));
+- } else
+- *pmd = __pmd(((pmdval_t)pfn << PAGE_SHIFT) | _PAGE_TABLE);
+-}
+-#define pmd_pgtable(pmd) pmd_page(pmd)
+-
+-/*
+- * Allocate and free page tables.
+- */
+-extern void pgd_test_and_unpin(pgd_t *);
+-extern pgd_t *pgd_alloc(struct mm_struct *);
+-extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
+-
+-extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long);
+-extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long);
+-
+-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
+-{
+- make_lowmem_page_writable(pte, XENFEAT_writable_page_tables);
+- free_page((unsigned long)pte);
+-}
+-
+-extern void __pte_free(pgtable_t);
+-static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
+-{
+- __pte_free(pte);
+-}
+-
+-
+-extern void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte);
+-
+-#ifdef CONFIG_X86_PAE
+-/*
+- * In the PAE case we free the pmds as part of the pgd.
+- */
+-extern pmd_t *pmd_alloc_one(struct mm_struct *, unsigned long);
+-
+-extern void __pmd_free(pgtable_t);
+-static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
+-{
+- BUG_ON((unsigned long)pmd & (PAGE_SIZE-1));
+- __pmd_free(virt_to_page(pmd));
+-}
+-
+-extern void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd);
+-
+-static inline void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd)
+-{
+- struct page *page = virt_to_page(pmd);
+- unsigned long pfn = page_to_pfn(page);
+-
+- paravirt_alloc_pd(mm, pfn);
+-
+- /* Note: almost everything apart from _PAGE_PRESENT is
+- reserved at the pmd (PDPT) level. */
+- if (PagePinned(virt_to_page(mm->pgd))) {
+- BUG_ON(PageHighMem(page));
+- BUG_ON(HYPERVISOR_update_va_mapping(
+- (unsigned long)__va(pfn << PAGE_SHIFT),
+- pfn_pte(pfn, PAGE_KERNEL_RO), 0));
+- set_pud(pudp, __pud(__pa(pmd) | _PAGE_PRESENT));
+- } else
+- *pudp = __pud(__pa(pmd) | _PAGE_PRESENT);
+-
+- /*
+- * According to Intel App note "TLBs, Paging-Structure Caches,
+- * and Their Invalidation", April 2007, document 317080-001,
+- * section 8.1: in PAE mode we explicitly have to flush the
+- * TLB via cr3 if the top-level pgd is changed...
+- */
+- if (mm == current->active_mm)
+- xen_tlb_flush();
+-}
+-#endif /* CONFIG_X86_PAE */
+-
+-#endif /* _I386_PGALLOC_H */
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/pgalloc_64.h 2009-03-16 16:33:40.000000000 +0100
++++ /dev/null 1970-01-01 00:00:00.000000000 +0000
+@@ -1,179 +0,0 @@
+-#ifndef _X86_64_PGALLOC_H
+-#define _X86_64_PGALLOC_H
+-
+-#include <asm/pda.h>
+-#include <linux/threads.h>
+-#include <linux/mm.h>
+-#include <asm/io.h> /* for phys_to_virt and page_to_pseudophys */
+-
+-pmd_t *early_get_pmd(unsigned long va);
+-void early_make_page_readonly(void *va, unsigned int feature);
+-
+-#define __user_pgd(pgd) ((pgd) + PTRS_PER_PGD)
+-
+-#define pmd_populate_kernel(mm, pmd, pte) \
+- set_pmd(pmd, __pmd(_PAGE_TABLE | __pa(pte)))
+-
+-static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
+-{
+- if (unlikely(PagePinned(virt_to_page((mm)->pgd)))) {
+- BUG_ON(HYPERVISOR_update_va_mapping(
+- (unsigned long)pmd,
+- pfn_pte(virt_to_phys(pmd)>>PAGE_SHIFT,
+- PAGE_KERNEL_RO), 0));
+- set_pud(pud, __pud(_PAGE_TABLE | __pa(pmd)));
+- } else {
+- *(pud) = __pud(_PAGE_TABLE | __pa(pmd));
+- }
+-}
+-
+-/*
+- * We need to use the batch mode here, but pgd_pupulate() won't be
+- * be called frequently.
+- */
+-static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
+-{
+- if (unlikely(PagePinned(virt_to_page((mm)->pgd)))) {
+- BUG_ON(HYPERVISOR_update_va_mapping(
+- (unsigned long)pud,
+- pfn_pte(virt_to_phys(pud)>>PAGE_SHIFT,
+- PAGE_KERNEL_RO), 0));
+- set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(pud)));
+- set_pgd(__user_pgd(pgd), __pgd(_PAGE_TABLE | __pa(pud)));
+- } else {
+- *(pgd) = __pgd(_PAGE_TABLE | __pa(pud));
+- *(__user_pgd(pgd)) = *(pgd);
+- }
+-}
+-
+-#define pmd_pgtable(pmd) pmd_page(pmd)
+-
+-static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *pte)
+-{
+- if (unlikely(PagePinned(virt_to_page((mm)->pgd)))) {
+- BUG_ON(HYPERVISOR_update_va_mapping(
+- (unsigned long)__va(page_to_pfn(pte) << PAGE_SHIFT),
+- pfn_pte(page_to_pfn(pte), PAGE_KERNEL_RO), 0));
+- set_pmd(pmd, __pmd(_PAGE_TABLE | (page_to_pfn(pte) << PAGE_SHIFT)));
+- } else {
+- *(pmd) = __pmd(_PAGE_TABLE | (page_to_pfn(pte) << PAGE_SHIFT));
+- }
+-}
+-
+-extern void __pmd_free(pgtable_t);
+-static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
+-{
+- BUG_ON((unsigned long)pmd & (PAGE_SIZE-1));
+- __pmd_free(virt_to_page(pmd));
+-}
+-
+-extern pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr);
+-
+-static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
+-{
+- return (pud_t *)pmd_alloc_one(mm, addr);
+-}
+-
+-static inline void pud_free(struct mm_struct *mm, pud_t *pud)
+-{
+- BUG_ON((unsigned long)pud & (PAGE_SIZE-1));
+- __pmd_free(virt_to_page(pud));
+-}
+-
+-static inline void pgd_list_add(pgd_t *pgd)
+-{
+- struct page *page = virt_to_page(pgd);
+- unsigned long flags;
+-
+- spin_lock_irqsave(&pgd_lock, flags);
+- list_add(&page->lru, &pgd_list);
+- spin_unlock_irqrestore(&pgd_lock, flags);
+-}
+-
+-static inline void pgd_list_del(pgd_t *pgd)
+-{
+- struct page *page = virt_to_page(pgd);
+- unsigned long flags;
+-
+- spin_lock_irqsave(&pgd_lock, flags);
+- list_del(&page->lru);
+- spin_unlock_irqrestore(&pgd_lock, flags);
+-}
+-
+-extern void pgd_test_and_unpin(pgd_t *);
+-
+-static inline pgd_t *pgd_alloc(struct mm_struct *mm)
+-{
+- /*
+- * We allocate two contiguous pages for kernel and user.
+- */
+- unsigned boundary;
+- pgd_t *pgd = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_REPEAT, 1);
+- if (!pgd)
+- return NULL;
+- pgd_list_add(pgd);
+- pgd_test_and_unpin(pgd);
+- /*
+- * Copy kernel pointers in from init.
+- * Could keep a freelist or slab cache of those because the kernel
+- * part never changes.
+- */
+- boundary = pgd_index(__PAGE_OFFSET);
+- memset(pgd, 0, boundary * sizeof(pgd_t));
+- memcpy(pgd + boundary,
+- init_level4_pgt + boundary,
+- (PTRS_PER_PGD - boundary) * sizeof(pgd_t));
+-
+- memset(__user_pgd(pgd), 0, PAGE_SIZE); /* clean up user pgd */
+- /*
+- * Set level3_user_pgt for vsyscall area
+- */
+- __user_pgd(pgd)[pgd_index(VSYSCALL_START)] =
+- __pgd(__pa_symbol(level3_user_pgt) | _PAGE_TABLE);
+- return pgd;
+-}
+-
+-static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
+-{
+- pgd_test_and_unpin(pgd);
+- pgd_list_del(pgd);
+- free_pages((unsigned long)pgd, 1);
+-}
+-
+-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
+-{
+- pte_t *pte = (pte_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
+- if (pte)
+- make_page_readonly(pte, XENFEAT_writable_page_tables);
+-
+- return pte;
+-}
+-
+-extern pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr);
+-
+-/* Should really implement gc for free page table pages. This could be
+- done with a reference count in struct page. */
+-
+-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
+-{
+- BUG_ON((unsigned long)pte & (PAGE_SIZE-1));
+- make_page_writable(pte, XENFEAT_writable_page_tables);
+- free_page((unsigned long)pte);
+-}
+-
+-extern void __pte_free(pgtable_t);
+-static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
+-{
+- __pte_free(pte);
+-}
+-
+-#define __pte_free_tlb(tlb,pte) \
+-do { \
+- pgtable_page_dtor((pte)); \
+- tlb_remove_page((tlb), (pte)); \
+-} while (0)
+-
+-#define __pmd_free_tlb(tlb,x) tlb_remove_page((tlb),virt_to_page(x))
+-#define __pud_free_tlb(tlb,x) tlb_remove_page((tlb),virt_to_page(x))
+-
+-#endif /* _X86_64_PGALLOC_H */
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/pgtable.h 2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/pgtable.h 2009-03-16 16:38:05.000000000 +0100
+@@ -1,17 +1,15 @@
+ #ifndef _ASM_X86_PGTABLE_H
+ #define _ASM_X86_PGTABLE_H
+
+-#define USER_PTRS_PER_PGD ((TASK_SIZE-1)/PGDIR_SIZE+1)
+ #define FIRST_USER_ADDRESS 0
+
+-#define _PAGE_BIT_PRESENT 0
+-#define _PAGE_BIT_RW 1
+-#define _PAGE_BIT_USER 2
+-#define _PAGE_BIT_PWT 3
+-#define _PAGE_BIT_PCD 4
+-#define _PAGE_BIT_ACCESSED 5
+-#define _PAGE_BIT_DIRTY 6
+-#define _PAGE_BIT_FILE 6
++#define _PAGE_BIT_PRESENT 0 /* is present */
++#define _PAGE_BIT_RW 1 /* writeable */
++#define _PAGE_BIT_USER 2 /* userspace addressable */
++#define _PAGE_BIT_PWT 3 /* page write through */
++#define _PAGE_BIT_PCD 4 /* page cache disabled */
++#define _PAGE_BIT_ACCESSED 5 /* was accessed (raised by CPU) */
++#define _PAGE_BIT_DIRTY 6 /* was written to (raised by CPU) */
+ #define _PAGE_BIT_PSE 7 /* 4 MB (or 2MB) page */
+ #define _PAGE_BIT_PAT 7 /* on 4KB pages */
+ #define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */
+@@ -22,6 +20,14 @@
+ #define _PAGE_BIT_PAT_LARGE 12 /* On 2MB or 1GB pages */
+ #define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */
+
++/* If _PAGE_BIT_PRESENT is clear, we use these: */
++
++/* set: nonlinear file mapping, saved PTE; unset:swap */
++#define _PAGE_BIT_FILE _PAGE_BIT_DIRTY
++
++/* if the user mapped it with PROT_NONE; pte_present gives true */
++#define _PAGE_BIT_PROTNONE _PAGE_BIT_GLOBAL
++
+ /*
+ * Note: we use _AC(1, L) instead of _AC(1, UL) so that we get a
+ * sign-extended value on 32-bit with all 1's in the upper word,
+@@ -48,10 +54,8 @@
+ #define _PAGE_NX 0
+ #endif
+
+-/* If _PAGE_PRESENT is clear, we use these: */
+-#define _PAGE_FILE _PAGE_DIRTY /* nonlinear file mapping, saved PTE; unset:swap */
+-#define _PAGE_PROTNONE _PAGE_PSE /* if the user mapped it with PROT_NONE;
+- pte_present gives true */
++#define _PAGE_FILE (_AC(1, L)<<_PAGE_BIT_FILE)
++#define _PAGE_PROTNONE (_AC(1, L)<<_PAGE_BIT_PROTNONE)
+
+ #ifndef __ASSEMBLY__
+ #if defined(CONFIG_X86_64) && CONFIG_XEN_COMPAT <= 0x030002
+@@ -61,20 +65,42 @@ extern unsigned int __kernel_page_user;
+ #endif
+ #endif
+
+-#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY)
+-#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY | __kernel_page_user)
++#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \
++ _PAGE_ACCESSED | _PAGE_DIRTY)
++#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | \
++ _PAGE_DIRTY | __kernel_page_user)
++
++/* Set of bits not changed in pte_modify */
++#define _PAGE_CHG_MASK (PTE_MASK | _PAGE_CACHE_MASK | _PAGE_IO | \
++ _PAGE_ACCESSED | _PAGE_DIRTY)
+
+-#define _PAGE_CHG_MASK (PTE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_IO)
++/*
++ * PAT settings are part of the hypervisor interface, which sets the
++ * MSR to 0x050100070406 (i.e. WB, WT, UC-, UC, WC, WP [, UC, UC]).
++ */
++#define _PAGE_CACHE_MASK (_PAGE_PCD | _PAGE_PWT | _PAGE_PAT)
++#define _PAGE_CACHE_WB (0)
++#define _PAGE_CACHE_WT (_PAGE_PWT)
++#define _PAGE_CACHE_WC (_PAGE_PAT)
++#define _PAGE_CACHE_WP (_PAGE_PAT | _PAGE_PWT)
++#define _PAGE_CACHE_UC_MINUS (_PAGE_PCD)
++#define _PAGE_CACHE_UC (_PAGE_PCD | _PAGE_PWT)
+
+ #define PAGE_NONE __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED)
+-#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX)
++#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \
++ _PAGE_ACCESSED | _PAGE_NX)
+
+-#define PAGE_SHARED_EXEC __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED)
+-#define PAGE_COPY_NOEXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX)
+-#define PAGE_COPY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
++#define PAGE_SHARED_EXEC __pgprot(_PAGE_PRESENT | _PAGE_RW | \
++ _PAGE_USER | _PAGE_ACCESSED)
++#define PAGE_COPY_NOEXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \
++ _PAGE_ACCESSED | _PAGE_NX)
++#define PAGE_COPY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \
++ _PAGE_ACCESSED)
+ #define PAGE_COPY PAGE_COPY_NOEXEC
+-#define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX)
+-#define PAGE_READONLY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
++#define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | \
++ _PAGE_ACCESSED | _PAGE_NX)
++#define PAGE_READONLY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \
++ _PAGE_ACCESSED)
+
+ #ifdef CONFIG_X86_32
+ #define _PAGE_KERNEL_EXEC \
+@@ -93,6 +119,7 @@ extern pteval_t __PAGE_KERNEL, __PAGE_KE
+ #define __PAGE_KERNEL_RO (__PAGE_KERNEL & ~_PAGE_RW)
+ #define __PAGE_KERNEL_RX (__PAGE_KERNEL_EXEC & ~_PAGE_RW)
+ #define __PAGE_KERNEL_EXEC_NOCACHE (__PAGE_KERNEL_EXEC | _PAGE_PCD | _PAGE_PWT)
++#define __PAGE_KERNEL_WC (__PAGE_KERNEL | _PAGE_CACHE_WC)
+ #define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_PCD | _PAGE_PWT)
+ #define __PAGE_KERNEL_UC_MINUS (__PAGE_KERNEL | _PAGE_PCD)
+ #define __PAGE_KERNEL_VSYSCALL (__PAGE_KERNEL_RX | _PAGE_USER)
+@@ -109,6 +136,7 @@ extern pteval_t __PAGE_KERNEL, __PAGE_KE
+ #define PAGE_KERNEL_RO MAKE_GLOBAL(__PAGE_KERNEL_RO)
+ #define PAGE_KERNEL_EXEC MAKE_GLOBAL(__PAGE_KERNEL_EXEC)
+ #define PAGE_KERNEL_RX MAKE_GLOBAL(__PAGE_KERNEL_RX)
++#define PAGE_KERNEL_WC MAKE_GLOBAL(__PAGE_KERNEL_WC)
+ #define PAGE_KERNEL_NOCACHE MAKE_GLOBAL(__PAGE_KERNEL_NOCACHE)
+ #define PAGE_KERNEL_UC_MINUS MAKE_GLOBAL(__PAGE_KERNEL_UC_MINUS)
+ #define PAGE_KERNEL_EXEC_NOCACHE MAKE_GLOBAL(__PAGE_KERNEL_EXEC_NOCACHE)
+@@ -142,7 +170,7 @@ extern pteval_t __PAGE_KERNEL, __PAGE_KE
+ * ZERO_PAGE is a global shared page that is always zero: used
+ * for zero-mapped memory areas etc..
+ */
+-extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)];
++extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
+ #define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
+
+ extern spinlock_t pgd_lock;
+@@ -152,30 +180,111 @@ extern struct list_head pgd_list;
+ * The following only work if pte_present() is true.
+ * Undefined behaviour if not..
+ */
+-static inline int pte_dirty(pte_t pte) { return __pte_val(pte) & _PAGE_DIRTY; }
+-static inline int pte_young(pte_t pte) { return __pte_val(pte) & _PAGE_ACCESSED; }
+-static inline int pte_write(pte_t pte) { return __pte_val(pte) & _PAGE_RW; }
+-static inline int pte_file(pte_t pte) { return __pte_val(pte) & _PAGE_FILE; }
+-static inline int pte_huge(pte_t pte) { return __pte_val(pte) & _PAGE_PSE; }
+-static inline int pte_global(pte_t pte) { return 0; }
+-static inline int pte_exec(pte_t pte) { return !(__pte_val(pte) & _PAGE_NX); }
+-
+-static inline int pmd_large(pmd_t pte) {
+- return (__pmd_val(pte) & (_PAGE_PSE|_PAGE_PRESENT)) ==
+- (_PAGE_PSE|_PAGE_PRESENT);
+-}
+-
+-static inline pte_t pte_mkclean(pte_t pte) { return __pte_ma(__pte_val(pte) & ~(pteval_t)_PAGE_DIRTY); }
+-static inline pte_t pte_mkold(pte_t pte) { return __pte_ma(__pte_val(pte) & ~(pteval_t)_PAGE_ACCESSED); }
+-static inline pte_t pte_wrprotect(pte_t pte) { return __pte_ma(__pte_val(pte) & ~(pteval_t)_PAGE_RW); }
+-static inline pte_t pte_mkexec(pte_t pte) { return __pte_ma(__pte_val(pte) & ~(pteval_t)_PAGE_NX); }
+-static inline pte_t pte_mkdirty(pte_t pte) { return __pte_ma(__pte_val(pte) | _PAGE_DIRTY); }
+-static inline pte_t pte_mkyoung(pte_t pte) { return __pte_ma(__pte_val(pte) | _PAGE_ACCESSED); }
+-static inline pte_t pte_mkwrite(pte_t pte) { return __pte_ma(__pte_val(pte) | _PAGE_RW); }
+-static inline pte_t pte_mkhuge(pte_t pte) { return __pte_ma(__pte_val(pte) | _PAGE_PSE); }
+-static inline pte_t pte_clrhuge(pte_t pte) { return __pte_ma(__pte_val(pte) & ~(pteval_t)_PAGE_PSE); }
+-static inline pte_t pte_mkglobal(pte_t pte) { return pte; }
+-static inline pte_t pte_clrglobal(pte_t pte) { return pte; }
++static inline int pte_dirty(pte_t pte)
++{
++ return __pte_val(pte) & _PAGE_DIRTY;
++}
++
++static inline int pte_young(pte_t pte)
++{
++ return __pte_val(pte) & _PAGE_ACCESSED;
++}
++
++static inline int pte_write(pte_t pte)
++{
++ return __pte_val(pte) & _PAGE_RW;
++}
++
++static inline int pte_file(pte_t pte)
++{
++ return __pte_val(pte) & _PAGE_FILE;
++}
++
++static inline int pte_huge(pte_t pte)
++{
++ return __pte_val(pte) & _PAGE_PSE;
++}
++
++static inline int pte_global(pte_t pte)
++{
++ return 0;
++}
++
++static inline int pte_exec(pte_t pte)
++{
++ return !(__pte_val(pte) & _PAGE_NX);
++}
++
++static inline int pte_special(pte_t pte)
++{
++ return 0;
++}
++
++static inline int pmd_large(pmd_t pte)
++{
++ return (__pmd_val(pte) & (_PAGE_PSE | _PAGE_PRESENT)) ==
++ (_PAGE_PSE | _PAGE_PRESENT);
++}
++
++static inline pte_t pte_mkclean(pte_t pte)
++{
++ return __pte_ma(__pte_val(pte) & ~(pteval_t)_PAGE_DIRTY);
++}
++
++static inline pte_t pte_mkold(pte_t pte)
++{
++ return __pte_ma(__pte_val(pte) & ~(pteval_t)_PAGE_ACCESSED);
++}
++
++static inline pte_t pte_wrprotect(pte_t pte)
++{
++ return __pte_ma(__pte_val(pte) & ~(pteval_t)_PAGE_RW);
++}
++
++static inline pte_t pte_mkexec(pte_t pte)
++{
++ return __pte_ma(__pte_val(pte) & ~(pteval_t)_PAGE_NX);
++}
++
++static inline pte_t pte_mkdirty(pte_t pte)
++{
++ return __pte_ma(__pte_val(pte) | _PAGE_DIRTY);
++}
++
++static inline pte_t pte_mkyoung(pte_t pte)
++{
++ return __pte_ma(__pte_val(pte) | _PAGE_ACCESSED);
++}
++
++static inline pte_t pte_mkwrite(pte_t pte)
++{
++ return __pte_ma(__pte_val(pte) | _PAGE_RW);
++}
++
++static inline pte_t pte_mkhuge(pte_t pte)
++{
++ return __pte_ma(__pte_val(pte) | _PAGE_PSE);
++}
++
++static inline pte_t pte_clrhuge(pte_t pte)
++{
++ return __pte_ma(__pte_val(pte) & ~(pteval_t)_PAGE_PSE);
++}
++
++static inline pte_t pte_mkglobal(pte_t pte)
++{
++ return pte;
++}
++
++static inline pte_t pte_clrglobal(pte_t pte)
++{
++ return pte;
++}
++
++static inline pte_t pte_mkspecial(pte_t pte)
++{
++ return pte;
++}
+
+ extern pteval_t __supported_pte_mask;
+
+@@ -202,15 +311,33 @@ static inline pte_t pte_modify(pte_t pte
+ pteval_t val = pte_val(pte);
+
+ val &= _PAGE_CHG_MASK;
+- val |= pgprot_val(newprot) & __supported_pte_mask;
++ val |= pgprot_val(newprot) & (~_PAGE_CHG_MASK) & __supported_pte_mask;
+
+ return __pte(val);
+ }
+
+-#define pte_pgprot(x) __pgprot(pte_val(x) & (0xfff | _PAGE_NX))
++/* mprotect needs to preserve PAT bits when updating vm_page_prot */
++#define pgprot_modify pgprot_modify
++static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
++{
++ pgprotval_t preservebits = pgprot_val(oldprot) & _PAGE_CHG_MASK;
++ pgprotval_t addbits = pgprot_val(newprot);
++ return __pgprot(preservebits | addbits);
++}
++
++#define pte_pgprot(x) __pgprot(__pte_val(x) & ~PTE_MASK)
+
+ #define canon_pgprot(p) __pgprot(pgprot_val(p) & __supported_pte_mask)
+
++#ifndef __ASSEMBLY__
++#define __HAVE_PHYS_MEM_ACCESS_PROT
++struct file;
++pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
++ unsigned long size, pgprot_t vma_prot);
++int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
++ unsigned long size, pgprot_t *vma_prot);
++#endif
++
+ #define set_pte(ptep, pte) xen_set_pte(ptep, pte)
+ #define set_pte_at(mm, addr, ptep, pte) xen_set_pte_at(mm, addr, ptep, pte)
+
+@@ -246,6 +373,9 @@ static inline pte_t pte_modify(pte_t pte
+ # include "pgtable_64.h"
+ #endif
+
++#define KERNEL_PGD_BOUNDARY pgd_index(PAGE_OFFSET)
++#define KERNEL_PGD_PTRS (PTRS_PER_PGD - KERNEL_PGD_BOUNDARY)
++
+ #ifndef __ASSEMBLY__
+
+ enum {
+@@ -312,46 +442,17 @@ static inline void xen_pte_clear(struct
+ * bit at the same time.
+ */
+ #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
+-#define ptep_set_access_flags(vma, address, ptep, entry, dirty) \
+-({ \
+- int __changed = !pte_same(*(ptep), entry); \
+- if (__changed && (dirty)) { \
+- if ( likely((vma)->vm_mm == current->mm) ) { \
+- BUG_ON(HYPERVISOR_update_va_mapping(address, \
+- entry, \
+- (unsigned long)(vma)->vm_mm->cpu_vm_mask.bits| \
+- UVMF_INVLPG|UVMF_MULTI)); \
+- } else { \
+- xen_l1_entry_update(ptep, entry); \
+- flush_tlb_page(vma, address); \
+- } \
+- } \
+- __changed; \
+-})
++extern int ptep_set_access_flags(struct vm_area_struct *vma,
++ unsigned long address, pte_t *ptep,
++ pte_t entry, int dirty);
+
+ #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
+-#define ptep_test_and_clear_young(vma, addr, ptep) ({ \
+- int __ret = 0; \
+- if (pte_young(*(ptep))) \
+- __ret = test_and_clear_bit(_PAGE_BIT_ACCESSED, \
+- &(ptep)->pte); \
+- if (__ret) \
+- pte_update((vma)->vm_mm, addr, ptep); \
+- __ret; \
+-})
++extern int ptep_test_and_clear_young(struct vm_area_struct *vma,
++ unsigned long addr, pte_t *ptep);
+
+ #define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
+-#define ptep_clear_flush_young(vma, address, ptep) \
+-({ \
+- pte_t __pte = *(ptep); \
+- int __young = pte_young(__pte); \
+- __pte = pte_mkold(__pte); \
+- if (PagePinned(virt_to_page((vma)->vm_mm->pgd))) \
+- (void)ptep_set_access_flags(vma, address, ptep, __pte, __young); \
+- else if (__young) \
+- (ptep)->pte_low = __pte.pte_low; \
+- __young; \
+-})
++extern int ptep_clear_flush_young(struct vm_area_struct *vma,
++ unsigned long address, pte_t *ptep);
+
+ #define __HAVE_ARCH_PTEP_CLEAR_FLUSH
+ #define ptep_clear_flush(vma, addr, ptep) \
+@@ -370,7 +471,8 @@ static inline void xen_pte_clear(struct
+ })
+
+ #define __HAVE_ARCH_PTEP_GET_AND_CLEAR
+-static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
++static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
++ pte_t *ptep)
+ {
+ pte_t pte = *ptep;
+ if (!pte_none(pte)
+@@ -398,13 +500,29 @@ static inline pte_t ptep_get_and_clear(s
+ pte_t xen_ptep_get_and_clear_full(struct vm_area_struct *, unsigned long, pte_t *, int);
+
+ #define __HAVE_ARCH_PTEP_SET_WRPROTECT
+-static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
++static inline void ptep_set_wrprotect(struct mm_struct *mm,
++ unsigned long addr, pte_t *ptep)
+ {
+ pte_t pte = *ptep;
+ if (pte_write(pte))
+ set_pte_at(mm, addr, ptep, pte_wrprotect(pte));
+ }
+
++/*
++ * clone_pgd_range(pgd_t *dst, pgd_t *src, int count);
++ *
++ * dst - pointer to pgd range anywhere on a pgd page
++ * src - ""
++ * count - the number of pgds to copy.
++ *
++ * dst and src can be on the same page, but the range must not overlap,
++ * and must not cross a page boundary.
++ */
++static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
++{
++ memcpy(dst, src, count * sizeof(pgd_t));
++}
++
+ #define arch_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable) \
+ xen_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable)
+
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/pgtable-3level.h 2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/pgtable-3level.h 2009-03-16 16:38:05.000000000 +0100
+@@ -8,25 +8,28 @@
+ * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
+ */
+
+-#define pte_ERROR(e) \
+- printk("%s:%d: bad pte %p(%016Lx pfn %08lx).\n", __FILE__, __LINE__, \
+- &(e), __pte_val(e), pte_pfn(e))
+-#define pmd_ERROR(e) \
+- printk("%s:%d: bad pmd %p(%016Lx pfn %08Lx).\n", __FILE__, __LINE__, \
+- &(e), __pmd_val(e), (pmd_val(e) & PTE_MASK) >> PAGE_SHIFT)
+-#define pgd_ERROR(e) \
+- printk("%s:%d: bad pgd %p(%016Lx pfn %08Lx).\n", __FILE__, __LINE__, \
+- &(e), __pgd_val(e), (pgd_val(e) & PTE_MASK) >> PAGE_SHIFT)
+-
++#define pte_ERROR(e) \
++ printk("%s:%d: bad pte %p(%016Lx pfn %08lx).\n", \
++ __FILE__, __LINE__, &(e), __pte_val(e), pte_pfn(e))
++#define pmd_ERROR(e) \
++ printk("%s:%d: bad pmd %p(%016Lx pfn %08Lx).\n", \
++ __FILE__, __LINE__, &(e), __pmd_val(e), \
++ (pmd_val(e) & PTE_MASK) >> PAGE_SHIFT)
++#define pgd_ERROR(e) \
++ printk("%s:%d: bad pgd %p(%016Lx pfn %08Lx).\n", \
++ __FILE__, __LINE__, &(e), __pgd_val(e), \
++ (pgd_val(e) & PTE_MASK) >> PAGE_SHIFT)
+
+ static inline int pud_none(pud_t pud)
+ {
+ return __pud_val(pud) == 0;
++
+ }
+ static inline int pud_bad(pud_t pud)
+ {
+ return (__pud_val(pud) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER)) != 0;
+ }
++
+ static inline int pud_present(pud_t pud)
+ {
+ return __pud_val(pud) & _PAGE_PRESENT;
+@@ -48,12 +51,14 @@ static inline void xen_set_pte(pte_t *pt
+
+ static inline void xen_set_pte_atomic(pte_t *ptep, pte_t pte)
+ {
+- set_64bit((unsigned long long *)(ptep),__pte_val(pte));
++ set_64bit((unsigned long long *)(ptep), __pte_val(pte));
+ }
++
+ static inline void xen_set_pmd(pmd_t *pmdp, pmd_t pmd)
+ {
+ xen_l2_entry_update(pmdp, pmd);
+ }
++
+ static inline void xen_set_pud(pud_t *pudp, pud_t pud)
+ {
+ xen_l3_entry_update(pudp, pud);
+@@ -92,20 +97,19 @@ static inline void pud_clear(pud_t *pudp
+ * current pgd to avoid unnecessary TLB flushes.
+ */
+ pgd = read_cr3();
+- if (__pa(pudp) >= pgd && __pa(pudp) < (pgd + sizeof(pgd_t)*PTRS_PER_PGD))
++ if (__pa(pudp) >= pgd && __pa(pudp) <
++ (pgd + sizeof(pgd_t)*PTRS_PER_PGD))
+ xen_tlb_flush();
+ }
+
+-#define pud_page(pud) \
+-((struct page *) __va(pud_val(pud) & PAGE_MASK))
++#define pud_page(pud) ((struct page *) __va(pud_val(pud) & PTE_MASK))
+
+-#define pud_page_vaddr(pud) \
+-((unsigned long) __va(pud_val(pud) & PAGE_MASK))
++#define pud_page_vaddr(pud) ((unsigned long) __va(pud_val(pud) & PTE_MASK))
+
+
+ /* Find an entry in the second-level page table.. */
+-#define pmd_offset(pud, address) ((pmd_t *) pud_page(*(pud)) + \
+- pmd_index(address))
++#define pmd_offset(pud, address) ((pmd_t *)pud_page(*(pud)) + \
++ pmd_index(address))
+
+ #ifdef CONFIG_SMP
+ static inline pte_t xen_ptep_get_and_clear(pte_t *ptep, pte_t res)
+@@ -150,7 +154,8 @@ static inline int pte_none(pte_t pte)
+ * put the 32 bits of offset into the high part.
+ */
+ #define pte_to_pgoff(pte) ((pte).pte_high)
+-#define pgoff_to_pte(off) ((pte_t) { { .pte_low = _PAGE_FILE, .pte_high = (off) } })
++#define pgoff_to_pte(off) \
++ ((pte_t) { { .pte_low = _PAGE_FILE, .pte_high = (off) } })
+ #define PTE_FILE_MAX_BITS 32
+
+ /* Encode and de-code a swap entry */
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/pgtable_32.h 2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/pgtable_32.h 2009-03-16 16:38:05.000000000 +0100
+@@ -38,16 +38,13 @@ void paging_init(void);
+ #ifdef CONFIG_X86_PAE
+ # include <asm/pgtable-3level-defs.h>
+ # define PMD_SIZE (1UL << PMD_SHIFT)
+-# define PMD_MASK (~(PMD_SIZE-1))
++# define PMD_MASK (~(PMD_SIZE - 1))
+ #else
+ # include <asm/pgtable-2level-defs.h>
+ #endif
+
+ #define PGDIR_SIZE (1UL << PGDIR_SHIFT)
+-#define PGDIR_MASK (~(PGDIR_SIZE-1))
+-
+-#define USER_PGD_PTRS (PAGE_OFFSET >> PGDIR_SHIFT)
+-#define KERNEL_PGD_PTRS (PTRS_PER_PGD-USER_PGD_PTRS)
++#define PGDIR_MASK (~(PGDIR_SIZE - 1))
+
+ /* Just any arbitrary offset to the start of the vmalloc VM area: the
+ * current 8MB value just means that there will be a 8MB "hole" after the
+@@ -56,21 +53,22 @@ void paging_init(void);
+ * The vmalloc() routines leaves a hole of 4kB between each vmalloced
+ * area for the same reason. ;)
+ */
+-#define VMALLOC_OFFSET (8*1024*1024)
+-#define VMALLOC_START (((unsigned long) high_memory + \
+- 2*VMALLOC_OFFSET-1) & ~(VMALLOC_OFFSET-1))
++#define VMALLOC_OFFSET (8 * 1024 * 1024)
++#define VMALLOC_START (((unsigned long)high_memory + 2 * VMALLOC_OFFSET - 1) \
++ & ~(VMALLOC_OFFSET - 1))
+ #ifdef CONFIG_X86_PAE
+ #define LAST_PKMAP 512
+ #else
+ #define LAST_PKMAP 1024
+ #endif
+
+-#define PKMAP_BASE ((FIXADDR_BOOT_START - PAGE_SIZE*(LAST_PKMAP + 1)) & PMD_MASK)
++#define PKMAP_BASE ((FIXADDR_BOOT_START - PAGE_SIZE * (LAST_PKMAP + 1)) \
++ & PMD_MASK)
+
+ #ifdef CONFIG_HIGHMEM
+-# define VMALLOC_END (PKMAP_BASE-2*PAGE_SIZE)
++# define VMALLOC_END (PKMAP_BASE - 2 * PAGE_SIZE)
+ #else
+-# define VMALLOC_END (FIXADDR_START-2*PAGE_SIZE)
++# define VMALLOC_END (FIXADDR_START - 2 * PAGE_SIZE)
+ #endif
+
+ /*
+@@ -91,10 +89,10 @@ extern unsigned long pg0[];
+ /* pmd_present doesn't just test the _PAGE_PRESENT bit since wr.p.t.
+ can temporarily clear it. */
+ #define pmd_present(x) (__pmd_val(x))
+-#define pmd_bad(x) ((__pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER & ~_PAGE_PRESENT)) != (_KERNPG_TABLE & ~_PAGE_PRESENT))
++#define pmd_bad(x) ((__pmd_val(x) & (~PTE_MASK & ~_PAGE_USER & ~_PAGE_PRESENT)) != (_KERNPG_TABLE & ~_PAGE_PRESENT))
+ #else
+ #define pmd_present(x) (__pmd_val(x) & _PAGE_PRESENT)
+-#define pmd_bad(x) ((__pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE)
++#define pmd_bad(x) ((__pmd_val(x) & (~PTE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE)
+ #endif
+
+
+@@ -107,32 +105,18 @@ extern unsigned long pg0[];
+ #endif
+
+ /*
+- * clone_pgd_range(pgd_t *dst, pgd_t *src, int count);
+- *
+- * dst - pointer to pgd range anwhere on a pgd page
+- * src - ""
+- * count - the number of pgds to copy.
+- *
+- * dst and src can be on the same page, but the range must not overlap,
+- * and must not cross a page boundary.
++ * Macro to mark a page protection value as "uncacheable".
++ * On processors which do not support it, this is a no-op.
+ */
+-static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
+-{
+- memcpy(dst, src, count * sizeof(pgd_t));
+-}
+-
+-/*
+- * Macro to mark a page protection value as "uncacheable". On processors which do not support
+- * it, this is a no-op.
+- */
+-#define pgprot_noncached(prot) ((boot_cpu_data.x86 > 3) \
+- ? (__pgprot(pgprot_val(prot) | _PAGE_PCD | _PAGE_PWT)) : (prot))
++#define pgprot_noncached(prot) \
++ ((boot_cpu_data.x86 > 3) \
++ ? (__pgprot(pgprot_val(prot) | _PAGE_PCD | _PAGE_PWT)) \
++ : (prot))
+
+ /*
+ * Conversion functions: convert a page and protection to a page entry,
+ * and a page entry and page directory to the page they refer to.
+ */
+-
+ #define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot))
+
+ /*
+@@ -141,20 +125,20 @@ static inline void clone_pgd_range(pgd_t
+ * this macro returns the index of the entry in the pgd page which would
+ * control the given virtual address
+ */
+-#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
+-#define pgd_index_k(addr) pgd_index(addr)
++#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
++#define pgd_index_k(addr) pgd_index((addr))
+
+ /*
+ * pgd_offset() returns a (pgd_t *)
+ * pgd_index() is used get the offset into the pgd page's array of pgd_t's;
+ */
+-#define pgd_offset(mm, address) ((mm)->pgd+pgd_index(address))
++#define pgd_offset(mm, address) ((mm)->pgd + pgd_index((address)))
+
+ /*
+ * a shortcut which implies the use of the kernel's pgd, instead
+ * of a process's
+ */
+-#define pgd_offset_k(address) pgd_offset(&init_mm, address)
++#define pgd_offset_k(address) pgd_offset(&init_mm, (address))
+
+ static inline int pud_large(pud_t pud) { return 0; }
+
+@@ -164,8 +148,8 @@ static inline int pud_large(pud_t pud) {
+ * this macro returns the index of the entry in the pmd page which would
+ * control the given virtual address
+ */
+-#define pmd_index(address) \
+- (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
++#define pmd_index(address) \
++ (((address) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))
+
+ /*
+ * the pte page can be thought of an array like this: pte_t[PTRS_PER_PTE]
+@@ -173,33 +157,36 @@ static inline int pud_large(pud_t pud) {
+ * this macro returns the index of the entry in the pte page which would
+ * control the given virtual address
+ */
+-#define pte_index(address) \
+- (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
+-#define pte_offset_kernel(dir, address) \
+- ((pte_t *) pmd_page_vaddr(*(dir)) + pte_index(address))
++#define pte_index(address) \
++ (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
++#define pte_offset_kernel(dir, address) \
++ ((pte_t *)pmd_page_vaddr(*(dir)) + pte_index((address)))
+
+-#define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT))
++#define pmd_page(pmd) (pfn_to_page(pmd_val((pmd)) >> PAGE_SHIFT))
+
+-#define pmd_page_vaddr(pmd) \
+- ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
++#define pmd_page_vaddr(pmd) \
++ ((unsigned long)__va(pmd_val((pmd)) & PTE_MASK))
+
+ #if defined(CONFIG_HIGHPTE)
+-#define pte_offset_map(dir, address) \
+- ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE0) + pte_index(address))
+-#define pte_offset_map_nested(dir, address) \
+- ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE1) + pte_index(address))
+-#define pte_unmap(pte) kunmap_atomic(pte, KM_PTE0)
+-#define pte_unmap_nested(pte) kunmap_atomic(pte, KM_PTE1)
+-#else
+-#define pte_offset_map(dir, address) \
+- ((pte_t *)page_address(pmd_page(*(dir))) + pte_index(address))
+-#define pte_offset_map_nested(dir, address) pte_offset_map(dir, address)
++#define pte_offset_map(dir, address) \
++ ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE0) + \
++ pte_index((address)))
++#define pte_offset_map_nested(dir, address) \
++ ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE1) + \
++ pte_index((address)))
++#define pte_unmap(pte) kunmap_atomic((pte), KM_PTE0)
++#define pte_unmap_nested(pte) kunmap_atomic((pte), KM_PTE1)
++#else
++#define pte_offset_map(dir, address) \
++ ((pte_t *)page_address(pmd_page(*(dir))) + pte_index((address)))
++#define pte_offset_map_nested(dir, address) pte_offset_map((dir), (address))
+ #define pte_unmap(pte) do { } while (0)
+ #define pte_unmap_nested(pte) do { } while (0)
+ #endif
+
+ /* Clear a kernel PTE and flush it from the TLB */
+-#define kpte_clear_flush(ptep, vaddr) do { \
++#define kpte_clear_flush(ptep, vaddr) \
++do { \
+ if (HYPERVISOR_update_va_mapping(vaddr, __pte(0), UVMF_INVLPG)) \
+ BUG(); \
+ } while (0)
+@@ -208,7 +195,7 @@ static inline int pud_large(pud_t pud) {
+ * The i386 doesn't have any external MMU info: the kernel page
+ * tables contain all the necessary information.
+ */
+-#define update_mmu_cache(vma,address,pte) do { } while (0)
++#define update_mmu_cache(vma, address, pte) do { } while (0)
+
+ void make_lowmem_page_readonly(void *va, unsigned int feature);
+ void make_lowmem_page_writable(void *va, unsigned int feature);
+@@ -225,7 +212,7 @@ void make_lowmem_page_writable(void *va,
+ #define kern_addr_valid(kaddr) (0)
+ #endif
+
+-#define io_remap_pfn_range(vma,from,pfn,size,prot) \
+-direct_remap_pfn_range(vma,from,pfn,size,prot,DOMID_IO)
++#define io_remap_pfn_range(vma, from, pfn, size, prot) \
++ direct_remap_pfn_range(vma, from, pfn, size, prot, DOMID_IO)
+
+ #endif /* _I386_PGTABLE_H */
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/pgtable_64.h 2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/pgtable_64.h 2009-03-16 16:38:05.000000000 +0100
+@@ -31,7 +31,7 @@ extern void paging_init(void);
+
+ #endif /* !__ASSEMBLY__ */
+
+-#define SHARED_KERNEL_PMD 1
++#define SHARED_KERNEL_PMD 0
+
+ /*
+ * PGDIR_SHIFT determines what a top-level page table entry can map
+@@ -59,18 +59,20 @@ extern void paging_init(void);
+
+ #ifndef __ASSEMBLY__
+
+-#define pte_ERROR(e) \
+- printk("%s:%d: bad pte %p(%016lx pfn %010lx).\n", __FILE__, __LINE__, \
+- &(e), __pte_val(e), pte_pfn(e))
+-#define pmd_ERROR(e) \
+- printk("%s:%d: bad pmd %p(%016lx pfn %010lx).\n", __FILE__, __LINE__, \
+- &(e), __pmd_val(e), pmd_pfn(e))
+-#define pud_ERROR(e) \
+- printk("%s:%d: bad pud %p(%016lx pfn %010lx).\n", __FILE__, __LINE__, \
+- &(e), __pud_val(e), (pud_val(e) & __PHYSICAL_MASK) >> PAGE_SHIFT)
+-#define pgd_ERROR(e) \
+- printk("%s:%d: bad pgd %p(%016lx pfn %010lx).\n", __FILE__, __LINE__, \
+- &(e), __pgd_val(e), (pgd_val(e) & __PHYSICAL_MASK) >> PAGE_SHIFT)
++#define pte_ERROR(e) \
++ printk("%s:%d: bad pte %p(%016lx pfn %010lx).\n", \
++ __FILE__, __LINE__, &(e), __pte_val(e), pte_pfn(e))
++#define pmd_ERROR(e) \
++ printk("%s:%d: bad pmd %p(%016lx pfn %010lx).\n", \
++ __FILE__, __LINE__, &(e), __pmd_val(e), pmd_pfn(e))
++#define pud_ERROR(e) \
++ printk("%s:%d: bad pud %p(%016lx pfn %010lx).\n", \
++ __FILE__, __LINE__, &(e), __pud_val(e), \
++ (pud_val(e) & __PHYSICAL_MASK) >> PAGE_SHIFT)
++#define pgd_ERROR(e) \
++ printk("%s:%d: bad pgd %p(%016lx pfn %010lx).\n", \
++ __FILE__, __LINE__, &(e), __pgd_val(e), \
++ (pgd_val(e) & __PHYSICAL_MASK) >> PAGE_SHIFT)
+
+ #define pgd_none(x) (!__pgd_val(x))
+ #define pud_none(x) (!__pud_val(x))
+@@ -125,7 +127,7 @@ static inline void xen_set_pgd(pgd_t *pg
+ xen_l4_entry_update(pgdp, pgd);
+ }
+
+-static inline void xen_pgd_clear(pgd_t * pgd)
++static inline void xen_pgd_clear(pgd_t *pgd)
+ {
+ xen_set_pgd(pgd, xen_make_pgd(0));
+ xen_set_pgd(__user_pgd(pgd), xen_make_pgd(0));
+@@ -135,43 +137,43 @@ static inline void xen_pgd_clear(pgd_t *
+
+ #endif /* !__ASSEMBLY__ */
+
+-#define PMD_SIZE (_AC(1,UL) << PMD_SHIFT)
+-#define PMD_MASK (~(PMD_SIZE-1))
+-#define PUD_SIZE (_AC(1,UL) << PUD_SHIFT)
+-#define PUD_MASK (~(PUD_SIZE-1))
+-#define PGDIR_SIZE (_AC(1,UL) << PGDIR_SHIFT)
+-#define PGDIR_MASK (~(PGDIR_SIZE-1))
++#define PMD_SIZE (_AC(1, UL) << PMD_SHIFT)
++#define PMD_MASK (~(PMD_SIZE - 1))
++#define PUD_SIZE (_AC(1, UL) << PUD_SHIFT)
++#define PUD_MASK (~(PUD_SIZE - 1))
++#define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT)
++#define PGDIR_MASK (~(PGDIR_SIZE - 1))
+
+
+-#define MAXMEM _AC(0x3fffffffffff, UL)
++#define MAXMEM _AC(0x00003fffffffffff, UL)
+ #define VMALLOC_START _AC(0xffffc20000000000, UL)
+ #define VMALLOC_END _AC(0xffffe1ffffffffff, UL)
+ #define VMEMMAP_START _AC(0xffffe20000000000, UL)
+-#define MODULES_VADDR _AC(0xffffffff88000000, UL)
++#define MODULES_VADDR _AC(0xffffffffa0000000, UL)
+ #define MODULES_END _AC(0xfffffffffff00000, UL)
+ #define MODULES_LEN (MODULES_END - MODULES_VADDR)
+
+ #ifndef __ASSEMBLY__
+
+-static inline unsigned long pgd_bad(pgd_t pgd)
++static inline int pgd_bad(pgd_t pgd)
+ {
+- return __pgd_val(pgd) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER);
++ return (__pgd_val(pgd) & ~(PTE_MASK | _PAGE_USER)) != _KERNPG_TABLE;
+ }
+
+-static inline unsigned long pud_bad(pud_t pud)
++static inline int pud_bad(pud_t pud)
+ {
+- return __pud_val(pud) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER);
++ return (__pud_val(pud) & ~(PTE_MASK | _PAGE_USER)) != _KERNPG_TABLE;
+ }
+
+-static inline unsigned long pmd_bad(pmd_t pmd)
++static inline int pmd_bad(pmd_t pmd)
+ {
+- return __pmd_val(pmd) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER);
++ return (__pmd_val(pmd) & ~(PTE_MASK | _PAGE_USER)) != _KERNPG_TABLE;
+ }
+
+ #define pte_none(x) (!(x).pte)
+ #define pte_present(x) ((x).pte & (_PAGE_PRESENT | _PAGE_PROTNONE))
+
+-#define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT)) /* FIXME: is this right? */
++#define pages_to_mb(x) ((x) >> (20 - PAGE_SHIFT)) /* FIXME: is this right? */
+
+ #define __pte_mfn(_pte) (((_pte).pte & PTE_MASK) >> PAGE_SHIFT)
+ #define pte_mfn(_pte) ((_pte).pte & _PAGE_PRESENT ? \
+@@ -181,13 +183,13 @@ static inline unsigned long pmd_bad(pmd_
+ mfn_to_local_pfn(__pte_mfn(_pte)) : \
+ __pte_mfn(_pte))
+
+-#define pte_page(x) pfn_to_page(pte_pfn(x))
++#define pte_page(x) pfn_to_page(pte_pfn((x)))
+
+ /*
+ * Macro to mark a page protection value as "uncacheable".
+ */
+-#define pgprot_noncached(prot) (__pgprot(pgprot_val(prot) | _PAGE_PCD | _PAGE_PWT))
+-
++#define pgprot_noncached(prot) \
++ (__pgprot(pgprot_val((prot)) | _PAGE_PCD | _PAGE_PWT))
+
+ /*
+ * Conversion functions: convert a page and protection to a page entry,
+@@ -197,36 +199,39 @@ static inline unsigned long pmd_bad(pmd_
+ /*
+ * Level 4 access.
+ */
+-#define pgd_page_vaddr(pgd) ((unsigned long) __va(pgd_val(pgd) & PTE_MASK))
+-#define pgd_page(pgd) (pfn_to_page(pgd_val(pgd) >> PAGE_SHIFT))
+-#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
+-#define pgd_offset(mm, addr) ((mm)->pgd + pgd_index(addr))
+-#define pgd_offset_k(address) (init_level4_pgt + pgd_index(address))
++#define pgd_page_vaddr(pgd) \
++ ((unsigned long)__va((unsigned long)pgd_val((pgd)) & PTE_MASK))
++#define pgd_page(pgd) (pfn_to_page(pgd_val((pgd)) >> PAGE_SHIFT))
++#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
++#define pgd_offset(mm, address) ((mm)->pgd + pgd_index((address)))
++#define pgd_offset_k(address) (init_level4_pgt + pgd_index((address)))
+ #define pgd_present(pgd) (__pgd_val(pgd) & _PAGE_PRESENT)
+ static inline int pgd_large(pgd_t pgd) { return 0; }
+ #define mk_kernel_pgd(address) __pgd((address) | _KERNPG_TABLE)
+
+ /* PUD - Level3 access */
+ /* to find an entry in a page-table-directory. */
+-#define pud_page_vaddr(pud) ((unsigned long) __va(pud_val(pud) & PHYSICAL_PAGE_MASK))
+-#define pud_page(pud) (pfn_to_page(pud_val(pud) >> PAGE_SHIFT))
+-#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
+-#define pud_offset(pgd, address) ((pud_t *) pgd_page_vaddr(*(pgd)) + pud_index(address))
++#define pud_page_vaddr(pud) \
++ ((unsigned long)__va(pud_val((pud)) & PHYSICAL_PAGE_MASK))
++#define pud_page(pud) (pfn_to_page(pud_val((pud)) >> PAGE_SHIFT))
++#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
++#define pud_offset(pgd, address) \
++ ((pud_t *)pgd_page_vaddr(*(pgd)) + pud_index((address)))
+ #define pud_present(pud) (__pud_val(pud) & _PAGE_PRESENT)
+
+ static inline int pud_large(pud_t pte)
+ {
+- return (__pud_val(pte) & (_PAGE_PSE|_PAGE_PRESENT)) ==
+- (_PAGE_PSE|_PAGE_PRESENT);
++ return (__pud_val(pte) & (_PAGE_PSE | _PAGE_PRESENT)) ==
++ (_PAGE_PSE | _PAGE_PRESENT);
+ }
+
+ /* PMD - Level 2 access */
+-#define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_val(pmd) & PTE_MASK))
+-#define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT))
++#define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_val((pmd)) & PTE_MASK))
++#define pmd_page(pmd) (pfn_to_page(pmd_val((pmd)) >> PAGE_SHIFT))
+
+-#define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
+-#define pmd_offset(dir, address) ((pmd_t *) pud_page_vaddr(*(dir)) + \
+- pmd_index(address))
++#define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))
++#define pmd_offset(dir, address) ((pmd_t *)pud_page_vaddr(*(dir)) + \
++ pmd_index(address))
+ #define pmd_none(x) (!__pmd_val(x))
+ #if CONFIG_XEN_COMPAT <= 0x030002
+ /* pmd_present doesn't just test the _PAGE_PRESENT bit since wr.p.t.
+@@ -235,43 +240,56 @@ static inline int pud_large(pud_t pte)
+ #else
+ #define pmd_present(x) (__pmd_val(x) & _PAGE_PRESENT)
+ #endif
+-#define pfn_pmd(nr,prot) (__pmd(((nr) << PAGE_SHIFT) | pgprot_val(prot)))
+-#define pmd_pfn(x) ((pmd_val(x) & __PHYSICAL_MASK) >> PAGE_SHIFT)
++#define pfn_pmd(nr, prot) (__pmd(((nr) << PAGE_SHIFT) | pgprot_val((prot))))
++#define pmd_pfn(x) ((pmd_val((x)) & __PHYSICAL_MASK) >> PAGE_SHIFT)
+
+ #define pte_to_pgoff(pte) ((__pte_val(pte) & PHYSICAL_PAGE_MASK) >> PAGE_SHIFT)
+-#define pgoff_to_pte(off) ((pte_t) { .pte = ((off) << PAGE_SHIFT) | _PAGE_FILE })
++#define pgoff_to_pte(off) ((pte_t) { .pte = ((off) << PAGE_SHIFT) | \
++ _PAGE_FILE })
+ #define PTE_FILE_MAX_BITS __PHYSICAL_MASK_SHIFT
+
+ /* PTE - Level 1 access. */
+
+ /* page, protection -> pte */
+-#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot))
+-
+-#define pte_index(address) \
+- (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
++#define mk_pte(page, pgprot) pfn_pte(page_to_pfn((page)), (pgprot))
++
++#define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
+ #define pte_offset_kernel(dir, address) ((pte_t *) pmd_page_vaddr(*(dir)) + \
+- pte_index(address))
++ pte_index((address)))
+
+ /* x86-64 always has all page tables mapped. */
+-#define pte_offset_map(dir,address) pte_offset_kernel(dir,address)
+-#define pte_offset_map_nested(dir,address) pte_offset_kernel(dir,address)
++#define pte_offset_map(dir, address) pte_offset_kernel((dir), (address))
++#define pte_offset_map_nested(dir, address) pte_offset_kernel((dir), (address))
+ #define pte_unmap(pte) /* NOP */
+-#define pte_unmap_nested(pte) /* NOP */
++#define pte_unmap_nested(pte) /* NOP */
++
++#define update_mmu_cache(vma, address, pte) do { } while (0)
+
+-#define update_mmu_cache(vma,address,pte) do { } while (0)
++#define direct_gbpages 0
+
+ /* Encode and de-code a swap entry */
+-#define __swp_type(x) (((x).val >> 1) & 0x3f)
+-#define __swp_offset(x) ((x).val >> 8)
+-#define __swp_entry(type, offset) ((swp_entry_t) { ((type) << 1) | ((offset) << 8) })
++#if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE
++#define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1)
++#define SWP_OFFSET_SHIFT (_PAGE_BIT_PROTNONE + 1)
++#else
++#define SWP_TYPE_BITS (_PAGE_BIT_PROTNONE - _PAGE_BIT_PRESENT - 1)
++#define SWP_OFFSET_SHIFT (_PAGE_BIT_FILE + 1)
++#endif
++
++#define __swp_type(x) (((x).val >> (_PAGE_BIT_PRESENT + 1)) \
++ & ((1U << SWP_TYPE_BITS) - 1))
++#define __swp_offset(x) ((x).val >> SWP_OFFSET_SHIFT)
++#define __swp_entry(type, offset) ((swp_entry_t) { \
++ ((type) << (_PAGE_BIT_PRESENT + 1)) \
++ | ((offset) << SWP_OFFSET_SHIFT) })
+ #define __pte_to_swp_entry(pte) ((swp_entry_t) { __pte_val(pte) })
+ #define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val })
+
+-extern int kern_addr_valid(unsigned long addr);
++extern int kern_addr_valid(unsigned long addr);
+ extern void cleanup_highmap(void);
+
+-#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \
+- direct_remap_pfn_range(vma,vaddr,pfn,size,prot,DOMID_IO)
++#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \
++ direct_remap_pfn_range(vma, vaddr, pfn, size, prot, DOMID_IO)
+
+ #define HAVE_ARCH_UNMAPPED_AREA
+ #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
+@@ -284,8 +302,10 @@ extern void cleanup_highmap(void);
+
+ /* fs/proc/kcore.c */
+ #define kc_vaddr_to_offset(v) ((v) & __VIRTUAL_MASK)
+-#define kc_offset_to_vaddr(o) \
+- (((o) & (1UL << (__VIRTUAL_MASK_SHIFT-1))) ? ((o) | (~__VIRTUAL_MASK)) : (o))
++#define kc_offset_to_vaddr(o) \
++ (((o) & (1UL << (__VIRTUAL_MASK_SHIFT - 1))) \
++ ? ((o) | ~__VIRTUAL_MASK) \
++ : (o))
+
+ #define __HAVE_ARCH_PTE_SAME
+ #endif /* !__ASSEMBLY__ */
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/processor.h 2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/processor.h 2009-03-16 16:38:05.000000000 +0100
+@@ -3,10 +3,6 @@
+
+ #include <asm/processor-flags.h>
+
+-/* migration helpers, for KVM - will be removed in 2.6.25: */
+-#include <asm/vm86.h>
+-#define Xgt_desc_struct desc_ptr
+-
+ /* Forward declaration, a strange C thing */
+ struct task_struct;
+ struct mm_struct;
+@@ -24,6 +20,7 @@ struct mm_struct;
+ #include <asm/msr.h>
+ #include <asm/desc_defs.h>
+ #include <asm/nops.h>
++
+ #include <linux/personality.h>
+ #include <linux/cpumask.h>
+ #include <linux/cache.h>
+@@ -38,16 +35,18 @@ struct mm_struct;
+ static inline void *current_text_addr(void)
+ {
+ void *pc;
+- asm volatile("mov $1f,%0\n1:":"=r" (pc));
++
++ asm volatile("mov $1f, %0; 1:":"=r" (pc));
++
+ return pc;
+ }
+
+ #ifdef CONFIG_X86_VSMP
+-#define ARCH_MIN_TASKALIGN (1 << INTERNODE_CACHE_SHIFT)
+-#define ARCH_MIN_MMSTRUCT_ALIGN (1 << INTERNODE_CACHE_SHIFT)
++# define ARCH_MIN_TASKALIGN (1 << INTERNODE_CACHE_SHIFT)
++# define ARCH_MIN_MMSTRUCT_ALIGN (1 << INTERNODE_CACHE_SHIFT)
+ #else
+-#define ARCH_MIN_TASKALIGN 16
+-#define ARCH_MIN_MMSTRUCT_ALIGN 0
++# define ARCH_MIN_TASKALIGN 16
++# define ARCH_MIN_MMSTRUCT_ALIGN 0
+ #endif
+
+ /*
+@@ -57,68 +56,80 @@ static inline void *current_text_addr(vo
+ */
+
+ struct cpuinfo_x86 {
+- __u8 x86; /* CPU family */
+- __u8 x86_vendor; /* CPU vendor */
+- __u8 x86_model;
+- __u8 x86_mask;
++ __u8 x86; /* CPU family */
++ __u8 x86_vendor; /* CPU vendor */
++ __u8 x86_model;
++ __u8 x86_mask;
+ #ifdef CONFIG_X86_32
+- char wp_works_ok; /* It doesn't on 386's */
+- char hlt_works_ok; /* Problems on some 486Dx4's and old 386's */
+- char hard_math;
+- char rfu;
+- char fdiv_bug;
+- char f00f_bug;
+- char coma_bug;
+- char pad0;
++ char wp_works_ok; /* It doesn't on 386's */
++
++ /* Problems on some 486Dx4's and old 386's: */
++ char hlt_works_ok;
++ char hard_math;
++ char rfu;
++ char fdiv_bug;
++ char f00f_bug;
++ char coma_bug;
++ char pad0;
+ #else
+- /* number of 4K pages in DTLB/ITLB combined(in pages)*/
+- int x86_tlbsize;
+- __u8 x86_virt_bits, x86_phys_bits;
+- /* cpuid returned core id bits */
+- __u8 x86_coreid_bits;
+- /* Max extended CPUID function supported */
+- __u32 extended_cpuid_level;
+-#endif
+- int cpuid_level; /* Maximum supported CPUID level, -1=no CPUID */
+- __u32 x86_capability[NCAPINTS];
+- char x86_vendor_id[16];
+- char x86_model_id[64];
+- int x86_cache_size; /* in KB - valid for CPUS which support this
+- call */
+- int x86_cache_alignment; /* In bytes */
+- int x86_power;
+- unsigned long loops_per_jiffy;
++ /* Number of 4K pages in DTLB/ITLB combined(in pages): */
++ int x86_tlbsize;
++ __u8 x86_virt_bits;
++ __u8 x86_phys_bits;
++ /* CPUID returned core id bits: */
++ __u8 x86_coreid_bits;
++ /* Max extended CPUID function supported: */
++ __u32 extended_cpuid_level;
++#endif
++ /* Maximum supported CPUID level, -1=no CPUID: */
++ int cpuid_level;
++ __u32 x86_capability[NCAPINTS];
++ char x86_vendor_id[16];
++ char x86_model_id[64];
++ /* in KB - valid for CPUS which support this call: */
++ int x86_cache_size;
++ int x86_cache_alignment; /* In bytes */
++ int x86_power;
++ unsigned long loops_per_jiffy;
+ #ifdef CONFIG_SMP
+- cpumask_t llc_shared_map; /* cpus sharing the last level cache */
++ /* cpus sharing the last level cache: */
++ cpumask_t llc_shared_map;
+ #endif
+- u16 x86_max_cores; /* cpuid returned max cores value */
+- u16 apicid;
+- u16 x86_clflush_size;
++ /* cpuid returned max cores value: */
++ u16 x86_max_cores;
++ u16 apicid;
++ u16 initial_apicid;
++ u16 x86_clflush_size;
+ #ifdef CONFIG_SMP
+- u16 booted_cores; /* number of cores as seen by OS */
+- u16 phys_proc_id; /* Physical processor id. */
+- u16 cpu_core_id; /* Core id */
+- u16 cpu_index; /* index into per_cpu list */
++ /* number of cores as seen by the OS: */
++ u16 booted_cores;
++ /* Physical processor id: */
++ u16 phys_proc_id;
++ /* Core id: */
++ u16 cpu_core_id;
++ /* Index into per_cpu list: */
++ u16 cpu_index;
+ #endif
+ } __attribute__((__aligned__(SMP_CACHE_BYTES)));
+
+-#define X86_VENDOR_INTEL 0
+-#define X86_VENDOR_CYRIX 1
+-#define X86_VENDOR_AMD 2
+-#define X86_VENDOR_UMC 3
+-#define X86_VENDOR_NEXGEN 4
+-#define X86_VENDOR_CENTAUR 5
+-#define X86_VENDOR_TRANSMETA 7
+-#define X86_VENDOR_NSC 8
+-#define X86_VENDOR_NUM 9
+-#define X86_VENDOR_UNKNOWN 0xff
++#define X86_VENDOR_INTEL 0
++#define X86_VENDOR_CYRIX 1
++#define X86_VENDOR_AMD 2
++#define X86_VENDOR_UMC 3
++#define X86_VENDOR_CENTAUR 5
++#define X86_VENDOR_TRANSMETA 7
++#define X86_VENDOR_NSC 8
++#define X86_VENDOR_NUM 9
++
++#define X86_VENDOR_UNKNOWN 0xff
+
+ /*
+ * capabilities of CPUs
+ */
+-extern struct cpuinfo_x86 boot_cpu_data;
+-extern struct cpuinfo_x86 new_cpu_data;
+-extern __u32 cleared_cpu_caps[NCAPINTS];
++extern struct cpuinfo_x86 boot_cpu_data;
++extern struct cpuinfo_x86 new_cpu_data;
++
++extern __u32 cleared_cpu_caps[NCAPINTS];
+
+ #ifdef CONFIG_SMP
+ DECLARE_PER_CPU(struct cpuinfo_x86, cpu_info);
+@@ -129,7 +140,18 @@ DECLARE_PER_CPU(struct cpuinfo_x86, cpu_
+ #define current_cpu_data boot_cpu_data
+ #endif
+
+-void cpu_detect(struct cpuinfo_x86 *c);
++static inline int hlt_works(int cpu)
++{
++#ifdef CONFIG_X86_32
++ return cpu_data(cpu).hlt_works_ok;
++#else
++ return 1;
++#endif
++}
++
++#define cache_line_size() (boot_cpu_data.x86_cache_alignment)
++
++extern void cpu_detect(struct cpuinfo_x86 *c);
+
+ extern void identify_cpu(struct cpuinfo_x86 *);
+ extern void identify_boot_cpu(void);
+@@ -149,12 +171,12 @@ static inline void xen_cpuid(unsigned in
+ unsigned int *ecx, unsigned int *edx)
+ {
+ /* ecx is often an input as well as an output. */
+- __asm__(XEN_CPUID
+- : "=a" (*eax),
+- "=b" (*ebx),
+- "=c" (*ecx),
+- "=d" (*edx)
+- : "0" (*eax), "2" (*ecx));
++ asm(XEN_CPUID
++ : "=a" (*eax),
++ "=b" (*ebx),
++ "=c" (*ecx),
++ "=d" (*edx)
++ : "0" (*eax), "2" (*ecx));
+ }
+
+ static inline void load_cr3(pgd_t *pgdir)
+@@ -166,57 +188,70 @@ static inline void load_cr3(pgd_t *pgdir
+ #ifdef CONFIG_X86_32
+ /* This is the TSS defined by the hardware. */
+ struct x86_hw_tss {
+- unsigned short back_link, __blh;
+- unsigned long sp0;
+- unsigned short ss0, __ss0h;
+- unsigned long sp1;
+- unsigned short ss1, __ss1h; /* ss1 caches MSR_IA32_SYSENTER_CS */
+- unsigned long sp2;
+- unsigned short ss2, __ss2h;
+- unsigned long __cr3;
+- unsigned long ip;
+- unsigned long flags;
+- unsigned long ax, cx, dx, bx;
+- unsigned long sp, bp, si, di;
+- unsigned short es, __esh;
+- unsigned short cs, __csh;
+- unsigned short ss, __ssh;
+- unsigned short ds, __dsh;
+- unsigned short fs, __fsh;
+- unsigned short gs, __gsh;
+- unsigned short ldt, __ldth;
+- unsigned short trace, io_bitmap_base;
++ unsigned short back_link, __blh;
++ unsigned long sp0;
++ unsigned short ss0, __ss0h;
++ unsigned long sp1;
++ /* ss1 caches MSR_IA32_SYSENTER_CS: */
++ unsigned short ss1, __ss1h;
++ unsigned long sp2;
++ unsigned short ss2, __ss2h;
++ unsigned long __cr3;
++ unsigned long ip;
++ unsigned long flags;
++ unsigned long ax;
++ unsigned long cx;
++ unsigned long dx;
++ unsigned long bx;
++ unsigned long sp;
++ unsigned long bp;
++ unsigned long si;
++ unsigned long di;
++ unsigned short es, __esh;
++ unsigned short cs, __csh;
++ unsigned short ss, __ssh;
++ unsigned short ds, __dsh;
++ unsigned short fs, __fsh;
++ unsigned short gs, __gsh;
++ unsigned short ldt, __ldth;
++ unsigned short trace;
++ unsigned short io_bitmap_base;
++
+ } __attribute__((packed));
+ extern struct tss_struct doublefault_tss;
+ #else
+ struct x86_hw_tss {
+- u32 reserved1;
+- u64 sp0;
+- u64 sp1;
+- u64 sp2;
+- u64 reserved2;
+- u64 ist[7];
+- u32 reserved3;
+- u32 reserved4;
+- u16 reserved5;
+- u16 io_bitmap_base;
++ u32 reserved1;
++ u64 sp0;
++ u64 sp1;
++ u64 sp2;
++ u64 reserved2;
++ u64 ist[7];
++ u32 reserved3;
++ u32 reserved4;
++ u16 reserved5;
++ u16 io_bitmap_base;
++
+ } __attribute__((packed)) ____cacheline_aligned;
+ #endif
+ #endif /* CONFIG_X86_NO_TSS */
+
+ /*
+- * Size of io_bitmap.
++ * IO-bitmap sizes:
+ */
+-#define IO_BITMAP_BITS 65536
+-#define IO_BITMAP_BYTES (IO_BITMAP_BITS/8)
+-#define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long))
+-#define IO_BITMAP_OFFSET offsetof(struct tss_struct, io_bitmap)
+-#define INVALID_IO_BITMAP_OFFSET 0x8000
+-#define INVALID_IO_BITMAP_OFFSET_LAZY 0x9000
++#define IO_BITMAP_BITS 65536
++#define IO_BITMAP_BYTES (IO_BITMAP_BITS/8)
++#define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long))
++#define IO_BITMAP_OFFSET offsetof(struct tss_struct, io_bitmap)
++#define INVALID_IO_BITMAP_OFFSET 0x8000
++#define INVALID_IO_BITMAP_OFFSET_LAZY 0x9000
+
+ #ifndef CONFIG_X86_NO_TSS
+ struct tss_struct {
+- struct x86_hw_tss x86_tss;
++ /*
++ * The hardware state:
++ */
++ struct x86_hw_tss x86_tss;
+
+ /*
+ * The extra 1 is there because the CPU will access an
+@@ -224,136 +259,162 @@ struct tss_struct {
+ * bitmap. The extra byte must be all 1 bits, and must
+ * be within the limit.
+ */
+- unsigned long io_bitmap[IO_BITMAP_LONGS + 1];
++ unsigned long io_bitmap[IO_BITMAP_LONGS + 1];
+ /*
+ * Cache the current maximum and the last task that used the bitmap:
+ */
+- unsigned long io_bitmap_max;
+- struct thread_struct *io_bitmap_owner;
++ unsigned long io_bitmap_max;
++ struct thread_struct *io_bitmap_owner;
++
+ /*
+- * pads the TSS to be cacheline-aligned (size is 0x100)
++ * Pad the TSS to be cacheline-aligned (size is 0x100):
+ */
+- unsigned long __cacheline_filler[35];
++ unsigned long __cacheline_filler[35];
+ /*
+- * .. and then another 0x100 bytes for emergency kernel stack
++ * .. and then another 0x100 bytes for the emergency kernel stack:
+ */
+- unsigned long stack[64];
++ unsigned long stack[64];
++
+ } __attribute__((packed));
+
+ DECLARE_PER_CPU(struct tss_struct, init_tss);
+
+-/* Save the original ist values for checking stack pointers during debugging */
++/*
++ * Save the original ist values for checking stack pointers during debugging
++ */
+ struct orig_ist {
+- unsigned long ist[7];
++ unsigned long ist[7];
+ };
+ #endif /* CONFIG_X86_NO_TSS */
+
+ #define MXCSR_DEFAULT 0x1f80
+
+ struct i387_fsave_struct {
+- u32 cwd;
+- u32 swd;
+- u32 twd;
+- u32 fip;
+- u32 fcs;
+- u32 foo;
+- u32 fos;
+- u32 st_space[20]; /* 8*10 bytes for each FP-reg = 80 bytes */
+- u32 status; /* software status information */
++ u32 cwd; /* FPU Control Word */
++ u32 swd; /* FPU Status Word */
++ u32 twd; /* FPU Tag Word */
++ u32 fip; /* FPU IP Offset */
++ u32 fcs; /* FPU IP Selector */
++ u32 foo; /* FPU Operand Pointer Offset */
++ u32 fos; /* FPU Operand Pointer Selector */
++
++ /* 8*10 bytes for each FP-reg = 80 bytes: */
++ u32 st_space[20];
++
++ /* Software status information [not touched by FSAVE ]: */
++ u32 status;
+ };
+
+ struct i387_fxsave_struct {
+- u16 cwd;
+- u16 swd;
+- u16 twd;
+- u16 fop;
++ u16 cwd; /* Control Word */
++ u16 swd; /* Status Word */
++ u16 twd; /* Tag Word */
++ u16 fop; /* Last Instruction Opcode */
+ union {
+ struct {
+- u64 rip;
+- u64 rdp;
++ u64 rip; /* Instruction Pointer */
++ u64 rdp; /* Data Pointer */
+ };
+ struct {
+- u32 fip;
+- u32 fcs;
+- u32 foo;
+- u32 fos;
++ u32 fip; /* FPU IP Offset */
++ u32 fcs; /* FPU IP Selector */
++ u32 foo; /* FPU Operand Offset */
++ u32 fos; /* FPU Operand Selector */
+ };
+ };
+- u32 mxcsr;
+- u32 mxcsr_mask;
+- u32 st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */
+- u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg = 256 bytes */
+- u32 padding[24];
++ u32 mxcsr; /* MXCSR Register State */
++ u32 mxcsr_mask; /* MXCSR Mask */
++
++ /* 8*16 bytes for each FP-reg = 128 bytes: */
++ u32 st_space[32];
++
++ /* 16*16 bytes for each XMM-reg = 256 bytes: */
++ u32 xmm_space[64];
++
++ u32 padding[24];
++
+ } __attribute__((aligned(16)));
+
+ struct i387_soft_struct {
+- u32 cwd;
+- u32 swd;
+- u32 twd;
+- u32 fip;
+- u32 fcs;
+- u32 foo;
+- u32 fos;
+- u32 st_space[20]; /* 8*10 bytes for each FP-reg = 80 bytes */
+- u8 ftop, changed, lookahead, no_update, rm, alimit;
+- struct info *info;
+- u32 entry_eip;
++ u32 cwd;
++ u32 swd;
++ u32 twd;
++ u32 fip;
++ u32 fcs;
++ u32 foo;
++ u32 fos;
++ /* 8*10 bytes for each FP-reg = 80 bytes: */
++ u32 st_space[20];
++ u8 ftop;
++ u8 changed;
++ u8 lookahead;
++ u8 no_update;
++ u8 rm;
++ u8 alimit;
++ struct info *info;
++ u32 entry_eip;
+ };
+
+-union i387_union {
++union thread_xstate {
+ struct i387_fsave_struct fsave;
+ struct i387_fxsave_struct fxsave;
+- struct i387_soft_struct soft;
++ struct i387_soft_struct soft;
+ };
+
+-#ifdef CONFIG_X86_32
+-DECLARE_PER_CPU(u8, cpu_llc_id);
+-#elif !defined(CONFIG_X86_NO_TSS)
++#if defined(CONFIG_X86_64) && !defined(CONFIG_X86_NO_TSS)
+ DECLARE_PER_CPU(struct orig_ist, orig_ist);
+ #endif
+
+ extern void print_cpu_info(struct cpuinfo_x86 *);
++extern unsigned int xstate_size;
++extern void free_thread_xstate(struct task_struct *);
++extern struct kmem_cache *task_xstate_cachep;
+ extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
+ extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
+ extern unsigned short num_cache_leaves;
+
+ struct thread_struct {
+-/* cached TLS descriptors. */
+- struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES];
+- unsigned long sp0;
+- unsigned long sp;
++ /* Cached TLS descriptors: */
++ struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES];
++ unsigned long sp0;
++ unsigned long sp;
+ #ifdef CONFIG_X86_32
+- unsigned long sysenter_cs;
++ unsigned long sysenter_cs;
+ #else
+- unsigned long usersp; /* Copy from PDA */
+- unsigned short es, ds, fsindex, gsindex;
+-#endif
+- unsigned long ip;
+- unsigned long fs;
+- unsigned long gs;
+-/* Hardware debugging registers */
+- unsigned long debugreg0;
+- unsigned long debugreg1;
+- unsigned long debugreg2;
+- unsigned long debugreg3;
+- unsigned long debugreg6;
+- unsigned long debugreg7;
+-/* fault info */
+- unsigned long cr2, trap_no, error_code;
+-/* floating point info */
+- union i387_union i387 __attribute__((aligned(16)));;
++ unsigned long usersp; /* Copy from PDA */
++ unsigned short es;
++ unsigned short ds;
++ unsigned short fsindex;
++ unsigned short gsindex;
++#endif
++ unsigned long ip;
++ unsigned long fs;
++ unsigned long gs;
++ /* Hardware debugging registers: */
++ unsigned long debugreg0;
++ unsigned long debugreg1;
++ unsigned long debugreg2;
++ unsigned long debugreg3;
++ unsigned long debugreg6;
++ unsigned long debugreg7;
++ /* Fault info: */
++ unsigned long cr2;
++ unsigned long trap_no;
++ unsigned long error_code;
++ /* floating point and extended processor state */
++ union thread_xstate *xstate;
+ #ifdef CONFIG_X86_32
+-/* virtual 86 mode info */
++ /* Virtual 86 mode info */
+ struct vm86_struct __user *vm86_info;
+ unsigned long screen_bitmap;
+ unsigned long v86flags, v86mask, saved_sp0;
+ unsigned int saved_fs, saved_gs;
+ #endif
+-/* IO permissions */
+- unsigned long *io_bitmap_ptr;
+- unsigned long iopl;
+-/* max allowed port in the bitmap, in bytes: */
+- unsigned io_bitmap_max;
++ /* IO permissions: */
++ unsigned long *io_bitmap_ptr;
++ unsigned long iopl;
++ /* Max allowed port in the bitmap, in bytes: */
++ unsigned io_bitmap_max;
+ /* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set. */
+ unsigned long debugctlmsr;
+ /* Debug Store - if not 0 points to a DS Save Area configuration;
+@@ -384,12 +445,12 @@ static inline void xen_set_iopl_mask(uns
+ }
+
+ #ifndef CONFIG_X86_NO_TSS
+-static inline void native_load_sp0(struct tss_struct *tss,
+- struct thread_struct *thread)
++static inline void
++native_load_sp0(struct tss_struct *tss, struct thread_struct *thread)
+ {
+ tss->x86_tss.sp0 = thread->sp0;
+ #ifdef CONFIG_X86_32
+- /* Only happens when SEP is enabled, no need to test "SEP"arately */
++ /* Only happens when SEP is enabled, no need to test "SEP"arately: */
+ if (unlikely(tss->x86_tss.ss1 != thread->sysenter_cs)) {
+ tss->x86_tss.ss1 = thread->sysenter_cs;
+ wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
+@@ -403,8 +464,8 @@ static inline void native_load_sp0(struc
+ } while (0)
+ #endif
+
+-#define __cpuid xen_cpuid
+-#define paravirt_enabled() 0
++#define __cpuid xen_cpuid
++#define paravirt_enabled() 0
+
+ /*
+ * These special macros can be used to get or set a debugging register
+@@ -424,11 +485,12 @@ static inline void native_load_sp0(struc
+ * enable), so that any CPU's that boot up
+ * after us can get the correct flags.
+ */
+-extern unsigned long mmu_cr4_features;
++extern unsigned long mmu_cr4_features;
+
+ static inline void set_in_cr4(unsigned long mask)
+ {
+ unsigned cr4;
++
+ mmu_cr4_features |= mask;
+ cr4 = read_cr4();
+ cr4 |= mask;
+@@ -438,6 +500,7 @@ static inline void set_in_cr4(unsigned l
+ static inline void clear_in_cr4(unsigned long mask)
+ {
+ unsigned cr4;
++
+ mmu_cr4_features &= ~mask;
+ cr4 = read_cr4();
+ cr4 &= ~mask;
+@@ -445,42 +508,42 @@ static inline void clear_in_cr4(unsigned
+ }
+
+ struct microcode_header {
+- unsigned int hdrver;
+- unsigned int rev;
+- unsigned int date;
+- unsigned int sig;
+- unsigned int cksum;
+- unsigned int ldrver;
+- unsigned int pf;
+- unsigned int datasize;
+- unsigned int totalsize;
+- unsigned int reserved[3];
++ unsigned int hdrver;
++ unsigned int rev;
++ unsigned int date;
++ unsigned int sig;
++ unsigned int cksum;
++ unsigned int ldrver;
++ unsigned int pf;
++ unsigned int datasize;
++ unsigned int totalsize;
++ unsigned int reserved[3];
+ };
+
+ struct microcode {
+- struct microcode_header hdr;
+- unsigned int bits[0];
++ struct microcode_header hdr;
++ unsigned int bits[0];
+ };
+
+-typedef struct microcode microcode_t;
+-typedef struct microcode_header microcode_header_t;
++typedef struct microcode microcode_t;
++typedef struct microcode_header microcode_header_t;
+
+ /* microcode format is extended from prescott processors */
+ struct extended_signature {
+- unsigned int sig;
+- unsigned int pf;
+- unsigned int cksum;
++ unsigned int sig;
++ unsigned int pf;
++ unsigned int cksum;
+ };
+
+ struct extended_sigtable {
+- unsigned int count;
+- unsigned int cksum;
+- unsigned int reserved[3];
++ unsigned int count;
++ unsigned int cksum;
++ unsigned int reserved[3];
+ struct extended_signature sigs[0];
+ };
+
+ typedef struct {
+- unsigned long seg;
++ unsigned long seg;
+ } mm_segment_t;
+
+
+@@ -492,7 +555,7 @@ extern int kernel_thread(int (*fn)(void
+ /* Free all resources held by a thread. */
+ extern void release_thread(struct task_struct *);
+
+-/* Prepare to copy thread state - unlazy all lazy status */
++/* Prepare to copy thread state - unlazy all lazy state */
+ extern void prepare_to_copy(struct task_struct *tsk);
+
+ unsigned long get_wchan(struct task_struct *p);
+@@ -529,118 +592,138 @@ static inline unsigned int cpuid_eax(uns
+ unsigned int eax, ebx, ecx, edx;
+
+ cpuid(op, &eax, &ebx, &ecx, &edx);
++
+ return eax;
+ }
++
+ static inline unsigned int cpuid_ebx(unsigned int op)
+ {
+ unsigned int eax, ebx, ecx, edx;
+
+ cpuid(op, &eax, &ebx, &ecx, &edx);
++
+ return ebx;
+ }
++
+ static inline unsigned int cpuid_ecx(unsigned int op)
+ {
+ unsigned int eax, ebx, ecx, edx;
+
+ cpuid(op, &eax, &ebx, &ecx, &edx);
++
+ return ecx;
+ }
++
+ static inline unsigned int cpuid_edx(unsigned int op)
+ {
+ unsigned int eax, ebx, ecx, edx;
+
+ cpuid(op, &eax, &ebx, &ecx, &edx);
++
+ return edx;
+ }
+
+ /* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
+ static inline void rep_nop(void)
+ {
+- __asm__ __volatile__("rep;nop": : :"memory");
++ asm volatile("rep; nop" ::: "memory");
+ }
+
+-/* Stop speculative execution */
++static inline void cpu_relax(void)
++{
++ rep_nop();
++}
++
++/* Stop speculative execution: */
+ static inline void sync_core(void)
+ {
+ int tmp;
++
+ asm volatile("cpuid" : "=a" (tmp) : "0" (1)
+- : "ebx", "ecx", "edx", "memory");
++ : "ebx", "ecx", "edx", "memory");
+ }
+
+-#define cpu_relax() rep_nop()
+-
+ static inline void __monitor(const void *eax, unsigned long ecx,
+- unsigned long edx)
++ unsigned long edx)
+ {
+- /* "monitor %eax,%ecx,%edx;" */
+- asm volatile(
+- ".byte 0x0f,0x01,0xc8;"
+- : :"a" (eax), "c" (ecx), "d"(edx));
++ /* "monitor %eax, %ecx, %edx;" */
++ asm volatile(".byte 0x0f, 0x01, 0xc8;"
++ :: "a" (eax), "c" (ecx), "d"(edx));
+ }
+
+ static inline void __mwait(unsigned long eax, unsigned long ecx)
+ {
+- /* "mwait %eax,%ecx;" */
+- asm volatile(
+- ".byte 0x0f,0x01,0xc9;"
+- : :"a" (eax), "c" (ecx));
++ /* "mwait %eax, %ecx;" */
++ asm volatile(".byte 0x0f, 0x01, 0xc9;"
++ :: "a" (eax), "c" (ecx));
+ }
+
+ static inline void __sti_mwait(unsigned long eax, unsigned long ecx)
+ {
+- /* "mwait %eax,%ecx;" */
+- asm volatile(
+- "sti; .byte 0x0f,0x01,0xc9;"
+- : :"a" (eax), "c" (ecx));
++ trace_hardirqs_on();
++ /* "mwait %eax, %ecx;" */
++ asm volatile("sti; .byte 0x0f, 0x01, 0xc9;"
++ :: "a" (eax), "c" (ecx));
+ }
+
+ extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx);
+
+-extern int force_mwait;
++extern int force_mwait;
+
+ extern void select_idle_routine(const struct cpuinfo_x86 *c);
+
+-extern unsigned long boot_option_idle_override;
++extern unsigned long boot_option_idle_override;
+
+ extern void enable_sep_cpu(void);
+ extern int sysenter_setup(void);
+
+ /* Defined in head.S */
+-extern struct desc_ptr early_gdt_descr;
++extern struct desc_ptr early_gdt_descr;
+
+ extern void cpu_set_gdt(int);
+ extern void switch_to_new_gdt(void);
+ extern void cpu_init(void);
+ extern void init_gdt(int cpu);
+
+-/* from system description table in BIOS. Mostly for MCA use, but
+- * others may find it useful. */
+-extern unsigned int machine_id;
+-extern unsigned int machine_submodel_id;
+-extern unsigned int BIOS_revision;
++static inline void update_debugctlmsr(unsigned long debugctlmsr)
++{
++#ifndef CONFIG_X86_DEBUGCTLMSR
++ if (boot_cpu_data.x86 < 6)
++ return;
++#endif
++ wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr);
++}
+
+-/* Boot loader type from the setup header */
+-extern int bootloader_type;
++/*
++ * from system description table in BIOS. Mostly for MCA use, but
++ * others may find it useful:
++ */
++extern unsigned int machine_id;
++extern unsigned int machine_submodel_id;
++extern unsigned int BIOS_revision;
++
++/* Boot loader type from the setup header: */
++extern int bootloader_type;
+
+-extern char ignore_fpu_irq;
+-#define cache_line_size() (boot_cpu_data.x86_cache_alignment)
++extern char ignore_fpu_irq;
+
+ #define HAVE_ARCH_PICK_MMAP_LAYOUT 1
+ #define ARCH_HAS_PREFETCHW
+ #define ARCH_HAS_SPINLOCK_PREFETCH
+
+ #ifdef CONFIG_X86_32
+-#define BASE_PREFETCH ASM_NOP4
+-#define ARCH_HAS_PREFETCH
++# define BASE_PREFETCH ASM_NOP4
++# define ARCH_HAS_PREFETCH
+ #else
+-#define BASE_PREFETCH "prefetcht0 (%1)"
++# define BASE_PREFETCH "prefetcht0 (%1)"
+ #endif
+
+-/* Prefetch instructions for Pentium III and AMD Athlon */
+-/* It's not worth to care about 3dnow! prefetches for the K6
+- because they are microcoded there and very slow.
+- However we don't do prefetches for pre XP Athlons currently
+- That should be fixed. */
++/*
++ * Prefetch instructions for Pentium III (+) and AMD Athlon (+)
++ *
++ * It's not worth to care about 3dnow prefetches for the K6
++ * because they are microcoded there and very slow.
++ */
+ static inline void prefetch(const void *x)
+ {
+ alternative_input(BASE_PREFETCH,
+@@ -649,8 +732,11 @@ static inline void prefetch(const void *
+ "r" (x));
+ }
+
+-/* 3dnow! prefetch to get an exclusive cache line. Useful for
+- spinlocks to avoid one state transition in the cache coherency protocol. */
++/*
++ * 3dnow prefetch to get an exclusive cache line.
++ * Useful for spinlocks to avoid one state transition in the
++ * cache coherency protocol:
++ */
+ static inline void prefetchw(const void *x)
+ {
+ alternative_input(BASE_PREFETCH,
+@@ -659,21 +745,25 @@ static inline void prefetchw(const void
+ "r" (x));
+ }
+
+-#define spin_lock_prefetch(x) prefetchw(x)
++static inline void spin_lock_prefetch(const void *x)
++{
++ prefetchw(x);
++}
++
+ #ifdef CONFIG_X86_32
+ /*
+ * User space process size: 3GB (default).
+ */
+-#define TASK_SIZE (PAGE_OFFSET)
+-#define STACK_TOP TASK_SIZE
+-#define STACK_TOP_MAX STACK_TOP
+-
+-#define INIT_THREAD { \
+- .sp0 = sizeof(init_stack) + (long)&init_stack, \
+- .vm86_info = NULL, \
+- .sysenter_cs = __KERNEL_CS, \
+- .io_bitmap_ptr = NULL, \
+- .fs = __KERNEL_PERCPU, \
++#define TASK_SIZE PAGE_OFFSET
++#define STACK_TOP TASK_SIZE
++#define STACK_TOP_MAX STACK_TOP
++
++#define INIT_THREAD { \
++ .sp0 = sizeof(init_stack) + (long)&init_stack, \
++ .vm86_info = NULL, \
++ .sysenter_cs = __KERNEL_CS, \
++ .io_bitmap_ptr = NULL, \
++ .fs = __KERNEL_PERCPU, \
+ }
+
+ /*
+@@ -682,28 +772,15 @@ static inline void prefetchw(const void
+ * permission bitmap. The extra byte must be all 1 bits, and must
+ * be within the limit.
+ */
+-#define INIT_TSS { \
+- .x86_tss = { \
++#define INIT_TSS { \
++ .x86_tss = { \
+ .sp0 = sizeof(init_stack) + (long)&init_stack, \
+- .ss0 = __KERNEL_DS, \
+- .ss1 = __KERNEL_CS, \
+- .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, \
+- }, \
+- .io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 }, \
+-}
+-
+-#define start_thread(regs, new_eip, new_esp) do { \
+- __asm__("movl %0,%%gs": :"r" (0)); \
+- regs->fs = 0; \
+- set_fs(USER_DS); \
+- regs->ds = __USER_DS; \
+- regs->es = __USER_DS; \
+- regs->ss = __USER_DS; \
+- regs->cs = __USER_CS; \
+- regs->ip = new_eip; \
+- regs->sp = new_esp; \
+-} while (0)
+-
++ .ss0 = __KERNEL_DS, \
++ .ss1 = __KERNEL_CS, \
++ .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, \
++ }, \
++ .io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 }, \
++}
+
+ extern unsigned long thread_saved_pc(struct task_struct *tsk);
+
+@@ -731,24 +808,24 @@ extern unsigned long thread_saved_pc(str
+ __regs__ - 1; \
+ })
+
+-#define KSTK_ESP(task) (task_pt_regs(task)->sp)
++#define KSTK_ESP(task) (task_pt_regs(task)->sp)
+
+ #else
+ /*
+ * User space process size. 47bits minus one guard page.
+ */
+-#define TASK_SIZE64 (0x800000000000UL - 4096)
++#define TASK_SIZE64 ((1UL << 47) - PAGE_SIZE)
+
+ /* This decides where the kernel will search for a free chunk of vm
+ * space during mmap's.
+ */
+-#define IA32_PAGE_OFFSET ((current->personality & ADDR_LIMIT_3GB) ? \
+- 0xc0000000 : 0xFFFFe000)
++#define IA32_PAGE_OFFSET ((current->personality & ADDR_LIMIT_3GB) ? \
++ 0xc0000000 : 0xFFFFe000)
+
+-#define TASK_SIZE (test_thread_flag(TIF_IA32) ? \
+- IA32_PAGE_OFFSET : TASK_SIZE64)
+-#define TASK_SIZE_OF(child) ((test_tsk_thread_flag(child, TIF_IA32)) ? \
+- IA32_PAGE_OFFSET : TASK_SIZE64)
++#define TASK_SIZE (test_thread_flag(TIF_IA32) ? \
++ IA32_PAGE_OFFSET : TASK_SIZE64)
++#define TASK_SIZE_OF(child) ((test_tsk_thread_flag(child, TIF_IA32)) ? \
++ IA32_PAGE_OFFSET : TASK_SIZE64)
+
+ #define STACK_TOP TASK_SIZE
+ #define STACK_TOP_MAX TASK_SIZE64
+@@ -761,33 +838,32 @@ extern unsigned long thread_saved_pc(str
+ .x86_tss.sp0 = (unsigned long)&init_stack + sizeof(init_stack) \
+ }
+
+-#define start_thread(regs, new_rip, new_rsp) do { \
+- asm volatile("movl %0,%%fs; movl %0,%%es; movl %0,%%ds": :"r" (0)); \
+- load_gs_index(0); \
+- (regs)->ip = (new_rip); \
+- (regs)->sp = (new_rsp); \
+- write_pda(oldrsp, (new_rsp)); \
+- (regs)->cs = __USER_CS; \
+- (regs)->ss = __USER_DS; \
+- (regs)->flags = 0x200; \
+- set_fs(USER_DS); \
+-} while (0)
+-
+ /*
+ * Return saved PC of a blocked thread.
+ * What is this good for? it will be always the scheduler or ret_from_fork.
+ */
+-#define thread_saved_pc(t) (*(unsigned long *)((t)->thread.sp - 8))
++#define thread_saved_pc(t) (*(unsigned long *)((t)->thread.sp - 8))
+
+-#define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1)
+-#define KSTK_ESP(tsk) -1 /* sorry. doesn't work for syscall. */
++#define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1)
++#define KSTK_ESP(tsk) -1 /* sorry. doesn't work for syscall. */
+ #endif /* CONFIG_X86_64 */
+
+-/* This decides where the kernel will search for a free chunk of vm
++extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
++ unsigned long new_sp);
++
++/*
++ * This decides where the kernel will search for a free chunk of vm
+ * space during mmap's.
+ */
+ #define TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 3))
+
+-#define KSTK_EIP(task) (task_pt_regs(task)->ip)
++#define KSTK_EIP(task) (task_pt_regs(task)->ip)
++
++/* Get/set a process' ability to use the timestamp counter instruction */
++#define GET_TSC_CTL(adr) get_tsc_mode((adr))
++#define SET_TSC_CTL(val) set_tsc_mode((val))
++
++extern int get_tsc_mode(unsigned long adr);
++extern int set_tsc_mode(unsigned int val);
+
+ #endif
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/segment.h 2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/segment.h 2009-03-16 16:38:05.000000000 +0100
+@@ -191,13 +191,14 @@
+ #define SEGMENT_TI_MASK 0x4
+
+ #define IDT_ENTRIES 256
++#define NUM_EXCEPTION_VECTORS 32
+ #define GDT_SIZE (GDT_ENTRIES * 8)
+ #define GDT_ENTRY_TLS_ENTRIES 3
+ #define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES * 8)
+
+ #ifdef __KERNEL__
+ #ifndef __ASSEMBLY__
+-extern const char early_idt_handlers[IDT_ENTRIES][10];
++extern const char early_idt_handlers[NUM_EXCEPTION_VECTORS][10];
+ #endif
+ #endif
+
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/smp.h 2009-02-16 16:18:36.000000000 +0100
++++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/smp.h 2009-03-16 16:38:05.000000000 +0100
+@@ -1,5 +1,227 @@
+-#ifdef CONFIG_X86_32
+-# include "smp_32.h"
++#ifndef _ASM_X86_SMP_H_
++#define _ASM_X86_SMP_H_
++#ifndef __ASSEMBLY__
++#include <linux/cpumask.h>
++#include <linux/init.h>
++#include <asm/percpu.h>
++
++/*
++ * We need the APIC definitions automatically as part of 'smp.h'
++ */
++#ifdef CONFIG_X86_LOCAL_APIC
++# include <asm/mpspec.h>
++# include <asm/apic.h>
++# ifdef CONFIG_X86_IO_APIC
++# include <asm/io_apic.h>
++# endif
++#endif
++#include <asm/pda.h>
++#include <asm/thread_info.h>
++
++#define cpu_callout_map cpu_possible_map
++extern cpumask_t cpu_initialized;
++#define cpu_callin_map cpu_possible_map
++
++extern void (*mtrr_hook)(void);
++extern void zap_low_mappings(void);
++
++extern int smp_num_siblings;
++extern unsigned int num_processors;
++extern cpumask_t cpu_initialized;
++
++#if defined(CONFIG_SMP) && !defined(CONFIG_XEN)
++extern u16 x86_cpu_to_apicid_init[];
++extern u16 x86_bios_cpu_apicid_init[];
++extern void *x86_cpu_to_apicid_early_ptr;
++extern void *x86_bios_cpu_apicid_early_ptr;
+ #else
+-# include "smp_64.h"
++#define x86_cpu_to_apicid_early_ptr NULL
++#define x86_bios_cpu_apicid_early_ptr NULL
++#endif
++
++DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);
++DECLARE_PER_CPU(cpumask_t, cpu_core_map);
++DECLARE_PER_CPU(u16, cpu_llc_id);
++DECLARE_PER_CPU(u16, x86_cpu_to_apicid);
++DECLARE_PER_CPU(u16, x86_bios_cpu_apicid);
++
++#ifdef CONFIG_SMP
++
++#ifndef CONFIG_XEN
++
++/* Static state in head.S used to set up a CPU */
++extern struct {
++ void *sp;
++ unsigned short ss;
++} stack_start;
++
++struct smp_ops {
++ void (*smp_prepare_boot_cpu)(void);
++ void (*smp_prepare_cpus)(unsigned max_cpus);
++ int (*cpu_up)(unsigned cpu);
++ void (*smp_cpus_done)(unsigned max_cpus);
++
++ void (*smp_send_stop)(void);
++ void (*smp_send_reschedule)(int cpu);
++ int (*smp_call_function_mask)(cpumask_t mask,
++ void (*func)(void *info), void *info,
++ int wait);
++};
++
++/* Globals due to paravirt */
++extern void set_cpu_sibling_map(int cpu);
++
++#ifndef CONFIG_PARAVIRT
++#define startup_ipi_hook(phys_apicid, start_eip, start_esp) do { } while (0)
++#endif
++extern struct smp_ops smp_ops;
++
++static inline void smp_send_stop(void)
++{
++ smp_ops.smp_send_stop();
++}
++
++static inline void smp_prepare_boot_cpu(void)
++{
++ smp_ops.smp_prepare_boot_cpu();
++}
++
++static inline void smp_prepare_cpus(unsigned int max_cpus)
++{
++ smp_ops.smp_prepare_cpus(max_cpus);
++}
++
++static inline void smp_cpus_done(unsigned int max_cpus)
++{
++ smp_ops.smp_cpus_done(max_cpus);
++}
++
++static inline int __cpu_up(unsigned int cpu)
++{
++ return smp_ops.cpu_up(cpu);
++}
++
++static inline void smp_send_reschedule(int cpu)
++{
++ smp_ops.smp_send_reschedule(cpu);
++}
++
++static inline int smp_call_function_mask(cpumask_t mask,
++ void (*func) (void *info), void *info,
++ int wait)
++{
++ return smp_ops.smp_call_function_mask(mask, func, info, wait);
++}
++
++void native_smp_prepare_boot_cpu(void);
++void native_smp_prepare_cpus(unsigned int max_cpus);
++void native_smp_cpus_done(unsigned int max_cpus);
++int native_cpu_up(unsigned int cpunum);
++
++#else /* CONFIG_XEN */
++
++void xen_smp_send_stop(void);
++void xen_smp_send_reschedule(int cpu);
++int xen_smp_call_function_mask(cpumask_t mask,
++ void (*func) (void *info), void *info,
++ int wait);
++
++#define smp_send_stop xen_smp_send_stop
++#define smp_send_reschedule xen_smp_send_reschedule
++#define smp_call_function_mask xen_smp_call_function_mask
++
++extern void prefill_possible_map(void);
++
++#endif /* CONFIG_XEN */
++
++extern int __cpu_disable(void);
++extern void __cpu_die(unsigned int cpu);
++
++extern void prefill_possible_map(void);
++
++void smp_store_cpu_info(int id);
++#define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu)
++
++/* We don't mark CPUs online until __cpu_up(), so we need another measure */
++static inline int num_booting_cpus(void)
++{
++ return cpus_weight(cpu_callout_map);
++}
++#endif /* CONFIG_SMP */
++
++extern unsigned disabled_cpus __cpuinitdata;
++
++#ifdef CONFIG_X86_32_SMP
++/*
++ * This function is needed by all SMP systems. It must _always_ be valid
++ * from the initial startup. We map APIC_BASE very early in page_setup(),
++ * so this is correct in the x86 case.
++ */
++DECLARE_PER_CPU(int, cpu_number);
++#define raw_smp_processor_id() (x86_read_percpu(cpu_number))
++#define safe_smp_processor_id() smp_processor_id()
++
++#elif defined(CONFIG_X86_64_SMP)
++#define raw_smp_processor_id() read_pda(cpunumber)
++
++#define stack_smp_processor_id() \
++({ \
++ struct thread_info *ti; \
++ __asm__("andq %%rsp,%0; ":"=r" (ti) : "0" (CURRENT_MASK)); \
++ ti->cpu; \
++})
++#define safe_smp_processor_id() smp_processor_id()
++
++#else /* !CONFIG_X86_32_SMP && !CONFIG_X86_64_SMP */
++#define cpu_physical_id(cpu) boot_cpu_physical_apicid
++#define safe_smp_processor_id() 0
++#define stack_smp_processor_id() 0
++#endif
++
++#ifdef CONFIG_X86_LOCAL_APIC
++
++static inline int logical_smp_processor_id(void)
++{
++ /* we don't want to mark this access volatile - bad code generation */
++ return GET_APIC_LOGICAL_ID(*(u32 *)(APIC_BASE + APIC_LDR));
++}
++
++#ifndef CONFIG_X86_64
++static inline unsigned int read_apic_id(void)
++{
++ return *(u32 *)(APIC_BASE + APIC_ID);
++}
++#else
++extern unsigned int read_apic_id(void);
++#endif
++
++
++# ifdef APIC_DEFINITION
++extern int hard_smp_processor_id(void);
++# else
++# include <mach_apicdef.h>
++static inline int hard_smp_processor_id(void)
++{
++ /* we don't want to mark this access volatile - bad code generation */
++ return GET_APIC_ID(read_apic_id());
++}
++# endif /* APIC_DEFINITION */
++
++#else /* CONFIG_X86_LOCAL_APIC */
++
++# ifndef CONFIG_SMP
++# define hard_smp_processor_id() 0
++# endif
++
++#endif /* CONFIG_X86_LOCAL_APIC */
++
++#ifdef CONFIG_HOTPLUG_CPU
++extern void cpu_exit_clear(void);
++extern void cpu_uninit(void);
++#endif
++
++extern void smp_alloc_memory(void);
++extern void lock_ipi_call_lock(void);
++extern void unlock_ipi_call_lock(void);
++#endif /* __ASSEMBLY__ */
+ #endif
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/smp_32.h 2009-03-16 16:33:40.000000000 +0100
++++ /dev/null 1970-01-01 00:00:00.000000000 +0000
+@@ -1,178 +0,0 @@
+-#ifndef __ASM_SMP_H
+-#define __ASM_SMP_H
+-
+-#ifndef __ASSEMBLY__
+-#include <linux/cpumask.h>
+-#include <linux/init.h>
+-
+-/*
+- * We need the APIC definitions automatically as part of 'smp.h'
+- */
+-#ifdef CONFIG_X86_LOCAL_APIC
+-# include <asm/mpspec.h>
+-# include <asm/apic.h>
+-# ifdef CONFIG_X86_IO_APIC
+-# include <asm/io_apic.h>
+-# endif
+-#endif
+-
+-#define cpu_callout_map cpu_possible_map
+-#define cpu_callin_map cpu_possible_map
+-
+-extern int smp_num_siblings;
+-extern unsigned int num_processors;
+-
+-extern void smp_alloc_memory(void);
+-extern void lock_ipi_call_lock(void);
+-extern void unlock_ipi_call_lock(void);
+-
+-extern void (*mtrr_hook) (void);
+-extern void zap_low_mappings (void);
+-
+-DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);
+-DECLARE_PER_CPU(cpumask_t, cpu_core_map);
+-DECLARE_PER_CPU(u8, cpu_llc_id);
+-DECLARE_PER_CPU(u8, x86_cpu_to_apicid);
+-
+-#ifdef CONFIG_HOTPLUG_CPU
+-extern void cpu_exit_clear(void);
+-extern void cpu_uninit(void);
+-#endif
+-
+-#ifdef CONFIG_SMP
+-
+-#ifndef CONFIG_XEN
+-
+-/* Globals due to paravirt */
+-extern void set_cpu_sibling_map(int cpu);
+-
+-struct smp_ops
+-{
+- void (*smp_prepare_boot_cpu)(void);
+- void (*smp_prepare_cpus)(unsigned max_cpus);
+- int (*cpu_up)(unsigned cpu);
+- void (*smp_cpus_done)(unsigned max_cpus);
+-
+- void (*smp_send_stop)(void);
+- void (*smp_send_reschedule)(int cpu);
+- int (*smp_call_function_mask)(cpumask_t mask,
+- void (*func)(void *info), void *info,
+- int wait);
+-};
+-
+-extern struct smp_ops smp_ops;
+-
+-static inline void smp_prepare_boot_cpu(void)
+-{
+- smp_ops.smp_prepare_boot_cpu();
+-}
+-static inline void smp_prepare_cpus(unsigned int max_cpus)
+-{
+- smp_ops.smp_prepare_cpus(max_cpus);
+-}
+-static inline int __cpu_up(unsigned int cpu)
+-{
+- return smp_ops.cpu_up(cpu);
+-}
+-static inline void smp_cpus_done(unsigned int max_cpus)
+-{
+- smp_ops.smp_cpus_done(max_cpus);
+-}
+-
+-static inline void smp_send_stop(void)
+-{
+- smp_ops.smp_send_stop();
+-}
+-static inline void smp_send_reschedule(int cpu)
+-{
+- smp_ops.smp_send_reschedule(cpu);
+-}
+-static inline int smp_call_function_mask(cpumask_t mask,
+- void (*func) (void *info), void *info,
+- int wait)
+-{
+- return smp_ops.smp_call_function_mask(mask, func, info, wait);
+-}
+-
+-void native_smp_prepare_boot_cpu(void);
+-void native_smp_prepare_cpus(unsigned int max_cpus);
+-int native_cpu_up(unsigned int cpunum);
+-void native_smp_cpus_done(unsigned int max_cpus);
+-
+-#ifndef CONFIG_PARAVIRT
+-#define startup_ipi_hook(phys_apicid, start_eip, start_esp) do { } while (0)
+-#endif
+-
+-#else /* CONFIG_XEN */
+-
+-void xen_smp_send_stop(void);
+-void xen_smp_send_reschedule(int cpu);
+-int xen_smp_call_function_mask(cpumask_t mask,
+- void (*func) (void *info), void *info,
+- int wait);
+-
+-#define smp_send_stop xen_smp_send_stop
+-#define smp_send_reschedule xen_smp_send_reschedule
+-#define smp_call_function_mask xen_smp_call_function_mask
+-
+-extern void prefill_possible_map(void);
+-
+-#endif /* CONFIG_XEN */
+-
+-extern int __cpu_disable(void);
+-extern void __cpu_die(unsigned int cpu);
+-
+-/*
+- * This function is needed by all SMP systems. It must _always_ be valid
+- * from the initial startup. We map APIC_BASE very early in page_setup(),
+- * so this is correct in the x86 case.
+- */
+-DECLARE_PER_CPU(int, cpu_number);
+-#define raw_smp_processor_id() (x86_read_percpu(cpu_number))
+-
+-#define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu)
+-
+-#define safe_smp_processor_id() smp_processor_id()
+-
+-/* We don't mark CPUs online until __cpu_up(), so we need another measure */
+-static inline int num_booting_cpus(void)
+-{
+- return cpus_weight(cpu_callout_map);
+-}
+-
+-#else /* CONFIG_SMP */
+-
+-#define safe_smp_processor_id() 0
+-#define cpu_physical_id(cpu) boot_cpu_physical_apicid
+-
+-#endif /* !CONFIG_SMP */
+-
+-#ifdef CONFIG_X86_LOCAL_APIC
+-
+-static __inline int logical_smp_processor_id(void)
+-{
+- /* we don't want to mark this access volatile - bad code generation */
+- return GET_APIC_LOGICAL_ID(*(u32 *)(APIC_BASE + APIC_LDR));
+-}
+-
+-# ifdef APIC_DEFINITION
+-extern int hard_smp_processor_id(void);
+-# else
+-# include <mach_apicdef.h>
+-static inline int hard_smp_processor_id(void)
+-{
+- /* we don't want to mark this access volatile - bad code generation */
+- return GET_APIC_ID(*(u32 *)(APIC_BASE + APIC_ID));
+-}
+-# endif /* APIC_DEFINITION */
+-
+-#else /* CONFIG_X86_LOCAL_APIC */
+-
+-# ifndef CONFIG_SMP
+-# define hard_smp_processor_id() 0
+-# endif
+-
+-#endif /* CONFIG_X86_LOCAL_APIC */
+-
+-#endif /* !ASSEMBLY */
+-#endif
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/smp_64.h 2009-03-16 16:33:40.000000000 +0100
++++ /dev/null 1970-01-01 00:00:00.000000000 +0000
+@@ -1,103 +0,0 @@
+-#ifndef __ASM_SMP_H
+-#define __ASM_SMP_H
+-
+-#include <linux/cpumask.h>
+-#include <linux/init.h>
+-
+-#ifdef CONFIG_X86_LOCAL_APIC
+-/*
+- * We need the APIC definitions automatically as part of 'smp.h'
+- */
+-#include <asm/apic.h>
+-#ifdef CONFIG_X86_IO_APIC
+-#include <asm/io_apic.h>
+-#endif
+-#include <asm/mpspec.h>
+-#endif
+-#include <asm/pda.h>
+-#include <asm/thread_info.h>
+-
+-extern cpumask_t cpu_initialized;
+-
+-extern int smp_num_siblings;
+-extern unsigned int num_processors;
+-
+-extern void smp_alloc_memory(void);
+-extern void lock_ipi_call_lock(void);
+-extern void unlock_ipi_call_lock(void);
+-
+-extern int smp_call_function_mask(cpumask_t mask, void (*func)(void *),
+- void *info, int wait);
+-
+-DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);
+-DECLARE_PER_CPU(cpumask_t, cpu_core_map);
+-DECLARE_PER_CPU(u16, cpu_llc_id);
+-DECLARE_PER_CPU(u16, x86_cpu_to_apicid);
+-DECLARE_PER_CPU(u16, x86_bios_cpu_apicid);
+-
+-#ifdef CONFIG_X86_LOCAL_APIC
+-static inline int cpu_present_to_apicid(int mps_cpu)
+-{
+- if (cpu_present(mps_cpu))
+- return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu);
+- else
+- return BAD_APICID;
+-}
+-#endif
+-
+-#ifdef CONFIG_SMP
+-
+-#define SMP_TRAMPOLINE_BASE 0x6000
+-
+-extern int __cpu_disable(void);
+-extern void __cpu_die(unsigned int cpu);
+-extern void prefill_possible_map(void);
+-extern unsigned __cpuinitdata disabled_cpus;
+-
+-#define raw_smp_processor_id() read_pda(cpunumber)
+-#define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu)
+-
+-#define stack_smp_processor_id() \
+- ({ \
+- struct thread_info *ti; \
+- __asm__("andq %%rsp,%0; ":"=r" (ti) : "0" (CURRENT_MASK)); \
+- ti->cpu; \
+-})
+-
+-/*
+- * On x86 all CPUs are mapped 1:1 to the APIC space. This simplifies
+- * scheduling and IPI sending and compresses data structures.
+- */
+-static inline int num_booting_cpus(void)
+-{
+- return cpus_weight(cpu_possible_map);
+-}
+-
+-extern void smp_send_reschedule(int cpu);
+-
+-#else /* CONFIG_SMP */
+-
+-extern unsigned int boot_cpu_id;
+-#define cpu_physical_id(cpu) boot_cpu_id
+-#define stack_smp_processor_id() 0
+-
+-#endif /* !CONFIG_SMP */
+-
+-#define safe_smp_processor_id() smp_processor_id()
+-
+-#ifdef CONFIG_X86_LOCAL_APIC
+-static __inline int logical_smp_processor_id(void)
+-{
+- /* we don't want to mark this access volatile - bad code generation */
+- return GET_APIC_LOGICAL_ID(*(u32 *)(APIC_BASE + APIC_LDR));
+-}
+-
+-static inline int hard_smp_processor_id(void)
+-{
+- /* we don't want to mark this access volatile - bad code generation */
+- return GET_APIC_ID(*(u32 *)(APIC_BASE + APIC_ID));
+-}
+-#endif
+-
+-#endif
+-
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/spinlock.h 2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/spinlock.h 2009-03-16 16:38:05.000000000 +0100
+@@ -88,7 +88,7 @@ extern void xen_spin_kick(raw_spinlock_t
+ : "memory", "cc")
+
+
+-static inline int __raw_spin_trylock(raw_spinlock_t *lock)
++static __always_inline int __raw_spin_trylock(raw_spinlock_t *lock)
+ {
+ int tmp, new;
+
+@@ -107,7 +107,7 @@ static inline int __raw_spin_trylock(raw
+ return tmp;
+ }
+
+-static inline void __raw_spin_unlock(raw_spinlock_t *lock)
++static __always_inline void __raw_spin_unlock(raw_spinlock_t *lock)
+ {
+ unsigned int token;
+ unsigned char kick;
+@@ -155,7 +155,7 @@ static inline void __raw_spin_unlock(raw
+ : "memory", "cc"); \
+ } while (0)
+
+-static inline int __raw_spin_trylock(raw_spinlock_t *lock)
++static __always_inline int __raw_spin_trylock(raw_spinlock_t *lock)
+ {
+ int tmp;
+ int new;
+@@ -177,7 +177,7 @@ static inline int __raw_spin_trylock(raw
+ return tmp;
+ }
+
+-static inline void __raw_spin_unlock(raw_spinlock_t *lock)
++static __always_inline void __raw_spin_unlock(raw_spinlock_t *lock)
+ {
+ unsigned int token, tmp;
+ bool kick;
+@@ -197,19 +197,19 @@ static inline void __raw_spin_unlock(raw
+
+ static inline int __raw_spin_is_locked(raw_spinlock_t *lock)
+ {
+- int tmp = *(volatile signed int *)(&(lock)->slock);
++ int tmp = ACCESS_ONCE(lock->slock);
+
+ return !!(((tmp >> TICKET_SHIFT) ^ tmp) & ((1 << TICKET_SHIFT) - 1));
+ }
+
+ static inline int __raw_spin_is_contended(raw_spinlock_t *lock)
+ {
+- int tmp = *(volatile signed int *)(&(lock)->slock);
++ int tmp = ACCESS_ONCE(lock->slock);
+
+ return (((tmp >> TICKET_SHIFT) - tmp) & ((1 << TICKET_SHIFT) - 1)) > 1;
+ }
+
+-static inline void __raw_spin_lock(raw_spinlock_t *lock)
++static __always_inline void __raw_spin_lock(raw_spinlock_t *lock)
+ {
+ unsigned int token, count;
+ bool free;
+@@ -223,8 +223,8 @@ static inline void __raw_spin_lock(raw_s
+ } while (unlikely(!count) && !xen_spin_wait(lock, token));
+ }
+
+-static inline void __raw_spin_lock_flags(raw_spinlock_t *lock,
+- unsigned long flags)
++static __always_inline void __raw_spin_lock_flags(raw_spinlock_t *lock,
++ unsigned long flags)
+ {
+ unsigned int token, count;
+ bool free;
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/swiotlb.h 2009-02-16 16:18:36.000000000 +0100
++++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/swiotlb.h 2009-03-16 16:38:05.000000000 +0100
+@@ -1,5 +1,9 @@
+-#ifdef CONFIG_X86_32
+-# include "swiotlb_32.h"
+-#else
+-# include "../../swiotlb.h"
+-#endif
++#ifndef _ASM_SWIOTLB_H
++#define _ASM_SWIOTLB_H
++
++#include "../../swiotlb.h"
++
++dma_addr_t swiotlb_map_single_phys(struct device *, phys_addr_t, size_t size,
++ int dir);
++
++#endif /* _ASM_SWIOTLB_H */
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/swiotlb_32.h 2009-05-14 10:56:29.000000000 +0200
++++ /dev/null 1970-01-01 00:00:00.000000000 +0000
+@@ -1,43 +0,0 @@
+-#ifndef _ASM_SWIOTLB_H
+-#define _ASM_SWIOTLB_H 1
+-
+-/* SWIOTLB interface */
+-
+-extern dma_addr_t swiotlb_map_single(struct device *hwdev, void *ptr, size_t size,
+- int dir);
+-extern void swiotlb_unmap_single(struct device *hwdev, dma_addr_t dev_addr,
+- size_t size, int dir);
+-extern void swiotlb_sync_single_for_cpu(struct device *hwdev,
+- dma_addr_t dev_addr,
+- size_t size, int dir);
+-extern void swiotlb_sync_single_for_device(struct device *hwdev,
+- dma_addr_t dev_addr,
+- size_t size, int dir);
+-extern void swiotlb_sync_sg_for_cpu(struct device *hwdev,
+- struct scatterlist *sg, int nelems,
+- int dir);
+-extern void swiotlb_sync_sg_for_device(struct device *hwdev,
+- struct scatterlist *sg, int nelems,
+- int dir);
+-extern int swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg,
+- int nents, int direction);
+-extern void swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg,
+- int nents, int direction);
+-extern int swiotlb_dma_mapping_error(dma_addr_t dma_addr);
+-#ifdef CONFIG_HIGHMEM
+-extern dma_addr_t swiotlb_map_page(struct device *hwdev, struct page *page,
+- unsigned long offset, size_t size,
+- enum dma_data_direction direction);
+-extern void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dma_address,
+- size_t size, enum dma_data_direction direction);
+-#endif
+-extern int swiotlb_dma_supported(struct device *hwdev, u64 mask);
+-extern void swiotlb_init(void);
+-
+-#ifdef CONFIG_SWIOTLB
+-extern int swiotlb;
+-#else
+-#define swiotlb 0
+-#endif
+-
+-#endif
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/system.h 2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/system.h 2009-03-16 16:38:05.000000000 +0100
+@@ -28,22 +28,44 @@ struct task_struct *__switch_to(struct t
+ * Saving eflags is important. It switches not only IOPL between tasks,
+ * it also protects other tasks from NT leaking through sysenter etc.
+ */
+-#define switch_to(prev, next, last) do { \
+- unsigned long esi, edi; \
+- asm volatile("pushfl\n\t" /* Save flags */ \
+- "pushl %%ebp\n\t" \
+- "movl %%esp,%0\n\t" /* save ESP */ \
+- "movl %5,%%esp\n\t" /* restore ESP */ \
+- "movl $1f,%1\n\t" /* save EIP */ \
+- "pushl %6\n\t" /* restore EIP */ \
+- "jmp __switch_to\n" \
++#define switch_to(prev, next, last) \
++do { \
++ /* \
++ * Context-switching clobbers all registers, so we clobber \
++ * them explicitly, via unused output variables. \
++ * (EAX and EBP are not listed because EBP is saved/restored \
++ * explicitly for wchan access and EAX is the return value of \
++ * __switch_to()) \
++ */ \
++ unsigned long ebx, ecx, edx, esi, edi; \
++ \
++ asm volatile("pushfl\n\t" /* save flags */ \
++ "pushl %%ebp\n\t" /* save EBP */ \
++ "movl %%esp,%[prev_sp]\n\t" /* save ESP */ \
++ "movl %[next_sp],%%esp\n\t" /* restore ESP */ \
++ "movl $1f,%[prev_ip]\n\t" /* save EIP */ \
++ "pushl %[next_ip]\n\t" /* restore EIP */ \
++ "jmp __switch_to\n" /* regparm call */ \
+ "1:\t" \
+- "popl %%ebp\n\t" \
+- "popfl" \
+- :"=m" (prev->thread.sp), "=m" (prev->thread.ip), \
+- "=a" (last), "=S" (esi), "=D" (edi) \
+- :"m" (next->thread.sp), "m" (next->thread.ip), \
+- "2" (prev), "d" (next)); \
++ "popl %%ebp\n\t" /* restore EBP */ \
++ "popfl\n" /* restore flags */ \
++ \
++ /* output parameters */ \
++ : [prev_sp] "=m" (prev->thread.sp), \
++ [prev_ip] "=m" (prev->thread.ip), \
++ "=a" (last), \
++ \
++ /* clobbered output registers: */ \
++ "=b" (ebx), "=c" (ecx), "=d" (edx), \
++ "=S" (esi), "=D" (edi) \
++ \
++ /* input parameters: */ \
++ : [next_sp] "m" (next->thread.sp), \
++ [next_ip] "m" (next->thread.ip), \
++ \
++ /* regparm parameters for __switch_to(): */ \
++ [prev] "a" (prev), \
++ [next] "d" (next)); \
+ } while (0)
+
+ /*
+@@ -123,30 +145,29 @@ extern void load_gs_index(unsigned);
+ */
+ #define loadsegment(seg, value) \
+ asm volatile("\n" \
+- "1:\t" \
+- "movl %k0,%%" #seg "\n" \
+- "2:\n" \
+- ".section .fixup,\"ax\"\n" \
+- "3:\t" \
+- "movl %k1, %%" #seg "\n\t" \
+- "jmp 2b\n" \
+- ".previous\n" \
+- _ASM_EXTABLE(1b,3b) \
+- : :"r" (value), "r" (0))
++ "1:\t" \
++ "movl %k0,%%" #seg "\n" \
++ "2:\n" \
++ ".section .fixup,\"ax\"\n" \
++ "3:\t" \
++ "movl %k1, %%" #seg "\n\t" \
++ "jmp 2b\n" \
++ ".previous\n" \
++ _ASM_EXTABLE(1b,3b) \
++ : :"r" (value), "r" (0))
+
+
+ /*
+ * Save a segment register away
+ */
+-#define savesegment(seg, value) \
++#define savesegment(seg, value) \
+ asm volatile("mov %%" #seg ",%0":"=rm" (value))
+
+ static inline unsigned long get_limit(unsigned long segment)
+ {
+ unsigned long __limit;
+- __asm__("lsll %1,%0"
+- :"=r" (__limit):"r" (segment));
+- return __limit+1;
++ asm("lsll %1,%0" : "=r" (__limit) : "r" (segment));
++ return __limit + 1;
+ }
+
+ static inline void xen_clts(void)
+@@ -171,13 +192,13 @@ static unsigned long __force_order;
+ static inline unsigned long xen_read_cr0(void)
+ {
+ unsigned long val;
+- asm volatile("mov %%cr0,%0\n\t" :"=r" (val), "=m" (__force_order));
++ asm volatile("mov %%cr0,%0\n\t" : "=r" (val), "=m" (__force_order));
+ return val;
+ }
+
+ static inline void xen_write_cr0(unsigned long val)
+ {
+- asm volatile("mov %0,%%cr0": :"r" (val), "m" (__force_order));
++ asm volatile("mov %0,%%cr0": : "r" (val), "m" (__force_order));
+ }
+
+ #define xen_read_cr2() (current_vcpu_info()->arch.cr2)
+@@ -186,7 +207,7 @@ static inline void xen_write_cr0(unsigne
+ static inline unsigned long xen_read_cr3(void)
+ {
+ unsigned long val;
+- asm volatile("mov %%cr3,%0\n\t" :"=r" (val), "=m" (__force_order));
++ asm volatile("mov %%cr3,%0\n\t" : "=r" (val), "=m" (__force_order));
+ #ifdef CONFIG_X86_32
+ return mfn_to_pfn(xen_cr3_to_pfn(val)) << PAGE_SHIFT;
+ #else
+@@ -201,13 +222,13 @@ static inline void xen_write_cr3(unsigne
+ #else
+ val = phys_to_machine(val);
+ #endif
+- asm volatile("mov %0,%%cr3": :"r" (val), "m" (__force_order));
++ asm volatile("mov %0,%%cr3": : "r" (val), "m" (__force_order));
+ }
+
+ static inline unsigned long xen_read_cr4(void)
+ {
+ unsigned long val;
+- asm volatile("mov %%cr4,%0\n\t" :"=r" (val), "=m" (__force_order));
++ asm volatile("mov %%cr4,%0\n\t" : "=r" (val), "=m" (__force_order));
+ return val;
+ }
+
+@@ -215,7 +236,7 @@ static inline unsigned long xen_read_cr4
+
+ static inline void xen_write_cr4(unsigned long val)
+ {
+- asm volatile("mov %0,%%cr4": :"r" (val), "m" (__force_order));
++ asm volatile("mov %0,%%cr4": : "r" (val), "m" (__force_order));
+ }
+
+ #ifdef CONFIG_X86_64
+@@ -234,6 +255,7 @@ static inline void xen_wbinvd(void)
+ {
+ asm volatile("wbinvd": : :"memory");
+ }
++
+ #define read_cr0() (xen_read_cr0())
+ #define write_cr0(x) (xen_write_cr0(x))
+ #define read_cr2() (xen_read_cr2())
+@@ -260,7 +282,7 @@ static inline void clflush(volatile void
+ asm volatile("clflush %0" : "+m" (*(volatile char __force *)__p));
+ }
+
+-#define nop() __asm__ __volatile__ ("nop")
++#define nop() asm volatile ("nop")
+
+ void disable_hlt(void);
+ void enable_hlt(void);
+@@ -280,16 +302,7 @@ void default_idle(void);
+ */
+ #ifdef CONFIG_X86_32
+ /*
+- * For now, "wmb()" doesn't actually do anything, as all
+- * Intel CPU's follow what Intel calls a *Processor Order*,
+- * in which all writes are seen in the program order even
+- * outside the CPU.
+- *
+- * I expect future Intel CPU's to have a weaker ordering,
+- * but I'd also expect them to finally get their act together
+- * and add some real memory barriers if so.
+- *
+- * Some non intel clones support out of order store. wmb() ceases to be a
++ * Some non-Intel clones support out of order store. wmb() ceases to be a
+ * nop for these.
+ */
+ #define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2)
+@@ -368,7 +381,7 @@ void default_idle(void);
+ # define smp_wmb() barrier()
+ #endif
+ #define smp_read_barrier_depends() read_barrier_depends()
+-#define set_mb(var, value) do { (void) xchg(&var, value); } while (0)
++#define set_mb(var, value) do { (void)xchg(&var, value); } while (0)
+ #else
+ #define smp_mb() barrier()
+ #define smp_rmb() barrier()
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/tlbflush.h 2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/tlbflush.h 2009-03-16 16:38:05.000000000 +0100
+@@ -86,8 +86,7 @@ static inline void flush_tlb_range(struc
+ #define TLBSTATE_LAZY 2
+
+ #ifdef CONFIG_X86_32
+-struct tlb_state
+-{
++struct tlb_state {
+ struct mm_struct *active_mm;
+ int state;
+ char __cacheline_padding[L1_CACHE_BYTES-8];
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/vga.h 2009-05-14 10:56:29.000000000 +0200
++++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/vga.h 2009-03-16 16:38:05.000000000 +0100
+@@ -12,9 +12,9 @@
+ * access the videoram directly without any black magic.
+ */
+
+-#define VGA_MAP_MEM(x,s) (unsigned long)isa_bus_to_virt(x)
++#define VGA_MAP_MEM(x, s) (unsigned long)isa_bus_to_virt(x)
+
+ #define vga_readb(x) (*(x))
+-#define vga_writeb(x,y) (*(y) = (x))
++#define vga_writeb(x, y) (*(y) = (x))
+
+ #endif
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/xor_64.h 2009-05-14 10:56:29.000000000 +0200
++++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/xor_64.h 2009-03-16 16:38:05.000000000 +0100
+@@ -1,20 +1,23 @@
+ /*
+- * x86-64 changes / gcc fixes from Andi Kleen.
++ * x86-64 changes / gcc fixes from Andi Kleen.
+ * Copyright 2002 Andi Kleen, SuSE Labs.
+ *
+ * This hasn't been optimized for the hammer yet, but there are likely
+ * no advantages to be gotten from x86-64 here anyways.
+ */
+
+-typedef struct { unsigned long a,b; } __attribute__((aligned(16))) xmm_store_t;
++typedef struct {
++ unsigned long a, b;
++} __attribute__((aligned(16))) xmm_store_t;
+
+-/* Doesn't use gcc to save the XMM registers, because there is no easy way to
++/* Doesn't use gcc to save the XMM registers, because there is no easy way to
+ tell it to do a clts before the register saving. */
+-#define XMMS_SAVE do { \
++#define XMMS_SAVE \
++do { \
+ preempt_disable(); \
+ if (!(current_thread_info()->status & TS_USEDFPU)) \
+ clts(); \
+- __asm__ __volatile__ ( \
++ asm volatile( \
+ "movups %%xmm0,(%1) ;\n\t" \
+ "movups %%xmm1,0x10(%1) ;\n\t" \
+ "movups %%xmm2,0x20(%1) ;\n\t" \
+@@ -22,10 +25,11 @@ typedef struct { unsigned long a,b; } __
+ : "=&r" (cr0) \
+ : "r" (xmm_save) \
+ : "memory"); \
+-} while(0)
++} while (0)
+
+-#define XMMS_RESTORE do { \
+- asm volatile ( \
++#define XMMS_RESTORE \
++do { \
++ asm volatile( \
+ "sfence ;\n\t" \
+ "movups (%1),%%xmm0 ;\n\t" \
+ "movups 0x10(%1),%%xmm1 ;\n\t" \
+@@ -37,72 +41,72 @@ typedef struct { unsigned long a,b; } __
+ if (!(current_thread_info()->status & TS_USEDFPU)) \
+ stts(); \
+ preempt_enable(); \
+-} while(0)
++} while (0)
+
+ #define OFFS(x) "16*("#x")"
+ #define PF_OFFS(x) "256+16*("#x")"
+ #define PF0(x) " prefetchnta "PF_OFFS(x)"(%[p1]) ;\n"
+-#define LD(x,y) " movaps "OFFS(x)"(%[p1]), %%xmm"#y" ;\n"
+-#define ST(x,y) " movaps %%xmm"#y", "OFFS(x)"(%[p1]) ;\n"
++#define LD(x, y) " movaps "OFFS(x)"(%[p1]), %%xmm"#y" ;\n"
++#define ST(x, y) " movaps %%xmm"#y", "OFFS(x)"(%[p1]) ;\n"
+ #define PF1(x) " prefetchnta "PF_OFFS(x)"(%[p2]) ;\n"
+ #define PF2(x) " prefetchnta "PF_OFFS(x)"(%[p3]) ;\n"
+ #define PF3(x) " prefetchnta "PF_OFFS(x)"(%[p4]) ;\n"
+ #define PF4(x) " prefetchnta "PF_OFFS(x)"(%[p5]) ;\n"
+ #define PF5(x) " prefetchnta "PF_OFFS(x)"(%[p6]) ;\n"
+-#define XO1(x,y) " xorps "OFFS(x)"(%[p2]), %%xmm"#y" ;\n"
+-#define XO2(x,y) " xorps "OFFS(x)"(%[p3]), %%xmm"#y" ;\n"
+-#define XO3(x,y) " xorps "OFFS(x)"(%[p4]), %%xmm"#y" ;\n"
+-#define XO4(x,y) " xorps "OFFS(x)"(%[p5]), %%xmm"#y" ;\n"
+-#define XO5(x,y) " xorps "OFFS(x)"(%[p6]), %%xmm"#y" ;\n"
++#define XO1(x, y) " xorps "OFFS(x)"(%[p2]), %%xmm"#y" ;\n"
++#define XO2(x, y) " xorps "OFFS(x)"(%[p3]), %%xmm"#y" ;\n"
++#define XO3(x, y) " xorps "OFFS(x)"(%[p4]), %%xmm"#y" ;\n"
++#define XO4(x, y) " xorps "OFFS(x)"(%[p5]), %%xmm"#y" ;\n"
++#define XO5(x, y) " xorps "OFFS(x)"(%[p6]), %%xmm"#y" ;\n"
+
+
+ static void
+ xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+ {
+- unsigned int lines = bytes >> 8;
++ unsigned int lines = bytes >> 8;
+ unsigned long cr0;
+ xmm_store_t xmm_save[4];
+
+ XMMS_SAVE;
+
+- asm volatile (
++ asm volatile(
+ #undef BLOCK
+ #define BLOCK(i) \
+- LD(i,0) \
+- LD(i+1,1) \
++ LD(i, 0) \
++ LD(i + 1, 1) \
+ PF1(i) \
+- PF1(i+2) \
+- LD(i+2,2) \
+- LD(i+3,3) \
+- PF0(i+4) \
+- PF0(i+6) \
+- XO1(i,0) \
+- XO1(i+1,1) \
+- XO1(i+2,2) \
+- XO1(i+3,3) \
+- ST(i,0) \
+- ST(i+1,1) \
+- ST(i+2,2) \
+- ST(i+3,3) \
++ PF1(i + 2) \
++ LD(i + 2, 2) \
++ LD(i + 3, 3) \
++ PF0(i + 4) \
++ PF0(i + 6) \
++ XO1(i, 0) \
++ XO1(i + 1, 1) \
++ XO1(i + 2, 2) \
++ XO1(i + 3, 3) \
++ ST(i, 0) \
++ ST(i + 1, 1) \
++ ST(i + 2, 2) \
++ ST(i + 3, 3) \
+
+
+ PF0(0)
+ PF0(2)
+
+ " .align 32 ;\n"
+- " 1: ;\n"
++ " 1: ;\n"
+
+ BLOCK(0)
+ BLOCK(4)
+ BLOCK(8)
+ BLOCK(12)
+
+- " addq %[inc], %[p1] ;\n"
+- " addq %[inc], %[p2] ;\n"
++ " addq %[inc], %[p1] ;\n"
++ " addq %[inc], %[p2] ;\n"
+ " decl %[cnt] ; jnz 1b"
+ : [p1] "+r" (p1), [p2] "+r" (p2), [cnt] "+r" (lines)
+- : [inc] "r" (256UL)
+- : "memory");
++ : [inc] "r" (256UL)
++ : "memory");
+
+ XMMS_RESTORE;
+ }
+@@ -117,52 +121,52 @@ xor_sse_3(unsigned long bytes, unsigned
+
+ XMMS_SAVE;
+
+- __asm__ __volatile__ (
++ asm volatile(
+ #undef BLOCK
+ #define BLOCK(i) \
+ PF1(i) \
+- PF1(i+2) \
+- LD(i,0) \
+- LD(i+1,1) \
+- LD(i+2,2) \
+- LD(i+3,3) \
++ PF1(i + 2) \
++ LD(i, 0) \
++ LD(i + 1, 1) \
++ LD(i + 2, 2) \
++ LD(i + 3, 3) \
+ PF2(i) \
+- PF2(i+2) \
+- PF0(i+4) \
+- PF0(i+6) \
+- XO1(i,0) \
+- XO1(i+1,1) \
+- XO1(i+2,2) \
+- XO1(i+3,3) \
+- XO2(i,0) \
+- XO2(i+1,1) \
+- XO2(i+2,2) \
+- XO2(i+3,3) \
+- ST(i,0) \
+- ST(i+1,1) \
+- ST(i+2,2) \
+- ST(i+3,3) \
++ PF2(i + 2) \
++ PF0(i + 4) \
++ PF0(i + 6) \
++ XO1(i, 0) \
++ XO1(i + 1, 1) \
++ XO1(i + 2, 2) \
++ XO1(i + 3, 3) \
++ XO2(i, 0) \
++ XO2(i + 1, 1) \
++ XO2(i + 2, 2) \
++ XO2(i + 3, 3) \
++ ST(i, 0) \
++ ST(i + 1, 1) \
++ ST(i + 2, 2) \
++ ST(i + 3, 3) \
+
+
+ PF0(0)
+ PF0(2)
+
+ " .align 32 ;\n"
+- " 1: ;\n"
++ " 1: ;\n"
+
+ BLOCK(0)
+ BLOCK(4)
+ BLOCK(8)
+ BLOCK(12)
+
+- " addq %[inc], %[p1] ;\n"
+- " addq %[inc], %[p2] ;\n"
+- " addq %[inc], %[p3] ;\n"
++ " addq %[inc], %[p1] ;\n"
++ " addq %[inc], %[p2] ;\n"
++ " addq %[inc], %[p3] ;\n"
+ " decl %[cnt] ; jnz 1b"
+ : [cnt] "+r" (lines),
+ [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3)
+ : [inc] "r" (256UL)
+- : "memory");
++ : "memory");
+ XMMS_RESTORE;
+ }
+
+@@ -171,64 +175,64 @@ xor_sse_4(unsigned long bytes, unsigned
+ unsigned long *p3, unsigned long *p4)
+ {
+ unsigned int lines = bytes >> 8;
+- xmm_store_t xmm_save[4];
++ xmm_store_t xmm_save[4];
+ unsigned long cr0;
+
+ XMMS_SAVE;
+
+- __asm__ __volatile__ (
++ asm volatile(
+ #undef BLOCK
+ #define BLOCK(i) \
+ PF1(i) \
+- PF1(i+2) \
+- LD(i,0) \
+- LD(i+1,1) \
+- LD(i+2,2) \
+- LD(i+3,3) \
++ PF1(i + 2) \
++ LD(i, 0) \
++ LD(i + 1, 1) \
++ LD(i + 2, 2) \
++ LD(i + 3, 3) \
+ PF2(i) \
+- PF2(i+2) \
+- XO1(i,0) \
+- XO1(i+1,1) \
+- XO1(i+2,2) \
+- XO1(i+3,3) \
++ PF2(i + 2) \
++ XO1(i, 0) \
++ XO1(i + 1, 1) \
++ XO1(i + 2, 2) \
++ XO1(i + 3, 3) \
+ PF3(i) \
+- PF3(i+2) \
+- PF0(i+4) \
+- PF0(i+6) \
+- XO2(i,0) \
+- XO2(i+1,1) \
+- XO2(i+2,2) \
+- XO2(i+3,3) \
+- XO3(i,0) \
+- XO3(i+1,1) \
+- XO3(i+2,2) \
+- XO3(i+3,3) \
+- ST(i,0) \
+- ST(i+1,1) \
+- ST(i+2,2) \
+- ST(i+3,3) \
++ PF3(i + 2) \
++ PF0(i + 4) \
++ PF0(i + 6) \
++ XO2(i, 0) \
++ XO2(i + 1, 1) \
++ XO2(i + 2, 2) \
++ XO2(i + 3, 3) \
++ XO3(i, 0) \
++ XO3(i + 1, 1) \
++ XO3(i + 2, 2) \
++ XO3(i + 3, 3) \
++ ST(i, 0) \
++ ST(i + 1, 1) \
++ ST(i + 2, 2) \
++ ST(i + 3, 3) \
+
+
+ PF0(0)
+ PF0(2)
+
+ " .align 32 ;\n"
+- " 1: ;\n"
++ " 1: ;\n"
+
+ BLOCK(0)
+ BLOCK(4)
+ BLOCK(8)
+ BLOCK(12)
+
+- " addq %[inc], %[p1] ;\n"
+- " addq %[inc], %[p2] ;\n"
+- " addq %[inc], %[p3] ;\n"
+- " addq %[inc], %[p4] ;\n"
++ " addq %[inc], %[p1] ;\n"
++ " addq %[inc], %[p2] ;\n"
++ " addq %[inc], %[p3] ;\n"
++ " addq %[inc], %[p4] ;\n"
+ " decl %[cnt] ; jnz 1b"
+ : [cnt] "+c" (lines),
+ [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4)
+ : [inc] "r" (256UL)
+- : "memory" );
++ : "memory" );
+
+ XMMS_RESTORE;
+ }
+@@ -237,70 +241,70 @@ static void
+ xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+ unsigned long *p3, unsigned long *p4, unsigned long *p5)
+ {
+- unsigned int lines = bytes >> 8;
++ unsigned int lines = bytes >> 8;
+ xmm_store_t xmm_save[4];
+ unsigned long cr0;
+
+ XMMS_SAVE;
+
+- __asm__ __volatile__ (
++ asm volatile(
+ #undef BLOCK
+ #define BLOCK(i) \
+ PF1(i) \
+- PF1(i+2) \
+- LD(i,0) \
+- LD(i+1,1) \
+- LD(i+2,2) \
+- LD(i+3,3) \
++ PF1(i + 2) \
++ LD(i, 0) \
++ LD(i + 1, 1) \
++ LD(i + 2, 2) \
++ LD(i + 3, 3) \
+ PF2(i) \
+- PF2(i+2) \
+- XO1(i,0) \
+- XO1(i+1,1) \
+- XO1(i+2,2) \
+- XO1(i+3,3) \
++ PF2(i + 2) \
++ XO1(i, 0) \
++ XO1(i + 1, 1) \
++ XO1(i + 2, 2) \
++ XO1(i + 3, 3) \
+ PF3(i) \
+- PF3(i+2) \
+- XO2(i,0) \
+- XO2(i+1,1) \
+- XO2(i+2,2) \
+- XO2(i+3,3) \
++ PF3(i + 2) \
++ XO2(i, 0) \
++ XO2(i + 1, 1) \
++ XO2(i + 2, 2) \
++ XO2(i + 3, 3) \
+ PF4(i) \
+- PF4(i+2) \
+- PF0(i+4) \
+- PF0(i+6) \
+- XO3(i,0) \
+- XO3(i+1,1) \
+- XO3(i+2,2) \
+- XO3(i+3,3) \
+- XO4(i,0) \
+- XO4(i+1,1) \
+- XO4(i+2,2) \
+- XO4(i+3,3) \
+- ST(i,0) \
+- ST(i+1,1) \
+- ST(i+2,2) \
+- ST(i+3,3) \
++ PF4(i + 2) \
++ PF0(i + 4) \
++ PF0(i + 6) \
++ XO3(i, 0) \
++ XO3(i + 1, 1) \
++ XO3(i + 2, 2) \
++ XO3(i + 3, 3) \
++ XO4(i, 0) \
++ XO4(i + 1, 1) \
++ XO4(i + 2, 2) \
++ XO4(i + 3, 3) \
++ ST(i, 0) \
++ ST(i + 1, 1) \
++ ST(i + 2, 2) \
++ ST(i + 3, 3) \
+
+
+ PF0(0)
+ PF0(2)
+
+ " .align 32 ;\n"
+- " 1: ;\n"
++ " 1: ;\n"
+
+ BLOCK(0)
+ BLOCK(4)
+ BLOCK(8)
+ BLOCK(12)
+
+- " addq %[inc], %[p1] ;\n"
+- " addq %[inc], %[p2] ;\n"
+- " addq %[inc], %[p3] ;\n"
+- " addq %[inc], %[p4] ;\n"
+- " addq %[inc], %[p5] ;\n"
++ " addq %[inc], %[p1] ;\n"
++ " addq %[inc], %[p2] ;\n"
++ " addq %[inc], %[p3] ;\n"
++ " addq %[inc], %[p4] ;\n"
++ " addq %[inc], %[p5] ;\n"
+ " decl %[cnt] ; jnz 1b"
+ : [cnt] "+c" (lines),
+- [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4),
++ [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4),
+ [p5] "+r" (p5)
+ : [inc] "r" (256UL)
+ : "memory");
+@@ -309,18 +313,18 @@ xor_sse_5(unsigned long bytes, unsigned
+ }
+
+ static struct xor_block_template xor_block_sse = {
+- .name = "generic_sse",
+- .do_2 = xor_sse_2,
+- .do_3 = xor_sse_3,
+- .do_4 = xor_sse_4,
+- .do_5 = xor_sse_5,
++ .name = "generic_sse",
++ .do_2 = xor_sse_2,
++ .do_3 = xor_sse_3,
++ .do_4 = xor_sse_4,
++ .do_5 = xor_sse_5,
+ };
+
+ #undef XOR_TRY_TEMPLATES
+-#define XOR_TRY_TEMPLATES \
+- do { \
+- xor_speed(&xor_block_sse); \
+- } while (0)
++#define XOR_TRY_TEMPLATES \
++do { \
++ xor_speed(&xor_block_sse); \
++} while (0)
+
+ /* We force the use of the SSE xor block because it can write around L2.
+ We may also be able to load into the L1 only depending on how the cpu
+--- sle11-2009-05-14.orig/include/asm-x86/scatterlist.h 2009-05-14 10:56:29.000000000 +0200
++++ sle11-2009-05-14/include/asm-x86/scatterlist.h 2009-03-16 16:38:05.000000000 +0100
+@@ -24,7 +24,7 @@ struct scatterlist {
+ * returns.
+ */
+ #define sg_dma_address(sg) ((sg)->dma_address)
+-#ifdef CONFIG_X86_32
++#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN)
+ # define sg_dma_len(sg) ((sg)->length)
+ #else
+ # define sg_dma_len(sg) ((sg)->dma_length)
+--- sle11-2009-05-14.orig/include/linux/page-flags.h 2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/include/linux/page-flags.h 2009-03-16 16:38:05.000000000 +0100
+@@ -278,18 +278,25 @@ static inline void SetPageUptodate(struc
+
+ CLEARPAGEFLAG(Uptodate, uptodate)
+
+-#define PageForeign(page) test_bit(PG_foreign, &(page)->flags)
+-#define SetPageForeign(_page, dtor) do { \
+- set_bit(PG_foreign, &(_page)->flags); \
+- BUG_ON((dtor) == (void (*)(struct page *, unsigned int))0); \
+- (_page)->index = (long)(dtor); \
+-} while (0)
+-#define ClearPageForeign(page) do { \
+- clear_bit(PG_foreign, &(page)->flags); \
+- (page)->index = 0; \
+-} while (0)
+-#define PageForeignDestructor(_page, order) \
+- ((void (*)(struct page *, unsigned int))(_page)->index)(_page, order)
++#ifdef CONFIG_XEN
++TESTPAGEFLAG(Foreign, foreign)
++static inline void SetPageForeign(struct page *page,
++ void (*dtor)(struct page *, unsigned int))
++{
++ BUG_ON(!dtor);
++ set_bit(PG_foreign, &page->flags);
++ page->index = (long)dtor;
++}
++static inline void ClearPageForeign(struct page *page)
++{
++ clear_bit(PG_foreign, &page->flags);
++ page->index = 0;
++}
++static inline void PageForeignDestructor(struct page *page, unsigned int order)
++{
++ ((void (*)(struct page *, unsigned int))page->index)(page, order);
++}
++#endif
+
+ extern void cancel_dirty_page(struct page *page, unsigned int account_size);
+
+--- sle11-2009-05-14.orig/include/xen/balloon.h 2008-11-25 12:35:56.000000000 +0100
++++ sle11-2009-05-14/include/xen/balloon.h 2009-03-16 16:38:05.000000000 +0100
+@@ -31,9 +31,12 @@
+ * IN THE SOFTWARE.
+ */
+
+-#ifndef __ASM_BALLOON_H__
+-#define __ASM_BALLOON_H__
++#ifndef __XEN_BALLOON_H__
++#define __XEN_BALLOON_H__
+
++#include <linux/spinlock.h>
++
++#if !defined(CONFIG_PARAVIRT_XEN) || defined(HAVE_XEN_PLATFORM_COMPAT_H)
+ /*
+ * Inform the balloon driver that it should allow some slop for device-driver
+ * memory activities.
+@@ -53,5 +56,6 @@ void balloon_release_driver_page(struct
+ extern spinlock_t balloon_lock;
+ #define balloon_lock(__flags) spin_lock_irqsave(&balloon_lock, __flags)
+ #define balloon_unlock(__flags) spin_unlock_irqrestore(&balloon_lock, __flags)
++#endif
+
+-#endif /* __ASM_BALLOON_H__ */
++#endif /* __XEN_BALLOON_H__ */
+--- sle11-2009-05-14.orig/include/xen/interface/grant_table.h 2008-11-25 12:22:34.000000000 +0100
++++ sle11-2009-05-14/include/xen/interface/grant_table.h 2009-03-16 16:38:05.000000000 +0100
+@@ -193,6 +193,7 @@ struct gnttab_map_grant_ref {
+ grant_handle_t handle;
+ uint64_t dev_bus_addr;
+ };
++DEFINE_XEN_GUEST_HANDLE_STRUCT(gnttab_map_grant_ref);
+ typedef struct gnttab_map_grant_ref gnttab_map_grant_ref_t;
+ DEFINE_XEN_GUEST_HANDLE(gnttab_map_grant_ref_t);
+
+@@ -216,6 +217,7 @@ struct gnttab_unmap_grant_ref {
+ /* OUT parameters. */
+ int16_t status; /* GNTST_* */
+ };
++DEFINE_XEN_GUEST_HANDLE_STRUCT(gnttab_unmap_grant_ref);
+ typedef struct gnttab_unmap_grant_ref gnttab_unmap_grant_ref_t;
+ DEFINE_XEN_GUEST_HANDLE(gnttab_unmap_grant_ref_t);
+
+@@ -237,6 +239,7 @@ struct gnttab_setup_table {
+ int16_t status; /* GNTST_* */
+ XEN_GUEST_HANDLE(ulong) frame_list;
+ };
++DEFINE_XEN_GUEST_HANDLE_STRUCT(gnttab_setup_table);
+ typedef struct gnttab_setup_table gnttab_setup_table_t;
+ DEFINE_XEN_GUEST_HANDLE(gnttab_setup_table_t);
+
+@@ -251,6 +254,7 @@ struct gnttab_dump_table {
+ /* OUT parameters. */
+ int16_t status; /* GNTST_* */
+ };
++DEFINE_XEN_GUEST_HANDLE_STRUCT(gnttab_dump_table);
+ typedef struct gnttab_dump_table gnttab_dump_table_t;
+ DEFINE_XEN_GUEST_HANDLE(gnttab_dump_table_t);
+
+@@ -271,6 +275,7 @@ struct gnttab_transfer {
+ /* OUT parameters. */
+ int16_t status;
+ };
++DEFINE_XEN_GUEST_HANDLE_STRUCT(gnttab_transfer);
+ typedef struct gnttab_transfer gnttab_transfer_t;
+ DEFINE_XEN_GUEST_HANDLE(gnttab_transfer_t);
+
+@@ -314,6 +319,7 @@ typedef struct gnttab_copy {
+ /* OUT parameters. */
+ int16_t status;
+ } gnttab_copy_t;
++DEFINE_XEN_GUEST_HANDLE_STRUCT(gnttab_copy);
+ DEFINE_XEN_GUEST_HANDLE(gnttab_copy_t);
+
+ /*
+@@ -332,6 +338,7 @@ struct gnttab_query_size {
+ uint32_t max_nr_frames;
+ int16_t status; /* GNTST_* */
+ };
++DEFINE_XEN_GUEST_HANDLE_STRUCT(gnttab_query_size);
+ typedef struct gnttab_query_size gnttab_query_size_t;
+ DEFINE_XEN_GUEST_HANDLE(gnttab_query_size_t);
+
+--- sle11-2009-05-14.orig/include/xen/interface/io/fbif.h 2008-11-25 12:35:56.000000000 +0100
++++ sle11-2009-05-14/include/xen/interface/io/fbif.h 2009-03-16 16:38:05.000000000 +0100
+@@ -150,7 +150,12 @@ struct xenfb_page
+ * framebuffer with a max resolution of 12,800x10,240. Should
+ * be enough for a while with room leftover for expansion.
+ */
++#ifndef CONFIG_PARAVIRT_XEN
+ unsigned long pd[256];
++#else
++ /* Two directory pages should be enough for a while. */
++ unsigned long pd[2];
++#endif
+ };
+
+ /*
+--- sle11-2009-05-14.orig/include/xen/interface/memory.h 2009-02-16 16:17:21.000000000 +0100
++++ sle11-2009-05-14/include/xen/interface/memory.h 2009-03-16 16:38:05.000000000 +0100
+@@ -62,7 +62,7 @@ struct xen_memory_reservation {
+ * OUT: GMFN bases of extents that were allocated
+ * (NB. This command also updates the mach_to_phys translation table)
+ */
+- XEN_GUEST_HANDLE(ulong) extent_start;
++ XEN_GUEST_HANDLE(xen_pfn_t) extent_start;
+
+ /* Number of extents, and size/alignment of each (2^extent_order pages). */
+ xen_ulong_t nr_extents;
+@@ -82,7 +82,6 @@ struct xen_memory_reservation {
+ domid_t domid;
+
+ };
+-DEFINE_XEN_GUEST_HANDLE_STRUCT(xen_memory_reservation);
+ typedef struct xen_memory_reservation xen_memory_reservation_t;
+ DEFINE_XEN_GUEST_HANDLE(xen_memory_reservation_t);
+
+@@ -168,7 +167,11 @@ struct xen_machphys_mfn_list {
+ * any large discontiguities in the machine address space, 2MB gaps in
+ * the machphys table will be represented by an MFN base of zero.
+ */
++#ifndef CONFIG_PARAVIRT_XEN
+ XEN_GUEST_HANDLE(xen_pfn_t) extent_start;
++#else
++ ulong extent_start;
++#endif
+
+ /*
+ * Number of extents written to the above array. This will be smaller
+@@ -176,7 +179,6 @@ struct xen_machphys_mfn_list {
+ */
+ unsigned int nr_extents;
+ };
+-DEFINE_XEN_GUEST_HANDLE_STRUCT(xen_machphys_mfn_list);
+ typedef struct xen_machphys_mfn_list xen_machphys_mfn_list_t;
+ DEFINE_XEN_GUEST_HANDLE(xen_machphys_mfn_list_t);
+
+@@ -216,7 +218,6 @@ struct xen_add_to_physmap {
+ /* GPFN where the source mapping page should appear. */
+ xen_pfn_t gpfn;
+ };
+-DEFINE_XEN_GUEST_HANDLE_STRUCT(xen_add_to_physmap);
+ typedef struct xen_add_to_physmap xen_add_to_physmap_t;
+ DEFINE_XEN_GUEST_HANDLE(xen_add_to_physmap_t);
+
+@@ -249,13 +250,21 @@ struct xen_translate_gpfn_list {
+ xen_ulong_t nr_gpfns;
+
+ /* List of GPFNs to translate. */
++#ifndef CONFIG_PARAVIRT_XEN
+ XEN_GUEST_HANDLE(xen_pfn_t) gpfn_list;
++#else
++ ulong gpfn_list;
++#endif
+
+ /*
+ * Output list to contain MFN translations. May be the same as the input
+ * list (in which case each input GPFN is overwritten with the output MFN).
+ */
++#ifndef CONFIG_PARAVIRT_XEN
+ XEN_GUEST_HANDLE(xen_pfn_t) mfn_list;
++#else
++ ulong mfn_list;
++#endif
+ };
+ DEFINE_XEN_GUEST_HANDLE_STRUCT(xen_translate_gpfn_list);
+ typedef struct xen_translate_gpfn_list xen_translate_gpfn_list_t;
+--- sle11-2009-05-14.orig/include/xen/interface/vcpu.h 2008-11-25 12:35:56.000000000 +0100
++++ sle11-2009-05-14/include/xen/interface/vcpu.h 2009-03-16 16:38:05.000000000 +0100
+@@ -85,6 +85,7 @@ struct vcpu_runstate_info {
+ */
+ uint64_t time[4];
+ };
++DEFINE_XEN_GUEST_HANDLE_STRUCT(vcpu_runstate_info);
+ typedef struct vcpu_runstate_info vcpu_runstate_info_t;
+ DEFINE_XEN_GUEST_HANDLE(vcpu_runstate_info_t);
+
+@@ -140,6 +141,7 @@ DEFINE_XEN_GUEST_HANDLE(vcpu_register_ru
+ struct vcpu_set_periodic_timer {
+ uint64_t period_ns;
+ };
++DEFINE_XEN_GUEST_HANDLE_STRUCT(vcpu_set_periodic_timer);
+ typedef struct vcpu_set_periodic_timer vcpu_set_periodic_timer_t;
+ DEFINE_XEN_GUEST_HANDLE(vcpu_set_periodic_timer_t);
+
+@@ -153,6 +155,7 @@ struct vcpu_set_singleshot_timer {
+ uint64_t timeout_abs_ns; /* Absolute system time value in nanoseconds. */
+ uint32_t flags; /* VCPU_SSHOTTMR_??? */
+ };
++DEFINE_XEN_GUEST_HANDLE_STRUCT(vcpu_set_singleshot_timer);
+ typedef struct vcpu_set_singleshot_timer vcpu_set_singleshot_timer_t;
+ DEFINE_XEN_GUEST_HANDLE(vcpu_set_singleshot_timer_t);
+
+@@ -176,6 +179,7 @@ struct vcpu_register_vcpu_info {
+ uint32_t offset; /* offset within page */
+ uint32_t rsvd; /* unused */
+ };
++DEFINE_XEN_GUEST_HANDLE_STRUCT(vcpu_register_vcpu_info);
+ typedef struct vcpu_register_vcpu_info vcpu_register_vcpu_info_t;
+ DEFINE_XEN_GUEST_HANDLE(vcpu_register_vcpu_info_t);
+
+--- sle11-2009-05-14.orig/lib/swiotlb-xen.c 2009-03-16 16:33:40.000000000 +0100
++++ sle11-2009-05-14/lib/swiotlb-xen.c 2009-03-16 16:38:05.000000000 +0100
+@@ -20,6 +20,7 @@
+ #include <linux/ctype.h>
+ #include <linux/init.h>
+ #include <linux/bootmem.h>
++#include <linux/iommu-helper.h>
+ #include <linux/highmem.h>
+ #include <asm/io.h>
+ #include <asm/pci.h>
+@@ -288,15 +289,6 @@ __sync_single(struct phys_addr buffer, c
+ }
+ }
+
+-static inline unsigned int is_span_boundary(unsigned int index,
+- unsigned int nslots,
+- unsigned long offset_slots,
+- unsigned long max_slots)
+-{
+- unsigned long offset = (offset_slots + index) & (max_slots - 1);
+- return offset + nslots > max_slots;
+-}
+-
+ /*
+ * Allocates bounce buffer and returns its kernel virtual address.
+ */
+@@ -335,61 +327,53 @@ map_single(struct device *hwdev, struct
+ * request and allocate a buffer from that IO TLB pool.
+ */
+ spin_lock_irqsave(&io_tlb_lock, flags);
+- {
+- index = ALIGN(io_tlb_index, stride);
+- if (index >= iotlb_nslabs)
+- index = 0;
+- wrap = index;
++ index = ALIGN(io_tlb_index, stride);
++ if (index >= iotlb_nslabs)
++ index = 0;
++ wrap = index;
+
+- do {
+- while (is_span_boundary(index, nslots, offset_slots,
+- max_slots)) {
+- index += stride;
+- if (index >= iotlb_nslabs)
+- index = 0;
+- if (index == wrap)
+- goto not_found;
+- }
++ do {
++ while (iommu_is_span_boundary(index, nslots, offset_slots,
++ max_slots)) {
++ index += stride;
++ if (index >= iotlb_nslabs)
++ index = 0;
++ if (index == wrap)
++ goto not_found;
++ }
++
++ /*
++ * If we find a slot that indicates we have 'nslots' number of
++ * contiguous buffers, we allocate the buffers from that slot
++ * and mark the entries as '0' indicating unavailable.
++ */
++ if (io_tlb_list[index] >= nslots) {
++ int count = 0;
++
++ for (i = index; i < (int) (index + nslots); i++)
++ io_tlb_list[i] = 0;
++ for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1) && io_tlb_list[i]; i--)
++ io_tlb_list[i] = ++count;
++ dma_addr = iotlb_virt_start + (index << IO_TLB_SHIFT);
+
+ /*
+- * If we find a slot that indicates we have 'nslots'
+- * number of contiguous buffers, we allocate the
+- * buffers from that slot and mark the entries as '0'
+- * indicating unavailable.
++ * Update the indices to avoid searching in the next
++ * round.
+ */
+- if (io_tlb_list[index] >= nslots) {
+- int count = 0;
+-
+- for (i = index; i < (int)(index + nslots); i++)
+- io_tlb_list[i] = 0;
+- for (i = index - 1;
+- (OFFSET(i, IO_TLB_SEGSIZE) !=
+- IO_TLB_SEGSIZE -1) && io_tlb_list[i];
+- i--)
+- io_tlb_list[i] = ++count;
+- dma_addr = iotlb_virt_start +
+- (index << IO_TLB_SHIFT);
+-
+- /*
+- * Update the indices to avoid searching in
+- * the next round.
+- */
+- io_tlb_index =
+- ((index + nslots) < iotlb_nslabs
+- ? (index + nslots) : 0);
++ io_tlb_index = ((index + nslots) < iotlb_nslabs
++ ? (index + nslots) : 0);
+
+- goto found;
+- }
+- index += stride;
+- if (index >= iotlb_nslabs)
+- index = 0;
+- } while (index != wrap);
++ goto found;
++ }
++ index += stride;
++ if (index >= iotlb_nslabs)
++ index = 0;
++ } while (index != wrap);
+
+- not_found:
+- spin_unlock_irqrestore(&io_tlb_lock, flags);
+- return NULL;
+- }
+- found:
++not_found:
++ spin_unlock_irqrestore(&io_tlb_lock, flags);
++ return NULL;
++found:
+ spin_unlock_irqrestore(&io_tlb_lock, flags);
+
+ /*
+@@ -502,11 +486,13 @@ swiotlb_full(struct device *dev, size_t
+ * Once the device is given the dma address, the device owns this memory until
+ * either swiotlb_unmap_single or swiotlb_dma_sync_single is performed.
+ */
+-dma_addr_t
+-swiotlb_map_single(struct device *hwdev, void *ptr, size_t size, int dir)
+-{
+- dma_addr_t dev_addr = gnttab_dma_map_page(virt_to_page(ptr)) +
+- offset_in_page(ptr);
++static dma_addr_t
++_swiotlb_map_single(struct device *hwdev, phys_addr_t paddr, size_t size,
++ int dir, struct dma_attrs *attrs)
++{
++ struct page *page = pfn_to_page(paddr >> PAGE_SHIFT);
++ dma_addr_t dev_addr = gnttab_dma_map_page(page) +
++ offset_in_page(paddr);
+ void *map;
+ struct phys_addr buffer;
+
+@@ -517,7 +503,7 @@ swiotlb_map_single(struct device *hwdev,
+ * we can safely return the device addr and not worry about bounce
+ * buffering it.
+ */
+- if (!range_straddles_page_boundary(__pa(ptr), size) &&
++ if (!range_straddles_page_boundary(paddr, size) &&
+ !address_needs_mapping(hwdev, dev_addr))
+ return dev_addr;
+
+@@ -525,8 +511,8 @@ swiotlb_map_single(struct device *hwdev,
+ * Oh well, have to allocate and map a bounce buffer.
+ */
+ gnttab_dma_unmap_page(dev_addr);
+- buffer.page = virt_to_page(ptr);
+- buffer.offset = (unsigned long)ptr & ~PAGE_MASK;
++ buffer.page = page;
++ buffer.offset = offset_in_page(paddr);
+ map = map_single(hwdev, buffer, size, dir);
+ if (!map) {
+ swiotlb_full(hwdev, size, dir, 1);
+@@ -537,6 +523,26 @@ swiotlb_map_single(struct device *hwdev,
+ return dev_addr;
+ }
+
++dma_addr_t
++swiotlb_map_single_attrs(struct device *hwdev, void *ptr, size_t size,
++ int dir, struct dma_attrs *attrs)
++{
++ return _swiotlb_map_single(hwdev, virt_to_phys(ptr), size, dir, attrs);
++}
++EXPORT_SYMBOL(swiotlb_map_single_attrs);
++
++dma_addr_t
++swiotlb_map_single(struct device *hwdev, void *ptr, size_t size, int dir)
++{
++ return _swiotlb_map_single(hwdev, virt_to_phys(ptr), size, dir, NULL);
++}
++
++dma_addr_t
++swiotlb_map_single_phys(struct device *hwdev, phys_addr_t paddr, size_t size, int dir)
++{
++ return _swiotlb_map_single(hwdev, paddr, size, dir, NULL);
++}
++
+ /*
+ * Unmap a single streaming mode DMA translation. The dma_addr and size must
+ * match what was provided for in a previous swiotlb_map_single call. All
+@@ -546,8 +552,8 @@ swiotlb_map_single(struct device *hwdev,
+ * whatever the device wrote there.
+ */
+ void
+-swiotlb_unmap_single(struct device *hwdev, dma_addr_t dev_addr, size_t size,
+- int dir)
++swiotlb_unmap_single_attrs(struct device *hwdev, dma_addr_t dev_addr,
++ size_t size, int dir, struct dma_attrs *attrs)
+ {
+ BUG_ON(dir == DMA_NONE);
+ if (in_swiotlb_aperture(dev_addr))
+@@ -555,7 +561,14 @@ swiotlb_unmap_single(struct device *hwde
+ else
+ gnttab_dma_unmap_page(dev_addr);
+ }
++EXPORT_SYMBOL(swiotlb_unmap_single_attrs);
+
++void
++swiotlb_unmap_single(struct device *hwdev, dma_addr_t dev_addr, size_t size,
++ int dir)
++{
++ return swiotlb_unmap_single_attrs(hwdev, dev_addr, size, dir, NULL);
++}
+ /*
+ * Make physical memory consistent for a single streaming mode DMA translation
+ * after a transfer.
+@@ -584,6 +597,26 @@ swiotlb_sync_single_for_device(struct de
+ sync_single(hwdev, bus_to_virt(dev_addr), size, dir);
+ }
+
++void
++swiotlb_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
++ unsigned long offset, size_t size, int dir)
++{
++ BUG_ON(dir == DMA_NONE);
++ if (in_swiotlb_aperture(dev_addr))
++ sync_single(hwdev, bus_to_virt(dev_addr + offset), size, dir);
++}
++
++void
++swiotlb_sync_single_range_for_device(struct device *hwdev, dma_addr_t dev_addr,
++ unsigned long offset, size_t size, int dir)
++{
++ BUG_ON(dir == DMA_NONE);
++ if (in_swiotlb_aperture(dev_addr))
++ sync_single(hwdev, bus_to_virt(dev_addr + offset), size, dir);
++}
++
++void swiotlb_unmap_sg_attrs(struct device *, struct scatterlist *, int, int,
++ struct dma_attrs *);
+ /*
+ * Map a set of buffers described by scatterlist in streaming mode for DMA.
+ * This is the scatter-gather version of the above swiotlb_map_single
+@@ -601,8 +634,8 @@ swiotlb_sync_single_for_device(struct de
+ * same here.
+ */
+ int
+-swiotlb_map_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
+- int dir)
++swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
++ int dir, struct dma_attrs *attrs)
+ {
+ struct scatterlist *sg;
+ struct phys_addr buffer;
+@@ -626,7 +659,8 @@ swiotlb_map_sg(struct device *hwdev, str
+ /* Don't panic here, we expect map_sg users
+ to do proper error handling. */
+ swiotlb_full(hwdev, sg->length, dir, 0);
+- swiotlb_unmap_sg(hwdev, sgl, i, dir);
++ swiotlb_unmap_sg_attrs(hwdev, sgl, i, dir,
++ attrs);
+ sgl[0].dma_length = 0;
+ return 0;
+ }
+@@ -637,14 +671,22 @@ swiotlb_map_sg(struct device *hwdev, str
+ }
+ return nelems;
+ }
++EXPORT_SYMBOL(swiotlb_map_sg_attrs);
++
++int
++swiotlb_map_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
++ int dir)
++{
++ return swiotlb_map_sg_attrs(hwdev, sgl, nelems, dir, NULL);
++}
+
+ /*
+ * Unmap a set of streaming mode DMA translations. Again, cpu read rules
+ * concerning calls here are the same as for swiotlb_unmap_single() above.
+ */
+ void
+-swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
+- int dir)
++swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
++ int nelems, int dir, struct dma_attrs *attrs)
+ {
+ struct scatterlist *sg;
+ int i;
+@@ -659,6 +701,14 @@ swiotlb_unmap_sg(struct device *hwdev, s
+ gnttab_dma_unmap_page(sg->dma_address);
+ }
+ }
++EXPORT_SYMBOL(swiotlb_unmap_sg_attrs);
++
++void
++swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
++ int dir)
++{
++ return swiotlb_unmap_sg_attrs(hwdev, sgl, nelems, dir, NULL);
++}
+
+ /*
+ * Make physical memory consistent for a set of streaming mode DMA translations
+@@ -699,46 +749,6 @@ swiotlb_sync_sg_for_device(struct device
+ }
+ }
+
+-#ifdef CONFIG_HIGHMEM
+-
+-dma_addr_t
+-swiotlb_map_page(struct device *hwdev, struct page *page,
+- unsigned long offset, size_t size,
+- enum dma_data_direction direction)
+-{
+- struct phys_addr buffer;
+- dma_addr_t dev_addr;
+- char *map;
+-
+- dev_addr = gnttab_dma_map_page(page) + offset;
+- if (address_needs_mapping(hwdev, dev_addr)) {
+- gnttab_dma_unmap_page(dev_addr);
+- buffer.page = page;
+- buffer.offset = offset;
+- map = map_single(hwdev, buffer, size, direction);
+- if (!map) {
+- swiotlb_full(hwdev, size, direction, 1);
+- map = io_tlb_overflow_buffer;
+- }
+- dev_addr = (dma_addr_t)virt_to_bus(map);
+- }
+-
+- return dev_addr;
+-}
+-
+-void
+-swiotlb_unmap_page(struct device *hwdev, dma_addr_t dma_address,
+- size_t size, enum dma_data_direction direction)
+-{
+- BUG_ON(direction == DMA_NONE);
+- if (in_swiotlb_aperture(dma_address))
+- unmap_single(hwdev, bus_to_virt(dma_address), size, direction);
+- else
+- gnttab_dma_unmap_page(dma_address);
+-}
+-
+-#endif
+-
+ int
+ swiotlb_dma_mapping_error(dma_addr_t dma_addr)
+ {