Reenabled linux-xen, added patches for Xen Kernel Version 2.6.27.31,
diff --git a/src/patches/suse-2.6.27.31/patches.xen/xen3-patch-2.6.19 b/src/patches/suse-2.6.27.31/patches.xen/xen3-patch-2.6.19
new file mode 100644
index 0000000..4ba8c96
--- /dev/null
+++ b/src/patches/suse-2.6.27.31/patches.xen/xen3-patch-2.6.19
@@ -0,0 +1,12224 @@
+From: www.kernel.org
+Subject: Linux 2.6.19
+Patch-mainline: 2.6.19
+
+Automatically created from "patches.kernel.org/patch-2.6.19" by xen-port-patches.py
+
+Acked-by: jbeulich@novell.com
+
+--- sle11-2009-05-14.orig/arch/x86/Kconfig     2009-04-20 11:36:10.000000000 +0200
++++ sle11-2009-05-14/arch/x86/Kconfig  2009-03-04 11:28:34.000000000 +0100
+@@ -415,6 +415,7 @@ config SCHED_NO_NO_OMIT_FRAME_POINTER
+ menuconfig PARAVIRT_GUEST
+       bool "Paravirtualized guest support"
++      depends on !X86_XEN && !X86_64_XEN
+       help
+         Say Y here to get to see options related to running Linux under
+         various hypervisors.  This option alone does not add any kernel code.
+--- sle11-2009-05-14.orig/arch/x86/kernel/apic_32-xen.c        2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/arch/x86/kernel/apic_32-xen.c     2009-03-04 11:28:34.000000000 +0100
+@@ -54,7 +54,6 @@ static cpumask_t timer_bcast_ipi;
+ /*
+  * Knob to control our willingness to enable the local APIC.
+  */
+-int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */
+ /*
+  * Debug level
+@@ -102,7 +101,7 @@ int get_physical_broadcast(void)
+ #ifndef CONFIG_XEN
+ #ifndef CONFIG_SMP
+-static void up_apic_timer_interrupt_call(struct pt_regs *regs)
++static void up_apic_timer_interrupt_call(void)
+ {
+       int cpu = smp_processor_id();
+@@ -111,11 +110,11 @@ static void up_apic_timer_interrupt_call
+        */
+       per_cpu(irq_stat, cpu).apic_timer_irqs++;
+-      smp_local_timer_interrupt(regs);
++      smp_local_timer_interrupt();
+ }
+ #endif
+-void smp_send_timer_broadcast_ipi(struct pt_regs *regs)
++void smp_send_timer_broadcast_ipi(void)
+ {
+       cpumask_t mask;
+@@ -128,7 +127,7 @@ void smp_send_timer_broadcast_ipi(struct
+                * We can directly call the apic timer interrupt handler
+                * in UP case. Minus all irq related functions
+                */
+-              up_apic_timer_interrupt_call(regs);
++              up_apic_timer_interrupt_call();
+ #endif
+       }
+ }
+--- sle11-2009-05-14.orig/arch/x86/kernel/cpu/common-xen.c     2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/arch/x86/kernel/cpu/common-xen.c  2009-03-04 11:28:34.000000000 +0100
+@@ -43,7 +43,7 @@ struct cpu_dev * cpu_devs[X86_VENDOR_NUM
+ extern int disable_pse;
+-static void default_init(struct cpuinfo_x86 * c)
++static void __cpuinit default_init(struct cpuinfo_x86 * c)
+ {
+       /* Not much we can do here... */
+       /* Check if at least it has cpuid */
+@@ -56,7 +56,7 @@ static void default_init(struct cpuinfo_
+       }
+ }
+-static struct cpu_dev default_cpu = {
++static struct cpu_dev __cpuinitdata default_cpu = {
+       .c_init = default_init,
+       .c_vendor = "Unknown",
+ };
+@@ -191,7 +191,16 @@ static void __cpuinit get_cpu_vendor(str
+ static int __init x86_fxsr_setup(char * s)
+ {
++      /* Tell all the other CPUs to not use it... */
+       disable_x86_fxsr = 1;
++
++      /*
++       * ... and clear the bits early in the boot_cpu_data
++       * so that the bootup process doesn't try to do this
++       * either.
++       */
++      clear_bit(X86_FEATURE_FXSR, boot_cpu_data.x86_capability);
++      clear_bit(X86_FEATURE_XMM, boot_cpu_data.x86_capability);
+       return 1;
+ }
+ __setup("nofxsr", x86_fxsr_setup);
+@@ -272,7 +281,7 @@ static void __init early_cpu_detect(void
+       }
+ }
+-void __cpuinit generic_identify(struct cpuinfo_x86 * c)
++static void __cpuinit generic_identify(struct cpuinfo_x86 * c)
+ {
+       u32 tfms, xlvl;
+       int ebx;
+@@ -698,8 +707,7 @@ old_gdt:
+        */
+       atomic_inc(&init_mm.mm_count);
+       current->active_mm = &init_mm;
+-      if (current->mm)
+-              BUG();
++      BUG_ON(current->mm);
+       enter_lazy_tlb(&init_mm, current);
+       load_esp0(t, thread);
+@@ -712,7 +720,7 @@ old_gdt:
+ #endif
+       /* Clear %fs and %gs. */
+-      asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs");
++      asm volatile ("movl %0, %%fs; movl %0, %%gs" : : "r" (0));
+       /* Clear all 6 debug registers: */
+       set_debugreg(0, 0);
+--- sle11-2009-05-14.orig/arch/x86/kernel/entry_32-xen.S       2009-05-14 11:01:46.000000000 +0200
++++ sle11-2009-05-14/arch/x86/kernel/entry_32-xen.S    2009-05-14 11:07:47.000000000 +0200
+@@ -80,8 +80,12 @@ VM_MASK             = 0x00020000
+ NMI_MASK      = 0x80000000
+ #ifndef CONFIG_XEN
+-#define DISABLE_INTERRUPTS    cli
+-#define ENABLE_INTERRUPTS     sti
++/* These are replacements for paravirtualization */
++#define DISABLE_INTERRUPTS            cli
++#define ENABLE_INTERRUPTS             sti
++#define ENABLE_INTERRUPTS_SYSEXIT     sti; sysexit
++#define INTERRUPT_RETURN              iret
++#define GET_CR0_INTO_EAX              movl %cr0, %eax
+ #else
+ /* Offsets into shared_info_t. */
+ #define evtchn_upcall_pending         /* 0 */
+@@ -99,15 +103,29 @@ NMI_MASK  = 0x80000000
+ #define __DISABLE_INTERRUPTS  movb $1,evtchn_upcall_mask(%esi)
+ #define __ENABLE_INTERRUPTS   movb $0,evtchn_upcall_mask(%esi)
++#define __TEST_PENDING                testb $0xFF,evtchn_upcall_pending(%esi)
+ #define DISABLE_INTERRUPTS    GET_VCPU_INFO                           ; \
+                               __DISABLE_INTERRUPTS
+ #define ENABLE_INTERRUPTS     GET_VCPU_INFO                           ; \
+                               __ENABLE_INTERRUPTS
+-#define __TEST_PENDING                testb $0xFF,evtchn_upcall_pending(%esi)
++#define ENABLE_INTERRUPTS_SYSEXIT __ENABLE_INTERRUPTS                 ; \
++sysexit_scrit:        /**** START OF SYSEXIT CRITICAL REGION ****/            ; \
++      __TEST_PENDING                                                  ; \
++      jnz  14f                # process more events if necessary...   ; \
++      movl ESI(%esp), %esi                                            ; \
++      sysexit                                                         ; \
++14:   __DISABLE_INTERRUPTS                                            ; \
++      TRACE_IRQS_OFF                                                  ; \
++sysexit_ecrit:        /**** END OF SYSEXIT CRITICAL REGION ****/              ; \
++      push %esp                                                       ; \
++      call evtchn_do_upcall                                           ; \
++      add  $4,%esp                                                    ; \
++      jmp  ret_from_intr
++#define INTERRUPT_RETURN      iret
+ #endif
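
[Annotation, not part of the patch: the CONFIG_XEN branch above re-implements interrupt masking in terms of the per-VCPU event-channel mask, since a Xen guest may not execute cli/sti directly. A minimal C sketch of what DISABLE_INTERRUPTS/ENABLE_INTERRUPTS amount to; the field names come from xen/interface/xen.h, while the helper names xen_irq_disable/xen_irq_enable are illustrative only.]

    /* Illustrative sketch only -- not part of the patch. */
    static inline void xen_irq_disable(struct vcpu_info *v)
    {
            v->evtchn_upcall_mask = 1;      /* the cli equivalent */
            barrier();
    }

    static inline void xen_irq_enable(struct vcpu_info *v)
    {
            v->evtchn_upcall_mask = 0;      /* the sti equivalent */
            barrier();
            if (v->evtchn_upcall_pending)   /* events arrived while masked */
                    force_evtchn_callback();
    }
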
+ #ifdef CONFIG_PREEMPT
+-#define preempt_stop          cli; TRACE_IRQS_OFF
++#define preempt_stop          DISABLE_INTERRUPTS; TRACE_IRQS_OFF
+ #else
+ #define preempt_stop
+ #define resume_kernel         restore_nocheck
+@@ -206,18 +224,21 @@ NMI_MASK = 0x80000000
+ #define RING0_INT_FRAME \
+       CFI_STARTPROC simple;\
++      CFI_SIGNAL_FRAME;\
+       CFI_DEF_CFA esp, 3*4;\
+       /*CFI_OFFSET cs, -2*4;*/\
+       CFI_OFFSET eip, -3*4
+ #define RING0_EC_FRAME \
+       CFI_STARTPROC simple;\
++      CFI_SIGNAL_FRAME;\
+       CFI_DEF_CFA esp, 4*4;\
+       /*CFI_OFFSET cs, -2*4;*/\
+       CFI_OFFSET eip, -3*4
+ #define RING0_PTREGS_FRAME \
+       CFI_STARTPROC simple;\
++      CFI_SIGNAL_FRAME;\
+       CFI_DEF_CFA esp, OLDESP-EBX;\
+       /*CFI_OFFSET cs, CS-OLDESP;*/\
+       CFI_OFFSET eip, EIP-OLDESP;\
+@@ -263,8 +284,9 @@ ret_from_intr:
+ check_userspace:
+       movl EFLAGS(%esp), %eax         # mix EFLAGS and CS
+       movb CS(%esp), %al
+-      testl $(VM_MASK | 2), %eax
+-      jz resume_kernel
++      andl $(VM_MASK | SEGMENT_RPL_MASK), %eax
++      cmpl $USER_RPL, %eax
++      jb resume_kernel                # not returning to v8086 or userspace
+ ENTRY(resume_userspace)
+       DISABLE_INTERRUPTS              # make sure we don't miss an interrupt
+                                       # setting need_resched or sigpending
+@@ -277,7 +299,7 @@ ENTRY(resume_userspace)
+ #ifdef CONFIG_PREEMPT
+ ENTRY(resume_kernel)
+-      cli
++      DISABLE_INTERRUPTS
+       cmpl $0,TI_preempt_count(%ebp)  # non-zero preempt_count ?
+       jnz restore_nocheck
+ need_resched:
+@@ -297,6 +319,7 @@ need_resched:
+       # sysenter call handler stub
+ ENTRY(sysenter_entry)
+       CFI_STARTPROC simple
++      CFI_SIGNAL_FRAME
+       CFI_DEF_CFA esp, 0
+       CFI_REGISTER esp, ebp
+       movl SYSENTER_stack_esp0(%esp),%esp
+@@ -305,7 +328,7 @@ sysenter_past_esp:
+        * No need to follow this irqs on/off section: the syscall
+        * disabled irqs and here we enable it straight after entry:
+        */
+-      sti
++      ENABLE_INTERRUPTS
+       pushl $(__USER_DS)
+       CFI_ADJUST_CFA_OFFSET 4
+       /*CFI_REL_OFFSET ss, 0*/
+@@ -359,26 +382,8 @@ sysenter_past_esp:
+       movl EIP(%esp), %edx
+       movl OLDESP(%esp), %ecx
+       xorl %ebp,%ebp
+-#ifdef CONFIG_XEN
+       TRACE_IRQS_ON
+-      __ENABLE_INTERRUPTS
+-sysexit_scrit:        /**** START OF SYSEXIT CRITICAL REGION ****/
+-      __TEST_PENDING
+-      jnz  14f                        # process more events if necessary...
+-      movl ESI(%esp), %esi
+-      sysexit
+-14:   __DISABLE_INTERRUPTS
+-      TRACE_IRQS_OFF
+-sysexit_ecrit:        /**** END OF SYSEXIT CRITICAL REGION ****/
+-      push %esp
+-      call evtchn_do_upcall
+-      add  $4,%esp
+-      jmp  ret_from_intr
+-#else
+-      TRACE_IRQS_ON
+-      sti
+-      sysexit
+-#endif /* !CONFIG_XEN */
++      ENABLE_INTERRUPTS_SYSEXIT
+       CFI_ENDPROC
+       # pv sysenter call handler stub
+@@ -444,8 +449,8 @@ restore_all:
+       # See comments in process.c:copy_thread() for details.
+       movb OLDSS(%esp), %ah
+       movb CS(%esp), %al
+-      andl $(VM_MASK | (4 << 8) | 3), %eax
+-      cmpl $((4 << 8) | 3), %eax
++      andl $(VM_MASK | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
++      cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax
+       CFI_REMEMBER_STATE
+       je ldt_ss                       # returning to user-space with LDT SS
+ restore_nocheck:
+@@ -467,12 +472,11 @@ restore_nocheck_notrace:
+       RESTORE_REGS
+       addl $4, %esp
+       CFI_ADJUST_CFA_OFFSET -4
+-1:    iret
++1:    INTERRUPT_RETURN
+ .section .fixup,"ax"
+ iret_exc:
+ #ifndef CONFIG_XEN
+-      TRACE_IRQS_ON
+-      sti
++      ENABLE_INTERRUPTS
+ #endif
+       pushl $0                        # no error code
+       pushl $do_iret_error
+@@ -498,7 +502,7 @@ ldt_ss:
+        * dosemu and wine happy. */
+       subl $8, %esp           # reserve space for switch16 pointer
+       CFI_ADJUST_CFA_OFFSET 8
+-      cli
++      DISABLE_INTERRUPTS
+       TRACE_IRQS_OFF
+       movl %esp, %eax
+       /* Set up the 16bit stack frame with switch32 pointer on top,
+@@ -508,7 +512,7 @@ ldt_ss:
+       TRACE_IRQS_IRET
+       RESTORE_REGS
+       lss 20+4(%esp), %esp    # switch to 16bit stack
+-1:    iret
++1:    INTERRUPT_RETURN
+ .section __ex_table,"a"
+       .align 4
+       .long 1b,iret_exc
+@@ -524,7 +528,7 @@ scrit:     /**** START OF CRITICAL REGION **
+       RESTORE_REGS
+       addl $4, %esp
+       CFI_ADJUST_CFA_OFFSET -4
+-1:    iret
++1:    INTERRUPT_RETURN
+ .section __ex_table,"a"
+       .align 4
+       .long 1b,iret_exc
+@@ -713,11 +717,9 @@ ENTRY(name)                               \
+ #define UNWIND_ESPFIX_STACK
+ #endif
+-ENTRY(divide_error)
+-      RING0_INT_FRAME
+-      pushl $0                        # no error code
+-      CFI_ADJUST_CFA_OFFSET 4
+-      pushl $do_divide_error
++KPROBE_ENTRY(page_fault)
++      RING0_EC_FRAME
++      pushl $do_page_fault
+       CFI_ADJUST_CFA_OFFSET 4
+       ALIGN
+ error_code:
+@@ -767,6 +769,7 @@ error_code:
+       call *%edi
+       jmp ret_from_exception
+       CFI_ENDPROC
++KPROBE_END(page_fault)
+ #ifdef CONFIG_XEN
+ # A note on the "critical region" in our callback handler.
+@@ -790,9 +793,11 @@ ENTRY(hypervisor_callback)
+       pushl %eax
+       CFI_ADJUST_CFA_OFFSET 4
+       SAVE_ALL
+-      testb $2,CS(%esp)
++      movl CS(%esp),%ecx
+       movl EIP(%esp),%eax
+-      jnz  .Ldo_upcall
++      andl $SEGMENT_RPL_MASK,%ecx
++      cmpl $USER_RPL,%ecx
++      jae  .Ldo_upcall
+       cmpl $scrit,%eax
+       jb   0f
+       cmpl $ecrit,%eax
+@@ -928,7 +933,7 @@ ENTRY(device_not_available)
+       CFI_ADJUST_CFA_OFFSET 4
+       SAVE_ALL
+ #ifndef CONFIG_XEN
+-      movl %cr0, %eax
++      GET_CR0_INTO_EAX
+       testl $0x4, %eax                # EM (math emulation bit)
+       je device_available_emulate
+       pushl $0                        # temporary storage for ORIG_EIP
+@@ -963,9 +968,15 @@ device_available_emulate:
+       jne ok;                                 \
+ label:                                                \
+       movl SYSENTER_stack_esp0+offset(%esp),%esp;     \
++      CFI_DEF_CFA esp, 0;                     \
++      CFI_UNDEFINED eip;                      \
+       pushfl;                                 \
++      CFI_ADJUST_CFA_OFFSET 4;                \
+       pushl $__KERNEL_CS;                     \
+-      pushl $sysenter_past_esp
++      CFI_ADJUST_CFA_OFFSET 4;                \
++      pushl $sysenter_past_esp;               \
++      CFI_ADJUST_CFA_OFFSET 4;                \
++      CFI_REL_OFFSET eip, 0
+ #endif /* CONFIG_XEN */
+ KPROBE_ENTRY(debug)
+@@ -984,7 +995,8 @@ debug_stack_correct:
+       call do_debug
+       jmp ret_from_exception
+       CFI_ENDPROC
+-      .previous .text
++KPROBE_END(debug)
++
+ #ifndef CONFIG_XEN
+ /*
+  * NMI is doubly nasty. It can happen _while_ we're handling
+@@ -994,7 +1006,7 @@ debug_stack_correct:
+  * check whether we got an NMI on the debug path where the debug
+  * fault happened on the sysenter path.
+  */
+-ENTRY(nmi)
++KPROBE_ENTRY(nmi)
+       RING0_INT_FRAME
+       pushl %eax
+       CFI_ADJUST_CFA_OFFSET 4
+@@ -1019,6 +1031,7 @@ ENTRY(nmi)
+       cmpl $sysenter_entry,12(%esp)
+       je nmi_debug_stack_check
+ nmi_stack_correct:
++      /* We have a RING0_INT_FRAME here */
+       pushl %eax
+       CFI_ADJUST_CFA_OFFSET 4
+       SAVE_ALL
+@@ -1029,9 +1042,12 @@ nmi_stack_correct:
+       CFI_ENDPROC
+ nmi_stack_fixup:
++      RING0_INT_FRAME
+       FIX_STACK(12,nmi_stack_correct, 1)
+       jmp nmi_stack_correct
++
+ nmi_debug_stack_check:
++      /* We have a RING0_INT_FRAME here */
+       cmpw $__KERNEL_CS,16(%esp)
+       jne nmi_stack_correct
+       cmpl $debug,(%esp)
+@@ -1042,8 +1058,10 @@ nmi_debug_stack_check:
+       jmp nmi_stack_correct
+ nmi_16bit_stack:
+-      RING0_INT_FRAME
+-      /* create the pointer to lss back */
++      /* We have a RING0_INT_FRAME here.
++       *
++       * create the pointer to lss back
++       */
+       pushl %ss
+       CFI_ADJUST_CFA_OFFSET 4
+       pushl %esp
+@@ -1064,14 +1082,14 @@ nmi_16bit_stack:
+       call do_nmi
+       RESTORE_REGS
+       lss 12+4(%esp), %esp            # back to 16bit stack
+-1:    iret
++1:    INTERRUPT_RETURN
+       CFI_ENDPROC
+ .section __ex_table,"a"
+       .align 4
+       .long 1b,iret_exc
+ .previous
+ #else
+-ENTRY(nmi)
++KPROBE_ENTRY(nmi)
+       RING0_INT_FRAME
+       pushl %eax
+       CFI_ADJUST_CFA_OFFSET 4
+@@ -1083,6 +1101,7 @@ ENTRY(nmi)
+       jmp restore_all
+       CFI_ENDPROC
+ #endif
++KPROBE_END(nmi)
+ KPROBE_ENTRY(int3)
+       RING0_INT_FRAME
+@@ -1094,7 +1113,7 @@ KPROBE_ENTRY(int3)
+       call do_int3
+       jmp ret_from_exception
+       CFI_ENDPROC
+-      .previous .text
++KPROBE_END(int3)
+ ENTRY(overflow)
+       RING0_INT_FRAME
+@@ -1159,7 +1178,7 @@ KPROBE_ENTRY(general_protection)
+       CFI_ADJUST_CFA_OFFSET 4
+       jmp error_code
+       CFI_ENDPROC
+-      .previous .text
++KPROBE_END(general_protection)
+ ENTRY(alignment_check)
+       RING0_EC_FRAME
+@@ -1168,13 +1187,14 @@ ENTRY(alignment_check)
+       jmp error_code
+       CFI_ENDPROC
+-KPROBE_ENTRY(page_fault)
+-      RING0_EC_FRAME
+-      pushl $do_page_fault
++ENTRY(divide_error)
++      RING0_INT_FRAME
++      pushl $0                        # no error code
++      CFI_ADJUST_CFA_OFFSET 4
++      pushl $do_divide_error
+       CFI_ADJUST_CFA_OFFSET 4
+       jmp error_code
+       CFI_ENDPROC
+-      .previous .text
+ #ifdef CONFIG_X86_MCE
+ ENTRY(machine_check)
+@@ -1236,6 +1256,19 @@ ENTRY(fixup_4gb_segment)
+       jmp error_code
+       CFI_ENDPROC
++ENTRY(kernel_thread_helper)
++      pushl $0                # fake return address for unwinder
++      CFI_STARTPROC
++      movl %edx,%eax
++      push %edx
++      CFI_ADJUST_CFA_OFFSET 4
++      call *%ebx
++      push %eax
++      CFI_ADJUST_CFA_OFFSET 4
++      call do_exit
++      CFI_ENDPROC
++ENDPROC(kernel_thread_helper)
++
+ .section .rodata,"a"
+ #include "syscall_table.S"
+--- sle11-2009-05-14.orig/arch/x86/kernel/head_32-xen.S        2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/arch/x86/kernel/head_32-xen.S     2009-03-04 11:28:34.000000000 +0100
+@@ -62,7 +62,7 @@ ENTRY(startup_32)
+       movl %eax,%gs
+       cld                     # gcc2 wants the direction flag cleared at all times
+-      pushl %eax              # fake return address
++      pushl $0                # fake return address for unwinder
+       jmp start_kernel
+ #define HYPERCALL_PAGE_OFFSET 0x1000
+--- sle11-2009-05-14.orig/arch/x86/kernel/io_apic_32-xen.c     2009-03-16 16:13:45.000000000 +0100
++++ sle11-2009-05-14/arch/x86/kernel/io_apic_32-xen.c  2009-03-04 11:28:34.000000000 +0100
+@@ -31,6 +31,9 @@
+ #include <linux/acpi.h>
+ #include <linux/module.h>
+ #include <linux/sysdev.h>
++#include <linux/pci.h>
++#include <linux/msi.h>
++#include <linux/htirq.h>
+ #include <asm/io.h>
+ #include <asm/smp.h>
+@@ -38,13 +41,15 @@
+ #include <asm/timer.h>
+ #include <asm/i8259.h>
+ #include <asm/nmi.h>
++#include <asm/msidef.h>
++#include <asm/hypertransport.h>
+ #include <mach_apic.h>
++#include <mach_apicdef.h>
+ #include "io_ports.h"
+ #ifdef CONFIG_XEN
+-
+ #include <xen/interface/xen.h>
+ #include <xen/interface/physdev.h>
+ #include <xen/evtchn.h>
+@@ -56,32 +61,7 @@
+ unsigned long io_apic_irqs;
+-static inline unsigned int xen_io_apic_read(unsigned int apic, unsigned int reg)
+-{
+-      struct physdev_apic apic_op;
+-      int ret;
+-
+-      apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr;
+-      apic_op.reg = reg;
+-      ret = HYPERVISOR_physdev_op(PHYSDEVOP_apic_read, &apic_op);
+-      if (ret)
+-              return ret;
+-      return apic_op.value;
+-}
+-
+-static inline void xen_io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
+-{
+-      struct physdev_apic apic_op;
+-
+-      apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr;
+-      apic_op.reg = reg;
+-      apic_op.value = value;
+-      WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic_op));
+-}
+-
+-#define io_apic_read(a,r)    xen_io_apic_read(a,r)
+-#define io_apic_write(a,r,v) xen_io_apic_write(a,r,v)
+-
++#define clear_IO_APIC() ((void)0)
+ #endif /* CONFIG_XEN */
+ int (*ioapic_renumber_irq)(int ioapic, int irq);
+@@ -108,7 +88,7 @@ int sis_apic_bug = -1;
+  */
+ int nr_ioapic_registers[MAX_IO_APICS];
+-int disable_timer_pin_1 __initdata;
++static int disable_timer_pin_1 __initdata;
+ /*
+  * Rough estimation of how many shared IRQs there are, can
+@@ -128,12 +108,124 @@ static struct irq_pin_list {
+       int apic, pin, next;
+ } irq_2_pin[PIN_MAP_SIZE];
+-int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1};
+-#ifdef CONFIG_PCI_MSI
+-#define vector_to_irq(vector)         \
+-      (platform_legacy_irq(vector) ? vector : vector_irq[vector])
++#ifndef CONFIG_XEN
++struct io_apic {
++      unsigned int index;
++      unsigned int unused[3];
++      unsigned int data;
++};
++
++static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
++{
++      return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
++              + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK);
++}
++#endif
++
++static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
++{
++#ifndef CONFIG_XEN
++      struct io_apic __iomem *io_apic = io_apic_base(apic);
++      writel(reg, &io_apic->index);
++      return readl(&io_apic->data);
++#else
++      struct physdev_apic apic_op;
++      int ret;
++
++      apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr;
++      apic_op.reg = reg;
++      ret = HYPERVISOR_physdev_op(PHYSDEVOP_apic_read, &apic_op);
++      if (ret)
++              return ret;
++      return apic_op.value;
++#endif
++}
++
++static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
++{
++#ifndef CONFIG_XEN
++      struct io_apic __iomem *io_apic = io_apic_base(apic);
++      writel(reg, &io_apic->index);
++      writel(value, &io_apic->data);
++#else
++      struct physdev_apic apic_op;
++
++      apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr;
++      apic_op.reg = reg;
++      apic_op.value = value;
++      WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic_op));
++#endif
++}
++
++#ifndef CONFIG_XEN
++/*
++ * Re-write a value: to be used for read-modify-write
++ * cycles where the read already set up the index register.
++ *
++ * Older SiS APIC requires we rewrite the index register
++ */
++static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
++{
++      volatile struct io_apic *io_apic = io_apic_base(apic);
++      if (sis_apic_bug)
++              writel(reg, &io_apic->index);
++      writel(value, &io_apic->data);
++}
+ #else
+-#define vector_to_irq(vector) (vector)
++#define io_apic_modify io_apic_write
++#endif
++
++union entry_union {
++      struct { u32 w1, w2; };
++      struct IO_APIC_route_entry entry;
++};
++
++#ifndef CONFIG_XEN
++static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
++{
++      union entry_union eu;
++      unsigned long flags;
++      spin_lock_irqsave(&ioapic_lock, flags);
++      eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
++      eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
++      spin_unlock_irqrestore(&ioapic_lock, flags);
++      return eu.entry;
++}
++#endif
++
++/*
++ * When we write a new IO APIC routing entry, we need to write the high
++ * word first! If the mask bit in the low word is clear, we will enable
++ * the interrupt, and we need to make sure the entry is fully populated
++ * before that happens.
++ */
++static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
++{
++      unsigned long flags;
++      union entry_union eu;
++      eu.entry = e;
++      spin_lock_irqsave(&ioapic_lock, flags);
++      io_apic_write(apic, 0x11 + 2*pin, eu.w2);
++      io_apic_write(apic, 0x10 + 2*pin, eu.w1);
++      spin_unlock_irqrestore(&ioapic_lock, flags);
++}
++
++#ifndef CONFIG_XEN
++/*
++ * When we mask an IO APIC routing entry, we need to write the low
++ * word first, in order to set the mask bit before we change the
++ * high bits!
++ */
++static void ioapic_mask_entry(int apic, int pin)
++{
++      unsigned long flags;
++      union entry_union eu = { .entry.mask = 1 };
++
++      spin_lock_irqsave(&ioapic_lock, flags);
++      io_apic_write(apic, 0x10 + 2*pin, eu.w1);
++      io_apic_write(apic, 0x11 + 2*pin, eu.w2);
++      spin_unlock_irqrestore(&ioapic_lock, flags);
++}
+ #endif
+ /*
+@@ -159,9 +251,7 @@ static void add_pin_to_irq(unsigned int 
+       entry->pin = pin;
+ }
+-#ifdef CONFIG_XEN
+-#define clear_IO_APIC() ((void)0)
+-#else
++#ifndef CONFIG_XEN
+ /*
+  * Reroute an IRQ to a different pin.
+  */
+@@ -246,25 +336,16 @@ static void unmask_IO_APIC_irq (unsigned
+ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
+ {
+       struct IO_APIC_route_entry entry;
+-      unsigned long flags;
+       
+       /* Check delivery_mode to be sure we're not clearing an SMI pin */
+-      spin_lock_irqsave(&ioapic_lock, flags);
+-      *(((int*)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
+-      *(((int*)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
+-      spin_unlock_irqrestore(&ioapic_lock, flags);
++      entry = ioapic_read_entry(apic, pin);
+       if (entry.delivery_mode == dest_SMI)
+               return;
+       /*
+        * Disable it in the IO-APIC irq-routing table:
+        */
+-      memset(&entry, 0, sizeof(entry));
+-      entry.mask = 1;
+-      spin_lock_irqsave(&ioapic_lock, flags);
+-      io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry) + 0));
+-      io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry) + 1));
+-      spin_unlock_irqrestore(&ioapic_lock, flags);
++      ioapic_mask_entry(apic, pin);
+ }
+ static void clear_IO_APIC (void)
+@@ -304,7 +385,7 @@ static void set_ioapic_affinity_irq(unsi
+                       break;
+               entry = irq_2_pin + entry->next;
+       }
+-      set_irq_info(irq, cpumask);
++      set_native_irq_info(irq, cpumask);
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+ }
+@@ -1212,43 +1293,43 @@ static inline int IO_APIC_irq_trigger(in
+ /* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
+ u8 irq_vector[NR_IRQ_VECTORS] __read_mostly; /* = { FIRST_DEVICE_VECTOR , 0 }; */
+-int assign_irq_vector(int irq)
++static int __assign_irq_vector(int irq)
+ {
+-      unsigned long flags;
+       int vector;
+       struct physdev_irq irq_op;
+-      BUG_ON(irq != AUTO_ASSIGN && (unsigned)irq >= NR_IRQ_VECTORS);
++      BUG_ON((unsigned)irq >= NR_IRQ_VECTORS);
+       if (irq < PIRQ_BASE || irq - PIRQ_BASE >= NR_PIRQS)
+               return -EINVAL;
+-      spin_lock_irqsave(&vector_lock, flags);
+-
+-      if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) {
+-              spin_unlock_irqrestore(&vector_lock, flags);
+-              return IO_APIC_VECTOR(irq);
+-      }
++      if (irq_vector[irq] > 0)
++              return irq_vector[irq];
+       irq_op.irq = irq;
+-      if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) {
+-              spin_unlock_irqrestore(&vector_lock, flags);
++      if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op))
+               return -ENOSPC;
+-      }
+       vector = irq_op.vector;
+-      vector_irq[vector] = irq;
+-      if (irq != AUTO_ASSIGN)
+-              IO_APIC_VECTOR(irq) = vector;
++      irq_vector[irq] = vector;
++
++      return vector;
++}
++
++static int assign_irq_vector(int irq)
++{
++      unsigned long flags;
++      int vector;
++      spin_lock_irqsave(&vector_lock, flags);
++      vector = __assign_irq_vector(irq);
+       spin_unlock_irqrestore(&vector_lock, flags);
+       return vector;
+ }
+ #ifndef CONFIG_XEN
+-static struct hw_interrupt_type ioapic_level_type;
+-static struct hw_interrupt_type ioapic_edge_type;
++static struct irq_chip ioapic_chip;
+ #define IOAPIC_AUTO   -1
+ #define IOAPIC_EDGE   0
+@@ -1256,16 +1337,16 @@ static struct hw_interrupt_type ioapic_e
+ static void ioapic_register_intr(int irq, int vector, unsigned long trigger)
+ {
+-      unsigned idx;
+-
+-      idx = use_pci_vector() && !platform_legacy_irq(irq) ? vector : irq;
+-
+       if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
+                       trigger == IOAPIC_LEVEL)
+-              irq_desc[idx].chip = &ioapic_level_type;
+-      else
+-              irq_desc[idx].chip = &ioapic_edge_type;
+-      set_intr_gate(vector, interrupt[idx]);
++              set_irq_chip_and_handler_name(irq, &ioapic_chip,
++                                       handle_fasteoi_irq, "fasteoi");
++      else {
++              irq_desc[irq].status |= IRQ_DELAYED_DISABLE;
++              set_irq_chip_and_handler_name(irq, &ioapic_chip,
++                                       handle_edge_irq, "edge");
++      }
++      set_intr_gate(vector, interrupt[irq]);
+ }
+ #else
+ #define ioapic_register_intr(irq, vector, trigger) evtchn_register_pirq(irq)
+@@ -1336,9 +1417,8 @@ static void __init setup_IO_APIC_irqs(vo
+                       if (!apic && (irq < 16))
+                               disable_8259A_irq(irq);
+               }
++              ioapic_write_entry(apic, pin, entry);
+               spin_lock_irqsave(&ioapic_lock, flags);
+-              io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
+-              io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
+               set_native_irq_info(irq, TARGET_CPUS);
+               spin_unlock_irqrestore(&ioapic_lock, flags);
+       }
+@@ -1355,7 +1435,6 @@ static void __init setup_IO_APIC_irqs(vo
+ static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, int vector)
+ {
+       struct IO_APIC_route_entry entry;
+-      unsigned long flags;
+       memset(&entry,0,sizeof(entry));
+@@ -1380,15 +1459,13 @@ static void __init setup_ExtINT_IRQ0_pin
+        * The timer IRQ doesn't have to know that behind the
+        * scene we have a 8259A-master in AEOI mode ...
+        */
+-      irq_desc[0].chip = &ioapic_edge_type;
++      irq_desc[0].chip = &ioapic_chip;
++      set_irq_handler(0, handle_edge_irq);
+       /*
+        * Add it to the IO-APIC irq-routing table:
+        */
+-      spin_lock_irqsave(&ioapic_lock, flags);
+-      io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
+-      io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
+-      spin_unlock_irqrestore(&ioapic_lock, flags);
++      ioapic_write_entry(apic, pin, entry);
+       enable_8259A_irq(0);
+ }
+@@ -1498,10 +1575,7 @@ void __init print_IO_APIC(void)
+       for (i = 0; i <= reg_01.bits.entries; i++) {
+               struct IO_APIC_route_entry entry;
+-              spin_lock_irqsave(&ioapic_lock, flags);
+-              *(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2);
+-              *(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2);
+-              spin_unlock_irqrestore(&ioapic_lock, flags);
++              entry = ioapic_read_entry(apic, i);
+               printk(KERN_DEBUG " %02x %03X %02X  ",
+                       i,
+@@ -1521,17 +1595,12 @@ void __init print_IO_APIC(void)
+               );
+       }
+       }
+-      if (use_pci_vector())
+-              printk(KERN_INFO "Using vector-based indexing\n");
+       printk(KERN_DEBUG "IRQ to pin mappings:\n");
+       for (i = 0; i < NR_IRQS; i++) {
+               struct irq_pin_list *entry = irq_2_pin + i;
+               if (entry->pin < 0)
+                       continue;
+-              if (use_pci_vector() && !platform_legacy_irq(i))
+-                      printk(KERN_DEBUG "IRQ%d ", IO_APIC_VECTOR(i));
+-              else
+-                      printk(KERN_DEBUG "IRQ%d ", i);
++              printk(KERN_DEBUG "IRQ%d ", i);
+               for (;;) {
+                       printk("-> %d:%d", entry->apic, entry->pin);
+                       if (!entry->next)
+@@ -1720,10 +1789,7 @@ static void __init enable_IO_APIC(void)
+               /* See if any of the pins is in ExtINT mode */
+               for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+                       struct IO_APIC_route_entry entry;
+-                      spin_lock_irqsave(&ioapic_lock, flags);
+-                      *(((int *)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
+-                      *(((int *)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
+-                      spin_unlock_irqrestore(&ioapic_lock, flags);
++                      entry = ioapic_read_entry(apic, pin);
+                       /* If the interrupt line is enabled and in ExtInt mode
+@@ -1782,7 +1848,6 @@ void disable_IO_APIC(void)
+        */
+       if (ioapic_i8259.pin != -1) {
+               struct IO_APIC_route_entry entry;
+-              unsigned long flags;
+               memset(&entry, 0, sizeof(entry));
+               entry.mask            = 0; /* Enabled */
+@@ -1799,12 +1864,7 @@ void disable_IO_APIC(void)
+               /*
+                * Add it to the IO-APIC irq-routing table:
+                */
+-              spin_lock_irqsave(&ioapic_lock, flags);
+-              io_apic_write(ioapic_i8259.apic, 0x11+2*ioapic_i8259.pin,
+-                      *(((int *)&entry)+1));
+-              io_apic_write(ioapic_i8259.apic, 0x10+2*ioapic_i8259.pin,
+-                      *(((int *)&entry)+0));
+-              spin_unlock_irqrestore(&ioapic_lock, flags);
++              ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
+       }
+       disconnect_bsp_APIC(ioapic_i8259.pin != -1);
+ #endif
+@@ -1971,6 +2031,8 @@ static int __init timer_irq_works(void)
+  */
+ /*
++ * Startup quirk:
++ *
+  * Starting up an edge-triggered IO-APIC interrupt is
+  * nasty - we need to make sure that we get the edge.
+  * If it is already asserted for some reason, we need
+@@ -1978,8 +2040,10 @@ static int __init timer_irq_works(void)
+  *
+  * This is not complete - we should be able to fake
+  * an edge even if it isn't on the 8259A...
++ *
++ * (We do this for level-triggered IRQs too - it cannot hurt.)
+  */
+-static unsigned int startup_edge_ioapic_irq(unsigned int irq)
++static unsigned int startup_ioapic_irq(unsigned int irq)
+ {
+       int was_pending = 0;
+       unsigned long flags;
+@@ -1996,47 +2060,18 @@ static unsigned int startup_edge_ioapic_
+       return was_pending;
+ }
+-/*
+- * Once we have recorded IRQ_PENDING already, we can mask the
+- * interrupt for real. This prevents IRQ storms from unhandled
+- * devices.
+- */
+-static void ack_edge_ioapic_irq(unsigned int irq)
+-{
+-      move_irq(irq);
+-      if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED))
+-                                      == (IRQ_PENDING | IRQ_DISABLED))
+-              mask_IO_APIC_irq(irq);
+-      ack_APIC_irq();
+-}
+-
+-/*
+- * Level triggered interrupts can just be masked,
+- * and shutting down and starting up the interrupt
+- * is the same as enabling and disabling them -- except
+- * with a startup need to return a "was pending" value.
+- *
+- * Level triggered interrupts are special because we
+- * do not touch any IO-APIC register while handling
+- * them. We ack the APIC in the end-IRQ handler, not
+- * in the start-IRQ-handler. Protection against reentrance
+- * from the same interrupt is still provided, both by the
+- * generic IRQ layer and by the fact that an unacked local
+- * APIC does not accept IRQs.
+- */
+-static unsigned int startup_level_ioapic_irq (unsigned int irq)
++static void ack_ioapic_irq(unsigned int irq)
+ {
+-      unmask_IO_APIC_irq(irq);
+-
+-      return 0; /* don't check for pending */
++      move_native_irq(irq);
++      ack_APIC_irq();
+ }
+-static void end_level_ioapic_irq (unsigned int irq)
++static void ack_ioapic_quirk_irq(unsigned int irq)
+ {
+       unsigned long v;
+       int i;
+-      move_irq(irq);
++      move_native_irq(irq);
+ /*
+  * It appears there is an erratum which affects at least version 0x11
+  * of I/O APIC (that's the 82093AA and cores integrated into various
+@@ -2056,7 +2091,7 @@ static void end_level_ioapic_irq (unsign
+  * operation to prevent an edge-triggered interrupt escaping meanwhile.
+  * The idea is from Manfred Spraul.  --macro
+  */
+-      i = IO_APIC_VECTOR(irq);
++      i = irq_vector[irq];
+       v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
+@@ -2071,104 +2106,24 @@ static void end_level_ioapic_irq (unsign
+       }
+ }
+-#ifdef CONFIG_PCI_MSI
+-static unsigned int startup_edge_ioapic_vector(unsigned int vector)
+-{
+-      int irq = vector_to_irq(vector);
+-
+-      return startup_edge_ioapic_irq(irq);
+-}
+-
+-static void ack_edge_ioapic_vector(unsigned int vector)
+-{
+-      int irq = vector_to_irq(vector);
+-
+-      move_native_irq(vector);
+-      ack_edge_ioapic_irq(irq);
+-}
+-
+-static unsigned int startup_level_ioapic_vector (unsigned int vector)
+-{
+-      int irq = vector_to_irq(vector);
+-
+-      return startup_level_ioapic_irq (irq);
+-}
+-
+-static void end_level_ioapic_vector (unsigned int vector)
+-{
+-      int irq = vector_to_irq(vector);
+-
+-      move_native_irq(vector);
+-      end_level_ioapic_irq(irq);
+-}
+-
+-static void mask_IO_APIC_vector (unsigned int vector)
+-{
+-      int irq = vector_to_irq(vector);
+-
+-      mask_IO_APIC_irq(irq);
+-}
+-
+-static void unmask_IO_APIC_vector (unsigned int vector)
++static int ioapic_retrigger_irq(unsigned int irq)
+ {
+-      int irq = vector_to_irq(vector);
+-
+-      unmask_IO_APIC_irq(irq);
+-}
+-
+-#ifdef CONFIG_SMP
+-static void set_ioapic_affinity_vector (unsigned int vector,
+-                                      cpumask_t cpu_mask)
+-{
+-      int irq = vector_to_irq(vector);
+-
+-      set_native_irq_info(vector, cpu_mask);
+-      set_ioapic_affinity_irq(irq, cpu_mask);
+-}
+-#endif
+-#endif
+-
+-static int ioapic_retrigger(unsigned int irq)
+-{
+-      send_IPI_self(IO_APIC_VECTOR(irq));
++      send_IPI_self(irq_vector[irq]);
+       return 1;
+ }
+-/*
+- * Level and edge triggered IO-APIC interrupts need different handling,
+- * so we use two separate IRQ descriptors. Edge triggered IRQs can be
+- * handled with the level-triggered descriptor, but that one has slightly
+- * more overhead. Level-triggered interrupts cannot be handled with the
+- * edge-triggered handler, without risking IRQ storms and other ugly
+- * races.
+- */
+-static struct hw_interrupt_type ioapic_edge_type __read_mostly = {
+-      .typename       = "IO-APIC-edge",
+-      .startup        = startup_edge_ioapic,
+-      .shutdown       = shutdown_edge_ioapic,
+-      .enable         = enable_edge_ioapic,
+-      .disable        = disable_edge_ioapic,
+-      .ack            = ack_edge_ioapic,
+-      .end            = end_edge_ioapic,
+-#ifdef CONFIG_SMP
+-      .set_affinity   = set_ioapic_affinity,
+-#endif
+-      .retrigger      = ioapic_retrigger,
+-};
+-
+-static struct hw_interrupt_type ioapic_level_type __read_mostly = {
+-      .typename       = "IO-APIC-level",
+-      .startup        = startup_level_ioapic,
+-      .shutdown       = shutdown_level_ioapic,
+-      .enable         = enable_level_ioapic,
+-      .disable        = disable_level_ioapic,
+-      .ack            = mask_and_ack_level_ioapic,
+-      .end            = end_level_ioapic,
++static struct irq_chip ioapic_chip __read_mostly = {
++      .name           = "IO-APIC",
++      .startup        = startup_ioapic_irq,
++      .mask           = mask_IO_APIC_irq,
++      .unmask         = unmask_IO_APIC_irq,
++      .ack            = ack_ioapic_irq,
++      .eoi            = ack_ioapic_quirk_irq,
+ #ifdef CONFIG_SMP
+-      .set_affinity   = set_ioapic_affinity,
++      .set_affinity   = set_ioapic_affinity_irq,
+ #endif
+-      .retrigger      = ioapic_retrigger,
++      .retrigger      = ioapic_retrigger_irq,
+ };
+ #endif /* !CONFIG_XEN */
+@@ -2189,12 +2144,7 @@ static inline void init_IO_APIC_traps(vo
+        */
+       for (irq = 0; irq < NR_IRQS ; irq++) {
+               int tmp = irq;
+-              if (use_pci_vector()) {
+-                      if (!platform_legacy_irq(tmp))
+-                              if ((tmp = vector_to_irq(tmp)) == -1)
+-                                      continue;
+-              }
+-              if (IO_APIC_IRQ(tmp) && !IO_APIC_VECTOR(tmp)) {
++              if (IO_APIC_IRQ(tmp) && !irq_vector[tmp]) {
+                       /*
+                        * Hmm.. We don't have an entry for this,
+                        * so default to an old-fashioned 8259
+@@ -2205,22 +2155,23 @@ static inline void init_IO_APIC_traps(vo
+ #ifndef CONFIG_XEN
+                       else
+                               /* Strange. Oh, well.. */
+-                              irq_desc[irq].chip = &no_irq_type;
++                              irq_desc[irq].chip = &no_irq_chip;
+ #endif
+               }
+       }
+ }
+ #ifndef CONFIG_XEN
+-static void enable_lapic_irq (unsigned int irq)
+-{
+-      unsigned long v;
++/*
++ * The local APIC irq-chip implementation:
++ */
+-      v = apic_read(APIC_LVT0);
+-      apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED);
++static void ack_apic(unsigned int irq)
++{
++      ack_APIC_irq();
+ }
+-static void disable_lapic_irq (unsigned int irq)
++static void mask_lapic_irq (unsigned int irq)
+ {
+       unsigned long v;
+@@ -2228,21 +2179,19 @@ static void disable_lapic_irq (unsigned 
+       apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED);
+ }
+-static void ack_lapic_irq (unsigned int irq)
++static void unmask_lapic_irq (unsigned int irq)
+ {
+-      ack_APIC_irq();
+-}
++      unsigned long v;
+-static void end_lapic_irq (unsigned int i) { /* nothing */ }
++      v = apic_read(APIC_LVT0);
++      apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED);
++}
+-static struct hw_interrupt_type lapic_irq_type __read_mostly = {
+-      .typename       = "local-APIC-edge",
+-      .startup        = NULL, /* startup_irq() not used for IRQ0 */
+-      .shutdown       = NULL, /* shutdown_irq() not used for IRQ0 */
+-      .enable         = enable_lapic_irq,
+-      .disable        = disable_lapic_irq,
+-      .ack            = ack_lapic_irq,
+-      .end            = end_lapic_irq
++static struct irq_chip lapic_chip __read_mostly = {
++      .name           = "local-APIC-edge",
++      .mask           = mask_lapic_irq,
++      .unmask         = unmask_lapic_irq,
++      .eoi            = ack_apic,
+ };
+ static void setup_nmi (void)
+@@ -2275,17 +2224,13 @@ static inline void unlock_ExtINT_logic(v
+       int apic, pin, i;
+       struct IO_APIC_route_entry entry0, entry1;
+       unsigned char save_control, save_freq_select;
+-      unsigned long flags;
+       pin  = find_isa_irq_pin(8, mp_INT);
+       apic = find_isa_irq_apic(8, mp_INT);
+       if (pin == -1)
+               return;
+-      spin_lock_irqsave(&ioapic_lock, flags);
+-      *(((int *)&entry0) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
+-      *(((int *)&entry0) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
+-      spin_unlock_irqrestore(&ioapic_lock, flags);
++      entry0 = ioapic_read_entry(apic, pin);
+       clear_IO_APIC_pin(apic, pin);
+       memset(&entry1, 0, sizeof(entry1));
+@@ -2298,10 +2243,7 @@ static inline void unlock_ExtINT_logic(v
+       entry1.trigger = 0;
+       entry1.vector = 0;
+-      spin_lock_irqsave(&ioapic_lock, flags);
+-      io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry1) + 1));
+-      io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry1) + 0));
+-      spin_unlock_irqrestore(&ioapic_lock, flags);
++      ioapic_write_entry(apic, pin, entry1);
+       save_control = CMOS_READ(RTC_CONTROL);
+       save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
+@@ -2320,10 +2262,7 @@ static inline void unlock_ExtINT_logic(v
+       CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
+       clear_IO_APIC_pin(apic, pin);
+-      spin_lock_irqsave(&ioapic_lock, flags);
+-      io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry0) + 1));
+-      io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry0) + 0));
+-      spin_unlock_irqrestore(&ioapic_lock, flags);
++      ioapic_write_entry(apic, pin, entry0);
+ }
+ int timer_uses_ioapic_pin_0;
+@@ -2423,7 +2362,8 @@ static inline void check_timer(void)
+       printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ...");
+       disable_8259A_irq(0);
+-      irq_desc[0].chip = &lapic_irq_type;
++      set_irq_chip_and_handler_name(0, &lapic_chip, handle_fasteoi_irq,
++                                    "fasteoi");
+       apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector);   /* Fixed mode */
+       enable_8259A_irq(0);
+@@ -2537,17 +2477,12 @@ static int ioapic_suspend(struct sys_dev
+ {
+       struct IO_APIC_route_entry *entry;
+       struct sysfs_ioapic_data *data;
+-      unsigned long flags;
+       int i;
+       
+       data = container_of(dev, struct sysfs_ioapic_data, dev);
+       entry = data->entry;
+-      spin_lock_irqsave(&ioapic_lock, flags);
+-      for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) {
+-              *(((int *)entry) + 1) = io_apic_read(dev->id, 0x11 + 2 * i);
+-              *(((int *)entry) + 0) = io_apic_read(dev->id, 0x10 + 2 * i);
+-      }
+-      spin_unlock_irqrestore(&ioapic_lock, flags);
++      for (i = 0; i < nr_ioapic_registers[dev->id]; i ++)
++              entry[i] = ioapic_read_entry(dev->id, i);
+       return 0;
+ }
+@@ -2569,11 +2504,9 @@ static int ioapic_resume(struct sys_devi
+               reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid;
+               io_apic_write(dev->id, 0, reg_00.raw);
+       }
+-      for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) {
+-              io_apic_write(dev->id, 0x11+2*i, *(((int *)entry)+1));
+-              io_apic_write(dev->id, 0x10+2*i, *(((int *)entry)+0));
+-      }
+       spin_unlock_irqrestore(&ioapic_lock, flags);
++      for (i = 0; i < nr_ioapic_registers[dev->id]; i ++)
++              ioapic_write_entry(dev->id, i, entry[i]);
+       return 0;
+ }
+@@ -2619,8 +2552,240 @@ static int __init ioapic_init_sysfs(void
+ device_initcall(ioapic_init_sysfs);
++/*
++ * Dynamic irq allocation and deallocation
++ */
++int create_irq(void)
++{
++      /* Allocate an unused irq */
++      int irq, new, vector;
++      unsigned long flags;
++
++      irq = -ENOSPC;
++      spin_lock_irqsave(&vector_lock, flags);
++      for (new = (NR_IRQS - 1); new >= 0; new--) {
++              if (platform_legacy_irq(new))
++                      continue;
++              if (irq_vector[new] != 0)
++                      continue;
++              vector = __assign_irq_vector(new);
++              if (likely(vector > 0))
++                      irq = new;
++              break;
++      }
++      spin_unlock_irqrestore(&vector_lock, flags);
++
++      if (irq >= 0) {
++              set_intr_gate(vector, interrupt[irq]);
++              dynamic_irq_init(irq);
++      }
++      return irq;
++}
++
++void destroy_irq(unsigned int irq)
++{
++      unsigned long flags;
++
++      dynamic_irq_cleanup(irq);
++
++      spin_lock_irqsave(&vector_lock, flags);
++      irq_vector[irq] = 0;
++      spin_unlock_irqrestore(&vector_lock, flags);
++}
++
+ #endif /* CONFIG_XEN */
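
[Annotation, not part of the patch: create_irq()/destroy_irq() above are the allocation primitives the MSI and HyperTransport code below builds on. A sketch of how a hypothetical caller would pair them; my_handler and my_dev are placeholders, not names from the patch.]

    /* Sketch of driver-side usage; my_handler/my_dev are hypothetical. */
    int irq = create_irq();             /* -ENOSPC when no vector is free */
    if (irq < 0)
            return irq;
    if (request_irq(irq, my_handler, 0, "my_dev", my_dev)) {
            destroy_irq(irq);           /* hand the vector back */
            return -EBUSY;
    }
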
++/*
+ * MSI message composition
++ */
++#if defined(CONFIG_PCI_MSI) && !defined(CONFIG_XEN)
++static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
++{
++      int vector;
++      unsigned dest;
++
++      vector = assign_irq_vector(irq);
++      if (vector >= 0) {
++              dest = cpu_mask_to_apicid(TARGET_CPUS);
++
++              msg->address_hi = MSI_ADDR_BASE_HI;
++              msg->address_lo =
++                      MSI_ADDR_BASE_LO |
++                      ((INT_DEST_MODE == 0) ?
++                              MSI_ADDR_DEST_MODE_PHYSICAL:
++                              MSI_ADDR_DEST_MODE_LOGICAL) |
++                      ((INT_DELIVERY_MODE != dest_LowestPrio) ?
++                              MSI_ADDR_REDIRECTION_CPU:
++                              MSI_ADDR_REDIRECTION_LOWPRI) |
++                      MSI_ADDR_DEST_ID(dest);
++
++              msg->data =
++                      MSI_DATA_TRIGGER_EDGE |
++                      MSI_DATA_LEVEL_ASSERT |
++                      ((INT_DELIVERY_MODE != dest_LowestPrio) ?
++                              MSI_DATA_DELIVERY_FIXED:
++                              MSI_DATA_DELIVERY_LOWPRI) |
++                      MSI_DATA_VECTOR(vector);
++      }
++      return vector;
++}
++
++#ifdef CONFIG_SMP
++static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
++{
++      struct msi_msg msg;
++      unsigned int dest;
++      cpumask_t tmp;
++      int vector;
++
++      cpus_and(tmp, mask, cpu_online_map);
++      if (cpus_empty(tmp))
++              tmp = TARGET_CPUS;
++
++      vector = assign_irq_vector(irq);
++      if (vector < 0)
++              return;
++
++      dest = cpu_mask_to_apicid(mask);
++
++      read_msi_msg(irq, &msg);
++
++      msg.data &= ~MSI_DATA_VECTOR_MASK;
++      msg.data |= MSI_DATA_VECTOR(vector);
++      msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
++      msg.address_lo |= MSI_ADDR_DEST_ID(dest);
++
++      write_msi_msg(irq, &msg);
++      set_native_irq_info(irq, mask);
++}
++#endif /* CONFIG_SMP */
++
++/*
++ * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
++ * which implement the MSI or MSI-X Capability Structure.
++ */
++static struct irq_chip msi_chip = {
++      .name           = "PCI-MSI",
++      .unmask         = unmask_msi_irq,
++      .mask           = mask_msi_irq,
++      .ack            = ack_ioapic_irq,
++#ifdef CONFIG_SMP
++      .set_affinity   = set_msi_irq_affinity,
++#endif
++      .retrigger      = ioapic_retrigger_irq,
++};
++
++int arch_setup_msi_irq(unsigned int irq, struct pci_dev *dev)
++{
++      struct msi_msg msg;
++      int ret;
++      ret = msi_compose_msg(dev, irq, &msg);
++      if (ret < 0)
++              return ret;
++
++      write_msi_msg(irq, &msg);
++
++      set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq,
++                                    "edge");
++
++      return 0;
++}
++
++void arch_teardown_msi_irq(unsigned int irq)
++{
++      return;
++}
++
++#endif /* CONFIG_PCI_MSI */
++
++/*
++ * Hypertransport interrupt support
++ */
++#ifdef CONFIG_HT_IRQ
++
++#ifdef CONFIG_SMP
++
++static void target_ht_irq(unsigned int irq, unsigned int dest)
++{
++      struct ht_irq_msg msg;
++      fetch_ht_irq_msg(irq, &msg);
++
++      msg.address_lo &= ~(HT_IRQ_LOW_DEST_ID_MASK);
++      msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK);
++
++      msg.address_lo |= HT_IRQ_LOW_DEST_ID(dest);
++      msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest);
++
++      write_ht_irq_msg(irq, &msg);
++}
++
++static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
++{
++      unsigned int dest;
++      cpumask_t tmp;
++
++      cpus_and(tmp, mask, cpu_online_map);
++      if (cpus_empty(tmp))
++              tmp = TARGET_CPUS;
++
++      cpus_and(mask, tmp, CPU_MASK_ALL);
++
++      dest = cpu_mask_to_apicid(mask);
++
++      target_ht_irq(irq, dest);
++      set_native_irq_info(irq, mask);
++}
++#endif
++
++static struct irq_chip ht_irq_chip = {
++      .name           = "PCI-HT",
++      .mask           = mask_ht_irq,
++      .unmask         = unmask_ht_irq,
++      .ack            = ack_ioapic_irq,
++#ifdef CONFIG_SMP
++      .set_affinity   = set_ht_irq_affinity,
++#endif
++      .retrigger      = ioapic_retrigger_irq,
++};
++
++int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
++{
++      int vector;
++
++      vector = assign_irq_vector(irq);
++      if (vector >= 0) {
++              struct ht_irq_msg msg;
++              unsigned dest;
++              cpumask_t tmp;
++
++              cpus_clear(tmp);
++              cpu_set(vector >> 8, tmp);
++              dest = cpu_mask_to_apicid(tmp);
++
++              msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
++
++              msg.address_lo =
++                      HT_IRQ_LOW_BASE |
++                      HT_IRQ_LOW_DEST_ID(dest) |
++                      HT_IRQ_LOW_VECTOR(vector) |
++                      ((INT_DEST_MODE == 0) ?
++                              HT_IRQ_LOW_DM_PHYSICAL :
++                              HT_IRQ_LOW_DM_LOGICAL) |
++                      HT_IRQ_LOW_RQEOI_EDGE |
++                      ((INT_DELIVERY_MODE != dest_LowestPrio) ?
++                              HT_IRQ_LOW_MT_FIXED :
++                              HT_IRQ_LOW_MT_ARBITRATED) |
++                      HT_IRQ_LOW_IRQ_MASKED;
++
++              write_ht_irq_msg(irq, &msg);
++
++              set_irq_chip_and_handler_name(irq, &ht_irq_chip,
++                                            handle_edge_irq, "edge");
++      }
++      return vector;
++}
++#endif /* CONFIG_HT_IRQ */
++
+ /* --------------------------------------------------------------------------
+                           ACPI-based IOAPIC Configuration
+    -------------------------------------------------------------------------- */
+@@ -2774,13 +2939,34 @@ int io_apic_set_pci_routing (int ioapic,
+       if (!ioapic && (irq < 16))
+               disable_8259A_irq(irq);
++      ioapic_write_entry(ioapic, pin, entry);
+       spin_lock_irqsave(&ioapic_lock, flags);
+-      io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1));
+-      io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0));
+-      set_native_irq_info(use_pci_vector() ? entry.vector : irq, TARGET_CPUS);
++      set_native_irq_info(irq, TARGET_CPUS);
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+       return 0;
+ }
+ #endif /* CONFIG_ACPI */
++
++static int __init parse_disable_timer_pin_1(char *arg)
++{
++      disable_timer_pin_1 = 1;
++      return 0;
++}
++early_param("disable_timer_pin_1", parse_disable_timer_pin_1);
++
++static int __init parse_enable_timer_pin_1(char *arg)
++{
++      disable_timer_pin_1 = -1;
++      return 0;
++}
++early_param("enable_timer_pin_1", parse_enable_timer_pin_1);
++
++static int __init parse_noapic(char *arg)
++{
++      /* disable IO-APIC */
++      disable_ioapic_setup();
++      return 0;
++}
++early_param("noapic", parse_noapic);
+--- sle11-2009-05-14.orig/arch/x86/kernel/irq_32-xen.c 2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/arch/x86/kernel/irq_32-xen.c      2009-03-04 11:28:34.000000000 +0100
+@@ -53,8 +53,10 @@ static union irq_ctx *softirq_ctx[NR_CPU
+  */
+ fastcall unsigned int do_IRQ(struct pt_regs *regs)
+ {     
++      struct pt_regs *old_regs;
+       /* high bit used in ret_from_ code */
+       int irq = ~regs->orig_eax;
++      struct irq_desc *desc = irq_desc + irq;
+ #ifdef CONFIG_4KSTACKS
+       union irq_ctx *curctx, *irqctx;
+       u32 *isp;
+@@ -66,6 +68,7 @@ fastcall unsigned int do_IRQ(struct pt_r
+               BUG();
+       }
++      old_regs = set_irq_regs(regs);
+       /*irq_enter();*/
+ #ifdef CONFIG_DEBUG_STACKOVERFLOW
+       /* Debugging check for stack overflow: is there less than 1KB free? */
+@@ -110,19 +113,20 @@ fastcall unsigned int do_IRQ(struct pt_r
+                       (curctx->tinfo.preempt_count & SOFTIRQ_MASK);
+               asm volatile(
+-                      "       xchgl   %%ebx,%%esp      \n"
+-                      "       call    __do_IRQ         \n"
++                      "       xchgl  %%ebx,%%esp      \n"
++                      "       call   *%%edi           \n"
+                       "       movl   %%ebx,%%esp      \n"
+                       : "=a" (arg1), "=d" (arg2), "=b" (ebx)
+-                      :  "0" (irq),   "1" (regs),  "2" (isp)
+-                      : "memory", "cc", "ecx"
++                      :  "0" (irq),   "1" (desc),  "2" (isp),
++                         "D" (desc->handle_irq)
++                      : "memory", "cc"
+               );
+       } else
+ #endif
+-              __do_IRQ(irq, regs);
++              desc->handle_irq(irq, desc);
+       /*irq_exit();*/
+-
++      set_irq_regs(old_regs);
+       return 1;
+ }
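
[Annotation, not part of the patch: the do_IRQ() rework tracks the 2.6.19 genirq conversion, where registers are no longer threaded through every handler but parked in a per-CPU slot via set_irq_regs(). A handler that still needs them uses the pattern below; my_handler is hypothetical.]

    /* Sketch of a 2.6.19-style handler; my_handler is hypothetical. */
    static irqreturn_t my_handler(int irq, void *dev_id)
    {
            struct pt_regs *regs = get_irq_regs();  /* saved by do_IRQ() */
            /* ... regs is only meaningful in hard-irq context ... */
            return IRQ_HANDLED;
    }
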
+@@ -253,7 +257,8 @@ int show_interrupts(struct seq_file *p, 
+               for_each_online_cpu(j)
+                       seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
+ #endif
+-              seq_printf(p, " %14s", irq_desc[i].chip->typename);
++              seq_printf(p, " %8s", irq_desc[i].chip->name);
++              seq_printf(p, "-%-8s", irq_desc[i].name);
+               seq_printf(p, "  %s", action->name);
+               for (action=action->next; action; action = action->next)
+--- sle11-2009-05-14.orig/arch/x86/kernel/ldt_32-xen.c 2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/arch/x86/kernel/ldt_32-xen.c      2009-03-04 11:28:34.000000000 +0100
+@@ -1,5 +1,5 @@
+ /*
+- * linux/kernel/ldt.c
++ * linux/arch/i386/kernel/ldt.c
+  *
+  * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds
+  * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
+--- sle11-2009-05-14.orig/arch/x86/kernel/microcode-xen.c      2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/arch/x86/kernel/microcode-xen.c   2009-03-04 11:28:34.000000000 +0100
+@@ -2,6 +2,7 @@
+  *    Intel CPU Microcode Update Driver for Linux
+  *
+  *    Copyright (C) 2000-2004 Tigran Aivazian
++ *                  2006      Shaohua Li <shaohua.li@intel.com>
+  *
+  *    This driver allows to upgrade microcode on Intel processors
+  *    belonging to IA-32 family - PentiumPro, Pentium II, 
+@@ -33,7 +34,9 @@
+ #include <linux/spinlock.h>
+ #include <linux/mm.h>
+ #include <linux/mutex.h>
+-#include <linux/syscalls.h>
++#include <linux/cpu.h>
++#include <linux/firmware.h>
++#include <linux/platform_device.h>
+ #include <asm/msr.h>
+ #include <asm/uaccess.h>
+@@ -55,12 +58,7 @@ module_param(verbose, int, 0644);
+ /* no concurrent ->write()s are allowed on /dev/cpu/microcode */
+ static DEFINE_MUTEX(microcode_mutex);
+                               
+-static int microcode_open (struct inode *unused1, struct file *unused2)
+-{
+-      return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
+-}
+-
+-
++#ifdef CONFIG_MICROCODE_OLD_INTERFACE
+ static int do_microcode_update (const void __user *ubuf, size_t len)
+ {
+       int err;
+@@ -85,6 +83,11 @@ static int do_microcode_update (const vo
+       return err;
+ }
++static int microcode_open (struct inode *unused1, struct file *unused2)
++{
++      return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
++}
++
+ static ssize_t microcode_write (struct file *file, const char __user *buf, size_t len, loff_t *ppos)
+ {
+       ssize_t ret;
+@@ -117,7 +120,7 @@ static struct miscdevice microcode_dev =
+       .fops           = &microcode_fops,
+ };
+-static int __init microcode_init (void)
++static int __init microcode_dev_init (void)
+ {
+       int error;
+@@ -129,6 +132,68 @@ static int __init microcode_init (void)
+               return error;
+       }
++      return 0;
++}
++
++static void __exit microcode_dev_exit (void)
++{
++      misc_deregister(&microcode_dev);
++}
++
++MODULE_ALIAS_MISCDEV(MICROCODE_MINOR);
++#else
++#define microcode_dev_init() 0
++#define microcode_dev_exit() do { } while(0)
++#endif
++
++/* fake device for request_firmware */
++static struct platform_device *microcode_pdev;
++
++static int request_microcode(void)
++{
++      char name[30];
++      const struct cpuinfo_x86 *c = &boot_cpu_data;
++      const struct firmware *firmware;
++      int error;
++      struct xen_platform_op op;
++
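++      /* the firmware blob is keyed by CPU family-model-stepping, e.g. intel-ucode/06-0f-02 (name shown is illustrative) */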
++      sprintf(name,"intel-ucode/%02x-%02x-%02x",
++              c->x86, c->x86_model, c->x86_mask);
++      error = request_firmware(&firmware, name, &microcode_pdev->dev);
++      if (error) {
++              pr_debug("ucode data file %s load failed\n", name);
++              return error;
++      }
++
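++      /* dom0 only fetches the blob; the XENPF_microcode_update platform op hands it to the hypervisor, which applies it */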
++      op.cmd = XENPF_microcode_update;
++      set_xen_guest_handle(op.u.microcode.data, (void *)firmware->data);
++      op.u.microcode.length = firmware->size;
++      error = HYPERVISOR_platform_op(&op);
++
++      release_firmware(firmware);
++
++      if (error)
++              pr_debug("ucode load failed\n");
++
++      return error;
++}
++
++static int __init microcode_init (void)
++{
++      int error;
++
++      error = microcode_dev_init();
++      if (error)
++              return error;
++      microcode_pdev = platform_device_register_simple("microcode", -1,
++                                                       NULL, 0);
++      if (IS_ERR(microcode_pdev)) {
++              microcode_dev_exit();
++              return PTR_ERR(microcode_pdev);
++      }
++
++      request_microcode();
++
+       printk(KERN_INFO 
+               "IA-32 Microcode Update Driver: v" MICROCODE_VERSION " <tigran@veritas.com>\n");
+       return 0;
+@@ -136,9 +201,9 @@ static int __init microcode_init (void)
+ static void __exit microcode_exit (void)
+ {
+-      misc_deregister(&microcode_dev);
++      microcode_dev_exit();
++      platform_device_unregister(microcode_pdev);
+ }
+ module_init(microcode_init)
+ module_exit(microcode_exit)
+-MODULE_ALIAS_MISCDEV(MICROCODE_MINOR);
+--- sle11-2009-05-14.orig/arch/x86/kernel/mpparse_32-xen.c     2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/arch/x86/kernel/mpparse_32-xen.c  2009-03-04 11:28:34.000000000 +0100
+@@ -30,6 +30,7 @@
+ #include <asm/io_apic.h>
+ #include <mach_apic.h>
++#include <mach_apicdef.h>
+ #include <mach_mpparse.h>
+ #include <bios_ebda.h>
+@@ -68,7 +69,7 @@ unsigned int def_to_bigsmp = 0;
+ /* Processor that is doing the boot up */
+ unsigned int boot_cpu_physical_apicid = -1U;
+ /* Internal processor count */
+-static unsigned int __devinitdata num_processors;
++unsigned int __cpuinitdata num_processors;
+ /* Bitmask of physically existing CPUs */
+ physid_mask_t phys_cpu_present_map;
+@@ -235,12 +236,14 @@ static void __init MP_bus_info (struct m
+       mpc_oem_bus_info(m, str, translation_table[mpc_record]);
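++/* mpc_busid is a u8, so with MAX_MP_BUSSES >= 256 the range check below would be dead code */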
++#if MAX_MP_BUSSES < 256
+       if (m->mpc_busid >= MAX_MP_BUSSES) {
+               printk(KERN_WARNING "MP table busid value (%d) for bustype %s "
+                       " is too large, max. supported is %d\n",
+                       m->mpc_busid, str, MAX_MP_BUSSES - 1);
+               return;
+       }
++#endif
+       if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA)-1) == 0) {
+               mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
+@@ -300,19 +303,6 @@ static void __init MP_lintsrc_info (stru
+                       m->mpc_irqtype, m->mpc_irqflag & 3,
+                       (m->mpc_irqflag >> 2) &3, m->mpc_srcbusid,
+                       m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
+-      /*
+-       * Well it seems all SMP boards in existence
+-       * use ExtINT/LVT1 == LINT0 and
+-       * NMI/LVT2 == LINT1 - the following check
+-       * will show us if this assumptions is false.
+-       * Until then we do not have to add baggage.
+-       */
+-      if ((m->mpc_irqtype == mp_ExtINT) &&
+-              (m->mpc_destapiclint != 0))
+-                      BUG();
+-      if ((m->mpc_irqtype == mp_NMI) &&
+-              (m->mpc_destapiclint != 1))
+-                      BUG();
+ }
+ #ifdef CONFIG_X86_NUMAQ
+@@ -838,8 +828,7 @@ int es7000_plat;
+ #ifdef CONFIG_ACPI
+-void __init mp_register_lapic_address (
+-      u64                     address)
++void __init mp_register_lapic_address(u64 address)
+ {
+ #ifndef CONFIG_XEN
+       mp_lapic_addr = (unsigned long) address;
+@@ -853,13 +842,10 @@ void __init mp_register_lapic_address (
+ #endif
+ }
+-
+-void __devinit mp_register_lapic (
+-      u8                      id, 
+-      u8                      enabled)
++void __devinit mp_register_lapic (u8 id, u8 enabled)
+ {
+       struct mpc_config_processor processor;
+-      int                     boot_cpu = 0;
++      int boot_cpu = 0;
+       
+       if (MAX_APICS - id <= 0) {
+               printk(KERN_WARNING "Processor #%d invalid (max %d)\n",
+@@ -898,11 +884,9 @@ static struct mp_ioapic_routing {
+       u32                     pin_programmed[4];
+ } mp_ioapic_routing[MAX_IO_APICS];
+-
+-static int mp_find_ioapic (
+-      int                     gsi)
++static int mp_find_ioapic (int gsi)
+ {
+-      int                     i = 0;
++      int i = 0;
+       /* Find the IOAPIC that manages this GSI. */
+       for (i = 0; i < nr_ioapics; i++) {
+@@ -915,15 +899,11 @@ static int mp_find_ioapic (
+       return -1;
+ }
+-      
+-void __init mp_register_ioapic (
+-      u8                      id, 
+-      u32                     address,
+-      u32                     gsi_base)
++void __init mp_register_ioapic(u8 id, u32 address, u32 gsi_base)
+ {
+-      int                     idx = 0;
+-      int                     tmpid;
++      int idx = 0;
++      int tmpid;
+       if (nr_ioapics >= MAX_IO_APICS) {
+               printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
+@@ -971,16 +951,10 @@ void __init mp_register_ioapic (
+               mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr,
+               mp_ioapic_routing[idx].gsi_base,
+               mp_ioapic_routing[idx].gsi_end);
+-
+-      return;
+ }
+-
+-void __init mp_override_legacy_irq (
+-      u8                      bus_irq,
+-      u8                      polarity, 
+-      u8                      trigger, 
+-      u32                     gsi)
++void __init
++mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
+ {
+       struct mpc_config_intsrc intsrc;
+       int                     ioapic = -1;
+@@ -1018,15 +992,13 @@ void __init mp_override_legacy_irq (
+       mp_irqs[mp_irq_entries] = intsrc;
+       if (++mp_irq_entries == MAX_IRQ_SOURCES)
+               panic("Max # of irq sources exceeded!\n");
+-
+-      return;
+ }
+ void __init mp_config_acpi_legacy_irqs (void)
+ {
+       struct mpc_config_intsrc intsrc;
+-      int                     i = 0;
+-      int                     ioapic = -1;
++      int i = 0;
++      int ioapic = -1;
+       /* 
+        * Fabricate the legacy ISA bus (bus #31).
+@@ -1095,12 +1067,12 @@ void __init mp_config_acpi_legacy_irqs (
+ #define MAX_GSI_NUM   4096
+-int mp_register_gsi (u32 gsi, int triggering, int polarity)
++int mp_register_gsi(u32 gsi, int triggering, int polarity)
+ {
+-      int                     ioapic = -1;
+-      int                     ioapic_pin = 0;
+-      int                     idx, bit = 0;
+-      static int              pci_irq = 16;
++      int ioapic = -1;
++      int ioapic_pin = 0;
++      int idx, bit = 0;
++      static int pci_irq = 16;
+       /*
+       * Mapping between Global System Interrupts, which
+        * represent all possible interrupts, and IRQs
+--- sle11-2009-05-14.orig/arch/x86/kernel/pci-dma-xen.c        2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/arch/x86/kernel/pci-dma-xen.c     2009-03-04 11:28:34.000000000 +0100
+@@ -110,8 +110,7 @@ dma_map_sg(struct device *hwdev, struct 
+ {
+       int i, rc;
+-      if (direction == DMA_NONE)
+-              BUG();
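++      /* valid_dma_direction() rejects DMA_NONE as well as any out-of-range value */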
++      BUG_ON(!valid_dma_direction(direction));
+       WARN_ON(nents == 0 || sg[0].length == 0);
+       if (swiotlb) {
+@@ -142,7 +141,7 @@ dma_unmap_sg(struct device *hwdev, struc
+ {
+       int i;
+-      BUG_ON(direction == DMA_NONE);
++      BUG_ON(!valid_dma_direction(direction));
+       if (swiotlb)
+               swiotlb_unmap_sg(hwdev, sg, nents, direction);
+       else {
+@@ -159,8 +158,7 @@ dma_map_page(struct device *dev, struct 
+ {
+       dma_addr_t dma_addr;
+-      BUG_ON(direction == DMA_NONE);
+-
++      BUG_ON(!valid_dma_direction(direction));
+       if (swiotlb) {
+               dma_addr = swiotlb_map_page(
+                       dev, page, offset, size, direction);
+@@ -177,7 +175,7 @@ void
+ dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
+              enum dma_data_direction direction)
+ {
+-      BUG_ON(direction == DMA_NONE);
++      BUG_ON(!valid_dma_direction(direction));
+       if (swiotlb)
+               swiotlb_unmap_page(dev, dma_address, size, direction);
+       else
+@@ -359,8 +357,7 @@ dma_map_single(struct device *dev, void 
+ {
+       dma_addr_t dma;
+-      if (direction == DMA_NONE)
+-              BUG();
++      BUG_ON(!valid_dma_direction(direction));
+       WARN_ON(size == 0);
+       if (swiotlb) {
+@@ -381,8 +378,7 @@ void
+ dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
+                enum dma_data_direction direction)
+ {
+-      if (direction == DMA_NONE)
+-              BUG();
++      BUG_ON(!valid_dma_direction(direction));
+       if (swiotlb)
+               swiotlb_unmap_single(dev, dma_addr, size, direction);
+       else
+--- sle11-2009-05-14.orig/arch/x86/kernel/process_32-xen.c     2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/arch/x86/kernel/process_32-xen.c  2009-03-04 11:28:34.000000000 +0100
+@@ -37,6 +37,7 @@
+ #include <linux/kallsyms.h>
+ #include <linux/ptrace.h>
+ #include <linux/random.h>
++#include <linux/personality.h>
+ #include <asm/uaccess.h>
+ #include <asm/pgtable.h>
+@@ -186,7 +187,7 @@ void cpu_idle(void)
+ void cpu_idle_wait(void)
+ {
+       unsigned int cpu, this_cpu = get_cpu();
+-      cpumask_t map;
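++      /* remember the caller's CPU affinity so it can be restored on exit */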
++      cpumask_t map, tmp = current->cpus_allowed;
+       set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
+       put_cpu();
+@@ -208,6 +209,8 @@ void cpu_idle_wait(void)
+               }
+               cpus_and(map, map, cpu_online_map);
+       } while (!cpus_empty(map));
++
++      set_cpus_allowed(current, tmp);
+ }
+ EXPORT_SYMBOL_GPL(cpu_idle_wait);
+@@ -240,9 +243,9 @@ void show_regs(struct pt_regs * regs)
+       if (user_mode_vm(regs))
+               printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp);
+       printk(" EFLAGS: %08lx    %s  (%s %.*s)\n",
+-             regs->eflags, print_tainted(), system_utsname.release,
+-             (int)strcspn(system_utsname.version, " "),
+-             system_utsname.version);
++             regs->eflags, print_tainted(), init_utsname()->release,
++             (int)strcspn(init_utsname()->version, " "),
++             init_utsname()->version);
+       printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
+               regs->eax,regs->ebx,regs->ecx,regs->edx);
+       printk("ESI: %08lx EDI: %08lx EBP: %08lx",
+@@ -264,15 +267,6 @@ void show_regs(struct pt_regs * regs)
+  * the "args".
+  */
+ extern void kernel_thread_helper(void);
+-__asm__(".section .text\n"
+-      ".align 4\n"
+-      "kernel_thread_helper:\n\t"
+-      "movl %edx,%eax\n\t"
+-      "pushl %edx\n\t"
+-      "call *%ebx\n\t"
+-      "pushl %eax\n\t"
+-      "call do_exit\n"
+-      ".previous");
+ /*
+  * Create a kernel thread
+@@ -290,7 +284,7 @@ int kernel_thread(int (*fn)(void *), voi
+       regs.xes = __USER_DS;
+       regs.orig_eax = -1;
+       regs.eip = (unsigned long) kernel_thread_helper;
+-      regs.xcs = GET_KERNEL_CS();
++      regs.xcs = __KERNEL_CS | get_kernel_rpl();
+       regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2;
+       /* Ok, create the new process.. */
+@@ -369,13 +363,12 @@ int copy_thread(int nr, unsigned long cl
+       tsk = current;
+       if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
+-              p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
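++              /* kmemdup() allocates and copies in one step, replacing the kmalloc()+memcpy() pair */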
++              p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr,
++                                              IO_BITMAP_BYTES, GFP_KERNEL);
+               if (!p->thread.io_bitmap_ptr) {
+                       p->thread.io_bitmap_max = 0;
+                       return -ENOMEM;
+               }
+-              memcpy(p->thread.io_bitmap_ptr, tsk->thread.io_bitmap_ptr,
+-                      IO_BITMAP_BYTES);
+               set_tsk_thread_flag(p, TIF_IO_BITMAP);
+       }
+@@ -871,7 +864,7 @@ asmlinkage int sys_get_thread_area(struc
+ unsigned long arch_align_stack(unsigned long sp)
+ {
+-      if (randomize_va_space)
++      if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
+               sp -= get_random_int() % 8192;
+       return sp & ~0xf;
+ }
+--- sle11-2009-05-14.orig/arch/x86/kernel/setup_32-xen.c       2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/arch/x86/kernel/setup_32-xen.c    2009-03-04 11:28:34.000000000 +0100
+@@ -56,6 +56,7 @@
+ #include <asm/apic.h>
+ #include <asm/e820.h>
+ #include <asm/mpspec.h>
++#include <asm/mmzone.h>
+ #include <asm/setup.h>
+ #include <asm/arch_hooks.h>
+ #include <asm/sections.h>
+@@ -83,9 +84,6 @@ static struct notifier_block xen_panic_b
+       xen_panic_event, NULL, 0 /* try to go last */
+ };
+-extern char hypercall_page[PAGE_SIZE];
+-EXPORT_SYMBOL(hypercall_page);
+-
+ int disable_pse __devinitdata = 0;
+ /*
+@@ -105,18 +103,6 @@ EXPORT_SYMBOL(boot_cpu_data);
+ unsigned long mmu_cr4_features;
+-#ifdef        CONFIG_ACPI
+-      int acpi_disabled = 0;
+-#else
+-      int acpi_disabled = 1;
+-#endif
+-EXPORT_SYMBOL(acpi_disabled);
+-
+-#ifdef        CONFIG_ACPI
+-int __initdata acpi_force = 0;
+-extern acpi_interrupt_flags   acpi_sci_flags;
+-#endif
+-
+ /* for MCA, but anyone else can use it if they want */
+ unsigned int machine_id;
+ #ifdef CONFIG_MCA
+@@ -170,7 +156,6 @@ struct e820map machine_e820;
+ #endif
+ extern void early_cpu_init(void);
+-extern void generic_apic_probe(char *);
+ extern int root_mountflags;
+ unsigned long saved_videomode;
+@@ -243,9 +228,6 @@ static struct resource adapter_rom_resou
+       .flags  = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
+ } };
+-#define ADAPTER_ROM_RESOURCES \
+-      (sizeof adapter_rom_resources / sizeof adapter_rom_resources[0])
+-
+ static struct resource video_rom_resource = {
+       .name   = "Video ROM",
+       .start  = 0xc0000,
+@@ -307,9 +289,6 @@ static struct resource standard_io_resou
+       .flags  = IORESOURCE_BUSY | IORESOURCE_IO
+ } };
+-#define STANDARD_IO_RESOURCES \
+-      (sizeof standard_io_resources / sizeof standard_io_resources[0])
+-
+ #define romsignature(x) (*(unsigned short *)(x) == 0xaa55)
+ static int __init romchecksum(unsigned char *rom, unsigned long length)
+@@ -372,7 +351,7 @@ static void __init probe_roms(void)
+       }
+       /* check for adapter roms on 2k boundaries */
+-      for (i = 0; i < ADAPTER_ROM_RESOURCES && start < upper; start += 2048) {
++      for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; start += 2048) {
+               rom = isa_bus_to_virt(start);
+               if (!romsignature(rom))
+                       continue;
+@@ -779,246 +758,152 @@ static inline void copy_edd(void)
+ }
+ #endif
+-static void __init parse_cmdline_early (char ** cmdline_p)
++static int __initdata user_defined_memmap = 0;
++
++/*
++ * "mem=nopentium" disables the 4MB page tables.
++ * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
++ * to <mem>, overriding the bios size.
++ * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from
++ * <start> to <start>+<mem>, overriding the bios size.
++ *
++ * HPA tells me bootloaders need to parse mem=, so no new
++ * option should be mem=  [also see Documentation/i386/boot.txt]
++ */
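++/* Each option below is now an early_param() callback, invoked by parse_early_param() from setup_arch(). */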
++static int __init parse_mem(char *arg)
+ {
+-      char c = ' ', *to = command_line, *from = saved_command_line;
+-      int len = 0, max_cmdline;
+-      int userdef = 0;
+-
+-      if ((max_cmdline = MAX_GUEST_CMDLINE) > COMMAND_LINE_SIZE)
+-              max_cmdline = COMMAND_LINE_SIZE;
+-      memcpy(saved_command_line, xen_start_info->cmd_line, max_cmdline);
+-      /* Save unparsed command line copy for /proc/cmdline */
+-      saved_command_line[max_cmdline-1] = '\0';
+-
+-      for (;;) {
+-              if (c != ' ')
+-                      goto next_char;
+-              /*
+-               * "mem=nopentium" disables the 4MB page tables.
+-               * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
+-               * to <mem>, overriding the bios size.
+-               * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from
+-               * <start> to <start>+<mem>, overriding the bios size.
+-               *
+-               * HPA tells me bootloaders need to parse mem=, so no new
+-               * option should be mem=  [also see Documentation/i386/boot.txt]
+-               */
+-              if (!memcmp(from, "mem=", 4)) {
+-                      if (to != command_line)
+-                              to--;
+-                      if (!memcmp(from+4, "nopentium", 9)) {
+-                              from += 9+4;
+-                              clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
+-                              disable_pse = 1;
+-                      } else {
+-                              /* If the user specifies memory size, we
+-                               * limit the BIOS-provided memory map to
+-                               * that size. exactmap can be used to specify
+-                               * the exact map. mem=number can be used to
+-                               * trim the existing memory map.
+-                               */
+-                              unsigned long long mem_size;
+- 
+-                              mem_size = memparse(from+4, &from);
+-                              limit_regions(mem_size);
+-                              userdef=1;
+-                      }
+-              }
++      if (!arg)
++              return -EINVAL;
+-              else if (!memcmp(from, "memmap=", 7)) {
+-                      if (to != command_line)
+-                              to--;
+-                      if (!memcmp(from+7, "exactmap", 8)) {
+-#ifdef CONFIG_CRASH_DUMP
+-                              /* If we are doing a crash dump, we
+-                               * still need to know the real mem
+-                               * size before original memory map is
+-                               * reset.
+-                               */
+-                              find_max_pfn();
+-                              saved_max_pfn = max_pfn;
+-#endif
+-                              from += 8+7;
+-                              e820.nr_map = 0;
+-                              userdef = 1;
+-                      } else {
+-                              /* If the user specifies memory size, we
+-                               * limit the BIOS-provided memory map to
+-                               * that size. exactmap can be used to specify
+-                               * the exact map. mem=number can be used to
+-                               * trim the existing memory map.
+-                               */
+-                              unsigned long long start_at, mem_size;
++      if (strcmp(arg, "nopentium") == 0) {
++              clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
++              disable_pse = 1;
++      } else {
++              /* If the user specifies memory size, we
++               * limit the BIOS-provided memory map to
++               * that size. exactmap can be used to specify
++               * the exact map. mem=number can be used to
++               * trim the existing memory map.
++               */
++              unsigned long long mem_size;
+  
+-                              mem_size = memparse(from+7, &from);
+-                              if (*from == '@') {
+-                                      start_at = memparse(from+1, &from);
+-                                      add_memory_region(start_at, mem_size, E820_RAM);
+-                              } else if (*from == '#') {
+-                                      start_at = memparse(from+1, &from);
+-                                      add_memory_region(start_at, mem_size, E820_ACPI);
+-                              } else if (*from == '$') {
+-                                      start_at = memparse(from+1, &from);
+-                                      add_memory_region(start_at, mem_size, E820_RESERVED);
+-                              } else {
+-                                      limit_regions(mem_size);
+-                                      userdef=1;
+-                              }
+-                      }
+-              }
+-
+-              else if (!memcmp(from, "noexec=", 7))
+-                      noexec_setup(from + 7);
++              mem_size = memparse(arg, &arg);
++              limit_regions(mem_size);
++              user_defined_memmap = 1;
++      }
++      return 0;
++}
++early_param("mem", parse_mem);
++static int __init parse_memmap(char *arg)
++{
++      if (!arg)
++              return -EINVAL;
+-#ifdef  CONFIG_X86_MPPARSE
+-              /*
+-               * If the BIOS enumerates physical processors before logical,
+-               * maxcpus=N at enumeration-time can be used to disable HT.
++      if (strcmp(arg, "exactmap") == 0) {
++#ifdef CONFIG_CRASH_DUMP
++              /* If we are doing a crash dump, we
++               * still need to know the real mem
++               * size before original memory map is
++               * reset.
+                */
+-              else if (!memcmp(from, "maxcpus=", 8)) {
+-                      extern unsigned int maxcpus;
+-
+-                      maxcpus = simple_strtoul(from + 8, NULL, 0);
+-              }
++              find_max_pfn();
++              saved_max_pfn = max_pfn;
+ #endif
++              e820.nr_map = 0;
++              user_defined_memmap = 1;
++      } else {
++              /* If the user specifies memory size, we
++               * limit the BIOS-provided memory map to
++               * that size. exactmap can be used to specify
++               * the exact map. mem=number can be used to
++               * trim the existing memory map.
++               */
++              unsigned long long start_at, mem_size;
+-#ifdef CONFIG_ACPI
+-              /* "acpi=off" disables both ACPI table parsing and interpreter */
+-              else if (!memcmp(from, "acpi=off", 8)) {
+-                      disable_acpi();
+-              }
+-
+-              /* acpi=force to over-ride black-list */
+-              else if (!memcmp(from, "acpi=force", 10)) {
+-                      acpi_force = 1;
+-                      acpi_ht = 1;
+-                      acpi_disabled = 0;
+-              }
+-
+-              /* acpi=strict disables out-of-spec workarounds */
+-              else if (!memcmp(from, "acpi=strict", 11)) {
+-                      acpi_strict = 1;
+-              }
+-
+-              /* Limit ACPI just to boot-time to enable HT */
+-              else if (!memcmp(from, "acpi=ht", 7)) {
+-                      if (!acpi_force)
+-                              disable_acpi();
+-                      acpi_ht = 1;
+-              }
+-              
+-              /* "pci=noacpi" disable ACPI IRQ routing and PCI scan */
+-              else if (!memcmp(from, "pci=noacpi", 10)) {
+-                      acpi_disable_pci();
+-              }
+-              /* "acpi=noirq" disables ACPI interrupt routing */
+-              else if (!memcmp(from, "acpi=noirq", 10)) {
+-                      acpi_noirq_set();
++              mem_size = memparse(arg, &arg);
++              if (*arg == '@') {
++                      start_at = memparse(arg+1, &arg);
++                      add_memory_region(start_at, mem_size, E820_RAM);
++              } else if (*arg == '#') {
++                      start_at = memparse(arg+1, &arg);
++                      add_memory_region(start_at, mem_size, E820_ACPI);
++              } else if (*arg == '$') {
++                      start_at = memparse(arg+1, &arg);
++                      add_memory_region(start_at, mem_size, E820_RESERVED);
++              } else {
++                      limit_regions(mem_size);
++                      user_defined_memmap = 1;
+               }
++      }
++      return 0;
++}
++early_param("memmap", parse_memmap);
+-              else if (!memcmp(from, "acpi_sci=edge", 13))
+-                      acpi_sci_flags.trigger =  1;
+-
+-              else if (!memcmp(from, "acpi_sci=level", 14))
+-                      acpi_sci_flags.trigger = 3;
++#ifdef CONFIG_PROC_VMCORE
++/* elfcorehdr= specifies the location of elf core header
++ * stored by the crashed kernel.
++ */
++static int __init parse_elfcorehdr(char *arg)
++{
++      if (!arg)
++              return -EINVAL;
+-              else if (!memcmp(from, "acpi_sci=high", 13))
+-                      acpi_sci_flags.polarity = 1;
++      elfcorehdr_addr = memparse(arg, &arg);
++      return 0;
++}
++early_param("elfcorehdr", parse_elfcorehdr);
++#endif /* CONFIG_PROC_VMCORE */
+-              else if (!memcmp(from, "acpi_sci=low", 12))
+-                      acpi_sci_flags.polarity = 3;
++/*
++ * highmem=size forces highmem to be exactly 'size' bytes.
++ * This works even on boxes that have no highmem otherwise.
++ * This also works to reduce highmem size on bigger boxes.
++ */
++static int __init parse_highmem(char *arg)
++{
++      if (!arg)
++              return -EINVAL;
+-#ifdef CONFIG_X86_IO_APIC
+-              else if (!memcmp(from, "acpi_skip_timer_override", 24))
+-                      acpi_skip_timer_override = 1;
++      highmem_pages = memparse(arg, &arg) >> PAGE_SHIFT;
++      return 0;
++}
++early_param("highmem", parse_highmem);
+-              if (!memcmp(from, "disable_timer_pin_1", 19))
+-                      disable_timer_pin_1 = 1;
+-              if (!memcmp(from, "enable_timer_pin_1", 18))
+-                      disable_timer_pin_1 = -1;
+-
+-              /* disable IO-APIC */
+-              else if (!memcmp(from, "noapic", 6))
+-                      disable_ioapic_setup();
+-#endif /* CONFIG_X86_IO_APIC */
+-#endif /* CONFIG_ACPI */
++/*
++ * vmalloc=size forces the vmalloc area to be exactly 'size'
++ * bytes. This can be used to increase (or decrease) the
++ * vmalloc area - the default is 128m.
++ */
++static int __init parse_vmalloc(char *arg)
++{
++      if (!arg)
++              return -EINVAL;
+-#ifdef CONFIG_X86_LOCAL_APIC
+-              /* enable local APIC */
+-              else if (!memcmp(from, "lapic", 5))
+-                      lapic_enable();
+-
+-              /* disable local APIC */
+-              else if (!memcmp(from, "nolapic", 6))
+-                      lapic_disable();
+-#endif /* CONFIG_X86_LOCAL_APIC */
++      __VMALLOC_RESERVE = memparse(arg, &arg);
++      return 0;
++}
++early_param("vmalloc", parse_vmalloc);
+-#ifdef CONFIG_KEXEC
+-              /* crashkernel=size@addr specifies the location to reserve for
+-               * a crash kernel.  By reserving this memory we guarantee
+-               * that linux never set's it up as a DMA target.
+-               * Useful for holding code to do something appropriate
+-               * after a kernel panic.
+-               */
+-              else if (!memcmp(from, "crashkernel=", 12)) {
+ #ifndef CONFIG_XEN
+-                      unsigned long size, base;
+-                      size = memparse(from+12, &from);
+-                      if (*from == '@') {
+-                              base = memparse(from+1, &from);
+-                              /* FIXME: Do I want a sanity check
+-                               * to validate the memory range?
+-                               */
+-                              crashk_res.start = base;
+-                              crashk_res.end   = base + size - 1;
+-                      }
+-#else
+-                      printk("Ignoring crashkernel command line, "
+-                             "parameter will be supplied by xen\n");
+-#endif
+-              }
+-#endif
+-#ifdef CONFIG_PROC_VMCORE
+-              /* elfcorehdr= specifies the location of elf core header
+-               * stored by the crashed kernel.
+-               */
+-              else if (!memcmp(from, "elfcorehdr=", 11))
+-                      elfcorehdr_addr = memparse(from+11, &from);
+-#endif
++/*
++ * reservetop=size reserves a hole at the top of the kernel address space which
++ * a hypervisor can load into later.  Needed for dynamically loaded hypervisors,
++ * so relocating the fixmap can be done before paging initialization.
++ */
++static int __init parse_reservetop(char *arg)
++{
++      unsigned long address;
+-              /*
+-               * highmem=size forces highmem to be exactly 'size' bytes.
+-               * This works even on boxes that have no highmem otherwise.
+-               * This also works to reduce highmem size on bigger boxes.
+-               */
+-              else if (!memcmp(from, "highmem=", 8))
+-                      highmem_pages = memparse(from+8, &from) >> PAGE_SHIFT;
+-      
+-              /*
+-               * vmalloc=size forces the vmalloc area to be exactly 'size'
+-               * bytes. This can be used to increase (or decrease) the
+-               * vmalloc area - the default is 128m.
+-               */
+-              else if (!memcmp(from, "vmalloc=", 8))
+-                      __VMALLOC_RESERVE = memparse(from+8, &from);
++      if (!arg)
++              return -EINVAL;
+-      next_char:
+-              c = *(from++);
+-              if (!c)
+-                      break;
+-              if (COMMAND_LINE_SIZE <= ++len)
+-                      break;
+-              *(to++) = c;
+-      }
+-      *to = '\0';
+-      *cmdline_p = command_line;
+-      if (userdef) {
+-              printk(KERN_INFO "user-defined physical RAM map:\n");
+-              print_memory_map("user");
+-      }
++      address = memparse(arg, &arg);
++      reserve_top_address(address);
++      return 0;
+ }
++early_param("reservetop", parse_reservetop);
++#endif
+ /*
+  * Callback for efi_memory_walk.
+@@ -1039,7 +924,7 @@ efi_find_max_pfn(unsigned long start, un
+ static int __init
+ efi_memory_present_wrapper(unsigned long start, unsigned long end, void *arg)
+ {
+-      memory_present(0, start, end);
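++      /* memory_present() takes PFNs, not physical addresses; round the range inward */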
++      memory_present(0, PFN_UP(start), PFN_DOWN(end));
+       return 0;
+ }
+@@ -1306,6 +1191,14 @@ static unsigned long __init setup_memory
+       }
+       printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
+               pages_to_mb(highend_pfn - highstart_pfn));
++      num_physpages = highend_pfn;
++      high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
++#else
++      num_physpages = max_low_pfn;
++      high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
++#endif
++#ifdef CONFIG_FLATMEM
++      max_mapnr = num_physpages;
+ #endif
+       printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
+                       pages_to_mb(max_low_pfn));
+@@ -1317,22 +1210,19 @@ static unsigned long __init setup_memory
+ void __init zone_sizes_init(void)
+ {
+-      unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
+-      unsigned int max_dma, low;
+-
+-      max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
+-      low = max_low_pfn;
+-
+-      if (low < max_dma)
+-              zones_size[ZONE_DMA] = low;
+-      else {
+-              zones_size[ZONE_DMA] = max_dma;
+-              zones_size[ZONE_NORMAL] = low - max_dma;
++      unsigned long max_zone_pfns[MAX_NR_ZONES];
++      memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
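++      /* zones are now described by their highest PFN plus registered active ranges; free_area_init_nodes() sizes them from that */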
++      max_zone_pfns[ZONE_DMA] =
++              virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
++      max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
+ #ifdef CONFIG_HIGHMEM
+-              zones_size[ZONE_HIGHMEM] = highend_pfn - low;
++      max_zone_pfns[ZONE_HIGHMEM] = highend_pfn;
++      add_active_range(0, 0, highend_pfn);
++#else
++      add_active_range(0, 0, max_low_pfn);
+ #endif
+-      }
+-      free_area_init(zones_size);
++
++      free_area_init_nodes(max_zone_pfns);
+ }
+ #else
+ extern unsigned long __init setup_memory(void);
+@@ -1389,6 +1279,7 @@ void __init setup_bootmem_allocator(void
+        */
+       acpi_reserve_bootmem();
+ #endif
++      numa_kva_reserve();
+ #endif /* !CONFIG_XEN */
+ #ifdef CONFIG_BLK_DEV_INITRD
+@@ -1574,7 +1465,7 @@ static int __init request_standard_resou
+       request_resource(&iomem_resource, &video_ram_resource);
+       /* request I/O space for devices used on all i[345]86 PCs */
+-      for (i = 0; i < STANDARD_IO_RESOURCES; i++)
++      for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++)
+               request_resource(&ioport_resource, &standard_io_resources[i]);
+       return 0;
+ }
+@@ -1705,17 +1596,19 @@ void __init setup_arch(char **cmdline_p)
+       data_resource.start = virt_to_phys(_etext);
+       data_resource.end = virt_to_phys(_edata)-1;
+-      parse_cmdline_early(cmdline_p);
++      if ((i = MAX_GUEST_CMDLINE) > COMMAND_LINE_SIZE)
++              i = COMMAND_LINE_SIZE;
++      memcpy(saved_command_line, xen_start_info->cmd_line, i);
++      saved_command_line[i - 1] = '\0';
++      parse_early_param();
+-#ifdef CONFIG_EARLY_PRINTK
+-      {
+-              char *s = strstr(*cmdline_p, "earlyprintk=");
+-              if (s) {
+-                      setup_early_printk(strchr(s, '=') + 1);
+-                      printk("early console enabled\n");
+-              }
++      if (user_defined_memmap) {
++              printk(KERN_INFO "user-defined physical RAM map:\n");
++              print_memory_map("user");
+       }
+-#endif
++
++      strlcpy(command_line, saved_command_line, COMMAND_LINE_SIZE);
++      *cmdline_p = command_line;
+       max_low_pfn = setup_memory();
+@@ -1822,7 +1715,7 @@ void __init setup_arch(char **cmdline_p)
+               dmi_scan_machine();
+ #ifdef CONFIG_X86_GENERICARCH
+-      generic_apic_probe(*cmdline_p);
++      generic_apic_probe();
+ #endif        
+       if (efi_enabled)
+               efi_map_memmap();
+@@ -1843,9 +1736,11 @@ void __init setup_arch(char **cmdline_p)
+       acpi_boot_table_init();
+ #endif
++#ifdef CONFIG_PCI
+ #ifdef CONFIG_X86_IO_APIC
+       check_acpi_pci();       /* Checks more than just ACPI actually */
+ #endif
++#endif
+ #ifdef CONFIG_ACPI
+       acpi_boot_init();
+--- sle11-2009-05-14.orig/arch/x86/kernel/smp_32-xen.c 2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/arch/x86/kernel/smp_32-xen.c      2009-03-04 11:28:34.000000000 +0100
+@@ -279,8 +279,7 @@ static inline void leave_mm (unsigned lo
+  * 2) Leave the mm if we are in the lazy tlb mode.
+  */
+-irqreturn_t smp_invalidate_interrupt(int irq, void *dev_id,
+-                                   struct pt_regs *regs)
++irqreturn_t smp_invalidate_interrupt(int irq, void *dev_id)
+ {
+       unsigned long cpu;
+@@ -567,16 +566,14 @@ void smp_send_stop(void)
+  * all the work is done automatically when
+  * we return from the interrupt.
+  */
+-irqreturn_t smp_reschedule_interrupt(int irq, void *dev_id,
+-                                   struct pt_regs *regs)
++irqreturn_t smp_reschedule_interrupt(int irq, void *dev_id)
+ {
+       return IRQ_HANDLED;
+ }
+ #include <linux/kallsyms.h>
+-irqreturn_t smp_call_function_interrupt(int irq, void *dev_id,
+-                                      struct pt_regs *regs)
++irqreturn_t smp_call_function_interrupt(int irq, void *dev_id)
+ {
+       void (*func) (void *info) = call_data->func;
+       void *info = call_data->info;
+@@ -603,3 +600,69 @@ irqreturn_t smp_call_function_interrupt(
+       return IRQ_HANDLED;
+ }
++/*
++ * this function sends a 'generic call function' IPI to one other CPU
++ * in the system.
++ *
++ * cpu is a standard Linux logical CPU number.
++ */
++static void
++__smp_call_function_single(int cpu, void (*func) (void *info), void *info,
++                              int nonatomic, int wait)
++{
++      struct call_data_struct data;
++      int cpus = 1;
++
++      data.func = func;
++      data.info = info;
++      atomic_set(&data.started, 0);
++      data.wait = wait;
++      if (wait)
++              atomic_set(&data.finished, 0);
++
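++      /* publish the call descriptor; the wmb() below orders these stores before the IPI is sent */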
++      call_data = &data;
++      wmb();
++      /* Send a message to all other CPUs and wait for them to respond */
++      send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_VECTOR);
++
++      /* Wait for response */
++      while (atomic_read(&data.started) != cpus)
++              cpu_relax();
++
++      if (!wait)
++              return;
++
++      while (atomic_read(&data.finished) != cpus)
++              cpu_relax();
++}
++
++/*
++ * smp_call_function_single - Run a function on another CPU
++ * @cpu: The target CPU; must not be the calling CPU.
++ * @func: The function to run. This must be fast and non-blocking.
++ * @info: An arbitrary pointer to pass to the function.
++ * @nonatomic: Currently unused.
++ * @wait: If true, wait until the function has completed on the other CPU.
++ *
++ * Returns 0 on success, else a negative status code.
++ *
++ * Does not return until the remote CPU is nearly ready to execute <func>,
++ * or has already executed it.
++ */
++
++int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
++                      int nonatomic, int wait)
++{
++      /* prevent preemption and reschedule on another processor */
++      int me = get_cpu();
++      if (cpu == me) {
++              WARN_ON(1);
++              put_cpu();
++              return -EBUSY;
++      }
++      spin_lock_bh(&call_lock);
++      __smp_call_function_single(cpu, func, info, nonatomic, wait);
++      spin_unlock_bh(&call_lock);
++      put_cpu();
++      return 0;
++}
++EXPORT_SYMBOL(smp_call_function_single);
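++
++/*
++ * Illustrative use (hypothetical helper, not part of this patch): run a
++ * fast, non-blocking callback on CPU 1 and wait for it to finish.  The
++ * target must not be the calling CPU, or -EBUSY is returned.
++ *
++ *     static void poke(void *info) { (*(int *)info)++; }
++ *
++ *     int hits = 0;
++ *     if (smp_call_function_single(1, poke, &hits, 0, 1) == 0)
++ *             printk(KERN_DEBUG "poke ran on CPU 1\n");
++ */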
+--- sle11-2009-05-14.orig/arch/x86/kernel/time_32-xen.c        2009-04-20 11:36:10.000000000 +0200
++++ sle11-2009-05-14/arch/x86/kernel/time_32-xen.c     2009-03-24 10:08:00.000000000 +0100
+@@ -89,7 +89,6 @@ int pit_latch_buggy;              /* ext
+ unsigned long vxtime_hz = PIT_TICK_RATE;
+ struct vxtime_data __vxtime __section_vxtime;   /* for vsyscalls */
+ volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
+-unsigned long __wall_jiffies __section_wall_jiffies = INITIAL_JIFFIES;
+ struct timespec __xtime __section_xtime;
+ struct timezone __sys_tz __section_sys_tz;
+ #endif
+@@ -97,8 +96,6 @@ struct timezone __sys_tz __section_sys_t
+ unsigned int cpu_khz; /* Detected as we calibrate the TSC */
+ EXPORT_SYMBOL(cpu_khz);
+-extern unsigned long wall_jiffies;
+-
+ DEFINE_SPINLOCK(rtc_lock);
+ EXPORT_SYMBOL(rtc_lock);
+@@ -265,11 +262,10 @@ static void __update_wallclock(time_t se
+       time_t wtm_sec, xtime_sec;
+       u64 tmp, wc_nsec;
+-      /* Adjust wall-clock time base based on wall_jiffies ticks. */
++      /* Adjust wall-clock time base. */
+       wc_nsec = processed_system_time;
+       wc_nsec += sec * (u64)NSEC_PER_SEC;
+       wc_nsec += nsec;
+-      wc_nsec -= (jiffies - wall_jiffies) * (u64)NS_PER_TICK;
+       /* Split wallclock base into seconds and nanoseconds. */
+       tmp = wc_nsec;
+@@ -392,16 +388,10 @@ void do_gettimeofday(struct timeval *tv)
+       shadow = &per_cpu(shadow_time, cpu);
+       do {
+-              unsigned long lost;
+-
+               local_time_version = shadow->version;
+               seq = read_seqbegin(&xtime_lock);
+               usec = get_usec_offset(shadow);
+-              lost = jiffies - wall_jiffies;
+-
+-              if (unlikely(lost))
+-                      usec += lost * (USEC_PER_SEC / HZ);
+               sec = xtime.tv_sec;
+               usec += (xtime.tv_nsec / NSEC_PER_USEC);
+@@ -525,7 +515,7 @@ static void sync_xen_wallclock(unsigned 
+       write_seqlock_irq(&xtime_lock);
+       sec  = xtime.tv_sec;
+-      nsec = xtime.tv_nsec + ((jiffies - wall_jiffies) * (u64)NS_PER_TICK);
++      nsec = xtime.tv_nsec;
+       __normalize_time(&sec, &nsec);
+       op.cmd = XENPF_settime;
+@@ -599,42 +589,49 @@ unsigned long long sched_clock(void)
+ }
+ #endif
+-#if defined(CONFIG_SMP) && defined(CONFIG_FRAME_POINTER)
+ unsigned long profile_pc(struct pt_regs *regs)
+ {
+       unsigned long pc = instruction_pointer(regs);
+-#ifdef __x86_64__
+-      /* Assume the lock function has either no stack frame or only a single word.
+-         This checks if the address on the stack looks like a kernel text address.
+-         There is a small window for false hits, but in that case the tick
+-         is just accounted to the spinlock function.
+-         Better would be to write these functions in assembler again
+-         and check exactly. */
++#if defined(CONFIG_SMP) || defined(__x86_64__)
+       if (!user_mode_vm(regs) && in_lock_functions(pc)) {
+-              char *v = *(char **)regs->rsp;
+-              if ((v >= _stext && v <= _etext) ||
+-                      (v >= _sinittext && v <= _einittext) ||
+-                      (v >= (char *)MODULES_VADDR  && v <= (char *)MODULES_END))
+-                      return (unsigned long)v;
+-              return ((unsigned long *)regs->rsp)[1];
++# ifdef CONFIG_FRAME_POINTER
++#  ifdef __i386__
++              return ((unsigned long *)regs->ebp)[1];
++#  else
++              return ((unsigned long *)regs->rbp)[1];
++#  endif
++# else
++#  ifdef __i386__
++              unsigned long *sp;
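++              /* CS bit 1 is clear for rings 0/1 (native or Xen ring-1 kernel); kernel-mode frames have no saved esp */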
++              if ((regs->xcs & 2) == 0)
++                      sp = (unsigned long *)&regs->esp;
++              else
++                      sp = (unsigned long *)regs->esp;
++#  else
++              unsigned long *sp = (unsigned long *)regs->rsp;
++#  endif
++              /* Return address is either directly at stack pointer
++                 or above a saved eflags. Eflags has bits 22-31 zero,
++                 kernel addresses don't. */
++              if (sp[0] >> 22)
++                      return sp[0];
++              if (sp[1] >> 22)
++                      return sp[1];
++# endif
+       }
+-#else
+-      if (!user_mode_vm(regs) && in_lock_functions(pc))
+-              return *(unsigned long *)(regs->ebp + 4);
+ #endif
+       return pc;
+ }
+ EXPORT_SYMBOL(profile_pc);
+-#endif
+ /*
+  * This is the same as the above, except we _also_ save the current
+  * Time Stamp Counter value at the time of the timer interrupt, so that
+  * we later on can estimate the time of day more exactly.
+  */
+-irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
++irqreturn_t timer_interrupt(int irq, void *dev_id)
+ {
+       s64 delta, delta_cpu, stolen, blocked;
+       u64 sched_time;
+@@ -692,10 +689,14 @@ irqreturn_t timer_interrupt(int irq, voi
+       }
+       /* System-wide jiffy work. */
+-      while (delta >= NS_PER_TICK) {
+-              delta -= NS_PER_TICK;
+-              processed_system_time += NS_PER_TICK;
+-              do_timer(regs);
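++      /* do_timer() now takes a tick count: credit all pending ticks in one go, at most HZ per call */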
++      if (delta >= NS_PER_TICK) {
++              do_div(delta, NS_PER_TICK);
++              processed_system_time += delta * NS_PER_TICK;
++              while (delta > HZ) {
++                      do_timer(HZ);
++                      delta -= HZ;
++              }
++              do_timer(delta);
+       }
+       if (shadow_tv_version != HYPERVISOR_shared_info->wc_version) {
+@@ -740,7 +741,7 @@ irqreturn_t timer_interrupt(int irq, voi
+       if (delta_cpu > 0) {
+               do_div(delta_cpu, NS_PER_TICK);
+               per_cpu(processed_system_time, cpu) += delta_cpu * NS_PER_TICK;
+-              if (user_mode_vm(regs))
++              if (user_mode_vm(get_irq_regs()))
+                       account_user_time(current, (cputime_t)delta_cpu);
+               else
+                       account_system_time(current, HARDIRQ_OFFSET,
+@@ -754,10 +755,10 @@ irqreturn_t timer_interrupt(int irq, voi
+       /* Local timer processing (see update_process_times()). */
+       run_local_timers();
+       if (rcu_pending(cpu))
+-              rcu_check_callbacks(cpu, user_mode_vm(regs));
++              rcu_check_callbacks(cpu, user_mode_vm(get_irq_regs()));
+       scheduler_tick();
+       run_posix_cpu_timers(current);
+-      profile_tick(CPU_PROFILING, regs);
++      profile_tick(CPU_PROFILING);
+       return IRQ_HANDLED;
+ }
+@@ -967,10 +968,11 @@ extern void (*late_time_init)(void);
+ /* Duplicate of time_init() below, with hpet_enable part added */
+ static void __init hpet_time_init(void)
+ {
+-      xtime.tv_sec = get_cmos_time();
+-      xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ);
+-      set_normalized_timespec(&wall_to_monotonic,
+-              -xtime.tv_sec, -xtime.tv_nsec);
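++      /* do_settimeofday() also fixes up wall_to_monotonic, so the open-coded set_normalized_timespec() goes away */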
++      struct timespec ts;
++      ts.tv_sec = get_cmos_time();
++      ts.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ);
++
++      do_settimeofday(&ts);
+       if ((hpet_enable() >= 0) && hpet_use_timer) {
+               printk("Using HPET for base-timer\n");
+--- sle11-2009-05-14.orig/arch/x86/kernel/traps_32-xen.c       2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/arch/x86/kernel/traps_32-xen.c    2009-03-04 11:28:34.000000000 +0100
+@@ -28,6 +28,7 @@
+ #include <linux/kprobes.h>
+ #include <linux/kexec.h>
+ #include <linux/unwind.h>
++#include <linux/uaccess.h>
+ #ifdef CONFIG_EISA
+ #include <linux/ioport.h>
+@@ -40,7 +41,6 @@
+ #include <asm/processor.h>
+ #include <asm/system.h>
+-#include <asm/uaccess.h>
+ #include <asm/io.h>
+ #include <asm/atomic.h>
+ #include <asm/debugreg.h>
+@@ -51,11 +51,14 @@
+ #include <asm/smp.h>
+ #include <asm/arch_hooks.h>
+ #include <asm/kdebug.h>
++#include <asm/stacktrace.h>
+ #include <linux/module.h>
+ #include "mach_traps.h"
++int panic_on_unrecovered_nmi;
++
+ asmlinkage int system_call(void);
+ struct desc_struct default_ldt[] = { { 0, 0 }, { 0, 0 }, { 0, 0 },
+@@ -124,62 +127,63 @@ static inline int valid_stack_ptr(struct
+               p < (void *)tinfo + THREAD_SIZE - 3;
+ }
+-/*
+- * Print one address/symbol entries per line.
+- */
+-static inline void print_addr_and_symbol(unsigned long addr, char *log_lvl)
+-{
+-      printk(" [<%08lx>] ", addr);
+-
+-      print_symbol("%s\n", addr);
+-}
+-
+ static inline unsigned long print_context_stack(struct thread_info *tinfo,
+                               unsigned long *stack, unsigned long ebp,
+-                              char *log_lvl)
++                              struct stacktrace_ops *ops, void *data)
+ {
+       unsigned long addr;
+ #ifdef        CONFIG_FRAME_POINTER
+       while (valid_stack_ptr(tinfo, (void *)ebp)) {
++              unsigned long new_ebp;
+               addr = *(unsigned long *)(ebp + 4);
+-              print_addr_and_symbol(addr, log_lvl);
++              ops->address(data, addr);
+               /*
+                * break out of recursive entries (such as
+-               * end_of_stack_stop_unwind_function):
++               * end_of_stack_stop_unwind_function). Also,
++               * we can never allow a frame pointer to
++               * move downwards!
+                */
+-              if (ebp == *(unsigned long *)ebp)
++              new_ebp = *(unsigned long *)ebp;
++              if (new_ebp <= ebp)
+                       break;
+-              ebp = *(unsigned long *)ebp;
++              ebp = new_ebp;
+       }
+ #else
+       while (valid_stack_ptr(tinfo, stack)) {
+               addr = *stack++;
+               if (__kernel_text_address(addr))
+-                      print_addr_and_symbol(addr, log_lvl);
++                      ops->address(data, addr);
+       }
+ #endif
+       return ebp;
+ }
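++/* bundle a stacktrace_ops with its cookie so the single-pointer unwinder callback can reach both */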
++struct ops_and_data {
++      struct stacktrace_ops *ops;
++      void *data;
++};
++
+ static asmlinkage int
+-show_trace_unwind(struct unwind_frame_info *info, void *log_lvl)
++dump_trace_unwind(struct unwind_frame_info *info, void *data)
+ {
++      struct ops_and_data *oad = (struct ops_and_data *)data;
+       int n = 0;
+       while (unwind(info) == 0 && UNW_PC(info)) {
+               n++;
+-              print_addr_and_symbol(UNW_PC(info), log_lvl);
++              oad->ops->address(oad->data, UNW_PC(info));
+               if (arch_unw_user_mode(info))
+                       break;
+       }
+       return n;
+ }
+-static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
+-                             unsigned long *stack, char *log_lvl)
++void dump_trace(struct task_struct *task, struct pt_regs *regs,
++              unsigned long *stack,
++              struct stacktrace_ops *ops, void *data)
+ {
+-      unsigned long ebp;
++      unsigned long ebp = 0;
+       if (!task)
+               task = current;
+@@ -187,54 +191,116 @@ static void show_trace_log_lvl(struct ta
+       if (call_trace >= 0) {
+               int unw_ret = 0;
+               struct unwind_frame_info info;
++              struct ops_and_data oad = { .ops = ops, .data = data };
+               if (regs) {
+                       if (unwind_init_frame_info(&info, task, regs) == 0)
+-                              unw_ret = show_trace_unwind(&info, log_lvl);
++                              unw_ret = dump_trace_unwind(&info, &oad);
+               } else if (task == current)
+-                      unw_ret = unwind_init_running(&info, show_trace_unwind, log_lvl);
++                      unw_ret = unwind_init_running(&info, dump_trace_unwind, &oad);
+               else {
+                       if (unwind_init_blocked(&info, task) == 0)
+-                              unw_ret = show_trace_unwind(&info, log_lvl);
++                              unw_ret = dump_trace_unwind(&info, &oad);
+               }
+               if (unw_ret > 0) {
+                       if (call_trace == 1 && !arch_unw_user_mode(&info)) {
+-                              print_symbol("DWARF2 unwinder stuck at %s\n",
++                              ops->warning_symbol(data, "DWARF2 unwinder stuck at %s\n",
+                                            UNW_PC(&info));
+                               if (UNW_SP(&info) >= PAGE_OFFSET) {
+-                                      printk("Leftover inexact backtrace:\n");
++                                      ops->warning(data, "Leftover inexact backtrace:\n");
+                                       stack = (void *)UNW_SP(&info);
++                                      if (!stack)
++                                              return;
++                                      ebp = UNW_FP(&info);
+                               } else
+-                                      printk("Full inexact backtrace again:\n");
++                                      ops->warning(data, "Full inexact backtrace again:\n");
+                       } else if (call_trace >= 1)
+                               return;
+                       else
+-                              printk("Full inexact backtrace again:\n");
++                              ops->warning(data, "Full inexact backtrace again:\n");
+               } else
+-                      printk("Inexact backtrace:\n");
++                      ops->warning(data, "Inexact backtrace:\n");
+       }
+-
+-      if (task == current) {
+-              /* Grab ebp right from our regs */
+-              asm ("movl %%ebp, %0" : "=r" (ebp) : );
+-      } else {
+-              /* ebp is the last reg pushed by switch_to */
+-              ebp = *(unsigned long *) task->thread.esp;
++      if (!stack) {
++              unsigned long dummy;
++              stack = &dummy;
++              if (task && task != current)
++                      stack = (unsigned long *)task->thread.esp;
++      }
++
++#ifdef CONFIG_FRAME_POINTER
++      if (!ebp) {
++              if (task == current) {
++                      /* Grab ebp right from our regs */
++                      asm ("movl %%ebp, %0" : "=r" (ebp) : );
++              } else {
++                      /* ebp is the last reg pushed by switch_to */
++                      ebp = *(unsigned long *) task->thread.esp;
++              }
+       }
++#endif
+       while (1) {
+               struct thread_info *context;
+               context = (struct thread_info *)
+                       ((unsigned long)stack & (~(THREAD_SIZE - 1)));
+-              ebp = print_context_stack(context, stack, ebp, log_lvl);
++              ebp = print_context_stack(context, stack, ebp, ops, data);
++              /* Should be after the line below, but somewhere
++                 in early boot context comes out corrupted and we
++                 can't reference it -AK */
++              if (ops->stack(data, "IRQ") < 0)
++                      break;
+               stack = (unsigned long*)context->previous_esp;
+               if (!stack)
+                       break;
+-              printk("%s =======================\n", log_lvl);
+       }
+ }
++EXPORT_SYMBOL(dump_trace);
++
++static void
++print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
++{
++      printk(data);
++      print_symbol(msg, symbol);
++      printk("\n");
++}
++
++static void print_trace_warning(void *data, char *msg)
++{
++      printk("%s%s\n", (char *)data, msg);
++}
+-void show_trace(struct task_struct *task, struct pt_regs *regs, unsigned long * stack)
++static int print_trace_stack(void *data, char *name)
++{
++      return 0;
++}
++
++/*
++ * Print one address/symbol entries per line.
++ */
++static void print_trace_address(void *data, unsigned long addr)
++{
++      printk("%s [<%08lx>] ", (char *)data, addr);
++      print_symbol("%s\n", addr);
++}
++
++static struct stacktrace_ops print_trace_ops = {
++      .warning = print_trace_warning,
++      .warning_symbol = print_trace_warning_symbol,
++      .stack = print_trace_stack,
++      .address = print_trace_address,
++};
++
++static void
++show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
++                 unsigned long * stack, char *log_lvl)
++{
++      dump_trace(task, regs, stack, &print_trace_ops, log_lvl);
++      printk("%s =======================\n", log_lvl);
++}
++
++void show_trace(struct task_struct *task, struct pt_regs *regs,
++              unsigned long * stack)
+ {
+       show_trace_log_lvl(task, regs, stack, "");
+ }
+@@ -297,12 +363,13 @@ void show_registers(struct pt_regs *regs
+               ss = regs->xss & 0xffff;
+       }
+       print_modules();
+-      printk(KERN_EMERG "CPU:    %d\nEIP:    %04x:[<%08lx>]    %s VLI\n"
+-                      "EFLAGS: %08lx   (%s %.*s) \n",
++      printk(KERN_EMERG "CPU:    %d\n"
++              KERN_EMERG "EIP:    %04x:[<%08lx>]    %s VLI\n"
++              KERN_EMERG "EFLAGS: %08lx   (%s %.*s)\n",
+               smp_processor_id(), 0xffff & regs->xcs, regs->eip,
+-              print_tainted(), regs->eflags, system_utsname.release,
+-              (int)strcspn(system_utsname.version, " "),
+-              system_utsname.version);
++              print_tainted(), regs->eflags, init_utsname()->release,
++              (int)strcspn(init_utsname()->version, " "),
++              init_utsname()->version);
+       print_symbol(KERN_EMERG "EIP is at %s\n", regs->eip);
+       printk(KERN_EMERG "eax: %08lx   ebx: %08lx   ecx: %08lx   edx: %08lx\n",
+               regs->eax, regs->ebx, regs->ecx, regs->edx);
+@@ -319,6 +386,8 @@ void show_registers(struct pt_regs *regs
+        */
+       if (in_kernel) {
+               u8 __user *eip;
++              int code_bytes = 64;
++              unsigned char c;
+               printk("\n" KERN_EMERG "Stack: ");
+               show_stack_log_lvl(NULL, regs, (unsigned long *)esp, KERN_EMERG);
+@@ -326,9 +395,12 @@ void show_registers(struct pt_regs *regs
+               printk(KERN_EMERG "Code: ");
+               eip = (u8 __user *)regs->eip - 43;
+-              for (i = 0; i < 64; i++, eip++) {
+-                      unsigned char c;
+-
++              if (eip < (u8 __user *)PAGE_OFFSET || __get_user(c, eip)) {
++                      /* try starting at EIP */
++                      eip = (u8 __user *)regs->eip;
++                      code_bytes = 32;
++              }
++              for (i = 0; i < code_bytes; i++, eip++) {
+                       if (eip < (u8 __user *)PAGE_OFFSET || __get_user(c, eip)) {
+                               printk(" Bad EIP value.");
+                               break;
+@@ -349,7 +421,7 @@ static void handle_BUG(struct pt_regs *r
+       if (eip < PAGE_OFFSET)
+               return;
+-      if (__get_user(ud2, (unsigned short __user *)eip))
++      if (probe_kernel_address((unsigned short __user *)eip, ud2))
+               return;
+       if (ud2 != 0x0b0f)
+               return;
+@@ -362,7 +434,8 @@ static void handle_BUG(struct pt_regs *r
+               char *file;
+               char c;
+-              if (__get_user(line, (unsigned short __user *)(eip + 2)))
++              if (probe_kernel_address((unsigned short __user *)(eip + 2),
++                                      line))
+                       break;
+               if (__get_user(file, (char * __user *)(eip + 4)) ||
+                   (unsigned long)file < PAGE_OFFSET || __get_user(c, file))
+@@ -604,18 +677,24 @@ gp_in_kernel:
+       }
+ }
+-static void mem_parity_error(unsigned char reason, struct pt_regs * regs)
++static __kprobes void
++mem_parity_error(unsigned char reason, struct pt_regs * regs)
+ {
+-      printk(KERN_EMERG "Uhhuh. NMI received. Dazed and confused, but trying "
+-                      "to continue\n");
++      printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x on "
++              "CPU %d.\n", reason, smp_processor_id());
+       printk(KERN_EMERG "You probably have a hardware problem with your RAM "
+                       "chips\n");
++      if (panic_on_unrecovered_nmi)
++                panic("NMI: Not continuing");
++
++      printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
+       /* Clear and disable the memory parity error line. */
+       clear_mem_error(reason);
+ }
+-static void io_check_error(unsigned char reason, struct pt_regs * regs)
++static __kprobes void
++io_check_error(unsigned char reason, struct pt_regs * regs)
+ {
+       printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n");
+       show_registers(regs);
+@@ -624,7 +703,8 @@ static void io_check_error(unsigned char
+       clear_io_check_error(reason);
+ }
+-static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
++static __kprobes void
++unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
+ {
+ #ifdef CONFIG_MCA
+       /* Might actually be able to figure out what the guilty party
+@@ -634,15 +714,18 @@ static void unknown_nmi_error(unsigned c
+               return;
+       }
+ #endif
+-      printk("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
+-              reason, smp_processor_id());
+-      printk("Dazed and confused, but trying to continue\n");
+-      printk("Do you have a strange power saving mode enabled?\n");
++      printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x on "
++              "CPU %d.\n", reason, smp_processor_id());
++      printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n");
++      if (panic_on_unrecovered_nmi)
++                panic("NMI: Not continuing");
++
++      printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
+ }
+ static DEFINE_SPINLOCK(nmi_print_lock);
+-void die_nmi (struct pt_regs *regs, const char *msg)
++void __kprobes die_nmi(struct pt_regs *regs, const char *msg)
+ {
+       if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 2, SIGINT) ==
+           NOTIFY_STOP)
+@@ -674,7 +757,7 @@ void die_nmi (struct pt_regs *regs, cons
+       do_exit(SIGSEGV);
+ }
+-static void default_do_nmi(struct pt_regs * regs)
++static __kprobes void default_do_nmi(struct pt_regs * regs)
+ {
+       unsigned char reason = 0;
+@@ -691,12 +774,12 @@ static void default_do_nmi(struct pt_reg
+                * Ok, so this is none of the documented NMI sources,
+                * so it must be the NMI watchdog.
+                */
+-              if (nmi_watchdog) {
+-                      nmi_watchdog_tick(regs);
++              if (nmi_watchdog_tick(regs, reason))
+                       return;
+-              }
++              if (!do_nmi_callback(regs, smp_processor_id()))
+ #endif
+-              unknown_nmi_error(reason, regs);
++                      unknown_nmi_error(reason, regs);
++
+               return;
+       }
+       if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
+@@ -712,14 +795,7 @@ static void default_do_nmi(struct pt_reg
+       reassert_nmi();
+ }
+-static int dummy_nmi_callback(struct pt_regs * regs, int cpu)
+-{
+-      return 0;
+-}
+- 
+-static nmi_callback_t nmi_callback = dummy_nmi_callback;
+- 
+-fastcall void do_nmi(struct pt_regs * regs, long error_code)
++fastcall __kprobes void do_nmi(struct pt_regs * regs, long error_code)
+ {
+       int cpu;
+@@ -729,25 +805,11 @@ fastcall void do_nmi(struct pt_regs * re
+       ++nmi_count(cpu);
+-      if (!rcu_dereference(nmi_callback)(regs, cpu))
+-              default_do_nmi(regs);
++      default_do_nmi(regs);
+       nmi_exit();
+ }
+-void set_nmi_callback(nmi_callback_t callback)
+-{
+-      vmalloc_sync_all();
+-      rcu_assign_pointer(nmi_callback, callback);
+-}
+-EXPORT_SYMBOL_GPL(set_nmi_callback);
+-
+-void unset_nmi_callback(void)
+-{
+-      nmi_callback = dummy_nmi_callback;
+-}
+-EXPORT_SYMBOL_GPL(unset_nmi_callback);
+-
+ #ifdef CONFIG_KPROBES
+ fastcall void __kprobes do_int3(struct pt_regs *regs, long error_code)
+ {
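
The traps.c hunks above replace the hard-wired printk stack dumper with the pluggable dump_trace()/stacktrace_ops interface that 2.6.19 introduced; print_trace_ops is just one consumer. A minimal sketch of a second consumer that records raw return addresses instead of printing them (everything below except dump_trace() and struct stacktrace_ops is hypothetical):

struct addr_log {
        unsigned long entries[32];
        unsigned int nr;
};

static void log_warning(void *data, char *msg) { }
static void log_warning_symbol(void *data, char *msg, unsigned long sym) { }

static int log_stack(void *data, char *name)
{
        return 0;                       /* >= 0: keep walking across stacks */
}

static void log_address(void *data, unsigned long addr)
{
        struct addr_log *log = data;

        if (log->nr < ARRAY_SIZE(log->entries))
                log->entries[log->nr++] = addr;
}

static struct stacktrace_ops log_ops = {
        .warning        = log_warning,
        .warning_symbol = log_warning_symbol,
        .stack          = log_stack,
        .address        = log_address,
};

/* usage:
 *      struct addr_log log = { .nr = 0 };
 *      dump_trace(current, NULL, NULL, &log_ops, &log);
 */
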
+--- sle11-2009-05-14.orig/arch/x86/mach-xen/setup.c    2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/arch/x86/mach-xen/setup.c 2009-03-04 11:28:34.000000000 +0100
+@@ -103,8 +103,10 @@ void __init pre_setup_arch_hook(void)
+       setup_xen_features();
+-      if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0)
+-              set_fixaddr_top(pp.virt_start);
++      if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0) {
++              hypervisor_virt_start = pp.virt_start;
++              reserve_top_address(0UL - pp.virt_start);
++      }
+       if (HYPERVISOR_memory_op(XENMEM_machphys_mapping, &mapping) == 0) {
+               machine_to_phys_mapping = (unsigned long *)mapping.v_start;
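
The pre_setup_arch_hook() change above drops the Xen-only set_fixaddr_top() in favour of the generic reserve_top_address() (added by this same patch in pgtable_32-xen.c further down). The reservation is the distance from the start of the hypervisor hole to the 4 GiB wrap; a worked example with an illustrative virt_start, not a value taken from this patch:

/*
 *   pp.virt_start = 0xF5800000
 *   reserve       = 0UL - 0xF5800000 = 0x0A800000   (168 MiB)
 *
 * reserve_top_address(reserve) then lowers __FIXADDR_TOP below the
 * hole and grows __VMALLOC_RESERVE by the same amount.
 */
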
+--- sle11-2009-05-14.orig/arch/x86/mm/fault_32-xen.c   2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/arch/x86/mm/fault_32-xen.c        2009-03-04 11:28:34.000000000 +0100
+@@ -27,21 +27,24 @@
+ #include <asm/uaccess.h>
+ #include <asm/desc.h>
+ #include <asm/kdebug.h>
++#include <asm/segment.h>
+ extern void die(const char *,struct pt_regs *,long);
+-#ifdef CONFIG_KPROBES
+-ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
++static ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
++
+ int register_page_fault_notifier(struct notifier_block *nb)
+ {
+       vmalloc_sync_all();
+       return atomic_notifier_chain_register(&notify_page_fault_chain, nb);
+ }
++EXPORT_SYMBOL_GPL(register_page_fault_notifier);
+ int unregister_page_fault_notifier(struct notifier_block *nb)
+ {
+       return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb);
+ }
++EXPORT_SYMBOL_GPL(unregister_page_fault_notifier);
+ static inline int notify_page_fault(enum die_val val, const char *str,
+                       struct pt_regs *regs, long err, int trap, int sig)
+@@ -55,14 +58,6 @@ static inline int notify_page_fault(enum
+       };
+       return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args);
+ }
+-#else
+-static inline int notify_page_fault(enum die_val val, const char *str,
+-                      struct pt_regs *regs, long err, int trap, int sig)
+-{
+-      return NOTIFY_DONE;
+-}
+-#endif
+-
+ /*
+  * Unlock any spinlocks which will prevent us from getting the
+@@ -119,10 +114,10 @@ static inline unsigned long get_segment_
+       }
+       /* The standard kernel/user address space limit. */
+-      *eip_limit = (seg & 2) ? USER_DS.seg : KERNEL_DS.seg;
++      *eip_limit = user_mode(regs) ? USER_DS.seg : KERNEL_DS.seg;
+       
+       /* By far the most common cases. */
+-      if (likely(seg == __USER_CS || seg == GET_KERNEL_CS()))
++      if (likely(SEGMENT_IS_FLAT_CODE(seg)))
+               return eip;
+       /* Check the segment exists, is within the current LDT/GDT size,
+@@ -559,11 +554,7 @@ good_area:
+       write = 0;
+       switch (error_code & 3) {
+               default:        /* 3: write, present */
+-#ifdef TEST_VERIFY_AREA
+-                      if (regs->cs == GET_KERNEL_CS())
+-                              printk("WP fault at %08lx\n", regs->eip);
+-#endif
+-                      /* fall through */
++                              /* fall through */
+               case 2:         /* write, not present */
+                       if (!(vma->vm_flags & VM_WRITE))
+                               goto bad_area;
+@@ -572,7 +563,7 @@ good_area:
+               case 1:         /* read, present */
+                       goto bad_area;
+               case 0:         /* read, not present */
+-                      if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
++                      if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
+                               goto bad_area;
+       }
+@@ -704,7 +695,7 @@ no_context:
+  */
+ out_of_memory:
+       up_read(&mm->mmap_sem);
+-      if (tsk->pid == 1) {
++      if (is_init(tsk)) {
+               yield();
+               down_read(&mm->mmap_sem);
+               goto survive;
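
With the #ifdef CONFIG_KPROBES guard removed and the register/unregister helpers exported above, any module can now hook the page-fault notifier chain. A minimal sketch, with hypothetical handler names and the die_args layout as built by notify_page_fault() in this file:

#include <linux/notifier.h>
#include <asm/kdebug.h>

static int my_fault_event(struct notifier_block *nb,
                          unsigned long val, void *data)
{
        struct die_args *args = data;

        if (val == DIE_PAGE_FAULT)
                printk(KERN_DEBUG "page fault at eip %08lx\n",
                       args->regs->eip);
        return NOTIFY_DONE;
}

static struct notifier_block my_fault_nb = {
        .notifier_call = my_fault_event,
};

/* module init: register_page_fault_notifier(&my_fault_nb);
 * module exit: unregister_page_fault_notifier(&my_fault_nb);
 */
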
+--- sle11-2009-05-14.orig/arch/x86/mm/highmem_32-xen.c 2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/arch/x86/mm/highmem_32-xen.c      2009-03-04 11:28:34.000000000 +0100
+@@ -38,11 +38,9 @@ static void *__kmap_atomic(struct page *
+       idx = type + KM_TYPE_NR*smp_processor_id();
+       vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
+-#ifdef CONFIG_DEBUG_HIGHMEM
+       if (!pte_none(*(kmap_pte-idx)))
+               BUG();
+-#endif
+-      set_pte_at_sync(&init_mm, vaddr, kmap_pte-idx, mk_pte(page, prot));
++      set_pte_at(&init_mm, vaddr, kmap_pte-idx, mk_pte(page, prot));
+       return (void*) vaddr;
+ }
+@@ -62,36 +60,26 @@ void *kmap_atomic_pte(struct page *page,
+ void kunmap_atomic(void *kvaddr, enum km_type type)
+ {
+-#if defined(CONFIG_DEBUG_HIGHMEM) || defined(CONFIG_XEN)
+       unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
+       enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id();
+-      if (vaddr < FIXADDR_START) { // FIXME
++#ifdef CONFIG_DEBUG_HIGHMEM
++      if (vaddr >= PAGE_OFFSET && vaddr < (unsigned long)high_memory) {
+               dec_preempt_count();
+               preempt_check_resched();
+               return;
+       }
+-#endif
+-#if defined(CONFIG_DEBUG_HIGHMEM)
+       if (vaddr != __fix_to_virt(FIX_KMAP_BEGIN+idx))
+               BUG();
+-
+-      /*
+-       * force other mappings to Oops if they'll try to access
+-       * this pte without first remap it
+-       */
+-      pte_clear(&init_mm, vaddr, kmap_pte-idx);
+-      __flush_tlb_one(vaddr);
+-#elif defined(CONFIG_XEN)
++#endif
+       /*
+-       * We must ensure there are no dangling pagetable references when
+-       * returning memory to Xen (decrease_reservation).
+-       * XXX TODO: We could make this faster by only zapping when
+-       * kmap_flush_unused is called but that is trickier and more invasive.
++       * Force other mappings to Oops if they try to access this pte
++       * without first remapping it.  Keeping stale mappings around is
++       * also a bad idea, in case the page changes cacheability
++       * attributes or becomes a protected page in a hypervisor.
+        */
+-      pte_clear(&init_mm, vaddr, kmap_pte-idx);
+-#endif
++      kpte_clear_flush(kmap_pte-idx, vaddr);
+       dec_preempt_count();
+       preempt_check_resched();
+@@ -110,7 +98,6 @@ void *kmap_atomic_pfn(unsigned long pfn,
+       idx = type + KM_TYPE_NR*smp_processor_id();
+       vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
+       set_pte(kmap_pte-idx, pfn_pte(pfn, kmap_prot));
+-      __flush_tlb_one(vaddr);
+       return (void*) vaddr;
+ }
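
kunmap_atomic() above folds the old DEBUG_HIGHMEM and Xen branches into one unconditional kpte_clear_flush(). That helper is defined outside this patch (asm/pgtable.h); its assumed semantics are roughly:

/* clear the kmap pte and flush the single stale TLB entry */
#define kpte_clear_flush(ptep, vaddr)           \
do {                                            \
        pte_clear(&init_mm, (vaddr), (ptep));   \
        __flush_tlb_one(vaddr);                 \
} while (0)
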
+--- sle11-2009-05-14.orig/arch/x86/mm/hypervisor.c     2008-12-15 11:13:45.000000000 +0100
++++ sle11-2009-05-14/arch/x86/mm/hypervisor.c  2009-03-04 11:28:34.000000000 +0100
+@@ -31,6 +31,7 @@
+  */
+ #include <linux/sched.h>
++#include <linux/hardirq.h>
+ #include <linux/mm.h>
+ #include <linux/vmalloc.h>
+ #include <asm/page.h>
+@@ -44,6 +45,302 @@
+ #include <asm/tlbflush.h>
+ #include <linux/highmem.h>
++EXPORT_SYMBOL(hypercall_page);
++
++#define NR_MC     BITS_PER_LONG
++#define NR_MMU    BITS_PER_LONG
++#define NR_MMUEXT (BITS_PER_LONG / 4)
++
++DEFINE_PER_CPU(bool, xen_lazy_mmu);
++EXPORT_PER_CPU_SYMBOL(xen_lazy_mmu);
++struct lazy_mmu {
++      unsigned int nr_mc, nr_mmu, nr_mmuext;
++      multicall_entry_t mc[NR_MC];
++      mmu_update_t mmu[NR_MMU];
++      struct mmuext_op mmuext[NR_MMUEXT];
++};
++static DEFINE_PER_CPU(struct lazy_mmu, lazy_mmu);
++
++static inline bool use_lazy_mmu_mode(void)
++{
++#ifdef CONFIG_PREEMPT
++      if (!preempt_count())
++              return false;
++#endif
++      return !irq_count();
++}
++
++static void multicall_failed(const multicall_entry_t *mc, int rc)
++{
++      printk(KERN_EMERG "hypercall#%lu(%lx, %lx, %lx, %lx)"
++                        " failed: %d (caller %lx)\n",
++             mc->op, mc->args[0], mc->args[1], mc->args[2], mc->args[3],
++             rc, mc->args[5]);
++      BUG();
++}
++
++int xen_multicall_flush(bool ret_last)
++{
++      struct lazy_mmu *lazy = &__get_cpu_var(lazy_mmu);
++      multicall_entry_t *mc = lazy->mc;
++      unsigned int count = lazy->nr_mc;
++
++      if (!count || !use_lazy_mmu_mode())
++              return 0;
++
++      lazy->nr_mc = 0;
++      lazy->nr_mmu = 0;
++      lazy->nr_mmuext = 0;
++
++      if (count == 1) {
++              int rc = _hypercall(int, mc->op, mc->args[0], mc->args[1],
++                                  mc->args[2], mc->args[3], mc->args[4]);
++
++              if (unlikely(rc)) {
++                      if (ret_last)
++                              return rc;
++                      multicall_failed(mc, rc);
++              }
++      } else {
++              if (HYPERVISOR_multicall(mc, count))
++                      BUG();
++              while (count-- > ret_last)
++                      if (unlikely(mc++->result))
++                              multicall_failed(mc - 1, mc[-1].result);
++              if (ret_last)
++                      return mc->result;
++      }
++
++      return 0;
++}
++EXPORT_SYMBOL(xen_multicall_flush);
++
++int xen_multi_update_va_mapping(unsigned long va, pte_t pte,
++                              unsigned long uvmf)
++{
++      struct lazy_mmu *lazy = &__get_cpu_var(lazy_mmu);
++      multicall_entry_t *mc;
++
++      if (unlikely(!use_lazy_mmu_mode()))
++#ifdef CONFIG_X86_PAE
++              return _hypercall4(int, update_va_mapping, va,
++                                 pte.pte_low, pte.pte_high, uvmf);
++#else
++              return _hypercall3(int, update_va_mapping, va,
++                                 pte.pte, uvmf);
++#endif
++
++      if (unlikely(lazy->nr_mc == NR_MC))
++              xen_multicall_flush(false);
++
++      mc = lazy->mc + lazy->nr_mc++;
++      mc->op = __HYPERVISOR_update_va_mapping;
++      mc->args[0] = va;
++#ifndef CONFIG_X86_PAE
++      mc->args[1] = pte.pte;
++#else
++      mc->args[1] = pte.pte_low;
++      mc->args[2] = pte.pte_high;
++#endif
++      mc->args[MULTI_UVMFLAGS_INDEX] = uvmf;
++      mc->args[5] = (long)__builtin_return_address(0);
++
++      return 0;
++}
++
++static inline bool mmu_may_merge(const multicall_entry_t *mc,
++                               unsigned int op, domid_t domid)
++{
++      return mc->op == op && !mc->args[2] && mc->args[3] == domid;
++}
++
++int xen_multi_mmu_update(mmu_update_t *src, unsigned int count,
++                       unsigned int *success_count, domid_t domid)
++{
++      struct lazy_mmu *lazy = &__get_cpu_var(lazy_mmu);
++      multicall_entry_t *mc = lazy->mc + lazy->nr_mc;
++      mmu_update_t *dst;
++      bool commit, merge;
++
++      if (unlikely(!use_lazy_mmu_mode()))
++              return _hypercall4(int, mmu_update, src, count,
++                                 success_count, domid);
++
++      commit = (lazy->nr_mmu + count) > NR_MMU || success_count;
++      merge = lazy->nr_mc && !commit
++              && mmu_may_merge(mc - 1, __HYPERVISOR_mmu_update, domid);
++      if (unlikely(lazy->nr_mc == NR_MC) && !merge) {
++              xen_multicall_flush(false);
++              mc = lazy->mc;
++              commit = count > NR_MMU || success_count;
++      }
++
++      if (!lazy->nr_mc && unlikely(commit))
++              return _hypercall4(int, mmu_update, src, count,
++                                 success_count, domid);
++
++      dst = lazy->mmu + lazy->nr_mmu;
++      lazy->nr_mmu += count;
++      if (merge) {
++              mc[-1].args[1] += count;
++              memcpy(dst, src, count * sizeof(*src));
++      } else {
++              ++lazy->nr_mc;
++              mc->op = __HYPERVISOR_mmu_update;
++              if (!commit) {
++                      mc->args[0] = (unsigned long)dst;
++                      memcpy(dst, src, count * sizeof(*src));
++              } else
++                      mc->args[0] = (unsigned long)src;
++              mc->args[1] = count;
++              mc->args[2] = (unsigned long)success_count;
++              mc->args[3] = domid;
++              mc->args[5] = (long)__builtin_return_address(0);
++      }
++
++      while (!commit && count--)
++              switch (src++->ptr & (sizeof(pteval_t) - 1)) {
++              case MMU_NORMAL_PT_UPDATE:
++              case MMU_PT_UPDATE_PRESERVE_AD:
++                      break;
++              default:
++                      commit = true;
++                      break;
++              }
++
++      return commit ? xen_multicall_flush(true) : 0;
++}
++
++int xen_multi_mmuext_op(struct mmuext_op *src, unsigned int count,
++                      unsigned int *success_count, domid_t domid)
++{
++      struct lazy_mmu *lazy = &__get_cpu_var(lazy_mmu);
++      multicall_entry_t *mc;
++      struct mmuext_op *dst;
++      bool commit, merge;
++
++      if (unlikely(!use_lazy_mmu_mode()))
++              return _hypercall4(int, mmuext_op, src, count,
++                                 success_count, domid);
++
++      /*
++       * While it could be useful in theory, I've never seen the body of
++       * this conditional get reached, hence it seems more reasonable
++       * to disable it for the time being.
++       */
++      if (0 && likely(count)
++          && likely(!success_count)
++          && likely(domid == DOMID_SELF)
++          && likely(lazy->nr_mc)
++          && lazy->mc[lazy->nr_mc - 1].op == __HYPERVISOR_update_va_mapping) {
++              unsigned long oldf, newf = UVMF_NONE;
++
++              switch (src->cmd) {
++              case MMUEXT_TLB_FLUSH_ALL:
++                      newf = UVMF_TLB_FLUSH | UVMF_ALL;
++                      break;
++              case MMUEXT_INVLPG_ALL:
++                      newf = UVMF_INVLPG | UVMF_ALL;
++                      break;
++              case MMUEXT_TLB_FLUSH_MULTI:
++                      newf = UVMF_TLB_FLUSH | UVMF_MULTI
++                             | (unsigned long)src->arg2.vcpumask.p;
++                      break;
++              case MMUEXT_INVLPG_MULTI:
++                      newf = UVMF_INVLPG | UVMF_MULTI
++                             | (unsigned long)src->arg2.vcpumask.p;
++                      break;
++              case MMUEXT_TLB_FLUSH_LOCAL:
++                      newf = UVMF_TLB_FLUSH | UVMF_LOCAL;
++                      break;
++              case MMUEXT_INVLPG_LOCAL:
++                      newf = UVMF_INVLPG | UVMF_LOCAL;
++                      break;
++              }
++              mc = lazy->mc + lazy->nr_mc - 1;
++              oldf = mc->args[MULTI_UVMFLAGS_INDEX];
++              if (newf == UVMF_NONE || oldf == UVMF_NONE
++                  || newf == (UVMF_TLB_FLUSH | UVMF_ALL))
++                      ;
++              else if (oldf == (UVMF_TLB_FLUSH | UVMF_ALL))
++                      newf = UVMF_TLB_FLUSH | UVMF_ALL;
++              else if ((newf & UVMF_FLUSHTYPE_MASK) == UVMF_INVLPG
++                       && (oldf & UVMF_FLUSHTYPE_MASK) == UVMF_INVLPG
++                       && ((src->arg1.linear_addr ^ mc->args[0])
++                           >> PAGE_SHIFT))
++                      newf = UVMF_NONE;
++              else if (((oldf | newf) & UVMF_ALL)
++                       && !((oldf ^ newf) & UVMF_FLUSHTYPE_MASK))
++                      newf |= UVMF_ALL;
++              else if ((oldf ^ newf) & ~UVMF_FLUSHTYPE_MASK)
++                      newf = UVMF_NONE;
++              else if ((oldf & UVMF_FLUSHTYPE_MASK) == UVMF_TLB_FLUSH)
++                      newf = (newf & ~UVMF_FLUSHTYPE_MASK) | UVMF_TLB_FLUSH;
++              else if ((newf & UVMF_FLUSHTYPE_MASK) != UVMF_TLB_FLUSH
++                       && ((newf ^ oldf) & UVMF_FLUSHTYPE_MASK))
++                      newf = UVMF_NONE;
++              if (newf != UVMF_NONE) {
++                      mc->args[MULTI_UVMFLAGS_INDEX] = newf;
++                      ++src;
++                      if (!--count)
++                              return 0;
++              }
++      }
++
++      mc = lazy->mc + lazy->nr_mc;
++      commit = (lazy->nr_mmuext + count) > NR_MMUEXT || success_count;
++      merge = lazy->nr_mc && !commit
++              && mmu_may_merge(mc - 1, __HYPERVISOR_mmuext_op, domid);
++      if (unlikely(lazy->nr_mc == NR_MC) && !merge) {
++              xen_multicall_flush(false);
++              mc = lazy->mc;
++              commit = count > NR_MMUEXT || success_count;
++      }
++
++      if (!lazy->nr_mc && unlikely(commit))
++              return _hypercall4(int, mmuext_op, src, count,
++                                 success_count, domid);
++
++      dst = lazy->mmuext + lazy->nr_mmuext;
++      lazy->nr_mmuext += count;
++      if (merge) {
++              mc[-1].args[1] += count;
++              memcpy(dst, src, count * sizeof(*src));
++      } else {
++              ++lazy->nr_mc;
++              mc->op = __HYPERVISOR_mmuext_op;
++              if (!commit) {
++                      mc->args[0] = (unsigned long)dst;
++                      memcpy(dst, src, count * sizeof(*src));
++              } else
++                      mc->args[0] = (unsigned long)src;
++              mc->args[1] = count;
++              mc->args[2] = (unsigned long)success_count;
++              mc->args[3] = domid;
++              mc->args[5] = (long)__builtin_return_address(0);
++      }
++
++      while (!commit && count--)
++              switch (src++->cmd) {
++              case MMUEXT_PIN_L1_TABLE:
++              case MMUEXT_PIN_L2_TABLE:
++              case MMUEXT_PIN_L3_TABLE:
++              case MMUEXT_PIN_L4_TABLE:
++              case MMUEXT_UNPIN_TABLE:
++              case MMUEXT_TLB_FLUSH_LOCAL:
++              case MMUEXT_INVLPG_LOCAL:
++              case MMUEXT_TLB_FLUSH_MULTI:
++              case MMUEXT_INVLPG_MULTI:
++              case MMUEXT_TLB_FLUSH_ALL:
++              case MMUEXT_INVLPG_ALL:
++                      break;
++              default:
++                      commit = true;
++                      break;
++              }
++
++      return commit ? xen_multicall_flush(true) : 0;
++}
++
+ void xen_l1_entry_update(pte_t *ptr, pte_t val)
+ {
+       mmu_update_t u;
+@@ -542,7 +839,8 @@ int write_ldt_entry(void *ldt, int entry
+ #define MAX_BATCHED_FULL_PTES 32
+ int xen_change_pte_range(struct mm_struct *mm, pmd_t *pmd,
+-                       unsigned long addr, unsigned long end, pgprot_t newprot)
++                       unsigned long addr, unsigned long end, pgprot_t newprot,
++                       int dirty_accountable)
+ {
+       int rc = 0, i = 0;
+       mmu_update_t u[MAX_BATCHED_FULL_PTES];
+@@ -555,10 +853,14 @@ int xen_change_pte_range(struct mm_struc
+       pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+       do {
+               if (pte_present(*pte)) {
++                      pte_t ptent = pte_modify(*pte, newprot);
++
++                      if (dirty_accountable && pte_dirty(ptent))
++                              ptent = pte_mkwrite(ptent);
+                       u[i].ptr = (__pmd_val(*pmd) & PHYSICAL_PAGE_MASK)
+                                  | ((unsigned long)pte & ~PAGE_MASK)
+                                  | MMU_PT_UPDATE_PRESERVE_AD;
+-                      u[i].val = __pte_val(pte_modify(*pte, newprot));
++                      u[i].val = __pte_val(ptent);
+                       if (++i == MAX_BATCHED_FULL_PTES) {
+                               if ((rc = HYPERVISOR_mmu_update(
+                                       &u[0], i, NULL, DOMID_SELF)) != 0)
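
The block of additions above implements per-CPU lazy-MMU batching: xen_multi_update_va_mapping(), xen_multi_mmu_update() and xen_multi_mmuext_op() queue operations into the multicall buffer (merging with a compatible previous entry where possible), and xen_multicall_flush() submits the whole batch as one HYPERVISOR_multicall. A usage sketch with a hypothetical caller:

/* Update n kernel mappings with one hypercall instead of n.
 * Inside a lazy-MMU region (preemption disabled, not in interrupt
 * context) each call below only queues; the flush submits the batch
 * and BUG()s internally on any individual failure.
 */
static void remap_pages(unsigned long va, const pte_t *ptes, int n)
{
        int i;

        for (i = 0; i < n; i++)
                xen_multi_update_va_mapping(va + i * PAGE_SIZE,
                                            ptes[i], UVMF_INVLPG);
        xen_multicall_flush(false);
}
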
+--- sle11-2009-05-14.orig/arch/x86/mm/init_32-xen.c    2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/arch/x86/mm/init_32-xen.c 2009-03-04 11:28:34.000000000 +0100
+@@ -462,16 +462,22 @@ EXPORT_SYMBOL(__supported_pte_mask);
+  * on      Enable
+  * off     Disable
+  */
+-void __init noexec_setup(const char *str)
++static int __init noexec_setup(char *str)
+ {
+-      if (!strncmp(str, "on",2) && cpu_has_nx) {
+-              __supported_pte_mask |= _PAGE_NX;
+-              disable_nx = 0;
+-      } else if (!strncmp(str,"off",3)) {
++      if (!str || !strcmp(str, "on")) {
++              if (cpu_has_nx) {
++                      __supported_pte_mask |= _PAGE_NX;
++                      disable_nx = 0;
++              }
++      } else if (!strcmp(str,"off")) {
+               disable_nx = 1;
+               __supported_pte_mask &= ~_PAGE_NX;
+-      }
++      } else
++              return -EINVAL;
++
++      return 0;
+ }
++early_param("noexec", noexec_setup);
+ int nx_enabled = 0;
+ #ifdef CONFIG_X86_PAE
+@@ -514,6 +520,7 @@ int __init set_kernel_exec(unsigned long
+               pte->pte_high &= ~(1 << (_PAGE_BIT_NX - 32));
+       else
+               pte->pte_high |= 1 << (_PAGE_BIT_NX - 32);
++      pte_update_defer(&init_mm, vaddr, pte);
+       __flush_tlb_all();
+ out:
+       return ret;
+@@ -596,18 +603,6 @@ static void __init test_wp_bit(void)
+       }
+ }
+-static void __init set_max_mapnr_init(void)
+-{
+-#ifdef CONFIG_HIGHMEM
+-      num_physpages = highend_pfn;
+-#else
+-      num_physpages = max_low_pfn;
+-#endif
+-#ifdef CONFIG_FLATMEM
+-      max_mapnr = num_physpages;
+-#endif
+-}
+-
+ static struct kcore_list kcore_mem, kcore_vmalloc; 
+ void __init mem_init(void)
+@@ -623,8 +618,7 @@ void __init mem_init(void)
+ #endif
+ #ifdef CONFIG_FLATMEM
+-      if (!mem_map)
+-              BUG();
++      BUG_ON(!mem_map);
+ #endif
+       
+       bad_ppro = ppro_with_ram_bug();
+@@ -639,17 +633,6 @@ void __init mem_init(void)
+       }
+ #endif
+  
+-      set_max_mapnr_init();
+-
+-#ifdef CONFIG_HIGHMEM
+-      high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
+-#else
+-      high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
+-#endif
+-      printk("vmalloc area: %lx-%lx, maxmem %lx\n",
+-             VMALLOC_START,VMALLOC_END,MAXMEM);
+-      BUG_ON(VMALLOC_START > VMALLOC_END);
+-      
+       /* this will put all low memory onto the freelists */
+       totalram_pages += free_all_bootmem();
+       /* XEN: init and count low-mem pages outside initial allocation. */
+@@ -687,6 +670,48 @@ void __init mem_init(void)
+               (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))
+              );
++#if 1 /* double-sanity-check paranoia */
++      printk("virtual kernel memory layout:\n"
++             "    fixmap  : 0x%08lx - 0x%08lx   (%4ld kB)\n"
++#ifdef CONFIG_HIGHMEM
++             "    pkmap   : 0x%08lx - 0x%08lx   (%4ld kB)\n"
++#endif
++             "    vmalloc : 0x%08lx - 0x%08lx   (%4ld MB)\n"
++             "    lowmem  : 0x%08lx - 0x%08lx   (%4ld MB)\n"
++             "      .init : 0x%08lx - 0x%08lx   (%4ld kB)\n"
++             "      .data : 0x%08lx - 0x%08lx   (%4ld kB)\n"
++             "      .text : 0x%08lx - 0x%08lx   (%4ld kB)\n",
++             FIXADDR_START, FIXADDR_TOP,
++             (FIXADDR_TOP - FIXADDR_START) >> 10,
++
++#ifdef CONFIG_HIGHMEM
++             PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE,
++             (LAST_PKMAP*PAGE_SIZE) >> 10,
++#endif
++
++             VMALLOC_START, VMALLOC_END,
++             (VMALLOC_END - VMALLOC_START) >> 20,
++
++             (unsigned long)__va(0), (unsigned long)high_memory,
++             ((unsigned long)high_memory - (unsigned long)__va(0)) >> 20,
++
++             (unsigned long)&__init_begin, (unsigned long)&__init_end,
++             ((unsigned long)&__init_end - (unsigned long)&__init_begin) >> 10,
++
++             (unsigned long)&_etext, (unsigned long)&_edata,
++             ((unsigned long)&_edata - (unsigned long)&_etext) >> 10,
++
++             (unsigned long)&_text, (unsigned long)&_etext,
++             ((unsigned long)&_etext - (unsigned long)&_text) >> 10);
++
++#ifdef CONFIG_HIGHMEM
++      BUG_ON(PKMAP_BASE+LAST_PKMAP*PAGE_SIZE > FIXADDR_START);
++      BUG_ON(VMALLOC_END                     > PKMAP_BASE);
++#endif
++      BUG_ON(VMALLOC_START                   > VMALLOC_END);
++      BUG_ON((unsigned long)high_memory      > VMALLOC_START);
++#endif /* double-sanity-check paranoia */
++
+ #ifdef CONFIG_X86_PAE
+       if (!cpu_has_pae)
+               panic("cannot execute a PAE-enabled kernel on a PAE-less CPU!");
+@@ -717,7 +742,7 @@ void __init mem_init(void)
+ int arch_add_memory(int nid, u64 start, u64 size)
+ {
+       struct pglist_data *pgdata = &contig_page_data;
+-      struct zone *zone = pgdata->node_zones + MAX_NR_ZONES-1;
++      struct zone *zone = pgdata->node_zones + ZONE_HIGHMEM;
+       unsigned long start_pfn = start >> PAGE_SHIFT;
+       unsigned long nr_pages = size >> PAGE_SHIFT;
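
noexec_setup() above moves from a hand-called helper to the 2.6.19 early_param() scheme: the handler receives only the value part of the option and reports bad input with -EINVAL, which the early-param core warns about as a malformed option. The accepted forms, as a quick reference:

/* boot command line:
 *   noexec=on  (or bare "noexec")  -> set _PAGE_NX if the CPU has NX
 *   noexec=off                     -> clear _PAGE_NX, disable_nx = 1
 *   noexec=<anything else>         -> -EINVAL
 */
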
+--- sle11-2009-05-14.orig/arch/x86/mm/ioremap_32-xen.c 2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/arch/x86/mm/ioremap_32-xen.c      2009-03-04 11:28:34.000000000 +0100
+@@ -12,7 +12,7 @@
+ #include <linux/init.h>
+ #include <linux/slab.h>
+ #include <linux/module.h>
+-#include <asm/io.h>
++#include <linux/io.h>
+ #include <asm/fixmap.h>
+ #include <asm/cacheflush.h>
+ #include <asm/tlbflush.h>
+@@ -118,7 +118,7 @@ int direct_remap_pfn_range(struct vm_are
+       if (domid == DOMID_SELF)
+               return -EINVAL;
+-      vma->vm_flags |= VM_IO | VM_RESERVED;
++      vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP;
+       vma->vm_mm->context.has_foreign_mappings = 1;
+@@ -203,6 +203,7 @@ void __iomem * __ioremap(unsigned long p
+       void __iomem * addr;
+       struct vm_struct * area;
+       unsigned long offset, last_addr;
++      pgprot_t prot;
+       domid_t domid = DOMID_IO;
+       /* Don't allow wraparound or zero size */
+@@ -234,6 +235,8 @@ void __iomem * __ioremap(unsigned long p
+               domid = DOMID_SELF;
+       }
++      prot = __pgprot(_KERNPG_TABLE | flags);
++
+       /*
+        * Mappings have to be page-aligned
+        */
+@@ -249,10 +252,9 @@ void __iomem * __ioremap(unsigned long p
+               return NULL;
+       area->phys_addr = phys_addr;
+       addr = (void __iomem *) area->addr;
+-      flags |= _KERNPG_TABLE;
+       if (__direct_remap_pfn_range(&init_mm, (unsigned long)addr,
+                                    phys_addr>>PAGE_SHIFT,
+-                                   size, __pgprot(flags), domid)) {
++                                   size, prot, domid)) {
+               vunmap((void __force *) addr);
+               return NULL;
+       }
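
direct_remap_pfn_range() above now tags its vmas with VM_PFNMAP in addition to VM_IO | VM_RESERVED. A note on what that buys (behaviour of the 2.6.19 core VM, not of this patch):

/* With VM_PFNMAP set, the core VM treats the range as raw page
 * frames: vm_normal_page() yields NULL for it, so rmap, fork and
 * munmap never touch a struct page for these (possibly foreign)
 * machine frames.
 */
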
+--- sle11-2009-05-14.orig/arch/x86/mm/pgtable_32-xen.c 2008-12-01 11:25:57.000000000 +0100
++++ sle11-2009-05-14/arch/x86/mm/pgtable_32-xen.c      2009-03-04 11:28:34.000000000 +0100
+@@ -68,7 +68,9 @@ void show_mem(void)
+       printk(KERN_INFO "%lu pages writeback\n",
+                                       global_page_state(NR_WRITEBACK));
+       printk(KERN_INFO "%lu pages mapped\n", global_page_state(NR_FILE_MAPPED));
+-      printk(KERN_INFO "%lu pages slab\n", global_page_state(NR_SLAB));
++      printk(KERN_INFO "%lu pages slab\n",
++              global_page_state(NR_SLAB_RECLAIMABLE) +
++              global_page_state(NR_SLAB_UNRECLAIMABLE));
+       printk(KERN_INFO "%lu pages pagetables\n",
+                                       global_page_state(NR_PAGETABLE));
+ }
+@@ -108,18 +110,11 @@ void set_pmd_pfn(unsigned long vaddr, un
+       __flush_tlb_one(vaddr);
+ }
+-static int nr_fixmaps = 0;
++static int fixmaps;
+ unsigned long hypervisor_virt_start = HYPERVISOR_VIRT_START;
+-unsigned long __FIXADDR_TOP = (HYPERVISOR_VIRT_START - 2 * PAGE_SIZE);
++unsigned long __FIXADDR_TOP = (HYPERVISOR_VIRT_START - PAGE_SIZE);
+ EXPORT_SYMBOL(__FIXADDR_TOP);
+-void __init set_fixaddr_top(unsigned long top)
+-{
+-      BUG_ON(nr_fixmaps > 0);
+-      hypervisor_virt_start = top;
+-      __FIXADDR_TOP = hypervisor_virt_start - 2 * PAGE_SIZE;
+-}
+-
+ void __set_fixmap (enum fixed_addresses idx, maddr_t phys, pgprot_t flags)
+ {
+       unsigned long address = __fix_to_virt(idx);
+@@ -141,7 +136,21 @@ void __set_fixmap (enum fixed_addresses 
+       if (HYPERVISOR_update_va_mapping(address, pte,
+                                        UVMF_INVLPG|UVMF_ALL))
+               BUG();
+-      nr_fixmaps++;
++      fixmaps++;
++}
++
++/**
++ * reserve_top_address - reserves a hole in the top of kernel address space
++ * @reserve - size of hole to reserve
++ *
++ * Can be used to relocate the fixmap area and poke a hole in the top
++ * of kernel address space to make room for a hypervisor.
++ */
++void __init reserve_top_address(unsigned long reserve)
++{
++      BUG_ON(fixmaps > 0);
++      __FIXADDR_TOP = -reserve - PAGE_SIZE;
++      __VMALLOC_RESERVE += reserve;
+ }
+ pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
+--- sle11-2009-05-14.orig/arch/x86/pci/irq-xen.c       2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/arch/x86/pci/irq-xen.c    2009-03-04 11:28:34.000000000 +0100
+@@ -991,10 +991,6 @@ static void __init pcibios_fixup_irqs(vo
+                                                       pci_name(bridge), 'A' + pin, irq);
+                               }
+                               if (irq >= 0) {
+-                                      if (use_pci_vector() &&
+-                                              !platform_legacy_irq(irq))
+-                                              irq = IO_APIC_VECTOR(irq);
+-
+                                       printk(KERN_INFO "PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n",
+                                               pci_name(dev), 'A' + pin, irq);
+                                       dev->irq = irq;
+@@ -1155,10 +1151,6 @@ static int pirq_enable_irq(struct pci_de
+                       }
+                       dev = temp_dev;
+                       if (irq >= 0) {
+-#ifdef CONFIG_PCI_MSI
+-                              if (!platform_legacy_irq(irq))
+-                                      irq = IO_APIC_VECTOR(irq);
+-#endif
+                               printk(KERN_INFO "PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n",
+                                       pci_name(dev), 'A' + pin, irq);
+                               dev->irq = irq;
+@@ -1179,33 +1171,3 @@ static int pirq_enable_irq(struct pci_de
+       }
+       return 0;
+ }
+-
+-int pci_vector_resources(int last, int nr_released)
+-{
+-      int count = nr_released;
+-
+-      int next = last;
+-      int offset = (last % 8);
+-
+-      while (next < FIRST_SYSTEM_VECTOR) {
+-              next += 8;
+-#ifdef CONFIG_X86_64
+-              if (next == IA32_SYSCALL_VECTOR)
+-                      continue;
+-#else
+-              if (next == SYSCALL_VECTOR)
+-                      continue;
+-#endif
+-              count++;
+-              if (next >= FIRST_SYSTEM_VECTOR) {
+-                      if (offset%8) {
+-                              next = FIRST_DEVICE_VECTOR + offset;
+-                              offset++;
+-                              continue;
+-                      }
+-                      count--;
+-              }
+-      }
+-
+-      return count;
+-}
+--- sle11-2009-05-14.orig/arch/x86/ia32/ia32entry-xen.S        2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/arch/x86/ia32/ia32entry-xen.S     2009-03-04 11:28:34.000000000 +0100
+@@ -83,6 +83,7 @@
+  */   
+ ENTRY(ia32_sysenter_target)
+       CFI_STARTPROC32 simple
++      CFI_SIGNAL_FRAME
+       CFI_DEF_CFA     rsp,SS+8-RIP+16
+       /*CFI_REL_OFFSET        ss,SS-RIP+16*/
+       CFI_REL_OFFSET  rsp,RSP-RIP+16
+@@ -164,6 +165,7 @@ ENDPROC(ia32_sysenter_target)
+  */   
+ ENTRY(ia32_cstar_target)
+       CFI_STARTPROC32 simple
++      CFI_SIGNAL_FRAME
+       CFI_DEF_CFA     rsp,SS+8-RIP+16
+       /*CFI_REL_OFFSET        ss,SS-RIP+16*/
+       CFI_REL_OFFSET  rsp,RSP-RIP+16
+@@ -243,6 +245,7 @@ ia32_badarg:
+ ENTRY(ia32_syscall)
+       CFI_STARTPROC   simple
++      CFI_SIGNAL_FRAME
+       CFI_DEF_CFA     rsp,SS+8-RIP+16
+       /*CFI_REL_OFFSET        ss,SS-RIP+16*/
+       CFI_REL_OFFSET  rsp,RSP-RIP+16
+@@ -320,6 +323,7 @@ ENTRY(ia32_ptregs_common)
+       popq %r11
+       CFI_ENDPROC
+       CFI_STARTPROC32 simple
++      CFI_SIGNAL_FRAME
+       CFI_DEF_CFA     rsp,SS+8-ARGOFFSET
+       CFI_REL_OFFSET  rax,RAX-ARGOFFSET
+       CFI_REL_OFFSET  rcx,RCX-ARGOFFSET
+@@ -653,8 +657,8 @@ ia32_sys_call_table:
+       .quad sys_readlinkat            /* 305 */
+       .quad sys_fchmodat
+       .quad sys_faccessat
+-      .quad quiet_ni_syscall          /* pselect6 for now */
+-      .quad quiet_ni_syscall          /* ppoll for now */
++      .quad compat_sys_pselect6
++      .quad compat_sys_ppoll
+       .quad sys_unshare               /* 310 */
+       .quad compat_sys_set_robust_list
+       .quad compat_sys_get_robust_list
+@@ -663,4 +667,5 @@ ia32_sys_call_table:
+       .quad sys_tee
+       .quad compat_sys_vmsplice
+       .quad compat_sys_move_pages
++      .quad sys_getcpu
+ ia32_syscall_end:             
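
The table hunk above replaces the quiet_ni_syscall placeholders with the real compat entries and appends sys_getcpu. Implied 32-bit syscall numbers, derived from the /* 305 */ and /* 310 */ markers in the table:

/*   308  pselect6   -> compat_sys_pselect6   (was quiet_ni_syscall)
 *   309  ppoll      -> compat_sys_ppoll      (was quiet_ni_syscall)
 *   318  getcpu     -> sys_getcpu            (new in 2.6.19)
 */
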
+--- sle11-2009-05-14.orig/arch/x86/kernel/Makefile     2009-04-20 11:36:10.000000000 +0200
++++ sle11-2009-05-14/arch/x86/kernel/Makefile  2009-03-04 11:28:34.000000000 +0100
+@@ -104,9 +104,9 @@ obj-$(CONFIG_X86_XEN)              += fixup.o
+ ###
+ # 64 bit specific files
+ ifeq ($(CONFIG_X86_64),y)
+-        obj-y                         += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o
+-      obj-$(CONFIG_X86_XEN_GENAPIC)   += genapic_xen_64.o
+-        obj-y                         += uv_sysfs.o
++      obj-$(CONFIG_X86_LOCAL_APIC)    += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o
++      obj-$(CONFIG_X86_XEN_GENAPIC)   += genapic_64.o genapic_xen_64.o
++      obj-y                           += tlb_uv.o uv_sysfs.o
+         obj-y                         += genx2apic_cluster.o
+         obj-y                         += genx2apic_phys.o
+       obj-y                           += bios_uv.o uv_irq.o
+@@ -124,5 +124,7 @@ ifeq ($(CONFIG_X86_64),y)
+       pci-dma_64-$(CONFIG_XEN)        += pci-dma_32.o
+ endif
+-disabled-obj-$(CONFIG_XEN) := i8253.o i8259_$(BITS).o reboot.o smpboot_$(BITS).o tsc_$(BITS).o
++disabled-obj-$(CONFIG_XEN) := early-quirks.o i8253.o i8259_$(BITS).o reboot.o \
++      smpboot_$(BITS).o tsc_$(BITS).o
++disabled-obj-$(CONFIG_XEN_UNPRIVILEGED_GUEST) += mpparse_64.o
+ %/head_$(BITS).o %/head_$(BITS).s: $(if $(CONFIG_XEN),EXTRA_AFLAGS,dummy) :=
+--- sle11-2009-05-14.orig/arch/x86/kernel/apic_64-xen.c        2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/arch/x86/kernel/apic_64-xen.c     2009-03-04 11:28:34.000000000 +0100
+@@ -43,7 +43,7 @@ int apic_verbosity;
+  */
+ void ack_bad_irq(unsigned int irq)
+ {
+-      printk("unexpected IRQ trap at vector %02x\n", irq);
++      printk("unexpected IRQ trap at irq %02x\n", irq);
+       /*
+        * Currently unexpected vectors happen only on SMP and APIC.
+        * We _must_ ack these because every local APIC has only N
+@@ -62,19 +62,19 @@ int setup_profiling_timer(unsigned int m
+       return -EINVAL;
+ }
+-void smp_local_timer_interrupt(struct pt_regs *regs)
++void smp_local_timer_interrupt(void)
+ {
+-      profile_tick(CPU_PROFILING, regs);
++      profile_tick(CPU_PROFILING);
+ #ifndef CONFIG_XEN
+ #ifdef CONFIG_SMP
+-              update_process_times(user_mode(regs));
++      update_process_times(user_mode(get_irq_regs()));
+ #endif
+ #endif
+       /*
+        * We take the 'long' return path, and there every subsystem
+        * grabs the appropriate locks (kernel lock/ irq lock).
+        *
+-       * we might want to decouple profiling from the 'long path',
++       * We might want to decouple profiling from the 'long path',
+        * and do the profiling totally in assembly.
+        *
+        * Currently this isn't too much of an issue (performance wise),
+@@ -92,6 +92,8 @@ void smp_local_timer_interrupt(struct pt
+  */
+ void smp_apic_timer_interrupt(struct pt_regs *regs)
+ {
++      struct pt_regs *old_regs = set_irq_regs(regs);
++
+       /*
+        * the NMI deadlock-detector uses this.
+        */
+@@ -109,8 +111,9 @@ void smp_apic_timer_interrupt(struct pt_
+        */
+       exit_idle();
+       irq_enter();
+-      smp_local_timer_interrupt(regs);
++      smp_local_timer_interrupt();
+       irq_exit();
++      set_irq_regs(old_regs);
+ }
+ /*
+@@ -188,9 +191,8 @@ int disable_apic;
+ int __init APIC_init_uniprocessor (void)
+ {
+ #ifdef CONFIG_X86_IO_APIC
+-      if (smp_found_config)
+-              if (!skip_ioapic_setup && nr_ioapics)
+-                      setup_IO_APIC();
++      if (smp_found_config && !skip_ioapic_setup && nr_ioapics)
++              setup_IO_APIC();
+ #endif
+       return 1;
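
smp_apic_timer_interrupt() above adopts the 2.6.19 set_irq_regs() idiom: interrupt entry stashes the pt_regs pointer per-CPU so that deeper callees fetch it with get_irq_regs() instead of having regs threaded through every signature. The shape of the pattern, sketched on a hypothetical handler:

void some_interrupt(struct pt_regs *regs)
{
        struct pt_regs *old_regs = set_irq_regs(regs);

        irq_enter();
        update_process_times(user_mode(get_irq_regs()));
        irq_exit();
        set_irq_regs(old_regs);         /* restore for nested interrupts */
}
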
+--- sle11-2009-05-14.orig/arch/x86/kernel/e820_64-xen.c        2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/arch/x86/kernel/e820_64-xen.c     2009-03-04 11:28:34.000000000 +0100
+@@ -16,6 +16,7 @@
+ #include <linux/string.h>
+ #include <linux/kexec.h>
+ #include <linux/module.h>
++#include <linux/mm.h>
+ #include <asm/pgtable.h>
+ #include <asm/page.h>
+@@ -25,6 +26,11 @@
+ #include <asm/sections.h>
+ #include <xen/interface/memory.h>
++struct e820map e820 __initdata;
++#ifdef CONFIG_XEN
++struct e820map machine_e820 __initdata;
++#endif
++
+ /* 
+  * PFN of last memory page.
+  */
+@@ -41,14 +47,10 @@ unsigned long end_pfn_map; 
+ /* 
+  * Last pfn which the user wants to use.
+  */
+-unsigned long end_user_pfn = MAXMEM>>PAGE_SHIFT;  
++static unsigned long __initdata end_user_pfn = MAXMEM>>PAGE_SHIFT;
+ extern struct resource code_resource, data_resource;
+-#ifdef CONFIG_XEN
+-extern struct e820map machine_e820;
+-#endif
+-
+ /* Check for some hardcoded bad areas that early boot is not allowed to touch */ 
+ static inline int bad_addr(unsigned long *addrp, unsigned long size)
+ { 
+@@ -57,13 +59,13 @@ static inline int bad_addr(unsigned long
+ #ifndef CONFIG_XEN
+       /* various gunk below that needed for SMP startup */
+       if (addr < 0x8000) { 
+-              *addrp = 0x8000;
++              *addrp = PAGE_ALIGN(0x8000);
+               return 1; 
+       }
+       /* direct mapping tables of the kernel */
+       if (last >= table_start<<PAGE_SHIFT && addr < table_end<<PAGE_SHIFT) { 
+-              *addrp = table_end << PAGE_SHIFT; 
++              *addrp = PAGE_ALIGN(table_end << PAGE_SHIFT);
+               return 1;
+       } 
+@@ -71,23 +73,18 @@ static inline int bad_addr(unsigned long
+ #ifdef CONFIG_BLK_DEV_INITRD
+       if (LOADER_TYPE && INITRD_START && last >= INITRD_START && 
+           addr < INITRD_START+INITRD_SIZE) { 
+-              *addrp = INITRD_START + INITRD_SIZE; 
++              *addrp = PAGE_ALIGN(INITRD_START + INITRD_SIZE);
+               return 1;
+       } 
+ #endif
+-      /* kernel code + 640k memory hole (later should not be needed, but 
+-         be paranoid for now) */
+-      if (last >= 640*1024 && addr < 1024*1024) {
+-              *addrp = 1024*1024;
+-              return 1;
+-      }
+-      if (last >= __pa_symbol(&_text) && last < __pa_symbol(&_end)) {
+-              *addrp = __pa_symbol(&_end);
++      /* kernel code */
++      if (last >= __pa_symbol(&_text) && addr < __pa_symbol(&_end)) {
++              *addrp = PAGE_ALIGN(__pa_symbol(&_end));
+               return 1;
+       }
+       if (last >= ebda_addr && addr < ebda_addr + ebda_size) {
+-              *addrp = ebda_addr + ebda_size;
++              *addrp = PAGE_ALIGN(ebda_addr + ebda_size);
+               return 1;
+       }
+@@ -184,7 +181,7 @@ unsigned long __init find_e820_area(unsi
+                       continue; 
+               while (bad_addr(&addr, size) && addr+size <= ei->addr+ei->size)
+                       ;
+-              last = addr + size;
++              last = PAGE_ALIGN(addr) + size;
+               if (last > ei->addr + ei->size)
+                       continue;
+               if (last > end) 
+@@ -194,59 +191,14 @@ unsigned long __init find_e820_area(unsi
+       return -1UL;            
+ } 
+-/* 
+- * Free bootmem based on the e820 table for a node.
+- */
+-void __init e820_bootmem_free(pg_data_t *pgdat, unsigned long start,unsigned long end)
+-{
+-      int i;
+-      for (i = 0; i < e820.nr_map; i++) {
+-              struct e820entry *ei = &e820.map[i]; 
+-              unsigned long last, addr;
+-
+-              if (ei->type != E820_RAM || 
+-                  ei->addr+ei->size <= start || 
+-                  ei->addr >= end)
+-                      continue;
+-
+-              addr = round_up(ei->addr, PAGE_SIZE);
+-              if (addr < start) 
+-                      addr = start;
+-
+-              last = round_down(ei->addr + ei->size, PAGE_SIZE); 
+-              if (last >= end)
+-                      last = end; 
+-
+-              if (last > addr && last-addr >= PAGE_SIZE)
+-                      free_bootmem_node(pgdat, addr, last-addr);
+-      }
+-}
+-
+ /*
+  * Find the highest page frame number we have available
+  */
+ unsigned long __init e820_end_of_ram(void)
+ {
+-      int i;
+       unsigned long end_pfn = 0;
++      end_pfn = find_max_pfn_with_active_regions();
+       
+-      for (i = 0; i < e820.nr_map; i++) {
+-              struct e820entry *ei = &e820.map[i]; 
+-              unsigned long start, end;
+-
+-              start = round_up(ei->addr, PAGE_SIZE); 
+-              end = round_down(ei->addr + ei->size, PAGE_SIZE); 
+-              if (start >= end)
+-                      continue;
+-              if (ei->type == E820_RAM) { 
+-              if (end > end_pfn<<PAGE_SHIFT)
+-                      end_pfn = end>>PAGE_SHIFT;
+-              } else { 
+-                      if (end > end_pfn_map<<PAGE_SHIFT) 
+-                              end_pfn_map = end>>PAGE_SHIFT;
+-              } 
+-      }
+-
+       if (end_pfn > end_pfn_map) 
+               end_pfn_map = end_pfn;
+       if (end_pfn_map > MAXMEM>>PAGE_SHIFT)
+@@ -256,43 +208,10 @@ unsigned long __init e820_end_of_ram(voi
+       if (end_pfn > end_pfn_map) 
+               end_pfn = end_pfn_map; 
++      printk("end_pfn_map = %lu\n", end_pfn_map);
+       return end_pfn; 
+ }
+-/* 
+- * Compute how much memory is missing in a range.
+- * Unlike the other functions in this file the arguments are in page numbers.
+- */
+-unsigned long __init
+-e820_hole_size(unsigned long start_pfn, unsigned long end_pfn)
+-{
+-      unsigned long ram = 0;
+-      unsigned long start = start_pfn << PAGE_SHIFT;
+-      unsigned long end = end_pfn << PAGE_SHIFT;
+-      int i;
+-      for (i = 0; i < e820.nr_map; i++) {
+-              struct e820entry *ei = &e820.map[i];
+-              unsigned long last, addr;
+-
+-              if (ei->type != E820_RAM ||
+-                  ei->addr+ei->size <= start ||
+-                  ei->addr >= end)
+-                      continue;
+-
+-              addr = round_up(ei->addr, PAGE_SIZE);
+-              if (addr < start)
+-                      addr = start;
+-
+-              last = round_down(ei->addr + ei->size, PAGE_SIZE);
+-              if (last >= end)
+-                      last = end;
+-
+-              if (last > addr)
+-                      ram += last - addr;
+-      }
+-      return ((end - start) - ram) >> PAGE_SHIFT;
+-}
+-
+ /*
+  * Mark e820 reserved areas as busy for the resource manager.
+  */
+@@ -333,6 +252,98 @@ void __init e820_reserve_resources(struc
+       }
+ }
++#ifndef CONFIG_XEN
++/* Mark pages corresponding to given address range as nosave */
++static void __init
++e820_mark_nosave_range(unsigned long start, unsigned long end)
++{
++      unsigned long pfn, max_pfn;
++
++      if (start >= end)
++              return;
++
++      printk("Nosave address range: %016lx - %016lx\n", start, end);
++      max_pfn = end >> PAGE_SHIFT;
++      for (pfn = start >> PAGE_SHIFT; pfn < max_pfn; pfn++)
++              if (pfn_valid(pfn))
++                      SetPageNosave(pfn_to_page(pfn));
++}
++
++/*
++ * Find the ranges of physical addresses that do not correspond to
++ * e820 RAM areas and mark the corresponding pages as nosave for software
++ * suspend and suspend to RAM.
++ *
++ * This function requires the e820 map to be sorted and without any
++ * overlapping entries and assumes the first e820 area to be RAM.
++ */
++void __init e820_mark_nosave_regions(void)
++{
++      int i;
++      unsigned long paddr;
++
++      paddr = round_down(e820.map[0].addr + e820.map[0].size, PAGE_SIZE);
++      for (i = 1; i < e820.nr_map; i++) {
++              struct e820entry *ei = &e820.map[i];
++
++              if (paddr < ei->addr)
++                      e820_mark_nosave_range(paddr,
++                                      round_up(ei->addr, PAGE_SIZE));
++
++              paddr = round_down(ei->addr + ei->size, PAGE_SIZE);
++              if (ei->type != E820_RAM)
++                      e820_mark_nosave_range(round_up(ei->addr, PAGE_SIZE),
++                                      paddr);
++
++              if (paddr >= (end_pfn << PAGE_SHIFT))
++                      break;
++      }
++}
++#endif
++
++/* Walk the e820 map and register active regions within a node */
++void __init
++e820_register_active_regions(int nid, unsigned long start_pfn,
++                                                      unsigned long end_pfn)
++{
++      int i;
++      unsigned long ei_startpfn, ei_endpfn;
++      for (i = 0; i < e820.nr_map; i++) {
++              struct e820entry *ei = &e820.map[i];
++              ei_startpfn = round_up(ei->addr, PAGE_SIZE) >> PAGE_SHIFT;
++              ei_endpfn = round_down(ei->addr + ei->size, PAGE_SIZE)
++                                                              >> PAGE_SHIFT;
++
++              /* Skip map entries smaller than a page */
++              if (ei_startpfn >= ei_endpfn)
++                      continue;
++
++              /* Check if end_pfn_map should be updated */
++              if (ei->type != E820_RAM && ei_endpfn > end_pfn_map)
++                      end_pfn_map = ei_endpfn;
++
++              /* Skip if map is outside the node */
++              if (ei->type != E820_RAM ||
++                              ei_endpfn <= start_pfn ||
++                              ei_startpfn >= end_pfn)
++                      continue;
++
++              /* Check for overlaps */
++              if (ei_startpfn < start_pfn)
++                      ei_startpfn = start_pfn;
++              if (ei_endpfn > end_pfn)
++                      ei_endpfn = end_pfn;
++
++              /* Obey end_user_pfn to save on memmap */
++              if (ei_startpfn >= end_user_pfn)
++                      continue;
++              if (ei_endpfn > end_user_pfn)
++                      ei_endpfn = end_user_pfn;
++
++              add_active_range(nid, ei_startpfn, ei_endpfn);
++      }
++}
++
+ /* 
+  * Add a memory region to the kernel e820 map.
+  */ 
+@@ -553,13 +564,6 @@ static int __init sanitize_e820_map(stru
+  * If we're lucky and live on a modern system, the setup code
+  * will have given us a memory map that we can use to properly
+  * set up memory.  If we aren't, we'll fake a memory map.
+- *
+- * We check to see that the memory map contains at least 2 elements
+- * before we'll use it, because the detection code in setup.S may
+- * not be perfect and most every PC known to man has two memory
+- * regions: one from 0 to 640k, and one from 1mb up.  (The IBM
+- * thinkpad 560x, for example, does not cooperate with the memory
+- * detection code.)
+  */
+ static int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
+ {
+@@ -581,27 +585,6 @@ static int __init copy_e820_map(struct e
+               if (start > end)
+                       return -1;
+-#ifndef CONFIG_XEN
+-              /*
+-               * Some BIOSes claim RAM in the 640k - 1M region.
+-               * Not right. Fix it up.
+-               * 
+-               * This should be removed on Hammer which is supposed to not
+-               * have non e820 covered ISA mappings there, but I had some strange
+-               * problems so it stays for now.  -AK
+-               */
+-              if (type == E820_RAM) {
+-                      if (start < 0x100000ULL && end > 0xA0000ULL) {
+-                              if (start < 0xA0000ULL)
+-                                      add_memory_region(start, 0xA0000ULL-start, type);
+-                              if (end <= 0x100000ULL)
+-                                      continue;
+-                              start = 0x100000ULL;
+-                              size = end - start;
+-                      }
+-              }
+-#endif
+-
+               add_memory_region(start, size, type);
+       } while (biosmap++,--nr_map);
+@@ -622,11 +605,15 @@ static int __init copy_e820_map(struct e
+       return 0;
+ }
++void early_panic(char *msg)
++{
++      early_printk("%s", msg);
++      panic("%s", msg);
++}
++
+ #ifndef CONFIG_XEN
+ void __init setup_memory_region(void)
+ {
+-      char *who = "BIOS-e820";
+-
+       /*
+        * Try to copy the BIOS-supplied E820-map.
+        *
+@@ -634,24 +621,10 @@ void __init setup_memory_region(void)
+        * the next section from 1mb->appropriate_mem_k
+        */
+       sanitize_e820_map(E820_MAP, &E820_MAP_NR);
+-      if (copy_e820_map(E820_MAP, E820_MAP_NR) < 0) {
+-              unsigned long mem_size;
+-
+-              /* compare results from other methods and take the greater */
+-              if (ALT_MEM_K < EXT_MEM_K) {
+-                      mem_size = EXT_MEM_K;
+-                      who = "BIOS-88";
+-              } else {
+-                      mem_size = ALT_MEM_K;
+-                      who = "BIOS-e801";
+-              }
+-
+-              e820.nr_map = 0;
+-              add_memory_region(0, LOWMEMSIZE(), E820_RAM);
+-              add_memory_region(HIGH_MEMORY, mem_size << 10, E820_RAM);
+-      }
++      if (copy_e820_map(E820_MAP, E820_MAP_NR) < 0)
++              early_panic("Cannot find a valid memory map");
+       printk(KERN_INFO "BIOS-provided physical RAM map:\n");
+-      e820_print_map(who);
++      e820_print_map("BIOS-e820");
+ }
+ #else  /* CONFIG_XEN */
+@@ -683,20 +656,23 @@ void __init setup_memory_region(void)
+       sanitize_e820_map(map, (char *)&memmap.nr_entries);
+-      BUG_ON(copy_e820_map(map, (char)memmap.nr_entries) < 0);
++      if (copy_e820_map(map, (char)memmap.nr_entries) < 0)
++              early_panic("Cannot find a valid memory map");
+       printk(KERN_INFO "BIOS-provided physical RAM map:\n");
+       e820_print_map("Xen");
+ }
+ #endif
+-void __init parse_memopt(char *p, char **from) 
+-{ 
++static int __init parse_memopt(char *p)
++{
+       int i;
+       unsigned long current_end;
+       unsigned long end;
+-      end_user_pfn = memparse(p, from);
++      if (!p)
++              return -EINVAL;
++      end_user_pfn = memparse(p, &p);
+       end_user_pfn >>= PAGE_SHIFT;    
+       end = end_user_pfn<<PAGE_SHIFT;
+@@ -713,27 +689,61 @@ void __init parse_memopt(char *p, char *
+               else
+                       add_memory_region(current_end, end - current_end, E820_RAM);
+       }
++
++      return 0;
+ } 
++early_param("mem", parse_memopt);
++
++static int userdef __initdata;
+-void __init parse_memmapopt(char *p, char **from)
++static int __init parse_memmap_opt(char *p)
+ {
++      char *oldp;
+       unsigned long long start_at, mem_size;
+-      mem_size = memparse(p, from);
+-      p = *from;
++      if (!strcmp(p, "exactmap")) {
++#ifdef CONFIG_CRASH_DUMP
++              /* If we are doing a crash dump, we
++               * still need to know the real memory
++               * size before the original memory map
++               * is reset.
++               */
++              e820_register_active_regions(0, 0, -1UL);
++              saved_max_pfn = e820_end_of_ram();
++              remove_all_active_ranges();
++#endif
++              end_pfn_map = 0;
++              e820.nr_map = 0;
++              userdef = 1;
++              return 0;
++      }
++
++      oldp = p;
++      mem_size = memparse(p, &p);
++      if (p == oldp)
++              return -EINVAL;
+       if (*p == '@') {
+-              start_at = memparse(p+1, from);
++              start_at = memparse(p+1, &p);
+               add_memory_region(start_at, mem_size, E820_RAM);
+       } else if (*p == '#') {
+-              start_at = memparse(p+1, from);
++              start_at = memparse(p+1, &p);
+               add_memory_region(start_at, mem_size, E820_ACPI);
+       } else if (*p == '$') {
+-              start_at = memparse(p+1, from);
++              start_at = memparse(p+1, &p);
+               add_memory_region(start_at, mem_size, E820_RESERVED);
+       } else {
+               end_user_pfn = (mem_size >> PAGE_SHIFT);
+       }
+-      p = *from;
++      return *p == '\0' ? 0 : -EINVAL;
++}
++early_param("memmap", parse_memmap_opt);
++
++void finish_e820_parsing(void)
++{
++      if (userdef) {
++              printk(KERN_INFO "user-defined physical RAM map:\n");
++              e820_print_map("user");
++      }
+ }
+ unsigned long pci_mem_start = 0xaeedbabe;
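Note on the conversion above: parse_memopt() and parse_memmap_opt() drop the old (char *p, char **from) calling convention and register through early_param(), so both options are handled during parse_early_param() together with the other early boot options, and a malformed argument now reports -EINVAL instead of being silently ignored. A minimal sketch of the same pattern follows; the "cap" option, parse_cap() and cap_bytes are hypothetical, for illustration only:

    /*
     * Sketch, not part of the patch: an early_param() handler gets just
     * the option's value (NULL when no '=value' was given), returns 0 on
     * success, and memparse() consumes a number with an optional K/M/G
     * suffix while advancing the cursor.
     */
    static unsigned long long cap_bytes __initdata;

    static int __init parse_cap(char *p)
    {
            char *end;

            if (!p)
                    return -EINVAL;
            cap_bytes = memparse(p, &end);
            return end == p ? -EINVAL : 0;
    }
    early_param("cap", parse_cap);

finish_e820_parsing() then runs once command-line parsing is complete and prints the map only when a memmap=exactmap override actually replaced the BIOS-provided one.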
+--- sle11-2009-05-14.orig/arch/x86/kernel/early_printk-xen.c   2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/arch/x86/kernel/early_printk-xen.c        2009-03-04 11:28:34.000000000 +0100
+@@ -244,20 +244,16 @@ void early_printk(const char *fmt, ...)
+ static int __initdata keep_early;
+-int __init setup_early_printk(char *opt)
++static int __init setup_early_printk(char *buf)
+ {
+-      char *space;
+-      char buf[256];
++      if (!buf)
++              return 0;
+       if (early_console_initialized)
+-              return 1;
+-
+-      strlcpy(buf,opt,sizeof(buf));
+-      space = strchr(buf, ' ');
+-      if (space)
+-              *space = 0;
++              return 0;
++      early_console_initialized = 1;
+-      if (strstr(buf,"keep"))
++      if (strstr(buf, "keep"))
+               keep_early = 1;
+       if (!strncmp(buf, "serial", 6)) {
+@@ -281,11 +277,12 @@ int __init setup_early_printk(char *opt)
+               early_console = &simnow_console;
+               keep_early = 1;
+       }
+-      early_console_initialized = 1;
+       register_console(early_console);
+       return 0;
+ }
++early_param("earlyprintk", setup_early_printk);
++
+ void __init disable_early_printk(void)
+ {
+       if (!early_console_initialized || !early_console)
+@@ -299,4 +296,3 @@ void __init disable_early_printk(void)
+       }
+ }
+-__setup("earlyprintk=", setup_early_printk);
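The earlyprintk hunk is the same conversion: the old __setup()-style handler received raw option text and had to copy and split it into a local 256-byte buffer, while the early_param() form is called from parse_early_param() with just the value, early enough that the console comes up before the bulk of setup_arch() runs. A side-by-side sketch of the two registration styles; the "foo" option and both handlers are hypothetical:

    /* Old style: runs late, from obsolete_checksetup(); return 1 = handled. */
    static int __init foo_setup(char *s)
    {
            return 1;
    }
    __setup("foo=", foo_setup);

    /* New style: runs from parse_early_param(); s may be NULL when the
     * option appears without '=value'; return 0 = consumed, no error. */
    static int __init foo_early(char *s)
    {
            if (!s)
                    return 0;
            return 0;
    }
    early_param("foo", foo_early);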
+--- sle11-2009-05-14.orig/arch/x86/kernel/entry_64-xen.S       2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/arch/x86/kernel/entry_64-xen.S    2009-03-04 11:28:34.000000000 +0100
+@@ -4,9 +4,6 @@
+  *  Copyright (C) 1991, 1992  Linus Torvalds
+  *  Copyright (C) 2000, 2001, 2002  Andi Kleen SuSE Labs
+  *  Copyright (C) 2000  Pavel Machek <pavel@suse.cz>
+- * 
+- *  $Id$
+- *
+  *  Jun Nakajima <jun.nakajima@intel.com>
+  *  Asit Mallick <asit.k.mallick@intel.com>
+  *      Modified for Xen
+@@ -26,15 +23,25 @@
+  * at the top of the kernel process stack.    
+  * - partial stack frame: partially saved registers upto R11.
+  * - full stack frame: Like partial stack frame, but all register saved. 
+- *    
+- * TODO:       
+- * - schedule it carefully for the final hardware.
++ *
++ * Some macro usage:
++ * - CFI macros are used to generate dwarf2 unwind information for better
++ * backtraces. They don't change any code.
++ * - SAVE_ALL/RESTORE_ALL - Save/restore all registers
++ * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify.
++ * There are unfortunately lots of special cases where some registers
++ * are not touched. The macro is a big mess that should be cleaned up.
++ * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS.
++ * Gives a full stack frame.
++ * - ENTRY/END Define functions in the symbol table.
++ * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
++ * frame that is otherwise undefined after a SYSCALL
++ * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging.
++ * - errorentry/paranoidentry/zeroentry - Define exception entry points.
+  */
+-#define ASSEMBLY 1
+ #include <linux/linkage.h>
+ #include <asm/segment.h>
+-#include <asm/smp.h>
+ #include <asm/cache.h>
+ #include <asm/errno.h>
+ #include <asm/dwarf2.h>
+@@ -117,6 +124,7 @@ NMI_MASK = 0x80000000
+       .macro  CFI_DEFAULT_STACK start=1,adj=0
+       .if \start
+       CFI_STARTPROC   simple
++      CFI_SIGNAL_FRAME
+       CFI_DEF_CFA     rsp,SS+8 - \adj*ARGOFFSET
+       .else
+       CFI_DEF_CFA_OFFSET SS+8 - \adj*ARGOFFSET
+@@ -207,6 +215,7 @@ END(ret_from_fork)
+  */
+       .macro _frame ref
+       CFI_STARTPROC simple
++      CFI_SIGNAL_FRAME
+       CFI_DEF_CFA rsp,SS+8-\ref
+       /*CFI_REL_OFFSET ss,SS-\ref*/
+       CFI_REL_OFFSET rsp,RSP-\ref
+@@ -334,6 +343,8 @@ tracesys:                   
+       LOAD_ARGS ARGOFFSET  /* reload args from stack in case ptrace changed it */
+       RESTORE_REST
+       cmpq $__NR_syscall_max,%rax
++      movq $-ENOSYS,%rcx
++      cmova %rcx,%rax
+       ja  1f
+       movq %r10,%rcx  /* fixup for C */
+       call *sys_call_table(,%rax,8)
+@@ -349,6 +360,7 @@ END(system_call)
+  */   
+ ENTRY(int_ret_from_sys_call)
+       CFI_STARTPROC   simple
++      CFI_SIGNAL_FRAME
+       CFI_DEF_CFA     rsp,SS+8-ARGOFFSET
+       /*CFI_REL_OFFSET        ss,SS-ARGOFFSET*/
+       CFI_REL_OFFSET  rsp,RSP-ARGOFFSET
+@@ -583,8 +595,7 @@ retint_signal:
+ #ifdef CONFIG_PREEMPT
+       /* Returning to kernel space. Check if we need preemption */
+       /* rcx:  threadinfo. interrupts off. */
+-      .p2align
+-retint_kernel:        
++ENTRY(retint_kernel)
+       cmpl $0,threadinfo_preempt_count(%rcx)
+       jnz  retint_restore_args
+       bt  $TIF_NEED_RESCHED,threadinfo_flags(%rcx)
+@@ -644,7 +655,6 @@ ENTRY(call_function_interrupt)
+ END(call_function_interrupt)
+ #endif
+-#ifdef CONFIG_X86_LOCAL_APIC  
+ ENTRY(apic_timer_interrupt)
+       apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
+ END(apic_timer_interrupt)
+@@ -656,7 +666,6 @@ END(error_interrupt)
+ ENTRY(spurious_interrupt)
+       apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
+ END(spurious_interrupt)
+-#endif
+ #endif /* !CONFIG_XEN */
+                               
+ /*
+@@ -755,7 +764,9 @@ paranoid_exit\trace:
+       testl $3,CS(%rsp)
+       jnz   paranoid_userspace\trace
+ paranoid_swapgs\trace:
++      .if \trace
+       TRACE_IRQS_IRETQ 0
++      .endif
+       swapgs
+ paranoid_restore\trace:
+       RESTORE_ALL 8
+@@ -802,7 +813,7 @@ paranoid_schedule\trace:
+  * Exception entry point. This expects an error code/orig_rax on the stack
+  * and the exception handler in %rax. 
+  */                                           
+-ENTRY(error_entry)
++KPROBE_ENTRY(error_entry)
+       _frame RDI
+       CFI_REL_OFFSET rax,0
+       /* rdi slot contains rax, oldrax contains error code */
+@@ -896,7 +907,7 @@ error_kernelspace:
+       jmp  error_sti
+ #endif
+       CFI_ENDPROC
+-END(error_entry)
++KPROBE_END(error_entry)
+       
+ ENTRY(hypervisor_callback)
+       zeroentry do_hypervisor_callback
+@@ -936,26 +947,6 @@ ENTRY(do_hypervisor_callback)   # do_hyp
+       CFI_ENDPROC
+ END(do_hypervisor_callback)
+-#ifdef CONFIG_X86_LOCAL_APIC
+-KPROBE_ENTRY(nmi)
+-      zeroentry do_nmi_callback
+-ENTRY(do_nmi_callback)
+-      CFI_STARTPROC
+-        addq $8, %rsp
+-      CFI_ENDPROC
+-      CFI_DEFAULT_STACK
+-        call do_nmi
+-        orl  $NMI_MASK,EFLAGS(%rsp)
+-        RESTORE_REST
+-        XEN_BLOCK_EVENTS(%rsi)
+-      TRACE_IRQS_OFF
+-        GET_THREAD_INFO(%rcx)
+-        jmp  retint_restore_args
+-      CFI_ENDPROC
+-      .previous .text
+-END(nmi)
+-#endif
+-
+         ALIGN
+ restore_all_enable_events:  
+       CFI_DEFAULT_STACK adj=1
+@@ -1121,7 +1112,7 @@ ENDPROC(child_rip)
+  * do_sys_execve asm fallback arguments:
+  *    rdi: name, rsi: argv, rdx: envp, fake frame on the stack
+  */
+-ENTRY(execve)
++ENTRY(kernel_execve)
+       CFI_STARTPROC
+       FAKE_STACK_FRAME $0
+       SAVE_ALL        
+@@ -1135,12 +1126,11 @@ ENTRY(execve)
+       UNFAKE_STACK_FRAME
+       ret
+       CFI_ENDPROC
+-ENDPROC(execve)
++ENDPROC(kernel_execve)
+ KPROBE_ENTRY(page_fault)
+       errorentry do_page_fault
+-END(page_fault)
+-      .previous .text
++KPROBE_END(page_fault)
+ ENTRY(coprocessor_error)
+       zeroentry do_coprocessor_error
+@@ -1162,25 +1152,25 @@ KPROBE_ENTRY(debug)
+       zeroentry do_debug
+ /*    paranoidexit
+       CFI_ENDPROC */
+-END(debug)
+-      .previous .text
++KPROBE_END(debug)
+-#if 0
+-      /* runs on exception stack */   
+ KPROBE_ENTRY(nmi)
+-      INTR_FRAME
+-      pushq $-1
+-      CFI_ADJUST_CFA_OFFSET 8
+-      paranoidentry do_nmi, 0, 0
+-#ifdef CONFIG_TRACE_IRQFLAGS
+-      paranoidexit 0
+-#else
+-      jmp paranoid_exit1
+-      CFI_ENDPROC
+-#endif
+-END(nmi)
+-      .previous .text
+-#endif        
++      zeroentry do_nmi_callback
++KPROBE_END(nmi)
++do_nmi_callback:
++      CFI_STARTPROC
++      addq $8, %rsp
++      CFI_ENDPROC
++      CFI_DEFAULT_STACK
++      call do_nmi
++      orl  $NMI_MASK,EFLAGS(%rsp)
++      RESTORE_REST
++      XEN_BLOCK_EVENTS(%rsi)
++      TRACE_IRQS_OFF
++      GET_THREAD_INFO(%rcx)
++      jmp  retint_restore_args
++      CFI_ENDPROC
++END(do_nmi_callback)
+ KPROBE_ENTRY(int3)
+ /*    INTR_FRAME
+@@ -1189,8 +1179,7 @@ KPROBE_ENTRY(int3)
+       zeroentry do_int3
+ /*    jmp paranoid_exit1
+       CFI_ENDPROC */
+-END(int3)
+-      .previous .text
++KPROBE_END(int3)
+ ENTRY(overflow)
+       zeroentry do_overflow
+@@ -1241,8 +1230,7 @@ END(stack_segment)
+ KPROBE_ENTRY(general_protection)
+       errorentry do_general_protection
+-END(general_protection)
+-      .previous .text
++KPROBE_END(general_protection)
+ ENTRY(alignment_check)
+       errorentry do_alignment_check
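A recurring entry_64-xen.S change in these hunks replaces the open-coded END(sym) plus ".previous .text" pairs with KPROBE_END(), the closing counterpart of KPROBE_ENTRY(), so each fragile entry point (error_entry, page_fault, debug, nmi, int3, general_protection) stays bracketed inside the .kprobes.text section. A rough C-side analogue of what that placement buys, as a sketch rather than the actual macro definitions: register_kprobe() rejects addresses inside that section, so these paths cannot be probed and re-entered recursively.

    #define __kprobes __attribute__((__section__(".kprobes.text")))

    /* Hypothetical helper: anything emitted into .kprobes.text is
     * off-limits to kprobes, like the assembly entry points above. */
    static void __kprobes fragile_entry_helper(void)
    {
    }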
+--- sle11-2009-05-14.orig/arch/x86/kernel/genapic_xen_64.c     2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/arch/x86/kernel/genapic_xen_64.c  2009-03-04 11:28:34.000000000 +0100
+@@ -71,6 +71,13 @@ static cpumask_t xen_target_cpus(void)
+       return cpu_online_map;
+ }
++static cpumask_t xen_vector_allocation_domain(int cpu)
++{
++      cpumask_t domain = CPU_MASK_NONE;
++      cpu_set(cpu, domain);
++      return domain;
++}
++
+ /*
+  * Set up the logical destination ID.
+  * Do nothing, not called now.
+@@ -147,8 +154,8 @@ struct genapic apic_xen =  {
+       .int_delivery_mode = dest_LowestPrio,
+ #endif
+       .int_dest_mode = (APIC_DEST_LOGICAL != 0),
+-      .int_delivery_dest = APIC_DEST_LOGICAL | APIC_DM_LOWEST,
+       .target_cpus = xen_target_cpus,
++      .vector_allocation_domain = xen_vector_allocation_domain,
+ #ifdef CONFIG_XEN_PRIVILEGED_GUEST
+       .apic_id_registered = xen_apic_id_registered,
+ #endif
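xen_vector_allocation_domain() fills in the genapic hook added by the 2.6.19 per-CPU vector rework: returning a mask with only the target CPU set gives every CPU its own private vector space, which matches the hypervisor-assigned vectors used here. For contrast, a sketch of the opposite policy, roughly what a flat-mode genapic returns (illustrative only, not from this patch):

    static cpumask_t flat_vector_allocation_domain(int cpu)
    {
            /* One shared vector space: a vector allocated here is
             * reserved on all CPUs at once. */
            cpumask_t domain = CPU_MASK_ALL;
            return domain;
    }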
+--- sle11-2009-05-14.orig/arch/x86/kernel/head_64-xen.S        2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/arch/x86/kernel/head_64-xen.S     2009-03-04 11:28:34.000000000 +0100
+@@ -5,9 +5,6 @@
+  *  Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
+  *  Copyright (C) 2000 Karsten Keil <kkeil@suse.de>
+  *  Copyright (C) 2001,2002 Andi Kleen <ak@suse.de>
+- *
+- *  $Id: head.S,v 1.49 2002/03/19 17:39:25 ak Exp $
+- *
+  *  Jun Nakajima <jun.nakajima@intel.com>
+  *    Modified for Xen                                
+  */
+@@ -149,7 +146,7 @@ ENTRY(cpu_gdt_table)
+       .quad   0,0                     /* TSS */
+       .quad   0,0                     /* LDT */
+       .quad   0,0,0                   /* three TLS descriptors */
+-      .quad   0                       /* unused */
++      .quad   0x0000f40000000000      /* node/CPU stored in limit */
+ gdt_end:
+       /* asm/segment.h:GDT_ENTRIES must match this */
+       /* This should be a multiple of the cache line size */
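The repurposed GDT slot above swaps an unused descriptor for one whose limit field is filled in per CPU at boot; this is the 2.6.19 vgetcpu convention, where the limit encodes node << 12 | cpu and can be read from any privilege level via lsl. A sketch of the consumer side, assuming that encoding and a selector constant named __PER_CPU_SEG for this slot (both are assumptions, not visible in this hunk):

    static inline void gdt_getcpu(unsigned *cpu, unsigned *node)
    {
            unsigned p;

            /* lsl loads a segment limit without faulting in user mode */
            asm("lsl %1,%0" : "=r" (p) : "r" ((unsigned)__PER_CPU_SEG));
            if (cpu)
                    *cpu = p & 0xfff;       /* low 12 bits: CPU number */
            if (node)
                    *node = p >> 12;        /* remaining bits: node    */
    }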
+--- sle11-2009-05-14.orig/arch/x86/kernel/head64-xen.c 2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/arch/x86/kernel/head64-xen.c      2009-03-04 11:28:34.000000000 +0100
+@@ -7,6 +7,9 @@
+  *    Modified for Xen.
+  */
++/* PDA is not ready to be used until the end of x86_64_start_kernel(). */
++#define arch_use_lazy_mmu_mode() false
++
+ #include <linux/init.h>
+ #include <linux/linkage.h>
+ #include <linux/types.h>
+@@ -54,11 +57,9 @@ static void __init copy_bootdata(char *r
+       new_data = *(int *) (x86_boot_params + NEW_CL_POINTER);
+       if (!new_data) {
+               if (OLD_CL_MAGIC != * (u16 *) OLD_CL_MAGIC_ADDR) {
+-                      printk("so old bootloader that it does not support commandline?!\n");
+                       return;
+               }
+               new_data = OLD_CL_BASE_ADDR + * (u16 *) OLD_CL_OFFSET;
+-              printk("old bootloader convention, maybe loadlin?\n");
+       }
+       command_line = (char *) ((u64)(new_data));
+       memcpy(saved_command_line, command_line, COMMAND_LINE_SIZE);
+@@ -70,25 +71,6 @@ static void __init copy_bootdata(char *r
+       memcpy(saved_command_line, xen_start_info->cmd_line, max_cmdline);
+       saved_command_line[max_cmdline-1] = '\0';
+ #endif
+-      printk("Bootdata ok (command line is %s)\n", saved_command_line);
+-}
+-
+-static void __init setup_boot_cpu_data(void)
+-{
+-      unsigned int dummy, eax;
+-
+-      /* get vendor info */
+-      cpuid(0, (unsigned int *)&boot_cpu_data.cpuid_level,
+-            (unsigned int *)&boot_cpu_data.x86_vendor_id[0],
+-            (unsigned int *)&boot_cpu_data.x86_vendor_id[8],
+-            (unsigned int *)&boot_cpu_data.x86_vendor_id[4]);
+-
+-      /* get cpu type */
+-      cpuid(1, &eax, &dummy, &dummy,
+-              (unsigned int *) &boot_cpu_data.x86_capability);
+-      boot_cpu_data.x86 = (eax >> 8) & 0xf;
+-      boot_cpu_data.x86_model = (eax >> 4) & 0xf;
+-      boot_cpu_data.x86_mask = eax & 0xf;
+ }
+ #include <xen/interface/memory.h>
+@@ -101,7 +83,6 @@ void __init x86_64_start_kernel(char * r
+ {
+       struct xen_machphys_mapping mapping;
+       unsigned long machine_to_phys_nr_ents;
+-      char *s;
+       int i;
+       setup_xen_features();
+@@ -128,10 +109,7 @@ void __init x86_64_start_kernel(char * r
+       asm volatile("lidt %0" :: "m" (idt_descr));
+ #endif
+-      /*
+-       * This must be called really, really early:
+-       */
+-      lockdep_init();
++      early_printk("Kernel alive\n");
+       for (i = 0; i < NR_CPUS; i++)
+               cpu_pda(i) = &boot_cpu_pda[i];
+@@ -141,22 +119,5 @@ void __init x86_64_start_kernel(char * r
+ #ifdef CONFIG_SMP
+       cpu_set(0, cpu_online_map);
+ #endif
+-      s = strstr(saved_command_line, "earlyprintk=");
+-      if (s != NULL)
+-              setup_early_printk(strchr(s, '=') + 1);
+-#ifdef CONFIG_NUMA
+-      s = strstr(saved_command_line, "numa=");
+-      if (s != NULL)
+-              numa_setup(s+5);
+-#endif
+-#ifdef CONFIG_X86_IO_APIC
+-      if (strstr(saved_command_line, "disableapic"))
+-              disable_apic = 1;
+-#endif
+-      /* You need early console to see that */
+-      if (__pa_symbol(&_end) >= KERNEL_TEXT_SIZE)
+-              panic("Kernel too big for kernel mapping\n");
+-
+-      setup_boot_cpu_data();
+       start_kernel();
+ }
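The arch_use_lazy_mmu_mode() override at the top of head64-xen.c works purely by preprocessor precedence: because the #define precedes every #include, all inlined users in this translation unit see the constant false, so no page-table update can be batched through the lazy-MMU machinery before the PDA is usable. A generic sketch of the pattern with hypothetical names; it relies on the header supplying its default under an #ifndef guard, which is the assumption here:

    #define feature_enabled() false     /* must precede the #include */
    #include "feature.h"                /* header: #ifndef feature_enabled ... */

    /* Everything below now compiles against the forced-off stub. */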
+--- sle11-2009-05-14.orig/arch/x86/kernel/io_apic_64-xen.c     2009-03-16 16:13:45.000000000 +0100
++++ sle11-2009-05-14/arch/x86/kernel/io_apic_64-xen.c  2009-03-04 11:28:34.000000000 +0100
+@@ -26,9 +26,12 @@
+ #include <linux/delay.h>
+ #include <linux/sched.h>
+ #include <linux/smp_lock.h>
++#include <linux/pci.h>
+ #include <linux/mc146818rtc.h>
+ #include <linux/acpi.h>
+ #include <linux/sysdev.h>
++#include <linux/msi.h>
++#include <linux/htirq.h>
+ #ifdef CONFIG_ACPI
+ #include <acpi/acpi_bus.h>
+ #endif
+@@ -41,6 +44,10 @@
+ #include <asm/acpi.h>
+ #include <asm/dma.h>
+ #include <asm/nmi.h>
++#include <asm/msidef.h>
++#include <asm/hypertransport.h>
++
++static int assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result);
+ #define __apicdebuginit  __init
+@@ -48,17 +55,30 @@ int sis_apic_bug; /* not actually suppor
+ static int no_timer_check;
+-int disable_timer_pin_1 __initdata;
++static int disable_timer_pin_1 __initdata;
+-#ifndef CONFIG_XEN
+-int timer_over_8254 __initdata = 0;
++#ifdef CONFIG_XEN
++#include <xen/interface/xen.h>
++#include <xen/interface/physdev.h>
++#include <xen/evtchn.h>
++
++/* Fake i8259 */
++#define make_8259A_irq(_irq)     (io_apic_irqs &= ~(1UL<<(_irq)))
++#define disable_8259A_irq(_irq)  ((void)0)
++#define i8259A_irq_pending(_irq) (0)
++
++unsigned long io_apic_irqs;
++
++#define clear_IO_APIC() ((void)0)
++#else
++int timer_over_8254 __initdata = 1;
+ /* Where if anywhere is the i8259 connect in external int mode */
+ static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
+ #endif
+ static DEFINE_SPINLOCK(ioapic_lock);
+-static DEFINE_SPINLOCK(vector_lock);
++DEFINE_SPINLOCK(vector_lock);
+ /*
+  * # of IRQ routing registers
+@@ -83,29 +103,27 @@ static struct irq_pin_list {
+       short apic, pin, next;
+ } irq_2_pin[PIN_MAP_SIZE];
+-int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1};
+-#ifdef CONFIG_PCI_MSI
+-#define vector_to_irq(vector)         \
+-      (platform_legacy_irq(vector) ? vector : vector_irq[vector])
+-#else
+-#define vector_to_irq(vector) (vector)
+-#endif
+-
+-#ifdef CONFIG_XEN
+-
+-#include <xen/interface/xen.h>
+-#include <xen/interface/physdev.h>
+-#include <xen/evtchn.h>
+-
+-/* Fake i8259 */
+-#define make_8259A_irq(_irq)     (io_apic_irqs &= ~(1UL<<(_irq)))
+-#define disable_8259A_irq(_irq)  ((void)0)
+-#define i8259A_irq_pending(_irq) (0)
++#ifndef CONFIG_XEN
++struct io_apic {
++      unsigned int index;
++      unsigned int unused[3];
++      unsigned int data;
++};
+-unsigned long io_apic_irqs;
++static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
++{
++      return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
++              + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK);
++}
++#endif
+-static inline unsigned int xen_io_apic_read(unsigned int apic, unsigned int reg)
++static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
+ {
++#ifndef CONFIG_XEN
++      struct io_apic __iomem *io_apic = io_apic_base(apic);
++      writel(reg, &io_apic->index);
++      return readl(&io_apic->data);
++#else
+       struct physdev_apic apic_op;
+       int ret;
+@@ -115,31 +133,133 @@ static inline unsigned int xen_io_apic_r
+       if (ret)
+               return ret;
+       return apic_op.value;
++#endif
+ }
+-static inline void xen_io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
++static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
+ {
++#ifndef CONFIG_XEN
++      struct io_apic __iomem *io_apic = io_apic_base(apic);
++      writel(reg, &io_apic->index);
++      writel(value, &io_apic->data);
++#else
+       struct physdev_apic apic_op;
+       apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr;
+       apic_op.reg = reg;
+       apic_op.value = value;
+       WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic_op));
++#endif
++}
++
++#ifndef CONFIG_XEN
++/*
++ * Re-write a value: to be used for read-modify-write
++ * cycles where the read already set up the index register.
++ */
++static inline void io_apic_modify(unsigned int apic, unsigned int value)
++{
++      struct io_apic __iomem *io_apic = io_apic_base(apic);
++      writel(value, &io_apic->data);
+ }
++#else
++#define io_apic_modify io_apic_write
++#endif
+-#define io_apic_read(a,r)    xen_io_apic_read(a,r)
+-#define io_apic_write(a,r,v) xen_io_apic_write(a,r,v)
++/*
++ * Synchronize the IO-APIC and the CPU by doing
++ * a dummy read from the IO-APIC
++ */
++static inline void io_apic_sync(unsigned int apic)
++{
++#ifndef CONFIG_XEN
++      struct io_apic __iomem *io_apic = io_apic_base(apic);
++      readl(&io_apic->data);
++#endif
++}
+-#define clear_IO_APIC() ((void)0)
++union entry_union {
++      struct { u32 w1, w2; };
++      struct IO_APIC_route_entry entry;
++};
+-#else
++#ifndef CONFIG_XEN
++static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
++{
++      union entry_union eu;
++      unsigned long flags;
++      spin_lock_irqsave(&ioapic_lock, flags);
++      eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
++      eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
++      spin_unlock_irqrestore(&ioapic_lock, flags);
++      return eu.entry;
++}
++#endif
++
++/*
++ * When we write a new IO APIC routing entry, we need to write the high
++ * word first! If the mask bit in the low word is clear, we will enable
++ * the interrupt, and we need to make sure the entry is fully populated
++ * before that happens.
++ */
++static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
++{
++      unsigned long flags;
++      union entry_union eu;
++      eu.entry = e;
++      spin_lock_irqsave(&ioapic_lock, flags);
++      io_apic_write(apic, 0x11 + 2*pin, eu.w2);
++      io_apic_write(apic, 0x10 + 2*pin, eu.w1);
++      spin_unlock_irqrestore(&ioapic_lock, flags);
++}
++
++#ifndef CONFIG_XEN
++/*
++ * When we mask an IO APIC routing entry, we need to write the low
++ * word first, in order to set the mask bit before we change the
++ * high bits!
++ */
++static void ioapic_mask_entry(int apic, int pin)
++{
++      unsigned long flags;
++      union entry_union eu = { .entry.mask = 1 };
++
++      spin_lock_irqsave(&ioapic_lock, flags);
++      io_apic_write(apic, 0x10 + 2*pin, eu.w1);
++      io_apic_write(apic, 0x11 + 2*pin, eu.w2);
++      spin_unlock_irqrestore(&ioapic_lock, flags);
++}
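The entry_union accessors above centralize an ordering rule the old open-coded readers and writers repeated at every call site: the low word (w1) holds the mask bit, so ioapic_write_entry() stores the high word first when arming an entry and ioapic_mask_entry() stores the low word first when shutting one down, and a half-written RTE can never fire. A usage sketch for the native (non-Xen) side, with a hypothetical caller:

    static void retarget_pin(int apic, int pin, unsigned int dest, u8 vector)
    {
            struct IO_APIC_route_entry e = ioapic_read_entry(apic, pin);

            e.dest.logical.logical_dest = dest;     /* new destination  */
            e.vector = vector;                      /* new vector       */
            ioapic_write_entry(apic, pin, e);       /* high word first  */
    }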
+ #ifdef CONFIG_SMP
++static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
++{
++      int apic, pin;
++      struct irq_pin_list *entry = irq_2_pin + irq;
++
++      BUG_ON(irq >= NR_IRQS);
++      for (;;) {
++              unsigned int reg;
++              apic = entry->apic;
++              pin = entry->pin;
++              if (pin == -1)
++                      break;
++              io_apic_write(apic, 0x11 + pin*2, dest);
++              reg = io_apic_read(apic, 0x10 + pin*2);
++              reg &= ~0x000000ff;
++              reg |= vector;
++              io_apic_modify(apic, reg);
++              if (!entry->next)
++                      break;
++              entry = irq_2_pin + entry->next;
++      }
++}
++
+ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
+ {
+       unsigned long flags;
+       unsigned int dest;
+       cpumask_t tmp;
++      int vector;
+       cpus_and(tmp, mask, cpu_online_map);
+       if (cpus_empty(tmp))
+@@ -147,7 +267,11 @@ static void set_ioapic_affinity_irq(unsi
+       cpus_and(mask, tmp, CPU_MASK_ALL);
+-      dest = cpu_mask_to_apicid(mask);
++      vector = assign_irq_vector(irq, mask, &tmp);
++      if (vector < 0)
++              return;
++
++      dest = cpu_mask_to_apicid(tmp);
+       /*
+        * Only the high 8 bits are valid.
+@@ -155,13 +279,12 @@ static void set_ioapic_affinity_irq(unsi
+       dest = SET_APIC_LOGICAL_ID(dest);
+       spin_lock_irqsave(&ioapic_lock, flags);
+-      __DO_ACTION(1, = dest, )
+-      set_irq_info(irq, mask);
++      __target_IO_APIC_irq(irq, dest, vector);
++      set_native_irq_info(irq, mask);
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+ }
+ #endif
+-
+-#endif /* !CONFIG_XEN */
++#endif
+ /*
+  * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
+@@ -241,24 +364,15 @@ static void unmask_IO_APIC_irq (unsigned
+ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
+ {
+       struct IO_APIC_route_entry entry;
+-      unsigned long flags;
+       /* Check delivery_mode to be sure we're not clearing an SMI pin */
+-      spin_lock_irqsave(&ioapic_lock, flags);
+-      *(((int*)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
+-      *(((int*)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
+-      spin_unlock_irqrestore(&ioapic_lock, flags);
++      entry = ioapic_read_entry(apic, pin);
+       if (entry.delivery_mode == dest_SMI)
+               return;
+       /*
+        * Disable it in the IO-APIC irq-routing table:
+        */
+-      memset(&entry, 0, sizeof(entry));
+-      entry.mask = 1;
+-      spin_lock_irqsave(&ioapic_lock, flags);
+-      io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry) + 0));
+-      io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry) + 1));
+-      spin_unlock_irqrestore(&ioapic_lock, flags);
++      ioapic_mask_entry(apic, pin);
+ }
+ static void clear_IO_APIC (void)
+@@ -272,16 +386,6 @@ static void clear_IO_APIC (void)
+ #endif /* !CONFIG_XEN */
+-static u8 gsi_2_irq[NR_IRQ_VECTORS] = { [0 ... NR_IRQ_VECTORS-1] = 0xFF };
+-
+-/*
+- * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
+- * specific CPU-side IRQs.
+- */
+-
+-#define MAX_PIRQS 8
+-static int pirq_entries [MAX_PIRQS];
+-static int pirqs_enabled;
+ int skip_ioapic_setup;
+ int ioapic_force;
+@@ -290,18 +394,17 @@ int ioapic_force;
+ static int __init disable_ioapic_setup(char *str)
+ {
+       skip_ioapic_setup = 1;
+-      return 1;
++      return 0;
+ }
++early_param("noapic", disable_ioapic_setup);
+-static int __init enable_ioapic_setup(char *str)
++/* Actually the next one is obsolete, but keep it for paranoid reasons -AK */
++static int __init disable_timer_pin_setup(char *arg)
+ {
+-      ioapic_force = 1;
+-      skip_ioapic_setup = 0;
++      disable_timer_pin_1 = 1;
+       return 1;
+ }
+-
+-__setup("noapic", disable_ioapic_setup);
+-__setup("apic", enable_ioapic_setup);
++__setup("disable_timer_pin_1", disable_timer_pin_setup);
+ #ifndef CONFIG_XEN
+ static int __init setup_disable_8254_timer(char *s)
+@@ -319,137 +422,6 @@ __setup("disable_8254_timer", setup_disa
+ __setup("enable_8254_timer", setup_enable_8254_timer);
+ #endif /* !CONFIG_XEN */
+-#include <asm/pci-direct.h>
+-#include <linux/pci_ids.h>
+-#include <linux/pci.h>
+-
+-
+-#ifdef CONFIG_ACPI
+-
+-static int nvidia_hpet_detected __initdata;
+-
+-static int __init nvidia_hpet_check(unsigned long phys, unsigned long size)
+-{
+-      nvidia_hpet_detected = 1;
+-      return 0;
+-}
+-#endif
+-
+-/* Temporary Hack. Nvidia and VIA boards currently only work with IO-APIC
+-   off. Check for an Nvidia or VIA PCI bridge and turn it off.
+-   Use pci direct infrastructure because this runs before the PCI subsystem. 
+-
+-   Can be overwritten with "apic"
+-
+-   And another hack to disable the IOMMU on VIA chipsets.
+-
+-   ... and others. Really should move this somewhere else.
+-
+-   Kludge-O-Rama. */
+-void __init check_ioapic(void) 
+-{ 
+-      int num,slot,func; 
+-      /* Poor man's PCI discovery */
+-      for (num = 0; num < 32; num++) { 
+-              for (slot = 0; slot < 32; slot++) { 
+-                      for (func = 0; func < 8; func++) { 
+-                              u32 class;
+-                              u32 vendor;
+-                              u8 type;
+-                              class = read_pci_config(num,slot,func,
+-                                                      PCI_CLASS_REVISION);
+-                              if (class == 0xffffffff)
+-                                      break; 
+-
+-                              if ((class >> 16) != PCI_CLASS_BRIDGE_PCI)
+-                                      continue; 
+-
+-                              vendor = read_pci_config(num, slot, func, 
+-                                                       PCI_VENDOR_ID);
+-                              vendor &= 0xffff;
+-                              switch (vendor) { 
+-                              case PCI_VENDOR_ID_VIA:
+-#ifdef CONFIG_IOMMU
+-                                      if ((end_pfn > MAX_DMA32_PFN ||
+-                                           force_iommu) &&
+-                                          !iommu_aperture_allowed) {
+-                                              printk(KERN_INFO
+-    "Looks like a VIA chipset. Disabling IOMMU. Override with \"iommu=allowed\"\n");
+-                                              iommu_aperture_disabled = 1;
+-                                      }
+-#endif
+-                                      return;
+-                              case PCI_VENDOR_ID_NVIDIA:
+-#ifdef CONFIG_ACPI
+-                                      /*
+-                                       * All timer overrides on Nvidia are
+-                                       * wrong unless HPET is enabled.
+-                                       */
+-                                      nvidia_hpet_detected = 0;
+-                                      acpi_table_parse(ACPI_HPET,
+-                                                      nvidia_hpet_check);
+-                                      if (nvidia_hpet_detected == 0) {
+-                                              acpi_skip_timer_override = 1;
+-                                              printk(KERN_INFO "Nvidia board "
+-                                                  "detected. Ignoring ACPI "
+-                                                  "timer override.\n");
+-                                      }
+-#endif
+-                                      /* RED-PEN skip them on mptables too? */
+-                                      return;
+-                              case PCI_VENDOR_ID_ATI:
+-
+-                              /* This should be actually default, but
+-                                 for 2.6.16 let's do it for ATI only where
+-                                 it's really needed. */
+-#ifndef CONFIG_XEN
+-                                      if (timer_over_8254 == 1) {     
+-                                              timer_over_8254 = 0;    
+-                                      printk(KERN_INFO
+-              "ATI board detected. Disabling timer routing over 8254.\n");
+-                                      }       
+-#endif
+-                                      return;
+-                              } 
+-
+-
+-                              /* No multi-function device? */
+-                              type = read_pci_config_byte(num,slot,func,
+-                                                          PCI_HEADER_TYPE);
+-                              if (!(type & 0x80))
+-                                      break;
+-                      } 
+-              }
+-      }
+-} 
+-
+-static int __init ioapic_pirq_setup(char *str)
+-{
+-      int i, max;
+-      int ints[MAX_PIRQS+1];
+-
+-      get_options(str, ARRAY_SIZE(ints), ints);
+-
+-      for (i = 0; i < MAX_PIRQS; i++)
+-              pirq_entries[i] = -1;
+-
+-      pirqs_enabled = 1;
+-      apic_printk(APIC_VERBOSE, "PIRQ redirection, working around broken MP-BIOS.\n");
+-      max = MAX_PIRQS;
+-      if (ints[0] < MAX_PIRQS)
+-              max = ints[0];
+-
+-      for (i = 0; i < max; i++) {
+-              apic_printk(APIC_VERBOSE, "... PIRQ%d -> IRQ %d\n", i, ints[i+1]);
+-              /*
+-               * PIRQs are mapped upside down, usually.
+-               */
+-              pirq_entries[MAX_PIRQS-i-1] = ints[i+1];
+-      }
+-      return 1;
+-}
+-
+-__setup("pirq=", ioapic_pirq_setup);
+ /*
+  * Find the IRQ entry number of a certain pin.
+@@ -479,9 +451,7 @@ static int __init find_isa_irq_pin(int i
+       for (i = 0; i < mp_irq_entries; i++) {
+               int lbus = mp_irqs[i].mpc_srcbus;
+-              if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
+-                   mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
+-                   mp_bus_id_to_type[lbus] == MP_BUS_MCA) &&
++              if (test_bit(lbus, mp_bus_not_pci) &&
+                   (mp_irqs[i].mpc_irqtype == type) &&
+                   (mp_irqs[i].mpc_srcbusirq == irq))
+@@ -497,9 +467,7 @@ static int __init find_isa_irq_apic(int 
+       for (i = 0; i < mp_irq_entries; i++) {
+               int lbus = mp_irqs[i].mpc_srcbus;
+-              if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
+-                   mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
+-                   mp_bus_id_to_type[lbus] == MP_BUS_MCA) &&
++              if (test_bit(lbus, mp_bus_not_pci) &&
+                   (mp_irqs[i].mpc_irqtype == type) &&
+                   (mp_irqs[i].mpc_srcbusirq == irq))
+                       break;
+@@ -540,7 +508,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, 
+                           mp_irqs[i].mpc_dstapic == MP_APIC_ALL)
+                               break;
+-              if ((mp_bus_id_to_type[lbus] == MP_BUS_PCI) &&
++              if (!test_bit(lbus, mp_bus_not_pci) &&
+                   !mp_irqs[i].mpc_irqtype &&
+                   (bus == lbus) &&
+                   (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) {
+@@ -563,27 +531,6 @@ int IO_APIC_get_PCI_irq_vector(int bus, 
+       return best_guess;
+ }
+-/*
+- * EISA Edge/Level control register, ELCR
+- */
+-static int EISA_ELCR(unsigned int irq)
+-{
+-      if (irq < 16) {
+-              unsigned int port = 0x4d0 + (irq >> 3);
+-              return (inb(port) >> (irq & 7)) & 1;
+-      }
+-      apic_printk(APIC_VERBOSE, "Broken MPtable reports ISA irq %d\n", irq);
+-      return 0;
+-}
+-
+-/* EISA interrupts are always polarity zero and can be edge or level
+- * trigger depending on the ELCR value.  If an interrupt is listed as
+- * EISA conforming in the MP table, that means its trigger type must
+- * be read in from the ELCR */
+-
+-#define default_EISA_trigger(idx)     (EISA_ELCR(mp_irqs[idx].mpc_srcbusirq))
+-#define default_EISA_polarity(idx)    (0)
+-
+ /* ISA interrupts are always polarity zero edge triggered,
+  * when listed as conforming in the MP table. */
+@@ -596,12 +543,6 @@ static int EISA_ELCR(unsigned int irq)
+ #define default_PCI_trigger(idx)      (1)
+ #define default_PCI_polarity(idx)     (1)
+-/* MCA interrupts are always polarity zero level triggered,
+- * when listed as conforming in the MP table. */
+-
+-#define default_MCA_trigger(idx)      (1)
+-#define default_MCA_polarity(idx)     (0)
+-
+ static int __init MPBIOS_polarity(int idx)
+ {
+       int bus = mp_irqs[idx].mpc_srcbus;
+@@ -613,38 +554,11 @@ static int __init MPBIOS_polarity(int id
+       switch (mp_irqs[idx].mpc_irqflag & 3)
+       {
+               case 0: /* conforms, ie. bus-type dependent polarity */
+-              {
+-                      switch (mp_bus_id_to_type[bus])
+-                      {
+-                              case MP_BUS_ISA: /* ISA pin */
+-                              {
+-                                      polarity = default_ISA_polarity(idx);
+-                                      break;
+-                              }
+-                              case MP_BUS_EISA: /* EISA pin */
+-                              {
+-                                      polarity = default_EISA_polarity(idx);
+-                                      break;
+-                              }
+-                              case MP_BUS_PCI: /* PCI pin */
+-                              {
+-                                      polarity = default_PCI_polarity(idx);
+-                                      break;
+-                              }
+-                              case MP_BUS_MCA: /* MCA pin */
+-                              {
+-                                      polarity = default_MCA_polarity(idx);
+-                                      break;
+-                              }
+-                              default:
+-                              {
+-                                      printk(KERN_WARNING "broken BIOS!!\n");
+-                                      polarity = 1;
+-                                      break;
+-                              }
+-                      }
++                      if (test_bit(bus, mp_bus_not_pci))
++                              polarity = default_ISA_polarity(idx);
++                      else
++                              polarity = default_PCI_polarity(idx);
+                       break;
+-              }
+               case 1: /* high active */
+               {
+                       polarity = 0;
+@@ -682,38 +596,11 @@ static int MPBIOS_trigger(int idx)
+       switch ((mp_irqs[idx].mpc_irqflag>>2) & 3)
+       {
+               case 0: /* conforms, ie. bus-type dependent */
+-              {
+-                      switch (mp_bus_id_to_type[bus])
+-                      {
+-                              case MP_BUS_ISA: /* ISA pin */
+-                              {
+-                                      trigger = default_ISA_trigger(idx);
+-                                      break;
+-                              }
+-                              case MP_BUS_EISA: /* EISA pin */
+-                              {
+-                                      trigger = default_EISA_trigger(idx);
+-                                      break;
+-                              }
+-                              case MP_BUS_PCI: /* PCI pin */
+-                              {
+-                                      trigger = default_PCI_trigger(idx);
+-                                      break;
+-                              }
+-                              case MP_BUS_MCA: /* MCA pin */
+-                              {
+-                                      trigger = default_MCA_trigger(idx);
+-                                      break;
+-                              }
+-                              default:
+-                              {
+-                                      printk(KERN_WARNING "broken BIOS!!\n");
+-                                      trigger = 1;
+-                                      break;
+-                              }
+-                      }
++                      if (test_bit(bus, mp_bus_not_pci))
++                              trigger = default_ISA_trigger(idx);
++                      else
++                              trigger = default_PCI_trigger(idx);
+                       break;
+-              }
+               case 1: /* edge */
+               {
+                       trigger = 0;
+@@ -750,64 +637,6 @@ static inline int irq_trigger(int idx)
+       return MPBIOS_trigger(idx);
+ }
+-static int next_irq = 16;
+-
+-/*
+- * gsi_irq_sharing -- Name overload!  "irq" can be either a legacy IRQ
+- * in the range 0-15, a linux IRQ in the range 0-223, or a GSI number
+- * from ACPI, which can reach 800 in large boxen.
+- *
+- * Compact the sparse GSI space into a sequential IRQ series and reuse
+- * vectors if possible.
+- */
+-int gsi_irq_sharing(int gsi)
+-{
+-      int i, tries, vector;
+-
+-      BUG_ON(gsi >= NR_IRQ_VECTORS);
+-
+-      if (platform_legacy_irq(gsi))
+-              return gsi;
+-
+-      if (gsi_2_irq[gsi] != 0xFF)
+-              return (int)gsi_2_irq[gsi];
+-
+-      tries = NR_IRQS;
+-  try_again:
+-      vector = assign_irq_vector(gsi);
+-
+-      /*
+-       * Sharing vectors means sharing IRQs, so scan irq_vectors for previous
+-       * use of vector and if found, return that IRQ.  However, we never want
+-       * to share legacy IRQs, which usually have a different trigger mode
+-       * than PCI.
+-       */
+-      for (i = 0; i < NR_IRQS; i++)
+-              if (IO_APIC_VECTOR(i) == vector)
+-                      break;
+-      if (platform_legacy_irq(i)) {
+-              if (--tries >= 0) {
+-                      IO_APIC_VECTOR(i) = 0;
+-                      goto try_again;
+-              }
+-              panic("gsi_irq_sharing: didn't find an IRQ using vector 0x%02X for GSI %d", vector, gsi);
+-      }
+-      if (i < NR_IRQS) {
+-              gsi_2_irq[gsi] = i;
+-              printk(KERN_INFO "GSI %d sharing vector 0x%02X and IRQ %d\n",
+-                              gsi, vector, i);
+-              return i;
+-      }
+-
+-      i = next_irq++;
+-      BUG_ON(i >= NR_IRQS);
+-      gsi_2_irq[gsi] = i;
+-      IO_APIC_VECTOR(i) = vector;
+-      printk(KERN_INFO "GSI %d assigned vector 0x%02X and IRQ %d\n",
+-                      gsi, vector, i);
+-      return i;
+-}
+-
+ static int pin_2_irq(int idx, int apic, int pin)
+ {
+       int irq, i;
+@@ -819,49 +648,16 @@ static int pin_2_irq(int idx, int apic, 
+       if (mp_irqs[idx].mpc_dstirq != pin)
+               printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
+-      switch (mp_bus_id_to_type[bus])
+-      {
+-              case MP_BUS_ISA: /* ISA pin */
+-              case MP_BUS_EISA:
+-              case MP_BUS_MCA:
+-              {
+-                      irq = mp_irqs[idx].mpc_srcbusirq;
+-                      break;
+-              }
+-              case MP_BUS_PCI: /* PCI pin */
+-              {
+-                      /*
+-                       * PCI IRQs are mapped in order
+-                       */
+-                      i = irq = 0;
+-                      while (i < apic)
+-                              irq += nr_ioapic_registers[i++];
+-                      irq += pin;
+-                      irq = gsi_irq_sharing(irq);
+-                      break;
+-              }
+-              default:
+-              {
+-                      printk(KERN_ERR "unknown bus type %d.\n",bus); 
+-                      irq = 0;
+-                      break;
+-              }
+-      }
+-      BUG_ON(irq >= NR_IRQS);
+-
+-      /*
+-       * PCI IRQ command line redirection. Yes, limits are hardcoded.
+-       */
+-      if ((pin >= 16) && (pin <= 23)) {
+-              if (pirq_entries[pin-16] != -1) {
+-                      if (!pirq_entries[pin-16]) {
+-                              apic_printk(APIC_VERBOSE, "disabling PIRQ%d\n", pin-16);
+-                      } else {
+-                              irq = pirq_entries[pin-16];
+-                              apic_printk(APIC_VERBOSE, "using PIRQ%d -> IRQ %d\n",
+-                                              pin-16, irq);
+-                      }
+-              }
++      if (test_bit(bus, mp_bus_not_pci)) {
++              irq = mp_irqs[idx].mpc_srcbusirq;
++      } else {
++              /*
++               * PCI IRQs are mapped in order
++               */
++              i = irq = 0;
++              while (i < apic)
++                      irq += nr_ioapic_registers[i++];
++              irq += pin;
+       }
+       BUG_ON(irq >= NR_IRQS);
+       return irq;
+@@ -885,46 +681,71 @@ static inline int IO_APIC_irq_trigger(in
+ }
+ /* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
+-u8 irq_vector[NR_IRQ_VECTORS] __read_mostly;
++static u8 irq_vector[NR_IRQ_VECTORS] __read_mostly;
+-int assign_irq_vector(int irq)
++static int __assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result)
+ {
+-      unsigned long flags;
+       int vector;
+       struct physdev_irq irq_op;
+   
+-      BUG_ON(irq != AUTO_ASSIGN && (unsigned)irq >= NR_IRQ_VECTORS);
++      BUG_ON((unsigned)irq >= NR_IRQ_VECTORS);
+       if (irq < PIRQ_BASE || irq - PIRQ_BASE >= NR_PIRQS)
+               return -EINVAL;
+-      spin_lock_irqsave(&vector_lock, flags);
++      cpus_and(*result, mask, cpu_online_map);
+-      if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) {
+-              spin_unlock_irqrestore(&vector_lock, flags);
+-              return IO_APIC_VECTOR(irq);
+-      }
++      if (irq_vector[irq] > 0)
++              return irq_vector[irq];
+       irq_op.irq = irq;
+-      if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) {
+-              spin_unlock_irqrestore(&vector_lock, flags);
++      if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op))
+               return -ENOSPC;
+-      }
+       vector = irq_op.vector;
+-      vector_irq[vector] = irq;
+-      if (irq != AUTO_ASSIGN)
+-              IO_APIC_VECTOR(irq) = vector;
++      irq_vector[irq] = vector;
+-      spin_unlock_irqrestore(&vector_lock, flags);
++      return vector;
++}
++
++static int assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result)
++{
++      int vector;
++      unsigned long flags;
++      spin_lock_irqsave(&vector_lock, flags);
++      vector = __assign_irq_vector(irq, mask, result);
++      spin_unlock_irqrestore(&vector_lock, flags);
+       return vector;
+ }
+-extern void (*interrupt[NR_IRQS])(void);
+ #ifndef CONFIG_XEN
+-static struct hw_interrupt_type ioapic_level_type;
+-static struct hw_interrupt_type ioapic_edge_type;
++void __setup_vector_irq(int cpu)
++{
++      /* Initialize vector_irq on a new cpu */
++      /* This function must be called with vector_lock held */
++      int irq, vector;
++
++      /* Mark the inuse vectors */
++      for (irq = 0; irq < NR_IRQ_VECTORS; ++irq) {
++              if (!cpu_isset(cpu, irq_domain[irq]))
++                      continue;
++              vector = irq_vector[irq];
++              per_cpu(vector_irq, cpu)[vector] = irq;
++      }
++      /* Mark the free vectors */
++      for (vector = 0; vector < NR_VECTORS; ++vector) {
++              irq = per_cpu(vector_irq, cpu)[vector];
++              if (irq < 0)
++                      continue;
++              if (!cpu_isset(cpu, irq_domain[irq]))
++                      per_cpu(vector_irq, cpu)[vector] = -1;
++      }
++}
++
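__setup_vector_irq() above relies on the per-CPU vector tables that replaced the single global vector_irq[NR_VECTORS] array in 2.6.19: a CPU coming online copies in the vectors of every irq domain it belongs to and marks the rest free. A sketch of the backing declaration as mainline has it; the exact typedef is an assumption, and the Xen build sticks to hypervisor-assigned vectors instead:

    typedef int vector_irq_t[NR_VECTORS];

    /* Per-CPU vector -> irq translation table; -1 marks the vector
     * as free on this CPU. */
    DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
            [0 ... NR_VECTORS - 1] = -1,
    };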
++extern void (*interrupt[NR_IRQS])(void);
++
++static struct irq_chip ioapic_chip;
+ #define IOAPIC_AUTO   -1
+ #define IOAPIC_EDGE   0
+@@ -932,16 +753,15 @@ static struct hw_interrupt_type ioapic_e
+ static void ioapic_register_intr(int irq, int vector, unsigned long trigger)
+ {
+-      unsigned idx;
+-
+-      idx = use_pci_vector() && !platform_legacy_irq(irq) ? vector : irq;
+-
+       if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
+                       trigger == IOAPIC_LEVEL)
+-              irq_desc[idx].chip = &ioapic_level_type;
+-      else
+-              irq_desc[idx].chip = &ioapic_edge_type;
+-      set_intr_gate(vector, interrupt[idx]);
++              set_irq_chip_and_handler_name(irq, &ioapic_chip,
++                                            handle_fasteoi_irq, "fasteoi");
++      else {
++              irq_desc[irq].status |= IRQ_DELAYED_DISABLE;
++              set_irq_chip_and_handler_name(irq, &ioapic_chip,
++                                            handle_edge_irq, "edge");
++      }
+ }
+ #else
+ #define ioapic_register_intr(irq, vector, trigger) evtchn_register_pirq(irq)
+@@ -994,16 +814,21 @@ static void __init setup_IO_APIC_irqs(vo
+                       continue;
+               if (IO_APIC_IRQ(irq)) {
+-                      vector = assign_irq_vector(irq);
++                      cpumask_t mask;
++                      vector = assign_irq_vector(irq, TARGET_CPUS, &mask);
++                      if (vector < 0)
++                              continue;
++
++                      entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask);
+                       entry.vector = vector;
+                       ioapic_register_intr(irq, vector, IOAPIC_AUTO);
+                       if (!apic && (irq < 16))
+                               disable_8259A_irq(irq);
+               }
++              ioapic_write_entry(apic, pin, entry);
++
+               spin_lock_irqsave(&ioapic_lock, flags);
+-              io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
+-              io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
+               set_native_irq_info(irq, TARGET_CPUS);
+               spin_unlock_irqrestore(&ioapic_lock, flags);
+       }
+@@ -1046,7 +871,7 @@ static void __init setup_ExtINT_IRQ0_pin
+        * The timer IRQ doesn't have to know that behind the
+        * scene we have a 8259A-master in AEOI mode ...
+        */
+-      irq_desc[0].chip = &ioapic_edge_type;
++      set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge");
+       /*
+        * Add it to the IO-APIC irq-routing table:
+@@ -1142,10 +967,7 @@ void __apicdebuginit print_IO_APIC(void)
+       for (i = 0; i <= reg_01.bits.entries; i++) {
+               struct IO_APIC_route_entry entry;
+-              spin_lock_irqsave(&ioapic_lock, flags);
+-              *(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2);
+-              *(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2);
+-              spin_unlock_irqrestore(&ioapic_lock, flags);
++              entry = ioapic_read_entry(apic, i);
+               printk(KERN_DEBUG " %02x %03X %02X  ",
+                       i,
+@@ -1165,17 +987,12 @@ void __apicdebuginit print_IO_APIC(void)
+               );
+       }
+       }
+-      if (use_pci_vector())
+-              printk(KERN_INFO "Using vector-based indexing\n");
+       printk(KERN_DEBUG "IRQ to pin mappings:\n");
+       for (i = 0; i < NR_IRQS; i++) {
+               struct irq_pin_list *entry = irq_2_pin + i;
+               if (entry->pin < 0)
+                       continue;
+-              if (use_pci_vector() && !platform_legacy_irq(i))
+-                      printk(KERN_DEBUG "IRQ%d ", IO_APIC_VECTOR(i));
+-              else
+-                      printk(KERN_DEBUG "IRQ%d ", i);
++              printk(KERN_DEBUG "IRQ%d ", i);
+               for (;;) {
+                       printk("-> %d:%d", entry->apic, entry->pin);
+                       if (!entry->next)
+@@ -1339,9 +1156,6 @@ static void __init enable_IO_APIC(void)
+               irq_2_pin[i].pin = -1;
+               irq_2_pin[i].next = 0;
+       }
+-      if (!pirqs_enabled)
+-              for (i = 0; i < MAX_PIRQS; i++)
+-                      pirq_entries[i] = -1;
+       /*
+        * The number of IO-APIC IRQ registers (== #pins):
+@@ -1358,11 +1172,7 @@ static void __init enable_IO_APIC(void)
+               /* See if any of the pins is in ExtINT mode */
+               for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+                       struct IO_APIC_route_entry entry;
+-                      spin_lock_irqsave(&ioapic_lock, flags);
+-                      *(((int *)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
+-                      *(((int *)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
+-                      spin_unlock_irqrestore(&ioapic_lock, flags);
+-
++                      entry = ioapic_read_entry(apic, pin);
+                       /* If the interrupt line is enabled and in ExtInt mode
+                        * I have found the pin where the i8259 is connected.
+@@ -1416,7 +1226,6 @@ void disable_IO_APIC(void)
+        */
+       if (ioapic_i8259.pin != -1) {
+               struct IO_APIC_route_entry entry;
+-              unsigned long flags;
+               memset(&entry, 0, sizeof(entry));
+               entry.mask            = 0; /* Enabled */
+@@ -1433,12 +1242,7 @@ void disable_IO_APIC(void)
+               /*
+                * Add it to the IO-APIC irq-routing table:
+                */
+-              spin_lock_irqsave(&ioapic_lock, flags);
+-              io_apic_write(ioapic_i8259.apic, 0x11+2*ioapic_i8259.pin,
+-                      *(((int *)&entry)+1));
+-              io_apic_write(ioapic_i8259.apic, 0x10+2*ioapic_i8259.pin,
+-                      *(((int *)&entry)+0));
+-              spin_unlock_irqrestore(&ioapic_lock, flags);
++              ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
+       }
+       disconnect_bsp_APIC(ioapic_i8259.pin != -1);
+@@ -1446,76 +1250,6 @@ void disable_IO_APIC(void)
+ }
+ /*
+- * function to set the IO-APIC physical IDs based on the
+- * values stored in the MPC table.
+- *
+- * by Matt Domsch <Matt_Domsch@dell.com>  Tue Dec 21 12:25:05 CST 1999
+- */
+-
+-#ifndef CONFIG_XEN
+-static void __init setup_ioapic_ids_from_mpc (void)
+-{
+-      union IO_APIC_reg_00 reg_00;
+-      int apic;
+-      int i;
+-      unsigned char old_id;
+-      unsigned long flags;
+-
+-      /*
+-       * Set the IOAPIC ID to the value stored in the MPC table.
+-       */
+-      for (apic = 0; apic < nr_ioapics; apic++) {
+-
+-              /* Read the register 0 value */
+-              spin_lock_irqsave(&ioapic_lock, flags);
+-              reg_00.raw = io_apic_read(apic, 0);
+-              spin_unlock_irqrestore(&ioapic_lock, flags);
+-              
+-              old_id = mp_ioapics[apic].mpc_apicid;
+-
+-
+-              printk(KERN_INFO "Using IO-APIC %d\n", mp_ioapics[apic].mpc_apicid);
+-
+-
+-              /*
+-               * We need to adjust the IRQ routing table
+-               * if the ID changed.
+-               */
+-              if (old_id != mp_ioapics[apic].mpc_apicid)
+-                      for (i = 0; i < mp_irq_entries; i++)
+-                              if (mp_irqs[i].mpc_dstapic == old_id)
+-                                      mp_irqs[i].mpc_dstapic
+-                                              = mp_ioapics[apic].mpc_apicid;
+-
+-              /*
+-               * Read the right value from the MPC table and
+-               * write it into the ID register.
+-               */
+-              apic_printk(APIC_VERBOSE,KERN_INFO "...changing IO-APIC physical APIC ID to %d ...",
+-                              mp_ioapics[apic].mpc_apicid);
+-
+-              reg_00.bits.ID = mp_ioapics[apic].mpc_apicid;
+-              spin_lock_irqsave(&ioapic_lock, flags);
+-              io_apic_write(apic, 0, reg_00.raw);
+-              spin_unlock_irqrestore(&ioapic_lock, flags);
+-
+-              /*
+-               * Sanity check
+-               */
+-              spin_lock_irqsave(&ioapic_lock, flags);
+-              reg_00.raw = io_apic_read(apic, 0);
+-              spin_unlock_irqrestore(&ioapic_lock, flags);
+-              if (reg_00.bits.ID != mp_ioapics[apic].mpc_apicid)
+-                      printk("could not set ID!\n");
+-              else
+-                      apic_printk(APIC_VERBOSE," ok.\n");
+-      }
+-}
+-#else
+-static void __init setup_ioapic_ids_from_mpc(void) { }
+-#endif
+-
+-/*
+  * There is a nasty bug in some older SMP boards, their mptable lies
+  * about the timer IRQ. We do the following to work around the situation:
+  *
+@@ -1569,7 +1303,7 @@ static int __init timer_irq_works(void)
+  * an edge even if it isn't on the 8259A...
+  */
+-static unsigned int startup_edge_ioapic_irq(unsigned int irq)
++static unsigned int startup_ioapic_irq(unsigned int irq)
+ {
+       int was_pending = 0;
+       unsigned long flags;
+@@ -1586,107 +1320,19 @@ static unsigned int startup_edge_ioapic_
+       return was_pending;
+ }
+-/*
+- * Once we have recorded IRQ_PENDING already, we can mask the
+- * interrupt for real. This prevents IRQ storms from unhandled
+- * devices.
+- */
+-static void ack_edge_ioapic_irq(unsigned int irq)
+-{
+-      move_irq(irq);
+-      if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED))
+-                                      == (IRQ_PENDING | IRQ_DISABLED))
+-              mask_IO_APIC_irq(irq);
+-      ack_APIC_irq();
+-}
+-
+-/*
+- * Level triggered interrupts can just be masked,
+- * and shutting down and starting up the interrupt
+- * is the same as enabling and disabling them -- except
+- * with a startup need to return a "was pending" value.
+- *
+- * Level triggered interrupts are special because we
+- * do not touch any IO-APIC register while handling
+- * them. We ack the APIC in the end-IRQ handler, not
+- * in the start-IRQ-handler. Protection against reentrance
+- * from the same interrupt is still provided, both by the
+- * generic IRQ layer and by the fact that an unacked local
+- * APIC does not accept IRQs.
+- */
+-static unsigned int startup_level_ioapic_irq (unsigned int irq)
+-{
+-      unmask_IO_APIC_irq(irq);
+-
+-      return 0; /* don't check for pending */
+-}
+-
+-static void end_level_ioapic_irq (unsigned int irq)
+-{
+-      move_irq(irq);
+-      ack_APIC_irq();
+-}
+-
+-#ifdef CONFIG_PCI_MSI
+-static unsigned int startup_edge_ioapic_vector(unsigned int vector)
+-{
+-      int irq = vector_to_irq(vector);
+-
+-      return startup_edge_ioapic_irq(irq);
+-}
+-
+-static void ack_edge_ioapic_vector(unsigned int vector)
+-{
+-      int irq = vector_to_irq(vector);
+-
+-      move_native_irq(vector);
+-      ack_edge_ioapic_irq(irq);
+-}
+-
+-static unsigned int startup_level_ioapic_vector (unsigned int vector)
+-{
+-      int irq = vector_to_irq(vector);
+-
+-      return startup_level_ioapic_irq (irq);
+-}
+-
+-static void end_level_ioapic_vector (unsigned int vector)
+-{
+-      int irq = vector_to_irq(vector);
+-
+-      move_native_irq(vector);
+-      end_level_ioapic_irq(irq);
+-}
+-
+-static void mask_IO_APIC_vector (unsigned int vector)
+-{
+-      int irq = vector_to_irq(vector);
+-
+-      mask_IO_APIC_irq(irq);
+-}
+-
+-static void unmask_IO_APIC_vector (unsigned int vector)
++static int ioapic_retrigger_irq(unsigned int irq)
+ {
+-      int irq = vector_to_irq(vector);
+-
+-      unmask_IO_APIC_irq(irq);
+-}
+-
+-#ifdef CONFIG_SMP
+-static void set_ioapic_affinity_vector (unsigned int vector,
+-                                      cpumask_t cpu_mask)
+-{
+-      int irq = vector_to_irq(vector);
++      cpumask_t mask;
++      unsigned vector;
++      unsigned long flags;
+-      set_native_irq_info(vector, cpu_mask);
+-      set_ioapic_affinity_irq(irq, cpu_mask);
+-}
+-#endif // CONFIG_SMP
+-#endif // CONFIG_PCI_MSI
++      spin_lock_irqsave(&vector_lock, flags);
++      vector = irq_vector[irq];
++      cpus_clear(mask);
++      cpu_set(first_cpu(irq_domain[irq]), mask);
+-static int ioapic_retrigger(unsigned int irq)
+-{
+-      send_IPI_self(IO_APIC_VECTOR(irq));
++      send_IPI_mask(mask, vector);
++      spin_unlock_irqrestore(&vector_lock, flags);
+       return 1;
+ }
+@@ -1700,32 +1346,47 @@ static int ioapic_retrigger(unsigned int
+  * races.
+  */
+-static struct hw_interrupt_type ioapic_edge_type __read_mostly = {
+-      .typename = "IO-APIC-edge",
+-      .startup        = startup_edge_ioapic,
+-      .shutdown       = shutdown_edge_ioapic,
+-      .enable         = enable_edge_ioapic,
+-      .disable        = disable_edge_ioapic,
+-      .ack            = ack_edge_ioapic,
+-      .end            = end_edge_ioapic,
+-#ifdef CONFIG_SMP
+-      .set_affinity = set_ioapic_affinity,
++static void ack_apic_edge(unsigned int irq)
++{
++      move_native_irq(irq);
++      ack_APIC_irq();
++}
++
++static void ack_apic_level(unsigned int irq)
++{
++      int do_unmask_irq = 0;
++
++#if defined(CONFIG_GENERIC_PENDING_IRQ) || defined(CONFIG_IRQBALANCE)
++      /* If we are moving the irq we need to mask it */
++      if (unlikely(irq_desc[irq].status & IRQ_MOVE_PENDING)) {
++              do_unmask_irq = 1;
++              mask_IO_APIC_irq(irq);
++      }
+ #endif
+-      .retrigger      = ioapic_retrigger,
+-};
+-static struct hw_interrupt_type ioapic_level_type __read_mostly = {
+-      .typename = "IO-APIC-level",
+-      .startup        = startup_level_ioapic,
+-      .shutdown       = shutdown_level_ioapic,
+-      .enable         = enable_level_ioapic,
+-      .disable        = disable_level_ioapic,
+-      .ack            = mask_and_ack_level_ioapic,
+-      .end            = end_level_ioapic,
++      /*
++       * We must acknowledge the irq before we move it or the acknowledge will
++       * not propagate properly.
++       */
++      ack_APIC_irq();
++
++      /* Now we can move and re-enable the irq */
++      move_masked_irq(irq);
++      if (unlikely(do_unmask_irq))
++              unmask_IO_APIC_irq(irq);
++}
++
++static struct irq_chip ioapic_chip __read_mostly = {
++      .name           = "IO-APIC",
++      .startup        = startup_ioapic_irq,
++      .mask           = mask_IO_APIC_irq,
++      .unmask         = unmask_IO_APIC_irq,
++      .ack            = ack_apic_edge,
++      .eoi            = ack_apic_level,
+ #ifdef CONFIG_SMP
+-      .set_affinity = set_ioapic_affinity,
++      .set_affinity   = set_ioapic_affinity_irq,
+ #endif
+-      .retrigger      = ioapic_retrigger,
++      .retrigger      = ioapic_retrigger_irq,
+ };
+ #endif /* !CONFIG_XEN */
+@@ -1746,12 +1407,7 @@ static inline void init_IO_APIC_traps(vo
+        */
+       for (irq = 0; irq < NR_IRQS ; irq++) {
+               int tmp = irq;
+-              if (use_pci_vector()) {
+-                      if (!platform_legacy_irq(tmp))
+-                              if ((tmp = vector_to_irq(tmp)) == -1)
+-                                      continue;
+-              }
+-              if (IO_APIC_IRQ(tmp) && !IO_APIC_VECTOR(tmp)) {
++              if (IO_APIC_IRQ(tmp) && !irq_vector[tmp]) {
+                       /*
+                        * Hmm.. We don't have an entry for this,
+                        * so default to an old-fashioned 8259
+@@ -1762,7 +1418,7 @@ static inline void init_IO_APIC_traps(vo
+ #ifndef CONFIG_XEN
+                       else
+                               /* Strange. Oh, well.. */
+-                              irq_desc[irq].chip = &no_irq_type;
++                              irq_desc[irq].chip = &no_irq_chip;
+ #endif
+               }
+       }
+@@ -1883,8 +1539,6 @@ static inline void unlock_ExtINT_logic(v
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+ }
+-int timer_uses_ioapic_pin_0;
+-
+ /*
+  * This code may look a bit paranoid, but it's supposed to cooperate with
+  * a wide range of boards and BIOS bugs.  Fortunately only the timer IRQ
+@@ -1897,13 +1551,13 @@ static inline void check_timer(void)
+ {
+       int apic1, pin1, apic2, pin2;
+       int vector;
++      cpumask_t mask;
+       /*
+        * get/set the timer IRQ vector:
+        */
+       disable_8259A_irq(0);
+-      vector = assign_irq_vector(0);
+-      set_intr_gate(vector, interrupt[0]);
++      vector = assign_irq_vector(0, TARGET_CPUS, &mask);
+       /*
+        * Subtle, code in do_timer_interrupt() expects an AEOI
+@@ -1922,9 +1576,6 @@ static inline void check_timer(void)
+       pin2  = ioapic_i8259.pin;
+       apic2 = ioapic_i8259.apic;
+-      if (pin1 == 0)
+-              timer_uses_ioapic_pin_0 = 1;
+-
+       apic_printk(APIC_VERBOSE,KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n",
+               vector, apic1, pin1, apic2, pin2);
+@@ -2039,11 +1690,6 @@ void __init setup_IO_APIC(void)
+       apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");
+-      /*
+-       * Set up the IO-APIC IRQ routing table.
+-       */
+-      if (!acpi_ioapic)
+-              setup_ioapic_ids_from_mpc();
+ #ifndef CONFIG_XEN
+       sync_Arb_IDs();
+ #endif /* !CONFIG_XEN */
+@@ -2066,17 +1712,12 @@ static int ioapic_suspend(struct sys_dev
+ {
+       struct IO_APIC_route_entry *entry;
+       struct sysfs_ioapic_data *data;
+-      unsigned long flags;
+       int i;
+       data = container_of(dev, struct sysfs_ioapic_data, dev);
+       entry = data->entry;
+-      spin_lock_irqsave(&ioapic_lock, flags);
+-      for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) {
+-              *(((int *)entry) + 1) = io_apic_read(dev->id, 0x11 + 2 * i);
+-              *(((int *)entry) + 0) = io_apic_read(dev->id, 0x10 + 2 * i);
+-      }
+-      spin_unlock_irqrestore(&ioapic_lock, flags);
++      for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ )
++              *entry = ioapic_read_entry(dev->id, i);
+       return 0;
+ }
+@@ -2098,11 +1739,9 @@ static int ioapic_resume(struct sys_devi
+               reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid;
+               io_apic_write(dev->id, 0, reg_00.raw);
+       }
+-      for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) {
+-              io_apic_write(dev->id, 0x11+2*i, *(((int *)entry)+1));
+-              io_apic_write(dev->id, 0x10+2*i, *(((int *)entry)+0));
+-      }
+       spin_unlock_irqrestore(&ioapic_lock, flags);
++      for (i = 0; i < nr_ioapic_registers[dev->id]; i++)
++              ioapic_write_entry(dev->id, i, entry[i]);
+       return 0;
+ }
+@@ -2148,28 +1787,254 @@ static int __init ioapic_init_sysfs(void
+ device_initcall(ioapic_init_sysfs);
++/*
++ * Dynamic irq allocate and deallocation
++ */
++int create_irq(void)
++{
++      /* Allocate an unused irq */
++      int irq;
++      int new;
++      int vector = 0;
++      unsigned long flags;
++      cpumask_t mask;
++
++      irq = -ENOSPC;
++      spin_lock_irqsave(&vector_lock, flags);
++      for (new = (NR_IRQS - 1); new >= 0; new--) {
++              if (platform_legacy_irq(new))
++                      continue;
++              if (irq_vector[new] != 0)
++                      continue;
++              vector = __assign_irq_vector(new, TARGET_CPUS, &mask);
++              if (likely(vector > 0))
++                      irq = new;
++              break;
++      }
++      spin_unlock_irqrestore(&vector_lock, flags);
++
++      if (irq >= 0) {
++              dynamic_irq_init(irq);
++      }
++      return irq;
++}
++
++void destroy_irq(unsigned int irq)
++{
++      unsigned long flags;
++
++      dynamic_irq_cleanup(irq);
++
++      spin_lock_irqsave(&vector_lock, flags);
++      irq_vector[irq] = 0;
++      spin_unlock_irqrestore(&vector_lock, flags);
++}
++
+ #endif /* CONFIG_XEN */
+-/* --------------------------------------------------------------------------
+-                          ACPI-based IOAPIC Configuration
+-   -------------------------------------------------------------------------- */
++/*
++ * MSI message composition
++ */
++#if defined(CONFIG_PCI_MSI) && !defined(CONFIG_XEN)
++static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
++{
++      int vector;
++      unsigned dest;
++      cpumask_t tmp;
+-#ifdef CONFIG_ACPI
++      vector = assign_irq_vector(irq, TARGET_CPUS, &tmp);
++      if (vector >= 0) {
++              dest = cpu_mask_to_apicid(tmp);
++
++              msg->address_hi = MSI_ADDR_BASE_HI;
++              msg->address_lo =
++                      MSI_ADDR_BASE_LO |
++                      ((INT_DEST_MODE == 0) ?
++                              MSI_ADDR_DEST_MODE_PHYSICAL:
++                              MSI_ADDR_DEST_MODE_LOGICAL) |
++                      ((INT_DELIVERY_MODE != dest_LowestPrio) ?
++                              MSI_ADDR_REDIRECTION_CPU:
++                              MSI_ADDR_REDIRECTION_LOWPRI) |
++                      MSI_ADDR_DEST_ID(dest);
++
++              msg->data =
++                      MSI_DATA_TRIGGER_EDGE |
++                      MSI_DATA_LEVEL_ASSERT |
++                      ((INT_DELIVERY_MODE != dest_LowestPrio) ?
++                              MSI_DATA_DELIVERY_FIXED:
++                              MSI_DATA_DELIVERY_LOWPRI) |
++                      MSI_DATA_VECTOR(vector);
++      }
++      return vector;
++}
+-#define IO_APIC_MAX_ID                0xFE
++#ifdef CONFIG_SMP
++static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
++{
++      struct msi_msg msg;
++      unsigned int dest;
++      cpumask_t tmp;
++      int vector;
++
++      cpus_and(tmp, mask, cpu_online_map);
++      if (cpus_empty(tmp))
++              tmp = TARGET_CPUS;
++
++      cpus_and(mask, tmp, CPU_MASK_ALL);
++
++      vector = assign_irq_vector(irq, mask, &tmp);
++      if (vector < 0)
++              return;
++
++      dest = cpu_mask_to_apicid(tmp);
++
++      read_msi_msg(irq, &msg);
++
++      msg.data &= ~MSI_DATA_VECTOR_MASK;
++      msg.data |= MSI_DATA_VECTOR(vector);
++      msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
++      msg.address_lo |= MSI_ADDR_DEST_ID(dest);
+-int __init io_apic_get_version (int ioapic)
++      write_msi_msg(irq, &msg);
++      set_native_irq_info(irq, mask);
++}
++#endif /* CONFIG_SMP */
++
++/*
++ * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
++ * which implement the MSI or MSI-X Capability Structure.
++ */
++static struct irq_chip msi_chip = {
++      .name           = "PCI-MSI",
++      .unmask         = unmask_msi_irq,
++      .mask           = mask_msi_irq,
++      .ack            = ack_apic_edge,
++#ifdef CONFIG_SMP
++      .set_affinity   = set_msi_irq_affinity,
++#endif
++      .retrigger      = ioapic_retrigger_irq,
++};
++
++int arch_setup_msi_irq(unsigned int irq, struct pci_dev *dev)
+ {
+-      union IO_APIC_reg_01    reg_01;
+-      unsigned long flags;
++      struct msi_msg msg;
++      int ret;
++      ret = msi_compose_msg(dev, irq, &msg);
++      if (ret < 0)
++              return ret;
+-      spin_lock_irqsave(&ioapic_lock, flags);
+-      reg_01.raw = io_apic_read(ioapic, 1);
+-      spin_unlock_irqrestore(&ioapic_lock, flags);
++      write_msi_msg(irq, &msg);
+-      return reg_01.bits.version;
++      set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
++
++      return 0;
++}
++
++void arch_teardown_msi_irq(unsigned int irq)
++{
++      return;
+ }
++#endif /* CONFIG_PCI_MSI */
++
++/*
++ * Hypertransport interrupt support
++ */
++#ifdef CONFIG_HT_IRQ
++
++#ifdef CONFIG_SMP
++
++static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
++{
++      struct ht_irq_msg msg;
++      fetch_ht_irq_msg(irq, &msg);
++
++      msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK | HT_IRQ_LOW_DEST_ID_MASK);
++      msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK);
++
++      msg.address_lo |= HT_IRQ_LOW_VECTOR(vector) | HT_IRQ_LOW_DEST_ID(dest);
++      msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest);
++
++      write_ht_irq_msg(irq, &msg);
++}
++
++static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
++{
++      unsigned int dest;
++      cpumask_t tmp;
++      int vector;
++
++      cpus_and(tmp, mask, cpu_online_map);
++      if (cpus_empty(tmp))
++              tmp = TARGET_CPUS;
++
++      cpus_and(mask, tmp, CPU_MASK_ALL);
++
++      vector = assign_irq_vector(irq, mask, &tmp);
++      if (vector < 0)
++              return;
++
++      dest = cpu_mask_to_apicid(tmp);
++
++      target_ht_irq(irq, dest, vector);
++      set_native_irq_info(irq, mask);
++}
++#endif
++
++static struct irq_chip ht_irq_chip = {
++      .name           = "PCI-HT",
++      .mask           = mask_ht_irq,
++      .unmask         = unmask_ht_irq,
++      .ack            = ack_apic_edge,
++#ifdef CONFIG_SMP
++      .set_affinity   = set_ht_irq_affinity,
++#endif
++      .retrigger      = ioapic_retrigger_irq,
++};
++
++int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
++{
++      int vector;
++      cpumask_t tmp;
++
++      vector = assign_irq_vector(irq, TARGET_CPUS, &tmp);
++      if (vector >= 0) {
++              struct ht_irq_msg msg;
++              unsigned dest;
++
++              dest = cpu_mask_to_apicid(tmp);
++
++              msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
++
++              msg.address_lo =
++                      HT_IRQ_LOW_BASE |
++                      HT_IRQ_LOW_DEST_ID(dest) |
++                      HT_IRQ_LOW_VECTOR(vector) |
++                      ((INT_DEST_MODE == 0) ?
++                              HT_IRQ_LOW_DM_PHYSICAL :
++                              HT_IRQ_LOW_DM_LOGICAL) |
++                      HT_IRQ_LOW_RQEOI_EDGE |
++                      ((INT_DELIVERY_MODE != dest_LowestPrio) ?
++                              HT_IRQ_LOW_MT_FIXED :
++                              HT_IRQ_LOW_MT_ARBITRATED) |
++                      HT_IRQ_LOW_IRQ_MASKED;
++
++              write_ht_irq_msg(irq, &msg);
++
++              set_irq_chip_and_handler_name(irq, &ht_irq_chip,
++                                            handle_edge_irq, "edge");
++      }
++      return vector;
++}
++#endif /* CONFIG_HT_IRQ */
++
++/* --------------------------------------------------------------------------
++                          ACPI-based IOAPIC Configuration
++   -------------------------------------------------------------------------- */
++
++#ifdef CONFIG_ACPI
++
++#define IO_APIC_MAX_ID                0xFE
+ int __init io_apic_get_redir_entries (int ioapic)
+ {
+@@ -2188,6 +2053,8 @@ int io_apic_set_pci_routing (int ioapic,
+ {
+       struct IO_APIC_route_entry entry;
+       unsigned long flags;
++      int vector;
++      cpumask_t mask;
+       if (!IO_APIC_IRQ(irq)) {
+               apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
+@@ -2196,6 +2063,17 @@ int io_apic_set_pci_routing (int ioapic,
+       }
+       /*
++       * IRQs < 16 are already in the irq_2_pin[] map
++       */
++      if (irq >= 16)
++              add_pin_to_irq(irq, ioapic, pin);
++
++
++      vector = assign_irq_vector(irq, TARGET_CPUS, &mask);
++      if (vector < 0)
++              return vector;
++
++      /*
+        * Generate a PCI IRQ routing entry and program the IOAPIC accordingly.
+        * Note that we mask (disable) IRQs now -- these get enabled when the
+        * corresponding device driver registers for this IRQ.
+@@ -2205,19 +2083,11 @@ int io_apic_set_pci_routing (int ioapic,
+       entry.delivery_mode = INT_DELIVERY_MODE;
+       entry.dest_mode = INT_DEST_MODE;
+-      entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
++      entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask);
+       entry.trigger = edge_level;
+       entry.polarity = active_high_low;
+       entry.mask = 1;                                  /* Disabled (masked) */
+-
+-      irq = gsi_irq_sharing(irq);
+-      /*
+-       * IRQs < 16 are already in the irq_2_pin[] map
+-       */
+-      if (irq >= 16)
+-              add_pin_to_irq(irq, ioapic, pin);
+-
+-      entry.vector = assign_irq_vector(irq);
++      entry.vector = vector & 0xff;
+       apic_printk(APIC_VERBOSE,KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry (%d-%d -> 0x%x -> "
+               "IRQ %d Mode:%i Active:%i)\n", ioapic, 
+@@ -2229,10 +2099,10 @@ int io_apic_set_pci_routing (int ioapic,
+       if (!ioapic && (irq < 16))
+               disable_8259A_irq(irq);
++      ioapic_write_entry(ioapic, pin, entry);
++
+       spin_lock_irqsave(&ioapic_lock, flags);
+-      io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1));
+-      io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0));
+-      set_native_irq_info(use_pci_vector() ?  entry.vector : irq, TARGET_CPUS);
++      set_native_irq_info(irq, TARGET_CPUS);
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+       return 0;
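The io_apic hunks above convert the Xen IO-APIC code from the old
hw_interrupt_type model (separate edge/level types with their own
startup/enable/disable/end methods) to the 2.6.19 genirq struct irq_chip
model, where mask/unmask/ack/eoi methods pair with a generic flow handler.
A minimal sketch of that pattern, assuming a hypothetical edge-triggered
controller; the demo_* names and DEMO_IRQ are illustrative, not from this
patch:

#include <linux/init.h>
#include <linux/irq.h>

#define DEMO_IRQ 5      /* hypothetical IRQ number, illustration only */

static void demo_mask(unsigned int irq)
{
        /* a real chip would set its mask bit for this irq here */
}

static void demo_unmask(unsigned int irq)
{
        /* ... and clear the mask bit here */
}

static void demo_ack(unsigned int irq)
{
        /* acknowledge the pending interrupt at the controller */
}

static struct irq_chip demo_chip = {
        .name   = "DEMO",
        .mask   = demo_mask,
        .unmask = demo_unmask,
        .ack    = demo_ack,
};

static int __init demo_irq_init(void)
{
        /* bind chip and flow handler, as the patch does for MSI/HT irqs */
        set_irq_chip_and_handler_name(DEMO_IRQ, &demo_chip,
                                      handle_edge_irq, "edge");
        return 0;
}
device_initcall(demo_irq_init);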
+--- sle11-2009-05-14.orig/arch/x86/kernel/ioport_64-xen.c      2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/arch/x86/kernel/ioport_64-xen.c   2009-03-04 11:28:34.000000000 +0100
+@@ -58,6 +58,7 @@ asmlinkage long sys_ioperm(unsigned long
+               memset(bitmap, 0xff, IO_BITMAP_BYTES);
+               t->io_bitmap_ptr = bitmap;
++              set_thread_flag(TIF_IO_BITMAP);
+               set_xen_guest_handle(set_iobitmap.bitmap, (char *)bitmap);
+               set_iobitmap.nr_ports = IO_BITMAP_BITS;
+--- sle11-2009-05-14.orig/arch/x86/kernel/irq_64-xen.c 2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/arch/x86/kernel/irq_64-xen.c      2009-03-04 11:28:34.000000000 +0100
+@@ -20,11 +20,6 @@
+ #include <asm/idle.h>
+ atomic_t irq_err_count;
+-#ifdef CONFIG_X86_IO_APIC
+-#ifdef APIC_MISMATCH_DEBUG
+-atomic_t irq_mis_count;
+-#endif
+-#endif
+ #ifdef CONFIG_DEBUG_STACKOVERFLOW
+ /*
+@@ -79,7 +74,8 @@ int show_interrupts(struct seq_file *p, 
+               for_each_online_cpu(j)
+                       seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
+ #endif
+-              seq_printf(p, " %14s", irq_desc[i].chip->typename);
++              seq_printf(p, " %8s", irq_desc[i].chip->name);
++              seq_printf(p, "-%-8s", irq_desc[i].name);
+               seq_printf(p, "  %s", action->name);
+               for (action=action->next; action; action = action->next)
+@@ -99,11 +95,6 @@ skip:
+               seq_putc(p, '\n');
+ #endif
+               seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
+-#ifdef CONFIG_X86_IO_APIC
+-#ifdef APIC_MISMATCH_DEBUG
+-              seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count));
+-#endif
+-#endif
+       }
+       return 0;
+ }
+@@ -114,24 +105,28 @@ skip:
+  * handlers).
+  */
+ asmlinkage unsigned int do_IRQ(struct pt_regs *regs)
+-{     
++{
++      struct pt_regs *old_regs = set_irq_regs(regs);
++
+       /* high bit used in ret_from_ code  */
+       unsigned irq = ~regs->orig_rax;
+-      if (unlikely(irq >= NR_IRQS)) {
+-              printk(KERN_EMERG "%s: cannot handle IRQ %d\n",
+-                                      __FUNCTION__, irq);
+-              BUG();
+-      }
+-
+       /*exit_idle();*/
+       /*irq_enter();*/
++
+ #ifdef CONFIG_DEBUG_STACKOVERFLOW
+       stack_overflow_check(regs);
+ #endif
+-      __do_IRQ(irq, regs);
++
++      if (likely(irq < NR_IRQS))
++              generic_handle_irq(irq);
++      else
++              printk(KERN_EMERG "%s: %d.%d No irq handler for irq\n",
++                      __func__, smp_processor_id(), irq);
++
+       /*irq_exit();*/
++      set_irq_regs(old_regs);
+       return 1;
+ }
+@@ -192,6 +187,6 @@ EXPORT_SYMBOL(do_softirq);
+  */
+ void ack_bad_irq(unsigned int irq)
+ {
+-        printk("unexpected IRQ trap at vector %02x\n", irq);
++        printk("unexpected IRQ trap at irq %02x\n", irq);
+ }
+ #endif
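The do_IRQ rewrite above drops __do_IRQ(irq, regs) in favour of the 2.6.19
genirq entry points: the register snapshot is parked via set_irq_regs(), and
dispatch goes through generic_handle_irq(), which invokes whichever flow
handler was installed with set_irq_chip_and_handler_name(). A sketch of the
resulting shape (demo_do_IRQ is a hypothetical entry point; stack-overflow
checking and the Xen-specific bits are elided):

#include <linux/kernel.h>
#include <linux/irq.h>
#include <asm/irq_regs.h>

unsigned int demo_do_IRQ(unsigned int irq, struct pt_regs *regs)
{
        /* save the caller's registers for handlers that ask for them */
        struct pt_regs *old_regs = set_irq_regs(regs);

        if (likely(irq < NR_IRQS))
                generic_handle_irq(irq);        /* run the flow handler */
        else
                printk(KERN_EMERG "spurious irq %u\n", irq);

        set_irq_regs(old_regs);
        return 1;
}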
+--- sle11-2009-05-14.orig/arch/x86/kernel/mpparse_64-xen.c     2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/arch/x86/kernel/mpparse_64-xen.c  2009-03-04 11:28:34.000000000 +0100
+@@ -41,8 +41,7 @@ int acpi_found_madt;
+  * Various Linux-internal data structures created from the
+  * MP-table.
+  */
+-unsigned char apic_version [MAX_APICS];
+-unsigned char mp_bus_id_to_type [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
++DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
+ int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
+ static int mp_current_pci_id = 0;
+@@ -56,7 +55,6 @@ struct mpc_config_intsrc mp_irqs[MAX_IRQ
+ int mp_irq_entries;
+ int nr_ioapics;
+-int pic_mode;
+ unsigned long mp_lapic_addr = 0;
+@@ -71,19 +69,6 @@ unsigned disabled_cpus __initdata;
+ /* Bitmask of physically existing CPUs */
+ physid_mask_t phys_cpu_present_map = PHYSID_MASK_NONE;
+-/* ACPI MADT entry parsing functions */
+-#ifdef CONFIG_ACPI
+-extern struct acpi_boot_flags acpi_boot;
+-#ifdef CONFIG_X86_LOCAL_APIC
+-extern int acpi_parse_lapic (acpi_table_entry_header *header);
+-extern int acpi_parse_lapic_addr_ovr (acpi_table_entry_header *header);
+-extern int acpi_parse_lapic_nmi (acpi_table_entry_header *header);
+-#endif /*CONFIG_X86_LOCAL_APIC*/
+-#ifdef CONFIG_X86_IO_APIC
+-extern int acpi_parse_ioapic (acpi_table_entry_header *header);
+-#endif /*CONFIG_X86_IO_APIC*/
+-#endif /*CONFIG_ACPI*/
+-
+ u8 bios_cpu_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
+@@ -109,24 +94,20 @@ static int __init mpf_checksum(unsigned 
+ static void __cpuinit MP_processor_info (struct mpc_config_processor *m)
+ {
+       int cpu;
+-      unsigned char ver;
+       cpumask_t tmp_map;
++      char *bootup_cpu = "";
+       if (!(m->mpc_cpuflag & CPU_ENABLED)) {
+               disabled_cpus++;
+               return;
+       }
+-
+-      printk(KERN_INFO "Processor #%d %d:%d APIC version %d\n",
+-              m->mpc_apicid,
+-             (m->mpc_cpufeature & CPU_FAMILY_MASK)>>8,
+-             (m->mpc_cpufeature & CPU_MODEL_MASK)>>4,
+-              m->mpc_apicver);
+-
+       if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
+-              Dprintk("    Bootup CPU\n");
++              bootup_cpu = " (Bootup-CPU)";
+               boot_cpu_id = m->mpc_apicid;
+       }
++
++      printk(KERN_INFO "Processor #%d%s\n", m->mpc_apicid, bootup_cpu);
++
+       if (num_processors >= NR_CPUS) {
+               printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
+                       " Processor ignored.\n", NR_CPUS);
+@@ -137,24 +118,7 @@ static void __cpuinit MP_processor_info 
+       cpus_complement(tmp_map, cpu_present_map);
+       cpu = first_cpu(tmp_map);
+-#if MAX_APICS < 255   
+-      if ((int)m->mpc_apicid > MAX_APICS) {
+-              printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n",
+-                      m->mpc_apicid, MAX_APICS);
+-              return;
+-      }
+-#endif
+-      ver = m->mpc_apicver;
+-
+       physid_set(m->mpc_apicid, phys_cpu_present_map);
+-      /*
+-       * Validate version
+-       */
+-      if (ver == 0x0) {
+-              printk(KERN_ERR "BIOS bug, APIC version is 0 for CPU#%d! fixing up to 0x10. (tell your hw vendor)\n", m->mpc_apicid);
+-              ver = 0x10;
+-      }
+-      apic_version[m->mpc_apicid] = ver;
+       if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
+               /*
+                * bios_cpu_apicid is required to have processors listed
+@@ -185,37 +149,42 @@ static void __init MP_bus_info (struct m
+       Dprintk("Bus #%d is %s\n", m->mpc_busid, str);
+       if (strncmp(str, "ISA", 3) == 0) {
+-              mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
+-      } else if (strncmp(str, "EISA", 4) == 0) {
+-              mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA;
++              set_bit(m->mpc_busid, mp_bus_not_pci);
+       } else if (strncmp(str, "PCI", 3) == 0) {
+-              mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI;
++              clear_bit(m->mpc_busid, mp_bus_not_pci);
+               mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id;
+               mp_current_pci_id++;
+-      } else if (strncmp(str, "MCA", 3) == 0) {
+-              mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA;
+       } else {
+               printk(KERN_ERR "Unknown bustype %s\n", str);
+       }
+ }
++static int bad_ioapic(unsigned long address)
++{
++      if (nr_ioapics >= MAX_IO_APICS) {
++              printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
++                      "(found %d)\n", MAX_IO_APICS, nr_ioapics);
++              panic("Recompile kernel with bigger MAX_IO_APICS!\n");
++      }
++      if (!address) {
++              printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address"
++                      " found in table, skipping!\n");
++              return 1;
++      }
++      return 0;
++}
++
+ static void __init MP_ioapic_info (struct mpc_config_ioapic *m)
+ {
+       if (!(m->mpc_flags & MPC_APIC_USABLE))
+               return;
+-      printk("I/O APIC #%d Version %d at 0x%X.\n",
+-              m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr);
+-      if (nr_ioapics >= MAX_IO_APICS) {
+-              printk(KERN_ERR "Max # of I/O APICs (%d) exceeded (found %d).\n",
+-                      MAX_IO_APICS, nr_ioapics);
+-              panic("Recompile kernel with bigger MAX_IO_APICS!.\n");
+-      }
+-      if (!m->mpc_apicaddr) {
+-              printk(KERN_ERR "WARNING: bogus zero I/O APIC address"
+-                      " found in MP table, skipping!\n");
++      printk("I/O APIC #%d at 0x%X.\n",
++              m->mpc_apicid, m->mpc_apicaddr);
++
++      if (bad_ioapic(m->mpc_apicaddr))
+               return;
+-      }
++
+       mp_ioapics[nr_ioapics] = *m;
+       nr_ioapics++;
+ }
+@@ -239,19 +208,6 @@ static void __init MP_lintsrc_info (stru
+                       m->mpc_irqtype, m->mpc_irqflag & 3,
+                       (m->mpc_irqflag >> 2) &3, m->mpc_srcbusid,
+                       m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
+-      /*
+-       * Well it seems all SMP boards in existence
+-       * use ExtINT/LVT1 == LINT0 and
+-       * NMI/LVT2 == LINT1 - the following check
+-       * will show us if this assumptions is false.
+-       * Until then we do not have to add baggage.
+-       */
+-      if ((m->mpc_irqtype == mp_ExtINT) &&
+-              (m->mpc_destapiclint != 0))
+-                      BUG();
+-      if ((m->mpc_irqtype == mp_NMI) &&
+-              (m->mpc_destapiclint != 1))
+-                      BUG();
+ }
+ /*
+@@ -265,7 +221,7 @@ static int __init smp_read_mpc(struct mp
+       unsigned char *mpt=((unsigned char *)mpc)+count;
+       if (memcmp(mpc->mpc_signature,MPC_SIGNATURE,4)) {
+-              printk("SMP mptable: bad signature [%c%c%c%c]!\n",
++              printk("MPTABLE: bad signature [%c%c%c%c]!\n",
+                       mpc->mpc_signature[0],
+                       mpc->mpc_signature[1],
+                       mpc->mpc_signature[2],
+@@ -273,31 +229,31 @@ static int __init smp_read_mpc(struct mp
+               return 0;
+       }
+       if (mpf_checksum((unsigned char *)mpc,mpc->mpc_length)) {
+-              printk("SMP mptable: checksum error!\n");
++              printk("MPTABLE: checksum error!\n");
+               return 0;
+       }
+       if (mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04) {
+-              printk(KERN_ERR "SMP mptable: bad table version (%d)!!\n",
++              printk(KERN_ERR "MPTABLE: bad table version (%d)!!\n",
+                       mpc->mpc_spec);
+               return 0;
+       }
+       if (!mpc->mpc_lapic) {
+-              printk(KERN_ERR "SMP mptable: null local APIC address!\n");
++              printk(KERN_ERR "MPTABLE: null local APIC address!\n");
+               return 0;
+       }
+       memcpy(str,mpc->mpc_oem,8);
+-      str[8]=0;
+-      printk(KERN_INFO "OEM ID: %s ",str);
++      str[8] = 0;
++      printk(KERN_INFO "MPTABLE: OEM ID: %s ",str);
+       memcpy(str,mpc->mpc_productid,12);
+-      str[12]=0;
+-      printk("Product ID: %s ",str);
++      str[12] = 0;
++      printk("MPTABLE: Product ID: %s ",str);
+-      printk("APIC at: 0x%X\n",mpc->mpc_lapic);
++      printk("MPTABLE: APIC at: 0x%X\n",mpc->mpc_lapic);
+       /* save the local APIC address, it might be non-default */
+       if (!acpi_lapic)
+-      mp_lapic_addr = mpc->mpc_lapic;
++              mp_lapic_addr = mpc->mpc_lapic;
+       /*
+        *      Now process the configuration blocks.
+@@ -309,7 +265,7 @@ static int __init smp_read_mpc(struct mp
+                               struct mpc_config_processor *m=
+                                       (struct mpc_config_processor *)mpt;
+                               if (!acpi_lapic)
+-                              MP_processor_info(m);
++                                      MP_processor_info(m);
+                               mpt += sizeof(*m);
+                               count += sizeof(*m);
+                               break;
+@@ -328,8 +284,8 @@ static int __init smp_read_mpc(struct mp
+                               struct mpc_config_ioapic *m=
+                                       (struct mpc_config_ioapic *)mpt;
+                               MP_ioapic_info(m);
+-                              mpt+=sizeof(*m);
+-                              count+=sizeof(*m);
++                              mpt += sizeof(*m);
++                              count += sizeof(*m);
+                               break;
+                       }
+                       case MP_INTSRC:
+@@ -338,8 +294,8 @@ static int __init smp_read_mpc(struct mp
+                                       (struct mpc_config_intsrc *)mpt;
+                               MP_intsrc_info(m);
+-                              mpt+=sizeof(*m);
+-                              count+=sizeof(*m);
++                              mpt += sizeof(*m);
++                              count += sizeof(*m);
+                               break;
+                       }
+                       case MP_LINTSRC:
+@@ -347,15 +303,15 @@ static int __init smp_read_mpc(struct mp
+                               struct mpc_config_lintsrc *m=
+                                       (struct mpc_config_lintsrc *)mpt;
+                               MP_lintsrc_info(m);
+-                              mpt+=sizeof(*m);
+-                              count+=sizeof(*m);
++                              mpt += sizeof(*m);
++                              count += sizeof(*m);
+                               break;
+                       }
+               }
+       }
+       clustered_apic_check();
+       if (!num_processors)
+-              printk(KERN_ERR "SMP mptable: no processors registered!\n");
++              printk(KERN_ERR "MPTABLE: no processors registered!\n");
+       return num_processors;
+ }
+@@ -451,13 +407,10 @@ static inline void __init construct_defa
+        * 2 CPUs, numbered 0 & 1.
+        */
+       processor.mpc_type = MP_PROCESSOR;
+-      /* Either an integrated APIC or a discrete 82489DX. */
+-      processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
++      processor.mpc_apicver = 0;
+       processor.mpc_cpuflag = CPU_ENABLED;
+-      processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
+-                                 (boot_cpu_data.x86_model << 4) |
+-                                 boot_cpu_data.x86_mask;
+-      processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
++      processor.mpc_cpufeature = 0;
++      processor.mpc_featureflag = 0;
+       processor.mpc_reserved[0] = 0;
+       processor.mpc_reserved[1] = 0;
+       for (i = 0; i < 2; i++) {
+@@ -476,14 +429,6 @@ static inline void __init construct_defa
+               case 5:
+                       memcpy(bus.mpc_bustype, "ISA   ", 6);
+                       break;
+-              case 2:
+-              case 6:
+-              case 3:
+-                      memcpy(bus.mpc_bustype, "EISA  ", 6);
+-                      break;
+-              case 4:
+-              case 7:
+-                      memcpy(bus.mpc_bustype, "MCA   ", 6);
+       }
+       MP_bus_info(&bus);
+       if (mpc_default_type > 4) {
+@@ -494,7 +439,7 @@ static inline void __init construct_defa
+       ioapic.mpc_type = MP_IOAPIC;
+       ioapic.mpc_apicid = 2;
+-      ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
++      ioapic.mpc_apicver = 0;
+       ioapic.mpc_flags = MPC_APIC_USABLE;
+       ioapic.mpc_apicaddr = 0xFEC00000;
+       MP_ioapic_info(&ioapic);
+@@ -537,13 +482,6 @@ void __init get_smp_config (void)
+               printk(KERN_INFO "Using ACPI for processor (LAPIC) configuration information\n");
+       printk("Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification);
+-      if (mpf->mpf_feature2 & (1<<7)) {
+-              printk(KERN_INFO "    IMCR and PIC compatibility mode.\n");
+-              pic_mode = 1;
+-      } else {
+-              printk(KERN_INFO "    Virtual Wire compatibility mode.\n");
+-              pic_mode = 0;
+-      }
+       /*
+        * Now see if we need to read further.
+@@ -620,7 +558,7 @@ static int __init smp_scan_config (unsig
+       return 0;
+ }
+-void __init find_intel_smp (void)
++void __init find_smp_config(void)
+ {
+       unsigned int address;
+@@ -637,9 +575,7 @@ void __init find_intel_smp (void)
+                       smp_scan_config(0xF0000,0x10000))
+               return;
+       /*
+-       * If it is an SMP machine we should know now, unless the
+-       * configuration is in an EISA/MCA bus machine with an
+-       * extended bios data area.
++       * If it is an SMP machine we should know now.
+        *
+        * there is a real-mode segmented pointer pointing to the
+        * 4K EBDA area at 0x40E, calculate and scan it here.
+@@ -660,64 +596,38 @@ void __init find_intel_smp (void)
+        printk(KERN_INFO "No mptable found.\n");
+ }
+-/*
+- * - Intel MP Configuration Table
+- */
+-void __init find_smp_config (void)
+-{
+-#ifdef CONFIG_X86_LOCAL_APIC
+-      find_intel_smp();
+-#endif
+-}
+-
+-
+ /* --------------------------------------------------------------------------
+                             ACPI-based MP Configuration
+    -------------------------------------------------------------------------- */
+ #ifdef CONFIG_ACPI
+-void __init mp_register_lapic_address (
+-      u64                     address)
++void __init mp_register_lapic_address(u64 address)
+ {
+ #ifndef CONFIG_XEN
+       mp_lapic_addr = (unsigned long) address;
+-
+       set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr);
+-
+       if (boot_cpu_id == -1U)
+               boot_cpu_id = GET_APIC_ID(apic_read(APIC_ID));
+-
+-      Dprintk("Boot CPU = %d\n", boot_cpu_physical_apicid);
+ #endif
+ }
+-
+-void __cpuinit mp_register_lapic (
+-      u8                      id, 
+-      u8                      enabled)
++void __cpuinit mp_register_lapic (u8 id, u8 enabled)
+ {
+       struct mpc_config_processor processor;
+       int                     boot_cpu = 0;
+       
+-      if (id >= MAX_APICS) {
+-              printk(KERN_WARNING "Processor #%d invalid (max %d)\n",
+-                      id, MAX_APICS);
+-              return;
+-      }
+-
+-      if (id == boot_cpu_physical_apicid)
++      if (id == boot_cpu_id)
+               boot_cpu = 1;
+ #ifndef CONFIG_XEN
+       processor.mpc_type = MP_PROCESSOR;
+       processor.mpc_apicid = id;
+-      processor.mpc_apicver = GET_APIC_VERSION(apic_read(APIC_LVR));
++      processor.mpc_apicver = 0;
+       processor.mpc_cpuflag = (enabled ? CPU_ENABLED : 0);
+       processor.mpc_cpuflag |= (boot_cpu ? CPU_BOOTPROCESSOR : 0);
+-      processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) | 
+-              (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask;
+-      processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
++      processor.mpc_cpufeature = 0;
++      processor.mpc_featureflag = 0;
+       processor.mpc_reserved[0] = 0;
+       processor.mpc_reserved[1] = 0;
+ #endif
+@@ -725,8 +635,6 @@ void __cpuinit mp_register_lapic (
+       MP_processor_info(&processor);
+ }
+-#ifdef CONFIG_X86_IO_APIC
+-
+ #define MP_ISA_BUS            0
+ #define MP_MAX_IOAPIC_PIN     127
+@@ -737,11 +645,9 @@ static struct mp_ioapic_routing {
+       u32                     pin_programmed[4];
+ } mp_ioapic_routing[MAX_IO_APICS];
+-
+-static int mp_find_ioapic (
+-      int                     gsi)
++static int mp_find_ioapic(int gsi)
+ {
+-      int                     i = 0;
++      int i = 0;
+       /* Find the IOAPIC that manages this GSI. */
+       for (i = 0; i < nr_ioapics; i++) {
+@@ -751,28 +657,15 @@ static int mp_find_ioapic (
+       }
+       printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi);
+-
+       return -1;
+ }
+-      
+-void __init mp_register_ioapic (
+-      u8                      id, 
+-      u32                     address,
+-      u32                     gsi_base)
++void __init mp_register_ioapic(u8 id, u32 address, u32 gsi_base)
+ {
+-      int                     idx = 0;
++      int idx = 0;
+-      if (nr_ioapics >= MAX_IO_APICS) {
+-              printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
+-                      "(found %d)\n", MAX_IO_APICS, nr_ioapics);
+-              panic("Recompile kernel with bigger MAX_IO_APICS!\n");
+-      }
+-      if (!address) {
+-              printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address"
+-                      " found in MADT table, skipping!\n");
++      if (bad_ioapic(address))
+               return;
+-      }
+       idx = nr_ioapics++;
+@@ -784,7 +677,7 @@ void __init mp_register_ioapic (
+       set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
+ #endif
+       mp_ioapics[idx].mpc_apicid = id;
+-      mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx);
++      mp_ioapics[idx].mpc_apicver = 0;
+       
+       /* 
+        * Build basic IRQ lookup table to facilitate gsi->io_apic lookups
+@@ -795,21 +688,15 @@ void __init mp_register_ioapic (
+       mp_ioapic_routing[idx].gsi_end = gsi_base + 
+               io_apic_get_redir_entries(idx);
+-      printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, "
++      printk(KERN_INFO "IOAPIC[%d]: apic_id %d, address 0x%x, "
+               "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid, 
+-              mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr,
++              mp_ioapics[idx].mpc_apicaddr,
+               mp_ioapic_routing[idx].gsi_start,
+               mp_ioapic_routing[idx].gsi_end);
+-
+-      return;
+ }
+-
+-void __init mp_override_legacy_irq (
+-      u8                      bus_irq,
+-      u8                      polarity, 
+-      u8                      trigger, 
+-      u32                     gsi)
++void __init
++mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32       gsi)
+ {
+       struct mpc_config_intsrc intsrc;
+       int                     ioapic = -1;
+@@ -847,22 +734,18 @@ void __init mp_override_legacy_irq (
+       mp_irqs[mp_irq_entries] = intsrc;
+       if (++mp_irq_entries == MAX_IRQ_SOURCES)
+               panic("Max # of irq sources exceeded!\n");
+-
+-      return;
+ }
+-
+-void __init mp_config_acpi_legacy_irqs (void)
++void __init mp_config_acpi_legacy_irqs(void)
+ {
+       struct mpc_config_intsrc intsrc;
+-      int                     i = 0;
+-      int                     ioapic = -1;
++      int i = 0;
++      int ioapic = -1;
+       /* 
+        * Fabricate the legacy ISA bus (bus #31).
+        */
+-      mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA;
+-      Dprintk("Bus #%d is ISA\n", MP_ISA_BUS);
++      set_bit(MP_ISA_BUS, mp_bus_not_pci);
+       /* 
+        * Locate the IOAPIC that manages the ISA IRQs (0-15). 
+@@ -915,24 +798,13 @@ void __init mp_config_acpi_legacy_irqs (
+               if (++mp_irq_entries == MAX_IRQ_SOURCES)
+                       panic("Max # of irq sources exceeded!\n");
+       }
+-
+-      return;
+ }
+-#define MAX_GSI_NUM   4096
+-
+ int mp_register_gsi(u32 gsi, int triggering, int polarity)
+ {
+-      int                     ioapic = -1;
+-      int                     ioapic_pin = 0;
+-      int                     idx, bit = 0;
+-      static int              pci_irq = 16;
+-      /*
+-       * Mapping between Global System Interrupts, which
+-       * represent all possible interrupts, to the IRQs
+-       * assigned to actual devices.
+-       */
+-      static int              gsi_to_irq[MAX_GSI_NUM];
++      int ioapic = -1;
++      int ioapic_pin = 0;
++      int idx, bit = 0;
+       if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC)
+               return gsi;
+@@ -965,47 +837,14 @@ int mp_register_gsi(u32 gsi, int trigger
+       if ((1<<bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) {
+               Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n",
+                       mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
+-              return gsi_to_irq[gsi];
++              return gsi;
+       }
+       mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1<<bit);
+-      if (triggering == ACPI_LEVEL_SENSITIVE) {
+-              /*
+-               * For PCI devices assign IRQs in order, avoiding gaps
+-               * due to unused I/O APIC pins.
+-               */
+-              int irq = gsi;
+-              if (gsi < MAX_GSI_NUM) {
+-                      /*
+-                       * Retain the VIA chipset work-around (gsi > 15), but
+-                       * avoid a problem where the 8254 timer (IRQ0) is setup
+-                       * via an override (so it's not on pin 0 of the ioapic),
+-                       * and at the same time, the pin 0 interrupt is a PCI
+-                       * type.  The gsi > 15 test could cause these two pins
+-                       * to be shared as IRQ0, and they are not shareable.
+-                       * So test for this condition, and if necessary, avoid
+-                       * the pin collision.
+-                       */
+-                      if (gsi > 15 || (gsi == 0 && !timer_uses_ioapic_pin_0))
+-                              gsi = pci_irq++;
+-                      /*
+-                       * Don't assign IRQ used by ACPI SCI
+-                       */
+-                      if (gsi == acpi_fadt.sci_int)
+-                              gsi = pci_irq++;
+-                      gsi_to_irq[irq] = gsi;
+-              } else {
+-                      printk(KERN_ERR "GSI %u is too high\n", gsi);
+-                      return gsi;
+-              }
+-      }
+-
+       io_apic_set_pci_routing(ioapic, ioapic_pin, gsi,
+               triggering == ACPI_EDGE_SENSITIVE ? 0 : 1,
+               polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
+       return gsi;
+ }
+-
+-#endif /*CONFIG_X86_IO_APIC*/
+ #endif /*CONFIG_ACPI*/
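The mpparse hunks above drop the per-bus type array (mp_bus_id_to_type[],
which distinguished ISA/EISA/PCI/MCA) in favour of a single mp_bus_not_pci
bitmap, since the 2.6.19 x86-64 code only cares whether a bus is PCI. A
sketch of the bitmap idiom; the MAX_MP_BUSSES value here is assumed for
illustration:

#include <linux/types.h>
#include <linux/bitops.h>

#define MAX_MP_BUSSES 256       /* assumed value, for illustration */

static DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);

static void record_bus(int busid, int is_pci)
{
        if (is_pci)
                clear_bit(busid, mp_bus_not_pci);       /* PCI bus */
        else
                set_bit(busid, mp_bus_not_pci);         /* ISA or other */
}

static int bus_is_pci(int busid)
{
        return !test_bit(busid, mp_bus_not_pci);
}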
+--- sle11-2009-05-14.orig/arch/x86/kernel/process_64-xen.c     2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/arch/x86/kernel/process_64-xen.c  2009-03-04 11:28:34.000000000 +0100
+@@ -89,25 +89,24 @@ void idle_notifier_unregister(struct not
+ }
+ EXPORT_SYMBOL(idle_notifier_unregister);
+-enum idle_state { CPU_IDLE, CPU_NOT_IDLE };
+-static DEFINE_PER_CPU(enum idle_state, idle_state) = CPU_NOT_IDLE;
+-
+ void enter_idle(void)
+ {
+-      __get_cpu_var(idle_state) = CPU_IDLE;
++      write_pda(isidle, 1);
+       atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
+ }
+ static void __exit_idle(void)
+ {
+-      __get_cpu_var(idle_state) = CPU_NOT_IDLE;
++      if (test_and_clear_bit_pda(0, isidle) == 0)
++              return;
+       atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
+ }
+ /* Called from interrupts to signify idle end */
+ void exit_idle(void)
+ {
+-      if (current->pid | read_pda(irqcount))
++      /* idle loop has pid 0 */
++      if (current->pid)
+               return;
+       __exit_idle();
+ }
+@@ -184,6 +183,9 @@ void cpu_idle (void)
+                               play_dead();
+                       enter_idle();
+                       idle();
++                      /* In many cases the interrupt that ended idle
++                         has already called exit_idle. But some idle
++                         loops can be woken up without interrupt. */
+                       __exit_idle();
+               }
+@@ -196,7 +198,7 @@ void cpu_idle (void)
+ void cpu_idle_wait(void)
+ {
+       unsigned int cpu, this_cpu = get_cpu();
+-      cpumask_t map;
++      cpumask_t map, tmp = current->cpus_allowed;
+       set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
+       put_cpu();
+@@ -219,6 +221,8 @@ void cpu_idle_wait(void)
+               }
+               cpus_and(map, map, cpu_online_map);
+       } while (!cpus_empty(map));
++
++      set_cpus_allowed(current, tmp);
+ }
+ EXPORT_SYMBOL_GPL(cpu_idle_wait);
+@@ -250,9 +254,9 @@ void __show_regs(struct pt_regs * regs)
+       print_modules();
+       printk("Pid: %d, comm: %.20s %s %s %.*s\n",
+               current->pid, current->comm, print_tainted(),
+-              system_utsname.release,
+-              (int)strcspn(system_utsname.version, " "),
+-              system_utsname.version);
++              init_utsname()->release,
++              (int)strcspn(init_utsname()->version, " "),
++              init_utsname()->version);
+       printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip);
+       printk_address(regs->rip); 
+       printk("RSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss, regs->rsp,
+@@ -310,6 +314,7 @@ void exit_thread(void)
+               kfree(t->io_bitmap_ptr);
+               t->io_bitmap_ptr = NULL;
++              clear_thread_flag(TIF_IO_BITMAP);
+               /*
+                * Careful, clear this in the TSS too:
+                */
+@@ -340,6 +345,7 @@ void flush_thread(void)
+               if (t->flags & _TIF_IA32)
+                       current_thread_info()->status |= TS_COMPAT;
+       }
++      t->flags &= ~_TIF_DEBUG;
+       tsk->thread.debugreg0 = 0;
+       tsk->thread.debugreg1 = 0;
+@@ -432,7 +438,7 @@ int copy_thread(int nr, unsigned long cl
+       asm("mov %%es,%0" : "=m" (p->thread.es));
+       asm("mov %%ds,%0" : "=m" (p->thread.ds));
+-      if (unlikely(me->thread.io_bitmap_ptr != NULL)) { 
++      if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
+               p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
+               if (!p->thread.io_bitmap_ptr) {
+                       p->thread.io_bitmap_max = 0;
+@@ -440,6 +446,7 @@ int copy_thread(int nr, unsigned long cl
+               }
+               memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
+                               IO_BITMAP_BYTES);
++              set_tsk_thread_flag(p, TIF_IO_BITMAP);
+       } 
+       /*
+@@ -474,6 +481,30 @@ static inline void __save_init_fpu( stru
+ }
+ /*
++ * This special macro can be used to load a debugging register
++ */
++#define loaddebug(thread,r) set_debugreg(thread->debugreg ## r, r)
++
++static inline void __switch_to_xtra(struct task_struct *prev_p,
++                                  struct task_struct *next_p)
++{
++      struct thread_struct *prev, *next;
++
++      prev = &prev_p->thread,
++      next = &next_p->thread;
++
++      if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
++              loaddebug(next, 0);
++              loaddebug(next, 1);
++              loaddebug(next, 2);
++              loaddebug(next, 3);
++              /* no 4 and 5 */
++              loaddebug(next, 6);
++              loaddebug(next, 7);
++      }
++}
++
++/*
+  *    switch_to(x,y) should switch tasks from x to y.
+  *
+  * This could still be optimized: 
+@@ -501,6 +532,10 @@ __switch_to(struct task_struct *prev_p, 
+ #endif
+       multicall_entry_t _mcl[8], *mcl = _mcl;
++      /* we're going to use this soon, after a few expensive things */
++      if (next_p->fpu_counter>5)
++              prefetch(&next->i387.fxsave);
++
+       /*
+        * This is basically '__unlazy_fpu', except that we queue a
+        * multicall to indicate FPU task switch, rather than
+@@ -513,7 +548,8 @@ __switch_to(struct task_struct *prev_p, 
+               mcl->op      = __HYPERVISOR_fpu_taskswitch;
+               mcl->args[0] = 1;
+               mcl++;
+-      }
++      } else
++              prev_p->fpu_counter = 0;
+       /*
+        * Reload esp0, LDT and the page table pointer:
+@@ -608,21 +644,29 @@ __switch_to(struct task_struct *prev_p, 
+       write_pda(oldrsp, next->userrsp); 
+       write_pda(pcurrent, next_p); 
+       write_pda(kernelstack,
+-                task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET);
++      (unsigned long)task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET);
++#ifdef CONFIG_CC_STACKPROTECTOR
++      write_pda(stack_canary, next_p->stack_canary);
++
++      /*
++       * Build time only check to make sure the stack_canary is at
++       * offset 40 in the pda; this is a gcc ABI requirement
++       */
++      BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
++#endif
+       /*
+        * Now maybe reload the debug registers
+        */
+-      if (unlikely(next->debugreg7)) {
+-              set_debugreg(next->debugreg0, 0);
+-              set_debugreg(next->debugreg1, 1);
+-              set_debugreg(next->debugreg2, 2);
+-              set_debugreg(next->debugreg3, 3);
+-              /* no 4 and 5 */
+-              set_debugreg(next->debugreg6, 6);
+-              set_debugreg(next->debugreg7, 7);
+-      }
++      if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW))
++              __switch_to_xtra(prev_p, next_p);
++      /* If the task has used the FPU in the last 5 timeslices, just do a
++       * full restore of the math state immediately to avoid the trap; the
++       * chances of needing the FPU soon are obviously high now
++       */
++      if (next_p->fpu_counter>5)
++              math_state_restore();
+       return prev_p;
+ }
+@@ -842,7 +886,7 @@ int dump_task_regs(struct task_struct *t
+ unsigned long arch_align_stack(unsigned long sp)
+ {
+-      if (randomize_va_space)
++      if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
+               sp -= get_random_int() % 8192;
+       return sp & ~0xf;
+ }
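A recurring theme in the ioport and process hunks above is replacing ad-hoc
per-field tests in the context-switch path (io_bitmap_ptr != NULL,
next->debugreg7) with thread-info flags, so __switch_to can test a single
flags word (_TIF_WORK_CTXSW) and fall into __switch_to_xtra() only in the
rare case. A sketch of the flag idiom with a made-up TIF_DEMO bit; real
flag numbers live in asm/thread_info.h:

#include <linux/sched.h>
#include <linux/thread_info.h>

/* hypothetical flag bit, for illustration; do not reuse a real one */
#define TIF_DEMO        18
#define _TIF_DEMO       (1 << TIF_DEMO)

static void demo_enable(void)
{
        set_thread_flag(TIF_DEMO);      /* current task opts into slow path */
}

static void demo_switch_in(struct task_struct *next_p)
{
        /* one cheap word test, as __switch_to does with _TIF_WORK_CTXSW */
        if (test_tsk_thread_flag(next_p, TIF_DEMO)) {
                /* expensive per-task state restore would go here */
        }
}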
+--- sle11-2009-05-14.orig/arch/x86/kernel/setup_64-xen.c       2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/arch/x86/kernel/setup_64-xen.c    2009-03-04 11:28:34.000000000 +0100
+@@ -88,9 +88,6 @@ extern struct edid_info edid_info;
+ shared_info_t *HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page;
+ EXPORT_SYMBOL(HYPERVISOR_shared_info);
+-extern char hypercall_page[PAGE_SIZE];
+-EXPORT_SYMBOL(hypercall_page);
+-
+ static int xen_panic_event(struct notifier_block *, unsigned long, void *);
+ static struct notifier_block xen_panic_block = {
+       xen_panic_event, NULL, 0 /* try to go last */
+@@ -118,16 +115,6 @@ EXPORT_SYMBOL(boot_cpu_data);
+ unsigned long mmu_cr4_features;
+-int acpi_disabled;
+-EXPORT_SYMBOL(acpi_disabled);
+-#ifdef        CONFIG_ACPI
+-extern int __initdata acpi_ht;
+-extern acpi_interrupt_flags   acpi_sci_flags;
+-int __initdata acpi_force = 0;
+-#endif
+-
+-int acpi_numa __initdata;
+-
+ /* Boot loader ID as an integer, for the benefit of proc_dointvec */
+ int bootloader_type;
+@@ -151,10 +138,6 @@ struct sys_desc_table_struct {
+ struct edid_info edid_info;
+ EXPORT_SYMBOL_GPL(edid_info);
+-struct e820map e820;
+-#ifdef CONFIG_XEN
+-struct e820map machine_e820;
+-#endif
+ extern int root_mountflags;
+@@ -181,9 +164,6 @@ struct resource standard_io_resources[] 
+               .flags = IORESOURCE_BUSY | IORESOURCE_IO }
+ };
+-#define STANDARD_IO_RESOURCES \
+-      (sizeof standard_io_resources / sizeof standard_io_resources[0])
+-
+ #define IORESOURCE_RAM (IORESOURCE_BUSY | IORESOURCE_MEM)
+ struct resource data_resource = {
+@@ -230,9 +210,6 @@ static struct resource adapter_rom_resou
+               .flags = IORESOURCE_ROM }
+ };
+-#define ADAPTER_ROM_RESOURCES \
+-      (sizeof adapter_rom_resources / sizeof adapter_rom_resources[0])
+-
+ static struct resource video_rom_resource = {
+       .name = "Video ROM",
+       .start = 0xc0000,
+@@ -309,7 +286,8 @@ static void __init probe_roms(void)
+       }
+       /* check for adapter roms on 2k boundaries */
+-      for (i = 0; i < ADAPTER_ROM_RESOURCES && start < upper; start += 2048) {
++      for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper;
++           start += 2048) {
+               rom = isa_bus_to_virt(start);
+               if (!romsignature(rom))
+                       continue;
+@@ -329,187 +307,22 @@ static void __init probe_roms(void)
+       }
+ }
+-/* Check for full argument with no trailing characters */
+-static int fullarg(char *p, char *arg)
++#ifdef CONFIG_PROC_VMCORE
++/* elfcorehdr= specifies the location of elf core header
++ * stored by the crashed kernel. This option will be passed
++ * by kexec loader to the capture kernel.
++ */
++static int __init setup_elfcorehdr(char *arg)
+ {
+-      int l = strlen(arg);
+-      return !memcmp(p, arg, l) && (p[l] == 0 || isspace(p[l]));
++      char *end;
++      if (!arg)
++              return -EINVAL;
++      elfcorehdr_addr = memparse(arg, &end);
++      return end > arg ? 0 : -EINVAL;
+ }
+-
+-static __init void parse_cmdline_early (char ** cmdline_p)
+-{
+-      char c = ' ', *to = command_line, *from = COMMAND_LINE;
+-      int len = 0;
+-      int userdef = 0;
+-
+-      for (;;) {
+-              if (c != ' ') 
+-                      goto next_char; 
+-
+-#ifdef  CONFIG_SMP
+-              /*
+-               * If the BIOS enumerates physical processors before logical,
+-               * maxcpus=N at enumeration-time can be used to disable HT.
+-               */
+-              else if (!memcmp(from, "maxcpus=", 8)) {
+-                      extern unsigned int maxcpus;
+-
+-                      maxcpus = simple_strtoul(from + 8, NULL, 0);
+-              }
+-#endif
+-#ifdef CONFIG_ACPI
+-              /* "acpi=off" disables both ACPI table parsing and interpreter init */
+-              if (fullarg(from,"acpi=off"))
+-                      disable_acpi();
+-
+-              if (fullarg(from, "acpi=force")) { 
+-                      /* add later when we do DMI horrors: */
+-                      acpi_force = 1;
+-                      acpi_disabled = 0;
+-              }
+-
+-              /* acpi=ht just means: do ACPI MADT parsing 
+-                 at bootup, but don't enable the full ACPI interpreter */
+-              if (fullarg(from, "acpi=ht")) { 
+-                      if (!acpi_force)
+-                              disable_acpi();
+-                      acpi_ht = 1; 
+-              }
+-                else if (fullarg(from, "pci=noacpi")) 
+-                      acpi_disable_pci();
+-              else if (fullarg(from, "acpi=noirq"))
+-                      acpi_noirq_set();
+-
+-              else if (fullarg(from, "acpi_sci=edge"))
+-                      acpi_sci_flags.trigger =  1;
+-              else if (fullarg(from, "acpi_sci=level"))
+-                      acpi_sci_flags.trigger = 3;
+-              else if (fullarg(from, "acpi_sci=high"))
+-                      acpi_sci_flags.polarity = 1;
+-              else if (fullarg(from, "acpi_sci=low"))
+-                      acpi_sci_flags.polarity = 3;
+-
+-              /* acpi=strict disables out-of-spec workarounds */
+-              else if (fullarg(from, "acpi=strict")) {
+-                      acpi_strict = 1;
+-              }
+-#ifdef CONFIG_X86_IO_APIC
+-              else if (fullarg(from, "acpi_skip_timer_override"))
+-                      acpi_skip_timer_override = 1;
+-#endif
+-#endif
+-
+-#ifndef CONFIG_XEN
+-              if (fullarg(from, "nolapic") || fullarg(from, "disableapic")) {
+-                      clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
+-                      disable_apic = 1;
+-              }
+-
+-              if (fullarg(from, "noapic"))
+-                      skip_ioapic_setup = 1;
+-
+-              if (fullarg(from,"apic")) {
+-                      skip_ioapic_setup = 0;
+-                      ioapic_force = 1;
+-              }
+-#endif
+-                      
+-              if (!memcmp(from, "mem=", 4))
+-                      parse_memopt(from+4, &from); 
+-
+-              if (!memcmp(from, "memmap=", 7)) {
+-                      /* exactmap option is for used defined memory */
+-                      if (!memcmp(from+7, "exactmap", 8)) {
+-#ifdef CONFIG_CRASH_DUMP
+-                              /* If we are doing a crash dump, we
+-                               * still need to know the real mem
+-                               * size before original memory map is
+-                               * reset.
+-                               */
+-                              saved_max_pfn = e820_end_of_ram();
+-#endif
+-                              from += 8+7;
+-                              end_pfn_map = 0;
+-                              e820.nr_map = 0;
+-                              userdef = 1;
+-                      }
+-                      else {
+-                              parse_memmapopt(from+7, &from);
+-                              userdef = 1;
+-                      }
+-              }
+-
+-#ifdef CONFIG_NUMA
+-              if (!memcmp(from, "numa=", 5))
+-                      numa_setup(from+5); 
++early_param("elfcorehdr", setup_elfcorehdr);
+ #endif
+-              if (!memcmp(from,"iommu=",6)) { 
+-                      iommu_setup(from+6); 
+-              }
+-
+-              if (fullarg(from,"oops=panic"))
+-                      panic_on_oops = 1;
+-
+-              if (!memcmp(from, "noexec=", 7))
+-                      nonx_setup(from + 7);
+-
+-#ifdef CONFIG_KEXEC
+-              /* crashkernel=size@addr specifies the location to reserve for
+-               * a crash kernel.  By reserving this memory we guarantee
+-               * that linux never set's it up as a DMA target.
+-               * Useful for holding code to do something appropriate
+-               * after a kernel panic.
+-               */
+-              else if (!memcmp(from, "crashkernel=", 12)) {
+-#ifndef CONFIG_XEN
+-                      unsigned long size, base;
+-                      size = memparse(from+12, &from);
+-                      if (*from == '@') {
+-                              base = memparse(from+1, &from);
+-                              /* FIXME: Do I want a sanity check
+-                               * to validate the memory range?
+-                               */
+-                              crashk_res.start = base;
+-                              crashk_res.end   = base + size - 1;
+-                      }
+-#else
+-                      printk("Ignoring crashkernel command line, "
+-                             "parameter will be supplied by xen\n");
+-#endif
+-              }
+-#endif
+-
+-#ifdef CONFIG_PROC_VMCORE
+-              /* elfcorehdr= specifies the location of elf core header
+-               * stored by the crashed kernel. This option will be passed
+-               * by kexec loader to the capture kernel.
+-               */
+-              else if(!memcmp(from, "elfcorehdr=", 11))
+-                      elfcorehdr_addr = memparse(from+11, &from);
+-#endif
+-
+-#if defined(CONFIG_HOTPLUG_CPU) && !defined(CONFIG_XEN)
+-              else if (!memcmp(from, "additional_cpus=", 16))
+-                      setup_additional_cpus(from+16);
+-#endif
+-
+-      next_char:
+-              c = *(from++);
+-              if (!c)
+-                      break;
+-              if (COMMAND_LINE_SIZE <= ++len)
+-                      break;
+-              *(to++) = c;
+-      }
+-      if (userdef) {
+-              printk(KERN_INFO "user-defined physical RAM map:\n");
+-              e820_print_map("user");
+-      }
+-      *to = '\0';
+-      *cmdline_p = command_line;
+-}
+-
+ #ifndef CONFIG_NUMA
+ static void __init
+ contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
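
[Note on the hunk above: it retires the hand-rolled parse_cmdline_early() scanner in favor of the generic early_param() machinery, with setup_elfcorehdr() as the first converted option. A minimal sketch of that pattern follows, assuming the 2.6.19-era API; the example_* names are hypothetical, not from this patch:

	#include <linux/init.h>
	#include <linux/kernel.h>

	static unsigned long example_addr __initdata;	/* hypothetical */

	/* Handlers registered with early_param() run from parse_early_param(),
	 * early in setup_arch(); a non-zero return flags the value as bad. */
	static int __init setup_example(char *arg)
	{
		char *end;

		if (!arg)
			return -EINVAL;
		example_addr = memparse(arg, &end);	/* accepts K/M/G suffixes */
		return end > arg ? 0 : -EINVAL;
	}
	early_param("example", setup_example);

Booting with "example=64M" would then fill example_addr before most of setup_arch() runs, which is why the crashkernel/elfcorehdr options can move out of the big scanner loop.]
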
+@@ -521,10 +334,11 @@ contig_initmem_init(unsigned long start_
+       if (bootmap == -1L)
+               panic("Cannot find bootmem map of size %ld\n",bootmap_size);
+       bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn);
++      e820_register_active_regions(0, start_pfn, end_pfn);
+ #ifdef CONFIG_XEN
+-      e820_bootmem_free(NODE_DATA(0), 0, xen_start_info->nr_pages<<PAGE_SHIFT);
++      free_bootmem_with_active_regions(0, xen_start_info->nr_pages);
+ #else
+-      e820_bootmem_free(NODE_DATA(0), 0, end_pfn << PAGE_SHIFT);
++      free_bootmem_with_active_regions(0, end_pfn);
+ #endif
+       reserve_bootmem(bootmap, bootmap_size);
+ } 
+@@ -587,6 +401,10 @@ static void discover_ebda(void)
+ void __init setup_arch(char **cmdline_p)
+ {
+ #ifdef CONFIG_XEN
++      extern struct e820map machine_e820;
++
++      printk(KERN_INFO "Command line: %s\n", saved_command_line);
++
+       /* Register a call for panic conditions. */
+       atomic_notifier_chain_register(&panic_notifier_list, &xen_panic_block);
+@@ -612,6 +430,8 @@ void __init setup_arch(char **cmdline_p)
+       ARCH_SETUP
+ #else
++      printk(KERN_INFO "Command line: %s\n", saved_command_line);
++
+       ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV);
+       screen_info = SCREEN_INFO;
+       edid_info = EDID_INFO;
+@@ -639,16 +459,22 @@ void __init setup_arch(char **cmdline_p)
+       data_resource.start = virt_to_phys(&_etext);
+       data_resource.end = virt_to_phys(&_edata)-1;
+-      parse_cmdline_early(cmdline_p);
+-
+       early_identify_cpu(&boot_cpu_data);
++      strlcpy(command_line, saved_command_line, COMMAND_LINE_SIZE);
++      *cmdline_p = command_line;
++
++      parse_early_param();
++
++      finish_e820_parsing();
++
++      e820_register_active_regions(0, 0, -1UL);
+       /*
+        * partially used pages are not usable - thus
+        * we are rounding upwards:
+        */
+       end_pfn = e820_end_of_ram();
+-      num_physpages = end_pfn;                /* for pfn_valid */
++      num_physpages = end_pfn;
+       check_efer();
+@@ -659,6 +485,14 @@ void __init setup_arch(char **cmdline_p)
+       if (is_initial_xendomain())
+               dmi_scan_machine();
++      /* How many end-of-memory variables you have, grandma! */
++      max_low_pfn = end_pfn;
++      max_pfn = end_pfn;
++      high_memory = (void *)__va(end_pfn * PAGE_SIZE - 1) + 1;
++
++      /* Remove active ranges so rediscovery with NUMA-awareness happens */
++      remove_all_active_ranges();
++
+ #ifdef CONFIG_ACPI_NUMA
+       /*
+        * Parse SRAT to discover nodes.
+@@ -848,16 +682,16 @@ void __init setup_arch(char **cmdline_p)
+                               BUG();
+       }
++#ifdef CONFIG_ACPI
+       if (!is_initial_xendomain()) {
+               acpi_disabled = 1;
+-#ifdef  CONFIG_ACPI
+               acpi_ht = 0;
+-#endif
+       }
+ #endif
++#endif
+-#ifndef CONFIG_XEN
+-      check_ioapic();
++#if defined(CONFIG_PCI) && !defined(CONFIG_XEN)
++      early_quirks();
+ #endif
+       zap_low_mappings(0);
+@@ -907,6 +741,7 @@ void __init setup_arch(char **cmdline_p)
+               e820_reserve_resources(machine_e820.map, machine_e820.nr_map);
+ #else
+       e820_reserve_resources(e820.map, e820.nr_map);
++      e820_mark_nosave_regions();
+ #endif
+       request_resource(&iomem_resource, &video_ram_resource);
+@@ -914,7 +749,7 @@ void __init setup_arch(char **cmdline_p)
+       {
+       unsigned i;
+       /* request I/O space for devices used on all i[345]86 PCs */
+-      for (i = 0; i < STANDARD_IO_RESOURCES; i++)
++      for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++)
+               request_resource(&ioport_resource, &standard_io_resources[i]);
+       }
+@@ -1098,7 +933,7 @@ static void __init amd_detect_cmp(struct
+ #endif
+ }
+-static void __init init_amd(struct cpuinfo_x86 *c)
++static void __cpuinit init_amd(struct cpuinfo_x86 *c)
+ {
+       unsigned level;
+@@ -1154,6 +989,12 @@ static void __init init_amd(struct cpuin
+       /* Fix cpuid4 emulation for more */
+       num_cache_leaves = 3;
++
++      /* When there is only one core no need to synchronize RDTSC */
++      if (num_possible_cpus() == 1)
++              set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
++      else
++              clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
+ }
+ static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
+@@ -1235,8 +1076,7 @@ static void srat_detect_node(void)
+               node = first_node(node_online_map);
+       numa_set_node(cpu, node);
+-      if (acpi_numa > 0)
+-              printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
++      printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
+ #endif
+ }
+@@ -1270,6 +1110,8 @@ static void __cpuinit init_intel(struct 
+       if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
+           (c->x86 == 0x6 && c->x86_model >= 0x0e))
+               set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
++      if (c->x86 == 6)
++              set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability);
+       set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
+       c->x86_max_cores = intel_num_cpu_cores(c);
+@@ -1488,8 +1330,8 @@ static int show_cpuinfo(struct seq_file 
+               /* Intel-defined (#2) */
+               "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est",
+-              "tm2", NULL, "cid", NULL, NULL, "cx16", "xtpr", NULL,
+-              NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
++              "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL,
++              NULL, NULL, "dca", NULL, NULL, NULL, NULL, NULL,
+               NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+               /* VIA/Cyrix/Centaur-defined */
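
[Several hunks in this file also swap hand-maintained element counts (ADAPTER_ROM_RESOURCES, STANDARD_IO_RESOURCES) for ARRAY_SIZE(), which derives the loop bound from the array definition itself so the two cannot drift apart. As defined in <linux/kernel.h> of this era:

	#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))

so `for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++)` stays correct even when entries are added to the array.]
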
+--- sle11-2009-05-14.orig/arch/x86/kernel/setup64-xen.c        2009-04-20 11:36:10.000000000 +0200
++++ sle11-2009-05-14/arch/x86/kernel/setup64-xen.c     2009-03-04 11:28:34.000000000 +0100
+@@ -31,7 +31,7 @@
+ #include <asm/hypervisor.h>
+ #endif
+-char x86_boot_params[BOOT_PARAM_SIZE] __initdata = {0,};
++char x86_boot_params[BOOT_PARAM_SIZE] __initdata;
+ cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
+@@ -55,8 +55,10 @@ Control non executable mappings for 64bi
+ on    Enable(default)
+ off   Disable
+ */ 
+-int __init nonx_setup(char *str)
++static int __init nonx_setup(char *str)
+ {
++      if (!str)
++              return -EINVAL;
+       if (!strncmp(str, "on", 2)) {
+                 __supported_pte_mask |= _PAGE_NX; 
+               do_not_nx = 0; 
+@@ -64,9 +66,9 @@ int __init nonx_setup(char *str)
+               do_not_nx = 1;
+               __supported_pte_mask &= ~_PAGE_NX;
+         }
+-      return 1;
++      return 0;
+ } 
+-__setup("noexec=", nonx_setup);       /* parsed early actually */
++early_param("noexec", nonx_setup);
+ int force_personality32 = 0; 
+@@ -102,12 +104,9 @@ void __init setup_per_cpu_areas(void)
+ #endif
+       /* Copy section for each CPU (we discard the original) */
+-      size = ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES);
+-#ifdef CONFIG_MODULES
+-      if (size < PERCPU_ENOUGH_ROOM)
+-              size = PERCPU_ENOUGH_ROOM;
+-#endif
++      size = PERCPU_ENOUGH_ROOM;
++      printk(KERN_INFO "PERCPU: Allocating %lu bytes of per cpu data\n", size);
+       for_each_cpu_mask (i, cpu_possible_map) {
+               char *ptr;
+@@ -169,7 +168,10 @@ void pda_init(int cpu)
+       /* Setup up data that may be needed in __get_free_pages early */
+       asm volatile("movl %0,%%fs ; movl %0,%%gs" :: "r" (0)); 
+ #ifndef CONFIG_XEN
++      /* Memory clobbers used to order PDA accessed */
++      mb();
+       wrmsrl(MSR_GS_BASE, pda);
++      mb();
+ #else
+       if (HYPERVISOR_set_segment_base(SEGBASE_GS_KERNEL,
+                                       (unsigned long)pda))
+@@ -302,28 +304,17 @@ void __cpuinit cpu_init (void)
+        * set up and load the per-CPU TSS
+        */
+       for (v = 0; v < N_EXCEPTION_STACKS; v++) {
++              static const unsigned int order[N_EXCEPTION_STACKS] = {
++                      [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
++                      [DEBUG_STACK - 1] = DEBUG_STACK_ORDER
++              };
+               if (cpu) {
+-                      static const unsigned int order[N_EXCEPTION_STACKS] = {
+-                              [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
+-                              [DEBUG_STACK - 1] = DEBUG_STACK_ORDER
+-                      };
+-
+                       estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]);
+                       if (!estacks)
+                               panic("Cannot allocate exception stack %ld %d\n",
+                                     v, cpu); 
+               }
+-              switch (v + 1) {
+-#if DEBUG_STKSZ > EXCEPTION_STKSZ
+-              case DEBUG_STACK:
+-                      cpu_pda(cpu)->debugstack = (unsigned long)estacks;
+-                      estacks += DEBUG_STKSZ;
+-                      break;
+-#endif
+-              default:
+-                      estacks += EXCEPTION_STKSZ;
+-                      break;
+-              }
++              estacks += PAGE_SIZE << order[v];
+               orig_ist->ist[v] = t->ist[v] = (unsigned long)estacks;
+       }
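
[The cpu_init() hunk above replaces a per-stack switch statement with a lookup table built from GNU C designated range initializers: every exception stack defaults to EXCEPTION_STACK_ORDER and only the DEBUG_STACK slot is overridden, after which `estacks += PAGE_SIZE << order[v]` lands on the stack top. A standalone sketch of the initializer idiom, with made-up sizes:

	enum { STACKS = 5, DEBUG_SLOT = 2 };		/* hypothetical layout */

	static const unsigned int order[STACKS] = {
		[0 ... STACKS - 1] = 0,			/* one page each by default */
		[DEBUG_SLOT]	   = 1,			/* two pages for the debug stack */
	};

Later designators override earlier ones, which is what lets the range initializer set the default.]
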
+--- sle11-2009-05-14.orig/arch/x86/kernel/smp_64-xen.c 2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/arch/x86/kernel/smp_64-xen.c      2009-03-04 11:28:34.000000000 +0100
+@@ -381,9 +381,8 @@ int smp_call_function_single (int cpu, v
+       /* prevent preemption and reschedule on another processor */
+       int me = get_cpu();
+       if (cpu == me) {
+-              WARN_ON(1);
+               put_cpu();
+-              return -EBUSY;
++              return 0;
+       }
+       spin_lock_bh(&call_lock);
+       __smp_call_function_single(cpu, func, info, nonatomic, wait);
+@@ -501,7 +500,7 @@ void smp_send_stop(void)
+ #ifndef CONFIG_XEN
+ asmlinkage void smp_reschedule_interrupt(void)
+ #else
+-asmlinkage irqreturn_t smp_reschedule_interrupt(void)
++asmlinkage irqreturn_t smp_reschedule_interrupt(int irq, void *ctx)
+ #endif
+ {
+ #ifndef CONFIG_XEN
+@@ -514,7 +513,7 @@ asmlinkage irqreturn_t smp_reschedule_in
+ #ifndef CONFIG_XEN
+ asmlinkage void smp_call_function_interrupt(void)
+ #else
+-asmlinkage irqreturn_t smp_call_function_interrupt(void)
++asmlinkage irqreturn_t smp_call_function_interrupt(int irq, void *ctx)
+ #endif
+ {
+       void (*func) (void *info) = call_data->func;
+@@ -545,31 +544,3 @@ asmlinkage irqreturn_t smp_call_function
+       return IRQ_HANDLED;
+ #endif
+ }
+-
+-int safe_smp_processor_id(void)
+-{
+-#ifdef CONFIG_XEN
+-      return smp_processor_id();
+-#else
+-      unsigned apicid, i;
+-
+-      if (disable_apic)
+-              return 0;
+-
+-      apicid = hard_smp_processor_id();
+-      if (apicid < NR_CPUS && x86_cpu_to_apicid[apicid] == apicid)
+-              return apicid;
+-
+-      for (i = 0; i < NR_CPUS; ++i) {
+-              if (x86_cpu_to_apicid[i] == apicid)
+-                      return i;
+-      }
+-
+-      /* No entries in x86_cpu_to_apicid?  Either no MPS|ACPI,
+-       * or called too early.  Either way, we must be CPU 0. */
+-              if (x86_cpu_to_apicid[0] == BAD_APICID)
+-              return 0;
+-
+-      return 0; /* Should not happen */
+-#endif
+-}
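
[smp_reschedule_interrupt() and smp_call_function_interrupt() gain the (int irq, void *ctx) arguments above because 2.6.19 removed struct pt_regs * from interrupt handlers tree-wide; a handler that still needs the register frame fetches it with get_irq_regs(). Sketch of the new-style prototype (example_int is a hypothetical handler, not from this patch):

	#include <linux/interrupt.h>
	#include <asm/irq_regs.h>

	static irqreturn_t example_int(int irq, void *dev_id)
	{
		struct pt_regs *regs = get_irq_regs();	/* only when actually needed */

		(void)regs;
		return IRQ_HANDLED;
	}

The same conversion shows up again further down in drivers/char/tpm/tpm_xen.c.]
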
+--- sle11-2009-05-14.orig/arch/x86/kernel/traps_64-xen.c       2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/arch/x86/kernel/traps_64-xen.c    2009-03-04 11:28:34.000000000 +0100
+@@ -23,6 +23,7 @@
+ #include <linux/delay.h>
+ #include <linux/spinlock.h>
+ #include <linux/interrupt.h>
++#include <linux/kallsyms.h>
+ #include <linux/module.h>
+ #include <linux/moduleparam.h>
+ #include <linux/nmi.h>
+@@ -45,6 +46,7 @@
+ #include <asm/pda.h>
+ #include <asm/proto.h>
+ #include <asm/nmi.h>
++#include <asm/stacktrace.h>
+ asmlinkage void divide_error(void);
+ asmlinkage void debug(void);
+@@ -114,7 +116,6 @@ static int call_trace = 1;
+ #endif
+ #ifdef CONFIG_KALLSYMS
+-# include <linux/kallsyms.h>
+ void printk_address(unsigned long address)
+ {
+       unsigned long offset = 0, symsize;
+@@ -142,7 +143,7 @@ void printk_address(unsigned long addres
+ #endif
+ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
+-                                      unsigned *usedp, const char **idp)
++                                      unsigned *usedp, char **idp)
+ {
+ #ifndef CONFIG_X86_NO_TSS
+       static char ids[][8] = {
+@@ -162,26 +163,7 @@ static unsigned long *in_exception_stack
+        * 'stack' is in one of them:
+        */
+       for (k = 0; k < N_EXCEPTION_STACKS; k++) {
+-              unsigned long end;
+-
+-              /*
+-               * set 'end' to the end of the exception stack.
+-               */
+-              switch (k + 1) {
+-              /*
+-               * TODO: this block is not needed i think, because
+-               * setup64.c:cpu_init() sets up t->ist[DEBUG_STACK]
+-               * properly too.
+-               */
+-#if DEBUG_STKSZ > EXCEPTION_STKSZ
+-              case DEBUG_STACK:
+-                      end = cpu_pda(cpu)->debugstack + DEBUG_STKSZ;
+-                      break;
+-#endif
+-              default:
+-                      end = per_cpu(orig_ist, cpu).ist[k];
+-                      break;
+-              }
++              unsigned long end = per_cpu(orig_ist, cpu).ist[k];
+               /*
+                * Is 'stack' above this exception frame's end?
+                * If yes then skip to the next frame.
+@@ -236,13 +218,19 @@ static unsigned long *in_exception_stack
+       return NULL;
+ }
+-static int show_trace_unwind(struct unwind_frame_info *info, void *context)
++struct ops_and_data {
++      struct stacktrace_ops *ops;
++      void *data;
++};
++
++static int dump_trace_unwind(struct unwind_frame_info *info, void *context)
+ {
++      struct ops_and_data *oad = (struct ops_and_data *)context;
+       int n = 0;
+       while (unwind(info) == 0 && UNW_PC(info)) {
+               n++;
+-              printk_address(UNW_PC(info));
++              oad->ops->address(oad->data, UNW_PC(info));
+               if (arch_unw_user_mode(info))
+                       break;
+       }
+@@ -256,13 +244,19 @@ static int show_trace_unwind(struct unwi
+  * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
+  */
+-void show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * stack)
++static inline int valid_stack_ptr(struct thread_info *tinfo, void *p)
+ {
+-      const unsigned cpu = safe_smp_processor_id();
++      void *t = (void *)tinfo;
++        return p > t && p < t + THREAD_SIZE - 3;
++}
++
++void dump_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * stack,
++              struct stacktrace_ops *ops, void *data)
++{
++      const unsigned cpu = smp_processor_id();
+       unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr;
+       unsigned used = 0;
+-
+-      printk("\nCall Trace:\n");
++      struct thread_info *tinfo;
+       if (!tsk)
+               tsk = current;
+@@ -270,32 +264,47 @@ void show_trace(struct task_struct *tsk,
+       if (call_trace >= 0) {
+               int unw_ret = 0;
+               struct unwind_frame_info info;
++              struct ops_and_data oad = { .ops = ops, .data = data };
+               if (regs) {
+                       if (unwind_init_frame_info(&info, tsk, regs) == 0)
+-                              unw_ret = show_trace_unwind(&info, NULL);
++                              unw_ret = dump_trace_unwind(&info, &oad);
+               } else if (tsk == current)
+-                      unw_ret = unwind_init_running(&info, show_trace_unwind, NULL);
++                      unw_ret = unwind_init_running(&info, dump_trace_unwind, &oad);
+               else {
+                       if (unwind_init_blocked(&info, tsk) == 0)
+-                              unw_ret = show_trace_unwind(&info, NULL);
++                              unw_ret = dump_trace_unwind(&info, &oad);
+               }
+               if (unw_ret > 0) {
+                       if (call_trace == 1 && !arch_unw_user_mode(&info)) {
+-                              print_symbol("DWARF2 unwinder stuck at %s\n",
++                              ops->warning_symbol(data, "DWARF2 unwinder stuck at %s\n",
+                                            UNW_PC(&info));
+                               if ((long)UNW_SP(&info) < 0) {
+-                                      printk("Leftover inexact backtrace:\n");
++                                      ops->warning(data, "Leftover inexact backtrace:\n");
+                                       stack = (unsigned long *)UNW_SP(&info);
++                                      if (!stack)
++                                              return;
+                               } else
+-                                      printk("Full inexact backtrace again:\n");
++                                      ops->warning(data, "Full inexact backtrace again:\n");
+                       } else if (call_trace >= 1)
+                               return;
+                       else
+-                              printk("Full inexact backtrace again:\n");
++                              ops->warning(data, "Full inexact backtrace again:\n");
+               } else
+-                      printk("Inexact backtrace:\n");
++                      ops->warning(data, "Inexact backtrace:\n");
+       }
++      if (!stack) {
++              unsigned long dummy;
++              stack = &dummy;
++              if (tsk && tsk != current)
++                      stack = (unsigned long *)tsk->thread.rsp;
++      }
++      /*
++       * Align the stack pointer on word boundary, later loops
++       * rely on that (and corruption / debug info bugs can cause
++       * unaligned values here):
++       */
++      stack = (unsigned long *)((unsigned long)stack & ~(sizeof(long)-1));
+       /*
+        * Print function call entries within a stack. 'cond' is the
+@@ -305,7 +314,9 @@ void show_trace(struct task_struct *tsk,
+ #define HANDLE_STACK(cond) \
+       do while (cond) { \
+               unsigned long addr = *stack++; \
+-              if (kernel_text_address(addr)) { \
++              if (oops_in_progress ?          \
++                      __kernel_text_address(addr) : \
++                      kernel_text_address(addr)) { \
+                       /* \
+                        * If the address is either in the text segment of the \
+                        * kernel, or in the region which contains vmalloc'ed \
+@@ -314,7 +325,7 @@ void show_trace(struct task_struct *tsk,
+                        * down the cause of the crash will be able to figure \
+                        * out the call path that was taken. \
+                        */ \
+-                      printk_address(addr); \
++                      ops->address(data, addr);   \
+               } \
+       } while (0)
+@@ -323,16 +334,17 @@ void show_trace(struct task_struct *tsk,
+        * current stack address. If the stacks consist of nested
+        * exceptions
+        */
+-      for ( ; ; ) {
+-              const char *id;
++      for (;;) {
++              char *id;
+               unsigned long *estack_end;
+               estack_end = in_exception_stack(cpu, (unsigned long)stack,
+                                               &used, &id);
+               if (estack_end) {
+-                      printk(" <%s>", id);
++                      if (ops->stack(data, id) < 0)
++                              break;
+                       HANDLE_STACK (stack < estack_end);
+-                      printk(" <EOE>");
++                      ops->stack(data, "<EOE>");
+                       /*
+                        * We link to the next stack via the
+                        * second-to-last pointer (index -2 to end) in the
+@@ -347,7 +359,8 @@ void show_trace(struct task_struct *tsk,
+                               (IRQSTACKSIZE - 64) / sizeof(*irqstack);
+                       if (stack >= irqstack && stack < irqstack_end) {
+-                              printk(" <IRQ>");
++                              if (ops->stack(data, "IRQ") < 0)
++                                      break;
+                               HANDLE_STACK (stack < irqstack_end);
+                               /*
+                                * We link to the next stack (which would be
+@@ -356,7 +369,7 @@ void show_trace(struct task_struct *tsk,
+                                */
+                               stack = (unsigned long *) (irqstack_end[-1]);
+                               irqstack_end = NULL;
+-                              printk(" <EOI>");
++                              ops->stack(data, "EOI");
+                               continue;
+                       }
+               }
+@@ -364,19 +377,58 @@ void show_trace(struct task_struct *tsk,
+       }
+       /*
+-       * This prints the process stack:
++       * This handles the process stack:
+        */
+-      HANDLE_STACK (((long) stack & (THREAD_SIZE-1)) != 0);
++      tinfo = current_thread_info();
++      HANDLE_STACK (valid_stack_ptr(tinfo, stack));
+ #undef HANDLE_STACK
++}
++EXPORT_SYMBOL(dump_trace);
++static void
++print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
++{
++      print_symbol(msg, symbol);
+       printk("\n");
+ }
+-static void _show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long * rsp)
++static void print_trace_warning(void *data, char *msg)
++{
++      printk("%s\n", msg);
++}
++
++static int print_trace_stack(void *data, char *name)
++{
++      printk(" <%s> ", name);
++      return 0;
++}
++
++static void print_trace_address(void *data, unsigned long addr)
++{
++      printk_address(addr);
++}
++
++static struct stacktrace_ops print_trace_ops = {
++      .warning = print_trace_warning,
++      .warning_symbol = print_trace_warning_symbol,
++      .stack = print_trace_stack,
++      .address = print_trace_address,
++};
++
++void
++show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long *stack)
++{
++      printk("\nCall Trace:\n");
++      dump_trace(tsk, regs, stack, &print_trace_ops, NULL);
++      printk("\n");
++}
++
++static void
++_show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long *rsp)
+ {
+       unsigned long *stack;
+       int i;
+-      const int cpu = safe_smp_processor_id();
++      const int cpu = smp_processor_id();
+       unsigned long *irqstack_end = (unsigned long *) (cpu_pda(cpu)->irqstackptr);
+       unsigned long *irqstack = (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE);
+@@ -430,7 +482,7 @@ void show_registers(struct pt_regs *regs
+       int i;
+       int in_kernel = !user_mode(regs);
+       unsigned long rsp;
+-      const int cpu = safe_smp_processor_id(); 
++      const int cpu = smp_processor_id();
+       struct task_struct *cur = cpu_pda(cpu)->pcurrent;
+               rsp = regs->rsp;
+@@ -505,9 +557,11 @@ static unsigned int die_nest_count;
+ unsigned __kprobes long oops_begin(void)
+ {
+-      int cpu = safe_smp_processor_id();
++      int cpu = smp_processor_id();
+       unsigned long flags;
++      oops_enter();
++
+       /* racy, but better than risking deadlock. */
+       local_irq_save(flags);
+       if (!spin_trylock(&die_lock)) { 
+@@ -536,6 +590,7 @@ void __kprobes oops_end(unsigned long fl
+               spin_unlock_irqrestore(&die_lock, flags);
+       if (panic_on_oops)
+               panic("Fatal exception");
++      oops_exit();
+ }
+ void __kprobes __die(const char * str, struct pt_regs * regs, long err)
+@@ -572,8 +627,8 @@ void die(const char * str, struct pt_reg
+       do_exit(SIGSEGV); 
+ }
+-#ifdef CONFIG_X86_LOCAL_APIC
+-void __kprobes die_nmi(char *str, struct pt_regs *regs)
++#if defined(CONFIG_X86_LOCAL_APIC) || defined(CONFIG_SYSCTL)
++void __kprobes die_nmi(char *str, struct pt_regs *regs, int do_panic)
+ {
+       unsigned long flags = oops_begin();
+@@ -581,13 +636,12 @@ void __kprobes die_nmi(char *str, struct
+        * We are in trouble anyway, lets at least try
+        * to get a message out.
+        */
+-      printk(str, safe_smp_processor_id());
++      printk(str, smp_processor_id());
+       show_registers(regs);
+       if (kexec_should_crash(current))
+               crash_kexec(regs);
+-      if (panic_on_timeout || panic_on_oops)
+-              panic("nmi watchdog");
+-      printk("console shuts up ...\n");
++      if (do_panic || panic_on_oops)
++              panic("Non maskable interrupt");
+       oops_end(flags);
+       nmi_exit();
+       local_irq_enable();
+@@ -734,8 +788,15 @@ asmlinkage void __kprobes do_general_pro
+ static __kprobes void
+ mem_parity_error(unsigned char reason, struct pt_regs * regs)
+ {
+-      printk("Uhhuh. NMI received. Dazed and confused, but trying to continue\n");
+-      printk("You probably have a hardware problem with your RAM chips\n");
++      printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n",
++              reason);
++      printk(KERN_EMERG "You probably have a hardware problem with your "
++              "RAM chips\n");
++
++      if (panic_on_unrecovered_nmi)
++              panic("NMI: Not continuing");
++
++      printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
+ #if 0 /* XEN */
+       /* Clear and disable the memory parity error line. */
+@@ -762,9 +823,15 @@ io_check_error(unsigned char reason, str
+ static __kprobes void
+ unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
+-{     printk("Uhhuh. NMI received for unknown reason %02x.\n", reason);
+-      printk("Dazed and confused, but trying to continue\n");
+-      printk("Do you have a strange power saving mode enabled?\n");
++{
++      printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n",
++              reason);
++      printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n");
++
++      if (panic_on_unrecovered_nmi)
++              panic("NMI: Not continuing");
++
++      printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
+ }
+ /* Runs on IST stack. This code must keep interrupts off all the time.
+@@ -789,12 +856,12 @@ asmlinkage __kprobes void default_do_nmi
+                * Ok, so this is none of the documented NMI sources,
+                * so it must be the NMI watchdog.
+                */
+-              if (nmi_watchdog > 0) {
+-                      nmi_watchdog_tick(regs,reason);
++              if (nmi_watchdog_tick(regs,reason))
+                       return;
+-              }
+ #endif
+-              unknown_nmi_error(reason, regs);
++              if (!do_nmi_callback(regs,cpu))
++                      unknown_nmi_error(reason, regs);
++
+               return;
+       }
+       if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
+@@ -1081,6 +1148,7 @@ asmlinkage void math_state_restore(void)
+               init_fpu(me);
+       restore_fpu_checking(&me->thread.i387.fxsave);
+       task_thread_info(me)->status |= TS_USEDFPU;
++      me->fpu_counter++;
+ }
+@@ -1141,24 +1209,30 @@ void __cpuinit smp_trap_init(trap_info_t
+ }
+-/* Actual parsing is done early in setup.c. */
+-static int __init oops_dummy(char *s)
++static int __init oops_setup(char *s)
+ { 
+-      panic_on_oops = 1;
+-      return 1;
++      if (!s)
++              return -EINVAL;
++      if (!strcmp(s, "panic"))
++              panic_on_oops = 1;
++      return 0;
+ } 
+-__setup("oops=", oops_dummy); 
++early_param("oops", oops_setup);
+ static int __init kstack_setup(char *s)
+ {
++      if (!s)
++              return -EINVAL;
+       kstack_depth_to_print = simple_strtoul(s,NULL,0);
+-      return 1;
++      return 0;
+ }
+-__setup("kstack=", kstack_setup);
++early_param("kstack", kstack_setup);
+ #ifdef CONFIG_STACK_UNWIND
+ static int __init call_trace_setup(char *s)
+ {
++      if (!s)
++              return -EINVAL;
+       if (strcmp(s, "old") == 0)
+               call_trace = -1;
+       else if (strcmp(s, "both") == 0)
+@@ -1167,7 +1241,7 @@ static int __init call_trace_setup(char 
+               call_trace = 1;
+       else if (strcmp(s, "new") == 0)
+               call_trace = 2;
+-      return 1;
++      return 0;
+ }
+-__setup("call_trace=", call_trace_setup);
++early_param("call_trace", call_trace_setup);
+ #endif
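
[The show_trace() rework above separates walking the stack (dump_trace()) from printing it (struct stacktrace_ops), so other subsystems can reuse the walker without printk(). A hypothetical second consumer that merely counts frames, assuming the ops layout shown in the hunk:

	static void count_warning(void *data, char *msg) { }
	static void count_warning_symbol(void *data, char *msg, unsigned long sym) { }
	static int  count_stack(void *data, char *name) { return 0; }

	static void count_address(void *data, unsigned long addr)
	{
		++*(unsigned int *)data;	/* one hit per text address */
	}

	static struct stacktrace_ops count_ops = {
		.warning	= count_warning,
		.warning_symbol	= count_warning_symbol,
		.stack		= count_stack,
		.address	= count_address,
	};

	/* unsigned int depth = 0;
	 * dump_trace(current, NULL, NULL, &count_ops, &depth); */

The in-tree user is print_trace_ops above, which routes every callback back to printk()/printk_address().]
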
+--- sle11-2009-05-14.orig/arch/x86/kernel/vsyscall_64-xen.c    2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/arch/x86/kernel/vsyscall_64-xen.c 2009-03-04 11:28:34.000000000 +0100
+@@ -26,6 +26,10 @@
+ #include <linux/seqlock.h>
+ #include <linux/jiffies.h>
+ #include <linux/sysctl.h>
++#include <linux/getcpu.h>
++#include <linux/cpu.h>
++#include <linux/smp.h>
++#include <linux/notifier.h>
+ #include <asm/vsyscall.h>
+ #include <asm/pgtable.h>
+@@ -33,11 +37,15 @@
+ #include <asm/fixmap.h>
+ #include <asm/errno.h>
+ #include <asm/io.h>
++#include <asm/segment.h>
++#include <asm/desc.h>
++#include <asm/topology.h>
+ #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
+ int __sysctl_vsyscall __section_sysctl_vsyscall = 1;
+ seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED;
++int __vgetcpu_mode __section_vgetcpu_mode;
+ #include <asm/unistd.h>
+@@ -61,8 +69,7 @@ static __always_inline void do_vgettimeo
+               sequence = read_seqbegin(&__xtime_lock);
+               
+               sec = __xtime.tv_sec;
+-              usec = (__xtime.tv_nsec / 1000) +
+-                      (__jiffies - __wall_jiffies) * (1000000 / HZ);
++              usec = __xtime.tv_nsec / 1000;
+               if (__vxtime.mode != VXTIME_HPET) {
+                       t = get_cycles_sync();
+@@ -72,7 +79,8 @@ static __always_inline void do_vgettimeo
+                                __vxtime.tsc_quot) >> 32;
+                       /* See comment in x86_64 do_gettimeofday. */
+               } else {
+-                      usec += ((readl((void *)fix_to_virt(VSYSCALL_HPET) + 0xf0) -
++                      usec += ((readl((void __iomem *)
++                                 fix_to_virt(VSYSCALL_HPET) + 0xf0) -
+                                 __vxtime.last) * __vxtime.quot) >> 32;
+               }
+       } while (read_seqretry(&__xtime_lock, sequence));
+@@ -127,9 +135,46 @@ time_t __vsyscall(1) vtime(time_t *t)
+       return __xtime.tv_sec;
+ }
+-long __vsyscall(2) venosys_0(void)
+-{
+-      return -ENOSYS;
++/* Fast way to get current CPU and node.
++   This helps to do per node and per CPU caches in user space.
++   The result is not guaranteed without CPU affinity, but usually
++   works out because the scheduler tries to keep a thread on the same
++   CPU.
++
++   tcache must point to a two element sized long array.
++   All arguments can be NULL. */
++long __vsyscall(2)
++vgetcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache)
++{
++      unsigned int dummy, p;
++      unsigned long j = 0;
++
++      /* Fast cache - only recompute value once per jiffies and avoid
++         relatively costly rdtscp/cpuid otherwise.
++         This works because the scheduler usually keeps the process
++         on the same CPU and this syscall doesn't guarantee its
++         results anyways.
++         We do this here because otherwise user space would do it on
++         its own in a likely inferior way (no access to jiffies).
++         If you don't like it pass NULL. */
++      if (tcache && tcache->blob[0] == (j = __jiffies)) {
++              p = tcache->blob[1];
++      } else if (__vgetcpu_mode == VGETCPU_RDTSCP) {
++              /* Load per CPU data from RDTSCP */
++              rdtscp(dummy, dummy, p);
++      } else {
++              /* Load per CPU data from GDT */
++              asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
++      }
++      if (tcache) {
++              tcache->blob[0] = j;
++              tcache->blob[1] = p;
++      }
++      if (cpu)
++              *cpu = p & 0xfff;
++      if (node)
++              *node = p >> 12;
++      return 0;
+ }
+ long __vsyscall(3) venosys_1(void)
+@@ -149,7 +194,8 @@ static int vsyscall_sysctl_change(ctl_ta
+                         void __user *buffer, size_t *lenp, loff_t *ppos)
+ {
+       extern u16 vsysc1, vsysc2;
+-      u16 *map1, *map2;
++      u16 __iomem *map1;
++      u16 __iomem *map2;
+       int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+       if (!write)
+               return ret;
+@@ -164,11 +210,11 @@ static int vsyscall_sysctl_change(ctl_ta
+               goto out;
+       }
+       if (!sysctl_vsyscall) {
+-              *map1 = SYSCALL;
+-              *map2 = SYSCALL;
++              writew(SYSCALL, map1);
++              writew(SYSCALL, map2);
+       } else {
+-              *map1 = NOP2;
+-              *map2 = NOP2;
++              writew(NOP2, map1);
++              writew(NOP2, map2);
+       }
+       iounmap(map2);
+ out:
+@@ -200,6 +246,48 @@ static ctl_table kernel_root_table2[] = 
+ #endif
++/* Assume __initcall executes before all user space. Hopefully kmod
++   doesn't violate that. We'll find out if it does. */
++static void __cpuinit vsyscall_set_cpu(int cpu)
++{
++      unsigned long d;
++      unsigned long node = 0;
++#ifdef CONFIG_NUMA
++      node = cpu_to_node[cpu];
++#endif
++      if (cpu_has(&cpu_data[cpu], X86_FEATURE_RDTSCP))
++              write_rdtscp_aux((node << 12) | cpu);
++
++      /* Store cpu number in limit so that it can be loaded quickly
++         in user space in vgetcpu.
++         12 bits for the CPU and 8 bits for the node. */
++      d = 0x0f40000000000ULL;
++      d |= cpu;
++      d |= (node & 0xf) << 12;
++      d |= (node >> 4) << 48;
++      if (HYPERVISOR_update_descriptor(virt_to_machine(cpu_gdt(cpu)
++                                                       + GDT_ENTRY_PER_CPU),
++                                       d))
++              BUG();
++}
++
++static void __cpuinit cpu_vsyscall_init(void *arg)
++{
++      /* preemption should be already off */
++      vsyscall_set_cpu(raw_smp_processor_id());
++}
++
++#ifdef CONFIG_HOTPLUG_CPU
++static int __cpuinit
++cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg)
++{
++      long cpu = (long)arg;
++      if (action == CPU_ONLINE)
++              smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 0, 1);
++      return NOTIFY_DONE;
++}
++#endif
++
+ static void __init map_vsyscall(void)
+ {
+       extern char __vsyscall_0;
+@@ -214,13 +302,20 @@ static int __init vsyscall_init(void)
+                       VSYSCALL_ADDR(__NR_vgettimeofday)));
+       BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime));
+       BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE)));
++      BUG_ON((unsigned long) &vgetcpu != VSYSCALL_ADDR(__NR_vgetcpu));
+       map_vsyscall();
+ #ifdef CONFIG_XEN
+       sysctl_vsyscall = 0; /* disable vgettimeofay() */
++      if (boot_cpu_has(X86_FEATURE_RDTSCP))
++              vgetcpu_mode = VGETCPU_RDTSCP;
++      else
++              vgetcpu_mode = VGETCPU_LSL;
+ #endif
+ #ifdef CONFIG_SYSCTL
+       register_sysctl_table(kernel_root_table2, 0);
+ #endif
++      on_each_cpu(cpu_vsyscall_init, NULL, 0, 1);
++      hotcpu_notifier(cpu_vsyscall_notifier, 0);
+       return 0;
+ }
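
[vgetcpu() above packs the CPU number into the low 12 bits and the node into the remaining upper bits of a per-CPU GDT segment limit (or the RDTSCP aux register), and is installed as vsyscall slot 2. User space on kernels of this vintage reaches it at a fixed address; a sketch, assuming the classic vsyscall page at 0xffffffffff600000 with 1024-byte slots and the getcpu_cache layout from <linux/getcpu.h> of this era (modern kernels emulate or remove this page, so this is illustration only):

	#include <stdio.h>

	struct getcpu_cache { unsigned long blob[128 / sizeof(long)]; };
	typedef long (*vgetcpu_t)(unsigned *, unsigned *, struct getcpu_cache *);

	int main(void)
	{
		vgetcpu_t vgetcpu = (vgetcpu_t)0xffffffffff600800UL;	/* slot 2 */
		unsigned cpu, node;

		if (vgetcpu(&cpu, &node, NULL) == 0)
			printf("cpu=%u node=%u\n", cpu, node);
		return 0;
	}

Passing a getcpu_cache lets the vsyscall skip the LSL/RDTSCP entirely while the jiffies value in blob[0] is still current.]
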
+--- sle11-2009-05-14.orig/arch/x86/mm/fault_64-xen.c   2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/arch/x86/mm/fault_64-xen.c        2009-03-04 11:28:34.000000000 +0100
+@@ -40,8 +40,7 @@
+ #define PF_RSVD       (1<<3)
+ #define PF_INSTR      (1<<4)
+-#ifdef CONFIG_KPROBES
+-ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
++static ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
+ /* Hook to register for page fault notifications */
+ int register_page_fault_notifier(struct notifier_block *nb)
+@@ -49,11 +48,13 @@ int register_page_fault_notifier(struct 
+       vmalloc_sync_all();
+       return atomic_notifier_chain_register(&notify_page_fault_chain, nb);
+ }
++EXPORT_SYMBOL_GPL(register_page_fault_notifier);
+ int unregister_page_fault_notifier(struct notifier_block *nb)
+ {
+       return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb);
+ }
++EXPORT_SYMBOL_GPL(unregister_page_fault_notifier);
+ static inline int notify_page_fault(enum die_val val, const char *str,
+                       struct pt_regs *regs, long err, int trap, int sig)
+@@ -67,13 +68,6 @@ static inline int notify_page_fault(enum
+       };
+       return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args);
+ }
+-#else
+-static inline int notify_page_fault(enum die_val val, const char *str,
+-                      struct pt_regs *regs, long err, int trap, int sig)
+-{
+-      return NOTIFY_DONE;
+-}
+-#endif
+ void bust_spinlocks(int yes)
+ {
+@@ -102,7 +96,7 @@ void bust_spinlocks(int yes)
+ static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr,
+                               unsigned long error_code)
+ { 
+-      unsigned char *instr;
++      unsigned char __user *instr;
+       int scan_more = 1;
+       int prefetch = 0; 
+       unsigned char *max_instr;
+@@ -111,7 +105,7 @@ static noinline int is_prefetch(struct p
+       if (error_code & PF_INSTR)
+               return 0;
+       
+-      instr = (unsigned char *)convert_rip_to_linear(current, regs);
++      instr = (unsigned char __user *)convert_rip_to_linear(current, regs);
+       max_instr = instr + 15;
+       if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE)
+@@ -122,7 +116,7 @@ static noinline int is_prefetch(struct p
+               unsigned char instr_hi;
+               unsigned char instr_lo;
+-              if (__get_user(opcode, instr))
++              if (__get_user(opcode, (char __user *)instr))
+                       break; 
+               instr_hi = opcode & 0xf0; 
+@@ -160,7 +154,7 @@ static noinline int is_prefetch(struct p
+               case 0x00:
+                       /* Prefetch instruction is 0x0F0D or 0x0F18 */
+                       scan_more = 0;
+-                      if (__get_user(opcode, instr)) 
++                      if (__get_user(opcode, (char __user *)instr))
+                               break;
+                       prefetch = (instr_lo == 0xF) &&
+                               (opcode == 0x0D || opcode == 0x18);
+@@ -176,7 +170,7 @@ static noinline int is_prefetch(struct p
+ static int bad_address(void *p) 
+ { 
+       unsigned long dummy;
+-      return __get_user(dummy, (unsigned long *)p);
++      return __get_user(dummy, (unsigned long __user *)p);
+ } 
+ void dump_pagetable(unsigned long address)
+@@ -248,7 +242,7 @@ static int is_errata93(struct pt_regs *r
+ int unhandled_signal(struct task_struct *tsk, int sig)
+ {
+-      if (tsk->pid == 1)
++      if (is_init(tsk))
+               return 1;
+       if (tsk->ptrace & PT_PTRACED)
+               return 0;
+@@ -300,7 +294,7 @@ static int vmalloc_fault(unsigned long a
+       if (pgd_none(*pgd))
+               set_pgd(pgd, *pgd_ref);
+       else
+-              BUG_ON(pgd_page(*pgd) != pgd_page(*pgd_ref));
++              BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
+       /* Below here mismatches are bugs because these lower tables
+          are shared */
+@@ -309,7 +303,7 @@ static int vmalloc_fault(unsigned long a
+       pud_ref = pud_offset(pgd_ref, address);
+       if (pud_none(*pud_ref))
+               return -1;
+-      if (pud_none(*pud) || pud_page(*pud) != pud_page(*pud_ref))
++      if (pud_none(*pud) || pud_page_vaddr(*pud) != pud_page_vaddr(*pud_ref))
+               BUG();
+       pmd = pmd_offset(pud, address);
+       pmd_ref = pmd_offset(pud_ref, address);
+@@ -531,7 +525,7 @@ good_area:
+               case PF_PROT:           /* read, present */
+                       goto bad_area;
+               case 0:                 /* read, not present */
+-                      if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
++                      if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
+                               goto bad_area;
+       }
+@@ -647,7 +641,7 @@ no_context:
+  */
+ out_of_memory:
+       up_read(&mm->mmap_sem);
+-      if (current->pid == 1) { 
++      if (is_init(current)) {
+               yield();
+               goto again;
+       }
+@@ -702,7 +696,7 @@ void vmalloc_sync_all(void)
+                               if (pgd_none(*pgd))
+                                       set_pgd(pgd, *pgd_ref);
+                               else
+-                                      BUG_ON(pgd_page(*pgd) != pgd_page(*pgd_ref));
++                                      BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
+                       }
+                       spin_unlock(&pgd_lock);
+                       set_bit(pgd_index(address), insync);
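
[With register_page_fault_notifier()/unregister_page_fault_notifier() now built unconditionally and exported GPL-only above, a module hooking page faults would look roughly like this (the my_pf_* names are hypothetical, and DIE_PAGE_FAULT is assumed from the asm/kdebug.h die_val enum of this era):

	#include <linux/notifier.h>
	#include <asm/kdebug.h>

	static int my_pf_event(struct notifier_block *nb, unsigned long val, void *data)
	{
		struct die_args *args = data;

		if (val == DIE_PAGE_FAULT)
			printk(KERN_DEBUG "page fault, err=%ld\n", args->err);
		return NOTIFY_DONE;	/* never swallow the fault */
	}

	static struct notifier_block my_pf_nb = {
		.notifier_call = my_pf_event,
	};

	/* register_page_fault_notifier(&my_pf_nb);   on module load   */
	/* unregister_page_fault_notifier(&my_pf_nb); on module unload */

Note register_page_fault_notifier() calls vmalloc_sync_all() first, so the notifier chain itself cannot fault on a not-yet-synced vmalloc mapping.]
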
+--- sle11-2009-05-14.orig/arch/x86/mm/init_64-xen.c    2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/arch/x86/mm/init_64-xen.c 2009-03-04 11:28:34.000000000 +0100
+@@ -61,8 +61,6 @@ EXPORT_SYMBOL(__kernel_page_user);
+ int after_bootmem;
+-static unsigned long dma_reserve __initdata;
+-
+ DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
+ extern unsigned long start_pfn;
+@@ -420,7 +418,6 @@ __init void *early_ioremap(unsigned long
+       /* actually usually some more */
+       if (size >= LARGE_PAGE_SIZE) {
+-              printk("SMBIOS area too long %lu\n", size);
+               return NULL;
+       }
+       set_pmd(temp_mappings[0].pmd,  __pmd(map | _KERNPG_TABLE | _PAGE_PSE));
+@@ -442,16 +439,24 @@ __init void early_iounmap(void *addr, un
+ #endif
+ static void __meminit
+-phys_pmd_init(pmd_t *pmd, unsigned long address, unsigned long end)
++phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
+ {
+-      int i, k;
++      int i = pmd_index(address);
+-      for (i = 0; i < PTRS_PER_PMD; pmd++, i++) {
++      for (; i < PTRS_PER_PMD; i++) {
+               unsigned long pte_phys;
++              pmd_t *pmd = pmd_page + i;
+               pte_t *pte, *pte_save;
++              int k;
+               if (address >= end)
+                       break;
++
++              if (__pmd_val(*pmd)) {
++                      address += PMD_SIZE;
++                      continue;
++              }
++
+               pte = alloc_static_page(&pte_phys);
+               pte_save = pte;
+               for (k = 0; k < PTRS_PER_PTE; pte++, k++, address += PTE_SIZE) {
+@@ -478,40 +483,35 @@ phys_pmd_init(pmd_t *pmd, unsigned long 
+ static void __meminit
+ phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end)
+ {
+-      pmd_t *pmd = pmd_offset(pud, (unsigned long)__va(address));
+-
+-      if (pmd_none(*pmd)) {
+-              spin_lock(&init_mm.page_table_lock);
+-              phys_pmd_init(pmd, address, end);
+-              spin_unlock(&init_mm.page_table_lock);
+-              __flush_tlb_all();
+-      }
++      pmd_t *pmd = pmd_offset(pud,0);
++      spin_lock(&init_mm.page_table_lock);
++      phys_pmd_init(pmd, address, end);
++      spin_unlock(&init_mm.page_table_lock);
++      __flush_tlb_all();
+ }
+-static void __meminit phys_pud_init(pud_t *pud, unsigned long address, unsigned long end)
++static void __meminit phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end)
+ { 
+-      long i = pud_index(address);
+-
+-      pud = pud + i;
+-
+-      if (after_bootmem && pud_val(*pud)) {
+-              phys_pmd_update(pud, address, end);
+-              return;
+-      }
++      int i = pud_index(addr);
+-      for (; i < PTRS_PER_PUD; pud++, i++) {
+-              unsigned long paddr, pmd_phys;
++      for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE ) {
++              unsigned long pmd_phys;
++              pud_t *pud = pud_page + pud_index(addr);
+               pmd_t *pmd;
+-              paddr = (address & PGDIR_MASK) + i*PUD_SIZE;
+-              if (paddr >= end)
++              if (addr >= end)
+                       break;
++              if (__pud_val(*pud)) {
++                      phys_pmd_update(pud, addr, end);
++                      continue;
++              }
++
+               pmd = alloc_static_page(&pmd_phys);
+               spin_lock(&init_mm.page_table_lock);
+               *pud = __pud(pmd_phys | _KERNPG_TABLE);
+-              phys_pmd_init(pmd, paddr, end);
++              phys_pmd_init(pmd, addr, end);
+               spin_unlock(&init_mm.page_table_lock);
+               early_make_page_readonly(pmd, XENFEAT_writable_page_tables);
+@@ -775,69 +775,18 @@ void __cpuinit zap_low_mappings(int cpu)
+ #endif
+ }
+-/* Compute zone sizes for the DMA and DMA32 zones in a node. */
+-__init void
+-size_zones(unsigned long *z, unsigned long *h,
+-         unsigned long start_pfn, unsigned long end_pfn)
+-{
+-      int i;
+-      unsigned long w;
+-
+-      for (i = 0; i < MAX_NR_ZONES; i++)
+-              z[i] = 0;
+-
+-      if (start_pfn < MAX_DMA_PFN)
+-              z[ZONE_DMA] = MAX_DMA_PFN - start_pfn;
+-      if (start_pfn < MAX_DMA32_PFN) {
+-              unsigned long dma32_pfn = MAX_DMA32_PFN;
+-              if (dma32_pfn > end_pfn)
+-                      dma32_pfn = end_pfn;
+-              z[ZONE_DMA32] = dma32_pfn - start_pfn;
+-      }
+-      z[ZONE_NORMAL] = end_pfn - start_pfn;
+-
+-      /* Remove lower zones from higher ones. */
+-      w = 0;
+-      for (i = 0; i < MAX_NR_ZONES; i++) {
+-              if (z[i])
+-                      z[i] -= w;
+-              w += z[i];
+-      }
+-
+-      /* Compute holes */
+-      w = start_pfn;
+-      for (i = 0; i < MAX_NR_ZONES; i++) {
+-              unsigned long s = w;
+-              w += z[i];
+-              h[i] = e820_hole_size(s, w);
+-      }
+-
+-      /* Add the space pace needed for mem_map to the holes too. */
+-      for (i = 0; i < MAX_NR_ZONES; i++)
+-              h[i] += (z[i] * sizeof(struct page)) / PAGE_SIZE;
+-
+-      /* The 16MB DMA zone has the kernel and other misc mappings.
+-         Account them too */
+-      if (h[ZONE_DMA]) {
+-              h[ZONE_DMA] += dma_reserve;
+-              if (h[ZONE_DMA] >= z[ZONE_DMA]) {
+-                      printk(KERN_WARNING
+-                              "Kernel too large and filling up ZONE_DMA?\n");
+-                      h[ZONE_DMA] = z[ZONE_DMA];
+-              }
+-      }
+-}
+-
+ #ifndef CONFIG_NUMA
+ void __init paging_init(void)
+ {
+-      unsigned long zones[MAX_NR_ZONES], holes[MAX_NR_ZONES];
++      unsigned long max_zone_pfns[MAX_NR_ZONES];
++      memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
++      max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;
++      max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
++      max_zone_pfns[ZONE_NORMAL] = end_pfn;
+       memory_present(0, 0, end_pfn);
+       sparse_init();
+-      size_zones(zones, holes, 0, end_pfn);
+-      free_area_init_node(0, NODE_DATA(0), zones,
+-                          __pa(PAGE_OFFSET) >> PAGE_SHIFT, holes);
++      free_area_init_nodes(max_zone_pfns);
+       init_mm.context.pinned = 1;
+ }
+@@ -891,36 +840,23 @@ void online_page(struct page *page)
+ #ifdef CONFIG_MEMORY_HOTPLUG
+ /*
+- * XXX: memory_add_physaddr_to_nid() is to find node id from physical address
+- *    via probe interface of sysfs. If acpi notifies hot-add event, then it
+- *    can tell node id by searching dsdt. But, probe interface doesn't have
+- *    node id. So, return 0 as node id at this time.
+- */
+-#ifdef CONFIG_NUMA
+-int memory_add_physaddr_to_nid(u64 start)
+-{
+-      return 0;
+-}
+-#endif
+-
+-/*
+  * Memory is added always to NORMAL zone. This means you will never get
+  * additional DMA/DMA32 memory.
+  */
+ int arch_add_memory(int nid, u64 start, u64 size)
+ {
+       struct pglist_data *pgdat = NODE_DATA(nid);
+-      struct zone *zone = pgdat->node_zones + MAX_NR_ZONES-2;
++      struct zone *zone = pgdat->node_zones + ZONE_NORMAL;
+       unsigned long start_pfn = start >> PAGE_SHIFT;
+       unsigned long nr_pages = size >> PAGE_SHIFT;
+       int ret;
++      init_memory_mapping(start, (start + size -1));
++
+       ret = __add_pages(zone, start_pfn, nr_pages);
+       if (ret)
+               goto error;
+-      init_memory_mapping(start, (start + size -1));
+-
+       return ret;
+ error:
+       printk("%s: Problem encountered in __add_pages!\n", __func__);
+@@ -934,7 +870,17 @@ int remove_memory(u64 start, u64 size)
+ }
+ EXPORT_SYMBOL_GPL(remove_memory);
+-#else /* CONFIG_MEMORY_HOTPLUG */
++#if !defined(CONFIG_ACPI_NUMA) && defined(CONFIG_NUMA)
++int memory_add_physaddr_to_nid(u64 start)
++{
++      return 0;
++}
++EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
++#endif
++
++#endif /* CONFIG_MEMORY_HOTPLUG */
++
++#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
+ /*
+  * Memory Hotadd without sparsemem. The mem_maps have been allocated in advance,
+  * just online the pages.
+@@ -960,7 +906,7 @@ int __add_pages(struct zone *z, unsigned
+       }
+       return err;
+ }
+-#endif /* CONFIG_MEMORY_HOTPLUG */
++#endif
+ static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules,
+                        kcore_vsyscall;
+@@ -972,12 +918,6 @@ void __init mem_init(void)
+       pci_iommu_alloc();
+-      /* How many end-of-memory variables you have, grandma! */
+-      max_low_pfn = end_pfn;
+-      max_pfn = end_pfn;
+-      num_physpages = end_pfn;
+-      high_memory = (void *) __va(end_pfn * PAGE_SIZE);
+-
+       /* clear the zero-page */
+       memset(empty_zero_page, 0, PAGE_SIZE);
+@@ -995,7 +935,8 @@ void __init mem_init(void)
+               init_page_count(pfn_to_page(pfn));
+               totalram_pages++;
+       }
+-      reservedpages = end_pfn - totalram_pages - e820_hole_size(0, end_pfn);
++      reservedpages = end_pfn - totalram_pages -
++                                      absent_pages_in_range(0, end_pfn);
+       after_bootmem = 1;
+@@ -1102,15 +1043,32 @@ void free_initrd_mem(unsigned long start
+ void __init reserve_bootmem_generic(unsigned long phys, unsigned len) 
+ { 
+-      /* Should check here against the e820 map to avoid double free */ 
+ #ifdef CONFIG_NUMA
+       int nid = phys_to_nid(phys);
++#endif
++      unsigned long pfn = phys >> PAGE_SHIFT;
++      if (pfn >= end_pfn) {
++              /* This can happen with kdump kernels when accessing firmware
++                 tables. */
++              if (pfn < end_pfn_map)
++                      return;
++              printk(KERN_ERR "reserve_bootmem: illegal reserve %lx %u\n",
++                              phys, len);
++              return;
++      }
++
++      /* Should check here against the e820 map to avoid double free */
++#ifdef CONFIG_NUMA
+       reserve_bootmem_node(NODE_DATA(nid), phys, len);
+ #else                 
+       reserve_bootmem(phys, len);    
+ #endif
+-      if (phys+len <= MAX_DMA_PFN*PAGE_SIZE)
++#ifndef CONFIG_XEN
++      if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) {
+               dma_reserve += len / PAGE_SIZE;
++              set_dma_reserve(dma_reserve);
++      }
++#endif
+ }
+ int kern_addr_valid(unsigned long addr) 
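
[paging_init() above moves from the x86-64-private size_zones() to the arch-independent zone sizing merged around 2.6.19: the arch registers which PFN ranges actually exist, and the core derives zone sizes and holes from them, which is also why absent_pages_in_range() replaces e820_hole_size() in mem_init(). Condensed from the hunks above, the flow is:

	/* 1. declare the usable PFN ranges (per node) */
	e820_register_active_regions(0, start_pfn, end_pfn);

	/* 2. hand the core only per-zone upper bounds ... */
	max_zone_pfns[ZONE_DMA]    = MAX_DMA_PFN;	/* < 16 MB */
	max_zone_pfns[ZONE_DMA32]  = MAX_DMA32_PFN;	/* <  4 GB */
	max_zone_pfns[ZONE_NORMAL] = end_pfn;

	/* 3. ... and let it size zones and holes from the active ranges */
	free_area_init_nodes(max_zone_pfns);

The Xen branch of contig_initmem_init() then frees bootmem only over the guest's real page count via free_bootmem_with_active_regions(0, xen_start_info->nr_pages).]
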
+--- sle11-2009-05-14.orig/arch/x86/mm/pageattr_64-xen.c        2008-12-01 11:25:57.000000000 +0100
++++ sle11-2009-05-14/arch/x86/mm/pageattr_64-xen.c     2009-03-04 11:28:34.000000000 +0100
+@@ -377,8 +377,8 @@ static void revert_page(unsigned long ad
+       BUG_ON(pud_none(*pud));
+       pmd = pmd_offset(pud, address);
+       BUG_ON(__pmd_val(*pmd) & _PAGE_PSE);
+-      pgprot_val(ref_prot) |= _PAGE_PSE;
+       large_pte = mk_pte_phys(__pa(address) & LARGE_PAGE_MASK, ref_prot);
++      large_pte = pte_mkhuge(large_pte);
+       set_pte((pte_t *)pmd, large_pte);
+ }      
+@@ -388,32 +388,28 @@ __change_page_attr(unsigned long address
+ { 
+       pte_t *kpte; 
+       struct page *kpte_page;
+-      unsigned kpte_flags;
+       pgprot_t ref_prot2;
+       kpte = lookup_address(address);
+       if (!kpte) return 0;
+       kpte_page = virt_to_page(((unsigned long)kpte) & PAGE_MASK);
+-      kpte_flags = pte_val(*kpte); 
+       if (pgprot_val(prot) != pgprot_val(ref_prot)) { 
+-              if ((kpte_flags & _PAGE_PSE) == 0) { 
++              if (!pte_huge(*kpte)) {
+                       set_pte(kpte, pfn_pte(pfn, prot));
+               } else {
+                       /*
+                        * split_large_page will take the reference for this
+                        * change_page_attr on the split page.
+                        */
+-
+                       struct page *split;
+-                      ref_prot2 = __pgprot(pgprot_val(pte_pgprot(*lookup_address(address))) & ~(1<<_PAGE_BIT_PSE));
+-
++                      ref_prot2 = pte_pgprot(pte_clrhuge(*kpte));
+                       split = split_large_page(address, prot, ref_prot2);
+                       if (!split)
+                               return -ENOMEM;
+-                      set_pte(kpte,mk_pte(split, ref_prot2));
++                      set_pte(kpte, mk_pte(split, ref_prot2));
+                       kpte_page = split;
+-              }       
++              }
+               page_private(kpte_page)++;
+-      } else if ((kpte_flags & _PAGE_PSE) == 0) { 
++      } else if (!pte_huge(*kpte)) {
+               set_pte(kpte, pfn_pte(pfn, ref_prot));
+               BUG_ON(page_private(kpte_page) == 0);
+               page_private(kpte_page)--;
+@@ -470,10 +466,12 @@ int change_page_attr_addr(unsigned long 
+                * lowmem */
+               if (__pa(address) < KERNEL_TEXT_SIZE) {
+                       unsigned long addr2;
+-                      pgprot_t prot2 = prot;
++                      pgprot_t prot2;
+                       addr2 = __START_KERNEL_map + __pa(address);
+-                      pgprot_val(prot2) &= ~_PAGE_NX;
+-                      err = __change_page_attr(addr2, pfn, prot2, PAGE_KERNEL_EXEC);
++                      /* Make sure the kernel mappings stay executable */
++                      prot2 = pte_pgprot(pte_mkexec(pfn_pte(0, prot)));
++                      err = __change_page_attr(addr2, pfn, prot2,
++                                               PAGE_KERNEL_EXEC);
+               } 
+       }       
+       up_write(&init_mm.mmap_sem); 
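[Note: the pageattr hunks above trade open-coded _PAGE_PSE bit tests for the pte accessor helpers (pte_huge(), pte_clrhuge(), pte_mkhuge()), which keep the PAE and Xen PTE layouts opaque to callers. A minimal sketch of the idiom; the helper name is hypothetical:

    #include <asm/pgtable.h>

    /* Derive the 4k protection to use when splitting a large mapping,
     * without poking _PAGE_PSE by hand. */
    static pgprot_t demo_base_prot(pte_t *kpte)
    {
            if (pte_huge(*kpte))                    /* 2M/4M mapping? */
                    return pte_pgprot(pte_clrhuge(*kpte));
            return pte_pgprot(*kpte);
    }
]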
+--- sle11-2009-05-14.orig/drivers/char/tpm/tpm_xen.c   2009-02-16 15:58:14.000000000 +0100
++++ sle11-2009-05-14/drivers/char/tpm/tpm_xen.c        2009-03-04 11:28:34.000000000 +0100
+@@ -85,8 +85,7 @@ static struct tpm_private *my_priv;
+ /* local function prototypes */
+ static irqreturn_t tpmif_int(int irq,
+-                             void *tpm_priv,
+-                             struct pt_regs *ptregs);
++                             void *tpm_priv);
+ static void tpmif_rx_action(unsigned long unused);
+ static int tpmif_connect(struct xenbus_device *dev,
+                          struct tpm_private *tp,
+@@ -559,7 +558,7 @@ static void tpmif_rx_action(unsigned lon
+ }
+-static irqreturn_t tpmif_int(int irq, void *tpm_priv, struct pt_regs *ptregs)
++static irqreturn_t tpmif_int(int irq, void *tpm_priv)
+ {
+       struct tpm_private *tp = tpm_priv;
+       unsigned long flags;
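[Note: the tpm_xen.c hunk is the first of many below adapting to the 2.6.19 genirq change that drops the struct pt_regs * argument from interrupt handlers; a handler that really needs the interrupted register state now fetches it with get_irq_regs(). A minimal sketch under the new prototype (handler name hypothetical):

    #include <linux/interrupt.h>
    #include <asm/irq_regs.h>

    static irqreturn_t demo_int(int irq, void *dev_id)
    {
            /* Only when the register state is genuinely needed: */
            struct pt_regs *regs = get_irq_regs();

            (void)regs;             /* most handlers never look at it */
            return IRQ_HANDLED;
    }
]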
+--- sle11-2009-05-14.orig/drivers/pci/Kconfig  2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/drivers/pci/Kconfig       2009-03-04 11:28:34.000000000 +0100
+@@ -48,7 +48,7 @@ config PCI_DEBUG
+ config HT_IRQ
+       bool "Interrupts on hypertransport devices"
+       default y
+-      depends on PCI && X86_LOCAL_APIC && X86_IO_APIC
++      depends on PCI && X86_LOCAL_APIC && X86_IO_APIC && !XEN
+       help
+          This allows native hypertransport devices to use interrupts.
+--- sle11-2009-05-14.orig/drivers/pci/msi-xen.c        2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/drivers/pci/msi-xen.c     2009-04-24 13:31:56.000000000 +0200
+@@ -6,6 +6,7 @@
+  * Copyright (C) Tom Long Nguyen (tom.l.nguyen@intel.com)
+  */
++#include <linux/err.h>
+ #include <linux/mm.h>
+ #include <linux/irq.h>
+ #include <linux/interrupt.h>
+@@ -14,6 +15,7 @@
+ #include <linux/smp_lock.h>
+ #include <linux/pci.h>
+ #include <linux/proc_fs.h>
++#include <linux/msi.h>
+ #include <xen/evtchn.h>
+@@ -26,14 +28,6 @@
+ static int pci_msi_enable = 1;
+-static struct msi_ops *msi_ops;
+-
+-int msi_register(struct msi_ops *ops)
+-{
+-      msi_ops = ops;
+-      return 0;
+-}
+-
+ static LIST_HEAD(msi_dev_head);
+ DEFINE_SPINLOCK(msi_dev_lock);
+@@ -406,9 +400,9 @@ void pci_restore_msix_state(struct pci_d
+  * @dev: pointer to the pci_dev data structure of MSI device function
+  *
+  * Setup the MSI capability structure of device function with a single
+- * MSI vector, regardless of device function is capable of handling
++ * MSI irq, regardless of device function is capable of handling
+  * multiple messages. A return of zero indicates the successful setup
+- * of an entry zero with the new MSI vector or non-zero for otherwise.
++ * of an entry zero with the new MSI irq or non-zero for otherwise.
+  **/
+ static int msi_capability_init(struct pci_dev *dev)
+ {
+@@ -422,11 +416,11 @@ static int msi_capability_init(struct pc
+       if (pirq < 0)
+               return -EBUSY;
+-      dev->irq = pirq;
+       /* Set MSI enabled bits  */
+       enable_msi_mode(dev, pos, PCI_CAP_ID_MSI);
+       dev->msi_enabled = 1;
++      dev->irq = pirq;
+       return 0;
+ }
+@@ -437,8 +431,8 @@ static int msi_capability_init(struct pc
+  * @nvec: number of @entries
+  *
+  * Setup the MSI-X capability structure of device function with a
+- * single MSI-X vector. A return of zero indicates the successful setup of
+- * requested MSI-X entries with allocated vectors or non-zero for otherwise.
++ * single MSI-X irq. A return of zero indicates the successful setup of
++ * requested MSI-X entries with allocated irqs or non-zero for otherwise.
+  **/
+ static int msix_capability_init(struct pci_dev *dev,
+                               struct msix_entry *entries, int nvec)
+@@ -480,12 +474,18 @@ static int msix_capability_init(struct p
+       }
+       if (i != nvec) {
++              int avail = i - 1;
+               for (j = --i; j >= 0; j--) {
+                       msi_unmap_pirq(dev, entries[j].vector);
+                       detach_pirq_entry(entries[j].entry, msi_dev_entry);
+                       entries[j].vector = 0;
+               }
+-              return -EBUSY;
++              /* If we had some success report the number of irqs
++               * we succeeded in setting up.
++               */
++              if (avail <= 0)
++                      avail = -EBUSY;
++              return avail;
+       }
+       enable_msi_mode(dev, pos, PCI_CAP_ID_MSIX);
+@@ -495,11 +495,40 @@ static int msix_capability_init(struct p
+ }
+ /**
++ * pci_msi_supported - check whether MSI may be enabled on device
++ * @dev: pointer to the pci_dev data structure of MSI device function
++ *
++ * Look at global flags, the device itself, and its parent busses
++ * to return 0 if MSI are supported for the device.
++ **/
++static
++int pci_msi_supported(struct pci_dev * dev)
++{
++      struct pci_bus *bus;
++
++      /* MSI must be globally enabled and supported by the device */
++      if (!pci_msi_enable || !dev || dev->no_msi)
++              return -EINVAL;
++
++      /* Any bridge which does NOT route MSI transactions from it's
++       * secondary bus to it's primary bus must set NO_MSI flag on
++       * the secondary pci_bus.
++       * We expect only arch-specific PCI host bus controller driver
++       * or quirks for specific PCI bridges to be setting NO_MSI.
++       */
++      for (bus = dev->bus; bus; bus = bus->parent)
++              if (bus->bus_flags & PCI_BUS_FLAGS_NO_MSI)
++                      return -EINVAL;
++
++      return 0;
++}
++
++/**
+  * pci_enable_msi - configure device's MSI capability structure
+  * @dev: pointer to the pci_dev data structure of MSI device function
+  *
+  * Setup the MSI capability structure of device function with
+- * a single MSI vector upon its software driver call to request for
++ * a single MSI irq upon its software driver call to request for
+  * MSI mode enabled on its hardware device function. A return of zero
+  * indicates the successful setup of an entry zero with the new MSI
+  * vector or non-zero for otherwise.
+@@ -507,18 +536,10 @@ static int msix_capability_init(struct p
+ extern int pci_frontend_enable_msi(struct pci_dev *dev);
+ int pci_enable_msi(struct pci_dev* dev)
+ {
+-      struct pci_bus *bus;
+-      int pos, temp, status = -EINVAL;
++      int pos, temp, status;
+-      if (!pci_msi_enable || !dev)
+-              return status;
+-
+-      if (dev->no_msi)
+-              return status;
+-
+-      for (bus = dev->bus; bus; bus = bus->parent)
+-              if (bus->bus_flags & PCI_BUS_FLAGS_NO_MSI)
+-                      return -EINVAL;
++      if (pci_msi_supported(dev) < 0)
++              return -EINVAL;
+       status = msi_init();
+       if (status < 0)
+@@ -547,10 +568,10 @@ int pci_enable_msi(struct pci_dev* dev)
+       if (!pos)
+               return -EINVAL;
+-      /* Check whether driver already requested for MSI-X vectors */
++      /* Check whether driver already requested for MSI-X irqs */
+       if (dev->msix_enabled) {
+               printk(KERN_INFO "PCI: %s: Can't enable MSI.  "
+-                         "Device already has MSI-X vectors assigned\n",
++                     "Device already has MSI-X irq assigned\n",
+                          pci_name(dev));
+               dev->irq = temp;
+               return -EINVAL;
+@@ -602,36 +623,28 @@ void pci_disable_msi(struct pci_dev* dev
+  * pci_enable_msix - configure device's MSI-X capability structure
+  * @dev: pointer to the pci_dev data structure of MSI-X device function
+  * @entries: pointer to an array of MSI-X entries
+- * @nvec: number of MSI-X vectors requested for allocation by device driver
++ * @nvec: number of MSI-X irqs requested for allocation by device driver
+  *
+  * Setup the MSI-X capability structure of device function with the number
+- * of requested vectors upon its software driver call to request for
++ * of requested irqs upon its software driver call to request for
+  * MSI-X mode enabled on its hardware device function. A return of zero
+  * indicates the successful configuration of MSI-X capability structure
+- * with new allocated MSI-X vectors. A return of < 0 indicates a failure.
++ * with new allocated MSI-X irqs. A return of < 0 indicates a failure.
+  * Or a return of > 0 indicates that driver request is exceeding the number
+- * of vectors available. Driver should use the returned value to re-send
++ * of irqs available. Driver should use the returned value to re-send
+  * its request.
+  **/
+ extern int pci_frontend_enable_msix(struct pci_dev *dev,
+               struct msix_entry *entries, int nvec);
+ int pci_enable_msix(struct pci_dev* dev, struct msix_entry *entries, int nvec)
+ {
+-      struct pci_bus *bus;
+       int status, pos, nr_entries;
+       int i, j, temp;
+       u16 control;
+-      if (!pci_msi_enable || !dev || !entries)
++      if (!entries || pci_msi_supported(dev) < 0)
+               return -EINVAL;
+-      if (dev->no_msi)
+-              return -EINVAL;
+-
+-      for (bus = dev->bus; bus; bus = bus->parent)
+-              if (bus->bus_flags & PCI_BUS_FLAGS_NO_MSI)
+-                      return -EINVAL;
+-
+ #ifdef CONFIG_XEN_PCIDEV_FRONTEND
+       if (!is_initial_xendomain()) {
+               struct msi_dev_list *msi_dev_entry;
+@@ -694,7 +707,7 @@ int pci_enable_msix(struct pci_dev* dev,
+       /* Check whether driver already requested for MSI vector */
+       if (dev->msi_enabled) {
+               printk(KERN_INFO "PCI: %s: Can't enable MSI-X.  "
+-                     "Device already has an MSI vector assigned\n",
++                     "Device already has an MSI irq assigned\n",
+                      pci_name(dev));
+               dev->irq = temp;
+               return -EINVAL;
+@@ -757,11 +770,11 @@ void pci_disable_msix(struct pci_dev* de
+ }
+ /**
+- * msi_remove_pci_irq_vectors - reclaim MSI(X) vectors to unused state
++ * msi_remove_pci_irq_vectors - reclaim MSI(X) irqs to unused state
+  * @dev: pointer to the pci_dev data structure of MSI(X) device function
+  *
+  * Being called during hotplug remove, from which the device function
+- * is hot-removed. All previous assigned MSI/MSI-X vectors, if
++ * is hot-removed. All previous assigned MSI/MSI-X irqs, if
+  * allocated for this device function, are reclaimed to unused state,
+  * which may be used later on.
+  **/
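[Note: a consequence of the msix_capability_init() change above is that pci_enable_msix() may now return a positive count, meaning only that many irqs could be allocated, and the kerneldoc asks the driver to retry with a smaller request. A driver-side sketch of that retry loop; the vector count is hypothetical:

    #include <linux/pci.h>

    #define DEMO_NVEC 4                     /* hypothetical */

    static int demo_setup_msix(struct pci_dev *pdev)
    {
            struct msix_entry entries[DEMO_NVEC];
            int i, rc, nvec = DEMO_NVEC;

            for (i = 0; i < DEMO_NVEC; i++)
                    entries[i].entry = i;

            do {
                    rc = pci_enable_msix(pdev, entries, nvec);
                    if (rc > 0)
                            nvec = rc;      /* only rc vectors available */
            } while (rc > 0 && nvec > 0);

            return rc;      /* 0 on success, negative errno on failure */
    }
]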
+--- sle11-2009-05-14.orig/drivers/xen/Kconfig  2008-12-05 08:43:56.000000000 +0100
++++ sle11-2009-05-14/drivers/xen/Kconfig       2009-03-04 11:28:34.000000000 +0100
+@@ -287,6 +287,9 @@ endmenu
+ config HAVE_IRQ_IGNORE_UNHANDLED
+       def_bool y
++config GENERIC_HARDIRQS_NO__DO_IRQ
++      def_bool y
++
+ config NO_IDLE_HZ
+       def_bool y
+--- sle11-2009-05-14.orig/drivers/xen/balloon/balloon.c        2008-11-25 13:31:07.000000000 +0100
++++ sle11-2009-05-14/drivers/xen/balloon/balloon.c     2009-03-04 11:28:34.000000000 +0100
+@@ -84,7 +84,7 @@ static unsigned long frame_list[PAGE_SIZ
+ /* VM /proc information for memory */
+ extern unsigned long totalram_pages;
+-#ifndef MODULE
++#if !defined(MODULE) && defined(CONFIG_HIGHMEM)
+ extern unsigned long totalhigh_pages;
+ #define inc_totalhigh_pages() (totalhigh_pages++)
+ #define dec_totalhigh_pages() (totalhigh_pages--)
+--- sle11-2009-05-14.orig/drivers/xen/blkback/blkback.c        2008-12-01 11:21:10.000000000 +0100
++++ sle11-2009-05-14/drivers/xen/blkback/blkback.c     2009-03-04 11:28:34.000000000 +0100
+@@ -288,7 +288,7 @@ static void blkif_notify_work(blkif_t *b
+       wake_up(&blkif->wq);
+ }
+-irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs)
++irqreturn_t blkif_be_int(int irq, void *dev_id)
+ {
+       blkif_notify_work(dev_id);
+       return IRQ_HANDLED;
+--- sle11-2009-05-14.orig/drivers/xen/blkback/common.h 2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/drivers/xen/blkback/common.h      2009-03-04 11:28:34.000000000 +0100
+@@ -130,7 +130,7 @@ void blkif_interface_init(void);
+ void blkif_xenbus_init(void);
+-irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs);
++irqreturn_t blkif_be_int(int irq, void *dev_id);
+ int blkif_schedule(void *arg);
+ int blkback_barrier(struct xenbus_transaction xbt,
+--- sle11-2009-05-14.orig/drivers/xen/blkfront/blkfront.c      2009-03-05 15:42:00.000000000 +0100
++++ sle11-2009-05-14/drivers/xen/blkfront/blkfront.c   2009-03-24 10:08:16.000000000 +0100
+@@ -70,7 +70,7 @@ static int setup_blkring(struct xenbus_d
+ static void kick_pending_request_queues(struct blkfront_info *);
+-static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs);
++static irqreturn_t blkif_int(int irq, void *dev_id);
+ static void blkif_restart_queue(void *arg);
+ static void blkif_recover(struct blkfront_info *);
+ static void blkif_completion(struct blk_shadow *);
+@@ -707,7 +707,7 @@ void do_blkif_request(request_queue_t *r
+ }
+-static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs)
++static irqreturn_t blkif_int(int irq, void *dev_id)
+ {
+       struct request *req;
+       blkif_response_t *bret;
+--- sle11-2009-05-14.orig/drivers/xen/blktap/blktap.c  2009-04-20 11:36:10.000000000 +0200
++++ sle11-2009-05-14/drivers/xen/blktap/blktap.c       2009-04-20 11:37:34.000000000 +0200
+@@ -1222,7 +1222,7 @@ static void blkif_notify_work(blkif_t *b
+       wake_up(&blkif->wq);
+ }
+-irqreturn_t tap_blkif_be_int(int irq, void *dev_id, struct pt_regs *regs)
++irqreturn_t tap_blkif_be_int(int irq, void *dev_id)
+ {
+       blkif_notify_work(dev_id);
+       return IRQ_HANDLED;
+--- sle11-2009-05-14.orig/drivers/xen/blktap/common.h  2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/drivers/xen/blktap/common.h       2009-03-04 11:28:34.000000000 +0100
+@@ -113,7 +113,7 @@ void tap_blkif_interface_init(void);
+ void tap_blkif_xenbus_init(void);
+-irqreturn_t tap_blkif_be_int(int irq, void *dev_id, struct pt_regs *regs);
++irqreturn_t tap_blkif_be_int(int irq, void *dev_id);
+ int tap_blkif_schedule(void *arg);
+ int dom_to_devid(domid_t domid, int xenbus_id, blkif_t *blkif);
+--- sle11-2009-05-14.orig/drivers/xen/console/console.c        2008-12-15 11:13:47.000000000 +0100
++++ sle11-2009-05-14/drivers/xen/console/console.c     2009-03-04 11:28:34.000000000 +0100
+@@ -361,7 +361,7 @@ static struct tty_struct *xencons_tty;
+ static int xencons_priv_irq;
+ static char x_char;
+-void xencons_rx(char *buf, unsigned len, struct pt_regs *regs)
++void xencons_rx(char *buf, unsigned len)
+ {
+       int           i;
+       unsigned long flags;
+@@ -386,8 +386,7 @@ void xencons_rx(char *buf, unsigned len,
+                               if (time_before(jiffies, sysrq_timeout)) {
+                                       spin_unlock_irqrestore(
+                                               &xencons_lock, flags);
+-                                      handle_sysrq(
+-                                              buf[i], regs, xencons_tty);
++                                      handle_sysrq(buf[i], xencons_tty);
+                                       spin_lock_irqsave(
+                                               &xencons_lock, flags);
+                                       continue;
+@@ -452,14 +451,13 @@ void xencons_tx(void)
+ }
+ /* Privileged receive callback and transmit kicker. */
+-static irqreturn_t xencons_priv_interrupt(int irq, void *dev_id,
+-                                        struct pt_regs *regs)
++static irqreturn_t xencons_priv_interrupt(int irq, void *dev_id)
+ {
+       static char rbuf[16];
+       int         l;
+       while ((l = HYPERVISOR_console_io(CONSOLEIO_read, 16, rbuf)) > 0)
+-              xencons_rx(rbuf, l, regs);
++              xencons_rx(rbuf, l);
+       xencons_tx();
+@@ -647,7 +645,7 @@ static void xencons_close(struct tty_str
+       spin_unlock_irqrestore(&xencons_lock, flags);
+ }
+-static struct tty_operations xencons_ops = {
++static const struct tty_operations xencons_ops = {
+       .open = xencons_open,
+       .close = xencons_close,
+       .write = xencons_write,
+--- sle11-2009-05-14.orig/drivers/xen/console/xencons_ring.c   2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/drivers/xen/console/xencons_ring.c        2009-03-04 11:28:34.000000000 +0100
+@@ -83,7 +83,7 @@ int xencons_ring_send(const char *data, 
+       return sent;
+ }
+-static irqreturn_t handle_input(int irq, void *unused, struct pt_regs *regs)
++static irqreturn_t handle_input(int irq, void *unused)
+ {
+       struct xencons_interface *intf = xencons_interface();
+       XENCONS_RING_IDX cons, prod;
+@@ -94,7 +94,7 @@ static irqreturn_t handle_input(int irq,
+       BUG_ON((prod - cons) > sizeof(intf->in));
+       while (cons != prod) {
+-              xencons_rx(intf->in+MASK_XENCONS_IDX(cons,intf->in), 1, regs);
++              xencons_rx(intf->in+MASK_XENCONS_IDX(cons,intf->in), 1);
+               cons++;
+       }
+--- sle11-2009-05-14.orig/drivers/xen/core/evtchn.c    2009-03-16 16:14:12.000000000 +0100
++++ sle11-2009-05-14/drivers/xen/core/evtchn.c 2009-03-04 11:28:34.000000000 +0100
+@@ -516,7 +516,7 @@ static void unbind_from_irq(unsigned int
+ int bind_caller_port_to_irqhandler(
+       unsigned int caller_port,
+-      irqreturn_t (*handler)(int, void *, struct pt_regs *),
++      irq_handler_t handler,
+       unsigned long irqflags,
+       const char *devname,
+       void *dev_id)
+@@ -539,7 +539,7 @@ EXPORT_SYMBOL_GPL(bind_caller_port_to_ir
+ int bind_listening_port_to_irqhandler(
+       unsigned int remote_domain,
+-      irqreturn_t (*handler)(int, void *, struct pt_regs *),
++      irq_handler_t handler,
+       unsigned long irqflags,
+       const char *devname,
+       void *dev_id)
+@@ -563,7 +563,7 @@ EXPORT_SYMBOL_GPL(bind_listening_port_to
+ int bind_interdomain_evtchn_to_irqhandler(
+       unsigned int remote_domain,
+       unsigned int remote_port,
+-      irqreturn_t (*handler)(int, void *, struct pt_regs *),
++      irq_handler_t handler,
+       unsigned long irqflags,
+       const char *devname,
+       void *dev_id)
+@@ -587,7 +587,7 @@ EXPORT_SYMBOL_GPL(bind_interdomain_evtch
+ int bind_virq_to_irqhandler(
+       unsigned int virq,
+       unsigned int cpu,
+-      irqreturn_t (*handler)(int, void *, struct pt_regs *),
++      irq_handler_t handler,
+       unsigned long irqflags,
+       const char *devname,
+       void *dev_id)
+@@ -611,7 +611,7 @@ EXPORT_SYMBOL_GPL(bind_virq_to_irqhandle
+ int bind_ipi_to_irqhandler(
+       unsigned int ipi,
+       unsigned int cpu,
+-      irqreturn_t (*handler)(int, void *, struct pt_regs *),
++      irq_handler_t handler,
+       unsigned long irqflags,
+       const char *devname,
+       void *dev_id)
+@@ -696,15 +696,7 @@ static unsigned int startup_dynirq(unsig
+       return 0;
+ }
+-static void shutdown_dynirq(unsigned int irq)
+-{
+-      int evtchn = evtchn_from_irq(irq);
+-
+-      if (VALID_EVTCHN(evtchn))
+-              mask_evtchn(evtchn);
+-}
+-
+-static void enable_dynirq(unsigned int irq)
++static void unmask_dynirq(unsigned int irq)
+ {
+       int evtchn = evtchn_from_irq(irq);
+@@ -712,7 +704,7 @@ static void enable_dynirq(unsigned int i
+               unmask_evtchn(evtchn);
+ }
+-static void disable_dynirq(unsigned int irq)
++static void mask_dynirq(unsigned int irq)
+ {
+       int evtchn = evtchn_from_irq(irq);
+@@ -740,12 +732,13 @@ static void end_dynirq(unsigned int irq)
+               unmask_evtchn(evtchn);
+ }
+-static struct hw_interrupt_type dynirq_type = {
+-      .typename = "Dynamic-irq",
++static struct irq_chip dynirq_chip = {
++      .name     = "Dynamic",
+       .startup  = startup_dynirq,
+-      .shutdown = shutdown_dynirq,
+-      .enable   = enable_dynirq,
+-      .disable  = disable_dynirq,
++      .shutdown = mask_dynirq,
++      .mask     = mask_dynirq,
++      .unmask   = unmask_dynirq,
++      .mask_ack = ack_dynirq,
+       .ack      = ack_dynirq,
+       .end      = end_dynirq,
+ #ifdef CONFIG_SMP
+@@ -859,12 +852,12 @@ static void shutdown_pirq(unsigned int i
+       irq_info[irq] = mk_irq_info(IRQT_PIRQ, index_from_irq(irq), 0);
+ }
+-static void enable_pirq(unsigned int irq)
++static void unmask_pirq(unsigned int irq)
+ {
+       startup_pirq(irq);
+ }
+-static void disable_pirq(unsigned int irq)
++static void mask_pirq(unsigned int irq)
+ {
+ }
+@@ -891,12 +884,13 @@ static void end_pirq(unsigned int irq)
+               pirq_unmask_and_notify(evtchn, irq);
+ }
+-static struct hw_interrupt_type pirq_type = {
+-      .typename = "Phys-irq",
++static struct irq_chip pirq_chip = {
++      .name     = "Phys",
+       .startup  = startup_pirq,
+       .shutdown = shutdown_pirq,
+-      .enable   = enable_pirq,
+-      .disable  = disable_pirq,
++      .mask     = mask_pirq,
++      .unmask   = unmask_pirq,
++      .mask_ack = ack_pirq,
+       .ack      = ack_pirq,
+       .end      = end_pirq,
+ #ifdef CONFIG_SMP
+@@ -1081,7 +1075,8 @@ void evtchn_register_pirq(int irq)
+       if (identity_mapped_irq(irq) || type_from_irq(irq) != IRQT_UNBOUND)
+               return;
+       irq_info[irq] = mk_irq_info(IRQT_PIRQ, irq, 0);
+-      irq_desc[irq].chip = &pirq_type;
++      set_irq_chip_and_handler_name(irq, &pirq_chip, handle_level_irq,
++                                    "level");
+ }
+ int evtchn_map_pirq(int irq, int xen_pirq)
+@@ -1104,11 +1099,18 @@ int evtchn_map_pirq(int irq, int xen_pir
+               spin_unlock(&irq_alloc_lock);
+               if (irq < PIRQ_BASE)
+                       return -ENOSPC;
+-              irq_desc[irq].chip = &pirq_type;
++              set_irq_chip_and_handler_name(irq, &pirq_chip,
++                                            handle_level_irq, "level");
+       } else if (!xen_pirq) {
+               if (unlikely(type_from_irq(irq) != IRQT_PIRQ))
+                       return -EINVAL;
+-              irq_desc[irq].chip = &no_irq_type;
++              /*
++               * dynamic_irq_cleanup(irq) would seem to be the correct thing
++               * here, but cannot be used as we get here also during shutdown
++               * when a driver didn't free_irq() its MSI(-X) IRQ(s), which
++               * then causes a warning in dynamic_irq_cleanup().
++               */
++              set_irq_chip_and_handler(irq, NULL, NULL);
+               irq_info[irq] = IRQ_UNBOUND;
+               return 0;
+       } else if (type_from_irq(irq) != IRQT_PIRQ
+@@ -1154,10 +1156,9 @@ void __init xen_init_IRQ(void)
+       for (i = DYNIRQ_BASE; i < (DYNIRQ_BASE + NR_DYNIRQS); i++) {
+               irq_bindcount[i] = 0;
+-              irq_desc[i].status = IRQ_DISABLED|IRQ_NOPROBE;
+-              irq_desc[i].action = NULL;
+-              irq_desc[i].depth = 1;
+-              irq_desc[i].chip = &dynirq_type;
++              irq_desc[i].status |= IRQ_NOPROBE;
++              set_irq_chip_and_handler_name(i, &dynirq_chip,
++                                            handle_level_irq, "level");
+       }
+       /* Phys IRQ space is statically bound (1:1 mapping). Nail refcnts. */
+@@ -1173,9 +1174,7 @@ void __init xen_init_IRQ(void)
+                       continue;
+ #endif
+-              irq_desc[i].status = IRQ_DISABLED;
+-              irq_desc[i].action = NULL;
+-              irq_desc[i].depth = 1;
+-              irq_desc[i].chip = &pirq_type;
++              set_irq_chip_and_handler_name(i, &pirq_chip,
++                                            handle_level_irq, "level");
+       }
+ }
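[Note: the evtchn.c conversion retires hw_interrupt_type (typename/enable/disable/shutdown) in favour of the genirq struct irq_chip, whose flow handler drives .mask/.unmask/.mask_ack, and binds chip and flow handler per IRQ via set_irq_chip_and_handler_name(). A stripped-down sketch of the same pattern, with a hypothetical chip:

    #include <linux/irq.h>

    static void demo_mask(unsigned int irq)   { /* mask the event channel */ }
    static void demo_unmask(unsigned int irq) { /* unmask it */ }
    static void demo_ack(unsigned int irq)    { /* clear the pending bit */ }

    static struct irq_chip demo_chip = {
            .name     = "Demo",
            .mask     = demo_mask,
            .unmask   = demo_unmask,
            .mask_ack = demo_ack,
            .ack      = demo_ack,
    };

    static void demo_register(unsigned int irq)
    {
            /* Level-type flow, matching the hunks above. */
            set_irq_chip_and_handler_name(irq, &demo_chip,
                                          handle_level_irq, "level");
    }
]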
+--- sle11-2009-05-14.orig/drivers/xen/core/reboot.c    2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/drivers/xen/core/reboot.c 2009-03-04 11:28:34.000000000 +0100
+@@ -14,6 +14,7 @@
+ #ifdef HAVE_XEN_PLATFORM_COMPAT_H
+ #include <xen/platform-compat.h>
++#undef handle_sysrq
+ #endif
+ MODULE_LICENSE("Dual BSD/GPL");
+@@ -231,7 +232,7 @@ static void sysrq_handler(struct xenbus_
+ #ifdef CONFIG_MAGIC_SYSRQ
+       if (sysrq_key != '\0')
+-              handle_sysrq(sysrq_key, NULL, NULL);
++              handle_sysrq(sysrq_key, NULL);
+ #endif
+ }
+@@ -245,7 +246,7 @@ static struct xenbus_watch sysrq_watch =
+       .callback = sysrq_handler
+ };
+-static irqreturn_t suspend_int(int irq, void* dev_id, struct pt_regs *ptregs)
++static irqreturn_t suspend_int(int irq, void* dev_id)
+ {
+       switch_shutdown_state(SHUTDOWN_SUSPEND);
+       return IRQ_HANDLED;
+--- sle11-2009-05-14.orig/drivers/xen/core/smpboot.c   2009-04-28 16:02:07.000000000 +0200
++++ sle11-2009-05-14/drivers/xen/core/smpboot.c        2009-03-04 11:28:34.000000000 +0100
+@@ -25,8 +25,8 @@
+ #include <xen/cpu_hotplug.h>
+ #include <xen/xenbus.h>
+-extern irqreturn_t smp_reschedule_interrupt(int, void *, struct pt_regs *);
+-extern irqreturn_t smp_call_function_interrupt(int, void *, struct pt_regs *);
++extern irqreturn_t smp_reschedule_interrupt(int, void *);
++extern irqreturn_t smp_call_function_interrupt(int, void *);
+ extern int local_setup_timer(unsigned int cpu);
+ extern void local_teardown_timer(unsigned int cpu);
+@@ -62,8 +62,6 @@ EXPORT_SYMBOL(cpu_core_map);
+ #if defined(__i386__)
+ u8 x86_cpu_to_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = 0xff };
+ EXPORT_SYMBOL(x86_cpu_to_apicid);
+-#elif !defined(CONFIG_X86_IO_APIC)
+-unsigned int maxcpus = NR_CPUS;
+ #endif
+ void __init prefill_possible_map(void)
+--- sle11-2009-05-14.orig/drivers/xen/fbfront/xenfb.c  2009-02-16 15:59:55.000000000 +0100
++++ sle11-2009-05-14/drivers/xen/fbfront/xenfb.c       2009-03-04 11:28:34.000000000 +0100
+@@ -524,8 +524,7 @@ static struct fb_ops xenfb_fb_ops = {
+       .fb_set_par     = xenfb_set_par,
+ };
+-static irqreturn_t xenfb_event_handler(int rq, void *dev_id,
+-                                     struct pt_regs *regs)
++static irqreturn_t xenfb_event_handler(int rq, void *dev_id)
+ {
+       /*
+        * No in events recognized, simply ignore them all.
+--- sle11-2009-05-14.orig/drivers/xen/fbfront/xenkbd.c 2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/drivers/xen/fbfront/xenkbd.c      2009-03-04 11:28:34.000000000 +0100
+@@ -46,7 +46,7 @@ static void xenkbd_disconnect_backend(st
+  * to do that.
+  */
+-static irqreturn_t input_handler(int rq, void *dev_id, struct pt_regs *regs)
++static irqreturn_t input_handler(int rq, void *dev_id)
+ {
+       struct xenkbd_info *info = dev_id;
+       struct xenkbd_page *page = info->page;
+--- sle11-2009-05-14.orig/drivers/xen/gntdev/gntdev.c  2008-12-15 11:13:45.000000000 +0100
++++ sle11-2009-05-14/drivers/xen/gntdev/gntdev.c       2009-03-04 11:28:34.000000000 +0100
+@@ -752,9 +752,6 @@ static pte_t gntdev_clear_pte(struct vm_
+               BUG();
+       }
+-      /* Copy the existing value of the PTE for returning. */
+-      copy = *ptep;
+-
+       /* Calculate the grant relating to this PTE. */
+       slot_index = vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT);
+@@ -769,6 +766,10 @@ static pte_t gntdev_clear_pte(struct vm_
+                   GNTDEV_INVALID_HANDLE && 
+                   !xen_feature(XENFEAT_auto_translated_physmap)) {
+                       /* NOT USING SHADOW PAGE TABLES. */
++
++                      /* Copy the existing value of the PTE for returning. */
++                      copy = *ptep;
++
+                       gnttab_set_unmap_op(&op, ptep_to_machine(ptep),
+                                           GNTMAP_contains_pte,
+                                           private_data->grants[slot_index]
+@@ -781,7 +782,7 @@ static pte_t gntdev_clear_pte(struct vm_
+                                      op.status);
+               } else {
+                       /* USING SHADOW PAGE TABLES. */
+-                      pte_clear_full(vma->vm_mm, addr, ptep, is_fullmm);
++                      copy = ptep_get_and_clear_full(vma->vm_mm, addr, ptep, is_fullmm);
+               }
+               /* Finally, we unmap the grant from kernel space. */
+@@ -809,7 +810,7 @@ static pte_t gntdev_clear_pte(struct vm_
+                                   >> PAGE_SHIFT, INVALID_P2M_ENTRY);
+       } else {
+-              pte_clear_full(vma->vm_mm, addr, ptep, is_fullmm);
++              copy = ptep_get_and_clear_full(vma->vm_mm, addr, ptep, is_fullmm);
+       }
+       return copy;
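[Note: the gntdev fix matters because the clear-PTE path must return the old PTE to the core VM; pte_clear_full() discards it, while ptep_get_and_clear_full() clears the entry and hands the previous contents back. In sketch form:

    #include <linux/mm.h>
    #include <asm/pgtable.h>

    /* Clear a PTE but keep what it held, so the caller can still
     * inspect the mapping that was just torn down. */
    static pte_t demo_clear_pte(struct mm_struct *mm, unsigned long addr,
                                pte_t *ptep, int is_fullmm)
    {
            return ptep_get_and_clear_full(mm, addr, ptep, is_fullmm);
    }
]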
+--- sle11-2009-05-14.orig/drivers/xen/netback/accel.c  2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/drivers/xen/netback/accel.c       2009-03-04 11:28:34.000000000 +0100
+@@ -65,7 +65,7 @@ static int match_accelerator(struct xenb
+       
+       if (IS_ERR(eth_name)) {
+               /* Probably means not present */
+-              DPRINTK("%s: no match due to xenbus_read accel error %d\n", 
++              DPRINTK("%s: no match due to xenbus_read accel error %ld\n",
+                       __FUNCTION__, PTR_ERR(eth_name));
+               return 0;
+       } else {
+--- sle11-2009-05-14.orig/drivers/xen/netback/common.h 2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/drivers/xen/netback/common.h      2009-03-04 11:28:34.000000000 +0100
+@@ -200,7 +200,7 @@ void netif_deschedule_work(netif_t *neti
+ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev);
+ struct net_device_stats *netif_be_get_stats(struct net_device *dev);
+-irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs);
++irqreturn_t netif_be_int(int irq, void *dev_id);
+ static inline int netbk_can_queue(struct net_device *dev)
+ {
+--- sle11-2009-05-14.orig/drivers/xen/netback/loopback.c       2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/drivers/xen/netback/loopback.c    2009-03-04 11:28:34.000000000 +0100
+@@ -151,7 +151,7 @@ static int loopback_start_xmit(struct sk
+       np->stats.rx_bytes += skb->len;
+       np->stats.rx_packets++;
+-      if (skb->ip_summed == CHECKSUM_HW) {
++      if (skb->ip_summed == CHECKSUM_PARTIAL) {
+               /* Defer checksum calculation. */
+               skb->proto_csum_blank = 1;
+               /* Must be a local packet: assert its integrity. */
+--- sle11-2009-05-14.orig/drivers/xen/netback/netback.c        2008-12-23 09:31:07.000000000 +0100
++++ sle11-2009-05-14/drivers/xen/netback/netback.c     2009-03-04 11:28:34.000000000 +0100
+@@ -692,7 +692,7 @@ static void net_rx_action(unsigned long 
+               id = meta[npo.meta_cons].id;
+               flags = nr_frags ? NETRXF_more_data : 0;
+-              if (skb->ip_summed == CHECKSUM_HW) /* local packet? */
++              if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
+                       flags |= NETRXF_csum_blank | NETRXF_data_validated;
+               else if (skb->proto_data_valid) /* remote but checksummed? */
+                       flags |= NETRXF_data_validated;
+@@ -1459,7 +1459,7 @@ static void netif_page_release(struct pa
+       netif_idx_release(idx);
+ }
+-irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs)
++irqreturn_t netif_be_int(int irq, void *dev_id)
+ {
+       netif_t *netif = dev_id;
+@@ -1526,7 +1526,7 @@ static netif_rx_response_t *make_rx_resp
+ }
+ #ifdef NETBE_DEBUG_INTERRUPT
+-static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs)
++static irqreturn_t netif_be_dbg(int irq, void *dev_id)
+ {
+       struct list_head *ent;
+       netif_t *netif;
+--- sle11-2009-05-14.orig/drivers/xen/netfront/netfront.c      2009-04-09 14:41:33.000000000 +0200
++++ sle11-2009-05-14/drivers/xen/netfront/netfront.c   2009-03-30 16:34:59.000000000 +0200
+@@ -136,7 +136,7 @@ static inline int netif_needs_gso(struct
+ {
+         return skb_is_gso(skb) &&
+                (!skb_gso_ok(skb, dev->features) ||
+-                unlikely(skb->ip_summed != CHECKSUM_HW));
++                unlikely(skb->ip_summed != CHECKSUM_PARTIAL));
+ }
+ #else
+ #define HAVE_GSO                      0
+@@ -222,7 +222,7 @@ static void network_tx_buf_gc(struct net
+ static void network_alloc_rx_buffers(struct net_device *);
+ static void send_fake_arp(struct net_device *);
+-static irqreturn_t netif_int(int irq, void *dev_id, struct pt_regs *ptregs);
++static irqreturn_t netif_int(int irq, void *dev_id);
+ #ifdef CONFIG_SYSFS
+ static int xennet_sysfs_addif(struct net_device *netdev);
+@@ -992,7 +992,7 @@ static int network_start_xmit(struct sk_
+       tx->flags = 0;
+       extra = NULL;
+-      if (skb->ip_summed == CHECKSUM_HW) /* local packet? */
++      if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
+               tx->flags |= NETTXF_csum_blank | NETTXF_data_validated;
+ #ifdef CONFIG_XEN
+       if (skb->proto_data_valid) /* remote but checksummed? */
+@@ -1049,7 +1049,7 @@ static int network_start_xmit(struct sk_
+       return 0;
+ }
+-static irqreturn_t netif_int(int irq, void *dev_id, struct pt_regs *ptregs)
++static irqreturn_t netif_int(int irq, void *dev_id)
+ {
+       struct net_device *dev = dev_id;
+       struct netfront_info *np = netdev_priv(dev);
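[Note: the CHECKSUM_HW tests in the network hunks become CHECKSUM_PARTIAL because 2.6.19 split the old overloaded value into CHECKSUM_PARTIAL (transmit path: the checksum still has to be filled in) and CHECKSUM_COMPLETE (receive path: hardware already summed the payload). A transmit-side test therefore reads:

    #include <linux/skbuff.h>

    /* Sketch: does this locally generated packet still need its
     * checksum computed (deferred/offloaded)? */
    static int demo_needs_csum(const struct sk_buff *skb)
    {
            return skb->ip_summed == CHECKSUM_PARTIAL;
    }
]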
+--- sle11-2009-05-14.orig/drivers/xen/pciback/pciback.h        2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/drivers/xen/pciback/pciback.h     2009-03-04 11:28:34.000000000 +0100
+@@ -87,7 +87,7 @@ int pciback_publish_pci_roots(struct pci
+ void pciback_release_devices(struct pciback_device *pdev);
+ /* Handles events from front-end */
+-irqreturn_t pciback_handle_event(int irq, void *dev_id, struct pt_regs *regs);
++irqreturn_t pciback_handle_event(int irq, void *dev_id);
+ void pciback_do_op(void *data);
+ int pciback_xenbus_register(void);
+--- sle11-2009-05-14.orig/drivers/xen/pciback/pciback_ops.c    2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/drivers/xen/pciback/pciback_ops.c 2009-03-04 11:28:34.000000000 +0100
+@@ -107,7 +107,7 @@ void pciback_do_op(void *data)
+       test_and_schedule_op(pdev);
+ }
+-irqreturn_t pciback_handle_event(int irq, void *dev_id, struct pt_regs *regs)
++irqreturn_t pciback_handle_event(int irq, void *dev_id)
+ {
+       struct pciback_device *pdev = dev_id;
+--- sle11-2009-05-14.orig/drivers/xen/pcifront/pci_op.c        2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/drivers/xen/pcifront/pci_op.c     2009-03-04 11:28:34.000000000 +0100
+@@ -508,10 +508,16 @@ int __devinit pcifront_rescan_root(struc
+               d = pci_scan_single_device(b, devfn);
+               if (d) {
++                      int err;
++
+                       dev_info(&pdev->xdev->dev, "New device on "
+                                "%04x:%02x:%02x.%02x found.\n", domain, bus,
+                                PCI_SLOT(devfn), PCI_FUNC(devfn));
+-                      pci_bus_add_device(d);
++                      err = pci_bus_add_device(d);
++                      if (err)
++                              dev_err(&pdev->xdev->dev,
++                                      "error %d adding device, continuing.\n",
++                                      err);
+               }
+       }
+--- sle11-2009-05-14.orig/drivers/xen/privcmd/compat_privcmd.c 2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/drivers/xen/privcmd/compat_privcmd.c      2009-03-04 11:28:34.000000000 +0100
+@@ -18,7 +18,6 @@
+  * Authors: Jimi Xenidis <jimix@watson.ibm.com>
+  */
+-#include <linux/config.h>
+ #include <linux/compat.h>
+ #include <linux/ioctl.h>
+ #include <linux/syscalls.h>
+--- sle11-2009-05-14.orig/drivers/xen/privcmd/privcmd.c        2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/drivers/xen/privcmd/privcmd.c     2009-03-04 11:28:34.000000000 +0100
+@@ -40,7 +40,7 @@ static int privcmd_enforce_singleshot_ma
+ static long privcmd_ioctl(struct file *file,
+                         unsigned int cmd, unsigned long data)
+ {
+-      int ret = -ENOSYS;
++      long ret = -ENOSYS;
+       void __user *udata = (void __user *) data;
+       switch (cmd) {
+@@ -50,42 +50,15 @@ static long privcmd_ioctl(struct file *f
+               if (copy_from_user(&hypercall, udata, sizeof(hypercall)))
+                       return -EFAULT;
+-#if defined(__i386__)
++#ifdef CONFIG_X86
+               if (hypercall.op >= (PAGE_SIZE >> 5))
+                       break;
+-              __asm__ __volatile__ (
+-                      "pushl %%ebx; pushl %%ecx; pushl %%edx; "
+-                      "pushl %%esi; pushl %%edi; "
+-                      "movl  8(%%eax),%%ebx ;"
+-                      "movl 16(%%eax),%%ecx ;"
+-                      "movl 24(%%eax),%%edx ;"
+-                      "movl 32(%%eax),%%esi ;"
+-                      "movl 40(%%eax),%%edi ;"
+-                      "movl   (%%eax),%%eax ;"
+-                      "shll $5,%%eax ;"
+-                      "addl $hypercall_page,%%eax ;"
+-                      "call *%%eax ;"
+-                      "popl %%edi; popl %%esi; popl %%edx; "
+-                      "popl %%ecx; popl %%ebx"
+-                      : "=a" (ret) : "0" (&hypercall) : "memory" );
+-#elif defined (__x86_64__)
+-              if (hypercall.op < (PAGE_SIZE >> 5)) {
+-                      long ign1, ign2, ign3;
+-                      __asm__ __volatile__ (
+-                              "movq %8,%%r10; movq %9,%%r8;"
+-                              "shll $5,%%eax ;"
+-                              "addq $hypercall_page,%%rax ;"
+-                              "call *%%rax"
+-                              : "=a" (ret), "=D" (ign1),
+-                                "=S" (ign2), "=d" (ign3)
+-                              : "0" ((unsigned int)hypercall.op),
+-                              "1" (hypercall.arg[0]),
+-                              "2" (hypercall.arg[1]),
+-                              "3" (hypercall.arg[2]),
+-                              "g" (hypercall.arg[3]),
+-                              "g" (hypercall.arg[4])
+-                              : "r8", "r10", "memory" );
+-              }
++              ret = _hypercall(long, (unsigned int)hypercall.op,
++                               (unsigned long)hypercall.arg[0],
++                               (unsigned long)hypercall.arg[1],
++                               (unsigned long)hypercall.arg[2],
++                               (unsigned long)hypercall.arg[3],
++                               (unsigned long)hypercall.arg[4]);
+ #else
+               ret = privcmd_hypercall(&hypercall);
+ #endif
+@@ -306,7 +279,7 @@ static int privcmd_mmap(struct file * fi
+               return -ENOSYS;
+       /* DONTCOPY is essential for Xen as copy_page_range is broken. */
+-      vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY;
++      vma->vm_flags |= VM_RESERVED | VM_IO | VM_PFNMAP | VM_DONTCOPY;
+       vma->vm_ops = &privcmd_vm_ops;
+       vma->vm_private_data = NULL;
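[Note: the privcmd rewrite funnels both i386 and x86-64 through one generic _hypercall() macro (its 32-bit definition appears in the hypercall_32.h hunk further down) instead of maintaining two blocks of inline assembly. Usage, in sketch form; hypercall_page is assumed to be the usual Xen hypercall trampoline page:

    /* Sketch, assuming the _hypercall() macro from hypercall_32.h below:
     * op selects the 32-byte stub inside hypercall_page, and the five
     * arguments land in ebx/ecx/edx/esi/edi via the register asm vars. */
    static long demo_do_hypercall(unsigned int op, unsigned long a1,
                                  unsigned long a2, unsigned long a3,
                                  unsigned long a4, unsigned long a5)
    {
            return _hypercall(long, op, a1, a2, a3, a4, a5);
    }
]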
+--- sle11-2009-05-14.orig/drivers/xen/scsiback/common.h        2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/drivers/xen/scsiback/common.h     2009-03-04 11:28:34.000000000 +0100
+@@ -142,7 +142,7 @@ typedef struct {
+ #define VSCSIIF_TIMEOUT               (900*HZ)
+-irqreturn_t scsiback_intr(int, void *, struct pt_regs *);
++irqreturn_t scsiback_intr(int, void *);
+ int scsiback_init_sring(struct vscsibk_info *info,
+               unsigned long ring_ref, unsigned int evtchn);
+ int scsiback_schedule(void *data);
+--- sle11-2009-05-14.orig/drivers/xen/scsiback/scsiback.c      2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/drivers/xen/scsiback/scsiback.c   2009-03-04 11:28:34.000000000 +0100
+@@ -440,7 +440,7 @@ void scsiback_cmd_exec(pending_req_t *pe
+       write = (data_dir == DMA_TO_DEVICE);
+       rq = blk_get_request(pending_req->sdev->request_queue, write, GFP_KERNEL);
+-      rq->flags  |= REQ_BLOCK_PC;
++      rq->cmd_type = REQ_TYPE_BLOCK_PC;
+       rq->cmd_len = cmd_len;
+       memcpy(rq->cmd, pending_req->cmnd, cmd_len);
+@@ -484,7 +484,7 @@ static void scsiback_device_reset_exec(p
+ }
+-irqreturn_t scsiback_intr(int irq, void *dev_id, struct pt_regs *regs)
++irqreturn_t scsiback_intr(int irq, void *dev_id)
+ {
+       scsiback_notify_work((struct vscsibk_info *)dev_id);
+       return IRQ_HANDLED;
+--- sle11-2009-05-14.orig/drivers/xen/scsifront/common.h       2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/drivers/xen/scsifront/common.h    2009-03-04 11:28:34.000000000 +0100
+@@ -122,7 +122,7 @@ struct vscsifrnt_info {
+ int scsifront_xenbus_init(void);
+ void scsifront_xenbus_unregister(void);
+ int scsifront_schedule(void *data);
+-irqreturn_t scsifront_intr(int irq, void *dev_id, struct pt_regs *ptregs);
++irqreturn_t scsifront_intr(int irq, void *dev_id);
+ int scsifront_cmd_done(struct vscsifrnt_info *info);
+--- sle11-2009-05-14.orig/drivers/xen/scsifront/scsifront.c    2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/drivers/xen/scsifront/scsifront.c 2009-03-04 11:28:34.000000000 +0100
+@@ -100,7 +100,7 @@ static void scsifront_do_request(struct 
+               notify_remote_via_irq(irq);
+ }
+-irqreturn_t scsifront_intr(int irq, void *dev_id, struct pt_regs *ptregs)
++irqreturn_t scsifront_intr(int irq, void *dev_id)
+ {
+       scsifront_notify_work((struct vscsifrnt_info *)dev_id);
+       return IRQ_HANDLED;
+--- sle11-2009-05-14.orig/drivers/xen/sfc_netback/accel_xenbus.c       2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/drivers/xen/sfc_netback/accel_xenbus.c    2009-03-04 11:28:34.000000000 +0100
+@@ -68,8 +68,7 @@ static void unlink_bend(struct netback_a
+ /* Demultiplex a message IRQ from the frontend driver.  */
+-static irqreturn_t msgirq_from_frontend(int irq, void *context, 
+-                                   struct pt_regs *unused)
++static irqreturn_t msgirq_from_frontend(int irq, void *context)
+ {
+       struct xenbus_device *dev = context;
+       struct netback_accel *bend = NETBACK_ACCEL_FROM_XENBUS_DEVICE(dev);
+@@ -84,8 +83,7 @@ static irqreturn_t msgirq_from_frontend(
+  * functionally, but we need it to pass to the bind function, and may
+  * get called spuriously
+  */
+-static irqreturn_t netirq_from_frontend(int irq, void *context, 
+-                                      struct pt_regs *unused)
++static irqreturn_t netirq_from_frontend(int irq, void *context)
+ {
+       VPRINTK("netirq %d from device %s\n", irq,
+               ((struct xenbus_device *)context)->nodename);
+--- sle11-2009-05-14.orig/drivers/xen/sfc_netfront/accel.h     2009-04-09 14:41:38.000000000 +0200
++++ sle11-2009-05-14/drivers/xen/sfc_netfront/accel.h  2009-03-30 16:34:56.000000000 +0200
+@@ -451,10 +451,8 @@ void netfront_accel_msg_tx_fastpath(netf
+                                   u32 ip, u16 port, u8 protocol);
+ /* Process an IRQ received from back end driver */
+-irqreturn_t netfront_accel_msg_channel_irq_from_bend(int irq, void *context, 
+-                                                   struct pt_regs *unused);
+-irqreturn_t netfront_accel_net_channel_irq_from_bend(int irq, void *context, 
+-                                                   struct pt_regs *unused);
++irqreturn_t netfront_accel_msg_channel_irq_from_bend(int irq, void *context);
++irqreturn_t netfront_accel_net_channel_irq_from_bend(int irq, void *context);
+ #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)
+ extern void netfront_accel_msg_from_bend(struct work_struct *context);
+--- sle11-2009-05-14.orig/drivers/xen/sfc_netfront/accel_msg.c 2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/drivers/xen/sfc_netfront/accel_msg.c      2009-03-04 11:28:34.000000000 +0100
+@@ -490,8 +490,7 @@ void netfront_accel_msg_from_bend(void *
+ }
+-irqreturn_t netfront_accel_msg_channel_irq_from_bend(int irq, void *context, 
+-                                               struct pt_regs *unused)
++irqreturn_t netfront_accel_msg_channel_irq_from_bend(int irq, void *context)
+ {
+       netfront_accel_vnic *vnic = (netfront_accel_vnic *)context;
+       VPRINTK("irq %d from device %s\n", irq, vnic->dev->nodename);
+@@ -502,8 +501,7 @@ irqreturn_t netfront_accel_msg_channel_i
+ }
+ /* Process an interrupt received from the NIC via backend */
+-irqreturn_t netfront_accel_net_channel_irq_from_bend(int irq, void *context, 
+-                                                   struct pt_regs *unused)
++irqreturn_t netfront_accel_net_channel_irq_from_bend(int irq, void *context)
+ {
+       netfront_accel_vnic *vnic = (netfront_accel_vnic *)context;
+       struct net_device *net_dev = vnic->net_dev;
+--- sle11-2009-05-14.orig/drivers/xen/sfc_netfront/accel_tso.c 2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/drivers/xen/sfc_netfront/accel_tso.c      2009-03-04 11:28:34.000000000 +0100
+@@ -363,7 +363,7 @@ int netfront_accel_enqueue_skb_tso(netfr
+       tso_check_safe(skb);
+-      if (skb->ip_summed != CHECKSUM_HW)
++      if (skb->ip_summed != CHECKSUM_PARTIAL)
+               EPRINTK("Trying to TSO send a packet without HW checksum\n");
+       tso_start(&state, skb);
+--- sle11-2009-05-14.orig/drivers/xen/sfc_netfront/accel_vi.c  2009-04-09 14:41:38.000000000 +0200
++++ sle11-2009-05-14/drivers/xen/sfc_netfront/accel_vi.c       2009-03-30 16:35:11.000000000 +0200
+@@ -461,7 +461,7 @@ netfront_accel_enqueue_skb_multi(netfron
+       frag_i = -1;
+-      if (skb->ip_summed == CHECKSUM_HW) {
++      if (skb->ip_summed == CHECKSUM_PARTIAL) {
+               /* Set to zero to encourage falcon to work it out for us */
+               *(u16*)(skb->h.raw + skb->csum) = 0;
+       }
+@@ -580,7 +580,7 @@ netfront_accel_enqueue_skb_single(netfro
+       
+       kva = buf->pkt_kva;
+-      if (skb->ip_summed == CHECKSUM_HW) {
++      if (skb->ip_summed == CHECKSUM_PARTIAL) {
+               /* Set to zero to encourage falcon to work it out for us */
+               *(u16*)(skb->h.raw + skb->csum) = 0;
+       }
+--- sle11-2009-05-14.orig/drivers/xen/tpmback/common.h 2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/drivers/xen/tpmback/common.h      2009-03-04 11:28:34.000000000 +0100
+@@ -61,7 +61,7 @@ void tpmif_deschedule_work(tpmif_t * tpm
+ void tpmif_xenbus_init(void);
+ void tpmif_xenbus_exit(void);
+ int tpmif_map(tpmif_t *tpmif, unsigned long shared_page, unsigned int evtchn);
+-irqreturn_t tpmif_be_int(int irq, void *dev_id, struct pt_regs *regs);
++irqreturn_t tpmif_be_int(int irq, void *dev_id);
+ long int tpmback_get_instance(struct backend_info *bi);
+--- sle11-2009-05-14.orig/drivers/xen/tpmback/tpmback.c        2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/drivers/xen/tpmback/tpmback.c     2009-03-04 11:28:34.000000000 +0100
+@@ -502,7 +502,7 @@ static ssize_t vtpm_op_read(struct file 
+               list_del(&pak->next);
+               write_unlock_irqrestore(&dataex.pak_lock, flags);
+-              DPRINTK("size given by app: %d, available: %d\n", size, left);
++              DPRINTK("size given by app: %zu, available: %u\n", size, left);
+               ret_size = min_t(size_t, size, left);
+@@ -899,7 +899,7 @@ static void tpm_tx_action(unsigned long 
+       }
+ }
+-irqreturn_t tpmif_be_int(int irq, void *dev_id, struct pt_regs *regs)
++irqreturn_t tpmif_be_int(int irq, void *dev_id)
+ {
+       tpmif_t *tpmif = (tpmif_t *) dev_id;
+--- sle11-2009-05-14.orig/drivers/xen/xenbus/xenbus_comms.c    2008-11-25 12:35:56.000000000 +0100
++++ sle11-2009-05-14/drivers/xen/xenbus/xenbus_comms.c 2009-03-04 11:28:34.000000000 +0100
+@@ -55,7 +55,7 @@ static DECLARE_WORK(probe_work, xenbus_p
+ static DECLARE_WAIT_QUEUE_HEAD(xb_waitq);
+-static irqreturn_t wake_waiting(int irq, void *unused, struct pt_regs *regs)
++static irqreturn_t wake_waiting(int irq, void *unused)
+ {
+       if (unlikely(xenstored_ready == 0)) {
+               xenstored_ready = 1;
+--- sle11-2009-05-14.orig/drivers/xen/xenoprof/xenoprofile.c   2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/drivers/xen/xenoprof/xenoprofile.c        2009-03-04 11:28:34.000000000 +0100
+@@ -194,8 +194,7 @@ done:
+               oprofile_add_domain_switch(COORDINATOR_DOMAIN);
+ }
+-static irqreturn_t 
+-xenoprof_ovf_interrupt(int irq, void * dev_id, struct pt_regs * regs)
++static irqreturn_t xenoprof_ovf_interrupt(int irq, void *dev_id)
+ {
+       struct xenoprof_buf * buf;
+       static unsigned long flag;
+--- sle11-2009-05-14.orig/include/asm-generic/pgtable.h        2009-02-16 15:58:14.000000000 +0100
++++ sle11-2009-05-14/include/asm-generic/pgtable.h     2009-03-04 11:28:34.000000000 +0100
+@@ -100,7 +100,7 @@ static inline void ptep_set_wrprotect(st
+ #endif
+ #ifndef arch_change_pte_range
+-#define arch_change_pte_range(mm, pmd, addr, end, newprot) 0
++#define arch_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable) 0
+ #endif
+ #ifndef __HAVE_ARCH_PTE_SAME
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/desc_32.h       2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/desc_32.h    2009-03-04 11:28:34.000000000 +0100
+@@ -32,52 +32,110 @@ static inline struct desc_struct *get_cp
+       return (struct desc_struct *)per_cpu(cpu_gdt_descr, cpu).address;
+ }
++/*
++ * This is the ldt that every process will get unless we need
++ * something other than this.
++ */
++extern struct desc_struct default_ldt[];
++extern struct desc_struct idt_table[];
++extern void set_intr_gate(unsigned int irq, void * addr);
++
++static inline void pack_descriptor(__u32 *a, __u32 *b,
++      unsigned long base, unsigned long limit, unsigned char type, unsigned char flags)
++{
++      *a = ((base & 0xffff) << 16) | (limit & 0xffff);
++      *b = (base & 0xff000000) | ((base & 0xff0000) >> 16) |
++              (limit & 0x000f0000) | ((type & 0xff) << 8) | ((flags & 0xf) << 20);
++}
++
++static inline void pack_gate(__u32 *a, __u32 *b,
++      unsigned long base, unsigned short seg, unsigned char type, unsigned char flags)
++{
++      *a = (seg << 16) | (base & 0xffff);
++      *b = (base & 0xffff0000) | ((type & 0xff) << 8) | (flags & 0xff);
++}
++
++#define DESCTYPE_LDT  0x82    /* present, system, DPL-0, LDT */
++#define DESCTYPE_TSS  0x89    /* present, system, DPL-0, 32-bit TSS */
++#define DESCTYPE_TASK 0x85    /* present, system, DPL-0, task gate */
++#define DESCTYPE_INT  0x8e    /* present, system, DPL-0, interrupt gate */
++#define DESCTYPE_TRAP 0x8f    /* present, system, DPL-0, trap gate */
++#define DESCTYPE_DPL3 0x60    /* DPL-3 */
++#define DESCTYPE_S    0x10    /* !system */
++
+ #define load_TR_desc() __asm__ __volatile__("ltr %w0"::"q" (GDT_ENTRY_TSS*8))
+ #define load_LDT_desc() __asm__ __volatile__("lldt %w0"::"q" (GDT_ENTRY_LDT*8))
+ #define load_gdt(dtr) __asm__ __volatile("lgdt %0"::"m" (*dtr))
+ #define load_idt(dtr) __asm__ __volatile("lidt %0"::"m" (*dtr))
+-#define load_tr(tr) __asm__ __volatile("ltr %0"::"mr" (tr))
+-#define load_ldt(ldt) __asm__ __volatile("lldt %0"::"mr" (ldt))
++#define load_tr(tr) __asm__ __volatile("ltr %0"::"m" (tr))
++#define load_ldt(ldt) __asm__ __volatile("lldt %0"::"m" (ldt))
+ #define store_gdt(dtr) __asm__ ("sgdt %0":"=m" (*dtr))
+ #define store_idt(dtr) __asm__ ("sidt %0":"=m" (*dtr))
+-#define store_tr(tr) __asm__ ("str %0":"=mr" (tr))
+-#define store_ldt(ldt) __asm__ ("sldt %0":"=mr" (ldt))
++#define store_tr(tr) __asm__ ("str %0":"=m" (tr))
++#define store_ldt(ldt) __asm__ ("sldt %0":"=m" (ldt))
+-/*
+- * This is the ldt that every process will get unless we need
+- * something other than this.
+- */
+-extern struct desc_struct default_ldt[];
+-extern void set_intr_gate(unsigned int irq, void * addr);
++#if TLS_SIZE != 24
++# error update this code.
++#endif
++
++static inline void load_TLS(struct thread_struct *t, unsigned int cpu)
++{
++#define C(i) if (HYPERVISOR_update_descriptor(virt_to_machine(&get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i]), \
++                                             *(u64 *)&t->tls_array[i])) \
++              BUG();
++      C(0); C(1); C(2);
++#undef C
++}
+-#define _set_tssldt_desc(n,addr,limit,type) \
+-__asm__ __volatile__ ("movw %w3,0(%2)\n\t" \
+-      "movw %w1,2(%2)\n\t" \
+-      "rorl $16,%1\n\t" \
+-      "movb %b1,4(%2)\n\t" \
+-      "movb %4,5(%2)\n\t" \
+-      "movb $0,6(%2)\n\t" \
+-      "movb %h1,7(%2)\n\t" \
+-      "rorl $16,%1" \
+-      : "=m"(*(n)) : "q" (addr), "r"(n), "ir"(limit), "i"(type))
++#ifndef CONFIG_XEN
++static inline void write_dt_entry(void *dt, int entry, __u32 entry_a, __u32 entry_b)
++{
++      __u32 *lp = (__u32 *)((char *)dt + entry*8);
++      *lp = entry_a;
++      *(lp+1) = entry_b;
++}
+-#ifndef CONFIG_X86_NO_TSS
+-static inline void __set_tss_desc(unsigned int cpu, unsigned int entry, void *addr)
++#define write_ldt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
++#define write_gdt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
++#else
++extern int write_ldt_entry(void *ldt, int entry, __u32 entry_a, __u32 entry_b);
++extern int write_gdt_entry(void *gdt, int entry, __u32 entry_a, __u32 entry_b);
++#endif
++#ifndef CONFIG_X86_NO_IDT
++#define write_idt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
++
++static inline void _set_gate(int gate, unsigned int type, void *addr, unsigned short seg)
+ {
+-      _set_tssldt_desc(&get_cpu_gdt_table(cpu)[entry], (int)addr,
+-              offsetof(struct tss_struct, __cacheline_filler) - 1, 0x89);
++      __u32 a, b;
++      pack_gate(&a, &b, (unsigned long)addr, seg, type, 0);
++      write_idt_entry(idt_table, gate, a, b);
+ }
++#endif
+-#define set_tss_desc(cpu,addr) __set_tss_desc(cpu, GDT_ENTRY_TSS, addr)
++#ifndef CONFIG_X86_NO_TSS
++static inline void __set_tss_desc(unsigned int cpu, unsigned int entry, const void *addr)
++{
++      __u32 a, b;
++      pack_descriptor(&a, &b, (unsigned long)addr,
++                      offsetof(struct tss_struct, __cacheline_filler) - 1,
++                      DESCTYPE_TSS, 0);
++      write_gdt_entry(get_cpu_gdt_table(cpu), entry, a, b);
++}
+ #endif
+-static inline void set_ldt_desc(unsigned int cpu, void *addr, unsigned int size)
++static inline void set_ldt_desc(unsigned int cpu, void *addr, unsigned int entries)
+ {
+-      _set_tssldt_desc(&get_cpu_gdt_table(cpu)[GDT_ENTRY_LDT], (int)addr, ((size << 3)-1), 0x82);
++      __u32 a, b;
++      pack_descriptor(&a, &b, (unsigned long)addr,
++                      entries * sizeof(struct desc_struct) - 1,
++                      DESCTYPE_LDT, 0);
++      write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT, a, b);
+ }
++#define set_tss_desc(cpu,addr) __set_tss_desc(cpu, GDT_ENTRY_TSS, addr)
++
+ #define LDT_entry_a(info) \
+       ((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff))
+@@ -103,21 +161,6 @@ static inline void set_ldt_desc(unsigned
+       (info)->seg_not_present == 1    && \
+       (info)->useable         == 0    )
+-extern int write_ldt_entry(void *ldt, int entry, __u32 entry_a, __u32 entry_b);
+-
+-#if TLS_SIZE != 24
+-# error update this code.
+-#endif
+-
+-static inline void load_TLS(struct thread_struct *t, unsigned int cpu)
+-{
+-#define C(i) if (HYPERVISOR_update_descriptor(virt_to_machine(&get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i]), \
+-                                             *(u64 *)&t->tls_array[i])) \
+-              BUG();
+-      C(0); C(1); C(2);
+-#undef C
+-}
+-
+ static inline void clear_LDT(void)
+ {
+       int cpu = get_cpu();
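[Note: the desc_32.h rewrite replaces the _set_tssldt_desc() inline assembly with pack_descriptor()/pack_gate(), which assemble the two 32-bit words of a GDT or IDT entry in plain C and then go through write_gdt_entry()/write_idt_entry() (routed via HYPERVISOR_update_descriptor on Xen). A sketch mirroring set_ldt_desc() from the hunk above; base and entry count are placeholders:

    #include <asm/desc.h>

    static void demo_install_ldt(unsigned int cpu, void *base,
                                 unsigned int entries)
    {
            __u32 a, b;

            pack_descriptor(&a, &b, (unsigned long)base,
                            entries * sizeof(struct desc_struct) - 1,
                            DESCTYPE_LDT, 0);   /* 0x82: present, system LDT */
            write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT, a, b);
    }
]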
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/fixmap_32.h     2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/fixmap_32.h  2009-03-04 11:28:34.000000000 +0100
+@@ -55,7 +55,7 @@ enum fixed_addresses {
+ #ifdef CONFIG_X86_LOCAL_APIC
+       FIX_APIC_BASE,  /* local (CPU) APIC) -- required for SMP or not */
+ #endif
+-#ifdef CONFIG_X86_IO_APIC
++#if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_XEN)
+       FIX_IO_APIC_BASE_0,
+       FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS-1,
+ #endif
+@@ -95,10 +95,9 @@ enum fixed_addresses {
+       __end_of_fixed_addresses
+ };
+-extern void set_fixaddr_top(unsigned long top);
+-
+ extern void __set_fixmap(enum fixed_addresses idx,
+                                       maddr_t phys, pgprot_t flags);
++extern void reserve_top_address(unsigned long reserve);
+ #define set_fixmap(idx, phys) \
+               __set_fixmap(idx, phys, PAGE_KERNEL)
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/hypercall_32.h  2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/hypercall_32.h       2009-03-04 11:28:34.000000000 +0100
+@@ -128,6 +128,23 @@
+       __res;                                                  \
+ })
++#define _hypercall(type, op, a1, a2, a3, a4, a5)              \
++({                                                            \
++      type __res;                                             \
++      register typeof((a1)+0) __arg1 asm("ebx") = (a1);       \
++      register typeof((a2)+0) __arg2 asm("ecx") = (a2);       \
++      register typeof((a3)+0) __arg3 asm("edx") = (a3);       \
++      register typeof((a4)+0) __arg4 asm("esi") = (a4);       \
++      register typeof((a5)+0) __arg5 asm("edi") = (a5);       \
++      asm volatile (                                          \
++              "call *%6"                                      \
++              : "=a" (__res), "+r" (__arg1), "+r" (__arg2),   \
++                "+r" (__arg3), "+r" (__arg4), "+r" (__arg5)   \
++              : "0" (hypercall_page + (op) * 32)              \
++              : "memory" );                                   \
++      __res;                                                  \
++})
++
+ static inline int __must_check
+ HYPERVISOR_set_trap_table(
+       const trap_info_t *table)
+@@ -140,6 +157,8 @@ HYPERVISOR_mmu_update(
+       mmu_update_t *req, unsigned int count, unsigned int *success_count,
+       domid_t domid)
+ {
++      if (arch_use_lazy_mmu_mode())
++              return xen_multi_mmu_update(req, count, success_count, domid);
+       return _hypercall4(int, mmu_update, req, count, success_count, domid);
+ }
+@@ -148,6 +167,8 @@ HYPERVISOR_mmuext_op(
+       struct mmuext_op *op, unsigned int count, unsigned int *success_count,
+       domid_t domid)
+ {
++      if (arch_use_lazy_mmu_mode())
++              return xen_multi_mmuext_op(op, count, success_count, domid);
+       return _hypercall4(int, mmuext_op, op, count, success_count, domid);
+ }
+@@ -238,6 +259,8 @@ static inline int __must_check
+ HYPERVISOR_memory_op(
+       unsigned int cmd, void *arg)
+ {
++      if (arch_use_lazy_mmu_mode())
++              xen_multicall_flush(false);
+       return _hypercall2(int, memory_op, cmd, arg);
+ }
+@@ -253,6 +276,9 @@ HYPERVISOR_update_va_mapping(
+       unsigned long va, pte_t new_val, unsigned long flags)
+ {
+       unsigned long pte_hi = 0;
++
++      if (arch_use_lazy_mmu_mode())
++              return xen_multi_update_va_mapping(va, new_val, flags);
+ #ifdef CONFIG_X86_PAE
+       pte_hi = new_val.pte_high;
+ #endif
+@@ -316,6 +342,8 @@ static inline int __must_check
+ HYPERVISOR_grant_table_op(
+       unsigned int cmd, void *uop, unsigned int count)
+ {
++      if (arch_use_lazy_mmu_mode())
++              xen_multicall_flush(false);
+       return _hypercall3(int, grant_table_op, cmd, uop, count);
+ }
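
/*
 * Editor's note: a self-contained sketch of the technique the _hypercall()
 * macro above introduces -- GCC explicit-register variables pin each
 * argument to the register the 32-bit Xen hypercall ABI expects (ebx, ecx,
 * edx, esi, edi), and control transfers via an indirect call into the
 * per-op slot of the hypercall page (32 bytes per hypercall).  Names and
 * the two-argument shape are illustrative.
 */
static inline long sketch_hypercall2(const void *entry, long a1, long a2)
{
        long res;
        register long r1 asm("ebx") = a1;       /* arg 1 must live in %ebx */
        register long r2 asm("ecx") = a2;       /* arg 2 must live in %ecx */

        asm volatile("call *%3"
                     : "=a" (res), "+r" (r1), "+r" (r2)
                     : "r" (entry)
                     : "memory");
        return res;
}
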
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/hypercall_64.h  2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/hypercall_64.h       2009-03-04 11:28:34.000000000 +0100
+@@ -134,6 +134,23 @@
+       __res;                                                  \
+ })
++#define _hypercall(type, op, a1, a2, a3, a4, a5)              \
++({                                                            \
++      type __res;                                             \
++      register typeof((a1)+0) __arg1 asm("rdi") = (a1);       \
++      register typeof((a2)+0) __arg2 asm("rsi") = (a2);       \
++      register typeof((a3)+0) __arg3 asm("rdx") = (a3);       \
++      register typeof((a4)+0) __arg4 asm("r10") = (a4);       \
++      register typeof((a5)+0) __arg5 asm("r8") = (a5);        \
++      asm volatile (                                          \
++              "call *%6"                                      \
++              : "=a" (__res), "+r" (__arg1), "+r" (__arg2),   \
++                "+r" (__arg3), "+r" (__arg4), "+r" (__arg5)   \
++              : "0" (hypercall_page + (op) * 32)              \
++              : "memory" );                                   \
++      __res;                                                  \
++})
++
+ static inline int __must_check
+ HYPERVISOR_set_trap_table(
+       const trap_info_t *table)
+@@ -146,6 +163,8 @@ HYPERVISOR_mmu_update(
+       mmu_update_t *req, unsigned int count, unsigned int *success_count,
+       domid_t domid)
+ {
++      if (arch_use_lazy_mmu_mode())
++              return xen_multi_mmu_update(req, count, success_count, domid);
+       return _hypercall4(int, mmu_update, req, count, success_count, domid);
+ }
+@@ -154,6 +173,8 @@ HYPERVISOR_mmuext_op(
+       struct mmuext_op *op, unsigned int count, unsigned int *success_count,
+       domid_t domid)
+ {
++      if (arch_use_lazy_mmu_mode())
++              return xen_multi_mmuext_op(op, count, success_count, domid);
+       return _hypercall4(int, mmuext_op, op, count, success_count, domid);
+ }
+@@ -241,6 +262,8 @@ static inline int __must_check
+ HYPERVISOR_memory_op(
+       unsigned int cmd, void *arg)
+ {
++      if (arch_use_lazy_mmu_mode())
++              xen_multicall_flush(false);
+       return _hypercall2(int, memory_op, cmd, arg);
+ }
+@@ -255,6 +278,8 @@ static inline int __must_check
+ HYPERVISOR_update_va_mapping(
+       unsigned long va, pte_t new_val, unsigned long flags)
+ {
++      if (arch_use_lazy_mmu_mode())
++              return xen_multi_update_va_mapping(va, new_val, flags);
+       return _hypercall3(int, update_va_mapping, va, new_val.pte, flags);
+ }
+@@ -314,6 +339,8 @@ static inline int __must_check
+ HYPERVISOR_grant_table_op(
+       unsigned int cmd, void *uop, unsigned int count)
+ {
++      if (arch_use_lazy_mmu_mode())
++              xen_multicall_flush(false);
+       return _hypercall3(int, grant_table_op, cmd, uop, count);
+ }
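
/*
 * Editor's note: the 64-bit _hypercall() above is structurally identical
 * to the 32-bit one; only the register bindings change (rdi, rsi, rdx,
 * r10, r8).  This mirrors the x86-64 syscall convention, which substitutes
 * r10 for rcx because the syscall instruction clobbers rcx.
 */
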
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/hypervisor.h    2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/hypervisor.h 2009-03-04 11:28:34.000000000 +0100
+@@ -43,6 +43,7 @@
+ #include <xen/interface/physdev.h>
+ #include <xen/interface/sched.h>
+ #include <xen/interface/nmi.h>
++#include <asm/percpu.h>
+ #include <asm/ptrace.h>
+ #include <asm/page.h>
+ #if defined(__i386__)
+@@ -135,7 +136,46 @@ void scrub_pages(void *, unsigned int);
+ #define scrub_pages(_p,_n) ((void)0)
+ #endif
+-#include <xen/hypercall.h>
++#if defined(CONFIG_XEN) && !defined(MODULE)
++
++DECLARE_PER_CPU(bool, xen_lazy_mmu);
++
++int xen_multicall_flush(bool);
++
++int __must_check xen_multi_update_va_mapping(unsigned long va, pte_t,
++                                           unsigned long flags);
++int __must_check xen_multi_mmu_update(mmu_update_t *, unsigned int count,
++                                    unsigned int *success_count, domid_t);
++int __must_check xen_multi_mmuext_op(struct mmuext_op *, unsigned int count,
++                                   unsigned int *success_count, domid_t);
++
++#define __HAVE_ARCH_ENTER_LAZY_MMU_MODE
++static inline void arch_enter_lazy_mmu_mode(void)
++{
++      __get_cpu_var(xen_lazy_mmu) = true;
++}
++
++static inline void arch_leave_lazy_mmu_mode(void)
++{
++      __get_cpu_var(xen_lazy_mmu) = false;
++      xen_multicall_flush(false);
++}
++
++#if defined(CONFIG_X86_32)
++#define arch_use_lazy_mmu_mode() unlikely(x86_read_percpu(xen_lazy_mmu))
++#elif !defined(arch_use_lazy_mmu_mode)
++#define arch_use_lazy_mmu_mode() unlikely(__get_cpu_var(xen_lazy_mmu))
++#endif
++
++#else /* !CONFIG_XEN || MODULE */
++
++static inline void xen_multicall_flush(bool ignore) {}
++#define arch_use_lazy_mmu_mode() false
++#define xen_multi_update_va_mapping(...) ({ BUG(); -ENOSYS; })
++#define xen_multi_mmu_update(...) ({ BUG(); -ENOSYS; })
++#define xen_multi_mmuext_op(...) ({ BUG(); -ENOSYS; })
++
++#endif /* CONFIG_XEN && !MODULE */
+ #if defined(CONFIG_X86_64)
+ #define MULTI_UVMFLAGS_INDEX 2
+@@ -147,11 +187,15 @@ void scrub_pages(void *, unsigned int);
+ #ifdef CONFIG_XEN
+ #define is_running_on_xen() 1
++extern char hypercall_page[PAGE_SIZE];
+ #else
+ extern char *hypercall_stubs;
++#define hypercall_page hypercall_stubs
+ #define is_running_on_xen() (!!hypercall_stubs)
+ #endif
++#include <xen/hypercall.h>
++
+ static inline int
+ HYPERVISOR_yield(
+       void)
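
/*
 * Editor's note: a minimal usage sketch, assuming the interfaces declared
 * above.  While the per-CPU xen_lazy_mmu flag is set, the hypercall
 * wrappers in hypercall_32.h/hypercall_64.h divert mmu_update, mmuext_op
 * and update_va_mapping into a multicall batch; leaving lazy mode flushes
 * the whole batch in a single trip to the hypervisor.
 */
static void sketch_zap_pages(unsigned long va, unsigned int nr)
{
        unsigned int i;

        arch_enter_lazy_mmu_mode();             /* start queueing */
        for (i = 0; i < nr; i++)                /* each call is batched ... */
                if (HYPERVISOR_update_va_mapping(va + i * PAGE_SIZE,
                                                 __pte(0), UVMF_INVLPG))
                        BUG();
        arch_leave_lazy_mmu_mode();             /* ... and flushed here */
}
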
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/pgtable-3level.h        2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/pgtable-3level.h     2009-03-04 11:28:34.000000000 +0100
+@@ -53,7 +53,6 @@ static inline int pte_exec_kernel(pte_t 
+  * not possible, use pte_get_and_clear to obtain the old pte
+  * value and then use set_pte to update it.  -ben
+  */
+-#define __HAVE_ARCH_SET_PTE_ATOMIC
+ static inline void set_pte(pte_t *ptep, pte_t pte)
+ {
+@@ -70,14 +69,6 @@ static inline void set_pte(pte_t *ptep, 
+               set_pte((ptep), (pteval));                              \
+ } while (0)
+-#define set_pte_at_sync(_mm,addr,ptep,pteval) do {                    \
+-      if (((_mm) != current->mm && (_mm) != &init_mm) ||              \
+-          HYPERVISOR_update_va_mapping((addr), (pteval), UVMF_INVLPG)) { \
+-              set_pte((ptep), (pteval));                              \
+-              xen_invlpg((addr));                                     \
+-      }                                                               \
+-} while (0)
+-
+ #define set_pmd(pmdptr,pmdval)                                \
+               xen_l2_entry_update((pmdptr), (pmdval))
+ #define set_pud(pudptr,pudval) \
+@@ -94,7 +85,7 @@ static inline void pud_clear (pud_t * pu
+ #define pud_page(pud) \
+ ((struct page *) __va(pud_val(pud) & PAGE_MASK))
+-#define pud_page_kernel(pud) \
++#define pud_page_vaddr(pud) \
+ ((unsigned long) __va(pud_val(pud) & PAGE_MASK))
+@@ -124,6 +115,7 @@ static inline void pte_clear(struct mm_s
+ #define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0)
++#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
+ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+ {
+       pte_t pte = *ptep;
+@@ -142,6 +134,7 @@ static inline pte_t ptep_get_and_clear(s
+       return pte;
+ }
++#define __HAVE_ARCH_PTEP_CLEAR_FLUSH
+ #define ptep_clear_flush(vma, addr, ptep)                     \
+ ({                                                            \
+       pte_t *__ptep = (ptep);                                 \
+@@ -159,6 +152,7 @@ static inline pte_t ptep_get_and_clear(s
+       __res;                                                  \
+ })
++#define __HAVE_ARCH_PTE_SAME
+ static inline int pte_same(pte_t a, pte_t b)
+ {
+       return a.pte_low == b.pte_low && a.pte_high == b.pte_high;
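
/*
 * Editor's note: on PAE a pte is two 32-bit words (pte_low/pte_high),
 * which is why pte_same() above compares both halves and why the comment
 * at the top of this hunk forbids updating a live pte in place.  A sketch
 * of the sanctioned pattern, per that comment:
 */
static inline pte_t sketch_replace_pte(struct mm_struct *mm,
                                       unsigned long addr,
                                       pte_t *ptep, pte_t newval)
{
        pte_t old = ptep_get_and_clear(mm, addr, ptep); /* atomic clear    */
        set_pte(ptep, newval);                          /* install new pte */
        return old;                                     /* whole old value */
}
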
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/pgtable_32.h    2008-12-15 11:13:45.000000000 +0100
++++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/pgtable_32.h 2009-03-04 11:28:34.000000000 +0100
+@@ -260,31 +260,89 @@ static inline pte_t pte_mkhuge(pte_t pte
+ # include <asm/pgtable-2level.h>
+ #endif
+-#define ptep_test_and_clear_dirty(vma, addr, ptep)                    \
++/*
++ * Rules for using pte_update - it must be called after any PTE update which
++ * has not been done using the set_pte / clear_pte interfaces.  It is used by
++ * shadow mode hypervisors to resynchronize the shadow page tables.  Kernel PTE
++ * updates should either be sets, clears, or set_pte_atomic for P->P
++ * transitions, which means this hook should only be called for user PTEs.
++ * This hook implies a P->P protection or access change has taken place, which
++ * requires a subsequent TLB flush.  The notification can optionally be delayed
++ * until the TLB flush event by using the pte_update_defer form of the
++ * interface, but care must be taken to assure that the flush happens while
++ * still holding the same page table lock so that the shadow and primary pages
++ * do not become out of sync on SMP.
++ */
++#define pte_update(mm, addr, ptep)            do { } while (0)
++#define pte_update_defer(mm, addr, ptep)      do { } while (0)
++
++
++/*
++ * We only update the dirty/accessed state if we set
++ * the dirty bit by hand in the kernel, since the hardware
++ * will do the accessed bit for us, and we don't want to
++ * race with other CPU's that might be updating the dirty
++ * bit at the same time.
++ */
++#define  __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
++#define ptep_set_access_flags(vma, address, ptep, entry, dirty)               \
++do {                                                                  \
++      if (dirty)                                                      \
++              ptep_establish(vma, address, ptep, entry);              \
++} while (0)
++
++/*
++ * We don't actually have these, but we want to advertise them so that
++ * we can encompass the flush here.
++ */
++#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
++#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
++
++/*
++ * Rules for using ptep_establish: the pte MUST be a user pte, and
++ * must be a present->present transition.
++ */
++#define __HAVE_ARCH_PTEP_ESTABLISH
++#define ptep_establish(vma, address, ptep, pteval)                    \
++do {                                                                  \
++      if ( likely((vma)->vm_mm == current->mm) ) {                    \
++              BUG_ON(HYPERVISOR_update_va_mapping(address,            \
++                      pteval,                                         \
++                      (unsigned long)(vma)->vm_mm->cpu_vm_mask.bits|  \
++                              UVMF_INVLPG|UVMF_MULTI));               \
++      } else {                                                        \
++              xen_l1_entry_update(ptep, pteval);                      \
++              flush_tlb_page(vma, address);                           \
++      }                                                               \
++} while (0)
++
++#define __HAVE_ARCH_PTEP_CLEAR_DIRTY_FLUSH
++#define ptep_clear_flush_dirty(vma, address, ptep)                    \
+ ({                                                                    \
+       pte_t __pte = *(ptep);                                          \
+-      int __ret = pte_dirty(__pte);                                   \
+-      if (__ret) {                                                    \
+-              __pte = pte_mkclean(__pte);                             \
+-              if ((vma)->vm_mm != current->mm ||                      \
+-                  HYPERVISOR_update_va_mapping(addr, __pte, 0))       \
+-                      (ptep)->pte_low = __pte.pte_low;                \
+-      }                                                               \
+-      __ret;                                                          \
++      int __dirty = pte_dirty(__pte);                                 \
++      __pte = pte_mkclean(__pte);                                     \
++      if (test_bit(PG_pinned, &virt_to_page((vma)->vm_mm->pgd)->flags)) \
++              ptep_set_access_flags(vma, address, ptep, __pte, __dirty); \
++      else if (__dirty)                                               \
++              (ptep)->pte_low = __pte.pte_low;                        \
++      __dirty;                                                        \
+ })
+-#define ptep_test_and_clear_young(vma, addr, ptep)                    \
++#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
++#define ptep_clear_flush_young(vma, address, ptep)                    \
+ ({                                                                    \
+       pte_t __pte = *(ptep);                                          \
+-      int __ret = pte_young(__pte);                                   \
+-      if (__ret)                                                      \
+-              __pte = pte_mkold(__pte);                               \
+-              if ((vma)->vm_mm != current->mm ||                      \
+-                  HYPERVISOR_update_va_mapping(addr, __pte, 0))       \
+-                      (ptep)->pte_low = __pte.pte_low;                \
+-      __ret;                                                          \
++      int __young = pte_young(__pte);                                 \
++      __pte = pte_mkold(__pte);                                       \
++      if (test_bit(PG_pinned, &virt_to_page((vma)->vm_mm->pgd)->flags)) \
++              ptep_set_access_flags(vma, address, ptep, __pte, __young); \
++      else if (__young)                                               \
++              (ptep)->pte_low = __pte.pte_low;                        \
++      __young;                                                        \
+ })
++#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
+ #define ptep_get_and_clear_full(mm, addr, ptep, full)                 \
+       ((full) ? ({                                                    \
+               pte_t __res = *(ptep);                                  \
+@@ -296,6 +354,7 @@ static inline pte_t pte_mkhuge(pte_t pte
+        }) :                                                           \
+        ptep_get_and_clear(mm, addr, ptep))
++#define __HAVE_ARCH_PTEP_SET_WRPROTECT
+ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+ {
+       pte_t pte = *ptep;
+@@ -391,11 +450,11 @@ static inline pte_t pte_modify(pte_t pte
+ #define pte_index(address) \
+               (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
+ #define pte_offset_kernel(dir, address) \
+-      ((pte_t *) pmd_page_kernel(*(dir)) +  pte_index(address))
++      ((pte_t *) pmd_page_vaddr(*(dir)) +  pte_index(address))
+ #define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT))
+-#define pmd_page_kernel(pmd) \
++#define pmd_page_vaddr(pmd) \
+               ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
+ /*
+@@ -418,8 +477,6 @@ extern pte_t *lookup_address(unsigned lo
+  static inline int set_kernel_exec(unsigned long vaddr, int enable) { return 0;}
+ #endif
+-extern void noexec_setup(const char *str);
+-
+ #if defined(CONFIG_HIGHPTE)
+ #define pte_offset_map(dir, address) \
+       ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE0) + \
+@@ -437,37 +494,17 @@ extern void noexec_setup(const char *str
+ #define pte_unmap_nested(pte) do { } while (0)
+ #endif
+-#define __HAVE_ARCH_PTEP_ESTABLISH
+-#define ptep_establish(vma, address, ptep, pteval)                    \
+-      do {                                                            \
+-              if ( likely((vma)->vm_mm == current->mm) ) {            \
+-                      BUG_ON(HYPERVISOR_update_va_mapping(address,    \
+-                              pteval,                                 \
+-                              (unsigned long)(vma)->vm_mm->cpu_vm_mask.bits| \
+-                                      UVMF_INVLPG|UVMF_MULTI));       \
+-              } else {                                                \
+-                      xen_l1_entry_update(ptep, pteval);              \
+-                      flush_tlb_page(vma, address);                   \
+-              }                                                       \
+-      } while (0)
++/* Clear a kernel PTE and flush it from the TLB */
++#define kpte_clear_flush(ptep, vaddr) do { \
++      if (HYPERVISOR_update_va_mapping(vaddr, __pte(0), UVMF_INVLPG)) \
++              BUG(); \
++} while (0)
+ /*
+  * The i386 doesn't have any external MMU info: the kernel page
+  * tables contain all the necessary information.
+- *
+- * Also, we only update the dirty/accessed state if we set
+- * the dirty bit by hand in the kernel, since the hardware
+- * will do the accessed bit for us, and we don't want to
+- * race with other CPU's that might be updating the dirty
+- * bit at the same time.
+  */
+ #define update_mmu_cache(vma,address,pte) do { } while (0)
+-#define  __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
+-#define ptep_set_access_flags(vma, address, ptep, entry, dirty)               \
+-      do {                                                            \
+-              if (dirty)                                              \
+-                      ptep_establish(vma, address, ptep, entry);      \
+-      } while (0)
+ #include <xen/features.h>
+ void make_lowmem_page_readonly(void *va, unsigned int feature);
+@@ -526,10 +563,11 @@ int touch_pte_range(struct mm_struct *mm
+                     unsigned long size);
+ int xen_change_pte_range(struct mm_struct *mm, pmd_t *pmd,
+-              unsigned long addr, unsigned long end, pgprot_t newprot);
++              unsigned long addr, unsigned long end, pgprot_t newprot,
++              int dirty_accountable);
+-#define arch_change_pte_range(mm, pmd, addr, end, newprot)    \
+-              xen_change_pte_range(mm, pmd, addr, end, newprot)
++#define arch_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable) \
++      xen_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable)
+ #define io_remap_pfn_range(vma,from,pfn,size,prot) \
+ direct_remap_pfn_range(vma,from,pfn,size,prot,DOMID_IO)
+@@ -538,13 +576,6 @@ direct_remap_pfn_range(vma,from,pfn,size
+ #define GET_IOSPACE(pfn)              0
+ #define GET_PFN(pfn)                  (pfn)
+-#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
+-#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
+-#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
+-#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
+-#define __HAVE_ARCH_PTEP_CLEAR_FLUSH
+-#define __HAVE_ARCH_PTEP_SET_WRPROTECT
+-#define __HAVE_ARCH_PTE_SAME
+ #include <asm-generic/pgtable.h>
+ #endif /* _I386_PGTABLE_H */
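
/*
 * Editor's note: the PG_pinned tests introduced above distinguish page
 * tables that Xen has validated and write-protected ("pinned") from plain
 * writable memory: a pinned table may only be changed via hypercall, while
 * an unpinned one can still be written directly.  kpte_clear_flush() is
 * the same idea for kernel mappings -- one update_va_mapping hypercall
 * performs both the pte write and the INVLPG.  An illustrative caller, in
 * the spirit of a kunmap_atomic() slot teardown (names assumed):
 */
static void sketch_release_kmap_slot(pte_t *kmap_pte, unsigned long vaddr)
{
        kpte_clear_flush(kmap_pte, vaddr);      /* clear pte + flush one VA */
}
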
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/processor_32.h  2009-04-20 11:36:10.000000000 +0200
++++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/processor_32.h       2009-03-04 11:28:34.000000000 +0100
+@@ -146,6 +146,18 @@ static inline void detect_ht(struct cpui
+ #define X86_EFLAGS_VIP        0x00100000 /* Virtual Interrupt Pending */
+ #define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */
++static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
++                         unsigned int *ecx, unsigned int *edx)
++{
++      /* ecx is often an input as well as an output. */
++      __asm__(XEN_CPUID
++              : "=a" (*eax),
++                "=b" (*ebx),
++                "=c" (*ecx),
++                "=d" (*edx)
++              : "0" (*eax), "2" (*ecx));
++}
++
+ /*
+  * Generic CPUID function
+  * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx
+@@ -153,24 +165,18 @@ static inline void detect_ht(struct cpui
+  */
+ static inline void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx)
+ {
+-      __asm__(XEN_CPUID
+-              : "=a" (*eax),
+-                "=b" (*ebx),
+-                "=c" (*ecx),
+-                "=d" (*edx)
+-              : "0" (op), "c"(0));
++      *eax = op;
++      *ecx = 0;
++      __cpuid(eax, ebx, ecx, edx);
+ }
+ /* Some CPUID calls want 'count' to be placed in ecx */
+ static inline void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx,
+-              int *edx)
++                             int *edx)
+ {
+-      __asm__(XEN_CPUID
+-              : "=a" (*eax),
+-                "=b" (*ebx),
+-                "=c" (*ecx),
+-                "=d" (*edx)
+-              : "0" (op), "c" (count));
++      *eax = op;
++      *ecx = count;
++      __cpuid(eax, ebx, ecx, edx);
+ }
+ /*
+@@ -178,42 +184,30 @@ static inline void cpuid_count(int op, i
+  */
+ static inline unsigned int cpuid_eax(unsigned int op)
+ {
+-      unsigned int eax;
++      unsigned int eax, ebx, ecx, edx;
+-      __asm__(XEN_CPUID
+-              : "=a" (eax)
+-              : "0" (op)
+-              : "bx", "cx", "dx");
++      cpuid(op, &eax, &ebx, &ecx, &edx);
+       return eax;
+ }
+ static inline unsigned int cpuid_ebx(unsigned int op)
+ {
+-      unsigned int eax, ebx;
++      unsigned int eax, ebx, ecx, edx;
+-      __asm__(XEN_CPUID
+-              : "=a" (eax), "=b" (ebx)
+-              : "0" (op)
+-              : "cx", "dx" );
++      cpuid(op, &eax, &ebx, &ecx, &edx);
+       return ebx;
+ }
+ static inline unsigned int cpuid_ecx(unsigned int op)
+ {
+-      unsigned int eax, ecx;
++      unsigned int eax, ebx, ecx, edx;
+-      __asm__(XEN_CPUID
+-              : "=a" (eax), "=c" (ecx)
+-              : "0" (op)
+-              : "bx", "dx" );
++      cpuid(op, &eax, &ebx, &ecx, &edx);
+       return ecx;
+ }
+ static inline unsigned int cpuid_edx(unsigned int op)
+ {
+-      unsigned int eax, edx;
++      unsigned int eax, ebx, ecx, edx;
+-      __asm__(XEN_CPUID
+-              : "=a" (eax), "=d" (edx)
+-              : "0" (op)
+-              : "bx", "cx");
++      cpuid(op, &eax, &ebx, &ecx, &edx);
+       return edx;
+ }
+@@ -315,6 +309,8 @@ static inline void __mwait(unsigned long
+               : :"a" (eax), "c" (ecx));
+ }
++extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx);
++
+ /* from system description table in BIOS.  Mostly for MCA use, but
+ others may find it useful. */
+ extern unsigned int machine_id;
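
/*
 * Editor's note: usage sketch for the rewritten helpers above.  Leaf 0
 * returns the highest supported leaf in EAX and the vendor string packed
 * into EBX, EDX, ECX (in that order); the wrappers forward through
 * XEN_CPUID so the hypervisor can intercept and filter feature bits.
 */
static void sketch_report_vendor(void)
{
        unsigned int max_leaf, vendor[3];

        cpuid(0, &max_leaf, &vendor[0], &vendor[2], &vendor[1]);
        printk(KERN_INFO "cpuid: max leaf %u, vendor %.12s\n",
               max_leaf, (const char *)vendor);
}
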
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/segment_32.h    2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/segment_32.h 2009-03-04 11:28:34.000000000 +0100
+@@ -61,11 +61,9 @@
+ #define GDT_ENTRY_KERNEL_CS           (GDT_ENTRY_KERNEL_BASE + 0)
+ #define __KERNEL_CS (GDT_ENTRY_KERNEL_CS * 8)
+-#define GET_KERNEL_CS() (__KERNEL_CS | (xen_feature(XENFEAT_supervisor_mode_kernel)?0:1) )
+ #define GDT_ENTRY_KERNEL_DS           (GDT_ENTRY_KERNEL_BASE + 1)
+ #define __KERNEL_DS (GDT_ENTRY_KERNEL_DS * 8)
+-#define GET_KERNEL_DS() (__KERNEL_DS | (xen_feature(XENFEAT_supervisor_mode_kernel)?0:1) )
+ #define GDT_ENTRY_TSS                 (GDT_ENTRY_KERNEL_BASE + 4)
+ #define GDT_ENTRY_LDT                 (GDT_ENTRY_KERNEL_BASE + 5)
+@@ -85,6 +83,11 @@
+ #define GDT_SIZE (GDT_ENTRIES * 8)
++/* Matches __KERNEL_CS and __USER_CS (they must be 2 entries apart) */
++#define SEGMENT_IS_FLAT_CODE(x)  (((x) & 0xec) == GDT_ENTRY_KERNEL_CS * 8)
++/* Matches PNP_CS32 and PNP_CS16 (they must be consecutive) */
++#define SEGMENT_IS_PNP_CODE(x)   (((x) & 0xf4) == GDT_ENTRY_PNPBIOS_BASE * 8)
++
+ /* Simple and small GDT entries for booting only */
+ #define GDT_ENTRY_BOOT_CS             2
+@@ -114,4 +117,16 @@
+  */
+ #define IDT_ENTRIES 256
++/* Bottom two bits of selector give the ring privilege level */
++#define SEGMENT_RPL_MASK      0x3
++/* Bit 2 is table indicator (LDT/GDT) */
++#define SEGMENT_TI_MASK               0x4
++
++/* User mode is privilege level 3 */
++#define USER_RPL              0x3
++/* LDT segment has TI set, GDT has it cleared */
++#define SEGMENT_LDT           0x4
++#define SEGMENT_GDT           0x0
++
++#define get_kernel_rpl()  (xen_feature(XENFEAT_supervisor_mode_kernel)?0:1)
+ #endif
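
/*
 * Editor's note: sketch of how the new selector macros are meant to be
 * used.  Under Xen the kernel itself runs at the RPL reported by
 * get_kernel_rpl() (ring 1 unless XENFEAT_supervisor_mode_kernel), so
 * "is this a user selector?" checks compare against USER_RPL rather than
 * testing for non-zero.  Helper names are illustrative.
 */
static inline int sketch_selector_is_user(unsigned short sel)
{
        return (sel & SEGMENT_RPL_MASK) == USER_RPL;
}

static inline int sketch_selector_in_ldt(unsigned short sel)
{
        return (sel & SEGMENT_TI_MASK) == SEGMENT_LDT;
}
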
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/smp_32.h        2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/smp_32.h     2009-03-04 11:28:34.000000000 +0100
+@@ -79,25 +79,36 @@ static inline int hard_smp_processor_id(
+       return GET_APIC_ID(*(unsigned long *)(APIC_BASE+APIC_ID));
+ }
+ #endif
+-
+-static __inline int logical_smp_processor_id(void)
+-{
+-      /* we don't want to mark this access volatile - bad code generation */
+-      return GET_APIC_LOGICAL_ID(*(unsigned long *)(APIC_BASE+APIC_LDR));
+-}
+-
+ #endif
++#define safe_smp_processor_id() smp_processor_id()
+ extern int __cpu_disable(void);
+ extern void __cpu_die(unsigned int cpu);
+ extern void prefill_possible_map(void);
++extern unsigned int num_processors;
++
+ #endif /* !__ASSEMBLY__ */
+ #else /* CONFIG_SMP */
++#define safe_smp_processor_id()               0
+ #define cpu_physical_id(cpu)          boot_cpu_physical_apicid
+ #define NO_PROC_ID            0xFF            /* No processor magic marker */
+ #endif
++
++#ifndef __ASSEMBLY__
++
++extern u8 apicid_2_node[];
++
++#ifdef CONFIG_X86_LOCAL_APIC
++static __inline int logical_smp_processor_id(void)
++{
++      /* we don't want to mark this access volatile - bad code generation */
++      return GET_APIC_LOGICAL_ID(*(unsigned long *)(APIC_BASE+APIC_LDR));
++}
++#endif
++#endif
++
+ #endif
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/system_32.h     2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/system_32.h  2009-03-04 11:28:34.000000000 +0100
+@@ -267,6 +267,9 @@ static inline unsigned long __xchg(unsig
+ #define cmpxchg(ptr,o,n)\
+       ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\
+                                       (unsigned long)(n),sizeof(*(ptr))))
++#define sync_cmpxchg(ptr,o,n)\
++      ((__typeof__(*(ptr)))__sync_cmpxchg((ptr),(unsigned long)(o),\
++                                      (unsigned long)(n),sizeof(*(ptr))))
+ #endif
+ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
+@@ -296,6 +299,39 @@ static inline unsigned long __cmpxchg(vo
+       return old;
+ }
++/*
++ * Always use locked operations when touching memory shared with a
++ * hypervisor, since the system may be SMP even if the guest kernel
++ * isn't.
++ */
++static inline unsigned long __sync_cmpxchg(volatile void *ptr,
++                                          unsigned long old,
++                                          unsigned long new, int size)
++{
++      unsigned long prev;
++      switch (size) {
++      case 1:
++              __asm__ __volatile__("lock; cmpxchgb %b1,%2"
++                                   : "=a"(prev)
++                                   : "q"(new), "m"(*__xg(ptr)), "0"(old)
++                                   : "memory");
++              return prev;
++      case 2:
++              __asm__ __volatile__("lock; cmpxchgw %w1,%2"
++                                   : "=a"(prev)
++                                   : "r"(new), "m"(*__xg(ptr)), "0"(old)
++                                   : "memory");
++              return prev;
++      case 4:
++              __asm__ __volatile__("lock; cmpxchgl %1,%2"
++                                   : "=a"(prev)
++                                   : "r"(new), "m"(*__xg(ptr)), "0"(old)
++                                   : "memory");
++              return prev;
++      }
++      return old;
++}
++
+ #ifndef CONFIG_X86_CMPXCHG
+ /*
+  * Building a kernel capable running on 80386. It may be necessary to
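
/*
 * Editor's note: illustrative use of the sync_cmpxchg() added above,
 * modelled on the grant-table "end foreign access" pattern.  The lock
 * prefix is unconditional because the peer domain may run on another
 * physical CPU even when this guest is built UP.  Field and mask names
 * are assumptions, not the real grant-table API.
 */
static int sketch_end_foreign_access(volatile u16 *flags_ptr, u16 busy_mask)
{
        u16 flags, nflags;

        nflags = *flags_ptr;
        do {
                flags = nflags;
                if (flags & busy_mask)
                        return 0;       /* peer is still using the frame */
        } while ((nflags = sync_cmpxchg(flags_ptr, flags, 0)) != flags);

        return 1;                       /* access revoked */
}
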
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/tlbflush_32.h   2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/tlbflush_32.h        2009-03-04 11:28:34.000000000 +0100
+@@ -8,8 +8,6 @@
+ #define __flush_tlb_global() xen_tlb_flush()
+ #define __flush_tlb_all() xen_tlb_flush()
+-extern unsigned long pgkern_mask;
+-
+ #define cpu_has_invlpg        (boot_cpu_data.x86 > 3)
+ #define __flush_tlb_single(addr) xen_invlpg(addr)
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/dma-mapping_64.h        2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/dma-mapping_64.h     2009-03-04 11:28:34.000000000 +0100
+@@ -55,13 +55,6 @@ extern dma_addr_t bad_dma_address;
+ extern struct dma_mapping_ops* dma_ops;
+ extern int iommu_merge;
+-static inline int valid_dma_direction(int dma_direction)
+-{
+-      return ((dma_direction == DMA_BIDIRECTIONAL) ||
+-              (dma_direction == DMA_TO_DEVICE) ||
+-              (dma_direction == DMA_FROM_DEVICE));
+-}
+-
+ #if 0
+ static inline int dma_mapping_error(dma_addr_t dma_addr)
+ {
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/fixmap_64.h     2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/fixmap_64.h  2009-03-04 11:28:34.000000000 +0100
+@@ -41,7 +41,7 @@ enum fixed_addresses {
+ #ifdef CONFIG_X86_LOCAL_APIC
+       FIX_APIC_BASE,  /* local (CPU) APIC) -- required for SMP or not */
+ #endif
+-#ifdef CONFIG_X86_IO_APIC
++#ifndef CONFIG_XEN
+       FIX_IO_APIC_BASE_0,
+       FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS-1,
+ #endif
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/pgtable_64.h    2009-04-20 11:36:10.000000000 +0200
++++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/pgtable_64.h 2009-03-04 11:28:34.000000000 +0100
+@@ -44,12 +44,9 @@ extern unsigned long __supported_pte_mas
+ #define swapper_pg_dir init_level4_pgt
+-extern int nonx_setup(char *str);
+ extern void paging_init(void);
+ extern void clear_kernel_mapping(unsigned long addr, unsigned long size);
+-extern unsigned long pgkern_mask;
+-
+ /*
+  * ZERO_PAGE is a global shared page that is always zero: used
+  * for zero-mapped memory areas etc..
+@@ -119,9 +116,6 @@ static inline void pgd_clear (pgd_t * pg
+         set_pgd(__user_pgd(pgd), __pgd(0));
+ }
+-#define pud_page(pud) \
+-    ((unsigned long) __va(pud_val(pud) & PHYSICAL_PAGE_MASK))
+-
+ #define pte_same(a, b)                ((a).pte == (b).pte)
+ #define pte_pgprot(a) (__pgprot((a).pte & ~PHYSICAL_PAGE_MASK))
+@@ -333,7 +327,7 @@ static inline pte_t ptep_get_and_clear_f
+ #define __LARGE_PTE (_PAGE_PSE|_PAGE_PRESENT)
+ static inline int pte_user(pte_t pte)         { return __pte_val(pte) & _PAGE_USER; }
+ static inline int pte_read(pte_t pte)         { return __pte_val(pte) & _PAGE_USER; }
+-static inline int pte_exec(pte_t pte)         { return __pte_val(pte) & _PAGE_USER; }
++static inline int pte_exec(pte_t pte)         { return !(__pte_val(pte) & _PAGE_NX); }
+ static inline int pte_dirty(pte_t pte)                { return __pte_val(pte) & _PAGE_DIRTY; }
+ static inline int pte_young(pte_t pte)                { return __pte_val(pte) & _PAGE_ACCESSED; }
+ static inline int pte_write(pte_t pte)                { return __pte_val(pte) & _PAGE_RW; }
+@@ -346,29 +340,12 @@ static inline pte_t pte_mkclean(pte_t pt
+ static inline pte_t pte_mkold(pte_t pte)      { __pte_val(pte) &= ~_PAGE_ACCESSED; return pte; }
+ static inline pte_t pte_wrprotect(pte_t pte)  { __pte_val(pte) &= ~_PAGE_RW; return pte; }
+ static inline pte_t pte_mkread(pte_t pte)     { __pte_val(pte) |= _PAGE_USER; return pte; }
+-static inline pte_t pte_mkexec(pte_t pte)     { __pte_val(pte) |= _PAGE_USER; return pte; }
++static inline pte_t pte_mkexec(pte_t pte)     { __pte_val(pte) &= ~_PAGE_NX; return pte; }
+ static inline pte_t pte_mkdirty(pte_t pte)    { __pte_val(pte) |= _PAGE_DIRTY; return pte; }
+ static inline pte_t pte_mkyoung(pte_t pte)    { __pte_val(pte) |= _PAGE_ACCESSED; return pte; }
+ static inline pte_t pte_mkwrite(pte_t pte)    { __pte_val(pte) |= _PAGE_RW; return pte; }
+ static inline pte_t pte_mkhuge(pte_t pte)     { __pte_val(pte) |= _PAGE_PSE; return pte; }
+-
+-#define ptep_test_and_clear_dirty(vma, addr, ptep)                    \
+-({                                                                    \
+-      pte_t __pte = *(ptep);                                          \
+-      int __ret = pte_dirty(__pte);                                   \
+-      if (__ret)                                                      \
+-              set_pte_at((vma)->vm_mm, addr, ptep, pte_mkclean(__pte)); \
+-      __ret;                                                          \
+-})
+-
+-#define ptep_test_and_clear_young(vma, addr, ptep)                    \
+-({                                                                    \
+-      pte_t __pte = *(ptep);                                          \
+-      int __ret = pte_young(__pte);                                   \
+-      if (__ret)                                                      \
+-              set_pte_at((vma)->vm_mm, addr, ptep, pte_mkold(__pte)); \
+-      __ret;                                                          \
+-})
++static inline pte_t pte_clrhuge(pte_t pte)    { __pte_val(pte) &= ~_PAGE_PSE; return pte; }
+ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+ {
+@@ -395,7 +372,8 @@ static inline int pmd_large(pmd_t pte) {
+ /*
+  * Level 4 access.
+  */
+-#define pgd_page(pgd) ((unsigned long) __va(pgd_val(pgd) & PTE_MASK))
++#define pgd_page_vaddr(pgd) ((unsigned long) __va(pgd_val(pgd) & PTE_MASK))
++#define pgd_page(pgd)         (pfn_to_page(pgd_val(pgd) >> PAGE_SHIFT))
+ #define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
+ #define pgd_offset(mm, addr) ((mm)->pgd + pgd_index(addr))
+ #define pgd_offset_k(address) (init_level4_pgt + pgd_index(address))
+@@ -404,16 +382,18 @@ static inline int pmd_large(pmd_t pte) {
+ /* PUD - Level3 access */
+ /* to find an entry in a page-table-directory. */
++#define pud_page_vaddr(pud) ((unsigned long) __va(pud_val(pud) & PHYSICAL_PAGE_MASK))
++#define pud_page(pud)         (pfn_to_page(pud_val(pud) >> PAGE_SHIFT))
+ #define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
+-#define pud_offset(pgd, address) ((pud_t *) pgd_page(*(pgd)) + pud_index(address))
++#define pud_offset(pgd, address) ((pud_t *) pgd_page_vaddr(*(pgd)) + pud_index(address))
+ #define pud_present(pud) (__pud_val(pud) & _PAGE_PRESENT)
+ /* PMD  - Level 2 access */
+-#define pmd_page_kernel(pmd) ((unsigned long) __va(pmd_val(pmd) & PTE_MASK))
++#define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_val(pmd) & PTE_MASK))
+ #define pmd_page(pmd)         (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT))
+ #define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
+-#define pmd_offset(dir, address) ((pmd_t *) pud_page(*(dir)) + \
++#define pmd_offset(dir, address) ((pmd_t *) pud_page_vaddr(*(dir)) + \
+                                   pmd_index(address))
+ #define pmd_none(x)   (!__pmd_val(x))
+ #if CONFIG_XEN_COMPAT <= 0x030002
+@@ -444,6 +424,7 @@ static inline pte_t mk_pte_phys(unsigned
+ { 
+       unsigned long pteval;
+       pteval = physpage | pgprot_val(pgprot);
++      pteval &= __supported_pte_mask;
+       return __pte(pteval);
+ }
+  
+@@ -465,7 +446,7 @@ static inline pte_t pte_modify(pte_t pte
+ #define pte_index(address) \
+               (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
+-#define pte_offset_kernel(dir, address) ((pte_t *) pmd_page_kernel(*(dir)) + \
++#define pte_offset_kernel(dir, address) ((pte_t *) pmd_page_vaddr(*(dir)) + \
+                       pte_index(address))
+ /* x86-64 always has all page tables mapped. */
+@@ -506,6 +487,40 @@ static inline pte_t pte_modify(pte_t pte
+                       ptep_establish(vma, address, ptep, entry);      \
+       } while (0)
++
++/*
++ * i386 says: We don't actually have these, but we want to advertise
++ * them so that we can encompass the flush here.
++ */
++#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
++#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
++
++#define __HAVE_ARCH_PTEP_CLEAR_DIRTY_FLUSH
++#define ptep_clear_flush_dirty(vma, address, ptep)                    \
++({                                                                    \
++      pte_t __pte = *(ptep);                                          \
++      int __dirty = pte_dirty(__pte);                                 \
++      __pte = pte_mkclean(__pte);                                     \
++      if ((vma)->vm_mm->context.pinned)                               \
++              ptep_set_access_flags(vma, address, ptep, __pte, __dirty); \
++      else if (__dirty)                                               \
++              set_pte(ptep, __pte);                                   \
++      __dirty;                                                        \
++})
++
++#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
++#define ptep_clear_flush_young(vma, address, ptep)                    \
++({                                                                    \
++      pte_t __pte = *(ptep);                                          \
++      int __young = pte_young(__pte);                                 \
++      __pte = pte_mkold(__pte);                                       \
++      if ((vma)->vm_mm->context.pinned)                               \
++              ptep_set_access_flags(vma, address, ptep, __pte, __young); \
++      else if (__young)                                               \
++              set_pte(ptep, __pte);                                   \
++      __young;                                                        \
++})
++
+ /* Encode and de-code a swap entry */
+ #define __swp_type(x)                 (((x).val >> 1) & 0x3f)
+ #define __swp_offset(x)                       ((x).val >> 8)
+@@ -547,10 +562,11 @@ int touch_pte_range(struct mm_struct *mm
+                     unsigned long size);
+ int xen_change_pte_range(struct mm_struct *mm, pmd_t *pmd,
+-              unsigned long addr, unsigned long end, pgprot_t newprot);
++              unsigned long addr, unsigned long end, pgprot_t newprot,
++              int dirty_accountable);
+-#define arch_change_pte_range(mm, pmd, addr, end, newprot)    \
+-              xen_change_pte_range(mm, pmd, addr, end, newprot)
++#define arch_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable) \
++      xen_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable)
+ #define io_remap_pfn_range(vma, vaddr, pfn, size, prot)               \
+               direct_remap_pfn_range(vma,vaddr,pfn,size,prot,DOMID_IO)
+@@ -572,8 +588,6 @@ int xen_change_pte_range(struct mm_struc
+ #define       kc_offset_to_vaddr(o) \
+    (((o) & (1UL << (__VIRTUAL_MASK_SHIFT-1))) ? ((o) | (~__VIRTUAL_MASK)) : (o))
+-#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
+-#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
+ #define __HAVE_ARCH_PTEP_GET_AND_CLEAR
+ #define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
+ #define __HAVE_ARCH_PTEP_CLEAR_FLUSH
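
/*
 * Editor's note: the one-line mk_pte_phys() change above masks the new
 * pte with __supported_pte_mask so that bits such as _PAGE_NX are
 * silently dropped when the CPU (or hypervisor setup) does not support
 * them -- a reserved high bit in a pte would otherwise be rejected.
 * Sketch, assuming the names in this header:
 */
static inline pte_t sketch_mk_pte_nx(unsigned long physpage, pgprot_t prot)
{
        /* safe even without NX: the mask strips _PAGE_NX if unsupported */
        return mk_pte_phys(physpage, __pgprot(pgprot_val(prot) | _PAGE_NX));
}
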
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/processor_64.h  2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/processor_64.h       2009-03-04 11:28:34.000000000 +0100
+@@ -484,6 +484,8 @@ static inline void __mwait(unsigned long
+               : :"a" (eax), "c" (ecx));
+ }
++extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx);
++
+ #define stack_current() \
+ ({                                                            \
+       struct thread_info *ti;                                 \
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/smp_64.h        2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/smp_64.h     2009-03-04 11:28:34.000000000 +0100
+@@ -4,15 +4,12 @@
+ /*
+  * We need the APIC definitions automatically as part of 'smp.h'
+  */
+-#ifndef __ASSEMBLY__
+ #include <linux/threads.h>
+ #include <linux/cpumask.h>
+ #include <linux/bitops.h>
+ extern int disable_apic;
+-#endif
+ #ifdef CONFIG_X86_LOCAL_APIC
+-#ifndef __ASSEMBLY__
+ #include <asm/fixmap.h>
+ #include <asm/mpspec.h>
+ #ifdef CONFIG_X86_IO_APIC
+@@ -21,10 +18,8 @@ extern int disable_apic;
+ #include <asm/apic.h>
+ #include <asm/thread_info.h>
+ #endif
+-#endif
+ #ifdef CONFIG_SMP
+-#ifndef ASSEMBLY
+ #include <asm/pda.h>
+@@ -41,14 +36,11 @@ extern cpumask_t cpu_initialized;
+  
+ extern void smp_alloc_memory(void);
+ extern volatile unsigned long smp_invalidate_needed;
+-extern int pic_mode;
+ extern void lock_ipi_call_lock(void);
+ extern void unlock_ipi_call_lock(void);
+ extern int smp_num_siblings;
+ extern void smp_send_reschedule(int cpu);
+ void smp_stop_cpu(void);
+-extern int smp_call_function_single(int cpuid, void (*func) (void *info),
+-                              void *info, int retry, int wait);
+ extern cpumask_t cpu_sibling_map[NR_CPUS];
+ extern cpumask_t cpu_core_map[NR_CPUS];
+@@ -77,20 +69,16 @@ static inline int hard_smp_processor_id(
+ }
+ #endif
+-extern int safe_smp_processor_id(void);
+ extern int __cpu_disable(void);
+ extern void __cpu_die(unsigned int cpu);
+ extern void prefill_possible_map(void);
+ extern unsigned num_processors;
+ extern unsigned disabled_cpus;
+-#endif /* !ASSEMBLY */
+-
+ #define NO_PROC_ID            0xFF            /* No processor magic marker */
+ #endif
+-#ifndef ASSEMBLY
+ /*
+  * Some lowlevel functions might want to know about
+  * the real APIC ID <-> CPU # mapping.
+@@ -114,11 +102,8 @@ static inline int cpu_present_to_apicid(
+ }
+ #endif
+-#endif /* !ASSEMBLY */
+-
+ #ifndef CONFIG_SMP
+ #define stack_smp_processor_id() 0
+-#define safe_smp_processor_id() 0
+ #define cpu_logical_map(x) (x)
+ #else
+ #include <asm/thread_info.h>
+@@ -130,7 +115,6 @@ static inline int cpu_present_to_apicid(
+ })
+ #endif
+-#ifndef __ASSEMBLY__
+ #ifdef CONFIG_X86_LOCAL_APIC
+ static __inline int logical_smp_processor_id(void)
+ {
+@@ -138,13 +122,18 @@ static __inline int logical_smp_processo
+       return GET_APIC_LOGICAL_ID(*(unsigned long *)(APIC_BASE+APIC_LDR));
+ }
+ #endif
+-#endif
+ #ifdef CONFIG_SMP
+ #define cpu_physical_id(cpu)          x86_cpu_to_apicid[cpu]
+ #else
+ #define cpu_physical_id(cpu)          boot_cpu_id
+-#endif
+-
++static inline int smp_call_function_single(int cpuid, void (*func) (void *info),
++                              void *info, int retry, int wait)
++{
++      /* Disable interrupts here? */
++      func(info);
++      return 0;
++}
++#endif /* !CONFIG_SMP */
+ #endif
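
/*
 * Editor's note: the UP stub added above runs func() inline, and its own
 * comment asks whether interrupts should be disabled first.  Later
 * mainline UP versions of smp_call_function_single() do run the function
 * with interrupts disabled, to match the IPI context callers expect; a
 * conservative sketch of that variant:
 */
static inline int sketch_up_call_function_single(void (*func)(void *),
                                                 void *info)
{
        unsigned long flags;

        local_irq_save(flags);          /* emulate hardirq context */
        func(info);
        local_irq_restore(flags);
        return 0;
}
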
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/system_64.h     2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/system_64.h  2009-03-04 11:28:34.000000000 +0100
+@@ -24,6 +24,7 @@
+ #define __EXTRA_CLOBBER  \
+       ,"rcx","rbx","rdx","r8","r9","r10","r11","r12","r13","r14","r15"
++/* Save restore flags to clear handle leaking NT */
+ #define switch_to(prev,next,last) \
+       asm volatile(SAVE_CONTEXT                                                   \
+                    "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */       \
+--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/tlbflush_64.h   2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/tlbflush_64.h        2009-03-04 11:28:34.000000000 +0100
+@@ -12,9 +12,6 @@
+  */
+ #define __flush_tlb_global()  xen_tlb_flush()
+-
+-extern unsigned long pgkern_mask;
+-
+ #define __flush_tlb_all() __flush_tlb_global()
+ #define __flush_tlb_one(addr) xen_invlpg((unsigned long)addr)
+--- sle11-2009-05-14.orig/include/linux/skbuff.h       2009-02-16 15:58:14.000000000 +0100
++++ sle11-2009-05-14/include/linux/skbuff.h    2009-03-04 11:28:34.000000000 +0100
+@@ -1771,5 +1771,12 @@ static inline void skb_forward_csum(stru
+ }
+ bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off);
++
++#ifdef CONFIG_XEN
++int skb_checksum_setup(struct sk_buff *skb);
++#else
++static inline int skb_checksum_setup(struct sk_buff *skb) { return 0; }
++#endif
++
+ #endif        /* __KERNEL__ */
+ #endif        /* _LINUX_SKBUFF_H */
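
/*
 * Editor's note: usage sketch for the declaration above.  A Xen network
 * front/backend calls skb_checksum_setup() on packets whose checksum the
 * peer domain left unfinished (proto_csum_blank) before injecting them
 * into the stack; on non-Xen builds the inline stub makes the call free.
 * The surrounding receive path is illustrative.
 */
static int sketch_rx_deliver(struct sk_buff *skb)
{
        if (skb_checksum_setup(skb)) {
                kfree_skb(skb);         /* malformed checksum metadata */
                return -EPROTO;
        }
        netif_rx(skb);
        return 0;
}
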
+--- sle11-2009-05-14.orig/include/xen/evtchn.h 2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/include/xen/evtchn.h      2009-03-04 11:28:34.000000000 +0100
+@@ -54,34 +54,34 @@
+  */
+ int bind_caller_port_to_irqhandler(
+       unsigned int caller_port,
+-      irqreturn_t (*handler)(int, void *, struct pt_regs *),
++      irq_handler_t handler,
+       unsigned long irqflags,
+       const char *devname,
+       void *dev_id);
+ int bind_listening_port_to_irqhandler(
+       unsigned int remote_domain,
+-      irqreturn_t (*handler)(int, void *, struct pt_regs *),
++      irq_handler_t handler,
+       unsigned long irqflags,
+       const char *devname,
+       void *dev_id);
+ int bind_interdomain_evtchn_to_irqhandler(
+       unsigned int remote_domain,
+       unsigned int remote_port,
+-      irqreturn_t (*handler)(int, void *, struct pt_regs *),
++      irq_handler_t handler,
+       unsigned long irqflags,
+       const char *devname,
+       void *dev_id);
+ int bind_virq_to_irqhandler(
+       unsigned int virq,
+       unsigned int cpu,
+-      irqreturn_t (*handler)(int, void *, struct pt_regs *),
++      irq_handler_t handler,
+       unsigned long irqflags,
+       const char *devname,
+       void *dev_id);
+ int bind_ipi_to_irqhandler(
+       unsigned int ipi,
+       unsigned int cpu,
+-      irqreturn_t (*handler)(int, void *, struct pt_regs *),
++      irq_handler_t handler,
+       unsigned long irqflags,
+       const char *devname,
+       void *dev_id);
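
/*
 * Editor's note: 2.6.19 dropped the struct pt_regs argument from interrupt
 * handlers (irq_handler_t is irqreturn_t (*)(int, void *)), hence the
 * prototype changes above.  A conforming handler, illustrative:
 */
static irqreturn_t sketch_evtchn_interrupt(int irq, void *dev_id)
{
        /* the register frame, when needed, comes from get_irq_regs() */
        return IRQ_HANDLED;
}
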
+--- sle11-2009-05-14.orig/include/xen/xencons.h        2009-05-14 11:02:43.000000000 +0200
++++ sle11-2009-05-14/include/xen/xencons.h     2009-03-04 11:28:34.000000000 +0100
+@@ -8,7 +8,7 @@ void xencons_force_flush(void);
+ void xencons_resume(void);
+ /* Interrupt work hooks. Receive data, or kick data out. */
+-void xencons_rx(char *buf, unsigned len, struct pt_regs *regs);
++void xencons_rx(char *buf, unsigned len);
+ void xencons_tx(void);
+ int xencons_ring_init(void);
+--- sle11-2009-05-14.orig/mm/mprotect.c        2009-02-16 15:58:14.000000000 +0100
++++ sle11-2009-05-14/mm/mprotect.c     2009-03-04 11:28:34.000000000 +0100
+@@ -92,7 +92,7 @@ static inline void change_pmd_range(stru
+               next = pmd_addr_end(addr, end);
+               if (pmd_none_or_clear_bad(pmd))
+                       continue;
+-              if (arch_change_pte_range(mm, pmd, addr, next, newprot))
++              if (arch_change_pte_range(mm, pmd, addr, next, newprot, dirty_accountable))
+                       continue;
+               change_pte_range(mm, pmd, addr, next, newprot, dirty_accountable);
+       } while (pmd++, addr = next, addr != end);
+--- sle11-2009-05-14.orig/net/core/dev.c       2009-02-16 15:58:14.000000000 +0100
++++ sle11-2009-05-14/net/core/dev.c    2009-03-04 11:28:34.000000000 +0100
+@@ -1765,15 +1765,14 @@ inline int skb_checksum_setup(struct sk_
+               }
+               if ((skb->h.raw + skb->csum + 2) > skb->tail)
+                       goto out;
+-              skb->ip_summed = CHECKSUM_HW;
++              skb->ip_summed = CHECKSUM_PARTIAL;
+               skb->proto_csum_blank = 0;
+       }
+       return 0;
+ out:
+       return -EPROTO;
+ }
+-#else
+-inline int skb_checksum_setup(struct sk_buff *skb) { return 0; }
++EXPORT_SYMBOL(skb_checksum_setup);
+ #endif
+ /**
+@@ -2327,7 +2326,7 @@ int netif_receive_skb(struct sk_buff *sk
+       case CHECKSUM_UNNECESSARY:
+               skb->proto_data_valid = 1;
+               break;
+-      case CHECKSUM_HW:
++      case CHECKSUM_PARTIAL:
+               /* XXX Implement me. */
+       default:
+               skb->proto_data_valid = 0;
+@@ -4989,7 +4988,6 @@ EXPORT_SYMBOL(unregister_netdevice_notif
+ EXPORT_SYMBOL(net_enable_timestamp);
+ EXPORT_SYMBOL(net_disable_timestamp);
+ EXPORT_SYMBOL(dev_get_flags);
+-EXPORT_SYMBOL(skb_checksum_setup);
+ #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
+ EXPORT_SYMBOL(br_handle_frame_hook);
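
/*
 * Editor's note: 2.6.19 split the old CHECKSUM_HW value into
 * CHECKSUM_PARTIAL (tx: checksum still to be computed at h.raw + csum)
 * and CHECKSUM_COMPLETE (rx: hardware-supplied sum); the hunks above move
 * the Xen checksum-offload paths to CHECKSUM_PARTIAL accordingly, and the
 * skb_checksum_setup() export moves next to its (now out-of-line) Xen-only
 * definition.
 */
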