+@@ -349,7 +373,7 @@ static inline pte_t ptep_get_and_clear(s
+ if (!pte_none(pte)
+ && (mm != &init_mm
+ || HYPERVISOR_update_va_mapping(addr, __pte(0), 0))) {
+- pte = raw_ptep_get_and_clear(ptep, pte);
++ pte = xen_ptep_get_and_clear(ptep, pte);
+ pte_update(mm, addr, ptep);
+ }
+ return pte;
+@@ -491,24 +515,10 @@ extern pte_t *lookup_address(unsigned lo
+ #endif
+
+ #if defined(CONFIG_HIGHPTE)
+-#define pte_offset_map(dir, address) \
+-({ \
+- pte_t *__ptep; \
+- unsigned pfn = pmd_val(*(dir)) >> PAGE_SHIFT; \
+- __ptep = (pte_t *)kmap_atomic_pte(pfn_to_page(pfn),KM_PTE0); \
+- paravirt_map_pt_hook(KM_PTE0,__ptep, pfn); \
+- __ptep = __ptep + pte_index(address); \
+- __ptep; \
+-})
+-#define pte_offset_map_nested(dir, address) \
+-({ \
+- pte_t *__ptep; \
+- unsigned pfn = pmd_val(*(dir)) >> PAGE_SHIFT; \
+- __ptep = (pte_t *)kmap_atomic_pte(pfn_to_page(pfn),KM_PTE1); \
+- paravirt_map_pt_hook(KM_PTE1,__ptep, pfn); \
+- __ptep = __ptep + pte_index(address); \
+- __ptep; \
+-})
++#define pte_offset_map(dir, address) \
++ ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE0) + pte_index(address))
++#define pte_offset_map_nested(dir, address) \
++ ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE1) + pte_index(address))
+ #define pte_unmap(pte) kunmap_atomic(pte, KM_PTE0)
+ #define pte_unmap_nested(pte) kunmap_atomic(pte, KM_PTE1)
+ #else
+@@ -597,10 +607,6 @@ int xen_change_pte_range(struct mm_struc
+ #define io_remap_pfn_range(vma,from,pfn,size,prot) \
+ direct_remap_pfn_range(vma,from,pfn,size,prot,DOMID_IO)
+
+-#define MK_IOSPACE_PFN(space, pfn) (pfn)
+-#define GET_IOSPACE(pfn) 0
+-#define GET_PFN(pfn) (pfn)
+-
+ #include <asm-generic/pgtable.h>
+
+ #endif /* _I386_PGTABLE_H */
+--- sle11-2009-04-20.orig/include/asm-x86/mach-xen/asm/processor_32.h 2009-03-04 11:25:55.000000000 +0100
++++ sle11-2009-04-20/include/asm-x86/mach-xen/asm/processor_32.h 2008-12-15 11:27:22.000000000 +0100
+@@ -21,6 +21,7 @@
+ #include <asm/percpu.h>
+ #include <linux/cpumask.h>
+ #include <linux/init.h>
++#include <asm/processor-flags.h>
+ #include <xen/interface/physdev.h>
+
+ /* flag for disabling the tsc */
+@@ -118,7 +119,8 @@ extern char ignore_fpu_irq;
+
+ void __init cpu_detect(struct cpuinfo_x86 *c);
+
+-extern void identify_cpu(struct cpuinfo_x86 *);
++extern void identify_boot_cpu(void);
++extern void identify_secondary_cpu(struct cpuinfo_x86 *);
+ extern void print_cpu_info(struct cpuinfo_x86 *);
+ extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
+ extern unsigned short num_cache_leaves;
+@@ -129,29 +131,8 @@ extern void detect_ht(struct cpuinfo_x86
+ static inline void detect_ht(struct cpuinfo_x86 *c) {}
+ #endif
+
+-/*
+- * EFLAGS bits
+- */
+-#define X86_EFLAGS_CF 0x00000001 /* Carry Flag */
+-#define X86_EFLAGS_PF 0x00000004 /* Parity Flag */
+-#define X86_EFLAGS_AF 0x00000010 /* Auxillary carry Flag */
+-#define X86_EFLAGS_ZF 0x00000040 /* Zero Flag */
+-#define X86_EFLAGS_SF 0x00000080 /* Sign Flag */
+-#define X86_EFLAGS_TF 0x00000100 /* Trap Flag */
+-#define X86_EFLAGS_IF 0x00000200 /* Interrupt Flag */
+-#define X86_EFLAGS_DF 0x00000400 /* Direction Flag */
+-#define X86_EFLAGS_OF 0x00000800 /* Overflow Flag */
+-#define X86_EFLAGS_IOPL 0x00003000 /* IOPL mask */
+-#define X86_EFLAGS_NT 0x00004000 /* Nested Task */
+-#define X86_EFLAGS_RF 0x00010000 /* Resume Flag */
+-#define X86_EFLAGS_VM 0x00020000 /* Virtual Mode */
+-#define X86_EFLAGS_AC 0x00040000 /* Alignment Check */
+-#define X86_EFLAGS_VIF 0x00080000 /* Virtual Interrupt Flag */
+-#define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */
+-#define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */
+-
+-static inline fastcall void xen_cpuid(unsigned int *eax, unsigned int *ebx,
+- unsigned int *ecx, unsigned int *edx)
++static inline void xen_cpuid(unsigned int *eax, unsigned int *ebx,
++ unsigned int *ecx, unsigned int *edx)
+ {
+ /* ecx is often an input as well as an output. */
+ __asm__(XEN_CPUID
+@@ -165,21 +146,6 @@ static inline fastcall void xen_cpuid(un
+ #define load_cr3(pgdir) write_cr3(__pa(pgdir))
+
+ /*
+- * Intel CPU features in CR4
+- */
+-#define X86_CR4_VME 0x0001 /* enable vm86 extensions */
+-#define X86_CR4_PVI 0x0002 /* virtual interrupts flag enable */
+-#define X86_CR4_TSD 0x0004 /* disable time stamp at ipl 3 */
+-#define X86_CR4_DE 0x0008 /* enable debugging extensions */
+-#define X86_CR4_PSE 0x0010 /* enable page size extensions */
+-#define X86_CR4_PAE 0x0020 /* enable physical address extensions */
+-#define X86_CR4_MCE 0x0040 /* Machine check enable */
+-#define X86_CR4_PGE 0x0080 /* enable global pages */
+-#define X86_CR4_PCE 0x0100 /* enable performance counters at ipl 3 */
+-#define X86_CR4_OSFXSR 0x0200 /* enable fast FPU save and restore */
+-#define X86_CR4_OSXMMEXCPT 0x0400 /* enable unmasked SSE exceptions */
+-
+-/*
+ * Save the cr4 feature set we're using (ie
+ * Pentium 4MB enable and PPro Global page
+ * enable), so that any CPU's that boot up
+@@ -206,26 +172,6 @@ static inline void clear_in_cr4 (unsigne
+ }
+
+ /*
+- * NSC/Cyrix CPU configuration register indexes
+- */
+-
+-#define CX86_PCR0 0x20
+-#define CX86_GCR 0xb8
+-#define CX86_CCR0 0xc0
+-#define CX86_CCR1 0xc1
+-#define CX86_CCR2 0xc2
+-#define CX86_CCR3 0xc3
+-#define CX86_CCR4 0xe8
+-#define CX86_CCR5 0xe9
+-#define CX86_CCR6 0xea
+-#define CX86_CCR7 0xeb
+-#define CX86_PCR1 0xf0
+-#define CX86_DIR0 0xfe
+-#define CX86_DIR1 0xff
+-#define CX86_ARR_BASE 0xc4
+-#define CX86_RCR_BASE 0xdc
+-
+-/*
+ * NSC/Cyrix CPU indexed register access macros
+ */
+
+@@ -351,7 +297,8 @@ typedef struct {
+ struct thread_struct;
+
+ #ifndef CONFIG_X86_NO_TSS
+-struct tss_struct {
++/* This is the TSS defined by the hardware. */
++struct i386_hw_tss {
+ unsigned short back_link,__blh;
+ unsigned long esp0;
+ unsigned short ss0,__ss0h;
+@@ -375,6 +322,11 @@ struct tss_struct {
+ unsigned short gs, __gsh;
+ unsigned short ldt, __ldth;
+ unsigned short trace, io_bitmap_base;
++} __attribute__((packed));
++
++struct tss_struct {
++ struct i386_hw_tss x86_tss;
++
+ /*
+ * The extra 1 is there because the CPU will access an
+ * additional byte beyond the end of the IO permission
+@@ -428,10 +380,11 @@ struct thread_struct {
+ };
+
+ #define INIT_THREAD { \
++ .esp0 = sizeof(init_stack) + (long)&init_stack, \
+ .vm86_info = NULL, \
+ .sysenter_cs = __KERNEL_CS, \
+ .io_bitmap_ptr = NULL, \
+- .fs = __KERNEL_PDA, \
++ .fs = __KERNEL_PERCPU, \
+ }
+
+ /*
+@@ -441,10 +394,12 @@ struct thread_struct {
+ * be within the limit.
+ */
+ #define INIT_TSS { \
+- .esp0 = sizeof(init_stack) + (long)&init_stack, \
+- .ss0 = __KERNEL_DS, \
+- .ss1 = __KERNEL_CS, \
+- .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, \
++ .x86_tss = { \
++ .esp0 = sizeof(init_stack) + (long)&init_stack, \
++ .ss0 = __KERNEL_DS, \
++ .ss1 = __KERNEL_CS, \
++ .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, \
++ }, \
+ .io_bitmap = { [ 0 ... IO_BITMAP_LONGS] = ~0 }, \
+ }
+
+@@ -551,38 +506,33 @@ static inline void rep_nop(void)
+
+ #define cpu_relax() rep_nop()
+
+-#define paravirt_enabled() 0
+-#define __cpuid xen_cpuid
+-
+ #ifndef CONFIG_X86_NO_TSS
+-static inline void __load_esp0(struct tss_struct *tss, struct thread_struct *thread)
++static inline void native_load_esp0(struct tss_struct *tss, struct thread_struct *thread)
+ {
+- tss->esp0 = thread->esp0;
++ tss->x86_tss.esp0 = thread->esp0;
+ /* This can only happen when SEP is enabled, no need to test "SEP"arately */
+- if (unlikely(tss->ss1 != thread->sysenter_cs)) {
+- tss->ss1 = thread->sysenter_cs;
++ if (unlikely(tss->x86_tss.ss1 != thread->sysenter_cs)) {
++ tss->x86_tss.ss1 = thread->sysenter_cs;
+ wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
+ }
+ }
+-#define load_esp0(tss, thread) \
+- __load_esp0(tss, thread)
+ #else
+-#define load_esp0(tss, thread) do { \
++#define xen_load_esp0(tss, thread) do { \
+ if (HYPERVISOR_stack_switch(__KERNEL_DS, (thread)->esp0)) \
+ BUG(); \
+ } while (0)
+ #endif
+
+
+-/*
+- * These special macros can be used to get or set a debugging register
+- */
+-#define get_debugreg(var, register) \
+- (var) = HYPERVISOR_get_debugreg(register)
+-#define set_debugreg(value, register) \
+- WARN_ON(HYPERVISOR_set_debugreg(register, value))
++static inline unsigned long xen_get_debugreg(int regno)
++{
++ return HYPERVISOR_get_debugreg(regno);
++}
+
+-#define set_iopl_mask xen_set_iopl_mask
++static inline void xen_set_debugreg(int regno, unsigned long value)
++{
++ WARN_ON(HYPERVISOR_set_debugreg(regno, value));
++}
+
+ /*
+ * Set IOPL bits in EFLAGS from given mask
+@@ -597,6 +547,21 @@ static inline void xen_set_iopl_mask(uns
+ }
+
+
++#define paravirt_enabled() 0
++#define __cpuid xen_cpuid
++
++#define load_esp0 xen_load_esp0
++
++/*
++ * These special macros can be used to get or set a debugging register
++ */
++#define get_debugreg(var, register) \
++ (var) = xen_get_debugreg(register)
++#define set_debugreg(value, register) \
++ xen_set_debugreg(register, value)
++
++#define set_iopl_mask xen_set_iopl_mask
++
+ /*
+ * Generic CPUID function
+ * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx
+@@ -749,8 +714,14 @@ extern unsigned long boot_option_idle_ov
+ extern void enable_sep_cpu(void);
+ extern int sysenter_setup(void);
+
+-extern int init_gdt(int cpu, struct task_struct *idle);
++/* Defined in head.S */
++extern struct Xgt_desc_struct early_gdt_descr;
++
+ extern void cpu_set_gdt(int);
+-extern void secondary_cpu_init(void);
++extern void switch_to_new_gdt(void);
++extern void cpu_init(void);
++extern void init_gdt(int cpu);
++
++extern int force_mwait;
+
+ #endif /* __ASM_I386_PROCESSOR_H */
+--- sle11-2009-04-20.orig/include/asm-x86/mach-xen/asm/segment_32.h 2009-03-04 11:25:55.000000000 +0100
++++ sle11-2009-04-20/include/asm-x86/mach-xen/asm/segment_32.h 2008-12-15 11:27:22.000000000 +0100
+@@ -39,7 +39,7 @@
+ * 25 - APM BIOS support
+ *
+ * 26 - ESPFIX small SS
+- * 27 - PDA [ per-cpu private data area ]
++ * 27 - per-cpu [ offset to per-cpu data area ]
+ * 28 - unused
+ * 29 - unused
+ * 30 - unused
+@@ -74,8 +74,12 @@
+ #define GDT_ENTRY_ESPFIX_SS (GDT_ENTRY_KERNEL_BASE + 14)
+ #define __ESPFIX_SS (GDT_ENTRY_ESPFIX_SS * 8)
+
+-#define GDT_ENTRY_PDA (GDT_ENTRY_KERNEL_BASE + 15)
+-#define __KERNEL_PDA (GDT_ENTRY_PDA * 8)
++#define GDT_ENTRY_PERCPU (GDT_ENTRY_KERNEL_BASE + 15)
++#ifdef CONFIG_SMP
++#define __KERNEL_PERCPU (GDT_ENTRY_PERCPU * 8)
++#else
++#define __KERNEL_PERCPU 0
++#endif
+
+ #define GDT_ENTRY_DOUBLEFAULT_TSS 31
+
+--- sle11-2009-04-20.orig/include/asm-x86/mach-xen/asm/smp_32.h 2009-03-04 11:25:55.000000000 +0100
++++ sle11-2009-04-20/include/asm-x86/mach-xen/asm/smp_32.h 2008-12-15 11:27:22.000000000 +0100
+@@ -8,19 +8,15 @@
+ #include <linux/kernel.h>
+ #include <linux/threads.h>
+ #include <linux/cpumask.h>
+-#include <asm/pda.h>
+ #endif
+
+-#ifdef CONFIG_X86_LOCAL_APIC
+-#ifndef __ASSEMBLY__
+-#include <asm/fixmap.h>
++#if defined(CONFIG_X86_LOCAL_APIC) && !defined(__ASSEMBLY__)
+ #include <asm/bitops.h>
+ #include <asm/mpspec.h>
++#include <asm/apic.h>
+ #ifdef CONFIG_X86_IO_APIC
+ #include <asm/io_apic.h>
+ #endif
+-#include <asm/apic.h>
+-#endif
+ #endif
+
+ #define BAD_APICID 0xFFu
+@@ -52,9 +48,76 @@ extern void cpu_exit_clear(void);
+ extern void cpu_uninit(void);
+ #endif
+
+-#ifndef CONFIG_PARAVIRT
++#ifndef CONFIG_XEN
++struct smp_ops
++{
++ void (*smp_prepare_boot_cpu)(void);
++ void (*smp_prepare_cpus)(unsigned max_cpus);
++ int (*cpu_up)(unsigned cpu);
++ void (*smp_cpus_done)(unsigned max_cpus);
++
++ void (*smp_send_stop)(void);
++ void (*smp_send_reschedule)(int cpu);
++ int (*smp_call_function_mask)(cpumask_t mask,
++ void (*func)(void *info), void *info,
++ int wait);
++};
++
++extern struct smp_ops smp_ops;
++
++static inline void smp_prepare_boot_cpu(void)
++{
++ smp_ops.smp_prepare_boot_cpu();
++}
++static inline void smp_prepare_cpus(unsigned int max_cpus)
++{
++ smp_ops.smp_prepare_cpus(max_cpus);
++}
++static inline int __cpu_up(unsigned int cpu)
++{
++ return smp_ops.cpu_up(cpu);
++}
++static inline void smp_cpus_done(unsigned int max_cpus)
++{
++ smp_ops.smp_cpus_done(max_cpus);
++}
++
++static inline void smp_send_stop(void)
++{
++ smp_ops.smp_send_stop();
++}
++static inline void smp_send_reschedule(int cpu)
++{
++ smp_ops.smp_send_reschedule(cpu);
++}
++static inline int smp_call_function_mask(cpumask_t mask,
++ void (*func) (void *info), void *info,
++ int wait)
++{
++ return smp_ops.smp_call_function_mask(mask, func, info, wait);
++}
++
++void native_smp_prepare_boot_cpu(void);
++void native_smp_prepare_cpus(unsigned int max_cpus);
++int native_cpu_up(unsigned int cpunum);
++void native_smp_cpus_done(unsigned int max_cpus);
++
+ #define startup_ipi_hook(phys_apicid, start_eip, start_esp) \
+ do { } while (0)
++
++#else
++
++
++void xen_smp_send_stop(void);
++void xen_smp_send_reschedule(int cpu);
++int xen_smp_call_function_mask(cpumask_t mask,
++ void (*func) (void *info), void *info,
++ int wait);
++
++#define smp_send_stop xen_smp_send_stop
++#define smp_send_reschedule xen_smp_send_reschedule
++#define smp_call_function_mask xen_smp_call_function_mask
++
+ #endif
+
+ /*
+@@ -62,7 +125,8 @@ do { } while (0)
+ * from the initial startup. We map APIC_BASE very early in page_setup(),
+ * so this is correct in the x86 case.
+ */
+-#define raw_smp_processor_id() (read_pda(cpu_number))
++DECLARE_PER_CPU(int, cpu_number);
++#define raw_smp_processor_id() (x86_read_percpu(cpu_number))
+
+ extern cpumask_t cpu_possible_map;
+ #define cpu_callin_map cpu_possible_map
+@@ -73,20 +137,6 @@ static inline int num_booting_cpus(void)
+ return cpus_weight(cpu_possible_map);
+ }
+
+-#ifdef CONFIG_X86_LOCAL_APIC
+-
+-#ifdef APIC_DEFINITION
+-extern int hard_smp_processor_id(void);
+-#else
+-#include <mach_apicdef.h>
+-static inline int hard_smp_processor_id(void)
+-{
+- /* we don't want to mark this access volatile - bad code generation */
+- return GET_APIC_ID(*(unsigned long *)(APIC_BASE+APIC_ID));
+-}
+-#endif
+-#endif
+-
+ #define safe_smp_processor_id() smp_processor_id()
+ extern int __cpu_disable(void);
+ extern void __cpu_die(unsigned int cpu);
+@@ -102,10 +152,31 @@ extern unsigned int num_processors;
+
+ #define NO_PROC_ID 0xFF /* No processor magic marker */
+
+-#endif
++#endif /* CONFIG_SMP */
+
+ #ifndef __ASSEMBLY__
+
++#ifdef CONFIG_X86_LOCAL_APIC
++
++#ifdef APIC_DEFINITION
++extern int hard_smp_processor_id(void);
++#else
++#include <mach_apicdef.h>
++static inline int hard_smp_processor_id(void)
++{
++ /* we don't want to mark this access volatile - bad code generation */
++ return GET_APIC_ID(*(unsigned long *)(APIC_BASE+APIC_ID));
++}
++#endif /* APIC_DEFINITION */
++
++#else /* CONFIG_X86_LOCAL_APIC */
++
++#ifndef CONFIG_SMP
++#define hard_smp_processor_id() 0
++#endif
++
++#endif /* CONFIG_X86_LOCAL_APIC */
++
+ extern u8 apicid_2_node[];
+
+ #ifdef CONFIG_X86_LOCAL_APIC
+--- sle11-2009-04-20.orig/include/asm-x86/mach-xen/asm/system_32.h 2008-12-15 11:26:44.000000000 +0100
++++ sle11-2009-04-20/include/asm-x86/mach-xen/asm/system_32.h 2008-12-15 11:27:22.000000000 +0100
+@@ -4,7 +4,7 @@
+ #include <linux/kernel.h>
+ #include <asm/segment.h>
+ #include <asm/cpufeature.h>
+-#include <linux/bitops.h> /* for LOCK_PREFIX */
++#include <asm/cmpxchg.h>
+ #include <asm/synch_bitops.h>
+ #include <asm/hypervisor.h>
+
+@@ -90,308 +90,102 @@ __asm__ __volatile__ ("movw %%dx,%1\n\t"
+ #define savesegment(seg, value) \
+ asm volatile("mov %%" #seg ",%0":"=rm" (value))
+
+-#define read_cr0() ({ \
+- unsigned int __dummy; \
+- __asm__ __volatile__( \
+- "movl %%cr0,%0\n\t" \
+- :"=r" (__dummy)); \
+- __dummy; \
+-})
+-#define write_cr0(x) \
+- __asm__ __volatile__("movl %0,%%cr0": :"r" (x))
+-
+-#define read_cr2() (current_vcpu_info()->arch.cr2)
+-#define write_cr2(x) \
+- __asm__ __volatile__("movl %0,%%cr2": :"r" (x))
+-
+-#define read_cr3() ({ \
+- unsigned int __dummy; \
+- __asm__ ( \
+- "movl %%cr3,%0\n\t" \
+- :"=r" (__dummy)); \
+- __dummy = xen_cr3_to_pfn(__dummy); \
+- mfn_to_pfn(__dummy) << PAGE_SHIFT; \
+-})
+-#define write_cr3(x) ({ \
+- unsigned int __dummy = pfn_to_mfn((x) >> PAGE_SHIFT); \
+- __dummy = xen_pfn_to_cr3(__dummy); \
+- __asm__ __volatile__("movl %0,%%cr3": :"r" (__dummy)); \
+-})
+-#define read_cr4() ({ \
+- unsigned int __dummy; \
+- __asm__( \
+- "movl %%cr4,%0\n\t" \
+- :"=r" (__dummy)); \
+- __dummy; \
+-})
+-#define read_cr4_safe() ({ \
+- unsigned int __dummy; \
+- /* This could fault if %cr4 does not exist */ \
+- __asm__("1: movl %%cr4, %0 \n" \
+- "2: \n" \
+- ".section __ex_table,\"a\" \n" \
+- ".long 1b,2b \n" \
+- ".previous \n" \
+- : "=r" (__dummy): "0" (0)); \
+- __dummy; \
+-})
+-
+-#define write_cr4(x) \
+- __asm__ __volatile__("movl %0,%%cr4": :"r" (x))
+-
+-#define wbinvd() \
+- __asm__ __volatile__ ("wbinvd": : :"memory")
+-
+-/* Clear the 'TS' bit */
+-#define clts() (HYPERVISOR_fpu_taskswitch(0))
+-
+-/* Set the 'TS' bit */
+-#define stts() (HYPERVISOR_fpu_taskswitch(1))
+-
+-#endif /* __KERNEL__ */
+-
+-static inline unsigned long get_limit(unsigned long segment)
++static inline void xen_clts(void)
+ {
+- unsigned long __limit;
+- __asm__("lsll %1,%0"
+- :"=r" (__limit):"r" (segment));
+- return __limit+1;
++ HYPERVISOR_fpu_taskswitch(0);
+ }
+
+-#define nop() __asm__ __volatile__ ("nop")
+-
+-#define xchg(ptr,v) ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr))))
+-
+-#define tas(ptr) (xchg((ptr),1))
+-
+-struct __xchg_dummy { unsigned long a[100]; };
+-#define __xg(x) ((struct __xchg_dummy *)(x))
++static inline unsigned long xen_read_cr0(void)
++{
++ unsigned long val;
++ asm volatile("movl %%cr0,%0\n\t" :"=r" (val));
++ return val;
++}
+
++static inline void xen_write_cr0(unsigned long val)
++{
++ asm volatile("movl %0,%%cr0": :"r" (val));
++}
+
+-#ifdef CONFIG_X86_CMPXCHG64
++#define xen_read_cr2() (current_vcpu_info()->arch.cr2)
+
+-/*
+- * The semantics of XCHGCMP8B are a bit strange, this is why
+- * there is a loop and the loading of %%eax and %%edx has to
+- * be inside. This inlines well in most cases, the cached
+- * cost is around ~38 cycles. (in the future we might want
+- * to do an SIMD/3DNOW!/MMX/FPU 64-bit store here, but that
+- * might have an implicit FPU-save as a cost, so it's not
+- * clear which path to go.)
+- *
+- * cmpxchg8b must be used with the lock prefix here to allow
+- * the instruction to be executed atomically, see page 3-102
+- * of the instruction set reference 24319102.pdf. We need
+- * the reader side to see the coherent 64bit value.
+- */
+-static inline void __set_64bit (unsigned long long * ptr,
+- unsigned int low, unsigned int high)
++static inline void xen_write_cr2(unsigned long val)
+ {
+- __asm__ __volatile__ (
+- "\n1:\t"
+- "movl (%0), %%eax\n\t"
+- "movl 4(%0), %%edx\n\t"
+- "lock cmpxchg8b (%0)\n\t"
+- "jnz 1b"
+- : /* no outputs */
+- : "D"(ptr),
+- "b"(low),
+- "c"(high)
+- : "ax","dx","memory");
++ asm volatile("movl %0,%%cr2": :"r" (val));
+ }
+
+-static inline void __set_64bit_constant (unsigned long long *ptr,
+- unsigned long long value)
++static inline unsigned long xen_read_cr3(void)
+ {
+- __set_64bit(ptr,(unsigned int)(value), (unsigned int)((value)>>32ULL));
++ unsigned long val;
++ asm volatile("movl %%cr3,%0\n\t" :"=r" (val));
++ return mfn_to_pfn(xen_cr3_to_pfn(val)) << PAGE_SHIFT;
+ }
+-#define ll_low(x) *(((unsigned int*)&(x))+0)
+-#define ll_high(x) *(((unsigned int*)&(x))+1)
+
+-static inline void __set_64bit_var (unsigned long long *ptr,
+- unsigned long long value)
++static inline void xen_write_cr3(unsigned long val)
+ {
+- __set_64bit(ptr,ll_low(value), ll_high(value));
++ val = xen_pfn_to_cr3(pfn_to_mfn(val >> PAGE_SHIFT));
++ asm volatile("movl %0,%%cr3": :"r" (val));
+ }
+
+-#define set_64bit(ptr,value) \
+-(__builtin_constant_p(value) ? \
+- __set_64bit_constant(ptr, value) : \
+- __set_64bit_var(ptr, value) )
+-
+-#define _set_64bit(ptr,value) \
+-(__builtin_constant_p(value) ? \
+- __set_64bit(ptr, (unsigned int)(value), (unsigned int)((value)>>32ULL) ) : \
+- __set_64bit(ptr, ll_low(value), ll_high(value)) )
+-
+-#endif
+-
+-/*
+- * Note: no "lock" prefix even on SMP: xchg always implies lock anyway
+- * Note 2: xchg has side effect, so that attribute volatile is necessary,
+- * but generally the primitive is invalid, *ptr is output argument. --ANK
+- */
+-static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int size)
++static inline unsigned long xen_read_cr4(void)
+ {
+- switch (size) {
+- case 1:
+- __asm__ __volatile__("xchgb %b0,%1"
+- :"=q" (x)
+- :"m" (*__xg(ptr)), "0" (x)
+- :"memory");
+- break;
+- case 2:
+- __asm__ __volatile__("xchgw %w0,%1"
+- :"=r" (x)
+- :"m" (*__xg(ptr)), "0" (x)
+- :"memory");
+- break;
+- case 4:
+- __asm__ __volatile__("xchgl %0,%1"
+- :"=r" (x)
+- :"m" (*__xg(ptr)), "0" (x)
+- :"memory");
+- break;
+- }
+- return x;
++ unsigned long val;
++ asm volatile("movl %%cr4,%0\n\t" :"=r" (val));
++ return val;
+ }
+
+-/*
+- * Atomic compare and exchange. Compare OLD with MEM, if identical,
+- * store NEW in MEM. Return the initial value in MEM. Success is
+- * indicated by comparing RETURN with OLD.
+- */
+-
+-#ifdef CONFIG_X86_CMPXCHG
+-#define __HAVE_ARCH_CMPXCHG 1
+-#define cmpxchg(ptr,o,n)\
+- ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\
+- (unsigned long)(n),sizeof(*(ptr))))
+-#define sync_cmpxchg(ptr,o,n)\
+- ((__typeof__(*(ptr)))__sync_cmpxchg((ptr),(unsigned long)(o),\
+- (unsigned long)(n),sizeof(*(ptr))))
+-#endif
++static inline unsigned long xen_read_cr4_safe(void)
++{
++ unsigned long val;
++ /* This could fault if %cr4 does not exist */
++ asm("1: movl %%cr4, %0 \n"
++ "2: \n"
++ ".section __ex_table,\"a\" \n"
++ ".long 1b,2b \n"
++ ".previous \n"
++ : "=r" (val): "0" (0));
++ return val;
++}
+
+-static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
+- unsigned long new, int size)
++static inline void xen_write_cr4(unsigned long val)
+ {
+- unsigned long prev;
+- switch (size) {
+- case 1:
+- __asm__ __volatile__(LOCK_PREFIX "cmpxchgb %b1,%2"
+- : "=a"(prev)
+- : "q"(new), "m"(*__xg(ptr)), "0"(old)
+- : "memory");
+- return prev;
+- case 2:
+- __asm__ __volatile__(LOCK_PREFIX "cmpxchgw %w1,%2"
+- : "=a"(prev)
+- : "r"(new), "m"(*__xg(ptr)), "0"(old)
+- : "memory");
+- return prev;
+- case 4:
+- __asm__ __volatile__(LOCK_PREFIX "cmpxchgl %1,%2"
+- : "=a"(prev)
+- : "r"(new), "m"(*__xg(ptr)), "0"(old)
+- : "memory");
+- return prev;
+- }
+- return old;
++ asm volatile("movl %0,%%cr4": :"r" (val));
+ }
+
+-/*
+- * Always use locked operations when touching memory shared with a
+- * hypervisor, since the system may be SMP even if the guest kernel
+- * isn't.
+- */
+-static inline unsigned long __sync_cmpxchg(volatile void *ptr,
+- unsigned long old,
+- unsigned long new, int size)
+-{
+- unsigned long prev;
+- switch (size) {
+- case 1:
+- __asm__ __volatile__("lock; cmpxchgb %b1,%2"
+- : "=a"(prev)
+- : "q"(new), "m"(*__xg(ptr)), "0"(old)
+- : "memory");
+- return prev;
+- case 2:
+- __asm__ __volatile__("lock; cmpxchgw %w1,%2"
+- : "=a"(prev)
+- : "r"(new), "m"(*__xg(ptr)), "0"(old)
+- : "memory");
+- return prev;
+- case 4:
+- __asm__ __volatile__("lock; cmpxchgl %1,%2"
+- : "=a"(prev)
+- : "r"(new), "m"(*__xg(ptr)), "0"(old)
+- : "memory");
+- return prev;
+- }
+- return old;
++static inline void xen_wbinvd(void)
++{
++ asm volatile("wbinvd": : :"memory");
+ }