X-Git-Url: http://git.ipfire.org/?p=people%2Fteissler%2Fipfire-2.x.git;a=blobdiff_plain;f=src%2Fpatches%2Fsuse-2.6.27.25%2Fpatches.xen%2Fxen3-patch-2.6.22;fp=src%2Fpatches%2F60031_xen3-patch-2.6.22.patch1;h=4af854835d25a110c7caeaed944b2aa75cfbdb02;hp=98d5f9633c047535d295cd48fb7113e2d1b82bbc;hb=00e5a55c3c58b7320050d37ca15d98fd48acbf2f;hpb=4935eb8b91cbff9f3a05de77a40b16e980815e77 diff --git a/src/patches/60031_xen3-patch-2.6.22.patch1 b/src/patches/suse-2.6.27.25/patches.xen/xen3-patch-2.6.22 similarity index 90% rename from src/patches/60031_xen3-patch-2.6.22.patch1 rename to src/patches/suse-2.6.27.25/patches.xen/xen3-patch-2.6.22 index 98d5f9633..4af854835 100644 --- a/src/patches/60031_xen3-patch-2.6.22.patch1 +++ b/src/patches/suse-2.6.27.25/patches.xen/xen3-patch-2.6.22 @@ -6,161 +6,9 @@ Automatically created from "patches.kernel.org/patch-2.6.22" by xen-port-patches Acked-by: jbeulich@novell.com ---- - arch/x86/Kconfig | 4 - arch/x86/ia32/ia32entry-xen.S | 18 - - arch/x86/kernel/Makefile | 2 - arch/x86/kernel/acpi/sleep_64-xen.c | 26 - - arch/x86/kernel/apic_32-xen.c | 1 - arch/x86/kernel/apic_64-xen.c | 1 - arch/x86/kernel/asm-offsets_32.c | 5 - arch/x86/kernel/cpu/common-xen.c | 224 ++++--------- - arch/x86/kernel/cpu/mtrr/main-xen.c | 2 - arch/x86/kernel/e820_32-xen.c | 46 +- - arch/x86/kernel/e820_64-xen.c | 28 - - arch/x86/kernel/early_printk-xen.c | 27 - - arch/x86/kernel/entry_32-xen.S | 30 - - arch/x86/kernel/entry_64-xen.S | 7 - arch/x86/kernel/genapic_64-xen.c | 108 +----- - arch/x86/kernel/genapic_xen_64.c | 3 - arch/x86/kernel/head64-xen.c | 32 + - arch/x86/kernel/head_32-xen.S | 101 ------ - arch/x86/kernel/head_64-xen.S | 52 --- - arch/x86/kernel/io_apic_32-xen.c | 43 -- - arch/x86/kernel/io_apic_64-xen.c | 39 -- - arch/x86/kernel/ioport_32-xen.c | 2 - arch/x86/kernel/ioport_64-xen.c | 2 - arch/x86/kernel/irq_32-xen.c | 3 - arch/x86/kernel/irq_64-xen.c | 34 +- - arch/x86/kernel/ldt_32-xen.c | 1 - arch/x86/kernel/ldt_64-xen.c | 1 - arch/x86/kernel/microcode-xen.c | 2 - arch/x86/kernel/mpparse_32-xen.c | 3 - arch/x86/kernel/mpparse_64-xen.c | 3 - arch/x86/kernel/pci-dma-xen.c | 29 + - arch/x86/kernel/process_32-xen.c | 27 + - arch/x86/kernel/process_64-xen.c | 16 - arch/x86/kernel/quirks-xen.c | 63 --- - arch/x86/kernel/setup64-xen.c | 17 - - arch/x86/kernel/setup_64-xen.c | 30 - - arch/x86/kernel/smp_32-xen.c | 191 ++++------- - arch/x86/kernel/smp_64-xen.c | 29 - - arch/x86/kernel/time_32-xen.c | 165 ++++++---- - arch/x86/kernel/traps_32-xen.c | 46 +- - arch/x86/kernel/traps_64-xen.c | 55 +-- - arch/x86/kernel/vsyscall_64-xen.c | 73 +++- - arch/x86/mm/fault_32-xen.c | 42 +- - arch/x86/mm/fault_64-xen.c | 15 - arch/x86/mm/highmem_32-xen.c | 14 - arch/x86/mm/init_32-xen.c | 157 ++++++--- - arch/x86/mm/init_64-xen.c | 132 ++++---- - arch/x86/mm/ioremap_32-xen.c | 1 - arch/x86/mm/pageattr_64-xen.c | 27 + - arch/x86/mm/pgtable_32-xen.c | 206 +++++++----- - drivers/char/tpm/tpm_xen.c | 2 - drivers/pci/msi-xen.c | 127 +++++-- - drivers/xen/blkfront/blkfront.c | 2 - drivers/xen/char/mem.c | 1 - drivers/xen/core/hypervisor_sysfs.c | 2 - drivers/xen/core/smpboot.c | 45 +- - drivers/xen/core/xen_sysfs.c | 24 - - drivers/xen/netback/netback.c | 14 - drivers/xen/netfront/netfront.c | 2 - drivers/xen/pciback/xenbus.c | 2 - drivers/xen/pcifront/xenbus.c | 4 - drivers/xen/scsifront/xenbus.c | 2 - drivers/xen/sfc_netback/accel_fwd.c | 7 - drivers/xen/sfc_netback/accel_solarflare.c | 2 - drivers/xen/sfc_netfront/accel_tso.c | 28 - - drivers/xen/sfc_netfront/accel_vi.c | 4 - 
drivers/xen/sfc_netfront/accel_xenbus.c | 4 - fs/aio.c | 7 - include/asm-x86/mach-xen/asm/desc_32.h | 119 ++++--- - include/asm-x86/mach-xen/asm/desc_64.h | 30 - - include/asm-x86/mach-xen/asm/dma-mapping_64.h | 2 - include/asm-x86/mach-xen/asm/fixmap_32.h | 9 - include/asm-x86/mach-xen/asm/fixmap_64.h | 1 - include/asm-x86/mach-xen/asm/highmem.h | 6 - include/asm-x86/mach-xen/asm/io_32.h | 13 - include/asm-x86/mach-xen/asm/irqflags_32.h | 75 ++-- - include/asm-x86/mach-xen/asm/irqflags_64.h | 19 - - include/asm-x86/mach-xen/asm/mmu_context_32.h | 29 + - include/asm-x86/mach-xen/asm/mmu_context_64.h | 3 - include/asm-x86/mach-xen/asm/page_64.h | 61 +-- - include/asm-x86/mach-xen/asm/pgalloc_32.h | 3 - include/asm-x86/mach-xen/asm/pgalloc_64.h | 15 - include/asm-x86/mach-xen/asm/pgtable-3level-defs.h | 2 - include/asm-x86/mach-xen/asm/pgtable-3level.h | 61 ++- - include/asm-x86/mach-xen/asm/pgtable_32.h | 80 ++-- - include/asm-x86/mach-xen/asm/pgtable_64.h | 83 ++--- - include/asm-x86/mach-xen/asm/processor_32.h | 141 +++----- - include/asm-x86/mach-xen/asm/processor_64.h | 55 --- - include/asm-x86/mach-xen/asm/segment_32.h | 10 - include/asm-x86/mach-xen/asm/smp_32.h | 117 +++++-- - include/asm-x86/mach-xen/asm/smp_64.h | 20 - - include/asm-x86/mach-xen/asm/system_32.h | 342 ++++----------------- - include/asm-x86/mach-xen/asm/system_64.h | 106 ------ - include/asm-x86/mach-xen/asm/tlbflush_32.h | 11 - include/asm-x86/mach-xen/asm/tlbflush_64.h | 2 - include/linux/pci.h | 2 - lib/swiotlb-xen.c | 1 - net/core/dev.c | 15 - scripts/Makefile.xen.awk | 2 - 99 files changed, 1771 insertions(+), 2128 deletions(-) - ---- a/arch/x86/ia32/ia32entry-xen.S -+++ b/arch/x86/ia32/ia32entry-xen.S -@@ -431,11 +431,7 @@ ia32_sys_call_table: - .quad sys_symlink - .quad sys_lstat - .quad sys_readlink /* 85 */ --#ifdef CONFIG_IA32_AOUT - .quad sys_uselib --#else -- .quad quiet_ni_syscall --#endif - .quad sys_swapon - .quad sys_reboot - .quad compat_sys_old_readdir -@@ -574,7 +570,7 @@ ia32_sys_call_table: - .quad quiet_ni_syscall /* tux */ - .quad quiet_ni_syscall /* security */ - .quad sys_gettid -- .quad sys_readahead /* 225 */ -+ .quad sys32_readahead /* 225 */ - .quad sys_setxattr - .quad sys_lsetxattr - .quad sys_fsetxattr -@@ -599,7 +595,7 @@ ia32_sys_call_table: - .quad compat_sys_io_getevents - .quad compat_sys_io_submit - .quad sys_io_cancel -- .quad sys_fadvise64 /* 250 */ -+ .quad sys32_fadvise64 /* 250 */ - .quad quiet_ni_syscall /* free_huge_pages */ - .quad sys_exit_group - .quad sys32_lookup_dcookie -@@ -663,10 +659,14 @@ ia32_sys_call_table: - .quad compat_sys_set_robust_list - .quad compat_sys_get_robust_list - .quad sys_splice -- .quad sys_sync_file_range -- .quad sys_tee -+ .quad sys32_sync_file_range -+ .quad sys_tee /* 315 */ - .quad compat_sys_vmsplice - .quad compat_sys_move_pages - .quad sys_getcpu - .quad sys_epoll_pwait --ia32_syscall_end: -+ .quad compat_sys_utimensat /* 320 */ -+ .quad compat_sys_signalfd -+ .quad compat_sys_timerfd -+ .quad sys_eventfd -+ia32_syscall_end: ---- a/arch/x86/Kconfig -+++ b/arch/x86/Kconfig -@@ -1429,7 +1429,7 @@ config PHYSICAL_START +--- sle11-2009-04-20.orig/arch/x86/Kconfig 2009-03-04 11:25:55.000000000 +0100 ++++ sle11-2009-04-20/arch/x86/Kconfig 2009-02-05 10:22:38.000000000 +0100 +@@ -1433,7 +1433,7 @@ config PHYSICAL_START config RELOCATABLE bool "Build a relocatable kernel (EXPERIMENTAL)" @@ -169,7 +17,7 @@ Acked-by: jbeulich@novell.com help This builds a kernel image that retains relocation information so it can be loaded someplace besides the 
default 1MB. -@@ -1483,7 +1483,6 @@ config COMPAT_VDSO +@@ -1487,7 +1487,6 @@ config COMPAT_VDSO def_bool y prompt "Compat VDSO support" depends on X86_32 || IA32_EMULATION @@ -177,7 +25,7 @@ Acked-by: jbeulich@novell.com help Map the 32-bit VDSO to the predictable old-style address too. ---help--- -@@ -1662,6 +1661,7 @@ config PCI +@@ -1666,6 +1665,7 @@ config PCI bool "PCI support" default y select ARCH_SUPPORTS_MSI if (X86_LOCAL_APIC && X86_IO_APIC) @@ -185,75 +33,16 @@ Acked-by: jbeulich@novell.com help Find out whether you have a PCI motherboard. PCI is the name of a bus system, i.e. the way the CPU talks to the other stuff inside ---- a/arch/x86/kernel/acpi/sleep_64-xen.c -+++ b/arch/x86/kernel/acpi/sleep_64-xen.c -@@ -60,19 +60,6 @@ unsigned long acpi_video_flags; - extern char wakeup_start, wakeup_end; - - extern unsigned long acpi_copy_wakeup_routine(unsigned long); -- --static pgd_t low_ptr; -- --static void init_low_mapping(void) --{ -- pgd_t *slot0 = pgd_offset(current->mm, 0UL); -- low_ptr = *slot0; -- /* FIXME: We're playing with the current task's page tables here, which -- * is potentially dangerous on SMP systems. -- */ -- set_pgd(slot0, *pgd_offset(current->mm, PAGE_OFFSET)); -- local_flush_tlb(); --} - #endif - - /** -@@ -84,8 +71,6 @@ static void init_low_mapping(void) - int acpi_save_state_mem(void) - { - #ifndef CONFIG_ACPI_PV_SLEEP -- init_low_mapping(); -- - memcpy((void *)acpi_wakeup_address, &wakeup_start, - &wakeup_end - &wakeup_start); - acpi_copy_wakeup_routine(acpi_wakeup_address); -@@ -98,10 +83,6 @@ int acpi_save_state_mem(void) - */ - void acpi_restore_state_mem(void) - { --#ifndef CONFIG_ACPI_PV_SLEEP -- set_pgd(pgd_offset(current->mm, 0UL), low_ptr); -- local_flush_tlb(); --#endif - } - - /** -@@ -115,10 +96,11 @@ void acpi_restore_state_mem(void) - void __init acpi_reserve_bootmem(void) - { - #ifndef CONFIG_ACPI_PV_SLEEP -- acpi_wakeup_address = (unsigned long)alloc_bootmem_low(PAGE_SIZE); -- if ((&wakeup_end - &wakeup_start) > PAGE_SIZE) -+ acpi_wakeup_address = (unsigned long)alloc_bootmem_low(PAGE_SIZE*2); -+ if ((&wakeup_end - &wakeup_start) > (PAGE_SIZE*2)) - printk(KERN_CRIT -- "ACPI: Wakeup code way too big, will crash on attempt to suspend\n"); -+ "ACPI: Wakeup code way too big, will crash on attempt" -+ " to suspend\n"); - #endif - } - ---- a/arch/x86/kernel/apic_32-xen.c -+++ b/arch/x86/kernel/apic_32-xen.c -@@ -19,7 +19,6 @@ - #include - #include - #include --#include - #include - #include - #include ---- a/arch/x86/kernel/apic_64-xen.c -+++ b/arch/x86/kernel/apic_64-xen.c +--- sle11-2009-04-20.orig/arch/x86/kernel/Makefile 2009-03-04 11:25:55.000000000 +0100 ++++ sle11-2009-04-20/arch/x86/kernel/Makefile 2008-12-15 11:27:22.000000000 +0100 +@@ -127,4 +127,4 @@ endif + disabled-obj-$(CONFIG_XEN) := early-quirks.o hpet.o i8253.o i8259_$(BITS).o reboot.o \ + smpboot_$(BITS).o tsc_$(BITS).o tsc_sync.o + disabled-obj-$(CONFIG_XEN_UNPRIVILEGED_GUEST) += mpparse_64.o +-%/head_$(BITS).o %/head_$(BITS).s: $(if $(CONFIG_XEN),EXTRA_AFLAGS,dummy) := ++%/head_64.o %/head_64.s: $(if $(CONFIG_XEN),EXTRA_AFLAGS,dummy) := +--- sle11-2009-04-20.orig/arch/x86/kernel/apic_32-xen.c 2009-03-04 11:25:55.000000000 +0100 ++++ sle11-2009-04-20/arch/x86/kernel/apic_32-xen.c 2008-12-15 11:27:22.000000000 +0100 @@ -19,7 +19,6 @@ #include #include @@ -262,8 +51,8 @@ Acked-by: jbeulich@novell.com #include #include #include ---- a/arch/x86/kernel/asm-offsets_32.c -+++ b/arch/x86/kernel/asm-offsets_32.c +--- sle11-2009-04-20.orig/arch/x86/kernel/asm-offsets_32.c 2008-12-15 
11:26:44.000000000 +0100 ++++ sle11-2009-04-20/arch/x86/kernel/asm-offsets_32.c 2008-12-15 11:27:22.000000000 +0100 @@ -109,11 +109,6 @@ void foo(void) OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx); @@ -276,8 +65,8 @@ Acked-by: jbeulich@novell.com #ifdef CONFIG_PARAVIRT BLANK(); OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled); ---- a/arch/x86/kernel/cpu/common-xen.c -+++ b/arch/x86/kernel/cpu/common-xen.c +--- sle11-2009-04-20.orig/arch/x86/kernel/cpu/common-xen.c 2009-03-04 11:25:55.000000000 +0100 ++++ sle11-2009-04-20/arch/x86/kernel/cpu/common-xen.c 2008-12-15 11:27:22.000000000 +0100 @@ -22,16 +22,40 @@ #define phys_pkg_id(a,b) a #endif @@ -570,8 +359,8 @@ Acked-by: jbeulich@novell.com #ifdef CONFIG_HOTPLUG_CPU void __cpuinit cpu_uninit(void) { ---- a/arch/x86/kernel/cpu/mtrr/main-xen.c -+++ b/arch/x86/kernel/cpu/mtrr/main-xen.c +--- sle11-2009-04-20.orig/arch/x86/kernel/cpu/mtrr/main-xen.c 2008-12-15 11:26:44.000000000 +0100 ++++ sle11-2009-04-20/arch/x86/kernel/cpu/mtrr/main-xen.c 2008-12-15 11:27:22.000000000 +0100 @@ -167,7 +167,7 @@ mtrr_del(int reg, unsigned long base, un EXPORT_SYMBOL(mtrr_add); EXPORT_SYMBOL(mtrr_del); @@ -581,8 +370,8 @@ Acked-by: jbeulich@novell.com { } ---- a/arch/x86/kernel/e820_32-xen.c -+++ b/arch/x86/kernel/e820_32-xen.c +--- sle11-2009-04-20.orig/arch/x86/kernel/e820_32-xen.c 2009-03-04 11:25:55.000000000 +0100 ++++ sle11-2009-04-20/arch/x86/kernel/e820_32-xen.c 2008-12-15 11:27:22.000000000 +0100 @@ -162,26 +162,27 @@ static struct resource standard_io_resou static int __init romsignature(const unsigned char *rom) @@ -710,132 +499,8 @@ Acked-by: jbeulich@novell.com start = 0x100000ULL; size = end - start; } ---- a/arch/x86/kernel/e820_64-xen.c -+++ b/arch/x86/kernel/e820_64-xen.c -@@ -17,6 +17,8 @@ - #include - #include - #include -+#include -+#include - - #include - #include -@@ -28,7 +30,7 @@ - - struct e820map e820 __initdata; - #ifdef CONFIG_XEN --struct e820map machine_e820 __initdata; -+struct e820map machine_e820; - #endif - - /* -@@ -291,22 +293,6 @@ void __init e820_reserve_resources(struc - } - - #ifndef CONFIG_XEN --/* Mark pages corresponding to given address range as nosave */ --static void __init --e820_mark_nosave_range(unsigned long start, unsigned long end) --{ -- unsigned long pfn, max_pfn; -- -- if (start >= end) -- return; -- -- printk("Nosave address range: %016lx - %016lx\n", start, end); -- max_pfn = end >> PAGE_SHIFT; -- for (pfn = start >> PAGE_SHIFT; pfn < max_pfn; pfn++) -- if (pfn_valid(pfn)) -- SetPageNosave(pfn_to_page(pfn)); --} -- - /* - * Find the ranges of physical addresses that do not correspond to - * e820 RAM areas and mark the corresponding pages as nosave for software -@@ -325,13 +311,13 @@ void __init e820_mark_nosave_regions(voi - struct e820entry *ei = &e820.map[i]; - - if (paddr < ei->addr) -- e820_mark_nosave_range(paddr, -- round_up(ei->addr, PAGE_SIZE)); -+ register_nosave_region(PFN_DOWN(paddr), -+ PFN_UP(ei->addr)); - - paddr = round_down(ei->addr + ei->size, PAGE_SIZE); - if (ei->type != E820_RAM) -- e820_mark_nosave_range(round_up(ei->addr, PAGE_SIZE), -- paddr); -+ register_nosave_region(PFN_UP(ei->addr), -+ PFN_DOWN(paddr)); - - if (paddr >= (end_pfn << PAGE_SHIFT)) - break; ---- a/arch/x86/kernel/early_printk-xen.c -+++ b/arch/x86/kernel/early_printk-xen.c -@@ -11,11 +11,10 @@ - - #ifdef __i386__ - #include --#define VGABASE (__ISA_IO_base + 0xb8000) - #else - #include --#define VGABASE ((void __iomem *)0xffffffff800b8000UL) - #endif -+#define VGABASE (__ISA_IO_base + 0xb8000) - - 
#ifndef CONFIG_XEN - static int max_ypos = 25, max_xpos = 80; -@@ -93,9 +92,9 @@ static int early_serial_putc(unsigned ch - static void early_serial_write(struct console *con, const char *s, unsigned n) - { - while (*s && n-- > 0) { -- early_serial_putc(*s); - if (*s == '\n') - early_serial_putc('\r'); -+ early_serial_putc(*s); - s++; - } - } -@@ -205,7 +204,7 @@ static noinline long simnow(long cmd, lo - return ret; - } - --void __init simnow_init(char *str) -+static void __init simnow_init(char *str) - { - char *fn = "klog"; - if (*str == '=') -@@ -277,22 +276,12 @@ static int __init setup_early_printk(cha - early_console = &simnow_console; - keep_early = 1; - } -+ -+ if (keep_early) -+ early_console->flags &= ~CON_BOOT; -+ else -+ early_console->flags |= CON_BOOT; - register_console(early_console); - return 0; - } -- - early_param("earlyprintk", setup_early_printk); -- --void __init disable_early_printk(void) --{ -- if (!early_console_initialized || !early_console) -- return; -- if (!keep_early) { -- printk("disabling early console\n"); -- unregister_console(early_console); -- early_console_initialized = 0; -- } else { -- printk("keeping early console\n"); -- } --} -- ---- a/arch/x86/kernel/entry_32-xen.S -+++ b/arch/x86/kernel/entry_32-xen.S +--- sle11-2009-04-20.orig/arch/x86/kernel/entry_32-xen.S 2009-03-04 11:25:55.000000000 +0100 ++++ sle11-2009-04-20/arch/x86/kernel/entry_32-xen.S 2008-12-15 11:27:22.000000000 +0100 @@ -15,7 +15,7 @@ * I changed all the .align's to 4 (16 byte alignment), as that's faster * on a 486. @@ -942,189 +607,8 @@ Acked-by: jbeulich@novell.com movl %ecx, %fs UNWIND_ESPFIX_STACK popl %ecx ---- a/arch/x86/kernel/entry_64-xen.S -+++ b/arch/x86/kernel/entry_64-xen.S -@@ -1254,3 +1254,10 @@ ENTRY(call_softirq) - ret - CFI_ENDPROC - ENDPROC(call_softirq) -+ -+KPROBE_ENTRY(ignore_sysret) -+ CFI_STARTPROC -+ mov $-ENOSYS,%eax -+ HYPERVISOR_IRET 0 -+ CFI_ENDPROC -+ENDPROC(ignore_sysret) ---- a/arch/x86/kernel/genapic_64-xen.c -+++ b/arch/x86/kernel/genapic_64-xen.c -@@ -11,123 +11,57 @@ - #include - #include - #include -+#include - #include - #include - #include --#include - - #include - #include -+#include - --#if defined(CONFIG_ACPI) -+#ifdef CONFIG_ACPI - #include - #endif - - /* which logical CPU number maps to which CPU (physical APIC ID) */ --u8 x86_cpu_to_apicid[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; -+u8 x86_cpu_to_apicid[NR_CPUS] __read_mostly -+ = { [0 ... NR_CPUS-1] = BAD_APICID }; - EXPORT_SYMBOL(x86_cpu_to_apicid); --u8 x86_cpu_to_log_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID }; - --extern struct genapic apic_cluster; --extern struct genapic apic_flat; --extern struct genapic apic_physflat; -+u8 x86_cpu_to_log_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID }; - - #ifndef CONFIG_XEN --struct genapic *genapic = &apic_flat; --struct genapic *genapic_force; -+struct genapic __read_mostly *genapic = &apic_flat; - #else - extern struct genapic apic_xen; --struct genapic *genapic = &apic_xen; -+struct genapic __read_mostly *genapic = &apic_xen; - #endif - - - /* - * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode. - */ --void __init clustered_apic_check(void) -+void __init setup_apic_routing(void) - { - #ifndef CONFIG_XEN -- long i; -- u8 clusters, max_cluster; -- u8 id; -- u8 cluster_cnt[NUM_APIC_CLUSTERS]; -- int max_apic = 0; -- -- /* genapic selection can be forced because of certain quirks. 
-- */ -- if (genapic_force) { -- genapic = genapic_force; -- goto print; -- } -- --#if defined(CONFIG_ACPI) -+#ifdef CONFIG_ACPI - /* -- * Some x86_64 machines use physical APIC mode regardless of how many -- * procs/clusters are present (x86_64 ES7000 is an example). -+ * Quirk: some x86_64 machines can only use physical APIC mode -+ * regardless of how many processors are present (x86_64 ES7000 -+ * is an example). - */ -- if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID) -- if (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL) { -- genapic = &apic_cluster; -- goto print; -- } --#endif -- -- memset(cluster_cnt, 0, sizeof(cluster_cnt)); -- for (i = 0; i < NR_CPUS; i++) { -- id = bios_cpu_apicid[i]; -- if (id == BAD_APICID) -- continue; -- if (id > max_apic) -- max_apic = id; -- cluster_cnt[APIC_CLUSTERID(id)]++; -- } -- -- /* Don't use clustered mode on AMD platforms. */ -- if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { -+ if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID && -+ (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) - genapic = &apic_physflat; --#ifndef CONFIG_HOTPLUG_CPU -- /* In the CPU hotplug case we cannot use broadcast mode -- because that opens a race when a CPU is removed. -- Stay at physflat mode in this case. -- It is bad to do this unconditionally though. Once -- we have ACPI platform support for CPU hotplug -- we should detect hotplug capablity from ACPI tables and -- only do this when really needed. -AK */ -- if (max_apic <= 8) -- genapic = &apic_flat; --#endif -- goto print; -- } -- -- clusters = 0; -- max_cluster = 0; -- -- for (i = 0; i < NUM_APIC_CLUSTERS; i++) { -- if (cluster_cnt[i] > 0) { -- ++clusters; -- if (cluster_cnt[i] > max_cluster) -- max_cluster = cluster_cnt[i]; -- } -- } -+ else -+#endif - -- /* -- * If we have clusters <= 1 and CPUs <= 8 in cluster 0, then flat mode, -- * else if max_cluster <= 4 and cluster_cnt[15] == 0, clustered logical -- * else physical mode. -- * (We don't use lowest priority delivery + HW APIC IRQ steering, so -- * can ignore the clustered logical case and go straight to physical.) -- */ -- if (clusters <= 1 && max_cluster <= 8 && cluster_cnt[0] == max_cluster) { --#ifdef CONFIG_HOTPLUG_CPU -- /* Don't use APIC shortcuts in CPU hotplug to avoid races */ -- genapic = &apic_physflat; --#else -+ if (cpus_weight(cpu_possible_map) <= 8) - genapic = &apic_flat; --#endif -- } else -- genapic = &apic_cluster; -+ else -+ genapic = &apic_physflat; - --print: - #else - /* hardcode to xen apic functions */ - genapic = &apic_xen; -@@ -135,7 +69,7 @@ print: - printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name); - } - --/* Same for both flat and clustered. */ -+/* Same for both flat and physical. 
*/ - - #ifdef CONFIG_XEN - extern void xen_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest); ---- a/arch/x86/kernel/genapic_xen_64.c -+++ b/arch/x86/kernel/genapic_xen_64.c -@@ -21,9 +21,8 @@ - #include - #else - #include --#include --#include - #endif -+#include - #include - - DECLARE_PER_CPU(int, ipi_to_irq[NR_IPIS]); ---- a/arch/x86/kernel/head_32-xen.S -+++ b/arch/x86/kernel/head_32-xen.S +--- sle11-2009-04-20.orig/arch/x86/kernel/head_32-xen.S 2009-03-04 11:25:55.000000000 +0100 ++++ sle11-2009-04-20/arch/x86/kernel/head_32-xen.S 2008-12-15 11:27:22.000000000 +0100 @@ -37,7 +37,8 @@ ENTRY(startup_32) /* Set up the stack pointer */ movl $(init_thread_union+THREAD_SIZE),%esp @@ -1252,244 +736,65 @@ Acked-by: jbeulich@novell.com #if CONFIG_XEN_COMPAT <= 0x030002 /* * __xen_guest information ---- a/arch/x86/kernel/head64-xen.c -+++ b/arch/x86/kernel/head64-xen.c -@@ -25,13 +25,21 @@ - #include - #include - #include -+#include - #include - - unsigned long start_pfn; +--- sle11-2009-04-20.orig/arch/x86/kernel/io_apic_32-xen.c 2009-03-04 11:25:55.000000000 +0100 ++++ sle11-2009-04-20/arch/x86/kernel/io_apic_32-xen.c 2008-12-15 11:27:22.000000000 +0100 +@@ -25,7 +25,6 @@ + #include + #include + #include +-#include + #include + #include + #include +@@ -35,6 +34,7 @@ + #include + #include + #include ++#include -+#ifndef CONFIG_XEN -+static void __init zap_identity_mappings(void) -+{ -+ pgd_t *pgd = pgd_offset_k(0UL); -+ pgd_clear(pgd); -+ __flush_tlb(); -+} -+ - /* Don't add a printk in there. printk relies on the PDA which is not initialized - yet. */ --#if 0 - static void __init clear_bss(void) - { - memset(__bss_start, 0, -@@ -40,26 +48,25 @@ static void __init clear_bss(void) - #endif + #include + #include +@@ -710,8 +710,6 @@ static int balanced_irq(void *unused) + unsigned long prev_balance_time = jiffies; + long time_remaining = balanced_irq_interval; - #define NEW_CL_POINTER 0x228 /* Relative to real mode data */ --#define OLD_CL_MAGIC_ADDR 0x90020 -+#define OLD_CL_MAGIC_ADDR 0x20 - #define OLD_CL_MAGIC 0xA33F --#define OLD_CL_BASE_ADDR 0x90000 --#define OLD_CL_OFFSET 0x90022 -+#define OLD_CL_OFFSET 0x22 +- daemonize("kirqd"); +- + /* push everything to CPU 0 to give us a starting point. 
*/ + for (i = 0 ; i < NR_IRQS ; i++) { + irq_desc[i].pending_mask = cpumask_of_cpu(0); +@@ -771,10 +769,9 @@ static int __init balanced_irq_init(void + } + + printk(KERN_INFO "Starting balanced_irq\n"); +- if (kernel_thread(balanced_irq, NULL, CLONE_KERNEL) >= 0) ++ if (!IS_ERR(kthread_run(balanced_irq, NULL, "kirqd"))) + return 0; +- else +- printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq"); ++ printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq"); + failed: + for_each_possible_cpu(i) { + kfree(irq_cpu_data[i].irq_delta); +@@ -1455,10 +1452,6 @@ static void __init setup_ExtINT_IRQ0_pin + enable_8259A_irq(0); + } - static void __init copy_bootdata(char *real_mode_data) +-static inline void UNEXPECTED_IO_APIC(void) +-{ +-} +- + void __init print_IO_APIC(void) { - #ifndef CONFIG_XEN -- int new_data; -+ unsigned long new_data; - char * command_line; - - memcpy(x86_boot_params, real_mode_data, BOOT_PARAM_SIZE); -- new_data = *(int *) (x86_boot_params + NEW_CL_POINTER); -+ new_data = *(u32 *) (x86_boot_params + NEW_CL_POINTER); - if (!new_data) { -- if (OLD_CL_MAGIC != * (u16 *) OLD_CL_MAGIC_ADDR) { -+ if (OLD_CL_MAGIC != *(u16 *)(real_mode_data + OLD_CL_MAGIC_ADDR)) { - return; - } -- new_data = OLD_CL_BASE_ADDR + * (u16 *) OLD_CL_OFFSET; -+ new_data = __pa(real_mode_data) + *(u16 *)(real_mode_data + OLD_CL_OFFSET); - } -- command_line = (char *) ((u64)(new_data)); -+ command_line = __va(new_data); - memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE); - #else - int max_cmdline; -@@ -101,10 +108,13 @@ void __init x86_64_start_kernel(char * r - while ((1UL << machine_to_phys_order) < machine_to_phys_nr_ents ) - machine_to_phys_order++; - --#if 0 -+#ifndef CONFIG_XEN - /* clear bss before set_intr_gate with early_idt_handler */ - clear_bss(); - -+ /* Make NULL pointers segfault */ -+ zap_identity_mappings(); -+ - for (i = 0; i < IDT_ENTRIES; i++) - set_intr_gate(i, early_idt_handler); - asm volatile("lidt %0" :: "m" (idt_descr)); -@@ -116,7 +126,7 @@ void __init x86_64_start_kernel(char * r - cpu_pda(i) = &boot_cpu_pda[i]; - - pda_init(0); -- copy_bootdata(real_mode_data); -+ copy_bootdata(__va(real_mode_data)); - #ifdef CONFIG_SMP - cpu_set(0, cpu_online_map); - #endif ---- a/arch/x86/kernel/head_64-xen.S -+++ b/arch/x86/kernel/head_64-xen.S -@@ -5,6 +5,7 @@ - * Copyright (C) 2000 Pavel Machek - * Copyright (C) 2000 Karsten Keil - * Copyright (C) 2001,2002 Andi Kleen -+ * Copyright (C) 2005 Eric Biederman - * Jun Nakajima - * Modified for Xen - */ -@@ -34,27 +35,15 @@ startup_64: - pushq $0 # fake return address - jmp x86_64_start_kernel - --#ifdef CONFIG_ACPI_SLEEP --.org 0xf00 -- .globl pGDT32 --pGDT32: -- .word gdt_end-cpu_gdt_table-1 -- .long cpu_gdt_table-__START_KERNEL_map --#endif --ENTRY(stext) --ENTRY(_stext) -+.balign PAGE_SIZE - -- $page = 0 - #define NEXT_PAGE(name) \ -- $page = $page + 1; \ -- .org $page * 0x1000; \ -- phys_##name = $page * 0x1000 + __PHYSICAL_START; \ -+ .balign PAGE_SIZE; \ -+ phys_##name = . - .bootstrap.text; \ - ENTRY(name) - - NEXT_PAGE(init_level4_pgt) -- /* This gets initialized in x86_64_start_kernel */ - .fill 512,8,0 --NEXT_PAGE(init_level4_user_pgt) - /* - * We update two pgd entries to make kernel and user pgd consistent - * at pgd_populate(). It can be used for kernel modules. So we place -@@ -101,14 +90,6 @@ NEXT_PAGE(hypercall_page) - #undef NEXT_PAGE - - .data --/* Just dummy symbol to allow compilation. 
Not used in sleep path */ --#ifdef CONFIG_ACPI_SLEEP -- .align PAGE_SIZE --ENTRY(wakeup_level4_pgt) -- .fill 512,8,0 --#endif -- -- .data - - .align 16 - .globl cpu_gdt_descr -@@ -136,13 +117,13 @@ gdt: - - ENTRY(cpu_gdt_table) - .quad 0x0000000000000000 /* NULL descriptor */ -+ .quad 0x00cf9b000000ffff /* __KERNEL32_CS */ -+ .quad 0x00af9b000000ffff /* __KERNEL_CS */ -+ .quad 0x00cf93000000ffff /* __KERNEL_DS */ -+ .quad 0x00cffb000000ffff /* __USER32_CS */ -+ .quad 0x00cff3000000ffff /* __USER_DS, __USER32_DS */ -+ .quad 0x00affb000000ffff /* __USER_CS */ - .quad 0x0 /* unused */ -- .quad 0x00af9a000000ffff /* __KERNEL_CS */ -- .quad 0x00cf92000000ffff /* __KERNEL_DS */ -- .quad 0x00cffa000000ffff /* __USER32_CS */ -- .quad 0x00cff2000000ffff /* __USER_DS, __USER32_DS */ -- .quad 0x00affa000000ffff /* __USER_CS */ -- .quad 0x00cf9a000000ffff /* __KERNEL32_CS */ - .quad 0,0 /* TSS */ - .quad 0,0 /* LDT */ - .quad 0,0,0 /* three TLS descriptors */ -@@ -165,14 +146,11 @@ ENTRY(empty_zero_page) - * __xen_guest information - */ - .macro utoh value -- .if (\value) < 0 || (\value) >= 0x10 -- utoh (((\value)>>4)&0x0fffffffffffffff) -- .endif -- .if ((\value) & 0xf) < 10 -- .byte '0' + ((\value) & 0xf) -- .else -- .byte 'A' + ((\value) & 0xf) - 10 -- .endif -+ i = 64 -+ .rept 16 -+ i = i - 4 -+ .byte '0' + ((((\value) >> i) & 0xf) > 9) * ('0' - 'A' + 10) + (((\value) >> i) & 0xf) -+ .endr - .endm - - .section __xen_guest ---- a/arch/x86/kernel/io_apic_32-xen.c -+++ b/arch/x86/kernel/io_apic_32-xen.c -@@ -25,7 +25,6 @@ - #include - #include - #include --#include - #include - #include - #include -@@ -35,6 +34,7 @@ - #include - #include - #include -+#include - - #include - #include -@@ -710,8 +710,6 @@ static int balanced_irq(void *unused) - unsigned long prev_balance_time = jiffies; - long time_remaining = balanced_irq_interval; - -- daemonize("kirqd"); -- - /* push everything to CPU 0 to give us a starting point. */ - for (i = 0 ; i < NR_IRQS ; i++) { - irq_desc[i].pending_mask = cpumask_of_cpu(0); -@@ -771,10 +769,9 @@ static int __init balanced_irq_init(void - } - - printk(KERN_INFO "Starting balanced_irq\n"); -- if (kernel_thread(balanced_irq, NULL, CLONE_KERNEL) >= 0) -+ if (!IS_ERR(kthread_run(balanced_irq, NULL, "kirqd"))) - return 0; -- else -- printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq"); -+ printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq"); - failed: - for_each_possible_cpu(i) { - kfree(irq_cpu_data[i].irq_delta); -@@ -1455,10 +1452,6 @@ static void __init setup_ExtINT_IRQ0_pin - enable_8259A_irq(0); - } - --static inline void UNEXPECTED_IO_APIC(void) --{ --} -- - void __init print_IO_APIC(void) - { - int apic, i; -@@ -1498,34 +1491,12 @@ void __init print_IO_APIC(void) - printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); - printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type); - printk(KERN_DEBUG "....... : LTS : %X\n", reg_00.bits.LTS); -- if (reg_00.bits.ID >= get_physical_broadcast()) -- UNEXPECTED_IO_APIC(); -- if (reg_00.bits.__reserved_1 || reg_00.bits.__reserved_2) -- UNEXPECTED_IO_APIC(); + int apic, i; +@@ -1498,34 +1491,12 @@ void __init print_IO_APIC(void) + printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); + printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type); + printk(KERN_DEBUG "....... 
: LTS : %X\n", reg_00.bits.LTS); +- if (reg_00.bits.ID >= get_physical_broadcast()) +- UNEXPECTED_IO_APIC(); +- if (reg_00.bits.__reserved_1 || reg_00.bits.__reserved_2) +- UNEXPECTED_IO_APIC(); printk(KERN_DEBUG ".... register #01: %08X\n", reg_01.raw); printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries); @@ -1557,244 +862,44 @@ Acked-by: jbeulich@novell.com } void arch_teardown_msi_irq(unsigned int irq) ---- a/arch/x86/kernel/io_apic_64-xen.c -+++ b/arch/x86/kernel/io_apic_64-xen.c -@@ -25,7 +25,6 @@ - #include - #include - #include +--- sle11-2009-04-20.orig/arch/x86/kernel/ioport_32-xen.c 2009-04-29 08:44:31.000000000 +0200 ++++ sle11-2009-04-20/arch/x86/kernel/ioport_32-xen.c 2008-12-15 11:27:22.000000000 +0100 +@@ -12,10 +12,10 @@ + #include + #include + #include -#include - #include - #include - #include -@@ -904,10 +903,6 @@ static void __init setup_ExtINT_IRQ0_pin - enable_8259A_irq(0); - } - --void __init UNEXPECTED_IO_APIC(void) --{ --} -- - void __apicdebuginit print_IO_APIC(void) - { - int apic, i; -@@ -943,40 +938,16 @@ void __apicdebuginit print_IO_APIC(void) - printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid); - printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); - printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); -- if (reg_00.bits.__reserved_1 || reg_00.bits.__reserved_2) -- UNEXPECTED_IO_APIC(); + #include + #include + #include ++#include + #include - printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)®_01); - printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries); -- if ( (reg_01.bits.entries != 0x0f) && /* older (Neptune) boards */ -- (reg_01.bits.entries != 0x17) && /* typical ISA+PCI boards */ -- (reg_01.bits.entries != 0x1b) && /* Compaq Proliant boards */ -- (reg_01.bits.entries != 0x1f) && /* dual Xeon boards */ -- (reg_01.bits.entries != 0x22) && /* bigger Xeon boards */ -- (reg_01.bits.entries != 0x2E) && -- (reg_01.bits.entries != 0x3F) && -- (reg_01.bits.entries != 0x03) -- ) -- UNEXPECTED_IO_APIC(); + /* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */ +--- sle11-2009-04-20.orig/arch/x86/kernel/irq_32-xen.c 2009-03-04 11:25:55.000000000 +0100 ++++ sle11-2009-04-20/arch/x86/kernel/irq_32-xen.c 2008-12-15 11:27:22.000000000 +0100 +@@ -24,6 +24,9 @@ + DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_internodealigned_in_smp; + EXPORT_PER_CPU_SYMBOL(irq_stat); - printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ); - printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version); -- if ( (reg_01.bits.version != 0x01) && /* 82489DX IO-APICs */ -- (reg_01.bits.version != 0x02) && /* 82801BA IO-APICs (ICH2) */ -- (reg_01.bits.version != 0x10) && /* oldest IO-APICs */ -- (reg_01.bits.version != 0x11) && /* Pentium/Pro IO-APICs */ -- (reg_01.bits.version != 0x13) && /* Xeon IO-APICs */ -- (reg_01.bits.version != 0x20) /* Intel P64H (82806 AA) */ -- ) -- UNEXPECTED_IO_APIC(); -- if (reg_01.bits.__reserved_1 || reg_01.bits.__reserved_2) -- UNEXPECTED_IO_APIC(); ++DEFINE_PER_CPU(struct pt_regs *, irq_regs); ++EXPORT_PER_CPU_SYMBOL(irq_regs); ++ + /* + * 'what should we do if we get a hw irq event on an illegal vector'. + * each architecture has to answer this themselves. 
+--- sle11-2009-04-20.orig/arch/x86/kernel/ldt_32-xen.c 2008-12-15 11:26:44.000000000 +0100 ++++ sle11-2009-04-20/arch/x86/kernel/ldt_32-xen.c 2008-12-15 11:27:22.000000000 +0100 +@@ -10,7 +10,6 @@ + #include + #include + #include +-#include + #include + #include - if (reg_01.bits.version >= 0x10) { - printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw); - printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration); -- if (reg_02.bits.__reserved_1 || reg_02.bits.__reserved_2) -- UNEXPECTED_IO_APIC(); - } - - printk(KERN_DEBUG ".... IRQ redirection table:\n"); -@@ -1408,8 +1379,7 @@ static void irq_complete_move(unsigned i - - vector = ~get_irq_regs()->orig_rax; - me = smp_processor_id(); -- if ((vector == cfg->vector) && -- cpu_isset(smp_processor_id(), cfg->domain)) { -+ if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) { - cpumask_t cleanup_mask; - - cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); -@@ -1444,7 +1414,7 @@ static void ack_apic_level(unsigned int - - /* - * We must acknowledge the irq before we move it or the acknowledge will -- * not propogate properly. -+ * not propagate properly. - */ - ack_APIC_irq(); - -@@ -1527,6 +1497,7 @@ static void ack_lapic_irq (unsigned int - static void end_lapic_irq (unsigned int i) { /* nothing */ } - - static struct hw_interrupt_type lapic_irq_type __read_mostly = { -+ .name = "local-APIC", - .typename = "local-APIC-edge", - .startup = NULL, /* startup_irq() not used for IRQ0 */ - .shutdown = NULL, /* shutdown_irq() not used for IRQ0 */ -@@ -1998,18 +1969,18 @@ int arch_setup_msi_irq(struct pci_dev *d - if (irq < 0) - return irq; - -- set_irq_msi(irq, desc); - ret = msi_compose_msg(dev, irq, &msg); - if (ret < 0) { - destroy_irq(irq); - return ret; - } - -+ set_irq_msi(irq, desc); - write_msi_msg(irq, &msg); - - set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); - -- return irq; -+ return 0; - } - - void arch_teardown_msi_irq(unsigned int irq) ---- a/arch/x86/kernel/ioport_32-xen.c -+++ b/arch/x86/kernel/ioport_32-xen.c -@@ -12,10 +12,10 @@ - #include - #include - #include --#include - #include - #include - #include -+#include - #include - - /* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */ ---- a/arch/x86/kernel/ioport_64-xen.c -+++ b/arch/x86/kernel/ioport_64-xen.c -@@ -13,10 +13,10 @@ - #include - #include - #include --#include - #include - #include - #include -+#include - #include - - /* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */ ---- a/arch/x86/kernel/irq_32-xen.c -+++ b/arch/x86/kernel/irq_32-xen.c -@@ -24,6 +24,9 @@ - DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_internodealigned_in_smp; - EXPORT_PER_CPU_SYMBOL(irq_stat); - -+DEFINE_PER_CPU(struct pt_regs *, irq_regs); -+EXPORT_PER_CPU_SYMBOL(irq_regs); -+ - /* - * 'what should we do if we get a hw irq event on an illegal vector'. - * each architecture has to answer this themselves. 
---- a/arch/x86/kernel/irq_64-xen.c -+++ b/arch/x86/kernel/irq_64-xen.c -@@ -32,7 +32,7 @@ atomic_t irq_err_count; - */ - static inline void stack_overflow_check(struct pt_regs *regs) - { -- u64 curbase = (u64) current->thread_info; -+ u64 curbase = (u64)task_stack_page(current); - static unsigned long warned = -60*HZ; - - if (regs->rsp >= curbase && regs->rsp <= curbase + THREAD_SIZE && -@@ -145,17 +145,43 @@ void fixup_irqs(cpumask_t map) - - for (irq = 0; irq < NR_IRQS; irq++) { - cpumask_t mask; -+ int break_affinity = 0; -+ int set_affinity = 1; -+ - if (irq == 2) - continue; - -+ /* interrupt's are disabled at this point */ -+ spin_lock(&irq_desc[irq].lock); -+ -+ if (!irq_has_action(irq) || -+ cpus_equal(irq_desc[irq].affinity, map)) { -+ spin_unlock(&irq_desc[irq].lock); -+ continue; -+ } -+ - cpus_and(mask, irq_desc[irq].affinity, map); -- if (any_online_cpu(mask) == NR_CPUS) { -- /*printk("Breaking affinity for irq %i\n", irq);*/ -+ if (cpus_empty(mask)) { -+ break_affinity = 1; - mask = map; - } -+ -+ if (irq_desc[irq].chip->mask) -+ irq_desc[irq].chip->mask(irq); -+ - if (irq_desc[irq].chip->set_affinity) - irq_desc[irq].chip->set_affinity(irq, mask); -- else if (irq_desc[irq].action && !(warned++)) -+ else if (!(warned++)) -+ set_affinity = 0; -+ -+ if (irq_desc[irq].chip->unmask) -+ irq_desc[irq].chip->unmask(irq); -+ -+ spin_unlock(&irq_desc[irq].lock); -+ -+ if (break_affinity && set_affinity) -+ /*printk("Broke affinity for irq %i\n", irq)*/; -+ else if (!set_affinity) - printk("Cannot set affinity for irq %i\n", irq); - } - ---- a/arch/x86/kernel/ldt_32-xen.c -+++ b/arch/x86/kernel/ldt_32-xen.c -@@ -10,7 +10,6 @@ - #include - #include - #include --#include - #include - #include - ---- a/arch/x86/kernel/ldt_64-xen.c -+++ b/arch/x86/kernel/ldt_64-xen.c -@@ -13,7 +13,6 @@ - #include - #include - #include --#include - #include - #include - ---- a/arch/x86/kernel/Makefile -+++ b/arch/x86/kernel/Makefile -@@ -127,4 +127,4 @@ endif - disabled-obj-$(CONFIG_XEN) := early-quirks.o hpet.o i8253.o i8259_$(BITS).o reboot.o \ - smpboot_$(BITS).o tsc_$(BITS).o tsc_sync.o - disabled-obj-$(CONFIG_XEN_UNPRIVILEGED_GUEST) += mpparse_64.o --%/head_$(BITS).o %/head_$(BITS).s: $(if $(CONFIG_XEN),EXTRA_AFLAGS,dummy) := -+%/head_64.o %/head_64.s: $(if $(CONFIG_XEN),EXTRA_AFLAGS,dummy) := ---- a/arch/x86/kernel/microcode-xen.c -+++ b/arch/x86/kernel/microcode-xen.c +--- sle11-2009-04-20.orig/arch/x86/kernel/microcode-xen.c 2009-03-04 11:25:55.000000000 +0100 ++++ sle11-2009-04-20/arch/x86/kernel/microcode-xen.c 2008-12-15 11:27:22.000000000 +0100 @@ -135,7 +135,7 @@ static int __init microcode_dev_init (vo return 0; } @@ -1804,8 +909,8 @@ Acked-by: jbeulich@novell.com { misc_deregister(µcode_dev); } ---- a/arch/x86/kernel/mpparse_32-xen.c -+++ b/arch/x86/kernel/mpparse_32-xen.c +--- sle11-2009-04-20.orig/arch/x86/kernel/mpparse_32-xen.c 2009-03-04 11:25:55.000000000 +0100 ++++ sle11-2009-04-20/arch/x86/kernel/mpparse_32-xen.c 2008-12-15 11:27:22.000000000 +0100 @@ -18,7 +18,6 @@ #include #include @@ -1823,27 +928,8 @@ Acked-by: jbeulich@novell.com if (!num_processors) printk(KERN_ERR "SMP mptable: no processors registered!\n"); return num_processors; ---- a/arch/x86/kernel/mpparse_64-xen.c -+++ b/arch/x86/kernel/mpparse_64-xen.c -@@ -17,7 +17,6 @@ - #include - #include - #include --#include - #include - #include - #include -@@ -307,7 +306,7 @@ static int __init smp_read_mpc(struct mp - } - } - } -- clustered_apic_check(); -+ setup_apic_routing(); - if (!num_processors) - printk(KERN_ERR "MPTABLE: 
no processors registered!\n"); - return num_processors; ---- a/arch/x86/kernel/pci-dma-xen.c -+++ b/arch/x86/kernel/pci-dma-xen.c +--- sle11-2009-04-20.orig/arch/x86/kernel/pci-dma-xen.c 2009-03-04 11:25:55.000000000 +0100 ++++ sle11-2009-04-20/arch/x86/kernel/pci-dma-xen.c 2008-12-15 11:27:22.000000000 +0100 @@ -13,6 +13,7 @@ #include #include @@ -1894,8 +980,8 @@ Acked-by: jbeulich@novell.com dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size, enum dma_data_direction direction) ---- a/arch/x86/kernel/process_32-xen.c -+++ b/arch/x86/kernel/process_32-xen.c +--- sle11-2009-04-20.orig/arch/x86/kernel/process_32-xen.c 2009-03-04 11:25:55.000000000 +0100 ++++ sle11-2009-04-20/arch/x86/kernel/process_32-xen.c 2008-12-15 11:27:22.000000000 +0100 @@ -21,7 +21,6 @@ #include #include @@ -1993,59 +1079,8 @@ Acked-by: jbeulich@novell.com return prev_p; } ---- a/arch/x86/kernel/process_64-xen.c -+++ b/arch/x86/kernel/process_64-xen.c -@@ -39,6 +39,7 @@ - #include - #include - #include -+#include - - #include - #include -@@ -49,7 +50,6 @@ - #include - #include - #include --#include - #include - #include - #include -@@ -232,16 +232,18 @@ void __cpuinit select_idle_routine(const - - static int __init idle_setup (char *str) - { -- if (!strncmp(str, "poll", 4)) { -+ if (!strcmp(str, "poll")) { - printk("using polling idle threads.\n"); - pm_idle = poll_idle; -- } -+ } else if (!strcmp(str, "mwait")) -+ force_mwait = 1; -+ else -+ return -1; - - boot_option_idle_override = 1; -- return 1; -+ return 0; - } -- --__setup("idle=", idle_setup); -+early_param("idle", idle_setup); - - /* Prints also some state that isn't saved in the pt_regs */ - void __show_regs(struct pt_regs * regs) -@@ -546,7 +548,7 @@ __switch_to(struct task_struct *prev_p, - * The AMD workaround requires it to be after DS reload, or - * after DS has been cleared, which we do in __prepare_arch_switch. 
- */ -- if (prev_p->thread_info->status & TS_USEDFPU) { -+ if (task_thread_info(prev_p)->status & TS_USEDFPU) { - __save_init_fpu(prev_p); /* _not_ save_init_fpu() */ - mcl->op = __HYPERVISOR_fpu_taskswitch; - mcl->args[0] = 1; ---- a/arch/x86/kernel/quirks-xen.c -+++ b/arch/x86/kernel/quirks-xen.c +--- sle11-2009-04-20.orig/arch/x86/kernel/quirks-xen.c 2008-12-15 11:26:44.000000000 +0100 ++++ sle11-2009-04-20/arch/x86/kernel/quirks-xen.c 2008-12-15 11:27:22.000000000 +0100 @@ -3,12 +3,10 @@ */ #include @@ -2138,159 +1173,21 @@ Acked-by: jbeulich@novell.com +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7525_MCH, quirk_intel_irqbalance); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH, quirk_intel_irqbalance); #endif ---- a/arch/x86/kernel/setup_64-xen.c -+++ b/arch/x86/kernel/setup_64-xen.c -@@ -120,6 +120,8 @@ int bootloader_type; - - unsigned long saved_video_mode; - -+int force_mwait __cpuinitdata; -+ - /* - * Early DMI memory - */ -@@ -253,10 +255,10 @@ static void discover_ebda(void) - * there is a real-mode segmented pointer pointing to the - * 4K EBDA area at 0x40E - */ -- ebda_addr = *(unsigned short *)EBDA_ADDR_POINTER; -+ ebda_addr = *(unsigned short *)__va(EBDA_ADDR_POINTER); - ebda_addr <<= 4; - -- ebda_size = *(unsigned short *)(unsigned long)ebda_addr; -+ ebda_size = *(unsigned short *)__va(ebda_addr); - - /* Round EBDA up to pages */ - if (ebda_size == 0) -@@ -410,15 +412,8 @@ void __init setup_arch(char **cmdline_p) - #endif - - #ifdef CONFIG_SMP -- /* -- * But first pinch a few for the stack/trampoline stuff -- * FIXME: Don't need the extra page at 4K, but need to fix -- * trampoline before removing it. (see the GDT stuff) -- */ -- reserve_bootmem_generic(PAGE_SIZE, PAGE_SIZE); -- - /* Reserve SMP trampoline */ -- reserve_bootmem_generic(SMP_TRAMPOLINE_BASE, PAGE_SIZE); -+ reserve_bootmem_generic(SMP_TRAMPOLINE_BASE, 2*PAGE_SIZE); - #endif - #endif - -@@ -570,8 +565,6 @@ void __init setup_arch(char **cmdline_p) - early_quirks(); - #endif - -- zap_low_mappings(0); -- - /* - * set this early, so we dont allocate cpu0 - * if MADT list doesnt list BSP first -@@ -864,6 +857,10 @@ static void __cpuinit init_amd(struct cp - - /* RDTSC can be speculated around */ - clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); -+ -+ /* Family 10 doesn't support C states in MWAIT so don't use it */ -+ if (c->x86 == 0x10 && !force_mwait) -+ clear_bit(X86_FEATURE_MWAIT, &c->x86_capability); - } - - static void __cpuinit detect_ht(struct cpuinfo_x86 *c) -@@ -1146,9 +1143,7 @@ void __cpuinit identify_cpu(struct cpuin - #ifdef CONFIG_X86_MCE - mcheck_init(c); - #endif -- if (c == &boot_cpu_data) -- mtrr_bp_init(); -- else -+ if (c != &boot_cpu_data) - mtrr_ap_init(); - #ifdef CONFIG_NUMA - numa_add_cpu(smp_processor_id()); -@@ -1239,9 +1234,8 @@ static int show_cpuinfo(struct seq_file - "stc", - "100mhzsteps", - "hwpstate", -- NULL, /* tsc invariant mapped to constant_tsc */ -- NULL, -- /* nothing */ /* constant_tsc - moved to flags */ -+ "", /* tsc invariant mapped to constant_tsc */ -+ /* nothing */ - }; - - ---- a/arch/x86/kernel/setup64-xen.c -+++ b/arch/x86/kernel/setup64-xen.c -@@ -113,9 +113,9 @@ void __init setup_per_cpu_areas(void) - if (!NODE_DATA(cpu_to_node(i))) { - printk("cpu with no node %d, num_online_nodes %d\n", - i, num_online_nodes()); -- ptr = alloc_bootmem(size); -+ ptr = alloc_bootmem_pages(size); - } else { -- ptr = alloc_bootmem_node(NODE_DATA(cpu_to_node(i)), size); -+ ptr = 
alloc_bootmem_pages_node(NODE_DATA(cpu_to_node(i)), size); - } - if (!ptr) - panic("Cannot allocate cpu data for CPU %d\n", i); -@@ -208,6 +208,8 @@ char boot_exception_stacks[(N_EXCEPTION_ - __attribute__((section(".bss.page_aligned"))); - #endif - -+extern asmlinkage void ignore_sysret(void); -+ - /* May not be marked __init: used by software suspend */ - void syscall_init(void) - { -@@ -219,12 +221,22 @@ void syscall_init(void) - */ - wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32); - wrmsrl(MSR_LSTAR, system_call); -+ wrmsrl(MSR_CSTAR, ignore_sysret); - - /* Flags to clear on syscall */ - wrmsrl(MSR_SYSCALL_MASK, EF_TF|EF_DF|EF_IE|0x3000); - #endif - #ifdef CONFIG_IA32_EMULATION - syscall32_cpu_init (); -+#else -+ { -+ static const struct callback_register cstar = { -+ .type = CALLBACKTYPE_syscall32, -+ .address = (unsigned long)ignore_sysret -+ }; -+ if (HYPERVISOR_callback_op(CALLBACKOP_register, &cstar)) -+ printk(KERN_WARN "Unable to register CSTAR callback\n"); -+ } - #endif - } - -@@ -262,7 +274,6 @@ void __cpuinit cpu_init (void) - /* CPU 0 is initialised in head64.c */ - if (cpu != 0) { - pda_init(cpu); -- zap_low_mappings(cpu); - } - #ifndef CONFIG_X86_NO_TSS - else ---- a/arch/x86/kernel/smp_32-xen.c -+++ b/arch/x86/kernel/smp_32-xen.c -@@ -13,7 +13,6 @@ - #include - #include - #include --#include - #include - #include - #include -@@ -216,7 +215,6 @@ static cpumask_t flush_cpumask; - static struct mm_struct * flush_mm; - static unsigned long flush_va; - static DEFINE_SPINLOCK(tlbstate_lock); --#define FLUSH_ALL 0xffffffff +--- sle11-2009-04-20.orig/arch/x86/kernel/smp_32-xen.c 2009-03-04 11:25:55.000000000 +0100 ++++ sle11-2009-04-20/arch/x86/kernel/smp_32-xen.c 2008-12-15 11:27:22.000000000 +0100 +@@ -13,7 +13,6 @@ + #include + #include + #include +-#include + #include + #include + #include +@@ -216,7 +215,6 @@ static cpumask_t flush_cpumask; + static struct mm_struct * flush_mm; + static unsigned long flush_va; + static DEFINE_SPINLOCK(tlbstate_lock); +-#define FLUSH_ALL 0xffffffff /* * We cannot call mmdrop() because we are in interrupt context, @@ -2598,73 +1495,8 @@ Acked-by: jbeulich@novell.com - return 0; -} -EXPORT_SYMBOL(smp_call_function_single); ---- a/arch/x86/kernel/smp_64-xen.c -+++ b/arch/x86/kernel/smp_64-xen.c -@@ -14,7 +14,6 @@ - #include - #include - #include --#include - #include - #include - #include -@@ -457,44 +456,36 @@ int smp_call_function (void (*func) (voi - } - EXPORT_SYMBOL(smp_call_function); - --void smp_stop_cpu(void) -+static void stop_this_cpu(void *dummy) - { -- unsigned long flags; -+ local_irq_disable(); - /* - * Remove this CPU: - */ - cpu_clear(smp_processor_id(), cpu_online_map); -- local_irq_save(flags); - disable_all_local_evtchn(); -- local_irq_restore(flags); --} -- --static void smp_really_stop_cpu(void *dummy) --{ -- smp_stop_cpu(); - for (;;) - halt(); - } - - void smp_send_stop(void) - { -- int nolock = 0; -+ int nolock; -+ unsigned long flags; -+ - #ifndef CONFIG_XEN - if (reboot_force) - return; - #endif -+ - /* Don't deadlock on the call lock in panic */ -- if (!spin_trylock(&call_lock)) { -- /* ignore locking because we have panicked anyways */ -- nolock = 1; -- } -- __smp_call_function(smp_really_stop_cpu, NULL, 0, 0); -+ nolock = !spin_trylock(&call_lock); -+ local_irq_save(flags); -+ __smp_call_function(stop_this_cpu, NULL, 0, 0); - if (!nolock) - spin_unlock(&call_lock); -- -- local_irq_disable(); - disable_all_local_evtchn(); -- local_irq_enable(); -+ local_irq_restore(flags); - } - - /* ---- 
a/arch/x86/kernel/time_32-xen.c -+++ b/arch/x86/kernel/time_32-xen.c +--- sle11-2009-04-20.orig/arch/x86/kernel/time_32-xen.c 2009-03-24 10:11:08.000000000 +0100 ++++ sle11-2009-04-20/arch/x86/kernel/time_32-xen.c 2009-03-24 10:11:31.000000000 +0100 @@ -80,7 +80,6 @@ #include DEFINE_SPINLOCK(i8253_lock); @@ -2700,7 +1532,7 @@ Acked-by: jbeulich@novell.com static void init_cpu_khz(void) { u64 __cpu_khz = 1000000ULL << 32; -@@ -399,7 +418,7 @@ static int set_rtc_mmss(unsigned long no +@@ -397,7 +416,7 @@ static int set_rtc_mmss(unsigned long no return retval; } @@ -2709,7 +1541,7 @@ Acked-by: jbeulich@novell.com { unsigned int cpu = get_cpu(); struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu); -@@ -420,6 +439,61 @@ unsigned long long sched_clock(void) +@@ -418,6 +437,61 @@ unsigned long long sched_clock(void) return time; } @@ -2771,7 +1603,7 @@ Acked-by: jbeulich@novell.com unsigned long profile_pc(struct pt_regs *regs) { unsigned long pc = instruction_pointer(regs); -@@ -467,10 +541,9 @@ EXPORT_SYMBOL(profile_pc); +@@ -465,10 +539,9 @@ EXPORT_SYMBOL(profile_pc); irqreturn_t timer_interrupt(int irq, void *dev_id) { s64 delta, delta_cpu, stolen, blocked; @@ -2783,7 +1615,7 @@ Acked-by: jbeulich@novell.com /* * Here we are in the timer irq handler. We just have irqs locally -@@ -490,20 +563,7 @@ irqreturn_t timer_interrupt(int irq, voi +@@ -488,20 +561,7 @@ irqreturn_t timer_interrupt(int irq, voi delta -= processed_system_time; delta_cpu -= per_cpu(processed_system_time, cpu); @@ -2805,7 +1637,7 @@ Acked-by: jbeulich@novell.com } while (!time_values_up_to_date(cpu)); if ((unlikely(delta < -(s64)permitted_clock_jitter) || -@@ -545,6 +605,9 @@ irqreturn_t timer_interrupt(int irq, voi +@@ -543,6 +603,9 @@ irqreturn_t timer_interrupt(int irq, voi * HACK: Passing NULL to account_steal_time() * ensures that the ticks are accounted as stolen. */ @@ -2815,7 +1647,7 @@ Acked-by: jbeulich@novell.com if ((stolen > 0) && (delta_cpu > 0)) { delta_cpu -= stolen; if (unlikely(delta_cpu < 0)) -@@ -560,6 +623,8 @@ irqreturn_t timer_interrupt(int irq, voi +@@ -558,6 +621,8 @@ irqreturn_t timer_interrupt(int irq, voi * HACK: Passing idle_task to account_steal_time() * ensures that the ticks are accounted as idle/wait. */ @@ -2824,7 +1656,7 @@ Acked-by: jbeulich@novell.com if ((blocked > 0) && (delta_cpu > 0)) { delta_cpu -= blocked; if (unlikely(delta_cpu < 0)) -@@ -596,7 +661,7 @@ irqreturn_t timer_interrupt(int irq, voi +@@ -594,7 +659,7 @@ irqreturn_t timer_interrupt(int irq, voi return IRQ_HANDLED; } @@ -2833,7 +1665,7 @@ Acked-by: jbeulich@novell.com { #ifndef CONFIG_XEN /* XXX Should tell the hypervisor about this fact. 
*/ tsc_unstable = 1; -@@ -604,17 +669,13 @@ void mark_tsc_unstable(void) +@@ -602,17 +667,13 @@ void mark_tsc_unstable(void) } EXPORT_SYMBOL_GPL(mark_tsc_unstable); @@ -2855,7 +1687,7 @@ Acked-by: jbeulich@novell.com if (unlikely((s64)(ret - last) < 0)) { if (last - ret > permitted_clock_jitter -@@ -633,17 +694,25 @@ static cycle_t xen_clocksource_read(void +@@ -631,17 +692,25 @@ static cycle_t xen_clocksource_read(void } for (;;) { @@ -2883,7 +1715,7 @@ Acked-by: jbeulich@novell.com static struct clocksource clocksource_xen = { .name = "xen", .rating = 400, -@@ -652,6 +721,7 @@ static struct clocksource clocksource_xe +@@ -650,6 +719,7 @@ static struct clocksource clocksource_xe .mult = 1 << XEN_SHIFT, /* time directly in nanoseconds */ .shift = XEN_SHIFT, .flags = CLOCK_SOURCE_IS_CONTINUOUS, @@ -2891,7 +1723,7 @@ Acked-by: jbeulich@novell.com }; static void init_missing_ticks_accounting(unsigned int cpu) -@@ -740,35 +810,6 @@ void notify_arch_cmos_timer(void) +@@ -738,35 +808,6 @@ void notify_arch_cmos_timer(void) mod_timer(&sync_xen_wallclock_timer, jiffies + 1); } @@ -2927,7 +1759,7 @@ Acked-by: jbeulich@novell.com extern void (*late_time_init)(void); /* Dynamically-mapped IRQ. */ -@@ -899,21 +940,21 @@ static void start_hz_timer(void) +@@ -897,21 +938,21 @@ static void start_hz_timer(void) cpu_clear(smp_processor_id(), nohz_cpu_mask); } @@ -2953,8 +1785,8 @@ Acked-by: jbeulich@novell.com /* No locking required. Interrupts are disabled on all CPUs. */ void time_resume(void) ---- a/arch/x86/kernel/traps_32-xen.c -+++ b/arch/x86/kernel/traps_32-xen.c +--- sle11-2009-04-20.orig/arch/x86/kernel/traps_32-xen.c 2009-03-04 11:25:55.000000000 +0100 ++++ sle11-2009-04-20/arch/x86/kernel/traps_32-xen.c 2008-12-15 11:27:22.000000000 +0100 @@ -52,7 +52,7 @@ #include #include @@ -3065,308 +1897,33 @@ Acked-by: jbeulich@novell.com unsigned long base = (kesp - uesp) & -THREAD_SIZE; unsigned long new_kesp = kesp - base; unsigned long lim_pages = (new_kesp | (THREAD_SIZE - 1)) >> PAGE_SHIFT; ---- a/arch/x86/kernel/traps_64-xen.c -+++ b/arch/x86/kernel/traps_64-xen.c -@@ -32,6 +32,7 @@ - #include +--- sle11-2009-04-20.orig/arch/x86/mm/fault_32-xen.c 2009-03-04 11:25:55.000000000 +0100 ++++ sle11-2009-04-20/arch/x86/mm/fault_32-xen.c 2008-12-15 11:27:22.000000000 +0100 +@@ -14,19 +14,20 @@ + #include + #include + #include +-#include + #include + #include + #include + #include /* For unblank_screen() */ + #include ++#include /* for max_low_pfn */ ++#include + #include + #include #include - #include +#include #include - #include -@@ -39,7 +40,6 @@ - #include #include - #include -#include - #include - #include - #include -@@ -71,22 +71,6 @@ asmlinkage void alignment_check(void); - asmlinkage void machine_check(void); - asmlinkage void spurious_interrupt_bug(void); + #include --ATOMIC_NOTIFIER_HEAD(die_chain); --EXPORT_SYMBOL(die_chain); -- --int register_die_notifier(struct notifier_block *nb) --{ -- vmalloc_sync_all(); -- return atomic_notifier_chain_register(&die_chain, nb); --} --EXPORT_SYMBOL(register_die_notifier); /* used modular by kdb */ -- --int unregister_die_notifier(struct notifier_block *nb) --{ -- return atomic_notifier_chain_unregister(&die_chain, nb); --} --EXPORT_SYMBOL(unregister_die_notifier); /* used modular by kdb */ -- - static inline void conditional_sti(struct pt_regs *regs) - { - if (regs->eflags & X86_EFLAGS_IF) -@@ -428,8 +412,7 @@ void show_registers(struct pt_regs *regs - const int cpu = smp_processor_id(); - struct task_struct *cur = cpu_pda(cpu)->pcurrent; - -- rsp = 
regs->rsp; -- -+ rsp = regs->rsp; - printk("CPU %d ", cpu); - __show_regs(regs); - printk("Process %s (pid: %d, threadinfo %p, task %p)\n", -@@ -440,7 +423,6 @@ void show_registers(struct pt_regs *regs - * time of the fault.. - */ - if (in_kernel) { -- - printk("Stack: "); - _show_stack(NULL, regs, (unsigned long*)rsp); - -@@ -485,13 +467,14 @@ static unsigned int die_nest_count; - - unsigned __kprobes long oops_begin(void) - { -- int cpu = smp_processor_id(); -+ int cpu; - unsigned long flags; - - oops_enter(); - - /* racy, but better than risking deadlock. */ - local_irq_save(flags); -+ cpu = smp_processor_id(); - if (!spin_trylock(&die_lock)) { - if (cpu == die_owner) - /* nested oops. should stop eventually */; -@@ -585,10 +568,20 @@ static void __kprobes do_trap(int trapnr - { - struct task_struct *tsk = current; - -- tsk->thread.error_code = error_code; -- tsk->thread.trap_no = trapnr; -- - if (user_mode(regs)) { -+ /* -+ * We want error_code and trap_no set for userspace -+ * faults and kernelspace faults which result in -+ * die(), but not kernelspace faults which are fixed -+ * up. die() gives the process no chance to handle -+ * the signal and notice the kernel fault information, -+ * so that won't result in polluting the information -+ * about previously queued, but not yet delivered, -+ * faults. See also do_general_protection below. -+ */ -+ tsk->thread.error_code = error_code; -+ tsk->thread.trap_no = trapnr; -+ - if (exception_trace && unhandled_signal(tsk, signr)) - printk(KERN_INFO - "%s[%d] trap %s rip:%lx rsp:%lx error:%lx\n", -@@ -609,8 +602,11 @@ static void __kprobes do_trap(int trapnr - fixup = search_exception_tables(regs->rip); - if (fixup) - regs->rip = fixup->fixup; -- else -+ else { -+ tsk->thread.error_code = error_code; -+ tsk->thread.trap_no = trapnr; - die(str, regs, error_code); -+ } - return; - } - } -@@ -686,10 +682,10 @@ asmlinkage void __kprobes do_general_pro - - conditional_sti(regs); - -- tsk->thread.error_code = error_code; -- tsk->thread.trap_no = 13; -- - if (user_mode(regs)) { -+ tsk->thread.error_code = error_code; -+ tsk->thread.trap_no = 13; -+ - if (exception_trace && unhandled_signal(tsk, SIGSEGV)) - printk(KERN_INFO - "%s[%d] general protection rip:%lx rsp:%lx error:%lx\n", -@@ -708,6 +704,9 @@ asmlinkage void __kprobes do_general_pro - regs->rip = fixup->fixup; - return; - } -+ -+ tsk->thread.error_code = error_code; -+ tsk->thread.trap_no = 13; - if (notify_die(DIE_GPF, "general protection fault", regs, - error_code, 13, SIGSEGV) == NOTIFY_STOP) - return; ---- a/arch/x86/kernel/vsyscall_64-xen.c -+++ b/arch/x86/kernel/vsyscall_64-xen.c -@@ -45,14 +45,34 @@ - - #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr))) - #define __syscall_clobber "r11","rcx","memory" -+#define __pa_vsymbol(x) \ -+ ({unsigned long v; \ -+ extern char __vsyscall_0; \ -+ asm("" : "=r" (v) : "0" (x)); \ -+ ((v - VSYSCALL_FIRST_PAGE) + __pa_symbol(&__vsyscall_0)); }) - -+/* -+ * vsyscall_gtod_data contains data that is : -+ * - readonly from vsyscalls -+ * - writen by timer interrupt or systcl (/proc/sys/kernel/vsyscall64) -+ * Try to keep this structure as small as possible to avoid cache line ping pongs -+ */ - struct vsyscall_gtod_data_t { -- seqlock_t lock; -- int sysctl_enabled; -- struct timeval wall_time_tv; -+ seqlock_t lock; -+ -+ /* open coded 'struct timespec' */ -+ time_t wall_time_sec; -+ u32 wall_time_nsec; -+ -+ int sysctl_enabled; - struct timezone sys_tz; -- cycle_t offset_base; -- struct clocksource clock; -+ struct { /* 
extract of a clocksource struct */ -+ cycle_t (*vread)(void); -+ cycle_t cycle_last; -+ cycle_t mask; -+ u32 mult; -+ u32 shift; -+ } clock; - }; - int __vgetcpu_mode __section_vgetcpu_mode; - -@@ -68,9 +88,13 @@ void update_vsyscall(struct timespec *wa - - write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags); - /* copy vsyscall data */ -- vsyscall_gtod_data.clock = *clock; -- vsyscall_gtod_data.wall_time_tv.tv_sec = wall_time->tv_sec; -- vsyscall_gtod_data.wall_time_tv.tv_usec = wall_time->tv_nsec/1000; -+ vsyscall_gtod_data.clock.vread = clock->vread; -+ vsyscall_gtod_data.clock.cycle_last = clock->cycle_last; -+ vsyscall_gtod_data.clock.mask = clock->mask; -+ vsyscall_gtod_data.clock.mult = clock->mult; -+ vsyscall_gtod_data.clock.shift = clock->shift; -+ vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec; -+ vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec; - vsyscall_gtod_data.sys_tz = sys_tz; - write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); - } -@@ -105,7 +129,8 @@ static __always_inline long time_syscall - static __always_inline void do_vgettimeofday(struct timeval * tv) - { - cycle_t now, base, mask, cycle_delta; -- unsigned long seq, mult, shift, nsec_delta; -+ unsigned seq; -+ unsigned long mult, shift, nsec; - cycle_t (*vread)(void); - do { - seq = read_seqbegin(&__vsyscall_gtod_data.lock); -@@ -121,21 +146,20 @@ static __always_inline void do_vgettimeo - mult = __vsyscall_gtod_data.clock.mult; - shift = __vsyscall_gtod_data.clock.shift; - -- *tv = __vsyscall_gtod_data.wall_time_tv; -- -+ tv->tv_sec = __vsyscall_gtod_data.wall_time_sec; -+ nsec = __vsyscall_gtod_data.wall_time_nsec; - } while (read_seqretry(&__vsyscall_gtod_data.lock, seq)); - - /* calculate interval: */ - cycle_delta = (now - base) & mask; - /* convert to nsecs: */ -- nsec_delta = (cycle_delta * mult) >> shift; -+ nsec += (cycle_delta * mult) >> shift; - -- /* convert to usecs and add to timespec: */ -- tv->tv_usec += nsec_delta / NSEC_PER_USEC; -- while (tv->tv_usec > USEC_PER_SEC) { -+ while (nsec >= NSEC_PER_SEC) { - tv->tv_sec += 1; -- tv->tv_usec -= USEC_PER_SEC; -+ nsec -= NSEC_PER_SEC; - } -+ tv->tv_usec = nsec / NSEC_PER_USEC; - } - - int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz) -@@ -151,11 +175,16 @@ int __vsyscall(0) vgettimeofday(struct t - * unlikely */ - time_t __vsyscall(1) vtime(time_t *t) - { -+ struct timeval tv; -+ time_t result; - if (unlikely(!__vsyscall_gtod_data.sysctl_enabled)) - return time_syscall(t); -- else if (t) -- *t = __vsyscall_gtod_data.wall_time_tv.tv_sec; -- return __vsyscall_gtod_data.wall_time_tv.tv_sec; -+ -+ vgettimeofday(&tv, 0); -+ result = tv.tv_sec; -+ if (t) -+ *t = result; -+ return result; - } - - /* Fast way to get current CPU and node. -@@ -224,10 +253,10 @@ static int vsyscall_sysctl_change(ctl_ta - return ret; - /* gcc has some trouble with __va(__pa()), so just do it this - way. 
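Note: vsyscall_sysctl_change() cannot patch the vsyscall text through its normal mapping (it is read-only, and __va(__pa()) is unreliable for vsyscall addresses, which is what __pa_vsymbol() above works around by rebasing on __vsyscall_0). It therefore ioremaps the physical pages behind the two 2-byte patch sites and writes through the temporary mapping. In sketch form, with patch_bytes as a hypothetical stand-in for the bytes being poked:

	void __iomem *map = ioremap(__pa_vsymbol(&vsysc1), 2);

	if (map) {
		memcpy_toio(map, patch_bytes, 2);	/* poke the code */
		iounmap(map);
	}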
*/ -- map1 = ioremap(__pa_symbol(&vsysc1), 2); -+ map1 = ioremap(__pa_vsymbol(&vsysc1), 2); - if (!map1) - return -ENOMEM; -- map2 = ioremap(__pa_symbol(&vsysc2), 2); -+ map2 = ioremap(__pa_vsymbol(&vsysc2), 2); - if (!map2) { - ret = -ENOMEM; - goto out; -@@ -304,7 +333,7 @@ static int __cpuinit - cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg) - { - long cpu = (long)arg; -- if (action == CPU_ONLINE) -+ if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN) - smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 0, 1); - return NOTIFY_DONE; - } ---- a/arch/x86/mm/fault_32-xen.c -+++ b/arch/x86/mm/fault_32-xen.c -@@ -14,19 +14,20 @@ - #include - #include - #include --#include - #include - #include - #include - #include /* For unblank_screen() */ - #include -+#include /* for max_low_pfn */ -+#include - #include - #include - #include -+#include - - #include - #include --#include - #include - - extern void die(const char *,struct pt_regs *,long); -@@ -259,25 +260,20 @@ static void dump_fault_path(unsigned lon - unsigned long page; + extern void die(const char *,struct pt_regs *,long); +@@ -259,25 +260,20 @@ static void dump_fault_path(unsigned lon + unsigned long page; page = read_cr3(); - page = ((unsigned long *) __va(page))[address >> 22]; @@ -3462,67 +2019,8 @@ Acked-by: jbeulich@novell.com } } -#endif ---- a/arch/x86/mm/fault_64-xen.c -+++ b/arch/x86/mm/fault_64-xen.c -@@ -15,22 +15,22 @@ - #include - #include - #include --#include - #include - #include - #include - #include /* For unblank_screen() */ - #include -+#include - #include - #include - #include -+#include - - #include - #include - #include - #include - #include --#include - #include - - /* Page fault error code bits */ -@@ -537,6 +537,12 @@ bad_area: - bad_area_nosemaphore: - /* User mode accesses just cause a SIGSEGV */ - if (error_code & PF_USER) { -+ -+ /* -+ * It's possible to have interrupts off here. -+ */ -+ local_irq_enable(); -+ - if (is_prefetch(regs, address, error_code)) - return; - -@@ -646,7 +652,7 @@ do_sigbus: - } - - DEFINE_SPINLOCK(pgd_lock); --struct page *pgd_list; -+LIST_HEAD(pgd_list); - - void vmalloc_sync_all(void) - { -@@ -666,8 +672,7 @@ void vmalloc_sync_all(void) - if (pgd_none(*pgd_ref)) - continue; - spin_lock(&pgd_lock); -- for (page = pgd_list; page; -- page = (struct page *)page->index) { -+ list_for_each_entry(page, &pgd_list, lru) { - pgd_t *pgd; - pgd = (pgd_t *)page_address(page) + pgd_index(address); - if (pgd_none(*pgd)) ---- a/arch/x86/mm/highmem_32-xen.c -+++ b/arch/x86/mm/highmem_32-xen.c +--- sle11-2009-04-20.orig/arch/x86/mm/highmem_32-xen.c 2009-03-04 11:25:55.000000000 +0100 ++++ sle11-2009-04-20/arch/x86/mm/highmem_32-xen.c 2008-12-15 11:27:22.000000000 +0100 @@ -26,7 +26,7 @@ void kunmap(struct page *page) * However when holding an atomic kmap is is not legal to sleep, so atomic * kmaps are appropriate for short, tight code paths only. 
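Note: as the kunmap() comment above says, an atomic kmap pins a per-CPU fixmap slot with preemption disabled, so the map/use/unmap window must stay short and must never sleep. The canonical 2.6.22-era pattern with explicit slot types:

	#include <linux/highmem.h>

	static void copy_from_page(void *dst, struct page *page, size_t len)
	{
		char *vaddr = kmap_atomic(page, KM_USER0); /* preempt off */

		memcpy(dst, vaddr, len);	/* no sleeping in here */
		kunmap_atomic(vaddr, KM_USER0);	/* preempt back on */
	}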
@@ -3565,8 +2063,8 @@ Acked-by: jbeulich@novell.com EXPORT_SYMBOL(kunmap_atomic); EXPORT_SYMBOL(kmap_atomic_to_page); EXPORT_SYMBOL(clear_highpage); ---- a/arch/x86/mm/init_32-xen.c -+++ b/arch/x86/mm/init_32-xen.c +--- sle11-2009-04-20.orig/arch/x86/mm/init_32-xen.c 2009-03-04 11:25:55.000000000 +0100 ++++ sle11-2009-04-20/arch/x86/mm/init_32-xen.c 2008-12-15 11:27:22.000000000 +0100 @@ -22,6 +22,7 @@ #include #include @@ -3838,3592 +2336,4998 @@ Acked-by: jbeulich@novell.com } void free_initmem(void) ---- a/arch/x86/mm/init_64-xen.c -+++ b/arch/x86/mm/init_64-xen.c -@@ -25,10 +25,12 @@ - #include - #include - #include -+#include - #include - #include +--- sle11-2009-04-20.orig/arch/x86/mm/ioremap_32-xen.c 2009-03-04 11:28:34.000000000 +0100 ++++ sle11-2009-04-20/arch/x86/mm/ioremap_32-xen.c 2008-12-15 11:27:22.000000000 +0100 +@@ -13,6 +13,7 @@ + #include #include - #include -+#include + #include ++#include + #include + #include + #include +--- sle11-2009-04-20.orig/arch/x86/mm/pgtable_32-xen.c 2009-03-04 11:25:55.000000000 +0100 ++++ sle11-2009-04-20/arch/x86/mm/pgtable_32-xen.c 2008-12-15 11:27:22.000000000 +0100 +@@ -13,6 +13,7 @@ + #include + #include + #include ++#include - #include #include -@@ -51,7 +53,7 @@ - #define Dprintk(x...) - #endif - --struct dma_mapping_ops* dma_ops; -+const struct dma_mapping_ops* dma_ops; - EXPORT_SYMBOL(dma_ops); - - #if CONFIG_XEN_COMPAT <= 0x030002 -@@ -191,6 +193,13 @@ void show_mem(void) - - for_each_online_pgdat(pgdat) { - for (i = 0; i < pgdat->node_spanned_pages; ++i) { -+ /* this loop can take a while with 256 GB and 4k pages -+ so update the NMI watchdog */ -+ if (unlikely(i % MAX_ORDER_NR_PAGES == 0)) { -+ touch_nmi_watchdog(); -+ } -+ if (!pfn_valid(pgdat->node_start_pfn + i)) -+ continue; - page = pfn_to_page(pgdat->node_start_pfn + i); - total++; - if (PageReserved(page)) -@@ -359,7 +368,7 @@ __set_fixmap (enum fixed_addresses idx, - } + #include +@@ -218,8 +219,6 @@ void pmd_ctor(void *pmd, struct kmem_cac + * against pageattr.c; it is the unique case in which a valid change + * of kernel pagetables can't be lazily synchronized by vmalloc faults. + * vmalloc faults work because attached pagetables are never freed. +- * The locking scheme was chosen on the basis of manfred's +- * recommendations and having no core impact whatsoever. 
+ * -- wli + */ + DEFINE_SPINLOCK(pgd_lock); +@@ -245,37 +244,54 @@ static inline void pgd_list_del(pgd_t *p + set_page_private(next, (unsigned long)pprev); } --unsigned long __initdata table_start, table_end; -+unsigned long __meminitdata table_start, table_end; - - static __meminit void *alloc_static_page(unsigned long *phys) +-void pgd_ctor(void *pgd, struct kmem_cache *cache, unsigned long unused) ++ ++ ++#if (PTRS_PER_PMD == 1) ++/* Non-PAE pgd constructor */ ++void pgd_ctor(void *pgd) { -@@ -376,7 +385,7 @@ static __meminit void *alloc_static_page - start_pfn++; - memset((void *)va, 0, PAGE_SIZE); - return (void *)va; --} -+} - - #define PTE_SIZE PAGE_SIZE - -@@ -412,28 +421,46 @@ static inline int make_readonly(unsigned + unsigned long flags; - #ifndef CONFIG_XEN - /* Must run before zap_low_mappings */ --__init void *early_ioremap(unsigned long addr, unsigned long size) -+__meminit void *early_ioremap(unsigned long addr, unsigned long size) - { -- unsigned long map = round_down(addr, LARGE_PAGE_SIZE); -- -- /* actually usually some more */ -- if (size >= LARGE_PAGE_SIZE) { -- return NULL; -+ unsigned long vaddr; -+ pmd_t *pmd, *last_pmd; -+ int i, pmds; +- if (PTRS_PER_PMD > 1) { +- if (HAVE_SHARED_KERNEL_PMD) +- clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD, +- swapper_pg_dir + USER_PTRS_PER_PGD, +- KERNEL_PGD_PTRS); +- } else { +- spin_lock_irqsave(&pgd_lock, flags); ++ /* !PAE, no pagetable sharing */ ++ memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t)); + -+ pmds = ((addr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE; -+ vaddr = __START_KERNEL_map; -+ pmd = level2_kernel_pgt; -+ last_pmd = level2_kernel_pgt + PTRS_PER_PMD - 1; -+ for (; pmd <= last_pmd; pmd++, vaddr += PMD_SIZE) { -+ for (i = 0; i < pmds; i++) { -+ if (pmd_present(pmd[i])) -+ goto next; -+ } -+ vaddr += addr & ~PMD_MASK; -+ addr &= PMD_MASK; -+ for (i = 0; i < pmds; i++, addr += PMD_SIZE) -+ set_pmd(pmd + i,__pmd(addr | _KERNPG_TABLE | _PAGE_PSE)); -+ __flush_tlb(); -+ return (void *)vaddr; -+ next: -+ ; ++ spin_lock_irqsave(&pgd_lock, flags); ++ ++ /* must happen under lock */ ++ clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD, ++ swapper_pg_dir + USER_PTRS_PER_PGD, ++ KERNEL_PGD_PTRS); ++ ++ paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT, ++ __pa(swapper_pg_dir) >> PAGE_SHIFT, ++ USER_PTRS_PER_PGD, ++ KERNEL_PGD_PTRS); ++ pgd_list_add(pgd); ++ spin_unlock_irqrestore(&pgd_lock, flags); ++} ++#else /* PTRS_PER_PMD > 1 */ ++/* PAE pgd constructor */ ++void pgd_ctor(void *pgd) ++{ ++ /* PAE, kernel PMD may be shared */ ++ ++ if (SHARED_KERNEL_PMD) { + clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD, + swapper_pg_dir + USER_PTRS_PER_PGD, + KERNEL_PGD_PTRS); ++ } else { + memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t)); +- +- /* must happen under lock */ +- paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT, +- __pa(swapper_pg_dir) >> PAGE_SHIFT, +- USER_PTRS_PER_PGD, PTRS_PER_PGD - USER_PTRS_PER_PGD); +- +- pgd_list_add(pgd); +- spin_unlock_irqrestore(&pgd_lock, flags); } -- set_pmd(temp_mappings[0].pmd, __pmd(map | _KERNPG_TABLE | _PAGE_PSE)); -- map += LARGE_PAGE_SIZE; -- set_pmd(temp_mappings[1].pmd, __pmd(map | _KERNPG_TABLE | _PAGE_PSE)); -- __flush_tlb(); -- return temp_mappings[0].address + (addr & (LARGE_PAGE_SIZE-1)); -+ printk("early_ioremap(0x%lx, %lu) failed\n", addr, size); -+ return NULL; } ++#endif /* PTRS_PER_PMD */ - /* To avoid virtual aliases later */ --__init void early_iounmap(void *addr, unsigned long size) -+__meminit void early_iounmap(void *addr, unsigned long size) +-/* never called when 
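Note: both pgd_ctor() flavours above propagate the kernel half of swapper_pg_dir into each new pgd; clone_pgd_range() is nothing more than a typed memcpy over pgd slots. Roughly:

	/* mainline helper the constructors rely on (sketch) */
	static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
	{
		memcpy(dst, src, count * sizeof(pgd_t));
	}

	/* e.g. share the kernel entries of a fresh pgd: */
	clone_pgd_range(pgd + USER_PTRS_PER_PGD,
			swapper_pg_dir + USER_PTRS_PER_PGD,
			KERNEL_PGD_PTRS);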
PTRS_PER_PMD > 1 */ +-void pgd_dtor(void *pgd, struct kmem_cache *cache, unsigned long unused) ++void pgd_dtor(void *pgd) { -- if ((void *)round_down((unsigned long)addr, LARGE_PAGE_SIZE) != temp_mappings[0].address) -- printk("early_iounmap: bad address %p\n", addr); -- set_pmd(temp_mappings[0].pmd, __pmd(0)); -- set_pmd(temp_mappings[1].pmd, __pmd(0)); -+ unsigned long vaddr; -+ pmd_t *pmd; -+ int i, pmds; + unsigned long flags; /* can be called from interrupt context */ + ++ if (SHARED_KERNEL_PMD) ++ return; + -+ vaddr = (unsigned long)addr; -+ pmds = ((vaddr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE; -+ pmd = level2_kernel_pgt + pmd_index(vaddr); -+ for (i = 0; i < pmds; i++) -+ pmd_clear(pmd + i); - __flush_tlb(); - } - #endif -@@ -767,14 +794,6 @@ void __meminit init_memory_mapping(unsig - __flush_tlb_all(); + paravirt_release_pd(__pa(pgd) >> PAGE_SHIFT); + spin_lock_irqsave(&pgd_lock, flags); + pgd_list_del(pgd); +@@ -284,11 +300,46 @@ void pgd_dtor(void *pgd, struct kmem_cac + pgd_test_and_unpin(pgd); } --void __cpuinit zap_low_mappings(int cpu) --{ -- /* this is not required for Xen */ --#if 0 -- swap_low_mappings(); --#endif --} -- - #ifndef CONFIG_NUMA - void __init paging_init(void) ++#define UNSHARED_PTRS_PER_PGD \ ++ (SHARED_KERNEL_PMD ? USER_PTRS_PER_PGD : PTRS_PER_PGD) ++ ++/* If we allocate a pmd for part of the kernel address space, then ++ make sure its initialized with the appropriate kernel mappings. ++ Otherwise use a cached zeroed pmd. */ ++static pmd_t *pmd_cache_alloc(int idx) ++{ ++ pmd_t *pmd; ++ ++ if (idx >= USER_PTRS_PER_PGD) { ++ pmd = (pmd_t *)__get_free_page(GFP_KERNEL); ++ ++#ifndef CONFIG_XEN ++ if (pmd) ++ memcpy(pmd, ++ (void *)pgd_page_vaddr(swapper_pg_dir[idx]), ++ sizeof(pmd_t) * PTRS_PER_PMD); ++#endif ++ } else ++ pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL); ++ ++ return pmd; ++} ++ ++static void pmd_cache_free(pmd_t *pmd, int idx) ++{ ++ if (idx >= USER_PTRS_PER_PGD) { ++ make_lowmem_page_writable(pmd, XENFEAT_writable_page_tables); ++ memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t)); ++ free_page((unsigned long)pmd); ++ } else ++ kmem_cache_free(pmd_cache, pmd); ++} ++ + pgd_t *pgd_alloc(struct mm_struct *mm) { -@@ -960,17 +979,6 @@ void __init mem_init(void) - reservedpages << (PAGE_SHIFT-10), - datasize >> 10, - initsize >> 10); -- --#ifndef CONFIG_XEN --#ifdef CONFIG_SMP -- /* -- * Sync boot_level4_pgt mappings with the init_level4_pgt -- * except for the low identity mappings which are already zapped -- * in init_level4_pgt. 
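Note: pgd_dtor() above can run from interrupt context (final page frees), which is why every walk or update of the global pgd list takes pgd_lock with interrupts disabled. The pattern used throughout this file:

	unsigned long flags;	/* pgd_dtor may be called in irq context */

	spin_lock_irqsave(&pgd_lock, flags);
	pgd_list_del(pgd);
	spin_unlock_irqrestore(&pgd_lock, flags);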
This sync-up is essential for AP's bringup -- */ -- memcpy(boot_level4_pgt+1, init_level4_pgt+1, (PTRS_PER_PGD-1)*sizeof(pgd_t)); --#endif --#endif - } + int i; +- pgd_t *pgd = kmem_cache_alloc(pgd_cache, GFP_KERNEL); +- pmd_t **pmd; ++ pgd_t *pgd = quicklist_alloc(0, GFP_KERNEL, pgd_ctor); ++ pmd_t **pmds = NULL; + unsigned long flags; - void free_init_pages(char *what, unsigned long begin, unsigned long end) -@@ -980,7 +988,7 @@ void free_init_pages(char *what, unsigne - if (begin >= end) - return; + pgd_test_and_unpin(pgd); +@@ -296,37 +347,40 @@ pgd_t *pgd_alloc(struct mm_struct *mm) + if (PTRS_PER_PMD == 1 || !pgd) + return pgd; -- printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10); -+ printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10); - for (addr = begin; addr < end; addr += PAGE_SIZE) { - ClearPageReserved(virt_to_page(addr)); - init_page_count(virt_to_page(addr)); -@@ -989,24 +997,17 @@ void free_init_pages(char *what, unsigne - if (addr >= __START_KERNEL_map) { - /* make_readonly() reports all kernel addresses. */ - __make_page_writable(__va(__pa(addr))); -- if (HYPERVISOR_update_va_mapping(addr, __pte(0), 0)) { -- pgd_t *pgd = pgd_offset_k(addr); -- pud_t *pud = pud_offset(pgd, addr); -- pmd_t *pmd = pmd_offset(pud, addr); -- pte_t *pte = pte_offset_kernel(pmd, addr); -- -- xen_l1_entry_update(pte, __pte(0)); /* fallback */ -- } -+ change_page_attr_addr(addr, 1, __pgprot(0)); +- if (HAVE_SHARED_KERNEL_PMD) { +- for (i = 0; i < USER_PTRS_PER_PGD; ++i) { +- pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL); +- if (!pmd) +- goto out_oom; +- paravirt_alloc_pd(__pa(pmd) >> PAGE_SHIFT); +- set_pgd(&pgd[i], __pgd(1 + __pa(pmd))); ++#ifdef CONFIG_XEN ++ if (!SHARED_KERNEL_PMD) { ++ /* ++ * We can race save/restore (if we sleep during a GFP_KERNEL memory ++ * allocation). We therefore store virtual addresses of pmds as they ++ * do not change across save/restore, and poke the machine addresses ++ * into the pgdir under the pgd_lock. ++ */ ++ pmds = kmalloc(PTRS_PER_PGD * sizeof(pmd_t *), GFP_KERNEL); ++ if (!pmds) { ++ quicklist_free(0, pgd_dtor, pgd); ++ return NULL; } - free_page(addr); - totalram_pages++; +- return pgd; +- } +- +- /* +- * We can race save/restore (if we sleep during a GFP_KERNEL memory +- * allocation). We therefore store virtual addresses of pmds as they +- * do not change across save/restore, and poke the machine addresses +- * into the pgdir under the pgd_lock. +- */ +- pmd = kmalloc(PTRS_PER_PGD * sizeof(pmd_t *), GFP_KERNEL); +- if (!pmd) { +- kmem_cache_free(pgd_cache, pgd); +- return NULL; } -+ if (addr > __START_KERNEL_map) -+ global_flush_tlb(); - } - - void free_initmem(void) - { -- memset(__initdata_begin, POISON_FREE_INITDATA, -- __initdata_end - __initdata_begin); - free_init_pages("unused kernel memory", - (unsigned long)(&__init_begin), - (unsigned long)(&__init_end)); -@@ -1016,13 +1017,28 @@ void free_initmem(void) ++#endif - void mark_rodata_ro(void) - { -- unsigned long addr = (unsigned long)__start_rodata; -+ unsigned long start = (unsigned long)_stext, end; + /* Allocate pmds, remember virtual addresses. */ +- for (i = 0; i < PTRS_PER_PGD; ++i) { +- pmd[i] = kmem_cache_alloc(pmd_cache, GFP_KERNEL); +- if (!pmd[i]) ++ for (i = 0; i < UNSHARED_PTRS_PER_PGD; ++i) { ++ pmd_t *pmd = pmd_cache_alloc(i); + -+#ifdef CONFIG_HOTPLUG_CPU -+ /* It must still be possible to apply SMP alternatives. 
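Note: the reworked mark_rodata_ro() in this hunk computes one page-aligned [start, end) window and write-protects it with a single change_page_attr_addr() call; as with every pageattr change, the effect is deferred until an explicit flush. A condensed sketch of the same pattern:

	static void protect_rodata(unsigned long start, unsigned long end)
	{
		start = PAGE_ALIGN(start);
		end  &= PAGE_MASK;
		if (end <= start)
			return;

		change_page_attr_addr(start, (end - start) >> PAGE_SHIFT,
				      PAGE_KERNEL_RO);
		global_flush_tlb();	/* required after change_page_attr*() */
	}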
*/ -+ if (num_possible_cpus() > 1) -+ start = (unsigned long)_etext; -+#endif ++ if (!pmd) + goto out_oom; + -+#ifdef CONFIG_KPROBES -+ start = (unsigned long)__start_rodata; -+#endif + paravirt_alloc_pd(__pa(pmd) >> PAGE_SHIFT); ++ if (pmds) ++ pmds[i] = pmd; ++ else ++ set_pgd(&pgd[i], __pgd(1 + __pa(pmd))); + } + ++#ifdef CONFIG_XEN ++ if (SHARED_KERNEL_PMD) ++ return pgd; + -+ end = (unsigned long)__end_rodata; -+ start = (start + PAGE_SIZE - 1) & PAGE_MASK; -+ end &= PAGE_MASK; -+ if (end <= start) -+ return; + spin_lock_irqsave(&pgd_lock, flags); -- for (; addr < (unsigned long)__end_rodata; addr += PAGE_SIZE) -- change_page_attr_addr(addr, 1, PAGE_KERNEL_RO); -+ change_page_attr_addr(start, (end - start) >> PAGE_SHIFT, PAGE_KERNEL_RO); + /* Protect against save/restore: move below 4GB under pgd_lock. */ +@@ -341,44 +395,43 @@ pgd_t *pgd_alloc(struct mm_struct *mm) -- printk ("Write protecting the kernel read-only data: %luk\n", -- (__end_rodata - __start_rodata) >> 10); -+ printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n", -+ (end - start) >> 10); + /* Copy kernel pmd contents and write-protect the new pmds. */ + for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) { +- unsigned long v = (unsigned long)i << PGDIR_SHIFT; +- pgd_t *kpgd = pgd_offset_k(v); +- pud_t *kpud = pud_offset(kpgd, v); +- pmd_t *kpmd = pmd_offset(kpud, v); +- memcpy(pmd[i], kpmd, PAGE_SIZE); ++ memcpy(pmds[i], ++ (void *)pgd_page_vaddr(swapper_pg_dir[i]), ++ sizeof(pmd_t) * PTRS_PER_PMD); + make_lowmem_page_readonly( +- pmd[i], XENFEAT_writable_page_tables); ++ pmds[i], XENFEAT_writable_page_tables); + } - /* - * change_page_attr_addr() requires a global_flush_tlb() call after it. -@@ -1175,3 +1191,11 @@ int in_gate_area_no_task(unsigned long a - { - return (addr >= VSYSCALL_START) && (addr < VSYSCALL_END); - } -+ -+#ifndef CONFIG_XEN -+void *alloc_bootmem_high_node(pg_data_t *pgdat, unsigned long size) -+{ -+ return __alloc_bootmem_core(pgdat->bdata, size, -+ SMP_CACHE_BYTES, (4UL*1024*1024*1024), 0); -+} -+#endif ---- a/arch/x86/mm/ioremap_32-xen.c -+++ b/arch/x86/mm/ioremap_32-xen.c -@@ -13,6 +13,7 @@ - #include - #include - #include -+#include - #include - #include - #include ---- a/arch/x86/mm/pageattr_64-xen.c -+++ b/arch/x86/mm/pageattr_64-xen.c -@@ -215,13 +215,13 @@ void mm_pin_all(void) - preempt_enable(); - } + /* It is safe to poke machine addresses of pmds under the pmd_lock. */ + for (i = 0; i < PTRS_PER_PGD; i++) +- set_pgd(&pgd[i], __pgd(1 + __pa(pmd[i]))); ++ set_pgd(&pgd[i], __pgd(1 + __pa(pmds[i]))); --void _arch_dup_mmap(struct mm_struct *mm) -+void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) - { - if (!mm->context.pinned) - mm_pin(mm); - } + /* Ensure this pgd gets picked up and pinned on save/restore. */ + pgd_list_add(pgd); --void _arch_exit_mmap(struct mm_struct *mm) -+void arch_exit_mmap(struct mm_struct *mm) - { - struct task_struct *tsk = current; + spin_unlock_irqrestore(&pgd_lock, flags); -@@ -343,10 +343,11 @@ static void flush_kernel_map(void *arg) - struct page *pg; +- kfree(pmd); ++ kfree(pmds); ++#endif - /* When clflush is available always use it because it is -- much cheaper than WBINVD */ -- if (!cpu_has_clflush) -+ much cheaper than WBINVD. 
Disable clflush for now because -+ the high level code is not ready yet */ -+ if (1 || !cpu_has_clflush) - asm volatile("wbinvd" ::: "memory"); -- list_for_each_entry(pg, l, lru) { -+ else list_for_each_entry(pg, l, lru) { - void *adr = page_address(pg); - if (cpu_has_clflush) - cache_flush_page(adr); -@@ -460,16 +461,24 @@ __change_page_attr(unsigned long address - */ - int change_page_attr_addr(unsigned long address, int numpages, pgprot_t prot) - { -- int err = 0; -+ int err = 0, kernel_map = 0; - int i; + return pgd; -+ if (address >= __START_KERNEL_map -+ && address < __START_KERNEL_map + KERNEL_TEXT_SIZE) { -+ address = (unsigned long)__va(__pa(address)); -+ kernel_map = 1; -+ } -+ - down_write(&init_mm.mmap_sem); - for (i = 0; i < numpages; i++, address += PAGE_SIZE) { - unsigned long pfn = __pa(address) >> PAGE_SHIFT; - -- err = __change_page_attr(address, pfn, prot, PAGE_KERNEL); -- if (err) -- break; -+ if (!kernel_map || pte_present(pfn_pte(0, prot))) { -+ err = __change_page_attr(address, pfn, prot, PAGE_KERNEL); -+ if (err) -+ break; -+ } - /* Handle kernel mapping too which aliases part of the - * lowmem */ - if (__pa(address) < KERNEL_TEXT_SIZE) { ---- a/arch/x86/mm/pgtable_32-xen.c -+++ b/arch/x86/mm/pgtable_32-xen.c -@@ -13,6 +13,7 @@ - #include - #include - #include -+#include - - #include - #include -@@ -218,8 +219,6 @@ void pmd_ctor(void *pmd, struct kmem_cac - * against pageattr.c; it is the unique case in which a valid change - * of kernel pagetables can't be lazily synchronized by vmalloc faults. - * vmalloc faults work because attached pagetables are never freed. -- * The locking scheme was chosen on the basis of manfred's -- * recommendations and having no core impact whatsoever. - * -- wli - */ - DEFINE_SPINLOCK(pgd_lock); -@@ -245,37 +244,54 @@ static inline void pgd_list_del(pgd_t *p - set_page_private(next, (unsigned long)pprev); + out_oom: +- if (HAVE_SHARED_KERNEL_PMD) { ++ if (!pmds) { + for (i--; i >= 0; i--) { + pgd_t pgdent = pgd[i]; + void* pmd = (void *)__va(pgd_val(pgdent)-1); + paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT); +- kmem_cache_free(pmd_cache, pmd); ++ pmd_cache_free(pmd, i); + } + } else { + for (i--; i >= 0; i--) { +- paravirt_release_pd(__pa(pmd[i]) >> PAGE_SHIFT); +- kmem_cache_free(pmd_cache, pmd[i]); ++ paravirt_release_pd(__pa(pmds[i]) >> PAGE_SHIFT); ++ pmd_cache_free(pmds[i], i); + } +- kfree(pmd); ++ kfree(pmds); + } +- kmem_cache_free(pgd_cache, pgd); ++ quicklist_free(0, pgd_dtor, pgd); + return NULL; } --void pgd_ctor(void *pgd, struct kmem_cache *cache, unsigned long unused) -+ -+ -+#if (PTRS_PER_PMD == 1) -+/* Non-PAE pgd constructor */ -+void pgd_ctor(void *pgd) - { - unsigned long flags; +@@ -398,35 +451,24 @@ void pgd_free(pgd_t *pgd) -- if (PTRS_PER_PMD > 1) { -- if (HAVE_SHARED_KERNEL_PMD) -- clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD, -- swapper_pg_dir + USER_PTRS_PER_PGD, -- KERNEL_PGD_PTRS); -- } else { -- spin_lock_irqsave(&pgd_lock, flags); -+ /* !PAE, no pagetable sharing */ -+ memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t)); -+ -+ spin_lock_irqsave(&pgd_lock, flags); -+ -+ /* must happen under lock */ -+ clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD, -+ swapper_pg_dir + USER_PTRS_PER_PGD, -+ KERNEL_PGD_PTRS); -+ -+ paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT, -+ __pa(swapper_pg_dir) >> PAGE_SHIFT, -+ USER_PTRS_PER_PGD, -+ KERNEL_PGD_PTRS); -+ pgd_list_add(pgd); -+ spin_unlock_irqrestore(&pgd_lock, flags); -+} -+#else /* PTRS_PER_PMD > 1 */ -+/* PAE pgd constructor */ -+void pgd_ctor(void *pgd) -+{ -+ /* 
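Note: the flush_kernel_map() change above deliberately keeps WBINVD as the only path for now; once the CLFLUSH route is enabled, flushing a page is one CLFLUSH per cache line. A sketch assuming a 64-byte line (the real code reads the line size from cpuid):

	static void cache_flush_page(void *adr)
	{
		int i;

		for (i = 0; i < PAGE_SIZE; i += 64)
			asm volatile("clflush (%0)"
				     :: "r" (adr + i) : "memory");
	}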
PAE, kernel PMD may be shared */ -+ -+ if (SHARED_KERNEL_PMD) { - clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD, - swapper_pg_dir + USER_PTRS_PER_PGD, - KERNEL_PGD_PTRS); -+ } else { - memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t)); + /* in the PAE case user pgd entries are overwritten before usage */ + if (PTRS_PER_PMD > 1) { +- for (i = 0; i < USER_PTRS_PER_PGD; ++i) { ++ for (i = 0; i < UNSHARED_PTRS_PER_PGD; ++i) { + pgd_t pgdent = pgd[i]; + void* pmd = (void *)__va(pgd_val(pgdent)-1); + paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT); +- kmem_cache_free(pmd_cache, pmd); ++ pmd_cache_free(pmd, i); + } + +- if (!HAVE_SHARED_KERNEL_PMD) { +- unsigned long flags; +- spin_lock_irqsave(&pgd_lock, flags); +- pgd_list_del(pgd); +- spin_unlock_irqrestore(&pgd_lock, flags); - -- /* must happen under lock */ -- paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT, -- __pa(swapper_pg_dir) >> PAGE_SHIFT, -- USER_PTRS_PER_PGD, PTRS_PER_PGD - USER_PTRS_PER_PGD); +- for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) { +- pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1); +- make_lowmem_page_writable( +- pmd, XENFEAT_writable_page_tables); +- memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t)); +- kmem_cache_free(pmd_cache, pmd); +- } - -- pgd_list_add(pgd); -- spin_unlock_irqrestore(&pgd_lock, flags); +- if (!xen_feature(XENFEAT_pae_pgdir_above_4gb)) +- xen_destroy_contiguous_region( +- (unsigned long)pgd, 0); +- } ++ if (!xen_feature(XENFEAT_pae_pgdir_above_4gb)) ++ xen_destroy_contiguous_region((unsigned long)pgd, 0); } - } -+#endif /* PTRS_PER_PMD */ - --/* never called when PTRS_PER_PMD > 1 */ --void pgd_dtor(void *pgd, struct kmem_cache *cache, unsigned long unused) -+void pgd_dtor(void *pgd) - { - unsigned long flags; /* can be called from interrupt context */ - -+ if (SHARED_KERNEL_PMD) -+ return; -+ - paravirt_release_pd(__pa(pgd) >> PAGE_SHIFT); - spin_lock_irqsave(&pgd_lock, flags); - pgd_list_del(pgd); -@@ -284,11 +300,46 @@ void pgd_dtor(void *pgd, struct kmem_cac - pgd_test_and_unpin(pgd); - } -+#define UNSHARED_PTRS_PER_PGD \ -+ (SHARED_KERNEL_PMD ? USER_PTRS_PER_PGD : PTRS_PER_PGD) -+ -+/* If we allocate a pmd for part of the kernel address space, then -+ make sure its initialized with the appropriate kernel mappings. -+ Otherwise use a cached zeroed pmd. 
*/ -+static pmd_t *pmd_cache_alloc(int idx) -+{ -+ pmd_t *pmd; -+ -+ if (idx >= USER_PTRS_PER_PGD) { -+ pmd = (pmd_t *)__get_free_page(GFP_KERNEL); -+ -+#ifndef CONFIG_XEN -+ if (pmd) -+ memcpy(pmd, -+ (void *)pgd_page_vaddr(swapper_pg_dir[idx]), -+ sizeof(pmd_t) * PTRS_PER_PMD); -+#endif -+ } else -+ pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL); -+ -+ return pmd; + /* in the non-PAE case, free_pgtables() clears user pgd entries */ +- kmem_cache_free(pgd_cache, pgd); ++ quicklist_free(0, pgd_dtor, pgd); +} + -+static void pmd_cache_free(pmd_t *pmd, int idx) ++void check_pgt_cache(void) +{ -+ if (idx >= USER_PTRS_PER_PGD) { -+ make_lowmem_page_writable(pmd, XENFEAT_writable_page_tables); -+ memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t)); -+ free_page((unsigned long)pmd); -+ } else -+ kmem_cache_free(pmd_cache, pmd); -+} -+ - pgd_t *pgd_alloc(struct mm_struct *mm) - { - int i; -- pgd_t *pgd = kmem_cache_alloc(pgd_cache, GFP_KERNEL); -- pmd_t **pmd; -+ pgd_t *pgd = quicklist_alloc(0, GFP_KERNEL, pgd_ctor); -+ pmd_t **pmds = NULL; - unsigned long flags; ++ quicklist_trim(0, pgd_dtor, 25, 16); + } - pgd_test_and_unpin(pgd); -@@ -296,37 +347,40 @@ pgd_t *pgd_alloc(struct mm_struct *mm) - if (PTRS_PER_PMD == 1 || !pgd) - return pgd; + void make_lowmem_page_readonly(void *va, unsigned int feature) +@@ -723,13 +765,13 @@ void mm_pin_all(void) + spin_unlock_irqrestore(&pgd_lock, flags); + } -- if (HAVE_SHARED_KERNEL_PMD) { -- for (i = 0; i < USER_PTRS_PER_PGD; ++i) { -- pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL); -- if (!pmd) -- goto out_oom; -- paravirt_alloc_pd(__pa(pmd) >> PAGE_SHIFT); -- set_pgd(&pgd[i], __pgd(1 + __pa(pmd))); -+#ifdef CONFIG_XEN -+ if (!SHARED_KERNEL_PMD) { -+ /* -+ * We can race save/restore (if we sleep during a GFP_KERNEL memory -+ * allocation). We therefore store virtual addresses of pmds as they -+ * do not change across save/restore, and poke the machine addresses -+ * into the pgdir under the pgd_lock. -+ */ -+ pmds = kmalloc(PTRS_PER_PGD * sizeof(pmd_t *), GFP_KERNEL); -+ if (!pmds) { -+ quicklist_free(0, pgd_dtor, pgd); -+ return NULL; - } -- return pgd; -- } -- -- /* -- * We can race save/restore (if we sleep during a GFP_KERNEL memory -- * allocation). We therefore store virtual addresses of pmds as they -- * do not change across save/restore, and poke the machine addresses -- * into the pgdir under the pgd_lock. -- */ -- pmd = kmalloc(PTRS_PER_PGD * sizeof(pmd_t *), GFP_KERNEL); -- if (!pmd) { -- kmem_cache_free(pgd_cache, pgd); -- return NULL; - } -+#endif +-void _arch_dup_mmap(struct mm_struct *mm) ++void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) + { + if (!test_bit(PG_pinned, &virt_to_page(mm->pgd)->flags)) + mm_pin(mm); + } - /* Allocate pmds, remember virtual addresses. */ -- for (i = 0; i < PTRS_PER_PGD; ++i) { -- pmd[i] = kmem_cache_alloc(pmd_cache, GFP_KERNEL); -- if (!pmd[i]) -+ for (i = 0; i < UNSHARED_PTRS_PER_PGD; ++i) { -+ pmd_t *pmd = pmd_cache_alloc(i); -+ -+ if (!pmd) - goto out_oom; -+ - paravirt_alloc_pd(__pa(pmd) >> PAGE_SHIFT); -+ if (pmds) -+ pmds[i] = pmd; -+ else -+ set_pgd(&pgd[i], __pgd(1 + __pa(pmd))); - } - -+#ifdef CONFIG_XEN -+ if (SHARED_KERNEL_PMD) -+ return pgd; -+ - spin_lock_irqsave(&pgd_lock, flags); +-void _arch_exit_mmap(struct mm_struct *mm) ++void arch_exit_mmap(struct mm_struct *mm) + { + struct task_struct *tsk = current; - /* Protect against save/restore: move below 4GB under pgd_lock. 
*/ -@@ -341,44 +395,43 @@ pgd_t *pgd_alloc(struct mm_struct *mm) +--- sle11-2009-04-20.orig/arch/x86/ia32/ia32entry-xen.S 2009-03-04 11:25:55.000000000 +0100 ++++ sle11-2009-04-20/arch/x86/ia32/ia32entry-xen.S 2008-12-15 11:27:22.000000000 +0100 +@@ -431,11 +431,7 @@ ia32_sys_call_table: + .quad sys_symlink + .quad sys_lstat + .quad sys_readlink /* 85 */ +-#ifdef CONFIG_IA32_AOUT + .quad sys_uselib +-#else +- .quad quiet_ni_syscall +-#endif + .quad sys_swapon + .quad sys_reboot + .quad compat_sys_old_readdir +@@ -574,7 +570,7 @@ ia32_sys_call_table: + .quad quiet_ni_syscall /* tux */ + .quad quiet_ni_syscall /* security */ + .quad sys_gettid +- .quad sys_readahead /* 225 */ ++ .quad sys32_readahead /* 225 */ + .quad sys_setxattr + .quad sys_lsetxattr + .quad sys_fsetxattr +@@ -599,7 +595,7 @@ ia32_sys_call_table: + .quad compat_sys_io_getevents + .quad compat_sys_io_submit + .quad sys_io_cancel +- .quad sys_fadvise64 /* 250 */ ++ .quad sys32_fadvise64 /* 250 */ + .quad quiet_ni_syscall /* free_huge_pages */ + .quad sys_exit_group + .quad sys32_lookup_dcookie +@@ -663,10 +659,14 @@ ia32_sys_call_table: + .quad compat_sys_set_robust_list + .quad compat_sys_get_robust_list + .quad sys_splice +- .quad sys_sync_file_range +- .quad sys_tee ++ .quad sys32_sync_file_range ++ .quad sys_tee /* 315 */ + .quad compat_sys_vmsplice + .quad compat_sys_move_pages + .quad sys_getcpu + .quad sys_epoll_pwait +-ia32_syscall_end: ++ .quad compat_sys_utimensat /* 320 */ ++ .quad compat_sys_signalfd ++ .quad compat_sys_timerfd ++ .quad sys_eventfd ++ia32_syscall_end: +--- sle11-2009-04-20.orig/arch/x86/kernel/acpi/sleep_64-xen.c 2009-03-04 11:25:55.000000000 +0100 ++++ sle11-2009-04-20/arch/x86/kernel/acpi/sleep_64-xen.c 2008-12-15 11:27:22.000000000 +0100 +@@ -60,19 +60,6 @@ unsigned long acpi_video_flags; + extern char wakeup_start, wakeup_end; - /* Copy kernel pmd contents and write-protect the new pmds. */ - for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) { -- unsigned long v = (unsigned long)i << PGDIR_SHIFT; -- pgd_t *kpgd = pgd_offset_k(v); -- pud_t *kpud = pud_offset(kpgd, v); -- pmd_t *kpmd = pmd_offset(kpud, v); -- memcpy(pmd[i], kpmd, PAGE_SIZE); -+ memcpy(pmds[i], -+ (void *)pgd_page_vaddr(swapper_pg_dir[i]), -+ sizeof(pmd_t) * PTRS_PER_PMD); - make_lowmem_page_readonly( -- pmd[i], XENFEAT_writable_page_tables); -+ pmds[i], XENFEAT_writable_page_tables); - } + extern unsigned long acpi_copy_wakeup_routine(unsigned long); +- +-static pgd_t low_ptr; +- +-static void init_low_mapping(void) +-{ +- pgd_t *slot0 = pgd_offset(current->mm, 0UL); +- low_ptr = *slot0; +- /* FIXME: We're playing with the current task's page tables here, which +- * is potentially dangerous on SMP systems. +- */ +- set_pgd(slot0, *pgd_offset(current->mm, PAGE_OFFSET)); +- local_flush_tlb(); +-} + #endif - /* It is safe to poke machine addresses of pmds under the pmd_lock. 
*/ - for (i = 0; i < PTRS_PER_PGD; i++) -- set_pgd(&pgd[i], __pgd(1 + __pa(pmd[i]))); -+ set_pgd(&pgd[i], __pgd(1 + __pa(pmds[i]))); + /** +@@ -84,8 +71,6 @@ static void init_low_mapping(void) + int acpi_save_state_mem(void) + { + #ifndef CONFIG_ACPI_PV_SLEEP +- init_low_mapping(); +- + memcpy((void *)acpi_wakeup_address, &wakeup_start, + &wakeup_end - &wakeup_start); + acpi_copy_wakeup_routine(acpi_wakeup_address); +@@ -98,10 +83,6 @@ int acpi_save_state_mem(void) + */ + void acpi_restore_state_mem(void) + { +-#ifndef CONFIG_ACPI_PV_SLEEP +- set_pgd(pgd_offset(current->mm, 0UL), low_ptr); +- local_flush_tlb(); +-#endif + } - /* Ensure this pgd gets picked up and pinned on save/restore. */ - pgd_list_add(pgd); + /** +@@ -115,10 +96,11 @@ void acpi_restore_state_mem(void) + void __init acpi_reserve_bootmem(void) + { + #ifndef CONFIG_ACPI_PV_SLEEP +- acpi_wakeup_address = (unsigned long)alloc_bootmem_low(PAGE_SIZE); +- if ((&wakeup_end - &wakeup_start) > PAGE_SIZE) ++ acpi_wakeup_address = (unsigned long)alloc_bootmem_low(PAGE_SIZE*2); ++ if ((&wakeup_end - &wakeup_start) > (PAGE_SIZE*2)) + printk(KERN_CRIT +- "ACPI: Wakeup code way too big, will crash on attempt to suspend\n"); ++ "ACPI: Wakeup code way too big, will crash on attempt" ++ " to suspend\n"); + #endif + } - spin_unlock_irqrestore(&pgd_lock, flags); +--- sle11-2009-04-20.orig/arch/x86/kernel/apic_64-xen.c 2009-03-04 11:28:34.000000000 +0100 ++++ sle11-2009-04-20/arch/x86/kernel/apic_64-xen.c 2008-12-15 11:27:22.000000000 +0100 +@@ -19,7 +19,6 @@ + #include + #include + #include +-#include + #include + #include + #include +--- sle11-2009-04-20.orig/arch/x86/kernel/e820_64-xen.c 2009-03-04 11:25:55.000000000 +0100 ++++ sle11-2009-04-20/arch/x86/kernel/e820_64-xen.c 2008-12-15 11:27:22.000000000 +0100 +@@ -17,6 +17,8 @@ + #include + #include + #include ++#include ++#include -- kfree(pmd); -+ kfree(pmds); -+#endif + #include + #include +@@ -28,7 +30,7 @@ - return pgd; + struct e820map e820 __initdata; + #ifdef CONFIG_XEN +-struct e820map machine_e820 __initdata; ++struct e820map machine_e820; + #endif - out_oom: -- if (HAVE_SHARED_KERNEL_PMD) { -+ if (!pmds) { - for (i--; i >= 0; i--) { - pgd_t pgdent = pgd[i]; - void* pmd = (void *)__va(pgd_val(pgdent)-1); - paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT); -- kmem_cache_free(pmd_cache, pmd); -+ pmd_cache_free(pmd, i); - } - } else { - for (i--; i >= 0; i--) { -- paravirt_release_pd(__pa(pmd[i]) >> PAGE_SHIFT); -- kmem_cache_free(pmd_cache, pmd[i]); -+ paravirt_release_pd(__pa(pmds[i]) >> PAGE_SHIFT); -+ pmd_cache_free(pmds[i], i); - } -- kfree(pmd); -+ kfree(pmds); - } -- kmem_cache_free(pgd_cache, pgd); -+ quicklist_free(0, pgd_dtor, pgd); - return NULL; + /* +@@ -291,22 +293,6 @@ void __init e820_reserve_resources(struc } -@@ -398,35 +451,24 @@ void pgd_free(pgd_t *pgd) - - /* in the PAE case user pgd entries are overwritten before usage */ - if (PTRS_PER_PMD > 1) { -- for (i = 0; i < USER_PTRS_PER_PGD; ++i) { -+ for (i = 0; i < UNSHARED_PTRS_PER_PGD; ++i) { - pgd_t pgdent = pgd[i]; - void* pmd = (void *)__va(pgd_val(pgdent)-1); - paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT); -- kmem_cache_free(pmd_cache, pmd); -+ pmd_cache_free(pmd, i); - } - -- if (!HAVE_SHARED_KERNEL_PMD) { -- unsigned long flags; -- spin_lock_irqsave(&pgd_lock, flags); -- pgd_list_del(pgd); -- spin_unlock_irqrestore(&pgd_lock, flags); + #ifndef CONFIG_XEN +-/* Mark pages corresponding to given address range as nosave */ +-static void __init +-e820_mark_nosave_range(unsigned long start, unsigned long end) +-{ +- 
unsigned long pfn, max_pfn; - -- for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) { -- pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1); -- make_lowmem_page_writable( -- pmd, XENFEAT_writable_page_tables); -- memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t)); -- kmem_cache_free(pmd_cache, pmd); -- } +- if (start >= end) +- return; - -- if (!xen_feature(XENFEAT_pae_pgdir_above_4gb)) -- xen_destroy_contiguous_region( -- (unsigned long)pgd, 0); -- } -+ if (!xen_feature(XENFEAT_pae_pgdir_above_4gb)) -+ xen_destroy_contiguous_region((unsigned long)pgd, 0); - } +- printk("Nosave address range: %016lx - %016lx\n", start, end); +- max_pfn = end >> PAGE_SHIFT; +- for (pfn = start >> PAGE_SHIFT; pfn < max_pfn; pfn++) +- if (pfn_valid(pfn)) +- SetPageNosave(pfn_to_page(pfn)); +-} +- + /* + * Find the ranges of physical addresses that do not correspond to + * e820 RAM areas and mark the corresponding pages as nosave for software +@@ -325,13 +311,13 @@ void __init e820_mark_nosave_regions(voi + struct e820entry *ei = &e820.map[i]; - /* in the non-PAE case, free_pgtables() clears user pgd entries */ -- kmem_cache_free(pgd_cache, pgd); -+ quicklist_free(0, pgd_dtor, pgd); -+} -+ -+void check_pgt_cache(void) -+{ -+ quicklist_trim(0, pgd_dtor, 25, 16); - } - - void make_lowmem_page_readonly(void *va, unsigned int feature) -@@ -723,13 +765,13 @@ void mm_pin_all(void) - spin_unlock_irqrestore(&pgd_lock, flags); - } - --void _arch_dup_mmap(struct mm_struct *mm) -+void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) - { - if (!test_bit(PG_pinned, &virt_to_page(mm->pgd)->flags)) - mm_pin(mm); - } - --void _arch_exit_mmap(struct mm_struct *mm) -+void arch_exit_mmap(struct mm_struct *mm) - { - struct task_struct *tsk = current; - ---- a/drivers/char/tpm/tpm_xen.c -+++ b/drivers/char/tpm/tpm_xen.c -@@ -463,7 +463,7 @@ static int tpmif_connect(struct xenbus_d - tp->backend_id = domid; - - err = bind_listening_port_to_irqhandler( -- domid, tpmif_int, SA_SAMPLE_RANDOM, "tpmif", tp); -+ domid, tpmif_int, IRQF_SAMPLE_RANDOM, "tpmif", tp); - if (err <= 0) { - WPRINTK("bind_listening_port_to_irqhandler failed " - "(err=%d)\n", err); ---- a/drivers/pci/msi-xen.c -+++ b/drivers/pci/msi-xen.c -@@ -12,16 +12,15 @@ - #include - #include - #include --#include - #include - #include - #include -+#include + if (paddr < ei->addr) +- e820_mark_nosave_range(paddr, +- round_up(ei->addr, PAGE_SIZE)); ++ register_nosave_region(PFN_DOWN(paddr), ++ PFN_UP(ei->addr)); - #include + paddr = round_down(ei->addr + ei->size, PAGE_SIZE); + if (ei->type != E820_RAM) +- e820_mark_nosave_range(round_up(ei->addr, PAGE_SIZE), +- paddr); ++ register_nosave_region(PFN_UP(ei->addr), ++ PFN_DOWN(paddr)); - #include - #include --#include + if (paddr >= (end_pfn << PAGE_SHIFT)) + break; +--- sle11-2009-04-20.orig/arch/x86/kernel/early_printk-xen.c 2009-03-04 11:28:34.000000000 +0100 ++++ sle11-2009-04-20/arch/x86/kernel/early_printk-xen.c 2008-12-15 11:27:22.000000000 +0100 +@@ -11,11 +11,10 @@ - #include "pci.h" - #include "msi.h" -@@ -154,6 +153,7 @@ int register_msi_get_owner(int (*func)(s - get_owner = func; - return 0; - } -+EXPORT_SYMBOL(register_msi_get_owner); + #ifdef __i386__ + #include +-#define VGABASE (__ISA_IO_base + 0xb8000) + #else + #include +-#define VGABASE ((void __iomem *)0xffffffff800b8000UL) + #endif ++#define VGABASE (__ISA_IO_base + 0xb8000) - int unregister_msi_get_owner(int (*func)(struct pci_dev *dev)) + #ifndef CONFIG_XEN + static int max_ypos = 25, max_xpos = 80; +@@ -93,9 +92,9 @@ static int early_serial_putc(unsigned ch 
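Note: the deleted e820_mark_nosave_range() above set PG_nosave one pfn at a time; since 2.6.22 the swsusp core records whole pfn ranges up front, which is what the register_nosave_region() calls in this hunk do. An equivalent helper, in sketch form and with the same rounding as the calls above:

	/* exclude a physical hole [start, end) from hibernation images */
	static void __init mark_nosave_hole(u64 start, u64 end)
	{
		if (start < end)
			register_nosave_region(PFN_DOWN(start), PFN_UP(end));
	}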
+ static void early_serial_write(struct console *con, const char *s, unsigned n) { -@@ -162,6 +162,7 @@ int unregister_msi_get_owner(int (*func) - get_owner = NULL; - return 0; + while (*s && n-- > 0) { +- early_serial_putc(*s); + if (*s == '\n') + early_serial_putc('\r'); ++ early_serial_putc(*s); + s++; + } + } +@@ -205,7 +204,7 @@ static noinline long simnow(long cmd, lo + return ret; } -+EXPORT_SYMBOL(unregister_msi_get_owner); - static int msi_get_dev_owner(struct pci_dev *dev) +-void __init simnow_init(char *str) ++static void __init simnow_init(char *str) { -@@ -263,11 +264,6 @@ static int msi_map_vector(struct pci_dev - return msi_map_pirq_to_vector(dev, -1, entry_nr, table_base); + char *fn = "klog"; + if (*str == '=') +@@ -277,22 +276,12 @@ static int __init setup_early_printk(cha + early_console = &simnow_console; + keep_early = 1; + } ++ ++ if (keep_early) ++ early_console->flags &= ~CON_BOOT; ++ else ++ early_console->flags |= CON_BOOT; + register_console(early_console); + return 0; } - --static int msi_init(void) +- + early_param("earlyprintk", setup_early_printk); +- +-void __init disable_early_printk(void) -{ -- return 0; +- if (!early_console_initialized || !early_console) +- return; +- if (!keep_early) { +- printk("disabling early console\n"); +- unregister_console(early_console); +- early_console_initialized = 0; +- } else { +- printk("keeping early console\n"); +- } -} - - #ifdef CONFIG_PM - static void __pci_restore_msi_state(struct pci_dev *dev) - { -@@ -434,21 +430,32 @@ static int msix_capability_init(struct p - } - - /** -- * pci_msi_supported - check whether MSI may be enabled on device -+ * pci_msi_check_device - check whether MSI may be enabled on a device - * @dev: pointer to the pci_dev data structure of MSI device function -+ * @nvec: how many MSIs have been requested ? -+ * @type: are we checking for MSI or MSI-X ? - * - * Look at global flags, the device itself, and its parent busses -- * to return 0 if MSI are supported for the device. -+ * to determine if MSI/-X are supported for the device. If MSI/-X is -+ * supported return 0, else return an error code. - **/ --static --int pci_msi_supported(struct pci_dev * dev) -+static int pci_msi_check_device(struct pci_dev* dev, int nvec, int type) - { - struct pci_bus *bus; -+ int ret; - - /* MSI must be globally enabled and supported by the device */ - if (!pci_msi_enable || !dev || dev->no_msi) - return -EINVAL; - -+ /* -+ * You can't ask to have 0 or less MSIs configured. -+ * a) it's stupid .. -+ * b) the list manipulation code assumes nvec >= 1. -+ */ -+ if (nvec < 1) -+ return -ERANGE; +--- sle11-2009-04-20.orig/arch/x86/kernel/entry_64-xen.S 2009-03-04 11:25:55.000000000 +0100 ++++ sle11-2009-04-20/arch/x86/kernel/entry_64-xen.S 2008-12-15 11:27:22.000000000 +0100 +@@ -1254,3 +1254,10 @@ ENTRY(call_softirq) + ret + CFI_ENDPROC + ENDPROC(call_softirq) + - /* Any bridge which does NOT route MSI transactions from it's - * secondary bus to it's primary bus must set NO_MSI flag on - * the secondary pci_bus. 
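Note: setup_early_printk() above replaces the hand-rolled disable_early_printk() with the generic CON_BOOT mechanism: a console registered with CON_BOOT is unregistered automatically once a real console comes up, and keep_early simply clears the flag so the early console survives. The registration side, in sketch form (the write hook is assumed to be the one defined above):

	static struct console early_console_example = {
		.name	= "earlyser",
		.write	= early_serial_write,
		.flags	= CON_PRINTBUFFER | CON_BOOT, /* auto-removed later */
		.index	= -1,
	};

	/* register_console(&early_console_example); */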
-@@ -459,6 +466,13 @@ int pci_msi_supported(struct pci_dev * d - if (bus->bus_flags & PCI_BUS_FLAGS_NO_MSI) - return -EINVAL; ++KPROBE_ENTRY(ignore_sysret) ++ CFI_STARTPROC ++ mov $-ENOSYS,%eax ++ HYPERVISOR_IRET 0 ++ CFI_ENDPROC ++ENDPROC(ignore_sysret) +--- sle11-2009-04-20.orig/arch/x86/kernel/genapic_64-xen.c 2009-03-04 11:25:55.000000000 +0100 ++++ sle11-2009-04-20/arch/x86/kernel/genapic_64-xen.c 2008-12-15 11:27:22.000000000 +0100 +@@ -11,123 +11,57 @@ + #include + #include + #include ++#include + #include + #include + #include +-#include -+ ret = arch_msi_check_device(dev, nvec, type); -+ if (ret) -+ return ret; -+ -+ if (!pci_find_capability(dev, type)) -+ return -EINVAL; -+ - return 0; - } + #include + #include ++#include -@@ -476,18 +490,15 @@ extern int pci_frontend_enable_msi(struc - int pci_enable_msi(struct pci_dev* dev) - { - struct pci_bus *bus; -- int pos, temp, status; -- -- if (pci_msi_supported(dev) < 0) -- return -EINVAL; -+ int temp, status; +-#if defined(CONFIG_ACPI) ++#ifdef CONFIG_ACPI + #include + #endif - for (bus = dev->bus; bus; bus = bus->parent) - if (bus->bus_flags & PCI_BUS_FLAGS_NO_MSI) - return -EINVAL; + /* which logical CPU number maps to which CPU (physical APIC ID) */ +-u8 x86_cpu_to_apicid[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; ++u8 x86_cpu_to_apicid[NR_CPUS] __read_mostly ++ = { [0 ... NR_CPUS-1] = BAD_APICID }; + EXPORT_SYMBOL(x86_cpu_to_apicid); +-u8 x86_cpu_to_log_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID }; -- status = msi_init(); -- if (status < 0) -- return status; -+ status = pci_msi_check_device(dev, 1, PCI_CAP_ID_MSI); -+ if (status) -+ return status; +-extern struct genapic apic_cluster; +-extern struct genapic apic_flat; +-extern struct genapic apic_physflat; ++u8 x86_cpu_to_log_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID }; - #ifdef CONFIG_XEN_PCIDEV_FRONTEND - if (!is_initial_xendomain()) -@@ -508,10 +519,6 @@ int pci_enable_msi(struct pci_dev* dev) + #ifndef CONFIG_XEN +-struct genapic *genapic = &apic_flat; +-struct genapic *genapic_force; ++struct genapic __read_mostly *genapic = &apic_flat; + #else + extern struct genapic apic_xen; +-struct genapic *genapic = &apic_xen; ++struct genapic __read_mostly *genapic = &apic_xen; + #endif - temp = dev->irq; -- pos = pci_find_capability(dev, PCI_CAP_ID_MSI); -- if (!pos) -- return -EINVAL; + /* + * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode. + */ +-void __init clustered_apic_check(void) ++void __init setup_apic_routing(void) + { + #ifndef CONFIG_XEN +- long i; +- u8 clusters, max_cluster; +- u8 id; +- u8 cluster_cnt[NUM_APIC_CLUSTERS]; +- int max_apic = 0; - - /* Check whether driver already requested for MSI-X irqs */ - if (dev->msix_enabled) { - printk(KERN_INFO "PCI: %s: Can't enable MSI. " -@@ -526,15 +533,14 @@ int pci_enable_msi(struct pci_dev* dev) +- /* genapic selection can be forced because of certain quirks. +- */ +- if (genapic_force) { +- genapic = genapic_force; +- goto print; +- } +- +-#if defined(CONFIG_ACPI) ++#ifdef CONFIG_ACPI + /* +- * Some x86_64 machines use physical APIC mode regardless of how many +- * procs/clusters are present (x86_64 ES7000 is an example). ++ * Quirk: some x86_64 machines can only use physical APIC mode ++ * regardless of how many processors are present (x86_64 ES7000 ++ * is an example). 
+ */ +- if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID) +- if (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL) { +- genapic = &apic_cluster; +- goto print; +- } +-#endif +- +- memset(cluster_cnt, 0, sizeof(cluster_cnt)); +- for (i = 0; i < NR_CPUS; i++) { +- id = bios_cpu_apicid[i]; +- if (id == BAD_APICID) +- continue; +- if (id > max_apic) +- max_apic = id; +- cluster_cnt[APIC_CLUSTERID(id)]++; +- } +- +- /* Don't use clustered mode on AMD platforms. */ +- if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { ++ if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID && ++ (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) + genapic = &apic_physflat; +-#ifndef CONFIG_HOTPLUG_CPU +- /* In the CPU hotplug case we cannot use broadcast mode +- because that opens a race when a CPU is removed. +- Stay at physflat mode in this case. +- It is bad to do this unconditionally though. Once +- we have ACPI platform support for CPU hotplug +- we should detect hotplug capablity from ACPI tables and +- only do this when really needed. -AK */ +- if (max_apic <= 8) +- genapic = &apic_flat; +-#endif +- goto print; +- } +- +- clusters = 0; +- max_cluster = 0; +- +- for (i = 0; i < NUM_APIC_CLUSTERS; i++) { +- if (cluster_cnt[i] > 0) { +- ++clusters; +- if (cluster_cnt[i] > max_cluster) +- max_cluster = cluster_cnt[i]; +- } +- } ++ else ++#endif - return status; - } -+EXPORT_SYMBOL(pci_enable_msi); +- /* +- * If we have clusters <= 1 and CPUs <= 8 in cluster 0, then flat mode, +- * else if max_cluster <= 4 and cluster_cnt[15] == 0, clustered logical +- * else physical mode. +- * (We don't use lowest priority delivery + HW APIC IRQ steering, so +- * can ignore the clustered logical case and go straight to physical.) +- */ +- if (clusters <= 1 && max_cluster <= 8 && cluster_cnt[0] == max_cluster) { +-#ifdef CONFIG_HOTPLUG_CPU +- /* Don't use APIC shortcuts in CPU hotplug to avoid races */ +- genapic = &apic_physflat; +-#else ++ if (cpus_weight(cpu_possible_map) <= 8) + genapic = &apic_flat; +-#endif +- } else +- genapic = &apic_cluster; ++ else ++ genapic = &apic_physflat; - extern void pci_frontend_disable_msi(struct pci_dev* dev); - void pci_disable_msi(struct pci_dev* dev) - { - int pirq; +-print: + #else + /* hardcode to xen apic functions */ + genapic = &apic_xen; +@@ -135,7 +69,7 @@ print: + printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name); + } -- if (!pci_msi_enable) -- return; -- if (!dev) -+ if (!pci_msi_enable || !dev) - return; +-/* Same for both flat and clustered. */ ++/* Same for both flat and physical. 
*/ - #ifdef CONFIG_XEN_PCIDEV_FRONTEND -@@ -559,6 +565,7 @@ void pci_disable_msi(struct pci_dev* dev - pci_intx(dev, 1); /* enable intx */ - dev->msi_enabled = 0; - } -+EXPORT_SYMBOL(pci_disable_msi); + #ifdef CONFIG_XEN + extern void xen_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest); +--- sle11-2009-04-20.orig/arch/x86/kernel/genapic_xen_64.c 2009-03-04 11:28:34.000000000 +0100 ++++ sle11-2009-04-20/arch/x86/kernel/genapic_xen_64.c 2008-12-15 11:27:22.000000000 +0100 +@@ -21,9 +21,8 @@ + #include + #else + #include +-#include +-#include + #endif ++#include + #include - /** - * pci_enable_msix - configure device's MSI-X capability structure -@@ -583,7 +590,7 @@ int pci_enable_msix(struct pci_dev* dev, - int i, j, temp; - u16 control; + DECLARE_PER_CPU(int, ipi_to_irq[NR_IPIS]); +--- sle11-2009-04-20.orig/arch/x86/kernel/head_64-xen.S 2009-03-04 11:28:34.000000000 +0100 ++++ sle11-2009-04-20/arch/x86/kernel/head_64-xen.S 2008-12-15 11:27:22.000000000 +0100 +@@ -5,6 +5,7 @@ + * Copyright (C) 2000 Pavel Machek + * Copyright (C) 2000 Karsten Keil + * Copyright (C) 2001,2002 Andi Kleen ++ * Copyright (C) 2005 Eric Biederman + * Jun Nakajima + * Modified for Xen + */ +@@ -34,27 +35,15 @@ startup_64: + pushq $0 # fake return address + jmp x86_64_start_kernel -- if (!entries || pci_msi_supported(dev) < 0) -+ if (!entries) - return -EINVAL; +-#ifdef CONFIG_ACPI_SLEEP +-.org 0xf00 +- .globl pGDT32 +-pGDT32: +- .word gdt_end-cpu_gdt_table-1 +- .long cpu_gdt_table-__START_KERNEL_map +-#endif +-ENTRY(stext) +-ENTRY(_stext) ++.balign PAGE_SIZE - #ifdef CONFIG_XEN_PCIDEV_FRONTEND -@@ -621,14 +628,11 @@ int pci_enable_msix(struct pci_dev* dev, - } - #endif +- $page = 0 + #define NEXT_PAGE(name) \ +- $page = $page + 1; \ +- .org $page * 0x1000; \ +- phys_##name = $page * 0x1000 + __PHYSICAL_START; \ ++ .balign PAGE_SIZE; \ ++ phys_##name = . - .bootstrap.text; \ + ENTRY(name) -- status = msi_init(); -- if (status < 0) -+ status = pci_msi_check_device(dev, nvec, PCI_CAP_ID_MSIX); -+ if (status) - return status; + NEXT_PAGE(init_level4_pgt) +- /* This gets initialized in x86_64_start_kernel */ + .fill 512,8,0 +-NEXT_PAGE(init_level4_user_pgt) + /* + * We update two pgd entries to make kernel and user pgd consistent + * at pgd_populate(). It can be used for kernel modules. So we place +@@ -101,14 +90,6 @@ NEXT_PAGE(hypercall_page) + #undef NEXT_PAGE - pos = pci_find_capability(dev, PCI_CAP_ID_MSIX); -- if (!pos) -- return -EINVAL; + .data +-/* Just dummy symbol to allow compilation. 
Not used in sleep path */ +-#ifdef CONFIG_ACPI_SLEEP +- .align PAGE_SIZE +-ENTRY(wakeup_level4_pgt) +- .fill 512,8,0 +-#endif - - pci_read_config_word(dev, msi_control_reg(pos), &control); - nr_entries = multi_msix_capable(control); - if (nvec > nr_entries) -@@ -660,6 +664,7 @@ int pci_enable_msix(struct pci_dev* dev, +- .data - return status; - } -+EXPORT_SYMBOL(pci_enable_msix); + .align 16 + .globl cpu_gdt_descr +@@ -136,13 +117,13 @@ gdt: - extern void pci_frontend_disable_msix(struct pci_dev* dev); - void pci_disable_msix(struct pci_dev* dev) -@@ -699,6 +704,7 @@ void pci_disable_msix(struct pci_dev* de - pci_intx(dev, 1); /* enable intx */ - dev->msix_enabled = 0; - } -+EXPORT_SYMBOL(pci_disable_msix); + ENTRY(cpu_gdt_table) + .quad 0x0000000000000000 /* NULL descriptor */ ++ .quad 0x00cf9b000000ffff /* __KERNEL32_CS */ ++ .quad 0x00af9b000000ffff /* __KERNEL_CS */ ++ .quad 0x00cf93000000ffff /* __KERNEL_DS */ ++ .quad 0x00cffb000000ffff /* __USER32_CS */ ++ .quad 0x00cff3000000ffff /* __USER_DS, __USER32_DS */ ++ .quad 0x00affb000000ffff /* __USER_CS */ + .quad 0x0 /* unused */ +- .quad 0x00af9a000000ffff /* __KERNEL_CS */ +- .quad 0x00cf92000000ffff /* __KERNEL_DS */ +- .quad 0x00cffa000000ffff /* __USER32_CS */ +- .quad 0x00cff2000000ffff /* __USER_DS, __USER32_DS */ +- .quad 0x00affa000000ffff /* __USER_CS */ +- .quad 0x00cf9a000000ffff /* __KERNEL32_CS */ + .quad 0,0 /* TSS */ + .quad 0,0 /* LDT */ + .quad 0,0,0 /* three TLS descriptors */ +@@ -165,14 +146,11 @@ ENTRY(empty_zero_page) + * __xen_guest information + */ + .macro utoh value +- .if (\value) < 0 || (\value) >= 0x10 +- utoh (((\value)>>4)&0x0fffffffffffffff) +- .endif +- .if ((\value) & 0xf) < 10 +- .byte '0' + ((\value) & 0xf) +- .else +- .byte 'A' + ((\value) & 0xf) - 10 +- .endif ++ i = 64 ++ .rept 16 ++ i = i - 4 ++ .byte '0' + ((((\value) >> i) & 0xf) > 9) * ('0' - 'A' + 10) + (((\value) >> i) & 0xf) ++ .endr + .endm - /** - * msi_remove_pci_irq_vectors - reclaim MSI(X) irqs to unused state -@@ -742,12 +748,57 @@ void pci_no_msi(void) - pci_msi_enable = 0; - } + .section __xen_guest +--- sle11-2009-04-20.orig/arch/x86/kernel/head64-xen.c 2009-03-04 11:25:55.000000000 +0100 ++++ sle11-2009-04-20/arch/x86/kernel/head64-xen.c 2008-12-15 11:27:22.000000000 +0100 +@@ -25,13 +25,21 @@ + #include + #include + #include ++#include + #include + + unsigned long start_pfn; --EXPORT_SYMBOL(pci_enable_msi); --EXPORT_SYMBOL(pci_disable_msi); --EXPORT_SYMBOL(pci_enable_msix); --EXPORT_SYMBOL(pci_disable_msix); --#ifdef CONFIG_XEN --EXPORT_SYMBOL(register_msi_get_owner); --EXPORT_SYMBOL(unregister_msi_get_owner); -+void pci_msi_init_pci_dev(struct pci_dev *dev) -+{ +#ifndef CONFIG_XEN -+ INIT_LIST_HEAD(&dev->msi_list); - #endif -+} -+ -+ -+/* Arch hooks */ -+ -+int __attribute__ ((weak)) -+arch_msi_check_device(struct pci_dev* dev, int nvec, int type) ++static void __init zap_identity_mappings(void) +{ -+ return 0; ++ pgd_t *pgd = pgd_offset_k(0UL); ++ pgd_clear(pgd); ++ __flush_tlb(); +} + + /* Don't add a printk in there. printk relies on the PDA which is not initialized + yet. 
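Note: zap_identity_mappings() above exists because the 2.6.22 boot path keeps the early identity mapping alive a little longer; once the kernel executes from its high mapping, the V==P alias at virtual 0 is torn down so NULL dereferences fault immediately. The whole job is one pgd slot, restated here with comments:

	static void __init zap_identity_mappings(void)
	{
		pgd_t *pgd = pgd_offset_k(0UL);	/* slot covering vaddr 0 */

		pgd_clear(pgd);
		__flush_tlb();		/* drop stale identity TLB entries */
	}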
*/ +-#if 0 + static void __init clear_bss(void) + { + memset(__bss_start, 0, +@@ -40,26 +48,25 @@ static void __init clear_bss(void) + #endif + + #define NEW_CL_POINTER 0x228 /* Relative to real mode data */ +-#define OLD_CL_MAGIC_ADDR 0x90020 ++#define OLD_CL_MAGIC_ADDR 0x20 + #define OLD_CL_MAGIC 0xA33F +-#define OLD_CL_BASE_ADDR 0x90000 +-#define OLD_CL_OFFSET 0x90022 ++#define OLD_CL_OFFSET 0x22 + + static void __init copy_bootdata(char *real_mode_data) + { + #ifndef CONFIG_XEN +- int new_data; ++ unsigned long new_data; + char * command_line; + + memcpy(x86_boot_params, real_mode_data, BOOT_PARAM_SIZE); +- new_data = *(int *) (x86_boot_params + NEW_CL_POINTER); ++ new_data = *(u32 *) (x86_boot_params + NEW_CL_POINTER); + if (!new_data) { +- if (OLD_CL_MAGIC != * (u16 *) OLD_CL_MAGIC_ADDR) { ++ if (OLD_CL_MAGIC != *(u16 *)(real_mode_data + OLD_CL_MAGIC_ADDR)) { + return; + } +- new_data = OLD_CL_BASE_ADDR + * (u16 *) OLD_CL_OFFSET; ++ new_data = __pa(real_mode_data) + *(u16 *)(real_mode_data + OLD_CL_OFFSET); + } +- command_line = (char *) ((u64)(new_data)); ++ command_line = __va(new_data); + memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE); + #else + int max_cmdline; +@@ -101,10 +108,13 @@ void __init x86_64_start_kernel(char * r + while ((1UL << machine_to_phys_order) < machine_to_phys_nr_ents ) + machine_to_phys_order++; + +-#if 0 +#ifndef CONFIG_XEN -+int __attribute__ ((weak)) -+arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *entry) -+{ -+ return 0; -+} -+ -+int __attribute__ ((weak)) -+arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) -+{ -+ struct msi_desc *entry; -+ int ret; + /* clear bss before set_intr_gate with early_idt_handler */ + clear_bss(); -+ list_for_each_entry(entry, &dev->msi_list, list) { -+ ret = arch_setup_msi_irq(dev, entry); -+ if (ret) -+ return ret; -+ } -+ -+ return 0; -+} -+ -+void __attribute__ ((weak)) arch_teardown_msi_irq(unsigned int irq) -+{ -+ return; -+} -+ -+void __attribute__ ((weak)) -+arch_teardown_msi_irqs(struct pci_dev *dev) -+{ -+ struct msi_desc *entry; ++ /* Make NULL pointers segfault */ ++ zap_identity_mappings(); + -+ list_for_each_entry(entry, &dev->msi_list, list) { -+ if (entry->irq != 0) -+ arch_teardown_msi_irq(entry->irq); -+ } -+} -+#endif ---- a/drivers/xen/blkfront/blkfront.c -+++ b/drivers/xen/blkfront/blkfront.c -@@ -241,7 +241,7 @@ static int setup_blkring(struct xenbus_d - info->ring_ref = err; + for (i = 0; i < IDT_ENTRIES; i++) + set_intr_gate(i, early_idt_handler); + asm volatile("lidt %0" :: "m" (idt_descr)); +@@ -116,7 +126,7 @@ void __init x86_64_start_kernel(char * r + cpu_pda(i) = &boot_cpu_pda[i]; - err = bind_listening_port_to_irqhandler( -- dev->otherend_id, blkif_int, SA_SAMPLE_RANDOM, "blkif", info); -+ dev->otherend_id, blkif_int, IRQF_SAMPLE_RANDOM, "blkif", info); - if (err <= 0) { - xenbus_dev_fatal(dev, err, - "bind_listening_port_to_irqhandler"); ---- a/drivers/xen/char/mem.c -+++ b/drivers/xen/char/mem.c -@@ -18,7 +18,6 @@ - #include - #include - #include + pda_init(0); +- copy_bootdata(real_mode_data); ++ copy_bootdata(__va(real_mode_data)); + #ifdef CONFIG_SMP + cpu_set(0, cpu_online_map); + #endif +--- sle11-2009-04-20.orig/arch/x86/kernel/io_apic_64-xen.c 2009-03-04 11:25:55.000000000 +0100 ++++ sle11-2009-04-20/arch/x86/kernel/io_apic_64-xen.c 2008-12-15 11:27:22.000000000 +0100 +@@ -25,7 +25,6 @@ + #include + #include + #include -#include - #include - #include - #include ---- a/drivers/xen/core/hypervisor_sysfs.c -+++ b/drivers/xen/core/hypervisor_sysfs.c -@@ -50,7 
+50,7 @@ static int __init hypervisor_subsys_init - if (!is_running_on_xen()) - return -ENODEV; - -- hypervisor_subsys.kset.kobj.ktype = &hyp_sysfs_kobj_type; -+ hypervisor_subsys.kobj.ktype = &hyp_sysfs_kobj_type; - return 0; + #include + #include + #include +@@ -904,10 +903,6 @@ static void __init setup_ExtINT_IRQ0_pin + enable_8259A_irq(0); } ---- a/drivers/xen/core/smpboot.c -+++ b/drivers/xen/core/smpboot.c -@@ -165,13 +165,12 @@ static void xen_smp_intr_exit(unsigned i - - void __cpuinit cpu_bringup(void) +-void __init UNEXPECTED_IO_APIC(void) +-{ +-} +- + void __apicdebuginit print_IO_APIC(void) { -+ cpu_init(); - #ifdef __i386__ -- cpu_set_gdt(current_thread_info()->cpu); -- secondary_cpu_init(); -+ identify_secondary_cpu(cpu_data + smp_processor_id()); - #else -- cpu_init(); --#endif - identify_cpu(cpu_data + smp_processor_id()); -+#endif - touch_softlockup_watchdog(); - preempt_disable(); - local_irq_enable(); -@@ -191,11 +190,6 @@ static void __cpuinit cpu_initialize_con - static DEFINE_SPINLOCK(ctxt_lock); + int apic, i; +@@ -943,40 +938,16 @@ void __apicdebuginit print_IO_APIC(void) + printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid); + printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); + printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); +- if (reg_00.bits.__reserved_1 || reg_00.bits.__reserved_2) +- UNEXPECTED_IO_APIC(); - struct task_struct *idle = idle_task(cpu); --#ifdef __x86_64__ -- struct desc_ptr *gdt_descr = &cpu_gdt_descr[cpu]; --#else -- struct Xgt_desc_struct *gdt_descr = &per_cpu(cpu_gdt_descr, cpu); --#endif + printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)®_01); + printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries); +- if ( (reg_01.bits.entries != 0x0f) && /* older (Neptune) boards */ +- (reg_01.bits.entries != 0x17) && /* typical ISA+PCI boards */ +- (reg_01.bits.entries != 0x1b) && /* Compaq Proliant boards */ +- (reg_01.bits.entries != 0x1f) && /* dual Xeon boards */ +- (reg_01.bits.entries != 0x22) && /* bigger Xeon boards */ +- (reg_01.bits.entries != 0x2E) && +- (reg_01.bits.entries != 0x3F) && +- (reg_01.bits.entries != 0x03) +- ) +- UNEXPECTED_IO_APIC(); - if (cpu_test_and_set(cpu, cpu_initialized_map)) - return; -@@ -218,11 +212,11 @@ static void __cpuinit cpu_initialize_con - smp_trap_init(ctxt.trap_ctxt); + printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ); + printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version); +- if ( (reg_01.bits.version != 0x01) && /* 82489DX IO-APICs */ +- (reg_01.bits.version != 0x02) && /* 82801BA IO-APICs (ICH2) */ +- (reg_01.bits.version != 0x10) && /* oldest IO-APICs */ +- (reg_01.bits.version != 0x11) && /* Pentium/Pro IO-APICs */ +- (reg_01.bits.version != 0x13) && /* Xeon IO-APICs */ +- (reg_01.bits.version != 0x20) /* Intel P64H (82806 AA) */ +- ) +- UNEXPECTED_IO_APIC(); +- if (reg_01.bits.__reserved_1 || reg_01.bits.__reserved_2) +- UNEXPECTED_IO_APIC(); - ctxt.ldt_ents = 0; -- -- ctxt.gdt_frames[0] = virt_to_mfn(gdt_descr->address); -- ctxt.gdt_ents = gdt_descr->size / 8; -+ ctxt.gdt_ents = GDT_SIZE / 8; + if (reg_01.bits.version >= 0x10) { + printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw); + printk(KERN_DEBUG "....... 
: arbitration: %02X\n", reg_02.bits.arbitration); +- if (reg_02.bits.__reserved_1 || reg_02.bits.__reserved_2) +- UNEXPECTED_IO_APIC(); + } - #ifdef __i386__ -+ ctxt.gdt_frames[0] = virt_to_mfn(get_cpu_gdt_table(cpu)); -+ - ctxt.user_regs.cs = __KERNEL_CS; - ctxt.user_regs.esp = idle->thread.esp0 - sizeof(struct pt_regs); + printk(KERN_DEBUG ".... IRQ redirection table:\n"); +@@ -1408,8 +1379,7 @@ static void irq_complete_move(unsigned i -@@ -235,7 +229,11 @@ static void __cpuinit cpu_initialize_con - ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback; + vector = ~get_irq_regs()->orig_rax; + me = smp_processor_id(); +- if ((vector == cfg->vector) && +- cpu_isset(smp_processor_id(), cfg->domain)) { ++ if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) { + cpumask_t cleanup_mask; - ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir)); -+ -+ ctxt.user_regs.fs = __KERNEL_PERCPU; - #else /* __x86_64__ */ -+ ctxt.gdt_frames[0] = virt_to_mfn(cpu_gdt_descr[cpu].address); -+ - ctxt.user_regs.cs = __KERNEL_CS; - ctxt.user_regs.esp = idle->thread.rsp0 - sizeof(struct pt_regs); + cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); +@@ -1444,7 +1414,7 @@ static void ack_apic_level(unsigned int -@@ -265,9 +263,8 @@ void __init smp_prepare_cpus(unsigned in - struct vcpu_get_physid cpu_id; - #ifdef __x86_64__ - struct desc_ptr *gdt_descr; --#else -- struct Xgt_desc_struct *gdt_descr; - #endif -+ void *gdt_addr; + /* + * We must acknowledge the irq before we move it or the acknowledge will +- * not propogate properly. ++ * not propagate properly. + */ + ack_APIC_irq(); - apicid = 0; - if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, 0, &cpu_id) == 0) -@@ -317,14 +314,12 @@ void __init smp_prepare_cpus(unsigned in - } - gdt_descr->size = GDT_SIZE; - memcpy((void *)gdt_descr->address, cpu_gdt_table, GDT_SIZE); -+ gdt_addr = (void *)gdt_descr->address; - #else -- if (unlikely(!init_gdt(cpu, idle))) -- continue; -- gdt_descr = &per_cpu(cpu_gdt_descr, cpu); -+ init_gdt(cpu); -+ gdt_addr = get_cpu_gdt_table(cpu); - #endif -- make_page_readonly( -- (void *)gdt_descr->address, -- XENFEAT_writable_descriptor_tables); -+ make_page_readonly(gdt_addr, XENFEAT_writable_descriptor_tables); - - apicid = cpu; - if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, cpu, &cpu_id) == 0) -@@ -338,7 +333,9 @@ void __init smp_prepare_cpus(unsigned in - #ifdef __x86_64__ - cpu_pda(cpu)->pcurrent = idle; - cpu_pda(cpu)->cpunumber = cpu; -- clear_ti_thread_flag(idle->thread_info, TIF_FORK); -+ clear_ti_thread_flag(task_thread_info(idle), TIF_FORK); -+#else -+ per_cpu(current_task, cpu) = idle; - #endif +@@ -1527,6 +1497,7 @@ static void ack_lapic_irq (unsigned int + static void end_lapic_irq (unsigned int i) { /* nothing */ } - irq_ctx_init(cpu); -@@ -363,8 +360,12 @@ void __init smp_prepare_cpus(unsigned in - #endif - } + static struct hw_interrupt_type lapic_irq_type __read_mostly = { ++ .name = "local-APIC", + .typename = "local-APIC-edge", + .startup = NULL, /* startup_irq() not used for IRQ0 */ + .shutdown = NULL, /* shutdown_irq() not used for IRQ0 */ +@@ -1998,18 +1969,18 @@ int arch_setup_msi_irq(struct pci_dev *d + if (irq < 0) + return irq; --void __devinit smp_prepare_boot_cpu(void) -+void __init smp_prepare_boot_cpu(void) - { -+#ifdef __i386__ -+ init_gdt(smp_processor_id()); -+ switch_to_new_gdt(); -+#endif - prefill_possible_map(); - } +- set_irq_msi(irq, desc); + ret = msi_compose_msg(dev, irq, &msg); + if (ret < 0) { + destroy_irq(irq); + return ret; + } ---- a/drivers/xen/core/xen_sysfs.c -+++ 
b/drivers/xen/core/xen_sysfs.c -@@ -29,12 +29,12 @@ HYPERVISOR_ATTR_RO(type); ++ set_irq_msi(irq, desc); + write_msi_msg(irq, &msg); - static int __init xen_sysfs_type_init(void) - { -- return sysfs_create_file(&hypervisor_subsys.kset.kobj, &type_attr.attr); -+ return sysfs_create_file(&hypervisor_subsys.kobj, &type_attr.attr); - } + set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); - static void xen_sysfs_type_destroy(void) - { -- sysfs_remove_file(&hypervisor_subsys.kset.kobj, &type_attr.attr); -+ sysfs_remove_file(&hypervisor_subsys.kobj, &type_attr.attr); +- return irq; ++ return 0; } - /* xen version attributes */ -@@ -90,13 +90,13 @@ static struct attribute_group version_gr + void arch_teardown_msi_irq(unsigned int irq) +--- sle11-2009-04-20.orig/arch/x86/kernel/ioport_64-xen.c 2009-03-04 11:28:34.000000000 +0100 ++++ sle11-2009-04-20/arch/x86/kernel/ioport_64-xen.c 2008-12-15 11:27:22.000000000 +0100 +@@ -13,10 +13,10 @@ + #include + #include + #include +-#include + #include + #include + #include ++#include + #include - static int __init xen_sysfs_version_init(void) + /* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */ +--- sle11-2009-04-20.orig/arch/x86/kernel/irq_64-xen.c 2009-03-04 11:25:55.000000000 +0100 ++++ sle11-2009-04-20/arch/x86/kernel/irq_64-xen.c 2008-12-15 11:27:22.000000000 +0100 +@@ -32,7 +32,7 @@ atomic_t irq_err_count; + */ + static inline void stack_overflow_check(struct pt_regs *regs) { -- return sysfs_create_group(&hypervisor_subsys.kset.kobj, -+ return sysfs_create_group(&hypervisor_subsys.kobj, - &version_group); - } +- u64 curbase = (u64) current->thread_info; ++ u64 curbase = (u64)task_stack_page(current); + static unsigned long warned = -60*HZ; - static void xen_sysfs_version_destroy(void) - { -- sysfs_remove_group(&hypervisor_subsys.kset.kobj, &version_group); -+ sysfs_remove_group(&hypervisor_subsys.kobj, &version_group); - } + if (regs->rsp >= curbase && regs->rsp <= curbase + THREAD_SIZE && +@@ -145,17 +145,43 @@ void fixup_irqs(cpumask_t map) - /* UUID */ -@@ -126,12 +126,12 @@ HYPERVISOR_ATTR_RO(uuid); + for (irq = 0; irq < NR_IRQS; irq++) { + cpumask_t mask; ++ int break_affinity = 0; ++ int set_affinity = 1; ++ + if (irq == 2) + continue; - static int __init xen_sysfs_uuid_init(void) - { -- return sysfs_create_file(&hypervisor_subsys.kset.kobj, &uuid_attr.attr); -+ return sysfs_create_file(&hypervisor_subsys.kobj, &uuid_attr.attr); - } ++ /* interrupt's are disabled at this point */ ++ spin_lock(&irq_desc[irq].lock); ++ ++ if (!irq_has_action(irq) || ++ cpus_equal(irq_desc[irq].affinity, map)) { ++ spin_unlock(&irq_desc[irq].lock); ++ continue; ++ } ++ + cpus_and(mask, irq_desc[irq].affinity, map); +- if (any_online_cpu(mask) == NR_CPUS) { +- /*printk("Breaking affinity for irq %i\n", irq);*/ ++ if (cpus_empty(mask)) { ++ break_affinity = 1; + mask = map; + } ++ ++ if (irq_desc[irq].chip->mask) ++ irq_desc[irq].chip->mask(irq); ++ + if (irq_desc[irq].chip->set_affinity) + irq_desc[irq].chip->set_affinity(irq, mask); +- else if (irq_desc[irq].action && !(warned++)) ++ else if (!(warned++)) ++ set_affinity = 0; ++ ++ if (irq_desc[irq].chip->unmask) ++ irq_desc[irq].chip->unmask(irq); ++ ++ spin_unlock(&irq_desc[irq].lock); ++ ++ if (break_affinity && set_affinity) ++ /*printk("Broke affinity for irq %i\n", irq)*/; ++ else if (!set_affinity) + printk("Cannot set affinity for irq %i\n", irq); + } - static void xen_sysfs_uuid_destroy(void) - { -- sysfs_remove_file(&hypervisor_subsys.kset.kobj, &uuid_attr.attr); -+ 
sysfs_remove_file(&hypervisor_subsys.kobj, &uuid_attr.attr); - } +--- sle11-2009-04-20.orig/arch/x86/kernel/ldt_64-xen.c 2009-04-29 08:44:31.000000000 +0200 ++++ sle11-2009-04-20/arch/x86/kernel/ldt_64-xen.c 2008-12-15 11:27:22.000000000 +0100 +@@ -13,7 +13,6 @@ + #include + #include + #include +-#include + #include + #include - /* xen compilation attributes */ -@@ -204,13 +204,13 @@ static struct attribute_group xen_compil +--- sle11-2009-04-20.orig/arch/x86/kernel/mpparse_64-xen.c 2009-03-04 11:25:55.000000000 +0100 ++++ sle11-2009-04-20/arch/x86/kernel/mpparse_64-xen.c 2008-12-15 11:27:22.000000000 +0100 +@@ -17,7 +17,6 @@ + #include + #include + #include +-#include + #include + #include + #include +@@ -307,7 +306,7 @@ static int __init smp_read_mpc(struct mp + } + } + } +- clustered_apic_check(); ++ setup_apic_routing(); + if (!num_processors) + printk(KERN_ERR "MPTABLE: no processors registered!\n"); + return num_processors; +--- sle11-2009-04-20.orig/arch/x86/kernel/process_64-xen.c 2009-03-04 11:25:55.000000000 +0100 ++++ sle11-2009-04-20/arch/x86/kernel/process_64-xen.c 2008-12-15 11:27:22.000000000 +0100 +@@ -39,6 +39,7 @@ + #include + #include + #include ++#include - int __init static xen_compilation_init(void) - { -- return sysfs_create_group(&hypervisor_subsys.kset.kobj, -+ return sysfs_create_group(&hypervisor_subsys.kobj, - &xen_compilation_group); - } + #include + #include +@@ -49,7 +50,6 @@ + #include + #include + #include +-#include + #include + #include + #include +@@ -232,16 +232,18 @@ void __cpuinit select_idle_routine(const - static void xen_compilation_destroy(void) + static int __init idle_setup (char *str) { -- sysfs_remove_group(&hypervisor_subsys.kset.kobj, -+ sysfs_remove_group(&hypervisor_subsys.kobj, - &xen_compilation_group); - } - -@@ -325,13 +325,13 @@ static struct attribute_group xen_proper +- if (!strncmp(str, "poll", 4)) { ++ if (!strcmp(str, "poll")) { + printk("using polling idle threads.\n"); + pm_idle = poll_idle; +- } ++ } else if (!strcmp(str, "mwait")) ++ force_mwait = 1; ++ else ++ return -1; - static int __init xen_properties_init(void) - { -- return sysfs_create_group(&hypervisor_subsys.kset.kobj, -+ return sysfs_create_group(&hypervisor_subsys.kobj, - &xen_properties_group); + boot_option_idle_override = 1; +- return 1; ++ return 0; } +- +-__setup("idle=", idle_setup); ++early_param("idle", idle_setup); - static void xen_properties_destroy(void) - { -- sysfs_remove_group(&hypervisor_subsys.kset.kobj, -+ sysfs_remove_group(&hypervisor_subsys.kobj, - &xen_properties_group); - } + /* Prints also some state that isn't saved in the pt_regs */ + void __show_regs(struct pt_regs * regs) +@@ -546,7 +548,7 @@ __switch_to(struct task_struct *prev_p, + * The AMD workaround requires it to be after DS reload, or + * after DS has been cleared, which we do in __prepare_arch_switch. 
+ */ +- if (prev_p->thread_info->status & TS_USEDFPU) { ++ if (task_thread_info(prev_p)->status & TS_USEDFPU) { + __save_init_fpu(prev_p); /* _not_ save_init_fpu() */ + mcl->op = __HYPERVISOR_fpu_taskswitch; + mcl->args[0] = 1; +--- sle11-2009-04-20.orig/arch/x86/kernel/setup_64-xen.c 2009-03-04 11:25:55.000000000 +0100 ++++ sle11-2009-04-20/arch/x86/kernel/setup_64-xen.c 2008-12-15 11:27:22.000000000 +0100 +@@ -120,6 +120,8 @@ int bootloader_type; -@@ -350,13 +350,13 @@ HYPERVISOR_ATTR_RO(vmcoreinfo); + unsigned long saved_video_mode; - static int __init xen_sysfs_vmcoreinfo_init(void) - { -- return sysfs_create_file(&hypervisor_subsys.kset.kobj, -+ return sysfs_create_file(&hypervisor_subsys.kobj, - &vmcoreinfo_attr.attr); - } ++int force_mwait __cpuinitdata; ++ + /* + * Early DMI memory + */ +@@ -253,10 +255,10 @@ static void discover_ebda(void) + * there is a real-mode segmented pointer pointing to the + * 4K EBDA area at 0x40E + */ +- ebda_addr = *(unsigned short *)EBDA_ADDR_POINTER; ++ ebda_addr = *(unsigned short *)__va(EBDA_ADDR_POINTER); + ebda_addr <<= 4; - static void xen_sysfs_vmcoreinfo_destroy(void) - { -- sysfs_remove_file(&hypervisor_subsys.kset.kobj, &vmcoreinfo_attr.attr); -+ sysfs_remove_file(&hypervisor_subsys.kobj, &vmcoreinfo_attr.attr); - } +- ebda_size = *(unsigned short *)(unsigned long)ebda_addr; ++ ebda_size = *(unsigned short *)__va(ebda_addr); + /* Round EBDA up to pages */ + if (ebda_size == 0) +@@ -410,15 +412,8 @@ void __init setup_arch(char **cmdline_p) #endif ---- a/drivers/xen/netback/netback.c -+++ b/drivers/xen/netback/netback.c -@@ -179,7 +179,7 @@ static struct sk_buff *netbk_copy_skb(st - goto err; - skb_reserve(nskb, 16 + NET_IP_ALIGN); -- headlen = nskb->end - nskb->data; -+ headlen = skb_end_pointer(nskb) - nskb->data; - if (headlen > skb_headlen(skb)) - headlen = skb_headlen(skb); - ret = skb_copy_bits(skb, 0, __skb_put(nskb, headlen), headlen); -@@ -225,11 +225,15 @@ static struct sk_buff *netbk_copy_skb(st - len -= copy; - } + #ifdef CONFIG_SMP +- /* +- * But first pinch a few for the stack/trampoline stuff +- * FIXME: Don't need the extra page at 4K, but need to fix +- * trampoline before removing it. 
(see the GDT stuff) +- */ +- reserve_bootmem_generic(PAGE_SIZE, PAGE_SIZE); +- + /* Reserve SMP trampoline */ +- reserve_bootmem_generic(SMP_TRAMPOLINE_BASE, PAGE_SIZE); ++ reserve_bootmem_generic(SMP_TRAMPOLINE_BASE, 2*PAGE_SIZE); + #endif + #endif -+#ifdef NET_SKBUFF_DATA_USES_OFFSET -+ offset = 0; -+#else - offset = nskb->data - skb->data; -+#endif +@@ -570,8 +565,6 @@ void __init setup_arch(char **cmdline_p) + early_quirks(); + #endif -- nskb->h.raw = skb->h.raw + offset; -- nskb->nh.raw = skb->nh.raw + offset; -- nskb->mac.raw = skb->mac.raw + offset; -+ nskb->transport_header = skb->transport_header + offset; -+ nskb->network_header = skb->network_header + offset; -+ nskb->mac_header = skb->mac_header + offset; +- zap_low_mappings(0); +- + /* + * set this early, so we dont allocate cpu0 + * if MADT list doesnt list BSP first +@@ -864,6 +857,10 @@ static void __cpuinit init_amd(struct cp - return nskb; + /* RDTSC can be speculated around */ + clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); ++ ++ /* Family 10 doesn't support C states in MWAIT so don't use it */ ++ if (c->x86 == 0x10 && !force_mwait) ++ clear_bit(X86_FEATURE_MWAIT, &c->x86_capability); + } -@@ -1601,7 +1605,7 @@ static int __init netback_init(void) - (void)bind_virq_to_irqhandler(VIRQ_DEBUG, - 0, - netif_be_dbg, -- SA_SHIRQ, -+ IRQF_SHARED, - "net-be-dbg", - &netif_be_dbg); + static void __cpuinit detect_ht(struct cpuinfo_x86 *c) +@@ -1146,9 +1143,7 @@ void __cpuinit identify_cpu(struct cpuin + #ifdef CONFIG_X86_MCE + mcheck_init(c); #endif ---- a/drivers/xen/netfront/netfront.c -+++ b/drivers/xen/netfront/netfront.c -@@ -513,7 +513,7 @@ static int setup_device(struct xenbus_de - memcpy(netdev->dev_addr, info->mac, ETH_ALEN); - - err = bind_listening_port_to_irqhandler( -- dev->otherend_id, netif_int, SA_SAMPLE_RANDOM, netdev->name, -+ dev->otherend_id, netif_int, IRQF_SAMPLE_RANDOM, netdev->name, - netdev); - if (err < 0) - goto fail; ---- a/drivers/xen/pciback/xenbus.c -+++ b/drivers/xen/pciback/xenbus.c -@@ -99,7 +99,7 @@ static int pciback_do_attach(struct pcib +- if (c == &boot_cpu_data) +- mtrr_bp_init(); +- else ++ if (c != &boot_cpu_data) + mtrr_ap_init(); + #ifdef CONFIG_NUMA + numa_add_cpu(smp_processor_id()); +@@ -1239,9 +1234,8 @@ static int show_cpuinfo(struct seq_file + "stc", + "100mhzsteps", + "hwpstate", +- NULL, /* tsc invariant mapped to constant_tsc */ +- NULL, +- /* nothing */ /* constant_tsc - moved to flags */ ++ "", /* tsc invariant mapped to constant_tsc */ ++ /* nothing */ + }; - err = bind_interdomain_evtchn_to_irqhandler( - pdev->xdev->otherend_id, remote_evtchn, pciback_handle_event, -- SA_SAMPLE_RANDOM, "pciback", pdev); -+ IRQF_SAMPLE_RANDOM, "pciback", pdev); - if (err < 0) { - xenbus_dev_fatal(pdev->xdev, err, - "Error binding event channel to IRQ"); ---- a/drivers/xen/pcifront/xenbus.c -+++ b/drivers/xen/pcifront/xenbus.c -@@ -10,10 +10,6 @@ - #include - #include "pcifront.h" --#ifndef __init_refok --#define __init_refok --#endif -- - #define INVALID_GRANT_REF (0) - #define INVALID_EVTCHN (-1) +--- sle11-2009-04-20.orig/arch/x86/kernel/setup64-xen.c 2009-03-04 11:28:34.000000000 +0100 ++++ sle11-2009-04-20/arch/x86/kernel/setup64-xen.c 2008-12-15 11:27:22.000000000 +0100 +@@ -113,9 +113,9 @@ void __init setup_per_cpu_areas(void) + if (!NODE_DATA(cpu_to_node(i))) { + printk("cpu with no node %d, num_online_nodes %d\n", + i, num_online_nodes()); +- ptr = alloc_bootmem(size); ++ ptr = alloc_bootmem_pages(size); + } else { +- ptr = alloc_bootmem_node(NODE_DATA(cpu_to_node(i)), size); 
++ ptr = alloc_bootmem_pages_node(NODE_DATA(cpu_to_node(i)), size); + } + if (!ptr) + panic("Cannot allocate cpu data for CPU %d\n", i); +@@ -208,6 +208,8 @@ char boot_exception_stacks[(N_EXCEPTION_ + __attribute__((section(".bss.page_aligned"))); + #endif ---- a/drivers/xen/scsifront/xenbus.c -+++ b/drivers/xen/scsifront/xenbus.c -@@ -96,7 +96,7 @@ static int scsifront_alloc_ring(struct v ++extern asmlinkage void ignore_sysret(void); ++ + /* May not be marked __init: used by software suspend */ + void syscall_init(void) + { +@@ -219,12 +221,22 @@ void syscall_init(void) + */ + wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32); + wrmsrl(MSR_LSTAR, system_call); ++ wrmsrl(MSR_CSTAR, ignore_sysret); - err = bind_listening_port_to_irqhandler( - dev->otherend_id, scsifront_intr, -- SA_SAMPLE_RANDOM, "scsifront", info); -+ IRQF_SAMPLE_RANDOM, "scsifront", info); + /* Flags to clear on syscall */ + wrmsrl(MSR_SYSCALL_MASK, EF_TF|EF_DF|EF_IE|0x3000); + #endif + #ifdef CONFIG_IA32_EMULATION + syscall32_cpu_init (); ++#else ++ { ++ static const struct callback_register cstar = { ++ .type = CALLBACKTYPE_syscall32, ++ .address = (unsigned long)ignore_sysret ++ }; ++ if (HYPERVISOR_callback_op(CALLBACKOP_register, &cstar)) ++ printk(KERN_WARN "Unable to register CSTAR callback\n"); ++ } + #endif + } - if (err <= 0) { - xenbus_dev_fatal(dev, err, "bind_listening_port_to_irqhandler"); ---- a/drivers/xen/sfc_netback/accel_fwd.c -+++ b/drivers/xen/sfc_netback/accel_fwd.c -@@ -308,7 +308,7 @@ static struct netback_accel *for_a_vnic( - static inline int packet_is_arp_reply(struct sk_buff *skb) - { - return skb->protocol == ntohs(ETH_P_ARP) -- && skb->nh.arph->ar_op == ntohs(ARPOP_REPLY); -+ && arp_hdr(skb)->ar_op == ntohs(ARPOP_REPLY); +@@ -262,7 +274,6 @@ void __cpuinit cpu_init (void) + /* CPU 0 is initialised in head64.c */ + if (cpu != 0) { + pda_init(cpu); +- zap_low_mappings(cpu); + } + #ifndef CONFIG_X86_NO_TSS + else +--- sle11-2009-04-20.orig/arch/x86/kernel/smp_64-xen.c 2008-12-15 11:26:44.000000000 +0100 ++++ sle11-2009-04-20/arch/x86/kernel/smp_64-xen.c 2008-12-15 11:27:22.000000000 +0100 +@@ -14,7 +14,6 @@ + #include + #include + #include +-#include + #include + #include + #include +@@ -457,44 +456,36 @@ int smp_call_function (void (*func) (voi } + EXPORT_SYMBOL(smp_call_function); +-void smp_stop_cpu(void) ++static void stop_this_cpu(void *dummy) + { +- unsigned long flags; ++ local_irq_disable(); + /* + * Remove this CPU: + */ + cpu_clear(smp_processor_id(), cpu_online_map); +- local_irq_save(flags); + disable_all_local_evtchn(); +- local_irq_restore(flags); +-} +- +-static void smp_really_stop_cpu(void *dummy) +-{ +- smp_stop_cpu(); + for (;;) + halt(); + } -@@ -392,12 +392,13 @@ void netback_accel_tx_packet(struct sk_b + void smp_send_stop(void) + { +- int nolock = 0; ++ int nolock; ++ unsigned long flags; ++ + #ifndef CONFIG_XEN + if (reboot_force) + return; + #endif ++ + /* Don't deadlock on the call lock in panic */ +- if (!spin_trylock(&call_lock)) { +- /* ignore locking because we have panicked anyways */ +- nolock = 1; +- } +- __smp_call_function(smp_really_stop_cpu, NULL, 0, 0); ++ nolock = !spin_trylock(&call_lock); ++ local_irq_save(flags); ++ __smp_call_function(stop_this_cpu, NULL, 0, 0); + if (!nolock) + spin_unlock(&call_lock); +- +- local_irq_disable(); + disable_all_local_evtchn(); +- local_irq_enable(); ++ local_irq_restore(flags); + } - BUG_ON(fwd_priv == NULL); + /* +--- sle11-2009-04-20.orig/arch/x86/kernel/traps_64-xen.c 2008-12-15 11:26:44.000000000 
+0100 ++++ sle11-2009-04-20/arch/x86/kernel/traps_64-xen.c 2008-12-15 11:27:22.000000000 +0100 +@@ -32,6 +32,7 @@ + #include + #include + #include ++#include -- if (is_broadcast_ether_addr(skb->mac.raw) && packet_is_arp_reply(skb)) { -+ if (is_broadcast_ether_addr(skb_mac_header(skb)) -+ && packet_is_arp_reply(skb)) { - /* - * update our fast path forwarding to reflect this - * gratuitous ARP - */ -- mac = skb->mac.raw+ETH_ALEN; -+ mac = skb_mac_header(skb)+ETH_ALEN; - - DPRINTK("%s: found gratuitous ARP for " MAC_FMT "\n", - __FUNCTION__, MAC_ARG(mac)); ---- a/drivers/xen/sfc_netback/accel_solarflare.c -+++ b/drivers/xen/sfc_netback/accel_solarflare.c -@@ -114,7 +114,7 @@ bend_dl_tx_packet(struct efx_dl_device * - BUG_ON(port == NULL); - - NETBACK_ACCEL_STATS_OP(global_stats.dl_tx_packets++); -- if (skb->mac.raw != NULL) -+ if (skb_mac_header_was_set(skb)) - netback_accel_tx_packet(skb, port->fwd_priv); - else { - DPRINTK("Ignoring packet with missing mac address\n"); ---- a/drivers/xen/sfc_netfront/accel_tso.c -+++ b/drivers/xen/sfc_netfront/accel_tso.c -@@ -33,10 +33,9 @@ + #include + #include +@@ -39,7 +40,6 @@ + #include + #include + #include +-#include + #include + #include + #include +@@ -71,22 +71,6 @@ asmlinkage void alignment_check(void); + asmlinkage void machine_check(void); + asmlinkage void spurious_interrupt_bug(void); - #include "accel_tso.h" +-ATOMIC_NOTIFIER_HEAD(die_chain); +-EXPORT_SYMBOL(die_chain); +- +-int register_die_notifier(struct notifier_block *nb) +-{ +- vmalloc_sync_all(); +- return atomic_notifier_chain_register(&die_chain, nb); +-} +-EXPORT_SYMBOL(register_die_notifier); /* used modular by kdb */ +- +-int unregister_die_notifier(struct notifier_block *nb) +-{ +- return atomic_notifier_chain_unregister(&die_chain, nb); +-} +-EXPORT_SYMBOL(unregister_die_notifier); /* used modular by kdb */ +- + static inline void conditional_sti(struct pt_regs *regs) + { + if (regs->eflags & X86_EFLAGS_IF) +@@ -428,8 +412,7 @@ void show_registers(struct pt_regs *regs + const int cpu = smp_processor_id(); + struct task_struct *cur = cpu_pda(cpu)->pcurrent; --#define PTR_DIFF(p1, p2) ((u8*)(p1) - (u8*)(p2)) --#define ETH_HDR_LEN(skb) ((skb)->nh.raw - (skb)->data) --#define SKB_TCP_OFF(skb) PTR_DIFF ((skb)->h.th, (skb)->data) --#define SKB_IP_OFF(skb) PTR_DIFF ((skb)->nh.iph, (skb)->data) -+#define ETH_HDR_LEN(skb) skb_network_offset(skb) -+#define SKB_TCP_OFF(skb) skb_transport_offset(skb) -+#define SKB_IP_OFF(skb) skb_network_offset(skb) +- rsp = regs->rsp; +- ++ rsp = regs->rsp; + printk("CPU %d ", cpu); + __show_regs(regs); + printk("Process %s (pid: %d, threadinfo %p, task %p)\n", +@@ -440,7 +423,6 @@ void show_registers(struct pt_regs *regs + * time of the fault.. 
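A note on the syscall_init() hunk for arch/x86/kernel/setup64-xen.c a few lines above: the new #else branch prints with KERN_WARN, which is not a log level the kernel defines (linux/kernel.h has KERN_WARNING), so a !CONFIG_IA32_EMULATION build of that branch would not compile as written. The sketch below shows what the fallback is doing; it is not a drop-in fix, the Xen names (struct callback_register, CALLBACKTYPE_syscall32, CALLBACKOP_register, HYPERVISOR_callback_op) are taken from the hunk itself, and register_cstar_fallback is a made-up wrapper name. On native hardware MSR_CSTAR points the CPU at the 32-bit SYSCALL entry; under Xen the equivalent is registering a syscall32 callback with the hypervisor, with ignore_sysret as the catch-all target.

/* Hedged sketch of the CSTAR fallback, assuming the Xenified tree's
 * xen/interface/callback.h and hypercall wrappers are available. */
#include <linux/kernel.h>
#include <linux/linkage.h>
#include <xen/interface/callback.h>

extern asmlinkage void ignore_sysret(void);

static void register_cstar_fallback(void)
{
	static const struct callback_register cstar = {
		.type    = CALLBACKTYPE_syscall32,
		.address = (unsigned long)ignore_sysret,
	};

	/* KERN_WARNING, not KERN_WARN, is the standard log-level macro. */
	if (HYPERVISOR_callback_op(CALLBACKOP_register, &cstar))
		printk(KERN_WARNING "Unable to register CSTAR callback\n");
}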
+ */ + if (in_kernel) { +- + printk("Stack: "); + _show_stack(NULL, regs, (unsigned long*)rsp); - /* - * Set a maximum number of buffers in each output packet to make life -@@ -114,9 +113,8 @@ struct netfront_accel_tso_state { - static inline void tso_check_safe(struct sk_buff *skb) { - EPRINTK_ON(skb->protocol != htons (ETH_P_IP)); - EPRINTK_ON(((struct ethhdr*) skb->data)->h_proto != htons (ETH_P_IP)); -- EPRINTK_ON(skb->nh.iph->protocol != IPPROTO_TCP); -- EPRINTK_ON((SKB_TCP_OFF(skb) -- + (skb->h.th->doff << 2u)) > skb_headlen(skb)); -+ EPRINTK_ON(ip_hdr(skb)->protocol != IPPROTO_TCP); -+ EPRINTK_ON((SKB_TCP_OFF(skb) + tcp_hdrlen(skb)) > skb_headlen(skb)); - } +@@ -485,13 +467,14 @@ static unsigned int die_nest_count; + unsigned __kprobes long oops_begin(void) + { +- int cpu = smp_processor_id(); ++ int cpu; + unsigned long flags; -@@ -129,17 +127,17 @@ static inline void tso_start(struct netf - * All ethernet/IP/TCP headers combined size is TCP header size - * plus offset of TCP header relative to start of packet. - */ -- st->p.header_length = (skb->h.th->doff << 2u) + SKB_TCP_OFF(skb); -+ st->p.header_length = tcp_hdrlen(skb) + SKB_TCP_OFF(skb); - st->p.full_packet_size = (st->p.header_length - + skb_shinfo(skb)->gso_size); - st->p.gso_size = skb_shinfo(skb)->gso_size; + oops_enter(); -- st->p.ip_id = htons(skb->nh.iph->id); -- st->seqnum = ntohl(skb->h.th->seq); -+ st->p.ip_id = htons(ip_hdr(skb)->id); -+ st->seqnum = ntohl(tcp_hdr(skb)->seq); + /* racy, but better than risking deadlock. */ + local_irq_save(flags); ++ cpu = smp_processor_id(); + if (!spin_trylock(&die_lock)) { + if (cpu == die_owner) + /* nested oops. should stop eventually */; +@@ -585,10 +568,20 @@ static void __kprobes do_trap(int trapnr + { + struct task_struct *tsk = current; -- EPRINTK_ON(skb->h.th->urg); -- EPRINTK_ON(skb->h.th->syn); -- EPRINTK_ON(skb->h.th->rst); -+ EPRINTK_ON(tcp_hdr(skb)->urg); -+ EPRINTK_ON(tcp_hdr(skb)->syn); -+ EPRINTK_ON(tcp_hdr(skb)->rst); +- tsk->thread.error_code = error_code; +- tsk->thread.trap_no = trapnr; +- + if (user_mode(regs)) { ++ /* ++ * We want error_code and trap_no set for userspace ++ * faults and kernelspace faults which result in ++ * die(), but not kernelspace faults which are fixed ++ * up. die() gives the process no chance to handle ++ * the signal and notice the kernel fault information, ++ * so that won't result in polluting the information ++ * about previously queued, but not yet delivered, ++ * faults. See also do_general_protection below. ++ */ ++ tsk->thread.error_code = error_code; ++ tsk->thread.trap_no = trapnr; ++ + if (exception_trace && unhandled_signal(tsk, signr)) + printk(KERN_INFO + "%s[%d] trap %s rip:%lx rsp:%lx error:%lx\n", +@@ -609,8 +602,11 @@ static void __kprobes do_trap(int trapnr + fixup = search_exception_tables(regs->rip); + if (fixup) + regs->rip = fixup->fixup; +- else ++ else { ++ tsk->thread.error_code = error_code; ++ tsk->thread.trap_no = trapnr; + die(str, regs, error_code); ++ } + return; + } + } +@@ -686,10 +682,10 @@ asmlinkage void __kprobes do_general_pro - st->remaining_len = skb->len - st->p.header_length; + conditional_sti(regs); -@@ -258,8 +256,8 @@ int tso_start_new_packet(netfront_accel_ - /* This packet will be the last in the TSO burst. 
*/ - ip_length = (st->p.header_length - ETH_HDR_LEN(skb) - + st->remaining_len); -- tsoh_th->fin = skb->h.th->fin; -- tsoh_th->psh = skb->h.th->psh; -+ tsoh_th->fin = tcp_hdr(skb)->fin; -+ tsoh_th->psh = tcp_hdr(skb)->psh; - } +- tsk->thread.error_code = error_code; +- tsk->thread.trap_no = 13; +- + if (user_mode(regs)) { ++ tsk->thread.error_code = error_code; ++ tsk->thread.trap_no = 13; ++ + if (exception_trace && unhandled_signal(tsk, SIGSEGV)) + printk(KERN_INFO + "%s[%d] general protection rip:%lx rsp:%lx error:%lx\n", +@@ -708,6 +704,9 @@ asmlinkage void __kprobes do_general_pro + regs->rip = fixup->fixup; + return; + } ++ ++ tsk->thread.error_code = error_code; ++ tsk->thread.trap_no = 13; + if (notify_die(DIE_GPF, "general protection fault", regs, + error_code, 13, SIGSEGV) == NOTIFY_STOP) + return; +--- sle11-2009-04-20.orig/arch/x86/kernel/vsyscall_64-xen.c 2009-03-04 11:25:55.000000000 +0100 ++++ sle11-2009-04-20/arch/x86/kernel/vsyscall_64-xen.c 2008-12-15 11:27:22.000000000 +0100 +@@ -45,14 +45,34 @@ - tsoh_iph->tot_len = htons(ip_length); ---- a/drivers/xen/sfc_netfront/accel_vi.c -+++ b/drivers/xen/sfc_netfront/accel_vi.c -@@ -463,7 +463,7 @@ netfront_accel_enqueue_skb_multi(netfron + #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr))) + #define __syscall_clobber "r11","rcx","memory" ++#define __pa_vsymbol(x) \ ++ ({unsigned long v; \ ++ extern char __vsyscall_0; \ ++ asm("" : "=r" (v) : "0" (x)); \ ++ ((v - VSYSCALL_FIRST_PAGE) + __pa_symbol(&__vsyscall_0)); }) - if (skb->ip_summed == CHECKSUM_PARTIAL) { - /* Set to zero to encourage falcon to work it out for us */ -- *(u16*)(skb->h.raw + skb->csum_offset) = 0; -+ *(u16*)(skb->head + skb->csum_start + skb->csum_offset) = 0; - } ++/* ++ * vsyscall_gtod_data contains data that is : ++ * - readonly from vsyscalls ++ * - writen by timer interrupt or systcl (/proc/sys/kernel/vsyscall64) ++ * Try to keep this structure as small as possible to avoid cache line ping pongs ++ */ + struct vsyscall_gtod_data_t { +- seqlock_t lock; +- int sysctl_enabled; +- struct timeval wall_time_tv; ++ seqlock_t lock; ++ ++ /* open coded 'struct timespec' */ ++ time_t wall_time_sec; ++ u32 wall_time_nsec; ++ ++ int sysctl_enabled; + struct timezone sys_tz; +- cycle_t offset_base; +- struct clocksource clock; ++ struct { /* extract of a clocksource struct */ ++ cycle_t (*vread)(void); ++ cycle_t cycle_last; ++ cycle_t mask; ++ u32 mult; ++ u32 shift; ++ } clock; + }; + int __vgetcpu_mode __section_vgetcpu_mode; - if (multi_post_start_new_buffer(vnic, &state)) { -@@ -582,7 +582,7 @@ netfront_accel_enqueue_skb_single(netfro +@@ -68,9 +88,13 @@ void update_vsyscall(struct timespec *wa - if (skb->ip_summed == CHECKSUM_PARTIAL) { - /* Set to zero to encourage falcon to work it out for us */ -- *(u16*)(skb->h.raw + skb->csum_offset) = 0; -+ *(u16*)(skb->head + skb->csum_start + skb->csum_offset) = 0; - } - NETFRONT_ACCEL_PKTBUFF_FOR_EACH_FRAGMENT - (skb, idx, frag_data, frag_len, { ---- a/drivers/xen/sfc_netfront/accel_xenbus.c -+++ b/drivers/xen/sfc_netfront/accel_xenbus.c -@@ -356,7 +356,7 @@ static int vnic_setup_domU_shared_state( - /* Create xenbus msg event channel */ - err = bind_listening_port_to_irqhandler - (dev->otherend_id, netfront_accel_msg_channel_irq_from_bend, -- SA_SAMPLE_RANDOM, "vnicctrl", vnic); -+ IRQF_SAMPLE_RANDOM, "vnicctrl", vnic); - if (err < 0) { - EPRINTK("Couldn't bind msg event channel\n"); - goto fail_msg_irq; -@@ -367,7 +367,7 @@ static int vnic_setup_domU_shared_state( - /* Create xenbus net 
event channel */ - err = bind_listening_port_to_irqhandler - (dev->otherend_id, netfront_accel_net_channel_irq_from_bend, -- SA_SAMPLE_RANDOM, "vnicfront", vnic); -+ IRQF_SAMPLE_RANDOM, "vnicfront", vnic); - if (err < 0) { - EPRINTK("Couldn't bind net event channel\n"); - goto fail_net_irq; ---- a/fs/aio.c -+++ b/fs/aio.c -@@ -38,7 +38,7 @@ + write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags); + /* copy vsyscall data */ +- vsyscall_gtod_data.clock = *clock; +- vsyscall_gtod_data.wall_time_tv.tv_sec = wall_time->tv_sec; +- vsyscall_gtod_data.wall_time_tv.tv_usec = wall_time->tv_nsec/1000; ++ vsyscall_gtod_data.clock.vread = clock->vread; ++ vsyscall_gtod_data.clock.cycle_last = clock->cycle_last; ++ vsyscall_gtod_data.clock.mask = clock->mask; ++ vsyscall_gtod_data.clock.mult = clock->mult; ++ vsyscall_gtod_data.clock.shift = clock->shift; ++ vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec; ++ vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec; + vsyscall_gtod_data.sys_tz = sys_tz; + write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); + } +@@ -105,7 +129,8 @@ static __always_inline long time_syscall + static __always_inline void do_vgettimeofday(struct timeval * tv) + { + cycle_t now, base, mask, cycle_delta; +- unsigned long seq, mult, shift, nsec_delta; ++ unsigned seq; ++ unsigned long mult, shift, nsec; + cycle_t (*vread)(void); + do { + seq = read_seqbegin(&__vsyscall_gtod_data.lock); +@@ -121,21 +146,20 @@ static __always_inline void do_vgettimeo + mult = __vsyscall_gtod_data.clock.mult; + shift = __vsyscall_gtod_data.clock.shift; - #ifdef CONFIG_EPOLL - #include --#include -+#include - #endif +- *tv = __vsyscall_gtod_data.wall_time_tv; +- ++ tv->tv_sec = __vsyscall_gtod_data.wall_time_sec; ++ nsec = __vsyscall_gtod_data.wall_time_nsec; + } while (read_seqretry(&__vsyscall_gtod_data.lock, seq)); - #if DEBUG > 1 -@@ -1309,7 +1309,7 @@ static const struct file_operations aioq + /* calculate interval: */ + cycle_delta = (now - base) & mask; + /* convert to nsecs: */ +- nsec_delta = (cycle_delta * mult) >> shift; ++ nsec += (cycle_delta * mult) >> shift; - /* make_aio_fd: - * Create a file descriptor that can be used to poll the event queue. -- * Based and piggybacked on the excellent epoll code. -+ * Based on the excellent epoll code. - */ +- /* convert to usecs and add to timespec: */ +- tv->tv_usec += nsec_delta / NSEC_PER_USEC; +- while (tv->tv_usec > USEC_PER_SEC) { ++ while (nsec >= NSEC_PER_SEC) { + tv->tv_sec += 1; +- tv->tv_usec -= USEC_PER_SEC; ++ nsec -= NSEC_PER_SEC; + } ++ tv->tv_usec = nsec / NSEC_PER_USEC; + } - static int make_aio_fd(struct kioctx *ioctx) -@@ -1318,7 +1318,8 @@ static int make_aio_fd(struct kioctx *io - struct inode *inode; - struct file *file; + int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz) +@@ -151,11 +175,16 @@ int __vsyscall(0) vgettimeofday(struct t + * unlikely */ + time_t __vsyscall(1) vtime(time_t *t) + { ++ struct timeval tv; ++ time_t result; + if (unlikely(!__vsyscall_gtod_data.sysctl_enabled)) + return time_syscall(t); +- else if (t) +- *t = __vsyscall_gtod_data.wall_time_tv.tv_sec; +- return __vsyscall_gtod_data.wall_time_tv.tv_sec; ++ ++ vgettimeofday(&tv, 0); ++ result = tv.tv_sec; ++ if (t) ++ *t = result; ++ return result; + } -- error = ep_getfd(&fd, &inode, &file, NULL, &aioq_fops); -+ error = anon_inode_getfd(&fd, &inode, &file, "[aioq]", -+ &aioq_fops, ioctx); - if (error) - return error; + /* Fast way to get current CPU and node. 
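The arch/x86/kernel/vsyscall_64-xen.c rework above is a compact example of the lockless-reader pattern: update_vsyscall() copies only the clocksource fields userspace actually needs (vread, cycle_last, mask, mult, shift) under the seqlock, do_vgettimeofday() retries its snapshot until the sequence number is stable, and the cycle delta is scaled to nanoseconds with a multiply and a shift instead of a division. vtime() now goes through vgettimeofday() rather than reading the shared seconds field on its own, so it cannot return a seconds value older than what gettimeofday() just produced. Below is a self-contained userspace analogue; the bare volatile counter is a stand-in for the kernel's seqlock and ignores the memory-ordering details a real one handles.

/* Userspace analogue (illustrative only) of the reworked do_vgettimeofday():
 * retry while a writer is active, scale cycles by mult >> shift, normalize. */
#include <stdint.h>
#include <stdio.h>

struct gtod {
	volatile unsigned seq;      /* even = stable, odd = write in progress */
	uint64_t cycle_last, mask;
	uint32_t mult, shift;
	int64_t wall_sec;
	uint32_t wall_nsec;
};

static void read_wall(const struct gtod *g, uint64_t now,
		      int64_t *sec, uint32_t *usec)
{
	unsigned seq;
	uint64_t nsec, delta;

	do {
		seq = g->seq;                   /* snapshot under the counter */
		delta = (now - g->cycle_last) & g->mask;
		*sec = g->wall_sec;
		nsec = g->wall_nsec;
	} while (seq & 1 || seq != g->seq);     /* retry if a writer ran */

	nsec += (delta * g->mult) >> g->shift;  /* cycles -> nanoseconds */
	while (nsec >= 1000000000u) {           /* normalize into tv_sec */
		(*sec)++;
		nsec -= 1000000000u;
	}
	*usec = (uint32_t)(nsec / 1000u);
}

int main(void)
{
	struct gtod g = { 0, 1000, ~0ull, 1 << 10, 10, 1234567890, 999999999 };
	int64_t sec; uint32_t usec;

	/* delta = 4000 cycles, (4000 * 1024) >> 10 = 4000 ns, which carries
	 * the pending 999999999 ns into the seconds field. */
	read_wall(&g, 5000, &sec, &usec);
	printf("%lld.%06u\n", (long long)sec, (unsigned)usec);
	return 0;
}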
+@@ -224,10 +253,10 @@ static int vsyscall_sysctl_change(ctl_ta + return ret; + /* gcc has some trouble with __va(__pa()), so just do it this + way. */ +- map1 = ioremap(__pa_symbol(&vsysc1), 2); ++ map1 = ioremap(__pa_vsymbol(&vsysc1), 2); + if (!map1) + return -ENOMEM; +- map2 = ioremap(__pa_symbol(&vsysc2), 2); ++ map2 = ioremap(__pa_vsymbol(&vsysc2), 2); + if (!map2) { + ret = -ENOMEM; + goto out; +@@ -304,7 +333,7 @@ static int __cpuinit + cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg) + { + long cpu = (long)arg; +- if (action == CPU_ONLINE) ++ if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN) + smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 0, 1); + return NOTIFY_DONE; + } +--- sle11-2009-04-20.orig/arch/x86/mm/fault_64-xen.c 2009-03-04 11:25:55.000000000 +0100 ++++ sle11-2009-04-20/arch/x86/mm/fault_64-xen.c 2008-12-15 11:27:22.000000000 +0100 +@@ -15,22 +15,22 @@ + #include + #include + #include +-#include + #include + #include + #include + #include /* For unblank_screen() */ + #include ++#include + #include + #include + #include ++#include ---- a/include/asm-x86/mach-xen/asm/desc_32.h -+++ b/include/asm-x86/mach-xen/asm/desc_32.h -@@ -11,23 +11,24 @@ + #include + #include + #include + #include + #include +-#include + #include - #include + /* Page fault error code bits */ +@@ -537,6 +537,12 @@ bad_area: + bad_area_nosemaphore: + /* User mode accesses just cause a SIGSEGV */ + if (error_code & PF_USER) { ++ ++ /* ++ * It's possible to have interrupts off here. ++ */ ++ local_irq_enable(); ++ + if (is_prefetch(regs, address, error_code)) + return; --extern struct desc_struct cpu_gdt_table[GDT_ENTRIES]; -- - struct Xgt_desc_struct { - unsigned short size; - unsigned long address __attribute__((packed)); - unsigned short pad; - } __attribute__ ((packed)); +@@ -646,7 +652,7 @@ do_sigbus: + } --extern struct Xgt_desc_struct idt_descr; --DECLARE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr); --extern struct Xgt_desc_struct early_gdt_descr; -+struct gdt_page -+{ -+ struct desc_struct gdt[GDT_ENTRIES]; -+} __attribute__((aligned(PAGE_SIZE))); -+DECLARE_PER_CPU(struct gdt_page, gdt_page); + DEFINE_SPINLOCK(pgd_lock); +-struct page *pgd_list; ++LIST_HEAD(pgd_list); - static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu) + void vmalloc_sync_all(void) { -- return (struct desc_struct *)per_cpu(cpu_gdt_descr, cpu).address; -+ return per_cpu(gdt_page, cpu).gdt; +@@ -666,8 +672,7 @@ void vmalloc_sync_all(void) + if (pgd_none(*pgd_ref)) + continue; + spin_lock(&pgd_lock); +- for (page = pgd_list; page; +- page = (struct page *)page->index) { ++ list_for_each_entry(page, &pgd_list, lru) { + pgd_t *pgd; + pgd = (pgd_t *)page_address(page) + pgd_index(address); + if (pgd_none(*pgd)) +--- sle11-2009-04-20.orig/arch/x86/mm/init_64-xen.c 2009-03-04 11:25:55.000000000 +0100 ++++ sle11-2009-04-20/arch/x86/mm/init_64-xen.c 2008-12-15 11:27:22.000000000 +0100 +@@ -25,10 +25,12 @@ + #include + #include + #include ++#include + #include + #include + #include + #include ++#include + + #include + #include +@@ -51,7 +53,7 @@ + #define Dprintk(x...) 
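In the arch/x86/mm/fault_64-xen.c hunks above, the pgd list switches from a hand-rolled chain threaded through page->index, walked with casts, to a standard LIST_HEAD(pgd_list) iterated with list_for_each_entry() over page->lru, which buys vmalloc_sync_all() the usual doubly linked invariants (constant-time unlink, no pointer casts) for free. The sketch below shows the intrusive-list pattern in isolation; it is simplified from linux/list.h, and unlike the real macro this list_for_each_entry() takes the container type as an explicit parameter instead of using typeof.

/* Self-contained model of an intrusive list threaded through a member of
 * the containing object, as used for pgd_list. */
#include <stddef.h>
#include <stdio.h>

struct list_head { struct list_head *next, *prev; };
#define LIST_HEAD(name) struct list_head name = { &(name), &(name) }
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))
#define list_for_each_entry(pos, head, type, member) \
	for (pos = container_of((head)->next, type, member); \
	     &pos->member != (head); \
	     pos = container_of(pos->member.next, type, member))

static void list_add(struct list_head *new, struct list_head *head)
{
	new->next = head->next; new->prev = head;
	head->next->prev = new; head->next = new;
}

struct page { unsigned long pfn; struct list_head lru; };

LIST_HEAD(pgd_list);

int main(void)
{
	struct page a = { 1 }, b = { 2 };
	struct page *pos;

	list_add(&a.lru, &pgd_list);            /* list now: b? no, a */
	list_add(&b.lru, &pgd_list);            /* inserted at head: b, a */
	list_for_each_entry(pos, &pgd_list, struct page, lru)
		printf("pfn %lu\n", pos->pfn);  /* prints 2 then 1 */
	return 0;
}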
+ #endif + +-struct dma_mapping_ops* dma_ops; ++const struct dma_mapping_ops* dma_ops; + EXPORT_SYMBOL(dma_ops); + + #if CONFIG_XEN_COMPAT <= 0x030002 +@@ -191,6 +193,13 @@ void show_mem(void) + + for_each_online_pgdat(pgdat) { + for (i = 0; i < pgdat->node_spanned_pages; ++i) { ++ /* this loop can take a while with 256 GB and 4k pages ++ so update the NMI watchdog */ ++ if (unlikely(i % MAX_ORDER_NR_PAGES == 0)) { ++ touch_nmi_watchdog(); ++ } ++ if (!pfn_valid(pgdat->node_start_pfn + i)) ++ continue; + page = pfn_to_page(pgdat->node_start_pfn + i); + total++; + if (PageReserved(page)) +@@ -359,7 +368,7 @@ __set_fixmap (enum fixed_addresses idx, + } } -+extern struct Xgt_desc_struct idt_descr; - extern struct desc_struct idt_table[]; - extern void set_intr_gate(unsigned int irq, void * addr); +-unsigned long __initdata table_start, table_end; ++unsigned long __meminitdata table_start, table_end; -@@ -55,53 +56,32 @@ static inline void pack_gate(__u32 *a, _ - #define DESCTYPE_S 0x10 /* !system */ + static __meminit void *alloc_static_page(unsigned long *phys) + { +@@ -376,7 +385,7 @@ static __meminit void *alloc_static_page + start_pfn++; + memset((void *)va, 0, PAGE_SIZE); + return (void *)va; +-} ++} + + #define PTE_SIZE PAGE_SIZE + +@@ -412,28 +421,46 @@ static inline int make_readonly(unsigned #ifndef CONFIG_XEN --#define load_TR_desc() __asm__ __volatile__("ltr %w0"::"q" (GDT_ENTRY_TSS*8)) + /* Must run before zap_low_mappings */ +-__init void *early_ioremap(unsigned long addr, unsigned long size) ++__meminit void *early_ioremap(unsigned long addr, unsigned long size) + { +- unsigned long map = round_down(addr, LARGE_PAGE_SIZE); - --#define load_gdt(dtr) __asm__ __volatile("lgdt %0"::"m" (*dtr)) --#define load_idt(dtr) __asm__ __volatile("lidt %0"::"m" (*dtr)) -+#define load_TR_desc() native_load_tr_desc() -+#define load_gdt(dtr) native_load_gdt(dtr) -+#define load_idt(dtr) native_load_idt(dtr) - #define load_tr(tr) __asm__ __volatile("ltr %0"::"m" (tr)) - #define load_ldt(ldt) __asm__ __volatile("lldt %0"::"m" (ldt)) - --#define store_gdt(dtr) __asm__ ("sgdt %0":"=m" (*dtr)) --#define store_idt(dtr) __asm__ ("sidt %0":"=m" (*dtr)) --#define store_tr(tr) __asm__ ("str %0":"=m" (tr)) -+#define store_gdt(dtr) native_store_gdt(dtr) -+#define store_idt(dtr) native_store_idt(dtr) -+#define store_tr(tr) (tr = native_store_tr()) - #define store_ldt(ldt) __asm__ ("sldt %0":"=m" (ldt)) --#endif +- /* actually usually some more */ +- if (size >= LARGE_PAGE_SIZE) { +- return NULL; ++ unsigned long vaddr; ++ pmd_t *pmd, *last_pmd; ++ int i, pmds; ++ ++ pmds = ((addr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE; ++ vaddr = __START_KERNEL_map; ++ pmd = level2_kernel_pgt; ++ last_pmd = level2_kernel_pgt + PTRS_PER_PMD - 1; ++ for (; pmd <= last_pmd; pmd++, vaddr += PMD_SIZE) { ++ for (i = 0; i < pmds; i++) { ++ if (pmd_present(pmd[i])) ++ goto next; ++ } ++ vaddr += addr & ~PMD_MASK; ++ addr &= PMD_MASK; ++ for (i = 0; i < pmds; i++, addr += PMD_SIZE) ++ set_pmd(pmd + i,__pmd(addr | _KERNPG_TABLE | _PAGE_PSE)); ++ __flush_tlb(); ++ return (void *)vaddr; ++ next: ++ ; + } +- set_pmd(temp_mappings[0].pmd, __pmd(map | _KERNPG_TABLE | _PAGE_PSE)); +- map += LARGE_PAGE_SIZE; +- set_pmd(temp_mappings[1].pmd, __pmd(map | _KERNPG_TABLE | _PAGE_PSE)); +- __flush_tlb(); +- return temp_mappings[0].address + (addr & (LARGE_PAGE_SIZE-1)); ++ printk("early_ioremap(0x%lx, %lu) failed\n", addr, size); ++ return NULL; + } --#if TLS_SIZE != 24 --# error update this code. 
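The early_ioremap() rewrite in arch/x86/mm/init_64-xen.c above drops the two fixed temp_mappings slots, which capped any early mapping at one 2 MB large page, and instead scans level2_kernel_pgt for a long enough run of unused PMD entries; the run is pointed at the target with _PAGE_PSE large-page entries, the caller gets the offset of the request inside that virtual window, and early_iounmap() later clears the same PMD range. The standalone analogue below models only the window search; the table size and the addresses in main() are invented, and map_window() stands in for the real PMD walk.

/* Illustrative analogue: find `pmds` consecutive free slots covering the
 * request, claim them, return the offset into the claimed window. */
#include <stdio.h>

#define PTRS_PER_PMD 512
#define PMD_SIZE     (2UL << 20)            /* one slot maps 2 MB */
#define PMD_MASK     (~(PMD_SIZE - 1))

static unsigned long pmd_table[PTRS_PER_PMD];  /* 0 = not present */

static long map_window(unsigned long addr, unsigned long size)
{
	/* same rounding as the hunk's pmds computation */
	unsigned long pmds = ((addr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE;
	unsigned long base = addr & PMD_MASK;

	for (unsigned i = 0; i + pmds <= PTRS_PER_PMD; i++) {
		unsigned j;
		for (j = 0; j < pmds; j++)
			if (pmd_table[i + j])       /* slot already in use */
				break;
		if (j != pmds)
			continue;
		for (j = 0; j < pmds; j++)          /* claim the run */
			pmd_table[i + j] = base + j * PMD_SIZE;
		return (long)(i * PMD_SIZE + (addr & ~PMD_MASK));
	}
	return -1;                                  /* no window found */
}

int main(void)
{
	pmd_table[0] = 1;   /* pretend kernel text occupies slot 0 */
	printf("offset = %ld\n", map_window(0x200000 + 0x1000, 0x300000));
	return 0;
}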
--#endif -- --static inline void load_TLS(struct thread_struct *t, unsigned int cpu) + /* To avoid virtual aliases later */ +-__init void early_iounmap(void *addr, unsigned long size) ++__meminit void early_iounmap(void *addr, unsigned long size) + { +- if ((void *)round_down((unsigned long)addr, LARGE_PAGE_SIZE) != temp_mappings[0].address) +- printk("early_iounmap: bad address %p\n", addr); +- set_pmd(temp_mappings[0].pmd, __pmd(0)); +- set_pmd(temp_mappings[1].pmd, __pmd(0)); ++ unsigned long vaddr; ++ pmd_t *pmd; ++ int i, pmds; ++ ++ vaddr = (unsigned long)addr; ++ pmds = ((vaddr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE; ++ pmd = level2_kernel_pgt + pmd_index(vaddr); ++ for (i = 0; i < pmds; i++) ++ pmd_clear(pmd + i); + __flush_tlb(); + } + #endif +@@ -767,14 +794,6 @@ void __meminit init_memory_mapping(unsig + __flush_tlb_all(); + } + +-void __cpuinit zap_low_mappings(int cpu) -{ --#define C(i) if (HYPERVISOR_update_descriptor(virt_to_machine(&get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i]), \ -- *(u64 *)&t->tls_array[i]) \ -- BUG() -- C(0); C(1); C(2); --#undef C +- /* this is not required for Xen */ +-#if 0 +- swap_low_mappings(); +-#endif -} -+#define load_TLS(t, cpu) native_load_tls(t, cpu) -+#define set_ldt native_set_ldt - --#ifndef CONFIG_XEN - #define write_ldt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b) - #define write_gdt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b) - #define write_idt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b) - --static inline void write_dt_entry(void *dt, int entry, __u32 entry_a, __u32 entry_b) -+static inline void write_dt_entry(struct desc_struct *dt, -+ int entry, u32 entry_low, u32 entry_high) +- + #ifndef CONFIG_NUMA + void __init paging_init(void) { -- __u32 *lp = (__u32 *)((char *)dt + entry*8); -- *lp = entry_a; -- *(lp+1) = entry_b; -+ dt[entry].a = entry_low; -+ dt[entry].b = entry_high; - } --#define set_ldt native_set_ldt --#else --extern int write_ldt_entry(void *ldt, int entry, __u32 entry_a, __u32 entry_b); --extern int write_gdt_entry(void *gdt, int entry, __u32 entry_a, __u32 entry_b); --#define set_ldt xen_set_ldt +@@ -960,17 +979,6 @@ void __init mem_init(void) + reservedpages << (PAGE_SHIFT-10), + datasize >> 10, + initsize >> 10); +- +-#ifndef CONFIG_XEN +-#ifdef CONFIG_SMP +- /* +- * Sync boot_level4_pgt mappings with the init_level4_pgt +- * except for the low identity mappings which are already zapped +- * in init_level4_pgt. 
This sync-up is essential for AP's bringup +- */ +- memcpy(boot_level4_pgt+1, init_level4_pgt+1, (PTRS_PER_PGD-1)*sizeof(pgd_t)); -#endif +-#endif + } --#ifndef CONFIG_XEN --static inline fastcall void native_set_ldt(const void *addr, -- unsigned int entries) -+static inline void native_set_ldt(const void *addr, unsigned int entries) - { - if (likely(entries == 0)) - __asm__ __volatile__("lldt %w0"::"q" (0)); -@@ -116,6 +96,65 @@ static inline fastcall void native_set_l - __asm__ __volatile__("lldt %w0"::"q" (GDT_ENTRY_LDT*8)); + void free_init_pages(char *what, unsigned long begin, unsigned long end) +@@ -980,7 +988,7 @@ void free_init_pages(char *what, unsigne + if (begin >= end) + return; + +- printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10); ++ printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10); + for (addr = begin; addr < end; addr += PAGE_SIZE) { + ClearPageReserved(virt_to_page(addr)); + init_page_count(virt_to_page(addr)); +@@ -989,24 +997,17 @@ void free_init_pages(char *what, unsigne + if (addr >= __START_KERNEL_map) { + /* make_readonly() reports all kernel addresses. */ + __make_page_writable(__va(__pa(addr))); +- if (HYPERVISOR_update_va_mapping(addr, __pte(0), 0)) { +- pgd_t *pgd = pgd_offset_k(addr); +- pud_t *pud = pud_offset(pgd, addr); +- pmd_t *pmd = pmd_offset(pud, addr); +- pte_t *pte = pte_offset_kernel(pmd, addr); +- +- xen_l1_entry_update(pte, __pte(0)); /* fallback */ +- } ++ change_page_attr_addr(addr, 1, __pgprot(0)); + } + free_page(addr); + totalram_pages++; } ++ if (addr > __START_KERNEL_map) ++ global_flush_tlb(); } + + void free_initmem(void) + { +- memset(__initdata_begin, POISON_FREE_INITDATA, +- __initdata_end - __initdata_begin); + free_init_pages("unused kernel memory", + (unsigned long)(&__init_begin), + (unsigned long)(&__init_end)); +@@ -1016,13 +1017,28 @@ void free_initmem(void) + + void mark_rodata_ro(void) + { +- unsigned long addr = (unsigned long)__start_rodata; ++ unsigned long start = (unsigned long)_stext, end; + ++#ifdef CONFIG_HOTPLUG_CPU ++ /* It must still be possible to apply SMP alternatives. 
*/ ++ if (num_possible_cpus() > 1) ++ start = (unsigned long)_etext; ++#endif + -+static inline void native_load_tr_desc(void) -+{ -+ asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8)); -+} -+ -+static inline void native_load_gdt(const struct Xgt_desc_struct *dtr) -+{ -+ asm volatile("lgdt %0"::"m" (*dtr)); -+} -+ -+static inline void native_load_idt(const struct Xgt_desc_struct *dtr) -+{ -+ asm volatile("lidt %0"::"m" (*dtr)); -+} -+ -+static inline void native_store_gdt(struct Xgt_desc_struct *dtr) -+{ -+ asm ("sgdt %0":"=m" (*dtr)); -+} -+ -+static inline void native_store_idt(struct Xgt_desc_struct *dtr) -+{ -+ asm ("sidt %0":"=m" (*dtr)); -+} -+ -+static inline unsigned long native_store_tr(void) -+{ -+ unsigned long tr; -+ asm ("str %0":"=r" (tr)); -+ return tr; -+} -+ -+static inline void native_load_tls(struct thread_struct *t, unsigned int cpu) -+{ -+ unsigned int i; -+ struct desc_struct *gdt = get_cpu_gdt_table(cpu); -+ -+ for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++) -+ gdt[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i]; -+} -+#else -+#define load_TLS(t, cpu) xen_load_tls(t, cpu) -+#define set_ldt xen_set_ldt ++#ifdef CONFIG_KPROBES ++ start = (unsigned long)__start_rodata; ++#endif + -+extern int write_ldt_entry(void *ldt, int entry, __u32 entry_a, __u32 entry_b); -+extern int write_gdt_entry(void *gdt, int entry, __u32 entry_a, __u32 entry_b); ++ end = (unsigned long)__end_rodata; ++ start = (start + PAGE_SIZE - 1) & PAGE_MASK; ++ end &= PAGE_MASK; ++ if (end <= start) ++ return; + +- for (; addr < (unsigned long)__end_rodata; addr += PAGE_SIZE) +- change_page_attr_addr(addr, 1, PAGE_KERNEL_RO); ++ change_page_attr_addr(start, (end - start) >> PAGE_SHIFT, PAGE_KERNEL_RO); + +- printk ("Write protecting the kernel read-only data: %luk\n", +- (__end_rodata - __start_rodata) >> 10); ++ printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n", ++ (end - start) >> 10); + + /* + * change_page_attr_addr() requires a global_flush_tlb() call after it. 
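The mark_rodata_ro() rework just above reduces to interval arithmetic: start at _stext, move the start up to _etext when CPU hotplug means SMP alternatives may still patch text, move it to __start_rodata when kprobes needs the text writable, round the interval inward to page boundaries, and hand the whole range to change_page_attr_addr() in one call, followed by the global_flush_tlb() the retained comment insists on. A standalone sketch of that range computation, with invented link addresses (the kprobes case is left out for brevity):

#include <stdio.h>

#define PAGE_SIZE 4096UL
#define PAGE_MASK (~(PAGE_SIZE - 1))

int main(void)
{
	unsigned long _stext = 0xffffffff80200000UL;     /* hypothetical */
	unsigned long _etext = 0xffffffff80412345UL;     /* hypothetical */
	unsigned long __end_rodata = 0xffffffff80500000UL;
	int hotplug_cpu = 1, nr_possible_cpus = 4;

	unsigned long start = _stext, end = __end_rodata;

	/* SMP alternatives still patch text while CPUs can be added. */
	if (hotplug_cpu && nr_possible_cpus > 1)
		start = _etext;

	start = (start + PAGE_SIZE - 1) & PAGE_MASK;     /* round up   */
	end &= PAGE_MASK;                                /* round down */
	if (end <= start)
		return 0;                                /* nothing to do */

	/* one change_page_attr_addr(start, n, PAGE_KERNEL_RO) call */
	printf("write-protect %lu pages from %#lx\n",
	       (end - start) >> 12 /* PAGE_SHIFT */, start);
	return 0;
}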
+@@ -1173,3 +1189,11 @@ int in_gate_area_no_task(unsigned long a + { + return (addr >= VSYSCALL_START) && (addr < VSYSCALL_END); + } + -+static inline void xen_load_tls(struct thread_struct *t, unsigned int cpu) ++#ifndef CONFIG_XEN ++void *alloc_bootmem_high_node(pg_data_t *pgdat, unsigned long size) +{ -+ unsigned int i; -+ struct desc_struct *gdt = get_cpu_gdt_table(cpu) + GDT_ENTRY_TLS_MIN; -+ -+ for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++) -+ if (HYPERVISOR_update_descriptor(virt_to_machine(&gdt[i]), -+ *(u64 *)&t->tls_array[i])) -+ BUG(); ++ return __alloc_bootmem_core(pgdat->bdata, size, ++ SMP_CACHE_BYTES, (4UL*1024*1024*1024), 0); +} - #endif ++#endif +--- sle11-2009-04-20.orig/arch/x86/mm/pageattr_64-xen.c 2009-03-04 11:25:55.000000000 +0100 ++++ sle11-2009-04-20/arch/x86/mm/pageattr_64-xen.c 2008-12-15 11:27:22.000000000 +0100 +@@ -215,13 +215,13 @@ void mm_pin_all(void) + preempt_enable(); + } - #ifndef CONFIG_X86_NO_IDT ---- a/include/asm-x86/mach-xen/asm/desc_64.h -+++ b/include/asm-x86/mach-xen/asm/desc_64.h -@@ -127,16 +127,6 @@ static inline void set_ldt_desc(unsigned - DESC_LDT, size * 8 - 1); +-void _arch_dup_mmap(struct mm_struct *mm) ++void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) + { + if (!mm->context.pinned) + mm_pin(mm); } --static inline void set_seg_base(unsigned cpu, int entry, void *base) --{ -- struct desc_struct *d = &cpu_gdt(cpu)[entry]; -- u32 addr = (u32)(u64)base; -- BUG_ON((u64)base >> 32); -- d->base0 = addr & 0xffff; -- d->base1 = (addr >> 16) & 0xff; -- d->base2 = (addr >> 24) & 0xff; --} -- - #define LDT_entry_a(info) \ - ((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff)) - /* Don't allow setting of the lm bit. It is useless anyways because -@@ -165,25 +155,15 @@ static inline void set_seg_base(unsigned - (info)->useable == 0 && \ - (info)->lm == 0) +-void _arch_exit_mmap(struct mm_struct *mm) ++void arch_exit_mmap(struct mm_struct *mm) + { + struct task_struct *tsk = current; --#if TLS_SIZE != 24 --# error update this code. --#endif -- - static inline void load_TLS(struct thread_struct *t, unsigned int cpu) +@@ -343,10 +343,11 @@ static void flush_kernel_map(void *arg) + struct page *pg; + + /* When clflush is available always use it because it is +- much cheaper than WBINVD */ +- if (!cpu_has_clflush) ++ much cheaper than WBINVD. 
Disable clflush for now because ++ the high level code is not ready yet */ ++ if (1 || !cpu_has_clflush) + asm volatile("wbinvd" ::: "memory"); +- list_for_each_entry(pg, l, lru) { ++ else list_for_each_entry(pg, l, lru) { + void *adr = page_address(pg); + if (cpu_has_clflush) + cache_flush_page(adr); +@@ -460,16 +461,24 @@ __change_page_attr(unsigned long address + */ + int change_page_attr_addr(unsigned long address, int numpages, pgprot_t prot) { --#if 0 -+ unsigned int i; - u64 *gdt = (u64 *)(cpu_gdt(cpu) + GDT_ENTRY_TLS_MIN); -- gdt[0] = t->tls_array[0]; -- gdt[1] = t->tls_array[1]; -- gdt[2] = t->tls_array[2]; --#endif --#define C(i) \ -- if (HYPERVISOR_update_descriptor(virt_to_machine(&cpu_gdt(cpu)[GDT_ENTRY_TLS_MIN + i]), \ -- t->tls_array[i])) \ -- BUG(); +- int err = 0; ++ int err = 0, kernel_map = 0; + int i; -- C(0); C(1); C(2); --#undef C -+ for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++) -+ if (HYPERVISOR_update_descriptor(virt_to_machine(&gdt[i]), -+ t->tls_array[i])) -+ BUG(); - } ++ if (address >= __START_KERNEL_map ++ && address < __START_KERNEL_map + KERNEL_TEXT_SIZE) { ++ address = (unsigned long)__va(__pa(address)); ++ kernel_map = 1; ++ } ++ + down_write(&init_mm.mmap_sem); + for (i = 0; i < numpages; i++, address += PAGE_SIZE) { + unsigned long pfn = __pa(address) >> PAGE_SHIFT; - /* ---- a/include/asm-x86/mach-xen/asm/dma-mapping_64.h -+++ b/include/asm-x86/mach-xen/asm/dma-mapping_64.h -@@ -51,7 +51,7 @@ struct dma_mapping_ops { - }; +- err = __change_page_attr(address, pfn, prot, PAGE_KERNEL); +- if (err) +- break; ++ if (!kernel_map || pte_present(pfn_pte(0, prot))) { ++ err = __change_page_attr(address, pfn, prot, PAGE_KERNEL); ++ if (err) ++ break; ++ } + /* Handle kernel mapping too which aliases part of the + * lowmem */ + if (__pa(address) < KERNEL_TEXT_SIZE) { +--- sle11-2009-04-20.orig/drivers/char/tpm/tpm_xen.c 2009-03-04 11:25:55.000000000 +0100 ++++ sle11-2009-04-20/drivers/char/tpm/tpm_xen.c 2008-12-15 11:27:22.000000000 +0100 +@@ -463,7 +463,7 @@ static int tpmif_connect(struct xenbus_d + tp->backend_id = domid; - extern dma_addr_t bad_dma_address; --extern struct dma_mapping_ops* dma_ops; -+extern const struct dma_mapping_ops* dma_ops; - extern int iommu_merge; + err = bind_listening_port_to_irqhandler( +- domid, tpmif_int, SA_SAMPLE_RANDOM, "tpmif", tp); ++ domid, tpmif_int, IRQF_SAMPLE_RANDOM, "tpmif", tp); + if (err <= 0) { + WPRINTK("bind_listening_port_to_irqhandler failed " + "(err=%d)\n", err); +--- sle11-2009-04-20.orig/drivers/pci/msi-xen.c 2009-03-04 11:25:55.000000000 +0100 ++++ sle11-2009-04-20/drivers/pci/msi-xen.c 2008-12-15 11:27:22.000000000 +0100 +@@ -12,16 +12,15 @@ + #include + #include + #include +-#include + #include + #include + #include ++#include - #if 0 ---- a/include/asm-x86/mach-xen/asm/fixmap_32.h -+++ b/include/asm-x86/mach-xen/asm/fixmap_32.h -@@ -19,10 +19,8 @@ - * the start of the fixmap. 
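Also above, change_page_attr_addr() in arch/x86/mm/pageattr_64-xen.c gains explicit alias handling: an address inside the high __START_KERNEL_map text window is first rewritten to its direct-mapping equivalent via __va(__pa(address)), and when the requested protection would make the PTE non-present the direct mapping is skipped; as far as the visible hunks show, that is what lets the new free_init_pages() path clear an init page's text alias with __pgprot(0) without unmapping the same frame from lowmem. A toy model of that canonicalization; both base constants are illustrative rather than the real layout of this kernel:

#include <stdio.h>

#define START_KERNEL_MAP 0xffffffff80000000UL   /* high kernel-text base */
#define KERNEL_TEXT_SIZE (40UL << 20)
#define PAGE_OFFSET      0xffff810000000000UL   /* direct-mapping base */

/* __va(__pa(x)) for a high-text address, as in the hunk */
static unsigned long direct_alias(unsigned long va)
{
	return (va - START_KERNEL_MAP) + PAGE_OFFSET;
}

int main(void)
{
	unsigned long addr = START_KERNEL_MAP + 0x123000;
	int kernel_map = 0, prot_present = 0;   /* __pgprot(0): not present */

	if (addr >= START_KERNEL_MAP &&
	    addr < START_KERNEL_MAP + KERNEL_TEXT_SIZE) {
		addr = direct_alias(addr);
		kernel_map = 1;
	}

	if (!kernel_map || prot_present)
		printf("change direct mapping at %#lx\n", addr);
	else
		printf("skip direct mapping, only touch the text alias\n");
	return 0;
}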
- */ - extern unsigned long __FIXADDR_TOP; --#ifdef CONFIG_COMPAT_VDSO --#define FIXADDR_USER_START __fix_to_virt(FIX_VDSO) --#define FIXADDR_USER_END __fix_to_virt(FIX_VDSO - 1) --#endif -+#define FIXADDR_USER_START __fix_to_virt(FIX_VDSO) -+#define FIXADDR_USER_END __fix_to_virt(FIX_VDSO - 1) + #include - #ifndef __ASSEMBLY__ - #include -@@ -85,6 +83,9 @@ enum fixed_addresses { - #ifdef CONFIG_PCI_MMCONFIG - FIX_PCIE_MCFG, - #endif -+#ifdef CONFIG_PARAVIRT -+ FIX_PARAVIRT_BOOTMAP, -+#endif - FIX_SHARED_INFO, - #define NR_FIX_ISAMAPS 256 - FIX_ISAMAP_END, ---- a/include/asm-x86/mach-xen/asm/fixmap_64.h -+++ b/include/asm-x86/mach-xen/asm/fixmap_64.h -@@ -15,7 +15,6 @@ - #include - #include - #include --#include - #include + #include + #include +-#include - /* ---- a/include/asm-x86/mach-xen/asm/highmem.h -+++ b/include/asm-x86/mach-xen/asm/highmem.h -@@ -67,12 +67,18 @@ extern void FASTCALL(kunmap_high(struct + #include "pci.h" + #include "msi.h" +@@ -154,6 +153,7 @@ int register_msi_get_owner(int (*func)(s + get_owner = func; + return 0; + } ++EXPORT_SYMBOL(register_msi_get_owner); - void *kmap(struct page *page); - void kunmap(struct page *page); -+void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot); - void *kmap_atomic(struct page *page, enum km_type type); - void *kmap_atomic_pte(struct page *page, enum km_type type); - void kunmap_atomic(void *kvaddr, enum km_type type); - void *kmap_atomic_pfn(unsigned long pfn, enum km_type type); - struct page *kmap_atomic_to_page(void *ptr); + int unregister_msi_get_owner(int (*func)(struct pci_dev *dev)) + { +@@ -162,6 +162,7 @@ int unregister_msi_get_owner(int (*func) + get_owner = NULL; + return 0; + } ++EXPORT_SYMBOL(unregister_msi_get_owner); -+#define kmap_atomic_pte(page, type) \ -+ kmap_atomic_prot(page, type, \ -+ test_bit(PG_pinned, &(page)->flags) \ -+ ? PAGE_KERNEL_RO : kmap_prot) -+ - #define flush_cache_kmaps() do { } while (0) + static int msi_get_dev_owner(struct pci_dev *dev) + { +@@ -263,11 +264,6 @@ static int msi_map_vector(struct pci_dev + return msi_map_pirq_to_vector(dev, -1, entry_nr, table_base); + } - void clear_highpage(struct page *); ---- a/include/asm-x86/mach-xen/asm/io_32.h -+++ b/include/asm-x86/mach-xen/asm/io_32.h -@@ -263,15 +263,18 @@ static inline void flush_write_buffers(v +-static int msi_init(void) +-{ +- return 0; +-} +- + #ifdef CONFIG_PM + static void __pci_restore_msi_state(struct pci_dev *dev) + { +@@ -434,21 +430,32 @@ static int msix_capability_init(struct p + } - #endif /* __KERNEL__ */ + /** +- * pci_msi_supported - check whether MSI may be enabled on device ++ * pci_msi_check_device - check whether MSI may be enabled on a device + * @dev: pointer to the pci_dev data structure of MSI device function ++ * @nvec: how many MSIs have been requested ? ++ * @type: are we checking for MSI or MSI-X ? + * + * Look at global flags, the device itself, and its parent busses +- * to return 0 if MSI are supported for the device. ++ * to determine if MSI/-X are supported for the device. If MSI/-X is ++ * supported return 0, else return an error code. 
+ **/
+-static
+-int pci_msi_supported(struct pci_dev * dev)
++static int pci_msi_check_device(struct pci_dev* dev, int nvec, int type)
+ {
+ struct pci_bus *bus;
++ int ret;
+
+ /* MSI must be globally enabled and supported by the device */
+ if (!pci_msi_enable || !dev || dev->no_msi)
+ return -EINVAL;
+
++ /*
++ * You can't ask to have 0 or less MSIs configured.
++ * a) it's stupid ..
++ * b) the list manipulation code assumes nvec >= 1.
++ */
++ if (nvec < 1)
++ return -ERANGE;
++
+ /* Any bridge which does NOT route MSI transactions from it's
+ * secondary bus to it's primary bus must set NO_MSI flag on
+ * the secondary pci_bus.
+@@ -459,6 +466,13 @@ int pci_msi_supported(struct pci_dev * d
+ if (bus->bus_flags & PCI_BUS_FLAGS_NO_MSI)
+ return -EINVAL;
+
++ ret = arch_msi_check_device(dev, nvec, type);
++ if (ret)
++ return ret;
++
++ if (!pci_find_capability(dev, type))
++ return -EINVAL;
++
+ return 0;
+ }
---- a/include/asm-x86/mach-xen/asm/io_32.h
-+++ b/include/asm-x86/mach-xen/asm/io_32.h
-@@ -263,15 +263,18 @@ static inline void flush_write_buffers(v
-
- #endif /* __KERNEL__ */
-
--#define __SLOW_DOWN_IO "outb %%al,$0x80;"
-+static inline void xen_io_delay(void)
-+{
-+ asm volatile("outb %%al,$0x80" : : : "memory");
-+}
-
- static inline void slow_down_io(void) {
-- __asm__ __volatile__(
-- __SLOW_DOWN_IO
-+ xen_io_delay();
- #ifdef REALLY_SLOW_IO
-- __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO
-+ xen_io_delay();
-+ xen_io_delay();
-+ xen_io_delay();
- #endif
-- : : );
- }
-
- #ifdef CONFIG_X86_NUMAQ
---- a/include/asm-x86/mach-xen/asm/irqflags_32.h
-+++ b/include/asm-x86/mach-xen/asm/irqflags_32.h
-@@ -11,6 +11,40 @@
- #define _ASM_IRQFLAGS_H
-
- #ifndef __ASSEMBLY__
-+#define xen_save_fl(void) (current_vcpu_info()->evtchn_upcall_mask)
-+
-+#define xen_restore_fl(f) \
-+do { \
-+ vcpu_info_t *_vcpu; \
-+ barrier(); \
-+ _vcpu = current_vcpu_info(); \
-+ if ((_vcpu->evtchn_upcall_mask = (f)) == 0) { \
-+ barrier(); /* unmask then check (avoid races) */\
-+ if (unlikely(_vcpu->evtchn_upcall_pending)) \
-+ force_evtchn_callback(); \
-+ } \
-+} while (0)
-+
-+#define xen_irq_disable() \
-+do { \
-+ current_vcpu_info()->evtchn_upcall_mask = 1; \
-+ barrier(); \
-+} while (0)
-+
-+#define xen_irq_enable() \
-+do { \
-+ vcpu_info_t *_vcpu; \
-+ barrier(); \
-+ _vcpu = current_vcpu_info(); \
-+ _vcpu->evtchn_upcall_mask = 0; \
-+ barrier(); /* unmask then check (avoid races) */ \
-+ if (unlikely(_vcpu->evtchn_upcall_pending)) \
-+ force_evtchn_callback(); \
-+} while (0)
-+
-+void xen_safe_halt(void);
-+
-+void xen_halt(void);
-
- /*
- * The use of 'barrier' in the following reflects their use as local-lock
-@@ -20,48 +54,31 @@
- * includes these barriers, for example.
- */
-
--#define __raw_local_save_flags() (current_vcpu_info()->evtchn_upcall_mask)
-+#define __raw_local_save_flags() xen_save_fl()
-
--#define raw_local_irq_restore(x) \
--do { \
-- vcpu_info_t *_vcpu; \
-- barrier(); \
-- _vcpu = current_vcpu_info(); \
-- if ((_vcpu->evtchn_upcall_mask = (x)) == 0) { \
-- barrier(); /* unmask then check (avoid races) */ \
-- if (unlikely(_vcpu->evtchn_upcall_pending)) \
-- force_evtchn_callback(); \
-- } \
--} while (0)
-+#define raw_local_irq_restore(flags) xen_restore_fl(flags)
-
--#define raw_local_irq_disable() \
--do { \
-- current_vcpu_info()->evtchn_upcall_mask = 1; \
-- barrier(); \
--} while (0)
-+#define raw_local_irq_disable() xen_irq_disable()
-
--#define raw_local_irq_enable() \
--do { \
-- vcpu_info_t *_vcpu; \
-- barrier(); \
-- _vcpu = current_vcpu_info(); \
-- _vcpu->evtchn_upcall_mask = 0; \
-- barrier(); /* unmask then check (avoid races) */ \
-- if (unlikely(_vcpu->evtchn_upcall_pending)) \
-- force_evtchn_callback(); \
--} while (0)
-+#define raw_local_irq_enable() xen_irq_enable()
-
- /*
- * Used in the idle loop; sti takes one instruction cycle
- * to complete:
- */
--void raw_safe_halt(void);
-+static inline void raw_safe_halt(void)
-+{
-+ xen_safe_halt();
-+}
-
- /*
- * Used when interrupts are already enabled or to
- * shutdown the processor:
- */
--void halt(void);
-+static inline void halt(void)
-+{
-+ xen_halt();
-+}
-
- /*
- * For spinlocks, etc:
---- a/include/asm-x86/mach-xen/asm/irqflags_64.h
-+++ b/include/asm-x86/mach-xen/asm/irqflags_64.h
-@@ -9,6 +9,7 @@
- */
- #ifndef _ASM_IRQFLAGS_H
- #define _ASM_IRQFLAGS_H
-+#include
-
- #ifndef __ASSEMBLY__
- /*
-@@ -50,19 +51,19 @@ static inline void raw_local_irq_disable
- {
- unsigned long flags = __raw_local_save_flags();
-
-- raw_local_irq_restore((flags & ~(1 << 9)) | (1 << 18));
-+ raw_local_irq_restore((flags & ~X86_EFLAGS_IF) | X86_EFLAGS_AC);
- }
-
- static inline void raw_local_irq_enable(void)
- {
- unsigned long flags = __raw_local_save_flags();
-
-- raw_local_irq_restore((flags | (1 << 9)) & ~(1 << 18));
-+ raw_local_irq_restore((flags | X86_EFLAGS_IF) & (~X86_EFLAGS_AC));
- }
-
- static inline int raw_irqs_disabled_flags(unsigned long flags)
- {
-- return !(flags & (1<<9)) || (flags & (1 << 18));
-+ return !(flags & X86_EFLAGS_IF) || (flags & X86_EFLAGS_AC);
- }
-
- #else /* CONFIG_X86_VSMP */
-@@ -118,13 +119,21 @@ static inline int raw_irqs_disabled_flag
- * Used in the idle loop; sti takes one instruction cycle
- * to complete:
- */
--void raw_safe_halt(void);
-+void xen_safe_halt(void);
-+static inline void raw_safe_halt(void)
-+{
-+ xen_safe_halt();
-+}
-
- /*
- * Used when interrupts are already enabled or to
- * shutdown the processor:
- */
--void halt(void);
-+void xen_halt(void);
-+static inline void halt(void)
-+{
-+ xen_halt();
-+}
-
- #else /* __ASSEMBLY__: */
- # ifdef CONFIG_TRACE_IRQFLAGS
+@@ -475,14 +489,11 @@ int pci_msi_supported(struct pci_dev * d
+ extern int pci_frontend_enable_msi(struct pci_dev *dev);
+ int pci_enable_msi(struct pci_dev* dev)
+ {
+- int pos, temp, status;
++ int temp, status;
+
+- if (pci_msi_supported(dev) < 0)
+- return -EINVAL;
+-
+- status = msi_init();
+- if (status < 0)
+- return status;
++ status = pci_msi_check_device(dev, 1, PCI_CAP_ID_MSI);
++ if (status)
++ return status;
+
+ #ifdef CONFIG_XEN_PCIDEV_FRONTEND
+ if (!is_initial_xendomain())
+@@ -503,10 +514,6 @@ int pci_enable_msi(struct pci_dev* dev)
+
+ temp = dev->irq;
+
+- pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
+- if (!pos)
+- return -EINVAL;
+-
+ /* Check whether driver already requested for MSI-X irqs */
+ if (dev->msix_enabled) {
+ printk(KERN_INFO "PCI: %s: Can't enable MSI. "
+@@ -521,15 +528,14 @@ int pci_enable_msi(struct pci_dev* dev)
+
+ return status;
+ }
++EXPORT_SYMBOL(pci_enable_msi);
+
+ extern void pci_frontend_disable_msi(struct pci_dev* dev);
+ void pci_disable_msi(struct pci_dev* dev)
+ {
+ int pirq;
+
+- if (!pci_msi_enable)
+- return;
+- if (!dev)
++ if (!pci_msi_enable || !dev)
+ return;
+
+ #ifdef CONFIG_XEN_PCIDEV_FRONTEND
+@@ -554,6 +560,7 @@ void pci_disable_msi(struct pci_dev* dev
+ pci_intx(dev, 1); /* enable intx */
+ dev->msi_enabled = 0;
+ }
++EXPORT_SYMBOL(pci_disable_msi);
+
+ /**
+ * pci_enable_msix - configure device's MSI-X capability structure
+@@ -578,7 +585,7 @@ int pci_enable_msix(struct pci_dev* dev,
+ int i, j, temp;
+ u16 control;
+
+- if (!entries || pci_msi_supported(dev) < 0)
++ if (!entries)
+ return -EINVAL;
+
+ #ifdef CONFIG_XEN_PCIDEV_FRONTEND
+@@ -616,14 +623,11 @@ int pci_enable_msix(struct pci_dev* dev,
+ }
+ #endif
+
+- status = msi_init();
+- if (status < 0)
++ status = pci_msi_check_device(dev, nvec, PCI_CAP_ID_MSIX);
++ if (status)
+ return status;
+
+ pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
+- if (!pos)
+- return -EINVAL;
+-
+ pci_read_config_word(dev, msi_control_reg(pos), &control);
+ nr_entries = multi_msix_capable(control);
+ if (nvec > nr_entries)
+@@ -655,6 +659,7 @@ int pci_enable_msix(struct pci_dev* dev,
+
+ return status;
+ }
++EXPORT_SYMBOL(pci_enable_msix);
+
+ extern void pci_frontend_disable_msix(struct pci_dev* dev);
+ void pci_disable_msix(struct pci_dev* dev)
+@@ -694,6 +699,7 @@ void pci_disable_msix(struct pci_dev* de
+ pci_intx(dev, 1); /* enable intx */
+ dev->msix_enabled = 0;
+ }
++EXPORT_SYMBOL(pci_disable_msix);
+
+ /**
+ * msi_remove_pci_irq_vectors - reclaim MSI(X) irqs to unused state
- */
-@@ -37,10 +51,6 @@ static inline void __prepare_arch_switch
- : : "r" (0) );
- }
-
--extern void mm_pin(struct mm_struct *mm);
--extern void mm_unpin(struct mm_struct *mm);
--void mm_pin_all(void);
--
- static inline void switch_mm(struct mm_struct *prev,
- struct mm_struct *next,
- struct task_struct *tsk)
-@@ -97,11 +107,10 @@ static inline void switch_mm(struct mm_s
- #define deactivate_mm(tsk, mm) \
- asm("movl %0,%%gs": :"r" (0));
-
--static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next)
--{
-- if (!test_bit(PG_pinned, &virt_to_page(next->pgd)->flags))
-- mm_pin(next);
-- switch_mm(prev, next, NULL);
--}
-+#define activate_mm(prev, next) \
-+ do { \
-+ xen_activate_mm(prev, next); \
-+ switch_mm((prev),(next),NULL); \
-+ } while(0)
-
- #endif
+--- sle11-2009-04-20.orig/drivers/xen/core/smpboot.c 2009-03-04 11:25:55.000000000 +0100
++++ sle11-2009-04-20/drivers/xen/core/smpboot.c 2008-12-15 11:27:22.000000000 +0100
+@@ -161,13 +161,12 @@ static void xen_smp_intr_exit(unsigned i
+
+ void __cpuinit cpu_bringup(void)
+ {
++ cpu_init();
+ #ifdef __i386__
+- cpu_set_gdt(current_thread_info()->cpu);
+- secondary_cpu_init();
++ identify_secondary_cpu(cpu_data + smp_processor_id());
+ #else
+- cpu_init();
+-#endif
+ identify_cpu(cpu_data + smp_processor_id());
++#endif
+ touch_softlockup_watchdog();
+ preempt_disable();
+ local_irq_enable();
+@@ -187,11 +186,6 @@ static void __cpuinit cpu_initialize_con
+ static DEFINE_SPINLOCK(ctxt_lock);
+
+ struct task_struct *idle = idle_task(cpu);
+-#ifdef __x86_64__
+- struct desc_ptr *gdt_descr = &cpu_gdt_descr[cpu];
+-#else
+- struct Xgt_desc_struct *gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
+-#endif
+
+ if (cpu_test_and_set(cpu, cpu_initialized_map))
+ return;
+@@ -214,11 +208,11 @@ static void __cpuinit cpu_initialize_con
+ smp_trap_init(ctxt.trap_ctxt);
+
+ ctxt.ldt_ents = 0;
+-
+- ctxt.gdt_frames[0] = virt_to_mfn(gdt_descr->address);
+- ctxt.gdt_ents = gdt_descr->size / 8;
++ ctxt.gdt_ents = GDT_SIZE / 8;
+
+ #ifdef __i386__
++ ctxt.gdt_frames[0] = virt_to_mfn(get_cpu_gdt_table(cpu));
++
+ ctxt.user_regs.cs = __KERNEL_CS;
+ ctxt.user_regs.esp = idle->thread.esp0 - sizeof(struct pt_regs);
+@@ -231,7 +225,11 @@ static void __cpuinit cpu_initialize_con
+ ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
+
+ ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));
+
++ ctxt.user_regs.fs = __KERNEL_PERCPU;
+ #else /* __x86_64__ */
++ ctxt.gdt_frames[0] = virt_to_mfn(cpu_gdt_descr[cpu].address);
+
+ ctxt.user_regs.cs = __KERNEL_CS;
+ ctxt.user_regs.esp = idle->thread.rsp0 - sizeof(struct pt_regs);
+@@ -261,9 +259,8 @@ void __init smp_prepare_cpus(unsigned in
+ struct vcpu_get_physid cpu_id;
+ #ifdef __x86_64__
+ struct desc_ptr *gdt_descr;
+-#else
+- struct Xgt_desc_struct *gdt_descr;
+ #endif
++ void *gdt_addr;
+
+ apicid = 0;
+ if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, 0, &cpu_id) == 0)
+@@ -313,14 +310,12 @@ void __init smp_prepare_cpus(unsigned in
+ }
+ gdt_descr->size = GDT_SIZE;
+ memcpy((void *)gdt_descr->address, cpu_gdt_table, GDT_SIZE);
++ gdt_addr = (void *)gdt_descr->address;
+ #else
+- if (unlikely(!init_gdt(cpu, idle)))
+- continue;
+- gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
++ init_gdt(cpu);
++ gdt_addr = get_cpu_gdt_table(cpu);
+ #endif
+- make_page_readonly(
+- (void *)gdt_descr->address,
+- XENFEAT_writable_descriptor_tables);
++ make_page_readonly(gdt_addr, XENFEAT_writable_descriptor_tables);
+
+ apicid = cpu;
+ if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, cpu, &cpu_id) == 0)
+@@ -334,7 +329,9 @@ void __init smp_prepare_cpus(unsigned in
+ #ifdef __x86_64__
+ cpu_pda(cpu)->pcurrent = idle;
+ cpu_pda(cpu)->cpunumber = cpu;
+- clear_ti_thread_flag(idle->thread_info, TIF_FORK);
++ clear_ti_thread_flag(task_thread_info(idle), TIF_FORK);
++#else
++ per_cpu(current_task, cpu) = idle;
+ #endif
+
+ irq_ctx_init(cpu);
+@@ -359,8 +356,12 @@ void __init smp_prepare_cpus(unsigned in
+ #endif
+ }
+
+-void __devinit smp_prepare_boot_cpu(void)
++void __init smp_prepare_boot_cpu(void)
+ {
++#ifdef __i386__
++ init_gdt(smp_processor_id());
++ switch_to_new_gdt();
++#endif
+ prefill_possible_map();
+ }
---- a/include/asm-x86/mach-xen/asm/mmu_context_64.h
-+++ b/include/asm-x86/mach-xen/asm/mmu_context_64.h
-@@ -9,6 +9,9 @@
- #include
- #include
-
-+void arch_exit_mmap(struct mm_struct *mm);
-+void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm);
-+
- /*
- * possibly do the LDT unload here?
- */
---- a/include/asm-x86/mach-xen/asm/page_64.h
-+++ b/include/asm-x86/mach-xen/asm/page_64.h
-@@ -7,6 +7,7 @@
- #include
- #include
- #endif
-+#include
- #include
-
- /*
-@@ -19,18 +20,14 @@
-
- /* PAGE_SHIFT determines the page size */
- #define PAGE_SHIFT 12
--#ifdef __ASSEMBLY__
--#define PAGE_SIZE (0x1 << PAGE_SHIFT)
--#else
--#define PAGE_SIZE (1UL << PAGE_SHIFT)
--#endif
-+#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT)
- #define PAGE_MASK (~(PAGE_SIZE-1))
-
- /* See Documentation/x86_64/mm.txt for a description of the memory map. */
- #define __PHYSICAL_MASK_SHIFT 46
--#define __PHYSICAL_MASK ((1UL << __PHYSICAL_MASK_SHIFT) - 1)
-+#define __PHYSICAL_MASK ((_AC(1,UL) << __PHYSICAL_MASK_SHIFT) - 1)
- #define __VIRTUAL_MASK_SHIFT 48
--#define __VIRTUAL_MASK ((1UL << __VIRTUAL_MASK_SHIFT) - 1)
-+#define __VIRTUAL_MASK ((_AC(1,UL) << __VIRTUAL_MASK_SHIFT) - 1)
- #define PHYSICAL_PAGE_MASK (~(PAGE_SIZE-1) & __PHYSICAL_MASK)
-
-@@ -55,10 +52,10 @@
- #define N_EXCEPTION_STACKS 5 /* hw limit: 7 */
-
- #define LARGE_PAGE_MASK (~(LARGE_PAGE_SIZE-1))
--#define LARGE_PAGE_SIZE (1UL << PMD_SHIFT)
-+#define LARGE_PAGE_SIZE (_AC(1,UL) << PMD_SHIFT)
-
- #define HPAGE_SHIFT PMD_SHIFT
--#define HPAGE_SIZE ((1UL) << HPAGE_SHIFT)
-+#define HPAGE_SIZE (_AC(1,UL) << HPAGE_SHIFT)
- #define HPAGE_MASK (~(HPAGE_SIZE - 1))
- #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
-@@ -152,17 +149,23 @@ static inline pgd_t __pgd(unsigned long
-
- #define __pgprot(x) ((pgprot_t) { (x) } )
-
--#define __PHYSICAL_START ((unsigned long)CONFIG_PHYSICAL_START)
--#define __START_KERNEL (__START_KERNEL_map + __PHYSICAL_START)
--#define __START_KERNEL_map 0xffffffff80000000UL
--#define __PAGE_OFFSET 0xffff880000000000UL
-+#endif /* !__ASSEMBLY__ */
-
--#else
- #define __PHYSICAL_START CONFIG_PHYSICAL_START
-+#define __KERNEL_ALIGN 0x200000
-+
-+/*
-+ * Make sure kernel is aligned to 2MB address. Catching it at compile
-+ * time is better. Change your config file and compile the kernel
-+ * for a 2MB aligned address (CONFIG_PHYSICAL_START)
-+ */
-+#if (CONFIG_PHYSICAL_START % __KERNEL_ALIGN) != 0
-+#error "CONFIG_PHYSICAL_START must be a multiple of 2MB"
-+#endif
-+
- #define __START_KERNEL (__START_KERNEL_map + __PHYSICAL_START)
--#define __START_KERNEL_map 0xffffffff80000000
--#define __PAGE_OFFSET 0xffff880000000000
--#endif /* !__ASSEMBLY__ */
-+#define __START_KERNEL_map _AC(0xffffffff80000000, UL)
-+#define __PAGE_OFFSET _AC(0xffff880000000000, UL)
-
- #if CONFIG_XEN_COMPAT <= 0x030002
- #undef LOAD_OFFSET
-@@ -172,20 +175,20 @@ static inline pgd_t __pgd(unsigned long
- /* to align the pointer to the (next) page boundary */
- #define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK)
-
--#define KERNEL_TEXT_SIZE (40UL*1024*1024)
--#define KERNEL_TEXT_START 0xffffffff80000000UL
-+#define KERNEL_TEXT_SIZE (40*1024*1024)
-+#define KERNEL_TEXT_START _AC(0xffffffff80000000, UL)
-+
-+#define PAGE_OFFSET __PAGE_OFFSET
-
--#define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET)
-+#ifndef __ASSEMBLY__
-+static inline unsigned long __phys_addr(unsigned long x)
-+{
-+ return x - (x >= __START_KERNEL_map ? __START_KERNEL_map : PAGE_OFFSET);
-+}
-+#endif
-
--/* Note: __pa(&symbol_visible_to_c) should be always replaced with __pa_symbol.
-- Otherwise you risk miscompilation. */
--#define __pa(x) (((unsigned long)(x)>=__START_KERNEL_map)?(unsigned long)(x) - (unsigned long)__START_KERNEL_map:(unsigned long)(x) - PAGE_OFFSET)
--/* __pa_symbol should be used for C visible symbols.
-- This seems to be the official gcc blessed way to do such arithmetic. */
--#define __pa_symbol(x) \
-- ({unsigned long v; \
-- asm("" : "=r" (v) : "0" (x)); \
-- __pa(v); })
-+#define __pa(x) __phys_addr((unsigned long)(x))
-+#define __pa_symbol(x) __phys_addr((unsigned long)(x))
-
- #define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET))
- #define __boot_va(x) __va(x)
---- a/include/asm-x86/mach-xen/asm/pgalloc_32.h
-+++ b/include/asm-x86/mach-xen/asm/pgalloc_32.h
-@@ -1,7 +1,6 @@
- #ifndef _I386_PGALLOC_H
- #define _I386_PGALLOC_H
-
--#include
- #include
- #include /* for struct page */
- #include /* for phys_to_virt and page_to_pseudophys */
-@@ -69,6 +68,4 @@ do { \
- #define pud_populate(mm, pmd, pte) BUG()
- #endif
-
--#define check_pgt_cache() do { } while (0)
--
- #endif /* _I386_PGALLOC_H */
---- a/include/asm-x86/mach-xen/asm/pgalloc_64.h
-+++ b/include/asm-x86/mach-xen/asm/pgalloc_64.h
-@@ -1,7 +1,6 @@
- #ifndef _X86_64_PGALLOC_H
- #define _X86_64_PGALLOC_H
-
--#include
- #include
- #include
- #include
-@@ -100,24 +99,16 @@ static inline void pgd_list_add(pgd_t *p
- struct page *page = virt_to_page(pgd);
-
- spin_lock(&pgd_lock);
-- page->index = (pgoff_t)pgd_list;
-- if (pgd_list)
-- pgd_list->private = (unsigned long)&page->index;
-- pgd_list = page;
-- page->private = (unsigned long)&pgd_list;
-+ list_add(&page->lru, &pgd_list);
- spin_unlock(&pgd_lock);
- }
-
- static inline void pgd_list_del(pgd_t *pgd)
- {
-- struct page *next, **pprev, *page = virt_to_page(pgd);
-+ struct page *page = virt_to_page(pgd);
-
- spin_lock(&pgd_lock);
-- next = (struct page *)page->index;
-- pprev = (struct page **)page->private;
-- *pprev = next;
-- if (next)
-- next->private = (unsigned long)pprev;
-+ list_del(&page->lru);
- spin_unlock(&pgd_lock);
- }
---- a/include/asm-x86/mach-xen/asm/pgtable_32.h
-+++ b/include/asm-x86/mach-xen/asm/pgtable_32.h
-@@ -24,11 +24,11 @@
- #include
- #include
- #include
-+#include
-
- /* Is this pagetable pinned?
*/ - #define PG_pinned PG_arch_1 + int __init static xen_compilation_init(void) + { +- return sysfs_create_group(&hypervisor_subsys.kset.kobj, ++ return sysfs_create_group(&hypervisor_subsys.kobj, + &xen_compilation_group); + } --struct mm_struct; - struct vm_area_struct; + static void xen_compilation_destroy(void) + { +- sysfs_remove_group(&hypervisor_subsys.kset.kobj, ++ sysfs_remove_group(&hypervisor_subsys.kobj, + &xen_compilation_group); + } - /* -@@ -38,17 +38,16 @@ struct vm_area_struct; - #define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) - extern unsigned long empty_zero_page[1024]; - extern pgd_t *swapper_pg_dir; --extern struct kmem_cache *pgd_cache; - extern struct kmem_cache *pmd_cache; - extern spinlock_t pgd_lock; - extern struct page *pgd_list; -+void check_pgt_cache(void); +@@ -325,13 +325,13 @@ static struct attribute_group xen_proper - void pmd_ctor(void *, struct kmem_cache *, unsigned long); --void pgd_ctor(void *, struct kmem_cache *, unsigned long); --void pgd_dtor(void *, struct kmem_cache *, unsigned long); - void pgtable_cache_init(void); - void paging_init(void); + static int __init xen_properties_init(void) + { +- return sysfs_create_group(&hypervisor_subsys.kset.kobj, ++ return sysfs_create_group(&hypervisor_subsys.kobj, + &xen_properties_group); + } -+ - /* - * The Linux x86 paging architecture is 'compile-time dual-mode', it - * implements both the traditional 2-level x86 page tables and the -@@ -165,6 +164,7 @@ void paging_init(void); + static void xen_properties_destroy(void) + { +- sysfs_remove_group(&hypervisor_subsys.kset.kobj, ++ sysfs_remove_group(&hypervisor_subsys.kobj, + &xen_properties_group); + } - extern unsigned long long __PAGE_KERNEL, __PAGE_KERNEL_EXEC; - #define __PAGE_KERNEL_RO (__PAGE_KERNEL & ~_PAGE_RW) -+#define __PAGE_KERNEL_RX (__PAGE_KERNEL_EXEC & ~_PAGE_RW) - #define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_PCD) - #define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE) - #define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE) -@@ -172,6 +172,7 @@ extern unsigned long long __PAGE_KERNEL, - #define PAGE_KERNEL __pgprot(__PAGE_KERNEL) - #define PAGE_KERNEL_RO __pgprot(__PAGE_KERNEL_RO) - #define PAGE_KERNEL_EXEC __pgprot(__PAGE_KERNEL_EXEC) -+#define PAGE_KERNEL_RX __pgprot(__PAGE_KERNEL_RX) - #define PAGE_KERNEL_NOCACHE __pgprot(__PAGE_KERNEL_NOCACHE) - #define PAGE_KERNEL_LARGE __pgprot(__PAGE_KERNEL_LARGE) - #define PAGE_KERNEL_LARGE_EXEC __pgprot(__PAGE_KERNEL_LARGE_EXEC) -@@ -275,7 +276,13 @@ static inline pte_t pte_mkhuge(pte_t pte - */ - #define pte_update(mm, addr, ptep) do { } while (0) - #define pte_update_defer(mm, addr, ptep) do { } while (0) --#define paravirt_map_pt_hook(slot, va, pfn) do { } while (0) -+ -+/* local pte updates need not use xchg for locking */ -+static inline pte_t xen_local_ptep_get_and_clear(pte_t *ptep, pte_t res) -+{ -+ xen_set_pte(ptep, __pte(0)); -+ return res; -+} +@@ -350,13 +350,13 @@ HYPERVISOR_ATTR_RO(vmcoreinfo); - /* - * We only update the dirty/accessed state if we set -@@ -286,17 +293,34 @@ static inline pte_t pte_mkhuge(pte_t pte - */ - #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS - #define ptep_set_access_flags(vma, address, ptep, entry, dirty) \ --do { \ -- if (dirty) \ -+({ \ -+ int __changed = !pte_same(*(ptep), entry); \ -+ if (__changed && (dirty)) \ - ptep_establish(vma, address, ptep, entry); \ --} while (0) -+ __changed; \ -+}) + static int __init xen_sysfs_vmcoreinfo_init(void) + { +- return sysfs_create_file(&hypervisor_subsys.kset.kobj, ++ return 
sysfs_create_file(&hypervisor_subsys.kobj, + &vmcoreinfo_attr.attr); + } --/* -- * We don't actually have these, but we want to advertise them so that -- * we can encompass the flush here. -- */ - #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY -+#define ptep_test_and_clear_dirty(vma, addr, ptep) ({ \ -+ int __ret = 0; \ -+ if (pte_dirty(*(ptep))) \ -+ __ret = test_and_clear_bit(_PAGE_BIT_DIRTY, \ -+ &(ptep)->pte_low); \ -+ if (__ret) \ -+ pte_update((vma)->vm_mm, addr, ptep); \ -+ __ret; \ -+}) -+ - #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG -+#define ptep_test_and_clear_young(vma, addr, ptep) ({ \ -+ int __ret = 0; \ -+ if (pte_young(*(ptep))) \ -+ __ret = test_and_clear_bit(_PAGE_BIT_ACCESSED, \ -+ &(ptep)->pte_low); \ -+ if (__ret) \ -+ pte_update((vma)->vm_mm, addr, ptep); \ -+ __ret; \ -+}) + static void xen_sysfs_vmcoreinfo_destroy(void) + { +- sysfs_remove_file(&hypervisor_subsys.kset.kobj, &vmcoreinfo_attr.attr); ++ sysfs_remove_file(&hypervisor_subsys.kobj, &vmcoreinfo_attr.attr); + } - /* - * Rules for using ptep_establish: the pte MUST be a user pte, and -@@ -323,7 +347,7 @@ do { \ - int __dirty = pte_dirty(__pte); \ - __pte = pte_mkclean(__pte); \ - if (test_bit(PG_pinned, &virt_to_page((vma)->vm_mm->pgd)->flags)) \ -- ptep_set_access_flags(vma, address, ptep, __pte, __dirty); \ -+ (void)ptep_set_access_flags(vma, address, ptep, __pte, __dirty); \ - else if (__dirty) \ - (ptep)->pte_low = __pte.pte_low; \ - __dirty; \ -@@ -336,7 +360,7 @@ do { \ - int __young = pte_young(__pte); \ - __pte = pte_mkold(__pte); \ - if (test_bit(PG_pinned, &virt_to_page((vma)->vm_mm->pgd)->flags)) \ -- ptep_set_access_flags(vma, address, ptep, __pte, __young); \ -+ (void)ptep_set_access_flags(vma, address, ptep, __pte, __young); \ - else if (__young) \ - (ptep)->pte_low = __pte.pte_low; \ - __young; \ -@@ -349,7 +373,7 @@ static inline pte_t ptep_get_and_clear(s - if (!pte_none(pte) - && (mm != &init_mm - || HYPERVISOR_update_va_mapping(addr, __pte(0), 0))) { -- pte = raw_ptep_get_and_clear(ptep, pte); -+ pte = xen_ptep_get_and_clear(ptep, pte); - pte_update(mm, addr, ptep); - } - return pte; -@@ -491,24 +515,10 @@ extern pte_t *lookup_address(unsigned lo #endif +--- sle11-2009-04-20.orig/drivers/xen/netback/netback.c 2009-03-04 11:28:34.000000000 +0100 ++++ sle11-2009-04-20/drivers/xen/netback/netback.c 2008-12-23 09:33:22.000000000 +0100 +@@ -195,7 +195,7 @@ static struct sk_buff *netbk_copy_skb(st + goto err; - #if defined(CONFIG_HIGHPTE) --#define pte_offset_map(dir, address) \ --({ \ -- pte_t *__ptep; \ -- unsigned pfn = pmd_val(*(dir)) >> PAGE_SHIFT; \ -- __ptep = (pte_t *)kmap_atomic_pte(pfn_to_page(pfn),KM_PTE0); \ -- paravirt_map_pt_hook(KM_PTE0,__ptep, pfn); \ -- __ptep = __ptep + pte_index(address); \ -- __ptep; \ --}) --#define pte_offset_map_nested(dir, address) \ --({ \ -- pte_t *__ptep; \ -- unsigned pfn = pmd_val(*(dir)) >> PAGE_SHIFT; \ -- __ptep = (pte_t *)kmap_atomic_pte(pfn_to_page(pfn),KM_PTE1); \ -- paravirt_map_pt_hook(KM_PTE1,__ptep, pfn); \ -- __ptep = __ptep + pte_index(address); \ -- __ptep; \ --}) -+#define pte_offset_map(dir, address) \ -+ ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE0) + pte_index(address)) -+#define pte_offset_map_nested(dir, address) \ -+ ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE1) + pte_index(address)) - #define pte_unmap(pte) kunmap_atomic(pte, KM_PTE0) - #define pte_unmap_nested(pte) kunmap_atomic(pte, KM_PTE1) - #else -@@ -584,10 +594,6 @@ int xen_change_pte_range(struct mm_struc - #define io_remap_pfn_range(vma,from,pfn,size,prot) 
\ - direct_remap_pfn_range(vma,from,pfn,size,prot,DOMID_IO) + skb_reserve(nskb, 16 + NET_IP_ALIGN); +- headlen = nskb->end - nskb->data; ++ headlen = skb_end_pointer(nskb) - nskb->data; + if (headlen > skb_headlen(skb)) + headlen = skb_headlen(skb); + ret = skb_copy_bits(skb, 0, __skb_put(nskb, headlen), headlen); +@@ -241,11 +241,15 @@ static struct sk_buff *netbk_copy_skb(st + len -= copy; + } --#define MK_IOSPACE_PFN(space, pfn) (pfn) --#define GET_IOSPACE(pfn) 0 --#define GET_PFN(pfn) (pfn) -- - #include ++#ifdef NET_SKBUFF_DATA_USES_OFFSET ++ offset = 0; ++#else + offset = nskb->data - skb->data; ++#endif - #endif /* _I386_PGTABLE_H */ ---- a/include/asm-x86/mach-xen/asm/pgtable-3level-defs.h -+++ b/include/asm-x86/mach-xen/asm/pgtable-3level-defs.h -@@ -1,7 +1,7 @@ - #ifndef _I386_PGTABLE_3LEVEL_DEFS_H - #define _I386_PGTABLE_3LEVEL_DEFS_H +- nskb->h.raw = skb->h.raw + offset; +- nskb->nh.raw = skb->nh.raw + offset; +- nskb->mac.raw = skb->mac.raw + offset; ++ nskb->transport_header = skb->transport_header + offset; ++ nskb->network_header = skb->network_header + offset; ++ nskb->mac_header = skb->mac_header + offset; --#define HAVE_SHARED_KERNEL_PMD 0 -+#define SHARED_KERNEL_PMD 0 + return nskb; - /* - * PGDIR_SHIFT determines what a top-level page table entry can map ---- a/include/asm-x86/mach-xen/asm/pgtable-3level.h -+++ b/include/asm-x86/mach-xen/asm/pgtable-3level.h -@@ -52,32 +52,40 @@ static inline int pte_exec_kernel(pte_t - * value and then use set_pte to update it. -ben - */ +@@ -1619,7 +1623,7 @@ static int __init netback_init(void) + (void)bind_virq_to_irqhandler(VIRQ_DEBUG, + 0, + netif_be_dbg, +- SA_SHIRQ, ++ IRQF_SHARED, + "net-be-dbg", + &netif_be_dbg); + #endif +--- sle11-2009-04-20.orig/drivers/xen/netfront/netfront.c 2009-03-30 16:35:44.000000000 +0200 ++++ sle11-2009-04-20/drivers/xen/netfront/netfront.c 2009-03-30 16:36:30.000000000 +0200 +@@ -513,7 +513,7 @@ static int setup_device(struct xenbus_de + memcpy(netdev->dev_addr, info->mac, ETH_ALEN); --static inline void set_pte(pte_t *ptep, pte_t pte) -+static inline void xen_set_pte(pte_t *ptep, pte_t pte) - { - ptep->pte_high = pte.pte_high; - smp_wmb(); - ptep->pte_low = pte.pte_low; - } --#define set_pte_atomic(pteptr,pteval) \ -- set_64bit((unsigned long long *)(pteptr),__pte_val(pteval)) + err = bind_listening_port_to_irqhandler( +- dev->otherend_id, netif_int, SA_SAMPLE_RANDOM, netdev->name, ++ dev->otherend_id, netif_int, IRQF_SAMPLE_RANDOM, netdev->name, + netdev); + if (err < 0) + goto fail; +--- sle11-2009-04-20.orig/drivers/xen/pciback/xenbus.c 2009-03-04 11:25:55.000000000 +0100 ++++ sle11-2009-04-20/drivers/xen/pciback/xenbus.c 2008-12-15 11:27:22.000000000 +0100 +@@ -99,7 +99,7 @@ static int pciback_do_attach(struct pcib --#define set_pte_at(_mm,addr,ptep,pteval) do { \ -- if (((_mm) != current->mm && (_mm) != &init_mm) || \ -- HYPERVISOR_update_va_mapping((addr), (pteval), 0)) \ -- set_pte((ptep), (pteval)); \ --} while (0) -- --#define set_pmd(pmdptr,pmdval) \ -- xen_l2_entry_update((pmdptr), (pmdval)) --#define set_pud(pudptr,pudval) \ -- xen_l3_entry_update((pudptr), (pudval)) -+static inline void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, -+ pte_t *ptep , pte_t pte) -+{ -+ if ((mm != current->mm && mm != &init_mm) || -+ HYPERVISOR_update_va_mapping(addr, pte, 0)) -+ xen_set_pte(ptep, pte); -+} -+ -+static inline void xen_set_pte_atomic(pte_t *ptep, pte_t pte) -+{ -+ set_64bit((unsigned long long *)(ptep),__pte_val(pte)); -+} -+static inline void xen_set_pmd(pmd_t *pmdp, pmd_t 
pmd) -+{ -+ xen_l2_entry_update(pmdp, pmd); -+} -+static inline void xen_set_pud(pud_t *pudp, pud_t pud) -+{ -+ xen_l3_entry_update(pudp, pud); -+} + err = bind_interdomain_evtchn_to_irqhandler( + pdev->xdev->otherend_id, remote_evtchn, pciback_handle_event, +- SA_SAMPLE_RANDOM, "pciback", pdev); ++ IRQF_SAMPLE_RANDOM, "pciback", pdev); + if (err < 0) { + xenbus_dev_fatal(pdev->xdev, err, + "Error binding event channel to IRQ"); +--- sle11-2009-04-20.orig/drivers/xen/pcifront/xenbus.c 2009-03-04 11:25:55.000000000 +0100 ++++ sle11-2009-04-20/drivers/xen/pcifront/xenbus.c 2008-12-15 11:27:22.000000000 +0100 +@@ -10,10 +10,6 @@ + #include + #include "pcifront.h" - /* - * For PTEs and PDEs, we must clear the P-bit first when clearing a page table - * entry, so clear the bottom half first and enforce ordering with a compiler - * barrier. - */ --static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) -+static inline void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) - { - if ((mm != current->mm && mm != &init_mm) - || HYPERVISOR_update_va_mapping(addr, __pte(0), 0)) { -@@ -87,7 +95,18 @@ static inline void pte_clear(struct mm_s - } - } +-#ifndef __init_refok +-#define __init_refok +-#endif +- + #define INVALID_GRANT_REF (0) + #define INVALID_EVTCHN (-1) --#define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0) -+static inline void xen_pmd_clear(pmd_t *pmd) -+{ -+ xen_l2_entry_update(pmd, __pmd(0)); -+} -+ -+#define set_pte(ptep, pte) xen_set_pte(ptep, pte) -+#define set_pte_at(mm, addr, ptep, pte) xen_set_pte_at(mm, addr, ptep, pte) -+#define set_pte_atomic(ptep, pte) xen_set_pte_atomic(ptep, pte) -+#define set_pmd(pmdp, pmd) xen_set_pmd(pmdp, pmd) -+#define set_pud(pudp, pud) xen_set_pud(pudp, pud) -+#define pte_clear(mm, addr, ptep) xen_pte_clear(mm, addr, ptep) -+#define pmd_clear(pmd) xen_pmd_clear(pmd) +--- sle11-2009-04-20.orig/drivers/xen/scsifront/xenbus.c 2009-03-04 11:25:55.000000000 +0100 ++++ sle11-2009-04-20/drivers/xen/scsifront/xenbus.c 2008-12-15 11:27:22.000000000 +0100 +@@ -96,7 +96,7 @@ static int scsifront_alloc_ring(struct v - /* - * Pentium-II erratum A13: in PAE mode we explicitly have to flush -@@ -108,7 +127,8 @@ static inline void pud_clear (pud_t * pu - #define pmd_offset(pud, address) ((pmd_t *) pud_page(*(pud)) + \ - pmd_index(address)) + err = bind_listening_port_to_irqhandler( + dev->otherend_id, scsifront_intr, +- SA_SAMPLE_RANDOM, "scsifront", info); ++ IRQF_SAMPLE_RANDOM, "scsifront", info); --static inline pte_t raw_ptep_get_and_clear(pte_t *ptep, pte_t res) -+#ifdef CONFIG_SMP -+static inline pte_t xen_ptep_get_and_clear(pte_t *ptep, pte_t res) + if (err <= 0) { + xenbus_dev_fatal(dev, err, "bind_listening_port_to_irqhandler"); +--- sle11-2009-04-20.orig/drivers/xen/sfc_netback/accel_fwd.c 2009-04-29 08:44:31.000000000 +0200 ++++ sle11-2009-04-20/drivers/xen/sfc_netback/accel_fwd.c 2008-12-15 11:27:22.000000000 +0100 +@@ -308,7 +308,7 @@ static struct netback_accel *for_a_vnic( + static inline int packet_is_arp_reply(struct sk_buff *skb) { - uint64_t val = __pte_val(res); - if (__cmpxchg64(ptep, val, 0) != val) { -@@ -119,6 +139,9 @@ static inline pte_t raw_ptep_get_and_cle - } - return res; + return skb->protocol == ntohs(ETH_P_ARP) +- && skb->nh.arph->ar_op == ntohs(ARPOP_REPLY); ++ && arp_hdr(skb)->ar_op == ntohs(ARPOP_REPLY); } -+#else -+#define xen_ptep_get_and_clear(xp, pte) xen_local_ptep_get_and_clear(xp, pte) -+#endif - #define __HAVE_ARCH_PTEP_CLEAR_FLUSH - #define ptep_clear_flush(vma, addr, 
ptep) \ -@@ -165,13 +188,13 @@ extern unsigned long long __supported_pt - static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot) - { - return __pte((((unsigned long long)page_nr << PAGE_SHIFT) | -- pgprot_val(pgprot)) & __supported_pte_mask); -+ pgprot_val(pgprot)) & __supported_pte_mask); - } - static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot) - { - return __pmd((((unsigned long long)page_nr << PAGE_SHIFT) | -- pgprot_val(pgprot)) & __supported_pte_mask); -+ pgprot_val(pgprot)) & __supported_pte_mask); - } +@@ -392,12 +392,13 @@ void netback_accel_tx_packet(struct sk_b - /* -@@ -191,6 +214,4 @@ static inline pmd_t pfn_pmd(unsigned lon + BUG_ON(fwd_priv == NULL); - #define __pmd_free_tlb(tlb, x) do { } while (0) +- if (is_broadcast_ether_addr(skb->mac.raw) && packet_is_arp_reply(skb)) { ++ if (is_broadcast_ether_addr(skb_mac_header(skb)) ++ && packet_is_arp_reply(skb)) { + /* + * update our fast path forwarding to reflect this + * gratuitous ARP + */ +- mac = skb->mac.raw+ETH_ALEN; ++ mac = skb_mac_header(skb)+ETH_ALEN; --void vmalloc_sync_all(void); -- - #endif /* _I386_PGTABLE_3LEVEL_H */ ---- a/include/asm-x86/mach-xen/asm/pgtable_64.h -+++ b/include/asm-x86/mach-xen/asm/pgtable_64.h -@@ -1,12 +1,14 @@ - #ifndef _X86_64_PGTABLE_H - #define _X86_64_PGTABLE_H + DPRINTK("%s: found gratuitous ARP for " MAC_FMT "\n", + __FUNCTION__, MAC_ARG(mac)); +--- sle11-2009-04-20.orig/drivers/xen/sfc_netback/accel_solarflare.c 2009-04-29 08:44:31.000000000 +0200 ++++ sle11-2009-04-20/drivers/xen/sfc_netback/accel_solarflare.c 2009-03-30 16:36:16.000000000 +0200 +@@ -113,7 +113,7 @@ bend_dl_tx_packet(struct efx_dl_device * + BUG_ON(port == NULL); -+#include -+#ifndef __ASSEMBLY__ -+ - /* - * This file contains the functions and defines necessary to modify and use - * the x86-64 page table tree. 
- */
- #include
--#include
- #include
- #include
- #include
-@@ -33,11 +35,9 @@ extern pte_t *lookup_address(unsigned lo
- #endif
-
- extern pud_t level3_kernel_pgt[512];
--extern pud_t level3_physmem_pgt[512];
- extern pud_t level3_ident_pgt[512];
- extern pmd_t level2_kernel_pgt[512];
- extern pgd_t init_level4_pgt[];
--extern pgd_t boot_level4_pgt[];
- extern unsigned long __supported_pte_mask;
-
- #define swapper_pg_dir init_level4_pgt
-@@ -52,6 +52,8 @@ extern void clear_kernel_mapping(unsigne
- extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)];
- #define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
-
-+#endif /* !__ASSEMBLY__ */
-+
- /*
- * PGDIR_SHIFT determines what a top-level page table entry can map
- */
-@@ -76,6 +78,8 @@ extern unsigned long empty_zero_page[PAG
- */
- #define PTRS_PER_PTE 512
-
-+#ifndef __ASSEMBLY__
-+
- #define pte_ERROR(e) \
- printk("%s:%d: bad pte %p(%016lx pfn %010lx).\n", __FILE__, __LINE__, \
- &(e), __pte_val(e), pte_pfn(e))
-@@ -118,22 +122,23 @@ static inline void pgd_clear (pgd_t * pg
-
- #define pte_pgprot(a) (__pgprot((a).pte & ~PHYSICAL_PAGE_MASK))
-
--#define PMD_SIZE (1UL << PMD_SHIFT)
-+#endif /* !__ASSEMBLY__ */
-+
-+#define PMD_SIZE (_AC(1,UL) << PMD_SHIFT)
- #define PMD_MASK (~(PMD_SIZE-1))
--#define PUD_SIZE (1UL << PUD_SHIFT)
-+#define PUD_SIZE (_AC(1,UL) << PUD_SHIFT)
- #define PUD_MASK (~(PUD_SIZE-1))
--#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
-+#define PGDIR_SIZE (_AC(1,UL) << PGDIR_SHIFT)
- #define PGDIR_MASK (~(PGDIR_SIZE-1))
-
- #define USER_PTRS_PER_PGD ((TASK_SIZE-1)/PGDIR_SIZE+1)
- #define FIRST_USER_ADDRESS 0
--#ifndef __ASSEMBLY__
--#define MAXMEM 0x3fffffffffffUL
--#define VMALLOC_START 0xffffc20000000000UL
--#define VMALLOC_END 0xffffe1ffffffffffUL
--#define MODULES_VADDR 0xffffffff88000000UL
--#define MODULES_END 0xfffffffffff00000UL
-+#define MAXMEM _AC(0x3fffffffffff, UL)
-+#define VMALLOC_START _AC(0xffffc20000000000, UL)
-+#define VMALLOC_END _AC(0xffffe1ffffffffff, UL)
-+#define MODULES_VADDR _AC(0xffffffff88000000, UL)
-+#define MODULES_END _AC(0xfffffffffff00000, UL)
- #define MODULES_LEN (MODULES_END - MODULES_VADDR)
-
- #define _PAGE_BIT_PRESENT 0
-@@ -159,16 +164,18 @@ static inline void pgd_clear (pgd_t * pg
- #define _PAGE_GLOBAL 0x100 /* Global TLB entry */
-
- #define _PAGE_PROTNONE 0x080 /* If not present */
--#define _PAGE_NX (1UL<<_PAGE_BIT_NX)
-+#define _PAGE_NX (_AC(1,UL)<<_PAGE_BIT_NX)
-
- /* Mapped page is I/O or foreign and has no associated page struct. */
- #define _PAGE_IO 0x200
-
-+#ifndef __ASSEMBLY__
- #if CONFIG_XEN_COMPAT <= 0x030002
- extern unsigned int __kernel_page_user;
- #else
- #define __kernel_page_user 0
- #endif
-+#endif
-
- #define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY)
- #define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY | __kernel_page_user)
-@@ -233,6 +240,8 @@ extern unsigned int __kernel_page_user;
- #define __S110 PAGE_SHARED_EXEC
- #define __S111 PAGE_SHARED_EXEC
-
-+#ifndef __ASSEMBLY__
-+
- static inline unsigned long pgd_bad(pgd_t pgd)
- {
- return __pgd_val(pgd) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER);
-@@ -344,6 +353,20 @@ static inline pte_t pte_mkwrite(pte_t pt
- static inline pte_t pte_mkhuge(pte_t pte) { __pte_val(pte) |= _PAGE_PSE; return pte; }
- static inline pte_t pte_clrhuge(pte_t pte) { __pte_val(pte) &= ~_PAGE_PSE; return pte; }
-
-+static inline int ptep_test_and_clear_dirty(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
-+{
-+ if (!pte_dirty(*ptep))
-+ return 0;
-+ return test_and_clear_bit(_PAGE_BIT_DIRTY, &ptep->pte);
-+}
-+
-+static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
-+{
-+ if (!pte_young(*ptep))
-+ return 0;
-+ return test_and_clear_bit(_PAGE_BIT_ACCESSED, &ptep->pte);
-+}
-+
- static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
- {
- pte_t pte = *ptep;
+--- sle11-2009-04-20.orig/drivers/xen/sfc_netfront/accel_vi.c 2009-03-30 16:35:25.000000000 +0200
++++ sle11-2009-04-20/drivers/xen/sfc_netfront/accel_vi.c 2009-03-30 16:36:26.000000000 +0200
+@@ -463,7 +463,7 @@ netfront_accel_enqueue_skb_multi(netfron
+
+ if (skb->ip_summed == CHECKSUM_PARTIAL) {
+ /* Set to zero to encourage falcon to work it out for us */
+- *(u16*)(skb->h.raw + skb->csum_offset) = 0;
++ *(u16*)(skb->head + skb->csum_start + skb->csum_offset) = 0;
+ }
+
+ if (multi_post_start_new_buffer(vnic, &state)) {
+@@ -582,7 +585,7 @@ netfront_accel_enqueue_skb_single(netfro
+
+ if (skb->ip_summed == CHECKSUM_PARTIAL) {
+ /* Set to zero to encourage falcon to work it out for us */
+- *(u16*)(skb->h.raw + skb->csum_offset) = 0;
++ *(u16*)(skb->head + skb->csum_start + skb->csum_offset) = 0;
+ }
+ NETFRONT_ACCEL_PKTBUFF_FOR_EACH_FRAGMENT
+ (skb, idx, frag_data, frag_len, {
+--- sle11-2009-04-20.orig/drivers/xen/sfc_netfront/accel_xenbus.c 2009-04-29 08:44:31.000000000 +0200
++++ sle11-2009-04-20/drivers/xen/sfc_netfront/accel_xenbus.c 2008-12-15 11:27:22.000000000 +0100
+@@ -356,7 +356,7 @@ static int vnic_setup_domU_shared_state(
+ /* Create xenbus msg event channel */
+ err = bind_listening_port_to_irqhandler
+ (dev->otherend_id, netfront_accel_msg_channel_irq_from_bend,
+- SA_SAMPLE_RANDOM, "vnicctrl", vnic);
++ IRQF_SAMPLE_RANDOM, "vnicctrl", vnic);
+ if (err < 0) {
+ EPRINTK("Couldn't bind msg event channel\n");
+ goto fail_msg_irq;
+@@ -367,7 +367,7 @@ static int vnic_setup_domU_shared_state(
+ /* Create xenbus net event channel */
+ err = bind_listening_port_to_irqhandler
+ (dev->otherend_id, netfront_accel_net_channel_irq_from_bend,
+- SA_SAMPLE_RANDOM, "vnicfront", vnic);
++ IRQF_SAMPLE_RANDOM, "vnicfront", vnic);
+ if (err < 0) {
+ EPRINTK("Couldn't bind net event channel\n");
+ goto fail_net_irq;
+--- sle11-2009-04-20.orig/fs/aio.c 2009-04-29 08:44:31.000000000 +0200
++++ sle11-2009-04-20/fs/aio.c 2009-03-24 10:11:37.000000000 +0100
+@@ -38,7 +38,7 @@
+
+ #ifdef CONFIG_EPOLL
+ #include
+-#include
++#include
+ #endif
+
+ #if DEBUG > 1
+@@ -1325,7 +1325,7 @@ static const struct file_operations aioq
+
+ /* make_aio_fd:
+ * Create a file descriptor that can be used to poll the event queue.
+- * Based and piggybacked on the excellent epoll code.
++ * Based on the excellent epoll code.
+ */
+
+ static int make_aio_fd(struct kioctx *ioctx)
+@@ -1334,7 +1334,8 @@ static int make_aio_fd(struct kioctx *io
+ struct inode *inode;
+ struct file *file;
+
+- error = ep_getfd(&fd, &inode, &file, NULL, &aioq_fops);
++ error = anon_inode_getfd(&fd, &inode, &file, "[aioq]",
++ &aioq_fops, ioctx);
+ if (error)
+ return error;
+
-@@ -468,18 +491,12 @@ static inline pte_t pte_modify(pte_t pte
- * bit at the same time.
*/ - #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS - #define ptep_set_access_flags(vma, address, ptep, entry, dirty) \ -- do { \ -- if (dirty) \ -- ptep_establish(vma, address, ptep, entry); \ -- } while (0) +- return (struct desc_struct *)per_cpu(cpu_gdt_descr, cpu).address; ++ return per_cpu(gdt_page, cpu).gdt; + } + ++extern struct Xgt_desc_struct idt_descr; + extern struct desc_struct idt_table[]; + extern void set_intr_gate(unsigned int irq, void * addr); + +@@ -55,53 +56,32 @@ static inline void pack_gate(__u32 *a, _ + #define DESCTYPE_S 0x10 /* !system */ + + #ifndef CONFIG_XEN +-#define load_TR_desc() __asm__ __volatile__("ltr %w0"::"q" (GDT_ENTRY_TSS*8)) - +-#define load_gdt(dtr) __asm__ __volatile("lgdt %0"::"m" (*dtr)) +-#define load_idt(dtr) __asm__ __volatile("lidt %0"::"m" (*dtr)) ++#define load_TR_desc() native_load_tr_desc() ++#define load_gdt(dtr) native_load_gdt(dtr) ++#define load_idt(dtr) native_load_idt(dtr) + #define load_tr(tr) __asm__ __volatile("ltr %0"::"m" (tr)) + #define load_ldt(ldt) __asm__ __volatile("lldt %0"::"m" (ldt)) + +-#define store_gdt(dtr) __asm__ ("sgdt %0":"=m" (*dtr)) +-#define store_idt(dtr) __asm__ ("sidt %0":"=m" (*dtr)) +-#define store_tr(tr) __asm__ ("str %0":"=m" (tr)) ++#define store_gdt(dtr) native_store_gdt(dtr) ++#define store_idt(dtr) native_store_idt(dtr) ++#define store_tr(tr) (tr = native_store_tr()) + #define store_ldt(ldt) __asm__ ("sldt %0":"=m" (ldt)) +-#endif + +-#if TLS_SIZE != 24 +-# error update this code. +-#endif - --/* -- * i386 says: We don't actually have these, but we want to advertise -- * them so that we can encompass the flush here. -- */ --#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY --#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG -+({ \ -+ int __changed = !pte_same(*(ptep), entry); \ -+ if (__changed && (dirty)) \ -+ ptep_establish(vma, address, ptep, entry); \ -+ __changed; \ -+}) +-static inline void load_TLS(struct thread_struct *t, unsigned int cpu) +-{ +-#define C(i) if (HYPERVISOR_update_descriptor(virt_to_machine(&get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i]), \ +- *(u64 *)&t->tls_array[i]) \ +- BUG() +- C(0); C(1); C(2); +-#undef C +-} ++#define load_TLS(t, cpu) native_load_tls(t, cpu) ++#define set_ldt native_set_ldt - #define __HAVE_ARCH_PTEP_CLEAR_DIRTY_FLUSH - #define ptep_clear_flush_dirty(vma, address, ptep) \ -@@ -488,7 +505,7 @@ static inline pte_t pte_modify(pte_t pte - int __dirty = pte_dirty(__pte); \ - __pte = pte_mkclean(__pte); \ - if ((vma)->vm_mm->context.pinned) \ -- ptep_set_access_flags(vma, address, ptep, __pte, __dirty); \ -+ (void)ptep_set_access_flags(vma, address, ptep, __pte, __dirty); \ - else if (__dirty) \ - set_pte(ptep, __pte); \ +-#ifndef CONFIG_XEN + #define write_ldt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b) + #define write_gdt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b) + #define write_idt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b) + +-static inline void write_dt_entry(void *dt, int entry, __u32 entry_a, __u32 entry_b) ++static inline void write_dt_entry(struct desc_struct *dt, ++ int entry, u32 entry_low, u32 entry_high) + { +- __u32 *lp = (__u32 *)((char *)dt + entry*8); +- *lp = entry_a; +- *(lp+1) = entry_b; ++ dt[entry].a = entry_low; ++ dt[entry].b = entry_high; + } +-#define set_ldt native_set_ldt +-#else +-extern int write_ldt_entry(void *ldt, int entry, __u32 entry_a, __u32 entry_b); +-extern int write_gdt_entry(void *gdt, int entry, __u32 entry_a, __u32 entry_b); +-#define set_ldt xen_set_ldt +-#endif + +-#ifndef CONFIG_XEN 
+-static inline fastcall void native_set_ldt(const void *addr,
+- unsigned int entries)
++static inline void native_set_ldt(const void *addr, unsigned int entries)
+ {
+ if (likely(entries == 0))
+ __asm__ __volatile__("lldt %w0"::"q" (0));
+@@ -116,6 +96,65 @@ static inline fastcall void native_set_l
+ __asm__ __volatile__("lldt %w0"::"q" (GDT_ENTRY_LDT*8));
+ }
+ }
++
++
++static inline void native_load_tr_desc(void)
++{
++ asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8));
++}
++
++static inline void native_load_gdt(const struct Xgt_desc_struct *dtr)
++{
++ asm volatile("lgdt %0"::"m" (*dtr));
++}
++
++static inline void native_load_idt(const struct Xgt_desc_struct *dtr)
++{
++ asm volatile("lidt %0"::"m" (*dtr));
++}
++
++static inline void native_store_gdt(struct Xgt_desc_struct *dtr)
++{
++ asm ("sgdt %0":"=m" (*dtr));
++}
++
++static inline void native_store_idt(struct Xgt_desc_struct *dtr)
++{
++ asm ("sidt %0":"=m" (*dtr));
++}
++
++static inline unsigned long native_store_tr(void)
++{
++ unsigned long tr;
++ asm ("str %0":"=r" (tr));
++ return tr;
++}
++
++static inline void native_load_tls(struct thread_struct *t, unsigned int cpu)
++{
++ unsigned int i;
++ struct desc_struct *gdt = get_cpu_gdt_table(cpu);
++
++ for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++)
++ gdt[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i];
++}
++#else
++#define load_TLS(t, cpu) xen_load_tls(t, cpu)
++#define set_ldt xen_set_ldt
++
++extern int write_ldt_entry(void *ldt, int entry, __u32 entry_a, __u32 entry_b);
++extern int write_gdt_entry(void *gdt, int entry, __u32 entry_a, __u32 entry_b);
++
++static inline void xen_load_tls(struct thread_struct *t, unsigned int cpu)
++{
++ unsigned int i;
++ struct desc_struct *gdt = get_cpu_gdt_table(cpu) + GDT_ENTRY_TLS_MIN;
++
++ for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++)
++ if (HYPERVISOR_update_descriptor(virt_to_machine(&gdt[i]),
++ *(u64 *)&t->tls_array[i]))
++ BUG();
++}
+ #endif
+
+ #ifndef CONFIG_X86_NO_IDT
+--- sle11-2009-04-20.orig/include/asm-x86/mach-xen/asm/fixmap_32.h 2008-12-15 11:26:44.000000000 +0100
++++ sle11-2009-04-20/include/asm-x86/mach-xen/asm/fixmap_32.h 2008-12-15 11:27:22.000000000 +0100
+@@ -19,10 +19,8 @@
+ * the start of the fixmap.
+ */ + extern unsigned long __FIXADDR_TOP; +-#ifdef CONFIG_COMPAT_VDSO +-#define FIXADDR_USER_START __fix_to_virt(FIX_VDSO) +-#define FIXADDR_USER_END __fix_to_virt(FIX_VDSO - 1) +-#endif ++#define FIXADDR_USER_START __fix_to_virt(FIX_VDSO) ++#define FIXADDR_USER_END __fix_to_virt(FIX_VDSO - 1) + + #ifndef __ASSEMBLY__ + #include +@@ -85,6 +83,9 @@ enum fixed_addresses { + #ifdef CONFIG_PCI_MMCONFIG + FIX_PCIE_MCFG, + #endif ++#ifdef CONFIG_PARAVIRT ++ FIX_PARAVIRT_BOOTMAP, ++#endif + FIX_SHARED_INFO, + #define NR_FIX_ISAMAPS 256 + FIX_ISAMAP_END, +--- sle11-2009-04-20.orig/include/asm-x86/mach-xen/asm/highmem.h 2008-12-15 11:26:44.000000000 +0100 ++++ sle11-2009-04-20/include/asm-x86/mach-xen/asm/highmem.h 2008-12-15 11:27:22.000000000 +0100 +@@ -67,12 +67,18 @@ extern void FASTCALL(kunmap_high(struct + + void *kmap(struct page *page); + void kunmap(struct page *page); ++void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot); + void *kmap_atomic(struct page *page, enum km_type type); + void *kmap_atomic_pte(struct page *page, enum km_type type); + void kunmap_atomic(void *kvaddr, enum km_type type); + void *kmap_atomic_pfn(unsigned long pfn, enum km_type type); + struct page *kmap_atomic_to_page(void *ptr); + ++#define kmap_atomic_pte(page, type) \ ++ kmap_atomic_prot(page, type, \ ++ test_bit(PG_pinned, &(page)->flags) \ ++ ? PAGE_KERNEL_RO : kmap_prot) ++ + #define flush_cache_kmaps() do { } while (0) + + void clear_highpage(struct page *); +--- sle11-2009-04-20.orig/include/asm-x86/mach-xen/asm/irqflags_32.h 2008-12-15 11:26:44.000000000 +0100 ++++ sle11-2009-04-20/include/asm-x86/mach-xen/asm/irqflags_32.h 2008-12-15 11:27:22.000000000 +0100 +@@ -11,6 +11,40 @@ + #define _ASM_IRQFLAGS_H + + #ifndef __ASSEMBLY__ ++#define xen_save_fl(void) (current_vcpu_info()->evtchn_upcall_mask) ++ ++#define xen_restore_fl(f) \ ++do { \ ++ vcpu_info_t *_vcpu; \ ++ barrier(); \ ++ _vcpu = current_vcpu_info(); \ ++ if ((_vcpu->evtchn_upcall_mask = (f)) == 0) { \ ++ barrier(); /* unmask then check (avoid races) */\ ++ if (unlikely(_vcpu->evtchn_upcall_pending)) \ ++ force_evtchn_callback(); \ ++ } \ ++} while (0) ++ ++#define xen_irq_disable() \ ++do { \ ++ current_vcpu_info()->evtchn_upcall_mask = 1; \ ++ barrier(); \ ++} while (0) ++ ++#define xen_irq_enable() \ ++do { \ ++ vcpu_info_t *_vcpu; \ ++ barrier(); \ ++ _vcpu = current_vcpu_info(); \ ++ _vcpu->evtchn_upcall_mask = 0; \ ++ barrier(); /* unmask then check (avoid races) */ \ ++ if (unlikely(_vcpu->evtchn_upcall_pending)) \ ++ force_evtchn_callback(); \ ++} while (0) ++ ++void xen_safe_halt(void); ++ ++void xen_halt(void); + + /* + * The use of 'barrier' in the following reflects their use as local-lock +@@ -20,48 +54,31 @@ + * includes these barriers, for example. 
+ */ + +-#define __raw_local_save_flags() (current_vcpu_info()->evtchn_upcall_mask) ++#define __raw_local_save_flags() xen_save_fl() + +-#define raw_local_irq_restore(x) \ +-do { \ +- vcpu_info_t *_vcpu; \ +- barrier(); \ +- _vcpu = current_vcpu_info(); \ +- if ((_vcpu->evtchn_upcall_mask = (x)) == 0) { \ +- barrier(); /* unmask then check (avoid races) */ \ +- if (unlikely(_vcpu->evtchn_upcall_pending)) \ +- force_evtchn_callback(); \ +- } \ +-} while (0) ++#define raw_local_irq_restore(flags) xen_restore_fl(flags) + +-#define raw_local_irq_disable() \ +-do { \ +- current_vcpu_info()->evtchn_upcall_mask = 1; \ +- barrier(); \ +-} while (0) ++#define raw_local_irq_disable() xen_irq_disable() + +-#define raw_local_irq_enable() \ +-do { \ +- vcpu_info_t *_vcpu; \ +- barrier(); \ +- _vcpu = current_vcpu_info(); \ +- _vcpu->evtchn_upcall_mask = 0; \ +- barrier(); /* unmask then check (avoid races) */ \ +- if (unlikely(_vcpu->evtchn_upcall_pending)) \ +- force_evtchn_callback(); \ +-} while (0) ++#define raw_local_irq_enable() xen_irq_enable() + + /* + * Used in the idle loop; sti takes one instruction cycle + * to complete: + */ +-void raw_safe_halt(void); ++static inline void raw_safe_halt(void) ++{ ++ xen_safe_halt(); ++} + + /* + * Used when interrupts are already enabled or to + * shutdown the processor: + */ +-void halt(void); ++static inline void halt(void) ++{ ++ xen_halt(); ++} + + /* + * For spinlocks, etc: +--- sle11-2009-04-20.orig/include/asm-x86/mach-xen/asm/mmu_context_32.h 2009-03-04 11:25:55.000000000 +0100 ++++ sle11-2009-04-20/include/asm-x86/mach-xen/asm/mmu_context_32.h 2008-12-15 11:27:22.000000000 +0100 +@@ -6,6 +6,20 @@ + #include + #include + ++void arch_exit_mmap(struct mm_struct *mm); ++void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm); ++ ++void mm_pin(struct mm_struct *mm); ++void mm_unpin(struct mm_struct *mm); ++void mm_pin_all(void); ++ ++static inline void xen_activate_mm(struct mm_struct *prev, ++ struct mm_struct *next) ++{ ++ if (!test_bit(PG_pinned, &virt_to_page(next->pgd)->flags)) ++ mm_pin(next); ++} ++ + /* + * Used for LDT copy/destruction. 
+ */ +@@ -37,10 +51,6 @@ static inline void __prepare_arch_switch + : : "r" (0) ); + } + +-extern void mm_pin(struct mm_struct *mm); +-extern void mm_unpin(struct mm_struct *mm); +-void mm_pin_all(void); +- + static inline void switch_mm(struct mm_struct *prev, + struct mm_struct *next, + struct task_struct *tsk) +@@ -97,11 +107,10 @@ static inline void switch_mm(struct mm_s + #define deactivate_mm(tsk, mm) \ + asm("movl %0,%%gs": :"r" (0)); + +-static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next) +-{ +- if (!test_bit(PG_pinned, &virt_to_page(next->pgd)->flags)) +- mm_pin(next); +- switch_mm(prev, next, NULL); +-} ++#define activate_mm(prev, next) \ ++ do { \ ++ xen_activate_mm(prev, next); \ ++ switch_mm((prev),(next),NULL); \ ++ } while(0) + + #endif +--- sle11-2009-04-20.orig/include/asm-x86/mach-xen/asm/pgalloc_32.h 2009-03-04 11:25:55.000000000 +0100 ++++ sle11-2009-04-20/include/asm-x86/mach-xen/asm/pgalloc_32.h 2008-12-15 11:27:22.000000000 +0100 +@@ -1,7 +1,6 @@ + #ifndef _I386_PGALLOC_H + #define _I386_PGALLOC_H + +-#include + #include + #include /* for struct page */ + #include /* for phys_to_virt and page_to_pseudophys */ +@@ -69,6 +68,4 @@ do { \ + #define pud_populate(mm, pmd, pte) BUG() + #endif + +-#define check_pgt_cache() do { } while (0) +- + #endif /* _I386_PGALLOC_H */ +--- sle11-2009-04-20.orig/include/asm-x86/mach-xen/asm/pgtable-3level.h 2008-12-15 11:26:44.000000000 +0100 ++++ sle11-2009-04-20/include/asm-x86/mach-xen/asm/pgtable-3level.h 2008-12-15 11:27:22.000000000 +0100 +@@ -52,32 +52,40 @@ static inline int pte_exec_kernel(pte_t + * value and then use set_pte to update it. -ben + */ + +-static inline void set_pte(pte_t *ptep, pte_t pte) ++static inline void xen_set_pte(pte_t *ptep, pte_t pte) + { + ptep->pte_high = pte.pte_high; + smp_wmb(); + ptep->pte_low = pte.pte_low; + } +-#define set_pte_atomic(pteptr,pteval) \ +- set_64bit((unsigned long long *)(pteptr),__pte_val(pteval)) + +-#define set_pte_at(_mm,addr,ptep,pteval) do { \ +- if (((_mm) != current->mm && (_mm) != &init_mm) || \ +- HYPERVISOR_update_va_mapping((addr), (pteval), 0)) \ +- set_pte((ptep), (pteval)); \ +-} while (0) +- +-#define set_pmd(pmdptr,pmdval) \ +- xen_l2_entry_update((pmdptr), (pmdval)) +-#define set_pud(pudptr,pudval) \ +- xen_l3_entry_update((pudptr), (pudval)) ++static inline void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, ++ pte_t *ptep , pte_t pte) ++{ ++ if ((mm != current->mm && mm != &init_mm) || ++ HYPERVISOR_update_va_mapping(addr, pte, 0)) ++ xen_set_pte(ptep, pte); ++} ++ ++static inline void xen_set_pte_atomic(pte_t *ptep, pte_t pte) ++{ ++ set_64bit((unsigned long long *)(ptep),__pte_val(pte)); ++} ++static inline void xen_set_pmd(pmd_t *pmdp, pmd_t pmd) ++{ ++ xen_l2_entry_update(pmdp, pmd); ++} ++static inline void xen_set_pud(pud_t *pudp, pud_t pud) ++{ ++ xen_l3_entry_update(pudp, pud); ++} + + /* + * For PTEs and PDEs, we must clear the P-bit first when clearing a page table + * entry, so clear the bottom half first and enforce ordering with a compiler + * barrier. 
+ */ +-static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) ++static inline void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) + { + if ((mm != current->mm && mm != &init_mm) + || HYPERVISOR_update_va_mapping(addr, __pte(0), 0)) { +@@ -87,7 +95,18 @@ static inline void pte_clear(struct mm_s + } + } + +-#define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0) ++static inline void xen_pmd_clear(pmd_t *pmd) ++{ ++ xen_l2_entry_update(pmd, __pmd(0)); ++} ++ ++#define set_pte(ptep, pte) xen_set_pte(ptep, pte) ++#define set_pte_at(mm, addr, ptep, pte) xen_set_pte_at(mm, addr, ptep, pte) ++#define set_pte_atomic(ptep, pte) xen_set_pte_atomic(ptep, pte) ++#define set_pmd(pmdp, pmd) xen_set_pmd(pmdp, pmd) ++#define set_pud(pudp, pud) xen_set_pud(pudp, pud) ++#define pte_clear(mm, addr, ptep) xen_pte_clear(mm, addr, ptep) ++#define pmd_clear(pmd) xen_pmd_clear(pmd) + + /* + * Pentium-II erratum A13: in PAE mode we explicitly have to flush +@@ -108,7 +127,8 @@ static inline void pud_clear (pud_t * pu + #define pmd_offset(pud, address) ((pmd_t *) pud_page(*(pud)) + \ + pmd_index(address)) + +-static inline pte_t raw_ptep_get_and_clear(pte_t *ptep, pte_t res) ++#ifdef CONFIG_SMP ++static inline pte_t xen_ptep_get_and_clear(pte_t *ptep, pte_t res) + { + uint64_t val = __pte_val(res); + if (__cmpxchg64(ptep, val, 0) != val) { +@@ -119,6 +139,9 @@ static inline pte_t raw_ptep_get_and_cle + } + return res; + } ++#else ++#define xen_ptep_get_and_clear(xp, pte) xen_local_ptep_get_and_clear(xp, pte) ++#endif + + #define __HAVE_ARCH_PTEP_CLEAR_FLUSH + #define ptep_clear_flush(vma, addr, ptep) \ +@@ -165,13 +188,13 @@ extern unsigned long long __supported_pt + static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot) + { + return __pte((((unsigned long long)page_nr << PAGE_SHIFT) | +- pgprot_val(pgprot)) & __supported_pte_mask); ++ pgprot_val(pgprot)) & __supported_pte_mask); + } + + static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot) + { + return __pmd((((unsigned long long)page_nr << PAGE_SHIFT) | +- pgprot_val(pgprot)) & __supported_pte_mask); ++ pgprot_val(pgprot)) & __supported_pte_mask); + } + + /* +@@ -191,6 +214,4 @@ static inline pmd_t pfn_pmd(unsigned lon + + #define __pmd_free_tlb(tlb, x) do { } while (0) + +-void vmalloc_sync_all(void); +- + #endif /* _I386_PGTABLE_3LEVEL_H */ +--- sle11-2009-04-20.orig/include/asm-x86/mach-xen/asm/pgtable-3level-defs.h 2009-04-29 08:44:31.000000000 +0200 ++++ sle11-2009-04-20/include/asm-x86/mach-xen/asm/pgtable-3level-defs.h 2008-12-15 11:27:22.000000000 +0100 +@@ -1,7 +1,7 @@ + #ifndef _I386_PGTABLE_3LEVEL_DEFS_H + #define _I386_PGTABLE_3LEVEL_DEFS_H + +-#define HAVE_SHARED_KERNEL_PMD 0 ++#define SHARED_KERNEL_PMD 0 + + /* + * PGDIR_SHIFT determines what a top-level page table entry can map +--- sle11-2009-04-20.orig/include/asm-x86/mach-xen/asm/pgtable_32.h 2009-03-04 11:25:55.000000000 +0100 ++++ sle11-2009-04-20/include/asm-x86/mach-xen/asm/pgtable_32.h 2008-12-15 11:27:22.000000000 +0100 +@@ -24,11 +24,11 @@ + #include + #include + #include ++#include + + /* Is this pagetable pinned? 
*/ + #define PG_pinned PG_arch_1 + +-struct mm_struct; + struct vm_area_struct; + + /* +@@ -38,17 +38,16 @@ struct vm_area_struct; + #define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) + extern unsigned long empty_zero_page[1024]; + extern pgd_t *swapper_pg_dir; +-extern struct kmem_cache *pgd_cache; + extern struct kmem_cache *pmd_cache; + extern spinlock_t pgd_lock; + extern struct page *pgd_list; ++void check_pgt_cache(void); + + void pmd_ctor(void *, struct kmem_cache *, unsigned long); +-void pgd_ctor(void *, struct kmem_cache *, unsigned long); +-void pgd_dtor(void *, struct kmem_cache *, unsigned long); + void pgtable_cache_init(void); + void paging_init(void); + ++ + /* + * The Linux x86 paging architecture is 'compile-time dual-mode', it + * implements both the traditional 2-level x86 page tables and the +@@ -165,6 +164,7 @@ void paging_init(void); + + extern unsigned long long __PAGE_KERNEL, __PAGE_KERNEL_EXEC; + #define __PAGE_KERNEL_RO (__PAGE_KERNEL & ~_PAGE_RW) ++#define __PAGE_KERNEL_RX (__PAGE_KERNEL_EXEC & ~_PAGE_RW) + #define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_PCD) + #define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE) + #define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE) +@@ -172,6 +172,7 @@ extern unsigned long long __PAGE_KERNEL, + #define PAGE_KERNEL __pgprot(__PAGE_KERNEL) + #define PAGE_KERNEL_RO __pgprot(__PAGE_KERNEL_RO) + #define PAGE_KERNEL_EXEC __pgprot(__PAGE_KERNEL_EXEC) ++#define PAGE_KERNEL_RX __pgprot(__PAGE_KERNEL_RX) + #define PAGE_KERNEL_NOCACHE __pgprot(__PAGE_KERNEL_NOCACHE) + #define PAGE_KERNEL_LARGE __pgprot(__PAGE_KERNEL_LARGE) + #define PAGE_KERNEL_LARGE_EXEC __pgprot(__PAGE_KERNEL_LARGE_EXEC) +@@ -275,7 +276,13 @@ static inline pte_t pte_mkhuge(pte_t pte + */ + #define pte_update(mm, addr, ptep) do { } while (0) + #define pte_update_defer(mm, addr, ptep) do { } while (0) +-#define paravirt_map_pt_hook(slot, va, pfn) do { } while (0) ++ ++/* local pte updates need not use xchg for locking */ ++static inline pte_t xen_local_ptep_get_and_clear(pte_t *ptep, pte_t res) ++{ ++ xen_set_pte(ptep, __pte(0)); ++ return res; ++} + + /* + * We only update the dirty/accessed state if we set +@@ -286,17 +293,34 @@ static inline pte_t pte_mkhuge(pte_t pte + */ + #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS + #define ptep_set_access_flags(vma, address, ptep, entry, dirty) \ +-do { \ +- if (dirty) \ ++({ \ ++ int __changed = !pte_same(*(ptep), entry); \ ++ if (__changed && (dirty)) \ + ptep_establish(vma, address, ptep, entry); \ +-} while (0) ++ __changed; \ ++}) + +-/* +- * We don't actually have these, but we want to advertise them so that +- * we can encompass the flush here. 
+- */ + #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY ++#define ptep_test_and_clear_dirty(vma, addr, ptep) ({ \ ++ int __ret = 0; \ ++ if (pte_dirty(*(ptep))) \ ++ __ret = test_and_clear_bit(_PAGE_BIT_DIRTY, \ ++ &(ptep)->pte_low); \ ++ if (__ret) \ ++ pte_update((vma)->vm_mm, addr, ptep); \ ++ __ret; \ ++}) ++ + #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG ++#define ptep_test_and_clear_young(vma, addr, ptep) ({ \ ++ int __ret = 0; \ ++ if (pte_young(*(ptep))) \ ++ __ret = test_and_clear_bit(_PAGE_BIT_ACCESSED, \ ++ &(ptep)->pte_low); \ ++ if (__ret) \ ++ pte_update((vma)->vm_mm, addr, ptep); \ ++ __ret; \ ++}) + + /* + * Rules for using ptep_establish: the pte MUST be a user pte, and +@@ -323,7 +347,7 @@ do { \ + int __dirty = pte_dirty(__pte); \ + __pte = pte_mkclean(__pte); \ + if (test_bit(PG_pinned, &virt_to_page((vma)->vm_mm->pgd)->flags)) \ +- ptep_set_access_flags(vma, address, ptep, __pte, __dirty); \ ++ (void)ptep_set_access_flags(vma, address, ptep, __pte, __dirty); \ + else if (__dirty) \ + (ptep)->pte_low = __pte.pte_low; \ __dirty; \ -@@ -501,7 +518,7 @@ static inline pte_t pte_modify(pte_t pte +@@ -336,7 +360,7 @@ do { \ int __young = pte_young(__pte); \ __pte = pte_mkold(__pte); \ - if ((vma)->vm_mm->context.pinned) \ + if (test_bit(PG_pinned, &virt_to_page((vma)->vm_mm->pgd)->flags)) \ - ptep_set_access_flags(vma, address, ptep, __pte, __young); \ + (void)ptep_set_access_flags(vma, address, ptep, __pte, __young); \ else if (__young) \ - set_pte(ptep, __pte); \ + (ptep)->pte_low = __pte.pte_low; \ __young; \ -@@ -515,10 +532,7 @@ static inline pte_t pte_modify(pte_t pte - #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) +@@ -349,7 +373,7 @@ static inline pte_t ptep_get_and_clear(s + if (!pte_none(pte) + && (mm != &init_mm + || HYPERVISOR_update_va_mapping(addr, __pte(0), 0))) { +- pte = raw_ptep_get_and_clear(ptep, pte); ++ pte = xen_ptep_get_and_clear(ptep, pte); + pte_update(mm, addr, ptep); + } + return pte; +@@ -491,24 +515,10 @@ extern pte_t *lookup_address(unsigned lo + #endif + + #if defined(CONFIG_HIGHPTE) +-#define pte_offset_map(dir, address) \ +-({ \ +- pte_t *__ptep; \ +- unsigned pfn = pmd_val(*(dir)) >> PAGE_SHIFT; \ +- __ptep = (pte_t *)kmap_atomic_pte(pfn_to_page(pfn),KM_PTE0); \ +- paravirt_map_pt_hook(KM_PTE0,__ptep, pfn); \ +- __ptep = __ptep + pte_index(address); \ +- __ptep; \ +-}) +-#define pte_offset_map_nested(dir, address) \ +-({ \ +- pte_t *__ptep; \ +- unsigned pfn = pmd_val(*(dir)) >> PAGE_SHIFT; \ +- __ptep = (pte_t *)kmap_atomic_pte(pfn_to_page(pfn),KM_PTE1); \ +- paravirt_map_pt_hook(KM_PTE1,__ptep, pfn); \ +- __ptep = __ptep + pte_index(address); \ +- __ptep; \ +-}) ++#define pte_offset_map(dir, address) \ ++ ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE0) + pte_index(address)) ++#define pte_offset_map_nested(dir, address) \ ++ ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE1) + pte_index(address)) + #define pte_unmap(pte) kunmap_atomic(pte, KM_PTE0) + #define pte_unmap_nested(pte) kunmap_atomic(pte, KM_PTE1) + #else +@@ -597,10 +607,6 @@ int xen_change_pte_range(struct mm_struc + #define io_remap_pfn_range(vma,from,pfn,size,prot) \ + direct_remap_pfn_range(vma,from,pfn,size,prot,DOMID_IO) + +-#define MK_IOSPACE_PFN(space, pfn) (pfn) +-#define GET_IOSPACE(pfn) 0 +-#define GET_PFN(pfn) (pfn) +- + #include + + #endif /* _I386_PGTABLE_H */ +--- sle11-2009-04-20.orig/include/asm-x86/mach-xen/asm/processor_32.h 2009-03-04 11:25:55.000000000 +0100 ++++ sle11-2009-04-20/include/asm-x86/mach-xen/asm/processor_32.h 2008-12-15 
11:27:22.000000000 +0100 +@@ -21,6 +21,7 @@ + #include + #include + #include ++#include + #include + + /* flag for disabling the tsc */ +@@ -118,7 +119,8 @@ extern char ignore_fpu_irq; + + void __init cpu_detect(struct cpuinfo_x86 *c); + +-extern void identify_cpu(struct cpuinfo_x86 *); ++extern void identify_boot_cpu(void); ++extern void identify_secondary_cpu(struct cpuinfo_x86 *); + extern void print_cpu_info(struct cpuinfo_x86 *); + extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c); + extern unsigned short num_cache_leaves; +@@ -129,29 +131,8 @@ extern void detect_ht(struct cpuinfo_x86 + static inline void detect_ht(struct cpuinfo_x86 *c) {} + #endif + +-/* +- * EFLAGS bits +- */ +-#define X86_EFLAGS_CF 0x00000001 /* Carry Flag */ +-#define X86_EFLAGS_PF 0x00000004 /* Parity Flag */ +-#define X86_EFLAGS_AF 0x00000010 /* Auxillary carry Flag */ +-#define X86_EFLAGS_ZF 0x00000040 /* Zero Flag */ +-#define X86_EFLAGS_SF 0x00000080 /* Sign Flag */ +-#define X86_EFLAGS_TF 0x00000100 /* Trap Flag */ +-#define X86_EFLAGS_IF 0x00000200 /* Interrupt Flag */ +-#define X86_EFLAGS_DF 0x00000400 /* Direction Flag */ +-#define X86_EFLAGS_OF 0x00000800 /* Overflow Flag */ +-#define X86_EFLAGS_IOPL 0x00003000 /* IOPL mask */ +-#define X86_EFLAGS_NT 0x00004000 /* Nested Task */ +-#define X86_EFLAGS_RF 0x00010000 /* Resume Flag */ +-#define X86_EFLAGS_VM 0x00020000 /* Virtual Mode */ +-#define X86_EFLAGS_AC 0x00040000 /* Alignment Check */ +-#define X86_EFLAGS_VIF 0x00080000 /* Virtual Interrupt Flag */ +-#define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */ +-#define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */ +- +-static inline fastcall void xen_cpuid(unsigned int *eax, unsigned int *ebx, +- unsigned int *ecx, unsigned int *edx) ++static inline void xen_cpuid(unsigned int *eax, unsigned int *ebx, ++ unsigned int *ecx, unsigned int *edx) + { + /* ecx is often an input as well as an output. 
*/ + __asm__(XEN_CPUID +@@ -165,21 +146,6 @@ static inline fastcall void xen_cpuid(un + #define load_cr3(pgdir) write_cr3(__pa(pgdir)) + + /* +- * Intel CPU features in CR4 +- */ +-#define X86_CR4_VME 0x0001 /* enable vm86 extensions */ +-#define X86_CR4_PVI 0x0002 /* virtual interrupts flag enable */ +-#define X86_CR4_TSD 0x0004 /* disable time stamp at ipl 3 */ +-#define X86_CR4_DE 0x0008 /* enable debugging extensions */ +-#define X86_CR4_PSE 0x0010 /* enable page size extensions */ +-#define X86_CR4_PAE 0x0020 /* enable physical address extensions */ +-#define X86_CR4_MCE 0x0040 /* Machine check enable */ +-#define X86_CR4_PGE 0x0080 /* enable global pages */ +-#define X86_CR4_PCE 0x0100 /* enable performance counters at ipl 3 */ +-#define X86_CR4_OSFXSR 0x0200 /* enable fast FPU save and restore */ +-#define X86_CR4_OSXMMEXCPT 0x0400 /* enable unmasked SSE exceptions */ +- +-/* + * Save the cr4 feature set we're using (ie + * Pentium 4MB enable and PPro Global page + * enable), so that any CPU's that boot up +@@ -206,26 +172,6 @@ static inline void clear_in_cr4 (unsigne + } + + /* +- * NSC/Cyrix CPU configuration register indexes +- */ +- +-#define CX86_PCR0 0x20 +-#define CX86_GCR 0xb8 +-#define CX86_CCR0 0xc0 +-#define CX86_CCR1 0xc1 +-#define CX86_CCR2 0xc2 +-#define CX86_CCR3 0xc3 +-#define CX86_CCR4 0xe8 +-#define CX86_CCR5 0xe9 +-#define CX86_CCR6 0xea +-#define CX86_CCR7 0xeb +-#define CX86_PCR1 0xf0 +-#define CX86_DIR0 0xfe +-#define CX86_DIR1 0xff +-#define CX86_ARR_BASE 0xc4 +-#define CX86_RCR_BASE 0xdc +- +-/* + * NSC/Cyrix CPU indexed register access macros + */ + +@@ -351,7 +297,8 @@ typedef struct { + struct thread_struct; + + #ifndef CONFIG_X86_NO_TSS +-struct tss_struct { ++/* This is the TSS defined by the hardware. */ ++struct i386_hw_tss { + unsigned short back_link,__blh; + unsigned long esp0; + unsigned short ss0,__ss0h; +@@ -375,6 +322,11 @@ struct tss_struct { + unsigned short gs, __gsh; + unsigned short ldt, __ldth; + unsigned short trace, io_bitmap_base; ++} __attribute__((packed)); ++ ++struct tss_struct { ++ struct i386_hw_tss x86_tss; ++ + /* + * The extra 1 is there because the CPU will access an + * additional byte beyond the end of the IO permission +@@ -428,10 +380,11 @@ struct thread_struct { + }; + + #define INIT_THREAD { \ ++ .esp0 = sizeof(init_stack) + (long)&init_stack, \ + .vm86_info = NULL, \ + .sysenter_cs = __KERNEL_CS, \ + .io_bitmap_ptr = NULL, \ +- .fs = __KERNEL_PDA, \ ++ .fs = __KERNEL_PERCPU, \ + } + + /* +@@ -441,10 +394,12 @@ struct thread_struct { + * be within the limit. + */ + #define INIT_TSS { \ +- .esp0 = sizeof(init_stack) + (long)&init_stack, \ +- .ss0 = __KERNEL_DS, \ +- .ss1 = __KERNEL_CS, \ +- .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, \ ++ .x86_tss = { \ ++ .esp0 = sizeof(init_stack) + (long)&init_stack, \ ++ .ss0 = __KERNEL_DS, \ ++ .ss1 = __KERNEL_CS, \ ++ .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, \ ++ }, \ + .io_bitmap = { [ 0 ... 
IO_BITMAP_LONGS] = ~0 }, \ + } + +@@ -551,38 +506,33 @@ static inline void rep_nop(void) + + #define cpu_relax() rep_nop() + +-#define paravirt_enabled() 0 +-#define __cpuid xen_cpuid +- + #ifndef CONFIG_X86_NO_TSS +-static inline void __load_esp0(struct tss_struct *tss, struct thread_struct *thread) ++static inline void native_load_esp0(struct tss_struct *tss, struct thread_struct *thread) + { +- tss->esp0 = thread->esp0; ++ tss->x86_tss.esp0 = thread->esp0; + /* This can only happen when SEP is enabled, no need to test "SEP"arately */ +- if (unlikely(tss->ss1 != thread->sysenter_cs)) { +- tss->ss1 = thread->sysenter_cs; ++ if (unlikely(tss->x86_tss.ss1 != thread->sysenter_cs)) { ++ tss->x86_tss.ss1 = thread->sysenter_cs; + wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0); + } + } +-#define load_esp0(tss, thread) \ +- __load_esp0(tss, thread) + #else +-#define load_esp0(tss, thread) do { \ ++#define xen_load_esp0(tss, thread) do { \ + if (HYPERVISOR_stack_switch(__KERNEL_DS, (thread)->esp0)) \ + BUG(); \ + } while (0) + #endif + + +-/* +- * These special macros can be used to get or set a debugging register +- */ +-#define get_debugreg(var, register) \ +- (var) = HYPERVISOR_get_debugreg(register) +-#define set_debugreg(value, register) \ +- WARN_ON(HYPERVISOR_set_debugreg(register, value)) ++static inline unsigned long xen_get_debugreg(int regno) ++{ ++ return HYPERVISOR_get_debugreg(regno); ++} + +-#define set_iopl_mask xen_set_iopl_mask ++static inline void xen_set_debugreg(int regno, unsigned long value) ++{ ++ WARN_ON(HYPERVISOR_set_debugreg(regno, value)); ++} + + /* + * Set IOPL bits in EFLAGS from given mask +@@ -597,6 +547,21 @@ static inline void xen_set_iopl_mask(uns + } + + ++#define paravirt_enabled() 0 ++#define __cpuid xen_cpuid ++ ++#define load_esp0 xen_load_esp0 ++ ++/* ++ * These special macros can be used to get or set a debugging register ++ */ ++#define get_debugreg(var, register) \ ++ (var) = xen_get_debugreg(register) ++#define set_debugreg(value, register) \ ++ xen_set_debugreg(register, value) ++ ++#define set_iopl_mask xen_set_iopl_mask ++ + /* + * Generic CPUID function + * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx +@@ -749,8 +714,14 @@ extern unsigned long boot_option_idle_ov + extern void enable_sep_cpu(void); + extern int sysenter_setup(void); + +-extern int init_gdt(int cpu, struct task_struct *idle); ++/* Defined in head.S */ ++extern struct Xgt_desc_struct early_gdt_descr; ++ + extern void cpu_set_gdt(int); +-extern void secondary_cpu_init(void); ++extern void switch_to_new_gdt(void); ++extern void cpu_init(void); ++extern void init_gdt(int cpu); ++ ++extern int force_mwait; + + #endif /* __ASM_I386_PROCESSOR_H */ +--- sle11-2009-04-20.orig/include/asm-x86/mach-xen/asm/segment_32.h 2009-03-04 11:25:55.000000000 +0100 ++++ sle11-2009-04-20/include/asm-x86/mach-xen/asm/segment_32.h 2008-12-15 11:27:22.000000000 +0100 +@@ -39,7 +39,7 @@ + * 25 - APM BIOS support + * + * 26 - ESPFIX small SS +- * 27 - PDA [ per-cpu private data area ] ++ * 27 - per-cpu [ offset to per-cpu data area ] + * 28 - unused + * 29 - unused + * 30 - unused +@@ -74,8 +74,12 @@ + #define GDT_ENTRY_ESPFIX_SS (GDT_ENTRY_KERNEL_BASE + 14) + #define __ESPFIX_SS (GDT_ENTRY_ESPFIX_SS * 8) + +-#define GDT_ENTRY_PDA (GDT_ENTRY_KERNEL_BASE + 15) +-#define __KERNEL_PDA (GDT_ENTRY_PDA * 8) ++#define GDT_ENTRY_PERCPU (GDT_ENTRY_KERNEL_BASE + 15) ++#ifdef CONFIG_SMP ++#define __KERNEL_PERCPU (GDT_ENTRY_PERCPU * 8) ++#else ++#define __KERNEL_PERCPU 0 ++#endif + + #define 
GDT_ENTRY_DOUBLEFAULT_TSS 31 + +--- sle11-2009-04-20.orig/include/asm-x86/mach-xen/asm/smp_32.h 2009-03-04 11:25:55.000000000 +0100 ++++ sle11-2009-04-20/include/asm-x86/mach-xen/asm/smp_32.h 2008-12-15 11:27:22.000000000 +0100 +@@ -8,19 +8,15 @@ + #include + #include + #include +-#include + #endif + +-#ifdef CONFIG_X86_LOCAL_APIC +-#ifndef __ASSEMBLY__ +-#include ++#if defined(CONFIG_X86_LOCAL_APIC) && !defined(__ASSEMBLY__) + #include + #include ++#include + #ifdef CONFIG_X86_IO_APIC + #include + #endif +-#include +-#endif + #endif + + #define BAD_APICID 0xFFu +@@ -52,9 +48,76 @@ extern void cpu_exit_clear(void); + extern void cpu_uninit(void); + #endif + +-#ifndef CONFIG_PARAVIRT ++#ifndef CONFIG_XEN ++struct smp_ops ++{ ++ void (*smp_prepare_boot_cpu)(void); ++ void (*smp_prepare_cpus)(unsigned max_cpus); ++ int (*cpu_up)(unsigned cpu); ++ void (*smp_cpus_done)(unsigned max_cpus); ++ ++ void (*smp_send_stop)(void); ++ void (*smp_send_reschedule)(int cpu); ++ int (*smp_call_function_mask)(cpumask_t mask, ++ void (*func)(void *info), void *info, ++ int wait); ++}; ++ ++extern struct smp_ops smp_ops; ++ ++static inline void smp_prepare_boot_cpu(void) ++{ ++ smp_ops.smp_prepare_boot_cpu(); ++} ++static inline void smp_prepare_cpus(unsigned int max_cpus) ++{ ++ smp_ops.smp_prepare_cpus(max_cpus); ++} ++static inline int __cpu_up(unsigned int cpu) ++{ ++ return smp_ops.cpu_up(cpu); ++} ++static inline void smp_cpus_done(unsigned int max_cpus) ++{ ++ smp_ops.smp_cpus_done(max_cpus); ++} ++ ++static inline void smp_send_stop(void) ++{ ++ smp_ops.smp_send_stop(); ++} ++static inline void smp_send_reschedule(int cpu) ++{ ++ smp_ops.smp_send_reschedule(cpu); ++} ++static inline int smp_call_function_mask(cpumask_t mask, ++ void (*func) (void *info), void *info, ++ int wait) ++{ ++ return smp_ops.smp_call_function_mask(mask, func, info, wait); ++} ++ ++void native_smp_prepare_boot_cpu(void); ++void native_smp_prepare_cpus(unsigned int max_cpus); ++int native_cpu_up(unsigned int cpunum); ++void native_smp_cpus_done(unsigned int max_cpus); ++ + #define startup_ipi_hook(phys_apicid, start_eip, start_esp) \ + do { } while (0) ++ ++#else ++ ++ ++void xen_smp_send_stop(void); ++void xen_smp_send_reschedule(int cpu); ++int xen_smp_call_function_mask(cpumask_t mask, ++ void (*func) (void *info), void *info, ++ int wait); ++ ++#define smp_send_stop xen_smp_send_stop ++#define smp_send_reschedule xen_smp_send_reschedule ++#define smp_call_function_mask xen_smp_call_function_mask ++ + #endif + + /* +@@ -62,7 +125,8 @@ do { } while (0) + * from the initial startup. We map APIC_BASE very early in page_setup(), + * so this is correct in the x86 case. 
+ */ +-#define raw_smp_processor_id() (read_pda(cpu_number)) ++DECLARE_PER_CPU(int, cpu_number); ++#define raw_smp_processor_id() (x86_read_percpu(cpu_number)) + + extern cpumask_t cpu_possible_map; + #define cpu_callin_map cpu_possible_map +@@ -73,20 +137,6 @@ static inline int num_booting_cpus(void) + return cpus_weight(cpu_possible_map); + } + +-#ifdef CONFIG_X86_LOCAL_APIC +- +-#ifdef APIC_DEFINITION +-extern int hard_smp_processor_id(void); +-#else +-#include +-static inline int hard_smp_processor_id(void) +-{ +- /* we don't want to mark this access volatile - bad code generation */ +- return GET_APIC_ID(*(unsigned long *)(APIC_BASE+APIC_ID)); +-} +-#endif +-#endif +- + #define safe_smp_processor_id() smp_processor_id() + extern int __cpu_disable(void); + extern void __cpu_die(unsigned int cpu); +@@ -102,10 +152,31 @@ extern unsigned int num_processors; + + #define NO_PROC_ID 0xFF /* No processor magic marker */ + +-#endif ++#endif /* CONFIG_SMP */ + + #ifndef __ASSEMBLY__ + ++#ifdef CONFIG_X86_LOCAL_APIC ++ ++#ifdef APIC_DEFINITION ++extern int hard_smp_processor_id(void); ++#else ++#include ++static inline int hard_smp_processor_id(void) ++{ ++ /* we don't want to mark this access volatile - bad code generation */ ++ return GET_APIC_ID(*(unsigned long *)(APIC_BASE+APIC_ID)); ++} ++#endif /* APIC_DEFINITION */ ++ ++#else /* CONFIG_X86_LOCAL_APIC */ ++ ++#ifndef CONFIG_SMP ++#define hard_smp_processor_id() 0 ++#endif ++ ++#endif /* CONFIG_X86_LOCAL_APIC */ ++ + extern u8 apicid_2_node[]; + + #ifdef CONFIG_X86_LOCAL_APIC +--- sle11-2009-04-20.orig/include/asm-x86/mach-xen/asm/system_32.h 2008-12-15 11:26:44.000000000 +0100 ++++ sle11-2009-04-20/include/asm-x86/mach-xen/asm/system_32.h 2008-12-15 11:27:22.000000000 +0100 +@@ -4,7 +4,7 @@ + #include + #include + #include +-#include /* for LOCK_PREFIX */ ++#include + #include + #include + +@@ -90,308 +90,102 @@ __asm__ __volatile__ ("movw %%dx,%1\n\t" + #define savesegment(seg, value) \ + asm volatile("mov %%" #seg ",%0":"=rm" (value)) + +-#define read_cr0() ({ \ +- unsigned int __dummy; \ +- __asm__ __volatile__( \ +- "movl %%cr0,%0\n\t" \ +- :"=r" (__dummy)); \ +- __dummy; \ +-}) +-#define write_cr0(x) \ +- __asm__ __volatile__("movl %0,%%cr0": :"r" (x)) +- +-#define read_cr2() (current_vcpu_info()->arch.cr2) +-#define write_cr2(x) \ +- __asm__ __volatile__("movl %0,%%cr2": :"r" (x)) +- +-#define read_cr3() ({ \ +- unsigned int __dummy; \ +- __asm__ ( \ +- "movl %%cr3,%0\n\t" \ +- :"=r" (__dummy)); \ +- __dummy = xen_cr3_to_pfn(__dummy); \ +- mfn_to_pfn(__dummy) << PAGE_SHIFT; \ +-}) +-#define write_cr3(x) ({ \ +- unsigned int __dummy = pfn_to_mfn((x) >> PAGE_SHIFT); \ +- __dummy = xen_pfn_to_cr3(__dummy); \ +- __asm__ __volatile__("movl %0,%%cr3": :"r" (__dummy)); \ +-}) +-#define read_cr4() ({ \ +- unsigned int __dummy; \ +- __asm__( \ +- "movl %%cr4,%0\n\t" \ +- :"=r" (__dummy)); \ +- __dummy; \ +-}) +-#define read_cr4_safe() ({ \ +- unsigned int __dummy; \ +- /* This could fault if %cr4 does not exist */ \ +- __asm__("1: movl %%cr4, %0 \n" \ +- "2: \n" \ +- ".section __ex_table,\"a\" \n" \ +- ".long 1b,2b \n" \ +- ".previous \n" \ +- : "=r" (__dummy): "0" (0)); \ +- __dummy; \ +-}) +- +-#define write_cr4(x) \ +- __asm__ __volatile__("movl %0,%%cr4": :"r" (x)) +- +-#define wbinvd() \ +- __asm__ __volatile__ ("wbinvd": : :"memory") +- +-/* Clear the 'TS' bit */ +-#define clts() (HYPERVISOR_fpu_taskswitch(0)) +- +-/* Set the 'TS' bit */ +-#define stts() (HYPERVISOR_fpu_taskswitch(1)) +- +-#endif /* __KERNEL__ */ +- +-static inline 
unsigned long get_limit(unsigned long segment) ++static inline void xen_clts(void) + { +- unsigned long __limit; +- __asm__("lsll %1,%0" +- :"=r" (__limit):"r" (segment)); +- return __limit+1; ++ HYPERVISOR_fpu_taskswitch(0); + } + +-#define nop() __asm__ __volatile__ ("nop") +- +-#define xchg(ptr,v) ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr)))) +- +-#define tas(ptr) (xchg((ptr),1)) +- +-struct __xchg_dummy { unsigned long a[100]; }; +-#define __xg(x) ((struct __xchg_dummy *)(x)) ++static inline unsigned long xen_read_cr0(void) ++{ ++ unsigned long val; ++ asm volatile("movl %%cr0,%0\n\t" :"=r" (val)); ++ return val; ++} + ++static inline void xen_write_cr0(unsigned long val) ++{ ++ asm volatile("movl %0,%%cr0": :"r" (val)); ++} + +-#ifdef CONFIG_X86_CMPXCHG64 ++#define xen_read_cr2() (current_vcpu_info()->arch.cr2) + +-/* +- * The semantics of XCHGCMP8B are a bit strange, this is why +- * there is a loop and the loading of %%eax and %%edx has to +- * be inside. This inlines well in most cases, the cached +- * cost is around ~38 cycles. (in the future we might want +- * to do an SIMD/3DNOW!/MMX/FPU 64-bit store here, but that +- * might have an implicit FPU-save as a cost, so it's not +- * clear which path to go.) +- * +- * cmpxchg8b must be used with the lock prefix here to allow +- * the instruction to be executed atomically, see page 3-102 +- * of the instruction set reference 24319102.pdf. We need +- * the reader side to see the coherent 64bit value. +- */ +-static inline void __set_64bit (unsigned long long * ptr, +- unsigned int low, unsigned int high) ++static inline void xen_write_cr2(unsigned long val) + { +- __asm__ __volatile__ ( +- "\n1:\t" +- "movl (%0), %%eax\n\t" +- "movl 4(%0), %%edx\n\t" +- "lock cmpxchg8b (%0)\n\t" +- "jnz 1b" +- : /* no outputs */ +- : "D"(ptr), +- "b"(low), +- "c"(high) +- : "ax","dx","memory"); ++ asm volatile("movl %0,%%cr2": :"r" (val)); + } + +-static inline void __set_64bit_constant (unsigned long long *ptr, +- unsigned long long value) ++static inline unsigned long xen_read_cr3(void) + { +- __set_64bit(ptr,(unsigned int)(value), (unsigned int)((value)>>32ULL)); ++ unsigned long val; ++ asm volatile("movl %%cr3,%0\n\t" :"=r" (val)); ++ return mfn_to_pfn(xen_cr3_to_pfn(val)) << PAGE_SHIFT; + } +-#define ll_low(x) *(((unsigned int*)&(x))+0) +-#define ll_high(x) *(((unsigned int*)&(x))+1) + +-static inline void __set_64bit_var (unsigned long long *ptr, +- unsigned long long value) ++static inline void xen_write_cr3(unsigned long val) + { +- __set_64bit(ptr,ll_low(value), ll_high(value)); ++ val = xen_pfn_to_cr3(pfn_to_mfn(val >> PAGE_SHIFT)); ++ asm volatile("movl %0,%%cr3": :"r" (val)); + } + +-#define set_64bit(ptr,value) \ +-(__builtin_constant_p(value) ? \ +- __set_64bit_constant(ptr, value) : \ +- __set_64bit_var(ptr, value) ) +- +-#define _set_64bit(ptr,value) \ +-(__builtin_constant_p(value) ? \ +- __set_64bit(ptr, (unsigned int)(value), (unsigned int)((value)>>32ULL) ) : \ +- __set_64bit(ptr, ll_low(value), ll_high(value)) ) +- +-#endif +- +-/* +- * Note: no "lock" prefix even on SMP: xchg always implies lock anyway +- * Note 2: xchg has side effect, so that attribute volatile is necessary, +- * but generally the primitive is invalid, *ptr is output argument. 
--ANK +- */ +-static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int size) ++static inline unsigned long xen_read_cr4(void) + { +- switch (size) { +- case 1: +- __asm__ __volatile__("xchgb %b0,%1" +- :"=q" (x) +- :"m" (*__xg(ptr)), "0" (x) +- :"memory"); +- break; +- case 2: +- __asm__ __volatile__("xchgw %w0,%1" +- :"=r" (x) +- :"m" (*__xg(ptr)), "0" (x) +- :"memory"); +- break; +- case 4: +- __asm__ __volatile__("xchgl %0,%1" +- :"=r" (x) +- :"m" (*__xg(ptr)), "0" (x) +- :"memory"); +- break; +- } +- return x; ++ unsigned long val; ++ asm volatile("movl %%cr4,%0\n\t" :"=r" (val)); ++ return val; + } + +-/* +- * Atomic compare and exchange. Compare OLD with MEM, if identical, +- * store NEW in MEM. Return the initial value in MEM. Success is +- * indicated by comparing RETURN with OLD. +- */ +- +-#ifdef CONFIG_X86_CMPXCHG +-#define __HAVE_ARCH_CMPXCHG 1 +-#define cmpxchg(ptr,o,n)\ +- ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\ +- (unsigned long)(n),sizeof(*(ptr)))) +-#define sync_cmpxchg(ptr,o,n)\ +- ((__typeof__(*(ptr)))__sync_cmpxchg((ptr),(unsigned long)(o),\ +- (unsigned long)(n),sizeof(*(ptr)))) +-#endif ++static inline unsigned long xen_read_cr4_safe(void) ++{ ++ unsigned long val; ++ /* This could fault if %cr4 does not exist */ ++ asm("1: movl %%cr4, %0 \n" ++ "2: \n" ++ ".section __ex_table,\"a\" \n" ++ ".long 1b,2b \n" ++ ".previous \n" ++ : "=r" (val): "0" (0)); ++ return val; ++} + +-static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, +- unsigned long new, int size) ++static inline void xen_write_cr4(unsigned long val) + { +- unsigned long prev; +- switch (size) { +- case 1: +- __asm__ __volatile__(LOCK_PREFIX "cmpxchgb %b1,%2" +- : "=a"(prev) +- : "q"(new), "m"(*__xg(ptr)), "0"(old) +- : "memory"); +- return prev; +- case 2: +- __asm__ __volatile__(LOCK_PREFIX "cmpxchgw %w1,%2" +- : "=a"(prev) +- : "r"(new), "m"(*__xg(ptr)), "0"(old) +- : "memory"); +- return prev; +- case 4: +- __asm__ __volatile__(LOCK_PREFIX "cmpxchgl %1,%2" +- : "=a"(prev) +- : "r"(new), "m"(*__xg(ptr)), "0"(old) +- : "memory"); +- return prev; +- } +- return old; ++ asm volatile("movl %0,%%cr4": :"r" (val)); + } + +-/* +- * Always use locked operations when touching memory shared with a +- * hypervisor, since the system may be SMP even if the guest kernel +- * isn't. +- */ +-static inline unsigned long __sync_cmpxchg(volatile void *ptr, +- unsigned long old, +- unsigned long new, int size) +-{ +- unsigned long prev; +- switch (size) { +- case 1: +- __asm__ __volatile__("lock; cmpxchgb %b1,%2" +- : "=a"(prev) +- : "q"(new), "m"(*__xg(ptr)), "0"(old) +- : "memory"); +- return prev; +- case 2: +- __asm__ __volatile__("lock; cmpxchgw %w1,%2" +- : "=a"(prev) +- : "r"(new), "m"(*__xg(ptr)), "0"(old) +- : "memory"); +- return prev; +- case 4: +- __asm__ __volatile__("lock; cmpxchgl %1,%2" +- : "=a"(prev) +- : "r"(new), "m"(*__xg(ptr)), "0"(old) +- : "memory"); +- return prev; +- } +- return old; ++static inline void xen_wbinvd(void) ++{ ++ asm volatile("wbinvd": : :"memory"); + } - extern spinlock_t pgd_lock; --extern struct page *pgd_list; --void vmalloc_sync_all(void); -- --#endif /* !__ASSEMBLY__ */ -+extern struct list_head pgd_list; +-#ifndef CONFIG_X86_CMPXCHG +-/* +- * Building a kernel capable running on 80386. It may be necessary to +- * simulate the cmpxchg on the 80386 CPU. For that purpose we define +- * a function for each of the sizes we support. 
+- */ ++#define read_cr0() (xen_read_cr0()) ++#define write_cr0(x) (xen_write_cr0(x)) ++#define read_cr2() (xen_read_cr2()) ++#define write_cr2(x) (xen_write_cr2(x)) ++#define read_cr3() (xen_read_cr3()) ++#define write_cr3(x) (xen_write_cr3(x)) ++#define read_cr4() (xen_read_cr4()) ++#define read_cr4_safe() (xen_read_cr4_safe()) ++#define write_cr4(x) (xen_write_cr4(x)) ++#define wbinvd() (xen_wbinvd()) - extern int kern_addr_valid(unsigned long addr); +-extern unsigned long cmpxchg_386_u8(volatile void *, u8, u8); +-extern unsigned long cmpxchg_386_u16(volatile void *, u16, u16); +-extern unsigned long cmpxchg_386_u32(volatile void *, u32, u32); +- +-static inline unsigned long cmpxchg_386(volatile void *ptr, unsigned long old, +- unsigned long new, int size) +-{ +- switch (size) { +- case 1: +- return cmpxchg_386_u8(ptr, old, new); +- case 2: +- return cmpxchg_386_u16(ptr, old, new); +- case 4: +- return cmpxchg_386_u32(ptr, old, new); +- } +- return old; +-} +- +-#define cmpxchg(ptr,o,n) \ +-({ \ +- __typeof__(*(ptr)) __ret; \ +- if (likely(boot_cpu_data.x86 > 3)) \ +- __ret = __cmpxchg((ptr), (unsigned long)(o), \ +- (unsigned long)(n), sizeof(*(ptr))); \ +- else \ +- __ret = cmpxchg_386((ptr), (unsigned long)(o), \ +- (unsigned long)(n), sizeof(*(ptr))); \ +- __ret; \ +-}) +-#endif ++/* Clear the 'TS' bit */ ++#define clts() (xen_clts()) -@@ -557,10 +571,6 @@ int xen_change_pte_range(struct mm_struc - #define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \ - direct_remap_pfn_range(vma,vaddr,pfn,size,prot,DOMID_IO) +-#ifdef CONFIG_X86_CMPXCHG64 ++/* Set the 'TS' bit */ ++#define stts() (HYPERVISOR_fpu_taskswitch(1)) --#define MK_IOSPACE_PFN(space, pfn) (pfn) --#define GET_IOSPACE(pfn) 0 --#define GET_PFN(pfn) (pfn) +-static inline unsigned long long __cmpxchg64(volatile void *ptr, unsigned long long old, +- unsigned long long new) ++#endif /* __KERNEL__ */ ++ ++static inline unsigned long get_limit(unsigned long segment) + { +- unsigned long long prev; +- __asm__ __volatile__(LOCK_PREFIX "cmpxchg8b %3" +- : "=A"(prev) +- : "b"((unsigned long)new), +- "c"((unsigned long)(new >> 32)), +- "m"(*__xg(ptr)), +- "0"(old) +- : "memory"); +- return prev; +-} - - #define HAVE_ARCH_UNMAPPED_AREA - - #define pgtable_cache_init() do { } while (0) -@@ -574,11 +584,14 @@ int xen_change_pte_range(struct mm_struc - #define kc_offset_to_vaddr(o) \ - (((o) & (1UL << (__VIRTUAL_MASK_SHIFT-1))) ? ((o) | (~__VIRTUAL_MASK)) : (o)) +-#define cmpxchg64(ptr,o,n)\ +- ((__typeof__(*(ptr)))__cmpxchg64((ptr),(unsigned long long)(o),\ +- (unsigned long long)(n))) ++ unsigned long __limit; ++ __asm__("lsll %1,%0" ++ :"=r" (__limit):"r" (segment)); ++ return __limit+1; ++} ++ ++#define nop() __asm__ __volatile__ ("nop") -+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG -+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY - #define __HAVE_ARCH_PTEP_GET_AND_CLEAR - #define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL - #define __HAVE_ARCH_PTEP_CLEAR_FLUSH - #define __HAVE_ARCH_PTEP_SET_WRPROTECT - #define __HAVE_ARCH_PTE_SAME - #include -+#endif /* !__ASSEMBLY__ */ +-#endif +- + /* + * Force strict CPU ordering. + * And yes, this is required on UP too when we're talking +--- sle11-2009-04-20.orig/include/asm-x86/mach-xen/asm/tlbflush_32.h 2009-03-04 11:28:34.000000000 +0100 ++++ sle11-2009-04-20/include/asm-x86/mach-xen/asm/tlbflush_32.h 2008-12-15 11:27:22.000000000 +0100 +@@ -29,8 +29,13 @@ + * and page-granular flushes are available only on i486 and up. 
+ */ - #endif /* _X86_64_PGTABLE_H */ ---- a/include/asm-x86/mach-xen/asm/processor_32.h -+++ b/include/asm-x86/mach-xen/asm/processor_32.h -@@ -21,6 +21,7 @@ - #include - #include - #include -+#include - #include ++#define TLB_FLUSH_ALL 0xffffffff ++ ++ + #ifndef CONFIG_SMP - /* flag for disabling the tsc */ -@@ -118,7 +119,8 @@ extern char ignore_fpu_irq; ++#include ++ + #define flush_tlb() __flush_tlb() + #define flush_tlb_all() __flush_tlb_all() + #define local_flush_tlb() __flush_tlb() +@@ -55,7 +60,7 @@ static inline void flush_tlb_range(struc + __flush_tlb(); + } - void __init cpu_detect(struct cpuinfo_x86 *c); +-#else ++#else /* SMP */ --extern void identify_cpu(struct cpuinfo_x86 *); -+extern void identify_boot_cpu(void); -+extern void identify_secondary_cpu(struct cpuinfo_x86 *); - extern void print_cpu_info(struct cpuinfo_x86 *); - extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c); - extern unsigned short num_cache_leaves; -@@ -129,29 +131,8 @@ extern void detect_ht(struct cpuinfo_x86 - static inline void detect_ht(struct cpuinfo_x86 *c) {} - #endif + #include --/* -- * EFLAGS bits -- */ --#define X86_EFLAGS_CF 0x00000001 /* Carry Flag */ --#define X86_EFLAGS_PF 0x00000004 /* Parity Flag */ --#define X86_EFLAGS_AF 0x00000010 /* Auxillary carry Flag */ --#define X86_EFLAGS_ZF 0x00000040 /* Zero Flag */ --#define X86_EFLAGS_SF 0x00000080 /* Sign Flag */ --#define X86_EFLAGS_TF 0x00000100 /* Trap Flag */ --#define X86_EFLAGS_IF 0x00000200 /* Interrupt Flag */ --#define X86_EFLAGS_DF 0x00000400 /* Direction Flag */ --#define X86_EFLAGS_OF 0x00000800 /* Overflow Flag */ --#define X86_EFLAGS_IOPL 0x00003000 /* IOPL mask */ --#define X86_EFLAGS_NT 0x00004000 /* Nested Task */ --#define X86_EFLAGS_RF 0x00010000 /* Resume Flag */ --#define X86_EFLAGS_VM 0x00020000 /* Virtual Mode */ --#define X86_EFLAGS_AC 0x00040000 /* Alignment Check */ --#define X86_EFLAGS_VIF 0x00080000 /* Virtual Interrupt Flag */ --#define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */ --#define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */ +@@ -84,9 +89,7 @@ struct tlb_state + char __cacheline_padding[L1_CACHE_BYTES-8]; + }; + DECLARE_PER_CPU(struct tlb_state, cpu_tlbstate); - --static inline fastcall void xen_cpuid(unsigned int *eax, unsigned int *ebx, -- unsigned int *ecx, unsigned int *edx) -+static inline void xen_cpuid(unsigned int *eax, unsigned int *ebx, -+ unsigned int *ecx, unsigned int *edx) - { - /* ecx is often an input as well as an output. 
*/ - __asm__(XEN_CPUID -@@ -165,21 +146,6 @@ static inline fastcall void xen_cpuid(un - #define load_cr3(pgdir) write_cr3(__pa(pgdir)) - - /* -- * Intel CPU features in CR4 -- */ --#define X86_CR4_VME 0x0001 /* enable vm86 extensions */ --#define X86_CR4_PVI 0x0002 /* virtual interrupts flag enable */ --#define X86_CR4_TSD 0x0004 /* disable time stamp at ipl 3 */ --#define X86_CR4_DE 0x0008 /* enable debugging extensions */ --#define X86_CR4_PSE 0x0010 /* enable page size extensions */ --#define X86_CR4_PAE 0x0020 /* enable physical address extensions */ --#define X86_CR4_MCE 0x0040 /* Machine check enable */ --#define X86_CR4_PGE 0x0080 /* enable global pages */ --#define X86_CR4_PCE 0x0100 /* enable performance counters at ipl 3 */ --#define X86_CR4_OSFXSR 0x0200 /* enable fast FPU save and restore */ --#define X86_CR4_OSXMMEXCPT 0x0400 /* enable unmasked SSE exceptions */ - --/* - * Save the cr4 feature set we're using (ie - * Pentium 4MB enable and PPro Global page - * enable), so that any CPU's that boot up -@@ -206,26 +172,6 @@ static inline void clear_in_cr4 (unsigne +-#endif ++#endif /* SMP */ + + #define flush_tlb_kernel_range(start, end) flush_tlb_all() + +--- sle11-2009-04-20.orig/include/asm-x86/mach-xen/asm/desc_64.h 2008-12-15 11:26:44.000000000 +0100 ++++ sle11-2009-04-20/include/asm-x86/mach-xen/asm/desc_64.h 2008-12-15 11:27:22.000000000 +0100 +@@ -127,16 +127,6 @@ static inline void set_ldt_desc(unsigned + DESC_LDT, size * 8 - 1); } - /* -- * NSC/Cyrix CPU configuration register indexes -- */ +-static inline void set_seg_base(unsigned cpu, int entry, void *base) +-{ +- struct desc_struct *d = &cpu_gdt(cpu)[entry]; +- u32 addr = (u32)(u64)base; +- BUG_ON((u64)base >> 32); +- d->base0 = addr & 0xffff; +- d->base1 = (addr >> 16) & 0xff; +- d->base2 = (addr >> 24) & 0xff; +-} - --#define CX86_PCR0 0x20 --#define CX86_GCR 0xb8 --#define CX86_CCR0 0xc0 --#define CX86_CCR1 0xc1 --#define CX86_CCR2 0xc2 --#define CX86_CCR3 0xc3 --#define CX86_CCR4 0xe8 --#define CX86_CCR5 0xe9 --#define CX86_CCR6 0xea --#define CX86_CCR7 0xeb --#define CX86_PCR1 0xf0 --#define CX86_DIR0 0xfe --#define CX86_DIR1 0xff --#define CX86_ARR_BASE 0xc4 --#define CX86_RCR_BASE 0xdc + #define LDT_entry_a(info) \ + ((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff)) + /* Don't allow setting of the lm bit. It is useless anyways because +@@ -165,25 +155,15 @@ static inline void set_seg_base(unsigned + (info)->useable == 0 && \ + (info)->lm == 0) + +-#if TLS_SIZE != 24 +-# error update this code. +-#endif - --/* - * NSC/Cyrix CPU indexed register access macros - */ + static inline void load_TLS(struct thread_struct *t, unsigned int cpu) + { +-#if 0 ++ unsigned int i; + u64 *gdt = (u64 *)(cpu_gdt(cpu) + GDT_ENTRY_TLS_MIN); +- gdt[0] = t->tls_array[0]; +- gdt[1] = t->tls_array[1]; +- gdt[2] = t->tls_array[2]; +-#endif +-#define C(i) \ +- if (HYPERVISOR_update_descriptor(virt_to_machine(&cpu_gdt(cpu)[GDT_ENTRY_TLS_MIN + i]), \ +- t->tls_array[i])) \ +- BUG(); -@@ -351,7 +297,8 @@ typedef struct { - struct thread_struct; +- C(0); C(1); C(2); +-#undef C ++ for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++) ++ if (HYPERVISOR_update_descriptor(virt_to_machine(&gdt[i]), ++ t->tls_array[i])) ++ BUG(); + } - #ifndef CONFIG_X86_NO_TSS --struct tss_struct { -+/* This is the TSS defined by the hardware. 
*/ -+struct i386_hw_tss { - unsigned short back_link,__blh; - unsigned long esp0; - unsigned short ss0,__ss0h; -@@ -375,6 +322,11 @@ struct tss_struct { - unsigned short gs, __gsh; - unsigned short ldt, __ldth; - unsigned short trace, io_bitmap_base; -+} __attribute__((packed)); -+ -+struct tss_struct { -+ struct i386_hw_tss x86_tss; -+ - /* - * The extra 1 is there because the CPU will access an - * additional byte beyond the end of the IO permission -@@ -428,10 +380,11 @@ struct thread_struct { + /* +--- sle11-2009-04-20.orig/include/asm-x86/mach-xen/asm/dma-mapping_64.h 2009-03-04 11:25:55.000000000 +0100 ++++ sle11-2009-04-20/include/asm-x86/mach-xen/asm/dma-mapping_64.h 2008-12-15 11:27:22.000000000 +0100 +@@ -51,7 +51,7 @@ struct dma_mapping_ops { }; - #define INIT_THREAD { \ -+ .esp0 = sizeof(init_stack) + (long)&init_stack, \ - .vm86_info = NULL, \ - .sysenter_cs = __KERNEL_CS, \ - .io_bitmap_ptr = NULL, \ -- .fs = __KERNEL_PDA, \ -+ .fs = __KERNEL_PERCPU, \ - } + extern dma_addr_t bad_dma_address; +-extern struct dma_mapping_ops* dma_ops; ++extern const struct dma_mapping_ops* dma_ops; + extern int iommu_merge; + + #if 0 +--- sle11-2009-04-20.orig/include/asm-x86/mach-xen/asm/fixmap_64.h 2009-03-04 11:28:34.000000000 +0100 ++++ sle11-2009-04-20/include/asm-x86/mach-xen/asm/fixmap_64.h 2008-12-15 11:27:22.000000000 +0100 +@@ -15,7 +15,6 @@ + #include + #include + #include +-#include + #include /* -@@ -441,10 +394,12 @@ struct thread_struct { - * be within the limit. +--- sle11-2009-04-20.orig/include/asm-x86/mach-xen/asm/irqflags_64.h 2009-04-29 08:44:31.000000000 +0200 ++++ sle11-2009-04-20/include/asm-x86/mach-xen/asm/irqflags_64.h 2008-12-15 11:27:22.000000000 +0100 +@@ -9,6 +9,7 @@ */ - #define INIT_TSS { \ -- .esp0 = sizeof(init_stack) + (long)&init_stack, \ -- .ss0 = __KERNEL_DS, \ -- .ss1 = __KERNEL_CS, \ -- .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, \ -+ .x86_tss = { \ -+ .esp0 = sizeof(init_stack) + (long)&init_stack, \ -+ .ss0 = __KERNEL_DS, \ -+ .ss1 = __KERNEL_CS, \ -+ .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, \ -+ }, \ - .io_bitmap = { [ 0 ... 
IO_BITMAP_LONGS] = ~0 }, \ - } + #ifndef _ASM_IRQFLAGS_H + #define _ASM_IRQFLAGS_H ++#include -@@ -551,38 +506,33 @@ static inline void rep_nop(void) + #ifndef __ASSEMBLY__ + /* +@@ -50,19 +51,19 @@ static inline void raw_local_irq_disable + { + unsigned long flags = __raw_local_save_flags(); - #define cpu_relax() rep_nop() +- raw_local_irq_restore((flags & ~(1 << 9)) | (1 << 18)); ++ raw_local_irq_restore((flags & ~X86_EFLAGS_IF) | X86_EFLAGS_AC); + } --#define paravirt_enabled() 0 --#define __cpuid xen_cpuid -- - #ifndef CONFIG_X86_NO_TSS --static inline void __load_esp0(struct tss_struct *tss, struct thread_struct *thread) -+static inline void native_load_esp0(struct tss_struct *tss, struct thread_struct *thread) + static inline void raw_local_irq_enable(void) { -- tss->esp0 = thread->esp0; -+ tss->x86_tss.esp0 = thread->esp0; - /* This can only happen when SEP is enabled, no need to test "SEP"arately */ -- if (unlikely(tss->ss1 != thread->sysenter_cs)) { -- tss->ss1 = thread->sysenter_cs; -+ if (unlikely(tss->x86_tss.ss1 != thread->sysenter_cs)) { -+ tss->x86_tss.ss1 = thread->sysenter_cs; - wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0); - } + unsigned long flags = __raw_local_save_flags(); + +- raw_local_irq_restore((flags | (1 << 9)) & ~(1 << 18)); ++ raw_local_irq_restore((flags | X86_EFLAGS_IF) & (~X86_EFLAGS_AC)); } --#define load_esp0(tss, thread) \ -- __load_esp0(tss, thread) - #else --#define load_esp0(tss, thread) do { \ -+#define xen_load_esp0(tss, thread) do { \ - if (HYPERVISOR_stack_switch(__KERNEL_DS, (thread)->esp0)) \ - BUG(); \ - } while (0) - #endif + static inline int raw_irqs_disabled_flags(unsigned long flags) + { +- return !(flags & (1<<9)) || (flags & (1 << 18)); ++ return !(flags & X86_EFLAGS_IF) || (flags & X86_EFLAGS_AC); + } --/* -- * These special macros can be used to get or set a debugging register -- */ --#define get_debugreg(var, register) \ -- (var) = HYPERVISOR_get_debugreg(register) --#define set_debugreg(value, register) \ -- WARN_ON(HYPERVISOR_set_debugreg(register, value)) -+static inline unsigned long xen_get_debugreg(int regno) + #else /* CONFIG_X86_VSMP */ +@@ -118,13 +119,21 @@ static inline int raw_irqs_disabled_flag + * Used in the idle loop; sti takes one instruction cycle + * to complete: + */ +-void raw_safe_halt(void); ++void xen_safe_halt(void); ++static inline void raw_safe_halt(void) +{ -+ return HYPERVISOR_get_debugreg(regno); ++ xen_safe_halt(); +} --#define set_iopl_mask xen_set_iopl_mask -+static inline void xen_set_debugreg(int regno, unsigned long value) + /* + * Used when interrupts are already enabled or to + * shutdown the processor: + */ +-void halt(void); ++void xen_halt(void); ++static inline void halt(void) +{ -+ WARN_ON(HYPERVISOR_set_debugreg(regno, value)); ++ xen_halt(); +} - /* - * Set IOPL bits in EFLAGS from given mask -@@ -597,6 +547,21 @@ static inline void xen_set_iopl_mask(uns - } - - -+#define paravirt_enabled() 0 -+#define __cpuid xen_cpuid -+ -+#define load_esp0 xen_load_esp0 -+ -+/* -+ * These special macros can be used to get or set a debugging register -+ */ -+#define get_debugreg(var, register) \ -+ (var) = xen_get_debugreg(register) -+#define set_debugreg(value, register) \ -+ xen_set_debugreg(register, value) -+ -+#define set_iopl_mask xen_set_iopl_mask -+ - /* - * Generic CPUID function - * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx -@@ -749,8 +714,14 @@ extern unsigned long boot_option_idle_ov - extern void enable_sep_cpu(void); - extern int sysenter_setup(void); + #else /* 
__ASSEMBLY__: */ + # ifdef CONFIG_TRACE_IRQFLAGS +--- sle11-2009-04-20.orig/include/asm-x86/mach-xen/asm/mmu_context_64.h 2009-04-29 08:44:31.000000000 +0200 ++++ sle11-2009-04-20/include/asm-x86/mach-xen/asm/mmu_context_64.h 2008-12-15 11:27:22.000000000 +0100 +@@ -9,6 +9,9 @@ + #include + #include --extern int init_gdt(int cpu, struct task_struct *idle); -+/* Defined in head.S */ -+extern struct Xgt_desc_struct early_gdt_descr; -+ - extern void cpu_set_gdt(int); --extern void secondary_cpu_init(void); -+extern void switch_to_new_gdt(void); -+extern void cpu_init(void); -+extern void init_gdt(int cpu); ++void arch_exit_mmap(struct mm_struct *mm); ++void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm); + -+extern int force_mwait; - - #endif /* __ASM_I386_PROCESSOR_H */ ---- a/include/asm-x86/mach-xen/asm/processor_64.h -+++ b/include/asm-x86/mach-xen/asm/processor_64.h -@@ -20,6 +20,7 @@ - #include - #include - #include -+#include - - #define TF_MASK 0x00000100 - #define IF_MASK 0x00000200 -@@ -103,42 +104,6 @@ extern unsigned int init_intel_cacheinfo - extern unsigned short num_cache_leaves; - /* -- * EFLAGS bits -- */ --#define X86_EFLAGS_CF 0x00000001 /* Carry Flag */ --#define X86_EFLAGS_PF 0x00000004 /* Parity Flag */ --#define X86_EFLAGS_AF 0x00000010 /* Auxillary carry Flag */ --#define X86_EFLAGS_ZF 0x00000040 /* Zero Flag */ --#define X86_EFLAGS_SF 0x00000080 /* Sign Flag */ --#define X86_EFLAGS_TF 0x00000100 /* Trap Flag */ --#define X86_EFLAGS_IF 0x00000200 /* Interrupt Flag */ --#define X86_EFLAGS_DF 0x00000400 /* Direction Flag */ --#define X86_EFLAGS_OF 0x00000800 /* Overflow Flag */ --#define X86_EFLAGS_IOPL 0x00003000 /* IOPL mask */ --#define X86_EFLAGS_NT 0x00004000 /* Nested Task */ --#define X86_EFLAGS_RF 0x00010000 /* Resume Flag */ --#define X86_EFLAGS_VM 0x00020000 /* Virtual Mode */ --#define X86_EFLAGS_AC 0x00040000 /* Alignment Check */ --#define X86_EFLAGS_VIF 0x00080000 /* Virtual Interrupt Flag */ --#define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */ --#define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */ -- --/* -- * Intel CPU features in CR4 -- */ --#define X86_CR4_VME 0x0001 /* enable vm86 extensions */ --#define X86_CR4_PVI 0x0002 /* virtual interrupts flag enable */ --#define X86_CR4_TSD 0x0004 /* disable time stamp at ipl 3 */ --#define X86_CR4_DE 0x0008 /* enable debugging extensions */ --#define X86_CR4_PSE 0x0010 /* enable page size extensions */ --#define X86_CR4_PAE 0x0020 /* enable physical address extensions */ --#define X86_CR4_MCE 0x0040 /* Machine check enable */ --#define X86_CR4_PGE 0x0080 /* enable global pages */ --#define X86_CR4_PCE 0x0100 /* enable performance counters at ipl 3 */ --#define X86_CR4_OSFXSR 0x0200 /* enable fast FPU save and restore */ --#define X86_CR4_OSXMMEXCPT 0x0400 /* enable unmasked SSE exceptions */ -- --/* - * Save the cr4 feature set we're using (ie - * Pentium 4MB enable and PPro Global page - * enable), so that any CPU's that boot up -@@ -203,7 +168,7 @@ struct i387_fxsave_struct { - u32 mxcsr; - u32 mxcsr_mask; - u32 st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */ -- u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg = 128 bytes */ -+ u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg = 256 bytes */ - u32 padding[24]; - } __attribute__ ((aligned (16))); - -@@ -436,22 +401,6 @@ static inline void prefetchw(void *x) - #define cpu_relax() rep_nop() + * possibly do the LDT unload here? 
+ */ +--- sle11-2009-04-20.orig/include/asm-x86/mach-xen/asm/page_64.h 2009-04-29 08:44:31.000000000 +0200 ++++ sle11-2009-04-20/include/asm-x86/mach-xen/asm/page_64.h 2008-12-15 11:27:22.000000000 +0100 +@@ -7,6 +7,7 @@ + #include + #include + #endif ++#include + #include /* -- * NSC/Cyrix CPU configuration register indexes -- */ --#define CX86_CCR0 0xc0 --#define CX86_CCR1 0xc1 --#define CX86_CCR2 0xc2 --#define CX86_CCR3 0xc3 --#define CX86_CCR4 0xe8 --#define CX86_CCR5 0xe9 --#define CX86_CCR6 0xea --#define CX86_CCR7 0xeb --#define CX86_DIR0 0xfe --#define CX86_DIR1 0xff --#define CX86_ARR_BASE 0xc4 --#define CX86_RCR_BASE 0xdc -- --/* - * NSC/Cyrix CPU indexed register access macros - */ +@@ -19,18 +20,14 @@ ---- a/include/asm-x86/mach-xen/asm/segment_32.h -+++ b/include/asm-x86/mach-xen/asm/segment_32.h -@@ -39,7 +39,7 @@ - * 25 - APM BIOS support - * - * 26 - ESPFIX small SS -- * 27 - PDA [ per-cpu private data area ] -+ * 27 - per-cpu [ offset to per-cpu data area ] - * 28 - unused - * 29 - unused - * 30 - unused -@@ -74,8 +74,12 @@ - #define GDT_ENTRY_ESPFIX_SS (GDT_ENTRY_KERNEL_BASE + 14) - #define __ESPFIX_SS (GDT_ENTRY_ESPFIX_SS * 8) + /* PAGE_SHIFT determines the page size */ + #define PAGE_SHIFT 12 +-#ifdef __ASSEMBLY__ +-#define PAGE_SIZE (0x1 << PAGE_SHIFT) +-#else +-#define PAGE_SIZE (1UL << PAGE_SHIFT) +-#endif ++#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT) + #define PAGE_MASK (~(PAGE_SIZE-1)) --#define GDT_ENTRY_PDA (GDT_ENTRY_KERNEL_BASE + 15) --#define __KERNEL_PDA (GDT_ENTRY_PDA * 8) -+#define GDT_ENTRY_PERCPU (GDT_ENTRY_KERNEL_BASE + 15) -+#ifdef CONFIG_SMP -+#define __KERNEL_PERCPU (GDT_ENTRY_PERCPU * 8) -+#else -+#define __KERNEL_PERCPU 0 -+#endif + /* See Documentation/x86_64/mm.txt for a description of the memory map. */ + #define __PHYSICAL_MASK_SHIFT 46 +-#define __PHYSICAL_MASK ((1UL << __PHYSICAL_MASK_SHIFT) - 1) ++#define __PHYSICAL_MASK ((_AC(1,UL) << __PHYSICAL_MASK_SHIFT) - 1) + #define __VIRTUAL_MASK_SHIFT 48 +-#define __VIRTUAL_MASK ((1UL << __VIRTUAL_MASK_SHIFT) - 1) ++#define __VIRTUAL_MASK ((_AC(1,UL) << __VIRTUAL_MASK_SHIFT) - 1) - #define GDT_ENTRY_DOUBLEFAULT_TSS 31 + #define PHYSICAL_PAGE_MASK (~(PAGE_SIZE-1) & __PHYSICAL_MASK) ---- a/include/asm-x86/mach-xen/asm/smp_32.h -+++ b/include/asm-x86/mach-xen/asm/smp_32.h -@@ -8,19 +8,15 @@ - #include - #include - #include --#include - #endif +@@ -55,10 +52,10 @@ + #define N_EXCEPTION_STACKS 5 /* hw limit: 7 */ --#ifdef CONFIG_X86_LOCAL_APIC --#ifndef __ASSEMBLY__ --#include -+#if defined(CONFIG_X86_LOCAL_APIC) && !defined(__ASSEMBLY__) - #include - #include -+#include - #ifdef CONFIG_X86_IO_APIC - #include - #endif --#include --#endif - #endif + #define LARGE_PAGE_MASK (~(LARGE_PAGE_SIZE-1)) +-#define LARGE_PAGE_SIZE (1UL << PMD_SHIFT) ++#define LARGE_PAGE_SIZE (_AC(1,UL) << PMD_SHIFT) - #define BAD_APICID 0xFFu -@@ -52,9 +48,76 @@ extern void cpu_exit_clear(void); - extern void cpu_uninit(void); - #endif + #define HPAGE_SHIFT PMD_SHIFT +-#define HPAGE_SIZE ((1UL) << HPAGE_SHIFT) ++#define HPAGE_SIZE (_AC(1,UL) << HPAGE_SHIFT) + #define HPAGE_MASK (~(HPAGE_SIZE - 1)) + #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) --#ifndef CONFIG_PARAVIRT -+#ifndef CONFIG_XEN -+struct smp_ops -+{ -+ void (*smp_prepare_boot_cpu)(void); -+ void (*smp_prepare_cpus)(unsigned max_cpus); -+ int (*cpu_up)(unsigned cpu); -+ void (*smp_cpus_done)(unsigned max_cpus); -+ -+ void (*smp_send_stop)(void); -+ void (*smp_send_reschedule)(int cpu); -+ int (*smp_call_function_mask)(cpumask_t mask, -+ void (*func)(void *info), 
void *info, -+ int wait); -+}; +@@ -152,17 +149,23 @@ static inline pgd_t __pgd(unsigned long + + #define __pgprot(x) ((pgprot_t) { (x) } ) + +-#define __PHYSICAL_START ((unsigned long)CONFIG_PHYSICAL_START) +-#define __START_KERNEL (__START_KERNEL_map + __PHYSICAL_START) +-#define __START_KERNEL_map 0xffffffff80000000UL +-#define __PAGE_OFFSET 0xffff880000000000UL ++#endif /* !__ASSEMBLY__ */ + +-#else + #define __PHYSICAL_START CONFIG_PHYSICAL_START ++#define __KERNEL_ALIGN 0x200000 + -+extern struct smp_ops smp_ops; ++/* ++ * Make sure kernel is aligned to 2MB address. Catching it at compile ++ * time is better. Change your config file and compile the kernel ++ * for a 2MB aligned address (CONFIG_PHYSICAL_START) ++ */ ++#if (CONFIG_PHYSICAL_START % __KERNEL_ALIGN) != 0 ++#error "CONFIG_PHYSICAL_START must be a multiple of 2MB" ++#endif + -+static inline void smp_prepare_boot_cpu(void) -+{ -+ smp_ops.smp_prepare_boot_cpu(); -+} -+static inline void smp_prepare_cpus(unsigned int max_cpus) -+{ -+ smp_ops.smp_prepare_cpus(max_cpus); -+} -+static inline int __cpu_up(unsigned int cpu) -+{ -+ return smp_ops.cpu_up(cpu); -+} -+static inline void smp_cpus_done(unsigned int max_cpus) -+{ -+ smp_ops.smp_cpus_done(max_cpus); -+} + #define __START_KERNEL (__START_KERNEL_map + __PHYSICAL_START) +-#define __START_KERNEL_map 0xffffffff80000000 +-#define __PAGE_OFFSET 0xffff880000000000 +-#endif /* !__ASSEMBLY__ */ ++#define __START_KERNEL_map _AC(0xffffffff80000000, UL) ++#define __PAGE_OFFSET _AC(0xffff880000000000, UL) + + #if CONFIG_XEN_COMPAT <= 0x030002 + #undef LOAD_OFFSET +@@ -172,20 +175,20 @@ static inline pgd_t __pgd(unsigned long + /* to align the pointer to the (next) page boundary */ + #define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK) + +-#define KERNEL_TEXT_SIZE (40UL*1024*1024) +-#define KERNEL_TEXT_START 0xffffffff80000000UL ++#define KERNEL_TEXT_SIZE (40*1024*1024) ++#define KERNEL_TEXT_START _AC(0xffffffff80000000, UL) + -+static inline void smp_send_stop(void) -+{ -+ smp_ops.smp_send_stop(); -+} -+static inline void smp_send_reschedule(int cpu) -+{ -+ smp_ops.smp_send_reschedule(cpu); -+} -+static inline int smp_call_function_mask(cpumask_t mask, -+ void (*func) (void *info), void *info, -+ int wait) ++#define PAGE_OFFSET __PAGE_OFFSET + +-#define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET) ++#ifndef __ASSEMBLY__ ++static inline unsigned long __phys_addr(unsigned long x) +{ -+ return smp_ops.smp_call_function_mask(mask, func, info, wait); ++ return x - (x >= __START_KERNEL_map ? __START_KERNEL_map : PAGE_OFFSET); +} -+ -+void native_smp_prepare_boot_cpu(void); -+void native_smp_prepare_cpus(unsigned int max_cpus); -+int native_cpu_up(unsigned int cpunum); -+void native_smp_cpus_done(unsigned int max_cpus); -+ - #define startup_ipi_hook(phys_apicid, start_eip, start_esp) \ - do { } while (0) -+ -+#else -+ -+ -+void xen_smp_send_stop(void); -+void xen_smp_send_reschedule(int cpu); -+int xen_smp_call_function_mask(cpumask_t mask, -+ void (*func) (void *info), void *info, -+ int wait); -+ -+#define smp_send_stop xen_smp_send_stop -+#define smp_send_reschedule xen_smp_send_reschedule -+#define smp_call_function_mask xen_smp_call_function_mask -+ - #endif ++#endif - /* -@@ -62,7 +125,8 @@ do { } while (0) - * from the initial startup. We map APIC_BASE very early in page_setup(), - * so this is correct in the x86 case. 
- */ --#define raw_smp_processor_id() (read_pda(cpu_number)) -+DECLARE_PER_CPU(int, cpu_number); -+#define raw_smp_processor_id() (x86_read_percpu(cpu_number)) +-/* Note: __pa(&symbol_visible_to_c) should be always replaced with __pa_symbol. +- Otherwise you risk miscompilation. */ +-#define __pa(x) (((unsigned long)(x)>=__START_KERNEL_map)?(unsigned long)(x) - (unsigned long)__START_KERNEL_map:(unsigned long)(x) - PAGE_OFFSET) +-/* __pa_symbol should be used for C visible symbols. +- This seems to be the official gcc blessed way to do such arithmetic. */ +-#define __pa_symbol(x) \ +- ({unsigned long v; \ +- asm("" : "=r" (v) : "0" (x)); \ +- __pa(v); }) ++#define __pa(x) __phys_addr((unsigned long)(x)) ++#define __pa_symbol(x) __phys_addr((unsigned long)(x)) - extern cpumask_t cpu_possible_map; - #define cpu_callin_map cpu_possible_map -@@ -73,20 +137,6 @@ static inline int num_booting_cpus(void) - return cpus_weight(cpu_possible_map); + #define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET)) + #define __boot_va(x) __va(x) +--- sle11-2009-04-20.orig/include/asm-x86/mach-xen/asm/pgalloc_64.h 2009-04-29 08:44:31.000000000 +0200 ++++ sle11-2009-04-20/include/asm-x86/mach-xen/asm/pgalloc_64.h 2008-12-15 11:27:22.000000000 +0100 +@@ -1,7 +1,6 @@ + #ifndef _X86_64_PGALLOC_H + #define _X86_64_PGALLOC_H + +-#include + #include + #include + #include +@@ -100,24 +99,16 @@ static inline void pgd_list_add(pgd_t *p + struct page *page = virt_to_page(pgd); + + spin_lock(&pgd_lock); +- page->index = (pgoff_t)pgd_list; +- if (pgd_list) +- pgd_list->private = (unsigned long)&page->index; +- pgd_list = page; +- page->private = (unsigned long)&pgd_list; ++ list_add(&page->lru, &pgd_list); + spin_unlock(&pgd_lock); + } + + static inline void pgd_list_del(pgd_t *pgd) + { +- struct page *next, **pprev, *page = virt_to_page(pgd); ++ struct page *page = virt_to_page(pgd); + + spin_lock(&pgd_lock); +- next = (struct page *)page->index; +- pprev = (struct page **)page->private; +- *pprev = next; +- if (next) +- next->private = (unsigned long)pprev; ++ list_del(&page->lru); + spin_unlock(&pgd_lock); } --#ifdef CONFIG_X86_LOCAL_APIC -- --#ifdef APIC_DEFINITION --extern int hard_smp_processor_id(void); --#else --#include --static inline int hard_smp_processor_id(void) --{ -- /* we don't want to mark this access volatile - bad code generation */ -- return GET_APIC_ID(*(unsigned long *)(APIC_BASE+APIC_ID)); --} --#endif --#endif -- - #define safe_smp_processor_id() smp_processor_id() - extern int __cpu_disable(void); - extern void __cpu_die(unsigned int cpu); -@@ -102,10 +152,31 @@ extern unsigned int num_processors; +--- sle11-2009-04-20.orig/include/asm-x86/mach-xen/asm/pgtable_64.h 2009-03-04 11:25:55.000000000 +0100 ++++ sle11-2009-04-20/include/asm-x86/mach-xen/asm/pgtable_64.h 2008-12-15 11:27:22.000000000 +0100 +@@ -1,12 +1,14 @@ + #ifndef _X86_64_PGTABLE_H + #define _X86_64_PGTABLE_H - #define NO_PROC_ID 0xFF /* No processor magic marker */ ++#include ++#ifndef __ASSEMBLY__ ++ + /* + * This file contains the functions and defines necessary to modify and use + * the x86-64 page table tree. 
+ */ + #include +-#include + #include + #include + #include +@@ -35,11 +37,9 @@ extern pte_t *lookup_address(unsigned lo + #endif --#endif -+#endif /* CONFIG_SMP */ + extern pud_t level3_kernel_pgt[512]; +-extern pud_t level3_physmem_pgt[512]; + extern pud_t level3_ident_pgt[512]; + extern pmd_t level2_kernel_pgt[512]; + extern pgd_t init_level4_pgt[]; +-extern pgd_t boot_level4_pgt[]; + extern unsigned long __supported_pte_mask; - #ifndef __ASSEMBLY__ + #define swapper_pg_dir init_level4_pgt +@@ -54,6 +54,8 @@ extern void clear_kernel_mapping(unsigne + extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)]; + #define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) -+#ifdef CONFIG_X86_LOCAL_APIC -+ -+#ifdef APIC_DEFINITION -+extern int hard_smp_processor_id(void); -+#else -+#include -+static inline int hard_smp_processor_id(void) -+{ -+ /* we don't want to mark this access volatile - bad code generation */ -+ return GET_APIC_ID(*(unsigned long *)(APIC_BASE+APIC_ID)); -+} -+#endif /* APIC_DEFINITION */ -+ -+#else /* CONFIG_X86_LOCAL_APIC */ -+ -+#ifndef CONFIG_SMP -+#define hard_smp_processor_id() 0 -+#endif ++#endif /* !__ASSEMBLY__ */ + -+#endif /* CONFIG_X86_LOCAL_APIC */ + /* + * PGDIR_SHIFT determines what a top-level page table entry can map + */ +@@ -78,6 +80,8 @@ extern unsigned long empty_zero_page[PAG + */ + #define PTRS_PER_PTE 512 + ++#ifndef __ASSEMBLY__ + - extern u8 apicid_2_node[]; + #define pte_ERROR(e) \ + printk("%s:%d: bad pte %p(%016lx pfn %010lx).\n", __FILE__, __LINE__, \ + &(e), __pte_val(e), pte_pfn(e)) +@@ -120,22 +124,23 @@ static inline void pgd_clear (pgd_t * pg - #ifdef CONFIG_X86_LOCAL_APIC ---- a/include/asm-x86/mach-xen/asm/smp_64.h -+++ b/include/asm-x86/mach-xen/asm/smp_64.h -@@ -11,12 +11,11 @@ - extern int disable_apic; + #define pte_pgprot(a) (__pgprot((a).pte & ~PHYSICAL_PAGE_MASK)) - #ifdef CONFIG_X86_LOCAL_APIC --#include - #include -+#include - #ifdef CONFIG_X86_IO_APIC - #include - #endif --#include - #include - #endif +-#define PMD_SIZE (1UL << PMD_SHIFT) ++#endif /* !__ASSEMBLY__ */ ++ ++#define PMD_SIZE (_AC(1,UL) << PMD_SHIFT) + #define PMD_MASK (~(PMD_SIZE-1)) +-#define PUD_SIZE (1UL << PUD_SHIFT) ++#define PUD_SIZE (_AC(1,UL) << PUD_SHIFT) + #define PUD_MASK (~(PUD_SIZE-1)) +-#define PGDIR_SIZE (1UL << PGDIR_SHIFT) ++#define PGDIR_SIZE (_AC(1,UL) << PGDIR_SHIFT) + #define PGDIR_MASK (~(PGDIR_SIZE-1)) -@@ -41,7 +40,6 @@ extern void lock_ipi_call_lock(void); - extern void unlock_ipi_call_lock(void); - extern int smp_num_siblings; - extern void smp_send_reschedule(int cpu); --void smp_stop_cpu(void); + #define USER_PTRS_PER_PGD ((TASK_SIZE-1)/PGDIR_SIZE+1) + #define FIRST_USER_ADDRESS 0 - extern cpumask_t cpu_sibling_map[NR_CPUS]; - extern cpumask_t cpu_core_map[NR_CPUS]; -@@ -62,14 +60,6 @@ static inline int num_booting_cpus(void) +-#ifndef __ASSEMBLY__ +-#define MAXMEM 0x3fffffffffffUL +-#define VMALLOC_START 0xffffc20000000000UL +-#define VMALLOC_END 0xffffe1ffffffffffUL +-#define MODULES_VADDR 0xffffffff88000000UL +-#define MODULES_END 0xfffffffffff00000UL ++#define MAXMEM _AC(0x3fffffffffff, UL) ++#define VMALLOC_START _AC(0xffffc20000000000, UL) ++#define VMALLOC_END _AC(0xffffe1ffffffffff, UL) ++#define MODULES_VADDR _AC(0xffffffff88000000, UL) ++#define MODULES_END _AC(0xfffffffffff00000, UL) + #define MODULES_LEN (MODULES_END - MODULES_VADDR) - #define raw_smp_processor_id() read_pda(cpunumber) + #define _PAGE_BIT_PRESENT 0 +@@ -161,16 +166,18 @@ static inline void pgd_clear (pgd_t * pg + #define _PAGE_GLOBAL 0x100 /* 
Global TLB entry */ --#ifdef CONFIG_X86_LOCAL_APIC --static inline int hard_smp_processor_id(void) --{ -- /* we don't want to mark this access volatile - bad code generation */ -- return GET_APIC_ID(*(unsigned int *)(APIC_BASE+APIC_ID)); --} --#endif -- - extern int __cpu_disable(void); - extern void __cpu_die(unsigned int cpu); - extern void prefill_possible_map(void); -@@ -78,6 +68,14 @@ extern unsigned __cpuinitdata disabled_c + #define _PAGE_PROTNONE 0x080 /* If not present */ +-#define _PAGE_NX (1UL<<_PAGE_BIT_NX) ++#define _PAGE_NX (_AC(1,UL)<<_PAGE_BIT_NX) - #define NO_PROC_ID 0xFF /* No processor magic marker */ + /* Mapped page is I/O or foreign and has no associated page struct. */ + #define _PAGE_IO 0x200 -+#endif /* CONFIG_SMP */ -+ -+#ifdef CONFIG_X86_LOCAL_APIC -+static inline int hard_smp_processor_id(void) -+{ -+ /* we don't want to mark this access volatile - bad code generation */ -+ return GET_APIC_ID(*(unsigned int *)(APIC_BASE+APIC_ID)); -+} ++#ifndef __ASSEMBLY__ + #if CONFIG_XEN_COMPAT <= 0x030002 + extern unsigned int __kernel_page_user; + #else + #define __kernel_page_user 0 #endif ++#endif - /* ---- a/include/asm-x86/mach-xen/asm/system_32.h -+++ b/include/asm-x86/mach-xen/asm/system_32.h -@@ -4,7 +4,7 @@ - #include - #include - #include --#include /* for LOCK_PREFIX */ -+#include - #include - #include - -@@ -90,308 +90,102 @@ __asm__ __volatile__ ("movw %%dx,%1\n\t" - #define savesegment(seg, value) \ - asm volatile("mov %%" #seg ",%0":"=rm" (value)) + #define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY) + #define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY | __kernel_page_user) +@@ -235,6 +242,8 @@ extern unsigned int __kernel_page_user; + #define __S110 PAGE_SHARED_EXEC + #define __S111 PAGE_SHARED_EXEC --#define read_cr0() ({ \ -- unsigned int __dummy; \ -- __asm__ __volatile__( \ -- "movl %%cr0,%0\n\t" \ -- :"=r" (__dummy)); \ -- __dummy; \ --}) --#define write_cr0(x) \ -- __asm__ __volatile__("movl %0,%%cr0": :"r" (x)) -- --#define read_cr2() (current_vcpu_info()->arch.cr2) --#define write_cr2(x) \ -- __asm__ __volatile__("movl %0,%%cr2": :"r" (x)) -- --#define read_cr3() ({ \ -- unsigned int __dummy; \ -- __asm__ ( \ -- "movl %%cr3,%0\n\t" \ -- :"=r" (__dummy)); \ -- __dummy = xen_cr3_to_pfn(__dummy); \ -- mfn_to_pfn(__dummy) << PAGE_SHIFT; \ --}) --#define write_cr3(x) ({ \ -- unsigned int __dummy = pfn_to_mfn((x) >> PAGE_SHIFT); \ -- __dummy = xen_pfn_to_cr3(__dummy); \ -- __asm__ __volatile__("movl %0,%%cr3": :"r" (__dummy)); \ --}) --#define read_cr4() ({ \ -- unsigned int __dummy; \ -- __asm__( \ -- "movl %%cr4,%0\n\t" \ -- :"=r" (__dummy)); \ -- __dummy; \ --}) --#define read_cr4_safe() ({ \ -- unsigned int __dummy; \ -- /* This could fault if %cr4 does not exist */ \ -- __asm__("1: movl %%cr4, %0 \n" \ -- "2: \n" \ -- ".section __ex_table,\"a\" \n" \ -- ".long 1b,2b \n" \ -- ".previous \n" \ -- : "=r" (__dummy): "0" (0)); \ -- __dummy; \ --}) -- --#define write_cr4(x) \ -- __asm__ __volatile__("movl %0,%%cr4": :"r" (x)) -- --#define wbinvd() \ -- __asm__ __volatile__ ("wbinvd": : :"memory") -- --/* Clear the 'TS' bit */ --#define clts() (HYPERVISOR_fpu_taskswitch(0)) -- --/* Set the 'TS' bit */ --#define stts() (HYPERVISOR_fpu_taskswitch(1)) -- --#endif /* __KERNEL__ */ -- --static inline unsigned long get_limit(unsigned long segment) -+static inline void xen_clts(void) - { -- unsigned long __limit; -- __asm__("lsll %1,%0" -- :"=r" (__limit):"r" (segment)); -- return __limit+1; -+ 
HYPERVISOR_fpu_taskswitch(0); - } ++#ifndef __ASSEMBLY__ ++ + static inline unsigned long pgd_bad(pgd_t pgd) + { + return __pgd_val(pgd) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER); +@@ -346,6 +355,20 @@ static inline pte_t pte_mkwrite(pte_t pt + static inline pte_t pte_mkhuge(pte_t pte) { __pte_val(pte) |= _PAGE_PSE; return pte; } + static inline pte_t pte_clrhuge(pte_t pte) { __pte_val(pte) &= ~_PAGE_PSE; return pte; } --#define nop() __asm__ __volatile__ ("nop") -- --#define xchg(ptr,v) ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr)))) -- --#define tas(ptr) (xchg((ptr),1)) -- --struct __xchg_dummy { unsigned long a[100]; }; --#define __xg(x) ((struct __xchg_dummy *)(x)) -+static inline unsigned long xen_read_cr0(void) ++static inline int ptep_test_and_clear_dirty(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) +{ -+ unsigned long val; -+ asm volatile("movl %%cr0,%0\n\t" :"=r" (val)); -+ return val; ++ if (!pte_dirty(*ptep)) ++ return 0; ++ return test_and_clear_bit(_PAGE_BIT_DIRTY, &ptep->pte); +} - -+static inline void xen_write_cr0(unsigned long val) ++ ++static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) +{ -+ asm volatile("movl %0,%%cr0": :"r" (val)); ++ if (!pte_young(*ptep)) ++ return 0; ++ return test_and_clear_bit(_PAGE_BIT_ACCESSED, &ptep->pte); +} - --#ifdef CONFIG_X86_CMPXCHG64 -+#define xen_read_cr2() (current_vcpu_info()->arch.cr2) - ++ + static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) + { + pte_t pte = *ptep; +@@ -470,18 +493,12 @@ static inline pte_t pte_modify(pte_t pte + * bit at the same time. */ + #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS + #define ptep_set_access_flags(vma, address, ptep, entry, dirty) \ +- do { \ +- if (dirty) \ +- ptep_establish(vma, address, ptep, entry); \ +- } while (0) +- +- -/* -- * The semantics of XCHGCMP8B are a bit strange, this is why -- * there is a loop and the loading of %%eax and %%edx has to -- * be inside. This inlines well in most cases, the cached -- * cost is around ~38 cycles. (in the future we might want -- * to do an SIMD/3DNOW!/MMX/FPU 64-bit store here, but that -- * might have an implicit FPU-save as a cost, so it's not -- * clear which path to go.) -- * -- * cmpxchg8b must be used with the lock prefix here to allow -- * the instruction to be executed atomically, see page 3-102 -- * of the instruction set reference 24319102.pdf. We need -- * the reader side to see the coherent 64bit value. +- * i386 says: We don't actually have these, but we want to advertise +- * them so that we can encompass the flush here. 
- */ --static inline void __set_64bit (unsigned long long * ptr, -- unsigned int low, unsigned int high) -+static inline void xen_write_cr2(unsigned long val) - { -- __asm__ __volatile__ ( -- "\n1:\t" -- "movl (%0), %%eax\n\t" -- "movl 4(%0), %%edx\n\t" -- "lock cmpxchg8b (%0)\n\t" -- "jnz 1b" -- : /* no outputs */ -- : "D"(ptr), -- "b"(low), -- "c"(high) -- : "ax","dx","memory"); -+ asm volatile("movl %0,%%cr2": :"r" (val)); - } - --static inline void __set_64bit_constant (unsigned long long *ptr, -- unsigned long long value) -+static inline unsigned long xen_read_cr3(void) - { -- __set_64bit(ptr,(unsigned int)(value), (unsigned int)((value)>>32ULL)); -+ unsigned long val; -+ asm volatile("movl %%cr3,%0\n\t" :"=r" (val)); -+ return mfn_to_pfn(xen_cr3_to_pfn(val)) << PAGE_SHIFT; - } --#define ll_low(x) *(((unsigned int*)&(x))+0) --#define ll_high(x) *(((unsigned int*)&(x))+1) +-#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY +-#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG ++({ \ ++ int __changed = !pte_same(*(ptep), entry); \ ++ if (__changed && (dirty)) \ ++ ptep_establish(vma, address, ptep, entry); \ ++ __changed; \ ++}) --static inline void __set_64bit_var (unsigned long long *ptr, -- unsigned long long value) -+static inline void xen_write_cr3(unsigned long val) - { -- __set_64bit(ptr,ll_low(value), ll_high(value)); -+ val = xen_pfn_to_cr3(pfn_to_mfn(val >> PAGE_SHIFT)); -+ asm volatile("movl %0,%%cr3": :"r" (val)); - } + #define __HAVE_ARCH_PTEP_CLEAR_DIRTY_FLUSH + #define ptep_clear_flush_dirty(vma, address, ptep) \ +@@ -490,7 +507,7 @@ static inline pte_t pte_modify(pte_t pte + int __dirty = pte_dirty(__pte); \ + __pte = pte_mkclean(__pte); \ + if ((vma)->vm_mm->context.pinned) \ +- ptep_set_access_flags(vma, address, ptep, __pte, __dirty); \ ++ (void)ptep_set_access_flags(vma, address, ptep, __pte, __dirty); \ + else if (__dirty) \ + set_pte(ptep, __pte); \ + __dirty; \ +@@ -503,7 +520,7 @@ static inline pte_t pte_modify(pte_t pte + int __young = pte_young(__pte); \ + __pte = pte_mkold(__pte); \ + if ((vma)->vm_mm->context.pinned) \ +- ptep_set_access_flags(vma, address, ptep, __pte, __young); \ ++ (void)ptep_set_access_flags(vma, address, ptep, __pte, __young); \ + else if (__young) \ + set_pte(ptep, __pte); \ + __young; \ +@@ -517,10 +534,7 @@ static inline pte_t pte_modify(pte_t pte + #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) --#define set_64bit(ptr,value) \ --(__builtin_constant_p(value) ? \ -- __set_64bit_constant(ptr, value) : \ -- __set_64bit_var(ptr, value) ) + extern spinlock_t pgd_lock; +-extern struct page *pgd_list; +-void vmalloc_sync_all(void); - --#define _set_64bit(ptr,value) \ --(__builtin_constant_p(value) ? \ -- __set_64bit(ptr, (unsigned int)(value), (unsigned int)((value)>>32ULL) ) : \ -- __set_64bit(ptr, ll_low(value), ll_high(value)) ) +-#endif /* !__ASSEMBLY__ */ ++extern struct list_head pgd_list; + + extern int kern_addr_valid(unsigned long addr); + +@@ -559,10 +573,6 @@ int xen_change_pte_range(struct mm_struc + #define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \ + direct_remap_pfn_range(vma,vaddr,pfn,size,prot,DOMID_IO) + +-#define MK_IOSPACE_PFN(space, pfn) (pfn) +-#define GET_IOSPACE(pfn) 0 +-#define GET_PFN(pfn) (pfn) - --#endif + #define HAVE_ARCH_UNMAPPED_AREA + + #define pgtable_cache_init() do { } while (0) +@@ -576,11 +586,14 @@ int xen_change_pte_range(struct mm_struc + #define kc_offset_to_vaddr(o) \ + (((o) & (1UL << (__VIRTUAL_MASK_SHIFT-1))) ? 
((o) | (~__VIRTUAL_MASK)) : (o)) + ++#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG ++#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY + #define __HAVE_ARCH_PTEP_GET_AND_CLEAR + #define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL + #define __HAVE_ARCH_PTEP_CLEAR_FLUSH + #define __HAVE_ARCH_PTEP_SET_WRPROTECT + #define __HAVE_ARCH_PTE_SAME + #include ++#endif /* !__ASSEMBLY__ */ + + #endif /* _X86_64_PGTABLE_H */ +--- sle11-2009-04-20.orig/include/asm-x86/mach-xen/asm/processor_64.h 2008-12-15 11:26:44.000000000 +0100 ++++ sle11-2009-04-20/include/asm-x86/mach-xen/asm/processor_64.h 2008-12-15 11:27:22.000000000 +0100 +@@ -20,6 +20,7 @@ + #include + #include + #include ++#include + + #define TF_MASK 0x00000100 + #define IF_MASK 0x00000200 +@@ -103,42 +104,6 @@ extern unsigned int init_intel_cacheinfo + extern unsigned short num_cache_leaves; + + /* +- * EFLAGS bits +- */ +-#define X86_EFLAGS_CF 0x00000001 /* Carry Flag */ +-#define X86_EFLAGS_PF 0x00000004 /* Parity Flag */ +-#define X86_EFLAGS_AF 0x00000010 /* Auxillary carry Flag */ +-#define X86_EFLAGS_ZF 0x00000040 /* Zero Flag */ +-#define X86_EFLAGS_SF 0x00000080 /* Sign Flag */ +-#define X86_EFLAGS_TF 0x00000100 /* Trap Flag */ +-#define X86_EFLAGS_IF 0x00000200 /* Interrupt Flag */ +-#define X86_EFLAGS_DF 0x00000400 /* Direction Flag */ +-#define X86_EFLAGS_OF 0x00000800 /* Overflow Flag */ +-#define X86_EFLAGS_IOPL 0x00003000 /* IOPL mask */ +-#define X86_EFLAGS_NT 0x00004000 /* Nested Task */ +-#define X86_EFLAGS_RF 0x00010000 /* Resume Flag */ +-#define X86_EFLAGS_VM 0x00020000 /* Virtual Mode */ +-#define X86_EFLAGS_AC 0x00040000 /* Alignment Check */ +-#define X86_EFLAGS_VIF 0x00080000 /* Virtual Interrupt Flag */ +-#define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */ +-#define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */ - -/* -- * Note: no "lock" prefix even on SMP: xchg always implies lock anyway -- * Note 2: xchg has side effect, so that attribute volatile is necessary, -- * but generally the primitive is invalid, *ptr is output argument. 
--ANK +- * Intel CPU features in CR4 - */ --static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int size) -+static inline unsigned long xen_read_cr4(void) - { -- switch (size) { -- case 1: -- __asm__ __volatile__("xchgb %b0,%1" -- :"=q" (x) -- :"m" (*__xg(ptr)), "0" (x) -- :"memory"); -- break; -- case 2: -- __asm__ __volatile__("xchgw %w0,%1" -- :"=r" (x) -- :"m" (*__xg(ptr)), "0" (x) -- :"memory"); -- break; -- case 4: -- __asm__ __volatile__("xchgl %0,%1" -- :"=r" (x) -- :"m" (*__xg(ptr)), "0" (x) -- :"memory"); -- break; -- } -- return x; -+ unsigned long val; -+ asm volatile("movl %%cr4,%0\n\t" :"=r" (val)); -+ return val; - } +-#define X86_CR4_VME 0x0001 /* enable vm86 extensions */ +-#define X86_CR4_PVI 0x0002 /* virtual interrupts flag enable */ +-#define X86_CR4_TSD 0x0004 /* disable time stamp at ipl 3 */ +-#define X86_CR4_DE 0x0008 /* enable debugging extensions */ +-#define X86_CR4_PSE 0x0010 /* enable page size extensions */ +-#define X86_CR4_PAE 0x0020 /* enable physical address extensions */ +-#define X86_CR4_MCE 0x0040 /* Machine check enable */ +-#define X86_CR4_PGE 0x0080 /* enable global pages */ +-#define X86_CR4_PCE 0x0100 /* enable performance counters at ipl 3 */ +-#define X86_CR4_OSFXSR 0x0200 /* enable fast FPU save and restore */ +-#define X86_CR4_OSXMMEXCPT 0x0400 /* enable unmasked SSE exceptions */ +- +-/* + * Save the cr4 feature set we're using (ie + * Pentium 4MB enable and PPro Global page + * enable), so that any CPU's that boot up +@@ -203,7 +168,7 @@ struct i387_fxsave_struct { + u32 mxcsr; + u32 mxcsr_mask; + u32 st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */ +- u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg = 128 bytes */ ++ u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg = 256 bytes */ + u32 padding[24]; + } __attribute__ ((aligned (16))); + +@@ -436,22 +401,6 @@ static inline void prefetchw(void *x) + #define cpu_relax() rep_nop() --/* -- * Atomic compare and exchange. Compare OLD with MEM, if identical, -- * store NEW in MEM. Return the initial value in MEM. Success is -- * indicated by comparing RETURN with OLD. 
+ /* +- * NSC/Cyrix CPU configuration register indexes - */ +-#define CX86_CCR0 0xc0 +-#define CX86_CCR1 0xc1 +-#define CX86_CCR2 0xc2 +-#define CX86_CCR3 0xc3 +-#define CX86_CCR4 0xe8 +-#define CX86_CCR5 0xe9 +-#define CX86_CCR6 0xea +-#define CX86_CCR7 0xeb +-#define CX86_DIR0 0xfe +-#define CX86_DIR1 0xff +-#define CX86_ARR_BASE 0xc4 +-#define CX86_RCR_BASE 0xdc - --#ifdef CONFIG_X86_CMPXCHG --#define __HAVE_ARCH_CMPXCHG 1 --#define cmpxchg(ptr,o,n)\ -- ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\ -- (unsigned long)(n),sizeof(*(ptr)))) --#define sync_cmpxchg(ptr,o,n)\ -- ((__typeof__(*(ptr)))__sync_cmpxchg((ptr),(unsigned long)(o),\ -- (unsigned long)(n),sizeof(*(ptr)))) --#endif -+static inline unsigned long xen_read_cr4_safe(void) -+{ -+ unsigned long val; -+ /* This could fault if %cr4 does not exist */ -+ asm("1: movl %%cr4, %0 \n" -+ "2: \n" -+ ".section __ex_table,\"a\" \n" -+ ".long 1b,2b \n" -+ ".previous \n" -+ : "=r" (val): "0" (0)); -+ return val; -+} +-/* + * NSC/Cyrix CPU indexed register access macros + */ --static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, -- unsigned long new, int size) -+static inline void xen_write_cr4(unsigned long val) - { -- unsigned long prev; -- switch (size) { -- case 1: -- __asm__ __volatile__(LOCK_PREFIX "cmpxchgb %b1,%2" -- : "=a"(prev) -- : "q"(new), "m"(*__xg(ptr)), "0"(old) -- : "memory"); -- return prev; -- case 2: -- __asm__ __volatile__(LOCK_PREFIX "cmpxchgw %w1,%2" -- : "=a"(prev) -- : "r"(new), "m"(*__xg(ptr)), "0"(old) -- : "memory"); -- return prev; -- case 4: -- __asm__ __volatile__(LOCK_PREFIX "cmpxchgl %1,%2" -- : "=a"(prev) -- : "r"(new), "m"(*__xg(ptr)), "0"(old) -- : "memory"); -- return prev; -- } -- return old; -+ asm volatile("movl %0,%%cr4": :"r" (val)); - } +--- sle11-2009-04-20.orig/include/asm-x86/mach-xen/asm/smp_64.h 2009-03-04 11:25:55.000000000 +0100 ++++ sle11-2009-04-20/include/asm-x86/mach-xen/asm/smp_64.h 2008-12-15 11:27:22.000000000 +0100 +@@ -11,12 +11,11 @@ + extern int disable_apic; --/* -- * Always use locked operations when touching memory shared with a -- * hypervisor, since the system may be SMP even if the guest kernel -- * isn't. -- */ --static inline unsigned long __sync_cmpxchg(volatile void *ptr, -- unsigned long old, -- unsigned long new, int size) --{ -- unsigned long prev; -- switch (size) { -- case 1: -- __asm__ __volatile__("lock; cmpxchgb %b1,%2" -- : "=a"(prev) -- : "q"(new), "m"(*__xg(ptr)), "0"(old) -- : "memory"); -- return prev; -- case 2: -- __asm__ __volatile__("lock; cmpxchgw %w1,%2" -- : "=a"(prev) -- : "r"(new), "m"(*__xg(ptr)), "0"(old) -- : "memory"); -- return prev; -- case 4: -- __asm__ __volatile__("lock; cmpxchgl %1,%2" -- : "=a"(prev) -- : "r"(new), "m"(*__xg(ptr)), "0"(old) -- : "memory"); -- return prev; -- } -- return old; -+static inline void xen_wbinvd(void) -+{ -+ asm volatile("wbinvd": : :"memory"); - } + #ifdef CONFIG_X86_LOCAL_APIC +-#include + #include ++#include + #ifdef CONFIG_X86_IO_APIC + #include + #endif +-#include + #include + #endif --#ifndef CONFIG_X86_CMPXCHG --/* -- * Building a kernel capable running on 80386. It may be necessary to -- * simulate the cmpxchg on the 80386 CPU. For that purpose we define -- * a function for each of the sizes we support. 
-- */ -+#define read_cr0() (xen_read_cr0()) -+#define write_cr0(x) (xen_write_cr0(x)) -+#define read_cr2() (xen_read_cr2()) -+#define write_cr2(x) (xen_write_cr2(x)) -+#define read_cr3() (xen_read_cr3()) -+#define write_cr3(x) (xen_write_cr3(x)) -+#define read_cr4() (xen_read_cr4()) -+#define read_cr4_safe() (xen_read_cr4_safe()) -+#define write_cr4(x) (xen_write_cr4(x)) -+#define wbinvd() (xen_wbinvd()) +@@ -41,7 +40,6 @@ extern void lock_ipi_call_lock(void); + extern void unlock_ipi_call_lock(void); + extern int smp_num_siblings; + extern void smp_send_reschedule(int cpu); +-void smp_stop_cpu(void); --extern unsigned long cmpxchg_386_u8(volatile void *, u8, u8); --extern unsigned long cmpxchg_386_u16(volatile void *, u16, u16); --extern unsigned long cmpxchg_386_u32(volatile void *, u32, u32); -- --static inline unsigned long cmpxchg_386(volatile void *ptr, unsigned long old, -- unsigned long new, int size) + extern cpumask_t cpu_sibling_map[NR_CPUS]; + extern cpumask_t cpu_core_map[NR_CPUS]; +@@ -62,14 +60,6 @@ static inline int num_booting_cpus(void) + + #define raw_smp_processor_id() read_pda(cpunumber) + +-#ifdef CONFIG_X86_LOCAL_APIC +-static inline int hard_smp_processor_id(void) -{ -- switch (size) { -- case 1: -- return cmpxchg_386_u8(ptr, old, new); -- case 2: -- return cmpxchg_386_u16(ptr, old, new); -- case 4: -- return cmpxchg_386_u32(ptr, old, new); -- } -- return old; +- /* we don't want to mark this access volatile - bad code generation */ +- return GET_APIC_ID(*(unsigned int *)(APIC_BASE+APIC_ID)); -} -- --#define cmpxchg(ptr,o,n) \ --({ \ -- __typeof__(*(ptr)) __ret; \ -- if (likely(boot_cpu_data.x86 > 3)) \ -- __ret = __cmpxchg((ptr), (unsigned long)(o), \ -- (unsigned long)(n), sizeof(*(ptr))); \ -- else \ -- __ret = cmpxchg_386((ptr), (unsigned long)(o), \ -- (unsigned long)(n), sizeof(*(ptr))); \ -- __ret; \ --}) -#endif -+/* Clear the 'TS' bit */ -+#define clts() (xen_clts()) +- + extern int __cpu_disable(void); + extern void __cpu_die(unsigned int cpu); + extern void prefill_possible_map(void); +@@ -78,6 +68,14 @@ extern unsigned __cpuinitdata disabled_c --#ifdef CONFIG_X86_CMPXCHG64 -+/* Set the 'TS' bit */ -+#define stts() (HYPERVISOR_fpu_taskswitch(1)) + #define NO_PROC_ID 0xFF /* No processor magic marker */ --static inline unsigned long long __cmpxchg64(volatile void *ptr, unsigned long long old, -- unsigned long long new) -+#endif /* __KERNEL__ */ ++#endif /* CONFIG_SMP */ + -+static inline unsigned long get_limit(unsigned long segment) - { -- unsigned long long prev; -- __asm__ __volatile__(LOCK_PREFIX "cmpxchg8b %3" -- : "=A"(prev) -- : "b"((unsigned long)new), -- "c"((unsigned long)(new >> 32)), -- "m"(*__xg(ptr)), -- "0"(old) -- : "memory"); -- return prev; --} -- --#define cmpxchg64(ptr,o,n)\ -- ((__typeof__(*(ptr)))__cmpxchg64((ptr),(unsigned long long)(o),\ -- (unsigned long long)(n))) -+ unsigned long __limit; -+ __asm__("lsll %1,%0" -+ :"=r" (__limit):"r" (segment)); -+ return __limit+1; ++#ifdef CONFIG_X86_LOCAL_APIC ++static inline int hard_smp_processor_id(void) ++{ ++ /* we don't want to mark this access volatile - bad code generation */ ++ return GET_APIC_ID(*(unsigned int *)(APIC_BASE+APIC_ID)); +} -+ -+#define nop() __asm__ __volatile__ ("nop") + #endif --#endif -- /* - * Force strict CPU ordering. 
- * And yes, this is required on UP too when we're talking ---- a/include/asm-x86/mach-xen/asm/system_64.h -+++ b/include/asm-x86/mach-xen/asm/system_64.h +--- sle11-2009-04-20.orig/include/asm-x86/mach-xen/asm/system_64.h 2009-03-04 11:28:34.000000000 +0100 ++++ sle11-2009-04-20/include/asm-x86/mach-xen/asm/system_64.h 2008-12-15 11:27:22.000000000 +0100 @@ -3,7 +3,7 @@ #include @@ -7565,44 +7469,8 @@ Acked-by: jbeulich@novell.com #ifdef CONFIG_SMP #define smp_mb() mb() #define smp_rmb() rmb() ---- a/include/asm-x86/mach-xen/asm/tlbflush_32.h -+++ b/include/asm-x86/mach-xen/asm/tlbflush_32.h -@@ -29,8 +29,13 @@ - * and page-granular flushes are available only on i486 and up. - */ - -+#define TLB_FLUSH_ALL 0xffffffff -+ -+ - #ifndef CONFIG_SMP - -+#include -+ - #define flush_tlb() __flush_tlb() - #define flush_tlb_all() __flush_tlb_all() - #define local_flush_tlb() __flush_tlb() -@@ -55,7 +60,7 @@ static inline void flush_tlb_range(struc - __flush_tlb(); - } - --#else -+#else /* SMP */ - - #include - -@@ -84,9 +89,7 @@ struct tlb_state - char __cacheline_padding[L1_CACHE_BYTES-8]; - }; - DECLARE_PER_CPU(struct tlb_state, cpu_tlbstate); -- -- --#endif -+#endif /* SMP */ - - #define flush_tlb_kernel_range(start, end) flush_tlb_all() - ---- a/include/asm-x86/mach-xen/asm/tlbflush_64.h -+++ b/include/asm-x86/mach-xen/asm/tlbflush_64.h +--- sle11-2009-04-20.orig/include/asm-x86/mach-xen/asm/tlbflush_64.h 2009-03-04 11:28:34.000000000 +0100 ++++ sle11-2009-04-20/include/asm-x86/mach-xen/asm/tlbflush_64.h 2008-12-15 11:27:22.000000000 +0100 @@ -2,7 +2,9 @@ #define _X8664_TLBFLUSH_H @@ -7613,8 +7481,8 @@ Acked-by: jbeulich@novell.com #define __flush_tlb() xen_tlb_flush() ---- a/include/linux/pci.h -+++ b/include/linux/pci.h +--- sle11-2009-04-20.orig/include/linux/pci.h 2009-04-29 08:44:31.000000000 +0200 ++++ sle11-2009-04-20/include/linux/pci.h 2008-12-15 11:27:22.000000000 +0100 @@ -239,7 +239,7 @@ struct pci_dev { int rom_attr_enabled; /* has display of the rom attribute been enabled? */ struct bin_attribute *res_attr[DEVICE_COUNT_RESOURCE]; /* sysfs file for resources */ @@ -7624,8 +7492,8 @@ Acked-by: jbeulich@novell.com struct list_head msi_list; #endif struct pci_vpd *vpd; ---- a/lib/swiotlb-xen.c -+++ b/lib/swiotlb-xen.c +--- sle11-2009-04-20.orig/lib/swiotlb-xen.c 2009-03-04 11:25:55.000000000 +0100 ++++ sle11-2009-04-20/lib/swiotlb-xen.c 2009-02-05 11:16:51.000000000 +0100 @@ -723,7 +723,6 @@ swiotlb_dma_supported (struct device *hw return (mask >= ((1UL << dma_bits) - 1)); } @@ -7634,8 +7502,8 @@ Acked-by: jbeulich@novell.com EXPORT_SYMBOL(swiotlb_map_single); EXPORT_SYMBOL(swiotlb_unmap_single); EXPORT_SYMBOL(swiotlb_map_sg); ---- a/net/core/dev.c -+++ b/net/core/dev.c +--- sle11-2009-04-20.orig/net/core/dev.c 2008-12-15 11:26:44.000000000 +0100 ++++ sle11-2009-04-20/net/core/dev.c 2008-12-15 11:27:22.000000000 +0100 @@ -1744,12 +1744,17 @@ static struct netdev_queue *dev_pick_tx( inline int skb_checksum_setup(struct sk_buff *skb) { @@ -7670,8 +7538,8 @@ Acked-by: jbeulich@novell.com goto out; skb->ip_summed = CHECKSUM_PARTIAL; skb->proto_csum_blank = 0; ---- a/scripts/Makefile.xen.awk -+++ b/scripts/Makefile.xen.awk +--- sle11-2009-04-20.orig/scripts/Makefile.xen.awk 2009-04-29 08:44:31.000000000 +0200 ++++ sle11-2009-04-20/scripts/Makefile.xen.awk 2008-12-15 11:27:22.000000000 +0100 @@ -13,7 +13,7 @@ BEGIN { next }