From: www.kernel.org Subject: Update to 2.6.24 Patch-mainline: 2.6.24 Automatically created from "patches.kernel.org/patch-2.6.24" by xen-port-patches.py Acked-by: jbeulich@novell.com Index: head-2008-12-01/arch/x86/Kconfig =================================================================== --- head-2008-12-01.orig/arch/x86/Kconfig 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/arch/x86/Kconfig 2008-12-01 11:36:55.000000000 +0100 @@ -50,15 +50,16 @@ config GENERIC_CMOS_UPDATE config CLOCKSOURCE_WATCHDOG def_bool y - depends on !X86_XEN + depends on !XEN config GENERIC_CLOCKEVENTS def_bool y - depends on !X86_XEN + depends on !XEN config GENERIC_CLOCKEVENTS_BROADCAST def_bool y - depends on X86_64 || (X86_32 && X86_LOCAL_APIC && !X86_XEN) + depends on X86_64 || (X86_32 && X86_LOCAL_APIC) + depends on !XEN config LOCKDEP_SUPPORT def_bool y @@ -208,12 +209,12 @@ config X86_TRAMPOLINE config X86_NO_TSS bool - depends on X86_XEN || X86_64_XEN + depends on XEN default y config X86_NO_IDT bool - depends on X86_XEN || X86_64_XEN + depends on XEN default y config KTIME_SCALAR @@ -283,6 +284,7 @@ config X86_PC config X86_XEN bool "Xen-compatible" + depends on X86_32 select XEN select X86_PAE select X86_UP_APIC if !SMP && XEN_PRIVILEGED_GUEST @@ -361,6 +363,7 @@ endif config X86_64_XEN bool "Enable Xen compatible kernel" + depends on X86_64 select XEN select SWIOTLB help @@ -413,7 +416,7 @@ config SCHED_NO_NO_OMIT_FRAME_POINTER menuconfig PARAVIRT_GUEST bool "Paravirtualized guest support" - depends on !X86_XEN && !X86_64_XEN + depends on !XEN help Say Y here to get to see options related to running Linux under various hypervisors. This option alone does not add any kernel code. 
@@ -507,7 +510,7 @@ source "arch/x86/Kconfig.cpu" config HPET_TIMER def_bool X86_64 prompt "HPET Timer Support" if X86_32 - depends on !X86_XEN && !X86_64_XEN + depends on !XEN help Use the IA-PC HPET (High Precision Event Timer) to manage time in preference to the PIT and RTC, if a HPET is @@ -827,7 +830,7 @@ config I8K config X86_REBOOTFIXUPS def_bool n prompt "Enable X86 board specific fixups for reboot" - depends on X86_32 && !X86_XEN + depends on X86_32 && !XEN ---help--- This enables chipset and/or board specific fixups to be done in order to get reboot to work correctly. This is only needed on @@ -1160,7 +1163,7 @@ config X86_RESERVE_LOW_64K config MATH_EMULATION bool prompt "Math emulation" if X86_32 - depends on !X86_XEN + depends on !XEN ---help--- Linux can emulate a math coprocessor (used for floating point operations) if you don't have one. 486DX and Pentium processors have @@ -1268,7 +1271,7 @@ config X86_PAT config EFI def_bool n prompt "EFI runtime service support" - depends on ACPI && !X86_XEN && !X86_64_XEN + depends on ACPI && !XEN ---help--- This enables the kernel to use EFI runtime services that are available (such as the EFI variable services). @@ -1283,7 +1286,7 @@ config EFI config IRQBALANCE def_bool y prompt "Enable kernel irq balancing" - depends on X86_32 && SMP && X86_IO_APIC && !X86_XEN + depends on X86_32 && SMP && X86_IO_APIC && !XEN help The default yes will allow the kernel to do irq load balancing. Saying no will keep the kernel from doing irq load balancing. @@ -1429,7 +1432,7 @@ config PHYSICAL_START config RELOCATABLE bool "Build a relocatable kernel (EXPERIMENTAL)" - depends on EXPERIMENTAL && !X86_XEN && !X86_64_XEN + depends on EXPERIMENTAL && !XEN help This builds a kernel image that retains relocation information so it can be loaded someplace besides the default 1MB. 
@@ -1499,6 +1502,7 @@ endmenu config ARCH_ENABLE_MEMORY_HOTPLUG def_bool y depends on X86_64 || (X86_32 && HIGHMEM) + depends on !XEN config HAVE_ARCH_EARLY_PFN_TO_NID def_bool X86_64 @@ -1689,7 +1693,7 @@ choice config PCI_GOBIOS bool "BIOS" - depends on !X86_XEN + depends on !XEN config PCI_GOMMCONFIG bool "MMConfig" @@ -1740,7 +1744,7 @@ config PCI_MMCONFIG config XEN_PCIDEV_FRONTEND bool "Xen PCI Frontend" if X86_64 - depends on PCI && ((X86_XEN && (PCI_GOXEN_FE || PCI_GOANY)) || X86_64_XEN) + depends on PCI && XEN && (PCI_GOXEN_FE || PCI_GOANY || X86_64) select HOTPLUG default y help @@ -1757,6 +1761,7 @@ config XEN_PCIDEV_FE_DEBUG config DMAR bool "Support for DMA Remapping Devices (EXPERIMENTAL)" depends on X86_64 && PCI_MSI && ACPI && EXPERIMENTAL + depends on !XEN help DMA remapping (DMAR) devices support enables independent address translations for Direct Memory Access (DMA) from devices. Index: head-2008-12-01/arch/x86/Makefile =================================================================== --- head-2008-12-01.orig/arch/x86/Makefile 2008-12-01 11:36:47.000000000 +0100 +++ head-2008-12-01/arch/x86/Makefile 2008-12-01 11:36:55.000000000 +0100 @@ -191,8 +191,8 @@ PHONY += zImage bzImage vmlinuz compress zdisk bzdisk fdimage fdimage144 fdimage288 isoimage install ifdef CONFIG_XEN -CPPFLAGS := -D__XEN_INTERFACE_VERSION__=$(CONFIG_XEN_INTERFACE_VERSION) \ - -Iinclude$(if $(KBUILD_SRC),2)/asm/mach-xen $(CPPFLAGS) +KBUILD_CPPFLAGS := -D__XEN_INTERFACE_VERSION__=$(CONFIG_XEN_INTERFACE_VERSION) \ + -Iinclude$(if $(KBUILD_SRC),2)/asm/mach-xen $(KBUILD_CPPFLAGS) ifdef CONFIG_X86_64 LDFLAGS_vmlinux := -e startup_64 @@ -206,6 +206,8 @@ KBUILD_IMAGE := $(boot)/vmlinuz vmlinuz: vmlinux $(Q)$(MAKE) $(build)=$(boot) $(KBUILD_IMAGE) + $(Q)mkdir -p $(objtree)/arch/$(UTS_MACHINE)/boot + $(Q)ln -fsn ../../x86/boot/$@ $(objtree)/arch/$(UTS_MACHINE)/boot/$@ else # Default kernel to build all: bzImage Index: head-2008-12-01/arch/x86/ia32/ia32entry-xen.S 
=================================================================== --- head-2008-12-01.orig/arch/x86/ia32/ia32entry-xen.S 2008-12-01 11:36:47.000000000 +0100 +++ head-2008-12-01/arch/x86/ia32/ia32entry-xen.S 2008-12-01 11:36:55.000000000 +0100 @@ -125,20 +125,16 @@ sysenter_do_call: jmp int_ret_from_sys_call sysenter_tracesys: + xchgl %r9d,%ebp SAVE_REST CLEAR_RREGS + movq %r9,R9(%rsp) movq $-ENOSYS,RAX(%rsp) /* really needed? */ movq %rsp,%rdi /* &pt_regs -> arg1 */ call syscall_trace_enter LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */ RESTORE_REST - movl %ebp, %ebp - /* no need to do an access_ok check here because rbp has been - 32bit zero extended */ -1: movl (%rbp),%r9d - .section __ex_table,"a" - .quad 1b,ia32_badarg - .previous + xchgl %ebp,%r9d jmp sysenter_do_call CFI_ENDPROC ENDPROC(ia32_sysenter_target) @@ -200,20 +196,17 @@ cstar_do_call: jmp int_ret_from_sys_call cstar_tracesys: + xchgl %r9d,%ebp SAVE_REST CLEAR_RREGS + movq %r9,R9(%rsp) movq $-ENOSYS,RAX(%rsp) /* really needed? 
*/ movq %rsp,%rdi /* &pt_regs -> arg1 */ call syscall_trace_enter LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */ RESTORE_REST + xchgl %ebp,%r9d movl RSP-ARGOFFSET(%rsp), %r8d - /* no need to do an access_ok check here because r8 has been - 32bit zero extended */ -1: movl (%r8),%r9d - .section __ex_table,"a" - .quad 1b,ia32_badarg - .previous jmp cstar_do_call END(ia32_cstar_target) Index: head-2008-12-01/arch/x86/kernel/Makefile =================================================================== --- head-2008-12-01.orig/arch/x86/kernel/Makefile 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/arch/x86/kernel/Makefile 2008-12-01 11:36:55.000000000 +0100 @@ -127,4 +127,4 @@ endif disabled-obj-$(CONFIG_XEN) := early-quirks.o hpet.o i8253.o i8259_$(BITS).o reboot.o \ smpboot_$(BITS).o tsc_$(BITS).o tsc_sync.o disabled-obj-$(CONFIG_XEN_UNPRIVILEGED_GUEST) += mpparse_64.o -%/head_64.o %/head_64.s: $(if $(CONFIG_XEN),EXTRA_AFLAGS,dummy) := +%/head_64.o %/head_64.s: asflags-$(CONFIG_XEN) := Index: head-2008-12-01/arch/x86/kernel/acpi/sleep_32-xen.c =================================================================== --- head-2008-12-01.orig/arch/x86/kernel/acpi/sleep_32-xen.c 2008-12-01 11:36:47.000000000 +0100 +++ head-2008-12-01/arch/x86/kernel/acpi/sleep_32-xen.c 2008-12-01 11:36:55.000000000 +0100 @@ -90,7 +90,7 @@ __setup("acpi_sleep=", acpi_sleep_setup) /* Ouch, we want to delete this. 
We already have better version in userspace, in s2ram from suspend.sf.net project */ -static __init int reset_videomode_after_s3(struct dmi_system_id *d) +static __init int reset_videomode_after_s3(const struct dmi_system_id *d) { acpi_realmode_flags |= 2; return 0; Index: head-2008-12-01/arch/x86/kernel/acpi/sleep_64-xen.c =================================================================== --- head-2008-12-01.orig/arch/x86/kernel/acpi/sleep_64-xen.c 2008-12-01 11:36:47.000000000 +0100 +++ head-2008-12-01/arch/x86/kernel/acpi/sleep_64-xen.c 2008-12-01 11:36:55.000000000 +0100 @@ -123,6 +123,3 @@ static int __init acpi_sleep_setup(char __setup("acpi_sleep=", acpi_sleep_setup); #endif /* CONFIG_ACPI_PV_SLEEP */ -void acpi_pci_link_exit(void) -{ -} Index: head-2008-12-01/arch/x86/kernel/apic_64-xen.c =================================================================== --- head-2008-12-01.orig/arch/x86/kernel/apic_64-xen.c 2008-12-01 11:36:47.000000000 +0100 +++ head-2008-12-01/arch/x86/kernel/apic_64-xen.c 2008-12-01 11:36:55.000000000 +0100 @@ -63,22 +63,38 @@ int setup_profiling_timer(unsigned int m void smp_local_timer_interrupt(void) { - profile_tick(CPU_PROFILING); #ifndef CONFIG_XEN -#ifdef CONFIG_SMP - update_process_times(user_mode(get_irq_regs())); -#endif -#endif + int cpu = smp_processor_id(); + struct clock_event_device *evt = &per_cpu(lapic_events, cpu); + /* - * We take the 'long' return path, and there every subsystem - * grabs the appropriate locks (kernel lock/ irq lock). + * Normally we should not be here till LAPIC has been initialized but + * in some cases like kdump, its possible that there is a pending LAPIC + * timer interrupt from previous kernel's context and is delivered in + * new kernel the moment interrupts are enabled. * - * We might want to decouple profiling from the 'long path', - * and do the profiling totally in assembly. 
- * - * Currently this isn't too much of an issue (performance wise), - * we can take more than 100K local irqs per second on a 100 MHz P5. + * Interrupts are enabled early and LAPIC is setup much later, hence + * its possible that when we get here evt->event_handler is NULL. + * Check for event_handler being NULL and discard the interrupt as + * spurious. + */ + if (!evt->event_handler) { + printk(KERN_WARNING + "Spurious LAPIC timer interrupt on cpu %d\n", cpu); + /* Switch it off */ + lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, evt); + return; + } +#endif + + /* + * the NMI deadlock-detector uses this. */ + add_pda(apic_timer_irqs, 1); + +#ifndef CONFIG_XEN + evt->event_handler(evt); +#endif } /* @@ -94,11 +110,6 @@ void smp_apic_timer_interrupt(struct pt_ struct pt_regs *old_regs = set_irq_regs(regs); /* - * the NMI deadlock-detector uses this. - */ - add_pda(apic_timer_irqs, 1); - - /* * NOTE! We'd better ACK the irq immediately, * because timer handling can be slow. */ @@ -132,6 +143,7 @@ asmlinkage void smp_spurious_interrupt(v if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f))) ack_APIC_irq(); + add_pda(irq_spurious_count, 1); irq_exit(); } Index: head-2008-12-01/arch/x86/kernel/cpu/common-xen.c =================================================================== --- head-2008-12-01.orig/arch/x86/kernel/cpu/common-xen.c 2008-12-01 11:36:47.000000000 +0100 +++ head-2008-12-01/arch/x86/kernel/cpu/common-xen.c 2008-12-01 11:36:55.000000000 +0100 @@ -214,7 +214,7 @@ static void __cpuinit get_cpu_vendor(str static int __init x86_fxsr_setup(char * s) { - /* Tell all the other CPU's to not use it... */ + /* Tell all the other CPUs to not use it... 
*/ disable_x86_fxsr = 1; /* Index: head-2008-12-01/arch/x86/kernel/e820_32-xen.c =================================================================== --- head-2008-12-01.orig/arch/x86/kernel/e820_32-xen.c 2008-12-01 11:36:47.000000000 +0100 +++ head-2008-12-01/arch/x86/kernel/e820_32-xen.c 2008-12-01 11:36:55.000000000 +0100 @@ -52,6 +52,13 @@ struct resource code_resource = { .flags = IORESOURCE_BUSY | IORESOURCE_MEM }; +struct resource bss_resource = { + .name = "Kernel bss", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_MEM +}; + static struct resource system_rom_resource = { .name = "System ROM", .start = 0xf0000, @@ -266,7 +273,9 @@ static struct e820map machine_e820; * and also for regions reported as reserved by the e820. */ static void __init -legacy_init_iomem_resources(struct resource *code_resource, struct resource *data_resource) +legacy_init_iomem_resources(struct resource *code_resource, + struct resource *data_resource, + struct resource *bss_resource) { int i; @@ -300,9 +309,11 @@ legacy_init_iomem_resources(struct resou #ifndef CONFIG_XEN request_resource(res, code_resource); request_resource(res, data_resource); + request_resource(res, bss_resource); #endif #ifdef CONFIG_KEXEC - request_resource(res, &crashk_res); + if (crashk_res.start != crashk_res.end) + request_resource(res, &crashk_res); #ifdef CONFIG_XEN xen_machine_kexec_register_resources(res); #endif @@ -329,9 +340,11 @@ static int __init request_standard_resou printk("Setting up standard PCI resources\n"); if (efi_enabled) - efi_initialize_iomem_resources(&code_resource, &data_resource); + efi_initialize_iomem_resources(&code_resource, + &data_resource, &bss_resource); else - legacy_init_iomem_resources(&code_resource, &data_resource); + legacy_init_iomem_resources(&code_resource, + &data_resource, &bss_resource); /* EFI systems may still have VGA */ request_resource(&iomem_resource, &video_ram_resource); @@ -774,7 +787,7 @@ void __init e820_register_memory(void) 
#endif /* - * Search for the bigest gap in the low 32 bits of the e820 + * Search for the biggest gap in the low 32 bits of the e820 * memory space. */ last = 0x100000000ull; Index: head-2008-12-01/arch/x86/kernel/e820_64-xen.c =================================================================== --- head-2008-12-01.orig/arch/x86/kernel/e820_64-xen.c 2008-12-01 11:36:47.000000000 +0100 +++ head-2008-12-01/arch/x86/kernel/e820_64-xen.c 2008-12-01 11:36:55.000000000 +0100 @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include @@ -51,7 +51,7 @@ unsigned long end_pfn_map; */ static unsigned long __initdata end_user_pfn = MAXMEM>>PAGE_SHIFT; -extern struct resource code_resource, data_resource; +extern struct resource code_resource, data_resource, bss_resource; /* Check for some hardcoded bad areas that early boot is not allowed to touch */ static inline int bad_addr(unsigned long *addrp, unsigned long size) @@ -73,10 +73,15 @@ static inline int bad_addr(unsigned long /* initrd */ #ifdef CONFIG_BLK_DEV_INITRD - if (LOADER_TYPE && INITRD_START && last >= INITRD_START && - addr < INITRD_START+INITRD_SIZE) { - *addrp = PAGE_ALIGN(INITRD_START + INITRD_SIZE); - return 1; + if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) { + unsigned long ramdisk_image = boot_params.hdr.ramdisk_image; + unsigned long ramdisk_size = boot_params.hdr.ramdisk_size; + unsigned long ramdisk_end = ramdisk_image+ramdisk_size; + + if (last >= ramdisk_image && addr < ramdisk_end) { + *addrp = PAGE_ALIGN(ramdisk_end); + return 1; + } } #endif /* kernel code */ @@ -249,6 +254,7 @@ void __init e820_reserve_resources(struc #ifndef CONFIG_XEN request_resource(res, &code_resource); request_resource(res, &data_resource); + request_resource(res, &bss_resource); #endif #ifdef CONFIG_KEXEC if (crashk_res.start != crashk_res.end) @@ -650,8 +656,8 @@ void __init setup_memory_region(void) * Otherwise fake a memory map; one section from 0k->640k, * the next section 
from 1mb->appropriate_mem_k */ - sanitize_e820_map(E820_MAP, &E820_MAP_NR); - if (copy_e820_map(E820_MAP, E820_MAP_NR) < 0) + sanitize_e820_map(boot_params.e820_map, &boot_params.e820_entries); + if (copy_e820_map(boot_params.e820_map, boot_params.e820_entries) < 0) early_panic("Cannot find a valid memory map"); printk(KERN_INFO "BIOS-provided physical RAM map:\n"); e820_print_map("BIOS-e820"); @@ -836,3 +842,22 @@ __init void e820_setup_gap(struct e820en printk(KERN_INFO "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n", pci_mem_start, gapstart, gapsize); } + +int __init arch_get_ram_range(int slot, u64 *addr, u64 *size) +{ + int i; + + if (slot < 0 || slot >= e820.nr_map) + return -1; + for (i = slot; i < e820.nr_map; i++) { + if (e820.map[i].type != E820_RAM) + continue; + break; + } + if (i == e820.nr_map || e820.map[i].addr > (max_pfn << PAGE_SHIFT)) + return -1; + *addr = e820.map[i].addr; + *size = min_t(u64, e820.map[i].size + e820.map[i].addr, + max_pfn << PAGE_SHIFT) - *addr; + return i + 1; +} Index: head-2008-12-01/arch/x86/kernel/early_printk-xen.c =================================================================== --- head-2008-12-01.orig/arch/x86/kernel/early_printk-xen.c 2008-12-01 11:36:47.000000000 +0100 +++ head-2008-12-01/arch/x86/kernel/early_printk-xen.c 2008-12-01 11:36:55.000000000 +0100 @@ -6,14 +6,9 @@ #include #include #include +#include /* Simple VGA output */ - -#ifdef __i386__ -#include -#else -#include -#endif #define VGABASE (__ISA_IO_base + 0xb8000) #ifndef CONFIG_XEN @@ -264,10 +259,10 @@ static int __init setup_early_printk(cha early_console = &early_serial_console; } else if (!strncmp(buf, "vga", 3)) { #ifndef CONFIG_XEN - && SCREEN_INFO.orig_video_isVGA == 1) { - max_xpos = SCREEN_INFO.orig_video_cols; - max_ypos = SCREEN_INFO.orig_video_lines; - current_ypos = SCREEN_INFO.orig_y; + && boot_params.screen_info.orig_video_isVGA == 1) { + max_xpos = boot_params.screen_info.orig_video_cols; + max_ypos = 
boot_params.screen_info.orig_video_lines; + current_ypos = boot_params.screen_info.orig_y; #endif early_console = &early_vga_console; } else if (!strncmp(buf, "simnow", 6)) { Index: head-2008-12-01/arch/x86/kernel/entry_32-xen.S =================================================================== --- head-2008-12-01.orig/arch/x86/kernel/entry_32-xen.S 2008-12-01 11:36:47.000000000 +0100 +++ head-2008-12-01/arch/x86/kernel/entry_32-xen.S 2008-12-01 11:36:55.000000000 +0100 @@ -254,6 +254,7 @@ check_userspace: jb resume_kernel # not returning to v8086 or userspace ENTRY(resume_userspace) + LOCKDEP_SYS_EXIT DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt # setting need_resched or sigpending # between sampling and the iret @@ -341,6 +342,7 @@ sysenter_past_esp: jae syscall_badsys call *sys_call_table(,%eax,4) movl %eax,PT_EAX(%esp) + LOCKDEP_SYS_EXIT DISABLE_INTERRUPTS(CLBR_ANY) TRACE_IRQS_OFF movl TI_flags(%ebp), %ecx @@ -406,6 +408,7 @@ syscall_call: call *sys_call_table(,%eax,4) movl %eax,PT_EAX(%esp) # store the return value syscall_exit: + LOCKDEP_SYS_EXIT DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt # setting need_resched or sigpending # between sampling and the iret @@ -478,7 +481,7 @@ ldt_ss: * is still available to implement the setting of the high * 16-bits in the INTERRUPT_RETURN paravirt-op. 
*/ - cmpl $0, paravirt_ops+PARAVIRT_enabled + cmpl $0, pv_info+PARAVIRT_enabled jne restore_nocheck #endif @@ -540,6 +543,7 @@ work_pending: jz work_notifysig work_resched: call schedule + LOCKDEP_SYS_EXIT DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt # setting need_resched or sigpending # between sampling and the iret @@ -1264,6 +1268,6 @@ ENTRY(kernel_thread_helper) ENDPROC(kernel_thread_helper) .section .rodata,"a" -#include "syscall_table.S" +#include "syscall_table_32.S" syscall_table_size=(.-sys_call_table) Index: head-2008-12-01/arch/x86/kernel/entry_64-xen.S =================================================================== --- head-2008-12-01.orig/arch/x86/kernel/entry_64-xen.S 2008-12-01 11:36:47.000000000 +0100 +++ head-2008-12-01/arch/x86/kernel/entry_64-xen.S 2008-12-01 11:36:55.000000000 +0100 @@ -57,7 +57,7 @@ #include #include -#include "xen_entry.S" +#include "xen_entry_64.S" .code64 @@ -275,6 +275,7 @@ ret_from_sys_call: movl $_TIF_ALLWORK_MASK,%edi /* edi: flagmask */ sysret_check: + LOCKDEP_SYS_EXIT GET_THREAD_INFO(%rcx) XEN_BLOCK_EVENTS(%rsi) TRACE_IRQS_OFF @@ -365,6 +366,7 @@ int_ret_from_sys_call: movl $_TIF_ALLWORK_MASK,%edi /* edi: mask to check */ int_with_check: + LOCKDEP_SYS_EXIT_IRQ GET_THREAD_INFO(%rcx) movl threadinfo_flags(%rcx),%edx andl %edi,%edx @@ -516,11 +518,12 @@ END(stub_rt_sigreturn) retint_check: CFI_DEFAULT_STACK adj=1 + LOCKDEP_SYS_EXIT_IRQ movl threadinfo_flags(%rcx),%edx andl %edi,%edx CFI_REMEMBER_STATE jnz retint_careful -retint_restore_args: +retint_restore_args: /* return to kernel space */ movl EFLAGS-REST_SKIP(%rsp), %eax shr $9, %eax # EAX[0] == IRET_EFLAGS.IF XEN_GET_VCPU_INFO(%rsi) @@ -841,7 +844,7 @@ error_call_handler: movq ORIG_RAX(%rsp),%rsi # get error code movq $-1,ORIG_RAX(%rsp) call *%rax -error_exit: +error_exit: RESTORE_REST /* cli */ XEN_BLOCK_EVENTS(%rsi) @@ -849,14 +852,11 @@ error_exit: GET_THREAD_INFO(%rcx) testb $3,CS-ARGOFFSET(%rsp) jz retint_kernel + LOCKDEP_SYS_EXIT_IRQ 
movl threadinfo_flags(%rcx),%edx movl $_TIF_WORK_MASK,%edi andl %edi,%edx jnz retint_careful - /* - * The iret might restore flags: - */ - TRACE_IRQS_IRETQ jmp retint_restore_args #if 0 @@ -1071,7 +1071,7 @@ child_rip: movq %rsi, %rdi call *%rax # exit - xorl %edi, %edi + mov %eax, %edi call do_exit CFI_ENDPROC ENDPROC(child_rip) Index: head-2008-12-01/arch/x86/kernel/genapic_64-xen.c =================================================================== --- head-2008-12-01.orig/arch/x86/kernel/genapic_64-xen.c 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/arch/x86/kernel/genapic_64-xen.c 2008-12-01 11:36:55.000000000 +0100 @@ -24,12 +24,21 @@ #include #endif -/* which logical CPU number maps to which CPU (physical APIC ID) */ -u8 x86_cpu_to_apicid[NR_CPUS] __read_mostly +/* + * which logical CPU number maps to which CPU (physical APIC ID) + * + * The following static array is used during kernel startup + * and the x86_cpu_to_apicid_ptr contains the address of the + * array during this time. Is it zeroed when the per_cpu + * data area is removed. + */ +#ifndef CONFIG_XEN +u8 x86_cpu_to_apicid_init[NR_CPUS] __initdata = { [0 ... NR_CPUS-1] = BAD_APICID }; -EXPORT_SYMBOL(x86_cpu_to_apicid); - -u8 x86_cpu_to_log_apicid[NR_CPUS] = { [0 ... 
NR_CPUS-1] = BAD_APICID }; +void *x86_cpu_to_apicid_ptr; +#endif +DEFINE_PER_CPU(u8, x86_cpu_to_apicid) = BAD_APICID; +EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid); #ifndef CONFIG_XEN struct genapic __read_mostly *genapic = &apic_flat; Index: head-2008-12-01/arch/x86/kernel/head64-xen.c =================================================================== --- head-2008-12-01.orig/arch/x86/kernel/head64-xen.c 2008-12-01 11:36:47.000000000 +0100 +++ head-2008-12-01/arch/x86/kernel/head64-xen.c 2008-12-01 11:36:55.000000000 +0100 @@ -1,5 +1,5 @@ /* - * linux/arch/x86_64/kernel/head64.c -- prepare to run common code + * prepare to run common code * * Copyright (C) 2000 Andrea Arcangeli SuSE * @@ -21,7 +21,6 @@ #include #include #include -#include #include #include #include @@ -47,27 +46,16 @@ static void __init clear_bss(void) } #endif -#define NEW_CL_POINTER 0x228 /* Relative to real mode data */ -#define OLD_CL_MAGIC_ADDR 0x20 -#define OLD_CL_MAGIC 0xA33F -#define OLD_CL_OFFSET 0x22 - static void __init copy_bootdata(char *real_mode_data) { #ifndef CONFIG_XEN - unsigned long new_data; char * command_line; - memcpy(x86_boot_params, real_mode_data, BOOT_PARAM_SIZE); - new_data = *(u32 *) (x86_boot_params + NEW_CL_POINTER); - if (!new_data) { - if (OLD_CL_MAGIC != *(u16 *)(real_mode_data + OLD_CL_MAGIC_ADDR)) { - return; - } - new_data = __pa(real_mode_data) + *(u16 *)(real_mode_data + OLD_CL_OFFSET); + memcpy(&boot_params, real_mode_data, sizeof boot_params); + if (boot_params.hdr.cmd_line_ptr) { + command_line = __va(boot_params.hdr.cmd_line_ptr); + memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE); } - command_line = __va(new_data); - memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE); #else int max_cmdline; @@ -117,7 +105,7 @@ void __init x86_64_start_kernel(char * r for (i = 0; i < IDT_ENTRIES; i++) set_intr_gate(i, early_idt_handler); - asm volatile("lidt %0" :: "m" (idt_descr)); + load_idt((const struct desc_ptr *)&idt_descr); #endif 
early_printk("Kernel alive\n"); Index: head-2008-12-01/arch/x86/kernel/init_task-xen.c =================================================================== --- head-2008-12-01.orig/arch/x86/kernel/init_task-xen.c 2008-12-01 11:36:47.000000000 +0100 +++ head-2008-12-01/arch/x86/kernel/init_task-xen.c 2008-12-01 11:36:55.000000000 +0100 @@ -14,11 +14,11 @@ static struct fs_struct init_fs = INIT_F static struct files_struct init_files = INIT_FILES; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); - +#ifdef CONFIG_X86_XEN #define swapper_pg_dir ((pgd_t *)NULL) +#endif struct mm_struct init_mm = INIT_MM(init_mm); #undef swapper_pg_dir - EXPORT_SYMBOL(init_mm); /* @@ -28,7 +28,7 @@ EXPORT_SYMBOL(init_mm); * way process stacks are handled. This is done by having a special * "init_task" linker map entry.. */ -union thread_union init_thread_union +union thread_union init_thread_union __attribute__((__section__(".data.init_task"))) = { INIT_THREAD_INFO(init_task) }; @@ -38,14 +38,15 @@ union thread_union init_thread_union * All other task structs will be allocated on slabs in fork.c */ struct task_struct init_task = INIT_TASK(init_task); - EXPORT_SYMBOL(init_task); #ifndef CONFIG_X86_NO_TSS /* - * per-CPU TSS segments. Threads are completely 'soft' on Linux, - * no more per-task TSS's. - */ + * no more per-task TSS's. The TSS size is kept cacheline-aligned + * so they are allowed to end up in the .data.cacheline_aligned + * section. Since TSS's are completely CPU-local, we want them + * on exact cacheline boundaries, to eliminate cacheline ping-pong. 
+ */ DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss) = INIT_TSS; #endif Index: head-2008-12-01/arch/x86/kernel/io_apic_32-xen.c =================================================================== --- head-2008-12-01.orig/arch/x86/kernel/io_apic_32-xen.c 2008-12-01 11:36:47.000000000 +0100 +++ head-2008-12-01/arch/x86/kernel/io_apic_32-xen.c 2008-12-01 11:36:55.000000000 +0100 @@ -427,7 +427,7 @@ static struct irq_cpu_info { #define IRQ_ALLOWED(cpu, allowed_mask) cpu_isset(cpu, allowed_mask) -#define CPU_TO_PACKAGEINDEX(i) (first_cpu(cpu_sibling_map[i])) +#define CPU_TO_PACKAGEINDEX(i) (first_cpu(per_cpu(cpu_sibling_map, i))) static cpumask_t balance_irq_affinity[NR_IRQS] = { [0 ... NR_IRQS-1] = CPU_MASK_ALL @@ -633,7 +633,7 @@ tryanotherirq: imbalance = move_this_load; - /* For physical_balance case, we accumlated both load + /* For physical_balance case, we accumulated both load * values in the one of the siblings cpu_irq[], * to use the same code for physical and logical processors * as much as possible. 
@@ -647,7 +647,7 @@ tryanotherirq: * (A+B)/2 vs B */ load = CPU_IRQ(min_loaded) >> 1; - for_each_cpu_mask(j, cpu_sibling_map[min_loaded]) { + for_each_cpu_mask(j, per_cpu(cpu_sibling_map, min_loaded)) { if (load > CPU_IRQ(j)) { /* This won't change cpu_sibling_map[min_loaded] */ load = CPU_IRQ(j); @@ -1018,7 +1018,7 @@ static int EISA_ELCR(unsigned int irq) #define default_MCA_trigger(idx) (1) #define default_MCA_polarity(idx) (0) -static int __init MPBIOS_polarity(int idx) +static int MPBIOS_polarity(int idx) { int bus = mp_irqs[idx].mpc_srcbus; int polarity; @@ -1347,6 +1347,11 @@ static void __init setup_IO_APIC_irqs(vo continue; } + if (!first_notcon) { + apic_printk(APIC_VERBOSE, " not connected.\n"); + first_notcon = 1; + } + entry.trigger = irq_trigger(idx); entry.polarity = irq_polarity(idx); @@ -1936,13 +1941,16 @@ __setup("no_timer_check", notimercheck); static int __init timer_irq_works(void) { unsigned long t1 = jiffies; + unsigned long flags; if (no_timer_check) return 1; + local_save_flags(flags); local_irq_enable(); /* Let ten ticks pass... */ mdelay((10 * 1000) / HZ); + local_irq_restore(flags); /* * Expect a few ticks at least, to be sure some possible @@ -2223,6 +2231,9 @@ static inline void __init check_timer(vo { int apic1, pin1, apic2, pin2; int vector; + unsigned long flags; + + local_irq_save(flags); /* * get/set the timer IRQ vector: @@ -2268,7 +2279,7 @@ static inline void __init check_timer(vo } if (disable_timer_pin_1 > 0) clear_IO_APIC_pin(0, pin1); - return; + goto out; } clear_IO_APIC_pin(apic1, pin1); printk(KERN_ERR "..MP-BIOS bug: 8254 timer not connected to " @@ -2291,7 +2302,7 @@ static inline void __init check_timer(vo if (nmi_watchdog == NMI_IO_APIC) { setup_nmi(); } - return; + goto out; } /* * Cleanup, just in case ... 
@@ -2315,7 +2326,7 @@ static inline void __init check_timer(vo if (timer_irq_works()) { printk(" works.\n"); - return; + goto out; } apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector); printk(" failed.\n"); @@ -2331,11 +2342,13 @@ static inline void __init check_timer(vo if (timer_irq_works()) { printk(" works.\n"); - return; + goto out; } printk(" failed :(.\n"); panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a " "report. Then try booting with the 'noapic' option"); +out: + local_irq_restore(flags); } #else int timer_uses_ioapic_pin_0 = 0; @@ -2353,6 +2366,14 @@ int timer_uses_ioapic_pin_0 = 0; void __init setup_IO_APIC(void) { +#ifndef CONFIG_XEN + int i; + + /* Reserve all the system vectors. */ + for (i = FIRST_SYSTEM_VECTOR; i < NR_VECTORS; i++) + set_bit(i, used_vectors); +#endif + enable_IO_APIC(); if (acpi_ioapic) @@ -2542,7 +2563,7 @@ void destroy_irq(unsigned int irq) #endif /* CONFIG_XEN */ /* - * MSI mesage composition + * MSI message composition */ #if defined(CONFIG_PCI_MSI) && !defined(CONFIG_XEN) static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg) @@ -2899,6 +2920,25 @@ int io_apic_set_pci_routing (int ioapic, return 0; } +int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) +{ + int i; + + if (skip_ioapic_setup) + return -1; + + for (i = 0; i < mp_irq_entries; i++) + if (mp_irqs[i].mpc_irqtype == mp_INT && + mp_irqs[i].mpc_srcbusirq == bus_irq) + break; + if (i >= mp_irq_entries) + return -1; + + *trigger = irq_trigger(i); + *polarity = irq_polarity(i); + return 0; +} + #endif /* CONFIG_ACPI */ static int __init parse_disable_timer_pin_1(char *arg) Index: head-2008-12-01/arch/x86/kernel/io_apic_64-xen.c =================================================================== --- head-2008-12-01.orig/arch/x86/kernel/io_apic_64-xen.c 2008-12-01 11:36:47.000000000 +0100 +++ head-2008-12-01/arch/x86/kernel/io_apic_64-xen.c 2008-12-01 11:36:55.000000000 +0100 @@ -31,6 
+31,7 @@ #include #include #include +#include #ifdef CONFIG_ACPI #include #endif @@ -584,7 +585,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, #define default_PCI_trigger(idx) (1) #define default_PCI_polarity(idx) (1) -static int __init MPBIOS_polarity(int idx) +static int MPBIOS_polarity(int idx) { int bus = mp_irqs[idx].mpc_srcbus; int polarity; @@ -871,6 +872,10 @@ static void __init setup_IO_APIC_irqs(vo apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mpc_apicid, pin); continue; } + if (!first_notcon) { + apic_printk(APIC_VERBOSE, " not connected.\n"); + first_notcon = 1; + } irq = pin_2_irq(idx, apic, pin); add_pin_to_irq(irq, apic, pin); @@ -881,7 +886,7 @@ static void __init setup_IO_APIC_irqs(vo } if (!first_notcon) - apic_printk(APIC_VERBOSE," not connected.\n"); + apic_printk(APIC_VERBOSE, " not connected.\n"); } #ifndef CONFIG_XEN @@ -1277,10 +1282,13 @@ void disable_IO_APIC(void) static int __init timer_irq_works(void) { unsigned long t1 = jiffies; + unsigned long flags; + local_save_flags(flags); local_irq_enable(); /* Let ten ticks pass... */ mdelay((10 * 1000) / HZ); + local_irq_restore(flags); /* * Expect a few ticks at least, to be sure some possible @@ -1655,6 +1663,9 @@ static inline void check_timer(void) { struct irq_cfg *cfg = irq_cfg + 0; int apic1, pin1, apic2, pin2; + unsigned long flags; + + local_irq_save(flags); /* * get/set the timer IRQ vector: @@ -1696,7 +1707,7 @@ static inline void check_timer(void) } if (disable_timer_pin_1 > 0) clear_IO_APIC_pin(0, pin1); - return; + goto out; } clear_IO_APIC_pin(apic1, pin1); apic_printk(APIC_QUIET,KERN_ERR "..MP-BIOS bug: 8254 timer not " @@ -1718,7 +1729,7 @@ static inline void check_timer(void) if (nmi_watchdog == NMI_IO_APIC) { setup_nmi(); } - return; + goto out; } /* * Cleanup, just in case ... 
@@ -1741,7 +1752,7 @@ static inline void check_timer(void) if (timer_irq_works()) { apic_printk(APIC_VERBOSE," works.\n"); - return; + goto out; } apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector); apic_printk(APIC_VERBOSE," failed.\n"); @@ -1756,10 +1767,12 @@ static inline void check_timer(void) if (timer_irq_works()) { apic_printk(APIC_VERBOSE," works.\n"); - return; + goto out; } apic_printk(APIC_VERBOSE," failed :(.\n"); panic("IO-APIC + timer doesn't work! Try using the 'noapic' kernel parameter\n"); +out: + local_irq_restore(flags); } #else #define check_timer() ((void)0) @@ -1775,7 +1788,7 @@ __setup("no_timer_check", notimercheck); /* * - * IRQ's that are handled by the PIC in the MPS IOAPIC case. + * IRQs that are handled by the PIC in the MPS IOAPIC case. * - IRQ2 is the cascade IRQ, and cannot be a io-apic IRQ. * Linux doesn't really care, as it's not actually used * for any interrupt handling anyway. @@ -1858,7 +1871,7 @@ static struct sysdev_class ioapic_sysdev static int __init ioapic_init_sysfs(void) { struct sys_device * dev; - int i, size, error = 0; + int i, size, error; error = sysdev_class_register(&ioapic_sysdev_class); if (error) @@ -1867,12 +1880,11 @@ static int __init ioapic_init_sysfs(void for (i = 0; i < nr_ioapics; i++ ) { size = sizeof(struct sys_device) + nr_ioapic_registers[i] * sizeof(struct IO_APIC_route_entry); - mp_ioapic_data[i] = kmalloc(size, GFP_KERNEL); + mp_ioapic_data[i] = kzalloc(size, GFP_KERNEL); if (!mp_ioapic_data[i]) { printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i); continue; } - memset(mp_ioapic_data[i], 0, size); dev = &mp_ioapic_data[i]->dev; dev->id = i; dev->cls = &ioapic_sysdev_class; @@ -1933,7 +1945,7 @@ void destroy_irq(unsigned int irq) #endif /* CONFIG_XEN */ /* - * MSI mesage composition + * MSI message composition */ #if defined(CONFIG_PCI_MSI) && !defined(CONFIG_XEN) static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg) @@ -2043,8 +2055,64 @@ 
void arch_teardown_msi_irq(unsigned int destroy_irq(irq); } -#endif /* CONFIG_PCI_MSI */ +#ifdef CONFIG_DMAR +#ifdef CONFIG_SMP +static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask) +{ + struct irq_cfg *cfg = irq_cfg + irq; + struct msi_msg msg; + unsigned int dest; + cpumask_t tmp; + + cpus_and(tmp, mask, cpu_online_map); + if (cpus_empty(tmp)) + return; + + if (assign_irq_vector(irq, mask)) + return; + + cpus_and(tmp, cfg->domain, mask); + dest = cpu_mask_to_apicid(tmp); + + dmar_msi_read(irq, &msg); + + msg.data &= ~MSI_DATA_VECTOR_MASK; + msg.data |= MSI_DATA_VECTOR(cfg->vector); + msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; + msg.address_lo |= MSI_ADDR_DEST_ID(dest); + + dmar_msi_write(irq, &msg); + irq_desc[irq].affinity = mask; +} +#endif /* CONFIG_SMP */ + +struct irq_chip dmar_msi_type = { + .name = "DMAR_MSI", + .unmask = dmar_msi_unmask, + .mask = dmar_msi_mask, + .ack = ack_apic_edge, +#ifdef CONFIG_SMP + .set_affinity = dmar_msi_set_affinity, +#endif + .retrigger = ioapic_retrigger_irq, +}; + +int arch_setup_dmar_msi(unsigned int irq) +{ + int ret; + struct msi_msg msg; + + ret = msi_compose_msg(NULL, irq, &msg); + if (ret < 0) + return ret; + dmar_msi_write(irq, &msg); + set_irq_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq, + "edge"); + return 0; +} +#endif +#endif /* CONFIG_PCI_MSI */ /* * Hypertransport interrupt support */ @@ -2177,8 +2245,27 @@ int io_apic_set_pci_routing (int ioapic, return 0; } -#endif /* CONFIG_ACPI */ +int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) +{ + int i; + + if (skip_ioapic_setup) + return -1; + + for (i = 0; i < mp_irq_entries; i++) + if (mp_irqs[i].mpc_irqtype == mp_INT && + mp_irqs[i].mpc_srcbusirq == bus_irq) + break; + if (i >= mp_irq_entries) + return -1; + + *trigger = irq_trigger(i); + *polarity = irq_polarity(i); + return 0; +} + +#endif /* CONFIG_ACPI */ #ifndef CONFIG_XEN /* @@ -2217,3 +2304,4 @@ void __init setup_ioapic_dest(void) } #endif #endif /* !CONFIG_XEN 
*/ + Index: head-2008-12-01/arch/x86/kernel/ioport_32-xen.c =================================================================== --- head-2008-12-01.orig/arch/x86/kernel/ioport_32-xen.c 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/arch/x86/kernel/ioport_32-xen.c 2008-12-01 11:36:55.000000000 +0100 @@ -1,6 +1,4 @@ /* - * linux/arch/i386/kernel/ioport.c - * * This contains the io-permission bitmap code - written by obz, with changes * by Linus. */ Index: head-2008-12-01/arch/x86/kernel/ioport_64-xen.c =================================================================== --- head-2008-12-01.orig/arch/x86/kernel/ioport_64-xen.c 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/arch/x86/kernel/ioport_64-xen.c 2008-12-01 11:36:55.000000000 +0100 @@ -1,6 +1,4 @@ /* - * linux/arch/x86_64/kernel/ioport.c - * * This contains the io-permission bitmap code - written by obz, with changes * by Linus. */ Index: head-2008-12-01/arch/x86/kernel/irq_32-xen.c =================================================================== --- head-2008-12-01.orig/arch/x86/kernel/irq_32-xen.c 2008-12-01 11:36:47.000000000 +0100 +++ head-2008-12-01/arch/x86/kernel/irq_32-xen.c 2008-12-01 11:36:55.000000000 +0100 @@ -1,6 +1,4 @@ /* - * linux/arch/i386/kernel/irq.c - * * Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar * * This file contains the lowest level x86-specific interrupt @@ -231,8 +229,6 @@ asmlinkage void do_softirq(void) local_irq_restore(flags); } - -EXPORT_SYMBOL(do_softirq); #endif /* @@ -259,9 +255,17 @@ int show_interrupts(struct seq_file *p, } if (i < NR_IRQS) { + unsigned any_count = 0; + spin_lock_irqsave(&irq_desc[i].lock, flags); +#ifndef CONFIG_SMP + any_count = kstat_irqs(i); +#else + for_each_online_cpu(j) + any_count |= kstat_cpu(j).irqs[i]; +#endif action = irq_desc[i].action; - if (!action) + if (!action && !any_count) goto skip; seq_printf(p, "%3d: ",i); #ifndef CONFIG_SMP @@ -272,10 +276,12 @@ int show_interrupts(struct seq_file *p, #endif seq_printf(p, 
" %8s", irq_desc[i].chip->name); seq_printf(p, "-%-8s", irq_desc[i].name); - seq_printf(p, " %s", action->name); - for (action=action->next; action; action = action->next) - seq_printf(p, ", %s", action->name); + if (action) { + seq_printf(p, " %s", action->name); + while ((action = action->next) != NULL) + seq_printf(p, ", %s", action->name); + } seq_putc(p, '\n'); skip: @@ -284,13 +290,46 @@ skip: seq_printf(p, "NMI: "); for_each_online_cpu(j) seq_printf(p, "%10u ", nmi_count(j)); - seq_putc(p, '\n'); + seq_printf(p, " Non-maskable interrupts\n"); #ifdef CONFIG_X86_LOCAL_APIC seq_printf(p, "LOC: "); for_each_online_cpu(j) seq_printf(p, "%10u ", per_cpu(irq_stat,j).apic_timer_irqs); - seq_putc(p, '\n'); + seq_printf(p, " Local timer interrupts\n"); +#endif +#ifdef CONFIG_SMP + seq_printf(p, "RES: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", + per_cpu(irq_stat,j).irq_resched_count); + seq_printf(p, " Rescheduling interrupts\n"); + seq_printf(p, "CAL: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", + per_cpu(irq_stat,j).irq_call_count); + seq_printf(p, " function call interrupts\n"); +#ifndef CONFIG_XEN + seq_printf(p, "TLB: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", + per_cpu(irq_stat,j).irq_tlb_count); + seq_printf(p, " TLB shootdowns\n"); +#endif +#endif +#ifdef CONFIG_X86_MCE + seq_printf(p, "TRM: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", + per_cpu(irq_stat,j).irq_thermal_count); + seq_printf(p, " Thermal event interrupts\n"); +#endif +#ifdef CONFIG_X86_LOCAL_APIC + seq_printf(p, "SPU: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", + per_cpu(irq_stat,j).irq_spurious_count); + seq_printf(p, " Spurious interrupts\n"); #endif seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); #if defined(CONFIG_X86_IO_APIC) Index: head-2008-12-01/arch/x86/kernel/irq_64-xen.c =================================================================== --- head-2008-12-01.orig/arch/x86/kernel/irq_64-xen.c 2008-12-01 11:36:13.000000000 +0100 
+++ head-2008-12-01/arch/x86/kernel/irq_64-xen.c 2008-12-01 11:36:55.000000000 +0100 @@ -1,6 +1,4 @@ /* - * linux/arch/x86_64/kernel/irq.c - * * Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar * * This file contains the lowest level x86_64-specific interrupt @@ -64,9 +62,17 @@ int show_interrupts(struct seq_file *p, } if (i < NR_IRQS) { + unsigned any_count = 0; + spin_lock_irqsave(&irq_desc[i].lock, flags); +#ifndef CONFIG_SMP + any_count = kstat_irqs(i); +#else + for_each_online_cpu(j) + any_count |= kstat_cpu(j).irqs[i]; +#endif action = irq_desc[i].action; - if (!action) + if (!action && !any_count) goto skip; seq_printf(p, "%3d: ",i); #ifndef CONFIG_SMP @@ -78,9 +84,11 @@ int show_interrupts(struct seq_file *p, seq_printf(p, " %8s", irq_desc[i].chip->name); seq_printf(p, "-%-8s", irq_desc[i].name); - seq_printf(p, " %s", action->name); - for (action=action->next; action; action = action->next) - seq_printf(p, ", %s", action->name); + if (action) { + seq_printf(p, " %s", action->name); + while ((action = action->next) != NULL) + seq_printf(p, ", %s", action->name); + } seq_putc(p, '\n'); skip: spin_unlock_irqrestore(&irq_desc[i].lock, flags); @@ -88,12 +96,44 @@ skip: seq_printf(p, "NMI: "); for_each_online_cpu(j) seq_printf(p, "%10u ", cpu_pda(j)->__nmi_count); - seq_putc(p, '\n'); + seq_printf(p, " Non-maskable interrupts\n"); #ifdef CONFIG_X86_LOCAL_APIC seq_printf(p, "LOC: "); for_each_online_cpu(j) seq_printf(p, "%10u ", cpu_pda(j)->apic_timer_irqs); - seq_putc(p, '\n'); + seq_printf(p, " Local timer interrupts\n"); +#endif +#ifdef CONFIG_SMP + seq_printf(p, "RES: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", cpu_pda(j)->irq_resched_count); + seq_printf(p, " Rescheduling interrupts\n"); + seq_printf(p, "CAL: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", cpu_pda(j)->irq_call_count); + seq_printf(p, " function call interrupts\n"); +#ifndef CONFIG_XEN + seq_printf(p, "TLB: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", 
cpu_pda(j)->irq_tlb_count); + seq_printf(p, " TLB shootdowns\n"); +#endif +#endif +#ifdef CONFIG_X86_MCE + seq_printf(p, "TRM: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", cpu_pda(j)->irq_thermal_count); + seq_printf(p, " Thermal event interrupts\n"); + seq_printf(p, "THR: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", cpu_pda(j)->irq_threshold_count); + seq_printf(p, " Threshold APIC interrupts\n"); +#endif +#ifdef CONFIG_X86_LOCAL_APIC + seq_printf(p, "SPU: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", cpu_pda(j)->irq_spurious_count); + seq_printf(p, " Spurious interrupts\n"); #endif seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); } @@ -211,7 +251,6 @@ asmlinkage void do_softirq(void) } local_irq_restore(flags); } -EXPORT_SYMBOL(do_softirq); #ifndef CONFIG_X86_LOCAL_APIC /* Index: head-2008-12-01/arch/x86/kernel/ldt_32-xen.c =================================================================== --- head-2008-12-01.orig/arch/x86/kernel/ldt_32-xen.c 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/arch/x86/kernel/ldt_32-xen.c 2008-12-01 11:36:55.000000000 +0100 @@ -1,6 +1,4 @@ /* - * linux/arch/i386/kernel/ldt.c - * * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds * Copyright (C) 1999 Ingo Molnar */ @@ -106,14 +104,14 @@ int init_new_context(struct task_struct struct mm_struct * old_mm; int retval = 0; - init_MUTEX(&mm->context.sem); + mutex_init(&mm->context.lock); mm->context.size = 0; mm->context.has_foreign_mappings = 0; old_mm = current->mm; if (old_mm && old_mm->context.size > 0) { - down(&old_mm->context.sem); + mutex_lock(&old_mm->context.lock); retval = copy_ldt(&mm->context, &old_mm->context); - up(&old_mm->context.sem); + mutex_unlock(&old_mm->context.lock); } return retval; } @@ -149,7 +147,7 @@ static int read_ldt(void __user * ptr, u if (bytecount > LDT_ENTRY_SIZE*LDT_ENTRIES) bytecount = LDT_ENTRY_SIZE*LDT_ENTRIES; - down(&mm->context.sem); + mutex_lock(&mm->context.lock); size = 
mm->context.size*LDT_ENTRY_SIZE; if (size > bytecount) size = bytecount; @@ -157,7 +155,7 @@ static int read_ldt(void __user * ptr, u err = 0; if (copy_to_user(ptr, mm->context.ldt, size)) err = -EFAULT; - up(&mm->context.sem); + mutex_unlock(&mm->context.lock); if (err < 0) goto error_return; if (size != bytecount) { @@ -213,7 +211,7 @@ static int write_ldt(void __user * ptr, goto out; } - down(&mm->context.sem); + mutex_lock(&mm->context.lock); if (ldt_info.entry_number >= mm->context.size) { error = alloc_ldt(¤t->mm->context, ldt_info.entry_number+1, 1); if (error < 0) @@ -240,7 +238,7 @@ install: entry_1, entry_2); out_unlock: - up(&mm->context.sem); + mutex_unlock(&mm->context.lock); out: return error; } Index: head-2008-12-01/arch/x86/kernel/ldt_64-xen.c =================================================================== --- head-2008-12-01.orig/arch/x86/kernel/ldt_64-xen.c 2008-12-01 11:36:47.000000000 +0100 +++ head-2008-12-01/arch/x86/kernel/ldt_64-xen.c 2008-12-01 11:36:55.000000000 +0100 @@ -1,6 +1,4 @@ /* - * linux/arch/x86_64/kernel/ldt.c - * * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds * Copyright (C) 1999 Ingo Molnar * Copyright (C) 2002 Andi Kleen @@ -112,19 +110,14 @@ int init_new_context(struct task_struct int retval = 0; memset(&mm->context, 0, sizeof(mm->context)); - init_MUTEX(&mm->context.sem); + mutex_init(&mm->context.lock); old_mm = current->mm; if (old_mm) mm->context.vdso = old_mm->context.vdso; if (old_mm && old_mm->context.size > 0) { - down(&old_mm->context.sem); + mutex_lock(&old_mm->context.lock); retval = copy_ldt(&mm->context, &old_mm->context); - up(&old_mm->context.sem); - } - if (retval == 0) { - spin_lock(&mm_unpinned_lock); - list_add(&mm->context.unpinned, &mm_unpinned); - spin_unlock(&mm_unpinned_lock); + mutex_unlock(&old_mm->context.lock); } return retval; } @@ -148,11 +141,6 @@ void destroy_context(struct mm_struct *m kfree(mm->context.ldt); mm->context.size = 0; } - if 
(!PagePinned(virt_to_page(mm->pgd))) { - spin_lock(&mm_unpinned_lock); - list_del(&mm->context.unpinned); - spin_unlock(&mm_unpinned_lock); - } } static int read_ldt(void __user * ptr, unsigned long bytecount) @@ -166,7 +154,7 @@ static int read_ldt(void __user * ptr, u if (bytecount > LDT_ENTRY_SIZE*LDT_ENTRIES) bytecount = LDT_ENTRY_SIZE*LDT_ENTRIES; - down(&mm->context.sem); + mutex_lock(&mm->context.lock); size = mm->context.size*LDT_ENTRY_SIZE; if (size > bytecount) size = bytecount; @@ -174,7 +162,7 @@ static int read_ldt(void __user * ptr, u err = 0; if (copy_to_user(ptr, mm->context.ldt, size)) err = -EFAULT; - up(&mm->context.sem); + mutex_unlock(&mm->context.lock); if (err < 0) goto error_return; if (size != bytecount) { @@ -227,7 +215,7 @@ static int write_ldt(void __user * ptr, goto out; } - down(&mm->context.sem); + mutex_lock(&mm->context.lock); if (ldt_info.entry_number >= (unsigned)mm->context.size) { error = alloc_ldt(¤t->mm->context, ldt_info.entry_number+1, 1); if (error < 0) @@ -256,7 +244,7 @@ install: error = HYPERVISOR_update_descriptor(mach_lp, (unsigned long)((entry_1 | (unsigned long) entry_2 << 32))); out_unlock: - up(&mm->context.sem); + mutex_unlock(&mm->context.lock); out: return error; } Index: head-2008-12-01/arch/x86/kernel/mpparse_32-xen.c =================================================================== --- head-2008-12-01.orig/arch/x86/kernel/mpparse_32-xen.c 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/arch/x86/kernel/mpparse_32-xen.c 2008-12-01 11:36:55.000000000 +0100 @@ -1023,7 +1023,7 @@ void __init mp_config_acpi_legacy_irqs ( /* * Use the default configuration for the IRQs 0-15. Unless - * overriden by (MADT) interrupt source override entries. + * overridden by (MADT) interrupt source override entries. 
*/ for (i = 0; i < 16; i++) { int idx; Index: head-2008-12-01/arch/x86/kernel/mpparse_64-xen.c =================================================================== --- head-2008-12-01.orig/arch/x86/kernel/mpparse_64-xen.c 2008-12-01 11:36:47.000000000 +0100 +++ head-2008-12-01/arch/x86/kernel/mpparse_64-xen.c 2008-12-01 11:36:55.000000000 +0100 @@ -57,6 +57,8 @@ unsigned long mp_lapic_addr = 0; /* Processor that is doing the boot up */ unsigned int boot_cpu_id = -1U; +EXPORT_SYMBOL(boot_cpu_id); + /* Internal processor count */ unsigned int num_processors __cpuinitdata = 0; @@ -87,7 +89,7 @@ static int __init mpf_checksum(unsigned } #ifndef CONFIG_XEN -static void __cpuinit MP_processor_info (struct mpc_config_processor *m) +static void __cpuinit MP_processor_info(struct mpc_config_processor *m) { int cpu; cpumask_t tmp_map; @@ -124,13 +126,24 @@ static void __cpuinit MP_processor_info cpu = 0; } bios_cpu_apicid[cpu] = m->mpc_apicid; - x86_cpu_to_apicid[cpu] = m->mpc_apicid; + /* + * We get called early in the start_kernel initialization + * process when the per_cpu data area is not yet setup, so we + * use a static array that is removed after the per_cpu data + * area is created.
+ */ + if (x86_cpu_to_apicid_ptr) { + u8 *x86_cpu_to_apicid = (u8 *)x86_cpu_to_apicid_ptr; + x86_cpu_to_apicid[cpu] = m->mpc_apicid; + } else { + per_cpu(x86_cpu_to_apicid, cpu) = m->mpc_apicid; + } cpu_set(cpu, cpu_possible_map); cpu_set(cpu, cpu_present_map); } #else -static void __cpuinit MP_processor_info (struct mpc_config_processor *m) +static void __cpuinit MP_processor_info(struct mpc_config_processor *m) { num_processors++; } Index: head-2008-12-01/arch/x86/kernel/pci-dma-xen.c =================================================================== --- head-2008-12-01.orig/arch/x86/kernel/pci-dma-xen.c 2008-12-01 11:36:47.000000000 +0100 +++ head-2008-12-01/arch/x86/kernel/pci-dma-xen.c 2008-12-01 11:36:55.000000000 +0100 @@ -13,14 +13,13 @@ #include #include #include -#include #include #include #include #include #include -#include -#include +#include +#include #include #ifdef __x86_64__ @@ -106,27 +105,29 @@ int range_straddles_page_boundary(paddr_ } int -dma_map_sg(struct device *hwdev, struct scatterlist *sg, int nents, +dma_map_sg(struct device *hwdev, struct scatterlist *sgl, int nents, enum dma_data_direction direction) { int i, rc; BUG_ON(!valid_dma_direction(direction)); - WARN_ON(nents == 0 || sg[0].length == 0); + WARN_ON(nents == 0 || sgl->length == 0); if (swiotlb) { - rc = swiotlb_map_sg(hwdev, sg, nents, direction); + rc = swiotlb_map_sg(hwdev, sgl, nents, direction); } else { - for (i = 0; i < nents; i++ ) { - BUG_ON(!sg[i].page); - sg[i].dma_address = - gnttab_dma_map_page(sg[i].page) + sg[i].offset; - sg[i].dma_length = sg[i].length; + struct scatterlist *sg; + + for_each_sg(sgl, sg, nents, i) { + BUG_ON(!sg_page(sg)); + sg->dma_address = + gnttab_dma_map_page(sg_page(sg)) + sg->offset; + sg->dma_length = sg->length; IOMMU_BUG_ON(address_needs_mapping( - hwdev, sg[i].dma_address)); + hwdev, sg->dma_address)); IOMMU_BUG_ON(range_straddles_page_boundary( - page_to_pseudophys(sg[i].page) + sg[i].offset, - sg[i].length)); + 
page_to_pseudophys(sg_page(sg)) + sg->offset, + sg->length)); } rc = nents; } @@ -137,17 +138,19 @@ dma_map_sg(struct device *hwdev, struct EXPORT_SYMBOL(dma_map_sg); void -dma_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents, +dma_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nents, enum dma_data_direction direction) { int i; BUG_ON(!valid_dma_direction(direction)); if (swiotlb) - swiotlb_unmap_sg(hwdev, sg, nents, direction); + swiotlb_unmap_sg(hwdev, sgl, nents, direction); else { - for (i = 0; i < nents; i++ ) - gnttab_dma_unmap_page(sg[i].dma_address); + struct scatterlist *sg; + + for_each_sg(sgl, sg, nents, i) + gnttab_dma_unmap_page(sg->dma_address); } } EXPORT_SYMBOL(dma_unmap_sg); @@ -261,7 +264,8 @@ void dma_free_coherent(struct device *de { struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; int order = get_order(size); - + + WARN_ON(irqs_disabled()); /* for portability */ if (mem && vaddr >= mem->virt_base && vaddr < (mem->virt_base + (mem->size << PAGE_SHIFT))) { int page = (vaddr - mem->virt_base) >> PAGE_SHIFT; Index: head-2008-12-01/arch/x86/kernel/process_32-xen.c =================================================================== --- head-2008-12-01.orig/arch/x86/kernel/process_32-xen.c 2008-12-01 11:36:47.000000000 +0100 +++ head-2008-12-01/arch/x86/kernel/process_32-xen.c 2008-12-01 11:36:55.000000000 +0100 @@ -1,6 +1,4 @@ /* - * linux/arch/i386/kernel/process.c - * * Copyright (C) 1995 Linus Torvalds * * Pentium III FXSR, SSE support @@ -190,6 +188,10 @@ void cpu_idle(void) } } +static void do_nothing(void *unused) +{ +} + void cpu_idle_wait(void) { unsigned int cpu, this_cpu = get_cpu(); @@ -214,13 +216,20 @@ void cpu_idle_wait(void) cpu_clear(cpu, map); } cpus_and(map, map, cpu_online_map); + /* + * We waited 1 sec, if a CPU still did not call idle + * it may be because it is in idle and not waking up + * because it has nothing to do. + * Give all the remaining CPUS a kick. 
+ */ + smp_call_function_mask(map, do_nothing, 0, 0); } while (!cpus_empty(map)); set_cpus_allowed(current, tmp); } EXPORT_SYMBOL_GPL(cpu_idle_wait); -void __devinit select_idle_routine(const struct cpuinfo_x86 *c) +void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) { } @@ -238,34 +247,52 @@ static int __init idle_setup(char *str) } early_param("idle", idle_setup); -void show_regs(struct pt_regs * regs) +void __show_registers(struct pt_regs *regs, int all) { unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L; unsigned long d0, d1, d2, d3, d6, d7; + unsigned long esp; + unsigned short ss, gs; + + if (user_mode_vm(regs)) { + esp = regs->esp; + ss = regs->xss & 0xffff; + savesegment(gs, gs); + } else { + esp = (unsigned long) (®s->esp); + savesegment(ss, ss); + savesegment(gs, gs); + } printk("\n"); - printk("Pid: %d, comm: %20s\n", current->pid, current->comm); - printk("EIP: %04x:[<%08lx>] CPU: %d\n",0xffff & regs->xcs,regs->eip, smp_processor_id()); + printk("Pid: %d, comm: %s %s (%s %.*s)\n", + task_pid_nr(current), current->comm, + print_tainted(), init_utsname()->release, + (int)strcspn(init_utsname()->version, " "), + init_utsname()->version); + + printk("EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n", + 0xffff & regs->xcs, regs->eip, regs->eflags, + smp_processor_id()); print_symbol("EIP is at %s\n", regs->eip); - if (user_mode_vm(regs)) - printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp); - printk(" EFLAGS: %08lx %s (%s %.*s)\n", - regs->eflags, print_tainted(), init_utsname()->release, - (int)strcspn(init_utsname()->version, " "), - init_utsname()->version); printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n", - regs->eax,regs->ebx,regs->ecx,regs->edx); - printk("ESI: %08lx EDI: %08lx EBP: %08lx", - regs->esi, regs->edi, regs->ebp); - printk(" DS: %04x ES: %04x FS: %04x\n", - 0xffff & regs->xds,0xffff & regs->xes, 0xffff & regs->xfs); + regs->eax, regs->ebx, regs->ecx, regs->edx); + printk("ESI: %08lx EDI: %08lx EBP: %08lx ESP: 
%08lx\n", + regs->esi, regs->edi, regs->ebp, esp); + printk(" DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x\n", + regs->xds & 0xffff, regs->xes & 0xffff, + regs->xfs & 0xffff, gs, ss); + + if (!all) + return; cr0 = read_cr0(); cr2 = read_cr2(); cr3 = read_cr3(); cr4 = read_cr4_safe(); - printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4); + printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", + cr0, cr2, cr3, cr4); get_debugreg(d0, 0); get_debugreg(d1, 1); @@ -273,10 +300,16 @@ void show_regs(struct pt_regs * regs) get_debugreg(d3, 3); printk("DR0: %08lx DR1: %08lx DR2: %08lx DR3: %08lx\n", d0, d1, d2, d3); + get_debugreg(d6, 6); get_debugreg(d7, 7); - printk("DR6: %08lx DR7: %08lx\n", d6, d7); + printk("DR6: %08lx DR7: %08lx\n", + d6, d7); +} +void show_regs(struct pt_regs *regs) +{ + __show_registers(regs, 1); show_trace(NULL, regs, ®s->esp); } Index: head-2008-12-01/arch/x86/kernel/process_64-xen.c =================================================================== --- head-2008-12-01.orig/arch/x86/kernel/process_64-xen.c 2008-12-01 11:36:47.000000000 +0100 +++ head-2008-12-01/arch/x86/kernel/process_64-xen.c 2008-12-01 11:36:55.000000000 +0100 @@ -1,6 +1,4 @@ /* - * linux/arch/x86-64/kernel/process.c - * * Copyright (C) 1995 Linus Torvalds * * Pentium III FXSR, SSE support @@ -41,6 +39,7 @@ #include #include #include +#include #include #include @@ -172,6 +171,9 @@ void cpu_idle (void) if (__get_cpu_var(cpu_idle_state)) __get_cpu_var(cpu_idle_state) = 0; + + tick_nohz_stop_sched_tick(); + rmb(); idle = xen_idle; /* no alternatives */ if (cpu_is_offline(smp_processor_id())) @@ -190,12 +192,17 @@ void cpu_idle (void) __exit_idle(); } + tick_nohz_restart_sched_tick(); preempt_enable_no_resched(); schedule(); preempt_disable(); } } +static void do_nothing(void *unused) +{ +} + void cpu_idle_wait(void) { unsigned int cpu, this_cpu = get_cpu(); @@ -221,6 +228,13 @@ void cpu_idle_wait(void) cpu_clear(cpu, map); } cpus_and(map, map, 
cpu_online_map); + /* + * We waited 1 sec, if a CPU still did not call idle + * it may be because it is in idle and not waking up + * because it has nothing to do. + * Give all the remaining CPUS a kick. + */ + smp_call_function_mask(map, do_nothing, 0, 0); } while (!cpus_empty(map)); set_cpus_allowed(current, tmp); @@ -528,7 +542,7 @@ static inline void __switch_to_xtra(stru * * Kprobes not supported here. Set the probe on schedule instead. */ -__kprobes struct task_struct * +struct task_struct * __switch_to(struct task_struct *prev_p, struct task_struct *next_p) { struct thread_struct *prev = &prev_p->thread, Index: head-2008-12-01/arch/x86/kernel/quirks-xen.c =================================================================== --- head-2008-12-01.orig/arch/x86/kernel/quirks-xen.c 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/arch/x86/kernel/quirks-xen.c 2008-12-01 11:36:55.000000000 +0100 @@ -41,7 +41,353 @@ static void __devinit quirk_intel_irqbal if (!(config & 0x2)) pci_write_config_byte(dev, 0xf4, config); } -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7320_MCH, quirk_intel_irqbalance); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7525_MCH, quirk_intel_irqbalance); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH, quirk_intel_irqbalance); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7320_MCH, + quirk_intel_irqbalance); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7525_MCH, + quirk_intel_irqbalance); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH, + quirk_intel_irqbalance); +#endif + +#if defined(CONFIG_HPET_TIMER) +#include + +unsigned long force_hpet_address; + +static enum { + NONE_FORCE_HPET_RESUME, + OLD_ICH_FORCE_HPET_RESUME, + ICH_FORCE_HPET_RESUME, + VT8237_FORCE_HPET_RESUME, + NVIDIA_FORCE_HPET_RESUME, +} force_hpet_resume_type; + +static void __iomem *rcba_base; + +static void 
ich_force_hpet_resume(void) +{ + u32 val; + + if (!force_hpet_address) + return; + + if (rcba_base == NULL) + BUG(); + + /* read the Function Disable register, dword mode only */ + val = readl(rcba_base + 0x3404); + if (!(val & 0x80)) { + /* HPET disabled in HPTC. Trying to enable */ + writel(val | 0x80, rcba_base + 0x3404); + } + + val = readl(rcba_base + 0x3404); + if (!(val & 0x80)) + BUG(); + else + printk(KERN_DEBUG "Force enabled HPET at resume\n"); + + return; +} + +static void ich_force_enable_hpet(struct pci_dev *dev) +{ + u32 val; + u32 uninitialized_var(rcba); + int err = 0; + + if (hpet_address || force_hpet_address) + return; + + pci_read_config_dword(dev, 0xF0, &rcba); + rcba &= 0xFFFFC000; + if (rcba == 0) { + printk(KERN_DEBUG "RCBA disabled. Cannot force enable HPET\n"); + return; + } + + /* use bits 31:14, 16 kB aligned */ + rcba_base = ioremap_nocache(rcba, 0x4000); + if (rcba_base == NULL) { + printk(KERN_DEBUG "ioremap failed. Cannot force enable HPET\n"); + return; + } + + /* read the Function Disable register, dword mode only */ + val = readl(rcba_base + 0x3404); + + if (val & 0x80) { + /* HPET is enabled in HPTC. Just not reported by BIOS */ + val = val & 0x3; + force_hpet_address = 0xFED00000 | (val << 12); + printk(KERN_DEBUG "Force enabled HPET at base address 0x%lx\n", + force_hpet_address); + iounmap(rcba_base); + return; + } + + /* HPET disabled in HPTC. 
Trying to enable */ + writel(val | 0x80, rcba_base + 0x3404); + + val = readl(rcba_base + 0x3404); + if (!(val & 0x80)) { + err = 1; + } else { + val = val & 0x3; + force_hpet_address = 0xFED00000 | (val << 12); + } + + if (err) { + force_hpet_address = 0; + iounmap(rcba_base); + printk(KERN_DEBUG "Failed to force enable HPET\n"); + } else { + force_hpet_resume_type = ICH_FORCE_HPET_RESUME; + printk(KERN_DEBUG "Force enabled HPET at base address 0x%lx\n", + force_hpet_address); + } +} + +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB2_0, + ich_force_enable_hpet); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH6_1, + ich_force_enable_hpet); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH7_0, + ich_force_enable_hpet); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH7_1, + ich_force_enable_hpet); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH7_31, + ich_force_enable_hpet); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH8_1, + ich_force_enable_hpet); + + +static struct pci_dev *cached_dev; + +static void old_ich_force_hpet_resume(void) +{ + u32 val; + u32 uninitialized_var(gen_cntl); + + if (!force_hpet_address || !cached_dev) + return; + + pci_read_config_dword(cached_dev, 0xD0, &gen_cntl); + gen_cntl &= (~(0x7 << 15)); + gen_cntl |= (0x4 << 15); + + pci_write_config_dword(cached_dev, 0xD0, gen_cntl); + pci_read_config_dword(cached_dev, 0xD0, &gen_cntl); + val = gen_cntl >> 15; + val &= 0x7; + if (val == 0x4) + printk(KERN_DEBUG "Force enabled HPET at resume\n"); + else + BUG(); +} + +static void old_ich_force_enable_hpet(struct pci_dev *dev) +{ + u32 val; + u32 uninitialized_var(gen_cntl); + + if (hpet_address || force_hpet_address) + return; + + pci_read_config_dword(dev, 0xD0, &gen_cntl); + /* + * Bit 17 is HPET enable bit. + * Bit 16:15 control the HPET base address. 
+ */ + val = gen_cntl >> 15; + val &= 0x7; + if (val & 0x4) { + val &= 0x3; + force_hpet_address = 0xFED00000 | (val << 12); + printk(KERN_DEBUG "HPET at base address 0x%lx\n", + force_hpet_address); + return; + } + + /* + * HPET is disabled. Trying enabling at FED00000 and check + * whether it sticks + */ + gen_cntl &= (~(0x7 << 15)); + gen_cntl |= (0x4 << 15); + pci_write_config_dword(dev, 0xD0, gen_cntl); + + pci_read_config_dword(dev, 0xD0, &gen_cntl); + + val = gen_cntl >> 15; + val &= 0x7; + if (val & 0x4) { + /* HPET is enabled in HPTC. Just not reported by BIOS */ + val &= 0x3; + force_hpet_address = 0xFED00000 | (val << 12); + printk(KERN_DEBUG "Force enabled HPET at base address 0x%lx\n", + force_hpet_address); + cached_dev = dev; + force_hpet_resume_type = OLD_ICH_FORCE_HPET_RESUME; + return; + } + + printk(KERN_DEBUG "Failed to force enable HPET\n"); +} + +/* + * Undocumented chipset features. Make sure that the user enforced + * this. + */ +static void old_ich_force_enable_hpet_user(struct pci_dev *dev) +{ + if (hpet_force_user) + old_ich_force_enable_hpet(dev); +} + +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_0, + old_ich_force_enable_hpet_user); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_12, + old_ich_force_enable_hpet_user); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_0, + old_ich_force_enable_hpet_user); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_12, + old_ich_force_enable_hpet_user); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801EB_0, + old_ich_force_enable_hpet); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801EB_12, + old_ich_force_enable_hpet); + + +static void vt8237_force_hpet_resume(void) +{ + u32 val; + + if (!force_hpet_address || !cached_dev) + return; + + val = 0xfed00000 | 0x80; + pci_write_config_dword(cached_dev, 0x68, val); + + pci_read_config_dword(cached_dev, 
0x68, &val); + if (val & 0x80) + printk(KERN_DEBUG "Force enabled HPET at resume\n"); + else + BUG(); +} + +static void vt8237_force_enable_hpet(struct pci_dev *dev) +{ + u32 uninitialized_var(val); + + if (!hpet_force_user || hpet_address || force_hpet_address) + return; + + pci_read_config_dword(dev, 0x68, &val); + /* + * Bit 7 is HPET enable bit. + * Bit 31:10 is HPET base address (contrary to what datasheet claims) + */ + if (val & 0x80) { + force_hpet_address = (val & ~0x3ff); + printk(KERN_DEBUG "HPET at base address 0x%lx\n", + force_hpet_address); + return; + } + + /* + * HPET is disabled. Trying enabling at FED00000 and check + * whether it sticks + */ + val = 0xfed00000 | 0x80; + pci_write_config_dword(dev, 0x68, val); + + pci_read_config_dword(dev, 0x68, &val); + if (val & 0x80) { + force_hpet_address = (val & ~0x3ff); + printk(KERN_DEBUG "Force enabled HPET at base address 0x%lx\n", + force_hpet_address); + cached_dev = dev; + force_hpet_resume_type = VT8237_FORCE_HPET_RESUME; + return; + } + + printk(KERN_DEBUG "Failed to force enable HPET\n"); +} + +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8235, + vt8237_force_enable_hpet); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8237, + vt8237_force_enable_hpet); + +/* + * Undocumented chipset feature taken from LinuxBIOS. 
+ */ +static void nvidia_force_hpet_resume(void) +{ + pci_write_config_dword(cached_dev, 0x44, 0xfed00001); + printk(KERN_DEBUG "Force enabled HPET at resume\n"); +} + +static void nvidia_force_enable_hpet(struct pci_dev *dev) +{ + u32 uninitialized_var(val); + + if (!hpet_force_user || hpet_address || force_hpet_address) + return; + + pci_write_config_dword(dev, 0x44, 0xfed00001); + pci_read_config_dword(dev, 0x44, &val); + force_hpet_address = val & 0xfffffffe; + force_hpet_resume_type = NVIDIA_FORCE_HPET_RESUME; + printk(KERN_DEBUG "Force enabled HPET at base address 0x%lx\n", + force_hpet_address); + cached_dev = dev; + return; +} + +/* ISA Bridges */ +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0050, + nvidia_force_enable_hpet); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0051, + nvidia_force_enable_hpet); + +/* LPC bridges */ +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0360, + nvidia_force_enable_hpet); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0361, + nvidia_force_enable_hpet); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0362, + nvidia_force_enable_hpet); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0363, + nvidia_force_enable_hpet); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0364, + nvidia_force_enable_hpet); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0365, + nvidia_force_enable_hpet); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0366, + nvidia_force_enable_hpet); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0367, + nvidia_force_enable_hpet); + +void force_hpet_resume(void) +{ + switch (force_hpet_resume_type) { + case ICH_FORCE_HPET_RESUME: + return ich_force_hpet_resume(); + + case OLD_ICH_FORCE_HPET_RESUME: + return old_ich_force_hpet_resume(); + + case VT8237_FORCE_HPET_RESUME: + return vt8237_force_hpet_resume(); + + case NVIDIA_FORCE_HPET_RESUME: + return nvidia_force_hpet_resume(); + + default: + break; + } +} + #endif Index: head-2008-12-01/arch/x86/kernel/setup64-xen.c 
=================================================================== --- head-2008-12-01.orig/arch/x86/kernel/setup64-xen.c 2008-12-01 11:36:47.000000000 +0100 +++ head-2008-12-01/arch/x86/kernel/setup64-xen.c 2008-12-01 11:36:55.000000000 +0100 @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include @@ -27,11 +26,12 @@ #include #include #include +#include #ifdef CONFIG_XEN #include #endif -char x86_boot_params[BOOT_PARAM_SIZE] __initdata; +struct boot_params __initdata boot_params; cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; @@ -159,8 +159,8 @@ static void switch_pt(void) static void __cpuinit cpu_gdt_init(const struct desc_ptr *gdt_descr) { - asm volatile("lgdt %0" :: "m" (*gdt_descr)); - asm volatile("lidt %0" :: "m" (idt_descr)); + load_gdt(gdt_descr); + load_idt(idt_descr); } #endif @@ -252,6 +252,14 @@ void __cpuinit check_efer(void) unsigned long kernel_eflags; +#ifndef CONFIG_X86_NO_TSS +/* + * Copies of the original ist values from the tss are only accessed during + * debugging, no special alignment required. + */ +DEFINE_PER_CPU(struct orig_ist, orig_ist); +#endif + /* * cpu_init() initializes state that is per-CPU. 
Some data is already * initialized (naturally) in the bootstrap process, such as the GDT Index: head-2008-12-01/arch/x86/kernel/setup_32-xen.c =================================================================== --- head-2008-12-01.orig/arch/x86/kernel/setup_32-xen.c 2008-12-01 11:36:47.000000000 +0100 +++ head-2008-12-01/arch/x86/kernel/setup_32-xen.c 2008-12-01 11:36:55.000000000 +0100 @@ -1,6 +1,4 @@ /* - * linux/arch/i386/kernel/setup.c - * * Copyright (C) 1995 Linus Torvalds * * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 @@ -70,6 +68,7 @@ #include #include #include +#include #ifdef CONFIG_XEN #include @@ -80,13 +79,14 @@ static struct notifier_block xen_panic_b xen_panic_event, NULL, 0 /* try to go last */ }; -int disable_pse __devinitdata = 0; +int disable_pse __cpuinitdata = 0; /* * Machine setup.. */ extern struct resource code_resource; extern struct resource data_resource; +extern struct resource bss_resource; /* cpu data as detected by the assembly code in head.S */ struct cpuinfo_x86 new_cpu_data __cpuinitdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; @@ -98,9 +98,6 @@ unsigned long mmu_cr4_features; /* for MCA, but anyone else can use it if they want */ unsigned int machine_id; -#ifdef CONFIG_MCA -EXPORT_SYMBOL(machine_id); -#endif unsigned int machine_submodel_id; unsigned int BIOS_revision; unsigned int mca_pentium_flag; @@ -121,7 +118,7 @@ EXPORT_SYMBOL(apm_info); struct edid_info edid_info; EXPORT_SYMBOL_GPL(edid_info); #ifndef CONFIG_XEN -#define copy_edid() (edid_info = EDID_INFO) +#define copy_edid() (edid_info = boot_params.edid_info) #endif struct ist_info ist_info; #if defined(CONFIG_X86_SPEEDSTEP_SMI) || \ @@ -170,10 +167,11 @@ EXPORT_SYMBOL(edd); */ static inline void copy_edd(void) { - memcpy(edd.mbr_signature, EDD_MBR_SIGNATURE, sizeof(edd.mbr_signature)); - memcpy(edd.edd_info, EDD_BUF, sizeof(edd.edd_info)); - edd.mbr_signature_nr = EDD_MBR_SIG_NR; - edd.edd_info_nr = EDD_NR; + memcpy(edd.mbr_signature, 
boot_params.edd_mbr_sig_buffer, + sizeof(edd.mbr_signature)); + memcpy(edd.edd_info, boot_params.eddbuf, sizeof(edd.edd_info)); + edd.mbr_signature_nr = boot_params.edd_mbr_sig_buf_entries; + edd.edd_info_nr = boot_params.eddbuf_entries; } #endif #else @@ -416,6 +414,53 @@ extern unsigned long __init setup_memory extern void zone_sizes_init(void); #endif /* !CONFIG_NEED_MULTIPLE_NODES */ +static inline unsigned long long get_total_mem(void) +{ + unsigned long long total; + + total = max_low_pfn - min_low_pfn; +#ifdef CONFIG_HIGHMEM + total += highend_pfn - highstart_pfn; +#endif + + return total << PAGE_SHIFT; +} + +#ifdef CONFIG_KEXEC +#ifndef CONFIG_XEN +static void __init reserve_crashkernel(void) +{ + unsigned long long total_mem; + unsigned long long crash_size, crash_base; + int ret; + + total_mem = get_total_mem(); + + ret = parse_crashkernel(boot_command_line, total_mem, + &crash_size, &crash_base); + if (ret == 0 && crash_size > 0) { + if (crash_base > 0) { + printk(KERN_INFO "Reserving %ldMB of memory at %ldMB " + "for crashkernel (System RAM: %ldMB)\n", + (unsigned long)(crash_size >> 20), + (unsigned long)(crash_base >> 20), + (unsigned long)(total_mem >> 20)); + crashk_res.start = crash_base; + crashk_res.end = crash_base + crash_size - 1; + reserve_bootmem(crash_base, crash_size); + } else + printk(KERN_INFO "crashkernel reservation failed - " + "you have to specify a base address\n"); + } +} +#else +#define reserve_crashkernel xen_machine_kexec_setup_resources +#endif +#else +static inline void __init reserve_crashkernel(void) +{} +#endif + void __init setup_bootmem_allocator(void) { unsigned long bootmap_size; @@ -471,30 +516,25 @@ void __init setup_bootmem_allocator(void #ifdef CONFIG_BLK_DEV_INITRD if (xen_start_info->mod_start) { - if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) { - /*reserve_bootmem(INITRD_START, INITRD_SIZE);*/ - initrd_start = INITRD_START + PAGE_OFFSET; - initrd_end = initrd_start+INITRD_SIZE; + unsigned long 
ramdisk_image = __pa(xen_start_info->mod_start); + unsigned long ramdisk_size = xen_start_info->mod_len; + unsigned long ramdisk_end = ramdisk_image + ramdisk_size; + unsigned long end_of_lowmem = max_low_pfn << PAGE_SHIFT; + + if (ramdisk_end <= end_of_lowmem) { + /*reserve_bootmem(ramdisk_image, ramdisk_size);*/ + initrd_start = ramdisk_image + PAGE_OFFSET; + initrd_end = initrd_start+ramdisk_size; initrd_below_start_ok = 1; - } - else { + } else { printk(KERN_ERR "initrd extends beyond end of memory " - "(0x%08lx > 0x%08lx)\ndisabling initrd\n", - INITRD_START + INITRD_SIZE, - max_low_pfn << PAGE_SHIFT); + "(0x%08lx > 0x%08lx)\ndisabling initrd\n", + ramdisk_end, end_of_lowmem); initrd_start = 0; } } #endif -#ifdef CONFIG_KEXEC -#ifdef CONFIG_XEN - xen_machine_kexec_setup_resources(); -#else - if (crashk_res.start != crashk_res.end) - reserve_bootmem(crashk_res.start, - crashk_res.end - crashk_res.start + 1); -#endif -#endif + reserve_crashkernel(); } /* @@ -572,7 +612,8 @@ void __init setup_arch(char **cmdline_p) * the system table is valid. If not, then initialize normally. */ #ifdef CONFIG_EFI - if ((LOADER_TYPE == 0x50) && EFI_SYSTAB) + if ((boot_params.hdr.type_of_loader == 0x50) && + boot_params.efi_info.efi_systab) efi_enabled = 1; #endif @@ -580,18 +621,18 @@ void __init setup_arch(char **cmdline_p) properly. Setting ROOT_DEV to default to /dev/ram0 breaks initrd. 
*/ ROOT_DEV = MKDEV(UNNAMED_MAJOR,0); - screen_info = SCREEN_INFO; + screen_info = boot_params.screen_info; copy_edid(); - apm_info.bios = APM_BIOS_INFO; - ist_info = IST_INFO; - saved_videomode = VIDEO_MODE; - if( SYS_DESC_TABLE.length != 0 ) { - set_mca_bus(SYS_DESC_TABLE.table[3] & 0x2); - machine_id = SYS_DESC_TABLE.table[0]; - machine_submodel_id = SYS_DESC_TABLE.table[1]; - BIOS_revision = SYS_DESC_TABLE.table[2]; + apm_info.bios = boot_params.apm_bios_info; + ist_info = boot_params.ist_info; + saved_videomode = boot_params.hdr.vid_mode; + if( boot_params.sys_desc_table.length != 0 ) { + set_mca_bus(boot_params.sys_desc_table.table[3] & 0x2); + machine_id = boot_params.sys_desc_table.table[0]; + machine_submodel_id = boot_params.sys_desc_table.table[1]; + BIOS_revision = boot_params.sys_desc_table.table[2]; } - bootloader_type = LOADER_TYPE; + bootloader_type = boot_params.hdr.type_of_loader; if (is_initial_xendomain()) { const struct dom0_vga_console_info *info = @@ -606,9 +647,9 @@ void __init setup_arch(char **cmdline_p) screen_info.orig_video_isVGA = 0; #ifdef CONFIG_BLK_DEV_RAM - rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK; - rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0); - rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0); + rd_image_start = boot_params.hdr.ram_size & RAMDISK_IMAGE_START_MASK; + rd_prompt = ((boot_params.hdr.ram_size & RAMDISK_PROMPT_FLAG) != 0); + rd_doload = ((boot_params.hdr.ram_size & RAMDISK_LOAD_FLAG) != 0); #endif ARCH_SETUP @@ -621,7 +662,7 @@ void __init setup_arch(char **cmdline_p) copy_edd(); - if (!MOUNT_ROOT_RDONLY) + if (!boot_params.hdr.root_flags) root_mountflags &= ~MS_RDONLY; init_mm.start_code = (unsigned long) _text; init_mm.end_code = (unsigned long) _etext; @@ -633,6 +674,8 @@ void __init setup_arch(char **cmdline_p) code_resource.end = virt_to_phys(_etext)-1; data_resource.start = virt_to_phys(_etext); data_resource.end = virt_to_phys(_edata)-1; + bss_resource.start = 
virt_to_phys(&__bss_start); + bss_resource.end = virt_to_phys(&__bss_stop)-1; if ((i = MAX_GUEST_CMDLINE) > COMMAND_LINE_SIZE) i = COMMAND_LINE_SIZE; @@ -661,7 +704,7 @@ void __init setup_arch(char **cmdline_p) /* * NOTE: before this point _nobody_ is allowed to allocate * any memory using the bootmem allocator. Although the - * alloctor is now initialised only the first 8Mb of the kernel + * allocator is now initialised only the first 8Mb of the kernel * virtual address space has been mapped. All allocations before * paging_init() has completed must use the alloc_bootmem_low_pages() * variant (which allocates DMA'able memory) and care must be taken @@ -784,10 +827,8 @@ void __init setup_arch(char **cmdline_p) acpi_boot_table_init(); #endif -#ifdef CONFIG_PCI -#ifdef CONFIG_X86_IO_APIC - check_acpi_pci(); /* Checks more than just ACPI actually */ -#endif +#if defined(CONFIG_PCI) && !defined(CONFIG_XEN) + early_quirks(); #endif #ifdef CONFIG_ACPI Index: head-2008-12-01/arch/x86/kernel/setup_64-xen.c =================================================================== --- head-2008-12-01.orig/arch/x86/kernel/setup_64-xen.c 2008-12-01 11:36:47.000000000 +0100 +++ head-2008-12-01/arch/x86/kernel/setup_64-xen.c 2008-12-01 11:36:55.000000000 +0100 @@ -1,10 +1,5 @@ /* - * linux/arch/x86-64/kernel/setup.c - * * Copyright (C) 1995 Linus Torvalds - * - * Nov 2001 Dave Jones - * Forked from i386 setup code. 
*/ /* @@ -57,13 +52,13 @@ #include #include #include -#include #include #include #include #include #include #include +#include #ifdef CONFIG_XEN #include #include @@ -180,6 +175,12 @@ struct resource code_resource = { .end = 0, .flags = IORESOURCE_RAM, }; +struct resource bss_resource = { + .name = "Kernel bss", + .start = 0, + .end = 0, + .flags = IORESOURCE_RAM, +}; #ifdef CONFIG_PROC_VMCORE /* elfcorehdr= specifies the location of elf core header @@ -231,10 +232,11 @@ EXPORT_SYMBOL(edd); */ static inline void copy_edd(void) { - memcpy(edd.mbr_signature, EDD_MBR_SIGNATURE, sizeof(edd.mbr_signature)); - memcpy(edd.edd_info, EDD_BUF, sizeof(edd.edd_info)); - edd.mbr_signature_nr = EDD_MBR_SIG_NR; - edd.edd_info_nr = EDD_NR; + memcpy(edd.mbr_signature, boot_params.edd_mbr_sig_buffer, + sizeof(edd.mbr_signature)); + memcpy(edd.edd_info, boot_params.eddbuf, sizeof(edd.edd_info)); + edd.mbr_signature_nr = boot_params.edd_mbr_sig_buf_entries; + edd.edd_info_nr = boot_params.eddbuf_entries; } #endif #else @@ -243,6 +245,41 @@ static inline void copy_edd(void) } #endif +#ifdef CONFIG_KEXEC +#ifndef CONFIG_XEN +static void __init reserve_crashkernel(void) +{ + unsigned long long free_mem; + unsigned long long crash_size, crash_base; + int ret; + + free_mem = ((unsigned long long)max_low_pfn - min_low_pfn) << PAGE_SHIFT; + + ret = parse_crashkernel(boot_command_line, free_mem, + &crash_size, &crash_base); + if (ret == 0 && crash_size) { + if (crash_base > 0) { + printk(KERN_INFO "Reserving %ldMB of memory at %ldMB " + "for crashkernel (System RAM: %ldMB)\n", + (unsigned long)(crash_size >> 20), + (unsigned long)(crash_base >> 20), + (unsigned long)(free_mem >> 20)); + crashk_res.start = crash_base; + crashk_res.end = crash_base + crash_size - 1; + reserve_bootmem(crash_base, crash_size); + } else + printk(KERN_INFO "crashkernel reservation failed - " + "you have to specify a base address\n"); + } +} +#else +#define reserve_crashkernel xen_machine_kexec_setup_resources 
+#endif +#else +static inline void __init reserve_crashkernel(void) +{} +#endif + #ifndef CONFIG_XEN #define EBDA_ADDR_POINTER 0x40E @@ -283,7 +320,7 @@ void __init setup_arch(char **cmdline_p) atomic_notifier_chain_register(&panic_notifier_list, &xen_panic_block); ROOT_DEV = MKDEV(RAMDISK_MAJOR,0); - screen_info = SCREEN_INFO; + screen_info = boot_params.screen_info; if (is_initial_xendomain()) { const struct dom0_vga_console_info *info = @@ -306,22 +343,22 @@ void __init setup_arch(char **cmdline_p) #else printk(KERN_INFO "Command line: %s\n", boot_command_line); - ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV); - screen_info = SCREEN_INFO; - edid_info = EDID_INFO; + ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev); + screen_info = boot_params.screen_info; + edid_info = boot_params.edid_info; #endif /* !CONFIG_XEN */ - saved_video_mode = SAVED_VIDEO_MODE; - bootloader_type = LOADER_TYPE; + saved_video_mode = boot_params.hdr.vid_mode; + bootloader_type = boot_params.hdr.type_of_loader; #ifdef CONFIG_BLK_DEV_RAM - rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK; - rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0); - rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0); + rd_image_start = boot_params.hdr.ram_size & RAMDISK_IMAGE_START_MASK; + rd_prompt = ((boot_params.hdr.ram_size & RAMDISK_PROMPT_FLAG) != 0); + rd_doload = ((boot_params.hdr.ram_size & RAMDISK_LOAD_FLAG) != 0); #endif setup_memory_region(); copy_edd(); - if (!MOUNT_ROOT_RDONLY) + if (!boot_params.hdr.root_flags) root_mountflags &= ~MS_RDONLY; init_mm.start_code = (unsigned long) &_text; init_mm.end_code = (unsigned long) &_etext; @@ -332,6 +369,8 @@ void __init setup_arch(char **cmdline_p) code_resource.end = virt_to_phys(&_etext)-1; data_resource.start = virt_to_phys(&_etext); data_resource.end = virt_to_phys(&_edata)-1; + bss_resource.start = virt_to_phys(&__bss_start); + bss_resource.end = virt_to_phys(&__bss_stop)-1; early_identify_cpu(&boot_cpu_data); @@ -359,6 +398,11 @@ void 
__init setup_arch(char **cmdline_p) if (is_initial_xendomain()) dmi_scan_machine(); +#if defined(CONFIG_SMP) && !defined(CONFIG_XEN) + /* setup to use the static apicid table during kernel startup */ + x86_cpu_to_apicid_ptr = (void *)&x86_cpu_to_apicid_init; +#endif + /* How many end-of-memory variables you have, grandma! */ max_low_pfn = end_pfn; max_pfn = end_pfn; @@ -423,52 +467,37 @@ void __init setup_arch(char **cmdline_p) */ acpi_reserve_bootmem(); #endif -#ifdef CONFIG_XEN #ifdef CONFIG_BLK_DEV_INITRD +#ifdef CONFIG_XEN if (xen_start_info->mod_start) { - if (INITRD_START + INITRD_SIZE <= (end_pfn << PAGE_SHIFT)) { - /*reserve_bootmem_generic(INITRD_START, INITRD_SIZE);*/ - initrd_start = INITRD_START + PAGE_OFFSET; - initrd_end = initrd_start+INITRD_SIZE; + unsigned long ramdisk_image = __pa(xen_start_info->mod_start); + unsigned long ramdisk_size = xen_start_info->mod_len; +#else + if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) { + unsigned long ramdisk_image = boot_params.hdr.ramdisk_image; + unsigned long ramdisk_size = boot_params.hdr.ramdisk_size; +#endif + unsigned long ramdisk_end = ramdisk_image + ramdisk_size; + unsigned long end_of_mem = end_pfn << PAGE_SHIFT; + + if (ramdisk_end <= end_of_mem) { +#ifndef CONFIG_XEN + reserve_bootmem_generic(ramdisk_image, ramdisk_size); +#endif + initrd_start = ramdisk_image + PAGE_OFFSET; + initrd_end = initrd_start+ramdisk_size; +#ifdef CONFIG_XEN initrd_below_start_ok = 1; - } else { - printk(KERN_ERR "initrd extends beyond end of memory " - "(0x%08lx > 0x%08lx)\ndisabling initrd\n", - (unsigned long)(INITRD_START + INITRD_SIZE), - (unsigned long)(end_pfn << PAGE_SHIFT)); - initrd_start = 0; - } - } #endif -#else /* CONFIG_XEN */ -#ifdef CONFIG_BLK_DEV_INITRD - if (LOADER_TYPE && INITRD_START) { - if (INITRD_START + INITRD_SIZE <= (end_pfn << PAGE_SHIFT)) { - reserve_bootmem_generic(INITRD_START, INITRD_SIZE); - initrd_start = INITRD_START + PAGE_OFFSET; - initrd_end = 
initrd_start+INITRD_SIZE; - } - else { + } else { printk(KERN_ERR "initrd extends beyond end of memory " - "(0x%08lx > 0x%08lx)\ndisabling initrd\n", - (unsigned long)(INITRD_START + INITRD_SIZE), - (unsigned long)(end_pfn << PAGE_SHIFT)); + "(0x%08lx > 0x%08lx)\ndisabling initrd\n", + ramdisk_end, end_of_mem); initrd_start = 0; } } #endif -#endif /* !CONFIG_XEN */ -#ifdef CONFIG_KEXEC -#ifdef CONFIG_XEN - xen_machine_kexec_setup_resources(); -#else - if (crashk_res.start != crashk_res.end) { - reserve_bootmem_generic(crashk_res.start, - crashk_res.end - crashk_res.start + 1); - } -#endif -#endif - + reserve_crashkernel(); paging_init(); #ifdef CONFIG_X86_LOCAL_APIC /* @@ -783,7 +812,7 @@ static void __init amd_detect_cmp(struct but in the same order as the HT nodeids. If that doesn't result in a usable node fall back to the path for the previous case. */ - int ht_nodeid = apicid - (cpu_data[0].phys_proc_id << bits); + int ht_nodeid = apicid - (cpu_data(0).phys_proc_id << bits); if (ht_nodeid >= 0 && apicid_to_node[ht_nodeid] != NUMA_NO_NODE) node = apicid_to_node[ht_nodeid]; @@ -798,6 +827,39 @@ static void __init amd_detect_cmp(struct #endif } +#define ENABLE_C1E_MASK 0x18000000 +#define CPUID_PROCESSOR_SIGNATURE 1 +#define CPUID_XFAM 0x0ff00000 +#define CPUID_XFAM_K8 0x00000000 +#define CPUID_XFAM_10H 0x00100000 +#define CPUID_XFAM_11H 0x00200000 +#define CPUID_XMOD 0x000f0000 +#define CPUID_XMOD_REV_F 0x00040000 + +#ifndef CONFIG_XEN +/* AMD systems with C1E don't have a working lAPIC timer. Check for that. 
*/ +static __cpuinit int amd_apic_timer_broken(void) +{ + u32 lo, hi; + u32 eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE); + switch (eax & CPUID_XFAM) { + case CPUID_XFAM_K8: + if ((eax & CPUID_XMOD) < CPUID_XMOD_REV_F) + break; + case CPUID_XFAM_10H: + case CPUID_XFAM_11H: + rdmsr(MSR_K8_ENABLE_C1E, lo, hi); + if (lo & ENABLE_C1E_MASK) + return 1; + break; + default: + /* err on the side of caution */ + return 1; + } + return 0; +} +#endif + static void __cpuinit init_amd(struct cpuinfo_x86 *c) { unsigned level; @@ -827,7 +889,7 @@ static void __cpuinit init_amd(struct cp level = cpuid_eax(1); if (c->x86 == 15 && ((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58)) set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability); - if (c->x86 == 0x10) + if (c->x86 == 0x10 || c->x86 == 0x11) set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability); /* Enable workaround for FXSAVE leak */ @@ -869,6 +931,11 @@ static void __cpuinit init_amd(struct cp /* Family 10 doesn't support C states in MWAIT so don't use it */ if (c->x86 == 0x10 && !force_mwait) clear_bit(X86_FEATURE_MWAIT, &c->x86_capability); + +#ifndef CONFIG_XEN + if (amd_apic_timer_broken()) + disable_apic_timer = 1; +#endif } static void __cpuinit detect_ht(struct cpuinfo_x86 *c) @@ -1179,6 +1246,7 @@ void __cpuinit print_cpu_info(struct cpu static int show_cpuinfo(struct seq_file *m, void *v) { struct cpuinfo_x86 *c = v; + int cpu = 0; /* * These flag bits must match the definitions in . @@ -1188,7 +1256,7 @@ static int show_cpuinfo(struct seq_file * applications want to get the raw CPUID data, they should access * /dev/cpu//cpuid instead. 
*/ - static char *x86_cap_flags[] = { + static const char *const x86_cap_flags[] = { /* Intel-defined */ "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce", "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov", @@ -1219,7 +1287,7 @@ static int show_cpuinfo(struct seq_file /* Intel-defined (#2) */ "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est", "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL, - NULL, NULL, "dca", NULL, NULL, NULL, NULL, "popcnt", + NULL, NULL, "dca", "sse4_1", "sse4_2", NULL, NULL, "popcnt", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* VIA/Cyrix/Centaur-defined */ @@ -1229,10 +1297,10 @@ static int show_cpuinfo(struct seq_file NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* AMD-defined (#2) */ - "lahf_lm", "cmp_legacy", "svm", "extapic", "cr8_legacy", - "altmovcr8", "abm", "sse4a", - "misalignsse", "3dnowprefetch", - "osvw", "ibs", NULL, NULL, NULL, NULL, + "lahf_lm", "cmp_legacy", "svm", "extapic", + "cr8_legacy", "abm", "sse4a", "misalignsse", + "3dnowprefetch", "osvw", "ibs", "sse5", + "skinit", "wdt", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, @@ -1242,7 +1310,7 @@ static int show_cpuinfo(struct seq_file NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, }; - static char *x86_power_flags[] = { + static const char *const x86_power_flags[] = { "ts", /* temperature sensor */ "fid", /* frequency id control */ "vid", /* voltage id control */ @@ -1257,8 +1325,7 @@ static int show_cpuinfo(struct seq_file #ifdef CONFIG_SMP - if (!cpu_online(c-cpu_data)) - return 0; + cpu = c->cpu_index; #endif seq_printf(m,"processor\t: %u\n" @@ -1266,7 +1333,7 @@ static int show_cpuinfo(struct seq_file "cpu family\t: %d\n" "model\t\t: %d\n" "model name\t: %s\n", - (unsigned)(c-cpu_data), + (unsigned)cpu, c->x86_vendor_id[0] ? 
c->x86_vendor_id : "unknown", c->x86, (int)c->x86_model, @@ -1278,7 +1345,7 @@ static int show_cpuinfo(struct seq_file seq_printf(m, "stepping\t: unknown\n"); if (cpu_has(c,X86_FEATURE_TSC)) { - unsigned int freq = cpufreq_quick_get((unsigned)(c-cpu_data)); + unsigned int freq = cpufreq_quick_get((unsigned)cpu); if (!freq) freq = cpu_khz; seq_printf(m, "cpu MHz\t\t: %u.%03u\n", @@ -1291,9 +1358,9 @@ static int show_cpuinfo(struct seq_file #ifdef CONFIG_SMP if (smp_num_siblings * c->x86_max_cores > 1) { - int cpu = c - cpu_data; seq_printf(m, "physical id\t: %d\n", c->phys_proc_id); - seq_printf(m, "siblings\t: %d\n", cpus_weight(cpu_core_map[cpu])); + seq_printf(m, "siblings\t: %d\n", + cpus_weight(per_cpu(cpu_core_map, cpu))); seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id); seq_printf(m, "cpu cores\t: %d\n", c->booted_cores); } @@ -1348,12 +1415,16 @@ static int show_cpuinfo(struct seq_file static void *c_start(struct seq_file *m, loff_t *pos) { - return *pos < NR_CPUS ? cpu_data + *pos : NULL; + if (*pos == 0) /* just in case, cpu 0 is not the first */ + *pos = first_cpu(cpu_online_map); + if ((*pos) < NR_CPUS && cpu_online(*pos)) + return &cpu_data(*pos); + return NULL; } static void *c_next(struct seq_file *m, void *v, loff_t *pos) { - ++*pos; + *pos = next_cpu(*pos, cpu_online_map); return c_start(m, pos); } Index: head-2008-12-01/arch/x86/kernel/smp_32-xen.c =================================================================== --- head-2008-12-01.orig/arch/x86/kernel/smp_32-xen.c 2008-12-01 11:36:47.000000000 +0100 +++ head-2008-12-01/arch/x86/kernel/smp_32-xen.c 2008-12-01 11:36:55.000000000 +0100 @@ -72,7 +72,7 @@ * * B stepping CPUs may hang. There are hardware work arounds * for this. We warn about it in case your board doesn't have the work - * arounds. Basically thats so I can tell anyone with a B stepping + * arounds. Basically that's so I can tell anyone with a B stepping * CPU and SMP problems "tough". 
* * Specific items [From Pentium Processor Specification Update] @@ -241,7 +241,7 @@ void leave_mm(unsigned long cpu) * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask); * Stop ipi delivery for the old mm. This is not synchronized with * the other cpus, but smp_invalidate_interrupt ignore flush ipis - * for the wrong mm, and in the worst case we perform a superflous + * for the wrong mm, and in the worst case we perform a superfluous * tlb flush. * 1a2) set cpu_tlbstate to TLBSTATE_OK * Now the smp_invalidate_interrupt won't call leave_mm if cpu0 @@ -309,6 +309,7 @@ irqreturn_t smp_invalidate_interrupt(int smp_mb__after_clear_bit(); out: put_cpu_no_resched(); + __get_cpu_var(irq_stat).irq_tlb_count++; return IRQ_HANDLED; } @@ -580,7 +581,7 @@ static void stop_this_cpu (void * dummy) */ cpu_clear(smp_processor_id(), cpu_online_map); disable_all_local_evtchn(); - if (cpu_data[smp_processor_id()].hlt_works_ok) + if (cpu_data(smp_processor_id()).hlt_works_ok) for(;;) halt(); for (;;); } @@ -610,6 +611,7 @@ void xen_smp_send_stop(void) */ irqreturn_t smp_reschedule_interrupt(int irq, void *dev_id) { + __get_cpu_var(irq_stat).irq_resched_count++; return IRQ_HANDLED; } @@ -632,6 +634,7 @@ irqreturn_t smp_call_function_interrupt( */ irq_enter(); (*func)(info); + __get_cpu_var(irq_stat).irq_call_count++; irq_exit(); if (wait) { Index: head-2008-12-01/arch/x86/kernel/smp_64-xen.c =================================================================== --- head-2008-12-01.orig/arch/x86/kernel/smp_64-xen.c 2008-12-01 11:36:47.000000000 +0100 +++ head-2008-12-01/arch/x86/kernel/smp_64-xen.c 2008-12-01 11:36:55.000000000 +0100 @@ -167,6 +167,7 @@ asmlinkage void smp_invalidate_interrupt out: ack_APIC_irq(); cpu_clear(cpu, f->flush_cpumask); + add_pda(irq_tlb_count, 1); } static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm, @@ -326,17 +327,27 @@ void unlock_ipi_call_lock(void) } /* - * this function sends a 'generic call function' IPI to one other CPU - * in the system. 
- * - * cpu is a standard Linux logical CPU number. + * this function sends a 'generic call function' IPI to all other CPU + * of the system defined in the mask. */ -static void -__smp_call_function_single(int cpu, void (*func) (void *info), void *info, - int nonatomic, int wait) + +static int +__smp_call_function_mask(cpumask_t mask, + void (*func)(void *), void *info, + int wait) { struct call_data_struct data; - int cpus = 1; + cpumask_t allbutself; + int cpus; + + allbutself = cpu_online_map; + cpu_clear(smp_processor_id(), allbutself); + + cpus_and(mask, mask, allbutself); + cpus = cpus_weight(mask); + + if (!cpus) + return 0; data.func = func; data.info = info; @@ -347,19 +358,55 @@ __smp_call_function_single(int cpu, void call_data = &data; wmb(); - /* Send a message to all other CPUs and wait for them to respond */ - send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_VECTOR); + + /* Send a message to other CPUs */ + if (cpus_equal(mask, allbutself)) + send_IPI_allbutself(CALL_FUNCTION_VECTOR); + else + send_IPI_mask(mask, CALL_FUNCTION_VECTOR); /* Wait for response */ while (atomic_read(&data.started) != cpus) cpu_relax(); if (!wait) - return; + return 0; while (atomic_read(&data.finished) != cpus) cpu_relax(); + + return 0; +} +/** + * smp_call_function_mask(): Run a function on a set of other CPUs. + * @mask: The set of cpus to run on. Must not include the current cpu. + * @func: The function to run. This must be fast and non-blocking. + * @info: An arbitrary pointer to pass to the function. + * @wait: If true, wait (atomically) until function has completed on other CPUs. + * + * Returns 0 on success, else a negative status code. + * + * If @wait is true, then returns once @func has returned; otherwise + * it returns just before the target cpu calls @func. + * + * You must not call this function with disabled interrupts or from a + * hardware interrupt handler or from a bottom half handler. 
+ */ +int smp_call_function_mask(cpumask_t mask, + void (*func)(void *), void *info, + int wait) +{ + int ret; + + /* Can deadlock when called with interrupts disabled */ + WARN_ON(irqs_disabled()); + + spin_lock(&call_lock); + ret = __smp_call_function_mask(mask, func, info, wait); + spin_unlock(&call_lock); + return ret; } +EXPORT_SYMBOL(smp_call_function_mask); /* * smp_call_function_single - Run a function on a specific CPU @@ -378,6 +425,7 @@ int smp_call_function_single (int cpu, v int nonatomic, int wait) { /* prevent preemption and reschedule on another processor */ + int ret; int me = get_cpu(); /* Can deadlock when called with interrupts disabled */ @@ -391,51 +439,14 @@ int smp_call_function_single (int cpu, v return 0; } - spin_lock(&call_lock); - __smp_call_function_single(cpu, func, info, nonatomic, wait); - spin_unlock(&call_lock); + ret = smp_call_function_mask(cpumask_of_cpu(cpu), func, info, wait); + put_cpu(); - return 0; + return ret; } EXPORT_SYMBOL(smp_call_function_single); /* - * this function sends a 'generic call function' IPI to all other CPUs - * in the system. - */ -static void __smp_call_function (void (*func) (void *info), void *info, - int nonatomic, int wait) -{ - struct call_data_struct data; - int cpus = num_online_cpus()-1; - - if (!cpus) - return; - - data.func = func; - data.info = info; - atomic_set(&data.started, 0); - data.wait = wait; - if (wait) - atomic_set(&data.finished, 0); - - call_data = &data; - wmb(); - /* Send a message to all other CPUs and wait for them to respond */ - send_IPI_allbutself(CALL_FUNCTION_VECTOR); - - /* Wait for response */ - while (atomic_read(&data.started) != cpus) - cpu_relax(); - - if (!wait) - return; - - while (atomic_read(&data.finished) != cpus) - cpu_relax(); -} - -/* * smp_call_function - run a function on all other CPUs. * @func: The function to run. This must be fast and non-blocking. * @info: An arbitrary pointer to pass to the function. 
@@ -453,10 +464,7 @@ static void __smp_call_function (void (* int smp_call_function (void (*func) (void *info), void *info, int nonatomic, int wait) { - spin_lock(&call_lock); - __smp_call_function(func,info,nonatomic,wait); - spin_unlock(&call_lock); - return 0; + return smp_call_function_mask(cpu_online_map, func, info, wait); } EXPORT_SYMBOL(smp_call_function); @@ -485,7 +493,7 @@ void smp_send_stop(void) /* Don't deadlock on the call lock in panic */ nolock = !spin_trylock(&call_lock); local_irq_save(flags); - __smp_call_function(stop_this_cpu, NULL, 0, 0); + __smp_call_function_mask(cpu_online_map, stop_this_cpu, NULL, 0); if (!nolock) spin_unlock(&call_lock); disable_all_local_evtchn(); @@ -505,7 +513,9 @@ asmlinkage irqreturn_t smp_reschedule_in { #ifndef CONFIG_XEN ack_APIC_irq(); -#else +#endif + add_pda(irq_resched_count, 1); +#ifdef CONFIG_XEN return IRQ_HANDLED; #endif } @@ -535,6 +545,7 @@ asmlinkage irqreturn_t smp_call_function exit_idle(); irq_enter(); (*func)(info); + add_pda(irq_call_count, 1); irq_exit(); if (wait) { mb(); Index: head-2008-12-01/arch/x86/kernel/time_32-xen.c =================================================================== --- head-2008-12-01.orig/arch/x86/kernel/time_32-xen.c 2008-12-01 11:36:47.000000000 +0100 +++ head-2008-12-01/arch/x86/kernel/time_32-xen.c 2008-12-01 11:36:55.000000000 +0100 @@ -1,6 +1,4 @@ /* - * linux/arch/i386/kernel/time.c - * * Copyright (C) 1991, 1992, 1995 Linus Torvalds * * This file contains the PC-specific time handling details: @@ -74,6 +72,7 @@ #include #include +#include #include #include @@ -546,6 +545,13 @@ irqreturn_t timer_interrupt(int irq, voi struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu); struct vcpu_runstate_info runstate; + /* Keep nmi watchdog up to date */ +#ifdef __i386__ + per_cpu(irq_stat, smp_processor_id()).irq0_irqs++; +#else + add_pda(irq0_irqs, 1); +#endif + /* * Here we are in the timer irq handler. 
We just have irqs locally * disabled but we don't know if the timer_bh is running on the other @@ -996,7 +1002,7 @@ static int time_cpufreq_notifier(struct struct cpufreq_freqs *freq = data; struct xen_platform_op op; - if (cpu_has(&cpu_data[freq->cpu], X86_FEATURE_CONSTANT_TSC)) + if (cpu_has(&cpu_data(freq->cpu), X86_FEATURE_CONSTANT_TSC)) return 0; if (val == CPUFREQ_PRECHANGE) @@ -1034,30 +1040,33 @@ core_initcall(cpufreq_time_setup); */ static ctl_table xen_subtable[] = { { - .ctl_name = 1, + .ctl_name = CTL_XEN_INDEPENDENT_WALLCLOCK, .procname = "independent_wallclock", .data = &independent_wallclock, .maxlen = sizeof(independent_wallclock), .mode = 0644, + .strategy = sysctl_data, .proc_handler = proc_dointvec }, { - .ctl_name = 2, + .ctl_name = CTL_XEN_PERMITTED_CLOCK_JITTER, .procname = "permitted_clock_jitter", .data = &permitted_clock_jitter, .maxlen = sizeof(permitted_clock_jitter), .mode = 0644, + .strategy = sysctl_data, .proc_handler = proc_doulongvec_minmax }, - { 0 } + { } }; static ctl_table xen_table[] = { { - .ctl_name = 123, + .ctl_name = CTL_XEN, .procname = "xen", .mode = 0555, - .child = xen_subtable}, - { 0 } + .child = xen_subtable + }, + { } }; static int __init xen_sysctl_init(void) { Index: head-2008-12-01/arch/x86/kernel/traps_32-xen.c =================================================================== --- head-2008-12-01.orig/arch/x86/kernel/traps_32-xen.c 2008-12-01 11:36:47.000000000 +0100 +++ head-2008-12-01/arch/x86/kernel/traps_32-xen.c 2008-12-01 11:36:55.000000000 +0100 @@ -1,6 +1,4 @@ /* - * linux/arch/i386/traps.c - * * Copyright (C) 1991, 1992 Linus Torvalds * * Pentium III FXSR, SSE support @@ -65,6 +63,11 @@ int panic_on_unrecovered_nmi; +#ifndef CONFIG_XEN +DECLARE_BITMAP(used_vectors, NR_VECTORS); +EXPORT_SYMBOL_GPL(used_vectors); +#endif + asmlinkage int system_call(void); /* Do we ignore FPU interrupts ? 
*/ @@ -120,7 +123,7 @@ struct stack_frame { static inline unsigned long print_context_stack(struct thread_info *tinfo, unsigned long *stack, unsigned long ebp, - struct stacktrace_ops *ops, void *data) + const struct stacktrace_ops *ops, void *data) { #ifdef CONFIG_FRAME_POINTER struct stack_frame *frame = (struct stack_frame *)ebp; @@ -157,7 +160,7 @@ static inline unsigned long print_contex void dump_trace(struct task_struct *task, struct pt_regs *regs, unsigned long *stack, - struct stacktrace_ops *ops, void *data) + const struct stacktrace_ops *ops, void *data) { unsigned long ebp = 0; @@ -229,7 +232,7 @@ static void print_trace_address(void *da touch_nmi_watchdog(); } -static struct stacktrace_ops print_trace_ops = { +static const struct stacktrace_ops print_trace_ops = { .warning = print_trace_warning, .warning_symbol = print_trace_warning_symbol, .stack = print_trace_stack, @@ -288,6 +291,11 @@ void dump_stack(void) { unsigned long stack; + printk("Pid: %d, comm: %.20s %s %s %.*s\n", + current->pid, current->comm, print_tainted(), + init_utsname()->release, + (int)strcspn(init_utsname()->version, " "), + init_utsname()->version); show_trace(current, NULL, &stack); } @@ -296,48 +304,24 @@ EXPORT_SYMBOL(dump_stack); void show_registers(struct pt_regs *regs) { int i; - int in_kernel = 1; - unsigned long esp; - unsigned short ss, gs; - - esp = (unsigned long) (®s->esp); - savesegment(ss, ss); - savesegment(gs, gs); - if (user_mode_vm(regs)) { - in_kernel = 0; - esp = regs->esp; - ss = regs->xss & 0xffff; - } + print_modules(); - printk(KERN_EMERG "CPU: %d\n" - KERN_EMERG "EIP: %04x:[<%08lx>] %s VLI\n" - KERN_EMERG "EFLAGS: %08lx (%s %.*s)\n", - smp_processor_id(), 0xffff & regs->xcs, regs->eip, - print_tainted(), regs->eflags, init_utsname()->release, - (int)strcspn(init_utsname()->version, " "), - init_utsname()->version); - print_symbol(KERN_EMERG "EIP is at %s\n", regs->eip); - printk(KERN_EMERG "eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n", - regs->eax, 
regs->ebx, regs->ecx, regs->edx); - printk(KERN_EMERG "esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", - regs->esi, regs->edi, regs->ebp, esp); - printk(KERN_EMERG "ds: %04x es: %04x fs: %04x gs: %04x ss: %04x\n", - regs->xds & 0xffff, regs->xes & 0xffff, regs->xfs & 0xffff, gs, ss); + __show_registers(regs, 0); printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)", - TASK_COMM_LEN, current->comm, current->pid, + TASK_COMM_LEN, current->comm, task_pid_nr(current), current_thread_info(), current, task_thread_info(current)); /* * When in-kernel, we also print out the stack and code at the * time of the fault.. */ - if (in_kernel) { + if (!user_mode_vm(regs)) { u8 *eip; unsigned int code_prologue = code_bytes * 43 / 64; unsigned int code_len = code_bytes; unsigned char c; printk("\n" KERN_EMERG "Stack: "); - show_stack_log_lvl(NULL, regs, (unsigned long *)esp, KERN_EMERG); + show_stack_log_lvl(NULL, regs, ®s->esp, KERN_EMERG); printk(KERN_EMERG "Code: "); @@ -382,11 +366,11 @@ int is_valid_bugaddr(unsigned long eip) void die(const char * str, struct pt_regs * regs, long err) { static struct { - spinlock_t lock; + raw_spinlock_t lock; u32 lock_owner; int lock_owner_depth; } die = { - .lock = __SPIN_LOCK_UNLOCKED(die.lock), + .lock = __RAW_SPIN_LOCK_UNLOCKED, .lock_owner = -1, .lock_owner_depth = 0 }; @@ -397,40 +381,33 @@ void die(const char * str, struct pt_reg if (die.lock_owner != raw_smp_processor_id()) { console_verbose(); - spin_lock_irqsave(&die.lock, flags); + raw_local_irq_save(flags); + __raw_spin_lock(&die.lock); die.lock_owner = smp_processor_id(); die.lock_owner_depth = 0; bust_spinlocks(1); - } - else - local_save_flags(flags); + } else + raw_local_irq_save(flags); if (++die.lock_owner_depth < 3) { - int nl = 0; unsigned long esp; unsigned short ss; report_bug(regs->eip, regs); - printk(KERN_EMERG "%s: %04lx [#%d]\n", str, err & 0xffff, ++die_counter); + printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, + ++die_counter); #ifdef 
CONFIG_PREEMPT - printk(KERN_EMERG "PREEMPT "); - nl = 1; + printk("PREEMPT "); #endif #ifdef CONFIG_SMP - if (!nl) - printk(KERN_EMERG); printk("SMP "); - nl = 1; #endif #ifdef CONFIG_DEBUG_PAGEALLOC - if (!nl) - printk(KERN_EMERG); printk("DEBUG_PAGEALLOC"); - nl = 1; #endif - if (nl) - printk("\n"); + printk("\n"); + if (notify_die(DIE_OOPS, str, regs, err, current->thread.trap_no, SIGSEGV) != NOTIFY_STOP) { @@ -454,7 +431,8 @@ void die(const char * str, struct pt_reg bust_spinlocks(0); die.lock_owner = -1; add_taint(TAINT_DIE); - spin_unlock_irqrestore(&die.lock, flags); + __raw_spin_unlock(&die.lock); + raw_local_irq_restore(flags); if (!regs) return; @@ -571,6 +549,7 @@ fastcall void do_##name(struct pt_regs * info.si_errno = 0; \ info.si_code = sicode; \ info.si_addr = (void __user *)siaddr; \ + trace_hardirqs_fixup(); \ if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ == NOTIFY_STOP) \ return; \ @@ -606,7 +585,7 @@ fastcall void __kprobes do_general_prote printk_ratelimit()) printk(KERN_INFO "%s[%d] general protection eip:%lx esp:%lx error:%lx\n", - current->comm, current->pid, + current->comm, task_pid_nr(current), regs->eip, regs->esp, error_code); force_sig(SIGSEGV, current); @@ -785,6 +764,8 @@ void restart_nmi(void) #ifdef CONFIG_KPROBES fastcall void __kprobes do_int3(struct pt_regs *regs, long error_code) { + trace_hardirqs_fixup(); + if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) == NOTIFY_STOP) return; @@ -822,6 +803,8 @@ fastcall void __kprobes do_debug(struct unsigned int condition; struct task_struct *tsk = current; + trace_hardirqs_fixup(); + get_debugreg(condition, 6); if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, @@ -1084,20 +1067,6 @@ asmlinkage void math_emulate(long arg) #endif /* CONFIG_MATH_EMULATION */ -#ifdef CONFIG_X86_F00F_BUG -void __init trap_init_f00f_bug(void) -{ - __set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO); - - /* - * Update the IDT descriptor and reload the IDT 
so that - * it uses the read-only mapped virtual address. - */ - idt_descr.address = fix_to_virt(FIX_F00F_IDT); - load_idt(&idt_descr); -} -#endif - /* * NB. All these are "trap gates" (i.e. events_mask isn't set) except Index: head-2008-12-01/arch/x86/kernel/traps_64-xen.c =================================================================== --- head-2008-12-01.orig/arch/x86/kernel/traps_64-xen.c 2008-12-01 11:36:47.000000000 +0100 +++ head-2008-12-01/arch/x86/kernel/traps_64-xen.c 2008-12-01 11:36:55.000000000 +0100 @@ -1,6 +1,4 @@ /* - * linux/arch/x86-64/traps.c - * * Copyright (C) 1991, 1992 Linus Torvalds * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs * @@ -33,6 +31,7 @@ #include #include #include +#include #if defined(CONFIG_EDAC) #include @@ -205,7 +204,7 @@ static unsigned long *in_exception_stack #define MSG(txt) ops->warning(data, txt) /* - * x86-64 can have upto three kernel stacks: + * x86-64 can have up to three kernel stacks: * process stack * interrupt stack * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack @@ -219,7 +218,7 @@ static inline int valid_stack_ptr(struct void dump_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long *stack, - struct stacktrace_ops *ops, void *data) + const struct stacktrace_ops *ops, void *data) { const unsigned cpu = get_cpu(); unsigned long *irqstack_end = (unsigned long*)cpu_pda(cpu)->irqstackptr; @@ -340,7 +339,7 @@ static void print_trace_address(void *da printk_address(addr); } -static struct stacktrace_ops print_trace_ops = { +static const struct stacktrace_ops print_trace_ops = { .warning = print_trace_warning, .warning_symbol = print_trace_warning_symbol, .stack = print_trace_stack, @@ -404,6 +403,12 @@ void show_stack(struct task_struct *tsk, void dump_stack(void) { unsigned long dummy; + + printk("Pid: %d, comm: %.20s %s %s %.*s\n", + current->pid, current->comm, print_tainted(), + init_utsname()->release, + (int)strcspn(init_utsname()->version, " "), + 
init_utsname()->version); show_trace(NULL, NULL, &dummy); } @@ -466,7 +471,7 @@ void out_of_line_bug(void) EXPORT_SYMBOL(out_of_line_bug); #endif -static DEFINE_SPINLOCK(die_lock); +static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED; static int die_owner = -1; static unsigned int die_nest_count; @@ -478,13 +483,13 @@ unsigned __kprobes long oops_begin(void) oops_enter(); /* racy, but better than risking deadlock. */ - local_irq_save(flags); + raw_local_irq_save(flags); cpu = smp_processor_id(); - if (!spin_trylock(&die_lock)) { + if (!__raw_spin_trylock(&die_lock)) { if (cpu == die_owner) /* nested oops. should stop eventually */; else - spin_lock(&die_lock); + __raw_spin_lock(&die_lock); } die_nest_count++; die_owner = cpu; @@ -498,12 +503,10 @@ void __kprobes oops_end(unsigned long fl die_owner = -1; bust_spinlocks(0); die_nest_count--; - if (die_nest_count) - /* We still own the lock */ - local_irq_restore(flags); - else + if (!die_nest_count) /* Nest count reaches zero, release the lock. */ - spin_unlock_irqrestore(&die_lock, flags); + __raw_spin_unlock(&die_lock); + raw_local_irq_restore(flags); if (panic_on_oops) panic("Fatal exception"); oops_exit(); @@ -636,6 +639,7 @@ asmlinkage void do_##name(struct pt_regs info.si_errno = 0; \ info.si_code = sicode; \ info.si_addr = (void __user *)siaddr; \ + trace_hardirqs_fixup(); \ if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ == NOTIFY_STOP) \ return; \ @@ -741,11 +745,8 @@ mem_parity_error(unsigned char reason, s printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); -#if 0 /* XEN */ /* Clear and disable the memory parity error line. 
*/ - reason = (reason & 0xf) | 4; - outb(reason, 0x61); -#endif /* XEN */ + clear_mem_error(reason); } static __kprobes void @@ -754,14 +755,8 @@ io_check_error(unsigned char reason, str printk("NMI: IOCK error (debug interrupt?)\n"); show_registers(regs); -#if 0 /* XEN */ /* Re-enable the IOCK line, wait for a few seconds */ - reason = (reason & 0xf) | 8; - outb(reason, 0x61); - mdelay(2000); - reason &= ~8; - outb(reason, 0x61); -#endif /* XEN */ + clear_io_check_error(reason); } static __kprobes void @@ -821,6 +816,8 @@ asmlinkage __kprobes void default_do_nmi /* runs on IST stack. */ asmlinkage void __kprobes do_int3(struct pt_regs * regs, long error_code) { + trace_hardirqs_fixup(); + if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) == NOTIFY_STOP) { return; } @@ -858,6 +855,8 @@ asmlinkage void __kprobes do_debug(struc struct task_struct *tsk = current; siginfo_t info; + trace_hardirqs_fixup(); + get_debugreg(condition, 6); if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, Index: head-2008-12-01/arch/x86/kernel/vsyscall_64-xen.c =================================================================== --- head-2008-12-01.orig/arch/x86/kernel/vsyscall_64-xen.c 2008-12-01 11:36:47.000000000 +0100 +++ head-2008-12-01/arch/x86/kernel/vsyscall_64-xen.c 2008-12-01 11:36:55.000000000 +0100 @@ -1,6 +1,4 @@ /* - * linux/arch/x86_64/kernel/vsyscall.c - * * Copyright (C) 2001 Andrea Arcangeli SuSE * Copyright 2003 Andi Kleen, SuSE Labs. 
* @@ -50,12 +48,12 @@ ({unsigned long v; \ extern char __vsyscall_0; \ asm("" : "=r" (v) : "0" (x)); \ - ((v - VSYSCALL_FIRST_PAGE) + __pa_symbol(&__vsyscall_0)); }) + ((v - VSYSCALL_START) + __pa_symbol(&__vsyscall_0)); }) /* * vsyscall_gtod_data contains data that is : * - readonly from vsyscalls - * - writen by timer interrupt or systcl (/proc/sys/kernel/vsyscall64) + * - written by timer interrupt or systcl (/proc/sys/kernel/vsyscall64) * Try to keep this structure as small as possible to avoid cache line ping pongs */ int __vgetcpu_mode __section_vgetcpu_mode; @@ -66,6 +64,16 @@ struct vsyscall_gtod_data __vsyscall_gto .sysctl_enabled = 1, }; +void update_vsyscall_tz(void) +{ + unsigned long flags; + + write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags); + /* sys_tz has changed */ + vsyscall_gtod_data.sys_tz = sys_tz; + write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); +} + void update_vsyscall(struct timespec *wall_time, struct clocksource *clock) { unsigned long flags; @@ -79,8 +87,6 @@ void update_vsyscall(struct timespec *wa vsyscall_gtod_data.clock.shift = clock->shift; vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec; vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec; - vsyscall_gtod_data.sys_tz = sys_tz; - vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec; vsyscall_gtod_data.wall_to_monotonic = wall_to_monotonic; write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); } @@ -166,7 +172,7 @@ time_t __vsyscall(1) vtime(time_t *t) if (unlikely(!__vsyscall_gtod_data.sysctl_enabled)) return time_syscall(t); - vgettimeofday(&tv, 0); + vgettimeofday(&tv, NULL); result = tv.tv_sec; if (t) *t = result; @@ -260,18 +266,10 @@ out: return ret; } -static int vsyscall_sysctl_nostrat(ctl_table *t, int __user *name, int nlen, - void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen) -{ - return -ENOSYS; -} - static ctl_table kernel_table2[] = { - { .ctl_name = 99, .procname = "vsyscall64", + { .procname = 
"vsyscall64", .data = &vsyscall_gtod_data.sysctl_enabled, .maxlen = sizeof(int), .mode = 0644, - .strategy = vsyscall_sysctl_nostrat, .proc_handler = vsyscall_sysctl_change }, {} }; @@ -291,9 +289,9 @@ static void __cpuinit vsyscall_set_cpu(i unsigned long d; unsigned long node = 0; #ifdef CONFIG_NUMA - node = cpu_to_node[cpu]; + node = cpu_to_node(cpu); #endif - if (cpu_has(&cpu_data[cpu], X86_FEATURE_RDTSCP)) + if (cpu_has(&cpu_data(cpu), X86_FEATURE_RDTSCP)) write_rdtscp_aux((node << 12) | cpu); /* Store cpu number in limit so that it can be loaded quickly Index: head-2008-12-01/arch/x86/mm/fault_32-xen.c =================================================================== --- head-2008-12-01.orig/arch/x86/mm/fault_32-xen.c 2008-12-01 11:36:47.000000000 +0100 +++ head-2008-12-01/arch/x86/mm/fault_32-xen.c 2008-12-01 11:36:55.000000000 +0100 @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -32,33 +33,27 @@ extern void die(const char *,struct pt_regs *,long); -static ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain); - -int register_page_fault_notifier(struct notifier_block *nb) +#ifdef CONFIG_KPROBES +static inline int notify_page_fault(struct pt_regs *regs) { - vmalloc_sync_all(); - return atomic_notifier_chain_register(¬ify_page_fault_chain, nb); -} -EXPORT_SYMBOL_GPL(register_page_fault_notifier); + int ret = 0; -int unregister_page_fault_notifier(struct notifier_block *nb) -{ - return atomic_notifier_chain_unregister(¬ify_page_fault_chain, nb); -} -EXPORT_SYMBOL_GPL(unregister_page_fault_notifier); + /* kprobe_running() needs smp_processor_id() */ + if (!user_mode_vm(regs)) { + preempt_disable(); + if (kprobe_running() && kprobe_fault_handler(regs, 14)) + ret = 1; + preempt_enable(); + } -static inline int notify_page_fault(struct pt_regs *regs, long err) + return ret; +} +#else +static inline int notify_page_fault(struct pt_regs *regs) { - struct die_args args = { - .regs = regs, - .str = "page fault", - .err = err, - .trapnr = 14, - 
.signr = SIGSEGV - }; - return atomic_notifier_call_chain(¬ify_page_fault_chain, - DIE_PAGE_FAULT, &args); + return 0; } +#endif /* * Return EIP plus the CS segment base. The segment limit is also @@ -110,7 +105,7 @@ static inline unsigned long get_segment_ LDT and other horrors are only used in user space. */ if (seg & (1<<2)) { /* Must lock the LDT while reading it. */ - down(¤t->mm->context.sem); + mutex_lock(¤t->mm->context.lock); desc = current->mm->context.ldt; desc = (void *)desc + (seg & ~7); } else { @@ -123,7 +118,7 @@ static inline unsigned long get_segment_ base = get_desc_base((unsigned long *)desc); if (seg & (1<<2)) { - up(¤t->mm->context.sem); + mutex_unlock(¤t->mm->context.lock); } else put_cpu(); @@ -244,7 +239,7 @@ static void dump_fault_path(unsigned lon if (mfn_to_pfn(mfn) >= highstart_pfn) return; #endif - if (p[0] & _PAGE_PRESENT) { + if ((p[0] & _PAGE_PRESENT) && !(p[0] & _PAGE_PSE)) { page = mfn_to_pfn(mfn) << PAGE_SHIFT; p = (unsigned long *) __va(page); address &= 0x001fffff; @@ -270,7 +265,8 @@ static void dump_fault_path(unsigned lon * it's allocated already. */ if ((machine_to_phys(page) >> PAGE_SHIFT) < max_low_pfn - && (page & _PAGE_PRESENT)) { + && (page & _PAGE_PRESENT) + && !(page & _PAGE_PSE)) { page = machine_to_phys(page & PAGE_MASK); page = ((unsigned long *) __va(page))[(address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)]; @@ -416,6 +412,11 @@ fastcall void __kprobes do_page_fault(st int write, si_code; int fault; + /* + * We can fault from pretty much anywhere, with unknown IRQ state. + */ + trace_hardirqs_fixup(); + /* get the address */ address = read_cr2(); @@ -453,7 +454,7 @@ fastcall void __kprobes do_page_fault(st /* Can take a spurious fault if mapping changes R/O -> R/W. */ if (spurious_fault(regs, address, error_code)) return; - if (notify_page_fault(regs, error_code) == NOTIFY_STOP) + if (notify_page_fault(regs)) return; /* * Don't take the mm semaphore here. 
If we fixup a prefetch @@ -462,7 +463,7 @@ fastcall void __kprobes do_page_fault(st goto bad_area_nosemaphore; } - if (notify_page_fault(regs, error_code) == NOTIFY_STOP) + if (notify_page_fault(regs)) return; /* It's safe to allow irq's after cr2 has been saved and the vmalloc @@ -481,7 +482,7 @@ fastcall void __kprobes do_page_fault(st /* When running in the kernel we expect faults to occur only to * addresses in user space. All other faults represent errors in the - * kernel and should generate an OOPS. Unfortunatly, in the case of an + * kernel and should generate an OOPS. Unfortunately, in the case of an * erroneous fault occurring in a code path which already holds mmap_sem * we will deadlock attempting to validate the fault against the * address space. Luckily the kernel only validly references user @@ -489,7 +490,7 @@ fastcall void __kprobes do_page_fault(st * exceptions table. * * As the vast majority of faults will be valid we will only perform - * the source reference check when there is a possibilty of a deadlock. + * the source reference check when there is a possibility of a deadlock. * Attempt to lock the address space, if we cannot we then validate the * source. If this is invalid we can skip the address space check, * thus avoiding the deadlock. @@ -598,8 +599,8 @@ bad_area_nosemaphore: printk_ratelimit()) { printk("%s%s[%d]: segfault at %08lx eip %08lx " "esp %08lx error %lx\n", - tsk->pid > 1 ? KERN_INFO : KERN_EMERG, - tsk->comm, tsk->pid, address, regs->eip, + task_pid_nr(tsk) > 1 ? 
KERN_INFO : KERN_EMERG, + tsk->comm, task_pid_nr(tsk), address, regs->eip, regs->esp, error_code); } tsk->thread.cr2 = address; @@ -664,8 +665,7 @@ no_context: printk(KERN_ALERT "BUG: unable to handle kernel paging" " request"); printk(" at virtual address %08lx\n",address); - printk(KERN_ALERT " printing eip:\n"); - printk("%08lx\n", regs->eip); + printk(KERN_ALERT "printing eip: %08lx\n", regs->eip); dump_fault_path(address); } tsk->thread.cr2 = address; @@ -681,14 +681,14 @@ no_context: */ out_of_memory: up_read(&mm->mmap_sem); - if (is_init(tsk)) { + if (is_global_init(tsk)) { yield(); down_read(&mm->mmap_sem); goto survive; } printk("VM: killing process %s\n", tsk->comm); if (error_code & 4) - do_exit(SIGKILL); + do_group_exit(SIGKILL); goto no_context; do_sigbus: Index: head-2008-12-01/arch/x86/mm/fault_64-xen.c =================================================================== --- head-2008-12-01.orig/arch/x86/mm/fault_64-xen.c 2008-12-01 11:36:47.000000000 +0100 +++ head-2008-12-01/arch/x86/mm/fault_64-xen.c 2008-12-01 11:36:55.000000000 +0100 @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -40,34 +41,27 @@ #define PF_RSVD (1<<3) #define PF_INSTR (1<<4) -static ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain); - -/* Hook to register for page fault notifications */ -int register_page_fault_notifier(struct notifier_block *nb) +#ifdef CONFIG_KPROBES +static inline int notify_page_fault(struct pt_regs *regs) { - vmalloc_sync_all(); - return atomic_notifier_chain_register(¬ify_page_fault_chain, nb); -} -EXPORT_SYMBOL_GPL(register_page_fault_notifier); + int ret = 0; -int unregister_page_fault_notifier(struct notifier_block *nb) -{ - return atomic_notifier_chain_unregister(¬ify_page_fault_chain, nb); -} -EXPORT_SYMBOL_GPL(unregister_page_fault_notifier); + /* kprobe_running() needs smp_processor_id() */ + if (!user_mode(regs)) { + preempt_disable(); + if (kprobe_running() && kprobe_fault_handler(regs, 14)) + ret = 1; + preempt_enable(); 
+ } -static inline int notify_page_fault(struct pt_regs *regs, long err) + return ret; +} +#else +static inline int notify_page_fault(struct pt_regs *regs) { - struct die_args args = { - .regs = regs, - .str = "page fault", - .err = err, - .trapnr = 14, - .signr = SIGSEGV - }; - return atomic_notifier_call_chain(¬ify_page_fault_chain, - DIE_PAGE_FAULT, &args); + return 0; } +#endif /* Sometimes the CPU reports invalid exceptions on prefetch. Check that here and ignore. @@ -175,7 +169,7 @@ void dump_pagetable(unsigned long addres pmd = pmd_offset(pud, address); if (bad_address(pmd)) goto bad; printk("PMD %lx ", pmd_val(*pmd)); - if (!pmd_present(*pmd)) goto ret; + if (!pmd_present(*pmd) || pmd_large(*pmd)) goto ret; pte = pte_offset_kernel(pmd, address); if (bad_address(pte)) goto bad; @@ -294,7 +288,6 @@ static int vmalloc_fault(unsigned long a return 0; } -static int page_fault_trace; int show_unhandled_signals = 1; @@ -371,6 +364,11 @@ asmlinkage void __kprobes do_page_fault( if (!user_mode(regs)) error_code &= ~PF_USER; /* means kernel */ + /* + * We can fault from pretty much anywhere, with unknown IRQ state. + */ + trace_hardirqs_fixup(); + tsk = current; mm = tsk->mm; prefetchw(&mm->mmap_sem); @@ -408,7 +406,7 @@ asmlinkage void __kprobes do_page_fault( /* Can take a spurious fault if mapping changes R/O -> R/W. */ if (spurious_fault(regs, address, error_code)) return; - if (notify_page_fault(regs, error_code) == NOTIFY_STOP) + if (notify_page_fault(regs)) return; /* * Don't take the mm semaphore here. 
If we fixup a prefetch @@ -417,16 +415,12 @@ asmlinkage void __kprobes do_page_fault( goto bad_area_nosemaphore; } - if (notify_page_fault(regs, error_code) == NOTIFY_STOP) + if (notify_page_fault(regs)) return; if (likely(regs->eflags & X86_EFLAGS_IF)) local_irq_enable(); - if (unlikely(page_fault_trace)) - printk("pagefault rip:%lx rsp:%lx cs:%lu ss:%lu address %lx error %lx\n", - regs->rip,regs->rsp,regs->cs,regs->ss,address,error_code); - if (unlikely(error_code & PF_RSVD)) pgtable_bad(address, regs, error_code); @@ -447,7 +441,7 @@ asmlinkage void __kprobes do_page_fault( again: /* When running in the kernel we expect faults to occur only to * addresses in user space. All other faults represent errors in the - * kernel and should generate an OOPS. Unfortunatly, in the case of an + * kernel and should generate an OOPS. Unfortunately, in the case of an * erroneous fault occurring in a code path which already holds mmap_sem * we will deadlock attempting to validate the fault against the * address space. Luckily the kernel only validly references user @@ -455,7 +449,7 @@ asmlinkage void __kprobes do_page_fault( * exceptions table. * * As the vast majority of faults will be valid we will only perform - * the source reference check when there is a possibilty of a deadlock. + * the source reference check when there is a possibility of a deadlock. * Attempt to lock the address space, if we cannot we then validate the * source. If this is invalid we can skip the address space check, * thus avoiding the deadlock. @@ -557,7 +551,7 @@ bad_area_nosemaphore: if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && printk_ratelimit()) { printk( - "%s%s[%d]: segfault at %016lx rip %016lx rsp %016lx error %lx\n", + "%s%s[%d]: segfault at %lx rip %lx rsp %lx error %lx\n", tsk->pid > 1 ? 
KERN_INFO : KERN_EMERG, tsk->comm, tsk->pid, address, regs->rip, regs->rsp, error_code); @@ -623,7 +617,7 @@ no_context: */ out_of_memory: up_read(&mm->mmap_sem); - if (is_init(current)) { + if (is_global_init(current)) { yield(); goto again; } @@ -690,10 +684,3 @@ void vmalloc_sync_all(void) BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) == (__START_KERNEL & PGDIR_MASK))); } - -static int __init enable_pagefaulttrace(char *str) -{ - page_fault_trace = 1; - return 1; -} -__setup("pagefaulttrace", enable_pagefaulttrace); Index: head-2008-12-01/arch/x86/mm/init_32-xen.c =================================================================== --- head-2008-12-01.orig/arch/x86/mm/init_32-xen.c 2008-12-01 11:36:47.000000000 +0100 +++ head-2008-12-01/arch/x86/mm/init_32-xen.c 2008-12-01 11:36:55.000000000 +0100 @@ -94,7 +94,14 @@ static pte_t * __init one_page_table_ini #else if (!(__pmd_val(*pmd) & _PAGE_PRESENT)) { #endif - pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); + pte_t *page_table = NULL; + +#ifdef CONFIG_DEBUG_PAGEALLOC + page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE); +#endif + if (!page_table) + page_table = + (pte_t *)alloc_bootmem_low_pages(PAGE_SIZE); paravirt_alloc_pt(&init_mm, __pa(page_table) >> PAGE_SHIFT); make_lowmem_page_readonly(page_table, @@ -102,7 +109,7 @@ static pte_t * __init one_page_table_ini set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); BUG_ON(page_table != pte_offset_kernel(pmd, 0)); } - + return pte_offset_kernel(pmd, 0); } @@ -360,8 +367,13 @@ extern void set_highmem_pages_init(int); static void __init set_highmem_pages_init(int bad_ppro) { int pfn; - for (pfn = highstart_pfn; pfn < highend_pfn; pfn++) - add_one_highpage_init(pfn_to_page(pfn), pfn, bad_ppro); + for (pfn = highstart_pfn; pfn < highend_pfn; pfn++) { + /* + * Holes under sparsemem might not have no mem_map[]: + */ + if (pfn_valid(pfn)) + add_one_highpage_init(pfn_to_page(pfn), pfn, bad_ppro); + } totalram_pages += totalhigh_pages; } #endif /* 
CONFIG_FLATMEM */ @@ -779,35 +791,18 @@ int arch_add_memory(int nid, u64 start, return __add_pages(zone, start_pfn, nr_pages); } -int remove_memory(u64 start, u64 size) -{ - return -EINVAL; -} -EXPORT_SYMBOL_GPL(remove_memory); #endif struct kmem_cache *pmd_cache; void __init pgtable_cache_init(void) { - size_t pgd_size = PTRS_PER_PGD*sizeof(pgd_t); - - if (PTRS_PER_PMD > 1) { + if (PTRS_PER_PMD > 1) pmd_cache = kmem_cache_create("pmd", - PTRS_PER_PMD*sizeof(pmd_t), - PTRS_PER_PMD*sizeof(pmd_t), - SLAB_PANIC, - pmd_ctor); - if (!SHARED_KERNEL_PMD) { - /* If we're in PAE mode and have a non-shared - kernel pmd, then the pgd size must be a - page size. This is because the pgd_list - links through the page structure, so there - can only be one pgd per page for this to - work. */ - pgd_size = PAGE_SIZE; - } - } + PTRS_PER_PMD*sizeof(pmd_t), + PTRS_PER_PMD*sizeof(pmd_t), + SLAB_PANIC, + pmd_ctor); } /* Index: head-2008-12-01/arch/x86/mm/init_64-xen.c =================================================================== --- head-2008-12-01.orig/arch/x86/mm/init_64-xen.c 2008-12-01 11:36:47.000000000 +0100 +++ head-2008-12-01/arch/x86/mm/init_64-xen.c 2008-12-01 11:36:55.000000000 +0100 @@ -767,7 +767,7 @@ static void xen_finish_init_mapping(void /* Setup the direct mapping of the physical memory at PAGE_OFFSET. This runs before bootmem is initialized and gets pages directly from the physical memory. To access them they are temporarily mapped. 
*/ -void __meminit init_memory_mapping(unsigned long start, unsigned long end) +void __init_refok init_memory_mapping(unsigned long start, unsigned long end) { unsigned long next; @@ -901,12 +901,6 @@ error: } EXPORT_SYMBOL_GPL(arch_add_memory); -int remove_memory(u64 start, u64 size) -{ - return -EINVAL; -} -EXPORT_SYMBOL_GPL(remove_memory); - #if !defined(CONFIG_ACPI_NUMA) && defined(CONFIG_NUMA) int memory_add_physaddr_to_nid(u64 start) { @@ -1175,14 +1169,6 @@ int in_gate_area_no_task(unsigned long a return (addr >= VSYSCALL_START) && (addr < VSYSCALL_END); } -#ifndef CONFIG_XEN -void * __init alloc_bootmem_high_node(pg_data_t *pgdat, unsigned long size) -{ - return __alloc_bootmem_core(pgdat->bdata, size, - SMP_CACHE_BYTES, (4UL*1024*1024*1024), 0); -} -#endif - const char *arch_vma_name(struct vm_area_struct *vma) { if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso) @@ -1191,3 +1177,48 @@ const char *arch_vma_name(struct vm_area return "[vsyscall]"; return NULL; } + +#ifdef CONFIG_SPARSEMEM_VMEMMAP +/* + * Initialise the sparsemem vmemmap using huge-pages at the PMD level. 
+ */ +int __meminit vmemmap_populate(struct page *start_page, + unsigned long size, int node) +{ + unsigned long addr = (unsigned long)start_page; + unsigned long end = (unsigned long)(start_page + size); + unsigned long next; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + + for (; addr < end; addr = next) { + next = pmd_addr_end(addr, end); + + pgd = vmemmap_pgd_populate(addr, node); + if (!pgd) + return -ENOMEM; + pud = vmemmap_pud_populate(pgd, addr, node); + if (!pud) + return -ENOMEM; + + pmd = pmd_offset(pud, addr); + if (pmd_none(*pmd)) { + pte_t entry; + void *p = vmemmap_alloc_block(PMD_SIZE, node); + if (!p) + return -ENOMEM; + + entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL); + mk_pte_huge(entry); + set_pmd(pmd, __pmd(pte_val(entry))); + + printk(KERN_DEBUG " [%lx-%lx] PMD ->%p on node %d\n", + addr, addr + PMD_SIZE - 1, p, node); + } else + vmemmap_verify((pte_t *)pmd, node, addr, next); + } + + return 0; +} +#endif Index: head-2008-12-01/arch/x86/mm/pageattr_64-xen.c =================================================================== --- head-2008-12-01.orig/arch/x86/mm/pageattr_64-xen.c 2008-12-01 11:36:47.000000000 +0100 +++ head-2008-12-01/arch/x86/mm/pageattr_64-xen.c 2008-12-01 11:36:55.000000000 +0100 @@ -17,9 +17,6 @@ #include #include -LIST_HEAD(mm_unpinned); -DEFINE_SPINLOCK(mm_unpinned_lock); - static void _pin_lock(struct mm_struct *mm, int lock) { if (lock) spin_lock(&mm->page_table_lock); @@ -81,8 +78,8 @@ static void _pin_lock(struct mm_struct * #define PIN_BATCH 8 static DEFINE_PER_CPU(multicall_entry_t[PIN_BATCH], pb_mcl); -static inline unsigned int mm_walk_set_prot(void *pt, pgprot_t flags, - unsigned int cpu, unsigned int seq) +static inline unsigned int pgd_walk_set_prot(void *pt, pgprot_t flags, + unsigned int cpu, unsigned int seq) { struct page *page = virt_to_page(pt); unsigned long pfn = page_to_pfn(page); @@ -100,9 +97,9 @@ static inline unsigned int mm_walk_set_p return seq; } -static void mm_walk(struct mm_struct *mm, 
pgprot_t flags) +static void pgd_walk(pgd_t *pgd_base, pgprot_t flags) { - pgd_t *pgd; + pgd_t *pgd = pgd_base; pud_t *pud; pmd_t *pmd; pte_t *pte; @@ -110,7 +107,6 @@ static void mm_walk(struct mm_struct *mm unsigned int cpu, seq; multicall_entry_t *mcl; - pgd = mm->pgd; cpu = get_cpu(); /* @@ -125,18 +121,18 @@ static void mm_walk(struct mm_struct *mm continue; pud = pud_offset(pgd, 0); if (PTRS_PER_PUD > 1) /* not folded */ - seq = mm_walk_set_prot(pud,flags,cpu,seq); + seq = pgd_walk_set_prot(pud,flags,cpu,seq); for (u = 0; u < PTRS_PER_PUD; u++, pud++) { if (pud_none(*pud)) continue; pmd = pmd_offset(pud, 0); if (PTRS_PER_PMD > 1) /* not folded */ - seq = mm_walk_set_prot(pmd,flags,cpu,seq); + seq = pgd_walk_set_prot(pmd,flags,cpu,seq); for (m = 0; m < PTRS_PER_PMD; m++, pmd++) { if (pmd_none(*pmd)) continue; pte = pte_offset_kernel(pmd,0); - seq = mm_walk_set_prot(pte,flags,cpu,seq); + seq = pgd_walk_set_prot(pte,flags,cpu,seq); } } } @@ -148,12 +144,12 @@ static void mm_walk(struct mm_struct *mm seq = 0; } MULTI_update_va_mapping(mcl + seq, - (unsigned long)__user_pgd(mm->pgd), - pfn_pte(virt_to_phys(__user_pgd(mm->pgd))>>PAGE_SHIFT, flags), + (unsigned long)__user_pgd(pgd_base), + pfn_pte(virt_to_phys(__user_pgd(pgd_base))>>PAGE_SHIFT, flags), 0); MULTI_update_va_mapping(mcl + seq + 1, - (unsigned long)mm->pgd, - pfn_pte(virt_to_phys(mm->pgd)>>PAGE_SHIFT, flags), + (unsigned long)pgd_base, + pfn_pte(virt_to_phys(pgd_base)>>PAGE_SHIFT, flags), UVMF_TLB_FLUSH); if (unlikely(HYPERVISOR_multicall_check(mcl, seq + 2, NULL))) BUG(); @@ -161,21 +157,35 @@ static void mm_walk(struct mm_struct *mm put_cpu(); } +static void __pgd_pin(pgd_t *pgd) +{ + pgd_walk(pgd, PAGE_KERNEL_RO); + xen_pgd_pin(__pa(pgd)); /* kernel */ + xen_pgd_pin(__pa(__user_pgd(pgd))); /* user */ + SetPagePinned(virt_to_page(pgd)); +} + +static void __pgd_unpin(pgd_t *pgd) +{ + xen_pgd_unpin(__pa(pgd)); + xen_pgd_unpin(__pa(__user_pgd(pgd))); + pgd_walk(pgd, PAGE_KERNEL); + 
ClearPagePinned(virt_to_page(pgd)); +} + +void pgd_test_and_unpin(pgd_t *pgd) +{ + if (PagePinned(virt_to_page(pgd))) + __pgd_unpin(pgd); +} + void mm_pin(struct mm_struct *mm) { if (xen_feature(XENFEAT_writable_page_tables)) return; pin_lock(mm); - - mm_walk(mm, PAGE_KERNEL_RO); - xen_pgd_pin(__pa(mm->pgd)); /* kernel */ - xen_pgd_pin(__pa(__user_pgd(mm->pgd))); /* user */ - SetPagePinned(virt_to_page(mm->pgd)); - spin_lock(&mm_unpinned_lock); - list_del(&mm->context.unpinned); - spin_unlock(&mm_unpinned_lock); - + __pgd_pin(mm->pgd); pin_unlock(mm); } @@ -185,34 +195,30 @@ void mm_unpin(struct mm_struct *mm) return; pin_lock(mm); - - xen_pgd_unpin(__pa(mm->pgd)); - xen_pgd_unpin(__pa(__user_pgd(mm->pgd))); - mm_walk(mm, PAGE_KERNEL); - ClearPagePinned(virt_to_page(mm->pgd)); - spin_lock(&mm_unpinned_lock); - list_add(&mm->context.unpinned, &mm_unpinned); - spin_unlock(&mm_unpinned_lock); - + __pgd_unpin(mm->pgd); pin_unlock(mm); } void mm_pin_all(void) { + struct page *page; + unsigned long flags; + if (xen_feature(XENFEAT_writable_page_tables)) return; /* - * Allow uninterrupted access to the mm_unpinned list. We don't - * actually take the mm_unpinned_lock as it is taken inside mm_pin(). + * Allow uninterrupted access to the pgd_list. Also protects + * __pgd_pin() by disabling preemption. * All other CPUs must be at a safe point (e.g., in stop_machine * or offlined entirely). 
*/ - preempt_disable(); - while (!list_empty(&mm_unpinned)) - mm_pin(list_entry(mm_unpinned.next, struct mm_struct, - context.unpinned)); - preempt_enable(); + spin_lock_irqsave(&pgd_lock, flags); + list_for_each_entry(page, &pgd_list, lru) { + if (!PagePinned(page)) + __pgd_pin((pgd_t *)page_address(page)); + } + spin_unlock_irqrestore(&pgd_lock, flags); } void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) @@ -331,11 +337,11 @@ static struct page *split_large_page(uns return base; } -static void cache_flush_page(void *adr) +void clflush_cache_range(void *adr, int size) { int i; - for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size) - asm volatile("clflush (%0)" :: "r" (adr + i)); + for (i = 0; i < size; i += boot_cpu_data.x86_clflush_size) + clflush(adr+i); } static void flush_kernel_map(void *arg) @@ -350,7 +356,7 @@ static void flush_kernel_map(void *arg) asm volatile("wbinvd" ::: "memory"); else list_for_each_entry(pg, l, lru) { void *adr = page_address(pg); - cache_flush_page(adr); + clflush_cache_range(adr, PAGE_SIZE); } __flush_tlb_all(); } @@ -418,6 +424,7 @@ __change_page_attr(unsigned long address split = split_large_page(address, prot, ref_prot2); if (!split) return -ENOMEM; + pgprot_val(ref_prot2) &= ~_PAGE_NX; set_pte(kpte, mk_pte(split, ref_prot2)); kpte_page = split; } @@ -510,9 +517,14 @@ void global_flush_tlb(void) struct page *pg, *next; struct list_head l; - down_read(&init_mm.mmap_sem); + /* + * Write-protect the semaphore, to exclude two contexts + * doing a list_replace_init() call in parallel and to + * exclude new additions to the deferred_pages list: + */ + down_write(&init_mm.mmap_sem); list_replace_init(&deferred_pages, &l); - up_read(&init_mm.mmap_sem); + up_write(&init_mm.mmap_sem); flush_map(&l); Index: head-2008-12-01/arch/x86/mm/pgtable_32-xen.c =================================================================== --- head-2008-12-01.orig/arch/x86/mm/pgtable_32-xen.c 2008-12-01 11:36:47.000000000 +0100 +++ 
head-2008-12-01/arch/x86/mm/pgtable_32-xen.c 2008-12-01 11:36:55.000000000 +0100 @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -46,6 +47,8 @@ void show_mem(void) for_each_online_pgdat(pgdat) { pgdat_resize_lock(pgdat, &flags); for (i = 0; i < pgdat->node_spanned_pages; ++i) { + if (unlikely(i % MAX_ORDER_NR_PAGES == 0)) + touch_nmi_watchdog(); page = pgdat_page_nr(pgdat, i); total++; if (PageHighMem(page)) @@ -206,7 +209,7 @@ void pte_free(struct page *pte) __free_page(pte); } -void pmd_ctor(void *pmd, struct kmem_cache *cache, unsigned long flags) +void pmd_ctor(struct kmem_cache *cache, void *pmd) { memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t)); } Index: head-2008-12-01/arch/x86/pci/irq-xen.c =================================================================== --- head-2008-12-01.orig/arch/x86/pci/irq-xen.c 2008-12-01 11:36:47.000000000 +0100 +++ head-2008-12-01/arch/x86/pci/irq-xen.c 2008-12-01 11:36:55.000000000 +0100 @@ -173,7 +173,7 @@ void eisa_set_level_irq(unsigned int irq } /* - * Common IRQ routing practice: nybbles in config space, + * Common IRQ routing practice: nibbles in config space, * offset by some magic constant. */ static unsigned int read_config_nybble(struct pci_dev *router, unsigned offset, unsigned nr) @@ -496,6 +496,26 @@ static int pirq_amd756_set(struct pci_de return 1; } +/* + * PicoPower PT86C523 + */ +static int pirq_pico_get(struct pci_dev *router, struct pci_dev *dev, int pirq) +{ + outb(0x10 + ((pirq - 1) >> 1), 0x24); + return ((pirq - 1) & 1) ? (inb(0x26) >> 4) : (inb(0x26) & 0xf); +} + +static int pirq_pico_set(struct pci_dev *router, struct pci_dev *dev, int pirq, + int irq) +{ + unsigned int x; + outb(0x10 + ((pirq - 1) >> 1), 0x24); + x = inb(0x26); + x = ((pirq - 1) & 1) ? 
((x & 0x0f) | (irq << 4)) : ((x & 0xf0) | (irq)); + outb(x, 0x26); + return 1; +} + #ifdef CONFIG_PCI_BIOS static int pirq_bios_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) @@ -569,7 +589,7 @@ static __init int via_router_probe(struc /* FIXME: We should move some of the quirk fixup stuff here */ /* - * work arounds for some buggy BIOSes + * workarounds for some buggy BIOSes */ if (device == PCI_DEVICE_ID_VIA_82C586_0) { switch(router->device) { @@ -725,6 +745,24 @@ static __init int amd_router_probe(struc return 1; } +static __init int pico_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) +{ + switch (device) { + case PCI_DEVICE_ID_PICOPOWER_PT86C523: + r->name = "PicoPower PT86C523"; + r->get = pirq_pico_get; + r->set = pirq_pico_set; + return 1; + + case PCI_DEVICE_ID_PICOPOWER_PT86C523BBP: + r->name = "PicoPower PT86C523 rev. BB+"; + r->get = pirq_pico_get; + r->set = pirq_pico_set; + return 1; + } + return 0; +} + static __initdata struct irq_router_handler pirq_routers[] = { { PCI_VENDOR_ID_INTEL, intel_router_probe }, { PCI_VENDOR_ID_AL, ali_router_probe }, @@ -736,6 +774,7 @@ static __initdata struct irq_router_hand { PCI_VENDOR_ID_VLSI, vlsi_router_probe }, { PCI_VENDOR_ID_SERVERWORKS, serverworks_router_probe }, { PCI_VENDOR_ID_AMD, amd_router_probe }, + { PCI_VENDOR_ID_PICOPOWER, pico_router_probe }, /* Someone with docs needs to add the ATI Radeon IGP */ { 0, NULL } }; @@ -1014,7 +1053,7 @@ static void __init pcibios_fixup_irqs(vo * Work around broken HP Pavilion Notebooks which assign USB to * IRQ 9 even though it is actually wired to IRQ 11 */ -static int __init fix_broken_hp_bios_irq9(struct dmi_system_id *d) +static int __init fix_broken_hp_bios_irq9(const struct dmi_system_id *d) { if (!broken_hp_bios_irq9) { broken_hp_bios_irq9 = 1; @@ -1027,7 +1066,7 @@ static int __init fix_broken_hp_bios_irq * Work around broken Acer TravelMate 360 Notebooks which assign * Cardbus to IRQ 11 even though it is actually 
wired to IRQ 10 */ -static int __init fix_acer_tm360_irqrouting(struct dmi_system_id *d) +static int __init fix_acer_tm360_irqrouting(const struct dmi_system_id *d) { if (!acer_tm360_irqrouting) { acer_tm360_irqrouting = 1; Index: head-2008-12-01/drivers/acpi/processor_idle.c =================================================================== --- head-2008-12-01.orig/drivers/acpi/processor_idle.c 2008-12-01 11:11:03.000000000 +0100 +++ head-2008-12-01/drivers/acpi/processor_idle.c 2008-12-01 11:36:55.000000000 +0100 @@ -1742,6 +1742,13 @@ int acpi_processor_cst_has_changed(struc if (!pr->flags.power_setup_done) return -ENODEV; + if (processor_pm_external()) { + acpi_processor_get_power_info(pr); + processor_notify_external(pr, + PROCESSOR_PM_CHANGE, PM_TYPE_IDLE); + return ret; + } + cpuidle_pause_and_lock(); cpuidle_disable_device(&pr->power.dev); acpi_processor_get_power_info(pr); Index: head-2008-12-01/drivers/cpuidle/Kconfig =================================================================== --- head-2008-12-01.orig/drivers/cpuidle/Kconfig 2008-12-01 10:53:14.000000000 +0100 +++ head-2008-12-01/drivers/cpuidle/Kconfig 2008-12-01 11:36:55.000000000 +0100 @@ -1,6 +1,7 @@ config CPU_IDLE bool "CPU idle PM support" + depends on !PROCESSOR_EXTERNAL_CONTROL default ACPI help CPU idle is a generic framework for supporting software-controlled Index: head-2008-12-01/drivers/pci/msi-xen.c =================================================================== --- head-2008-12-01.orig/drivers/pci/msi-xen.c 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/drivers/pci/msi-xen.c 2008-12-01 11:36:55.000000000 +0100 @@ -264,6 +264,12 @@ static int msi_map_vector(struct pci_dev return msi_map_pirq_to_vector(dev, -1, entry_nr, table_base); } +static void pci_intx_for_msi(struct pci_dev *dev, int enable) +{ + if (!(dev->dev_flags & PCI_DEV_FLAGS_MSI_INTX_DISABLE_BUG)) + pci_intx(dev, enable); +} + #ifdef CONFIG_PM static void __pci_restore_msi_state(struct pci_dev *dev) { @@ 
-276,7 +282,7 @@ static void __pci_restore_msi_state(stru if (pirq < 0) return; - pci_intx(dev, 0); /* disable intx */ + pci_intx_for_msi(dev, 0); msi_set_enable(dev, 0); } @@ -313,7 +319,7 @@ static void __pci_restore_msix_state(str } spin_unlock_irqrestore(&msi_dev_entry->pirq_list_lock, flags); - pci_intx(dev, 0); /* disable intx */ + pci_intx_for_msi(dev, 0); msix_set_enable(dev, 0); } @@ -348,7 +354,7 @@ static int msi_capability_init(struct pc return -EBUSY; /* Set MSI enabled bits */ - pci_intx(dev, 0); /* disable intx */ + pci_intx_for_msi(dev, 0); msi_set_enable(dev, 1); dev->msi_enabled = 1; @@ -422,7 +428,7 @@ static int msix_capability_init(struct p return avail; } - pci_intx(dev, 0); /* disable intx */ + pci_intx_for_msi(dev, 0); msix_set_enable(dev, 1); dev->msix_enabled = 1; @@ -562,7 +568,7 @@ void pci_disable_msi(struct pci_dev* dev /* Disable MSI mode */ msi_set_enable(dev, 0); - pci_intx(dev, 1); /* enable intx */ + pci_intx_for_msi(dev, 1); dev->msi_enabled = 0; } EXPORT_SYMBOL(pci_disable_msi); @@ -701,7 +707,7 @@ void pci_disable_msix(struct pci_dev* de /* Disable MSI mode */ msix_set_enable(dev, 0); - pci_intx(dev, 1); /* enable intx */ + pci_intx_for_msi(dev, 1); dev->msix_enabled = 0; } EXPORT_SYMBOL(pci_disable_msix); Index: head-2008-12-01/drivers/xen/blkback/blkback.c =================================================================== --- head-2008-12-01.orig/drivers/xen/blkback/blkback.c 2008-12-01 11:36:47.000000000 +0100 +++ head-2008-12-01/drivers/xen/blkback/blkback.c 2008-12-01 11:36:55.000000000 +0100 @@ -269,13 +269,10 @@ static void __end_block_io_op(pending_re } } -static int end_block_io_op(struct bio *bio, unsigned int done, int error) +static void end_block_io_op(struct bio *bio, int error) { - if (bio->bi_size != 0) - return 1; __end_block_io_op(bio->bi_private, error); bio_put(bio); - return error; } Index: head-2008-12-01/drivers/xen/blkfront/blkfront.c =================================================================== 
--- head-2008-12-01.orig/drivers/xen/blkfront/blkfront.c 2008-12-01 11:36:47.000000000 +0100 +++ head-2008-12-01/drivers/xen/blkfront/blkfront.c 2008-12-01 11:36:55.000000000 +0100 @@ -587,9 +587,8 @@ static int blkif_queue_request(struct re struct blkfront_info *info = req->rq_disk->private_data; unsigned long buffer_mfn; blkif_request_t *ring_req; - struct bio *bio; struct bio_vec *bvec; - int idx; + struct req_iterator iter; unsigned long id; unsigned int fsect, lsect; int ref; @@ -623,34 +622,32 @@ static int blkif_queue_request(struct re ring_req->operation = BLKIF_OP_WRITE_BARRIER; ring_req->nr_segments = 0; - rq_for_each_bio (bio, req) { - bio_for_each_segment (bvec, bio, idx) { - BUG_ON(ring_req->nr_segments - == BLKIF_MAX_SEGMENTS_PER_REQUEST); - buffer_mfn = page_to_phys(bvec->bv_page) >> PAGE_SHIFT; - fsect = bvec->bv_offset >> 9; - lsect = fsect + (bvec->bv_len >> 9) - 1; - /* install a grant reference. */ - ref = gnttab_claim_grant_reference(&gref_head); - BUG_ON(ref == -ENOSPC); + rq_for_each_segment(bvec, req, iter) { + BUG_ON(ring_req->nr_segments + == BLKIF_MAX_SEGMENTS_PER_REQUEST); + buffer_mfn = page_to_phys(bvec->bv_page) >> PAGE_SHIFT; + fsect = bvec->bv_offset >> 9; + lsect = fsect + (bvec->bv_len >> 9) - 1; + /* install a grant reference. */ + ref = gnttab_claim_grant_reference(&gref_head); + BUG_ON(ref == -ENOSPC); + + gnttab_grant_foreign_access_ref( + ref, + info->xbdev->otherend_id, + buffer_mfn, + rq_data_dir(req) ? GTF_readonly : 0 ); + + info->shadow[id].frame[ring_req->nr_segments] = + mfn_to_pfn(buffer_mfn); + + ring_req->seg[ring_req->nr_segments] = + (struct blkif_request_segment) { + .gref = ref, + .first_sect = fsect, + .last_sect = lsect }; - gnttab_grant_foreign_access_ref( - ref, - info->xbdev->otherend_id, - buffer_mfn, - rq_data_dir(req) ? 
GTF_readonly : 0 ); - - info->shadow[id].frame[ring_req->nr_segments] = - mfn_to_pfn(buffer_mfn); - - ring_req->seg[ring_req->nr_segments] = - (struct blkif_request_segment) { - .gref = ref, - .first_sect = fsect, - .last_sect = lsect }; - - ring_req->nr_segments++; - } + ring_req->nr_segments++; } info->ring.req_prod_pvt++; Index: head-2008-12-01/drivers/xen/core/machine_kexec.c =================================================================== --- head-2008-12-01.orig/drivers/xen/core/machine_kexec.c 2008-10-13 13:43:45.000000000 +0200 +++ head-2008-12-01/drivers/xen/core/machine_kexec.c 2008-12-01 11:36:55.000000000 +0100 @@ -29,6 +29,10 @@ void __init xen_machine_kexec_setup_reso int k = 0; int rc; + if (strstr(boot_command_line, "crashkernel=")) + printk(KERN_WARNING "Ignoring crashkernel command line, " + "parameter will be supplied by xen\n"); + if (!is_initial_xendomain()) return; Index: head-2008-12-01/drivers/xen/core/smpboot.c =================================================================== --- head-2008-12-01.orig/drivers/xen/core/smpboot.c 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/drivers/xen/core/smpboot.c 2008-12-01 11:36:55.000000000 +0100 @@ -45,8 +45,8 @@ cpumask_t cpu_possible_map; EXPORT_SYMBOL(cpu_possible_map); cpumask_t cpu_initialized_map; -struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned; -EXPORT_SYMBOL(cpu_data); +DEFINE_PER_CPU(struct cpuinfo_x86, cpu_info); +EXPORT_PER_CPU_SYMBOL(cpu_info); #ifdef CONFIG_HOTPLUG_CPU DEFINE_PER_CPU(int, cpu_state) = { 0 }; @@ -59,13 +59,13 @@ static char callfunc_name[NR_CPUS][15]; u8 cpu_2_logical_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID }; -cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned; -cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned; -EXPORT_SYMBOL(cpu_core_map); +DEFINE_PER_CPU(cpumask_t, cpu_sibling_map); +DEFINE_PER_CPU(cpumask_t, cpu_core_map); +EXPORT_PER_CPU_SYMBOL(cpu_core_map); #if defined(__i386__) -u8 x86_cpu_to_apicid[NR_CPUS] = { [0 ... 
NR_CPUS-1] = 0xff }; -EXPORT_SYMBOL(x86_cpu_to_apicid); +DEFINE_PER_CPU(u8, x86_cpu_to_apicid) = BAD_APICID; +EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid); #endif void __init prefill_possible_map(void) @@ -90,25 +90,25 @@ void __init smp_alloc_memory(void) static inline void set_cpu_sibling_map(unsigned int cpu) { - cpu_data[cpu].phys_proc_id = cpu; - cpu_data[cpu].cpu_core_id = 0; + cpu_data(cpu).phys_proc_id = cpu; + cpu_data(cpu).cpu_core_id = 0; - cpu_sibling_map[cpu] = cpumask_of_cpu(cpu); - cpu_core_map[cpu] = cpumask_of_cpu(cpu); + per_cpu(cpu_sibling_map, cpu) = cpumask_of_cpu(cpu); + per_cpu(cpu_core_map, cpu) = cpumask_of_cpu(cpu); - cpu_data[cpu].booted_cores = 1; + cpu_data(cpu).booted_cores = 1; } static void remove_siblinginfo(unsigned int cpu) { - cpu_data[cpu].phys_proc_id = BAD_APICID; - cpu_data[cpu].cpu_core_id = BAD_APICID; + cpu_data(cpu).phys_proc_id = BAD_APICID; + cpu_data(cpu).cpu_core_id = BAD_APICID; - cpus_clear(cpu_sibling_map[cpu]); - cpus_clear(cpu_core_map[cpu]); + cpus_clear(per_cpu(cpu_sibling_map, cpu)); + cpus_clear(per_cpu(cpu_core_map, cpu)); - cpu_data[cpu].booted_cores = 0; + cpu_data(cpu).booted_cores = 0; } static int __cpuinit xen_smp_intr_init(unsigned int cpu) @@ -167,9 +167,9 @@ void __cpuinit cpu_bringup(void) { cpu_init(); #ifdef __i386__ - identify_secondary_cpu(cpu_data + smp_processor_id()); + identify_secondary_cpu(¤t_cpu_data); #else - identify_cpu(cpu_data + smp_processor_id()); + identify_cpu(¤t_cpu_data); #endif touch_softlockup_watchdog(); preempt_disable(); @@ -270,16 +270,16 @@ void __init smp_prepare_cpus(unsigned in if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, 0, &cpu_id) == 0) apicid = xen_vcpu_physid_to_x86_apicid(cpu_id.phys_id); boot_cpu_data.apicid = apicid; - cpu_data[0] = boot_cpu_data; + cpu_data(0) = boot_cpu_data; cpu_2_logical_apicid[0] = apicid; - x86_cpu_to_apicid[0] = apicid; + per_cpu(x86_cpu_to_apicid, 0) = apicid; current_thread_info()->cpu = 0; for (cpu = 0; cpu < NR_CPUS; cpu++) { - 
cpus_clear(cpu_sibling_map[cpu]); - cpus_clear(cpu_core_map[cpu]); + cpus_clear(per_cpu(cpu_sibling_map, cpu)); + cpus_clear(per_cpu(cpu_core_map, cpu)); } set_cpu_sibling_map(0); @@ -324,11 +324,12 @@ void __init smp_prepare_cpus(unsigned in apicid = cpu; if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, cpu, &cpu_id) == 0) apicid = xen_vcpu_physid_to_x86_apicid(cpu_id.phys_id); - cpu_data[cpu] = boot_cpu_data; - cpu_data[cpu].apicid = apicid; + cpu_data(cpu) = boot_cpu_data; + cpu_data(cpu).cpu_index = cpu; + cpu_data(cpu).apicid = apicid; cpu_2_logical_apicid[cpu] = apicid; - x86_cpu_to_apicid[cpu] = apicid; + per_cpu(x86_cpu_to_apicid, cpu) = apicid; #ifdef __x86_64__ cpu_pda(cpu)->pcurrent = idle; Index: head-2008-12-01/drivers/xen/netback/loopback.c =================================================================== --- head-2008-12-01.orig/drivers/xen/netback/loopback.c 2008-12-01 11:32:38.000000000 +0100 +++ head-2008-12-01/drivers/xen/netback/loopback.c 2008-12-01 11:36:55.000000000 +0100 @@ -285,9 +285,9 @@ static void __exit clean_loopback(int i) char dev_name[IFNAMSIZ]; sprintf(dev_name, "vif0.%d", i); - dev1 = dev_get_by_name(dev_name); + dev1 = dev_get_by_name(&init_net, dev_name); sprintf(dev_name, "veth%d", i); - dev2 = dev_get_by_name(dev_name); + dev2 = dev_get_by_name(&init_net, dev_name); if (dev1 && dev2) { unregister_netdev(dev2); unregister_netdev(dev1); Index: head-2008-12-01/drivers/xen/netback/netback.c =================================================================== --- head-2008-12-01.orig/drivers/xen/netback/netback.c 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/drivers/xen/netback/netback.c 2008-12-01 11:36:55.000000000 +0100 @@ -334,8 +334,8 @@ static void xen_network_done_notify(void { static struct net_device *eth0_dev = NULL; if (unlikely(eth0_dev == NULL)) - eth0_dev = __dev_get_by_name("eth0"); - netif_rx_schedule(eth0_dev); + eth0_dev = __dev_get_by_name(&init_net, "eth0"); + netif_rx_schedule(eth0_dev, ???); } /* * Add 
following to poll() function in NAPI driver (Tigon3 is example): Index: head-2008-12-01/drivers/xen/netback/xenbus.c =================================================================== --- head-2008-12-01.orig/drivers/xen/netback/xenbus.c 2008-12-01 11:36:07.000000000 +0100 +++ head-2008-12-01/drivers/xen/netback/xenbus.c 2008-12-01 11:36:55.000000000 +0100 @@ -149,12 +149,10 @@ fail: * and vif variables to the environment, for the benefit of the vif-* hotplug * scripts. */ -static int netback_uevent(struct xenbus_device *xdev, char **envp, - int num_envp, char *buffer, int buffer_size) +static int netback_uevent(struct xenbus_device *xdev, struct kobj_uevent_env *env) { struct backend_info *be = xdev->dev.driver_data; netif_t *netif = be->netif; - int i = 0, length = 0; char *val; DPRINTK("netback_uevent"); @@ -166,15 +164,11 @@ static int netback_uevent(struct xenbus_ return err; } else { - add_uevent_var(envp, num_envp, &i, buffer, buffer_size, - &length, "script=%s", val); + add_uevent_var(env, "script=%s", val); kfree(val); } - add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length, - "vif=%s", netif->dev->name); - - envp[i] = NULL; + add_uevent_var(env, "vif=%s", netif->dev->name); return 0; } Index: head-2008-12-01/drivers/xen/netfront/accel.c =================================================================== --- head-2008-12-01.orig/drivers/xen/netfront/accel.c 2008-08-07 12:44:36.000000000 +0200 +++ head-2008-12-01/drivers/xen/netfront/accel.c 2008-12-01 11:36:55.000000000 +0100 @@ -325,13 +325,13 @@ accelerator_set_vif_state_hooks(struct n DPRINTK("%p\n",vif_state); /* Make sure there are no data path operations going on */ - netif_poll_disable(vif_state->np->netdev); + napi_disable(&vif_state->np->napi); netif_tx_lock_bh(vif_state->np->netdev); vif_state->hooks = vif_state->np->accelerator->hooks; netif_tx_unlock_bh(vif_state->np->netdev); - netif_poll_enable(vif_state->np->netdev); + napi_enable(&vif_state->np->napi); } @@ -509,7 +509,7 @@ 
accelerator_remove_single_hook(struct ne struct netfront_accel_vif_state *vif_state) { /* Make sure there are no data path operations going on */ - netif_poll_disable(vif_state->np->netdev); + napi_disable(&vif_state->np->napi); netif_tx_lock_bh(vif_state->np->netdev); /* @@ -521,7 +521,7 @@ accelerator_remove_single_hook(struct ne vif_state->hooks = NULL; netif_tx_unlock_bh(vif_state->np->netdev); - netif_poll_enable(vif_state->np->netdev); + napi_enable(&vif_state->np->napi); } Index: head-2008-12-01/drivers/xen/netfront/netfront.c =================================================================== --- head-2008-12-01.orig/drivers/xen/netfront/netfront.c 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/drivers/xen/netfront/netfront.c 2008-12-01 11:36:55.000000000 +0100 @@ -626,6 +626,7 @@ static int network_open(struct net_devic struct netfront_info *np = netdev_priv(dev); memset(&np->stats, 0, sizeof(np->stats)); + napi_enable(&np->napi); spin_lock_bh(&np->rx_lock); if (netfront_carrier_ok(np)) { @@ -634,7 +635,7 @@ static int network_open(struct net_devic if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx)){ netfront_accelerator_call_stop_napi_irq(np, dev); - netif_rx_schedule(dev); + netif_rx_schedule(dev, &np->napi); } } spin_unlock_bh(&np->rx_lock); @@ -706,7 +707,7 @@ static void rx_refill_timeout(unsigned l netfront_accelerator_call_stop_napi_irq(np, dev); - netif_rx_schedule(dev); + netif_rx_schedule(dev, &np->napi); } static void network_alloc_rx_buffers(struct net_device *dev) @@ -1063,7 +1064,7 @@ static irqreturn_t netif_int(int irq, vo if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx)) { netfront_accelerator_call_stop_napi_irq(np, dev); - netif_rx_schedule(dev); + netif_rx_schedule(dev, &np->napi); dev->last_rx = jiffies; } } @@ -1316,16 +1317,17 @@ static int xennet_set_skb_gso(struct sk_ #endif } -static int netif_poll(struct net_device *dev, int *pbudget) +static int netif_poll(struct napi_struct *napi, int budget) { - struct netfront_info *np = 
netdev_priv(dev); + struct netfront_info *np = container_of(napi, struct netfront_info, napi); + struct net_device *dev = np->netdev; struct sk_buff *skb; struct netfront_rx_info rinfo; struct netif_rx_response *rx = &rinfo.rx; struct netif_extra_info *extras = rinfo.extras; RING_IDX i, rp; struct multicall_entry *mcl; - int work_done, budget, more_to_do = 1, accel_more_to_do = 1; + int work_done, more_to_do = 1, accel_more_to_do = 1; struct sk_buff_head rxq; struct sk_buff_head errq; struct sk_buff_head tmpq; @@ -1345,8 +1347,6 @@ static int netif_poll(struct net_device skb_queue_head_init(&errq); skb_queue_head_init(&tmpq); - if ((budget = *pbudget) > dev->quota) - budget = dev->quota; rp = np->rx.sring->rsp_prod; rmb(); /* Ensure we see queued responses up to 'rp'. */ @@ -1508,9 +1508,6 @@ err: accel_more_to_do = 0; } - *pbudget -= work_done; - dev->quota -= work_done; - if (work_done < budget) { local_irq_save(flags); @@ -1527,14 +1524,14 @@ err: } if (!more_to_do && !accel_more_to_do) - __netif_rx_complete(dev); + __netif_rx_complete(dev, napi); local_irq_restore(flags); } spin_unlock(&np->rx_lock); - return more_to_do | accel_more_to_do; + return work_done; } static void netif_release_tx_bufs(struct netfront_info *np) @@ -1681,6 +1678,7 @@ static int network_close(struct net_devi { struct netfront_info *np = netdev_priv(dev); netif_stop_queue(np->netdev); + napi_disable(&np->napi); return 0; } @@ -2088,16 +2086,14 @@ static struct net_device * __devinit cre netdev->hard_start_xmit = network_start_xmit; netdev->stop = network_close; netdev->get_stats = network_get_stats; - netdev->poll = netif_poll; + netif_napi_add(netdev, &np->napi, netif_poll, 64); netdev->set_multicast_list = network_set_multicast_list; netdev->uninit = netif_uninit; netdev->set_mac_address = xennet_set_mac_address; netdev->change_mtu = xennet_change_mtu; - netdev->weight = 64; netdev->features = NETIF_F_IP_CSUM; SET_ETHTOOL_OPS(netdev, &network_ethtool_ops); - SET_MODULE_OWNER(netdev); 
SET_NETDEV_DEV(netdev, &dev->dev); np->netdev = netdev; Index: head-2008-12-01/drivers/xen/netfront/netfront.h =================================================================== --- head-2008-12-01.orig/drivers/xen/netfront/netfront.h 2008-01-07 13:19:18.000000000 +0100 +++ head-2008-12-01/drivers/xen/netfront/netfront.h 2008-12-01 11:36:55.000000000 +0100 @@ -157,6 +157,8 @@ struct netfront_info { spinlock_t tx_lock; spinlock_t rx_lock; + struct napi_struct napi; + unsigned int irq; unsigned int copying_receiver; unsigned int carrier; Index: head-2008-12-01/drivers/xen/pciback/Makefile =================================================================== --- head-2008-12-01.orig/drivers/xen/pciback/Makefile 2008-07-21 11:00:33.000000000 +0200 +++ head-2008-12-01/drivers/xen/pciback/Makefile 2008-12-01 11:36:55.000000000 +0100 @@ -12,6 +12,4 @@ pciback-$(CONFIG_XEN_PCIDEV_BACKEND_SLOT pciback-$(CONFIG_XEN_PCIDEV_BACKEND_PASS) += passthrough.o pciback-$(CONFIG_XEN_PCIDEV_BACKEND_CONTROLLER) += controller.o -ifeq ($(CONFIG_XEN_PCIDEV_BE_DEBUG),y) -EXTRA_CFLAGS += -DDEBUG -endif +ccflags-$(CONFIG_XEN_PCIDEV_BE_DEBUG) += -DDEBUG Index: head-2008-12-01/drivers/xen/pcifront/Makefile =================================================================== --- head-2008-12-01.orig/drivers/xen/pcifront/Makefile 2007-06-12 13:13:45.000000000 +0200 +++ head-2008-12-01/drivers/xen/pcifront/Makefile 2008-12-01 11:36:55.000000000 +0100 @@ -2,6 +2,4 @@ obj-y += pcifront.o pcifront-y := pci_op.o xenbus.o pci.o -ifeq ($(CONFIG_XEN_PCIDEV_FE_DEBUG),y) -EXTRA_CFLAGS += -DDEBUG -endif +ccflags-$(CONFIG_XEN_PCIDEV_FE_DEBUG) += -DDEBUG Index: head-2008-12-01/drivers/xen/scsiback/emulate.c =================================================================== --- head-2008-12-01.orig/drivers/xen/scsiback/emulate.c 2008-08-07 12:44:36.000000000 +0200 +++ head-2008-12-01/drivers/xen/scsiback/emulate.c 2008-12-01 11:36:55.000000000 +0100 @@ -104,9 +104,10 @@ static void resp_not_supported_cmd(pendi 
} -static int __copy_to_sg(struct scatterlist *sg, unsigned int nr_sg, +static int __copy_to_sg(struct scatterlist *sgl, unsigned int nr_sg, void *buf, unsigned int buflen) { + struct scatterlist *sg; void *from = buf; void *to; unsigned int from_rest = buflen; @@ -115,8 +116,8 @@ static int __copy_to_sg(struct scatterli unsigned int i; unsigned long pfn; - for (i = 0; i < nr_sg; i++) { - if (sg->page == NULL) { + for_each_sg (sgl, sg, nr_sg, i) { + if (sg_page(sg) == NULL) { printk(KERN_WARNING "%s: inconsistent length field in " "scatterlist\n", __FUNCTION__); return -ENOMEM; @@ -125,7 +126,7 @@ static int __copy_to_sg(struct scatterli to_capa = sg->length; copy_size = min_t(unsigned int, to_capa, from_rest); - pfn = page_to_pfn(sg->page); + pfn = page_to_pfn(sg_page(sg)); to = pfn_to_kaddr(pfn) + (sg->offset); memcpy(to, from, copy_size); @@ -134,7 +135,6 @@ static int __copy_to_sg(struct scatterli return 0; } - sg++; from += copy_size; } @@ -143,9 +143,10 @@ static int __copy_to_sg(struct scatterli return -ENOMEM; } -static int __copy_from_sg(struct scatterlist *sg, unsigned int nr_sg, +static int __copy_from_sg(struct scatterlist *sgl, unsigned int nr_sg, void *buf, unsigned int buflen) { + struct scatterlist *sg; void *from; void *to = buf; unsigned int from_rest; @@ -154,8 +155,8 @@ static int __copy_from_sg(struct scatter unsigned int i; unsigned long pfn; - for (i = 0; i < nr_sg; i++) { - if (sg->page == NULL) { + for_each_sg (sgl, sg, nr_sg, i) { + if (sg_page(sg) == NULL) { printk(KERN_WARNING "%s: inconsistent length field in " "scatterlist\n", __FUNCTION__); return -ENOMEM; @@ -170,13 +171,11 @@ static int __copy_from_sg(struct scatter } copy_size = from_rest; - pfn = page_to_pfn(sg->page); + pfn = page_to_pfn(sg_page(sg)); from = pfn_to_kaddr(pfn) + (sg->offset); memcpy(to, from, copy_size); to_capa -= copy_size; - - sg++; to += copy_size; } Index: head-2008-12-01/drivers/xen/scsiback/scsiback.c 
=================================================================== --- head-2008-12-01.orig/drivers/xen/scsiback/scsiback.c 2008-12-01 11:32:38.000000000 +0100 +++ head-2008-12-01/drivers/xen/scsiback/scsiback.c 2008-12-01 11:36:55.000000000 +0100 @@ -247,6 +247,8 @@ static int scsiback_gnttab_data_map(vscs write = (data_dir == DMA_TO_DEVICE); if (nr_segments) { + struct scatterlist *sg; + /* free of (sgl) in fast_flush_area()*/ pending_req->sgl = kmalloc(sizeof(struct scatterlist) * nr_segments, GFP_KERNEL); @@ -255,6 +257,8 @@ static int scsiback_gnttab_data_map(vscs return -ENOMEM; } + sg_init_table(pending_req->sgl, nr_segments); + for (i = 0; i < nr_segments; i++) { flags = GNTMAP_host_map; if (write) @@ -267,7 +271,7 @@ static int scsiback_gnttab_data_map(vscs err = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, nr_segments); BUG_ON(err); - for (i = 0; i < nr_segments; i++) { + for_each_sg (pending_req->sgl, sg, nr_segments, i) { if (unlikely(map[i].status != 0)) { printk(KERN_ERR "scsiback: invalid buffer -- could not remap it\n"); map[i].handle = SCSIBACK_INVALID_HANDLE; @@ -283,15 +287,15 @@ static int scsiback_gnttab_data_map(vscs pending_req, i)) >> PAGE_SHIFT, FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT)); - pending_req->sgl[i].page = virt_to_page(vaddr(pending_req, i)); - pending_req->sgl[i].offset = ring_req->seg[i].offset; - pending_req->sgl[i].length = ring_req->seg[i].length; - data_len += pending_req->sgl[i].length; + sg_set_page(sg, virt_to_page(vaddr(pending_req, i)), + ring_req->seg[i].length, + ring_req->seg[i].offset); + data_len += sg->length; barrier(); - if (pending_req->sgl[i].offset >= PAGE_SIZE || - pending_req->sgl[i].length > PAGE_SIZE || - pending_req->sgl[i].offset + pending_req->sgl[i].length > PAGE_SIZE) + if (sg->offset >= PAGE_SIZE || + sg->length > PAGE_SIZE || + sg->offset + sg->length > PAGE_SIZE) err |= 1; } @@ -320,27 +324,14 @@ static int scsiback_merge_bio(struct req blk_queue_bounce(q, &bio); - if (!rq->bio) 
- blk_rq_bio_prep(q, rq, bio); - else if (!ll_back_merge_fn(q, rq, bio)) - return -EINVAL; - else { - rq->biotail->bi_next = bio; - rq->biotail = bio; - } - - return 0; + return blk_rq_append_bio(q, rq, bio); } /* quoted scsi_lib.c/scsi_bi_endio */ -static int scsiback_bi_endio(struct bio *bio, unsigned int bytes_done, int error) +static void scsiback_bi_endio(struct bio *bio, int error) { - if (bio->bi_size) - return 1; - bio_put(bio); - return 0; } @@ -351,16 +342,16 @@ static int request_map_sg(struct request struct request_queue *q = rq->q; int nr_pages; unsigned int nsegs = count; - unsigned int data_len = 0, len, bytes, off; + struct scatterlist *sg; struct page *page; struct bio *bio = NULL; int i, err, nr_vecs = 0; - for (i = 0; i < nsegs; i++) { - page = pending_req->sgl[i].page; - off = (unsigned int)pending_req->sgl[i].offset; - len = (unsigned int)pending_req->sgl[i].length; + for_each_sg (pending_req->sgl, sg, nsegs, i) { + page = sg_page(sg); + off = sg->offset; + len = sg->length; data_len += len; nr_pages = (len + off + PAGE_SIZE - 1) >> PAGE_SHIFT; @@ -388,7 +379,7 @@ static int request_map_sg(struct request if (bio->bi_vcnt >= nr_vecs) { err = scsiback_merge_bio(rq, bio); if (err) { - bio_endio(bio, bio->bi_size, 0); + bio_endio(bio, 0); goto free_bios; } bio = NULL; @@ -411,7 +402,7 @@ free_bios: /* * call endio instead of bio_put incase it was bounced */ - bio_endio(bio, bio->bi_size, 0); + bio_endio(bio, 0); } return err; Index: head-2008-12-01/drivers/xen/scsifront/scsifront.c =================================================================== --- head-2008-12-01.orig/drivers/xen/scsifront/scsifront.c 2008-12-01 11:36:47.000000000 +0100 +++ head-2008-12-01/drivers/xen/scsifront/scsifront.c 2008-12-01 11:36:55.000000000 +0100 @@ -246,11 +246,10 @@ static int map_data_for_request(struct v { grant_ref_t gref_head; struct page *page; - int err, i, ref, ref_cnt = 0; + int err, ref, ref_cnt = 0; int write = (sc->sc_data_direction == DMA_TO_DEVICE); 
- int nr_pages, off, len, bytes; + unsigned int i, nr_pages, off, len, bytes; unsigned long buffer_pfn; - unsigned int data_len = 0; if (sc->sc_data_direction == DMA_NONE) return 0; @@ -263,25 +262,31 @@ static int map_data_for_request(struct v if (sc->use_sg) { /* quoted scsi_lib.c/scsi_req_map_sg . */ - struct scatterlist *sg = (struct scatterlist *)sc->request_buffer; - nr_pages = (sc->request_bufflen + sg[0].offset + PAGE_SIZE - 1) >> PAGE_SHIFT; + struct scatterlist *sg, *sgl = (struct scatterlist *)sc->request_buffer; + unsigned int data_len = sc->request_bufflen; + nr_pages = (sc->request_bufflen + sgl->offset + PAGE_SIZE - 1) >> PAGE_SHIFT; if (nr_pages > VSCSIIF_SG_TABLESIZE) { printk(KERN_ERR "scsifront: Unable to map request_buffer for command!\n"); ref_cnt = (-E2BIG); goto big_to_sg; } - for (i = 0; i < sc->use_sg; i++) { - page = sg[i].page; - off = sg[i].offset; - len = sg[i].length; - data_len += len; + for_each_sg (sgl, sg, sc->use_sg, i) { + page = sg_page(sg); + off = sg->offset; + len = sg->length; buffer_pfn = page_to_phys(page) >> PAGE_SHIFT; - while (len > 0) { + while (len > 0 && data_len > 0) { + /* + * sg sends a scatterlist that is larger than + * the data_len it wants transferred for certain + * IO sizes + */ bytes = min_t(unsigned int, len, PAGE_SIZE - off); + bytes = min(bytes, data_len); ref = gnttab_claim_grant_reference(&gref_head); BUG_ON(ref == -ENOSPC); @@ -296,6 +301,7 @@ static int map_data_for_request(struct v buffer_pfn++; len -= bytes; + data_len -= bytes; off = 0; ref_cnt++; } Index: head-2008-12-01/drivers/xen/sfc_netback/accel_fwd.c =================================================================== --- head-2008-12-01.orig/drivers/xen/sfc_netback/accel_fwd.c 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/drivers/xen/sfc_netback/accel_fwd.c 2008-12-01 11:36:55.000000000 +0100 @@ -181,10 +181,11 @@ int netback_accel_fwd_add(const __u8 *ma unsigned long flags; cuckoo_hash_mac_key key = cuckoo_mac_to_key(mac); struct 
port_fwd *fwd_set = (struct port_fwd *)fwd_priv; + DECLARE_MAC_BUF(buf); BUG_ON(fwd_priv == NULL); - DPRINTK("Adding mac " MAC_FMT "\n", MAC_ARG(mac)); + DPRINTK("Adding mac %s\n", print_mac(buf, mac)); spin_lock_irqsave(&fwd_set->fwd_lock, flags); @@ -199,8 +200,8 @@ int netback_accel_fwd_add(const __u8 *ma if (cuckoo_hash_lookup(&fwd_set->fwd_hash_table, (cuckoo_hash_key *)(&key), &rc) != 0) { spin_unlock_irqrestore(&fwd_set->fwd_lock, flags); - EPRINTK("MAC address " MAC_FMT " already accelerated.\n", - MAC_ARG(mac)); + EPRINTK("MAC address %s already accelerated.\n", + print_mac(buf, mac)); return -EEXIST; } @@ -235,8 +236,9 @@ void netback_accel_fwd_remove(const __u8 unsigned long flags; cuckoo_hash_mac_key key = cuckoo_mac_to_key(mac); struct port_fwd *fwd_set = (struct port_fwd *)fwd_priv; + DECLARE_MAC_BUF(buf); - DPRINTK("Removing mac " MAC_FMT "\n", MAC_ARG(mac)); + DPRINTK("Removing mac %s\n", print_mac(buf, mac)); BUG_ON(fwd_priv == NULL); @@ -394,14 +396,16 @@ void netback_accel_tx_packet(struct sk_b if (is_broadcast_ether_addr(skb_mac_header(skb)) && packet_is_arp_reply(skb)) { + DECLARE_MAC_BUF(buf); + /* * update our fast path forwarding to reflect this * gratuitous ARP */ mac = skb_mac_header(skb)+ETH_ALEN; - DPRINTK("%s: found gratuitous ARP for " MAC_FMT "\n", - __FUNCTION__, MAC_ARG(mac)); + DPRINTK("%s: found gratuitous ARP for %s\n", + __FUNCTION__, print_mac(buf, mac)); spin_lock_irqsave(&fwd_set->fwd_lock, flags); /* Index: head-2008-12-01/drivers/xen/sfc_netback/accel_msg.c =================================================================== --- head-2008-12-01.orig/drivers/xen/sfc_netback/accel_msg.c 2008-02-20 09:32:49.000000000 +0100 +++ head-2008-12-01/drivers/xen/sfc_netback/accel_msg.c 2008-12-01 11:36:55.000000000 +0100 @@ -57,11 +57,11 @@ static void netback_accel_msg_tx_localma { unsigned long lock_state; struct net_accel_msg *msg; + DECLARE_MAC_BUF(buf); BUG_ON(bend == NULL || mac == NULL); - VPRINTK("Sending local mac message: " 
MAC_FMT "\n", - MAC_ARG((const char *)mac)); + VPRINTK("Sending local mac message: %s\n", print_mac(buf, mac)); msg = net_accel_msg_start_send(bend->shared_page, &bend->to_domU, &lock_state); Index: head-2008-12-01/drivers/xen/sfc_netfront/accel_msg.c =================================================================== --- head-2008-12-01.orig/drivers/xen/sfc_netfront/accel_msg.c 2008-12-01 11:29:05.000000000 +0100 +++ head-2008-12-01/drivers/xen/sfc_netfront/accel_msg.c 2008-12-01 11:36:55.000000000 +0100 @@ -41,11 +41,13 @@ static void vnic_start_interrupts(netfro /* Prime our interrupt */ spin_lock_irqsave(&vnic->irq_enabled_lock, flags); if (!netfront_accel_vi_enable_interrupts(vnic)) { + struct netfront_info *np = netdev_priv(vnic->net_dev); + /* Cripes, that was quick, better pass it up */ netfront_accel_disable_net_interrupts(vnic); vnic->irq_enabled = 0; NETFRONT_ACCEL_STATS_OP(vnic->stats.poll_schedule_count++); - netif_rx_schedule(vnic->net_dev); + netif_rx_schedule(vnic->net_dev, &np->napi); } else { /* * Nothing yet, make sure we get interrupts through @@ -72,6 +74,7 @@ static void vnic_stop_interrupts(netfron static void vnic_start_fastpath(netfront_accel_vnic *vnic) { struct net_device *net_dev = vnic->net_dev; + struct netfront_info *np = netdev_priv(net_dev); unsigned long flags; DPRINTK("%s\n", __FUNCTION__); @@ -80,9 +83,9 @@ static void vnic_start_fastpath(netfront vnic->tx_enabled = 1; spin_unlock_irqrestore(&vnic->tx_lock, flags); - netif_poll_disable(net_dev); + napi_disable(&np->napi); vnic->poll_enabled = 1; - netif_poll_enable(net_dev); + napi_enable(&np->napi); vnic_start_interrupts(vnic); } @@ -114,11 +117,11 @@ void vnic_stop_fastpath(netfront_accel_v spin_unlock_irqrestore(&vnic->tx_lock, flags1); /* Must prevent polls and hold lock to modify poll_enabled */ - netif_poll_disable(net_dev); + napi_disable(&np->napi); spin_lock_irqsave(&vnic->irq_enabled_lock, flags1); vnic->poll_enabled = 0; spin_unlock_irqrestore(&vnic->irq_enabled_lock, 
flags1); - netif_poll_enable(net_dev); + napi_enable(&np->napi); } @@ -326,8 +329,10 @@ static int vnic_process_localmac_msg(net cuckoo_hash_mac_key key; if (msg->u.localmac.flags & NET_ACCEL_MSG_ADD) { - DPRINTK("MAC has moved, could be local: " MAC_FMT "\n", - MAC_ARG(msg->u.localmac.mac)); + DECLARE_MAC_BUF(buf); + + DPRINTK("MAC has moved, could be local: %s\n", + print_mac(buf, msg->u.localmac.mac)); key = cuckoo_mac_to_key(msg->u.localmac.mac); spin_lock_irqsave(&vnic->table_lock, flags); /* Try to remove it, not a big deal if not there */ @@ -515,6 +520,8 @@ irqreturn_t netfront_accel_net_channel_i spin_lock_irqsave(&vnic->irq_enabled_lock, flags); if (vnic->irq_enabled) { + struct netfront_info *np = netdev_priv(net_dev); + netfront_accel_disable_net_interrupts(vnic); vnic->irq_enabled = 0; spin_unlock_irqrestore(&vnic->irq_enabled_lock, flags); @@ -527,7 +534,7 @@ irqreturn_t netfront_accel_net_channel_i vnic->stats.event_count_since_irq; vnic->stats.event_count_since_irq = 0; #endif - netif_rx_schedule(net_dev); + netif_rx_schedule(net_dev, &np->napi); } else { spin_unlock_irqrestore(&vnic->irq_enabled_lock, flags); Index: head-2008-12-01/drivers/xen/sfc_netfront/accel_vi.c =================================================================== --- head-2008-12-01.orig/drivers/xen/sfc_netfront/accel_vi.c 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/drivers/xen/sfc_netfront/accel_vi.c 2008-12-01 11:36:55.000000000 +0100 @@ -641,8 +641,10 @@ netfront_accel_vi_tx_post(netfront_accel (cuckoo_hash_key *)(&key), &value); if (!try_fastpath) { - VPRINTK("try fast path false for mac: " MAC_FMT "\n", - MAC_ARG(skb->data)); + DECLARE_MAC_BUF(buf); + + VPRINTK("try fast path false for mac: %s\n", + print_mac(buf, skb->data)); return NETFRONT_ACCEL_STATUS_CANT; } @@ -768,9 +770,10 @@ static void netfront_accel_vi_rx_comple if (compare_ether_addr(skb->data, vnic->mac)) { struct iphdr *ip = (struct iphdr *)(skb->data + ETH_HLEN); u16 port; + DECLARE_MAC_BUF(buf); 
- DPRINTK("%s: saw wrong MAC address " MAC_FMT "\n", - __FUNCTION__, MAC_ARG(skb->data)); + DPRINTK("%s: saw wrong MAC address %s\n", + __FUNCTION__, print_mac(buf, skb->data)); if (ip->protocol == IPPROTO_TCP) { struct tcphdr *tcp = (struct tcphdr *) Index: head-2008-12-01/drivers/xen/sfc_netutil/accel_util.h =================================================================== --- head-2008-12-01.orig/drivers/xen/sfc_netutil/accel_util.h 2008-02-20 09:32:49.000000000 +0100 +++ head-2008-12-01/drivers/xen/sfc_netutil/accel_util.h 2008-12-01 11:36:55.000000000 +0100 @@ -63,9 +63,6 @@ DPRINTK("%s at %s:%d\n", #exp, __FILE__, __LINE__); \ } while(0) -#define MAC_FMT "%.2x:%.2x:%.2x:%.2x:%.2x:%.2x" -#define MAC_ARG(_mac) (_mac)[0], (_mac)[1], (_mac)[2], (_mac)[3], (_mac)[4], (_mac)[5] - #include /*! Map a set of pages from another domain Index: head-2008-12-01/drivers/xen/xenbus/xenbus_probe.c =================================================================== --- head-2008-12-01.orig/drivers/xen/xenbus/xenbus_probe.c 2008-12-01 11:36:47.000000000 +0100 +++ head-2008-12-01/drivers/xen/xenbus/xenbus_probe.c 2008-12-01 11:36:55.000000000 +0100 @@ -174,11 +174,9 @@ static int read_backend_details(struct x } #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16) && (defined(CONFIG_XEN) || defined(MODULE)) -static int xenbus_uevent_frontend(struct device *dev, char **envp, - int num_envp, char *buffer, int buffer_size) +static int xenbus_uevent_frontend(struct device *dev, struct kobj_uevent_env *env) { struct xenbus_device *xdev; - int length = 0, i = 0; if (dev == NULL) return -ENODEV; @@ -187,12 +185,9 @@ static int xenbus_uevent_frontend(struct return -ENODEV; /* stuff we want to pass to /sbin/hotplug */ - add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length, - "XENBUS_TYPE=%s", xdev->devicetype); - add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length, - "XENBUS_PATH=%s", xdev->nodename); - add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length, 
- "MODALIAS=xen:%s", xdev->devicetype); + add_uevent_var(env, "XENBUS_TYPE=%s", xdev->devicetype); + add_uevent_var(env, "XENBUS_PATH=%s", xdev->nodename); + add_uevent_var(env, "MODALIAS=xen:%s", xdev->devicetype); return 0; } Index: head-2008-12-01/drivers/xen/xenbus/xenbus_probe_backend.c =================================================================== --- head-2008-12-01.orig/drivers/xen/xenbus/xenbus_probe_backend.c 2008-12-01 11:36:47.000000000 +0100 +++ head-2008-12-01/drivers/xen/xenbus/xenbus_probe_backend.c 2008-12-01 11:36:55.000000000 +0100 @@ -60,8 +60,7 @@ #include #endif -static int xenbus_uevent_backend(struct device *dev, char **envp, - int num_envp, char *buffer, int buffer_size); +static int xenbus_uevent_backend(struct device *dev, struct kobj_uevent_env *env); static int xenbus_probe_backend(const char *type, const char *domid); extern int read_otherend_details(struct xenbus_device *xendev, @@ -128,13 +127,10 @@ static struct xen_bus_type xenbus_backen }, }; -static int xenbus_uevent_backend(struct device *dev, char **envp, - int num_envp, char *buffer, int buffer_size) +static int xenbus_uevent_backend(struct device *dev, struct kobj_uevent_env *env) { struct xenbus_device *xdev; struct xenbus_driver *drv; - int i = 0; - int length = 0; DPRINTK(""); @@ -146,27 +142,16 @@ static int xenbus_uevent_backend(struct return -ENODEV; /* stuff we want to pass to /sbin/hotplug */ - add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length, - "XENBUS_TYPE=%s", xdev->devicetype); + add_uevent_var(env, "XENBUS_TYPE=%s", xdev->devicetype); - add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length, - "XENBUS_PATH=%s", xdev->nodename); + add_uevent_var(env, "XENBUS_PATH=%s", xdev->nodename); - add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length, - "XENBUS_BASE_PATH=%s", xenbus_backend.root); - - /* terminate, set to next free slot, shrink available space */ - envp[i] = NULL; - envp = &envp[i]; - num_envp -= i; - buffer = 
&buffer[length]; - buffer_size -= length; + add_uevent_var(env, "XENBUS_BASE_PATH=%s", xenbus_backend.root); if (dev->driver) { drv = to_xenbus_driver(dev->driver); if (drv && drv->uevent) - return drv->uevent(xdev, envp, num_envp, buffer, - buffer_size); + return drv->uevent(xdev, env); } return 0; Index: head-2008-12-01/fs/xfs/linux-2.6/xfs_buf.c =================================================================== --- head-2008-12-01.orig/fs/xfs/linux-2.6/xfs_buf.c 2008-12-01 10:53:14.000000000 +0100 +++ head-2008-12-01/fs/xfs/linux-2.6/xfs_buf.c 2008-12-01 11:36:55.000000000 +0100 @@ -187,7 +187,7 @@ free_address( { a_list_t *aentry; -#ifdef CONFIG_XEN +#if defined(CONFIG_XEN) || defined(CONFIG_PARAVIRT_XEN) /* * Xen needs to be able to make sure it can get an exclusive * RO mapping of pages it wants to turn into a pagetable. If Index: head-2008-12-01/include/asm-x86/mach-xen/asm/agp.h =================================================================== --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/agp.h 2007-06-22 09:08:06.000000000 +0200 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/agp.h 2008-12-01 11:36:55.000000000 +0100 @@ -1,20 +1,22 @@ -#ifndef AGP_H -#define AGP_H 1 +#ifndef _ASM_X86_AGP_H +#define _ASM_X86_AGP_H #include #include #include -/* - * Functions to keep the agpgart mappings coherent with the MMU. - * The GART gives the CPU a physical alias of pages in memory. The alias region is - * mapped uncacheable. Make sure there are no conflicting mappings - * with different cachability attributes for the same page. This avoids - * data corruption on some CPUs. +/* + * Functions to keep the agpgart mappings coherent with the MMU. The + * GART gives the CPU a physical alias of pages in memory. The alias + * region is mapped uncacheable. Make sure there are no conflicting + * mappings with different cachability attributes for the same + * page. This avoids data corruption on some CPUs. 
*/ -/* Caller's responsibility to call global_flush_tlb() for - * performance reasons */ +/* + * Caller's responsibility to call global_flush_tlb() for performance + * reasons + */ #define map_page_into_agp(page) ( \ xen_create_contiguous_region((unsigned long)page_address(page), 0, 32) \ ?: change_page_attr(page, 1, PAGE_KERNEL_NOCACHE)) @@ -24,9 +26,11 @@ change_page_attr(page, 1, PAGE_KERNEL)) #define flush_agp_mappings() global_flush_tlb() -/* Could use CLFLUSH here if the cpu supports it. But then it would - need to be called for each cacheline of the whole page so it may not be - worth it. Would need a page for it. */ +/* + * Could use CLFLUSH here if the cpu supports it. But then it would + * need to be called for each cacheline of the whole page so it may + * not be worth it. Would need a page for it. + */ #define flush_agp_cache() wbinvd() /* Convert a physical address to an address suitable for the GART. */ Index: head-2008-12-01/include/asm-x86/mach-xen/asm/desc.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/desc.h 2008-12-01 11:36:55.000000000 +0100 @@ -0,0 +1,5 @@ +#ifdef CONFIG_X86_32 +# include "desc_32.h" +#else +# include "desc_64.h" +#endif Index: head-2008-12-01/include/asm-x86/mach-xen/asm/desc_64.h =================================================================== --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/desc_64.h 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/desc_64.h 2008-12-01 11:36:55.000000000 +0100 @@ -34,6 +34,18 @@ static inline void clear_LDT(void) put_cpu(); } +#ifndef CONFIG_X86_NO_TSS +static inline unsigned long __store_tr(void) +{ + unsigned long tr; + + asm volatile ("str %w0":"=r" (tr)); + return tr; +} + +#define store_tr(tr) (tr) = __store_tr() +#endif + /* * This is the ldt that every process will get unless we need * something other than this. 
@@ -47,6 +59,18 @@ extern struct desc_ptr cpu_gdt_descr[]; /* the cpu gdt accessor */ #define cpu_gdt(_cpu) ((struct desc_struct *)cpu_gdt_descr[_cpu].address) +#ifndef CONFIG_XEN +static inline void load_gdt(const struct desc_ptr *ptr) +{ + asm volatile("lgdt %w0"::"m" (*ptr)); +} + +static inline void store_gdt(struct desc_ptr *ptr) +{ + asm("sgdt %w0":"=m" (*ptr)); +} +#endif + static inline void _set_gate(void *adr, unsigned type, unsigned long func, unsigned dpl, unsigned ist) { struct gate_struct s; @@ -87,6 +111,16 @@ static inline void set_system_gate_ist(i { _set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 3, ist); } + +static inline void load_idt(const struct desc_ptr *ptr) +{ + asm volatile("lidt %w0"::"m" (*ptr)); +} + +static inline void store_idt(struct desc_ptr *dtr) +{ + asm("sidt %w0":"=m" (*dtr)); +} #endif static inline void set_tssldt_descriptor(void *ptr, unsigned long tss, unsigned type, Index: head-2008-12-01/include/asm-x86/mach-xen/asm/dma-mapping.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/dma-mapping.h 2008-12-01 11:36:55.000000000 +0100 @@ -0,0 +1,5 @@ +#ifdef CONFIG_X86_32 +# include "dma-mapping_32.h" +#else +# include "dma-mapping_64.h" +#endif Index: head-2008-12-01/include/asm-x86/mach-xen/asm/dma-mapping_32.h =================================================================== --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/dma-mapping_32.h 2008-12-01 11:32:38.000000000 +0100 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/dma-mapping_32.h 2008-12-01 11:36:55.000000000 +0100 @@ -7,9 +7,9 @@ */ #include +#include #include #include -#include #include static inline int Index: head-2008-12-01/include/asm-x86/mach-xen/asm/dma-mapping_64.h =================================================================== --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/dma-mapping_64.h 2008-12-01 
11:36:13.000000000 +0100 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/dma-mapping_64.h 2008-12-01 11:36:55.000000000 +0100 @@ -6,8 +6,7 @@ * documentation. */ - -#include +#include #include struct dma_mapping_ops { @@ -203,4 +202,4 @@ extern int panic_on_overflow; #endif /* _X8664_DMA_MAPPING_H */ -#include +#include "dma-mapping_32.h" Index: head-2008-12-01/include/asm-x86/mach-xen/asm/fixmap.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/fixmap.h 2008-12-01 11:36:55.000000000 +0100 @@ -0,0 +1,5 @@ +#ifdef CONFIG_X86_32 +# include "fixmap_32.h" +#else +# include "fixmap_64.h" +#endif Index: head-2008-12-01/include/asm-x86/mach-xen/asm/hypercall.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/hypercall.h 2008-12-01 11:36:55.000000000 +0100 @@ -0,0 +1,404 @@ +/****************************************************************************** + * hypercall.h + * + * Linux-specific hypervisor handling. 
+ * + * Copyright (c) 2002-2004, K A Fraser + * + * 64-bit updates: + * Benjamin Liu + * Jun Nakajima + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#ifndef __HYPERCALL_H__ +#define __HYPERCALL_H__ + +#ifndef __HYPERVISOR_H__ +# error "please don't include this file directly" +#endif + +#if CONFIG_XEN_COMPAT <= 0x030002 +# include /* memcpy() */ +#endif + +#ifdef CONFIG_XEN +#define HYPERCALL_ASM_OPERAND "%c" +#define HYPERCALL_LOCATION(op) (hypercall_page + (op) * 32) +#define HYPERCALL_C_OPERAND(name) "i" (HYPERCALL_LOCATION(__HYPERVISOR_##name)) +#else +#define HYPERCALL_ASM_OPERAND "*%" +#define HYPERCALL_LOCATION(op) (hypercall_stubs + (op) * 32) +#define HYPERCALL_C_OPERAND(name) "g" (HYPERCALL_LOCATION(__HYPERVISOR_##name)) +#endif + +#define HYPERCALL_ARG(arg, n) \ + register typeof((arg)+0) __arg##n asm(HYPERCALL_arg##n) = (arg) + +#define _hypercall0(type, name) \ +({ \ + type __res; \ + asm volatile ( \ + "call " HYPERCALL_ASM_OPERAND "1" \ + : "=a" (__res) \ + : HYPERCALL_C_OPERAND(name) \ + : "memory" ); \ + __res; \ +}) + +#define _hypercall1(type, name, arg) \ +({ \ + type __res; \ + HYPERCALL_ARG(arg, 1); \ + asm volatile ( \ + "call " HYPERCALL_ASM_OPERAND "2" \ + : "=a" (__res), "+r" (__arg1) \ + : HYPERCALL_C_OPERAND(name) \ + : "memory" ); \ + __res; \ +}) + +#define _hypercall2(type, name, a1, a2) \ +({ \ + type __res; \ + HYPERCALL_ARG(a1, 1); \ + HYPERCALL_ARG(a2, 2); \ + asm volatile ( \ + "call " HYPERCALL_ASM_OPERAND "3" \ + : "=a" (__res), "+r" (__arg1), "+r" (__arg2) \ + : HYPERCALL_C_OPERAND(name) \ + : "memory" ); \ + __res; \ +}) + +#define _hypercall3(type, name, a1, a2, a3) \ +({ \ + type __res; \ + HYPERCALL_ARG(a1, 1); \ + HYPERCALL_ARG(a2, 2); \ + HYPERCALL_ARG(a3, 3); \ + asm volatile ( \ + "call " HYPERCALL_ASM_OPERAND "4" \ + : "=a" (__res), "+r" (__arg1), \ + "+r" (__arg2), "+r" (__arg3) \ + : HYPERCALL_C_OPERAND(name) \ + : "memory" ); \ + __res; \ +}) + +#define _hypercall4(type, name, a1, a2, a3, a4) \ +({ \ + type __res; \ + HYPERCALL_ARG(a1, 1); \ + HYPERCALL_ARG(a2, 2); \ + HYPERCALL_ARG(a3, 3); \ + HYPERCALL_ARG(a4, 4); \ + asm volatile ( \ + "call " 
HYPERCALL_ASM_OPERAND "5" \ + : "=a" (__res), "+r" (__arg1), "+r" (__arg2), \ + "+r" (__arg3), "+r" (__arg4) \ + : HYPERCALL_C_OPERAND(name) \ + : "memory" ); \ + __res; \ +}) + +#define _hypercall5(type, name, a1, a2, a3, a4, a5) \ +({ \ + type __res; \ + HYPERCALL_ARG(a1, 1); \ + HYPERCALL_ARG(a2, 2); \ + HYPERCALL_ARG(a3, 3); \ + HYPERCALL_ARG(a4, 4); \ + HYPERCALL_ARG(a5, 5); \ + asm volatile ( \ + "call " HYPERCALL_ASM_OPERAND "6" \ + : "=a" (__res), "+r" (__arg1), "+r" (__arg2), \ + "+r" (__arg3), "+r" (__arg4), "+r" (__arg5) \ + : HYPERCALL_C_OPERAND(name) \ + : "memory" ); \ + __res; \ +}) + +#define _hypercall(type, op, a1, a2, a3, a4, a5) \ +({ \ + type __res; \ + HYPERCALL_ARG(a1, 1); \ + HYPERCALL_ARG(a2, 2); \ + HYPERCALL_ARG(a3, 3); \ + HYPERCALL_ARG(a4, 4); \ + HYPERCALL_ARG(a5, 5); \ + asm volatile ( \ + "call *%6" \ + : "=a" (__res), "+r" (__arg1), "+r" (__arg2), \ + "+r" (__arg3), "+r" (__arg4), "+r" (__arg5) \ + : "g" (HYPERCALL_LOCATION(op)) \ + : "memory" ); \ + __res; \ +}) + +#ifdef CONFIG_X86_32 +# include "hypercall_32.h" +#else +# include "hypercall_64.h" +#endif + +static inline int __must_check +HYPERVISOR_set_trap_table( + const trap_info_t *table) +{ + return _hypercall1(int, set_trap_table, table); +} + +static inline int __must_check +HYPERVISOR_mmu_update( + mmu_update_t *req, unsigned int count, unsigned int *success_count, + domid_t domid) +{ + if (arch_use_lazy_mmu_mode()) + return xen_multi_mmu_update(req, count, success_count, domid); + return _hypercall4(int, mmu_update, req, count, success_count, domid); +} + +static inline int __must_check +HYPERVISOR_mmuext_op( + struct mmuext_op *op, unsigned int count, unsigned int *success_count, + domid_t domid) +{ + if (arch_use_lazy_mmu_mode()) + return xen_multi_mmuext_op(op, count, success_count, domid); + return _hypercall4(int, mmuext_op, op, count, success_count, domid); +} + +static inline int __must_check +HYPERVISOR_set_gdt( + unsigned long *frame_list, unsigned int entries) 
+{ + return _hypercall2(int, set_gdt, frame_list, entries); +} + +static inline int __must_check +HYPERVISOR_stack_switch( + unsigned long ss, unsigned long esp) +{ + return _hypercall2(int, stack_switch, ss, esp); +} + +static inline int +HYPERVISOR_fpu_taskswitch( + int set) +{ + return _hypercall1(int, fpu_taskswitch, set); +} + +#if CONFIG_XEN_COMPAT <= 0x030002 +static inline int __must_check +HYPERVISOR_sched_op_compat( + int cmd, unsigned long arg) +{ + return _hypercall2(int, sched_op_compat, cmd, arg); +} +#endif + +static inline int __must_check +HYPERVISOR_sched_op( + int cmd, void *arg) +{ + return _hypercall2(int, sched_op, cmd, arg); +} + +static inline int __must_check +HYPERVISOR_platform_op( + struct xen_platform_op *platform_op) +{ + platform_op->interface_version = XENPF_INTERFACE_VERSION; + return _hypercall1(int, platform_op, platform_op); +} + +static inline int __must_check +HYPERVISOR_set_debugreg( + unsigned int reg, unsigned long value) +{ + return _hypercall2(int, set_debugreg, reg, value); +} + +static inline unsigned long __must_check +HYPERVISOR_get_debugreg( + unsigned int reg) +{ + return _hypercall1(unsigned long, get_debugreg, reg); +} + +static inline int __must_check +HYPERVISOR_memory_op( + unsigned int cmd, void *arg) +{ + if (arch_use_lazy_mmu_mode()) + xen_multicall_flush(false); + return _hypercall2(int, memory_op, cmd, arg); +} + +static inline int __must_check +HYPERVISOR_multicall( + multicall_entry_t *call_list, unsigned int nr_calls) +{ + return _hypercall2(int, multicall, call_list, nr_calls); +} + +static inline int __must_check +HYPERVISOR_event_channel_op( + int cmd, void *arg) +{ + int rc = _hypercall2(int, event_channel_op, cmd, arg); + +#if CONFIG_XEN_COMPAT <= 0x030002 + if (unlikely(rc == -ENOSYS)) { + struct evtchn_op op; + op.cmd = cmd; + memcpy(&op.u, arg, sizeof(op.u)); + rc = _hypercall1(int, event_channel_op_compat, &op); + memcpy(arg, &op.u, sizeof(op.u)); + } +#endif + + return rc; +} + +static inline 
int __must_check +HYPERVISOR_xen_version( + int cmd, void *arg) +{ + return _hypercall2(int, xen_version, cmd, arg); +} + +static inline int __must_check +HYPERVISOR_console_io( + int cmd, unsigned int count, char *str) +{ + return _hypercall3(int, console_io, cmd, count, str); +} + +static inline int __must_check +HYPERVISOR_physdev_op( + int cmd, void *arg) +{ + int rc = _hypercall2(int, physdev_op, cmd, arg); + +#if CONFIG_XEN_COMPAT <= 0x030002 + if (unlikely(rc == -ENOSYS)) { + struct physdev_op op; + op.cmd = cmd; + memcpy(&op.u, arg, sizeof(op.u)); + rc = _hypercall1(int, physdev_op_compat, &op); + memcpy(arg, &op.u, sizeof(op.u)); + } +#endif + + return rc; +} + +static inline int __must_check +HYPERVISOR_grant_table_op( + unsigned int cmd, void *uop, unsigned int count) +{ + if (arch_use_lazy_mmu_mode()) + xen_multicall_flush(false); + return _hypercall3(int, grant_table_op, cmd, uop, count); +} + +static inline int __must_check +HYPERVISOR_vm_assist( + unsigned int cmd, unsigned int type) +{ + return _hypercall2(int, vm_assist, cmd, type); +} + +static inline int __must_check +HYPERVISOR_vcpu_op( + int cmd, unsigned int vcpuid, void *extra_args) +{ + return _hypercall3(int, vcpu_op, cmd, vcpuid, extra_args); +} + +static inline int __must_check +HYPERVISOR_suspend( + unsigned long srec) +{ + struct sched_shutdown sched_shutdown = { + .reason = SHUTDOWN_suspend + }; + + int rc = _hypercall3(int, sched_op, SCHEDOP_shutdown, + &sched_shutdown, srec); + +#if CONFIG_XEN_COMPAT <= 0x030002 + if (rc == -ENOSYS) + rc = _hypercall3(int, sched_op_compat, SCHEDOP_shutdown, + SHUTDOWN_suspend, srec); +#endif + + return rc; +} + +#if CONFIG_XEN_COMPAT <= 0x030002 +static inline int +HYPERVISOR_nmi_op( + unsigned long op, void *arg) +{ + return _hypercall2(int, nmi_op, op, arg); +} +#endif + +#ifndef CONFIG_XEN +static inline unsigned long __must_check +HYPERVISOR_hvm_op( + int op, void *arg) +{ + return _hypercall2(unsigned long, hvm_op, op, arg); +} +#endif + +static 
inline int __must_check +HYPERVISOR_callback_op( + int cmd, const void *arg) +{ + return _hypercall2(int, callback_op, cmd, arg); +} + +static inline int __must_check +HYPERVISOR_xenoprof_op( + int op, void *arg) +{ + return _hypercall2(int, xenoprof_op, op, arg); +} + +static inline int __must_check +HYPERVISOR_kexec_op( + unsigned long op, void *args) +{ + return _hypercall2(int, kexec_op, op, args); +} + +#endif /* __HYPERCALL_H__ */ Index: head-2008-12-01/include/asm-x86/mach-xen/asm/hypercall_32.h =================================================================== --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/hypercall_32.h 2008-12-01 11:29:05.000000000 +0100 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/hypercall_32.h 2008-12-01 11:36:55.000000000 +0100 @@ -1,191 +1,10 @@ -/****************************************************************************** - * hypercall.h - * - * Linux-specific hypervisor handling. - * - * Copyright (c) 2002-2004, K A Fraser - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation; or, when distributed - * separately from the Linux kernel or incorporated into other - * software packages, subject to the following license: - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this source file (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, modify, - * merge, publish, distribute, sublicense, and/or sell copies of the Software, - * and to permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#ifndef __HYPERCALL_H__ -#define __HYPERCALL_H__ - -#include /* memcpy() */ -#include - -#ifndef __HYPERVISOR_H__ -# error "please don't include this file directly" -#endif - -#ifdef CONFIG_XEN -#define HYPERCALL_STR(name) \ - "call hypercall_page + ("__stringify(__HYPERVISOR_##name)" * 32)" -#else -#define HYPERCALL_STR(name) \ - "mov hypercall_stubs,%%eax; " \ - "add $("__stringify(__HYPERVISOR_##name)" * 32),%%eax; "\ - "call *%%eax" -#endif - -#define _hypercall0(type, name) \ -({ \ - type __res; \ - asm volatile ( \ - HYPERCALL_STR(name) \ - : "=a" (__res) \ - : \ - : "memory" ); \ - __res; \ -}) - -#define _hypercall1(type, name, a1) \ -({ \ - type __res; \ - long __ign1; \ - asm volatile ( \ - HYPERCALL_STR(name) \ - : "=a" (__res), "=b" (__ign1) \ - : "1" ((long)(a1)) \ - : "memory" ); \ - __res; \ -}) - -#define _hypercall2(type, name, a1, a2) \ -({ \ - type __res; \ - long __ign1, __ign2; \ - asm volatile ( \ - HYPERCALL_STR(name) \ - : "=a" (__res), "=b" (__ign1), "=c" (__ign2) \ - : "1" ((long)(a1)), "2" ((long)(a2)) \ - : "memory" ); \ - __res; \ -}) - -#define _hypercall3(type, name, a1, a2, a3) \ -({ \ - type __res; \ - long __ign1, __ign2, __ign3; \ - asm volatile ( \ - HYPERCALL_STR(name) \ - : "=a" (__res), "=b" (__ign1), "=c" (__ign2), \ - "=d" (__ign3) \ - : "1" ((long)(a1)), "2" ((long)(a2)), \ - "3" ((long)(a3)) \ - : "memory" ); \ - __res; \ -}) - -#define _hypercall4(type, name, a1, a2, a3, a4) \ -({ \ - type __res; \ - long __ign1, __ign2, __ign3, 
__ign4; \ - asm volatile ( \ - HYPERCALL_STR(name) \ - : "=a" (__res), "=b" (__ign1), "=c" (__ign2), \ - "=d" (__ign3), "=S" (__ign4) \ - : "1" ((long)(a1)), "2" ((long)(a2)), \ - "3" ((long)(a3)), "4" ((long)(a4)) \ - : "memory" ); \ - __res; \ -}) - -#define _hypercall5(type, name, a1, a2, a3, a4, a5) \ -({ \ - type __res; \ - long __ign1, __ign2, __ign3, __ign4, __ign5; \ - asm volatile ( \ - HYPERCALL_STR(name) \ - : "=a" (__res), "=b" (__ign1), "=c" (__ign2), \ - "=d" (__ign3), "=S" (__ign4), "=D" (__ign5) \ - : "1" ((long)(a1)), "2" ((long)(a2)), \ - "3" ((long)(a3)), "4" ((long)(a4)), \ - "5" ((long)(a5)) \ - : "memory" ); \ - __res; \ -}) - -#define _hypercall(type, op, a1, a2, a3, a4, a5) \ -({ \ - type __res; \ - register typeof((a1)+0) __arg1 asm("ebx") = (a1); \ - register typeof((a2)+0) __arg2 asm("ecx") = (a2); \ - register typeof((a3)+0) __arg3 asm("edx") = (a3); \ - register typeof((a4)+0) __arg4 asm("esi") = (a4); \ - register typeof((a5)+0) __arg5 asm("edi") = (a5); \ - asm volatile ( \ - "call *%6" \ - : "=a" (__res), "+r" (__arg1), "+r" (__arg2), \ - "+r" (__arg3), "+r" (__arg4), "+r" (__arg5) \ - : "0" (hypercall_page + (op) * 32) \ - : "memory" ); \ - __res; \ -}) - -static inline int __must_check -HYPERVISOR_set_trap_table( - const trap_info_t *table) -{ - return _hypercall1(int, set_trap_table, table); -} - -static inline int __must_check -HYPERVISOR_mmu_update( - mmu_update_t *req, unsigned int count, unsigned int *success_count, - domid_t domid) -{ - if (arch_use_lazy_mmu_mode()) - return xen_multi_mmu_update(req, count, success_count, domid); - return _hypercall4(int, mmu_update, req, count, success_count, domid); -} - -static inline int __must_check -HYPERVISOR_mmuext_op( - struct mmuext_op *op, unsigned int count, unsigned int *success_count, - domid_t domid) -{ - if (arch_use_lazy_mmu_mode()) - return xen_multi_mmuext_op(op, count, success_count, domid); - return _hypercall4(int, mmuext_op, op, count, success_count, domid); -} - 
-static inline int __must_check -HYPERVISOR_set_gdt( - unsigned long *frame_list, unsigned int entries) -{ - return _hypercall2(int, set_gdt, frame_list, entries); -} - -static inline int __must_check -HYPERVISOR_stack_switch( - unsigned long ss, unsigned long esp) -{ - return _hypercall2(int, stack_switch, ss, esp); -} +#define HYPERCALL_arg1 "ebx" +#define HYPERCALL_arg2 "ecx" +#define HYPERCALL_arg3 "edx" +#define HYPERCALL_arg4 "esi" +#define HYPERCALL_arg5 "edi" +#if CONFIG_XEN_COMPAT <= 0x030002 static inline int __must_check HYPERVISOR_set_callbacks( unsigned long event_selector, unsigned long event_address, @@ -195,80 +14,24 @@ HYPERVISOR_set_callbacks( event_selector, event_address, failsafe_selector, failsafe_address); } - -static inline int -HYPERVISOR_fpu_taskswitch( - int set) -{ - return _hypercall1(int, fpu_taskswitch, set); -} - -static inline int __must_check -HYPERVISOR_sched_op_compat( - int cmd, unsigned long arg) -{ - return _hypercall2(int, sched_op_compat, cmd, arg); -} - -static inline int __must_check -HYPERVISOR_sched_op( - int cmd, void *arg) -{ - return _hypercall2(int, sched_op, cmd, arg); -} +#endif static inline long __must_check HYPERVISOR_set_timer_op( u64 timeout) { - unsigned long timeout_hi = (unsigned long)(timeout>>32); - unsigned long timeout_lo = (unsigned long)timeout; - return _hypercall2(long, set_timer_op, timeout_lo, timeout_hi); -} - -static inline int __must_check -HYPERVISOR_platform_op( - struct xen_platform_op *platform_op) -{ - platform_op->interface_version = XENPF_INTERFACE_VERSION; - return _hypercall1(int, platform_op, platform_op); -} - -static inline int __must_check -HYPERVISOR_set_debugreg( - unsigned int reg, unsigned long value) -{ - return _hypercall2(int, set_debugreg, reg, value); -} - -static inline unsigned long __must_check -HYPERVISOR_get_debugreg( - unsigned int reg) -{ - return _hypercall1(unsigned long, get_debugreg, reg); + return _hypercall2(long, set_timer_op, + (unsigned long)timeout, + 
(unsigned long)(timeout>>32)); } static inline int __must_check HYPERVISOR_update_descriptor( u64 ma, u64 desc) { - return _hypercall4(int, update_descriptor, ma, ma>>32, desc, desc>>32); -} - -static inline int __must_check -HYPERVISOR_memory_op( - unsigned int cmd, void *arg) -{ - if (arch_use_lazy_mmu_mode()) - xen_multicall_flush(false); - return _hypercall2(int, memory_op, cmd, arg); -} - -static inline int __must_check -HYPERVISOR_multicall( - multicall_entry_t *call_list, unsigned int nr_calls) -{ - return _hypercall2(int, multicall, call_list, nr_calls); + return _hypercall4(int, update_descriptor, + (unsigned long)ma, (unsigned long)(ma>>32), + (unsigned long)desc, (unsigned long)(desc>>32)); } static inline int __must_check @@ -287,67 +50,6 @@ HYPERVISOR_update_va_mapping( } static inline int __must_check -HYPERVISOR_event_channel_op( - int cmd, void *arg) -{ - int rc = _hypercall2(int, event_channel_op, cmd, arg); - -#if CONFIG_XEN_COMPAT <= 0x030002 - if (unlikely(rc == -ENOSYS)) { - struct evtchn_op op; - op.cmd = cmd; - memcpy(&op.u, arg, sizeof(op.u)); - rc = _hypercall1(int, event_channel_op_compat, &op); - memcpy(arg, &op.u, sizeof(op.u)); - } -#endif - - return rc; -} - -static inline int __must_check -HYPERVISOR_xen_version( - int cmd, void *arg) -{ - return _hypercall2(int, xen_version, cmd, arg); -} - -static inline int __must_check -HYPERVISOR_console_io( - int cmd, unsigned int count, char *str) -{ - return _hypercall3(int, console_io, cmd, count, str); -} - -static inline int __must_check -HYPERVISOR_physdev_op( - int cmd, void *arg) -{ - int rc = _hypercall2(int, physdev_op, cmd, arg); - -#if CONFIG_XEN_COMPAT <= 0x030002 - if (unlikely(rc == -ENOSYS)) { - struct physdev_op op; - op.cmd = cmd; - memcpy(&op.u, arg, sizeof(op.u)); - rc = _hypercall1(int, physdev_op_compat, &op); - memcpy(arg, &op.u, sizeof(op.u)); - } -#endif - - return rc; -} - -static inline int __must_check -HYPERVISOR_grant_table_op( - unsigned int cmd, void *uop, 
unsigned int count) -{ - if (arch_use_lazy_mmu_mode()) - xen_multicall_flush(false); - return _hypercall3(int, grant_table_op, cmd, uop, count); -} - -static inline int __must_check HYPERVISOR_update_va_mapping_otherdomain( unsigned long va, pte_t new_val, unsigned long flags, domid_t domid) { @@ -358,80 +60,3 @@ HYPERVISOR_update_va_mapping_otherdomain return _hypercall5(int, update_va_mapping_otherdomain, va, new_val.pte_low, pte_hi, flags, domid); } - -static inline int __must_check -HYPERVISOR_vm_assist( - unsigned int cmd, unsigned int type) -{ - return _hypercall2(int, vm_assist, cmd, type); -} - -static inline int __must_check -HYPERVISOR_vcpu_op( - int cmd, unsigned int vcpuid, void *extra_args) -{ - return _hypercall3(int, vcpu_op, cmd, vcpuid, extra_args); -} - -static inline int __must_check -HYPERVISOR_suspend( - unsigned long srec) -{ - struct sched_shutdown sched_shutdown = { - .reason = SHUTDOWN_suspend - }; - - int rc = _hypercall3(int, sched_op, SCHEDOP_shutdown, - &sched_shutdown, srec); - -#if CONFIG_XEN_COMPAT <= 0x030002 - if (rc == -ENOSYS) - rc = _hypercall3(int, sched_op_compat, SCHEDOP_shutdown, - SHUTDOWN_suspend, srec); -#endif - - return rc; -} - -#if CONFIG_XEN_COMPAT <= 0x030002 -static inline int -HYPERVISOR_nmi_op( - unsigned long op, void *arg) -{ - return _hypercall2(int, nmi_op, op, arg); -} -#endif - -#ifndef CONFIG_XEN -static inline unsigned long __must_check -HYPERVISOR_hvm_op( - int op, void *arg) -{ - return _hypercall2(unsigned long, hvm_op, op, arg); -} -#endif - -static inline int __must_check -HYPERVISOR_callback_op( - int cmd, const void *arg) -{ - return _hypercall2(int, callback_op, cmd, arg); -} - -static inline int __must_check -HYPERVISOR_xenoprof_op( - int op, void *arg) -{ - return _hypercall2(int, xenoprof_op, op, arg); -} - -static inline int __must_check -HYPERVISOR_kexec_op( - unsigned long op, void *args) -{ - return _hypercall2(int, kexec_op, op, args); -} - - - -#endif /* __HYPERCALL_H__ */ Index: 
head-2008-12-01/include/asm-x86/mach-xen/asm/hypercall_64.h =================================================================== --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/hypercall_64.h 2008-12-01 11:29:05.000000000 +0100 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/hypercall_64.h 2008-12-01 11:36:55.000000000 +0100 @@ -1,197 +1,10 @@ -/****************************************************************************** - * hypercall.h - * - * Linux-specific hypervisor handling. - * - * Copyright (c) 2002-2004, K A Fraser - * - * 64-bit updates: - * Benjamin Liu - * Jun Nakajima - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation; or, when distributed - * separately from the Linux kernel or incorporated into other - * software packages, subject to the following license: - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this source file (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, modify, - * merge, publish, distribute, sublicense, and/or sell copies of the Software, - * and to permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#ifndef __HYPERCALL_H__ -#define __HYPERCALL_H__ - -#include /* memcpy() */ -#include - -#ifndef __HYPERVISOR_H__ -# error "please don't include this file directly" -#endif - -#ifdef CONFIG_XEN -#define HYPERCALL_STR(name) \ - "call hypercall_page + ("__stringify(__HYPERVISOR_##name)" * 32)" -#else -#define HYPERCALL_STR(name) \ - "mov $("__stringify(__HYPERVISOR_##name)" * 32),%%eax; "\ - "add hypercall_stubs(%%rip),%%rax; " \ - "call *%%rax" -#endif - -#define _hypercall0(type, name) \ -({ \ - type __res; \ - asm volatile ( \ - HYPERCALL_STR(name) \ - : "=a" (__res) \ - : \ - : "memory" ); \ - __res; \ -}) - -#define _hypercall1(type, name, a1) \ -({ \ - type __res; \ - long __ign1; \ - asm volatile ( \ - HYPERCALL_STR(name) \ - : "=a" (__res), "=D" (__ign1) \ - : "1" ((long)(a1)) \ - : "memory" ); \ - __res; \ -}) - -#define _hypercall2(type, name, a1, a2) \ -({ \ - type __res; \ - long __ign1, __ign2; \ - asm volatile ( \ - HYPERCALL_STR(name) \ - : "=a" (__res), "=D" (__ign1), "=S" (__ign2) \ - : "1" ((long)(a1)), "2" ((long)(a2)) \ - : "memory" ); \ - __res; \ -}) - -#define _hypercall3(type, name, a1, a2, a3) \ -({ \ - type __res; \ - long __ign1, __ign2, __ign3; \ - asm volatile ( \ - HYPERCALL_STR(name) \ - : "=a" (__res), "=D" (__ign1), "=S" (__ign2), \ - "=d" (__ign3) \ - : "1" ((long)(a1)), "2" ((long)(a2)), \ - "3" ((long)(a3)) \ - : "memory" ); \ - __res; \ -}) - -#define _hypercall4(type, name, a1, a2, a3, a4) \ -({ \ - type __res; \ - long __ign1, __ign2, __ign3; \ - register long __arg4 asm("r10") = (long)(a4); \ - asm volatile ( \ - HYPERCALL_STR(name) \ - : "=a" (__res), "=D" (__ign1), "=S" (__ign2), \ - "=d" (__ign3), "+r" (__arg4) \ - : "1" ((long)(a1)), "2" ((long)(a2)), \ - "3" ((long)(a3)) \ - : "memory" ); \ - __res; \ -}) - -#define _hypercall5(type, name, a1, a2, a3, a4, a5) \ -({ \ - type __res; \ - long __ign1, __ign2, __ign3; \ - register long __arg4 asm("r10") = (long)(a4); \ - register long __arg5 asm("r8") = (long)(a5); \ - 
asm volatile ( \ - HYPERCALL_STR(name) \ - : "=a" (__res), "=D" (__ign1), "=S" (__ign2), \ - "=d" (__ign3), "+r" (__arg4), "+r" (__arg5) \ - : "1" ((long)(a1)), "2" ((long)(a2)), \ - "3" ((long)(a3)) \ - : "memory" ); \ - __res; \ -}) - -#define _hypercall(type, op, a1, a2, a3, a4, a5) \ -({ \ - type __res; \ - register typeof((a1)+0) __arg1 asm("rdi") = (a1); \ - register typeof((a2)+0) __arg2 asm("rsi") = (a2); \ - register typeof((a3)+0) __arg3 asm("rdx") = (a3); \ - register typeof((a4)+0) __arg4 asm("r10") = (a4); \ - register typeof((a5)+0) __arg5 asm("r8") = (a5); \ - asm volatile ( \ - "call *%6" \ - : "=a" (__res), "+r" (__arg1), "+r" (__arg2), \ - "+r" (__arg3), "+r" (__arg4), "+r" (__arg5) \ - : "0" (hypercall_page + (op) * 32) \ - : "memory" ); \ - __res; \ -}) - -static inline int __must_check -HYPERVISOR_set_trap_table( - const trap_info_t *table) -{ - return _hypercall1(int, set_trap_table, table); -} - -static inline int __must_check -HYPERVISOR_mmu_update( - mmu_update_t *req, unsigned int count, unsigned int *success_count, - domid_t domid) -{ - if (arch_use_lazy_mmu_mode()) - return xen_multi_mmu_update(req, count, success_count, domid); - return _hypercall4(int, mmu_update, req, count, success_count, domid); -} - -static inline int __must_check -HYPERVISOR_mmuext_op( - struct mmuext_op *op, unsigned int count, unsigned int *success_count, - domid_t domid) -{ - if (arch_use_lazy_mmu_mode()) - return xen_multi_mmuext_op(op, count, success_count, domid); - return _hypercall4(int, mmuext_op, op, count, success_count, domid); -} - -static inline int __must_check -HYPERVISOR_set_gdt( - unsigned long *frame_list, unsigned int entries) -{ - return _hypercall2(int, set_gdt, frame_list, entries); -} - -static inline int __must_check -HYPERVISOR_stack_switch( - unsigned long ss, unsigned long esp) -{ - return _hypercall2(int, stack_switch, ss, esp); -} +#define HYPERCALL_arg1 "rdi" +#define HYPERCALL_arg2 "rsi" +#define HYPERCALL_arg3 "rdx" +#define 
HYPERCALL_arg4 "r10" +#define HYPERCALL_arg5 "r8" +#if CONFIG_XEN_COMPAT <= 0x030002 static inline int __must_check HYPERVISOR_set_callbacks( unsigned long event_address, unsigned long failsafe_address, @@ -200,27 +13,7 @@ HYPERVISOR_set_callbacks( return _hypercall3(int, set_callbacks, event_address, failsafe_address, syscall_address); } - -static inline int -HYPERVISOR_fpu_taskswitch( - int set) -{ - return _hypercall1(int, fpu_taskswitch, set); -} - -static inline int __must_check -HYPERVISOR_sched_op_compat( - int cmd, unsigned long arg) -{ - return _hypercall2(int, sched_op_compat, cmd, arg); -} - -static inline int __must_check -HYPERVISOR_sched_op( - int cmd, void *arg) -{ - return _hypercall2(int, sched_op, cmd, arg); -} +#endif static inline long __must_check HYPERVISOR_set_timer_op( @@ -230,28 +23,6 @@ HYPERVISOR_set_timer_op( } static inline int __must_check -HYPERVISOR_platform_op( - struct xen_platform_op *platform_op) -{ - platform_op->interface_version = XENPF_INTERFACE_VERSION; - return _hypercall1(int, platform_op, platform_op); -} - -static inline int __must_check -HYPERVISOR_set_debugreg( - unsigned int reg, unsigned long value) -{ - return _hypercall2(int, set_debugreg, reg, value); -} - -static inline unsigned long __must_check -HYPERVISOR_get_debugreg( - unsigned int reg) -{ - return _hypercall1(unsigned long, get_debugreg, reg); -} - -static inline int __must_check HYPERVISOR_update_descriptor( unsigned long ma, unsigned long word) { @@ -259,22 +30,6 @@ HYPERVISOR_update_descriptor( } static inline int __must_check -HYPERVISOR_memory_op( - unsigned int cmd, void *arg) -{ - if (arch_use_lazy_mmu_mode()) - xen_multicall_flush(false); - return _hypercall2(int, memory_op, cmd, arg); -} - -static inline int __must_check -HYPERVISOR_multicall( - multicall_entry_t *call_list, unsigned int nr_calls) -{ - return _hypercall2(int, multicall, call_list, nr_calls); -} - -static inline int __must_check HYPERVISOR_update_va_mapping( unsigned long va, pte_t 
new_val, unsigned long flags) { @@ -284,67 +39,6 @@ HYPERVISOR_update_va_mapping( } static inline int __must_check -HYPERVISOR_event_channel_op( - int cmd, void *arg) -{ - int rc = _hypercall2(int, event_channel_op, cmd, arg); - -#if CONFIG_XEN_COMPAT <= 0x030002 - if (unlikely(rc == -ENOSYS)) { - struct evtchn_op op; - op.cmd = cmd; - memcpy(&op.u, arg, sizeof(op.u)); - rc = _hypercall1(int, event_channel_op_compat, &op); - memcpy(arg, &op.u, sizeof(op.u)); - } -#endif - - return rc; -} - -static inline int __must_check -HYPERVISOR_xen_version( - int cmd, void *arg) -{ - return _hypercall2(int, xen_version, cmd, arg); -} - -static inline int __must_check -HYPERVISOR_console_io( - int cmd, unsigned int count, char *str) -{ - return _hypercall3(int, console_io, cmd, count, str); -} - -static inline int __must_check -HYPERVISOR_physdev_op( - int cmd, void *arg) -{ - int rc = _hypercall2(int, physdev_op, cmd, arg); - -#if CONFIG_XEN_COMPAT <= 0x030002 - if (unlikely(rc == -ENOSYS)) { - struct physdev_op op; - op.cmd = cmd; - memcpy(&op.u, arg, sizeof(op.u)); - rc = _hypercall1(int, physdev_op_compat, &op); - memcpy(arg, &op.u, sizeof(op.u)); - } -#endif - - return rc; -} - -static inline int __must_check -HYPERVISOR_grant_table_op( - unsigned int cmd, void *uop, unsigned int count) -{ - if (arch_use_lazy_mmu_mode()) - xen_multicall_flush(false); - return _hypercall3(int, grant_table_op, cmd, uop, count); -} - -static inline int __must_check HYPERVISOR_update_va_mapping_otherdomain( unsigned long va, pte_t new_val, unsigned long flags, domid_t domid) { @@ -353,83 +47,8 @@ HYPERVISOR_update_va_mapping_otherdomain } static inline int __must_check -HYPERVISOR_vm_assist( - unsigned int cmd, unsigned int type) -{ - return _hypercall2(int, vm_assist, cmd, type); -} - -static inline int __must_check -HYPERVISOR_vcpu_op( - int cmd, unsigned int vcpuid, void *extra_args) -{ - return _hypercall3(int, vcpu_op, cmd, vcpuid, extra_args); -} - -static inline int __must_check 
HYPERVISOR_set_segment_base( int reg, unsigned long value) { return _hypercall2(int, set_segment_base, reg, value); } - -static inline int __must_check -HYPERVISOR_suspend( - unsigned long srec) -{ - struct sched_shutdown sched_shutdown = { - .reason = SHUTDOWN_suspend - }; - - int rc = _hypercall3(int, sched_op, SCHEDOP_shutdown, - &sched_shutdown, srec); - -#if CONFIG_XEN_COMPAT <= 0x030002 - if (rc == -ENOSYS) - rc = _hypercall3(int, sched_op_compat, SCHEDOP_shutdown, - SHUTDOWN_suspend, srec); -#endif - - return rc; -} - -#if CONFIG_XEN_COMPAT <= 0x030002 -static inline int -HYPERVISOR_nmi_op( - unsigned long op, void *arg) -{ - return _hypercall2(int, nmi_op, op, arg); -} -#endif - -#ifndef CONFIG_XEN -static inline unsigned long __must_check -HYPERVISOR_hvm_op( - int op, void *arg) -{ - return _hypercall2(unsigned long, hvm_op, op, arg); -} -#endif - -static inline int __must_check -HYPERVISOR_callback_op( - int cmd, const void *arg) -{ - return _hypercall2(int, callback_op, cmd, arg); -} - -static inline int __must_check -HYPERVISOR_xenoprof_op( - int op, void *arg) -{ - return _hypercall2(int, xenoprof_op, op, arg); -} - -static inline int __must_check -HYPERVISOR_kexec_op( - unsigned long op, void *args) -{ - return _hypercall2(int, kexec_op, op, args); -} - -#endif /* __HYPERCALL_H__ */ Index: head-2008-12-01/include/asm-x86/mach-xen/asm/hypervisor.h =================================================================== --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/hypervisor.h 2008-12-01 11:36:07.000000000 +0100 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/hypervisor.h 2008-12-01 11:36:55.000000000 +0100 @@ -194,7 +194,6 @@ static inline void xen_multicall_flush(b extern char hypercall_page[PAGE_SIZE]; #else extern char *hypercall_stubs; -#define hypercall_page hypercall_stubs #define is_running_on_xen() (!!hypercall_stubs) #endif Index: head-2008-12-01/include/asm-x86/mach-xen/asm/io.h 
=================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/io.h 2008-12-01 11:36:55.000000000 +0100 @@ -0,0 +1,5 @@ +#ifdef CONFIG_X86_32 +# include "io_32.h" +#else +# include "io_64.h" +#endif Index: head-2008-12-01/include/asm-x86/mach-xen/asm/io_32.h =================================================================== --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/io_32.h 2008-12-01 11:36:47.000000000 +0100 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/io_32.h 2008-12-01 11:36:55.000000000 +0100 @@ -212,17 +212,22 @@ static inline void writel(unsigned int b #define mmiowb() -static inline void memset_io(volatile void __iomem *addr, unsigned char val, int count) +static inline void +memset_io(volatile void __iomem *addr, unsigned char val, int count) { - memset((void __force *) addr, val, count); + memset((void __force *)addr, val, count); } -static inline void memcpy_fromio(void *dst, const volatile void __iomem *src, int count) + +static inline void +memcpy_fromio(void *dst, const volatile void __iomem *src, int count) { - __memcpy(dst, (void __force *) src, count); + __memcpy(dst, (const void __force *)src, count); } -static inline void memcpy_toio(volatile void __iomem *dst, const void *src, int count) + +static inline void +memcpy_toio(volatile void __iomem *dst, const void *src, int count) { - __memcpy((void __force *) dst, src, count); + __memcpy((void __force *)dst, src, count); } /* @@ -250,18 +255,9 @@ static inline void flush_write_buffers(v __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory"); } -#define dma_cache_inv(_start,_size) flush_write_buffers() -#define dma_cache_wback(_start,_size) flush_write_buffers() -#define dma_cache_wback_inv(_start,_size) flush_write_buffers() - #else -/* Nothing to do */ - -#define dma_cache_inv(_start,_size) do { } while (0) -#define dma_cache_wback(_start,_size) do { } while (0) -#define 
dma_cache_wback_inv(_start,_size) do { } while (0) -#define flush_write_buffers() +#define flush_write_buffers() do { } while (0) #endif Index: head-2008-12-01/include/asm-x86/mach-xen/asm/io_64.h =================================================================== --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/io_64.h 2008-12-01 11:36:47.000000000 +0100 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/io_64.h 2008-12-01 11:36:55.000000000 +0100 @@ -268,12 +268,6 @@ void memset_io(volatile void __iomem *a, */ #define __ISA_IO_base ((char __iomem *)(fix_to_virt(FIX_ISAMAP_BEGIN))) -/* Nothing to do */ - -#define dma_cache_inv(_start,_size) do { } while (0) -#define dma_cache_wback(_start,_size) do { } while (0) -#define dma_cache_wback_inv(_start,_size) do { } while (0) - #define flush_write_buffers() extern int iommu_bio_merge; Index: head-2008-12-01/include/asm-x86/mach-xen/asm/irqflags.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/irqflags.h 2008-12-01 11:36:55.000000000 +0100 @@ -0,0 +1,5 @@ +#ifdef CONFIG_X86_32 +# include "irqflags_32.h" +#else +# include "irqflags_64.h" +#endif Index: head-2008-12-01/include/asm-x86/mach-xen/asm/irqflags_32.h =================================================================== --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/irqflags_32.h 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/irqflags_32.h 2008-12-01 11:36:55.000000000 +0100 @@ -148,6 +148,23 @@ static inline int raw_irqs_disabled_flag \ raw_irqs_disabled_flags(flags); \ }) + +/* + * makes the traced hardirq state match with the machine state + * + * should be a rarely used function, only in places where its + * otherwise impossible to know the irq state, like in traps. 
+ */ +static inline void trace_hardirqs_fixup_flags(unsigned long flags) +{ + if (raw_irqs_disabled_flags(flags)) + trace_hardirqs_off(); + else + trace_hardirqs_on(); +} + +#define trace_hardirqs_fixup() \ + trace_hardirqs_fixup_flags(__raw_local_save_flags()) #endif /* __ASSEMBLY__ */ /* @@ -179,4 +196,17 @@ static inline int raw_irqs_disabled_flag # define TRACE_IRQS_OFF #endif +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# define LOCKDEP_SYS_EXIT \ + pushl %eax; \ + pushl %ecx; \ + pushl %edx; \ + call lockdep_sys_exit; \ + popl %edx; \ + popl %ecx; \ + popl %eax; +#else +# define LOCKDEP_SYS_EXIT +#endif + #endif Index: head-2008-12-01/include/asm-x86/mach-xen/asm/irqflags_64.h =================================================================== --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/irqflags_64.h 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/irqflags_64.h 2008-12-01 11:36:55.000000000 +0100 @@ -116,6 +116,22 @@ static inline int raw_irqs_disabled_flag }) /* + * makes the traced hardirq state match with the machine state + * + * should be a rarely used function, only in places where its + * otherwise impossible to know the irq state, like in traps. 
+ */ +static inline void trace_hardirqs_fixup_flags(unsigned long flags) +{ + if (raw_irqs_disabled_flags(flags)) + trace_hardirqs_off(); + else + trace_hardirqs_on(); +} + +#define trace_hardirqs_fixup() \ + trace_hardirqs_fixup_flags(__raw_local_save_flags()) +/* * Used in the idle loop; sti takes one instruction cycle * to complete: */ @@ -143,6 +159,20 @@ static inline void halt(void) # define TRACE_IRQS_ON # define TRACE_IRQS_OFF # endif +# ifdef CONFIG_DEBUG_LOCK_ALLOC +# define LOCKDEP_SYS_EXIT call lockdep_sys_exit_thunk +# define LOCKDEP_SYS_EXIT_IRQ \ + TRACE_IRQS_ON; \ + sti; \ + SAVE_REST; \ + LOCKDEP_SYS_EXIT; \ + RESTORE_REST; \ + cli; \ + TRACE_IRQS_OFF; +# else +# define LOCKDEP_SYS_EXIT +# define LOCKDEP_SYS_EXIT_IRQ +# endif #endif #endif Index: head-2008-12-01/include/asm-x86/mach-xen/asm/maddr.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/maddr.h 2008-12-01 11:36:55.000000000 +0100 @@ -0,0 +1,5 @@ +#ifdef CONFIG_X86_32 +# include "maddr_32.h" +#else +# include "maddr_64.h" +#endif Index: head-2008-12-01/include/asm-x86/mach-xen/asm/mmu_context.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/mmu_context.h 2008-12-01 11:36:55.000000000 +0100 @@ -0,0 +1,5 @@ +#ifdef CONFIG_X86_32 +# include "mmu_context_32.h" +#else +# include "mmu_context_64.h" +#endif Index: head-2008-12-01/include/asm-x86/mach-xen/asm/page.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/page.h 2008-12-01 11:36:55.000000000 +0100 @@ -0,0 +1,13 @@ +#ifdef __KERNEL__ +# ifdef CONFIG_X86_32 +# include "page_32.h" +# else +# include "page_64.h" +# endif +#else +# ifdef __i386__ +# include "page_32.h" +# else +# include 
"page_64.h" +# endif +#endif Index: head-2008-12-01/include/asm-x86/mach-xen/asm/page_64.h =================================================================== --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/page_64.h 2008-12-01 11:36:47.000000000 +0100 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/page_64.h 2008-12-01 11:36:55.000000000 +0100 @@ -207,6 +207,7 @@ static inline unsigned long __phys_addr( VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) #define __HAVE_ARCH_GATE_AREA 1 +#define vmemmap ((struct page *)VMEMMAP_START) #include #include Index: head-2008-12-01/include/asm-x86/mach-xen/asm/pci.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/pci.h 2008-12-01 11:36:55.000000000 +0100 @@ -0,0 +1,100 @@ +#ifndef __x86_PCI_H +#define __x86_PCI_H + +#include /* for struct page */ +#include +#include +#include +#include +#include + + +#ifdef __KERNEL__ + +struct pci_sysdata { + int domain; /* PCI domain */ + int node; /* NUMA node */ +#ifdef CONFIG_X86_64 + void* iommu; /* IOMMU private data */ +#endif +#ifdef CONFIG_XEN_PCIDEV_FRONTEND + struct pcifront_device *pdev; +#endif +}; + +/* scan a bus after allocating a pci_sysdata for it */ +extern struct pci_bus *pci_scan_bus_with_sysdata(int busno); + +static inline int pci_domain_nr(struct pci_bus *bus) +{ + struct pci_sysdata *sd = bus->sysdata; + return sd->domain; +} + +static inline int pci_proc_domain(struct pci_bus *bus) +{ + return pci_domain_nr(bus); +} + + +/* Can be used to override the logic in pci_scan_bus for skipping + already-configured bus numbers - to be used for buggy BIOSes + or architectures with incomplete PCI setup by the loader */ + +#ifdef CONFIG_PCI +extern unsigned int pcibios_assign_all_busses(void); +#else +#define pcibios_assign_all_busses() 0 +#endif + +#include +#define pcibios_scan_all_fns(a, b) (!is_initial_xendomain()) + +extern unsigned long 
pci_mem_start; +#define PCIBIOS_MIN_IO 0x1000 +#define PCIBIOS_MIN_MEM (pci_mem_start) + +#define PCIBIOS_MIN_CARDBUS_IO 0x4000 + +void pcibios_config_init(void); +struct pci_bus * pcibios_scan_root(int bus); + +void pcibios_set_master(struct pci_dev *dev); +void pcibios_penalize_isa_irq(int irq, int active); +struct irq_routing_table *pcibios_get_irq_routing_table(void); +int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq); + + +#define HAVE_PCI_MMAP +extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, + enum pci_mmap_state mmap_state, int write_combine); + + +#ifdef CONFIG_PCI +static inline void pci_dma_burst_advice(struct pci_dev *pdev, + enum pci_dma_burst_strategy *strat, + unsigned long *strategy_parameter) +{ + *strat = PCI_DMA_BURST_INFINITY; + *strategy_parameter = ~0UL; +} +#endif + + +#endif /* __KERNEL__ */ + +#ifdef CONFIG_X86_32 +# include "pci_32.h" +#else +# include "pci_64.h" +#endif + +/* implement the pci_ DMA API in terms of the generic device dma_ one */ +#include + +/* generic pci stuff */ +#include + + + +#endif Index: head-2008-12-01/include/asm-x86/mach-xen/asm/pci_32.h =================================================================== --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/pci_32.h 2008-12-01 11:36:47.000000000 +0100 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/pci_32.h 2008-12-01 11:36:55.000000000 +0100 @@ -4,52 +4,10 @@ #ifdef __KERNEL__ -struct pci_sysdata { - int node; /* NUMA node */ -}; - -/* scan a bus after allocating a pci_sysdata for it */ -extern struct pci_bus *pci_scan_bus_with_sysdata(int busno); - -#include /* for struct page */ - -/* Can be used to override the logic in pci_scan_bus for skipping - already-configured bus numbers - to be used for buggy BIOSes - or architectures with incomplete PCI setup by the loader */ - -#ifdef CONFIG_PCI -extern unsigned int pcibios_assign_all_busses(void); -#else -#define pcibios_assign_all_busses() 0 -#endif - -#include 
-#define pcibios_scan_all_fns(a, b) (!is_initial_xendomain()) - -extern unsigned long pci_mem_start; -#define PCIBIOS_MIN_IO 0x1000 -#define PCIBIOS_MIN_MEM (pci_mem_start) - -#define PCIBIOS_MIN_CARDBUS_IO 0x4000 - -void pcibios_config_init(void); -struct pci_bus * pcibios_scan_root(int bus); - -void pcibios_set_master(struct pci_dev *dev); -void pcibios_penalize_isa_irq(int irq, int active); -struct irq_routing_table *pcibios_get_irq_routing_table(void); -int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq); - /* Dynamic DMA mapping stuff. * i386 has everything mapped statically. */ -#include -#include -#include -#include -#include - struct pci_dev; #ifdef CONFIG_SWIOTLB @@ -89,31 +47,8 @@ struct pci_dev; #endif -#define HAVE_PCI_MMAP -extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, - enum pci_mmap_state mmap_state, int write_combine); - - -#ifdef CONFIG_PCI -static inline void pci_dma_burst_advice(struct pci_dev *pdev, - enum pci_dma_burst_strategy *strat, - unsigned long *strategy_parameter) -{ - *strat = PCI_DMA_BURST_INFINITY; - *strategy_parameter = ~0UL; -} -#endif #endif /* __KERNEL__ */ -#ifdef CONFIG_XEN_PCIDEV_FRONTEND -#include -#endif /* CONFIG_XEN_PCIDEV_FRONTEND */ - -/* implement the pci_ DMA API in terms of the generic device dma_ one */ -#include - -/* generic pci stuff */ -#include #endif /* __i386_PCI_H */ Index: head-2008-12-01/include/asm-x86/mach-xen/asm/pci_64.h =================================================================== --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/pci_64.h 2008-12-01 11:36:47.000000000 +0100 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/pci_64.h 2008-12-01 11:36:55.000000000 +0100 @@ -1,16 +1,9 @@ #ifndef __x8664_PCI_H #define __x8664_PCI_H -#include #ifdef __KERNEL__ -struct pci_sysdata { - int node; /* NUMA node */ - void* iommu; /* IOMMU private data */ -}; - -extern struct pci_bus *pci_scan_bus_with_sysdata(int busno); #ifdef CONFIG_CALGARY_IOMMU static 
inline void* pci_iommu(struct pci_bus *bus) @@ -26,42 +19,11 @@ static inline void set_pci_iommu(struct } #endif /* CONFIG_CALGARY_IOMMU */ -#include /* for struct page */ - -/* Can be used to override the logic in pci_scan_bus for skipping - already-configured bus numbers - to be used for buggy BIOSes - or architectures with incomplete PCI setup by the loader */ - -#ifdef CONFIG_PCI -extern unsigned int pcibios_assign_all_busses(void); -#else -#define pcibios_assign_all_busses() 0 -#endif - -#include -#define pcibios_scan_all_fns(a, b) (!is_initial_xendomain()) - -extern unsigned long pci_mem_start; -#define PCIBIOS_MIN_IO 0x1000 -#define PCIBIOS_MIN_MEM (pci_mem_start) - -#define PCIBIOS_MIN_CARDBUS_IO 0x4000 -void pcibios_config_init(void); -struct pci_bus * pcibios_scan_root(int bus); extern int (*pci_config_read)(int seg, int bus, int dev, int fn, int reg, int len, u32 *value); extern int (*pci_config_write)(int seg, int bus, int dev, int fn, int reg, int len, u32 value); -void pcibios_set_master(struct pci_dev *dev); -void pcibios_penalize_isa_irq(int irq, int active); -struct irq_routing_table *pcibios_get_irq_routing_table(void); -int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq); - -#include -#include -#include -#include -#include + extern void pci_iommu_alloc(void); extern int iommu_setup(char *opt); @@ -75,7 +37,7 @@ extern int iommu_setup(char *opt); */ #define PCI_DMA_BUS_IS_PHYS (dma_ops->is_phys) -#if defined(CONFIG_IOMMU) || defined(CONFIG_CALGARY_IOMMU) +#if defined(CONFIG_GART_IOMMU) || defined(CONFIG_CALGARY_IOMMU) #define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) \ dma_addr_t ADDR_NAME; @@ -119,27 +81,7 @@ extern int iommu_setup(char *opt); #endif -#include - -#ifdef CONFIG_PCI -static inline void pci_dma_burst_advice(struct pci_dev *pdev, - enum pci_dma_burst_strategy *strat, - unsigned long *strategy_parameter) -{ - *strat = PCI_DMA_BURST_INFINITY; - *strategy_parameter = ~0UL; -} -#endif - -#define HAVE_PCI_MMAP -extern int 
pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, - enum pci_mmap_state mmap_state, int write_combine); - #endif /* __KERNEL__ */ -/* generic pci stuff */ -#ifdef CONFIG_PCI -#include -#endif #endif /* __x8664_PCI_H */ Index: head-2008-12-01/include/asm-x86/mach-xen/asm/pgalloc.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/pgalloc.h 2008-12-01 11:36:55.000000000 +0100 @@ -0,0 +1,5 @@ +#ifdef CONFIG_X86_32 +# include "pgalloc_32.h" +#else +# include "pgalloc_64.h" +#endif Index: head-2008-12-01/include/asm-x86/mach-xen/asm/pgalloc_64.h =================================================================== --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/pgalloc_64.h 2008-12-01 11:36:47.000000000 +0100 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/pgalloc_64.h 2008-12-01 11:36:55.000000000 +0100 @@ -112,6 +112,8 @@ static inline void pgd_list_del(pgd_t *p spin_unlock(&pgd_lock); } +extern void pgd_test_and_unpin(pgd_t *); + static inline pgd_t *pgd_alloc(struct mm_struct *mm) { /* @@ -122,6 +124,7 @@ static inline pgd_t *pgd_alloc(struct mm if (!pgd) return NULL; pgd_list_add(pgd); + pgd_test_and_unpin(pgd); /* * Copy kernel pointers in from init. 
* Could keep a freelist or slab cache of those because the kernel @@ -144,27 +147,7 @@ static inline pgd_t *pgd_alloc(struct mm static inline void pgd_free(pgd_t *pgd) { - pte_t *ptep = virt_to_ptep(pgd); - - if (!pte_write(*ptep)) { - xen_pgd_unpin(__pa(pgd)); - BUG_ON(HYPERVISOR_update_va_mapping( - (unsigned long)pgd, - pfn_pte(virt_to_phys(pgd)>>PAGE_SHIFT, PAGE_KERNEL), - 0)); - } - - ptep = virt_to_ptep(__user_pgd(pgd)); - - if (!pte_write(*ptep)) { - xen_pgd_unpin(__pa(__user_pgd(pgd))); - BUG_ON(HYPERVISOR_update_va_mapping( - (unsigned long)__user_pgd(pgd), - pfn_pte(virt_to_phys(__user_pgd(pgd))>>PAGE_SHIFT, - PAGE_KERNEL), - 0)); - } - + pgd_test_and_unpin(pgd); pgd_list_del(pgd); free_pages((unsigned long)pgd, 1); } Index: head-2008-12-01/include/asm-x86/mach-xen/asm/pgtable.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/pgtable.h 2008-12-01 11:36:55.000000000 +0100 @@ -0,0 +1,5 @@ +#ifdef CONFIG_X86_32 +# include "pgtable_32.h" +#else +# include "pgtable_64.h" +#endif Index: head-2008-12-01/include/asm-x86/mach-xen/asm/pgtable_32.h =================================================================== --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/pgtable_32.h 2008-12-01 11:36:47.000000000 +0100 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/pgtable_32.h 2008-12-01 11:36:55.000000000 +0100 @@ -17,10 +17,7 @@ #include #include -#ifndef _I386_BITOPS_H -#include -#endif - +#include #include #include #include @@ -40,7 +37,7 @@ extern spinlock_t pgd_lock; extern struct page *pgd_list; void check_pgt_cache(void); -void pmd_ctor(void *, struct kmem_cache *, unsigned long); +void pmd_ctor(struct kmem_cache *, void *); void pgtable_cache_init(void); void paging_init(void); Index: head-2008-12-01/include/asm-x86/mach-xen/asm/pgtable_64.h =================================================================== --- 
head-2008-12-01.orig/include/asm-x86/mach-xen/asm/pgtable_64.h 2008-12-01 11:36:47.000000000 +0100 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/pgtable_64.h 2008-12-01 11:36:55.000000000 +0100 @@ -9,7 +9,7 @@ * the x86-64 page table tree. */ #include -#include +#include #include #include #include @@ -137,6 +137,7 @@ static inline void pgd_clear (pgd_t * pg #define MAXMEM _AC(0x3fffffffffff, UL) #define VMALLOC_START _AC(0xffffc20000000000, UL) #define VMALLOC_END _AC(0xffffe1ffffffffff, UL) +#define VMEMMAP_START _AC(0xffffe20000000000, UL) #define MODULES_VADDR _AC(0xffffffff88000000, UL) #define MODULES_END _AC(0xfffffffffff00000, UL) #define MODULES_LEN (MODULES_END - MODULES_VADDR) Index: head-2008-12-01/include/asm-x86/mach-xen/asm/processor.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/processor.h 2008-12-01 11:36:55.000000000 +0100 @@ -0,0 +1,5 @@ +#ifdef CONFIG_X86_32 +# include "processor_32.h" +#else +# include "processor_64.h" +#endif Index: head-2008-12-01/include/asm-x86/mach-xen/asm/processor_32.h =================================================================== --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/processor_32.h 2008-12-01 11:36:47.000000000 +0100 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/processor_32.h 2008-12-01 11:36:55.000000000 +0100 @@ -80,6 +80,7 @@ struct cpuinfo_x86 { unsigned char booted_cores; /* number of cores as seen by OS */ __u8 phys_proc_id; /* Physical processor id. 
*/ __u8 cpu_core_id; /* Core id */ + __u8 cpu_index; /* index into per_cpu list */ #endif } __attribute__((__aligned__(SMP_CACHE_BYTES))); @@ -106,14 +107,19 @@ DECLARE_PER_CPU(struct tss_struct, init_ #endif #ifdef CONFIG_SMP -extern struct cpuinfo_x86 cpu_data[]; -#define current_cpu_data cpu_data[smp_processor_id()] +DECLARE_PER_CPU(struct cpuinfo_x86, cpu_info); +#define cpu_data(cpu) per_cpu(cpu_info, cpu) +#define current_cpu_data cpu_data(smp_processor_id()) #else -#define cpu_data (&boot_cpu_data) -#define current_cpu_data boot_cpu_data +#define cpu_data(cpu) boot_cpu_data +#define current_cpu_data boot_cpu_data #endif -extern int cpu_llc_id[NR_CPUS]; +/* + * the following now lives in the per cpu area: + * extern int cpu_llc_id[NR_CPUS]; + */ +DECLARE_PER_CPU(u8, cpu_llc_id); extern char ignore_fpu_irq; void __init cpu_detect(struct cpuinfo_x86 *c); @@ -560,7 +566,9 @@ static inline void xen_set_iopl_mask(uns * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx * resulting in stale register contents being returned. 
*/ -static inline void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) +static inline void cpuid(unsigned int op, + unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx) { *eax = op; *ecx = 0; @@ -568,8 +576,9 @@ static inline void cpuid(unsigned int op } /* Some CPUID calls want 'count' to be placed in ecx */ -static inline void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx, - int *edx) +static inline void cpuid_count(unsigned int op, int count, + unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx) { *eax = op; *ecx = count; @@ -639,6 +648,17 @@ static inline unsigned int cpuid_edx(uns #define K7_NOP7 ".byte 0x8D,0x04,0x05,0,0,0,0\n" #define K7_NOP8 K7_NOP7 ASM_NOP1 +/* P6 nops */ +/* uses eax dependencies (Intel-recommended choice) */ +#define P6_NOP1 GENERIC_NOP1 +#define P6_NOP2 ".byte 0x66,0x90\n" +#define P6_NOP3 ".byte 0x0f,0x1f,0x00\n" +#define P6_NOP4 ".byte 0x0f,0x1f,0x40,0\n" +#define P6_NOP5 ".byte 0x0f,0x1f,0x44,0x00,0\n" +#define P6_NOP6 ".byte 0x66,0x0f,0x1f,0x44,0x00,0\n" +#define P6_NOP7 ".byte 0x0f,0x1f,0x80,0,0,0,0\n" +#define P6_NOP8 ".byte 0x0f,0x1f,0x84,0x00,0,0,0,0\n" + #ifdef CONFIG_MK8 #define ASM_NOP1 K8_NOP1 #define ASM_NOP2 K8_NOP2 @@ -657,6 +677,17 @@ static inline unsigned int cpuid_edx(uns #define ASM_NOP6 K7_NOP6 #define ASM_NOP7 K7_NOP7 #define ASM_NOP8 K7_NOP8 +#elif defined(CONFIG_M686) || defined(CONFIG_MPENTIUMII) || \ + defined(CONFIG_MPENTIUMIII) || defined(CONFIG_MPENTIUMM) || \ + defined(CONFIG_MCORE2) || defined(CONFIG_PENTIUM4) +#define ASM_NOP1 P6_NOP1 +#define ASM_NOP2 P6_NOP2 +#define ASM_NOP3 P6_NOP3 +#define ASM_NOP4 P6_NOP4 +#define ASM_NOP5 P6_NOP5 +#define ASM_NOP6 P6_NOP6 +#define ASM_NOP7 P6_NOP7 +#define ASM_NOP8 P6_NOP8 #else #define ASM_NOP1 GENERIC_NOP1 #define ASM_NOP2 GENERIC_NOP2 Index: head-2008-12-01/include/asm-x86/mach-xen/asm/processor_64.h 
=================================================================== --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/processor_64.h 2008-12-01 11:36:47.000000000 +0100 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/processor_64.h 2008-12-01 11:36:55.000000000 +0100 @@ -74,6 +74,7 @@ struct cpuinfo_x86 { __u8 booted_cores; /* number of cores as seen by OS */ __u8 phys_proc_id; /* Physical Processor id. */ __u8 cpu_core_id; /* Core id. */ + __u8 cpu_index; /* index into per_cpu list */ #endif } ____cacheline_aligned; @@ -88,11 +89,12 @@ struct cpuinfo_x86 { #define X86_VENDOR_UNKNOWN 0xff #ifdef CONFIG_SMP -extern struct cpuinfo_x86 cpu_data[]; -#define current_cpu_data cpu_data[smp_processor_id()] +DECLARE_PER_CPU(struct cpuinfo_x86, cpu_info); +#define cpu_data(cpu) per_cpu(cpu_info, cpu) +#define current_cpu_data cpu_data(smp_processor_id()) #else -#define cpu_data (&boot_cpu_data) -#define current_cpu_data boot_cpu_data +#define cpu_data(cpu) boot_cpu_data +#define current_cpu_data boot_cpu_data #endif extern char ignore_irq13; @@ -343,6 +345,16 @@ struct extended_sigtable { }; +#if defined(CONFIG_MPSC) || defined(CONFIG_MCORE2) +#define ASM_NOP1 P6_NOP1 +#define ASM_NOP2 P6_NOP2 +#define ASM_NOP3 P6_NOP3 +#define ASM_NOP4 P6_NOP4 +#define ASM_NOP5 P6_NOP5 +#define ASM_NOP6 P6_NOP6 +#define ASM_NOP7 P6_NOP7 +#define ASM_NOP8 P6_NOP8 +#else #define ASM_NOP1 K8_NOP1 #define ASM_NOP2 K8_NOP2 #define ASM_NOP3 K8_NOP3 @@ -351,6 +363,7 @@ struct extended_sigtable { #define ASM_NOP6 K8_NOP6 #define ASM_NOP7 K8_NOP7 #define ASM_NOP8 K8_NOP8 +#endif /* Opteron nops */ #define K8_NOP1 ".byte 0x90\n" @@ -362,6 +375,17 @@ struct extended_sigtable { #define K8_NOP7 K8_NOP4 K8_NOP3 #define K8_NOP8 K8_NOP4 K8_NOP4 +/* P6 nops */ +/* uses eax dependencies (Intel-recommended choice) */ +#define P6_NOP1 ".byte 0x90\n" +#define P6_NOP2 ".byte 0x66,0x90\n" +#define P6_NOP3 ".byte 0x0f,0x1f,0x00\n" +#define P6_NOP4 ".byte 0x0f,0x1f,0x40,0\n" +#define P6_NOP5 ".byte 
0x0f,0x1f,0x44,0x00,0\n" +#define P6_NOP6 ".byte 0x66,0x0f,0x1f,0x44,0x00,0\n" +#define P6_NOP7 ".byte 0x0f,0x1f,0x80,0,0,0,0\n" +#define P6_NOP8 ".byte 0x0f,0x1f,0x84,0x00,0,0,0,0\n" + #define ASM_NOP_MAX 8 /* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */ @@ -377,12 +401,6 @@ static inline void sync_core(void) asm volatile("cpuid" : "=a" (tmp) : "0" (1) : "ebx","ecx","edx","memory"); } -#define ARCH_HAS_PREFETCH -static inline void prefetch(void *x) -{ - asm volatile("prefetcht0 (%0)" :: "r" (x)); -} - #define ARCH_HAS_PREFETCHW 1 static inline void prefetchw(void *x) { @@ -398,11 +416,6 @@ static inline void prefetchw(void *x) #define cpu_relax() rep_nop() -static inline void serialize_cpu(void) -{ - __asm__ __volatile__ ("cpuid" : : : "ax", "bx", "cx", "dx"); -} - static inline void __monitor(const void *eax, unsigned long ecx, unsigned long edx) { Index: head-2008-12-01/include/asm-x86/mach-xen/asm/segment.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/segment.h 2008-12-01 11:36:55.000000000 +0100 @@ -0,0 +1,5 @@ +#ifdef CONFIG_X86_32 +# include "segment_32.h" +#else +# include "../../segment_64.h" +#endif Index: head-2008-12-01/include/asm-x86/mach-xen/asm/smp.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/smp.h 2008-12-01 11:36:55.000000000 +0100 @@ -0,0 +1,5 @@ +#ifdef CONFIG_X86_32 +# include "smp_32.h" +#else +# include "smp_64.h" +#endif Index: head-2008-12-01/include/asm-x86/mach-xen/asm/smp_32.h =================================================================== --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/smp_32.h 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/smp_32.h 2008-12-01 11:36:55.000000000 +0100 @@ -11,7 +11,7 @@ #endif #if 
defined(CONFIG_X86_LOCAL_APIC) && !defined(__ASSEMBLY__) -#include +#include #include #include #ifdef CONFIG_X86_IO_APIC @@ -30,8 +30,8 @@ extern void smp_alloc_memory(void); extern int pic_mode; extern int smp_num_siblings; -extern cpumask_t cpu_sibling_map[]; -extern cpumask_t cpu_core_map[]; +DECLARE_PER_CPU(cpumask_t, cpu_sibling_map); +DECLARE_PER_CPU(cpumask_t, cpu_core_map); extern void (*mtrr_hook) (void); extern void zap_low_mappings (void); @@ -39,9 +39,11 @@ extern void lock_ipi_call_lock(void); extern void unlock_ipi_call_lock(void); #define MAX_APICID 256 -extern u8 x86_cpu_to_apicid[]; +extern u8 __initdata x86_cpu_to_apicid_init[]; +extern void *x86_cpu_to_apicid_ptr; +DECLARE_PER_CPU(u8, x86_cpu_to_apicid); -#define cpu_physical_id(cpu) x86_cpu_to_apicid[cpu] +#define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu) #ifdef CONFIG_HOTPLUG_CPU extern void cpu_exit_clear(void); Index: head-2008-12-01/include/asm-x86/mach-xen/asm/smp_64.h =================================================================== --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/smp_64.h 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/smp_64.h 2008-12-01 11:36:55.000000000 +0100 @@ -40,10 +40,19 @@ extern void lock_ipi_call_lock(void); extern void unlock_ipi_call_lock(void); extern int smp_num_siblings; extern void smp_send_reschedule(int cpu); +extern int smp_call_function_mask(cpumask_t mask, void (*func)(void *), + void *info, int wait); -extern cpumask_t cpu_sibling_map[NR_CPUS]; -extern cpumask_t cpu_core_map[NR_CPUS]; -extern u8 cpu_llc_id[NR_CPUS]; +/* + * cpu_sibling_map and cpu_core_map now live + * in the per cpu area + * + * extern cpumask_t cpu_sibling_map[NR_CPUS]; + * extern cpumask_t cpu_core_map[NR_CPUS]; + */ +DECLARE_PER_CPU(cpumask_t, cpu_sibling_map); +DECLARE_PER_CPU(cpumask_t, cpu_core_map); +DECLARE_PER_CPU(u8, cpu_llc_id); #define SMP_TRAMPOLINE_BASE 0x6000 @@ -70,6 +79,8 @@ extern unsigned __cpuinitdata 
disabled_c #endif /* CONFIG_SMP */ +#define safe_smp_processor_id() smp_processor_id() + #ifdef CONFIG_X86_LOCAL_APIC static inline int hard_smp_processor_id(void) { @@ -82,8 +93,9 @@ static inline int hard_smp_processor_id( * Some lowlevel functions might want to know about * the real APIC ID <-> CPU # mapping. */ -extern u8 x86_cpu_to_apicid[NR_CPUS]; /* physical ID */ -extern u8 x86_cpu_to_log_apicid[NR_CPUS]; +extern u8 __initdata x86_cpu_to_apicid_init[]; +extern void *x86_cpu_to_apicid_ptr; +DECLARE_PER_CPU(u8, x86_cpu_to_apicid); /* physical ID */ extern u8 bios_cpu_apicid[]; #ifdef CONFIG_X86_LOCAL_APIC @@ -118,8 +130,9 @@ static __inline int logical_smp_processo #endif #ifdef CONFIG_SMP -#define cpu_physical_id(cpu) x86_cpu_to_apicid[cpu] +#define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu) #else +extern unsigned int boot_cpu_id; #define cpu_physical_id(cpu) boot_cpu_id #endif /* !CONFIG_SMP */ #endif Index: head-2008-12-01/include/asm-x86/mach-xen/asm/swiotlb.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/swiotlb.h 2008-12-01 11:36:55.000000000 +0100 @@ -0,0 +1,5 @@ +#ifdef CONFIG_X86_32 +# include "swiotlb_32.h" +#else +# include "../../swiotlb.h" +#endif Index: head-2008-12-01/include/asm-x86/mach-xen/asm/system.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/system.h 2008-12-01 11:36:55.000000000 +0100 @@ -0,0 +1,5 @@ +#ifdef CONFIG_X86_32 +# include "system_32.h" +#else +# include "system_64.h" +#endif Index: head-2008-12-01/include/asm-x86/mach-xen/asm/system_32.h =================================================================== --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/system_32.h 2008-12-01 11:36:47.000000000 +0100 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/system_32.h 2008-12-01 
11:36:55.000000000 +0100 @@ -9,6 +9,7 @@ #include #ifdef __KERNEL__ +#define AT_VECTOR_SIZE_ARCH 2 /* entries in ARCH_DLINFO */ struct task_struct; /* one of the stranger aspects of C forward declarations.. */ extern struct task_struct * FASTCALL(__switch_to(struct task_struct *prev, struct task_struct *next)); @@ -138,7 +139,7 @@ static inline unsigned long xen_read_cr4 { unsigned long val; /* This could fault if %cr4 does not exist */ - asm("1: movl %%cr4, %0 \n" + asm volatile("1: movl %%cr4, %0 \n" "2: \n" ".section __ex_table,\"a\" \n" ".long 1b,2b \n" @@ -157,6 +158,11 @@ static inline void xen_wbinvd(void) asm volatile("wbinvd": : :"memory"); } +static inline void clflush(volatile void *__p) +{ + asm volatile("clflush %0" : "+m" (*(char __force *)__p)); +} + #define read_cr0() (xen_read_cr0()) #define write_cr0(x) (xen_write_cr0(x)) #define read_cr2() (xen_read_cr2()) @@ -207,6 +213,7 @@ static inline unsigned long get_limit(un #define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2) #define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2) +#define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM) /** * read_barrier_depends - Flush all pending reads that subsequents reads @@ -262,18 +269,18 @@ static inline unsigned long get_limit(un #define read_barrier_depends() do { } while(0) +#ifdef CONFIG_SMP +#define smp_mb() mb() +#ifdef CONFIG_X86_PPRO_FENCE +# define smp_rmb() rmb() +#else +# define smp_rmb() barrier() +#endif #ifdef CONFIG_X86_OOSTORE -/* Actually there are no OOO store capable CPUs for now that do SSE, - but make it already an possibility. 
*/ -#define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM) +# define smp_wmb() wmb() #else -#define wmb() __asm__ __volatile__ ("": : :"memory") +# define smp_wmb() barrier() #endif - -#ifdef CONFIG_SMP -#define smp_mb() mb() -#define smp_rmb() rmb() -#define smp_wmb() wmb() #define smp_read_barrier_depends() read_barrier_depends() #define set_mb(var, value) do { (void) xchg(&var, value); } while (0) #else @@ -300,5 +307,6 @@ extern unsigned long arch_align_stack(un extern void free_init_pages(char *what, unsigned long begin, unsigned long end); void default_idle(void); +void __show_registers(struct pt_regs *, int all); #endif Index: head-2008-12-01/include/asm-x86/mach-xen/asm/system_64.h =================================================================== --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/system_64.h 2008-12-01 11:36:47.000000000 +0100 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/system_64.h 2008-12-01 11:36:55.000000000 +0100 @@ -11,8 +11,12 @@ #ifdef __KERNEL__ -#define __STR(x) #x -#define STR(x) __STR(x) +/* entries in ARCH_DLINFO: */ +#ifdef CONFIG_IA32_EMULATION +# define AT_VECTOR_SIZE_ARCH 2 +#else +# define AT_VECTOR_SIZE_ARCH 1 +#endif #define __SAVE(reg,offset) "movq %%" #reg ",(14-" #offset ")*8(%%rsp)\n\t" #define __RESTORE(reg,offset) "movq (14-" #offset ")*8(%%rsp),%%" #reg "\n\t" @@ -92,7 +96,7 @@ static inline void write_cr0(unsigned lo #define read_cr3() ({ \ unsigned long __dummy; \ - asm("movq %%cr3,%0" : "=r" (__dummy)); \ + asm volatile("movq %%cr3,%0" : "=r" (__dummy)); \ machine_to_phys(__dummy); \ }) @@ -105,7 +109,7 @@ static inline void write_cr3(unsigned lo static inline unsigned long read_cr4(void) { unsigned long cr4; - asm("movq %%cr4,%0" : "=r" (cr4)); + asm volatile("movq %%cr4,%0" : "=r" (cr4)); return cr4; } @@ -131,12 +135,17 @@ static inline void write_cr8(unsigned lo #endif /* __KERNEL__ */ +static inline void clflush(volatile void *__p) +{ + asm volatile("clflush %0" : "+m" 
(*(char __force *)__p)); +} + #define nop() __asm__ __volatile__ ("nop") #ifdef CONFIG_SMP #define smp_mb() mb() -#define smp_rmb() rmb() -#define smp_wmb() wmb() +#define smp_rmb() barrier() +#define smp_wmb() barrier() #define smp_read_barrier_depends() do {} while(0) #else #define smp_mb() barrier() @@ -153,12 +162,8 @@ static inline void write_cr8(unsigned lo */ #define mb() asm volatile("mfence":::"memory") #define rmb() asm volatile("lfence":::"memory") - -#ifdef CONFIG_UNORDERED_IO #define wmb() asm volatile("sfence" ::: "memory") -#else -#define wmb() asm volatile("" ::: "memory") -#endif + #define read_barrier_depends() do {} while(0) #define set_mb(var, value) do { (void) xchg(&var, value); } while (0) Index: head-2008-12-01/include/asm-x86/mach-xen/asm/tlbflush.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/tlbflush.h 2008-12-01 11:36:55.000000000 +0100 @@ -0,0 +1,5 @@ +#ifdef CONFIG_X86_32 +# include "tlbflush_32.h" +#else +# include "tlbflush_64.h" +#endif Index: head-2008-12-01/include/asm-x86/mach-xen/asm/tlbflush_32.h =================================================================== --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/tlbflush_32.h 2008-12-01 11:36:47.000000000 +0100 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/tlbflush_32.h 2008-12-01 11:36:55.000000000 +0100 @@ -23,7 +23,6 @@ * - flush_tlb_page(vma, vmaddr) flushes one page * - flush_tlb_range(vma, start, end) flushes a range of pages * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages - * - flush_tlb_pgtables(mm, start, end) flushes a range of page tables * * ..but the i386 has somewhat limited tlb flushing capabilities, * and page-granular flushes are available only on i486 and up. 
@@ -97,10 +96,4 @@ static inline void flush_tlb_kernel_rang flush_tlb_all(); } -static inline void flush_tlb_pgtables(struct mm_struct *mm, - unsigned long start, unsigned long end) -{ - /* i386 does not keep any page table caches in TLB */ -} - #endif /* _I386_TLBFLUSH_H */ Index: head-2008-12-01/include/asm-x86/mach-xen/asm/tlbflush_64.h =================================================================== --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/tlbflush_64.h 2008-12-01 11:36:47.000000000 +0100 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/tlbflush_64.h 2008-12-01 11:36:55.000000000 +0100 @@ -28,7 +28,6 @@ * - flush_tlb_page(vma, vmaddr) flushes one page * - flush_tlb_range(vma, start, end) flushes a range of pages * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages - * - flush_tlb_pgtables(mm, start, end) flushes a range of page tables * * x86-64 can only flush individual pages or full VMs. For a range flush * we always do the full VM. Might be worth trying if for a small @@ -95,12 +94,4 @@ static inline void flush_tlb_kernel_rang flush_tlb_all(); } -static inline void flush_tlb_pgtables(struct mm_struct *mm, - unsigned long start, unsigned long end) -{ - /* x86_64 does not keep any page table caches in a software TLB. - The CPUs do in their hardware TLBs, but they are handled - by the normal TLB flushing algorithms. 
*/ -} - #endif /* _X8664_TLBFLUSH_H */ Index: head-2008-12-01/include/asm-x86/mach-xen/asm/xor.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/xor.h 2008-12-01 11:36:55.000000000 +0100 @@ -0,0 +1,5 @@ +#ifdef CONFIG_X86_32 +# include "../../xor_32.h" +#else +# include "xor_64.h" +#endif Index: head-2008-12-01/include/asm-x86/mach-xen/mach_time.h =================================================================== --- head-2008-12-01.orig/include/asm-x86/mach-xen/mach_time.h 2007-06-12 13:14:13.000000000 +0200 +++ head-2008-12-01/include/asm-x86/mach-xen/mach_time.h 2008-12-01 11:36:55.000000000 +0100 @@ -1,111 +1,2 @@ -/* - * include/asm-i386/mach-default/mach_time.h - * - * Machine specific set RTC function for generic. - * Split out from time.c by Osamu Tomita - */ -#ifndef _MACH_TIME_H -#define _MACH_TIME_H - -#include - -/* for check timing call set_rtc_mmss() 500ms */ -/* used in arch/i386/time.c::do_timer_interrupt() */ -#define USEC_AFTER 500000 -#define USEC_BEFORE 500000 - -/* - * In order to set the CMOS clock precisely, set_rtc_mmss has to be - * called 500 ms after the second nowtime has started, because when - * nowtime is written into the registers of the CMOS clock, it will - * jump to the next second precisely 500 ms later. Check the Motorola - * MC146818A or Dallas DS12887 data sheet for details. - * - * BUG: This routine does not handle hour overflow properly; it just - * sets the minutes. Usually you'll only notice that after reboot! 
- */ -static inline int mach_set_rtc_mmss(unsigned long nowtime) -{ - int retval = 0; - int real_seconds, real_minutes, cmos_minutes; - unsigned char save_control, save_freq_select; - - save_control = CMOS_READ(RTC_CONTROL); /* tell the clock it's being set */ - CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL); - - save_freq_select = CMOS_READ(RTC_FREQ_SELECT); /* stop and reset prescaler */ - CMOS_WRITE((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT); - - cmos_minutes = CMOS_READ(RTC_MINUTES); - if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) - BCD_TO_BIN(cmos_minutes); - - /* - * since we're only adjusting minutes and seconds, - * don't interfere with hour overflow. This avoids - * messing with unknown time zones but requires your - * RTC not to be off by more than 15 minutes - */ - real_seconds = nowtime % 60; - real_minutes = nowtime / 60; - if (((abs(real_minutes - cmos_minutes) + 15)/30) & 1) - real_minutes += 30; /* correct for half hour time zone */ - real_minutes %= 60; - - if (abs(real_minutes - cmos_minutes) < 30) { - if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) { - BIN_TO_BCD(real_seconds); - BIN_TO_BCD(real_minutes); - } - CMOS_WRITE(real_seconds,RTC_SECONDS); - CMOS_WRITE(real_minutes,RTC_MINUTES); - } else { - printk(KERN_WARNING - "set_rtc_mmss: can't update from %d to %d\n", - cmos_minutes, real_minutes); - retval = -1; - } - - /* The following flags have to be released exactly in this order, - * otherwise the DS12887 (popular MC146818A clone with integrated - * battery and quartz) will not reset the oscillator and will not - * update precisely 500 ms later. You won't find this mentioned in - * the Dallas Semiconductor data sheets, but who believes data - * sheets anyway ... 
-- Markus Kuhn - */ - CMOS_WRITE(save_control, RTC_CONTROL); - CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); - - return retval; -} - -static inline unsigned long mach_get_cmos_time(void) -{ - unsigned int year, mon, day, hour, min, sec; - - do { - sec = CMOS_READ(RTC_SECONDS); - min = CMOS_READ(RTC_MINUTES); - hour = CMOS_READ(RTC_HOURS); - day = CMOS_READ(RTC_DAY_OF_MONTH); - mon = CMOS_READ(RTC_MONTH); - year = CMOS_READ(RTC_YEAR); - } while (sec != CMOS_READ(RTC_SECONDS)); - - if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD) { - BCD_TO_BIN(sec); - BCD_TO_BIN(min); - BCD_TO_BIN(hour); - BCD_TO_BIN(day); - BCD_TO_BIN(mon); - BCD_TO_BIN(year); - } - - year += 1900; - if (year < 1970) - year += 100; - - return mktime(year, mon, day, hour, min, sec); -} - -#endif /* !_MACH_TIME_H */ +#include "../mc146818rtc_32.h" +#include "../mach-default/mach_time.h" Index: head-2008-12-01/include/asm-x86/mach-xen/mach_timer.h =================================================================== --- head-2008-12-01.orig/include/asm-x86/mach-xen/mach_timer.h 2007-06-12 13:14:13.000000000 +0200 +++ head-2008-12-01/include/asm-x86/mach-xen/mach_timer.h 2008-12-01 11:36:55.000000000 +0100 @@ -1,50 +1 @@ -/* - * include/asm-i386/mach-default/mach_timer.h - * - * Machine specific calibrate_tsc() for generic. - * Split out from timer_tsc.c by Osamu Tomita - */ -/* ------ Calibrate the TSC ------- - * Return 2^32 * (1 / (TSC clocks per usec)) for do_fast_gettimeoffset(). - * Too much 64-bit arithmetic here to do this cleanly in C, and for - * accuracy's sake we want to keep the overhead on the CTC speaker (channel 2) - * output busy loop as low as possible. We avoid reading the CTC registers - * directly because of the awkward 8-bit access mechanism of the 82C54 - * device. 
- */ -#ifndef _MACH_TIMER_H -#define _MACH_TIMER_H - -#define CALIBRATE_TIME_MSEC 30 /* 30 msecs */ -#define CALIBRATE_LATCH \ - ((CLOCK_TICK_RATE * CALIBRATE_TIME_MSEC + 1000/2)/1000) - -static inline void mach_prepare_counter(void) -{ - /* Set the Gate high, disable speaker */ - outb((inb(0x61) & ~0x02) | 0x01, 0x61); - - /* - * Now let's take care of CTC channel 2 - * - * Set the Gate high, program CTC channel 2 for mode 0, - * (interrupt on terminal count mode), binary count, - * load 5 * LATCH count, (LSB and MSB) to begin countdown. - * - * Some devices need a delay here. - */ - outb(0xb0, 0x43); /* binary, mode 0, LSB/MSB, Ch 2 */ - outb_p(CALIBRATE_LATCH & 0xff, 0x42); /* LSB of count */ - outb_p(CALIBRATE_LATCH >> 8, 0x42); /* MSB of count */ -} - -static inline void mach_countup(unsigned long *count_p) -{ - unsigned long count = 0; - do { - count++; - } while ((inb_p(0x61) & 0x20) == 0); - *count_p = count; -} - -#endif /* !_MACH_TIMER_H */ +#include "../mach-default/mach_timer.h" Index: head-2008-12-01/include/asm-x86/mmu.h =================================================================== --- head-2008-12-01.orig/include/asm-x86/mmu.h 2008-12-01 10:53:14.000000000 +0100 +++ head-2008-12-01/include/asm-x86/mmu.h 2008-12-01 11:36:55.000000000 +0100 @@ -16,6 +16,9 @@ typedef struct { rwlock_t ldtlock; #endif int size; +#ifdef CONFIG_XEN + unsigned has_foreign_mappings:1; +#endif struct mutex lock; void *vdso; } mm_context_t; Index: head-2008-12-01/include/linux/sysctl.h =================================================================== --- head-2008-12-01.orig/include/linux/sysctl.h 2008-12-01 10:53:14.000000000 +0100 +++ head-2008-12-01/include/linux/sysctl.h 2008-12-01 11:36:55.000000000 +0100 @@ -69,6 +69,7 @@ enum CTL_BUS=8, /* Busses */ CTL_ABI=9, /* Binary emulation */ CTL_CPU=10, /* CPU stuff (speed scaling, etc) */ + CTL_XEN=123, /* Xen info and control */ CTL_ARLAN=254, /* arlan wireless driver */ CTL_S390DBF=5677, /* s390 debug */ 
CTL_SUNRPC=7249, /* sunrpc debug */ Index: head-2008-12-01/include/xen/pcifront.h =================================================================== --- head-2008-12-01.orig/include/xen/pcifront.h 2007-06-18 08:38:13.000000000 +0200 +++ head-2008-12-01/include/xen/pcifront.h 2008-12-01 11:36:55.000000000 +0100 @@ -12,13 +12,11 @@ #ifndef __ia64__ +#include + struct pcifront_device; struct pci_bus; - -struct pcifront_sd { - int domain; - struct pcifront_device *pdev; -}; +#define pcifront_sd pci_sysdata static inline struct pcifront_device * pcifront_get_pdev(struct pcifront_sd *sd) @@ -34,18 +32,6 @@ static inline void pcifront_init_sd(stru sd->pdev = pdev; } -#if defined(CONFIG_PCI_DOMAINS) -static inline int pci_domain_nr(struct pci_bus *bus) -{ - struct pcifront_sd *sd = bus->sysdata; - return sd->domain; -} -static inline int pci_proc_domain(struct pci_bus *bus) -{ - return pci_domain_nr(bus); -} -#endif /* CONFIG_PCI_DOMAINS */ - static inline void pcifront_setup_root_resources(struct pci_bus *bus, struct pcifront_sd *sd) { Index: head-2008-12-01/include/xen/sysctl.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ head-2008-12-01/include/xen/sysctl.h 2008-12-01 11:36:55.000000000 +0100 @@ -0,0 +1,11 @@ +#ifndef _XEN_SYSCTL_H +#define _XEN_SYSCTL_H + +/* CTL_XEN names: */ +enum +{ + CTL_XEN_INDEPENDENT_WALLCLOCK=1, + CTL_XEN_PERMITTED_CLOCK_JITTER=2, +}; + +#endif /* _XEN_SYSCTL_H */ Index: head-2008-12-01/include/xen/xenbus.h =================================================================== --- head-2008-12-01.orig/include/xen/xenbus.h 2008-12-01 11:36:47.000000000 +0100 +++ head-2008-12-01/include/xen/xenbus.h 2008-12-01 11:36:55.000000000 +0100 @@ -107,7 +107,7 @@ struct xenbus_driver { int (*suspend)(struct xenbus_device *dev); int (*suspend_cancel)(struct xenbus_device *dev); int (*resume)(struct xenbus_device *dev); - int (*uevent)(struct xenbus_device *, char **, int, char *, 
int); + int (*uevent)(struct xenbus_device *, struct kobj_uevent_env *); struct device_driver driver; int (*read_otherend_details)(struct xenbus_device *dev); int (*is_ready)(struct xenbus_device *dev); Index: head-2008-12-01/kernel/kexec.c =================================================================== --- head-2008-12-01.orig/kernel/kexec.c 2008-12-01 11:32:38.000000000 +0100 +++ head-2008-12-01/kernel/kexec.c 2008-12-01 11:36:55.000000000 +0100 @@ -1235,6 +1235,7 @@ static int __init crash_notes_memory_ini module_init(crash_notes_memory_init) +#ifndef CONFIG_XEN /* * parsing the "crashkernel" commandline * @@ -1397,7 +1398,7 @@ int __init parse_crashkernel(char *cm return 0; } - +#endif void crash_save_vmcoreinfo(void) @@ -1454,7 +1455,18 @@ static int __init crash_save_vmcoreinfo_ VMCOREINFO_SYMBOL(init_uts_ns); VMCOREINFO_SYMBOL(node_online_map); +#ifndef CONFIG_X86_XEN + VMCOREINFO_SYMBOL(swapper_pg_dir); +#else +/* + * Since for x86-32 Xen swapper_pg_dir is a pointer rather than an array, + * make the value stored consistent with native (i.e. the base address of + * the page directory). 
+ */ +# define swapper_pg_dir *swapper_pg_dir VMCOREINFO_SYMBOL(swapper_pg_dir); +# undef swapper_pg_dir +#endif VMCOREINFO_SYMBOL(_stext); #ifndef CONFIG_NEED_MULTIPLE_NODES Index: head-2008-12-01/kernel/sysctl_check.c =================================================================== --- head-2008-12-01.orig/kernel/sysctl_check.c 2008-12-01 10:53:14.000000000 +0100 +++ head-2008-12-01/kernel/sysctl_check.c 2008-12-01 11:36:55.000000000 +0100 @@ -4,6 +4,7 @@ #include #include #include +#include struct trans_ctl_table { int ctl_name; @@ -897,6 +898,14 @@ static const struct trans_ctl_table tran {} }; +#ifdef CONFIG_XEN +static struct trans_ctl_table trans_xen_table[] = { + { CTL_XEN_INDEPENDENT_WALLCLOCK, "independent_wallclock" }, + { CTL_XEN_PERMITTED_CLOCK_JITTER, "permitted_clock_jitter" }, + {} +}; +#endif + static const struct trans_ctl_table trans_arlan_conf_table0[] = { { 1, "spreadingCode" }, { 2, "channelNumber" }, @@ -1232,6 +1241,9 @@ static const struct trans_ctl_table tran { CTL_BUS, "bus", trans_bus_table }, { CTL_ABI, "abi" }, /* CTL_CPU not used */ +#ifdef CONFIG_XEN + { CTL_XEN, "xen", trans_xen_table }, +#endif { CTL_ARLAN, "arlan", trans_arlan_table }, { CTL_S390DBF, "s390dbf", trans_s390dbf_table }, { CTL_SUNRPC, "sunrpc", trans_sunrpc_table }, Index: head-2008-12-01/lib/swiotlb-xen.c =================================================================== --- head-2008-12-01.orig/lib/swiotlb-xen.c 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/lib/swiotlb-xen.c 2008-12-01 11:36:55.000000000 +0100 @@ -27,7 +27,7 @@ #include #include #include -#include +#include int swiotlb; EXPORT_SYMBOL(swiotlb); @@ -574,9 +574,10 @@ swiotlb_sync_single_for_device(struct de * same here. 
*/ int -swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg, int nelems, +swiotlb_map_sg(struct device *hwdev, struct scatterlist *sgl, int nelems, int dir) { + struct scatterlist *sg; struct phys_addr buffer; dma_addr_t dev_addr; char *map; @@ -584,22 +585,22 @@ swiotlb_map_sg(struct device *hwdev, str BUG_ON(dir == DMA_NONE); - for (i = 0; i < nelems; i++, sg++) { - dev_addr = gnttab_dma_map_page(sg->page) + sg->offset; + for_each_sg(sgl, sg, nelems, i) { + dev_addr = gnttab_dma_map_page(sg_page(sg)) + sg->offset; - if (range_straddles_page_boundary(page_to_pseudophys(sg->page) + if (range_straddles_page_boundary(page_to_pseudophys(sg_page(sg)) + sg->offset, sg->length) || address_needs_mapping(hwdev, dev_addr)) { gnttab_dma_unmap_page(dev_addr); - buffer.page = sg->page; + buffer.page = sg_page(sg); buffer.offset = sg->offset; map = map_single(hwdev, buffer, sg->length, dir); if (!map) { /* Don't panic here, we expect map_sg users to do proper error handling. */ swiotlb_full(hwdev, sg->length, dir, 0); - swiotlb_unmap_sg(hwdev, sg - i, i, dir); - sg[0].dma_length = 0; + swiotlb_unmap_sg(hwdev, sgl, i, dir); + sgl[0].dma_length = 0; return 0; } sg->dma_address = virt_to_bus(map); @@ -615,19 +616,21 @@ swiotlb_map_sg(struct device *hwdev, str * concerning calls here are the same as for swiotlb_unmap_single() above. */ void -swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nelems, +swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems, int dir) { + struct scatterlist *sg; int i; BUG_ON(dir == DMA_NONE); - for (i = 0; i < nelems; i++, sg++) + for_each_sg(sgl, sg, nelems, i) { if (in_swiotlb_aperture(sg->dma_address)) unmap_single(hwdev, bus_to_virt(sg->dma_address), sg->dma_length, dir); else gnttab_dma_unmap_page(sg->dma_address); + } } /* @@ -638,31 +641,35 @@ swiotlb_unmap_sg(struct device *hwdev, s * and usage. 
*/ void -swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg, +swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sgl, int nelems, int dir) { + struct scatterlist *sg; int i; BUG_ON(dir == DMA_NONE); - for (i = 0; i < nelems; i++, sg++) + for_each_sg(sgl, sg, nelems, i) { if (in_swiotlb_aperture(sg->dma_address)) sync_single(hwdev, bus_to_virt(sg->dma_address), sg->dma_length, dir); + } } void -swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg, +swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sgl, int nelems, int dir) { + struct scatterlist *sg; int i; BUG_ON(dir == DMA_NONE); - for (i = 0; i < nelems; i++, sg++) + for_each_sg(sgl, sg, nelems, i) { if (in_swiotlb_aperture(sg->dma_address)) sync_single(hwdev, bus_to_virt(sg->dma_address), sg->dma_length, dir); + } } #ifdef CONFIG_HIGHMEM