From: www.kernel.org
Subject: Update to 2.6.23
Patch-mainline: 2.6.23
Automatically created from "patches.kernel.org/patch-2.6.23" by xen-port-patches.py
Acked-by: jbeulich@novell.com

Index: head-2008-12-01/arch/x86/Makefile
===================================================================
--- head-2008-12-01.orig/arch/x86/Makefile	2008-12-01 11:11:08.000000000 +0100
+++ head-2008-12-01/arch/x86/Makefile	2008-12-01 11:36:47.000000000 +0100
@@ -148,7 +148,7 @@ libs-y += arch/x86/lib/
 core-y += $(fcore-y)
 # Xen paravirtualization support
-core-$(CONFIG_XEN) += arch/x86/xen/
+core-$(CONFIG_PARAVIRT_XEN) += arch/x86/xen/
 # lguest paravirtualization support
 core-$(CONFIG_LGUEST_GUEST) += arch/x86/lguest/
Index: head-2008-12-01/arch/x86/kernel/acpi/sleep_32-xen.c
===================================================================
--- head-2008-12-01.orig/arch/x86/kernel/acpi/sleep_32-xen.c	2008-04-15 09:29:41.000000000 +0200
+++ head-2008-12-01/arch/x86/kernel/acpi/sleep_32-xen.c	2008-12-01 11:36:47.000000000 +0100
@@ -15,7 +15,7 @@
 #ifndef CONFIG_ACPI_PV_SLEEP
 /* address in low memory of the wakeup routine. */
 unsigned long acpi_wakeup_address = 0;
-unsigned long acpi_video_flags;
+unsigned long acpi_realmode_flags;
 extern char wakeup_start, wakeup_end;
 extern unsigned long FASTCALL(acpi_copy_wakeup_routine(unsigned long));
@@ -74,9 +74,11 @@ static int __init acpi_sleep_setup(char
 {
	while ((str != NULL) && (*str != '\0')) {
		if (strncmp(str, "s3_bios", 7) == 0)
-			acpi_video_flags = 1;
+			acpi_realmode_flags |= 1;
		if (strncmp(str, "s3_mode", 7) == 0)
-			acpi_video_flags |= 2;
+			acpi_realmode_flags |= 2;
+		if (strncmp(str, "s3_beep", 7) == 0)
+			acpi_realmode_flags |= 4;
		str = strchr(str, ',');
		if (str != NULL)
			str += strspn(str, ", \t");
@@ -86,9 +88,11 @@ static int __init acpi_sleep_setup(char
 __setup("acpi_sleep=", acpi_sleep_setup);
+/* Ouch, we want to delete this.
We already have better version in userspace, in + s2ram from suspend.sf.net project */ static __init int reset_videomode_after_s3(struct dmi_system_id *d) { - acpi_video_flags |= 2; + acpi_realmode_flags |= 2; return 0; } Index: head-2008-12-01/arch/x86/kernel/asm-offsets_32.c =================================================================== --- head-2008-12-01.orig/arch/x86/kernel/asm-offsets_32.c 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/arch/x86/kernel/asm-offsets_32.c 2008-12-01 11:36:47.000000000 +0100 @@ -19,7 +19,9 @@ #include #include +#if defined(CONFIG_XEN) || defined(CONFIG_PARAVIRT_XEN) #include +#endif #include #include "../../../drivers/lguest/lg.h" @@ -121,7 +123,7 @@ void foo(void) OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0); #endif -#ifdef CONFIG_XEN +#ifdef CONFIG_PARAVIRT_XEN BLANK(); OFFSET(XEN_vcpu_info_mask, vcpu_info, evtchn_upcall_mask); OFFSET(XEN_vcpu_info_pending, vcpu_info, evtchn_upcall_pending); Index: head-2008-12-01/arch/x86/kernel/cpu/common-xen.c =================================================================== --- head-2008-12-01.orig/arch/x86/kernel/cpu/common-xen.c 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/arch/x86/kernel/cpu/common-xen.c 2008-12-01 11:36:47.000000000 +0100 @@ -360,6 +360,8 @@ static void __cpuinit generic_identify(s if ( xlvl >= 0x80000004 ) get_model_name(c); /* Default name */ } + + init_scattered_cpuid_features(c); } early_intel_workaround(c); @@ -611,7 +613,6 @@ extern int nsc_init_cpu(void); extern int amd_init_cpu(void); extern int centaur_init_cpu(void); extern int transmeta_init_cpu(void); -extern int rise_init_cpu(void); extern int nexgen_init_cpu(void); extern int umc_init_cpu(void); @@ -623,7 +624,6 @@ void __init early_cpu_init(void) amd_init_cpu(); centaur_init_cpu(); transmeta_init_cpu(); - rise_init_cpu(); nexgen_init_cpu(); umc_init_cpu(); early_cpu_detect(); Index: head-2008-12-01/arch/x86/kernel/cpu/mtrr/main-xen.c =================================================================== --- head-2008-12-01.orig/arch/x86/kernel/cpu/mtrr/main-xen.c 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/arch/x86/kernel/cpu/mtrr/main-xen.c 2008-12-01 11:36:47.000000000 +0100 @@ -167,7 +167,7 @@ mtrr_del(int reg, unsigned long base, un EXPORT_SYMBOL(mtrr_add); EXPORT_SYMBOL(mtrr_del); -__init void mtrr_bp_init(void) +void __init mtrr_bp_init(void) { } Index: head-2008-12-01/arch/x86/kernel/e820_32-xen.c =================================================================== --- head-2008-12-01.orig/arch/x86/kernel/e820_32-xen.c 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/arch/x86/kernel/e820_32-xen.c 2008-12-01 11:36:47.000000000 +0100 @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -343,6 +344,37 @@ static int __init request_standard_resou subsys_initcall(request_standard_resources); +#if defined(CONFIG_PM) && defined(CONFIG_HIBERNATION) +/** + * e820_mark_nosave_regions - Find the ranges of physical addresses that do not + * correspond to e820 RAM areas and mark the corresponding pages as nosave for + * hibernation. + * + * This function requires the e820 map to be sorted and without any + * overlapping entries and assumes the first e820 area to be RAM. 
+ */ +void __init e820_mark_nosave_regions(void) +{ + int i; + unsigned long pfn; + + pfn = PFN_DOWN(e820.map[0].addr + e820.map[0].size); + for (i = 1; i < e820.nr_map; i++) { + struct e820entry *ei = &e820.map[i]; + + if (pfn < PFN_UP(ei->addr)) + register_nosave_region(pfn, PFN_UP(ei->addr)); + + pfn = PFN_DOWN(ei->addr + ei->size); + if (ei->type != E820_RAM) + register_nosave_region(PFN_UP(ei->addr), pfn); + + if (pfn >= max_low_pfn) + break; + } +} +#endif + void __init add_memory_region(unsigned long long start, unsigned long long size, int type) { @@ -804,7 +836,7 @@ void __init print_memory_map(char *who) case E820_NVS: printk("(ACPI NVS)\n"); break; - default: printk("type %lu\n", e820.map[i].type); + default: printk("type %u\n", e820.map[i].type); break; } } Index: head-2008-12-01/arch/x86/kernel/entry_32.S =================================================================== --- head-2008-12-01.orig/arch/x86/kernel/entry_32.S 2008-12-01 11:21:02.000000000 +0100 +++ head-2008-12-01/arch/x86/kernel/entry_32.S 2008-12-01 11:36:47.000000000 +0100 @@ -1112,7 +1112,7 @@ ENTRY(kernel_thread_helper) CFI_ENDPROC ENDPROC(kernel_thread_helper) -#ifdef CONFIG_XEN +#ifdef CONFIG_PARAVIRT_XEN /* Xen doesn't set %esp to be precisely what the normal sysenter entrypoint expects, so fix it up before using the normal path. */ ENTRY(xen_sysenter_target) @@ -1205,7 +1205,7 @@ ENTRY(xen_failsafe_callback) .previous ENDPROC(xen_failsafe_callback) -#endif /* CONFIG_XEN */ +#endif /* CONFIG_PARAVIRT_XEN */ #ifdef CONFIG_FTRACE #ifdef CONFIG_DYNAMIC_FTRACE Index: head-2008-12-01/arch/x86/kernel/entry_32-xen.S =================================================================== --- head-2008-12-01.orig/arch/x86/kernel/entry_32-xen.S 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/arch/x86/kernel/entry_32-xen.S 2008-12-01 11:36:47.000000000 +0100 @@ -452,9 +452,6 @@ restore_nocheck_notrace: 1: INTERRUPT_RETURN .section .fixup,"ax" iret_exc: -#ifndef CONFIG_XEN - ENABLE_INTERRUPTS(CLBR_NONE) -#endif pushl $0 # no error code pushl $do_iret_error jmp error_code Index: head-2008-12-01/arch/x86/kernel/head_32-xen.S =================================================================== --- head-2008-12-01.orig/arch/x86/kernel/head_32-xen.S 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/arch/x86/kernel/head_32-xen.S 2008-12-01 11:36:47.000000000 +0100 @@ -86,7 +86,10 @@ ENTRY(_stext) /* * BSS section */ -.section ".bss.page_aligned","w" +.section ".bss.page_aligned","wa" + .align PAGE_SIZE_asm +ENTRY(swapper_pg_pmd) + .fill 1024,4,0 ENTRY(empty_zero_page) .fill 4096,1,0 @@ -136,25 +139,25 @@ ENTRY(empty_zero_page) #endif /* CONFIG_XEN_COMPAT <= 0x030002 */ - ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz, "linux") - ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION, .asciz, "2.6") - ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION, .asciz, "xen-3.0") - ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE, .long, __PAGE_OFFSET) + ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz "linux") + ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION, .asciz "2.6") + ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION, .asciz "xen-3.0") + ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE, .long __PAGE_OFFSET) #if CONFIG_XEN_COMPAT <= 0x030002 - ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET, .long, __PAGE_OFFSET) + ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET, .long __PAGE_OFFSET) #else - ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET, .long, 0) + ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET, .long 0) #endif - ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, .long, startup_32) - ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .long, hypercall_page) - 
ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW, .long, HYPERVISOR_VIRT_START) - ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz, "writable_page_tables|writable_descriptor_tables|auto_translated_physmap|pae_pgdir_above_4gb|supervisor_mode_kernel") + ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, .long startup_32) + ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .long hypercall_page) + ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW, .long HYPERVISOR_VIRT_START) + ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz "writable_page_tables|writable_descriptor_tables|auto_translated_physmap|pae_pgdir_above_4gb|supervisor_mode_kernel") #ifdef CONFIG_X86_PAE - ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz, "yes") - ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, .quad, _PAGE_PRESENT,_PAGE_PRESENT) + ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes") + ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, .quad _PAGE_PRESENT, _PAGE_PRESENT) #else - ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz, "no") - ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, .long, _PAGE_PRESENT,_PAGE_PRESENT) + ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "no") + ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, .long _PAGE_PRESENT, _PAGE_PRESENT) #endif - ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz, "generic") - ELFNOTE(Xen, XEN_ELFNOTE_SUSPEND_CANCEL, .long, 1) + ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic") + ELFNOTE(Xen, XEN_ELFNOTE_SUSPEND_CANCEL, .long 1) Index: head-2008-12-01/arch/x86/kernel/init_task-xen.c =================================================================== --- head-2008-12-01.orig/arch/x86/kernel/init_task-xen.c 2007-06-12 13:12:48.000000000 +0200 +++ head-2008-12-01/arch/x86/kernel/init_task-xen.c 2008-12-01 11:36:47.000000000 +0100 @@ -46,6 +46,6 @@ EXPORT_SYMBOL(init_task); * per-CPU TSS segments. Threads are completely 'soft' on Linux, * no more per-task TSS's. */ -DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_internodealigned_in_smp = INIT_TSS; +DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss) = INIT_TSS; #endif Index: head-2008-12-01/arch/x86/kernel/io_apic_32-xen.c =================================================================== --- head-2008-12-01.orig/arch/x86/kernel/io_apic_32-xen.c 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/arch/x86/kernel/io_apic_32-xen.c 2008-12-01 11:36:47.000000000 +0100 @@ -402,14 +402,6 @@ static void set_ioapic_affinity_irq(unsi # include /* kmalloc() */ # include /* time_after() */ -#ifdef CONFIG_BALANCED_IRQ_DEBUG -# define TDprintk(x...) do { printk("<%ld:%s:%d>: ", jiffies, __FILE__, __LINE__); printk(x); } while (0) -# define Dprintk(x...) do { TDprintk(x); } while (0) -# else -# define TDprintk(x...) -# define Dprintk(x...) -# endif - #define IRQBALANCE_CHECK_ARCH -999 #define MAX_BALANCED_IRQ_INTERVAL (5*HZ) #define MIN_BALANCED_IRQ_INTERVAL (HZ/2) @@ -492,7 +484,7 @@ static inline void balance_irq(int cpu, static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold) { int i, j; - Dprintk("Rotating IRQs among CPUs.\n"); + for_each_online_cpu(i) { for (j = 0; j < NR_IRQS; j++) { if (!irq_desc[j].action) @@ -609,19 +601,11 @@ tryanothercpu: max_loaded = tmp_loaded; /* processor */ imbalance = (max_cpu_irq - min_cpu_irq) / 2; - Dprintk("max_loaded cpu = %d\n", max_loaded); - Dprintk("min_loaded cpu = %d\n", min_loaded); - Dprintk("max_cpu_irq load = %ld\n", max_cpu_irq); - Dprintk("min_cpu_irq load = %ld\n", min_cpu_irq); - Dprintk("load imbalance = %lu\n", imbalance); - /* if imbalance is less than approx 10% of max load, then * observe diminishing returns action. 
- quit */ - if (imbalance < (max_cpu_irq >> 3)) { - Dprintk("Imbalance too trivial\n"); + if (imbalance < (max_cpu_irq >> 3)) goto not_worth_the_effort; - } tryanotherirq: /* if we select an IRQ to move that can't go where we want, then @@ -678,9 +662,6 @@ tryanotherirq: cpus_and(tmp, target_cpu_mask, allowed_mask); if (!cpus_empty(tmp)) { - - Dprintk("irq = %d moved to cpu = %d\n", - selected_irq, min_loaded); /* mark for change destination */ set_pending_irq(selected_irq, cpumask_of_cpu(min_loaded)); @@ -700,7 +681,6 @@ not_worth_the_effort: */ balanced_irq_interval = min((long)MAX_BALANCED_IRQ_INTERVAL, balanced_irq_interval + BALANCED_IRQ_MORE_DELTA); - Dprintk("IRQ worth rotating not found\n"); return; } @@ -716,6 +696,7 @@ static int balanced_irq(void *unused) set_pending_irq(i, cpumask_of_cpu(0)); } + set_freezable(); for ( ; ; ) { time_remaining = schedule_timeout_interruptible(time_remaining); try_to_freeze(); @@ -825,14 +806,6 @@ static int pirq_entries [MAX_PIRQS]; static int pirqs_enabled; int skip_ioapic_setup; -static int __init ioapic_setup(char *str) -{ - skip_ioapic_setup = 1; - return 1; -} - -__setup("noapic", ioapic_setup); - static int __init ioapic_pirq_setup(char *str) { int i, max; @@ -1323,12 +1296,15 @@ static struct irq_chip ioapic_chip; static void ioapic_register_intr(int irq, int vector, unsigned long trigger) { if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || - trigger == IOAPIC_LEVEL) + trigger == IOAPIC_LEVEL) { + irq_desc[irq].status |= IRQ_LEVEL; set_irq_chip_and_handler_name(irq, &ioapic_chip, handle_fasteoi_irq, "fasteoi"); - else + } else { + irq_desc[irq].status &= ~IRQ_LEVEL; set_irq_chip_and_handler_name(irq, &ioapic_chip, handle_edge_irq, "edge"); + } set_intr_gate(vector, interrupt[irq]); } #else @@ -1957,7 +1933,7 @@ __setup("no_timer_check", notimercheck); * - if this function detects that timer IRQs are defunct, then we fall * back to ISA timer IRQs */ -int __init timer_irq_works(void) +static int __init timer_irq_works(void) { unsigned long t1 = jiffies; Index: head-2008-12-01/arch/x86/kernel/irq_32-xen.c =================================================================== --- head-2008-12-01.orig/arch/x86/kernel/irq_32-xen.c 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/arch/x86/kernel/irq_32-xen.c 2008-12-01 11:36:47.000000000 +0100 @@ -21,7 +21,7 @@ #include #include -DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_internodealigned_in_smp; +DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); EXPORT_PER_CPU_SYMBOL(irq_stat); DEFINE_PER_CPU(struct pt_regs *, irq_regs); @@ -149,15 +149,11 @@ fastcall unsigned int do_IRQ(struct pt_r #ifdef CONFIG_4KSTACKS -/* - * These should really be __section__(".bss.page_aligned") as well, but - * gcc's 3.0 and earlier don't handle that correctly. 
- */ static char softirq_stack[NR_CPUS * THREAD_SIZE] - __attribute__((__aligned__(THREAD_SIZE))); + __attribute__((__section__(".bss.page_aligned"))); static char hardirq_stack[NR_CPUS * THREAD_SIZE] - __attribute__((__aligned__(THREAD_SIZE))); + __attribute__((__section__(".bss.page_aligned"))); /* * allocate per-cpu stacks for hardirq and for softirq processing Index: head-2008-12-01/arch/x86/kernel/microcode-xen.c =================================================================== --- head-2008-12-01.orig/arch/x86/kernel/microcode-xen.c 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/arch/x86/kernel/microcode-xen.c 2008-12-01 11:36:47.000000000 +0100 @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include Index: head-2008-12-01/arch/x86/kernel/pci-dma-xen.c =================================================================== --- head-2008-12-01.orig/arch/x86/kernel/pci-dma-xen.c 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/arch/x86/kernel/pci-dma-xen.c 2008-12-01 11:36:47.000000000 +0100 @@ -24,7 +24,7 @@ #include #ifdef __x86_64__ -#include +#include int iommu_merge __read_mostly = 0; EXPORT_SYMBOL(iommu_merge); Index: head-2008-12-01/arch/x86/kernel/process_32-xen.c =================================================================== --- head-2008-12-01.orig/arch/x86/kernel/process_32-xen.c 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/arch/x86/kernel/process_32-xen.c 2008-12-01 11:36:47.000000000 +0100 @@ -241,6 +241,7 @@ early_param("idle", idle_setup); void show_regs(struct pt_regs * regs) { unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L; + unsigned long d0, d1, d2, d3, d6, d7; printk("\n"); printk("Pid: %d, comm: %20s\n", current->pid, current->comm); @@ -265,6 +266,17 @@ void show_regs(struct pt_regs * regs) cr3 = read_cr3(); cr4 = read_cr4_safe(); printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4); + + get_debugreg(d0, 0); + get_debugreg(d1, 1); + get_debugreg(d2, 2); + get_debugreg(d3, 3); + printk("DR0: %08lx DR1: %08lx DR2: %08lx DR3: %08lx\n", + d0, d1, d2, d3); + get_debugreg(d6, 6); + get_debugreg(d7, 7); + printk("DR6: %08lx DR7: %08lx\n", d6, d7); + show_trace(NULL, regs, ®s->esp); } @@ -473,7 +485,30 @@ int dump_task_regs(struct task_struct *t return 1; } -static noinline void __switch_to_xtra(struct task_struct *next_p) +#ifdef CONFIG_SECCOMP +void hard_disable_TSC(void) +{ + write_cr4(read_cr4() | X86_CR4_TSD); +} +void disable_TSC(void) +{ + preempt_disable(); + if (!test_and_set_thread_flag(TIF_NOTSC)) + /* + * Must flip the CPU state synchronously with + * TIF_NOTSC in the current running context. + */ + hard_disable_TSC(); + preempt_enable(); +} +void hard_enable_TSC(void) +{ + write_cr4(read_cr4() & ~X86_CR4_TSD); +} +#endif /* CONFIG_SECCOMP */ + +static noinline void +__switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p) { struct thread_struct *next; @@ -488,33 +523,17 @@ static noinline void __switch_to_xtra(st set_debugreg(next->debugreg[6], 6); set_debugreg(next->debugreg[7], 7); } -} -/* - * This function selects if the context switch from prev to next - * has to tweak the TSC disable bit in the cr4. - */ -static inline void disable_tsc(struct task_struct *prev_p, - struct task_struct *next_p) -{ - struct thread_info *prev, *next; - - /* - * gcc should eliminate the ->thread_info dereference if - * has_secure_computing returns 0 at compile time (SECCOMP=n). 
- */ - prev = task_thread_info(prev_p); - next = task_thread_info(next_p); - - if (has_secure_computing(prev) || has_secure_computing(next)) { - /* slow path here */ - if (has_secure_computing(prev) && - !has_secure_computing(next)) { - write_cr4(read_cr4() & ~X86_CR4_TSD); - } else if (!has_secure_computing(prev) && - has_secure_computing(next)) - write_cr4(read_cr4() | X86_CR4_TSD); +#ifdef CONFIG_SECCOMP + if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ + test_tsk_thread_flag(next_p, TIF_NOTSC)) { + /* prev and next are different */ + if (test_tsk_thread_flag(next_p, TIF_NOTSC)) + hard_disable_TSC(); + else + hard_enable_TSC(); } +#endif } /* @@ -649,10 +668,9 @@ struct task_struct fastcall * __switch_t /* * Now maybe handle debug registers */ - if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW)) - __switch_to_xtra(next_p); - - disable_tsc(prev_p, next_p); + if (unlikely(task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV || + task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT)) + __switch_to_xtra(prev_p, next_p); /* * Leave lazy mode, flushing any hypercalls made here. Index: head-2008-12-01/arch/x86/kernel/setup_32-xen.c =================================================================== --- head-2008-12-01.orig/arch/x86/kernel/setup_32-xen.c 2008-12-01 11:36:07.000000000 +0100 +++ head-2008-12-01/arch/x86/kernel/setup_32-xen.c 2008-12-01 11:36:47.000000000 +0100 @@ -114,19 +114,10 @@ static unsigned int highmem_pages = -1; /* * Setup options */ -struct drive_info_struct { char dummy[32]; } drive_info; -#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_HD) || \ - defined(CONFIG_BLK_DEV_IDE_MODULE) || defined(CONFIG_BLK_DEV_HD_MODULE) -EXPORT_SYMBOL(drive_info); -#endif struct screen_info screen_info; EXPORT_SYMBOL(screen_info); struct apm_info apm_info; EXPORT_SYMBOL(apm_info); -struct sys_desc_table_struct { - unsigned short length; - unsigned char table[0]; -}; struct edid_info edid_info; EXPORT_SYMBOL_GPL(edid_info); #ifndef CONFIG_XEN @@ -149,7 +140,7 @@ unsigned long saved_videomode; static char __initdata command_line[COMMAND_LINE_SIZE]; -unsigned char __initdata boot_params[PARAM_SIZE]; +struct boot_params __initdata boot_params; /* * Point at the empty zero page to start with. We map the real shared_info @@ -316,18 +307,18 @@ unsigned long __init find_max_low_pfn(vo printk(KERN_WARNING "Warning only %ldMB will be used.\n", MAXMEM>>20); if (max_pfn > MAX_NONPAE_PFN) - printk(KERN_WARNING "Use a PAE enabled kernel.\n"); + printk(KERN_WARNING "Use a HIGHMEM64G enabled kernel.\n"); else printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n"); max_pfn = MAXMEM_PFN; #else /* !CONFIG_HIGHMEM */ -#ifndef CONFIG_X86_PAE +#ifndef CONFIG_HIGHMEM64G if (max_pfn > MAX_NONPAE_PFN) { max_pfn = MAX_NONPAE_PFN; printk(KERN_WARNING "Warning only 4GB will be used.\n"); - printk(KERN_WARNING "Use a PAE enabled kernel.\n"); + printk(KERN_WARNING "Use a HIGHMEM64G enabled kernel.\n"); } -#endif /* !CONFIG_X86_PAE */ +#endif /* !CONFIG_HIGHMEM64G */ #endif /* !CONFIG_HIGHMEM */ } else { if (highmem_pages == -1) @@ -514,7 +505,7 @@ void __init setup_bootmem_allocator(void * * This should all compile down to nothing when NUMA is off. */ -void __init remapped_pgdat_init(void) +static void __init remapped_pgdat_init(void) { int nid; @@ -589,7 +580,6 @@ void __init setup_arch(char **cmdline_p) properly. Setting ROOT_DEV to default to /dev/ram0 breaks initrd. 
*/ ROOT_DEV = MKDEV(UNNAMED_MAJOR,0); - drive_info = DRIVE_INFO; screen_info = SCREEN_INFO; copy_edid(); apm_info.bios = APM_BIOS_INFO; @@ -767,6 +757,8 @@ void __init setup_arch(char **cmdline_p) * NOTE: at this point the bootmem allocator is fully available. */ + paravirt_post_allocator_init(); + if (is_initial_xendomain()) dmi_scan_machine(); @@ -814,6 +806,7 @@ void __init setup_arch(char **cmdline_p) #endif e820_register_memory(); + e820_mark_nosave_regions(); if (is_initial_xendomain()) { #ifdef CONFIG_VT Index: head-2008-12-01/arch/x86/kernel/smp_32-xen.c =================================================================== --- head-2008-12-01.orig/arch/x86/kernel/smp_32-xen.c 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/arch/x86/kernel/smp_32-xen.c 2008-12-01 11:36:47.000000000 +0100 @@ -22,6 +22,7 @@ #include #include +#include #if 0 #include #endif @@ -217,13 +218,13 @@ static unsigned long flush_va; static DEFINE_SPINLOCK(tlbstate_lock); /* - * We cannot call mmdrop() because we are in interrupt context, + * We cannot call mmdrop() because we are in interrupt context, * instead update mm->cpu_vm_mask. * * We need to reload %cr3 since the page tables may be going * away from under us.. */ -static inline void leave_mm (unsigned long cpu) +void leave_mm(unsigned long cpu) { if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) BUG(); Index: head-2008-12-01/arch/x86/kernel/time_32-xen.c =================================================================== --- head-2008-12-01.orig/arch/x86/kernel/time_32-xen.c 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/arch/x86/kernel/time_32-xen.c 2008-12-01 11:36:47.000000000 +0100 @@ -76,11 +76,12 @@ #include #include -#ifdef CONFIG_X86_32 #include DEFINE_SPINLOCK(i8253_lock); EXPORT_SYMBOL(i8253_lock); -#else + +#ifdef CONFIG_X86_64 +#include volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES; #endif @@ -758,56 +759,10 @@ unsigned long read_persistent_clock(void return retval; } -static void sync_cmos_clock(unsigned long dummy); - -static DEFINE_TIMER(sync_cmos_timer, sync_cmos_clock, 0, 0); -int no_sync_cmos_clock; - -static void sync_cmos_clock(unsigned long dummy) -{ - struct timeval now, next; - int fail = 1; - - /* - * If we have an externally synchronized Linux clock, then update - * CMOS clock accordingly every ~11 minutes. Set_rtc_mmss() has to be - * called as close as possible to 500 ms before the new second starts. - * This code is run on a timer. If the clock is set, that timer - * may not expire at the correct time. Thus, we adjust... - */ - if (!ntp_synced()) - /* - * Not synced, exit, do not restart a timer (if one is - * running, let it run out). 
- */ - return; - - do_gettimeofday(&now); - if (now.tv_usec >= USEC_AFTER - ((unsigned) TICK_SIZE) / 2 && - now.tv_usec <= USEC_BEFORE + ((unsigned) TICK_SIZE) / 2) - fail = set_rtc_mmss(now.tv_sec); - - next.tv_usec = USEC_AFTER - now.tv_usec; - if (next.tv_usec <= 0) - next.tv_usec += USEC_PER_SEC; - - if (!fail) - next.tv_sec = 659; - else - next.tv_sec = 0; - - if (next.tv_usec >= USEC_PER_SEC) { - next.tv_sec++; - next.tv_usec -= USEC_PER_SEC; - } - mod_timer(&sync_cmos_timer, jiffies + timeval_to_jiffies(&next)); -} - -void notify_arch_cmos_timer(void) +int update_persistent_clock(struct timespec now) { - if (!no_sync_cmos_clock) - mod_timer(&sync_cmos_timer, jiffies + 1); mod_timer(&sync_xen_wallclock_timer, jiffies + 1); + return set_rtc_mmss(now.tv_sec); } extern void (*late_time_init)(void); Index: head-2008-12-01/arch/x86/kernel/traps_32-xen.c =================================================================== --- head-2008-12-01.orig/arch/x86/kernel/traps_32-xen.c 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/arch/x86/kernel/traps_32-xen.c 2008-12-01 11:36:47.000000000 +0100 @@ -41,6 +41,10 @@ #include #endif +#if defined(CONFIG_EDAC) +#include +#endif + #include #include #include @@ -102,36 +106,45 @@ asmlinkage void machine_check(void); int kstack_depth_to_print = 24; static unsigned int code_bytes = 64; -static inline int valid_stack_ptr(struct thread_info *tinfo, void *p) +static inline int valid_stack_ptr(struct thread_info *tinfo, void *p, unsigned size) { return p > (void *)tinfo && - p < (void *)tinfo + THREAD_SIZE - 3; + p <= (void *)tinfo + THREAD_SIZE - size; } +/* The form of the top of the frame on the stack */ +struct stack_frame { + struct stack_frame *next_frame; + unsigned long return_address; +}; + static inline unsigned long print_context_stack(struct thread_info *tinfo, unsigned long *stack, unsigned long ebp, struct stacktrace_ops *ops, void *data) { - unsigned long addr; - #ifdef CONFIG_FRAME_POINTER - while (valid_stack_ptr(tinfo, (void *)ebp)) { - unsigned long new_ebp; - addr = *(unsigned long *)(ebp + 4); + struct stack_frame *frame = (struct stack_frame *)ebp; + while (valid_stack_ptr(tinfo, frame, sizeof(*frame))) { + struct stack_frame *next; + unsigned long addr; + + addr = frame->return_address; ops->address(data, addr); /* * break out of recursive entries (such as * end_of_stack_stop_unwind_function). Also, * we can never allow a frame pointer to * move downwards! 
- */ - new_ebp = *(unsigned long *)ebp; - if (new_ebp <= ebp) + */ + next = frame->next_frame; + if (next <= frame) break; - ebp = new_ebp; + frame = next; } #else - while (valid_stack_ptr(tinfo, stack)) { + while (valid_stack_ptr(tinfo, stack, sizeof(*stack))) { + unsigned long addr; + addr = *stack++; if (__kernel_text_address(addr)) ops->address(data, addr); @@ -154,7 +167,7 @@ void dump_trace(struct task_struct *task if (!stack) { unsigned long dummy; stack = &dummy; - if (task && task != current) + if (task != current) stack = (unsigned long *)task->thread.esp; } @@ -213,6 +226,7 @@ static void print_trace_address(void *da { printk("%s [<%08lx>] ", (char *)data, addr); print_symbol("%s\n", addr); + touch_nmi_watchdog(); } static struct stacktrace_ops print_trace_ops = { @@ -396,7 +410,7 @@ void die(const char * str, struct pt_reg unsigned long esp; unsigned short ss; - report_bug(regs->eip); + report_bug(regs->eip, regs); printk(KERN_EMERG "%s: %04lx [#%d]\n", str, err & 0xffff, ++die_counter); #ifdef CONFIG_PREEMPT @@ -439,6 +453,7 @@ void die(const char * str, struct pt_reg bust_spinlocks(0); die.lock_owner = -1; + add_taint(TAINT_DIE); spin_unlock_irqrestore(&die.lock, flags); if (!regs) @@ -523,10 +538,12 @@ fastcall void do_##name(struct pt_regs * do_trap(trapnr, signr, str, 0, regs, error_code, NULL); \ } -#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ +#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr, irq) \ fastcall void do_##name(struct pt_regs * regs, long error_code) \ { \ siginfo_t info; \ + if (irq) \ + local_irq_enable(); \ info.si_signo = signr; \ info.si_errno = 0; \ info.si_code = sicode; \ @@ -566,13 +583,13 @@ DO_VM86_ERROR( 3, SIGTRAP, "int3", int3) #endif DO_VM86_ERROR( 4, SIGSEGV, "overflow", overflow) DO_VM86_ERROR( 5, SIGSEGV, "bounds", bounds) -DO_ERROR_INFO( 6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->eip) +DO_ERROR_INFO( 6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->eip, 0) DO_ERROR( 9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) DO_ERROR(12, SIGBUS, "stack segment", stack_segment) -DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0) -DO_ERROR_INFO(32, SIGSEGV, "iret exception", iret_error, ILL_BADSTK, 0) +DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0, 0) +DO_ERROR_INFO(32, SIGSEGV, "iret exception", iret_error, ILL_BADSTK, 0, 1) fastcall void __kprobes do_general_protection(struct pt_regs * regs, long error_code) @@ -585,6 +602,13 @@ fastcall void __kprobes do_general_prote current->thread.error_code = error_code; current->thread.trap_no = 13; + if (show_unhandled_signals && unhandled_signal(current, SIGSEGV) && + printk_ratelimit()) + printk(KERN_INFO + "%s[%d] general protection eip:%lx esp:%lx error:%lx\n", + current->comm, current->pid, + regs->eip, regs->esp, error_code); + force_sig(SIGSEGV, current); return; @@ -610,6 +634,14 @@ mem_parity_error(unsigned char reason, s printk(KERN_EMERG "Uhhuh. 
NMI received for unknown reason %02x on " "CPU %d.\n", reason, smp_processor_id()); printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n"); + +#if defined(CONFIG_EDAC) + if(edac_handler_set()) { + edac_atomic_assert_error(); + return; + } +#endif + if (panic_on_unrecovered_nmi) panic("NMI: Not continuing"); @@ -720,6 +752,8 @@ static __kprobes void default_do_nmi(str reassert_nmi(); } +static int ignore_nmis; + fastcall __kprobes void do_nmi(struct pt_regs * regs, long error_code) { int cpu; @@ -730,11 +764,24 @@ fastcall __kprobes void do_nmi(struct pt ++nmi_count(cpu); - default_do_nmi(regs); + if (!ignore_nmis) + default_do_nmi(regs); nmi_exit(); } +void stop_nmi(void) +{ + acpi_nmi_disable(); + ignore_nmis++; +} + +void restart_nmi(void) +{ + ignore_nmis--; + acpi_nmi_enable(); +} + #ifdef CONFIG_KPROBES fastcall void __kprobes do_int3(struct pt_regs *regs, long error_code) { @@ -1023,6 +1070,7 @@ asmlinkage void math_state_restore(void) thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */ tsk->fpu_counter++; } +EXPORT_SYMBOL_GPL(math_state_restore); #ifndef CONFIG_MATH_EMULATION Index: head-2008-12-01/arch/x86/mach-xen/setup.c =================================================================== --- head-2008-12-01.orig/arch/x86/mach-xen/setup.c 2008-12-01 11:29:05.000000000 +0100 +++ head-2008-12-01/arch/x86/mach-xen/setup.c 2008-12-01 11:36:47.000000000 +0100 @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -101,7 +102,7 @@ void __init pre_setup_arch_hook(void) init_mm.pgd = swapper_pg_dir = (pgd_t *)xen_start_info->pt_base; - setup_xen_features(); + xen_setup_features(); if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0) { hypervisor_virt_start = pp.virt_start; @@ -157,4 +158,18 @@ void __init machine_specific_arch_setup( HYPERVISOR_nmi_op(XENNMI_register_callback, &cb); } #endif + + /* Do an early initialization of the fixmap area */ + { + extern pte_t swapper_pg_pmd[PTRS_PER_PTE]; + unsigned long addr = __fix_to_virt(FIX_EARLYCON_MEM_BASE); + pgd_t *pgd = (pgd_t *)xen_start_info->pt_base; + pud_t *pud = pud_offset(pgd + pgd_index(addr), addr); + pmd_t *pmd = pmd_offset(pud, addr); + + swapper_pg_dir = pgd; + init_mm.pgd = pgd; + make_lowmem_page_readonly(swapper_pg_pmd, XENFEAT_writable_page_tables); + set_pmd(pmd, __pmd(__pa_symbol(swapper_pg_pmd) | _PAGE_TABLE)); + } } Index: head-2008-12-01/arch/x86/mm/fault_32-xen.c =================================================================== --- head-2008-12-01.orig/arch/x86/mm/fault_32-xen.c 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/arch/x86/mm/fault_32-xen.c 2008-12-01 11:36:47.000000000 +0100 @@ -346,7 +346,10 @@ static inline pmd_t *vmalloc_sync_one(pg pmd_k = pmd_offset(pud_k, address); if (!pmd_present(*pmd_k)) return NULL; - if (!pmd_present(*pmd)) + if (!pmd_present(*pmd)) { + bool lazy = x86_read_percpu(xen_lazy_mmu); + + x86_write_percpu(xen_lazy_mmu, false); #if CONFIG_XEN_COMPAT > 0x030002 set_pmd(pmd, *pmd_k); #else @@ -356,7 +359,8 @@ static inline pmd_t *vmalloc_sync_one(pg */ set_pmd(pmd, __pmd(pmd_val(*pmd_k))); #endif - else + x86_write_percpu(xen_lazy_mmu, lazy); + } else BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k)); return pmd_k; } @@ -388,6 +392,8 @@ static inline int vmalloc_fault(unsigned return 0; } +int show_unhandled_signals = 1; + /* * This routine handles page faults. 
It determines the address, * and the problem, and then passes it off to one of the appropriate @@ -408,6 +414,7 @@ fastcall void __kprobes do_page_fault(st struct vm_area_struct * vma; unsigned long address; int write, si_code; + int fault; /* get the address */ address = read_cr2(); @@ -541,20 +548,18 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. */ - switch (handle_mm_fault(mm, vma, address, write)) { - case VM_FAULT_MINOR: - tsk->min_flt++; - break; - case VM_FAULT_MAJOR: - tsk->maj_flt++; - break; - case VM_FAULT_SIGBUS: - goto do_sigbus; - case VM_FAULT_OOM: + fault = handle_mm_fault(mm, vma, address, write); + if (unlikely(fault & VM_FAULT_ERROR)) { + if (fault & VM_FAULT_OOM) goto out_of_memory; - default: - BUG(); + else if (fault & VM_FAULT_SIGBUS) + goto do_sigbus; + BUG(); } + if (fault & VM_FAULT_MAJOR) + tsk->maj_flt++; + else + tsk->min_flt++; /* * Did it hit the DOS screen memory VA from vm86 mode? @@ -589,6 +594,14 @@ bad_area_nosemaphore: if (is_prefetch(regs, address, error_code)) return; + if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && + printk_ratelimit()) { + printk("%s%s[%d]: segfault at %08lx eip %08lx " + "esp %08lx error %lx\n", + tsk->pid > 1 ? KERN_INFO : KERN_EMERG, + tsk->comm, tsk->pid, address, regs->eip, + regs->esp, error_code); + } tsk->thread.cr2 = address; /* Kernel addresses are always protection faults */ tsk->thread.error_code = error_code | (address >= TASK_SIZE); Index: head-2008-12-01/arch/x86/mm/highmem_32-xen.c =================================================================== --- head-2008-12-01.orig/arch/x86/mm/highmem_32-xen.c 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/arch/x86/mm/highmem_32-xen.c 2008-12-01 11:36:47.000000000 +0100 @@ -34,17 +34,16 @@ void *kmap_atomic_prot(struct page *page /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ pagefault_disable(); - idx = type + KM_TYPE_NR*smp_processor_id(); - BUG_ON(!pte_none(*(kmap_pte-idx))); - if (!PageHighMem(page)) return page_address(page); + idx = type + KM_TYPE_NR*smp_processor_id(); vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); + BUG_ON(!pte_none(*(kmap_pte-idx))); set_pte_at(&init_mm, vaddr, kmap_pte-idx, mk_pte(page, prot)); /*arch_flush_lazy_mmu_mode();*/ - return (void*) vaddr; + return (void *)vaddr; } void *kmap_atomic(struct page *page, enum km_type type) Index: head-2008-12-01/arch/x86/mm/init_32-xen.c =================================================================== --- head-2008-12-01.orig/arch/x86/mm/init_32-xen.c 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/arch/x86/mm/init_32-xen.c 2008-12-01 11:36:47.000000000 +0100 @@ -96,7 +96,7 @@ static pte_t * __init one_page_table_ini #endif pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); - paravirt_alloc_pt(__pa(page_table) >> PAGE_SHIFT); + paravirt_alloc_pt(&init_mm, __pa(page_table) >> PAGE_SHIFT); make_lowmem_page_readonly(page_table, XENFEAT_writable_page_tables); set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); @@ -446,7 +446,7 @@ static void __init pagetable_init (void) xen_pagetable_setup_done(pgd_base); } -#if defined(CONFIG_SOFTWARE_SUSPEND) || defined(CONFIG_ACPI_SLEEP) +#if defined(CONFIG_HIBERNATION) || defined(CONFIG_ACPI) /* * Swap suspend & friends need this for resume because things like the intel-agp * driver might have split up a kernel 4MB mapping. 
@@ -485,9 +485,13 @@ void zap_low_mappings (void) flush_tlb_all(); } +int nx_enabled = 0; + +#ifdef CONFIG_X86_PAE + static int disable_nx __initdata = 0; u64 __supported_pte_mask __read_mostly = ~_PAGE_NX; -EXPORT_SYMBOL(__supported_pte_mask); +EXPORT_SYMBOL_GPL(__supported_pte_mask); /* * noexec = on|off @@ -514,9 +518,6 @@ static int __init noexec_setup(char *str } early_param("noexec", noexec_setup); -int nx_enabled = 0; -#ifdef CONFIG_X86_PAE - static void __init set_nx(void) { unsigned int v[4], l, h; @@ -764,7 +765,7 @@ void __init mem_init(void) zap_low_mappings(); #endif - set_bit(PG_pinned, &virt_to_page(init_mm.pgd)->flags); + SetPagePinned(virt_to_page(init_mm.pgd)); } #ifdef CONFIG_MEMORY_HOTPLUG @@ -796,8 +797,7 @@ void __init pgtable_cache_init(void) PTRS_PER_PMD*sizeof(pmd_t), PTRS_PER_PMD*sizeof(pmd_t), SLAB_PANIC, - pmd_ctor, - NULL); + pmd_ctor); if (!SHARED_KERNEL_PMD) { /* If we're in PAE mode and have a non-shared kernel pmd, then the pgd size must be a Index: head-2008-12-01/arch/x86/mm/ioremap_32-xen.c =================================================================== --- head-2008-12-01.orig/arch/x86/mm/ioremap_32-xen.c 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/arch/x86/mm/ioremap_32-xen.c 2008-12-01 11:36:47.000000000 +0100 @@ -363,9 +363,8 @@ void iounmap(volatile void __iomem *addr /* Reset the direct mapping. Can block */ if ((p->flags >> 20) && is_local_lowmem(p->phys_addr)) { - /* p->size includes the guard page, but cpa doesn't like that */ change_page_attr(virt_to_page(bus_to_virt(p->phys_addr)), - (p->size - PAGE_SIZE) >> PAGE_SHIFT, + get_vm_area_size(p) >> PAGE_SHIFT, PAGE_KERNEL); global_flush_tlb(); } Index: head-2008-12-01/arch/x86/mm/pgtable_32-xen.c =================================================================== --- head-2008-12-01.orig/arch/x86/mm/pgtable_32-xen.c 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/arch/x86/mm/pgtable_32-xen.c 2008-12-01 11:36:47.000000000 +0100 @@ -198,7 +198,7 @@ void pte_free(struct page *pte) va, pfn_pte(pfn, PAGE_KERNEL), 0)) BUG(); } else - clear_bit(PG_pinned, &pte->flags); + ClearPagePinned(pte); ClearPageForeign(pte); init_page_count(pte); @@ -248,7 +248,7 @@ static inline void pgd_list_del(pgd_t *p #if (PTRS_PER_PMD == 1) /* Non-PAE pgd constructor */ -void pgd_ctor(void *pgd) +static void pgd_ctor(void *pgd) { unsigned long flags; @@ -271,7 +271,7 @@ void pgd_ctor(void *pgd) } #else /* PTRS_PER_PMD > 1 */ /* PAE pgd constructor */ -void pgd_ctor(void *pgd) +static void pgd_ctor(void *pgd) { /* PAE, kernel PMD may be shared */ @@ -285,7 +285,7 @@ void pgd_ctor(void *pgd) } #endif /* PTRS_PER_PMD */ -void pgd_dtor(void *pgd) +static void pgd_dtor(void *pgd) { unsigned long flags; /* can be called from interrupt context */ @@ -637,9 +637,9 @@ static inline unsigned int pgd_walk_set_ if (PageHighMem(page)) { if (pgprot_val(flags) & _PAGE_RW) - clear_bit(PG_pinned, &page->flags); + ClearPagePinned(page); else - set_bit(PG_pinned, &page->flags); + SetPagePinned(page); } else { MULTI_update_va_mapping(per_cpu(pb_mcl, cpu) + seq, (unsigned long)__va(pfn << PAGE_SHIFT), @@ -709,19 +709,19 @@ static void __pgd_pin(pgd_t *pgd) pgd_walk(pgd, PAGE_KERNEL_RO); kmap_flush_unused(); xen_pgd_pin(__pa(pgd)); - set_bit(PG_pinned, &virt_to_page(pgd)->flags); + SetPagePinned(virt_to_page(pgd)); } static void __pgd_unpin(pgd_t *pgd) { xen_pgd_unpin(__pa(pgd)); pgd_walk(pgd, PAGE_KERNEL); - clear_bit(PG_pinned, &virt_to_page(pgd)->flags); + ClearPagePinned(virt_to_page(pgd)); } static void 
pgd_test_and_unpin(pgd_t *pgd) { - if (test_bit(PG_pinned, &virt_to_page(pgd)->flags)) + if (PagePinned(virt_to_page(pgd))) __pgd_unpin(pgd); } @@ -759,7 +759,7 @@ void mm_pin_all(void) */ spin_lock_irqsave(&pgd_lock, flags); for (page = pgd_list; page; page = (struct page *)page->index) { - if (!test_bit(PG_pinned, &page->flags)) + if (!PagePinned(page)) __pgd_pin((pgd_t *)page_address(page)); } spin_unlock_irqrestore(&pgd_lock, flags); @@ -767,7 +767,7 @@ void mm_pin_all(void) void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) { - if (!test_bit(PG_pinned, &virt_to_page(mm->pgd)->flags)) + if (!PagePinned(virt_to_page(mm->pgd))) mm_pin(mm); } @@ -793,7 +793,7 @@ void arch_exit_mmap(struct mm_struct *mm task_unlock(tsk); - if (test_bit(PG_pinned, &virt_to_page(mm->pgd)->flags) && + if (PagePinned(virt_to_page(mm->pgd)) && (atomic_read(&mm->mm_count) == 1) && !mm->context.has_foreign_mappings) mm_unpin(mm); Index: head-2008-12-01/arch/x86/pci/irq-xen.c =================================================================== --- head-2008-12-01.orig/arch/x86/pci/irq-xen.c 2008-12-01 11:32:38.000000000 +0100 +++ head-2008-12-01/arch/x86/pci/irq-xen.c 2008-12-01 11:36:47.000000000 +0100 @@ -142,8 +142,9 @@ static void __init pirq_peer_trick(void) for(i = 1; i < 256; i++) { if (!busmap[i] || pci_find_bus(0, i)) continue; - if (pci_scan_bus(i, &pci_root_ops, NULL)) - printk(KERN_INFO "PCI: Discovered primary peer bus %02x [IRQ]\n", i); + if (pci_scan_bus_with_sysdata(i)) + printk(KERN_INFO "PCI: Discovered primary peer " + "bus %02x [IRQ]\n", i); } pcibios_last_bus = -1; } @@ -553,6 +554,7 @@ static __init int intel_router_probe(str case PCI_DEVICE_ID_INTEL_ICH9_3: case PCI_DEVICE_ID_INTEL_ICH9_4: case PCI_DEVICE_ID_INTEL_ICH9_5: + case PCI_DEVICE_ID_INTEL_TOLAPAI_0: r->name = "PIIX/ICH"; r->get = pirq_piix_get; r->set = pirq_piix_set; Index: head-2008-12-01/arch/x86/xen/Kconfig =================================================================== --- head-2008-12-01.orig/arch/x86/xen/Kconfig 2008-12-01 10:53:14.000000000 +0100 +++ head-2008-12-01/arch/x86/xen/Kconfig 2008-12-01 11:36:47.000000000 +0100 @@ -2,7 +2,7 @@ # This Kconfig describes xen options # -config XEN +config PARAVIRT_XEN bool "Xen guest support" select PARAVIRT select PARAVIRT_CLOCK Index: head-2008-12-01/arch/x86/xen/xen-head.S =================================================================== --- head-2008-12-01.orig/arch/x86/xen/xen-head.S 2008-12-01 10:53:14.000000000 +0100 +++ head-2008-12-01/arch/x86/xen/xen-head.S 2008-12-01 11:36:47.000000000 +0100 @@ -1,7 +1,7 @@ /* Xen-specific pieces of head.S, intended to be included in the right place in head.S */ -#ifdef CONFIG_XEN +#ifdef CONFIG_PARAVIRT_XEN #include #include @@ -52,4 +52,4 @@ ENTRY(hypercall_page) ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW, _ASM_PTR __HYPERVISOR_VIRT_START) ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET, _ASM_PTR 0) -#endif /*CONFIG_XEN */ +#endif /* CONFIG_PARAVIRT_XEN */ Index: head-2008-12-01/arch/x86/ia32/ia32entry-xen.S =================================================================== --- head-2008-12-01.orig/arch/x86/ia32/ia32entry-xen.S 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/arch/x86/ia32/ia32entry-xen.S 2008-12-01 11:36:47.000000000 +0100 @@ -105,7 +105,7 @@ ENTRY(ia32_sysenter_target) movl $VSYSCALL32_SYSEXIT,8(%rsp) movq %rax,(%rsp) cld - SAVE_ARGS 0,0,0 + SAVE_ARGS 0,0,1 /* no need to do an access_ok check here because rbp has been 32bit zero extended */ 1: movl (%rbp),%r9d @@ -244,7 +244,7 @@ ia32_badarg: */ 
ENTRY(ia32_syscall) - CFI_STARTPROC simple + CFI_STARTPROC32 simple CFI_SIGNAL_FRAME CFI_DEF_CFA rsp,SS+8-RIP+16 /*CFI_REL_OFFSET ss,SS-RIP+16*/ @@ -280,6 +280,7 @@ ia32_sysret: ia32_tracesys: SAVE_REST + CLEAR_RREGS movq $-ENOSYS,RAX(%rsp) /* really needed? */ movq %rsp,%rdi /* &pt_regs -> arg1 */ call syscall_trace_enter @@ -476,7 +477,7 @@ ia32_sys_call_table: .quad sys_init_module .quad sys_delete_module .quad quiet_ni_syscall /* 130 get_kernel_syms */ - .quad sys_quotactl + .quad sys32_quotactl .quad sys_getpgid .quad sys_fchdir .quad quiet_ni_syscall /* bdflush */ @@ -669,4 +670,5 @@ ia32_sys_call_table: .quad compat_sys_signalfd .quad compat_sys_timerfd .quad sys_eventfd + .quad sys32_fallocate ia32_syscall_end: Index: head-2008-12-01/arch/x86/kernel/acpi/sleep_64-xen.c =================================================================== --- head-2008-12-01.orig/arch/x86/kernel/acpi/sleep_64-xen.c 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/arch/x86/kernel/acpi/sleep_64-xen.c 2008-12-01 11:36:47.000000000 +0100 @@ -51,12 +51,10 @@ Low-Level Sleep Support -------------------------------------------------------------------------- */ -#ifdef CONFIG_ACPI_SLEEP - #ifndef CONFIG_ACPI_PV_SLEEP /* address in low memory of the wakeup routine. */ unsigned long acpi_wakeup_address = 0; -unsigned long acpi_video_flags; +unsigned long acpi_realmode_flags; extern char wakeup_start, wakeup_end; extern unsigned long acpi_copy_wakeup_routine(unsigned long); @@ -109,9 +107,11 @@ static int __init acpi_sleep_setup(char { while ((str != NULL) && (*str != '\0')) { if (strncmp(str, "s3_bios", 7) == 0) - acpi_video_flags = 1; + acpi_realmode_flags |= 1; if (strncmp(str, "s3_mode", 7) == 0) - acpi_video_flags |= 2; + acpi_realmode_flags |= 2; + if (strncmp(str, "s3_beep", 7) == 0) + acpi_realmode_flags |= 4; str = strchr(str, ','); if (str != NULL) str += strspn(str, ", \t"); @@ -123,8 +123,6 @@ static int __init acpi_sleep_setup(char __setup("acpi_sleep=", acpi_sleep_setup); #endif /* CONFIG_ACPI_PV_SLEEP */ -#endif /*CONFIG_ACPI_SLEEP */ - void acpi_pci_link_exit(void) { } Index: head-2008-12-01/arch/x86/kernel/apic_64-xen.c =================================================================== --- head-2008-12-01.orig/arch/x86/kernel/apic_64-xen.c 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/arch/x86/kernel/apic_64-xen.c 2008-12-01 11:36:47.000000000 +0100 @@ -50,7 +50,7 @@ void ack_bad_irq(unsigned int irq) * holds up an irq slot - in excessive cases (when multiple * unexpected vectors occur) that might lock up the APIC * completely. - * But don't ack when the APIC is disabled. -AK + * But don't ack when the APIC is disabled. 
-AK */ if (!disable_apic) ack_APIC_irq(); @@ -132,20 +132,6 @@ asmlinkage void smp_spurious_interrupt(v if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f))) ack_APIC_irq(); -#if 0 - static unsigned long last_warning; - static unsigned long skipped; - - /* see sw-dev-man vol 3, chapter 7.4.13.5 */ - if (time_before(last_warning+30*HZ,jiffies)) { - printk(KERN_INFO "spurious APIC interrupt on CPU#%d, %ld skipped.\n", - smp_processor_id(), skipped); - last_warning = jiffies; - skipped = 0; - } else { - skipped++; - } -#endif irq_exit(); } @@ -177,7 +163,7 @@ asmlinkage void smp_error_interrupt(void 7: Illegal register address */ printk (KERN_DEBUG "APIC error on CPU%d: %02x(%02x)\n", - smp_processor_id(), v , v1); + smp_processor_id(), v , v1); irq_exit(); } Index: head-2008-12-01/arch/x86/kernel/e820_64-xen.c =================================================================== --- head-2008-12-01.orig/arch/x86/kernel/e820_64-xen.c 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/arch/x86/kernel/e820_64-xen.c 2008-12-01 11:36:47.000000000 +0100 @@ -222,37 +222,6 @@ unsigned long __init e820_end_of_ram(voi } /* - * Find the hole size in the range. - */ -unsigned long __init e820_hole_size(unsigned long start, unsigned long end) -{ - unsigned long ram = 0; - int i; - - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - unsigned long last, addr; - - if (ei->type != E820_RAM || - ei->addr+ei->size <= start || - ei->addr >= end) - continue; - - addr = round_up(ei->addr, PAGE_SIZE); - if (addr < start) - addr = start; - - last = round_down(ei->addr + ei->size, PAGE_SIZE); - if (last >= end) - last = end; - - if (last > addr) - ram += last - addr; - } - return ((end - start) - ram); -} - -/* * Mark e820 reserved areas as busy for the resource manager. */ void __init e820_reserve_resources(struct e820entry *e820, int nr_map) @@ -325,47 +294,61 @@ void __init e820_mark_nosave_regions(voi } #endif +/* + * Finds an active region in the address range from start_pfn to end_pfn and + * returns its range in ei_startpfn and ei_endpfn for the e820 entry. 
+ */ +static int __init e820_find_active_region(const struct e820entry *ei, + unsigned long start_pfn, + unsigned long end_pfn, + unsigned long *ei_startpfn, + unsigned long *ei_endpfn) +{ + *ei_startpfn = round_up(ei->addr, PAGE_SIZE) >> PAGE_SHIFT; + *ei_endpfn = round_down(ei->addr + ei->size, PAGE_SIZE) >> PAGE_SHIFT; + + /* Skip map entries smaller than a page */ + if (*ei_startpfn >= *ei_endpfn) + return 0; + + /* Check if end_pfn_map should be updated */ + if (ei->type != E820_RAM && *ei_endpfn > end_pfn_map) + end_pfn_map = *ei_endpfn; + + /* Skip if map is outside the node */ + if (ei->type != E820_RAM || *ei_endpfn <= start_pfn || + *ei_startpfn >= end_pfn) + return 0; + + /* Check for overlaps */ + if (*ei_startpfn < start_pfn) + *ei_startpfn = start_pfn; + if (*ei_endpfn > end_pfn) + *ei_endpfn = end_pfn; + + /* Obey end_user_pfn to save on memmap */ + if (*ei_startpfn >= end_user_pfn) + return 0; + if (*ei_endpfn > end_user_pfn) + *ei_endpfn = end_user_pfn; + + return 1; +} + /* Walk the e820 map and register active regions within a node */ void __init e820_register_active_regions(int nid, unsigned long start_pfn, unsigned long end_pfn) { + unsigned long ei_startpfn; + unsigned long ei_endpfn; int i; - unsigned long ei_startpfn, ei_endpfn; - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - ei_startpfn = round_up(ei->addr, PAGE_SIZE) >> PAGE_SHIFT; - ei_endpfn = round_down(ei->addr + ei->size, PAGE_SIZE) - >> PAGE_SHIFT; - - /* Skip map entries smaller than a page */ - if (ei_startpfn >= ei_endpfn) - continue; - - /* Check if end_pfn_map should be updated */ - if (ei->type != E820_RAM && ei_endpfn > end_pfn_map) - end_pfn_map = ei_endpfn; - - /* Skip if map is outside the node */ - if (ei->type != E820_RAM || - ei_endpfn <= start_pfn || - ei_startpfn >= end_pfn) - continue; - - /* Check for overlaps */ - if (ei_startpfn < start_pfn) - ei_startpfn = start_pfn; - if (ei_endpfn > end_pfn) - ei_endpfn = end_pfn; - - /* Obey end_user_pfn to save on memmap */ - if (ei_startpfn >= end_user_pfn) - continue; - if (ei_endpfn > end_user_pfn) - ei_endpfn = end_user_pfn; - add_active_range(nid, ei_startpfn, ei_endpfn); - } + for (i = 0; i < e820.nr_map; i++) + if (e820_find_active_region(&e820.map[i], + start_pfn, end_pfn, + &ei_startpfn, &ei_endpfn)) + add_active_range(nid, ei_startpfn, ei_endpfn); } /* @@ -386,12 +369,35 @@ void __init add_memory_region(unsigned l e820.nr_map++; } +/* + * Find the hole size (in bytes) in the memory range. 
+ * @start: starting address of the memory range to scan + * @end: ending address of the memory range to scan + */ +unsigned long __init e820_hole_size(unsigned long start, unsigned long end) +{ + unsigned long start_pfn = start >> PAGE_SHIFT; + unsigned long end_pfn = end >> PAGE_SHIFT; + unsigned long ei_startpfn; + unsigned long ei_endpfn; + unsigned long ram = 0; + int i; + + for (i = 0; i < e820.nr_map; i++) { + if (e820_find_active_region(&e820.map[i], + start_pfn, end_pfn, + &ei_startpfn, &ei_endpfn)) + ram += ei_endpfn - ei_startpfn; + } + return end - start - (ram << PAGE_SHIFT); +} + void __init e820_print_map(char *who) { int i; for (i = 0; i < e820.nr_map; i++) { - printk(" %s: %016Lx - %016Lx ", who, + printk(KERN_INFO " %s: %016Lx - %016Lx ", who, (unsigned long long) e820.map[i].addr, (unsigned long long) (e820.map[i].addr + e820.map[i].size)); switch (e820.map[i].type) { Index: head-2008-12-01/arch/x86/kernel/early_printk-xen.c =================================================================== --- head-2008-12-01.orig/arch/x86/kernel/early_printk-xen.c 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/arch/x86/kernel/early_printk-xen.c 2008-12-01 11:36:47.000000000 +0100 @@ -174,6 +174,7 @@ static __init void early_serial_init(cha * mappings. Someone should fix this for domain 0. For now, use fake serial. */ #define early_vga_console early_serial_console +#define xenboot_console early_serial_console #endif @@ -261,20 +262,22 @@ static int __init setup_early_printk(cha } else if (!strncmp(buf, "ttyS", 4)) { early_serial_init(buf); early_console = &early_serial_console; - } else if (!strncmp(buf, "vga", 3) + } else if (!strncmp(buf, "vga", 3)) { #ifndef CONFIG_XEN && SCREEN_INFO.orig_video_isVGA == 1) { max_xpos = SCREEN_INFO.orig_video_cols; max_ypos = SCREEN_INFO.orig_video_lines; current_ypos = SCREEN_INFO.orig_y; -#else - || !strncmp(buf, "xen", 3)) { #endif early_console = &early_vga_console; } else if (!strncmp(buf, "simnow", 6)) { simnow_init(buf + 6); early_console = &simnow_console; keep_early = 1; +#ifdef CONFIG_XEN + } else if (!strncmp(buf, "xen", 3)) { + early_console = &xenboot_console; +#endif } if (keep_early) Index: head-2008-12-01/arch/x86/kernel/entry_64-xen.S =================================================================== --- head-2008-12-01.orig/arch/x86/kernel/entry_64-xen.S 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/arch/x86/kernel/entry_64-xen.S 2008-12-01 11:36:47.000000000 +0100 @@ -310,7 +310,7 @@ sysret_signal: TRACE_IRQS_ON /* sti */ XEN_UNBLOCK_EVENTS(%rsi) - testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx + testl $(_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY),%edx jz 1f /* Really a signal */ @@ -409,7 +409,7 @@ int_very_careful: jmp int_restore_rest int_signal: - testl $(_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_SINGLESTEP),%edx + testl $(_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY),%edx jz 1f movq %rsp,%rdi # &ptregs -> arg1 xorl %esi,%esi # oldset -> arg2 @@ -552,7 +552,7 @@ retint_careful: jmp retint_check retint_signal: - testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx + testl $(_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY),%edx jz retint_restore_args TRACE_IRQS_ON XEN_UNBLOCK_EVENTS(%rsi) Index: head-2008-12-01/arch/x86/kernel/head_64-xen.S =================================================================== --- head-2008-12-01.orig/arch/x86/kernel/head_64-xen.S 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/arch/x86/kernel/head_64-xen.S 2008-12-01 11:36:47.000000000 
+0100 @@ -23,7 +23,7 @@ #include #include - .section .bootstrap.text, "ax", @progbits + .section .text.head, "ax", @progbits .code64 .globl startup_64 startup_64: @@ -39,7 +39,7 @@ startup_64: #define NEXT_PAGE(name) \ .balign PAGE_SIZE; \ - phys_##name = . - .bootstrap.text; \ + phys_##name = . - .text.head; \ ENTRY(name) NEXT_PAGE(init_level4_pgt) @@ -66,6 +66,12 @@ NEXT_PAGE(level3_user_pgt) NEXT_PAGE(level2_kernel_pgt) .fill 512,8,0 +NEXT_PAGE(level2_fixmap_pgt) + .fill 512,8,0 + +NEXT_PAGE(level1_fixmap_pgt) + .fill 512,8,0 + NEXT_PAGE(hypercall_page) CFI_STARTPROC .rept 0x1000 / 0x20 @@ -172,18 +178,18 @@ ENTRY(empty_zero_page) .byte 0 #endif /* CONFIG_XEN_COMPAT <= 0x030002 */ - ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz, "linux") - ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION, .asciz, "2.6") - ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION, .asciz, "xen-3.0") - ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE, .quad, __START_KERNEL_map) + ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz "linux") + ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION, .asciz "2.6") + ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION, .asciz "xen-3.0") + ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE, .quad __START_KERNEL_map) #if CONFIG_XEN_COMPAT <= 0x030002 - ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET, .quad, __START_KERNEL_map) + ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET, .quad __START_KERNEL_map) #else - ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET, .quad, 0) + ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET, .quad 0) #endif - ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, .quad, startup_64) - ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .quad, hypercall_page) - ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, .quad, _PAGE_PRESENT,_PAGE_PRESENT) - ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz, "writable_page_tables|writable_descriptor_tables|auto_translated_physmap|pae_pgdir_above_4gb|supervisor_mode_kernel") - ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz, "generic") - ELFNOTE(Xen, XEN_ELFNOTE_SUSPEND_CANCEL, .long, 1) + ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, .quad startup_64) + ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .quad hypercall_page) + ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, .quad _PAGE_PRESENT, _PAGE_PRESENT) + ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz "writable_page_tables|writable_descriptor_tables|auto_translated_physmap|pae_pgdir_above_4gb|supervisor_mode_kernel") + ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic") + ELFNOTE(Xen, XEN_ELFNOTE_SUSPEND_CANCEL, .long 1) Index: head-2008-12-01/arch/x86/kernel/head64-xen.c =================================================================== --- head-2008-12-01.orig/arch/x86/kernel/head64-xen.c 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/arch/x86/kernel/head64-xen.c 2008-12-01 11:36:47.000000000 +0100 @@ -90,7 +90,7 @@ void __init x86_64_start_kernel(char * r unsigned long machine_to_phys_nr_ents; int i; - setup_xen_features(); + xen_setup_features(); xen_start_info = (struct start_info *)real_mode_data; if (!xen_feature(XENFEAT_auto_translated_physmap)) Index: head-2008-12-01/arch/x86/kernel/io_apic_64-xen.c =================================================================== --- head-2008-12-01.orig/arch/x86/kernel/io_apic_64-xen.c 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/arch/x86/kernel/io_apic_64-xen.c 2008-12-01 11:36:47.000000000 +0100 @@ -165,7 +165,9 @@ static inline void io_apic_write(unsigne #endif } -#ifndef CONFIG_XEN +#ifdef CONFIG_XEN +#define io_apic_modify io_apic_write +#else /* * Re-write a value: to be used for read-modify-write * cycles where the read already set up the index register. 
@@ -175,8 +177,32 @@ static inline void io_apic_modify(unsign struct io_apic __iomem *io_apic = io_apic_base(apic); writel(value, &io_apic->data); } -#else -#define io_apic_modify io_apic_write + +static int io_apic_level_ack_pending(unsigned int irq) +{ + struct irq_pin_list *entry; + unsigned long flags; + int pending = 0; + + spin_lock_irqsave(&ioapic_lock, flags); + entry = irq_2_pin + irq; + for (;;) { + unsigned int reg; + int pin; + + pin = entry->pin; + if (pin == -1) + break; + reg = io_apic_read(entry->apic, 0x10 + pin*2); + /* Is the remote IRR bit set? */ + pending |= (reg >> 14) & 1; + if (!entry->next) + break; + entry = irq_2_pin + entry->next; + } + spin_unlock_irqrestore(&ioapic_lock, flags); + return pending; +} #endif /* @@ -406,14 +432,12 @@ static void clear_IO_APIC (void) int skip_ioapic_setup; int ioapic_force; -/* dummy parsing: see setup.c */ - -static int __init disable_ioapic_setup(char *str) +static int __init parse_noapic(char *str) { - skip_ioapic_setup = 1; + disable_ioapic_setup(); return 0; } -early_param("noapic", disable_ioapic_setup); +early_param("noapic", parse_noapic); /* Actually the next is obsolete, but keep it for paranoid reasons -AK */ static int __init disable_timer_pin_setup(char *arg) @@ -765,12 +789,15 @@ static struct irq_chip ioapic_chip; static void ioapic_register_intr(int irq, unsigned long trigger) { - if (trigger) + if (trigger) { + irq_desc[irq].status |= IRQ_LEVEL; set_irq_chip_and_handler_name(irq, &ioapic_chip, handle_fasteoi_irq, "fasteoi"); - else + } else { + irq_desc[irq].status &= ~IRQ_LEVEL; set_irq_chip_and_handler_name(irq, &ioapic_chip, handle_edge_irq, "edge"); + } } #else #define ioapic_register_intr(irq, trigger) evtchn_register_pirq(irq) @@ -1419,9 +1446,37 @@ static void ack_apic_level(unsigned int ack_APIC_irq(); /* Now we can move and renable the irq */ - move_masked_irq(irq); - if (unlikely(do_unmask_irq)) + if (unlikely(do_unmask_irq)) { + /* Only migrate the irq if the ack has been received. + * + * On rare occasions the broadcast level triggered ack gets + * delayed going to ioapics, and if we reprogram the + * vector while Remote IRR is still set the irq will never + * fire again. + * + * To prevent this scenario we read the Remote IRR bit + * of the ioapic. This has two effects. + * - On any sane system the read of the ioapic will + * flush writes (and acks) going to the ioapic from + * this cpu. + * - We get to see if the ACK has actually been delivered. + * + * Based on failed experiments of reprogramming the + * ioapic entry from outside of irq context starting + * with masking the ioapic entry and then polling until + * Remote IRR was clear before reprogramming the + * ioapic I don't trust the Remote IRR bit to be + * completey accurate. + * + * However there appears to be no other way to plug + * this race, so if the Remote IRR bit is not + * accurate and is causing problems then it is a hardware bug + * and you can go talk to the chipset vendor about it. 
+ */ + if (!io_apic_level_ack_pending(irq)) + move_masked_irq(irq); unmask_IO_APIC_irq(irq); + } } static struct irq_chip ioapic_chip __read_mostly = { Index: head-2008-12-01/arch/x86/kernel/ldt_64-xen.c =================================================================== --- head-2008-12-01.orig/arch/x86/kernel/ldt_64-xen.c 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/arch/x86/kernel/ldt_64-xen.c 2008-12-01 11:36:47.000000000 +0100 @@ -114,6 +114,8 @@ int init_new_context(struct task_struct memset(&mm->context, 0, sizeof(mm->context)); init_MUTEX(&mm->context.sem); old_mm = current->mm; + if (old_mm) + mm->context.vdso = old_mm->context.vdso; if (old_mm && old_mm->context.size > 0) { down(&old_mm->context.sem); retval = copy_ldt(&mm->context, &old_mm->context); @@ -146,7 +148,7 @@ void destroy_context(struct mm_struct *m kfree(mm->context.ldt); mm->context.size = 0; } - if (!mm->context.pinned) { + if (!PagePinned(virt_to_page(mm->pgd))) { spin_lock(&mm_unpinned_lock); list_del(&mm->context.unpinned); spin_unlock(&mm_unpinned_lock); Index: head-2008-12-01/arch/x86/kernel/mpparse_64-xen.c =================================================================== --- head-2008-12-01.orig/arch/x86/kernel/mpparse_64-xen.c 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/arch/x86/kernel/mpparse_64-xen.c 2008-12-01 11:36:47.000000000 +0100 @@ -32,7 +32,6 @@ /* Have we found an MP table */ int smp_found_config; -unsigned int __initdata maxcpus = NR_CPUS; /* * Various Linux-internal data structures created from the @@ -657,6 +656,20 @@ static int mp_find_ioapic(int gsi) return -1; } +static u8 uniq_ioapic_id(u8 id) +{ + int i; + DECLARE_BITMAP(used, 256); + bitmap_zero(used, 256); + for (i = 0; i < nr_ioapics; i++) { + struct mpc_config_ioapic *ia = &mp_ioapics[i]; + __set_bit(ia->mpc_apicid, used); + } + if (!test_bit(id, used)) + return id; + return find_first_zero_bit(used, 256); +} + void __init mp_register_ioapic(u8 id, u32 address, u32 gsi_base) { int idx = 0; @@ -664,7 +677,7 @@ void __init mp_register_ioapic(u8 id, u3 if (bad_ioapic(address)) return; - idx = nr_ioapics++; + idx = nr_ioapics; mp_ioapics[idx].mpc_type = MP_IOAPIC; mp_ioapics[idx].mpc_flags = MPC_APIC_USABLE; @@ -673,7 +686,7 @@ void __init mp_register_ioapic(u8 id, u3 #ifndef CONFIG_XEN set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); #endif - mp_ioapics[idx].mpc_apicid = id; + mp_ioapics[idx].mpc_apicid = uniq_ioapic_id(id); mp_ioapics[idx].mpc_apicver = 0; /* @@ -690,6 +703,8 @@ void __init mp_register_ioapic(u8 id, u3 mp_ioapics[idx].mpc_apicaddr, mp_ioapic_routing[idx].gsi_start, mp_ioapic_routing[idx].gsi_end); + + nr_ioapics++; } void __init Index: head-2008-12-01/arch/x86/kernel/process_64-xen.c =================================================================== --- head-2008-12-01.orig/arch/x86/kernel/process_64-xen.c 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/arch/x86/kernel/process_64-xen.c 2008-12-01 11:36:47.000000000 +0100 @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -249,6 +250,7 @@ early_param("idle", idle_setup); void __show_regs(struct pt_regs * regs) { unsigned long fs, gs, shadowgs; + unsigned long d0, d1, d2, d3, d6, d7; unsigned int fsindex,gsindex; unsigned int ds,cs,es; @@ -288,6 +290,14 @@ void __show_regs(struct pt_regs * regs) fs,fsindex,gs,gsindex,shadowgs); printk("CS: %04x DS: %04x ES: %04x\n", cs, ds, es); + get_debugreg(d0, 0); + get_debugreg(d1, 1); + get_debugreg(d2, 2); + printk("DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, 
d1, d2); + get_debugreg(d3, 3); + get_debugreg(d6, 6); + get_debugreg(d7, 7); + printk("DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7); } void show_regs(struct pt_regs *regs) Index: head-2008-12-01/arch/x86/kernel/setup_64-xen.c =================================================================== --- head-2008-12-01.orig/arch/x86/kernel/setup_64-xen.c 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/arch/x86/kernel/setup_64-xen.c 2008-12-01 11:36:47.000000000 +0100 @@ -827,6 +827,8 @@ static void __cpuinit init_amd(struct cp level = cpuid_eax(1); if (c->x86 == 15 && ((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58)) set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability); + if (c->x86 == 0x10) + set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability); /* Enable workaround for FXSAVE leak */ if (c->x86 >= 6) @@ -852,8 +854,14 @@ static void __cpuinit init_amd(struct cp if (c->extended_cpuid_level >= 0x80000008) amd_detect_cmp(c); - /* Fix cpuid4 emulation for more */ - num_cache_leaves = 3; + if (c->extended_cpuid_level >= 0x80000006 && + (cpuid_edx(0x80000006) & 0xf000)) + num_cache_leaves = 4; + else + num_cache_leaves = 3; + + if (c->x86 == 0xf || c->x86 == 0x10 || c->x86 == 0x11) + set_bit(X86_FEATURE_K8, &c->x86_capability); /* RDTSC can be speculated around */ clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); @@ -1098,6 +1106,8 @@ void __cpuinit identify_cpu(struct cpuin c->x86_capability[2] = cpuid_edx(0x80860001); } + init_scattered_cpuid_features(c); + c->apicid = phys_pkg_id(0); /* @@ -1183,7 +1193,7 @@ static int show_cpuinfo(struct seq_file "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce", "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov", "pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx", - "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", NULL, + "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", "pbe", /* AMD-defined */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, @@ -1199,10 +1209,11 @@ static int show_cpuinfo(struct seq_file NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* Other (Linux-defined) */ - "cxmmx", NULL, "cyrix_arr", "centaur_mcr", NULL, - "constant_tsc", NULL, NULL, - "up", NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + "cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr", + NULL, NULL, NULL, NULL, + "constant_tsc", "up", NULL, "arch_perfmon", + "pebs", "bts", NULL, "sync_rdtsc", + "rep_good", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* Intel-defined (#2) */ @@ -1213,7 +1224,7 @@ static int show_cpuinfo(struct seq_file /* VIA/Cyrix/Centaur-defined */ NULL, NULL, "rng", "rng_en", NULL, NULL, "ace", "ace_en", - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + "ace2", "ace2_en", "phe", "phe_en", "pmm", "pmm_en", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, @@ -1224,6 +1235,12 @@ static int show_cpuinfo(struct seq_file "osvw", "ibs", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + + /* Auxiliary (Linux-defined) */ + "ida", NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, }; static char *x86_power_flags[] = { "ts", /* temperature sensor */ Index: head-2008-12-01/arch/x86/kernel/setup64-xen.c =================================================================== --- 
head-2008-12-01.orig/arch/x86/kernel/setup64-xen.c 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/arch/x86/kernel/setup64-xen.c 2008-12-01 11:36:47.000000000 +0100 @@ -125,11 +125,14 @@ void __init setup_per_cpu_areas(void) } #ifdef CONFIG_XEN -static void switch_pt(void) +static void __init_refok switch_pt(int cpu) { + if (cpu == 0) + xen_init_pt(); xen_pt_switch(__pa_symbol(init_level4_pgt)); xen_new_user_pt(__pa_symbol(__user_pgd(init_level4_pgt))); } +#define switch_pt() switch_pt(cpu) static void __cpuinit cpu_gdt_init(const struct desc_ptr *gdt_descr) { @@ -185,9 +188,6 @@ void pda_init(int cpu) pda->mmu_state = 0; if (cpu == 0) { -#ifdef CONFIG_XEN - xen_init_pt(); -#endif /* others are initialized in smpboot.c */ pda->pcurrent = &init_task; pda->irqstackptr = boot_cpu_stack; Index: head-2008-12-01/arch/x86/kernel/smp_64-xen.c =================================================================== --- head-2008-12-01.orig/arch/x86/kernel/smp_64-xen.c 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/arch/x86/kernel/smp_64-xen.c 2008-12-01 11:36:47.000000000 +0100 @@ -362,7 +362,7 @@ __smp_call_function_single(int cpu, void } /* - * smp_call_function_single - Run a function on another CPU + * smp_call_function_single - Run a function on a specific CPU * @func: The function to run. This must be fast and non-blocking. * @info: An arbitrary pointer to pass to the function. * @nonatomic: Currently unused. @@ -379,17 +379,21 @@ int smp_call_function_single (int cpu, v { /* prevent preemption and reschedule on another processor */ int me = get_cpu(); + + /* Can deadlock when called with interrupts disabled */ + WARN_ON(irqs_disabled()); + if (cpu == me) { + local_irq_disable(); + func(info); + local_irq_enable(); put_cpu(); return 0; } - /* Can deadlock when called with interrupts disabled */ - WARN_ON(irqs_disabled()); - - spin_lock_bh(&call_lock); + spin_lock(&call_lock); __smp_call_function_single(cpu, func, info, nonatomic, wait); - spin_unlock_bh(&call_lock); + spin_unlock(&call_lock); put_cpu(); return 0; } Index: head-2008-12-01/arch/x86/kernel/traps_64-xen.c =================================================================== --- head-2008-12-01.orig/arch/x86/kernel/traps_64-xen.c 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/arch/x86/kernel/traps_64-xen.c 2008-12-01 11:36:47.000000000 +0100 @@ -34,6 +34,10 @@ #include #include +#if defined(CONFIG_EDAC) +#include +#endif + #include #include #include @@ -332,6 +336,7 @@ static int print_trace_stack(void *data, static void print_trace_address(void *data, unsigned long addr) { + touch_nmi_watchdog(); printk_address(addr); } @@ -520,6 +525,7 @@ void __kprobes __die(const char * str, s printk("\n"); notify_die(DIE_OOPS, str, regs, err, current->thread.trap_no, SIGSEGV); show_registers(regs); + add_taint(TAINT_DIE); /* Executive summary in case the oops scrolled away */ printk(KERN_ALERT "RIP "); printk_address(regs->rip); @@ -533,7 +539,7 @@ void die(const char * str, struct pt_reg unsigned long flags = oops_begin(); if (!user_mode(regs)) - report_bug(regs->rip); + report_bug(regs->rip, regs); __die(str, regs, err); oops_end(flags); @@ -582,7 +588,8 @@ static void __kprobes do_trap(int trapnr tsk->thread.error_code = error_code; tsk->thread.trap_no = trapnr; - if (exception_trace && unhandled_signal(tsk, signr)) + if (show_unhandled_signals && unhandled_signal(tsk, signr) && + printk_ratelimit()) printk(KERN_INFO "%s[%d] trap %s rip:%lx rsp:%lx error:%lx\n", tsk->comm, tsk->pid, str, @@ -686,7 +693,8 @@ asmlinkage 
void __kprobes do_general_pro tsk->thread.error_code = error_code; tsk->thread.trap_no = 13; - if (exception_trace && unhandled_signal(tsk, SIGSEGV)) + if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && + printk_ratelimit()) printk(KERN_INFO "%s[%d] general protection rip:%lx rsp:%lx error:%lx\n", tsk->comm, tsk->pid, @@ -721,6 +729,13 @@ mem_parity_error(unsigned char reason, s reason); printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n"); +#if defined(CONFIG_EDAC) + if(edac_handler_set()) { + edac_atomic_assert_error(); + return; + } +#endif + if (panic_on_unrecovered_nmi) panic("NMI: Not continuing"); Index: head-2008-12-01/arch/x86/kernel/vsyscall_64-xen.c =================================================================== --- head-2008-12-01.orig/arch/x86/kernel/vsyscall_64-xen.c 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/arch/x86/kernel/vsyscall_64-xen.c 2008-12-01 11:36:47.000000000 +0100 @@ -42,6 +42,7 @@ #include #include #include +#include #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr))) #define __syscall_clobber "r11","rcx","memory" @@ -57,26 +58,9 @@ * - writen by timer interrupt or systcl (/proc/sys/kernel/vsyscall64) * Try to keep this structure as small as possible to avoid cache line ping pongs */ -struct vsyscall_gtod_data_t { - seqlock_t lock; - - /* open coded 'struct timespec' */ - time_t wall_time_sec; - u32 wall_time_nsec; - - int sysctl_enabled; - struct timezone sys_tz; - struct { /* extract of a clocksource struct */ - cycle_t (*vread)(void); - cycle_t cycle_last; - cycle_t mask; - u32 mult; - u32 shift; - } clock; -}; int __vgetcpu_mode __section_vgetcpu_mode; -struct vsyscall_gtod_data_t __vsyscall_gtod_data __section_vsyscall_gtod_data = +struct vsyscall_gtod_data __vsyscall_gtod_data __section_vsyscall_gtod_data = { .lock = SEQLOCK_UNLOCKED, .sysctl_enabled = 1, @@ -96,6 +80,8 @@ void update_vsyscall(struct timespec *wa vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec; vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec; vsyscall_gtod_data.sys_tz = sys_tz; + vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec; + vsyscall_gtod_data.wall_to_monotonic = wall_to_monotonic; write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); } Index: head-2008-12-01/arch/x86/mm/fault_64-xen.c =================================================================== --- head-2008-12-01.orig/arch/x86/mm/fault_64-xen.c 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/arch/x86/mm/fault_64-xen.c 2008-12-01 11:36:47.000000000 +0100 @@ -159,7 +159,9 @@ void dump_pagetable(unsigned long addres pmd_t *pmd; pte_t *pte; - pgd = __va(read_cr3() & PHYSICAL_PAGE_MASK); + pgd = (pgd_t *)read_cr3(); + + pgd = __va((unsigned long)pgd & PHYSICAL_PAGE_MASK); pgd += pgd_index(address); if (bad_address(pgd)) goto bad; printk("PGD %lx ", pgd_val(*pgd)); @@ -219,16 +221,6 @@ static int is_errata93(struct pt_regs *r return 0; } -int unhandled_signal(struct task_struct *tsk, int sig) -{ - if (is_init(tsk)) - return 1; - if (tsk->ptrace & PT_PTRACED) - return 0; - return (tsk->sighand->action[sig-1].sa.sa_handler == SIG_IGN) || - (tsk->sighand->action[sig-1].sa.sa_handler == SIG_DFL); -} - static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs, unsigned long error_code) { @@ -302,8 +294,8 @@ static int vmalloc_fault(unsigned long a return 0; } -int page_fault_trace = 0; -int exception_trace = 1; +static int page_fault_trace; +int show_unhandled_signals = 1; #define MEM_VERBOSE 1 @@ -372,7 
+364,7 @@ asmlinkage void __kprobes do_page_fault( struct vm_area_struct * vma; unsigned long address; const struct exception_table_entry *fixup; - int write; + int write, fault; unsigned long flags; siginfo_t info; @@ -384,7 +376,7 @@ asmlinkage void __kprobes do_page_fault( prefetchw(&mm->mmap_sem); /* get the address */ - address = current_vcpu_info()->arch.cr2; + address = read_cr2(); info.si_code = SEGV_MAPERR; @@ -445,6 +437,13 @@ asmlinkage void __kprobes do_page_fault( if (unlikely(in_atomic() || !mm)) goto bad_area_nosemaphore; + /* + * User-mode registers count as a user access even for any + * potential system fault or CPU buglet. + */ + if (user_mode_vm(regs)) + error_code |= PF_USER; + again: /* When running in the kernel we expect faults to occur only to * addresses in user space. All other faults represent errors in the @@ -511,19 +510,18 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. */ - switch (handle_mm_fault(mm, vma, address, write)) { - case VM_FAULT_MINOR: - tsk->min_flt++; - break; - case VM_FAULT_MAJOR: - tsk->maj_flt++; - break; - case VM_FAULT_SIGBUS: - goto do_sigbus; - default: - goto out_of_memory; + fault = handle_mm_fault(mm, vma, address, write); + if (unlikely(fault & VM_FAULT_ERROR)) { + if (fault & VM_FAULT_OOM) + goto out_of_memory; + else if (fault & VM_FAULT_SIGBUS) + goto do_sigbus; + BUG(); } - + if (fault & VM_FAULT_MAJOR) + tsk->maj_flt++; + else + tsk->min_flt++; up_read(&mm->mmap_sem); return; @@ -556,7 +554,8 @@ bad_area_nosemaphore: (address >> 32)) return; - if (exception_trace && unhandled_signal(tsk, SIGSEGV)) { + if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && + printk_ratelimit()) { printk( "%s%s[%d]: segfault at %016lx rip %016lx rsp %016lx error %lx\n", tsk->pid > 1 ? KERN_INFO : KERN_EMERG, @@ -630,7 +629,7 @@ out_of_memory: } printk("VM: killing process %s\n", tsk->comm); if (error_code & 4) - do_exit(SIGKILL); + do_group_exit(SIGKILL); goto no_context; do_sigbus: Index: head-2008-12-01/arch/x86/mm/init_64-xen.c =================================================================== --- head-2008-12-01.orig/arch/x86/mm/init_64-xen.c 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/arch/x86/mm/init_64-xen.c 2008-12-01 11:36:47.000000000 +0100 @@ -66,6 +66,9 @@ int after_bootmem; DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); extern unsigned long start_pfn; +extern pmd_t level2_fixmap_pgt[PTRS_PER_PMD]; +extern pte_t level1_fixmap_pgt[PTRS_PER_PTE]; + /* * Use this until direct mapping is established, i.e. before __va() is * available in init_memory_mapping(). @@ -362,6 +365,10 @@ __set_fixmap (enum fixed_addresses idx, set_pte_phys(address, phys, prot, 0); set_pte_phys(address, phys, prot, 1); break; + case FIX_EARLYCON_MEM_BASE: + xen_l1_entry_update(level1_fixmap_pgt + pte_index(address), + pfn_pte_ma(phys >> PAGE_SHIFT, prot)); + break; default: set_pte_phys_ma(address, phys, prot); break; @@ -594,6 +601,13 @@ void __init xen_init_pt(void) __user_pgd(init_level4_pgt)[pgd_index(VSYSCALL_START)] = __pgd(__pa_symbol(level3_user_pgt) | _PAGE_TABLE); + /* Do an early initialization of the fixmap area. 
*/ + addr = __fix_to_virt(FIX_EARLYCON_MEM_BASE); + level3_kernel_pgt[pud_index(addr)] = + __pud(__pa_symbol(level2_fixmap_pgt) | _PAGE_TABLE); + level2_fixmap_pgt[pmd_index(addr)] = + __pmd(__pa_symbol(level1_fixmap_pgt) | _PAGE_TABLE); + early_make_page_readonly(init_level4_pgt, XENFEAT_writable_page_tables); early_make_page_readonly(__user_pgd(init_level4_pgt), @@ -604,6 +618,10 @@ void __init xen_init_pt(void) XENFEAT_writable_page_tables); early_make_page_readonly(level2_kernel_pgt, XENFEAT_writable_page_tables); + early_make_page_readonly(level2_fixmap_pgt, + XENFEAT_writable_page_tables); + early_make_page_readonly(level1_fixmap_pgt, + XENFEAT_writable_page_tables); if (!xen_feature(XENFEAT_writable_page_tables)) { xen_pgd_pin(__pa_symbol(init_level4_pgt)); @@ -807,7 +825,7 @@ void __init paging_init(void) sparse_init(); free_area_init_nodes(max_zone_pfns); - init_mm.context.pinned = 1; + SetPagePinned(virt_to_page(init_mm.pgd)); } #endif @@ -1120,41 +1138,6 @@ int kern_addr_valid(unsigned long addr) return pfn_valid(pte_pfn(*pte)); } -#ifdef CONFIG_SYSCTL -#include - -extern int exception_trace, page_fault_trace; - -static ctl_table debug_table2[] = { - { - .ctl_name = 99, - .procname = "exception-trace", - .data = &exception_trace, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, - {} -}; - -static ctl_table debug_root_table2[] = { - { - .ctl_name = CTL_DEBUG, - .procname = "debug", - .mode = 0555, - .child = debug_table2 - }, - {} -}; - -static __init int x8664_sysctl_init(void) -{ - register_sysctl_table(debug_root_table2); - return 0; -} -__initcall(x8664_sysctl_init); -#endif - /* A pseudo VMA to allow ptrace access for the vsyscall page. This only covers the 64bit vsyscall page now. 32bit has a real VMA now and does not need special handling anymore. 
*/ @@ -1193,9 +1176,18 @@ int in_gate_area_no_task(unsigned long a } #ifndef CONFIG_XEN -void *alloc_bootmem_high_node(pg_data_t *pgdat, unsigned long size) +void * __init alloc_bootmem_high_node(pg_data_t *pgdat, unsigned long size) { return __alloc_bootmem_core(pgdat->bdata, size, SMP_CACHE_BYTES, (4UL*1024*1024*1024), 0); } #endif + +const char *arch_vma_name(struct vm_area_struct *vma) +{ + if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso) + return "[vdso]"; + if (vma == &gate_vma) + return "[vsyscall]"; + return NULL; +} Index: head-2008-12-01/arch/x86/mm/pageattr_64-xen.c =================================================================== --- head-2008-12-01.orig/arch/x86/mm/pageattr_64-xen.c 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/arch/x86/mm/pageattr_64-xen.c 2008-12-01 11:36:47.000000000 +0100 @@ -171,7 +171,7 @@ void mm_pin(struct mm_struct *mm) mm_walk(mm, PAGE_KERNEL_RO); xen_pgd_pin(__pa(mm->pgd)); /* kernel */ xen_pgd_pin(__pa(__user_pgd(mm->pgd))); /* user */ - mm->context.pinned = 1; + SetPagePinned(virt_to_page(mm->pgd)); spin_lock(&mm_unpinned_lock); list_del(&mm->context.unpinned); spin_unlock(&mm_unpinned_lock); @@ -189,7 +189,7 @@ void mm_unpin(struct mm_struct *mm) xen_pgd_unpin(__pa(mm->pgd)); xen_pgd_unpin(__pa(__user_pgd(mm->pgd))); mm_walk(mm, PAGE_KERNEL); - mm->context.pinned = 0; + ClearPagePinned(virt_to_page(mm->pgd)); spin_lock(&mm_unpinned_lock); list_add(&mm->context.unpinned, &mm_unpinned); spin_unlock(&mm_unpinned_lock); @@ -217,7 +217,7 @@ void mm_pin_all(void) void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) { - if (!mm->context.pinned) + if (!PagePinned(virt_to_page(mm->pgd))) mm_pin(mm); } @@ -243,8 +243,9 @@ void arch_exit_mmap(struct mm_struct *mm task_unlock(tsk); - if ( mm->context.pinned && (atomic_read(&mm->mm_count) == 1) && - !mm->context.has_foreign_mappings ) + if (PagePinned(virt_to_page(mm->pgd)) + && (atomic_read(&mm->mm_count) == 1) + && !mm->context.has_foreign_mappings) mm_unpin(mm); } @@ -343,14 +344,13 @@ static void flush_kernel_map(void *arg) struct page *pg; /* When clflush is available always use it because it is - much cheaper than WBINVD. Disable clflush for now because - the high level code is not ready yet */ + much cheaper than WBINVD. */ + /* clflush is still broken. Disable for now. 
*/ if (1 || !cpu_has_clflush) asm volatile("wbinvd" ::: "memory"); else list_for_each_entry(pg, l, lru) { void *adr = page_address(pg); - if (cpu_has_clflush) - cache_flush_page(adr); + cache_flush_page(adr); } __flush_tlb_all(); } @@ -364,7 +364,8 @@ static LIST_HEAD(deferred_pages); /* pro static inline void save_page(struct page *fpage) { - list_add(&fpage->lru, &deferred_pages); + if (!test_and_set_bit(PG_arch_1, &fpage->flags)) + list_add(&fpage->lru, &deferred_pages); } /* @@ -398,9 +399,12 @@ __change_page_attr(unsigned long address pte_t *kpte; struct page *kpte_page; pgprot_t ref_prot2; + kpte = lookup_address(address); if (!kpte) return 0; kpte_page = virt_to_page(((unsigned long)kpte) & PAGE_MASK); + BUG_ON(PageLRU(kpte_page)); + BUG_ON(PageCompound(kpte_page)); if (pgprot_val(prot) != pgprot_val(ref_prot)) { if (!pte_huge(*kpte)) { set_pte(kpte, pfn_pte(pfn, prot)); @@ -439,10 +443,9 @@ __change_page_attr(unsigned long address return 0; #endif - if (page_private(kpte_page) == 0) { - save_page(kpte_page); + save_page(kpte_page); + if (page_private(kpte_page) == 0) revert_page(address, ref_prot); - } return 0; } @@ -514,6 +517,10 @@ void global_flush_tlb(void) flush_map(&l); list_for_each_entry_safe(pg, next, &l, lru) { + list_del(&pg->lru); + clear_bit(PG_arch_1, &pg->flags); + if (page_private(pg) != 0) + continue; ClearPagePrivate(pg); __free_page(pg); } Index: head-2008-12-01/arch/x86/vdso/vdso32/note.S =================================================================== --- head-2008-12-01.orig/arch/x86/vdso/vdso32/note.S 2008-12-01 10:53:14.000000000 +0100 +++ head-2008-12-01/arch/x86/vdso/vdso32/note.S 2008-12-01 11:36:47.000000000 +0100 @@ -13,7 +13,7 @@ ELFNOTE_START(Linux, 0, "a") .long LINUX_VERSION_CODE ELFNOTE_END -#ifdef CONFIG_XEN +#if defined(CONFIG_X86_XEN) || defined(CONFIG_PARAVIRT_XEN) /* * Add a special note telling glibc's dynamic linker a fake hardware * flavor that it will use to choose the search path for libraries in the @@ -37,8 +37,12 @@ ELFNOTE_END ELFNOTE_START(GNU, 2, "a") .long 1 /* ncaps */ +#ifdef CONFIG_PARAVIRT_XEN VDSO32_NOTE_MASK: /* Symbol used by arch/x86/xen/setup.c */ .long 0 /* mask */ +#else + .long 1 << VDSO_NOTE_NONEGSEG_BIT /* mask */ +#endif .byte VDSO_NOTE_NONEGSEG_BIT; .asciz "nosegneg" /* bit, name */ ELFNOTE_END #endif Index: head-2008-12-01/drivers/Makefile =================================================================== --- head-2008-12-01.orig/drivers/Makefile 2008-12-01 11:11:03.000000000 +0100 +++ head-2008-12-01/drivers/Makefile 2008-12-01 11:36:47.000000000 +0100 @@ -16,7 +16,7 @@ obj-$(CONFIG_ACPI) += acpi/ obj-$(CONFIG_PNP) += pnp/ obj-$(CONFIG_ARM_AMBA) += amba/ -obj-$(CONFIG_XEN) += xen/ +obj-$(CONFIG_PARAVIRT_XEN) += xen/ # char/ comes before serial/ etc so that the VT console is the boot-time # default. 
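The drivers/Makefile hunk above, together with the Kconfig changes that follow, consistently separates the two Xen flavours: CONFIG_PARAVIRT_XEN now gates the upstream pv-ops guest pieces (the renamed XEN_BLKFRONT and XEN_NETFRONT frontends, HVC_XEN, grant-table.o), while CONFIG_XEN continues to gate the forward-ported classic XenLinux code (drivers/xen/core, console, evtchn, char, util). Shared sources then pick a side with the usual preprocessor pattern; a sketch with hypothetical function content, where only the CONFIG_* symbols come from the patch:

    /*
     * Illustration only: how a file shared between the two ports can
     * distinguish them.
     *   CONFIG_XEN          - forward-ported classic XenLinux guest
     *   CONFIG_PARAVIRT_XEN - upstream pv-ops Xen guest
     */
    static void example_guest_init(void)
    {
    #if defined(CONFIG_XEN) || defined(CONFIG_PARAVIRT_XEN)
            /* work needed by either flavour (the vdso32/note.S hunk above
             * uses the same kind of dual test) */
    #endif
    #ifdef CONFIG_PARAVIRT_XEN
            /* pv-ops-only setup */
    #endif
    #ifdef CONFIG_XEN
            /* classic-port-only setup */
    #endif
    }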
Index: head-2008-12-01/drivers/block/Kconfig =================================================================== --- head-2008-12-01.orig/drivers/block/Kconfig 2008-12-01 10:53:14.000000000 +0100 +++ head-2008-12-01/drivers/block/Kconfig 2008-12-01 11:36:47.000000000 +0100 @@ -423,9 +423,9 @@ config XILINX_SYSACE help Include support for the Xilinx SystemACE CompactFlash interface -config XEN_BLKDEV_FRONTEND +config XEN_BLKFRONT tristate "Xen virtual block device support" - depends on XEN + depends on PARAVIRT_XEN default y help This driver implements the front-end of the Xen virtual Index: head-2008-12-01/drivers/block/Makefile =================================================================== --- head-2008-12-01.orig/drivers/block/Makefile 2008-12-01 10:53:14.000000000 +0100 +++ head-2008-12-01/drivers/block/Makefile 2008-12-01 11:36:47.000000000 +0100 @@ -31,4 +31,4 @@ obj-$(CONFIG_BLK_DEV_SX8) += sx8.o obj-$(CONFIG_BLK_DEV_UB) += ub.o obj-$(CONFIG_BLK_DEV_HD) += hd.o -obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += xen-blkfront.o +obj-$(CONFIG_XEN_BLKFRONT) += xen-blkfront.o Index: head-2008-12-01/drivers/block/xen-blkfront.c =================================================================== --- head-2008-12-01.orig/drivers/block/xen-blkfront.c 2008-12-01 10:53:14.000000000 +0100 +++ head-2008-12-01/drivers/block/xen-blkfront.c 2008-12-01 11:36:47.000000000 +0100 @@ -1021,7 +1021,6 @@ static struct xenbus_device_id blkfront_ static struct xenbus_driver blkfront = { .name = "vbd", - .owner = THIS_MODULE, .ids = blkfront_ids, .probe = blkfront_probe, .remove = blkfront_remove, Index: head-2008-12-01/drivers/char/Kconfig =================================================================== --- head-2008-12-01.orig/drivers/char/Kconfig 2008-11-10 11:49:15.000000000 +0100 +++ head-2008-12-01/drivers/char/Kconfig 2008-12-01 11:36:47.000000000 +0100 @@ -624,7 +624,7 @@ config HVC_BEAT config HVC_XEN bool "Xen Hypervisor Console support" - depends on XEN + depends on PARAVIRT_XEN select HVC_DRIVER select HVC_IRQ default y Index: head-2008-12-01/drivers/net/Kconfig =================================================================== --- head-2008-12-01.orig/drivers/net/Kconfig 2008-12-01 10:53:14.000000000 +0100 +++ head-2008-12-01/drivers/net/Kconfig 2008-12-01 11:36:47.000000000 +0100 @@ -2535,9 +2535,9 @@ source "drivers/atm/Kconfig" source "drivers/s390/net/Kconfig" -config XEN_NETDEV_FRONTEND +config XEN_NETFRONT tristate "Xen network device frontend driver" - depends on XEN + depends on PARAVIRT_XEN default y help The network device frontend driver allows the kernel to Index: head-2008-12-01/drivers/net/Makefile =================================================================== --- head-2008-12-01.orig/drivers/net/Makefile 2008-12-01 10:53:14.000000000 +0100 +++ head-2008-12-01/drivers/net/Makefile 2008-12-01 11:36:47.000000000 +0100 @@ -142,7 +142,7 @@ obj-$(CONFIG_PPPOL2TP) += pppox.o pppol2 obj-$(CONFIG_SLIP) += slip.o obj-$(CONFIG_SLHC) += slhc.o -obj-$(CONFIG_XEN_NETDEV_FRONTEND) += xen-netfront.o +obj-$(CONFIG_XEN_NETFRONT) += xen-netfront.o obj-$(CONFIG_DUMMY) += dummy.o obj-$(CONFIG_IFB) += ifb.o Index: head-2008-12-01/drivers/net/xen-netfront.c =================================================================== --- head-2008-12-01.orig/drivers/net/xen-netfront.c 2008-12-01 10:53:14.000000000 +0100 +++ head-2008-12-01/drivers/net/xen-netfront.c 2008-12-01 11:36:47.000000000 +0100 @@ -36,8 +36,6 @@ #include #include #include -#include -#include #include #include #include @@ -765,45 +763,6 @@ 
static RING_IDX xennet_fill_frags(struct return cons; } -static int skb_checksum_setup(struct sk_buff *skb) -{ - struct iphdr *iph; - unsigned char *th; - int err = -EPROTO; - - if (skb->protocol != htons(ETH_P_IP)) - goto out; - - iph = (void *)skb->data; - th = skb->data + 4 * iph->ihl; - if (th >= skb_tail_pointer(skb)) - goto out; - - skb->csum_start = th - skb->head; - switch (iph->protocol) { - case IPPROTO_TCP: - skb->csum_offset = offsetof(struct tcphdr, check); - break; - case IPPROTO_UDP: - skb->csum_offset = offsetof(struct udphdr, check); - break; - default: - if (net_ratelimit()) - printk(KERN_ERR "Attempting to checksum a non-" - "TCP/UDP packet, dropping a protocol" - " %d packet", iph->protocol); - goto out; - } - - if ((th + skb->csum_offset + 2) > skb_tail_pointer(skb)) - goto out; - - err = 0; - -out: - return err; -} - static int handle_incoming_queue(struct net_device *dev, struct sk_buff_head *rxq) { @@ -1784,7 +1743,6 @@ static int __devexit xennet_remove(struc static struct xenbus_driver netfront = { .name = "vif", - .owner = THIS_MODULE, .ids = netfront_ids, .probe = netfront_probe, .remove = __devexit_p(xennet_remove), Index: head-2008-12-01/drivers/xen/Makefile =================================================================== --- head-2008-12-01.orig/drivers/xen/Makefile 2008-12-01 11:22:58.000000000 +0100 +++ head-2008-12-01/drivers/xen/Makefile 2008-12-01 11:36:47.000000000 +0100 @@ -1,10 +1,12 @@ -obj-y += core/ -obj-y += console/ -obj-y += evtchn/ -obj-y += xenbus/ -obj-y += char/ +obj-$(CONFIG_PARAVIRT_XEN) += grant-table.o -obj-y += util.o +obj-$(CONFIG_XEN) += core/ +obj-$(CONFIG_XEN) += console/ +obj-$(CONFIG_XEN) += evtchn/ +obj-y += xenbus/ +obj-$(CONFIG_XEN) += char/ + +obj-$(CONFIG_XEN) += util.o obj-$(CONFIG_XEN_BALLOON) += balloon/ obj-$(CONFIG_XEN_BLKDEV_BACKEND) += blkback/ obj-$(CONFIG_XEN_BLKDEV_TAP) += blktap/ Index: head-2008-12-01/drivers/xen/blkback/blkback.c =================================================================== --- head-2008-12-01.orig/drivers/xen/blkback/blkback.c 2008-12-01 11:32:38.000000000 +0100 +++ head-2008-12-01/drivers/xen/blkback/blkback.c 2008-12-01 11:36:47.000000000 +0100 @@ -154,7 +154,7 @@ static void unplug_queue(blkif_t *blkif) static void plug_queue(blkif_t *blkif, struct block_device *bdev) { - request_queue_t *q = bdev_get_queue(bdev); + struct request_queue *q = bdev_get_queue(bdev); if (q == blkif->plug) return; Index: head-2008-12-01/drivers/xen/blkback/common.h =================================================================== --- head-2008-12-01.orig/drivers/xen/blkback/common.h 2008-12-01 11:29:05.000000000 +0100 +++ head-2008-12-01/drivers/xen/blkback/common.h 2008-12-01 11:36:47.000000000 +0100 @@ -79,7 +79,7 @@ typedef struct blkif_st { wait_queue_head_t wq; struct task_struct *xenblkd; unsigned int waiting_reqs; - request_queue_t *plug; + struct request_queue *plug; /* statistics */ unsigned long st_print; Index: head-2008-12-01/drivers/xen/blkback/interface.c =================================================================== --- head-2008-12-01.orig/drivers/xen/blkback/interface.c 2008-12-01 11:32:38.000000000 +0100 +++ head-2008-12-01/drivers/xen/blkback/interface.c 2008-12-01 11:36:47.000000000 +0100 @@ -177,5 +177,5 @@ void blkif_free(blkif_t *blkif) void __init blkif_interface_init(void) { blkif_cachep = kmem_cache_create("blkif_cache", sizeof(blkif_t), - 0, 0, NULL, NULL); + 0, 0, NULL); } Index: head-2008-12-01/drivers/xen/blkfront/blkfront.c 
=================================================================== --- head-2008-12-01.orig/drivers/xen/blkfront/blkfront.c 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/drivers/xen/blkfront/blkfront.c 2008-12-01 11:36:47.000000000 +0100 @@ -667,7 +667,7 @@ static int blkif_queue_request(struct re * do_blkif_request * read a block; request is in a request queue */ -void do_blkif_request(request_queue_t *rq) +void do_blkif_request(struct request_queue *rq) { struct blkfront_info *info = NULL; struct request *req; Index: head-2008-12-01/drivers/xen/blkfront/block.h =================================================================== --- head-2008-12-01.orig/drivers/xen/blkfront/block.h 2008-08-07 12:44:36.000000000 +0200 +++ head-2008-12-01/drivers/xen/blkfront/block.h 2008-12-01 11:36:47.000000000 +0100 @@ -105,7 +105,7 @@ struct blkfront_info blkif_front_ring_t ring; unsigned int irq; struct xlbd_major_info *mi; - request_queue_t *rq; + struct request_queue *rq; struct work_struct work; struct gnttab_free_callback callback; struct blk_shadow shadow[BLK_RING_SIZE]; @@ -129,7 +129,7 @@ extern int blkif_ioctl(struct inode *ino extern int blkif_getgeo(struct block_device *, struct hd_geometry *); extern int blkif_check(dev_t dev); extern int blkif_revalidate(dev_t dev); -extern void do_blkif_request (request_queue_t *rq); +extern void do_blkif_request (struct request_queue *rq); /* Virtual block-device subsystem. */ /* Note that xlvbd_add doesn't call add_disk for you: you're expected Index: head-2008-12-01/drivers/xen/blkfront/vbd.c =================================================================== --- head-2008-12-01.orig/drivers/xen/blkfront/vbd.c 2008-08-07 12:44:36.000000000 +0200 +++ head-2008-12-01/drivers/xen/blkfront/vbd.c 2008-12-01 11:36:47.000000000 +0100 @@ -211,7 +211,7 @@ xlbd_put_major_info(struct xlbd_major_in static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size) { - request_queue_t *rq; + struct request_queue *rq; rq = blk_init_queue(do_blkif_request, &blkif_io_lock); if (rq == NULL) Index: head-2008-12-01/drivers/xen/blktap/common.h =================================================================== --- head-2008-12-01.orig/drivers/xen/blktap/common.h 2008-12-01 11:29:05.000000000 +0100 +++ head-2008-12-01/drivers/xen/blktap/common.h 2008-12-01 11:36:47.000000000 +0100 @@ -68,7 +68,7 @@ typedef struct blkif_st { wait_queue_head_t wq; struct task_struct *xenblkd; unsigned int waiting_reqs; - request_queue_t *plug; + struct request_queue *plug; /* statistics */ unsigned long st_print; Index: head-2008-12-01/drivers/xen/blktap/interface.c =================================================================== --- head-2008-12-01.orig/drivers/xen/blktap/interface.c 2008-12-01 11:32:38.000000000 +0100 +++ head-2008-12-01/drivers/xen/blktap/interface.c 2008-12-01 11:36:47.000000000 +0100 @@ -177,5 +177,5 @@ void tap_blkif_kmem_cache_free(blkif_t * void __init tap_blkif_interface_init(void) { blkif_cachep = kmem_cache_create("blktapif_cache", sizeof(blkif_t), - 0, 0, NULL, NULL); + 0, 0, NULL); } Index: head-2008-12-01/drivers/xen/core/features.c =================================================================== --- head-2008-12-01.orig/drivers/xen/core/features.c 2007-06-12 13:13:44.000000000 +0200 +++ head-2008-12-01/drivers/xen/core/features.c 2008-12-01 11:36:47.000000000 +0100 @@ -19,7 +19,7 @@ u8 xen_features[XENFEAT_NR_SUBMAPS * 32] /* Not a GPL symbol: used in ubiquitous macros, so too restrictive. 
*/ EXPORT_SYMBOL(xen_features); -void setup_xen_features(void) +void xen_setup_features(void) { xen_feature_info_t fi; int i, j; Index: head-2008-12-01/drivers/xen/core/reboot.c =================================================================== --- head-2008-12-01.orig/drivers/xen/core/reboot.c 2008-12-01 11:32:38.000000000 +0100 +++ head-2008-12-01/drivers/xen/core/reboot.c 2008-12-01 11:36:47.000000000 +0100 @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include Index: head-2008-12-01/drivers/xen/scsiback/interface.c =================================================================== --- head-2008-12-01.orig/drivers/xen/scsiback/interface.c 2008-12-01 11:32:38.000000000 +0100 +++ head-2008-12-01/drivers/xen/scsiback/interface.c 2008-12-01 11:36:47.000000000 +0100 @@ -167,7 +167,7 @@ void scsiback_free(struct vscsibk_info * int __init scsiback_interface_init(void) { scsiback_cachep = kmem_cache_create("vscsiif_cache", - sizeof(struct vscsibk_info), 0, 0, NULL, NULL); + sizeof(struct vscsibk_info), 0, 0, NULL); if (!scsiback_cachep) { printk(KERN_ERR "scsiback: can't init scsi cache\n"); return -ENOMEM; Index: head-2008-12-01/drivers/xen/scsifront/scsifront.c =================================================================== --- head-2008-12-01.orig/drivers/xen/scsifront/scsifront.c 2008-12-01 11:29:05.000000000 +0100 +++ head-2008-12-01/drivers/xen/scsifront/scsifront.c 2008-12-01 11:36:47.000000000 +0100 @@ -147,7 +147,7 @@ static void scsifront_cdb_cmd_done(struc add_id_to_freelist(info, id); sc->result = ring_res->rslt; - sc->resid = ring_res->residual_len; + scsi_set_resid(sc, ring_res->residual_len); if (ring_res->sense_len > VSCSIIF_SENSE_BUFFERSIZE) sense_len = VSCSIIF_SENSE_BUFFERSIZE; Index: head-2008-12-01/drivers/xen/tpmback/interface.c =================================================================== --- head-2008-12-01.orig/drivers/xen/tpmback/interface.c 2008-12-01 11:36:07.000000000 +0100 +++ head-2008-12-01/drivers/xen/tpmback/interface.c 2008-12-01 11:36:47.000000000 +0100 @@ -12,6 +12,7 @@ */ #include "common.h" +#include #include #include @@ -159,7 +160,7 @@ void tpmif_disconnect_complete(tpmif_t * int __init tpmif_interface_init(void) { tpmif_cachep = kmem_cache_create("tpmif_cache", sizeof (tpmif_t), - 0, 0, NULL, NULL); + 0, 0, NULL); return tpmif_cachep ? 0 : -ENOMEM; } Index: head-2008-12-01/drivers/xen/util.c =================================================================== --- head-2008-12-01.orig/drivers/xen/util.c 2007-07-10 09:42:30.000000000 +0200 +++ head-2008-12-01/drivers/xen/util.c 2008-12-01 11:36:47.000000000 +0100 @@ -1,8 +1,5 @@ -#include +#include #include -#include -#include -#include #include struct class *get_xen_class(void) @@ -21,45 +18,3 @@ struct class *get_xen_class(void) return xen_class; } EXPORT_SYMBOL_GPL(get_xen_class); - -#ifdef CONFIG_X86 -static int f(pte_t *pte, struct page *pmd_page, unsigned long addr, void *data) -{ - /* apply_to_page_range() does all the hard work. */ - return 0; -} - -struct vm_struct *alloc_vm_area(unsigned long size) -{ - struct vm_struct *area; - - area = get_vm_area(size, VM_IOREMAP); - if (area == NULL) - return NULL; - - /* - * This ensures that page tables are constructed for this region - * of kernel virtual address space and mapped into init_mm. - */ - if (apply_to_page_range(&init_mm, (unsigned long)area->addr, - area->size, f, NULL)) { - free_vm_area(area); - return NULL; - } - - /* Map page directories into every address space. 
*/ - vmalloc_sync_all(); - - return area; -} -EXPORT_SYMBOL_GPL(alloc_vm_area); - -void free_vm_area(struct vm_struct *area) -{ - struct vm_struct *ret; - ret = remove_vm_area(area->addr); - BUG_ON(ret != area); - kfree(area); -} -EXPORT_SYMBOL_GPL(free_vm_area); -#endif /* CONFIG_X86 */ Index: head-2008-12-01/drivers/xen/xenbus/xenbus_client.c =================================================================== --- head-2008-12-01.orig/drivers/xen/xenbus/xenbus_client.c 2008-12-01 11:22:58.000000000 +0100 +++ head-2008-12-01/drivers/xen/xenbus/xenbus_client.c 2008-12-01 11:36:47.000000000 +0100 @@ -30,19 +30,26 @@ * IN THE SOFTWARE. */ +#if defined(CONFIG_XEN) || defined(MODULE) #include #include #include -#include #include +#else +#include +#include +#include +#include +#include +#include +#include +#endif +#include #ifdef HAVE_XEN_PLATFORM_COMPAT_H #include #endif -#define DPRINTK(fmt, args...) \ - pr_debug("xenbus_client (%s:%d) " fmt ".\n", __FUNCTION__, __LINE__, ##args) - const char *xenbus_strstate(enum xenbus_state state) { static const char *const name[] = { @@ -58,6 +65,20 @@ const char *xenbus_strstate(enum xenbus_ } EXPORT_SYMBOL_GPL(xenbus_strstate); +/** + * xenbus_watch_path - register a watch + * @dev: xenbus device + * @path: path to watch + * @watch: watch to register + * @callback: callback to register + * + * Register a @watch on the given path, using the given xenbus_watch structure + * for storage, and the given @callback function as the callback. Return 0 on + * success, or -errno on error. On success, the given @path will be saved as + * @watch->node, and remains the caller's to free. On error, @watch->node will + * be NULL, the device will switch to %XenbusStateClosing, and the error will + * be saved in the store. + */ int xenbus_watch_path(struct xenbus_device *dev, const char *path, struct xenbus_watch *watch, void (*callback)(struct xenbus_watch *, @@ -81,6 +102,7 @@ int xenbus_watch_path(struct xenbus_devi EXPORT_SYMBOL_GPL(xenbus_watch_path); +#if defined(CONFIG_XEN) || defined(MODULE) int xenbus_watch_path2(struct xenbus_device *dev, const char *path, const char *path2, struct xenbus_watch *watch, void (*callback)(struct xenbus_watch *, @@ -99,8 +121,60 @@ int xenbus_watch_path2(struct xenbus_dev return err; } EXPORT_SYMBOL_GPL(xenbus_watch_path2); +#else +/** + * xenbus_watch_pathfmt - register a watch on a sprintf-formatted path + * @dev: xenbus device + * @watch: watch to register + * @callback: callback to register + * @pathfmt: format of path to watch + * + * Register a watch on the given @path, using the given xenbus_watch + * structure for storage, and the given @callback function as the callback. + * Return 0 on success, or -errno on error. On success, the watched path + * (@path/@path2) will be saved as @watch->node, and becomes the caller's to + * kfree(). On error, watch->node will be NULL, so the caller has nothing to + * free, the device will switch to %XenbusStateClosing, and the error will be + * saved in the store. + */ +int xenbus_watch_pathfmt(struct xenbus_device *dev, + struct xenbus_watch *watch, + void (*callback)(struct xenbus_watch *, + const char **, unsigned int), + const char *pathfmt, ...) 
+{ + int err; + va_list ap; + char *path; + va_start(ap, pathfmt); + path = kvasprintf(GFP_KERNEL, pathfmt, ap); + va_end(ap); + if (!path) { + xenbus_dev_fatal(dev, -ENOMEM, "allocating path for watch"); + return -ENOMEM; + } + err = xenbus_watch_path(dev, path, watch, callback); + + if (err) + kfree(path); + return err; +} +EXPORT_SYMBOL_GPL(xenbus_watch_pathfmt); +#endif + + +/** + * xenbus_switch_state + * @dev: xenbus device + * @xbt: transaction handle + * @state: new state + * + * Advertise in the store a change of the given driver to the given new_state. + * Return 0 on success, or -errno on error. On error, the device will switch + * to XenbusStateClosing, and the error will be saved in the store. + */ int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state state) { /* We check whether the state is currently set to the given value, and @@ -159,8 +233,8 @@ static char *error_path(struct xenbus_de } -void _dev_error(struct xenbus_device *dev, int err, const char *fmt, - va_list ap) +static void _dev_error(struct xenbus_device *dev, int err, + const char *fmt, va_list ap) { int ret; unsigned int len; @@ -181,14 +255,16 @@ void _dev_error(struct xenbus_device *de path_buffer = error_path(dev); if (path_buffer == NULL) { - printk("xenbus: failed to write error node for %s (%s)\n", - dev->nodename, printf_buffer); + dev_err(&dev->dev, + "xenbus: failed to write error node for %s (%s)\n", + dev->nodename, printf_buffer); goto fail; } if (xenbus_write(XBT_NIL, path_buffer, "error", printf_buffer) != 0) { - printk("xenbus: failed to write error node for %s (%s)\n", - dev->nodename, printf_buffer); + dev_err(&dev->dev, + "xenbus: failed to write error node for %s (%s)\n", + dev->nodename, printf_buffer); goto fail; } @@ -200,6 +276,15 @@ fail: } +/** + * xenbus_dev_error + * @dev: xenbus device + * @err: error to report + * @fmt: error message format + * + * Report the given negative errno into the store, along with the given + * formatted message. + */ void xenbus_dev_error(struct xenbus_device *dev, int err, const char *fmt, ...) { @@ -212,6 +297,16 @@ void xenbus_dev_error(struct xenbus_devi EXPORT_SYMBOL_GPL(xenbus_dev_error); +/** + * xenbus_dev_fatal + * @dev: xenbus device + * @err: error to report + * @fmt: error message format + * + * Equivalent to xenbus_dev_error(dev, err, fmt, args), followed by + * xenbus_switch_state(dev, NULL, XenbusStateClosing) to schedule an orderly + * closedown of this driver and its peer. + */ void xenbus_dev_fatal(struct xenbus_device *dev, int err, const char *fmt, ...) { @@ -226,6 +321,15 @@ void xenbus_dev_fatal(struct xenbus_devi EXPORT_SYMBOL_GPL(xenbus_dev_fatal); +/** + * xenbus_grant_ring + * @dev: xenbus device + * @ring_mfn: mfn of ring to grant + * + * Grant access to the given @ring_mfn to the peer of the given device. Return + * 0 on success, or -errno on error. On error, the device will switch to + * XenbusStateClosing, and the error will be saved in the store. + */ int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn) { int err = gnttab_grant_foreign_access(dev->otherend_id, ring_mfn, 0); @@ -236,6 +340,12 @@ int xenbus_grant_ring(struct xenbus_devi EXPORT_SYMBOL_GPL(xenbus_grant_ring); +/** + * Allocate an event channel for the given xenbus_device, assigning the newly + * created local port to *port. Return 0 on success, or -errno on error. On + * error, the device will switch to XenbusStateClosing, and the error will be + * saved in the store. 
+ */ int xenbus_alloc_evtchn(struct xenbus_device *dev, int *port) { struct evtchn_alloc_unbound alloc_unbound; @@ -256,6 +366,38 @@ int xenbus_alloc_evtchn(struct xenbus_de EXPORT_SYMBOL_GPL(xenbus_alloc_evtchn); +#if 0 /* !defined(CONFIG_XEN) && !defined(MODULE) */ +/** + * Bind to an existing interdomain event channel in another domain. Returns 0 + * on success and stores the local port in *port. On error, returns -errno, + * switches the device to XenbusStateClosing, and saves the error in XenStore. + */ +int xenbus_bind_evtchn(struct xenbus_device *dev, int remote_port, int *port) +{ + struct evtchn_bind_interdomain bind_interdomain; + int err; + + bind_interdomain.remote_dom = dev->otherend_id; + bind_interdomain.remote_port = remote_port; + + err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain, + &bind_interdomain); + if (err) + xenbus_dev_fatal(dev, err, + "binding to event channel %d from domain %d", + remote_port, dev->otherend_id); + else + *port = bind_interdomain.local_port; + + return err; +} +EXPORT_SYMBOL_GPL(xenbus_bind_evtchn); +#endif + + +/** + * Free an existing event channel. Returns 0 on success or -errno on error. + */ int xenbus_free_evtchn(struct xenbus_device *dev, int port) { struct evtchn_close close; @@ -272,6 +414,191 @@ int xenbus_free_evtchn(struct xenbus_dev EXPORT_SYMBOL_GPL(xenbus_free_evtchn); +#if 0 /* !defined(CONFIG_XEN) && !defined(MODULE) */ +/** + * xenbus_map_ring_valloc + * @dev: xenbus device + * @gnt_ref: grant reference + * @vaddr: pointer to address to be filled out by mapping + * + * Based on Rusty Russell's skeleton driver's map_page. + * Map a page of memory into this domain from another domain's grant table. + * xenbus_map_ring_valloc allocates a page of virtual address space, maps the + * page to that address, and sets *vaddr to that address. + * Returns 0 on success, and GNTST_* (see xen/include/interface/grant_table.h) + * or -ENOMEM on error. If an error is returned, device will switch to + * XenbusStateClosing and the error message will be saved in XenStore. + */ +int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref, void **vaddr) +{ + struct gnttab_map_grant_ref op = { + .flags = GNTMAP_host_map, + .ref = gnt_ref, + .dom = dev->otherend_id, + }; + struct vm_struct *area; + + *vaddr = NULL; + + area = alloc_vm_area(PAGE_SIZE); + if (!area) + return -ENOMEM; + + op.host_addr = (unsigned long)area->addr; + + if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)) + BUG(); + + if (op.status != GNTST_okay) { + free_vm_area(area); + xenbus_dev_fatal(dev, op.status, + "mapping in shared page %d from domain %d", + gnt_ref, dev->otherend_id); + return op.status; + } + + /* Stuff the handle in an unused field */ + area->phys_addr = (unsigned long)op.handle; + + *vaddr = area->addr; + return 0; +} +EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc); + + +/** + * xenbus_map_ring + * @dev: xenbus device + * @gnt_ref: grant reference + * @handle: pointer to grant handle to be filled + * @vaddr: address to be mapped to + * + * Map a page of memory into this domain from another domain's grant table. + * xenbus_map_ring does not allocate the virtual address space (you must do + * this yourself!). It only maps in the page to the specified address. + * Returns 0 on success, and GNTST_* (see xen/include/interface/grant_table.h) + * or -ENOMEM on error. If an error is returned, device will switch to + * XenbusStateClosing and the error message will be saved in XenStore. 
+ */ +int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref, + grant_handle_t *handle, void *vaddr) +{ + struct gnttab_map_grant_ref op = { + .host_addr = (unsigned long)vaddr, + .flags = GNTMAP_host_map, + .ref = gnt_ref, + .dom = dev->otherend_id, + }; + + if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)) + BUG(); + + if (op.status != GNTST_okay) { + xenbus_dev_fatal(dev, op.status, + "mapping in shared page %d from domain %d", + gnt_ref, dev->otherend_id); + } else + *handle = op.handle; + + return op.status; +} +EXPORT_SYMBOL_GPL(xenbus_map_ring); + + +/** + * xenbus_unmap_ring_vfree + * @dev: xenbus device + * @vaddr: addr to unmap + * + * Based on Rusty Russell's skeleton driver's unmap_page. + * Unmap a page of memory in this domain that was imported from another domain. + * Use xenbus_unmap_ring_vfree if you mapped in your memory with + * xenbus_map_ring_valloc (it will free the virtual address space). + * Returns 0 on success and returns GNTST_* on error + * (see xen/include/interface/grant_table.h). + */ +int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr) +{ + struct vm_struct *area; + struct gnttab_unmap_grant_ref op = { + .host_addr = (unsigned long)vaddr, + }; + + /* It'd be nice if linux/vmalloc.h provided a find_vm_area(void *addr) + * method so that we don't have to muck with vmalloc internals here. + * We could force the user to hang on to their struct vm_struct from + * xenbus_map_ring_valloc, but these 6 lines considerably simplify + * this API. + */ + read_lock(&vmlist_lock); + for (area = vmlist; area != NULL; area = area->next) { + if (area->addr == vaddr) + break; + } + read_unlock(&vmlist_lock); + + if (!area) { + xenbus_dev_error(dev, -ENOENT, + "can't find mapped virtual address %p", vaddr); + return GNTST_bad_virt_addr; + } + + op.handle = (grant_handle_t)area->phys_addr; + + if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)) + BUG(); + + if (op.status == GNTST_okay) + free_vm_area(area); + else + xenbus_dev_error(dev, op.status, + "unmapping page at handle %d error %d", + (int16_t)area->phys_addr, op.status); + + return op.status; +} +EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree); + + +/** + * xenbus_unmap_ring + * @dev: xenbus device + * @handle: grant handle + * @vaddr: addr to unmap + * + * Unmap a page of memory in this domain that was imported from another domain. + * Returns 0 on success and returns GNTST_* on error + * (see xen/include/interface/grant_table.h). + */ +int xenbus_unmap_ring(struct xenbus_device *dev, + grant_handle_t handle, void *vaddr) +{ + struct gnttab_unmap_grant_ref op = { + .host_addr = (unsigned long)vaddr, + .handle = handle, + }; + + if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)) + BUG(); + + if (op.status != GNTST_okay) + xenbus_dev_error(dev, op.status, + "unmapping page at handle %d error %d", + handle, op.status); + + return op.status; +} +EXPORT_SYMBOL_GPL(xenbus_unmap_ring); +#endif + + +/** + * xenbus_read_driver_state + * @path: path for driver + * + * Return the state of the driver rooted at the given store path, or + * XenbusStateUnknown if no state can be read. 
+ */ enum xenbus_state xenbus_read_driver_state(const char *path) { enum xenbus_state result; Index: head-2008-12-01/drivers/xen/xenbus/xenbus_comms.c =================================================================== --- head-2008-12-01.orig/drivers/xen/xenbus/xenbus_comms.c 2008-12-01 11:32:38.000000000 +0100 +++ head-2008-12-01/drivers/xen/xenbus/xenbus_comms.c 2008-12-01 11:36:47.000000000 +0100 @@ -34,12 +34,15 @@ #include #include #include -#include -#include -#include #include - +#if defined(CONFIG_XEN) || defined(MODULE) +#include #include +#else +#include +#include +#include +#endif #include "xenbus_comms.h" @@ -91,6 +94,13 @@ static const void *get_input_chunk(XENST return buf + MASK_XENSTORE_IDX(cons); } +/** + * xb_write - low level write + * @data: buffer to send + * @len: length of buffer + * + * Returns 0 on success, error otherwise. + */ int xb_write(const void *data, unsigned len) { struct xenstore_domain_interface *intf = xen_store_interface; @@ -199,7 +209,9 @@ int xb_read(void *data, unsigned len) return 0; } -/* Set up interrupt handler off store event channel. */ +/** + * xb_init_comms - Set up interrupt handler off store event channel. + */ int xb_init_comms(void) { struct xenstore_domain_interface *intf = xen_store_interface; @@ -219,7 +231,11 @@ int xb_init_comms(void) if (xenbus_irq) unbind_from_irqhandler(xenbus_irq, &xb_waitq); +#if defined(CONFIG_XEN) || defined(MODULE) err = bind_caller_port_to_irqhandler( +#else + err = bind_evtchn_to_irqhandler( +#endif xen_store_evtchn, wake_waiting, 0, "xenbus", &xb_waitq); if (err <= 0) { Index: head-2008-12-01/drivers/xen/xenbus/xenbus_probe.c =================================================================== --- head-2008-12-01.orig/drivers/xen/xenbus/xenbus_probe.c 2008-12-01 11:36:07.000000000 +0100 +++ head-2008-12-01/drivers/xen/xenbus/xenbus_probe.c 2008-12-01 11:36:47.000000000 +0100 @@ -43,12 +43,11 @@ #include #include #include -#include +#include -#include #include -#include #include +#if defined(CONFIG_XEN) || defined(MODULE) #include #include #include @@ -57,6 +56,12 @@ #ifdef MODULE #include #endif +#else +#include +#include +#include +#include +#endif #include "xenbus_comms.h" #include "xenbus_probe.h" @@ -168,7 +173,7 @@ static int read_backend_details(struct x return read_otherend_details(xendev, "backend-id", "backend"); } -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16) +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16) && (defined(CONFIG_XEN) || defined(MODULE)) static int xenbus_uevent_frontend(struct device *dev, char **envp, int num_envp, char *buffer, int buffer_size) { @@ -207,12 +212,16 @@ static struct xen_bus_type xenbus_fronte .probe = xenbus_dev_probe, .remove = xenbus_dev_remove, .shutdown = xenbus_dev_shutdown, +#if defined(CONFIG_XEN) || defined(MODULE) .uevent = xenbus_uevent_frontend, #endif +#endif }, +#if defined(CONFIG_XEN) || defined(MODULE) .dev = { .bus_id = "xen", }, +#endif }; static void otherend_changed(struct xenbus_watch *watch, @@ -228,14 +237,15 @@ static void otherend_changed(struct xenb if (!dev->otherend || strncmp(dev->otherend, vec[XS_WATCH_PATH], strlen(dev->otherend))) { - DPRINTK("Ignoring watch at %s", vec[XS_WATCH_PATH]); + dev_dbg(&dev->dev, "Ignoring watch at %s", vec[XS_WATCH_PATH]); return; } state = xenbus_read_driver_state(dev->otherend); - DPRINTK("state is %d (%s), %s, %s", state, xenbus_strstate(state), - dev->otherend_watch.node, vec[XS_WATCH_PATH]); + dev_dbg(&dev->dev, "state is %d (%s), %s, %s", + state, xenbus_strstate(state), 
dev->otherend_watch.node, + vec[XS_WATCH_PATH]); #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16) /* @@ -271,8 +281,13 @@ static int talk_to_otherend(struct xenbu static int watch_otherend(struct xenbus_device *dev) { +#if defined(CONFIG_XEN) || defined(MODULE) return xenbus_watch_path2(dev, dev->otherend, "state", &dev->otherend_watch, otherend_changed); +#else + return xenbus_watch_pathfmt(dev, &dev->otherend_watch, otherend_changed, + "%s/%s", dev->otherend, "state"); +#endif } @@ -298,9 +313,9 @@ int xenbus_dev_probe(struct device *_dev err = talk_to_otherend(dev); if (err) { - printk(KERN_WARNING - "xenbus_probe: talk_to_otherend on %s failed.\n", - dev->nodename); + dev_warn(&dev->dev, + "xenbus_probe: talk_to_otherend on %s failed.\n", + dev->nodename); return err; } @@ -310,9 +325,9 @@ int xenbus_dev_probe(struct device *_dev err = watch_otherend(dev); if (err) { - printk(KERN_WARNING - "xenbus_probe: watch_otherend on %s failed.\n", - dev->nodename); + dev_warn(&dev->dev, + "xenbus_probe: watch_otherend on %s failed.\n", + dev->nodename); return err; } @@ -352,14 +367,15 @@ static void xenbus_dev_shutdown(struct d get_device(&dev->dev); if (dev->state != XenbusStateConnected) { - printk("%s: %s: %s != Connected, skipping\n", __FUNCTION__, - dev->nodename, xenbus_strstate(dev->state)); + dev_info(&dev->dev, "%s: %s: %s != Connected, skipping\n", __FUNCTION__, + dev->nodename, xenbus_strstate(dev->state)); goto out; } xenbus_switch_state(dev, XenbusStateClosing); timeout = wait_for_completion_timeout(&dev->down, timeout); if (!timeout) - printk("%s: %s timeout closing device\n", __FUNCTION__, dev->nodename); + dev_info(&dev->dev, "%s: %s timeout closing device\n", + __FUNCTION__, dev->nodename); out: put_device(&dev->dev); } @@ -547,7 +563,9 @@ int xenbus_probe_node(struct xen_bus_typ xendev->devicetype = tmpstring; init_completion(&xendev->down); +#if defined(CONFIG_XEN) || defined(MODULE) xendev->dev.parent = &bus->dev; +#endif xendev->dev.bus = &bus->bus; xendev->dev.release = xenbus_dev_release; @@ -562,15 +580,16 @@ int xenbus_probe_node(struct xen_bus_typ err = device_create_file(&xendev->dev, &dev_attr_nodename); if (err) - goto unregister; + goto fail_unregister; + err = device_create_file(&xendev->dev, &dev_attr_devtype); if (err) - goto unregister; + goto fail_remove_file; return 0; -unregister: +fail_remove_file: device_remove_file(&xendev->dev, &dev_attr_nodename); - device_remove_file(&xendev->dev, &dev_attr_devtype); +fail_unregister: device_unregister(&xendev->dev); fail: kfree(xendev); @@ -583,7 +602,8 @@ static int xenbus_probe_frontend(const c char *nodename; int err; - nodename = kasprintf(GFP_KERNEL, "%s/%s/%s", xenbus_frontend.root, type, name); + nodename = kasprintf(GFP_KERNEL, "%s/%s/%s", + xenbus_frontend.root, type, name); if (!nodename) return -ENOMEM; @@ -659,7 +679,7 @@ static int strsep_len(const char *str, c return (len == 0) ? 
i : -ERANGE; } -void dev_changed(const char *node, struct xen_bus_type *bus) +void xenbus_dev_changed(const char *node, struct xen_bus_type *bus) { int exists, rootlen; struct xenbus_device *dev; @@ -667,7 +687,7 @@ void dev_changed(const char *node, struc const char *p, *root; if (bus->error || char_count(node, '/') < 2) - return; + return; exists = xenbus_exists(XBT_NIL, node, ""); if (!exists) { @@ -701,7 +721,7 @@ static void frontend_changed(struct xenb { DPRINTK(""); - dev_changed(vec[XS_WATCH_PATH], &xenbus_frontend); + xenbus_dev_changed(vec[XS_WATCH_PATH], &xenbus_frontend); } /* We watch for devices appearing and vanishing. */ @@ -904,10 +924,16 @@ static int xsd_port_read(char *page, cha } #endif -static int xenbus_probe_init(void) +#ifndef MODULE +static int __init xenbus_probe_init(void) +#else +static int __devinit xenbus_probe_init(void) +#endif { int err = 0; +#if defined(CONFIG_XEN) || defined(MODULE) unsigned long page = 0; +#endif DPRINTK(""); @@ -926,6 +952,7 @@ static int xenbus_probe_init(void) * Domain0 doesn't have a store_evtchn or store_mfn yet. */ if (is_initial_xendomain()) { +#if defined(CONFIG_XEN) || defined(MODULE) struct evtchn_alloc_unbound alloc_unbound; /* Allocate page. */ @@ -963,10 +990,13 @@ static int xenbus_probe_init(void) if (xsd_port_intf) xsd_port_intf->read_proc = xsd_port_read; #endif +#else + /* dom0 not yet supported */ +#endif xen_store_interface = mfn_to_virt(xen_store_mfn); } else { xenstored_ready = 1; -#ifdef CONFIG_XEN +#ifndef MODULE xen_store_evtchn = xen_start_info->store_evtchn; xen_store_mfn = xen_start_info->store_mfn; xen_store_interface = mfn_to_virt(xen_store_mfn); @@ -979,7 +1009,9 @@ static int xenbus_probe_init(void) } +#if defined(CONFIG_XEN) || defined(MODULE) xenbus_dev_init(); +#endif /* Initialize the interface to xenstore. */ err = xs_init(); @@ -989,6 +1021,7 @@ static int xenbus_probe_init(void) goto err; } +#if defined(CONFIG_XEN) || defined(MODULE) /* Register ourselves with the kernel device subsystem */ if (!xenbus_frontend.error) { xenbus_frontend.error = device_register(&xenbus_frontend.dev); @@ -999,6 +1032,7 @@ static int xenbus_probe_init(void) xenbus_frontend.error); } } +#endif xenbus_backend_device_register(); if (!is_initial_xendomain()) @@ -1007,8 +1041,10 @@ static int xenbus_probe_init(void) return 0; err: +#if defined(CONFIG_XEN) || defined(MODULE) if (page) free_page(page); +#endif /* * Do not unregister the xenbus front/backend buses here. 
The buses @@ -1019,11 +1055,15 @@ static int xenbus_probe_init(void) return err; } -#ifdef CONFIG_XEN +#ifndef MODULE postcore_initcall(xenbus_probe_init); +#ifdef CONFIG_XEN MODULE_LICENSE("Dual BSD/GPL"); #else -int xenbus_init(void) +MODULE_LICENSE("GPL"); +#endif +#else +int __devinit xenbus_init(void) { return xenbus_probe_init(); } Index: head-2008-12-01/drivers/xen/xenbus/xenbus_probe.h =================================================================== --- head-2008-12-01.orig/drivers/xen/xenbus/xenbus_probe.h 2008-12-01 11:36:07.000000000 +0100 +++ head-2008-12-01/drivers/xen/xenbus/xenbus_probe.h 2008-12-01 11:36:47.000000000 +0100 @@ -56,7 +56,9 @@ struct xen_bus_type int (*get_bus_id)(char bus_id[BUS_ID_SIZE], const char *nodename); int (*probe)(const char *type, const char *dir); struct bus_type bus; +#if defined(CONFIG_XEN) || defined(MODULE) struct device dev; +#endif }; extern int xenbus_match(struct device *_dev, struct device_driver *_drv); @@ -71,7 +73,7 @@ extern int xenbus_probe_node(struct xen_ const char *nodename); extern int xenbus_probe_devices(struct xen_bus_type *bus); -extern void dev_changed(const char *node, struct xen_bus_type *bus); +extern void xenbus_dev_changed(const char *node, struct xen_bus_type *bus); #endif Index: head-2008-12-01/drivers/xen/xenbus/xenbus_probe_backend.c =================================================================== --- head-2008-12-01.orig/drivers/xen/xenbus/xenbus_probe_backend.c 2008-12-01 11:36:07.000000000 +0100 +++ head-2008-12-01/drivers/xen/xenbus/xenbus_probe_backend.c 2008-12-01 11:36:47.000000000 +0100 @@ -236,7 +236,7 @@ static void backend_changed(struct xenbu { DPRINTK(""); - dev_changed(vec[XS_WATCH_PATH], &xenbus_backend); + xenbus_dev_changed(vec[XS_WATCH_PATH], &xenbus_backend); } static struct xenbus_watch be_watch = { Index: head-2008-12-01/drivers/xen/xenbus/xenbus_xs.c =================================================================== --- head-2008-12-01.orig/drivers/xen/xenbus/xenbus_xs.c 2008-12-01 11:22:58.000000000 +0100 +++ head-2008-12-01/drivers/xen/xenbus/xenbus_xs.c 2008-12-01 11:36:47.000000000 +0100 @@ -221,7 +221,7 @@ static void *xs_talkv(struct xenbus_tran } for (i = 0; i < num_vecs; i++) { - err = xb_write(iovec[i].iov_base, iovec[i].iov_len);; + err = xb_write(iovec[i].iov_base, iovec[i].iov_len); if (err) { mutex_unlock(&xs_state.request_mutex); return ERR_PTR(err); @@ -630,7 +630,9 @@ void unregister_xenbus_watch(struct xenb char token[sizeof(watch) * 2 + 1]; int err; +#if defined(CONFIG_XEN) || defined(MODULE) BUG_ON(watch->flags & XBWF_new_thread); +#endif sprintf(token, "%lX", (long)watch); @@ -649,6 +651,11 @@ void unregister_xenbus_watch(struct xenb up_read(&xs_state.watch_mutex); + /* Make sure there are no callbacks running currently (unless + its us) */ + if (current->pid != xenwatch_pid) + mutex_lock(&xenwatch_mutex); + /* Cancel pending watch events. */ spin_lock(&watch_events_lock); list_for_each_entry_safe(msg, tmp, &watch_events, list) { @@ -660,11 +667,8 @@ void unregister_xenbus_watch(struct xenb } spin_unlock(&watch_events_lock); - /* Flush any currently-executing callback, unless we are it. 
:-) */ - if (current->pid != xenwatch_pid) { - mutex_lock(&xenwatch_mutex); + if (current->pid != xenwatch_pid) mutex_unlock(&xenwatch_mutex); - } } EXPORT_SYMBOL_GPL(unregister_xenbus_watch); @@ -702,6 +706,7 @@ void xs_suspend_cancel(void) up_write(&xs_state.transaction_mutex); } +#if defined(CONFIG_XEN) || defined(MODULE) static int xenwatch_handle_callback(void *data) { struct xs_stored_msg *msg = data; @@ -719,6 +724,7 @@ static int xenwatch_handle_callback(void return 0; } +#endif static int xenwatch_thread(void *unused) { @@ -748,6 +754,7 @@ static int xenwatch_thread(void *unused) msg = list_entry(ent, struct xs_stored_msg, list); +#if defined(CONFIG_XEN) || defined(MODULE) /* * Unlock the mutex before running an XBWF_new_thread * handler. kthread_run can block which can deadlock @@ -764,6 +771,15 @@ static int xenwatch_thread(void *unused) xenwatch_handle_callback(msg); mutex_unlock(&xenwatch_mutex); } +#else + msg->u.watch.handle->callback( + msg->u.watch.handle, + (const char **)msg->u.watch.vec, + msg->u.watch.vec_size); + mutex_unlock(&xenwatch_mutex); + kfree(msg->u.watch.vec); + kfree(msg); +#endif } return 0; Index: head-2008-12-01/include/asm-x86/mach-xen/asm/fixmap_32.h =================================================================== --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/fixmap_32.h 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/fixmap_32.h 2008-12-01 11:36:47.000000000 +0100 @@ -53,6 +53,8 @@ extern unsigned long __FIXADDR_TOP; enum fixed_addresses { FIX_HOLE, FIX_VDSO, + FIX_DBGP_BASE, + FIX_EARLYCON_MEM_BASE, #ifdef CONFIG_X86_LOCAL_APIC FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */ #endif Index: head-2008-12-01/include/asm-x86/mach-xen/asm/highmem.h =================================================================== --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/highmem.h 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/highmem.h 2008-12-01 11:36:47.000000000 +0100 @@ -76,8 +76,7 @@ struct page *kmap_atomic_to_page(void *p #define kmap_atomic_pte(page, type) \ kmap_atomic_prot(page, type, \ - test_bit(PG_pinned, &(page)->flags) \ - ? PAGE_KERNEL_RO : kmap_prot) + PagePinned(page) ? PAGE_KERNEL_RO : kmap_prot) #define flush_cache_kmaps() do { } while (0) Index: head-2008-12-01/include/asm-x86/mach-xen/asm/io_32.h =================================================================== --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/io_32.h 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/io_32.h 2008-12-01 11:36:47.000000000 +0100 @@ -125,6 +125,9 @@ extern void __iomem * __ioremap(unsigned * writew/writel functions and the other mmio helpers. The returned * address is not guaranteed to be usable directly as a virtual * address. + * + * If the area you are trying to map is a PCI BAR you should have a + * look at pci_iomap(). 
*/ static inline void __iomem * ioremap(unsigned long offset, unsigned long size) @@ -142,6 +145,7 @@ extern void iounmap(volatile void __iome */ extern void *bt_ioremap(unsigned long offset, unsigned long size); extern void bt_iounmap(void *addr, unsigned long size); +extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys); /* Use early IO mappings for DMI because it's initialized early */ #define dmi_ioremap bt_ioremap Index: head-2008-12-01/include/asm-x86/mach-xen/asm/maddr_32.h =================================================================== --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/maddr_32.h 2008-04-02 12:34:02.000000000 +0200 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/maddr_32.h 2008-12-01 11:36:47.000000000 +0100 @@ -155,6 +155,7 @@ static inline paddr_t pte_machine_to_phy #ifdef CONFIG_X86_PAE #define __pte_ma(x) ((pte_t) { (x), (maddr_t)(x) >> 32 } ) +extern unsigned long long __supported_pte_mask; static inline pte_t pfn_pte_ma(unsigned long page_nr, pgprot_t pgprot) { pte_t pte; Index: head-2008-12-01/include/asm-x86/mach-xen/asm/mmu_context_32.h =================================================================== --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/mmu_context_32.h 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/mmu_context_32.h 2008-12-01 11:36:47.000000000 +0100 @@ -16,7 +16,7 @@ void mm_pin_all(void); static inline void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next) { - if (!test_bit(PG_pinned, &virt_to_page(next->pgd)->flags)) + if (!PagePinned(virt_to_page(next->pgd))) mm_pin(next); } @@ -51,6 +51,8 @@ static inline void __prepare_arch_switch : : "r" (0) ); } +void leave_mm(unsigned long cpu); + static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) @@ -60,7 +62,7 @@ static inline void switch_mm(struct mm_s if (likely(prev != next)) { BUG_ON(!xen_feature(XENFEAT_writable_page_tables) && - !test_bit(PG_pinned, &virt_to_page(next->pgd)->flags)); + !PagePinned(virt_to_page(next->pgd))); /* stop flush ipis for the previous mm */ cpu_clear(cpu, prev->cpu_vm_mask); Index: head-2008-12-01/include/asm-x86/mach-xen/asm/pci_32.h =================================================================== --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/pci_32.h 2007-09-14 11:14:51.000000000 +0200 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/pci_32.h 2008-12-01 11:36:47.000000000 +0100 @@ -3,6 +3,14 @@ #ifdef __KERNEL__ + +struct pci_sysdata { + int node; /* NUMA node */ +}; + +/* scan a bus after allocating a pci_sysdata for it */ +extern struct pci_bus *pci_scan_bus_with_sysdata(int busno); + #include /* for struct page */ /* Can be used to override the logic in pci_scan_bus for skipping @@ -81,48 +89,11 @@ struct pci_dev; #endif -/* This is always fine. 
*/ -#define pci_dac_dma_supported(pci_dev, mask) (1) - -static inline dma64_addr_t -pci_dac_page_to_dma(struct pci_dev *pdev, struct page *page, unsigned long offset, int direction) -{ - return ((dma64_addr_t) page_to_phys(page) + - (dma64_addr_t) offset); -} - -static inline struct page * -pci_dac_dma_to_page(struct pci_dev *pdev, dma64_addr_t dma_addr) -{ - return pfn_to_page(dma_addr >> PAGE_SHIFT); -} - -static inline unsigned long -pci_dac_dma_to_offset(struct pci_dev *pdev, dma64_addr_t dma_addr) -{ - return (dma_addr & ~PAGE_MASK); -} - -static inline void -pci_dac_dma_sync_single_for_cpu(struct pci_dev *pdev, dma64_addr_t dma_addr, size_t len, int direction) -{ -} - -static inline void -pci_dac_dma_sync_single_for_device(struct pci_dev *pdev, dma64_addr_t dma_addr, size_t len, int direction) -{ - flush_write_buffers(); -} - #define HAVE_PCI_MMAP extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, enum pci_mmap_state mmap_state, int write_combine); -static inline void pcibios_add_platform_entries(struct pci_dev *dev) -{ -} - #ifdef CONFIG_PCI static inline void pci_dma_burst_advice(struct pci_dev *pdev, enum pci_dma_burst_strategy *strat, Index: head-2008-12-01/include/asm-x86/mach-xen/asm/pgalloc_32.h =================================================================== --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/pgalloc_32.h 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/pgalloc_32.h 2008-12-01 11:36:47.000000000 +0100 @@ -5,7 +5,7 @@ #include /* for struct page */ #include /* for phys_to_virt and page_to_pseudophys */ -#define paravirt_alloc_pt(pfn) do { } while (0) +#define paravirt_alloc_pt(mm, pfn) do { } while (0) #define paravirt_alloc_pd(pfn) do { } while (0) #define paravirt_alloc_pd(pfn) do { } while (0) #define paravirt_alloc_pd_clone(pfn, clonepfn, start, count) do { } while (0) @@ -14,15 +14,15 @@ #define pmd_populate_kernel(mm, pmd, pte) \ do { \ - paravirt_alloc_pt(__pa(pte) >> PAGE_SHIFT); \ + paravirt_alloc_pt(mm, __pa(pte) >> PAGE_SHIFT); \ set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte))); \ } while (0) #define pmd_populate(mm, pmd, pte) \ do { \ unsigned long pfn = page_to_pfn(pte); \ - paravirt_alloc_pt(pfn); \ - if (test_bit(PG_pinned, &virt_to_page((mm)->pgd)->flags)) { \ + paravirt_alloc_pt(mm, pfn); \ + if (PagePinned(virt_to_page((mm)->pgd))) { \ if (!PageHighMem(pte)) \ BUG_ON(HYPERVISOR_update_va_mapping( \ (unsigned long)__va(pfn << PAGE_SHIFT), \ Index: head-2008-12-01/include/asm-x86/mach-xen/asm/pgtable_32.h =================================================================== --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/pgtable_32.h 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/pgtable_32.h 2008-12-01 11:36:47.000000000 +0100 @@ -26,9 +26,6 @@ #include #include -/* Is this pagetable pinned? */ -#define PG_pinned PG_arch_1 - struct vm_area_struct; /* @@ -82,7 +79,7 @@ void paging_init(void); * area for the same reason. ;) */ #define VMALLOC_OFFSET (8*1024*1024) -#define VMALLOC_START (((unsigned long) high_memory + vmalloc_earlyreserve + \ +#define VMALLOC_START (((unsigned long) high_memory + \ 2*VMALLOC_OFFSET-1) & ~(VMALLOC_OFFSET-1)) #ifdef CONFIG_HIGHMEM # define VMALLOC_END (PKMAP_BASE-2*PAGE_SIZE) @@ -231,8 +228,6 @@ extern unsigned long pg0[]; * The following only work if pte_present() is true. * Undefined behaviour if not.. 
*/ -static inline int pte_user(pte_t pte) { return (pte).pte_low & _PAGE_USER; } -static inline int pte_read(pte_t pte) { return (pte).pte_low & _PAGE_USER; } static inline int pte_dirty(pte_t pte) { return (pte).pte_low & _PAGE_DIRTY; } static inline int pte_young(pte_t pte) { return (pte).pte_low & _PAGE_ACCESSED; } static inline int pte_write(pte_t pte) { return (pte).pte_low & _PAGE_RW; } @@ -243,13 +238,9 @@ static inline int pte_huge(pte_t pte) { */ static inline int pte_file(pte_t pte) { return (pte).pte_low & _PAGE_FILE; } -static inline pte_t pte_rdprotect(pte_t pte) { (pte).pte_low &= ~_PAGE_USER; return pte; } -static inline pte_t pte_exprotect(pte_t pte) { (pte).pte_low &= ~_PAGE_USER; return pte; } static inline pte_t pte_mkclean(pte_t pte) { (pte).pte_low &= ~_PAGE_DIRTY; return pte; } static inline pte_t pte_mkold(pte_t pte) { (pte).pte_low &= ~_PAGE_ACCESSED; return pte; } static inline pte_t pte_wrprotect(pte_t pte) { (pte).pte_low &= ~_PAGE_RW; return pte; } -static inline pte_t pte_mkread(pte_t pte) { (pte).pte_low |= _PAGE_USER; return pte; } -static inline pte_t pte_mkexec(pte_t pte) { (pte).pte_low |= _PAGE_USER; return pte; } static inline pte_t pte_mkdirty(pte_t pte) { (pte).pte_low |= _PAGE_DIRTY; return pte; } static inline pte_t pte_mkyoung(pte_t pte) { (pte).pte_low |= _PAGE_ACCESSED; return pte; } static inline pte_t pte_mkwrite(pte_t pte) { (pte).pte_low |= _PAGE_RW; return pte; } @@ -295,22 +286,20 @@ static inline pte_t xen_local_ptep_get_a #define ptep_set_access_flags(vma, address, ptep, entry, dirty) \ ({ \ int __changed = !pte_same(*(ptep), entry); \ - if (__changed && (dirty)) \ - ptep_establish(vma, address, ptep, entry); \ + if (__changed && (dirty)) { \ + if ( likely((vma)->vm_mm == current->mm) ) { \ + BUG_ON(HYPERVISOR_update_va_mapping(address, \ + entry, \ + (unsigned long)(vma)->vm_mm->cpu_vm_mask.bits| \ + UVMF_INVLPG|UVMF_MULTI)); \ + } else { \ + xen_l1_entry_update(ptep, entry); \ + flush_tlb_page(vma, address); \ + } \ + } \ __changed; \ }) -#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY -#define ptep_test_and_clear_dirty(vma, addr, ptep) ({ \ - int __ret = 0; \ - if (pte_dirty(*(ptep))) \ - __ret = test_and_clear_bit(_PAGE_BIT_DIRTY, \ - &(ptep)->pte_low); \ - if (__ret) \ - pte_update((vma)->vm_mm, addr, ptep); \ - __ret; \ -}) - #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG #define ptep_test_and_clear_young(vma, addr, ptep) ({ \ int __ret = 0; \ @@ -322,44 +311,13 @@ static inline pte_t xen_local_ptep_get_a __ret; \ }) -/* - * Rules for using ptep_establish: the pte MUST be a user pte, and - * must be a present->present transition. 
- */ -#define __HAVE_ARCH_PTEP_ESTABLISH -#define ptep_establish(vma, address, ptep, pteval) \ -do { \ - if ( likely((vma)->vm_mm == current->mm) ) { \ - BUG_ON(HYPERVISOR_update_va_mapping(address, \ - pteval, \ - (unsigned long)(vma)->vm_mm->cpu_vm_mask.bits| \ - UVMF_INVLPG|UVMF_MULTI)); \ - } else { \ - xen_l1_entry_update(ptep, pteval); \ - flush_tlb_page(vma, address); \ - } \ -} while (0) - -#define __HAVE_ARCH_PTEP_CLEAR_DIRTY_FLUSH -#define ptep_clear_flush_dirty(vma, address, ptep) \ -({ \ - pte_t __pte = *(ptep); \ - int __dirty = pte_dirty(__pte); \ - __pte = pte_mkclean(__pte); \ - if (test_bit(PG_pinned, &virt_to_page((vma)->vm_mm->pgd)->flags)) \ - (void)ptep_set_access_flags(vma, address, ptep, __pte, __dirty); \ - else if (__dirty) \ - (ptep)->pte_low = __pte.pte_low; \ - __dirty; \ -}) - #define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH #define ptep_clear_flush_young(vma, address, ptep) \ ({ \ pte_t __pte = *(ptep); \ int __young = pte_young(__pte); \ __pte = pte_mkold(__pte); \ - if (test_bit(PG_pinned, &virt_to_page((vma)->vm_mm->pgd)->flags)) \ + if (PagePinned(virt_to_page((vma)->vm_mm->pgd))) \ (void)ptep_set_access_flags(vma, address, ptep, __pte, __young); \ else if (__young) \ (ptep)->pte_low = __pte.pte_low; \ @@ -383,7 +341,7 @@ static inline pte_t ptep_get_and_clear(s #define ptep_get_and_clear_full(mm, addr, ptep, full) \ ((full) ? ({ \ pte_t __res = *(ptep); \ - if (test_bit(PG_pinned, &virt_to_page((mm)->pgd)->flags)) \ + if (PagePinned(virt_to_page((mm)->pgd))) \ xen_l1_entry_update(ptep, __pte(0)); \ else \ *(ptep) = __pte(0); \ Index: head-2008-12-01/include/asm-x86/mach-xen/asm/pgtable-3level.h =================================================================== --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/pgtable-3level.h 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/pgtable-3level.h 2008-12-01 11:36:47.000000000 +0100 @@ -23,26 +23,11 @@ #define pud_present(pud) 1 /* - * Is the pte executable? 
- */ -static inline int pte_x(pte_t pte) -{ - return !(__pte_val(pte) & _PAGE_NX); -} - -/* - * All present user-pages with !NX bit are user-executable: - */ -static inline int pte_exec(pte_t pte) -{ - return pte_user(pte) && pte_x(pte); -} -/* * All present pages with !NX bit are kernel-executable: */ static inline int pte_exec_kernel(pte_t pte) { - return pte_x(pte); + return !(__pte_val(pte) & _PAGE_NX); } /* Rules for using set_pte: the pte being assigned *must* be Index: head-2008-12-01/include/asm-x86/mach-xen/asm/processor_32.h =================================================================== --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/processor_32.h 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/processor_32.h 2008-12-01 11:36:47.000000000 +0100 @@ -89,7 +89,6 @@ struct cpuinfo_x86 { #define X86_VENDOR_UMC 3 #define X86_VENDOR_NEXGEN 4 #define X86_VENDOR_CENTAUR 5 -#define X86_VENDOR_RISE 6 #define X86_VENDOR_TRANSMETA 7 #define X86_VENDOR_NSC 8 #define X86_VENDOR_NUM 9 @@ -122,6 +121,7 @@ void __init cpu_detect(struct cpuinfo_x8 extern void identify_boot_cpu(void); extern void identify_secondary_cpu(struct cpuinfo_x86 *); extern void print_cpu_info(struct cpuinfo_x86 *); +extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c); extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c); extern unsigned short num_cache_leaves; @@ -171,17 +171,6 @@ static inline void clear_in_cr4 (unsigne write_cr4(cr4); } -/* - * NSC/Cyrix CPU indexed register access macros - */ - -#define getCx86(reg) ({ outb((reg), 0x22); inb(0x23); }) - -#define setCx86(reg, data) do { \ - outb((reg), 0x22); \ - outb((data), 0x23); \ -} while (0) - /* Stop speculative execution */ static inline void sync_core(void) { @@ -230,6 +219,10 @@ extern int bootloader_type; #define HAVE_ARCH_PICK_MMAP_LAYOUT +extern void hard_disable_TSC(void); +extern void disable_TSC(void); +extern void hard_enable_TSC(void); + /* * Size of io_bitmap. */ Index: head-2008-12-01/include/asm-x86/mach-xen/asm/system_32.h =================================================================== --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/system_32.h 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/system_32.h 2008-12-01 11:36:47.000000000 +0100 @@ -205,11 +205,6 @@ static inline unsigned long get_limit(un */ -/* - * Actually only lfence would be needed for mb() because all stores done - * by the kernel should be already ordered. But keep a full barrier for now. 
- */ - #define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2) #define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2) @@ -301,15 +296,6 @@ void enable_hlt(void); extern int es7000_plat; void cpu_idle_wait(void); -/* - * On SMP systems, when the scheduler does migration-cost autodetection, - * it needs a way to flush as much of the CPU's caches as possible: - */ -static inline void sched_cacheflush(void) -{ - wbinvd(); -} - extern unsigned long arch_align_stack(unsigned long sp); extern void free_init_pages(char *what, unsigned long begin, unsigned long end); Index: head-2008-12-01/include/asm-x86/mach-xen/asm/tlbflush_32.h =================================================================== --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/tlbflush_32.h 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/tlbflush_32.h 2008-12-01 11:36:47.000000000 +0100 @@ -91,7 +91,11 @@ struct tlb_state DECLARE_PER_CPU(struct tlb_state, cpu_tlbstate); #endif /* SMP */ -#define flush_tlb_kernel_range(start, end) flush_tlb_all() +static inline void flush_tlb_kernel_range(unsigned long start, + unsigned long end) +{ + flush_tlb_all(); +} static inline void flush_tlb_pgtables(struct mm_struct *mm, unsigned long start, unsigned long end) Index: head-2008-12-01/include/asm-x86/mach-xen/asm/fixmap_64.h =================================================================== --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/fixmap_64.h 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/fixmap_64.h 2008-12-01 11:36:47.000000000 +0100 @@ -23,9 +23,9 @@ * compile time, but to set the physical address only * in the boot process. * - * these 'compile-time allocated' memory buffers are - * fixed-size 4k pages. (or larger if used with an increment - * highger than 1) use fixmap_set(idx,phys) to associate + * These 'compile-time allocated' memory buffers are + * fixed-size 4k pages (or larger if used with an increment + * higher than 1). Use set_fixmap(idx,phys) to associate * physical memory with fixmap indices. * * TLB entries of such buffers will not be flushed across @@ -36,6 +36,8 @@ enum fixed_addresses { VSYSCALL_LAST_PAGE, VSYSCALL_FIRST_PAGE = VSYSCALL_LAST_PAGE + ((VSYSCALL_END-VSYSCALL_START) >> PAGE_SHIFT) - 1, VSYSCALL_HPET, + FIX_DBGP_BASE, + FIX_EARLYCON_MEM_BASE, FIX_HPET_BASE, #ifdef CONFIG_X86_LOCAL_APIC FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */ @@ -105,7 +107,7 @@ static __always_inline unsigned long fix if (idx >= __end_of_fixed_addresses) __this_fixmap_does_not_exist(); - return __fix_to_virt(idx); + return __fix_to_virt(idx); } #endif Index: head-2008-12-01/include/asm-x86/mach-xen/asm/io_64.h =================================================================== --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/io_64.h 2008-12-01 11:36:07.000000000 +0100 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/io_64.h 2008-12-01 11:36:47.000000000 +0100 @@ -162,6 +162,7 @@ extern void bt_iounmap(void *addr, unsig */ extern void __iomem * ioremap_nocache (unsigned long offset, unsigned long size); extern void iounmap(volatile void __iomem *addr); +extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys); /* * ISA I/O bus memory addresses are 1:1 with the physical address. 
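The reworded fixmap comment above describes the intended usage pattern: an index is reserved in the fixed_addresses enum at compile time (FIX_DBGP_BASE and FIX_EARLYCON_MEM_BASE are the two slots this patch adds), a physical page is bound to it during boot with set_fixmap()/set_fixmap_nocache(), and the mapping is then reached through fix_to_virt(). A minimal illustrative sketch, not part of the patch; the caller name and the idea of writing to the mapped page are made up for the example:

#include <linux/init.h>		/* __init */
#include <asm/fixmap.h>		/* set_fixmap_nocache(), fix_to_virt() */
#include <asm/io.h>		/* writel() */

/* Hypothetical early-boot helper; dbgp_phys would come from firmware/PCI. */
static void __init example_map_dbgp(unsigned long dbgp_phys)
{
	void __iomem *dbgp;

	/* Bind the physical page to the compile-time-reserved fixmap slot. */
	set_fixmap_nocache(FIX_DBGP_BASE, dbgp_phys);

	/* The slot is now reachable at a fixed virtual address. */
	dbgp = (void __iomem *)fix_to_virt(FIX_DBGP_BASE);
	writel(0, dbgp);
}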
Index: head-2008-12-01/include/asm-x86/mach-xen/asm/mmu_context_64.h =================================================================== --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/mmu_context_64.h 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/mmu_context_64.h 2008-12-01 11:36:47.000000000 +0100 @@ -76,7 +76,7 @@ static inline void switch_mm(struct mm_s if (likely(prev != next)) { BUG_ON(!xen_feature(XENFEAT_writable_page_tables) && - !next->context.pinned); + !PagePinned(virt_to_page(next->pgd))); /* stop flush ipis for the previous mm */ cpu_clear(cpu, prev->cpu_vm_mask); @@ -131,7 +131,7 @@ static inline void switch_mm(struct mm_s static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next) { - if (!next->context.pinned) + if (!PagePinned(virt_to_page(next->pgd))) mm_pin(next); switch_mm(prev, next, NULL); } Index: head-2008-12-01/include/asm-x86/mach-xen/asm/page_64.h =================================================================== --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/page_64.h 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/page_64.h 2008-12-01 11:36:47.000000000 +0100 @@ -72,7 +72,8 @@ void copy_page(void *, void *); #define clear_user_page(page, vaddr, pg) clear_page(page) #define copy_user_page(to, from, vaddr, pg) copy_page(to, from) -#define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vaddr) +#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \ + alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr) #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE /* Index: head-2008-12-01/include/asm-x86/mach-xen/asm/pci_64.h =================================================================== --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/pci_64.h 2007-09-14 11:14:51.000000000 +0200 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/pci_64.h 2008-12-01 11:36:47.000000000 +0100 @@ -5,6 +5,27 @@ #ifdef __KERNEL__ +struct pci_sysdata { + int node; /* NUMA node */ + void* iommu; /* IOMMU private data */ +}; + +extern struct pci_bus *pci_scan_bus_with_sysdata(int busno); + +#ifdef CONFIG_CALGARY_IOMMU +static inline void* pci_iommu(struct pci_bus *bus) +{ + struct pci_sysdata *sd = bus->sysdata; + return sd->iommu; +} + +static inline void set_pci_iommu(struct pci_bus *bus, void *val) +{ + struct pci_sysdata *sd = bus->sysdata; + sd->iommu = val; +} +#endif /* CONFIG_CALGARY_IOMMU */ + #include /* for struct page */ /* Can be used to override the logic in pci_scan_bus for skipping @@ -56,14 +77,6 @@ extern int iommu_setup(char *opt); #if defined(CONFIG_IOMMU) || defined(CONFIG_CALGARY_IOMMU) -/* - * x86-64 always supports DAC, but sometimes it is useful to force - * devices through the IOMMU to get automatic sg list merging. - * Optional right now. 
- */ -extern int iommu_sac_force; -#define pci_dac_dma_supported(pci_dev, mask) (!iommu_sac_force) - #define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) \ dma_addr_t ADDR_NAME; #define DECLARE_PCI_UNMAP_LEN(LEN_NAME) \ @@ -97,8 +110,6 @@ extern int iommu_sac_force; #else /* No IOMMU */ -#define pci_dac_dma_supported(pci_dev, mask) 1 - #define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) #define DECLARE_PCI_UNMAP_LEN(LEN_NAME) #define pci_unmap_addr(PTR, ADDR_NAME) (0) @@ -110,36 +121,6 @@ extern int iommu_sac_force; #include -static inline dma64_addr_t -pci_dac_page_to_dma(struct pci_dev *pdev, struct page *page, unsigned long offset, int direction) -{ - return ((dma64_addr_t) page_to_phys(page) + - (dma64_addr_t) offset); -} - -static inline struct page * -pci_dac_dma_to_page(struct pci_dev *pdev, dma64_addr_t dma_addr) -{ - return virt_to_page(__va(dma_addr)); -} - -static inline unsigned long -pci_dac_dma_to_offset(struct pci_dev *pdev, dma64_addr_t dma_addr) -{ - return (dma_addr & ~PAGE_MASK); -} - -static inline void -pci_dac_dma_sync_single_for_cpu(struct pci_dev *pdev, dma64_addr_t dma_addr, size_t len, int direction) -{ -} - -static inline void -pci_dac_dma_sync_single_for_device(struct pci_dev *pdev, dma64_addr_t dma_addr, size_t len, int direction) -{ - flush_write_buffers(); -} - #ifdef CONFIG_PCI static inline void pci_dma_burst_advice(struct pci_dev *pdev, enum pci_dma_burst_strategy *strat, @@ -154,10 +135,6 @@ static inline void pci_dma_burst_advice( extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, enum pci_mmap_state mmap_state, int write_combine); -static inline void pcibios_add_platform_entries(struct pci_dev *dev) -{ -} - #endif /* __KERNEL__ */ /* generic pci stuff */ Index: head-2008-12-01/include/asm-x86/mach-xen/asm/pgalloc_64.h =================================================================== --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/pgalloc_64.h 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/pgalloc_64.h 2008-12-01 11:36:47.000000000 +0100 @@ -21,7 +21,7 @@ static inline void pmd_populate_kernel(s static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *pte) { - if (unlikely((mm)->context.pinned)) { + if (unlikely(PagePinned(virt_to_page((mm)->pgd)))) { BUG_ON(HYPERVISOR_update_va_mapping( (unsigned long)__va(page_to_pfn(pte) << PAGE_SHIFT), pfn_pte(page_to_pfn(pte), PAGE_KERNEL_RO), 0)); @@ -33,7 +33,7 @@ static inline void pmd_populate(struct m static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) { - if (unlikely((mm)->context.pinned)) { + if (unlikely(PagePinned(virt_to_page((mm)->pgd)))) { BUG_ON(HYPERVISOR_update_va_mapping( (unsigned long)pmd, pfn_pte(virt_to_phys(pmd)>>PAGE_SHIFT, @@ -50,7 +50,7 @@ static inline void pud_populate(struct m */ static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) { - if (unlikely((mm)->context.pinned)) { + if (unlikely(PagePinned(virt_to_page((mm)->pgd)))) { BUG_ON(HYPERVISOR_update_va_mapping( (unsigned long)pud, pfn_pte(virt_to_phys(pud)>>PAGE_SHIFT, Index: head-2008-12-01/include/asm-x86/mach-xen/asm/pgtable_64.h =================================================================== --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/pgtable_64.h 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/pgtable_64.h 2008-12-01 11:36:47.000000000 +0100 @@ -302,7 +302,7 @@ static inline pte_t ptep_get_and_clear_f { if (full) { pte_t pte = *ptep; - if 
(mm->context.pinned) + if (PagePinned(virt_to_page(mm->pgd))) xen_l1_entry_update(ptep, __pte(0)); else *ptep = __pte(0); @@ -331,21 +331,15 @@ static inline pte_t ptep_get_and_clear_f * Undefined behaviour if not.. */ #define __LARGE_PTE (_PAGE_PSE|_PAGE_PRESENT) -static inline int pte_user(pte_t pte) { return __pte_val(pte) & _PAGE_USER; } -static inline int pte_read(pte_t pte) { return __pte_val(pte) & _PAGE_USER; } -static inline int pte_exec(pte_t pte) { return !(__pte_val(pte) & _PAGE_NX); } static inline int pte_dirty(pte_t pte) { return __pte_val(pte) & _PAGE_DIRTY; } static inline int pte_young(pte_t pte) { return __pte_val(pte) & _PAGE_ACCESSED; } static inline int pte_write(pte_t pte) { return __pte_val(pte) & _PAGE_RW; } static inline int pte_file(pte_t pte) { return __pte_val(pte) & _PAGE_FILE; } static inline int pte_huge(pte_t pte) { return __pte_val(pte) & _PAGE_PSE; } -static inline pte_t pte_rdprotect(pte_t pte) { __pte_val(pte) &= ~_PAGE_USER; return pte; } -static inline pte_t pte_exprotect(pte_t pte) { __pte_val(pte) &= ~_PAGE_USER; return pte; } static inline pte_t pte_mkclean(pte_t pte) { __pte_val(pte) &= ~_PAGE_DIRTY; return pte; } static inline pte_t pte_mkold(pte_t pte) { __pte_val(pte) &= ~_PAGE_ACCESSED; return pte; } static inline pte_t pte_wrprotect(pte_t pte) { __pte_val(pte) &= ~_PAGE_RW; return pte; } -static inline pte_t pte_mkread(pte_t pte) { __pte_val(pte) |= _PAGE_USER; return pte; } static inline pte_t pte_mkexec(pte_t pte) { __pte_val(pte) &= ~_PAGE_NX; return pte; } static inline pte_t pte_mkdirty(pte_t pte) { __pte_val(pte) |= _PAGE_DIRTY; return pte; } static inline pte_t pte_mkyoung(pte_t pte) { __pte_val(pte) |= _PAGE_ACCESSED; return pte; } @@ -353,13 +347,6 @@ static inline pte_t pte_mkwrite(pte_t pt static inline pte_t pte_mkhuge(pte_t pte) { __pte_val(pte) |= _PAGE_PSE; return pte; } static inline pte_t pte_clrhuge(pte_t pte) { __pte_val(pte) &= ~_PAGE_PSE; return pte; } -static inline int ptep_test_and_clear_dirty(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) -{ - if (!pte_dirty(*ptep)) - return 0; - return test_and_clear_bit(_PAGE_BIT_DIRTY, &ptep->pte); -} - static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { if (!pte_young(*ptep)) @@ -498,26 +485,13 @@ static inline pte_t pte_modify(pte_t pte __changed; \ }) -#define __HAVE_ARCH_PTEP_CLEAR_DIRTY_FLUSH -#define ptep_clear_flush_dirty(vma, address, ptep) \ -({ \ - pte_t __pte = *(ptep); \ - int __dirty = pte_dirty(__pte); \ - __pte = pte_mkclean(__pte); \ - if ((vma)->vm_mm->context.pinned) \ - (void)ptep_set_access_flags(vma, address, ptep, __pte, __dirty); \ - else if (__dirty) \ - set_pte(ptep, __pte); \ - __dirty; \ -}) - #define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH #define ptep_clear_flush_young(vma, address, ptep) \ ({ \ pte_t __pte = *(ptep); \ int __young = pte_young(__pte); \ __pte = pte_mkold(__pte); \ - if ((vma)->vm_mm->context.pinned) \ + if (PagePinned(virt_to_page((vma)->vm_mm->pgd))) \ (void)ptep_set_access_flags(vma, address, ptep, __pte, __young); \ else if (__young) \ set_pte(ptep, __pte); \ @@ -568,6 +542,8 @@ int xen_change_pte_range(struct mm_struc #define arch_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable) \ xen_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable) +pte_t *lookup_address(unsigned long addr); + #define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \ direct_remap_pfn_range(vma,vaddr,pfn,size,prot,DOMID_IO) @@ -585,7 +561,6 @@ int 
xen_change_pte_range(struct mm_struc (((o) & (1UL << (__VIRTUAL_MASK_SHIFT-1))) ? ((o) | (~__VIRTUAL_MASK)) : (o)) #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG -#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY #define __HAVE_ARCH_PTEP_GET_AND_CLEAR #define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL #define __HAVE_ARCH_PTEP_CLEAR_FLUSH Index: head-2008-12-01/include/asm-x86/mach-xen/asm/processor_64.h =================================================================== --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/processor_64.h 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/processor_64.h 2008-12-01 11:36:47.000000000 +0100 @@ -83,7 +83,6 @@ struct cpuinfo_x86 { #define X86_VENDOR_UMC 3 #define X86_VENDOR_NEXGEN 4 #define X86_VENDOR_CENTAUR 5 -#define X86_VENDOR_RISE 6 #define X86_VENDOR_TRANSMETA 7 #define X86_VENDOR_NUM 8 #define X86_VENDOR_UNKNOWN 0xff @@ -100,6 +99,7 @@ extern char ignore_irq13; extern void identify_cpu(struct cpuinfo_x86 *); extern void print_cpu_info(struct cpuinfo_x86 *); +extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c); extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c); extern unsigned short num_cache_leaves; @@ -377,12 +377,10 @@ static inline void sync_core(void) asm volatile("cpuid" : "=a" (tmp) : "0" (1) : "ebx","ecx","edx","memory"); } -#define cpu_has_fpu 1 - #define ARCH_HAS_PREFETCH static inline void prefetch(void *x) { - asm volatile("prefetcht0 %0" :: "m" (*(unsigned long *)x)); + asm volatile("prefetcht0 (%0)" :: "r" (x)); } #define ARCH_HAS_PREFETCHW 1 @@ -400,17 +398,6 @@ static inline void prefetchw(void *x) #define cpu_relax() rep_nop() -/* - * NSC/Cyrix CPU indexed register access macros - */ - -#define getCx86(reg) ({ outb((reg), 0x22); inb(0x23); }) - -#define setCx86(reg, data) do { \ - outb((reg), 0x22); \ - outb((data), 0x23); \ -} while (0) - static inline void serialize_cpu(void) { __asm__ __volatile__ ("cpuid" : : : "ax", "bx", "cx", "dx"); Index: head-2008-12-01/include/asm-x86/mach-xen/asm/system_64.h =================================================================== --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/system_64.h 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/system_64.h 2008-12-01 11:36:47.000000000 +0100 @@ -79,12 +79,16 @@ static inline unsigned long read_cr0(voi unsigned long cr0; asm volatile("movq %%cr0,%0" : "=r" (cr0)); return cr0; -} +} static inline void write_cr0(unsigned long val) { asm volatile("movq %0,%%cr0" :: "r" (val)); -} +} + +#define read_cr2() current_vcpu_info()->arch.cr2 + +#define write_cr2(val) ((void)(current_vcpu_info()->arch.cr2 = (val))) #define read_cr3() ({ \ unsigned long __dummy; \ @@ -103,27 +107,28 @@ static inline unsigned long read_cr4(voi unsigned long cr4; asm("movq %%cr4,%0" : "=r" (cr4)); return cr4; -} +} static inline void write_cr4(unsigned long val) { asm volatile("movq %0,%%cr4" :: "r" (val) : "memory"); -} - -#define stts() (HYPERVISOR_fpu_taskswitch(1)) +} -#define wbinvd() \ - __asm__ __volatile__ ("wbinvd": : :"memory"); +static inline unsigned long read_cr8(void) +{ + return 0; +} -/* - * On SMP systems, when the scheduler does migration-cost autodetection, - * it needs a way to flush as much of the CPU's caches as possible. 
- */ -static inline void sched_cacheflush(void) +static inline void write_cr8(unsigned long val) { - wbinvd(); + BUG_ON(val); } +#define stts() (HYPERVISOR_fpu_taskswitch(1)) + +#define wbinvd() \ + __asm__ __volatile__ ("wbinvd": : :"memory") + #endif /* __KERNEL__ */ #define nop() __asm__ __volatile__ ("nop") Index: head-2008-12-01/include/asm-x86/mach-xen/asm/tlbflush_64.h =================================================================== --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/tlbflush_64.h 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/tlbflush_64.h 2008-12-01 11:36:47.000000000 +0100 @@ -89,7 +89,11 @@ static inline void flush_tlb_range(struc #endif -#define flush_tlb_kernel_range(start, end) flush_tlb_all() +static inline void flush_tlb_kernel_range(unsigned long start, + unsigned long end) +{ + flush_tlb_all(); +} static inline void flush_tlb_pgtables(struct mm_struct *mm, unsigned long start, unsigned long end) Index: head-2008-12-01/include/asm-x86/thread_info.h =================================================================== --- head-2008-12-01.orig/include/asm-x86/thread_info.h 2008-12-01 11:28:55.000000000 +0100 +++ head-2008-12-01/include/asm-x86/thread_info.h 2008-12-01 11:36:47.000000000 +0100 @@ -150,7 +150,8 @@ struct thread_info { #define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG) #else -#define _TIF_WORK_CTXSW _TIF_DEBUG +#define _TIF_WORK_CTXSW_NEXT (_TIF_NOTSC | _TIF_DEBUG) +#define _TIF_WORK_CTXSW_PREV (_TIF_NOTSC) #endif #define PREEMPT_ACTIVE 0x10000000 Index: head-2008-12-01/include/asm-x86/xen/interface.h =================================================================== --- head-2008-12-01.orig/include/asm-x86/xen/interface.h 2008-12-01 10:53:14.000000000 +0100 +++ head-2008-12-01/include/asm-x86/xen/interface.h 2008-12-01 11:36:47.000000000 +0100 @@ -10,17 +10,17 @@ #define __ASM_X86_XEN_INTERFACE_H #ifdef __XEN__ -#define __DEFINE_GUEST_HANDLE(name, type) \ +#define __DEFINE_XEN_GUEST_HANDLE(name, type) \ typedef struct { type *p; } __guest_handle_ ## name #else -#define __DEFINE_GUEST_HANDLE(name, type) \ +#define __DEFINE_XEN_GUEST_HANDLE(name, type) \ typedef type * __guest_handle_ ## name #endif -#define DEFINE_GUEST_HANDLE_STRUCT(name) \ - __DEFINE_GUEST_HANDLE(name, struct name) -#define DEFINE_GUEST_HANDLE(name) __DEFINE_GUEST_HANDLE(name, name) -#define GUEST_HANDLE(name) __guest_handle_ ## name +#define DEFINE_XEN_GUEST_HANDLE_STRUCT(name) \ + __DEFINE_XEN_GUEST_HANDLE(name, struct name) +#define DEFINE_XEN_GUEST_HANDLE(name) __DEFINE_XEN_GUEST_HANDLE(name, name) +#define XEN_GUEST_HANDLE(name) __guest_handle_ ## name #ifdef __XEN__ #if defined(__i386__) @@ -47,14 +47,8 @@ #endif #ifndef __ASSEMBLY__ -/* Guest handles for primitive C types. 
*/ -__DEFINE_GUEST_HANDLE(uchar, unsigned char); -__DEFINE_GUEST_HANDLE(uint, unsigned int); -__DEFINE_GUEST_HANDLE(ulong, unsigned long); -DEFINE_GUEST_HANDLE(char); -DEFINE_GUEST_HANDLE(int); -DEFINE_GUEST_HANDLE(long); -DEFINE_GUEST_HANDLE(void); +typedef unsigned long xen_pfn_t; +typedef unsigned long xen_ulong_t; #endif #ifndef HYPERVISOR_VIRT_START @@ -103,7 +97,7 @@ struct trap_info { uint16_t cs; /* code selector */ unsigned long address; /* code offset */ }; -DEFINE_GUEST_HANDLE_STRUCT(trap_info); +DEFINE_XEN_GUEST_HANDLE_STRUCT(trap_info); struct arch_shared_info { unsigned long max_pfn; /* max pfn that appears in table */ @@ -157,7 +151,7 @@ struct vcpu_guest_context { uint64_t gs_base_user; #endif }; -DEFINE_GUEST_HANDLE_STRUCT(vcpu_guest_context); +DEFINE_XEN_GUEST_HANDLE_STRUCT(vcpu_guest_context); #endif /* !__ASSEMBLY__ */ /* Index: head-2008-12-01/include/asm-x86/xen/interface_32.h =================================================================== --- head-2008-12-01.orig/include/asm-x86/xen/interface_32.h 2008-12-01 10:53:14.000000000 +0100 +++ head-2008-12-01/include/asm-x86/xen/interface_32.h 2008-12-01 11:36:47.000000000 +0100 @@ -62,7 +62,7 @@ struct cpu_user_regs { uint16_t fs, _pad4; uint16_t gs, _pad5; }; -DEFINE_GUEST_HANDLE_STRUCT(cpu_user_regs); +DEFINE_XEN_GUEST_HANDLE_STRUCT(cpu_user_regs); typedef uint64_t tsc_timestamp_t; /* RDTSC timestamp */ Index: head-2008-12-01/include/linux/elfnote.h =================================================================== --- head-2008-12-01.orig/include/linux/elfnote.h 2008-12-01 10:53:14.000000000 +0100 +++ head-2008-12-01/include/linux/elfnote.h 2008-12-01 11:36:47.000000000 +0100 @@ -52,7 +52,7 @@ 4484:.balign 4 ; \ .popsection ; -#define ELFNOTE(name, type, desc) \ +#define ELFNOTE(name, type, desc...) \ ELFNOTE_START(name, type, "") \ desc ; \ ELFNOTE_END Index: head-2008-12-01/include/linux/page-flags.h =================================================================== --- head-2008-12-01.orig/include/linux/page-flags.h 2008-12-01 11:25:57.000000000 +0100 +++ head-2008-12-01/include/linux/page-flags.h 2008-12-01 11:36:47.000000000 +0100 @@ -99,6 +99,10 @@ enum pageflags { #endif #ifdef CONFIG_XEN PG_foreign, /* Page is owned by foreign allocator. */ + PG_pinned, /* Cannot alias with PG_owner_priv_1 since + * bad_page() checks include this bit. + * Also cannot use PG_arch_1 since that now + * has a different purpose on x86. */ #endif __NR_PAGEFLAGS, Index: head-2008-12-01/include/linux/skbuff.h =================================================================== --- head-2008-12-01.orig/include/linux/skbuff.h 2008-12-01 11:29:05.000000000 +0100 +++ head-2008-12-01/include/linux/skbuff.h 2008-12-01 11:36:47.000000000 +0100 @@ -1772,7 +1772,7 @@ static inline void skb_forward_csum(stru bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off); -#ifdef CONFIG_XEN +#if defined(CONFIG_XEN) || defined(CONFIG_PARAVIRT_XEN) int skb_checksum_setup(struct sk_buff *skb); #else static inline int skb_checksum_setup(struct sk_buff *skb) { return 0; } Index: head-2008-12-01/include/xen/driver_util.h =================================================================== --- head-2008-12-01.orig/include/xen/driver_util.h 2007-06-12 13:14:19.000000000 +0200 +++ head-2008-12-01/include/xen/driver_util.h 2008-12-01 11:36:47.000000000 +0100 @@ -5,10 +5,6 @@ #include #include -/* Allocate/destroy a 'vmalloc' VM area. 
*/ -extern struct vm_struct *alloc_vm_area(unsigned long size); -extern void free_vm_area(struct vm_struct *area); - extern struct class *get_xen_class(void); #endif /* __ASM_XEN_DRIVER_UTIL_H__ */ Index: head-2008-12-01/include/xen/features.h =================================================================== --- head-2008-12-01.orig/include/xen/features.h 2008-12-01 10:53:14.000000000 +0100 +++ head-2008-12-01/include/xen/features.h 2008-12-01 11:36:47.000000000 +0100 @@ -10,6 +10,7 @@ #define __XEN_FEATURES_H__ #include +#include void xen_setup_features(void); @@ -20,4 +21,4 @@ static inline int xen_feature(int flag) return xen_features[flag]; } -#endif /* __ASM_XEN_FEATURES_H__ */ +#endif /* __XEN_FEATURES_H__ */ Index: head-2008-12-01/include/xen/interface/arch-x86/xen.h =================================================================== --- head-2008-12-01.orig/include/xen/interface/arch-x86/xen.h 2008-09-01 12:07:31.000000000 +0200 +++ head-2008-12-01/include/xen/interface/arch-x86/xen.h 2008-12-01 11:36:47.000000000 +0100 @@ -49,6 +49,9 @@ #define get_xen_guest_handle(val, hnd) do { val = (hnd).p; } while (0) #endif +/* Allow co-existing Linux 2.6.23+ Xen interface definitions. */ +#define DEFINE_XEN_GUEST_HANDLE_STRUCT(name) struct name + #if defined(__i386__) #include "xen-x86_32.h" #elif defined(__x86_64__) Index: head-2008-12-01/include/xen/interface/event_channel.h =================================================================== --- head-2008-12-01.orig/include/xen/interface/event_channel.h 2008-12-01 11:22:58.000000000 +0100 +++ head-2008-12-01/include/xen/interface/event_channel.h 2008-12-01 11:36:47.000000000 +0100 @@ -248,6 +248,7 @@ struct evtchn_op { struct evtchn_unmask unmask; } u; }; +DEFINE_XEN_GUEST_HANDLE_STRUCT(evtchn_op); typedef struct evtchn_op evtchn_op_t; DEFINE_XEN_GUEST_HANDLE(evtchn_op_t); Index: head-2008-12-01/include/xen/interface/io/netif.h =================================================================== --- head-2008-12-01.orig/include/xen/interface/io/netif.h 2008-12-01 11:22:58.000000000 +0100 +++ head-2008-12-01/include/xen/interface/io/netif.h 2008-12-01 11:36:47.000000000 +0100 @@ -183,8 +183,22 @@ typedef struct netif_rx_response netif_r * Generate netif ring structures and types. 
*/ +#if defined(CONFIG_XEN) || defined(HAVE_XEN_PLATFORM_COMPAT_H) DEFINE_RING_TYPES(netif_tx, struct netif_tx_request, struct netif_tx_response); DEFINE_RING_TYPES(netif_rx, struct netif_rx_request, struct netif_rx_response); +#else +#define xen_netif_tx_request netif_tx_request +#define xen_netif_rx_request netif_rx_request +#define xen_netif_tx_response netif_tx_response +#define xen_netif_rx_response netif_rx_response +DEFINE_RING_TYPES(xen_netif_tx, + struct xen_netif_tx_request, + struct xen_netif_tx_response); +DEFINE_RING_TYPES(xen_netif_rx, + struct xen_netif_rx_request, + struct xen_netif_rx_response); +#define xen_netif_extra_info netif_extra_info +#endif #define NETIF_RSP_DROPPED -2 #define NETIF_RSP_ERROR -1 Index: head-2008-12-01/include/xen/interface/memory.h =================================================================== --- head-2008-12-01.orig/include/xen/interface/memory.h 2008-12-01 11:22:59.000000000 +0100 +++ head-2008-12-01/include/xen/interface/memory.h 2008-12-01 11:36:47.000000000 +0100 @@ -82,6 +82,7 @@ struct xen_memory_reservation { domid_t domid; }; +DEFINE_XEN_GUEST_HANDLE_STRUCT(xen_memory_reservation); typedef struct xen_memory_reservation xen_memory_reservation_t; DEFINE_XEN_GUEST_HANDLE(xen_memory_reservation_t); @@ -175,6 +176,7 @@ struct xen_machphys_mfn_list { */ unsigned int nr_extents; }; +DEFINE_XEN_GUEST_HANDLE_STRUCT(xen_machphys_mfn_list); typedef struct xen_machphys_mfn_list xen_machphys_mfn_list_t; DEFINE_XEN_GUEST_HANDLE(xen_machphys_mfn_list_t); @@ -214,6 +216,7 @@ struct xen_add_to_physmap { /* GPFN where the source mapping page should appear. */ xen_pfn_t gpfn; }; +DEFINE_XEN_GUEST_HANDLE_STRUCT(xen_add_to_physmap); typedef struct xen_add_to_physmap xen_add_to_physmap_t; DEFINE_XEN_GUEST_HANDLE(xen_add_to_physmap_t); @@ -254,6 +257,7 @@ struct xen_translate_gpfn_list { */ XEN_GUEST_HANDLE(xen_pfn_t) mfn_list; }; +DEFINE_XEN_GUEST_HANDLE_STRUCT(xen_translate_gpfn_list); typedef struct xen_translate_gpfn_list xen_translate_gpfn_list_t; DEFINE_XEN_GUEST_HANDLE(xen_translate_gpfn_list_t); Index: head-2008-12-01/include/xen/interface/sched.h =================================================================== --- head-2008-12-01.orig/include/xen/interface/sched.h 2008-12-01 11:22:59.000000000 +0100 +++ head-2008-12-01/include/xen/interface/sched.h 2008-12-01 11:36:47.000000000 +0100 @@ -67,6 +67,7 @@ struct sched_shutdown { unsigned int reason; /* SHUTDOWN_* */ }; +DEFINE_XEN_GUEST_HANDLE_STRUCT(sched_shutdown); typedef struct sched_shutdown sched_shutdown_t; DEFINE_XEN_GUEST_HANDLE(sched_shutdown_t); @@ -81,6 +82,7 @@ struct sched_poll { unsigned int nr_ports; uint64_t timeout; }; +DEFINE_XEN_GUEST_HANDLE_STRUCT(sched_poll); typedef struct sched_poll sched_poll_t; DEFINE_XEN_GUEST_HANDLE(sched_poll_t); Index: head-2008-12-01/include/xen/interface/version.h =================================================================== --- head-2008-12-01.orig/include/xen/interface/version.h 2008-12-01 11:22:59.000000000 +0100 +++ head-2008-12-01/include/xen/interface/version.h 2008-12-01 11:36:47.000000000 +0100 @@ -36,6 +36,9 @@ /* arg == xen_extraversion_t. */ #define XENVER_extraversion 1 typedef char xen_extraversion_t[16]; +struct xen_extraversion { + xen_extraversion_t extraversion; +}; #define XEN_EXTRAVERSION_LEN (sizeof(xen_extraversion_t)) /* arg == xen_compile_info_t. 
*/ @@ -50,10 +53,16 @@ typedef struct xen_compile_info xen_comp #define XENVER_capabilities 3 typedef char xen_capabilities_info_t[1024]; +struct xen_capabilities_info { + xen_capabilities_info_t info; +}; #define XEN_CAPABILITIES_INFO_LEN (sizeof(xen_capabilities_info_t)) #define XENVER_changeset 4 typedef char xen_changeset_info_t[64]; +struct xen_changeset_info { + xen_changeset_info_t info; +}; #define XEN_CHANGESET_INFO_LEN (sizeof(xen_changeset_info_t)) #define XENVER_platform_parameters 5 Index: head-2008-12-01/include/xen/interface/xen.h =================================================================== --- head-2008-12-01.orig/include/xen/interface/xen.h 2008-12-01 11:22:59.000000000 +0100 +++ head-2008-12-01/include/xen/interface/xen.h 2008-12-01 11:36:47.000000000 +0100 @@ -32,7 +32,9 @@ #include #endif -#if defined(__i386__) || defined(__x86_64__) +#if defined(CONFIG_PARAVIRT_XEN) && !defined(HAVE_XEN_PLATFORM_COMPAT_H) +#include +#elif defined(__i386__) || defined(__x86_64__) #include "arch-x86/xen.h" #elif defined(__ia64__) #include "arch-ia64.h" @@ -110,7 +112,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_pfn_t); */ /* New sched_op hypercall introduced in 0x00030101. */ -#if __XEN_INTERFACE_VERSION__ < 0x00030101 +#if __XEN_INTERFACE_VERSION__ < 0x00030101 || (defined(CONFIG_PARAVIRT_XEN) && !defined(HAVE_XEN_PLATFORM_COMPAT_H)) #undef __HYPERVISOR_sched_op #define __HYPERVISOR_sched_op __HYPERVISOR_sched_op_compat #endif @@ -124,7 +126,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_pfn_t); #endif /* New platform_op hypercall introduced in 0x00030204. */ -#if __XEN_INTERFACE_VERSION__ < 0x00030204 +#if __XEN_INTERFACE_VERSION__ < 0x00030204 || (defined(CONFIG_PARAVIRT_XEN) && !defined(HAVE_XEN_PLATFORM_COMPAT_H)) #define __HYPERVISOR_dom0_op __HYPERVISOR_platform_op #endif @@ -283,6 +285,7 @@ struct mmuext_op { xen_pfn_t src_mfn; } arg2; }; +DEFINE_XEN_GUEST_HANDLE_STRUCT(mmuext_op); typedef struct mmuext_op mmuext_op_t; DEFINE_XEN_GUEST_HANDLE(mmuext_op_t); #endif @@ -365,6 +368,7 @@ struct mmu_update { uint64_t ptr; /* Machine address of PTE. */ uint64_t val; /* New contents of PTE. */ }; +DEFINE_XEN_GUEST_HANDLE_STRUCT(mmu_update); typedef struct mmu_update mmu_update_t; DEFINE_XEN_GUEST_HANDLE(mmu_update_t); @@ -373,9 +377,15 @@ DEFINE_XEN_GUEST_HANDLE(mmu_update_t); * NB. The fields are natural register size for this architecture. */ struct multicall_entry { - unsigned long op, result; + unsigned long op; +#if !defined(CONFIG_PARAVIRT_XEN) || defined(HAVE_XEN_PLATFORM_COMPAT_H) + unsigned long result; +#else + long result; +#endif unsigned long args[6]; }; +DEFINE_XEN_GUEST_HANDLE_STRUCT(multicall_entry); typedef struct multicall_entry multicall_entry_t; DEFINE_XEN_GUEST_HANDLE(multicall_entry_t); Index: head-2008-12-01/include/xen/xenbus.h =================================================================== --- head-2008-12-01.orig/include/xen/xenbus.h 2008-12-01 11:36:07.000000000 +0100 +++ head-2008-12-01/include/xen/xenbus.h 2008-12-01 11:36:47.000000000 +0100 @@ -57,16 +57,20 @@ struct xenbus_watch void (*callback)(struct xenbus_watch *, const char **vec, unsigned int len); +#if defined(CONFIG_XEN) || defined(HAVE_XEN_PLATFORM_COMPAT_H) /* See XBWF_ definitions below. */ unsigned long flags; +#endif }; +#if defined(CONFIG_XEN) || defined(HAVE_XEN_PLATFORM_COMPAT_H) /* * Execute callback in its own kthread. Useful if the callback is long * running or heavily serialised, to avoid taking out the main xenwatch thread * for a long period of time (or even unwittingly causing a deadlock). 
*/ #define XBWF_new_thread 1 +#endif /* A xenbus device. */ struct xenbus_device { @@ -214,6 +218,7 @@ int xenbus_watch_path(struct xenbus_devi const char **, unsigned int)); +#if defined(CONFIG_XEN) || defined(HAVE_XEN_PLATFORM_COMPAT_H) /** * Register a watch on the given path/path2, using the given xenbus_watch * structure for storage, and the given callback function as the callback. @@ -227,7 +232,13 @@ int xenbus_watch_path2(struct xenbus_dev const char *path2, struct xenbus_watch *watch, void (*callback)(struct xenbus_watch *, const char **, unsigned int)); - +#else +int xenbus_watch_pathfmt(struct xenbus_device *dev, struct xenbus_watch *watch, + void (*callback)(struct xenbus_watch *, + const char **, unsigned int), + const char *pathfmt, ...) + __attribute__ ((format (printf, 4, 5))); +#endif /** * Advertise in the store a change of the given driver to the given new_state. Index: head-2008-12-01/net/core/dev.c =================================================================== --- head-2008-12-01.orig/net/core/dev.c 2008-12-01 11:36:13.000000000 +0100 +++ head-2008-12-01/net/core/dev.c 2008-12-01 11:36:47.000000000 +0100 @@ -131,7 +131,7 @@ #include "net-sysfs.h" -#ifdef CONFIG_XEN +#if defined(CONFIG_XEN) || defined(CONFIG_PARAVIRT_XEN) #include #include #include @@ -1740,42 +1740,54 @@ static struct netdev_queue *dev_pick_tx( return netdev_get_tx_queue(dev, queue_index); } -#ifdef CONFIG_XEN +#if defined(CONFIG_XEN) || defined(CONFIG_PARAVIRT_XEN) inline int skb_checksum_setup(struct sk_buff *skb) { - if (skb->proto_csum_blank) { - struct iphdr *iph; - unsigned char *th; + struct iphdr *iph; + unsigned char *th; + int err = -EPROTO; - if (skb->protocol != htons(ETH_P_IP)) - goto out; - iph = ip_hdr(skb); - th = skb_network_header(skb) + 4 * iph->ihl; - if (th >= skb_tail_pointer(skb)) - goto out; - skb->csum_start = th - skb->head; - switch (iph->protocol) { - case IPPROTO_TCP: - skb->csum_offset = offsetof(struct tcphdr, check); - break; - case IPPROTO_UDP: - skb->csum_offset = offsetof(struct udphdr, check); - break; - default: - if (net_ratelimit()) - printk(KERN_ERR "Attempting to checksum a non-" - "TCP/UDP packet, dropping a protocol" - " %d packet", iph->protocol); - goto out; - } - if ((th + skb->csum_offset + 2) > skb_tail_pointer(skb)) - goto out; - skb->ip_summed = CHECKSUM_PARTIAL; - skb->proto_csum_blank = 0; +#ifdef CONFIG_XEN + if (!skb->proto_csum_blank) + return 0; +#endif + + if (skb->protocol != htons(ETH_P_IP)) + goto out; + + iph = ip_hdr(skb); + th = skb_network_header(skb) + 4 * iph->ihl; + if (th >= skb_tail_pointer(skb)) + goto out; + + skb->csum_start = th - skb->head; + switch (iph->protocol) { + case IPPROTO_TCP: + skb->csum_offset = offsetof(struct tcphdr, check); + break; + case IPPROTO_UDP: + skb->csum_offset = offsetof(struct udphdr, check); + break; + default: + if (net_ratelimit()) + printk(KERN_ERR "Attempting to checksum a non-" + "TCP/UDP packet, dropping a protocol" + " %d packet", iph->protocol); + goto out; } - return 0; + + if ((th + skb->csum_offset + 2) > skb_tail_pointer(skb)) + goto out; + +#ifdef CONFIG_XEN + skb->ip_summed = CHECKSUM_PARTIAL; + skb->proto_csum_blank = 0; +#endif + + err = 0; + out: - return -EPROTO; + return err; } EXPORT_SYMBOL(skb_checksum_setup); #endif
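The skb_checksum_setup() rework above keeps the helper declared in linux/skbuff.h and exported for both CONFIG_XEN and CONFIG_PARAVIRT_XEN, so guest network drivers can fix up partially-checksummed packets before handing them to the stack. A minimal illustrative sketch of such a caller, not part of the patch; the function name and receive path are hypothetical:

#include <linux/skbuff.h>	/* skb_checksum_setup(), kfree_skb() */
#include <linux/netdevice.h>	/* netif_rx() */

/* Hypothetical receive path in a Xen-style virtual network driver. */
static int example_deliver_rx(struct sk_buff *skb)
{
	int err;

	/*
	 * Fill in csum_start/csum_offset for packets that arrived with a
	 * blank protocol checksum; non-IP/TCP/UDP packets yield -EPROTO.
	 */
	err = skb_checksum_setup(skb);
	if (err) {
		kfree_skb(skb);
		return err;
	}

	netif_rx(skb);		/* hand the frame to the network stack */
	return 0;
}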