5 Automatically created from "patches.kernel.org/patch-2.6.20" by xen-port-patches.py
7 Acked-by: jbeulich@novell.com
9 Index: head-2008-12-01/arch/x86/Kconfig
10 ===================================================================
11 --- head-2008-12-01.orig/arch/x86/Kconfig 2008-12-01 11:29:05.000000000 +0100
12 +++ head-2008-12-01/arch/x86/Kconfig 2008-12-01 11:32:38.000000000 +0100
13 @@ -1427,7 +1427,7 @@ config PHYSICAL_START
16 bool "Build a relocatable kernel (EXPERIMENTAL)"
17 - depends on EXPERIMENTAL
18 + depends on EXPERIMENTAL && !X86_XEN
20 This builds a kernel image that retains relocation information
21 so it can be loaded someplace besides the default 1MB.
22 Index: head-2008-12-01/arch/x86/kernel/asm-offsets_32.c
23 ===================================================================
24 --- head-2008-12-01.orig/arch/x86/kernel/asm-offsets_32.c 2008-12-01 11:21:02.000000000 +0100
25 +++ head-2008-12-01/arch/x86/kernel/asm-offsets_32.c 2008-12-01 11:32:38.000000000 +0100
26 @@ -54,6 +54,7 @@ void foo(void)
27 OFFSET(TI_exec_domain, thread_info, exec_domain);
28 OFFSET(TI_flags, thread_info, flags);
29 OFFSET(TI_status, thread_info, status);
30 + OFFSET(TI_cpu, thread_info, cpu);
31 OFFSET(TI_preempt_count, thread_info, preempt_count);
32 OFFSET(TI_addr_limit, thread_info, addr_limit);
33 OFFSET(TI_restart_block, thread_info, restart_block);
34 @@ -108,6 +109,11 @@ void foo(void)
36 OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx);
40 + OFFSET(XEN_START_mfn_list, start_info, mfn_list);
43 #ifdef CONFIG_PARAVIRT
45 OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled);
46 Index: head-2008-12-01/arch/x86/kernel/cpu/common-xen.c
47 ===================================================================
48 --- head-2008-12-01.orig/arch/x86/kernel/cpu/common-xen.c 2008-12-01 11:29:05.000000000 +0100
49 +++ head-2008-12-01/arch/x86/kernel/cpu/common-xen.c 2008-12-01 11:32:38.000000000 +0100
51 #define phys_pkg_id(a,b) a
55 #include <asm/hypervisor.h>
59 DEFINE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr);
60 EXPORT_PER_CPU_SYMBOL(cpu_gdt_descr);
63 -DEFINE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]);
64 -EXPORT_PER_CPU_SYMBOL(cpu_16bit_stack);
66 +struct i386_pda *_cpu_pda[NR_CPUS] __read_mostly;
67 +EXPORT_SYMBOL(_cpu_pda);
69 static int cachesize_override __cpuinitdata = -1;
70 static int disable_x86_fxsr __cpuinitdata;
71 @@ -60,7 +59,7 @@ static struct cpu_dev __cpuinitdata defa
72 .c_init = default_init,
73 .c_vendor = "Unknown",
75 -static struct cpu_dev * this_cpu = &default_cpu;
76 +static struct cpu_dev * this_cpu __cpuinitdata = &default_cpu;
78 static int __init cachesize_setup(char *str)
80 @@ -242,29 +241,14 @@ static int __cpuinit have_cpuid_p(void)
81 return flag_is_changeable_p(X86_EFLAGS_ID);
84 -/* Do minimum CPU detection early.
85 - Fields really needed: vendor, cpuid_level, family, model, mask, cache alignment.
86 - The others are not touched to avoid unwanted side effects.
88 - WARNING: this function is only called on the BP. Don't add code here
89 - that is supposed to run on all CPUs. */
90 -static void __init early_cpu_detect(void)
91 +void __init cpu_detect(struct cpuinfo_x86 *c)
93 - struct cpuinfo_x86 *c = &boot_cpu_data;
95 - c->x86_cache_alignment = 32;
97 - if (!have_cpuid_p())
100 /* Get vendor name */
101 cpuid(0x00000000, &c->cpuid_level,
102 (int *)&c->x86_vendor_id[0],
103 (int *)&c->x86_vendor_id[8],
104 (int *)&c->x86_vendor_id[4]);
106 - get_cpu_vendor(c, 1);
109 if (c->cpuid_level >= 0x00000001) {
110 u32 junk, tfms, cap0, misc;
111 @@ -281,6 +265,26 @@ static void __init early_cpu_detect(void
115 +/* Do minimum CPU detection early.
116 + Fields really needed: vendor, cpuid_level, family, model, mask, cache alignment.
117 + The others are not touched to avoid unwanted side effects.
119 + WARNING: this function is only called on the BP. Don't add code here
120 + that is supposed to run on all CPUs. */
121 +static void __init early_cpu_detect(void)
123 + struct cpuinfo_x86 *c = &boot_cpu_data;
125 + c->x86_cache_alignment = 32;
127 + if (!have_cpuid_p())
132 + get_cpu_vendor(c, 1);
135 static void __cpuinit generic_identify(struct cpuinfo_x86 * c)
138 @@ -315,6 +319,8 @@ static void __cpuinit generic_identify(s
140 c->apicid = (ebx >> 24) & 0xFF;
142 + if (c->x86_capability[0] & (1<<19))
143 + c->x86_clflush_size = ((ebx >> 8) & 0xff) * 8;
145 /* Have CPUID level 0 only - unheard of */
147 @@ -379,6 +385,7 @@ void __cpuinit identify_cpu(struct cpuin
148 c->x86_vendor_id[0] = '\0'; /* Unset */
149 c->x86_model_id[0] = '\0'; /* Unset */
150 c->x86_max_cores = 1;
151 + c->x86_clflush_size = 32;
152 memset(&c->x86_capability, 0, sizeof c->x86_capability);
154 if (!have_cpuid_p()) {
155 @@ -599,61 +606,23 @@ void __init early_cpu_init(void)
159 -static void __cpuinit cpu_gdt_init(const struct Xgt_desc_struct *gdt_descr)
160 +/* Make sure %gs is initialized properly in idle threads */
161 +struct pt_regs * __devinit idle_regs(struct pt_regs *regs)
163 - unsigned long frames[16];
167 - for (va = gdt_descr->address, f = 0;
168 - va < gdt_descr->address + gdt_descr->size;
169 - va += PAGE_SIZE, f++) {
170 - frames[f] = virt_to_mfn(va);
171 - make_lowmem_page_readonly(
172 - (void *)va, XENFEAT_writable_descriptor_tables);
174 - if (HYPERVISOR_set_gdt(frames, (gdt_descr->size + 1) / 8))
176 + memset(regs, 0, sizeof(struct pt_regs));
177 + regs->xgs = __KERNEL_PDA;
182 - * cpu_init() initializes state that is per-CPU. Some data is already
183 - * initialized (naturally) in the bootstrap process, such as the GDT
184 - * and IDT. We reload them nevertheless, this function acts as a
185 - * 'CPU state barrier', nothing should get across.
187 -void __cpuinit cpu_init(void)
188 +static __cpuinit int alloc_gdt(int cpu)
190 - int cpu = smp_processor_id();
191 -#ifndef CONFIG_X86_NO_TSS
192 - struct tss_struct * t = &per_cpu(init_tss, cpu);
194 - struct thread_struct *thread = ¤t->thread;
195 - struct desc_struct *gdt;
196 struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
197 + struct desc_struct *gdt;
198 + struct i386_pda *pda;
200 - if (cpu_test_and_set(cpu, cpu_initialized)) {
201 - printk(KERN_WARNING "CPU#%d already initialized!\n", cpu);
202 - for (;;) local_irq_enable();
204 - printk(KERN_INFO "Initializing CPU#%d\n", cpu);
206 - if (cpu_has_vme || cpu_has_de)
207 - clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
208 - if (tsc_disable && cpu_has_tsc) {
209 - printk(KERN_NOTICE "Disabling TSC...\n");
210 - /**** FIX-HPA: DOES THIS REALLY BELONG HERE? ****/
211 - clear_bit(X86_FEATURE_TSC, boot_cpu_data.x86_capability);
212 - set_in_cr4(X86_CR4_TSD);
214 + gdt = (struct desc_struct *)cpu_gdt_descr->address;
215 + pda = cpu_pda(cpu);
218 - /* The CPU hotplug case */
219 - if (cpu_gdt_descr->address) {
220 - gdt = (struct desc_struct *)cpu_gdt_descr->address;
221 - memset(gdt, 0, PAGE_SIZE);
225 * This is a horrible hack to allocate the GDT. The problem
226 * is that cpu_init() is called really early for the boot CPU
227 @@ -661,54 +630,141 @@ void __cpuinit cpu_init(void)
228 * CPUs, when bootmem will have gone away
230 if (NODE_DATA(0)->bdata->node_bootmem_map) {
231 - gdt = (struct desc_struct *)alloc_bootmem_pages(PAGE_SIZE);
232 - /* alloc_bootmem_pages panics on failure, so no check */
233 + BUG_ON(gdt != NULL || pda != NULL);
235 + gdt = alloc_bootmem_pages(PAGE_SIZE);
236 + pda = alloc_bootmem(sizeof(*pda));
237 + /* alloc_bootmem(_pages) panics on failure, so no check */
239 memset(gdt, 0, PAGE_SIZE);
240 + memset(pda, 0, sizeof(*pda));
242 - gdt = (struct desc_struct *)get_zeroed_page(GFP_KERNEL);
243 - if (unlikely(!gdt)) {
244 - printk(KERN_CRIT "CPU%d failed to allocate GDT\n", cpu);
246 - local_irq_enable();
247 + /* GDT and PDA might already have been allocated if
248 + this is a CPU hotplug re-insertion. */
250 + gdt = (struct desc_struct *)get_zeroed_page(GFP_KERNEL);
253 + pda = kmalloc_node(sizeof(*pda), GFP_KERNEL, cpu_to_node(cpu));
255 + if (unlikely(!gdt || !pda)) {
256 + free_pages((unsigned long)gdt, 0);
263 + cpu_gdt_descr->address = (unsigned long)gdt;
264 + cpu_pda(cpu) = pda;
269 +/* Initial PDA used by boot CPU */
270 +struct i386_pda boot_pda = {
273 + .pcurrent = &init_task,
276 +static inline void set_kernel_gs(void)
278 + /* Set %gs for this CPU's PDA. Memory clobber is to create a
279 + barrier with respect to any PDA operations, so the compiler
280 + doesn't move any before here. */
281 + asm volatile ("mov %0, %%gs" : : "r" (__KERNEL_PDA) : "memory");
284 +/* Initialize the CPU's GDT and PDA. The boot CPU does this for
285 + itself, but secondaries find this done for them. */
286 +__cpuinit int init_gdt(int cpu, struct task_struct *idle)
288 + struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
289 + struct desc_struct *gdt;
290 + struct i386_pda *pda;
292 + /* For non-boot CPUs, the GDT and PDA should already have been
294 + if (!alloc_gdt(cpu)) {
295 + printk(KERN_CRIT "CPU%d failed to allocate GDT or PDA\n", cpu);
299 + gdt = (struct desc_struct *)cpu_gdt_descr->address;
300 + pda = cpu_pda(cpu);
302 + BUG_ON(gdt == NULL || pda == NULL);
305 * Initialize the per-CPU GDT with the boot GDT,
306 * and set up the GDT descriptor:
308 memcpy(gdt, cpu_gdt_table, GDT_SIZE);
309 + cpu_gdt_descr->size = GDT_SIZE - 1;
311 - /* Set up GDT entry for 16bit stack */
312 - *(__u64 *)(&gdt[GDT_ENTRY_ESPFIX_SS]) |=
313 - ((((__u64)stk16_off) << 16) & 0x000000ffffff0000ULL) |
314 - ((((__u64)stk16_off) << 32) & 0xff00000000000000ULL) |
315 - (CPU_16BIT_STACK_SIZE - 1);
316 + pack_descriptor((u32 *)&gdt[GDT_ENTRY_PDA].a,
317 + (u32 *)&gdt[GDT_ENTRY_PDA].b,
318 + (unsigned long)pda, sizeof(*pda) - 1,
319 + 0x80 | DESCTYPE_S | 0x2, 0); /* present read-write data segment */
321 + memset(pda, 0, sizeof(*pda));
323 + pda->cpu_number = cpu;
324 + pda->pcurrent = idle;
326 - cpu_gdt_descr->size = GDT_SIZE - 1;
327 - cpu_gdt_descr->address = (unsigned long)gdt;
329 - if (cpu == 0 && cpu_gdt_descr->address == 0) {
330 - gdt = (struct desc_struct *)alloc_bootmem_pages(PAGE_SIZE);
331 - /* alloc_bootmem_pages panics on failure, so no check */
332 - memset(gdt, 0, PAGE_SIZE);
336 - memcpy(gdt, cpu_gdt_table, GDT_SIZE);
338 - cpu_gdt_descr->size = GDT_SIZE;
339 - cpu_gdt_descr->address = (unsigned long)gdt;
340 +void __cpuinit cpu_set_gdt(int cpu)
342 + struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
343 + unsigned long va, frames[16];
346 + for (va = cpu_gdt_descr->address, f = 0;
347 + va < cpu_gdt_descr->address + cpu_gdt_descr->size;
348 + va += PAGE_SIZE, f++) {
349 + frames[f] = virt_to_mfn(va);
350 + make_lowmem_page_readonly(
351 + (void *)va, XENFEAT_writable_descriptor_tables);
353 + BUG_ON(HYPERVISOR_set_gdt(frames, (cpu_gdt_descr->size + 1) / 8));
358 +/* Common CPU init for both boot and secondary CPUs */
359 +static void __cpuinit _cpu_init(int cpu, struct task_struct *curr)
361 +#ifndef CONFIG_X86_NO_TSS
362 + struct tss_struct * t = &per_cpu(init_tss, cpu);
364 + struct thread_struct *thread = &curr->thread;
366 + if (cpu_test_and_set(cpu, cpu_initialized)) {
367 + printk(KERN_WARNING "CPU#%d already initialized!\n", cpu);
368 + for (;;) local_irq_enable();
371 - cpu_gdt_init(cpu_gdt_descr);
372 + printk(KERN_INFO "Initializing CPU#%d\n", cpu);
374 + if (cpu_has_vme || cpu_has_de)
375 + clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
376 + if (tsc_disable && cpu_has_tsc) {
377 + printk(KERN_NOTICE "Disabling TSC...\n");
378 + /**** FIX-HPA: DOES THIS REALLY BELONG HERE? ****/
379 + clear_bit(X86_FEATURE_TSC, boot_cpu_data.x86_capability);
380 + set_in_cr4(X86_CR4_TSD);
384 * Set up and load the per-CPU TSS and LDT
386 atomic_inc(&init_mm.mm_count);
387 - current->active_mm = &init_mm;
388 - BUG_ON(current->mm);
389 - enter_lazy_tlb(&init_mm, current);
390 + curr->active_mm = &init_mm;
393 + enter_lazy_tlb(&init_mm, curr);
395 load_esp0(t, thread);
397 @@ -719,8 +775,8 @@ old_gdt:
398 __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss);
401 - /* Clear %fs and %gs. */
402 - asm volatile ("movl %0, %%fs; movl %0, %%gs" : : "r" (0));
404 + asm volatile ("mov %0, %%fs" : : "r" (0));
406 /* Clear all 6 debug registers: */
408 @@ -738,6 +794,38 @@ old_gdt:
409 mxcsr_feature_mask_init();
412 +/* Entrypoint to initialize secondary CPU */
413 +void __cpuinit secondary_cpu_init(void)
415 + int cpu = smp_processor_id();
416 + struct task_struct *curr = current;
418 + _cpu_init(cpu, curr);
422 + * cpu_init() initializes state that is per-CPU. Some data is already
423 + * initialized (naturally) in the bootstrap process, such as the GDT
424 + * and IDT. We reload them nevertheless, this function acts as a
425 + * 'CPU state barrier', nothing should get across.
427 +void __cpuinit cpu_init(void)
429 + int cpu = smp_processor_id();
430 + struct task_struct *curr = current;
432 + /* Set up the real GDT and PDA, so we can transition from the
434 + if (!init_gdt(cpu, curr)) {
435 + /* failed to allocate something; not much we can do... */
437 + local_irq_enable();
441 + _cpu_init(cpu, curr);
444 #ifdef CONFIG_HOTPLUG_CPU
445 void __cpuinit cpu_uninit(void)
447 Index: head-2008-12-01/arch/x86/kernel/cpu/mtrr/main-xen.c
448 ===================================================================
449 --- head-2008-12-01.orig/arch/x86/kernel/cpu/mtrr/main-xen.c 2008-01-28 12:24:18.000000000 +0100
450 +++ head-2008-12-01/arch/x86/kernel/cpu/mtrr/main-xen.c 2008-12-01 11:32:38.000000000 +0100
452 static DEFINE_MUTEX(mtrr_mutex);
454 void generic_get_mtrr(unsigned int reg, unsigned long *base,
455 - unsigned int *size, mtrr_type * type)
456 + unsigned long *size, mtrr_type * type)
458 struct xen_platform_op op;
460 @@ -115,8 +115,7 @@ int mtrr_del_page(int reg, unsigned long
464 - unsigned long lbase;
465 - unsigned int lsize;
466 + unsigned long lbase, lsize;
468 struct xen_platform_op op;
470 Index: head-2008-12-01/arch/x86/kernel/e820_32-xen.c
471 ===================================================================
472 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
473 +++ head-2008-12-01/arch/x86/kernel/e820_32-xen.c 2008-12-01 11:32:38.000000000 +0100
475 +#include <linux/kernel.h>
476 +#include <linux/types.h>
477 +#include <linux/init.h>
478 +#include <linux/bootmem.h>
479 +#include <linux/ioport.h>
480 +#include <linux/string.h>
481 +#include <linux/kexec.h>
482 +#include <linux/module.h>
483 +#include <linux/mm.h>
484 +#include <linux/efi.h>
485 +#include <linux/pfn.h>
486 +#include <linux/uaccess.h>
488 +#include <asm/pgtable.h>
489 +#include <asm/page.h>
490 +#include <asm/e820.h>
491 +#include <xen/interface/memory.h>
494 +int efi_enabled = 0;
495 +EXPORT_SYMBOL(efi_enabled);
498 +struct e820map e820;
499 +struct change_member {
500 + struct e820entry *pbios; /* pointer to original bios entry */
501 + unsigned long long addr; /* address for this change point */
503 +static struct change_member change_point_list[2*E820MAX] __initdata;
504 +static struct change_member *change_point[2*E820MAX] __initdata;
505 +static struct e820entry *overlap_list[E820MAX] __initdata;
506 +static struct e820entry new_bios[E820MAX] __initdata;
507 +/* For PCI or other memory-mapped resources */
508 +unsigned long pci_mem_start = 0x10000000;
510 +EXPORT_SYMBOL(pci_mem_start);
512 +extern int user_defined_memmap;
513 +struct resource data_resource = {
514 + .name = "Kernel data",
517 + .flags = IORESOURCE_BUSY | IORESOURCE_MEM
520 +struct resource code_resource = {
521 + .name = "Kernel code",
524 + .flags = IORESOURCE_BUSY | IORESOURCE_MEM
527 +static struct resource system_rom_resource = {
528 + .name = "System ROM",
531 + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
534 +static struct resource extension_rom_resource = {
535 + .name = "Extension ROM",
538 + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
541 +static struct resource adapter_rom_resources[] = { {
542 + .name = "Adapter ROM",
545 + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
547 + .name = "Adapter ROM",
550 + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
552 + .name = "Adapter ROM",
555 + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
557 + .name = "Adapter ROM",
560 + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
562 + .name = "Adapter ROM",
565 + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
567 + .name = "Adapter ROM",
570 + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
573 +static struct resource video_rom_resource = {
574 + .name = "Video ROM",
577 + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
580 +static struct resource video_ram_resource = {
581 + .name = "Video RAM area",
584 + .flags = IORESOURCE_BUSY | IORESOURCE_MEM
587 +static struct resource standard_io_resources[] = { {
591 + .flags = IORESOURCE_BUSY | IORESOURCE_IO
596 + .flags = IORESOURCE_BUSY | IORESOURCE_IO
601 + .flags = IORESOURCE_BUSY | IORESOURCE_IO
606 + .flags = IORESOURCE_BUSY | IORESOURCE_IO
608 + .name = "keyboard",
611 + .flags = IORESOURCE_BUSY | IORESOURCE_IO
613 + .name = "dma page reg",
616 + .flags = IORESOURCE_BUSY | IORESOURCE_IO
621 + .flags = IORESOURCE_BUSY | IORESOURCE_IO
626 + .flags = IORESOURCE_BUSY | IORESOURCE_IO
631 + .flags = IORESOURCE_BUSY | IORESOURCE_IO
634 +static int romsignature(const unsigned char *x)
636 + unsigned short sig;
638 + if (probe_kernel_address((const unsigned short *)x, sig) == 0)
639 + ret = (sig == 0xaa55);
643 +static int __init romchecksum(unsigned char *rom, unsigned long length)
645 + unsigned char *p, sum = 0;
647 + for (p = rom; p < rom + length; p++)
652 +static void __init probe_roms(void)
654 + unsigned long start, length, upper;
655 + unsigned char *rom;
659 + /* Nothing to do if not running in dom0. */
660 + if (!is_initial_xendomain())
665 + upper = adapter_rom_resources[0].start;
666 + for (start = video_rom_resource.start; start < upper; start += 2048) {
667 + rom = isa_bus_to_virt(start);
668 + if (!romsignature(rom))
671 + video_rom_resource.start = start;
673 + /* 0 < length <= 0x7f * 512, historically */
674 + length = rom[2] * 512;
676 + /* if checksum okay, trust length byte */
677 + if (length && romchecksum(rom, length))
678 + video_rom_resource.end = start + length - 1;
680 + request_resource(&iomem_resource, &video_rom_resource);
684 + start = (video_rom_resource.end + 1 + 2047) & ~2047UL;
689 + request_resource(&iomem_resource, &system_rom_resource);
690 + upper = system_rom_resource.start;
692 + /* check for extension rom (ignore length byte!) */
693 + rom = isa_bus_to_virt((unsigned long)extension_rom_resource.start);
694 + if (romsignature(rom)) {
695 + length = extension_rom_resource.end - extension_rom_resource.start + 1;
696 + if (romchecksum(rom, length)) {
697 + request_resource(&iomem_resource, &extension_rom_resource);
698 + upper = extension_rom_resource.start;
702 + /* check for adapter roms on 2k boundaries */
703 + for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; start += 2048) {
704 + rom = isa_bus_to_virt(start);
705 + if (!romsignature(rom))
708 + /* 0 < length <= 0x7f * 512, historically */
709 + length = rom[2] * 512;
711 + /* but accept any length that fits if checksum okay */
712 + if (!length || start + length > upper || !romchecksum(rom, length))
715 + adapter_rom_resources[i].start = start;
716 + adapter_rom_resources[i].end = start + length - 1;
717 + request_resource(&iomem_resource, &adapter_rom_resources[i]);
719 + start = adapter_rom_resources[i++].end & ~2047UL;
724 +static struct e820map machine_e820 __initdata;
725 +#define e820 machine_e820
729 + * Request address space for all standard RAM and ROM resources
730 + * and also for regions reported as reserved by the e820.
733 +legacy_init_iomem_resources(struct resource *code_resource, struct resource *data_resource)
738 + for (i = 0; i < e820.nr_map; i++) {
739 + struct resource *res;
740 +#ifndef CONFIG_RESOURCES_64BIT
741 + if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL)
744 + res = kzalloc(sizeof(struct resource), GFP_ATOMIC);
745 + switch (e820.map[i].type) {
746 + case E820_RAM: res->name = "System RAM"; break;
747 + case E820_ACPI: res->name = "ACPI Tables"; break;
748 + case E820_NVS: res->name = "ACPI Non-volatile Storage"; break;
749 + default: res->name = "reserved";
751 + res->start = e820.map[i].addr;
752 + res->end = res->start + e820.map[i].size - 1;
753 + res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
754 + if (request_resource(&iomem_resource, res)) {
758 + if (e820.map[i].type == E820_RAM) {
760 + * We don't know which RAM region contains kernel data,
761 + * so we try it repeatedly and let the resource manager
765 + request_resource(res, code_resource);
766 + request_resource(res, data_resource);
769 + request_resource(res, &crashk_res);
771 + xen_machine_kexec_register_resources(res);
781 + * Request address space for all standard resources
783 + * This is called just before pcibios_init(), which is also a
784 + * subsys_initcall, but is linked in later (in arch/i386/pci/common.c).
786 +static int __init request_standard_resources(void)
790 + /* Nothing to do if not running in dom0. */
791 + if (!is_initial_xendomain())
794 + printk("Setting up standard PCI resources\n");
796 + efi_initialize_iomem_resources(&code_resource, &data_resource);
798 + legacy_init_iomem_resources(&code_resource, &data_resource);
800 + /* EFI systems may still have VGA */
801 + request_resource(&iomem_resource, &video_ram_resource);
803 + /* request I/O space for devices used on all i[345]86 PCs */
804 + for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++)
805 + request_resource(&ioport_resource, &standard_io_resources[i]);
809 +subsys_initcall(request_standard_resources);
811 +void __init add_memory_region(unsigned long long start,
812 + unsigned long long size, int type)
816 + if (!efi_enabled) {
819 + if (x == E820MAX) {
820 + printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
824 + e820.map[x].addr = start;
825 + e820.map[x].size = size;
826 + e820.map[x].type = type;
829 +} /* add_memory_region */
832 + * Sanitize the BIOS e820 map.
834 + * Some e820 responses include overlapping entries. The following
835 + * replaces the original e820 map with a new one, removing overlaps.
838 +int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
840 + struct change_member *change_tmp;
841 + unsigned long current_type, last_type;
842 + unsigned long long last_addr;
843 + int chgidx, still_changing;
844 + int overlap_entries;
845 + int new_bios_entry;
846 + int old_nr, new_nr, chg_nr;
850 + Visually we're performing the following (1,2,3,4 = memory types)...
852 + Sample memory map (w/overlaps):
853 + ____22__________________
854 + ______________________4_
855 + ____1111________________
856 + _44_____________________
857 + 11111111________________
858 + ____________________33__
859 + ___________44___________
860 + __________33333_________
861 + ______________22________
862 + ___________________2222_
863 + _________111111111______
864 + _____________________11_
865 + _________________4______
867 + Sanitized equivalent (no overlap):
868 + 1_______________________
869 + _44_____________________
870 + ___1____________________
871 + ____22__________________
872 + ______11________________
873 + _________1______________
874 + __________3_____________
875 + ___________44___________
876 + _____________33_________
877 + _______________2________
878 + ________________1_______
879 + _________________4______
880 + ___________________2____
881 + ____________________33__
882 + ______________________4_
884 + printk("sanitize start\n");
885 + /* if there's only one memory region, don't bother */
886 + if (*pnr_map < 2) {
887 + printk("sanitize bail 0\n");
893 + /* bail out if we find any unreasonable addresses in bios map */
894 + for (i=0; i<old_nr; i++)
895 + if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr) {
896 + printk("sanitize bail 1\n");
900 + /* create pointers for initial change-point information (for sorting) */
901 + for (i=0; i < 2*old_nr; i++)
902 + change_point[i] = &change_point_list[i];
904 + /* record all known change-points (starting and ending addresses),
905 + omitting those that are for empty memory regions */
907 + for (i=0; i < old_nr; i++) {
908 + if (biosmap[i].size != 0) {
909 + change_point[chgidx]->addr = biosmap[i].addr;
910 + change_point[chgidx++]->pbios = &biosmap[i];
911 + change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size;
912 + change_point[chgidx++]->pbios = &biosmap[i];
915 + chg_nr = chgidx; /* true number of change-points */
917 + /* sort change-point list by memory addresses (low -> high) */
918 + still_changing = 1;
919 + while (still_changing) {
920 + still_changing = 0;
921 + for (i=1; i < chg_nr; i++) {
922 + /* if <current_addr> > <last_addr>, swap */
923 + /* or, if current=<start_addr> & last=<end_addr>, swap */
924 + if ((change_point[i]->addr < change_point[i-1]->addr) ||
925 + ((change_point[i]->addr == change_point[i-1]->addr) &&
926 + (change_point[i]->addr == change_point[i]->pbios->addr) &&
927 + (change_point[i-1]->addr != change_point[i-1]->pbios->addr))
930 + change_tmp = change_point[i];
931 + change_point[i] = change_point[i-1];
932 + change_point[i-1] = change_tmp;
938 + /* create a new bios memory map, removing overlaps */
939 + overlap_entries=0; /* number of entries in the overlap table */
940 + new_bios_entry=0; /* index for creating new bios map entries */
941 + last_type = 0; /* start with undefined memory type */
942 + last_addr = 0; /* start with 0 as last starting address */
943 + /* loop through change-points, determining effect on the new bios map */
944 + for (chgidx=0; chgidx < chg_nr; chgidx++)
946 + /* keep track of all overlapping bios entries */
947 + if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr)
949 + /* add map entry to overlap list (> 1 entry implies an overlap) */
950 + overlap_list[overlap_entries++]=change_point[chgidx]->pbios;
954 + /* remove entry from list (order independent, so swap with last) */
955 + for (i=0; i<overlap_entries; i++)
957 + if (overlap_list[i] == change_point[chgidx]->pbios)
958 + overlap_list[i] = overlap_list[overlap_entries-1];
962 + /* if there are overlapping entries, decide which "type" to use */
963 + /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */
965 + for (i=0; i<overlap_entries; i++)
966 + if (overlap_list[i]->type > current_type)
967 + current_type = overlap_list[i]->type;
968 + /* continue building up new bios map based on this information */
969 + if (current_type != last_type) {
970 + if (last_type != 0) {
971 + new_bios[new_bios_entry].size =
972 + change_point[chgidx]->addr - last_addr;
973 + /* move forward only if the new size was non-zero */
974 + if (new_bios[new_bios_entry].size != 0)
975 + if (++new_bios_entry >= E820MAX)
976 + break; /* no more space left for new bios entries */
978 + if (current_type != 0) {
979 + new_bios[new_bios_entry].addr = change_point[chgidx]->addr;
980 + new_bios[new_bios_entry].type = current_type;
981 + last_addr=change_point[chgidx]->addr;
983 + last_type = current_type;
986 + new_nr = new_bios_entry; /* retain count for new bios entries */
988 + /* copy new bios mapping into original location */
989 + memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry));
992 + printk("sanitize end\n");
997 + * Copy the BIOS e820 map into a safe place.
999 + * Sanity-check it while we're at it..
1001 + * If we're lucky and live on a modern system, the setup code
1002 + * will have given us a memory map that we can use to properly
1003 + * set up memory. If we aren't, we'll fake a memory map.
1005 + * We check to see that the memory map contains at least 2 elements
1006 + * before we'll use it, because the detection code in setup.S may
1007 + * not be perfect and most every PC known to man has two memory
1008 + * regions: one from 0 to 640k, and one from 1mb up. (The IBM
1009 + * thinkpad 560x, for example, does not cooperate with the memory
1010 + * detection code.)
1012 +int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
1015 + /* Only one memory region (or negative)? Ignore it */
1019 + BUG_ON(nr_map < 1);
1023 + unsigned long long start = biosmap->addr;
1024 + unsigned long long size = biosmap->size;
1025 + unsigned long long end = start + size;
1026 + unsigned long type = biosmap->type;
1027 + printk("copy_e820_map() start: %016Lx size: %016Lx end: %016Lx type: %ld\n", start, size, end, type);
1029 + /* Overflow in 64 bits? Ignore the memory map. */
1035 + * Some BIOSes claim RAM in the 640k - 1M region.
1036 + * Not right. Fix it up.
1038 + if (type == E820_RAM) {
1039 + printk("copy_e820_map() type is E820_RAM\n");
1040 + if (start < 0x100000ULL && end > 0xA0000ULL) {
1041 + printk("copy_e820_map() lies in range...\n");
1042 + if (start < 0xA0000ULL) {
1043 + printk("copy_e820_map() start < 0xA0000ULL\n");
1044 + add_memory_region(start, 0xA0000ULL-start, type);
1046 + if (end <= 0x100000ULL) {
1047 + printk("copy_e820_map() end <= 0x100000ULL\n");
1050 + start = 0x100000ULL;
1051 + size = end - start;
1055 + add_memory_region(start, size, type);
1056 + } while (biosmap++,--nr_map);
1059 + if (is_initial_xendomain()) {
1060 + struct xen_memory_map memmap;
1062 + memmap.nr_entries = E820MAX;
1063 + set_xen_guest_handle(memmap.buffer, machine_e820.map);
1065 + if (HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap))
1067 + machine_e820.nr_map = memmap.nr_entries;
1069 + machine_e820 = e820;
1076 + * Callback for efi_memmap_walk.
1079 +efi_find_max_pfn(unsigned long start, unsigned long end, void *arg)
1081 + unsigned long *max_pfn = arg, pfn;
1083 + if (start < end) {
1084 + pfn = PFN_UP(end -1);
1085 + if (pfn > *max_pfn)
1092 +efi_memory_present_wrapper(unsigned long start, unsigned long end, void *arg)
1094 + memory_present(0, PFN_UP(start), PFN_DOWN(end));
1099 + * Find the highest page frame number we have available
1101 +void __init find_max_pfn(void)
1106 + if (efi_enabled) {
1107 + efi_memmap_walk(efi_find_max_pfn, &max_pfn);
1108 + efi_memmap_walk(efi_memory_present_wrapper, NULL);
1112 + for (i = 0; i < e820.nr_map; i++) {
1113 + unsigned long start, end;
1115 + if (e820.map[i].type != E820_RAM)
1117 + start = PFN_UP(e820.map[i].addr);
1118 + end = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
1121 + if (end > max_pfn)
1123 + memory_present(0, start, end);
1128 + * Free all available memory for boot time allocation. Used
1129 + * as a callback function by efi_memmap_walk()
1133 +free_available_memory(unsigned long start, unsigned long end, void *arg)
1135 + /* check max_low_pfn */
1136 + if (start >= (max_low_pfn << PAGE_SHIFT))
1138 + if (end >= (max_low_pfn << PAGE_SHIFT))
1139 + end = max_low_pfn << PAGE_SHIFT;
1141 + free_bootmem(start, end - start);
1146 + * Register fully available low RAM pages with the bootmem allocator.
1148 +void __init register_bootmem_low_pages(unsigned long max_low_pfn)
1152 + if (efi_enabled) {
1153 + efi_memmap_walk(free_available_memory, NULL);
1156 + for (i = 0; i < e820.nr_map; i++) {
1157 + unsigned long curr_pfn, last_pfn, size;
1159 + * Reserve usable low memory
1161 + if (e820.map[i].type != E820_RAM)
1164 + * We are rounding up the start address of usable memory:
1166 + curr_pfn = PFN_UP(e820.map[i].addr);
1167 + if (curr_pfn >= max_low_pfn)
1170 + * ... and at the end of the usable range downwards:
1172 + last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
1176 + * Truncate to the number of actual pages currently
1179 + if (last_pfn > xen_start_info->nr_pages)
1180 + last_pfn = xen_start_info->nr_pages;
1183 + if (last_pfn > max_low_pfn)
1184 + last_pfn = max_low_pfn;
1187 + * .. finally, did all the rounding and playing
1188 + * around just make the area go away?
1190 + if (last_pfn <= curr_pfn)
1193 + size = last_pfn - curr_pfn;
1194 + free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size));
1198 +void __init e820_register_memory(void)
1200 + unsigned long gapstart, gapsize, round;
1201 + unsigned long long last;
1205 + if (is_initial_xendomain()) {
1206 + struct xen_memory_map memmap;
1208 + memmap.nr_entries = E820MAX;
1209 + set_xen_guest_handle(memmap.buffer, machine_e820.map);
1211 + if (HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap))
1213 + machine_e820.nr_map = memmap.nr_entries;
1216 + machine_e820 = e820;
1217 +#define e820 machine_e820
1221 + * Search for the biggest gap in the low 32 bits of the e820
1224 + last = 0x100000000ull;
1225 + gapstart = 0x10000000;
1226 + gapsize = 0x400000;
1228 + while (--i >= 0) {
1229 + unsigned long long start = e820.map[i].addr;
1230 + unsigned long long end = start + e820.map[i].size;
1233 + * Since "last" is at most 4GB, we know we'll
1234 + * fit in 32 bits if this condition is true
1237 + unsigned long gap = last - end;
1239 + if (gap > gapsize) {
1250 + * See how much we want to round up: start off with
1251 + * rounding to the next 1MB area.
1254 + while ((gapsize >> 4) > round)
1256 + /* Fun with two's complement */
1257 + pci_mem_start = (gapstart + round) & -round;
1259 + printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n",
1260 + pci_mem_start, gapstart, gapsize);
1263 +void __init print_memory_map(char *who)
1267 + for (i = 0; i < e820.nr_map; i++) {
1268 + printk(" %s: %016Lx - %016Lx ", who,
1270 + e820.map[i].addr + e820.map[i].size);
1271 + switch (e820.map[i].type) {
1272 + case E820_RAM: printk("(usable)\n");
1274 + case E820_RESERVED:
1275 + printk("(reserved)\n");
1278 + printk("(ACPI data)\n");
1281 + printk("(ACPI NVS)\n");
1283 + default: printk("type %lu\n", e820.map[i].type);
1289 +static __init __always_inline void efi_limit_regions(unsigned long long size)
1291 + unsigned long long current_addr = 0;
1292 + efi_memory_desc_t *md, *next_md;
1298 + for (p = p1, i = 0; p < memmap.map_end; p += memmap.desc_size, i++) {
1301 + current_addr = md->phys_addr +
1302 + PFN_PHYS(md->num_pages);
1303 + if (is_available_memory(md)) {
1304 + if (md->phys_addr >= size) continue;
1305 + memcpy(next_md, md, memmap.desc_size);
1306 + if (current_addr >= size) {
1307 + next_md->num_pages -=
1308 + PFN_UP(current_addr-size);
1310 + p1 += memmap.desc_size;
1313 + } else if ((md->attribute & EFI_MEMORY_RUNTIME) ==
1314 + EFI_MEMORY_RUNTIME) {
1315 + /* In order to make runtime services
1316 + * available we have to include runtime
1317 + * memory regions in memory map */
1318 + memcpy(next_md, md, memmap.desc_size);
1319 + p1 += memmap.desc_size;
1324 + memmap.nr_map = j;
1325 + memmap.map_end = memmap.map +
1326 + (memmap.nr_map * memmap.desc_size);
1329 +void __init limit_regions(unsigned long long size)
1331 + unsigned long long current_addr = 0;
1334 + print_memory_map("limit_regions start");
1335 + if (efi_enabled) {
1336 + efi_limit_regions(size);
1339 + for (i = 0; i < e820.nr_map; i++) {
1340 + current_addr = e820.map[i].addr + e820.map[i].size;
1341 + if (current_addr < size)
1344 + if (e820.map[i].type != E820_RAM)
1347 + if (e820.map[i].addr >= size) {
1349 + * This region starts past the end of the
1350 + * requested size, skip it completely.
1354 + e820.nr_map = i + 1;
1355 + e820.map[i].size -= current_addr - size;
1357 + print_memory_map("limit_regions endfor");
1361 + if (current_addr < size) {
1363 + * The e820 map finished before our requested size so
1364 + * extend the final entry to the requested address.
1367 + if (e820.map[i].type == E820_RAM)
1368 + e820.map[i].size -= current_addr - size;
1370 + add_memory_region(current_addr, size - current_addr, E820_RAM);
1373 + print_memory_map("limit_regions endfunc");
1377 + * This function checks if any part of the range <start,end> is mapped
1381 +e820_any_mapped(u64 start, u64 end, unsigned type)
1386 + for (i = 0; i < e820.nr_map; i++) {
1387 + const struct e820entry *ei = &e820.map[i];
1389 + if (!is_initial_xendomain())
1391 + for (i = 0; i < machine_e820.nr_map; ++i) {
1392 + const struct e820entry *ei = &machine_e820.map[i];
1395 + if (type && ei->type != type)
1397 + if (ei->addr >= end || ei->addr + ei->size <= start)
1403 +EXPORT_SYMBOL_GPL(e820_any_mapped);
1406 + * This function checks if the entire range <start,end> is mapped with type.
1408 + * Note: this function only works correctly if the e820 table is sorted and
1409 + * not-overlapping, which is the case
1412 +e820_all_mapped(unsigned long s, unsigned long e, unsigned type)
1419 + for (i = 0; i < e820.nr_map; i++) {
1420 + struct e820entry *ei = &e820.map[i];
1422 + if (!is_initial_xendomain())
1424 + for (i = 0; i < machine_e820.nr_map; ++i) {
1425 + const struct e820entry *ei = &machine_e820.map[i];
1428 + if (type && ei->type != type)
1430 + /* is the region (part) in overlap with the current region ?*/
1431 + if (ei->addr >= end || ei->addr + ei->size <= start)
1433 + /* if the region is at the beginning of <start,end> we move
1434 + * start to the end of the region since it's ok until there
1436 + if (ei->addr <= start)
1437 + start = ei->addr + ei->size;
1438 + /* if start is now at or beyond end, we're done, full
1441 + return 1; /* we're done */
1446 +static int __init parse_memmap(char *arg)
1451 + if (strcmp(arg, "exactmap") == 0) {
1452 +#ifdef CONFIG_CRASH_DUMP
1453 + /* If we are doing a crash dump, we
1454 + * still need to know the real mem
1455 + * size before original memory map is
1459 + saved_max_pfn = max_pfn;
1462 + user_defined_memmap = 1;
1464 + /* If the user specifies memory size, we
1465 + * limit the BIOS-provided memory map to
1466 + * that size. exactmap can be used to specify
1467 + * the exact map. mem=number can be used to
1468 + * trim the existing memory map.
1470 + unsigned long long start_at, mem_size;
1472 + mem_size = memparse(arg, &arg);
1473 + if (*arg == '@') {
1474 + start_at = memparse(arg+1, &arg);
1475 + add_memory_region(start_at, mem_size, E820_RAM);
1476 + } else if (*arg == '#') {
1477 + start_at = memparse(arg+1, &arg);
1478 + add_memory_region(start_at, mem_size, E820_ACPI);
1479 + } else if (*arg == '$') {
1480 + start_at = memparse(arg+1, &arg);
1481 + add_memory_region(start_at, mem_size, E820_RESERVED);
1483 + limit_regions(mem_size);
1484 + user_defined_memmap = 1;
1489 +early_param("memmap", parse_memmap);
1490 Index: head-2008-12-01/arch/x86/kernel/entry_32-xen.S
1491 ===================================================================
1492 --- head-2008-12-01.orig/arch/x86/kernel/entry_32-xen.S 2008-12-01 11:29:05.000000000 +0100
1493 +++ head-2008-12-01/arch/x86/kernel/entry_32-xen.S 2008-12-01 11:32:38.000000000 +0100
1498 - * 24(%esp) - orig_eax
1501 - * 30(%esp) - %eflags
1502 - * 34(%esp) - %oldesp
1503 - * 38(%esp) - %oldss
1505 + * 28(%esp) - orig_eax
1508 + * 34(%esp) - %eflags
1509 + * 38(%esp) - %oldesp
1510 + * 3C(%esp) - %oldss
1512 * "current" is in register %ebx during any slow entries.
1515 #include <asm/smp.h>
1516 #include <asm/page.h>
1517 #include <asm/desc.h>
1518 +#include <asm/percpu.h>
1519 #include <asm/dwarf2.h>
1520 #include "irq_vectors.h"
1521 #include <xen/interface/xen.h>
1523 -#define nr_syscalls ((syscall_table_size)/4)
1525 + * We use macros for low-level operations which need to be overridden
1526 + * for paravirtualization. The following will never clobber any registers:
1527 + * INTERRUPT_RETURN (aka. "iret")
1528 + * GET_CR0_INTO_EAX (aka. "movl %cr0, %eax")
1529 + * ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit").
1531 + * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must
1532 + * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY).
1533 + * Allowing a register to be clobbered can shrink the paravirt replacement
1534 + * enough to patch inline, increasing performance.
1552 +#define nr_syscalls ((syscall_table_size)/4)
1554 CF_MASK = 0x00000001
1555 TF_MASK = 0x00000100
1556 @@ -79,61 +78,16 @@ VM_MASK = 0x00020000
1557 /* Pseudo-eflags. */
1558 NMI_MASK = 0x80000000
1561 -/* These are replaces for paravirtualization */
1562 -#define DISABLE_INTERRUPTS cli
1563 -#define ENABLE_INTERRUPTS sti
1564 -#define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit
1565 -#define INTERRUPT_RETURN iret
1566 -#define GET_CR0_INTO_EAX movl %cr0, %eax
1568 -/* Offsets into shared_info_t. */
1569 -#define evtchn_upcall_pending /* 0 */
1570 -#define evtchn_upcall_mask 1
1572 -#define sizeof_vcpu_shift 6
1575 -#define GET_VCPU_INFO movl TI_cpu(%ebp),%esi ; \
1576 - shl $sizeof_vcpu_shift,%esi ; \
1577 - addl HYPERVISOR_shared_info,%esi
1579 -#define GET_VCPU_INFO movl HYPERVISOR_shared_info,%esi
1582 -#define __DISABLE_INTERRUPTS movb $1,evtchn_upcall_mask(%esi)
1583 -#define __ENABLE_INTERRUPTS movb $0,evtchn_upcall_mask(%esi)
1584 -#define __TEST_PENDING testb $0xFF,evtchn_upcall_pending(%esi)
1585 -#define DISABLE_INTERRUPTS GET_VCPU_INFO ; \
1586 - __DISABLE_INTERRUPTS
1587 -#define ENABLE_INTERRUPTS GET_VCPU_INFO ; \
1588 - __ENABLE_INTERRUPTS
1589 -#define ENABLE_INTERRUPTS_SYSEXIT __ENABLE_INTERRUPTS ; \
1590 -sysexit_scrit: /**** START OF SYSEXIT CRITICAL REGION ****/ ; \
1591 - __TEST_PENDING ; \
1592 - jnz 14f # process more events if necessary... ; \
1593 - movl ESI(%esp), %esi ; \
1595 -14: __DISABLE_INTERRUPTS ; \
1596 - TRACE_IRQS_OFF ; \
1597 -sysexit_ecrit: /**** END OF SYSEXIT CRITICAL REGION ****/ ; \
1599 - call evtchn_do_upcall ; \
1602 -#define INTERRUPT_RETURN iret
1605 #ifdef CONFIG_PREEMPT
1606 -#define preempt_stop DISABLE_INTERRUPTS; TRACE_IRQS_OFF
1607 +#define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
1609 -#define preempt_stop
1610 +#define preempt_stop(clobbers)
1611 #define resume_kernel restore_nocheck
1614 .macro TRACE_IRQS_IRET
1615 #ifdef CONFIG_TRACE_IRQFLAGS
1616 - testl $IF_MASK,EFLAGS(%esp) # interrupts off?
1617 + testl $IF_MASK,PT_EFLAGS(%esp) # interrupts off?
1621 @@ -148,6 +102,9 @@ sysexit_ecrit: /**** END OF SYSEXIT CRIT
1626 + CFI_ADJUST_CFA_OFFSET 4;\
1627 + /*CFI_REL_OFFSET gs, 0;*/\
1629 CFI_ADJUST_CFA_OFFSET 4;\
1630 /*CFI_REL_OFFSET es, 0;*/\
1631 @@ -177,7 +134,9 @@ sysexit_ecrit: /**** END OF SYSEXIT CRIT
1632 CFI_REL_OFFSET ebx, 0;\
1633 movl $(__USER_DS), %edx; \
1637 + movl $(__KERNEL_PDA), %edx; \
1640 #define RESTORE_INT_REGS \
1642 @@ -210,17 +169,22 @@ sysexit_ecrit: /**** END OF SYSEXIT CRIT
1644 CFI_ADJUST_CFA_OFFSET -4;\
1645 /*CFI_RESTORE es;*/\
1646 -.section .fixup,"ax"; \
1647 -3: movl $0,(%esp); \
1650 + CFI_ADJUST_CFA_OFFSET -4;\
1651 + /*CFI_RESTORE gs;*/\
1652 +.pushsection .fixup,"ax"; \
1653 4: movl $0,(%esp); \
1655 +5: movl $0,(%esp); \
1658 +6: movl $0,(%esp); \
1660 .section __ex_table,"a";\
1670 #define RING0_INT_FRAME \
1671 CFI_STARTPROC simple;\
1672 @@ -239,18 +203,18 @@ sysexit_ecrit: /**** END OF SYSEXIT CRIT
1673 #define RING0_PTREGS_FRAME \
1674 CFI_STARTPROC simple;\
1676 - CFI_DEF_CFA esp, OLDESP-EBX;\
1677 - /*CFI_OFFSET cs, CS-OLDESP;*/\
1678 - CFI_OFFSET eip, EIP-OLDESP;\
1679 - /*CFI_OFFSET es, ES-OLDESP;*/\
1680 - /*CFI_OFFSET ds, DS-OLDESP;*/\
1681 - CFI_OFFSET eax, EAX-OLDESP;\
1682 - CFI_OFFSET ebp, EBP-OLDESP;\
1683 - CFI_OFFSET edi, EDI-OLDESP;\
1684 - CFI_OFFSET esi, ESI-OLDESP;\
1685 - CFI_OFFSET edx, EDX-OLDESP;\
1686 - CFI_OFFSET ecx, ECX-OLDESP;\
1687 - CFI_OFFSET ebx, EBX-OLDESP
1688 + CFI_DEF_CFA esp, PT_OLDESP-PT_EBX;\
1689 + /*CFI_OFFSET cs, PT_CS-PT_OLDESP;*/\
1690 + CFI_OFFSET eip, PT_EIP-PT_OLDESP;\
1691 + /*CFI_OFFSET es, PT_ES-PT_OLDESP;*/\
1692 + /*CFI_OFFSET ds, PT_DS-PT_OLDESP;*/\
1693 + CFI_OFFSET eax, PT_EAX-PT_OLDESP;\
1694 + CFI_OFFSET ebp, PT_EBP-PT_OLDESP;\
1695 + CFI_OFFSET edi, PT_EDI-PT_OLDESP;\
1696 + CFI_OFFSET esi, PT_ESI-PT_OLDESP;\
1697 + CFI_OFFSET edx, PT_EDX-PT_OLDESP;\
1698 + CFI_OFFSET ecx, PT_ECX-PT_OLDESP;\
1699 + CFI_OFFSET ebx, PT_EBX-PT_OLDESP
1701 ENTRY(ret_from_fork)
1703 @@ -278,17 +242,18 @@ ENTRY(ret_from_fork)
1708 + preempt_stop(CLBR_ANY)
1710 GET_THREAD_INFO(%ebp)
1712 - movl EFLAGS(%esp), %eax # mix EFLAGS and CS
1713 - movb CS(%esp), %al
1714 + movl PT_EFLAGS(%esp), %eax # mix EFLAGS and CS
1715 + movb PT_CS(%esp), %al
1716 andl $(VM_MASK | SEGMENT_RPL_MASK), %eax
1717 cmpl $USER_RPL, %eax
1718 jb resume_kernel # not returning to v8086 or userspace
1720 ENTRY(resume_userspace)
1721 - DISABLE_INTERRUPTS # make sure we don't miss an interrupt
1722 + DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt
1723 # setting need_resched or sigpending
1724 # between sampling and the iret
1725 movl TI_flags(%ebp), %ecx
1726 @@ -299,14 +264,14 @@ ENTRY(resume_userspace)
1728 #ifdef CONFIG_PREEMPT
1729 ENTRY(resume_kernel)
1730 - DISABLE_INTERRUPTS
1731 + DISABLE_INTERRUPTS(CLBR_ANY)
1732 cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ?
1735 movl TI_flags(%ebp), %ecx # need_resched set ?
1736 testb $_TIF_NEED_RESCHED, %cl
1738 - testl $IF_MASK,EFLAGS(%esp) # interrupts off (exception path) ?
1739 + testl $IF_MASK,PT_EFLAGS(%esp) # interrupts off (exception path) ?
1741 call preempt_schedule_irq
1743 @@ -328,7 +293,7 @@ sysenter_past_esp:
1744 * No need to follow this irqs on/off section: the syscall
1745 * disabled irqs and here we enable it straight after entry:
1748 + ENABLE_INTERRUPTS(CLBR_NONE)
1750 CFI_ADJUST_CFA_OFFSET 4
1751 /*CFI_REL_OFFSET ss, 0*/
1752 @@ -340,12 +305,16 @@ sysenter_past_esp:
1754 CFI_ADJUST_CFA_OFFSET 4
1755 /*CFI_REL_OFFSET cs, 0*/
1756 +#ifndef CONFIG_COMPAT_VDSO
1758 * Push current_thread_info()->sysenter_return to the stack.
1759 * A tiny bit of offset fixup is necessary - 4*4 means the 4 words
1760 * pushed above; +8 corresponds to copy_thread's esp0 setting.
1762 pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp)
1764 + pushl $SYSENTER_RETURN
1766 CFI_ADJUST_CFA_OFFSET 4
1767 CFI_REL_OFFSET eip, 0
1769 @@ -372,19 +341,27 @@ sysenter_past_esp:
1770 cmpl $(nr_syscalls), %eax
1772 call *sys_call_table(,%eax,4)
1773 - movl %eax,EAX(%esp)
1774 - DISABLE_INTERRUPTS
1775 + movl %eax,PT_EAX(%esp)
1776 + DISABLE_INTERRUPTS(CLBR_ECX|CLBR_EDX)
1778 movl TI_flags(%ebp), %ecx
1779 testw $_TIF_ALLWORK_MASK, %cx
1780 jne syscall_exit_work
1781 /* if something modifies registers it must also disable sysexit */
1782 - movl EIP(%esp), %edx
1783 - movl OLDESP(%esp), %ecx
1784 + movl PT_EIP(%esp), %edx
1785 + movl PT_OLDESP(%esp), %ecx
1788 +1: mov PT_GS(%esp), %gs
1789 ENABLE_INTERRUPTS_SYSEXIT
1791 +.pushsection .fixup,"ax"
1792 +2: movl $0,PT_GS(%esp)
1794 +.section __ex_table,"a"
1799 # pv sysenter call handler stub
1800 ENTRY(sysenter_entry_pv)
1801 @@ -419,7 +396,7 @@ ENTRY(system_call)
1802 CFI_ADJUST_CFA_OFFSET 4
1804 GET_THREAD_INFO(%ebp)
1805 - testl $TF_MASK,EFLAGS(%esp)
1806 + testl $TF_MASK,PT_EFLAGS(%esp)
1808 orl $_TIF_SINGLESTEP,TI_flags(%ebp)
1810 @@ -431,9 +408,9 @@ no_singlestep:
1813 call *sys_call_table(,%eax,4)
1814 - movl %eax,EAX(%esp) # store the return value
1815 + movl %eax,PT_EAX(%esp) # store the return value
1817 - DISABLE_INTERRUPTS # make sure we don't miss an interrupt
1818 + DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt
1819 # setting need_resched or sigpending
1820 # between sampling and the iret
1822 @@ -443,12 +420,12 @@ syscall_exit:
1826 - movl EFLAGS(%esp), %eax # mix EFLAGS, SS and CS
1827 - # Warning: OLDSS(%esp) contains the wrong/random values if we
1828 + movl PT_EFLAGS(%esp), %eax # mix EFLAGS, SS and CS
1829 + # Warning: PT_OLDSS(%esp) contains the wrong/random values if we
1830 # are returning to the kernel.
1831 # See comments in process.c:copy_thread() for details.
1832 - movb OLDSS(%esp), %ah
1833 - movb CS(%esp), %al
1834 + movb PT_OLDSS(%esp), %ah
1835 + movb PT_CS(%esp), %al
1836 andl $(VM_MASK | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
1837 cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax
1839 @@ -456,7 +433,7 @@ restore_all:
1843 - movl EFLAGS(%esp), %eax
1844 + movl PT_EFLAGS(%esp), %eax
1845 testl $(VM_MASK|NMI_MASK), %eax
1848 @@ -470,13 +447,13 @@ restore_nocheck:
1850 restore_nocheck_notrace:
1853 + addl $4, %esp # skip orig_eax/error_code
1854 CFI_ADJUST_CFA_OFFSET -4
1856 .section .fixup,"ax"
1860 + ENABLE_INTERRUPTS(CLBR_NONE)
1862 pushl $0 # no error code
1863 pushl $do_iret_error
1864 @@ -490,33 +467,42 @@ iret_exc:
1868 - larl OLDSS(%esp), %eax
1869 + larl PT_OLDSS(%esp), %eax
1871 testl $0x00400000, %eax # returning to 32bit stack?
1872 jnz restore_nocheck # allright, normal return
1874 +#ifdef CONFIG_PARAVIRT
1876 + * The kernel can't run on a non-flat stack if paravirt mode
1877 + * is active. Rather than try to fixup the high bits of
1878 + * ESP, bypass this code entirely. This may break DOSemu
1879 + * and/or Wine support in a paravirt VM, although the option
1880 + * is still available to implement the setting of the high
1881 + * 16-bits in the INTERRUPT_RETURN paravirt-op.
1883 + cmpl $0, paravirt_ops+PARAVIRT_enabled
1884 + jne restore_nocheck
1887 /* If returning to userspace with 16bit stack,
1888 * try to fix the higher word of ESP, as the CPU
1890 * This is an "official" bug of all the x86-compatible
1891 * CPUs, which we can try to work around to make
1892 * dosemu and wine happy. */
1893 - subl $8, %esp # reserve space for switch16 pointer
1894 - CFI_ADJUST_CFA_OFFSET 8
1895 - DISABLE_INTERRUPTS
1896 + movl PT_OLDESP(%esp), %eax
1898 + call patch_espfix_desc
1899 + pushl $__ESPFIX_SS
1900 + CFI_ADJUST_CFA_OFFSET 4
1902 + CFI_ADJUST_CFA_OFFSET 4
1903 + DISABLE_INTERRUPTS(CLBR_EAX)
1906 - /* Set up the 16bit stack frame with switch32 pointer on top,
1907 - * and a switch16 pointer on top of the current frame. */
1908 - call setup_x86_bogus_stack
1909 - CFI_ADJUST_CFA_OFFSET -8 # frame has moved
1912 - lss 20+4(%esp), %esp # switch to 16bit stack
1913 -1: INTERRUPT_RETURN
1914 -.section __ex_table,"a"
1919 + CFI_ADJUST_CFA_OFFSET -8
1920 + jmp restore_nocheck
1923 restore_all_enable_events:
1924 @@ -540,7 +526,7 @@ ecrit: /**** END OF CRITICAL REGION ***
1928 - andl $~NMI_MASK, EFLAGS(%esp)
1929 + andl $~NMI_MASK, PT_EFLAGS(%esp)
1932 CFI_ADJUST_CFA_OFFSET -4
1933 @@ -556,7 +542,7 @@ work_pending:
1937 - DISABLE_INTERRUPTS # make sure we don't miss an interrupt
1938 + DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt
1939 # setting need_resched or sigpending
1940 # between sampling and the iret
1942 @@ -569,7 +555,8 @@ work_resched:
1944 work_notifysig: # deal with pending signals and
1945 # notify-resume requests
1946 - testl $VM_MASK, EFLAGS(%esp)
1948 + testl $VM_MASK, PT_EFLAGS(%esp)
1950 jne work_notifysig_v86 # returning to kernel-space or
1952 @@ -579,29 +566,30 @@ work_notifysig: # deal with pending s
1957 pushl %ecx # save ti_flags for do_notify_resume
1958 CFI_ADJUST_CFA_OFFSET 4
1959 call save_v86_state # %eax contains pt_regs pointer
1961 CFI_ADJUST_CFA_OFFSET -4
1967 call do_notify_resume
1968 jmp resume_userspace_sig
1971 # perform syscall exit tracing
1973 syscall_trace_entry:
1974 - movl $-ENOSYS,EAX(%esp)
1975 + movl $-ENOSYS,PT_EAX(%esp)
1978 call do_syscall_trace
1980 jne resume_userspace # ret != 0 -> running under PTRACE_SYSEMU,
1981 # so must skip actual syscall
1982 - movl ORIG_EAX(%esp), %eax
1983 + movl PT_ORIG_EAX(%esp), %eax
1984 cmpl $(nr_syscalls), %eax
1987 @@ -612,7 +600,7 @@ syscall_exit_work:
1988 testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl
1991 - ENABLE_INTERRUPTS # could let do_syscall_trace() call
1992 + ENABLE_INTERRUPTS(CLBR_ANY) # could let do_syscall_trace() call
1993 # schedule() instead
1996 @@ -626,40 +614,39 @@ syscall_fault:
1997 CFI_ADJUST_CFA_OFFSET 4
1999 GET_THREAD_INFO(%ebp)
2000 - movl $-EFAULT,EAX(%esp)
2001 + movl $-EFAULT,PT_EAX(%esp)
2002 jmp resume_userspace
2005 - movl $-ENOSYS,EAX(%esp)
2006 + movl $-ENOSYS,PT_EAX(%esp)
2007 jmp resume_userspace
2011 #define FIXUP_ESPFIX_STACK \
2012 - movl %esp, %eax; \
2013 - /* switch to 32bit stack using the pointer on top of 16bit stack */ \
2014 - lss %ss:CPU_16BIT_STACK_SIZE-8, %esp; \
2015 - /* copy data from 16bit stack to 32bit stack */ \
2016 - call fixup_x86_bogus_stack; \
2017 - /* put ESP to the proper location */ \
2019 -#define UNWIND_ESPFIX_STACK \
2020 + /* since we are on a wrong stack, we cant make it a C code :( */ \
2021 + movl %gs:PDA_cpu, %ebx; \
2022 + PER_CPU(cpu_gdt_descr, %ebx); \
2023 + movl GDS_address(%ebx), %ebx; \
2024 + GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \
2025 + addl %esp, %eax; \
2026 + pushl $__KERNEL_DS; \
2027 + CFI_ADJUST_CFA_OFFSET 4; \
2029 CFI_ADJUST_CFA_OFFSET 4; \
2030 + lss (%esp), %esp; \
2031 + CFI_ADJUST_CFA_OFFSET -8;
2032 +#define UNWIND_ESPFIX_STACK \
2034 - /* see if on 16bit stack */ \
2035 + /* see if on espfix stack */ \
2036 cmpw $__ESPFIX_SS, %ax; \
2039 - CFI_ADJUST_CFA_OFFSET -4; \
2040 -.section .fixup,"ax"; \
2041 -28: movl $__KERNEL_DS, %eax; \
2043 + movl $__KERNEL_DS, %eax; \
2046 - /* switch to 32bit stack */ \
2047 + /* switch to normal stack */ \
2048 FIXUP_ESPFIX_STACK; \
2054 * Build the entry stubs and pointer table with
2055 @@ -723,13 +710,16 @@ KPROBE_ENTRY(page_fault)
2056 CFI_ADJUST_CFA_OFFSET 4
2059 + /* the function address is in %gs's slot on the stack */
2061 + CFI_ADJUST_CFA_OFFSET 4
2062 + /*CFI_REL_OFFSET es, 0*/
2064 CFI_ADJUST_CFA_OFFSET 4
2065 /*CFI_REL_OFFSET ds, 0*/
2067 CFI_ADJUST_CFA_OFFSET 4
2068 CFI_REL_OFFSET eax, 0
2071 CFI_ADJUST_CFA_OFFSET 4
2072 CFI_REL_OFFSET ebp, 0
2073 @@ -742,7 +732,6 @@ error_code:
2075 CFI_ADJUST_CFA_OFFSET 4
2076 CFI_REL_OFFSET edx, 0
2077 - decl %eax # eax = -1
2079 CFI_ADJUST_CFA_OFFSET 4
2080 CFI_REL_OFFSET ecx, 0
2081 @@ -750,18 +739,20 @@ error_code:
2082 CFI_ADJUST_CFA_OFFSET 4
2083 CFI_REL_OFFSET ebx, 0
2087 CFI_ADJUST_CFA_OFFSET 4
2088 - /*CFI_REL_OFFSET es, 0*/
2089 + /*CFI_REL_OFFSET gs, 0*/
2090 + movl $(__KERNEL_PDA), %ecx
2094 CFI_ADJUST_CFA_OFFSET -4
2095 /*CFI_REGISTER es, ecx*/
2096 - movl ES(%esp), %edi # get the function address
2097 - movl ORIG_EAX(%esp), %edx # get the error code
2098 - movl %eax, ORIG_EAX(%esp)
2099 - movl %ecx, ES(%esp)
2100 - /*CFI_REL_OFFSET es, ES*/
2101 + movl PT_GS(%esp), %edi # get the function address
2102 + movl PT_ORIG_EAX(%esp), %edx # get the error code
2103 + movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart
2104 + mov %ecx, PT_GS(%esp)
2105 + /*CFI_REL_OFFSET gs, ES*/
2106 movl $(__USER_DS), %ecx
2109 @@ -793,7 +784,7 @@ ENTRY(hypervisor_callback)
2111 CFI_ADJUST_CFA_OFFSET 4
2113 - movl EIP(%esp),%eax
2114 + movl PT_EIP(%esp),%eax
2118 @@ -802,7 +793,7 @@ ENTRY(hypervisor_callback)
2120 cmpl $sysexit_ecrit,%eax
2122 - addl $OLDESP,%esp # Remove eflags...ebx from stack frame.
2123 + addl $PT_OLDESP,%esp # Remove eflags...ebx from stack frame.
2125 CFI_ADJUST_CFA_OFFSET 4
2126 call evtchn_do_upcall
2127 @@ -824,7 +815,7 @@ critical_region_fixup:
2130 15: leal (%esp,%ecx),%esi # %esi points at end of src region
2131 - leal OLDESP(%esp),%edi # %edi points at end of dst region
2132 + leal PT_OLDESP(%esp),%edi # %edi points at end of dst region
2133 shrl $2,%ecx # convert words to bytes
2134 je 17f # skip loop if nothing to copy
2135 16: subl $4,%esi # pre-decrementing copy loop
2136 @@ -848,8 +839,9 @@ critical_fixup_table:
2137 .byte 0x18 # pop %eax
2138 .byte 0x1c # pop %ds
2139 .byte 0x20 # pop %es
2140 - .byte 0x24,0x24,0x24 # add $4,%esp
2142 + .byte 0x24,0x24 # pop %gs
2143 + .byte 0x28,0x28,0x28 # add $4,%esp
2145 .byte 0xff,0xff,0xff,0xff # movb $1,1(%esi)
2146 .byte 0x00,0x00 # jmp 11b
2148 @@ -940,7 +932,7 @@ ENTRY(device_not_available)
2149 jmp ret_from_exception
2150 device_available_emulate:
2153 + preempt_stop(CLBR_ANY)
2154 call math_state_restore
2155 jmp ret_from_exception
2157 @@ -1010,7 +1002,7 @@ KPROBE_ENTRY(nmi)
2158 cmpw $__ESPFIX_SS, %ax
2160 CFI_ADJUST_CFA_OFFSET -4
2161 - je nmi_16bit_stack
2162 + je nmi_espfix_stack
2163 cmpl $sysenter_entry,(%esp)
2166 @@ -1053,7 +1045,7 @@ nmi_debug_stack_check:
2167 FIX_STACK(24,nmi_stack_correct, 1)
2168 jmp nmi_stack_correct
2172 /* We have a RING0_INT_FRAME here.
2174 * create the pointer to lss back
2175 @@ -1062,7 +1054,6 @@ nmi_16bit_stack:
2176 CFI_ADJUST_CFA_OFFSET 4
2178 CFI_ADJUST_CFA_OFFSET 4
2181 /* copy the iret frame of 12 bytes */
2183 @@ -1073,11 +1064,11 @@ nmi_16bit_stack:
2184 CFI_ADJUST_CFA_OFFSET 4
2186 FIXUP_ESPFIX_STACK # %eax == %esp
2187 - CFI_ADJUST_CFA_OFFSET -20 # the frame has now moved
2188 xorl %edx,%edx # zero error code
2191 - lss 12+4(%esp), %esp # back to 16bit stack
2192 + lss 12+4(%esp), %esp # back to espfix stack
2193 + CFI_ADJUST_CFA_OFFSET -24
2196 .section __ex_table,"a"
2197 @@ -1093,12 +1084,25 @@ KPROBE_ENTRY(nmi)
2198 xorl %edx,%edx # zero error code
2199 movl %esp,%eax # pt_regs pointer
2201 - orl $NMI_MASK, EFLAGS(%esp)
2202 + orl $NMI_MASK, PT_EFLAGS(%esp)
2208 +#ifdef CONFIG_PARAVIRT
2211 +.section __ex_table,"a"
2216 +ENTRY(native_irq_enable_sysexit)
2223 pushl $-1 # mark this as an int
2224 @@ -1214,37 +1218,6 @@ ENTRY(spurious_interrupt_bug)
2226 #endif /* !CONFIG_XEN */
2228 -#ifdef CONFIG_STACK_UNWIND
2229 -ENTRY(arch_unwind_init_running)
2231 - movl 4(%esp), %edx
2233 - leal 4(%esp), %eax
2234 - movl %ebx, EBX(%edx)
2236 - movl %ebx, ECX(%edx)
2237 - movl %ebx, EDX(%edx)
2238 - movl %esi, ESI(%edx)
2239 - movl %edi, EDI(%edx)
2240 - movl %ebp, EBP(%edx)
2241 - movl %ebx, EAX(%edx)
2242 - movl $__USER_DS, DS(%edx)
2243 - movl $__USER_DS, ES(%edx)
2244 - movl %ebx, ORIG_EAX(%edx)
2245 - movl %ecx, EIP(%edx)
2246 - movl 12(%esp), %ecx
2247 - movl $__KERNEL_CS, CS(%edx)
2248 - movl %ebx, EFLAGS(%edx)
2249 - movl %eax, OLDESP(%edx)
2250 - movl 8(%esp), %eax
2251 - movl %ecx, 8(%esp)
2252 - movl EBX(%edx), %ebx
2253 - movl $__KERNEL_DS, OLDSS(%edx)
2256 -ENDPROC(arch_unwind_init_running)
2259 ENTRY(fixup_4gb_segment)
2261 pushl $do_fixup_4gb_segment
2262 Index: head-2008-12-01/arch/x86/kernel/head_32-xen.S
2263 ===================================================================
2264 --- head-2008-12-01.orig/arch/x86/kernel/head_32-xen.S 2008-12-01 11:29:05.000000000 +0100
2265 +++ head-2008-12-01/arch/x86/kernel/head_32-xen.S 2008-12-01 11:32:38.000000000 +0100
2267 #include <asm/cache.h>
2268 #include <asm/thread_info.h>
2269 #include <asm/asm-offsets.h>
2270 +#include <asm/boot.h>
2271 #include <asm/dwarf2.h>
2272 #include <xen/interface/xen.h>
2273 #include <xen/interface/elfnote.h>
2274 @@ -35,6 +36,8 @@ ENTRY(startup_32)
2275 /* Set up the stack pointer */
2276 movl $(init_thread_union+THREAD_SIZE),%esp
2280 /* get vendor info */
2281 xorl %eax,%eax # call CPUID with 0 -> return vendor ID
2283 @@ -57,14 +60,58 @@ ENTRY(startup_32)
2285 movb $1,X86_HARD_MATH
2287 - xorl %eax,%eax # Clear FS/GS and LDT
2288 + xorl %eax,%eax # Clear FS
2292 + movl $(__KERNEL_PDA),%eax
2295 cld # gcc2 wants the direction flag cleared at all times
2297 pushl $0 # fake return address for unwinder
2301 + * Point the GDT at this CPU's PDA. This will be
2302 + * cpu_gdt_table and boot_pda.
2305 + /* get the PDA pointer */
2306 + movl $boot_pda, %eax
2308 + /* slot the PDA address into the GDT */
2309 + mov $cpu_gdt_table, %ecx
2310 + mov %ax, (__KERNEL_PDA+0+2)(%ecx) /* base & 0x0000ffff */
2312 + mov %al, (__KERNEL_PDA+4+0)(%ecx) /* base & 0x00ff0000 */
2313 + mov %ah, (__KERNEL_PDA+4+3)(%ecx) /* base & 0xff000000 */
2315 + # %esi still points to start_info, and no registers
2316 + # need to be preserved.
2318 + movl XEN_START_mfn_list(%esi), %ebx
2319 + movl $(cpu_gdt_table - __PAGE_OFFSET), %eax
2320 + shrl $PAGE_SHIFT, %eax
2321 + movl (%ebx,%eax,4), %ecx
2322 + pushl %ecx # frame number for set_gdt below
2326 + shldl $PAGE_SHIFT, %ecx, %edx
2327 + shll $PAGE_SHIFT, %ecx
2329 + movl $cpu_gdt_table, %ebx
2330 + movl $__HYPERVISOR_update_va_mapping, %eax
2333 + movl $(PAGE_SIZE_asm / 8), %ecx
2335 + movl $__HYPERVISOR_set_gdt, %eax
2341 #define HYPERCALL_PAGE_OFFSET 0x1000
2342 .org HYPERCALL_PAGE_OFFSET
2343 ENTRY(hypercall_page)
2344 @@ -93,7 +140,8 @@ ENTRY(empty_zero_page)
2346 * The Global Descriptor Table contains 28 quadwords, per-CPU.
2348 - .align L1_CACHE_BYTES
2349 + .section .data.page_aligned, "aw"
2350 + .align PAGE_SIZE_asm
2351 ENTRY(cpu_gdt_table)
2352 .quad 0x0000000000000000 /* NULL descriptor */
2353 .quad 0x0000000000000000 /* 0x0b reserved */
2354 @@ -135,12 +183,13 @@ ENTRY(cpu_gdt_table)
2355 .quad 0x0000000000000000 /* 0xc0 APM CS 16 code (16 bit) */
2356 .quad 0x0000000000000000 /* 0xc8 APM DS data */
2358 - .quad 0x0000000000000000 /* 0xd0 - ESPFIX 16-bit SS */
2359 - .quad 0x0000000000000000 /* 0xd8 - unused */
2360 + .quad 0x0000000000000000 /* 0xd0 - ESPFIX SS */
2361 + .quad 0x00cf92000000ffff /* 0xd8 - PDA */
2362 .quad 0x0000000000000000 /* 0xe0 - unused */
2363 .quad 0x0000000000000000 /* 0xe8 - unused */
2364 .quad 0x0000000000000000 /* 0xf0 - unused */
2365 .quad 0x0000000000000000 /* 0xf8 - GDT entry 31: double-fault TSS */
2366 + .align PAGE_SIZE_asm
2368 #if CONFIG_XEN_COMPAT <= 0x030002
2370 @@ -165,9 +214,9 @@ ENTRY(cpu_gdt_table)
2371 .ascii ",ELF_PADDR_OFFSET=0x"
2373 .ascii ",VIRT_ENTRY=0x"
2374 - utoa (__PAGE_OFFSET + __PHYSICAL_START + VIRT_ENTRY_OFFSET)
2375 + utoa (__PAGE_OFFSET + LOAD_PHYSICAL_ADDR + VIRT_ENTRY_OFFSET)
2376 .ascii ",HYPERCALL_PAGE=0x"
2377 - utoa ((__PHYSICAL_START+HYPERCALL_PAGE_OFFSET)>>PAGE_SHIFT)
2378 + utoa ((LOAD_PHYSICAL_ADDR+HYPERCALL_PAGE_OFFSET)>>PAGE_SHIFT)
2379 .ascii ",FEATURES=writable_page_tables"
2380 .ascii "|writable_descriptor_tables"
2381 .ascii "|auto_translated_physmap"
2382 Index: head-2008-12-01/arch/x86/kernel/io_apic_32-xen.c
2383 ===================================================================
2384 --- head-2008-12-01.orig/arch/x86/kernel/io_apic_32-xen.c 2008-12-01 11:29:05.000000000 +0100
2385 +++ head-2008-12-01/arch/x86/kernel/io_apic_32-xen.c 2008-12-01 11:32:38.000000000 +0100
2387 #include <linux/pci.h>
2388 #include <linux/msi.h>
2389 #include <linux/htirq.h>
2390 +#include <linux/freezer.h>
2393 #include <asm/smp.h>
2394 @@ -199,14 +200,20 @@ static struct IO_APIC_route_entry ioapic
2395 * the interrupt, and we need to make sure the entry is fully populated
2396 * before that happens.
2398 -static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
2400 +__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
2402 - unsigned long flags;
2403 union entry_union eu;
2405 - spin_lock_irqsave(&ioapic_lock, flags);
2406 io_apic_write(apic, 0x11 + 2*pin, eu.w2);
2407 io_apic_write(apic, 0x10 + 2*pin, eu.w1);
2410 +static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
2412 + unsigned long flags;
2413 + spin_lock_irqsave(&ioapic_lock, flags);
2414 + __ioapic_write_entry(apic, pin, e);
2415 spin_unlock_irqrestore(&ioapic_lock, flags);
2418 @@ -889,8 +896,7 @@ static int __init find_isa_irq_pin(int i
2420 if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
2421 mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
2422 - mp_bus_id_to_type[lbus] == MP_BUS_MCA ||
2423 - mp_bus_id_to_type[lbus] == MP_BUS_NEC98
2424 + mp_bus_id_to_type[lbus] == MP_BUS_MCA
2426 (mp_irqs[i].mpc_irqtype == type) &&
2427 (mp_irqs[i].mpc_srcbusirq == irq))
2428 @@ -909,8 +915,7 @@ static int __init find_isa_irq_apic(int
2430 if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
2431 mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
2432 - mp_bus_id_to_type[lbus] == MP_BUS_MCA ||
2433 - mp_bus_id_to_type[lbus] == MP_BUS_NEC98
2434 + mp_bus_id_to_type[lbus] == MP_BUS_MCA
2436 (mp_irqs[i].mpc_irqtype == type) &&
2437 (mp_irqs[i].mpc_srcbusirq == irq))
2438 @@ -1043,12 +1048,6 @@ static int EISA_ELCR(unsigned int irq)
2439 #define default_MCA_trigger(idx) (1)
2440 #define default_MCA_polarity(idx) (0)
2442 -/* NEC98 interrupts are always polarity zero edge triggered,
2443 - * when listed as conforming in the MP table. */
2445 -#define default_NEC98_trigger(idx) (0)
2446 -#define default_NEC98_polarity(idx) (0)
2448 static int __init MPBIOS_polarity(int idx)
2450 int bus = mp_irqs[idx].mpc_srcbus;
2451 @@ -1083,11 +1082,6 @@ static int __init MPBIOS_polarity(int id
2452 polarity = default_MCA_polarity(idx);
2455 - case MP_BUS_NEC98: /* NEC 98 pin */
2457 - polarity = default_NEC98_polarity(idx);
2462 printk(KERN_WARNING "broken BIOS!!\n");
2463 @@ -1157,11 +1151,6 @@ static int MPBIOS_trigger(int idx)
2464 trigger = default_MCA_trigger(idx);
2467 - case MP_BUS_NEC98: /* NEC 98 pin */
2469 - trigger = default_NEC98_trigger(idx);
2474 printk(KERN_WARNING "broken BIOS!!\n");
2475 @@ -1223,7 +1212,6 @@ static int pin_2_irq(int idx, int apic,
2476 case MP_BUS_ISA: /* ISA pin */
2479 - case MP_BUS_NEC98:
2481 irq = mp_irqs[idx].mpc_srcbusirq;
2483 @@ -1291,7 +1279,7 @@ static inline int IO_APIC_irq_trigger(in
2486 /* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
2487 -u8 irq_vector[NR_IRQ_VECTORS] __read_mostly; /* = { FIRST_DEVICE_VECTOR , 0 }; */
2488 +static u8 irq_vector[NR_IRQ_VECTORS] __read_mostly; /* = { FIRST_DEVICE_VECTOR , 0 }; */
2490 static int __assign_irq_vector(int irq)
2492 @@ -1417,8 +1405,8 @@ static void __init setup_IO_APIC_irqs(vo
2493 if (!apic && (irq < 16))
2494 disable_8259A_irq(irq);
2496 - ioapic_write_entry(apic, pin, entry);
2497 spin_lock_irqsave(&ioapic_lock, flags);
2498 + __ioapic_write_entry(apic, pin, entry);
2499 set_native_irq_info(irq, TARGET_CPUS);
2500 spin_unlock_irqrestore(&ioapic_lock, flags);
2502 @@ -1988,6 +1976,15 @@ static void __init setup_ioapic_ids_from
2506 +static int no_timer_check __initdata;
2508 +static int __init notimercheck(char *s)
2510 + no_timer_check = 1;
2513 +__setup("no_timer_check", notimercheck);
2516 * There is a nasty bug in some older SMP boards, their mptable lies
2517 * about the timer IRQ. We do the following to work around the situation:
2518 @@ -1996,10 +1993,13 @@ static void __init setup_ioapic_ids_from
2519 * - if this function detects that timer IRQs are defunct, then we fall
2520 * back to ISA timer IRQs
2522 -static int __init timer_irq_works(void)
2523 +int __init timer_irq_works(void)
2525 unsigned long t1 = jiffies;
2527 + if (no_timer_check)
2531 /* Let ten ticks pass... */
2532 mdelay((10 * 1000) / HZ);
2533 @@ -2226,9 +2226,15 @@ static inline void unlock_ExtINT_logic(v
2534 unsigned char save_control, save_freq_select;
2536 pin = find_isa_irq_pin(8, mp_INT);
2541 apic = find_isa_irq_apic(8, mp_INT);
2548 entry0 = ioapic_read_entry(apic, pin);
2549 clear_IO_APIC_pin(apic, pin);
2550 @@ -2273,7 +2279,7 @@ int timer_uses_ioapic_pin_0;
2551 * is so screwy. Thanks to Brian Perkins for testing/hacking this beast
2552 * fanatically on his truly buggy board.
2554 -static inline void check_timer(void)
2555 +static inline void __init check_timer(void)
2557 int apic1, pin1, apic2, pin2;
2559 @@ -2558,7 +2564,7 @@ device_initcall(ioapic_init_sysfs);
2560 int create_irq(void)
2562 /* Allocate an unused irq */
2563 - int irq, new, vector;
2564 + int irq, new, vector = 0;
2565 unsigned long flags;
2568 @@ -2939,8 +2945,8 @@ int io_apic_set_pci_routing (int ioapic,
2569 if (!ioapic && (irq < 16))
2570 disable_8259A_irq(irq);
2572 - ioapic_write_entry(ioapic, pin, entry);
2573 spin_lock_irqsave(&ioapic_lock, flags);
2574 + __ioapic_write_entry(ioapic, pin, entry);
2575 set_native_irq_info(irq, TARGET_CPUS);
2576 spin_unlock_irqrestore(&ioapic_lock, flags);
2578 Index: head-2008-12-01/arch/x86/kernel/ldt_32-xen.c
2579 ===================================================================
2580 --- head-2008-12-01.orig/arch/x86/kernel/ldt_32-xen.c 2008-12-01 11:29:05.000000000 +0100
2581 +++ head-2008-12-01/arch/x86/kernel/ldt_32-xen.c 2008-12-01 11:32:38.000000000 +0100
2582 @@ -177,16 +177,14 @@ static int read_default_ldt(void __user
2589 - address = &default_ldt[0];
2590 size = 5*sizeof(struct desc_struct);
2591 if (size > bytecount)
2595 - if (copy_to_user(ptr, address, size))
2596 + if (clear_user(ptr, size))
2600 Index: head-2008-12-01/arch/x86/kernel/microcode-xen.c
2601 ===================================================================
2602 --- head-2008-12-01.orig/arch/x86/kernel/microcode-xen.c 2008-12-01 11:29:05.000000000 +0100
2603 +++ head-2008-12-01/arch/x86/kernel/microcode-xen.c 2008-12-01 11:32:38.000000000 +0100
2606 * Intel CPU Microcode Update Driver for Linux
2608 - * Copyright (C) 2000-2004 Tigran Aivazian
2609 + * Copyright (C) 2000-2006 Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
2610 * 2006 Shaohua Li <shaohua.li@intel.com>
2612 * This driver allows to upgrade microcode on Intel processors
2614 #include <asm/processor.h>
2616 MODULE_DESCRIPTION("Intel CPU (IA-32) Microcode Update Driver");
2617 -MODULE_AUTHOR("Tigran Aivazian <tigran@veritas.com>");
2618 +MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>");
2619 MODULE_LICENSE("GPL");
2622 @@ -195,7 +195,7 @@ static int __init microcode_init (void)
2623 request_microcode();
2626 - "IA-32 Microcode Update Driver: v" MICROCODE_VERSION " <tigran@veritas.com>\n");
2627 + "IA-32 Microcode Update Driver: v" MICROCODE_VERSION " <tigran@aivazian.fsnet.co.uk>\n");
2631 Index: head-2008-12-01/arch/x86/kernel/mpparse_32-xen.c
2632 ===================================================================
2633 --- head-2008-12-01.orig/arch/x86/kernel/mpparse_32-xen.c 2008-12-01 11:29:05.000000000 +0100
2634 +++ head-2008-12-01/arch/x86/kernel/mpparse_32-xen.c 2008-12-01 11:32:38.000000000 +0100
2637 /* Have we found an MP table */
2638 int smp_found_config;
2639 -unsigned int __initdata maxcpus = NR_CPUS;
2640 +unsigned int __cpuinitdata maxcpus = NR_CPUS;
2643 * Various Linux-internal data structures created from the
2644 @@ -102,10 +102,10 @@ static int __init mpf_checksum(unsigned
2647 static int mpc_record;
2648 -static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] __initdata;
2649 +static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] __cpuinitdata;
2652 -static void __devinit MP_processor_info (struct mpc_config_processor *m)
2653 +static void __cpuinit MP_processor_info (struct mpc_config_processor *m)
2656 physid_mask_t phys_cpu;
2657 @@ -221,7 +221,7 @@ static void __devinit MP_processor_info
2658 bios_cpu_apicid[num_processors - 1] = m->mpc_apicid;
2661 -void __init MP_processor_info (struct mpc_config_processor *m)
2662 +static void __cpuinit MP_processor_info (struct mpc_config_processor *m)
2666 @@ -256,8 +256,6 @@ static void __init MP_bus_info (struct m
2667 mp_current_pci_id++;
2668 } else if (strncmp(str, BUSTYPE_MCA, sizeof(BUSTYPE_MCA)-1) == 0) {
2669 mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA;
2670 - } else if (strncmp(str, BUSTYPE_NEC98, sizeof(BUSTYPE_NEC98)-1) == 0) {
2671 - mp_bus_id_to_type[m->mpc_busid] = MP_BUS_NEC98;
2673 printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str);
2675 @@ -842,7 +840,7 @@ void __init mp_register_lapic_address(u6
2679 -void __devinit mp_register_lapic (u8 id, u8 enabled)
2680 +void __cpuinit mp_register_lapic (u8 id, u8 enabled)
2682 struct mpc_config_processor processor;
2684 Index: head-2008-12-01/arch/x86/kernel/pci-dma-xen.c
2685 ===================================================================
2686 --- head-2008-12-01.orig/arch/x86/kernel/pci-dma-xen.c 2008-12-01 11:29:05.000000000 +0100
2687 +++ head-2008-12-01/arch/x86/kernel/pci-dma-xen.c 2008-12-01 11:32:38.000000000 +0100
2688 @@ -276,7 +276,7 @@ EXPORT_SYMBOL(dma_free_coherent);
2689 int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
2690 dma_addr_t device_addr, size_t size, int flags)
2692 - void __iomem *mem_base;
2693 + void __iomem *mem_base = NULL;
2694 int pages = size >> PAGE_SHIFT;
2695 int bitmap_size = (pages + 31)/32;
2697 @@ -293,14 +293,12 @@ int dma_declare_coherent_memory(struct d
2701 - dev->dma_mem = kmalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL);
2702 + dev->dma_mem = kzalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL);
2705 - memset(dev->dma_mem, 0, sizeof(struct dma_coherent_mem));
2706 - dev->dma_mem->bitmap = kmalloc(bitmap_size, GFP_KERNEL);
2707 + dev->dma_mem->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
2708 if (!dev->dma_mem->bitmap)
2710 - memset(dev->dma_mem->bitmap, 0, bitmap_size);
2712 dev->dma_mem->virt_base = mem_base;
2713 dev->dma_mem->device_base = device_addr;
2714 @@ -315,6 +313,8 @@ int dma_declare_coherent_memory(struct d
2716 kfree(dev->dma_mem->bitmap);
2719 + iounmap(mem_base);
2722 EXPORT_SYMBOL(dma_declare_coherent_memory);
2723 Index: head-2008-12-01/arch/x86/kernel/process_32-xen.c
2724 ===================================================================
2725 --- head-2008-12-01.orig/arch/x86/kernel/process_32-xen.c 2008-12-01 11:29:05.000000000 +0100
2726 +++ head-2008-12-01/arch/x86/kernel/process_32-xen.c 2008-12-01 11:32:38.000000000 +0100
2729 #include <asm/tlbflush.h>
2730 #include <asm/cpu.h>
2731 +#include <asm/pda.h>
2733 asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
2735 @@ -104,28 +105,24 @@ EXPORT_SYMBOL(enable_hlt);
2737 static void poll_idle (void)
2739 - local_irq_enable();
2746 - : : "i"(_TIF_NEED_RESCHED), "m" (current_thread_info()->flags));
2750 static void xen_idle(void)
2752 - local_irq_disable();
2753 + current_thread_info()->status &= ~TS_POLLING;
2755 + * TS_POLLING-cleared state must be visible before we
2756 + * test NEED_RESCHED:
2760 - if (need_resched())
2761 + local_irq_disable();
2762 + if (!need_resched())
2763 + safe_halt(); /* enables interrupts racelessly */
2767 - current_thread_info()->status &= ~TS_POLLING;
2768 - smp_mb__after_clear_bit();
2770 - current_thread_info()->status |= TS_POLLING;
2772 + current_thread_info()->status |= TS_POLLING;
2774 #ifdef CONFIG_APM_MODULE
2775 EXPORT_SYMBOL(default_idle);
2776 @@ -250,8 +247,8 @@ void show_regs(struct pt_regs * regs)
2777 regs->eax,regs->ebx,regs->ecx,regs->edx);
2778 printk("ESI: %08lx EDI: %08lx EBP: %08lx",
2779 regs->esi, regs->edi, regs->ebp);
2780 - printk(" DS: %04x ES: %04x\n",
2781 - 0xffff & regs->xds,0xffff & regs->xes);
2782 + printk(" DS: %04x ES: %04x GS: %04x\n",
2783 + 0xffff & regs->xds,0xffff & regs->xes, 0xffff & regs->xgs);
2787 @@ -282,6 +279,7 @@ int kernel_thread(int (*fn)(void *), voi
2789 regs.xds = __USER_DS;
2790 regs.xes = __USER_DS;
2791 + regs.xgs = __KERNEL_PDA;
2793 regs.eip = (unsigned long) kernel_thread_helper;
2794 regs.xcs = __KERNEL_CS | get_kernel_rpl();
2795 @@ -359,7 +357,6 @@ int copy_thread(int nr, unsigned long cl
2796 p->thread.eip = (unsigned long) ret_from_fork;
2798 savesegment(fs,p->thread.fs);
2799 - savesegment(gs,p->thread.gs);
2802 if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
2803 @@ -438,7 +435,7 @@ void dump_thread(struct pt_regs * regs,
2804 dump->regs.ds = regs->xds;
2805 dump->regs.es = regs->xes;
2806 savesegment(fs,dump->regs.fs);
2807 - savesegment(gs,dump->regs.gs);
2808 + dump->regs.gs = regs->xgs;
2809 dump->regs.orig_eax = regs->orig_eax;
2810 dump->regs.eip = regs->eip;
2811 dump->regs.cs = regs->xcs;
2812 @@ -635,17 +632,19 @@ struct task_struct fastcall * __switch_t
2813 if (unlikely(HYPERVISOR_multicall_check(_mcl, mcl - _mcl, NULL)))
2816 + /* we're going to use this soon, after a few expensive things */
2817 + if (next_p->fpu_counter > 5)
2818 + prefetch(&next->i387.fxsave);
2821 - * Restore %fs and %gs if needed.
2822 + * Restore %fs if needed.
2824 - * Glibc normally makes %fs be zero, and %gs is one of
2825 - * the TLS segments.
2826 + * Glibc normally makes %fs be zero.
2828 if (unlikely(next->fs))
2829 loadsegment(fs, next->fs);
2832 - loadsegment(gs, next->gs);
2833 + write_pda(pcurrent, next_p);
2836 * Now maybe handle debug registers
2837 @@ -655,6 +654,13 @@ struct task_struct fastcall * __switch_t
2839 disable_tsc(prev_p, next_p);
2841 + /* If the task has used fpu the last 5 timeslices, just do a full
2842 + * restore of the math state immediately to avoid the trap; the
2843 + * chances of needing FPU soon are obviously high now
2845 + if (next_p->fpu_counter > 5)
2846 + math_state_restore();
2851 Index: head-2008-12-01/arch/x86/kernel/quirks-xen.c
2852 ===================================================================
2853 --- head-2008-12-01.orig/arch/x86/kernel/quirks-xen.c 2008-01-28 12:24:19.000000000 +0100
2854 +++ head-2008-12-01/arch/x86/kernel/quirks-xen.c 2008-12-01 11:32:38.000000000 +0100
2857 #include <linux/pci.h>
2858 #include <linux/irq.h>
2859 +#include <asm/pci-direct.h>
2860 +#include <asm/genapic.h>
2861 +#include <asm/cpu.h>
2863 #if defined(CONFIG_X86_IO_APIC) && (defined(CONFIG_SMP) || defined(CONFIG_XEN)) && defined(CONFIG_PCI)
2865 -static void __devinit quirk_intel_irqbalance(struct pci_dev *dev)
2866 +static void __devinit verify_quirk_intel_irqbalance(struct pci_dev *dev)
2870 @@ -14,14 +16,12 @@ static void __devinit quirk_intel_irqbal
2871 /* BIOS may enable hardware IRQ balancing for
2872 * E7520/E7320/E7525(revision ID 0x9 and below)
2874 - * Disable SW irqbalance/affinity on those platforms.
2875 + * For those platforms, make sure that the genapic is set to 'flat'
2877 pci_read_config_byte(dev, PCI_CLASS_REVISION, &rev);
2881 - printk(KERN_INFO "Intel E7520/7320/7525 detected.");
2883 /* enable access to config space*/
2884 pci_read_config_byte(dev, 0xf4, &config);
2885 pci_write_config_byte(dev, 0xf4, config|0x2);
2886 @@ -30,6 +30,46 @@ static void __devinit quirk_intel_irqbal
2887 raw_pci_ops->read(0, 0, 0x40, 0x4c, 2, &word);
2889 if (!(word & (1 << 13))) {
2891 +#ifdef CONFIG_X86_64
2892 + if (genapic != &apic_flat)
2893 + panic("APIC mode must be flat on this system\n");
2894 +#elif defined(CONFIG_X86_GENERICARCH)
2895 + if (genapic != &apic_default)
2896 + panic("APIC mode must be default(flat) on this system. Use apic=default\n");
2901 + /* put back the original value for config space*/
2902 + if (!(config & 0x2))
2903 + pci_write_config_byte(dev, 0xf4, config);
2906 +void __init quirk_intel_irqbalance(void)
2911 + /* BIOS may enable hardware IRQ balancing for
2912 + * E7520/E7320/E7525(revision ID 0x9 and below)
2913 + * based platforms.
2914 + * Disable SW irqbalance/affinity on those platforms.
2916 + rev = read_pci_config_byte(0, 0, 0, PCI_CLASS_REVISION);
2920 + printk(KERN_INFO "Intel E7520/7320/7525 detected.");
2922 + /* enable access to config space */
2923 + config = read_pci_config_byte(0, 0, 0, 0xf4);
2924 + write_pci_config_byte(0, 0, 0, 0xf4, config|0x2);
2926 + /* read xTPR register */
2927 + word = read_pci_config_16(0, 0, 0x40, 0x4c);
2929 + if (!(word & (1 << 13))) {
2930 struct xen_platform_op op;
2931 printk(KERN_INFO "Disabling irq balancing and affinity\n");
2932 op.cmd = XENPF_platform_quirk;
2933 @@ -37,11 +77,12 @@ static void __devinit quirk_intel_irqbal
2934 WARN_ON(HYPERVISOR_platform_op(&op));
2937 - /* put back the original value for config space*/
2938 + /* put back the original value for config space */
2939 if (!(config & 0x2))
2940 - pci_write_config_byte(dev, 0xf4, config);
2941 + write_pci_config_byte(0, 0, 0, 0xf4, config);
2943 -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7320_MCH, quirk_intel_irqbalance);
2944 -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7525_MCH, quirk_intel_irqbalance);
2945 -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH, quirk_intel_irqbalance);
2946 +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7320_MCH, verify_quirk_intel_irqbalance);
2947 +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7525_MCH, verify_quirk_intel_irqbalance);
2948 +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH, verify_quirk_intel_irqbalance);
2951 Index: head-2008-12-01/arch/x86/kernel/setup_32-xen.c
2952 ===================================================================
2953 --- head-2008-12-01.orig/arch/x86/kernel/setup_32-xen.c 2008-12-01 11:29:05.000000000 +0100
2954 +++ head-2008-12-01/arch/x86/kernel/setup_32-xen.c 2008-12-01 11:32:38.000000000 +0100
2956 #include <xen/interface/kexec.h>
2959 -/* Forward Declaration. */
2960 -void __init find_max_pfn(void);
2962 static int xen_panic_event(struct notifier_block *, unsigned long, void *);
2963 static struct notifier_block xen_panic_block = {
2964 xen_panic_event, NULL, 0 /* try to go last */
2965 @@ -89,14 +86,11 @@ int disable_pse __devinitdata = 0;
2971 -int efi_enabled = 0;
2972 -EXPORT_SYMBOL(efi_enabled);
2974 +extern struct resource code_resource;
2975 +extern struct resource data_resource;
2977 /* cpu data as detected by the assembly code in head.S */
2978 -struct cpuinfo_x86 new_cpu_data __initdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
2979 +struct cpuinfo_x86 new_cpu_data __cpuinitdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
2980 /* common cpu data for all cpus */
2981 struct cpuinfo_x86 boot_cpu_data __read_mostly = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
2982 EXPORT_SYMBOL(boot_cpu_data);
2983 @@ -112,12 +106,6 @@ unsigned int machine_submodel_id;
2984 unsigned int BIOS_revision;
2985 unsigned int mca_pentium_flag;
2987 -/* For PCI or other memory-mapped resources */
2988 -unsigned long pci_mem_start = 0x10000000;
2990 -EXPORT_SYMBOL(pci_mem_start);
2993 /* Boot loader ID as an integer, for the benefit of proc_dointvec */
2994 int bootloader_type;
2996 @@ -150,10 +138,6 @@ struct ist_info ist_info;
2997 defined(CONFIG_X86_SPEEDSTEP_SMI_MODULE)
2998 EXPORT_SYMBOL(ist_info);
3000 -struct e820map e820;
3002 -struct e820map machine_e820;
3005 extern void early_cpu_init(void);
3006 extern int root_mountflags;
3007 @@ -168,209 +152,6 @@ static char command_line[COMMAND_LINE_SI
3009 unsigned char __initdata boot_params[PARAM_SIZE];
3011 -static struct resource data_resource = {
3012 - .name = "Kernel data",
3015 - .flags = IORESOURCE_BUSY | IORESOURCE_MEM
3018 -static struct resource code_resource = {
3019 - .name = "Kernel code",
3022 - .flags = IORESOURCE_BUSY | IORESOURCE_MEM
3025 -static struct resource system_rom_resource = {
3026 - .name = "System ROM",
3029 - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
3032 -static struct resource extension_rom_resource = {
3033 - .name = "Extension ROM",
3036 - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
3039 -static struct resource adapter_rom_resources[] = { {
3040 - .name = "Adapter ROM",
3043 - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
3045 - .name = "Adapter ROM",
3048 - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
3050 - .name = "Adapter ROM",
3053 - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
3055 - .name = "Adapter ROM",
3058 - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
3060 - .name = "Adapter ROM",
3063 - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
3065 - .name = "Adapter ROM",
3068 - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
3071 -static struct resource video_rom_resource = {
3072 - .name = "Video ROM",
3075 - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
3078 -static struct resource video_ram_resource = {
3079 - .name = "Video RAM area",
3082 - .flags = IORESOURCE_BUSY | IORESOURCE_MEM
3085 -static struct resource standard_io_resources[] = { {
3089 - .flags = IORESOURCE_BUSY | IORESOURCE_IO
3094 - .flags = IORESOURCE_BUSY | IORESOURCE_IO
3099 - .flags = IORESOURCE_BUSY | IORESOURCE_IO
3104 - .flags = IORESOURCE_BUSY | IORESOURCE_IO
3106 - .name = "keyboard",
3109 - .flags = IORESOURCE_BUSY | IORESOURCE_IO
3111 - .name = "dma page reg",
3114 - .flags = IORESOURCE_BUSY | IORESOURCE_IO
3119 - .flags = IORESOURCE_BUSY | IORESOURCE_IO
3124 - .flags = IORESOURCE_BUSY | IORESOURCE_IO
3129 - .flags = IORESOURCE_BUSY | IORESOURCE_IO
3132 -#define romsignature(x) (*(unsigned short *)(x) == 0xaa55)
3134 -static int __init romchecksum(unsigned char *rom, unsigned long length)
3136 - unsigned char *p, sum = 0;
3138 - for (p = rom; p < rom + length; p++)
3143 -static void __init probe_roms(void)
3145 - unsigned long start, length, upper;
3146 - unsigned char *rom;
3150 - /* Nothing to do if not running in dom0. */
3151 - if (!is_initial_xendomain())
3156 - upper = adapter_rom_resources[0].start;
3157 - for (start = video_rom_resource.start; start < upper; start += 2048) {
3158 - rom = isa_bus_to_virt(start);
3159 - if (!romsignature(rom))
3162 - video_rom_resource.start = start;
3164 - /* 0 < length <= 0x7f * 512, historically */
3165 - length = rom[2] * 512;
3167 - /* if checksum okay, trust length byte */
3168 - if (length && romchecksum(rom, length))
3169 - video_rom_resource.end = start + length - 1;
3171 - request_resource(&iomem_resource, &video_rom_resource);
3175 - start = (video_rom_resource.end + 1 + 2047) & ~2047UL;
3176 - if (start < upper)
3180 - request_resource(&iomem_resource, &system_rom_resource);
3181 - upper = system_rom_resource.start;
3183 - /* check for extension rom (ignore length byte!) */
3184 - rom = isa_bus_to_virt(extension_rom_resource.start);
3185 - if (romsignature(rom)) {
3186 - length = extension_rom_resource.end - extension_rom_resource.start + 1;
3187 - if (romchecksum(rom, length)) {
3188 - request_resource(&iomem_resource, &extension_rom_resource);
3189 - upper = extension_rom_resource.start;
3193 - /* check for adapter roms on 2k boundaries */
3194 - for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; start += 2048) {
3195 - rom = isa_bus_to_virt(start);
3196 - if (!romsignature(rom))
3199 - /* 0 < length <= 0x7f * 512, historically */
3200 - length = rom[2] * 512;
3202 - /* but accept any length that fits if checksum okay */
3203 - if (!length || start + length > upper || !romchecksum(rom, length))
3206 - adapter_rom_resources[i].start = start;
3207 - adapter_rom_resources[i].end = start + length - 1;
3208 - request_resource(&iomem_resource, &adapter_rom_resources[i]);
3210 - start = adapter_rom_resources[i++].end & ~2047UL;
3215 * Point at the empty zero page to start with. We map the real shared_info
3216 * page as soon as fixmap is up and running.
3217 @@ -386,353 +167,6 @@ EXPORT_SYMBOL(phys_to_machine_mapping);
3218 start_info_t *xen_start_info;
3219 EXPORT_SYMBOL(xen_start_info);
3221 -void __init add_memory_region(unsigned long long start,
3222 - unsigned long long size, int type)
3226 - if (!efi_enabled) {
3229 - if (x == E820MAX) {
3230 - printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
3234 - e820.map[x].addr = start;
3235 - e820.map[x].size = size;
3236 - e820.map[x].type = type;
3239 -} /* add_memory_region */
3241 -static void __init limit_regions(unsigned long long size)
3243 - unsigned long long current_addr = 0;
3246 - if (efi_enabled) {
3247 - efi_memory_desc_t *md;
3250 - for (p = memmap.map, i = 0; p < memmap.map_end;
3251 - p += memmap.desc_size, i++) {
3253 - current_addr = md->phys_addr + (md->num_pages << 12);
3254 - if (md->type == EFI_CONVENTIONAL_MEMORY) {
3255 - if (current_addr >= size) {
3257 - (((current_addr-size) + PAGE_SIZE-1) >> PAGE_SHIFT);
3258 - memmap.nr_map = i + 1;
3264 - for (i = 0; i < e820.nr_map; i++) {
3265 - current_addr = e820.map[i].addr + e820.map[i].size;
3266 - if (current_addr < size)
3269 - if (e820.map[i].type != E820_RAM)
3272 - if (e820.map[i].addr >= size) {
3274 - * This region starts past the end of the
3275 - * requested size, skip it completely.
3279 - e820.nr_map = i + 1;
3280 - e820.map[i].size -= current_addr - size;
3285 - if (i==e820.nr_map && current_addr < size) {
3287 - * The e820 map finished before our requested size so
3288 - * extend the final entry to the requested address.
3291 - if (e820.map[i].type == E820_RAM)
3292 - e820.map[i].size -= current_addr - size;
3294 - add_memory_region(current_addr, size - current_addr, E820_RAM);
3299 -#define E820_DEBUG 1
3301 -static void __init print_memory_map(char *who)
3305 - for (i = 0; i < e820.nr_map; i++) {
3306 - printk(" %s: %016Lx - %016Lx ", who,
3308 - e820.map[i].addr + e820.map[i].size);
3309 - switch (e820.map[i].type) {
3310 - case E820_RAM: printk("(usable)\n");
3312 - case E820_RESERVED:
3313 - printk("(reserved)\n");
3316 - printk("(ACPI data)\n");
3319 - printk("(ACPI NVS)\n");
3321 - default: printk("type %lu\n", e820.map[i].type);
3328 - * Sanitize the BIOS e820 map.
3330 - * Some e820 responses include overlapping entries. The following
3331 - * replaces the original e820 map with a new one, removing overlaps.
3334 -struct change_member {
3335 - struct e820entry *pbios; /* pointer to original bios entry */
3336 - unsigned long long addr; /* address for this change point */
3338 -static struct change_member change_point_list[2*E820MAX] __initdata;
3339 -static struct change_member *change_point[2*E820MAX] __initdata;
3340 -static struct e820entry *overlap_list[E820MAX] __initdata;
3341 -static struct e820entry new_bios[E820MAX] __initdata;
3343 -int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
3345 - struct change_member *change_tmp;
3346 - unsigned long current_type, last_type;
3347 - unsigned long long last_addr;
3348 - int chgidx, still_changing;
3349 - int overlap_entries;
3350 - int new_bios_entry;
3351 - int old_nr, new_nr, chg_nr;
3355 - Visually we're performing the following (1,2,3,4 = memory types)...
3357 - Sample memory map (w/overlaps):
3358 - ____22__________________
3359 - ______________________4_
3360 - ____1111________________
3361 - _44_____________________
3362 - 11111111________________
3363 - ____________________33__
3364 - ___________44___________
3365 - __________33333_________
3366 - ______________22________
3367 - ___________________2222_
3368 - _________111111111______
3369 - _____________________11_
3370 - _________________4______
3372 - Sanitized equivalent (no overlap):
3373 - 1_______________________
3374 - _44_____________________
3375 - ___1____________________
3376 - ____22__________________
3377 - ______11________________
3378 - _________1______________
3379 - __________3_____________
3380 - ___________44___________
3381 - _____________33_________
3382 - _______________2________
3383 - ________________1_______
3384 - _________________4______
3385 - ___________________2____
3386 - ____________________33__
3387 - ______________________4_
3390 - /* if there's only one memory region, don't bother */
3394 - old_nr = *pnr_map;
3396 - /* bail out if we find any unreasonable addresses in bios map */
3397 - for (i=0; i<old_nr; i++)
3398 - if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
3401 - /* create pointers for initial change-point information (for sorting) */
3402 - for (i=0; i < 2*old_nr; i++)
3403 - change_point[i] = &change_point_list[i];
3405 - /* record all known change-points (starting and ending addresses),
3406 - omitting those that are for empty memory regions */
3408 - for (i=0; i < old_nr; i++) {
3409 - if (biosmap[i].size != 0) {
3410 - change_point[chgidx]->addr = biosmap[i].addr;
3411 - change_point[chgidx++]->pbios = &biosmap[i];
3412 - change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size;
3413 - change_point[chgidx++]->pbios = &biosmap[i];
3416 - chg_nr = chgidx; /* true number of change-points */
3418 - /* sort change-point list by memory addresses (low -> high) */
3419 - still_changing = 1;
3420 - while (still_changing) {
3421 - still_changing = 0;
3422 - for (i=1; i < chg_nr; i++) {
3423 - /* if <current_addr> > <last_addr>, swap */
3424 - /* or, if current=<start_addr> & last=<end_addr>, swap */
3425 - if ((change_point[i]->addr < change_point[i-1]->addr) ||
3426 - ((change_point[i]->addr == change_point[i-1]->addr) &&
3427 - (change_point[i]->addr == change_point[i]->pbios->addr) &&
3428 - (change_point[i-1]->addr != change_point[i-1]->pbios->addr))
3431 - change_tmp = change_point[i];
3432 - change_point[i] = change_point[i-1];
3433 - change_point[i-1] = change_tmp;
3439 - /* create a new bios memory map, removing overlaps */
3440 - overlap_entries=0; /* number of entries in the overlap table */
3441 - new_bios_entry=0; /* index for creating new bios map entries */
3442 - last_type = 0; /* start with undefined memory type */
3443 - last_addr = 0; /* start with 0 as last starting address */
3444 - /* loop through change-points, determining affect on the new bios map */
3445 - for (chgidx=0; chgidx < chg_nr; chgidx++)
3447 - /* keep track of all overlapping bios entries */
3448 - if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr)
3450 - /* add map entry to overlap list (> 1 entry implies an overlap) */
3451 - overlap_list[overlap_entries++]=change_point[chgidx]->pbios;
3455 - /* remove entry from list (order independent, so swap with last) */
3456 - for (i=0; i<overlap_entries; i++)
3458 - if (overlap_list[i] == change_point[chgidx]->pbios)
3459 - overlap_list[i] = overlap_list[overlap_entries-1];
3461 - overlap_entries--;
3463 - /* if there are overlapping entries, decide which "type" to use */
3464 - /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */
3466 - for (i=0; i<overlap_entries; i++)
3467 - if (overlap_list[i]->type > current_type)
3468 - current_type = overlap_list[i]->type;
3469 - /* continue building up new bios map based on this information */
3470 - if (current_type != last_type) {
3471 - if (last_type != 0) {
3472 - new_bios[new_bios_entry].size =
3473 - change_point[chgidx]->addr - last_addr;
3474 - /* move forward only if the new size was non-zero */
3475 - if (new_bios[new_bios_entry].size != 0)
3476 - if (++new_bios_entry >= E820MAX)
3477 - break; /* no more space left for new bios entries */
3479 - if (current_type != 0) {
3480 - new_bios[new_bios_entry].addr = change_point[chgidx]->addr;
3481 - new_bios[new_bios_entry].type = current_type;
3482 - last_addr=change_point[chgidx]->addr;
3484 - last_type = current_type;
3487 - new_nr = new_bios_entry; /* retain count for new bios entries */
3489 - /* copy new bios mapping into original location */
3490 - memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry));
3491 - *pnr_map = new_nr;
3497 - * Copy the BIOS e820 map into a safe place.
3499 - * Sanity-check it while we're at it..
3501 - * If we're lucky and live on a modern system, the setup code
3502 - * will have given us a memory map that we can use to properly
3503 - * set up memory. If we aren't, we'll fake a memory map.
3505 - * We check to see that the memory map contains at least 2 elements
3506 - * before we'll use it, because the detection code in setup.S may
3507 - * not be perfect and most every PC known to man has two memory
3508 - * regions: one from 0 to 640k, and one from 1mb up. (The IBM
3509 - * thinkpad 560x, for example, does not cooperate with the memory
3510 - * detection code.)
3512 -int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
3515 - /* Only one memory region (or negative)? Ignore it */
3519 - BUG_ON(nr_map < 1);
3523 - unsigned long long start = biosmap->addr;
3524 - unsigned long long size = biosmap->size;
3525 - unsigned long long end = start + size;
3526 - unsigned long type = biosmap->type;
3528 - /* Overflow in 64 bits? Ignore the memory map. */
3534 - * Some BIOSes claim RAM in the 640k - 1M region.
3535 - * Not right. Fix it up.
3537 - if (type == E820_RAM) {
3538 - if (start < 0x100000ULL && end > 0xA0000ULL) {
3539 - if (start < 0xA0000ULL)
3540 - add_memory_region(start, 0xA0000ULL-start, type);
3541 - if (end <= 0x100000ULL)
3543 - start = 0x100000ULL;
3544 - size = end - start;
3548 - add_memory_region(start, size, type);
3549 - } while (biosmap++,--nr_map);
3552 - if (is_initial_xendomain()) {
3553 - struct xen_memory_map memmap;
3555 - memmap.nr_entries = E820MAX;
3556 - set_xen_guest_handle(memmap.buffer, machine_e820.map);
3558 - if (HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap))
3560 - machine_e820.nr_map = memmap.nr_entries;
3562 - machine_e820 = e820;
3568 #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
3570 #ifdef CONFIG_EDD_MODULE
3571 @@ -758,7 +192,7 @@ static inline void copy_edd(void)
3575 -static int __initdata user_defined_memmap = 0;
3576 +int __initdata user_defined_memmap = 0;
3579 * "mem=nopentium" disables the 4MB page tables.
3580 @@ -795,51 +229,6 @@ static int __init parse_mem(char *arg)
3582 early_param("mem", parse_mem);
3584 -static int __init parse_memmap(char *arg)
3589 - if (strcmp(arg, "exactmap") == 0) {
3590 -#ifdef CONFIG_CRASH_DUMP
3591 - /* If we are doing a crash dump, we
3592 - * still need to know the real mem
3593 - * size before original memory map is
3597 - saved_max_pfn = max_pfn;
3600 - user_defined_memmap = 1;
3602 - /* If the user specifies memory size, we
3603 - * limit the BIOS-provided memory map to
3604 - * that size. exactmap can be used to specify
3605 - * the exact map. mem=number can be used to
3606 - * trim the existing memory map.
3608 - unsigned long long start_at, mem_size;
3610 - mem_size = memparse(arg, &arg);
3611 - if (*arg == '@') {
3612 - start_at = memparse(arg+1, &arg);
3613 - add_memory_region(start_at, mem_size, E820_RAM);
3614 - } else if (*arg == '#') {
3615 - start_at = memparse(arg+1, &arg);
3616 - add_memory_region(start_at, mem_size, E820_ACPI);
3617 - } else if (*arg == '$') {
3618 - start_at = memparse(arg+1, &arg);
3619 - add_memory_region(start_at, mem_size, E820_RESERVED);
3621 - limit_regions(mem_size);
3622 - user_defined_memmap = 1;
3627 -early_param("memmap", parse_memmap);
3629 #ifdef CONFIG_PROC_VMCORE
3630 /* elfcorehdr= specifies the location of elf core header
3631 * stored by the crashed kernel.
3632 @@ -906,127 +295,6 @@ early_param("reservetop", parse_reservet
3636 - * Callback for efi_memory_walk.
3639 -efi_find_max_pfn(unsigned long start, unsigned long end, void *arg)
3641 - unsigned long *max_pfn = arg, pfn;
3643 - if (start < end) {
3644 - pfn = PFN_UP(end -1);
3645 - if (pfn > *max_pfn)
3652 -efi_memory_present_wrapper(unsigned long start, unsigned long end, void *arg)
3654 - memory_present(0, PFN_UP(start), PFN_DOWN(end));
3659 - * This function checks if any part of the range <start,end> is mapped
3663 -e820_any_mapped(u64 start, u64 end, unsigned type)
3668 - for (i = 0; i < e820.nr_map; i++) {
3669 - const struct e820entry *ei = &e820.map[i];
3671 - if (!is_initial_xendomain())
3673 - for (i = 0; i < machine_e820.nr_map; ++i) {
3674 - const struct e820entry *ei = &machine_e820.map[i];
3677 - if (type && ei->type != type)
3679 - if (ei->addr >= end || ei->addr + ei->size <= start)
3685 -EXPORT_SYMBOL_GPL(e820_any_mapped);
3688 - * This function checks if the entire range <start,end> is mapped with type.
3690 - * Note: this function only works correct if the e820 table is sorted and
3691 - * not-overlapping, which is the case
3694 -e820_all_mapped(unsigned long s, unsigned long e, unsigned type)
3701 - for (i = 0; i < e820.nr_map; i++) {
3702 - struct e820entry *ei = &e820.map[i];
3704 - if (!is_initial_xendomain())
3706 - for (i = 0; i < machine_e820.nr_map; ++i) {
3707 - const struct e820entry *ei = &machine_e820.map[i];
3709 - if (type && ei->type != type)
3711 - /* is the region (part) in overlap with the current region ?*/
3712 - if (ei->addr >= end || ei->addr + ei->size <= start)
3714 - /* if the region is at the beginning of <start,end> we move
3715 - * start to the end of the region since it's ok until there
3717 - if (ei->addr <= start)
3718 - start = ei->addr + ei->size;
3719 - /* if start is now at or beyond end, we're done, full
3722 - return 1; /* we're done */
3728 - * Find the highest page frame number we have available
3730 -void __init find_max_pfn(void)
3735 - if (efi_enabled) {
3736 - efi_memmap_walk(efi_find_max_pfn, &max_pfn);
3737 - efi_memmap_walk(efi_memory_present_wrapper, NULL);
3741 - for (i = 0; i < e820.nr_map; i++) {
3742 - unsigned long start, end;
3744 - if (e820.map[i].type != E820_RAM)
3746 - start = PFN_UP(e820.map[i].addr);
3747 - end = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
3750 - if (end > max_pfn)
3752 - memory_present(0, start, end);
3757 * Determine low and high memory ranges:
3759 unsigned long __init find_max_low_pfn(void)
3760 @@ -1085,77 +353,6 @@ unsigned long __init find_max_low_pfn(vo
3765 - * Free all available memory for boot time allocation. Used
3766 - * as a callback function by efi_memory_walk()
3770 -free_available_memory(unsigned long start, unsigned long end, void *arg)
3772 - /* check max_low_pfn */
3773 - if (start >= (max_low_pfn << PAGE_SHIFT))
3775 - if (end >= (max_low_pfn << PAGE_SHIFT))
3776 - end = max_low_pfn << PAGE_SHIFT;
3778 - free_bootmem(start, end - start);
3783 - * Register fully available low RAM pages with the bootmem allocator.
3785 -static void __init register_bootmem_low_pages(unsigned long max_low_pfn)
3789 - if (efi_enabled) {
3790 - efi_memmap_walk(free_available_memory, NULL);
3793 - for (i = 0; i < e820.nr_map; i++) {
3794 - unsigned long curr_pfn, last_pfn, size;
3796 - * Reserve usable low memory
3798 - if (e820.map[i].type != E820_RAM)
3801 - * We are rounding up the start address of usable memory:
3803 - curr_pfn = PFN_UP(e820.map[i].addr);
3804 - if (curr_pfn >= max_low_pfn)
3807 - * ... and at the end of the usable range downwards:
3809 - last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
3813 - * Truncate to the number of actual pages currently
3816 - if (last_pfn > xen_start_info->nr_pages)
3817 - last_pfn = xen_start_info->nr_pages;
3820 - if (last_pfn > max_low_pfn)
3821 - last_pfn = max_low_pfn;
3824 - * .. finally, did all the rounding and playing
3825 - * around just make the area go away?
3827 - if (last_pfn <= curr_pfn)
3830 - size = last_pfn - curr_pfn;
3831 - free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size));
3837 * workaround for Dell systems that neglect to reserve EBDA
3838 @@ -1245,8 +442,8 @@ void __init setup_bootmem_allocator(void
3839 * the (very unlikely) case of us accidentally initializing the
3840 * bootmem allocator with an invalid RAM area.
3842 - reserve_bootmem(__PHYSICAL_START, (PFN_PHYS(min_low_pfn) +
3843 - bootmap_size + PAGE_SIZE-1) - (__PHYSICAL_START));
3844 + reserve_bootmem(__pa_symbol(_text), (PFN_PHYS(min_low_pfn) +
3845 + bootmap_size + PAGE_SIZE-1) - __pa_symbol(_text));
3849 @@ -1328,160 +525,6 @@ void __init remapped_pgdat_init(void)
3854 - * Request address space for all standard RAM and ROM resources
3855 - * and also for regions reported as reserved by the e820.
3858 -legacy_init_iomem_resources(struct e820entry *e820, int nr_map,
3859 - struct resource *code_resource,
3860 - struct resource *data_resource)
3866 - for (i = 0; i < nr_map; i++) {
3867 - struct resource *res;
3868 -#ifndef CONFIG_RESOURCES_64BIT
3869 - if (e820[i].addr + e820[i].size > 0x100000000ULL)
3872 - res = kzalloc(sizeof(struct resource), GFP_ATOMIC);
3873 - switch (e820[i].type) {
3874 - case E820_RAM: res->name = "System RAM"; break;
3875 - case E820_ACPI: res->name = "ACPI Tables"; break;
3876 - case E820_NVS: res->name = "ACPI Non-volatile Storage"; break;
3877 - default: res->name = "reserved";
3879 - res->start = e820[i].addr;
3880 - res->end = res->start + e820[i].size - 1;
3881 - res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
3882 - if (request_resource(&iomem_resource, res)) {
3886 - if (e820[i].type == E820_RAM) {
3888 - * We don't know which RAM region contains kernel data,
3889 - * so we try it repeatedly and let the resource manager
3893 - request_resource(res, code_resource);
3894 - request_resource(res, data_resource);
3896 -#ifdef CONFIG_KEXEC
3897 - if (crashk_res.start != crashk_res.end)
3898 - request_resource(res, &crashk_res);
3900 - xen_machine_kexec_register_resources(res);
3908 - * Locate a unused range of the physical address space below 4G which
3909 - * can be used for PCI mappings.
3912 -e820_setup_gap(struct e820entry *e820, int nr_map)
3914 - unsigned long gapstart, gapsize, round;
3915 - unsigned long long last;
3919 - * Search for the bigest gap in the low 32 bits of the e820
3922 - last = 0x100000000ull;
3923 - gapstart = 0x10000000;
3924 - gapsize = 0x400000;
3926 - while (--i >= 0) {
3927 - unsigned long long start = e820[i].addr;
3928 - unsigned long long end = start + e820[i].size;
3931 - * Since "last" is at most 4GB, we know we'll
3932 - * fit in 32 bits if this condition is true
3935 - unsigned long gap = last - end;
3937 - if (gap > gapsize) {
3947 - * See how much we want to round up: start off with
3948 - * rounding to the next 1MB area.
3951 - while ((gapsize >> 4) > round)
3953 - /* Fun with two's complement */
3954 - pci_mem_start = (gapstart + round) & -round;
3956 - printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n",
3957 - pci_mem_start, gapstart, gapsize);
3961 - * Request address space for all standard resources
3963 - * This is called just before pcibios_init(), which is also a
3964 - * subsys_initcall, but is linked in later (in arch/i386/pci/common.c).
3966 -static int __init request_standard_resources(void)
3970 - /* Nothing to do if not running in dom0. */
3971 - if (!is_initial_xendomain())
3974 - printk("Setting up standard PCI resources\n");
3976 - legacy_init_iomem_resources(machine_e820.map, machine_e820.nr_map,
3977 - &code_resource, &data_resource);
3980 - efi_initialize_iomem_resources(&code_resource, &data_resource);
3982 - legacy_init_iomem_resources(e820.map, e820.nr_map,
3983 - &code_resource, &data_resource);
3986 - /* EFI systems may still have VGA */
3987 - request_resource(&iomem_resource, &video_ram_resource);
3989 - /* request I/O space for devices used on all i[345]86 PCs */
3990 - for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++)
3991 - request_resource(&ioport_resource, &standard_io_resources[i]);
3995 -subsys_initcall(request_standard_resources);
3997 -static void __init register_memory(void)
4000 - if (is_initial_xendomain())
4001 - e820_setup_gap(machine_e820.map, machine_e820.nr_map);
4004 - e820_setup_gap(e820.map, e820.nr_map);
4008 static void set_mca_bus(int x)
4010 @@ -1491,6 +534,12 @@ static void set_mca_bus(int x)
4011 static void set_mca_bus(int x) { }
4014 +/* Overridden in paravirt.c if CONFIG_PARAVIRT */
4015 +char * __init __attribute__((weak)) memory_setup(void)
4017 + return machine_specific_memory_setup();
4021 * Determine if we were loaded by an EFI loader. If so, then we have also been
4022 * passed the efi memmap, systab, etc., so we should use these data structures
4023 @@ -1578,7 +627,7 @@ void __init setup_arch(char **cmdline_p)
4026 printk(KERN_INFO "BIOS-provided physical RAM map:\n");
4027 - print_memory_map(machine_specific_memory_setup());
4028 + print_memory_map(memory_setup());
4032 @@ -1757,7 +806,7 @@ void __init setup_arch(char **cmdline_p)
4036 - register_memory();
4037 + e820_register_memory();
4039 if (is_initial_xendomain()) {
4041 Index: head-2008-12-01/arch/x86/kernel/smp_32-xen.c
4042 ===================================================================
4043 --- head-2008-12-01.orig/arch/x86/kernel/smp_32-xen.c 2008-12-01 11:29:05.000000000 +0100
4044 +++ head-2008-12-01/arch/x86/kernel/smp_32-xen.c 2008-12-01 11:32:38.000000000 +0100
4045 @@ -659,6 +659,10 @@ int smp_call_function_single(int cpu, vo
4050 + /* Can deadlock when called with interrupts disabled */
4051 + WARN_ON(irqs_disabled());
4053 spin_lock_bh(&call_lock);
4054 __smp_call_function_single(cpu, func, info, nonatomic, wait);
4055 spin_unlock_bh(&call_lock);
4056 Index: head-2008-12-01/arch/x86/kernel/time_32-xen.c
4057 ===================================================================
4058 --- head-2008-12-01.orig/arch/x86/kernel/time_32-xen.c 2008-12-01 11:29:05.000000000 +0100
4059 +++ head-2008-12-01/arch/x86/kernel/time_32-xen.c 2008-12-01 11:32:38.000000000 +0100
4061 #include <asm/uaccess.h>
4062 #include <asm/processor.h>
4063 #include <asm/timer.h>
4064 +#include <asm/time.h>
4065 #include <asm/sections.h>
4067 #include "mach_time.h"
4068 @@ -129,11 +130,11 @@ static DEFINE_PER_CPU(struct vcpu_runsta
4069 /* Must be signed, as it's compared with s64 quantities which can be -ve. */
4070 #define NS_PER_TICK (1000000000LL/HZ)
4072 -static void __clock_was_set(void *unused)
4073 +static void __clock_was_set(struct work_struct *unused)
4077 -static DECLARE_WORK(clock_was_set_work, __clock_was_set, NULL);
4078 +static DECLARE_WORK(clock_was_set_work, __clock_was_set);
4081 * GCC 4.3 can turn loops over an induction variable into division. We do
4082 @@ -544,10 +545,7 @@ static int set_rtc_mmss(unsigned long no
4083 /* gets recalled with irq locally disabled */
4084 /* XXX - does irqsave resolve this? -johnstul */
4085 spin_lock_irqsave(&rtc_lock, flags);
4087 - retval = efi_set_rtc_mmss(nowtime);
4089 - retval = mach_set_rtc_mmss(nowtime);
4090 + retval = set_wallclock(nowtime);
4091 spin_unlock_irqrestore(&rtc_lock, flags);
4094 @@ -874,10 +872,7 @@ unsigned long get_cmos_time(void)
4096 spin_lock_irqsave(&rtc_lock, flags);
4099 - retval = efi_get_time();
4101 - retval = mach_get_cmos_time();
4102 + retval = get_wallclock();
4104 spin_unlock_irqrestore(&rtc_lock, flags);
4106 @@ -979,7 +974,7 @@ static void __init hpet_time_init(void)
4107 printk("Using HPET for base-timer\n");
4115 Index: head-2008-12-01/arch/x86/kernel/traps_32-xen.c
4116 ===================================================================
4117 --- head-2008-12-01.orig/arch/x86/kernel/traps_32-xen.c 2008-12-01 11:29:05.000000000 +0100
4118 +++ head-2008-12-01/arch/x86/kernel/traps_32-xen.c 2008-12-01 11:32:38.000000000 +0100
4120 #include <linux/kexec.h>
4121 #include <linux/unwind.h>
4122 #include <linux/uaccess.h>
4123 +#include <linux/nmi.h>
4124 +#include <linux/bug.h>
4127 #include <linux/ioport.h>
4128 @@ -61,9 +63,6 @@ int panic_on_unrecovered_nmi;
4130 asmlinkage int system_call(void);
4132 -struct desc_struct default_ldt[] = { { 0, 0 }, { 0, 0 }, { 0, 0 },
4133 - { 0, 0 }, { 0, 0 } };
4135 /* Do we ignore FPU interrupts ? */
4136 char ignore_fpu_irq = 0;
4138 @@ -100,12 +99,7 @@ asmlinkage void fixup_4gb_segment(void);
4140 asmlinkage void machine_check(void);
4142 -static int kstack_depth_to_print = 24;
4143 -#ifdef CONFIG_STACK_UNWIND
4144 -static int call_trace = 1;
4146 -#define call_trace (-1)
4148 +int kstack_depth_to_print = 24;
4149 ATOMIC_NOTIFIER_HEAD(i386die_chain);
4151 int register_die_notifier(struct notifier_block *nb)
4152 @@ -159,25 +153,7 @@ static inline unsigned long print_contex
4156 -struct ops_and_data {
4157 - struct stacktrace_ops *ops;
4161 -static asmlinkage int
4162 -dump_trace_unwind(struct unwind_frame_info *info, void *data)
4164 - struct ops_and_data *oad = (struct ops_and_data *)data;
4167 - while (unwind(info) == 0 && UNW_PC(info)) {
4169 - oad->ops->address(oad->data, UNW_PC(info));
4170 - if (arch_unw_user_mode(info))
4175 +#define MSG(msg) ops->warning(data, msg)
4177 void dump_trace(struct task_struct *task, struct pt_regs *regs,
4178 unsigned long *stack,
4179 @@ -188,39 +164,6 @@ void dump_trace(struct task_struct *task
4183 - if (call_trace >= 0) {
4185 - struct unwind_frame_info info;
4186 - struct ops_and_data oad = { .ops = ops, .data = data };
4189 - if (unwind_init_frame_info(&info, task, regs) == 0)
4190 - unw_ret = dump_trace_unwind(&info, &oad);
4191 - } else if (task == current)
4192 - unw_ret = unwind_init_running(&info, dump_trace_unwind, &oad);
4194 - if (unwind_init_blocked(&info, task) == 0)
4195 - unw_ret = dump_trace_unwind(&info, &oad);
4197 - if (unw_ret > 0) {
4198 - if (call_trace == 1 && !arch_unw_user_mode(&info)) {
4199 - ops->warning_symbol(data, "DWARF2 unwinder stuck at %s\n",
4201 - if (UNW_SP(&info) >= PAGE_OFFSET) {
4202 - ops->warning(data, "Leftover inexact backtrace:\n");
4203 - stack = (void *)UNW_SP(&info);
4206 - ebp = UNW_FP(&info);
4208 - ops->warning(data, "Full inexact backtrace again:\n");
4209 - } else if (call_trace >= 1)
4212 - ops->warning(data, "Full inexact backtrace again:\n");
4214 - ops->warning(data, "Inexact backtrace:\n");
4217 unsigned long dummy;
4219 @@ -253,6 +196,7 @@ void dump_trace(struct task_struct *task
4220 stack = (unsigned long*)context->previous_esp;
4223 + touch_nmi_watchdog();
4226 EXPORT_SYMBOL(dump_trace);
4227 @@ -385,7 +329,7 @@ void show_registers(struct pt_regs *regs
4228 * time of the fault..
4233 int code_bytes = 64;
4236 @@ -394,18 +338,20 @@ void show_registers(struct pt_regs *regs
4238 printk(KERN_EMERG "Code: ");
4240 - eip = (u8 __user *)regs->eip - 43;
4241 - if (eip < (u8 __user *)PAGE_OFFSET || __get_user(c, eip)) {
4242 + eip = (u8 *)regs->eip - 43;
4243 + if (eip < (u8 *)PAGE_OFFSET ||
4244 + probe_kernel_address(eip, c)) {
4245 /* try starting at EIP */
4246 - eip = (u8 __user *)regs->eip;
4247 + eip = (u8 *)regs->eip;
4250 for (i = 0; i < code_bytes; i++, eip++) {
4251 - if (eip < (u8 __user *)PAGE_OFFSET || __get_user(c, eip)) {
4252 + if (eip < (u8 *)PAGE_OFFSET ||
4253 + probe_kernel_address(eip, c)) {
4254 printk(" Bad EIP value.");
4257 - if (eip == (u8 __user *)regs->eip)
4258 + if (eip == (u8 *)regs->eip)
4259 printk("<%02x> ", c);
4262 @@ -414,43 +360,22 @@ void show_registers(struct pt_regs *regs
4266 -static void handle_BUG(struct pt_regs *regs)
4267 +int is_valid_bugaddr(unsigned long eip)
4269 - unsigned long eip = regs->eip;
4272 if (eip < PAGE_OFFSET)
4274 - if (probe_kernel_address((unsigned short __user *)eip, ud2))
4276 - if (ud2 != 0x0b0f)
4279 + if (probe_kernel_address((unsigned short *)eip, ud2))
4282 - printk(KERN_EMERG "------------[ cut here ]------------\n");
4284 -#ifdef CONFIG_DEBUG_BUGVERBOSE
4286 - unsigned short line;
4290 - if (probe_kernel_address((unsigned short __user *)(eip + 2),
4293 - if (__get_user(file, (char * __user *)(eip + 4)) ||
4294 - (unsigned long)file < PAGE_OFFSET || __get_user(c, file))
4295 - file = "<bad filename>";
4297 - printk(KERN_EMERG "kernel BUG at %s:%d!\n", file, line);
4301 - printk(KERN_EMERG "Kernel BUG at [verbose debug info unavailable]\n");
4302 + return ud2 == 0x0b0f;
4305 -/* This is gone through when something in the kernel
4306 - * has done something bad and is about to be terminated.
4309 + * This is gone through when something in the kernel has done something bad and
4310 + * is about to be terminated.
4312 void die(const char * str, struct pt_regs * regs, long err)
4315 @@ -458,7 +383,7 @@ void die(const char * str, struct pt_reg
4317 int lock_owner_depth;
4319 - .lock = SPIN_LOCK_UNLOCKED,
4320 + .lock = __SPIN_LOCK_UNLOCKED(die.lock),
4322 .lock_owner_depth = 0
4324 @@ -482,7 +407,8 @@ void die(const char * str, struct pt_reg
4329 + report_bug(regs->eip);
4331 printk(KERN_EMERG "%s: %04lx [#%d]\n", str, err & 0xffff, ++die_counter);
4332 #ifdef CONFIG_PREEMPT
4333 printk(KERN_EMERG "PREEMPT ");
4334 @@ -682,8 +608,7 @@ mem_parity_error(unsigned char reason, s
4336 printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x on "
4337 "CPU %d.\n", reason, smp_processor_id());
4338 - printk(KERN_EMERG "You probably have a hardware problem with your RAM "
4340 + printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n");
4341 if (panic_on_unrecovered_nmi)
4342 panic("NMI: Not continuing");
4344 @@ -741,7 +666,6 @@ void __kprobes die_nmi(struct pt_regs *r
4345 printk(" on CPU%d, eip %08lx, registers:\n",
4346 smp_processor_id(), regs->eip);
4347 show_registers(regs);
4348 - printk(KERN_EMERG "console shuts up ...\n");
4350 spin_unlock(&nmi_print_lock);
4352 @@ -1057,49 +981,24 @@ fastcall void do_spurious_interrupt_bug(
4356 -fastcall void setup_x86_bogus_stack(unsigned char * stk)
4357 +fastcall unsigned long patch_espfix_desc(unsigned long uesp,
4358 + unsigned long kesp)
4360 - unsigned long *switch16_ptr, *switch32_ptr;
4361 - struct pt_regs *regs;
4362 - unsigned long stack_top, stack_bot;
4363 - unsigned short iret_frame16_off;
4364 - int cpu = smp_processor_id();
4365 - /* reserve the space on 32bit stack for the magic switch16 pointer */
4366 - memmove(stk, stk + 8, sizeof(struct pt_regs));
4367 - switch16_ptr = (unsigned long *)(stk + sizeof(struct pt_regs));
4368 - regs = (struct pt_regs *)stk;
4369 - /* now the switch32 on 16bit stack */
4370 - stack_bot = (unsigned long)&per_cpu(cpu_16bit_stack, cpu);
4371 - stack_top = stack_bot + CPU_16BIT_STACK_SIZE;
4372 - switch32_ptr = (unsigned long *)(stack_top - 8);
4373 - iret_frame16_off = CPU_16BIT_STACK_SIZE - 8 - 20;
4374 - /* copy iret frame on 16bit stack */
4375 - memcpy((void *)(stack_bot + iret_frame16_off), ®s->eip, 20);
4376 - /* fill in the switch pointers */
4377 - switch16_ptr[0] = (regs->esp & 0xffff0000) | iret_frame16_off;
4378 - switch16_ptr[1] = __ESPFIX_SS;
4379 - switch32_ptr[0] = (unsigned long)stk + sizeof(struct pt_regs) +
4380 - 8 - CPU_16BIT_STACK_SIZE;
4381 - switch32_ptr[1] = __KERNEL_DS;
4384 -fastcall unsigned char * fixup_x86_bogus_stack(unsigned short sp)
4386 - unsigned long *switch32_ptr;
4387 - unsigned char *stack16, *stack32;
4388 - unsigned long stack_top, stack_bot;
4390 int cpu = smp_processor_id();
4391 - stack_bot = (unsigned long)&per_cpu(cpu_16bit_stack, cpu);
4392 - stack_top = stack_bot + CPU_16BIT_STACK_SIZE;
4393 - switch32_ptr = (unsigned long *)(stack_top - 8);
4394 - /* copy the data from 16bit stack to 32bit stack */
4395 - len = CPU_16BIT_STACK_SIZE - 8 - sp;
4396 - stack16 = (unsigned char *)(stack_bot + sp);
4397 - stack32 = (unsigned char *)
4398 - (switch32_ptr[0] + CPU_16BIT_STACK_SIZE - 8 - len);
4399 - memcpy(stack32, stack16, len);
4401 + struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
4402 + struct desc_struct *gdt = (struct desc_struct *)cpu_gdt_descr->address;
4403 + unsigned long base = (kesp - uesp) & -THREAD_SIZE;
4404 + unsigned long new_kesp = kesp - base;
4405 + unsigned long lim_pages = (new_kesp | (THREAD_SIZE - 1)) >> PAGE_SHIFT;
4406 + __u64 desc = *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS];
4407 + /* Set up base for espfix segment */
4408 + desc &= 0x00f0ff0000000000ULL;
4409 + desc |= ((((__u64)base) << 16) & 0x000000ffffff0000ULL) |
4410 + ((((__u64)base) << 32) & 0xff00000000000000ULL) |
4411 + ((((__u64)lim_pages) << 32) & 0x000f000000000000ULL) |
4412 + (lim_pages & 0xffff);
4413 + *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS] = desc;
4418 @@ -1113,7 +1012,7 @@ fastcall unsigned char * fixup_x86_bogus
4419 * Must be called with kernel preemption disabled (in this case,
4420 * local interrupts are disabled at the call-site in entry.S).
4422 -asmlinkage void math_state_restore(struct pt_regs regs)
4423 +asmlinkage void math_state_restore(void)
4425 struct thread_info *thread = current_thread_info();
4426 struct task_struct *tsk = thread->task;
4427 @@ -1123,6 +1022,7 @@ asmlinkage void math_state_restore(struc
4430 thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */
4431 + tsk->fpu_counter++;
4434 #ifndef CONFIG_MATH_EMULATION
4435 @@ -1234,19 +1134,3 @@ static int __init kstack_setup(char *s)
4438 __setup("kstack=", kstack_setup);
4440 -#ifdef CONFIG_STACK_UNWIND
4441 -static int __init call_trace_setup(char *s)
4443 - if (strcmp(s, "old") == 0)
4445 - else if (strcmp(s, "both") == 0)
4447 - else if (strcmp(s, "newfallback") == 0)
4449 - else if (strcmp(s, "new") == 2)
4453 -__setup("call_trace=", call_trace_setup);
4455 Index: head-2008-12-01/arch/x86/kernel/vmlinux_32.lds.S
4456 ===================================================================
4457 --- head-2008-12-01.orig/arch/x86/kernel/vmlinux_32.lds.S 2008-12-01 10:53:14.000000000 +0100
4458 +++ head-2008-12-01/arch/x86/kernel/vmlinux_32.lds.S 2008-12-01 11:32:38.000000000 +0100
4459 @@ -29,6 +29,12 @@ PHDRS {
4462 . = LOAD_OFFSET + LOAD_PHYSICAL_ADDR;
4464 +#if defined(CONFIG_XEN) && CONFIG_XEN_COMPAT <= 0x030002
4466 +#define LOAD_OFFSET 0
4469 phys_startup_32 = startup_32 - LOAD_OFFSET;
4471 .text.head : AT(ADDR(.text.head) - LOAD_OFFSET) {
4472 Index: head-2008-12-01/arch/x86/kvm/Kconfig
4473 ===================================================================
4474 --- head-2008-12-01.orig/arch/x86/kvm/Kconfig 2008-12-01 10:53:14.000000000 +0100
4475 +++ head-2008-12-01/arch/x86/kvm/Kconfig 2008-12-01 11:32:38.000000000 +0100
4476 @@ -7,6 +7,7 @@ config HAVE_KVM
4477 menuconfig VIRTUALIZATION
4478 bool "Virtualization"
4479 depends on HAVE_KVM || X86
4483 Say Y here to get to see options for using your Linux host to run other
4484 Index: head-2008-12-01/arch/x86/mm/fault_32-xen.c
4485 ===================================================================
4486 --- head-2008-12-01.orig/arch/x86/mm/fault_32-xen.c 2008-12-01 11:29:05.000000000 +0100
4487 +++ head-2008-12-01/arch/x86/mm/fault_32-xen.c 2008-12-01 11:32:38.000000000 +0100
4489 #include <linux/highmem.h>
4490 #include <linux/module.h>
4491 #include <linux/kprobes.h>
4492 +#include <linux/uaccess.h>
4494 #include <asm/system.h>
4495 -#include <asm/uaccess.h>
4496 #include <asm/desc.h>
4497 #include <asm/kdebug.h>
4498 #include <asm/segment.h>
4499 @@ -167,7 +167,7 @@ static inline unsigned long get_segment_
4500 static int __is_prefetch(struct pt_regs *regs, unsigned long addr)
4502 unsigned long limit;
4503 - unsigned long instr = get_segment_eip (regs, &limit);
4504 + unsigned char *instr = (unsigned char *)get_segment_eip (regs, &limit);
4508 @@ -177,9 +177,9 @@ static int __is_prefetch(struct pt_regs
4509 unsigned char instr_hi;
4510 unsigned char instr_lo;
4512 - if (instr > limit)
4513 + if (instr > (unsigned char *)limit)
4515 - if (__get_user(opcode, (unsigned char __user *) instr))
4516 + if (probe_kernel_address(instr, opcode))
4519 instr_hi = opcode & 0xf0;
4520 @@ -204,9 +204,9 @@ static int __is_prefetch(struct pt_regs
4522 /* Prefetch instruction is 0x0F0D or 0x0F18 */
4524 - if (instr > limit)
4525 + if (instr > (unsigned char *)limit)
4527 - if (__get_user(opcode, (unsigned char __user *) instr))
4528 + if (probe_kernel_address(instr, opcode))
4530 prefetch = (instr_lo == 0xF) &&
4531 (opcode == 0x0D || opcode == 0x18);
4532 Index: head-2008-12-01/arch/x86/mm/highmem_32-xen.c
4533 ===================================================================
4534 --- head-2008-12-01.orig/arch/x86/mm/highmem_32-xen.c 2008-12-01 11:29:05.000000000 +0100
4535 +++ head-2008-12-01/arch/x86/mm/highmem_32-xen.c 2008-12-01 11:32:38.000000000 +0100
4536 @@ -32,7 +32,7 @@ static void *__kmap_atomic(struct page *
4537 unsigned long vaddr;
4539 /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
4540 - inc_preempt_count();
4541 + pagefault_disable();
4542 if (!PageHighMem(page))
4543 return page_address(page);
4545 @@ -63,26 +63,22 @@ void kunmap_atomic(void *kvaddr, enum km
4546 unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
4547 enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id();
4549 -#ifdef CONFIG_DEBUG_HIGHMEM
4550 - if (vaddr >= PAGE_OFFSET && vaddr < (unsigned long)high_memory) {
4551 - dec_preempt_count();
4552 - preempt_check_resched();
4556 - if (vaddr != __fix_to_virt(FIX_KMAP_BEGIN+idx))
4560 * Force other mappings to Oops if they'll try to access this pte
4561 * without first remap it. Keeping stale mappings around is a bad idea
4562 * also, in case the page changes cacheability attributes or becomes
4563 * a protected page in a hypervisor.
4565 - kpte_clear_flush(kmap_pte-idx, vaddr);
4566 + if (vaddr == __fix_to_virt(FIX_KMAP_BEGIN+idx))
4567 + kpte_clear_flush(kmap_pte-idx, vaddr);
4569 +#ifdef CONFIG_DEBUG_HIGHMEM
4570 + BUG_ON(vaddr < PAGE_OFFSET);
4571 + BUG_ON(vaddr >= (unsigned long)high_memory);
4575 - dec_preempt_count();
4576 - preempt_check_resched();
4577 + pagefault_enable();
4580 /* This is the same as kmap_atomic() but can map memory that doesn't
4581 @@ -93,7 +89,7 @@ void *kmap_atomic_pfn(unsigned long pfn,
4582 enum fixed_addresses idx;
4583 unsigned long vaddr;
4585 - inc_preempt_count();
4586 + pagefault_disable();
4588 idx = type + KM_TYPE_NR*smp_processor_id();
4589 vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
4590 Index: head-2008-12-01/arch/x86/mm/init_32-xen.c
4591 ===================================================================
4592 --- head-2008-12-01.orig/arch/x86/mm/init_32-xen.c 2008-12-01 11:29:05.000000000 +0100
4593 +++ head-2008-12-01/arch/x86/mm/init_32-xen.c 2008-12-01 11:32:38.000000000 +0100
4594 @@ -233,8 +233,6 @@ static inline int page_kills_ppro(unsign
4598 -extern int is_available_memory(efi_memory_desc_t *);
4600 int page_is_ram(unsigned long pagenr)
4603 @@ -327,7 +325,7 @@ void __init add_one_highpage_init(struct
4604 SetPageReserved(page);
4607 -static int add_one_highpage_hotplug(struct page *page, unsigned long pfn)
4608 +static int __meminit add_one_highpage_hotplug(struct page *page, unsigned long pfn)
4610 free_new_highpage(page, pfn);
4612 @@ -344,7 +342,7 @@ static int add_one_highpage_hotplug(stru
4613 * has been added dynamically that would be
4614 * onlined here is in HIGHMEM
4616 -void online_page(struct page *page)
4617 +void __meminit online_page(struct page *page)
4619 ClearPageReserved(page);
4620 add_one_highpage_hotplug(page, page_to_pfn(page));
4621 @@ -732,16 +730,10 @@ void __init mem_init(void)
4622 set_bit(PG_pinned, &virt_to_page(init_mm.pgd)->flags);
4626 - * this is for the non-NUMA, single node SMP system case.
4627 - * Specifically, in the case of x86, we will always add
4628 - * memory to the highmem for now.
4630 #ifdef CONFIG_MEMORY_HOTPLUG
4631 -#ifndef CONFIG_NEED_MULTIPLE_NODES
4632 int arch_add_memory(int nid, u64 start, u64 size)
4634 - struct pglist_data *pgdata = &contig_page_data;
4635 + struct pglist_data *pgdata = NODE_DATA(nid);
4636 struct zone *zone = pgdata->node_zones + ZONE_HIGHMEM;
4637 unsigned long start_pfn = start >> PAGE_SHIFT;
4638 unsigned long nr_pages = size >> PAGE_SHIFT;
4639 @@ -753,11 +745,11 @@ int remove_memory(u64 start, u64 size)
4644 +EXPORT_SYMBOL_GPL(remove_memory);
4647 -kmem_cache_t *pgd_cache;
4648 -kmem_cache_t *pmd_cache;
4649 +struct kmem_cache *pgd_cache;
4650 +struct kmem_cache *pmd_cache;
4652 void __init pgtable_cache_init(void)
4654 Index: head-2008-12-01/arch/x86/mm/pgtable_32-xen.c
4655 ===================================================================
4656 --- head-2008-12-01.orig/arch/x86/mm/pgtable_32-xen.c 2008-12-01 11:29:05.000000000 +0100
4657 +++ head-2008-12-01/arch/x86/mm/pgtable_32-xen.c 2008-12-01 11:32:38.000000000 +0100
4658 @@ -203,7 +203,7 @@ void pte_free(struct page *pte)
4662 -void pmd_ctor(void *pmd, kmem_cache_t *cache, unsigned long flags)
4663 +void pmd_ctor(void *pmd, struct kmem_cache *cache, unsigned long flags)
4665 memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t));
4667 @@ -243,7 +243,7 @@ static inline void pgd_list_del(pgd_t *p
4668 set_page_private(next, (unsigned long)pprev);
4671 -void pgd_ctor(void *pgd, kmem_cache_t *cache, unsigned long unused)
4672 +void pgd_ctor(void *pgd, struct kmem_cache *cache, unsigned long unused)
4674 unsigned long flags;
4676 @@ -264,7 +264,7 @@ void pgd_ctor(void *pgd, kmem_cache_t *c
4679 /* never called when PTRS_PER_PMD > 1 */
4680 -void pgd_dtor(void *pgd, kmem_cache_t *cache, unsigned long unused)
4681 +void pgd_dtor(void *pgd, struct kmem_cache *cache, unsigned long unused)
4683 unsigned long flags; /* can be called from interrupt context */
4685 Index: head-2008-12-01/arch/x86/pci/irq-xen.c
4686 ===================================================================
4687 --- head-2008-12-01.orig/arch/x86/pci/irq-xen.c 2008-12-01 11:29:05.000000000 +0100
4688 +++ head-2008-12-01/arch/x86/pci/irq-xen.c 2008-12-01 11:32:38.000000000 +0100
4689 @@ -768,7 +768,7 @@ static void __init pirq_find_router(stru
4690 DBG(KERN_DEBUG "PCI: Attempting to find IRQ router for %04x:%04x\n",
4691 rt->rtr_vendor, rt->rtr_device);
4693 - pirq_router_dev = pci_find_slot(rt->rtr_bus, rt->rtr_devfn);
4694 + pirq_router_dev = pci_get_bus_and_slot(rt->rtr_bus, rt->rtr_devfn);
4695 if (!pirq_router_dev) {
4696 DBG(KERN_DEBUG "PCI: Interrupt router not found at "
4697 "%02x:%02x\n", rt->rtr_bus, rt->rtr_devfn);
4698 @@ -788,6 +788,8 @@ static void __init pirq_find_router(stru
4699 pirq_router_dev->vendor,
4700 pirq_router_dev->device,
4701 pci_name(pirq_router_dev));
4703 + /* The device remains referenced for the kernel lifetime */
4706 static struct irq_info *pirq_get_info(struct pci_dev *dev)
4707 Index: head-2008-12-01/arch/x86/kernel/entry_64-xen.S
4708 ===================================================================
4709 --- head-2008-12-01.orig/arch/x86/kernel/entry_64-xen.S 2008-12-01 11:29:05.000000000 +0100
4710 +++ head-2008-12-01/arch/x86/kernel/entry_64-xen.S 2008-12-01 11:32:38.000000000 +0100
4711 @@ -261,7 +261,6 @@ ENTRY(system_call)
4712 movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
4713 GET_THREAD_INFO(%rcx)
4714 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
4715 - CFI_REMEMBER_STATE
4717 cmpq $__NR_syscall_max,%rax
4719 @@ -272,7 +271,6 @@ ENTRY(system_call)
4720 * Syscall return path ending with SYSRET (fast path)
4721 * Has incomplete stack frame and undefined top of stack.
4723 - .globl ret_from_sys_call
4725 movl $_TIF_ALLWORK_MASK,%edi
4727 @@ -282,8 +280,8 @@ sysret_check:
4729 movl threadinfo_flags(%rcx),%edx
4731 - CFI_REMEMBER_STATE
4733 + CFI_REMEMBER_STATE
4735 * sysretq will re-enable interrupts:
4737 @@ -292,10 +290,10 @@ sysret_check:
4739 HYPERVISOR_IRET VGCF_IN_SYSCALL
4742 /* Handle reschedules */
4743 /* edx: work, edi: workmask */
4746 bt $TIF_NEED_RESCHED,%edx
4749 @@ -334,7 +332,6 @@ badsys:
4751 /* Do syscall tracing */
4755 movq $-ENOSYS,RAX(%rsp)
4756 FIXUP_TOP_OF_STACK %rdi
4757 @@ -350,32 +347,13 @@ tracesys:
4758 call *sys_call_table(,%rax,8)
4759 1: movq %rax,RAX-ARGOFFSET(%rsp)
4760 /* Use IRET because user could have changed frame */
4761 - jmp int_ret_from_sys_call
4766 * Syscall return path ending with IRET.
4767 * Has correct top of stack, but partial stack frame.
4769 -ENTRY(int_ret_from_sys_call)
4770 - CFI_STARTPROC simple
4772 - CFI_DEF_CFA rsp,SS+8-ARGOFFSET
4773 - /*CFI_REL_OFFSET ss,SS-ARGOFFSET*/
4774 - CFI_REL_OFFSET rsp,RSP-ARGOFFSET
4775 - /*CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/
4776 - /*CFI_REL_OFFSET cs,CS-ARGOFFSET*/
4777 - CFI_REL_OFFSET rip,RIP-ARGOFFSET
4778 - CFI_REL_OFFSET rdx,RDX-ARGOFFSET
4779 - CFI_REL_OFFSET rcx,RCX-ARGOFFSET
4780 - CFI_REL_OFFSET rax,RAX-ARGOFFSET
4781 - CFI_REL_OFFSET rdi,RDI-ARGOFFSET
4782 - CFI_REL_OFFSET rsi,RSI-ARGOFFSET
4783 - CFI_REL_OFFSET r8,R8-ARGOFFSET
4784 - CFI_REL_OFFSET r9,R9-ARGOFFSET
4785 - CFI_REL_OFFSET r10,R10-ARGOFFSET
4786 - CFI_REL_OFFSET r11,R11-ARGOFFSET
4788 + .globl int_ret_from_sys_call
4789 +int_ret_from_sys_call:
4790 XEN_BLOCK_EVENTS(%rsi)
4792 testb $3,CS-ARGOFFSET(%rsp)
4793 @@ -428,8 +406,6 @@ int_very_careful:
4795 CFI_ADJUST_CFA_OFFSET -8
4796 andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
4797 - XEN_BLOCK_EVENTS(%rsi)
4799 jmp int_restore_rest
4802 @@ -445,7 +421,7 @@ int_restore_rest:
4806 -END(int_ret_from_sys_call)
4810 * Certain special system calls that need to save a complete full stack frame.
4811 @@ -1275,36 +1251,3 @@ ENTRY(call_softirq)
4814 ENDPROC(call_softirq)
4816 -#ifdef CONFIG_STACK_UNWIND
4817 -ENTRY(arch_unwind_init_running)
4819 - movq %r15, R15(%rdi)
4820 - movq %r14, R14(%rdi)
4822 - movq %r13, R13(%rdi)
4823 - movq %r12, R12(%rdi)
4825 - movq %rbp, RBP(%rdi)
4826 - movq %rbx, RBX(%rdi)
4828 - movq %rax, R11(%rdi)
4829 - movq %rax, R10(%rdi)
4830 - movq %rax, R9(%rdi)
4831 - movq %rax, R8(%rdi)
4832 - movq %rax, RAX(%rdi)
4833 - movq %rax, RCX(%rdi)
4834 - movq %rax, RDX(%rdi)
4835 - movq %rax, RSI(%rdi)
4836 - movq %rax, RDI(%rdi)
4837 - movq %rax, ORIG_RAX(%rdi)
4838 - movq %rcx, RIP(%rdi)
4839 - leaq 8(%rsp), %rcx
4840 - movq $__KERNEL_CS, CS(%rdi)
4841 - movq %rax, EFLAGS(%rdi)
4842 - movq %rcx, RSP(%rdi)
4843 - movq $__KERNEL_DS, SS(%rdi)
4846 -ENDPROC(arch_unwind_init_running)
4848 Index: head-2008-12-01/arch/x86/kernel/genapic_64-xen.c
4849 ===================================================================
4850 --- head-2008-12-01.orig/arch/x86/kernel/genapic_64-xen.c 2007-06-12 13:13:01.000000000 +0200
4851 +++ head-2008-12-01/arch/x86/kernel/genapic_64-xen.c 2008-12-01 11:32:38.000000000 +0100
4852 @@ -34,6 +34,7 @@ extern struct genapic apic_physflat;
4855 struct genapic *genapic = &apic_flat;
4856 +struct genapic *genapic_force;
4858 extern struct genapic apic_xen;
4859 struct genapic *genapic = &apic_xen;
4860 @@ -52,6 +53,13 @@ void __init clustered_apic_check(void)
4861 u8 cluster_cnt[NUM_APIC_CLUSTERS];
4864 + /* genapic selection can be forced because of certain quirks.
4866 + if (genapic_force) {
4867 + genapic = genapic_force;
4871 #if defined(CONFIG_ACPI)
4873 * Some x86_64 machines use physical APIC mode regardless of how many
4874 Index: head-2008-12-01/arch/x86/kernel/head64-xen.c
4875 ===================================================================
4876 --- head-2008-12-01.orig/arch/x86/kernel/head64-xen.c 2008-12-01 11:29:05.000000000 +0100
4877 +++ head-2008-12-01/arch/x86/kernel/head64-xen.c 2008-12-01 11:32:38.000000000 +0100
4878 @@ -104,7 +104,10 @@ void __init x86_64_start_kernel(char * r
4879 machine_to_phys_order++;
4882 - for (i = 0; i < 256; i++)
4883 + /* clear bss before set_intr_gate with early_idt_handler */
4886 + for (i = 0; i < IDT_ENTRIES; i++)
4887 set_intr_gate(i, early_idt_handler);
4888 asm volatile("lidt %0" :: "m" (idt_descr));
4890 Index: head-2008-12-01/arch/x86/kernel/io_apic_64-xen.c
4891 ===================================================================
4892 --- head-2008-12-01.orig/arch/x86/kernel/io_apic_64-xen.c 2008-12-01 11:29:05.000000000 +0100
4893 +++ head-2008-12-01/arch/x86/kernel/io_apic_64-xen.c 2008-12-01 11:32:38.000000000 +0100
4894 @@ -202,14 +202,20 @@ static struct IO_APIC_route_entry ioapic
4895 * the interrupt, and we need to make sure the entry is fully populated
4896 * before that happens.
4898 -static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
4900 +__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
4902 - unsigned long flags;
4903 union entry_union eu;
4905 - spin_lock_irqsave(&ioapic_lock, flags);
4906 io_apic_write(apic, 0x11 + 2*pin, eu.w2);
4907 io_apic_write(apic, 0x10 + 2*pin, eu.w1);
4910 +static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
4912 + unsigned long flags;
4913 + spin_lock_irqsave(&ioapic_lock, flags);
4914 + __ioapic_write_entry(apic, pin, e);
4915 spin_unlock_irqrestore(&ioapic_lock, flags);
4918 @@ -720,6 +726,22 @@ static int assign_irq_vector(int irq, cp
4922 +static void __clear_irq_vector(int irq)
4927 + BUG_ON(!irq_vector[irq]);
4929 + vector = irq_vector[irq];
4930 + cpus_and(mask, irq_domain[irq], cpu_online_map);
4931 + for_each_cpu_mask(cpu, mask)
4932 + per_cpu(vector_irq, cpu)[vector] = -1;
4934 + irq_vector[irq] = 0;
4935 + irq_domain[irq] = CPU_MASK_NONE;
4938 void __setup_vector_irq(int cpu)
4940 /* Initialize vector_irq on a new cpu */
4941 @@ -767,26 +789,65 @@ static void ioapic_register_intr(int irq
4942 #define ioapic_register_intr(irq, vector, trigger) evtchn_register_pirq(irq)
4943 #endif /* !CONFIG_XEN */
4945 -static void __init setup_IO_APIC_irqs(void)
4946 +static void __init setup_IO_APIC_irq(int apic, int pin, int idx, int irq)
4948 struct IO_APIC_route_entry entry;
4949 - int apic, pin, idx, irq, first_notcon = 1, vector;
4951 unsigned long flags;
4953 - apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
4955 - for (apic = 0; apic < nr_ioapics; apic++) {
4956 - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
4958 + * add it to the IO-APIC irq-routing table:
4960 + memset(&entry,0,sizeof(entry));
4963 - * add it to the IO-APIC irq-routing table:
4965 - memset(&entry,0,sizeof(entry));
4966 + entry.delivery_mode = INT_DELIVERY_MODE;
4967 + entry.dest_mode = INT_DEST_MODE;
4968 + entry.mask = 0; /* enable IRQ */
4969 + entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
4971 - entry.delivery_mode = INT_DELIVERY_MODE;
4972 - entry.dest_mode = INT_DEST_MODE;
4973 - entry.mask = 0; /* enable IRQ */
4974 + entry.trigger = irq_trigger(idx);
4975 + entry.polarity = irq_polarity(idx);
4977 + if (irq_trigger(idx)) {
4978 + entry.trigger = 1;
4980 entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
4983 + if (/* !apic && */ !IO_APIC_IRQ(irq))
4986 + if (IO_APIC_IRQ(irq)) {
4988 + vector = assign_irq_vector(irq, TARGET_CPUS, &mask);
4992 + entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask);
4993 + entry.vector = vector;
4995 + ioapic_register_intr(irq, vector, IOAPIC_AUTO);
4996 + if (!apic && (irq < 16))
4997 + disable_8259A_irq(irq);
5000 + ioapic_write_entry(apic, pin, entry);
5002 + spin_lock_irqsave(&ioapic_lock, flags);
5003 + set_native_irq_info(irq, TARGET_CPUS);
5004 + spin_unlock_irqrestore(&ioapic_lock, flags);
5008 +static void __init setup_IO_APIC_irqs(void)
5010 + int apic, pin, idx, irq, first_notcon = 1;
5012 + apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
5014 + for (apic = 0; apic < nr_ioapics; apic++) {
5015 + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
5017 idx = find_irq_entry(apic,pin,mp_INT);
5019 @@ -798,39 +859,11 @@ static void __init setup_IO_APIC_irqs(vo
5023 - entry.trigger = irq_trigger(idx);
5024 - entry.polarity = irq_polarity(idx);
5026 - if (irq_trigger(idx)) {
5027 - entry.trigger = 1;
5029 - entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
5032 irq = pin_2_irq(idx, apic, pin);
5033 add_pin_to_irq(irq, apic, pin);
5035 - if (/* !apic && */ !IO_APIC_IRQ(irq))
5038 - if (IO_APIC_IRQ(irq)) {
5040 - vector = assign_irq_vector(irq, TARGET_CPUS, &mask);
5044 - entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask);
5045 - entry.vector = vector;
5046 + setup_IO_APIC_irq(apic, pin, idx, irq);
5048 - ioapic_register_intr(irq, vector, IOAPIC_AUTO);
5049 - if (!apic && (irq < 16))
5050 - disable_8259A_irq(irq);
5052 - ioapic_write_entry(apic, pin, entry);
5054 - spin_lock_irqsave(&ioapic_lock, flags);
5055 - set_native_irq_info(irq, TARGET_CPUS);
5056 - spin_unlock_irqrestore(&ioapic_lock, flags);
5060 @@ -1826,7 +1859,7 @@ void destroy_irq(unsigned int irq)
5061 dynamic_irq_cleanup(irq);
5063 spin_lock_irqsave(&vector_lock, flags);
5064 - irq_vector[irq] = 0;
5065 + __clear_irq_vector(irq);
5066 spin_unlock_irqrestore(&vector_lock, flags);
5069 @@ -2131,7 +2164,15 @@ void __init setup_ioapic_dest(void)
5070 if (irq_entry == -1)
5072 irq = pin_2_irq(irq_entry, ioapic, pin);
5073 - set_ioapic_affinity_irq(irq, TARGET_CPUS);
5075 + /* setup_IO_APIC_irqs could fail to get vector for some device
5076 + * when you have too many devices, because at that time only boot
5079 + if(!irq_vector[irq])
5080 + setup_IO_APIC_irq(ioapic, pin, irq_entry, irq);
5082 + set_ioapic_affinity_irq(irq, TARGET_CPUS);
5086 Index: head-2008-12-01/arch/x86/kernel/irq_64-xen.c
5087 ===================================================================
5088 --- head-2008-12-01.orig/arch/x86/kernel/irq_64-xen.c 2008-12-01 11:29:05.000000000 +0100
5089 +++ head-2008-12-01/arch/x86/kernel/irq_64-xen.c 2008-12-01 11:32:38.000000000 +0100
5090 @@ -120,7 +120,7 @@ asmlinkage unsigned int do_IRQ(struct pt
5092 if (likely(irq < NR_IRQS))
5093 generic_handle_irq(irq);
5095 + else if (printk_ratelimit())
5096 printk(KERN_EMERG "%s: %d.%d No irq handler for irq\n",
5097 __func__, smp_processor_id(), irq);
5099 Index: head-2008-12-01/arch/x86/kernel/mpparse_64-xen.c
5100 ===================================================================
5101 --- head-2008-12-01.orig/arch/x86/kernel/mpparse_64-xen.c 2008-12-01 11:29:05.000000000 +0100
5102 +++ head-2008-12-01/arch/x86/kernel/mpparse_64-xen.c 2008-12-01 11:32:38.000000000 +0100
5104 int smp_found_config;
5105 unsigned int __initdata maxcpus = NR_CPUS;
5107 -int acpi_found_madt;
5110 * Various Linux-internal data structures created from the
5112 Index: head-2008-12-01/arch/x86/kernel/process_64-xen.c
5113 ===================================================================
5114 --- head-2008-12-01.orig/arch/x86/kernel/process_64-xen.c 2008-12-01 11:29:05.000000000 +0100
5115 +++ head-2008-12-01/arch/x86/kernel/process_64-xen.c 2008-12-01 11:32:38.000000000 +0100
5116 @@ -119,29 +119,23 @@ void exit_idle(void)
5117 static void poll_idle (void)
5127 - "i" (_TIF_NEED_RESCHED),
5128 - "m" (current_thread_info()->flags));
5132 static void xen_idle(void)
5134 + current_thread_info()->status &= ~TS_POLLING;
5136 + * TS_POLLING-cleared state must be visible before we
5137 + * test NEED_RESCHED:
5140 local_irq_disable();
5142 - if (need_resched())
5143 - local_irq_enable();
5145 - current_thread_info()->status &= ~TS_POLLING;
5146 - smp_mb__after_clear_bit();
5147 + if (!need_resched())
5149 - current_thread_info()->status |= TS_POLLING;
5152 + local_irq_enable();
5153 + current_thread_info()->status |= TS_POLLING;
5156 #ifdef CONFIG_HOTPLUG_CPU
5157 @@ -181,6 +175,12 @@ void cpu_idle (void)
5158 idle = xen_idle; /* no alternatives */
5159 if (cpu_is_offline(smp_processor_id()))
5162 + * Idle routines should keep interrupts disabled
5163 + * from here on, until they go to idle.
5164 + * Otherwise, idle callbacks can misfire.
5166 + local_irq_disable();
5169 /* In many cases the interrupt that ended idle
5170 Index: head-2008-12-01/arch/x86/kernel/setup_64-xen.c
5171 ===================================================================
5172 --- head-2008-12-01.orig/arch/x86/kernel/setup_64-xen.c 2008-12-01 11:29:05.000000000 +0100
5173 +++ head-2008-12-01/arch/x86/kernel/setup_64-xen.c 2008-12-01 11:32:38.000000000 +0100
5174 @@ -573,8 +573,7 @@ void __init setup_arch(char **cmdline_p)
5175 if (LOADER_TYPE && INITRD_START) {
5176 if (INITRD_START + INITRD_SIZE <= (end_pfn << PAGE_SHIFT)) {
5177 reserve_bootmem_generic(INITRD_START, INITRD_SIZE);
5179 - INITRD_START ? INITRD_START + PAGE_OFFSET : 0;
5180 + initrd_start = INITRD_START + PAGE_OFFSET;
5181 initrd_end = initrd_start+INITRD_SIZE;
5184 @@ -990,11 +989,8 @@ static void __cpuinit init_amd(struct cp
5185 /* Fix cpuid4 emulation for more */
5186 num_cache_leaves = 3;
5188 - /* When there is only one core no need to synchronize RDTSC */
5189 - if (num_possible_cpus() == 1)
5190 - set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
5192 - clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
5193 + /* RDTSC can be speculated around */
5194 + clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
5197 static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
5198 @@ -1093,6 +1089,15 @@ static void __cpuinit init_intel(struct
5199 set_bit(X86_FEATURE_ARCH_PERFMON, &c->x86_capability);
5203 + unsigned int l1, l2;
5204 + rdmsr(MSR_IA32_MISC_ENABLE, l1, l2);
5205 + if (!(l1 & (1<<11)))
5206 + set_bit(X86_FEATURE_BTS, c->x86_capability);
5207 + if (!(l1 & (1<<12)))
5208 + set_bit(X86_FEATURE_PEBS, c->x86_capability);
5211 n = c->extended_cpuid_level;
5212 if (n >= 0x80000008) {
5213 unsigned eax = cpuid_eax(0x80000008);
5214 @@ -1112,7 +1117,10 @@ static void __cpuinit init_intel(struct
5215 set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
5217 set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability);
5218 - set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
5220 + set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
5222 + clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
5223 c->x86_max_cores = intel_num_cpu_cores(c);
5226 Index: head-2008-12-01/arch/x86/kernel/smp_64-xen.c
5227 ===================================================================
5228 --- head-2008-12-01.orig/arch/x86/kernel/smp_64-xen.c 2008-12-01 11:29:05.000000000 +0100
5229 +++ head-2008-12-01/arch/x86/kernel/smp_64-xen.c 2008-12-01 11:32:38.000000000 +0100
5230 @@ -384,12 +384,17 @@ int smp_call_function_single (int cpu, v
5235 + /* Can deadlock when called with interrupts disabled */
5236 + WARN_ON(irqs_disabled());
5238 spin_lock_bh(&call_lock);
5239 __smp_call_function_single(cpu, func, info, nonatomic, wait);
5240 spin_unlock_bh(&call_lock);
5244 +EXPORT_SYMBOL(smp_call_function_single);
5247 * this function sends a 'generic call function' IPI to all other CPUs
5248 Index: head-2008-12-01/arch/x86/kernel/traps_64-xen.c
5249 ===================================================================
5250 --- head-2008-12-01.orig/arch/x86/kernel/traps_64-xen.c 2008-12-01 11:29:05.000000000 +0100
5251 +++ head-2008-12-01/arch/x86/kernel/traps_64-xen.c 2008-12-01 11:32:38.000000000 +0100
5253 #include <linux/kprobes.h>
5254 #include <linux/kexec.h>
5255 #include <linux/unwind.h>
5256 +#include <linux/uaccess.h>
5257 +#include <linux/bug.h>
5259 #include <asm/system.h>
5260 -#include <asm/uaccess.h>
5262 #include <asm/atomic.h>
5263 #include <asm/debugreg.h>
5264 @@ -108,12 +109,7 @@ static inline void preempt_conditional_c
5265 preempt_enable_no_resched();
5268 -static int kstack_depth_to_print = 12;
5269 -#ifdef CONFIG_STACK_UNWIND
5270 -static int call_trace = 1;
5272 -#define call_trace (-1)
5274 +int kstack_depth_to_print = 12;
5276 #ifdef CONFIG_KALLSYMS
5277 void printk_address(unsigned long address)
5278 @@ -218,24 +214,7 @@ static unsigned long *in_exception_stack
5282 -struct ops_and_data {
5283 - struct stacktrace_ops *ops;
5287 -static int dump_trace_unwind(struct unwind_frame_info *info, void *context)
5289 - struct ops_and_data *oad = (struct ops_and_data *)context;
5292 - while (unwind(info) == 0 && UNW_PC(info)) {
5294 - oad->ops->address(oad->data, UNW_PC(info));
5295 - if (arch_unw_user_mode(info))
5300 +#define MSG(txt) ops->warning(data, txt)
5303 * x86-64 can have upto three kernel stacks:
5304 @@ -250,61 +229,24 @@ static inline int valid_stack_ptr(struct
5305 return p > t && p < t + THREAD_SIZE - 3;
5308 -void dump_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * stack,
5309 +void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
5310 + unsigned long *stack,
5311 struct stacktrace_ops *ops, void *data)
5313 - const unsigned cpu = smp_processor_id();
5314 - unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr;
5315 + const unsigned cpu = get_cpu();
5316 + unsigned long *irqstack_end = (unsigned long*)cpu_pda(cpu)->irqstackptr;
5318 struct thread_info *tinfo;
5323 - if (call_trace >= 0) {
5325 - struct unwind_frame_info info;
5326 - struct ops_and_data oad = { .ops = ops, .data = data };
5329 - if (unwind_init_frame_info(&info, tsk, regs) == 0)
5330 - unw_ret = dump_trace_unwind(&info, &oad);
5331 - } else if (tsk == current)
5332 - unw_ret = unwind_init_running(&info, dump_trace_unwind, &oad);
5334 - if (unwind_init_blocked(&info, tsk) == 0)
5335 - unw_ret = dump_trace_unwind(&info, &oad);
5337 - if (unw_ret > 0) {
5338 - if (call_trace == 1 && !arch_unw_user_mode(&info)) {
5339 - ops->warning_symbol(data, "DWARF2 unwinder stuck at %s\n",
5341 - if ((long)UNW_SP(&info) < 0) {
5342 - ops->warning(data, "Leftover inexact backtrace:\n");
5343 - stack = (unsigned long *)UNW_SP(&info);
5347 - ops->warning(data, "Full inexact backtrace again:\n");
5348 - } else if (call_trace >= 1)
5351 - ops->warning(data, "Full inexact backtrace again:\n");
5353 - ops->warning(data, "Inexact backtrace:\n");
5356 unsigned long dummy;
5358 if (tsk && tsk != current)
5359 stack = (unsigned long *)tsk->thread.rsp;
5362 - * Align the stack pointer on word boundary, later loops
5363 - * rely on that (and corruption / debug info bugs can cause
5364 - * unaligned values here):
5366 - stack = (unsigned long *)((unsigned long)stack & ~(sizeof(long)-1));
5369 * Print function call entries within a stack. 'cond' is the
5370 @@ -314,9 +256,9 @@ void dump_trace(struct task_struct *tsk,
5371 #define HANDLE_STACK(cond) \
5373 unsigned long addr = *stack++; \
5374 - if (oops_in_progress ? \
5375 - __kernel_text_address(addr) : \
5376 - kernel_text_address(addr)) { \
5377 + /* Use unlocked access here because except for NMIs \
5378 + we should be already protected against module unloads */ \
5379 + if (__kernel_text_address(addr)) { \
5381 * If the address is either in the text segment of the \
5382 * kernel, or in the region which contains vmalloc'ed \
5383 @@ -379,9 +321,10 @@ void dump_trace(struct task_struct *tsk,
5385 * This handles the process stack:
5387 - tinfo = current_thread_info();
5388 + tinfo = task_thread_info(tsk);
5389 HANDLE_STACK (valid_stack_ptr(tinfo, stack));
5393 EXPORT_SYMBOL(dump_trace);
5395 @@ -518,30 +461,15 @@ bad:
5399 -void handle_BUG(struct pt_regs *regs)
5401 - struct bug_frame f;
5403 - const char *prefix = "";
5404 +int is_valid_bugaddr(unsigned long rip)
5406 + unsigned short ud2;
5408 - if (user_mode(regs))
5410 - if (__copy_from_user(&f, (const void __user *) regs->rip,
5411 - sizeof(struct bug_frame)))
5413 - if (f.filename >= 0 ||
5414 - f.ud2[0] != 0x0f || f.ud2[1] != 0x0b)
5416 - len = __strnlen_user((char *)(long)f.filename, PATH_MAX) - 1;
5417 - if (len < 0 || len >= PATH_MAX)
5418 - f.filename = (int)(long)"unmapped filename";
5419 - else if (len > 50) {
5420 - f.filename += len - 50;
5423 - printk("----------- [cut here ] --------- [please bite here ] ---------\n");
5424 - printk(KERN_ALERT "Kernel BUG at %s%.50s:%d\n", prefix, (char *)(long)f.filename, f.line);
5426 + if (__copy_from_user(&ud2, (const void __user *) rip, sizeof(ud2)))
5429 + return ud2 == 0x0b0f;
5433 void out_of_line_bug(void)
5434 @@ -621,7 +549,9 @@ void die(const char * str, struct pt_reg
5436 unsigned long flags = oops_begin();
5439 + if (!user_mode(regs))
5440 + report_bug(regs->rip);
5442 __die(str, regs, err);
5445 @@ -790,8 +720,7 @@ mem_parity_error(unsigned char reason, s
5447 printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n",
5449 - printk(KERN_EMERG "You probably have a hardware problem with your "
5451 + printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n");
5453 if (panic_on_unrecovered_nmi)
5454 panic("NMI: Not continuing");
5455 @@ -1227,21 +1156,3 @@ static int __init kstack_setup(char *s)
5458 early_param("kstack", kstack_setup);
5460 -#ifdef CONFIG_STACK_UNWIND
5461 -static int __init call_trace_setup(char *s)
5465 - if (strcmp(s, "old") == 0)
5467 - else if (strcmp(s, "both") == 0)
5469 - else if (strcmp(s, "newfallback") == 0)
5471 - else if (strcmp(s, "new") == 0)
5475 -early_param("call_trace", call_trace_setup);
5477 Index: head-2008-12-01/arch/x86/kernel/vsyscall_64-xen.c
5478 ===================================================================
5479 --- head-2008-12-01.orig/arch/x86/kernel/vsyscall_64-xen.c 2008-12-01 11:29:05.000000000 +0100
5480 +++ head-2008-12-01/arch/x86/kernel/vsyscall_64-xen.c 2008-12-01 11:32:38.000000000 +0100
5482 #include <asm/topology.h>
5484 #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
5485 +#define __syscall_clobber "r11","rcx","memory"
5487 int __sysctl_vsyscall __section_sysctl_vsyscall = 1;
5488 seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED;
5489 @@ -224,8 +225,7 @@ out:
5491 static int vsyscall_sysctl_nostrat(ctl_table *t, int __user *name, int nlen,
5492 void __user *oldval, size_t __user *oldlenp,
5493 - void __user *newval, size_t newlen,
5495 + void __user *newval, size_t newlen)
5499 @@ -277,7 +277,6 @@ static void __cpuinit cpu_vsyscall_init(
5500 vsyscall_set_cpu(raw_smp_processor_id());
5503 -#ifdef CONFIG_HOTPLUG_CPU
5504 static int __cpuinit
5505 cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg)
5507 @@ -286,13 +285,13 @@ cpu_vsyscall_notifier(struct notifier_bl
5508 smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 0, 1);
5513 static void __init map_vsyscall(void)
5515 extern char __vsyscall_0;
5516 unsigned long physaddr_page0 = __pa_symbol(&__vsyscall_0);
5518 + /* Note that VSYSCALL_MAPPED_PAGES must agree with the code below. */
5519 __set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_page0, PAGE_KERNEL_VSYSCALL);
5522 Index: head-2008-12-01/arch/x86/mm/fault_64-xen.c
5523 ===================================================================
5524 --- head-2008-12-01.orig/arch/x86/mm/fault_64-xen.c 2008-12-01 11:29:05.000000000 +0100
5525 +++ head-2008-12-01/arch/x86/mm/fault_64-xen.c 2008-12-01 11:32:38.000000000 +0100
5527 #include <linux/compiler.h>
5528 #include <linux/module.h>
5529 #include <linux/kprobes.h>
5530 +#include <linux/uaccess.h>
5532 #include <asm/system.h>
5533 -#include <asm/uaccess.h>
5534 #include <asm/pgalloc.h>
5535 #include <asm/smp.h>
5536 #include <asm/tlbflush.h>
5537 @@ -96,7 +96,7 @@ void bust_spinlocks(int yes)
5538 static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr,
5539 unsigned long error_code)
5541 - unsigned char __user *instr;
5542 + unsigned char *instr;
5545 unsigned char *max_instr;
5546 @@ -116,7 +116,7 @@ static noinline int is_prefetch(struct p
5547 unsigned char instr_hi;
5548 unsigned char instr_lo;
5550 - if (__get_user(opcode, (char __user *)instr))
5551 + if (probe_kernel_address(instr, opcode))
5554 instr_hi = opcode & 0xf0;
5555 @@ -154,7 +154,7 @@ static noinline int is_prefetch(struct p
5557 /* Prefetch instruction is 0x0F0D or 0x0F18 */
5559 - if (__get_user(opcode, (char __user *)instr))
5560 + if (probe_kernel_address(instr, opcode))
5562 prefetch = (instr_lo == 0xF) &&
5563 (opcode == 0x0D || opcode == 0x18);
5564 @@ -170,7 +170,7 @@ static noinline int is_prefetch(struct p
5565 static int bad_address(void *p)
5567 unsigned long dummy;
5568 - return __get_user(dummy, (unsigned long __user *)p);
5569 + return probe_kernel_address((unsigned long *)p, dummy);
5572 void dump_pagetable(unsigned long address)
5573 Index: head-2008-12-01/arch/x86/mm/init_64-xen.c
5574 ===================================================================
5575 --- head-2008-12-01.orig/arch/x86/mm/init_64-xen.c 2008-12-01 11:29:05.000000000 +0100
5576 +++ head-2008-12-01/arch/x86/mm/init_64-xen.c 2008-12-01 11:32:38.000000000 +0100
5577 @@ -1129,14 +1129,15 @@ static __init int x8664_sysctl_init(void
5578 __initcall(x8664_sysctl_init);
5581 -/* A pseudo VMAs to allow ptrace access for the vsyscall page. This only
5582 +/* A pseudo VMA to allow ptrace access for the vsyscall page. This only
5583 covers the 64bit vsyscall page now. 32bit has a real VMA now and does
5584 not need special handling anymore. */
5586 static struct vm_area_struct gate_vma = {
5587 .vm_start = VSYSCALL_START,
5588 - .vm_end = VSYSCALL_END,
5589 - .vm_page_prot = PAGE_READONLY
5590 + .vm_end = VSYSCALL_START + (VSYSCALL_MAPPED_PAGES << PAGE_SHIFT),
5591 + .vm_page_prot = PAGE_READONLY_EXEC,
5592 + .vm_flags = VM_READ | VM_EXEC
5595 struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
5596 Index: head-2008-12-01/arch/x86/mm/pageattr_64-xen.c
5597 ===================================================================
5598 --- head-2008-12-01.orig/arch/x86/mm/pageattr_64-xen.c 2008-12-01 11:29:05.000000000 +0100
5599 +++ head-2008-12-01/arch/x86/mm/pageattr_64-xen.c 2008-12-01 11:32:38.000000000 +0100
5600 @@ -330,34 +330,40 @@ static struct page *split_large_page(uns
5605 -static void flush_kernel_map(void *address)
5606 +static void cache_flush_page(void *adr)
5608 - if (0 && address && cpu_has_clflush) {
5609 - /* is this worth it? */
5611 - for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size)
5612 - asm volatile("clflush (%0)" :: "r" (address + i));
5614 - asm volatile("wbinvd":::"memory");
5616 - __flush_tlb_one(address);
5618 - __flush_tlb_all();
5620 + for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size)
5621 + asm volatile("clflush (%0)" :: "r" (adr + i));
5624 +static void flush_kernel_map(void *arg)
5626 + struct list_head *l = (struct list_head *)arg;
5629 -static inline void flush_map(unsigned long address)
5630 + /* When clflush is available always use it because it is
5631 + much cheaper than WBINVD */
5632 + if (!cpu_has_clflush)
5633 + asm volatile("wbinvd" ::: "memory");
5634 + list_for_each_entry(pg, l, lru) {
5635 + void *adr = page_address(pg);
5636 + if (cpu_has_clflush)
5637 + cache_flush_page(adr);
5638 + __flush_tlb_one(adr);
5642 +static inline void flush_map(struct list_head *l)
5644 - on_each_cpu(flush_kernel_map, (void *)address, 1, 1);
5645 + on_each_cpu(flush_kernel_map, l, 1, 1);
5648 -static struct page *deferred_pages; /* protected by init_mm.mmap_sem */
5649 +static LIST_HEAD(deferred_pages); /* protected by init_mm.mmap_sem */
5651 static inline void save_page(struct page *fpage)
5653 - fpage->lru.next = (struct list_head *)deferred_pages;
5654 - deferred_pages = fpage;
5655 + list_add(&fpage->lru, &deferred_pages);
5659 @@ -487,18 +493,18 @@ int change_page_attr(struct page *page,
5661 void global_flush_tlb(void)
5663 - struct page *dpage;
5664 + struct page *pg, *next;
5665 + struct list_head l;
5667 down_read(&init_mm.mmap_sem);
5668 - dpage = xchg(&deferred_pages, NULL);
5669 + list_replace_init(&deferred_pages, &l);
5670 up_read(&init_mm.mmap_sem);
5672 - flush_map((dpage && !dpage->lru.next) ? (unsigned long)page_address(dpage) : 0);
5674 - struct page *tmp = dpage;
5675 - dpage = (struct page *)dpage->lru.next;
5676 - ClearPagePrivate(tmp);
5680 + list_for_each_entry_safe(pg, next, &l, lru) {
5681 + ClearPagePrivate(pg);
5686 Index: head-2008-12-01/drivers/pci/msi-xen.c
5687 ===================================================================
5688 --- head-2008-12-01.orig/drivers/pci/msi-xen.c 2008-12-01 11:29:05.000000000 +0100
5689 +++ head-2008-12-01/drivers/pci/msi-xen.c 2008-12-01 11:32:38.000000000 +0100
5690 @@ -263,10 +263,8 @@ void disable_msi_mode(struct pci_dev *de
5691 pci_write_config_word(dev, msi_control_reg(pos), control);
5692 dev->msix_enabled = 0;
5694 - if (pci_find_capability(dev, PCI_CAP_ID_EXP)) {
5695 - /* PCI Express Endpoint device detected */
5696 - pci_intx(dev, 1); /* enable intx */
5699 + pci_intx(dev, 1); /* enable intx */
5702 static void enable_msi_mode(struct pci_dev *dev, int pos, int type)
5703 @@ -284,10 +282,8 @@ static void enable_msi_mode(struct pci_d
5704 pci_write_config_word(dev, msi_control_reg(pos), control);
5705 dev->msix_enabled = 1;
5707 - if (pci_find_capability(dev, PCI_CAP_ID_EXP)) {
5708 - /* PCI Express Endpoint device detected */
5709 - pci_intx(dev, 0); /* disable intx */
5712 + pci_intx(dev, 0); /* disable intx */
5716 Index: head-2008-12-01/drivers/xen/balloon/balloon.c
5717 ===================================================================
5718 --- head-2008-12-01.orig/drivers/xen/balloon/balloon.c 2008-12-01 11:29:05.000000000 +0100
5719 +++ head-2008-12-01/drivers/xen/balloon/balloon.c 2008-12-01 11:32:38.000000000 +0100
5720 @@ -97,8 +97,8 @@ extern unsigned long totalhigh_pages;
5721 static LIST_HEAD(ballooned_pages);
5723 /* Main work function, always executed in process context. */
5724 -static void balloon_process(void *unused);
5725 -static DECLARE_WORK(balloon_worker, balloon_process, NULL);
5726 +static void balloon_process(struct work_struct *unused);
5727 +static DECLARE_WORK(balloon_worker, balloon_process);
5728 static struct timer_list balloon_timer;
5730 /* When ballooning out (allocating memory to return to Xen) we don't really
5731 @@ -389,7 +389,7 @@ static int decrease_reservation(unsigned
5732 * by the balloon lock), or with changes to the Xen hard limit, but we will
5733 * recover from these in time.
5735 -static void balloon_process(void *unused)
5736 +static void balloon_process(struct work_struct *unused)
5740 Index: head-2008-12-01/drivers/xen/blkback/blkback.c
5741 ===================================================================
5742 --- head-2008-12-01.orig/drivers/xen/blkback/blkback.c 2008-12-01 11:29:05.000000000 +0100
5743 +++ head-2008-12-01/drivers/xen/blkback/blkback.c 2008-12-01 11:32:38.000000000 +0100
5746 #include <linux/spinlock.h>
5747 #include <linux/kthread.h>
5748 +#include <linux/freezer.h>
5749 #include <linux/list.h>
5750 #include <linux/delay.h>
5751 #include <xen/balloon.h>
5752 Index: head-2008-12-01/drivers/xen/blkback/interface.c
5753 ===================================================================
5754 --- head-2008-12-01.orig/drivers/xen/blkback/interface.c 2007-06-12 13:13:44.000000000 +0200
5755 +++ head-2008-12-01/drivers/xen/blkback/interface.c 2008-12-01 11:32:38.000000000 +0100
5757 #include <xen/evtchn.h>
5758 #include <linux/kthread.h>
5760 -static kmem_cache_t *blkif_cachep;
5761 +static struct kmem_cache *blkif_cachep;
5763 blkif_t *blkif_alloc(domid_t domid)
5765 Index: head-2008-12-01/drivers/xen/blkfront/blkfront.c
5766 ===================================================================
5767 --- head-2008-12-01.orig/drivers/xen/blkfront/blkfront.c 2008-12-01 11:29:05.000000000 +0100
5768 +++ head-2008-12-01/drivers/xen/blkfront/blkfront.c 2008-12-01 11:32:38.000000000 +0100
5769 @@ -70,7 +70,7 @@ static int setup_blkring(struct xenbus_d
5770 static void kick_pending_request_queues(struct blkfront_info *);
5772 static irqreturn_t blkif_int(int irq, void *dev_id);
5773 -static void blkif_restart_queue(void *arg);
5774 +static void blkif_restart_queue(struct work_struct *arg);
5775 static void blkif_recover(struct blkfront_info *);
5776 static void blkif_completion(struct blk_shadow *);
5777 static void blkif_free(struct blkfront_info *, int);
5778 @@ -110,7 +110,7 @@ static int blkfront_probe(struct xenbus_
5780 info->vdevice = vdevice;
5781 info->connected = BLKIF_STATE_DISCONNECTED;
5782 - INIT_WORK(&info->work, blkif_restart_queue, (void *)info);
5783 + INIT_WORK(&info->work, blkif_restart_queue);
5785 for (i = 0; i < BLK_RING_SIZE; i++)
5786 info->shadow[i].req.id = i+1;
5787 @@ -459,9 +459,9 @@ static void kick_pending_request_queues(
5791 -static void blkif_restart_queue(void *arg)
5792 +static void blkif_restart_queue(struct work_struct *arg)
5794 - struct blkfront_info *info = (struct blkfront_info *)arg;
5795 + struct blkfront_info *info = container_of(arg, struct blkfront_info, work);
5796 spin_lock_irq(&blkif_io_lock);
5797 if (info->connected == BLKIF_STATE_CONNECTED)
5798 kick_pending_request_queues(info);
5799 Index: head-2008-12-01/drivers/xen/blktap/blktap.c
5800 ===================================================================
5801 --- head-2008-12-01.orig/drivers/xen/blktap/blktap.c 2008-12-01 11:29:05.000000000 +0100
5802 +++ head-2008-12-01/drivers/xen/blktap/blktap.c 2008-12-01 11:32:38.000000000 +0100
5805 #include <linux/spinlock.h>
5806 #include <linux/kthread.h>
5807 +#include <linux/freezer.h>
5808 #include <linux/list.h>
5809 #include <asm/hypervisor.h>
5811 Index: head-2008-12-01/drivers/xen/blktap/interface.c
5812 ===================================================================
5813 --- head-2008-12-01.orig/drivers/xen/blktap/interface.c 2008-09-15 13:40:15.000000000 +0200
5814 +++ head-2008-12-01/drivers/xen/blktap/interface.c 2008-12-01 11:32:38.000000000 +0100
5817 #include <xen/evtchn.h>
5819 -static kmem_cache_t *blkif_cachep;
5820 +static struct kmem_cache *blkif_cachep;
5822 blkif_t *tap_alloc_blkif(domid_t domid)
5824 Index: head-2008-12-01/drivers/xen/char/mem.c
5825 ===================================================================
5826 --- head-2008-12-01.orig/drivers/xen/char/mem.c 2007-08-06 15:10:49.000000000 +0200
5827 +++ head-2008-12-01/drivers/xen/char/mem.c 2008-12-01 11:32:38.000000000 +0100
5828 @@ -157,7 +157,7 @@ static loff_t memory_lseek(struct file *
5832 - mutex_lock(&file->f_dentry->d_inode->i_mutex);
5833 + mutex_lock(&file->f_path.dentry->d_inode->i_mutex);
5836 file->f_pos = offset;
5837 @@ -172,7 +172,7 @@ static loff_t memory_lseek(struct file *
5841 - mutex_unlock(&file->f_dentry->d_inode->i_mutex);
5842 + mutex_unlock(&file->f_path.dentry->d_inode->i_mutex);
5846 Index: head-2008-12-01/drivers/xen/console/console.c
5847 ===================================================================
5848 --- head-2008-12-01.orig/drivers/xen/console/console.c 2008-12-01 11:29:05.000000000 +0100
5849 +++ head-2008-12-01/drivers/xen/console/console.c 2008-12-01 11:32:38.000000000 +0100
5850 @@ -80,11 +80,6 @@ static int xc_num = -1;
5851 #define XEN_XVC_MAJOR 204
5852 #define XEN_XVC_MINOR 191
5854 -#ifdef CONFIG_MAGIC_SYSRQ
5855 -static unsigned long sysrq_requested;
5856 -extern int sysrq_enabled;
5859 static int __init xencons_setup(char *str)
5862 @@ -339,8 +334,8 @@ void __init dom0_init_screen_info(const
5863 #define DUMMY_TTY(_tty) ((xc_mode == XC_TTY) && \
5864 ((_tty)->index != (xc_num - 1)))
5866 -static struct termios *xencons_termios[MAX_NR_CONSOLES];
5867 -static struct termios *xencons_termios_locked[MAX_NR_CONSOLES];
5868 +static struct ktermios *xencons_termios[MAX_NR_CONSOLES];
5869 +static struct ktermios *xencons_termios_locked[MAX_NR_CONSOLES];
5870 static struct tty_struct *xencons_tty;
5871 static int xencons_priv_irq;
5873 @@ -356,7 +351,9 @@ void xencons_rx(char *buf, unsigned len)
5875 for (i = 0; i < len; i++) {
5876 #ifdef CONFIG_MAGIC_SYSRQ
5877 - if (sysrq_enabled) {
5879 + static unsigned long sysrq_requested;
5881 if (buf[i] == '\x0f') { /* ^O */
5882 if (!sysrq_requested) {
5883 sysrq_requested = jiffies;
5884 Index: head-2008-12-01/drivers/xen/core/reboot.c
5885 ===================================================================
5886 --- head-2008-12-01.orig/drivers/xen/core/reboot.c 2008-12-01 11:29:05.000000000 +0100
5887 +++ head-2008-12-01/drivers/xen/core/reboot.c 2008-12-01 11:32:38.000000000 +0100
5888 @@ -34,8 +34,8 @@ static int suspend_cancelled;
5889 /* Can we leave APs online when we suspend? */
5890 static int fast_suspend;
5892 -static void __shutdown_handler(void *unused);
5893 -static DECLARE_WORK(shutdown_work, __shutdown_handler, NULL);
5894 +static void __shutdown_handler(struct work_struct *unused);
5895 +static DECLARE_DELAYED_WORK(shutdown_work, __shutdown_handler);
5897 static int setup_suspend_evtchn(void);
5899 @@ -105,7 +105,7 @@ static int xen_suspend(void *__unused)
5900 case SHUTDOWN_RESUMING:
5903 - schedule_work(&shutdown_work);
5904 + schedule_delayed_work(&shutdown_work, 0);
5908 @@ -137,12 +137,12 @@ static void switch_shutdown_state(int ne
5910 /* Either we kick off the work, or we leave it to xen_suspend(). */
5911 if (old_state == SHUTDOWN_INVALID)
5912 - schedule_work(&shutdown_work);
5913 + schedule_delayed_work(&shutdown_work, 0);
5915 BUG_ON(old_state != SHUTDOWN_RESUMING);
5918 -static void __shutdown_handler(void *unused)
5919 +static void __shutdown_handler(struct work_struct *unused)
5923 Index: head-2008-12-01/drivers/xen/core/smpboot.c
5924 ===================================================================
5925 --- head-2008-12-01.orig/drivers/xen/core/smpboot.c 2008-12-01 11:29:05.000000000 +0100
5926 +++ head-2008-12-01/drivers/xen/core/smpboot.c 2008-12-01 11:32:38.000000000 +0100
5927 @@ -165,7 +165,12 @@ static void xen_smp_intr_exit(unsigned i
5929 void __cpuinit cpu_bringup(void)
5932 + cpu_set_gdt(current_thread_info()->cpu);
5933 + secondary_cpu_init();
5937 identify_cpu(cpu_data + smp_processor_id());
5938 touch_softlockup_watchdog();
5940 @@ -304,11 +309,12 @@ void __init smp_prepare_cpus(unsigned in
5944 + idle = fork_idle(cpu);
5946 + panic("failed fork for CPU %d", cpu);
5949 gdt_descr = &cpu_gdt_descr[cpu];
5951 - gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
5953 gdt_descr->address = get_zeroed_page(GFP_KERNEL);
5954 if (unlikely(!gdt_descr->address)) {
5955 printk(KERN_CRIT "CPU%d failed to allocate GDT\n",
5956 @@ -317,6 +323,11 @@ void __init smp_prepare_cpus(unsigned in
5958 gdt_descr->size = GDT_SIZE;
5959 memcpy((void *)gdt_descr->address, cpu_gdt_table, GDT_SIZE);
5961 + if (unlikely(!init_gdt(cpu, idle)))
5963 + gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
5966 (void *)gdt_descr->address,
5967 XENFEAT_writable_descriptor_tables);
5968 @@ -336,10 +347,6 @@ void __init smp_prepare_cpus(unsigned in
5969 cpu_2_logical_apicid[cpu] = apicid;
5970 x86_cpu_to_apicid[cpu] = apicid;
5972 - idle = fork_idle(cpu);
5974 - panic("failed fork for CPU %d", cpu);
5977 cpu_pda(cpu)->pcurrent = idle;
5978 cpu_pda(cpu)->cpunumber = cpu;
5979 Index: head-2008-12-01/drivers/xen/fbfront/xenfb.c
5980 ===================================================================
5981 --- head-2008-12-01.orig/drivers/xen/fbfront/xenfb.c 2008-12-01 11:29:05.000000000 +0100
5982 +++ head-2008-12-01/drivers/xen/fbfront/xenfb.c 2008-12-01 11:32:38.000000000 +0100
5984 #include <linux/vmalloc.h>
5985 #include <linux/mm.h>
5986 #include <linux/mutex.h>
5987 +#include <linux/freezer.h>
5988 #include <asm/hypervisor.h>
5989 #include <xen/evtchn.h>
5990 #include <xen/interface/io/fbif.h>
5991 Index: head-2008-12-01/drivers/xen/netback/loopback.c
5992 ===================================================================
5993 --- head-2008-12-01.orig/drivers/xen/netback/loopback.c 2008-12-01 11:29:05.000000000 +0100
5994 +++ head-2008-12-01/drivers/xen/netback/loopback.c 2008-12-01 11:32:38.000000000 +0100
5996 #include <net/dst.h>
5997 #include <net/xfrm.h> /* secpath_reset() */
5998 #include <asm/hypervisor.h> /* is_initial_xendomain() */
5999 +#include <../net/core/kmap_skb.h> /* k{,un}map_skb_frag() */
6001 static int nloopbacks = -1;
6002 module_param(nloopbacks, int, 0);
6003 Index: head-2008-12-01/drivers/xen/pciback/conf_space_header.c
6004 ===================================================================
6005 --- head-2008-12-01.orig/drivers/xen/pciback/conf_space_header.c 2008-10-29 09:55:56.000000000 +0100
6006 +++ head-2008-12-01/drivers/xen/pciback/conf_space_header.c 2008-12-01 11:32:38.000000000 +0100
6007 @@ -22,14 +22,14 @@ static int command_write(struct pci_dev
6011 - if (!dev->is_enabled && is_enable_cmd(value)) {
6012 + if (!atomic_read(&dev->enable_cnt) && is_enable_cmd(value)) {
6013 if (unlikely(verbose_request))
6014 printk(KERN_DEBUG "pciback: %s: enable\n",
6016 err = pci_enable_device(dev);
6019 - } else if (dev->is_enabled && !is_enable_cmd(value)) {
6020 + } else if (atomic_read(&dev->enable_cnt) && !is_enable_cmd(value)) {
6021 if (unlikely(verbose_request))
6022 printk(KERN_DEBUG "pciback: %s: disable\n",
6024 Index: head-2008-12-01/drivers/xen/pciback/pciback.h
6025 ===================================================================
6026 --- head-2008-12-01.orig/drivers/xen/pciback/pciback.h 2008-12-01 11:29:05.000000000 +0100
6027 +++ head-2008-12-01/drivers/xen/pciback/pciback.h 2008-12-01 11:32:38.000000000 +0100
6028 @@ -88,7 +88,7 @@ void pciback_release_devices(struct pcib
6030 /* Handles events from front-end */
6031 irqreturn_t pciback_handle_event(int irq, void *dev_id);
6032 -void pciback_do_op(void *data);
6033 +void pciback_do_op(struct work_struct *work);
6035 int pciback_xenbus_register(void);
6036 void pciback_xenbus_unregister(void);
6037 Index: head-2008-12-01/drivers/xen/pciback/pciback_ops.c
6038 ===================================================================
6039 --- head-2008-12-01.orig/drivers/xen/pciback/pciback_ops.c 2008-12-01 11:29:05.000000000 +0100
6040 +++ head-2008-12-01/drivers/xen/pciback/pciback_ops.c 2008-12-01 11:32:38.000000000 +0100
6041 @@ -25,7 +25,7 @@ void pciback_reset_device(struct pci_dev
6043 pci_write_config_word(dev, PCI_COMMAND, 0);
6045 - dev->is_enabled = 0;
6046 + atomic_set(&dev->enable_cnt, 0);
6047 dev->is_busmaster = 0;
6049 pci_read_config_word(dev, PCI_COMMAND, &cmd);
6050 @@ -51,9 +51,9 @@ static inline void test_and_schedule_op(
6051 * context because some of the pci_* functions can sleep (mostly due to ACPI
6052 * use of semaphores). This function is intended to be called from a work
6053 * queue in process context taking a struct pciback_device as a parameter */
6054 -void pciback_do_op(void *data)
6055 +void pciback_do_op(struct work_struct *work)
6057 - struct pciback_device *pdev = data;
6058 + struct pciback_device *pdev = container_of(work, struct pciback_device, op_work);
6059 struct pci_dev *dev;
6060 struct xen_pci_op *op = &pdev->sh_info->op;
6062 Index: head-2008-12-01/drivers/xen/pciback/xenbus.c
6063 ===================================================================
6064 --- head-2008-12-01.orig/drivers/xen/pciback/xenbus.c 2008-07-21 11:00:33.000000000 +0200
6065 +++ head-2008-12-01/drivers/xen/pciback/xenbus.c 2008-12-01 11:32:38.000000000 +0100
6066 @@ -32,7 +32,7 @@ static struct pciback_device *alloc_pdev
6067 pdev->evtchn_irq = INVALID_EVTCHN_IRQ;
6068 pdev->be_watching = 0;
6070 - INIT_WORK(&pdev->op_work, pciback_do_op, pdev);
6071 + INIT_WORK(&pdev->op_work, pciback_do_op);
6073 if (pciback_init_devices(pdev)) {
6075 @@ -54,7 +54,6 @@ static void pciback_disconnect(struct pc
6077 /* If the driver domain started an op, make sure we complete it or
6078 * delete it before releasing the shared memory */
6079 - cancel_delayed_work(&pdev->op_work);
6080 flush_scheduled_work();
6082 if (pdev->sh_info != NULL) {
6083 Index: head-2008-12-01/drivers/xen/scsiback/interface.c
6084 ===================================================================
6085 --- head-2008-12-01.orig/drivers/xen/scsiback/interface.c 2008-07-21 11:00:33.000000000 +0200
6086 +++ head-2008-12-01/drivers/xen/scsiback/interface.c 2008-12-01 11:32:38.000000000 +0100
6088 #include <linux/kthread.h>
6091 -static kmem_cache_t *scsiback_cachep;
6092 +static struct kmem_cache *scsiback_cachep;
6094 struct vscsibk_info *vscsibk_info_alloc(domid_t domid)
6096 Index: head-2008-12-01/drivers/xen/scsiback/scsiback.c
6097 ===================================================================
6098 --- head-2008-12-01.orig/drivers/xen/scsiback/scsiback.c 2008-12-01 11:29:05.000000000 +0100
6099 +++ head-2008-12-01/drivers/xen/scsiback/scsiback.c 2008-12-01 11:32:38.000000000 +0100
6100 @@ -322,13 +322,11 @@ static int scsiback_merge_bio(struct req
6103 blk_rq_bio_prep(q, rq, bio);
6104 - else if (!q->back_merge_fn(q, rq, bio))
6105 + else if (!ll_back_merge_fn(q, rq, bio))
6108 rq->biotail->bi_next = bio;
6110 - rq->hard_nr_sectors += bio_sectors(bio);
6111 - rq->nr_sectors = rq->hard_nr_sectors;
6115 Index: head-2008-12-01/drivers/xen/sfc_netfront/accel_vi.c
6116 ===================================================================
6117 --- head-2008-12-01.orig/drivers/xen/sfc_netfront/accel_vi.c 2008-12-01 11:29:05.000000000 +0100
6118 +++ head-2008-12-01/drivers/xen/sfc_netfront/accel_vi.c 2008-12-01 11:32:38.000000000 +0100
6119 @@ -463,7 +463,7 @@ netfront_accel_enqueue_skb_multi(netfron
6121 if (skb->ip_summed == CHECKSUM_PARTIAL) {
6122 /* Set to zero to encourage falcon to work it out for us */
6123 - *(u16*)(skb->h.raw + skb->csum) = 0;
6124 + *(u16*)(skb->h.raw + skb->csum_offset) = 0;
6127 if (multi_post_start_new_buffer(vnic, &state)) {
6128 @@ -582,7 +582,7 @@ netfront_accel_enqueue_skb_single(netfro
6130 if (skb->ip_summed == CHECKSUM_PARTIAL) {
6131 /* Set to zero to encourage falcon to work it out for us */
6132 - *(u16*)(skb->h.raw + skb->csum) = 0;
6133 + *(u16*)(skb->h.raw + skb->csum_offset) = 0;
6135 NETFRONT_ACCEL_PKTBUFF_FOR_EACH_FRAGMENT
6136 (skb, idx, frag_data, frag_len, {
6137 Index: head-2008-12-01/drivers/xen/tpmback/interface.c
6138 ===================================================================
6139 --- head-2008-12-01.orig/drivers/xen/tpmback/interface.c 2008-01-21 11:15:26.000000000 +0100
6140 +++ head-2008-12-01/drivers/xen/tpmback/interface.c 2008-12-01 11:32:38.000000000 +0100
6142 #include <xen/balloon.h>
6143 #include <xen/gnttab.h>
6145 -static kmem_cache_t *tpmif_cachep;
6146 +static struct kmem_cache *tpmif_cachep;
6147 int num_frontends = 0;
6149 LIST_HEAD(tpmif_list);
6150 Index: head-2008-12-01/drivers/xen/xenbus/xenbus_comms.c
6151 ===================================================================
6152 --- head-2008-12-01.orig/drivers/xen/xenbus/xenbus_comms.c 2008-12-01 11:29:05.000000000 +0100
6153 +++ head-2008-12-01/drivers/xen/xenbus/xenbus_comms.c 2008-12-01 11:32:38.000000000 +0100
6156 static int xenbus_irq;
6158 -extern void xenbus_probe(void *);
6159 +extern void xenbus_probe(struct work_struct *);
6160 extern int xenstored_ready;
6161 -static DECLARE_WORK(probe_work, xenbus_probe, NULL);
6162 +static DECLARE_WORK(probe_work, xenbus_probe);
6164 static DECLARE_WAIT_QUEUE_HEAD(xb_waitq);
6166 Index: head-2008-12-01/drivers/xen/xenbus/xenbus_probe.c
6167 ===================================================================
6168 --- head-2008-12-01.orig/drivers/xen/xenbus/xenbus_probe.c 2008-12-01 11:22:58.000000000 +0100
6169 +++ head-2008-12-01/drivers/xen/xenbus/xenbus_probe.c 2008-12-01 11:32:38.000000000 +0100
6170 @@ -843,7 +843,7 @@ void unregister_xenstore_notifier(struct
6171 EXPORT_SYMBOL_GPL(unregister_xenstore_notifier);
6174 -void xenbus_probe(void *unused)
6175 +void xenbus_probe(struct work_struct *unused)
6177 BUG_ON((xenstored_ready <= 0));
6179 Index: head-2008-12-01/include/asm-x86/mach-xen/asm/desc_32.h
6180 ===================================================================
6181 --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/desc_32.h 2008-12-01 11:29:05.000000000 +0100
6182 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/desc_32.h 2008-12-01 11:32:38.000000000 +0100
6184 #include <asm/ldt.h>
6185 #include <asm/segment.h>
6187 -#define CPU_16BIT_STACK_SIZE 1024
6189 #ifndef __ASSEMBLY__
6191 #include <linux/preempt.h>
6194 extern struct desc_struct cpu_gdt_table[GDT_ENTRIES];
6196 -DECLARE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]);
6198 struct Xgt_desc_struct {
6199 unsigned short size;
6200 unsigned long address __attribute__((packed));
6201 @@ -32,11 +28,6 @@ static inline struct desc_struct *get_cp
6202 return (struct desc_struct *)per_cpu(cpu_gdt_descr, cpu).address;
6206 - * This is the ldt that every process will get unless we need
6207 - * something other than this.
6209 -extern struct desc_struct default_ldt[];
6210 extern struct desc_struct idt_table[];
6211 extern void set_intr_gate(unsigned int irq, void * addr);
6213 @@ -63,8 +54,8 @@ static inline void pack_gate(__u32 *a, _
6214 #define DESCTYPE_DPL3 0x60 /* DPL-3 */
6215 #define DESCTYPE_S 0x10 /* !system */
6218 #define load_TR_desc() __asm__ __volatile__("ltr %w0"::"q" (GDT_ENTRY_TSS*8))
6219 -#define load_LDT_desc() __asm__ __volatile__("lldt %w0"::"q" (GDT_ENTRY_LDT*8))
6221 #define load_gdt(dtr) __asm__ __volatile("lgdt %0"::"m" (*dtr))
6222 #define load_idt(dtr) __asm__ __volatile("lidt %0"::"m" (*dtr))
6223 @@ -75,6 +66,7 @@ static inline void pack_gate(__u32 *a, _
6224 #define store_idt(dtr) __asm__ ("sidt %0":"=m" (*dtr))
6225 #define store_tr(tr) __asm__ ("str %0":"=m" (tr))
6226 #define store_ldt(ldt) __asm__ ("sldt %0":"=m" (ldt))
6230 # error update this code.
6231 @@ -90,22 +82,43 @@ static inline void load_TLS(struct threa
6235 +#define write_ldt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
6236 +#define write_gdt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
6237 +#define write_idt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
6239 static inline void write_dt_entry(void *dt, int entry, __u32 entry_a, __u32 entry_b)
6241 __u32 *lp = (__u32 *)((char *)dt + entry*8);
6246 -#define write_ldt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
6247 -#define write_gdt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
6248 +#define set_ldt native_set_ldt
6250 extern int write_ldt_entry(void *ldt, int entry, __u32 entry_a, __u32 entry_b);
6251 extern int write_gdt_entry(void *gdt, int entry, __u32 entry_a, __u32 entry_b);
6252 +#define set_ldt xen_set_ldt
6256 +static inline fastcall void native_set_ldt(const void *addr,
6257 + unsigned int entries)
6259 + if (likely(entries == 0))
6260 + __asm__ __volatile__("lldt %w0"::"q" (0));
6262 + unsigned cpu = smp_processor_id();
6265 + pack_descriptor(&a, &b, (unsigned long)addr,
6266 + entries * sizeof(struct desc_struct) - 1,
6268 + write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT, a, b);
6269 + __asm__ __volatile__("lldt %w0"::"q" (GDT_ENTRY_LDT*8));
6273 -#ifndef CONFIG_X86_NO_IDT
6274 -#define write_idt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
6276 +#ifndef CONFIG_X86_NO_IDT
6277 static inline void _set_gate(int gate, unsigned int type, void *addr, unsigned short seg)
6280 @@ -125,14 +138,6 @@ static inline void __set_tss_desc(unsign
6284 -static inline void set_ldt_desc(unsigned int cpu, void *addr, unsigned int entries)
6287 - pack_descriptor(&a, &b, (unsigned long)addr,
6288 - entries * sizeof(struct desc_struct) - 1,
6290 - write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT, a, b);
6293 #define set_tss_desc(cpu,addr) __set_tss_desc(cpu, GDT_ENTRY_TSS, addr)
6295 @@ -163,36 +168,22 @@ static inline void set_ldt_desc(unsigned
6297 static inline void clear_LDT(void)
6299 - int cpu = get_cpu();
6302 - * NB. We load the default_ldt for lcall7/27 handling on demand, as
6303 - * it slows down context switching. Noone uses it anyway.
6305 - cpu = cpu; /* XXX avoid compiler warning */
6306 - xen_set_ldt(NULL, 0);
6312 * load one particular LDT into the current CPU
6314 -static inline void load_LDT_nolock(mm_context_t *pc, int cpu)
6315 +static inline void load_LDT_nolock(mm_context_t *pc)
6317 - void *segments = pc->ldt;
6318 - int count = pc->size;
6320 - if (likely(!count))
6323 - xen_set_ldt(segments, count);
6324 + set_ldt(pc->ldt, pc->size);
6327 static inline void load_LDT(mm_context_t *pc)
6329 - int cpu = get_cpu();
6330 - load_LDT_nolock(pc, cpu);
6332 + preempt_disable();
6333 + load_LDT_nolock(pc);
6337 static inline unsigned long get_desc_base(unsigned long *desc)
6338 @@ -204,6 +195,29 @@ static inline unsigned long get_desc_bas
6342 +#else /* __ASSEMBLY__ */
6345 + * GET_DESC_BASE reads the descriptor base of the specified segment.
6348 + * idx - descriptor index
6349 + * gdt - GDT pointer
6350 + * base - 32bit register to which the base will be written
6351 + * lo_w - lo word of the "base" register
6352 + * lo_b - lo byte of the "base" register
6353 + * hi_b - hi byte of the low word of the "base" register
6356 + * GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah)
6357 + * Will read the base address of GDT_ENTRY_ESPFIX_SS and put it into %eax.
6359 +#define GET_DESC_BASE(idx, gdt, base, lo_w, lo_b, hi_b) \
6360 + movb idx*8+4(gdt), lo_b; \
6361 + movb idx*8+7(gdt), hi_b; \
6363 + movw idx*8+2(gdt), lo_w;
6365 #endif /* !__ASSEMBLY__ */
6368 Index: head-2008-12-01/include/asm-x86/mach-xen/asm/dma-mapping_32.h
6369 ===================================================================
6370 --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/dma-mapping_32.h 2008-04-02 12:34:02.000000000 +0200
6371 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/dma-mapping_32.h 2008-12-01 11:32:38.000000000 +0100
6372 @@ -127,10 +127,10 @@ dma_get_cache_alignment(void)
6373 return (1 << INTERNODE_CACHE_SHIFT);
6376 -#define dma_is_consistent(d) (1)
6377 +#define dma_is_consistent(d, h) (1)
6380 -dma_cache_sync(void *vaddr, size_t size,
6381 +dma_cache_sync(struct device *dev, void *vaddr, size_t size,
6382 enum dma_data_direction direction)
6384 flush_write_buffers();
6385 Index: head-2008-12-01/include/asm-x86/mach-xen/asm/fixmap_32.h
6386 ===================================================================
6387 --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/fixmap_32.h 2008-12-01 11:29:05.000000000 +0100
6388 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/fixmap_32.h 2008-12-01 11:32:38.000000000 +0100
6390 #ifndef _ASM_FIXMAP_H
6391 #define _ASM_FIXMAP_H
6394 /* used by vmalloc.c, vsyscall.lds.S.
6396 * Leave one empty page between vmalloc'ed areas and
6397 * the start of the fixmap.
6399 extern unsigned long __FIXADDR_TOP;
6400 +#ifdef CONFIG_COMPAT_VDSO
6401 +#define FIXADDR_USER_START __fix_to_virt(FIX_VDSO)
6402 +#define FIXADDR_USER_END __fix_to_virt(FIX_VDSO - 1)
6405 #ifndef __ASSEMBLY__
6406 #include <linux/kernel.h>
6407 Index: head-2008-12-01/include/asm-x86/mach-xen/asm/highmem.h
6408 ===================================================================
6409 --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/highmem.h 2008-10-29 09:55:56.000000000 +0100
6410 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/highmem.h 2008-12-01 11:32:38.000000000 +0100
6411 @@ -85,7 +85,7 @@ static inline void clear_user_highpage(s
6413 void copy_highpage(struct page *to, struct page *from);
6414 static inline void copy_user_highpage(struct page *to, struct page *from,
6415 - unsigned long vaddr)
6416 + unsigned long vaddr, struct vm_area_struct *vma)
6418 copy_highpage(to, from);
6420 Index: head-2008-12-01/include/asm-x86/mach-xen/asm/hypervisor.h
6421 ===================================================================
6422 --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/hypervisor.h 2008-12-01 11:29:05.000000000 +0100
6423 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/hypervisor.h 2008-12-01 11:32:38.000000000 +0100
6425 #include <asm/percpu.h>
6426 #include <asm/ptrace.h>
6427 #include <asm/page.h>
6428 -#if defined(__i386__)
6429 -# ifdef CONFIG_X86_PAE
6430 -# include <asm-generic/pgtable-nopud.h>
6432 -# include <asm-generic/pgtable-nopmd.h>
6434 -#elif defined(__x86_64__) && LINUX_VERSION_CODE < KERNEL_VERSION(2,6,11)
6435 -# include <asm-generic/pgtable-nopud.h>
6438 extern shared_info_t *HYPERVISOR_shared_info;
6440 Index: head-2008-12-01/include/asm-x86/mach-xen/asm/io_32.h
6441 ===================================================================
6442 --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/io_32.h 2008-12-01 11:29:05.000000000 +0100
6443 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/io_32.h 2008-12-01 11:32:38.000000000 +0100
6444 @@ -269,11 +269,7 @@ static inline void flush_write_buffers(v
6446 #endif /* __KERNEL__ */
6448 -#ifdef SLOW_IO_BY_JUMPING
6449 -#define __SLOW_DOWN_IO "jmp 1f; 1: jmp 1f; 1:"
6451 #define __SLOW_DOWN_IO "outb %%al,$0x80;"
6454 static inline void slow_down_io(void) {
6455 __asm__ __volatile__(
6456 Index: head-2008-12-01/include/asm-x86/mach-xen/asm/irqflags_32.h
6457 ===================================================================
6458 --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/irqflags_32.h 2007-06-12 13:14:02.000000000 +0200
6459 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/irqflags_32.h 2008-12-01 11:32:38.000000000 +0100
6462 #define __raw_local_save_flags() (current_vcpu_info()->evtchn_upcall_mask)
6464 -#define raw_local_save_flags(flags) \
6465 - do { (flags) = __raw_local_save_flags(); } while (0)
6467 #define raw_local_irq_restore(x) \
6469 vcpu_info_t *_vcpu; \
6470 @@ -66,18 +63,6 @@ void raw_safe_halt(void);
6474 -static inline int raw_irqs_disabled_flags(unsigned long flags)
6476 - return (flags != 0);
6479 -#define raw_irqs_disabled() \
6481 - unsigned long flags = __raw_local_save_flags(); \
6483 - raw_irqs_disabled_flags(flags); \
6487 * For spinlocks, etc:
6489 @@ -90,9 +75,62 @@ static inline int raw_irqs_disabled_flag
6494 +/* Offsets into shared_info_t. */
6495 +#define evtchn_upcall_pending /* 0 */
6496 +#define evtchn_upcall_mask 1
6498 +#define sizeof_vcpu_shift 6
6501 +#define GET_VCPU_INFO movl TI_cpu(%ebp),%esi ; \
6502 + shl $sizeof_vcpu_shift,%esi ; \
6503 + addl HYPERVISOR_shared_info,%esi
6505 +#define GET_VCPU_INFO movl HYPERVISOR_shared_info,%esi
6508 +#define __DISABLE_INTERRUPTS movb $1,evtchn_upcall_mask(%esi)
6509 +#define __ENABLE_INTERRUPTS movb $0,evtchn_upcall_mask(%esi)
6510 +#define __TEST_PENDING testb $0xFF,evtchn_upcall_pending(%esi)
6511 +#define DISABLE_INTERRUPTS(clb) GET_VCPU_INFO ; \
6512 + __DISABLE_INTERRUPTS
6513 +#define ENABLE_INTERRUPTS(clb) GET_VCPU_INFO ; \
6514 + __ENABLE_INTERRUPTS
6515 +#define ENABLE_INTERRUPTS_SYSEXIT __ENABLE_INTERRUPTS ; \
6516 +sysexit_scrit: /**** START OF SYSEXIT CRITICAL REGION ****/ ; \
6517 + __TEST_PENDING ; \
6518 + jnz 14f /* process more events if necessary... */ ; \
6519 + movl PT_ESI(%esp), %esi ; \
6521 +14: __DISABLE_INTERRUPTS ; \
6522 + TRACE_IRQS_OFF ; \
6523 +sysexit_ecrit: /**** END OF SYSEXIT CRITICAL REGION ****/ ; \
6525 + call evtchn_do_upcall ; \
6528 +#define INTERRUPT_RETURN iret
6529 +#endif /* __ASSEMBLY__ */
6531 +#ifndef __ASSEMBLY__
6532 +#define raw_local_save_flags(flags) \
6533 + do { (flags) = __raw_local_save_flags(); } while (0)
6535 #define raw_local_irq_save(flags) \
6536 do { (flags) = __raw_local_irq_save(); } while (0)
6538 +static inline int raw_irqs_disabled_flags(unsigned long flags)
6540 + return (flags != 0);
6543 +#define raw_irqs_disabled() \
6545 + unsigned long flags = __raw_local_save_flags(); \
6547 + raw_irqs_disabled_flags(flags); \
6549 #endif /* __ASSEMBLY__ */
6552 Index: head-2008-12-01/include/asm-x86/mach-xen/asm/mmu_context_32.h
6553 ===================================================================
6554 --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/mmu_context_32.h 2007-06-12 13:14:02.000000000 +0200
6555 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/mmu_context_32.h 2008-12-01 11:32:38.000000000 +0100
6556 @@ -27,14 +27,13 @@ static inline void enter_lazy_tlb(struct
6557 static inline void __prepare_arch_switch(void)
6560 - * Save away %fs and %gs. No need to save %es and %ds, as those
6561 - * are always kernel segments while inside the kernel. Must
6562 - * happen before reload of cr3/ldt (i.e., not in __switch_to).
6563 + * Save away %fs. No need to save %gs, as it was saved on the
6564 + * stack on entry. No need to save %es and %ds, as those are
6565 + * always kernel segments while inside the kernel.
6567 - asm volatile ( "mov %%fs,%0 ; mov %%gs,%1"
6568 - : "=m" (current->thread.fs),
6569 - "=m" (current->thread.gs));
6570 - asm volatile ( "movl %0,%%fs ; movl %0,%%gs"
6571 + asm volatile ( "mov %%fs,%0"
6572 + : "=m" (current->thread.fs));
6573 + asm volatile ( "movl %0,%%fs"
6577 @@ -89,14 +88,14 @@ static inline void switch_mm(struct mm_s
6578 * tlb flush IPI delivery. We must reload %cr3.
6580 load_cr3(next->pgd);
6581 - load_LDT_nolock(&next->context, cpu);
6582 + load_LDT_nolock(&next->context);
6588 -#define deactivate_mm(tsk, mm) \
6589 - asm("movl %0,%%fs ; movl %0,%%gs": :"r" (0))
6590 +#define deactivate_mm(tsk, mm) \
6591 + asm("movl %0,%%fs": :"r" (0));
6593 static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next)
6595 Index: head-2008-12-01/include/asm-x86/mach-xen/asm/pgtable-3level.h
6596 ===================================================================
6597 --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/pgtable-3level.h 2008-12-01 11:29:05.000000000 +0100
6598 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/pgtable-3level.h 2008-12-01 11:32:38.000000000 +0100
6600 #ifndef _I386_PGTABLE_3LEVEL_H
6601 #define _I386_PGTABLE_3LEVEL_H
6603 -#include <asm-generic/pgtable-nopud.h>
6606 * Intel Physical Address Extension (PAE) Mode - three-level page
6607 * tables on PPro+ CPUs.
6608 @@ -75,6 +73,23 @@ static inline void set_pte(pte_t *ptep,
6609 xen_l3_entry_update((pudptr), (pudval))
6612 + * For PTEs and PDEs, we must clear the P-bit first when clearing a page table
6613 + * entry, so clear the bottom half first and enforce ordering with a compiler
6616 +static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
6618 + if ((mm != current->mm && mm != &init_mm)
6619 + || HYPERVISOR_update_va_mapping(addr, __pte(0), 0)) {
6620 + ptep->pte_low = 0;
6622 + ptep->pte_high = 0;
6626 +#define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0)
6629 * Pentium-II erratum A13: in PAE mode we explicitly have to flush
6630 * the TLB via cr3 if the top-level pgd is changed...
6631 * We do not let the generic code free and clear pgd entries due to
6632 @@ -93,45 +108,16 @@ static inline void pud_clear (pud_t * pu
6633 #define pmd_offset(pud, address) ((pmd_t *) pud_page(*(pud)) + \
6636 -static inline int pte_none(pte_t pte)
6638 - return !(pte.pte_low | pte.pte_high);
6642 - * For PTEs and PDEs, we must clear the P-bit first when clearing a page table
6643 - * entry, so clear the bottom half first and enforce ordering with a compiler
6646 -static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
6647 +static inline pte_t raw_ptep_get_and_clear(pte_t *ptep, pte_t res)
6649 - if ((mm != current->mm && mm != &init_mm)
6650 - || HYPERVISOR_update_va_mapping(addr, __pte(0), 0)) {
6651 - ptep->pte_low = 0;
6653 + uint64_t val = __pte_val(res);
6654 + if (__cmpxchg64(ptep, val, 0) != val) {
6655 + /* xchg acts as a barrier before the setting of the high bits */
6656 + res.pte_low = xchg(&ptep->pte_low, 0);
6657 + res.pte_high = ptep->pte_high;
6662 -#define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0)
6664 -#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
6665 -static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
6667 - pte_t pte = *ptep;
6668 - if (!pte_none(pte)) {
6669 - if ((mm != &init_mm) ||
6670 - HYPERVISOR_update_va_mapping(addr, __pte(0), 0)) {
6671 - uint64_t val = __pte_val(pte);
6672 - if (__cmpxchg64(ptep, val, 0) != val) {
6673 - /* xchg acts as a barrier before the setting of the high bits */
6674 - pte.pte_low = xchg(&ptep->pte_low, 0);
6675 - pte.pte_high = ptep->pte_high;
6676 - ptep->pte_high = 0;
6684 #define __HAVE_ARCH_PTEP_CLEAR_FLUSH
6685 @@ -160,6 +146,11 @@ static inline int pte_same(pte_t a, pte_
6687 #define pte_page(x) pfn_to_page(pte_pfn(x))
6689 +static inline int pte_none(pte_t pte)
6691 + return !(pte.pte_low | pte.pte_high);
6694 #define __pte_mfn(_pte) (((_pte).pte_low >> PAGE_SHIFT) | \
6695 ((_pte).pte_high << (32-PAGE_SHIFT)))
6696 #define pte_mfn(_pte) ((_pte).pte_low & _PAGE_PRESENT ? \
6697 Index: head-2008-12-01/include/asm-x86/mach-xen/asm/pgtable_32.h
6698 ===================================================================
6699 --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/pgtable_32.h 2008-12-01 11:29:05.000000000 +0100
6700 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/pgtable_32.h 2008-12-01 11:32:38.000000000 +0100
6701 @@ -38,14 +38,14 @@ struct vm_area_struct;
6702 #define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
6703 extern unsigned long empty_zero_page[1024];
6704 extern pgd_t *swapper_pg_dir;
6705 -extern kmem_cache_t *pgd_cache;
6706 -extern kmem_cache_t *pmd_cache;
6707 +extern struct kmem_cache *pgd_cache;
6708 +extern struct kmem_cache *pmd_cache;
6709 extern spinlock_t pgd_lock;
6710 extern struct page *pgd_list;
6712 -void pmd_ctor(void *, kmem_cache_t *, unsigned long);
6713 -void pgd_ctor(void *, kmem_cache_t *, unsigned long);
6714 -void pgd_dtor(void *, kmem_cache_t *, unsigned long);
6715 +void pmd_ctor(void *, struct kmem_cache *, unsigned long);
6716 +void pgd_ctor(void *, struct kmem_cache *, unsigned long);
6717 +void pgd_dtor(void *, struct kmem_cache *, unsigned long);
6718 void pgtable_cache_init(void);
6719 void paging_init(void);
6721 @@ -276,7 +276,6 @@ static inline pte_t pte_mkhuge(pte_t pte
6722 #define pte_update(mm, addr, ptep) do { } while (0)
6723 #define pte_update_defer(mm, addr, ptep) do { } while (0)
6727 * We only update the dirty/accessed state if we set
6728 * the dirty bit by hand in the kernel, since the hardware
6729 @@ -342,6 +341,19 @@ do { \
6733 +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
6734 +static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
6736 + pte_t pte = *ptep;
6737 + if (!pte_none(pte)
6738 + && (mm != &init_mm
6739 + || HYPERVISOR_update_va_mapping(addr, __pte(0), 0))) {
6740 + pte = raw_ptep_get_and_clear(ptep, pte);
6741 + pte_update(mm, addr, ptep);
6746 #define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
6747 #define ptep_get_and_clear_full(mm, addr, ptep, full) \
6749 Index: head-2008-12-01/include/asm-x86/mach-xen/asm/processor_32.h
6750 ===================================================================
6751 --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/processor_32.h 2008-12-01 11:29:05.000000000 +0100
6752 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/processor_32.h 2008-12-01 11:32:38.000000000 +0100
6754 #include <linux/threads.h>
6755 #include <asm/percpu.h>
6756 #include <linux/cpumask.h>
6757 +#include <linux/init.h>
6758 #include <xen/interface/physdev.h>
6760 /* flag for disabling the tsc */
6761 @@ -73,6 +74,7 @@ struct cpuinfo_x86 {
6763 unsigned char x86_max_cores; /* cpuid returned max cores value */
6764 unsigned char apicid;
6765 + unsigned short x86_clflush_size;
6767 unsigned char booted_cores; /* number of cores as seen by OS */
6768 __u8 phys_proc_id; /* Physical processor id. */
6769 @@ -114,6 +116,8 @@ extern struct cpuinfo_x86 cpu_data[];
6770 extern int cpu_llc_id[NR_CPUS];
6771 extern char ignore_fpu_irq;
6773 +void __init cpu_detect(struct cpuinfo_x86 *c);
6775 extern void identify_cpu(struct cpuinfo_x86 *);
6776 extern void print_cpu_info(struct cpuinfo_x86 *);
6777 extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
6778 @@ -146,8 +150,8 @@ static inline void detect_ht(struct cpui
6779 #define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */
6780 #define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */
6782 -static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
6783 - unsigned int *ecx, unsigned int *edx)
6784 +static inline fastcall void xen_cpuid(unsigned int *eax, unsigned int *ebx,
6785 + unsigned int *ecx, unsigned int *edx)
6787 /* ecx is often an input as well as an output. */
6789 @@ -158,59 +162,6 @@ static inline void __cpuid(unsigned int
6790 : "0" (*eax), "2" (*ecx));
6794 - * Generic CPUID function
6795 - * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx
6796 - * resulting in stale register contents being returned.
6798 -static inline void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx)
6802 - __cpuid(eax, ebx, ecx, edx);
6805 -/* Some CPUID calls want 'count' to be placed in ecx */
6806 -static inline void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx,
6811 - __cpuid(eax, ebx, ecx, edx);
6815 - * CPUID functions returning a single datum
6817 -static inline unsigned int cpuid_eax(unsigned int op)
6819 - unsigned int eax, ebx, ecx, edx;
6821 - cpuid(op, &eax, &ebx, &ecx, &edx);
6824 -static inline unsigned int cpuid_ebx(unsigned int op)
6826 - unsigned int eax, ebx, ecx, edx;
6828 - cpuid(op, &eax, &ebx, &ecx, &edx);
6831 -static inline unsigned int cpuid_ecx(unsigned int op)
6833 - unsigned int eax, ebx, ecx, edx;
6835 - cpuid(op, &eax, &ebx, &ecx, &edx);
6838 -static inline unsigned int cpuid_edx(unsigned int op)
6840 - unsigned int eax, ebx, ecx, edx;
6842 - cpuid(op, &eax, &ebx, &ecx, &edx);
6846 #define load_cr3(pgdir) write_cr3(__pa(pgdir))
6849 @@ -480,9 +431,9 @@ struct thread_struct {
6850 .vm86_info = NULL, \
6851 .sysenter_cs = __KERNEL_CS, \
6852 .io_bitmap_ptr = NULL, \
6853 + .gs = __KERNEL_PDA, \
6856 -#ifndef CONFIG_X86_NO_TSS
6858 * Note that the .io_bitmap member must be extra-big. This is because
6859 * the CPU will access an additional byte beyond the end of the IO
6860 @@ -497,26 +448,9 @@ struct thread_struct {
6861 .io_bitmap = { [ 0 ... IO_BITMAP_LONGS] = ~0 }, \
6864 -static inline void __load_esp0(struct tss_struct *tss, struct thread_struct *thread)
6866 - tss->esp0 = thread->esp0;
6867 - /* This can only happen when SEP is enabled, no need to test "SEP"arately */
6868 - if (unlikely(tss->ss1 != thread->sysenter_cs)) {
6869 - tss->ss1 = thread->sysenter_cs;
6870 - wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
6873 -#define load_esp0(tss, thread) \
6874 - __load_esp0(tss, thread)
6876 -#define load_esp0(tss, thread) do { \
6877 - if (HYPERVISOR_stack_switch(__KERNEL_DS, (thread)->esp0)) \
6882 #define start_thread(regs, new_eip, new_esp) do { \
6883 - __asm__("movl %0,%%fs ; movl %0,%%gs": :"r" (0)); \
6884 + __asm__("movl %0,%%fs": :"r" (0)); \
6887 regs->xds = __USER_DS; \
6888 regs->xes = __USER_DS; \
6889 @@ -526,26 +460,6 @@ static inline void __load_esp0(struct ts
6890 regs->esp = new_esp; \
6894 - * These special macros can be used to get or set a debugging register
6896 -#define get_debugreg(var, register) \
6897 - (var) = HYPERVISOR_get_debugreg((register))
6898 -#define set_debugreg(value, register) \
6899 - WARN_ON(HYPERVISOR_set_debugreg((register), (value)))
6902 - * Set IOPL bits in EFLAGS from given mask
6904 -static inline void set_iopl_mask(unsigned mask)
6906 - struct physdev_set_iopl set_iopl;
6908 - /* Force the change at ring 0. */
6909 - set_iopl.iopl = (mask == 0) ? 1 : (mask >> 12) & 3;
6910 - WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl));
6913 /* Forward declaration, a strange C thing */
6916 @@ -637,6 +551,105 @@ static inline void rep_nop(void)
6918 #define cpu_relax() rep_nop()
6920 +#define paravirt_enabled() 0
6921 +#define __cpuid xen_cpuid
6923 +#ifndef CONFIG_X86_NO_TSS
6924 +static inline void __load_esp0(struct tss_struct *tss, struct thread_struct *thread)
6926 + tss->esp0 = thread->esp0;
6927 + /* This can only happen when SEP is enabled, no need to test "SEP"arately */
6928 + if (unlikely(tss->ss1 != thread->sysenter_cs)) {
6929 + tss->ss1 = thread->sysenter_cs;
6930 + wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
6933 +#define load_esp0(tss, thread) \
6934 + __load_esp0(tss, thread)
6936 +#define load_esp0(tss, thread) do { \
6937 + if (HYPERVISOR_stack_switch(__KERNEL_DS, (thread)->esp0)) \
6944 + * These special macros can be used to get or set a debugging register
6946 +#define get_debugreg(var, register) \
6947 + (var) = HYPERVISOR_get_debugreg(register)
6948 +#define set_debugreg(value, register) \
6949 + WARN_ON(HYPERVISOR_set_debugreg(register, value))
6951 +#define set_iopl_mask xen_set_iopl_mask
6954 + * Set IOPL bits in EFLAGS from given mask
6956 +static inline void xen_set_iopl_mask(unsigned mask)
6958 + struct physdev_set_iopl set_iopl;
6960 + /* Force the change at ring 0. */
6961 + set_iopl.iopl = (mask == 0) ? 1 : (mask >> 12) & 3;
6962 + WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl));
6967 + * Generic CPUID function
6968 + * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx
6969 + * resulting in stale register contents being returned.
6971 +static inline void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx)
6975 + __cpuid(eax, ebx, ecx, edx);
6978 +/* Some CPUID calls want 'count' to be placed in ecx */
6979 +static inline void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx,
6984 + __cpuid(eax, ebx, ecx, edx);
6988 + * CPUID functions returning a single datum
6990 +static inline unsigned int cpuid_eax(unsigned int op)
6992 + unsigned int eax, ebx, ecx, edx;
6994 + cpuid(op, &eax, &ebx, &ecx, &edx);
6997 +static inline unsigned int cpuid_ebx(unsigned int op)
6999 + unsigned int eax, ebx, ecx, edx;
7001 + cpuid(op, &eax, &ebx, &ecx, &edx);
7004 +static inline unsigned int cpuid_ecx(unsigned int op)
7006 + unsigned int eax, ebx, ecx, edx;
7008 + cpuid(op, &eax, &ebx, &ecx, &edx);
7011 +static inline unsigned int cpuid_edx(unsigned int op)
7013 + unsigned int eax, ebx, ecx, edx;
7015 + cpuid(op, &eax, &ebx, &ecx, &edx);
7019 /* generic versions from gas */
7020 #define GENERIC_NOP1 ".byte 0x90\n"
7021 #define GENERIC_NOP2 ".byte 0x89,0xf6\n"
7022 @@ -736,4 +749,8 @@ extern unsigned long boot_option_idle_ov
7023 extern void enable_sep_cpu(void);
7024 extern int sysenter_setup(void);
7026 +extern int init_gdt(int cpu, struct task_struct *idle);
7027 +extern void cpu_set_gdt(int);
7028 +extern void secondary_cpu_init(void);
7030 #endif /* __ASM_I386_PROCESSOR_H */
7031 Index: head-2008-12-01/include/asm-x86/mach-xen/asm/segment_32.h
7032 ===================================================================
7033 --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/segment_32.h 2008-12-01 11:29:05.000000000 +0100
7034 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/segment_32.h 2008-12-01 11:32:38.000000000 +0100
7036 * 25 - APM BIOS support
7038 * 26 - ESPFIX small SS
7040 + * 27 - PDA [ per-cpu private data area ]
7045 #define GDT_ENTRY_ESPFIX_SS (GDT_ENTRY_KERNEL_BASE + 14)
7046 #define __ESPFIX_SS (GDT_ENTRY_ESPFIX_SS * 8)
7048 +#define GDT_ENTRY_PDA (GDT_ENTRY_KERNEL_BASE + 15)
7049 +#define __KERNEL_PDA (GDT_ENTRY_PDA * 8)
7051 #define GDT_ENTRY_DOUBLEFAULT_TSS 31
7054 Index: head-2008-12-01/include/asm-x86/mach-xen/asm/smp_32.h
7055 ===================================================================
7056 --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/smp_32.h 2008-12-01 11:29:05.000000000 +0100
7057 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/smp_32.h 2008-12-01 11:32:38.000000000 +0100
7059 #include <linux/kernel.h>
7060 #include <linux/threads.h>
7061 #include <linux/cpumask.h>
7062 +#include <asm/pda.h>
7065 #ifdef CONFIG_X86_LOCAL_APIC
7066 @@ -56,7 +57,7 @@ extern void cpu_uninit(void);
7067 * from the initial startup. We map APIC_BASE very early in page_setup(),
7068 * so this is correct in the x86 case.
7070 -#define raw_smp_processor_id() (current_thread_info()->cpu)
7071 +#define raw_smp_processor_id() (read_pda(cpu_number))
7073 extern cpumask_t cpu_possible_map;
7074 #define cpu_callin_map cpu_possible_map
7075 Index: head-2008-12-01/include/asm-x86/mach-xen/asm/system_32.h
7076 ===================================================================
7077 --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/system_32.h 2008-12-01 11:29:05.000000000 +0100
7078 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/system_32.h 2008-12-01 11:32:38.000000000 +0100
7079 @@ -139,17 +139,17 @@ __asm__ __volatile__ ("movw %%dx,%1\n\t"
7080 #define write_cr4(x) \
7081 __asm__ __volatile__("movl %0,%%cr4": :"r" (x))
7084 - * Clear and set 'TS' bit respectively
7087 + __asm__ __volatile__ ("wbinvd": : :"memory")
7089 +/* Clear the 'TS' bit */
7090 #define clts() (HYPERVISOR_fpu_taskswitch(0))
7092 +/* Set the 'TS' bit */
7093 #define stts() (HYPERVISOR_fpu_taskswitch(1))
7095 #endif /* __KERNEL__ */
7098 - __asm__ __volatile__ ("wbinvd": : :"memory")
7100 static inline unsigned long get_limit(unsigned long segment)
7102 unsigned long __limit;
7103 Index: head-2008-12-01/include/asm-x86/mach-xen/asm/desc_64.h
7104 ===================================================================
7105 --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/desc_64.h 2008-01-28 12:24:19.000000000 +0100
7106 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/desc_64.h 2008-12-01 11:32:38.000000000 +0100
7109 #include <linux/string.h>
7110 #include <linux/smp.h>
7111 +#include <asm/desc_defs.h>
7113 #include <asm/segment.h>
7114 #include <asm/mmu.h>
7116 -// 8 byte segment descriptor
7117 -struct desc_struct {
7120 - unsigned base1 : 8, type : 4, s : 1, dpl : 2, p : 1;
7121 - unsigned limit : 4, avl : 1, l : 1, d : 1, g : 1, base2 : 8;
7122 -} __attribute__((packed));
7124 -struct n_desc_struct {
7129 - GATE_INTERRUPT = 0xE,
7135 -struct gate_struct {
7138 - unsigned ist : 3, zero0 : 5, type : 5, dpl : 2, p : 1;
7139 - u16 offset_middle;
7142 -} __attribute__((packed));
7144 -#define PTR_LOW(x) ((unsigned long)(x) & 0xFFFF)
7145 -#define PTR_MIDDLE(x) (((unsigned long)(x) >> 16) & 0xFFFF)
7146 -#define PTR_HIGH(x) ((unsigned long)(x) >> 32)
7153 -// LDT or TSS descriptor in the GDT. 16 bytes.
7154 -struct ldttss_desc {
7157 - unsigned base1 : 8, type : 5, dpl : 2, p : 1;
7158 - unsigned limit1 : 4, zero0 : 3, g : 1, base2 : 8;
7161 -} __attribute__((packed));
7164 - unsigned short size;
7165 - unsigned long address;
7166 -} __attribute__((packed)) ;
7168 extern struct desc_ptr idt_descr, cpu_gdt_descr[NR_CPUS];
7170 extern struct desc_struct cpu_gdt_table[GDT_ENTRIES];
7171 Index: head-2008-12-01/include/asm-x86/mach-xen/asm/dma-mapping_64.h
7172 ===================================================================
7173 --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/dma-mapping_64.h 2008-12-01 11:29:05.000000000 +0100
7174 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/dma-mapping_64.h 2008-12-01 11:32:38.000000000 +0100
7175 @@ -64,6 +64,9 @@ static inline int dma_mapping_error(dma_
7176 return (dma_addr == bad_dma_address);
7179 +#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
7180 +#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
7182 extern void *dma_alloc_coherent(struct device *dev, size_t size,
7183 dma_addr_t *dma_handle, gfp_t gfp);
7184 extern void dma_free_coherent(struct device *dev, size_t size, void *vaddr,
7185 @@ -181,12 +184,13 @@ static inline int dma_get_cache_alignmen
7186 return boot_cpu_data.x86_clflush_size;
7189 -#define dma_is_consistent(h) 1
7190 +#define dma_is_consistent(d, h) 1
7192 extern int dma_set_mask(struct device *dev, u64 mask);
7195 -dma_cache_sync(void *vaddr, size_t size, enum dma_data_direction dir)
7196 +dma_cache_sync(struct device *dev, void *vaddr, size_t size,
7197 + enum dma_data_direction dir)
7199 flush_write_buffers();
7201 Index: head-2008-12-01/include/asm-x86/mach-xen/asm/pgtable_64.h
7202 ===================================================================
7203 --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/pgtable_64.h 2008-12-01 11:29:05.000000000 +0100
7204 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/pgtable_64.h 2008-12-01 11:32:38.000000000 +0100
7205 @@ -235,19 +235,18 @@ extern unsigned int __kernel_page_user;
7207 static inline unsigned long pgd_bad(pgd_t pgd)
7209 - unsigned long val = __pgd_val(pgd);
7211 - val &= ~(_PAGE_USER | _PAGE_DIRTY);
7212 - return val & ~(_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED);
7213 + return __pgd_val(pgd) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER);
7216 -static inline unsigned long pud_bad(pud_t pud)
7218 - unsigned long val = __pud_val(pud);
7220 - val &= ~(_PAGE_USER | _PAGE_DIRTY);
7221 - return val & ~(_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED);
7223 +static inline unsigned long pud_bad(pud_t pud)
7225 + return __pud_val(pud) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER);
7228 +static inline unsigned long pmd_bad(pmd_t pmd)
7230 + return __pmd_val(pmd) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER);
7233 #define set_pte_at(_mm,addr,ptep,pteval) do { \
7234 if (((_mm) != current->mm && (_mm) != &init_mm) || \
7235 @@ -402,8 +401,6 @@ static inline int pmd_large(pmd_t pte) {
7236 #define pmd_present(x) (__pmd_val(x) & _PAGE_PRESENT)
7238 #define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0)
7239 -#define pmd_bad(x) ((__pmd_val(x) & ~(PTE_MASK | _PAGE_USER | _PAGE_PRESENT)) \
7240 - != (_KERNPG_TABLE & ~(_PAGE_USER | _PAGE_PRESENT)))
7241 #define pfn_pmd(nr,prot) (__pmd(((nr) << PAGE_SHIFT) | pgprot_val(prot)))
7242 #define pmd_pfn(x) ((pmd_val(x) & __PHYSICAL_MASK) >> PAGE_SHIFT)
7244 Index: head-2008-12-01/include/asm-x86/mach-xen/asm/processor_64.h
7245 ===================================================================
7246 --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/processor_64.h 2008-12-01 11:29:05.000000000 +0100
7247 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/processor_64.h 2008-12-01 11:32:38.000000000 +0100
7248 @@ -484,6 +484,14 @@ static inline void __mwait(unsigned long
7249 : :"a" (eax), "c" (ecx));
7252 +static inline void __sti_mwait(unsigned long eax, unsigned long ecx)
7254 + /* "mwait %eax,%ecx;" */
7256 + "sti; .byte 0x0f,0x01,0xc9;"
7257 + : :"a" (eax), "c" (ecx));
7260 extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx);
7262 #define stack_current() \
7263 Index: head-2008-12-01/include/asm-x86/mach-xen/asm/smp_64.h
7264 ===================================================================
7265 --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/smp_64.h 2008-12-01 11:29:05.000000000 +0100
7266 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/smp_64.h 2008-12-01 11:32:38.000000000 +0100
7267 @@ -88,11 +88,6 @@ extern u8 x86_cpu_to_log_apicid[NR_CPUS]
7268 extern u8 bios_cpu_apicid[];
7270 #ifdef CONFIG_X86_LOCAL_APIC
7271 -static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
7273 - return cpus_addr(cpumask)[0];
7276 static inline int cpu_present_to_apicid(int mps_cpu)
7278 if (mps_cpu < NR_CPUS)
7279 @@ -127,13 +122,6 @@ static __inline int logical_smp_processo
7280 #define cpu_physical_id(cpu) x86_cpu_to_apicid[cpu]
7282 #define cpu_physical_id(cpu) boot_cpu_id
7283 -static inline int smp_call_function_single(int cpuid, void (*func) (void *info),
7284 - void *info, int retry, int wait)
7286 - /* Disable interrupts here? */
7290 #endif /* !CONFIG_SMP */
7293 Index: head-2008-12-01/kernel/kexec.c
7294 ===================================================================
7295 --- head-2008-12-01.orig/kernel/kexec.c 2008-12-01 11:23:01.000000000 +0100
7296 +++ head-2008-12-01/kernel/kexec.c 2008-12-01 11:32:38.000000000 +0100
7297 @@ -372,7 +372,7 @@ static struct page *kimage_alloc_pages(g
7299 address_bits = BITS_PER_LONG;
7301 - address_bits = long_log2(limit);
7302 + address_bits = ilog2(limit);
7304 if (xen_limit_pages_to_max_mfn(pages, order, address_bits) < 0) {
7305 __free_pages(pages, order);
7306 Index: head-2008-12-01/net/core/dev.c
7307 ===================================================================
7308 --- head-2008-12-01.orig/net/core/dev.c 2008-12-01 11:29:05.000000000 +0100
7309 +++ head-2008-12-01/net/core/dev.c 2008-12-01 11:32:38.000000000 +0100
7310 @@ -1751,10 +1751,10 @@ inline int skb_checksum_setup(struct sk_
7312 switch (skb->nh.iph->protocol) {
7314 - skb->csum = offsetof(struct tcphdr, check);
7315 + skb->csum_offset = offsetof(struct tcphdr, check);
7318 - skb->csum = offsetof(struct udphdr, check);
7319 + skb->csum_offset = offsetof(struct udphdr, check);
7322 if (net_ratelimit())
7323 @@ -1763,7 +1763,7 @@ inline int skb_checksum_setup(struct sk_
7324 " %d packet", skb->nh.iph->protocol);
7327 - if ((skb->h.raw + skb->csum + 2) > skb->tail)
7328 + if ((skb->h.raw + skb->csum_offset + 2) > skb->tail)
7330 skb->ip_summed = CHECKSUM_PARTIAL;
7331 skb->proto_csum_blank = 0;