src/patches/suse-2.6.27.31/patches.xen/xen3-patch-2.6.20
1 From: www.kernel.org
2 Subject: Linux 2.6.20
3 Patch-mainline: 2.6.20
4
5 Automatically created from "patches.kernel.org/patch-2.6.20" by xen-port-patches.py
6
7 Acked-by: jbeulich@novell.com
8
9 --- sle11-2009-06-29.orig/arch/x86/Kconfig 2009-03-04 11:28:34.000000000 +0100
10 +++ sle11-2009-06-29/arch/x86/Kconfig 2009-02-05 10:22:19.000000000 +0100
11 @@ -1431,7 +1431,7 @@ config PHYSICAL_START
12
13 config RELOCATABLE
14 bool "Build a relocatable kernel (EXPERIMENTAL)"
15 - depends on EXPERIMENTAL
16 + depends on EXPERIMENTAL && !X86_XEN
17 help
18 This builds a kernel image that retains relocation information
19 so it can be loaded someplace besides the default 1MB.
20 --- sle11-2009-06-29.orig/arch/x86/kernel/asm-offsets_32.c 2008-11-25 12:35:53.000000000 +0100
21 +++ sle11-2009-06-29/arch/x86/kernel/asm-offsets_32.c 2008-12-15 11:26:44.000000000 +0100
22 @@ -54,6 +54,7 @@ void foo(void)
23 OFFSET(TI_exec_domain, thread_info, exec_domain);
24 OFFSET(TI_flags, thread_info, flags);
25 OFFSET(TI_status, thread_info, status);
26 + OFFSET(TI_cpu, thread_info, cpu);
27 OFFSET(TI_preempt_count, thread_info, preempt_count);
28 OFFSET(TI_addr_limit, thread_info, addr_limit);
29 OFFSET(TI_restart_block, thread_info, restart_block);
30 @@ -108,6 +109,11 @@ void foo(void)
31
32 OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx);
33
34 +#ifdef CONFIG_XEN
35 + BLANK();
36 + OFFSET(XEN_START_mfn_list, start_info, mfn_list);
37 +#endif
38 +
39 #ifdef CONFIG_PARAVIRT
40 BLANK();
41 OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled);
42 --- sle11-2009-06-29.orig/arch/x86/kernel/cpu/common-xen.c 2009-03-04 11:28:34.000000000 +0100
43 +++ sle11-2009-06-29/arch/x86/kernel/cpu/common-xen.c 2008-12-15 11:26:44.000000000 +0100
44 @@ -22,6 +22,7 @@
45 #define phys_pkg_id(a,b) a
46 #endif
47 #endif
48 +#include <asm/pda.h>
49 #include <asm/hypervisor.h>
50
51 #include "cpu.h"
52 @@ -29,10 +30,8 @@
53 DEFINE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr);
54 EXPORT_PER_CPU_SYMBOL(cpu_gdt_descr);
55
56 -#ifndef CONFIG_XEN
57 -DEFINE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]);
58 -EXPORT_PER_CPU_SYMBOL(cpu_16bit_stack);
59 -#endif
60 +struct i386_pda *_cpu_pda[NR_CPUS] __read_mostly;
61 +EXPORT_SYMBOL(_cpu_pda);
62
63 static int cachesize_override __cpuinitdata = -1;
64 static int disable_x86_fxsr __cpuinitdata;
65 @@ -60,7 +59,7 @@ static struct cpu_dev __cpuinitdata defa
66 .c_init = default_init,
67 .c_vendor = "Unknown",
68 };
69 -static struct cpu_dev * this_cpu = &default_cpu;
70 +static struct cpu_dev * this_cpu __cpuinitdata = &default_cpu;
71
72 static int __init cachesize_setup(char *str)
73 {
74 @@ -242,29 +241,14 @@ static int __cpuinit have_cpuid_p(void)
75 return flag_is_changeable_p(X86_EFLAGS_ID);
76 }
77
78 -/* Do minimum CPU detection early.
79 - Fields really needed: vendor, cpuid_level, family, model, mask, cache alignment.
80 - The others are not touched to avoid unwanted side effects.
81 -
82 - WARNING: this function is only called on the BP. Don't add code here
83 - that is supposed to run on all CPUs. */
84 -static void __init early_cpu_detect(void)
85 +void __init cpu_detect(struct cpuinfo_x86 *c)
86 {
87 - struct cpuinfo_x86 *c = &boot_cpu_data;
88 -
89 - c->x86_cache_alignment = 32;
90 -
91 - if (!have_cpuid_p())
92 - return;
93 -
94 /* Get vendor name */
95 cpuid(0x00000000, &c->cpuid_level,
96 (int *)&c->x86_vendor_id[0],
97 (int *)&c->x86_vendor_id[8],
98 (int *)&c->x86_vendor_id[4]);
99
100 - get_cpu_vendor(c, 1);
101 -
102 c->x86 = 4;
103 if (c->cpuid_level >= 0x00000001) {
104 u32 junk, tfms, cap0, misc;
105 @@ -281,6 +265,26 @@ static void __init early_cpu_detect(void
106 }
107 }
108
109 +/* Do minimum CPU detection early.
110 + Fields really needed: vendor, cpuid_level, family, model, mask, cache alignment.
111 + The others are not touched to avoid unwanted side effects.
112 +
113 + WARNING: this function is only called on the BP. Don't add code here
114 + that is supposed to run on all CPUs. */
115 +static void __init early_cpu_detect(void)
116 +{
117 + struct cpuinfo_x86 *c = &boot_cpu_data;
118 +
119 + c->x86_cache_alignment = 32;
120 +
121 + if (!have_cpuid_p())
122 + return;
123 +
124 + cpu_detect(c);
125 +
126 + get_cpu_vendor(c, 1);
127 +}
128 +
129 static void __cpuinit generic_identify(struct cpuinfo_x86 * c)
130 {
131 u32 tfms, xlvl;
132 @@ -315,6 +319,8 @@ static void __cpuinit generic_identify(s
133 #else
134 c->apicid = (ebx >> 24) & 0xFF;
135 #endif
136 + if (c->x86_capability[0] & (1<<19))
137 + c->x86_clflush_size = ((ebx >> 8) & 0xff) * 8;
138 } else {
139 /* Have CPUID level 0 only - unheard of */
140 c->x86 = 4;
141 @@ -379,6 +385,7 @@ void __cpuinit identify_cpu(struct cpuin
142 c->x86_vendor_id[0] = '\0'; /* Unset */
143 c->x86_model_id[0] = '\0'; /* Unset */
144 c->x86_max_cores = 1;
145 + c->x86_clflush_size = 32;
146 memset(&c->x86_capability, 0, sizeof c->x86_capability);
147
148 if (!have_cpuid_p()) {
149 @@ -599,61 +606,23 @@ void __init early_cpu_init(void)
150 #endif
151 }
152
153 -static void __cpuinit cpu_gdt_init(const struct Xgt_desc_struct *gdt_descr)
154 +/* Make sure %gs is initialized properly in idle threads */
155 +struct pt_regs * __devinit idle_regs(struct pt_regs *regs)
156 {
157 - unsigned long frames[16];
158 - unsigned long va;
159 - int f;
160 -
161 - for (va = gdt_descr->address, f = 0;
162 - va < gdt_descr->address + gdt_descr->size;
163 - va += PAGE_SIZE, f++) {
164 - frames[f] = virt_to_mfn(va);
165 - make_lowmem_page_readonly(
166 - (void *)va, XENFEAT_writable_descriptor_tables);
167 - }
168 - if (HYPERVISOR_set_gdt(frames, (gdt_descr->size + 1) / 8))
169 - BUG();
170 + memset(regs, 0, sizeof(struct pt_regs));
171 + regs->xgs = __KERNEL_PDA;
172 + return regs;
173 }
174
175 -/*
176 - * cpu_init() initializes state that is per-CPU. Some data is already
177 - * initialized (naturally) in the bootstrap process, such as the GDT
178 - * and IDT. We reload them nevertheless, this function acts as a
179 - * 'CPU state barrier', nothing should get across.
180 - */
181 -void __cpuinit cpu_init(void)
182 +static __cpuinit int alloc_gdt(int cpu)
183 {
184 - int cpu = smp_processor_id();
185 -#ifndef CONFIG_X86_NO_TSS
186 - struct tss_struct * t = &per_cpu(init_tss, cpu);
187 -#endif
188 - struct thread_struct *thread = &current->thread;
189 - struct desc_struct *gdt;
190 struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
191 + struct desc_struct *gdt;
192 + struct i386_pda *pda;
193
194 - if (cpu_test_and_set(cpu, cpu_initialized)) {
195 - printk(KERN_WARNING "CPU#%d already initialized!\n", cpu);
196 - for (;;) local_irq_enable();
197 - }
198 - printk(KERN_INFO "Initializing CPU#%d\n", cpu);
199 -
200 - if (cpu_has_vme || cpu_has_de)
201 - clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
202 - if (tsc_disable && cpu_has_tsc) {
203 - printk(KERN_NOTICE "Disabling TSC...\n");
204 - /**** FIX-HPA: DOES THIS REALLY BELONG HERE? ****/
205 - clear_bit(X86_FEATURE_TSC, boot_cpu_data.x86_capability);
206 - set_in_cr4(X86_CR4_TSD);
207 - }
208 + gdt = (struct desc_struct *)cpu_gdt_descr->address;
209 + pda = cpu_pda(cpu);
210
211 -#ifndef CONFIG_XEN
212 - /* The CPU hotplug case */
213 - if (cpu_gdt_descr->address) {
214 - gdt = (struct desc_struct *)cpu_gdt_descr->address;
215 - memset(gdt, 0, PAGE_SIZE);
216 - goto old_gdt;
217 - }
218 /*
219 * This is a horrible hack to allocate the GDT. The problem
220 * is that cpu_init() is called really early for the boot CPU
221 @@ -661,54 +630,141 @@ void __cpuinit cpu_init(void)
222 * CPUs, when bootmem will have gone away
223 */
224 if (NODE_DATA(0)->bdata->node_bootmem_map) {
225 - gdt = (struct desc_struct *)alloc_bootmem_pages(PAGE_SIZE);
226 - /* alloc_bootmem_pages panics on failure, so no check */
227 + BUG_ON(gdt != NULL || pda != NULL);
228 +
229 + gdt = alloc_bootmem_pages(PAGE_SIZE);
230 + pda = alloc_bootmem(sizeof(*pda));
231 + /* alloc_bootmem(_pages) panics on failure, so no check */
232 +
233 memset(gdt, 0, PAGE_SIZE);
234 + memset(pda, 0, sizeof(*pda));
235 } else {
236 - gdt = (struct desc_struct *)get_zeroed_page(GFP_KERNEL);
237 - if (unlikely(!gdt)) {
238 - printk(KERN_CRIT "CPU%d failed to allocate GDT\n", cpu);
239 - for (;;)
240 - local_irq_enable();
241 + /* GDT and PDA might already have been allocated if
242 + this is a CPU hotplug re-insertion. */
243 + if (gdt == NULL)
244 + gdt = (struct desc_struct *)get_zeroed_page(GFP_KERNEL);
245 +
246 + if (pda == NULL)
247 + pda = kmalloc_node(sizeof(*pda), GFP_KERNEL, cpu_to_node(cpu));
248 +
249 + if (unlikely(!gdt || !pda)) {
250 + free_pages((unsigned long)gdt, 0);
251 + kfree(pda);
252 + return 0;
253 }
254 }
255 -old_gdt:
256 +
257 + cpu_gdt_descr->address = (unsigned long)gdt;
258 + cpu_pda(cpu) = pda;
259 +
260 + return 1;
261 +}
262 +
263 +/* Initial PDA used by boot CPU */
264 +struct i386_pda boot_pda = {
265 + ._pda = &boot_pda,
266 + .cpu_number = 0,
267 + .pcurrent = &init_task,
268 +};
269 +
270 +static inline void set_kernel_gs(void)
271 +{
272 + /* Set %gs for this CPU's PDA. Memory clobber is to create a
273 + barrier with respect to any PDA operations, so the compiler
274 + doesn't move any before here. */
275 + asm volatile ("mov %0, %%gs" : : "r" (__KERNEL_PDA) : "memory");
276 +}
277 +
278 +/* Initialize the CPU's GDT and PDA. The boot CPU does this for
279 + itself, but secondaries find this done for them. */
280 +__cpuinit int init_gdt(int cpu, struct task_struct *idle)
281 +{
282 + struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
283 + struct desc_struct *gdt;
284 + struct i386_pda *pda;
285 +
286 + /* For non-boot CPUs, the GDT and PDA should already have been
287 + allocated. */
288 + if (!alloc_gdt(cpu)) {
289 + printk(KERN_CRIT "CPU%d failed to allocate GDT or PDA\n", cpu);
290 + return 0;
291 + }
292 +
293 + gdt = (struct desc_struct *)cpu_gdt_descr->address;
294 + pda = cpu_pda(cpu);
295 +
296 + BUG_ON(gdt == NULL || pda == NULL);
297 +
298 /*
299 * Initialize the per-CPU GDT with the boot GDT,
300 * and set up the GDT descriptor:
301 */
302 memcpy(gdt, cpu_gdt_table, GDT_SIZE);
303 + cpu_gdt_descr->size = GDT_SIZE - 1;
304
305 - /* Set up GDT entry for 16bit stack */
306 - *(__u64 *)(&gdt[GDT_ENTRY_ESPFIX_SS]) |=
307 - ((((__u64)stk16_off) << 16) & 0x000000ffffff0000ULL) |
308 - ((((__u64)stk16_off) << 32) & 0xff00000000000000ULL) |
309 - (CPU_16BIT_STACK_SIZE - 1);
310 + pack_descriptor((u32 *)&gdt[GDT_ENTRY_PDA].a,
311 + (u32 *)&gdt[GDT_ENTRY_PDA].b,
312 + (unsigned long)pda, sizeof(*pda) - 1,
313 + 0x80 | DESCTYPE_S | 0x2, 0); /* present read-write data segment */
314 +
315 + memset(pda, 0, sizeof(*pda));
316 + pda->_pda = pda;
317 + pda->cpu_number = cpu;
318 + pda->pcurrent = idle;
319
320 - cpu_gdt_descr->size = GDT_SIZE - 1;
321 - cpu_gdt_descr->address = (unsigned long)gdt;
322 -#else
323 - if (cpu == 0 && cpu_gdt_descr->address == 0) {
324 - gdt = (struct desc_struct *)alloc_bootmem_pages(PAGE_SIZE);
325 - /* alloc_bootmem_pages panics on failure, so no check */
326 - memset(gdt, 0, PAGE_SIZE);
327 + return 1;
328 +}
329
330 - memcpy(gdt, cpu_gdt_table, GDT_SIZE);
331 -
332 - cpu_gdt_descr->size = GDT_SIZE;
333 - cpu_gdt_descr->address = (unsigned long)gdt;
334 +void __cpuinit cpu_set_gdt(int cpu)
335 +{
336 + struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
337 + unsigned long va, frames[16];
338 + int f;
339 +
340 + for (va = cpu_gdt_descr->address, f = 0;
341 + va < cpu_gdt_descr->address + cpu_gdt_descr->size;
342 + va += PAGE_SIZE, f++) {
343 + frames[f] = virt_to_mfn(va);
344 + make_lowmem_page_readonly(
345 + (void *)va, XENFEAT_writable_descriptor_tables);
346 }
347 + BUG_ON(HYPERVISOR_set_gdt(frames, (cpu_gdt_descr->size + 1) / 8));
348 +
349 + set_kernel_gs();
350 +}
351 +
352 +/* Common CPU init for both boot and secondary CPUs */
353 +static void __cpuinit _cpu_init(int cpu, struct task_struct *curr)
354 +{
355 +#ifndef CONFIG_X86_NO_TSS
356 + struct tss_struct * t = &per_cpu(init_tss, cpu);
357 #endif
358 + struct thread_struct *thread = &curr->thread;
359 +
360 + if (cpu_test_and_set(cpu, cpu_initialized)) {
361 + printk(KERN_WARNING "CPU#%d already initialized!\n", cpu);
362 + for (;;) local_irq_enable();
363 + }
364
365 - cpu_gdt_init(cpu_gdt_descr);
366 + printk(KERN_INFO "Initializing CPU#%d\n", cpu);
367 +
368 + if (cpu_has_vme || cpu_has_de)
369 + clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
370 + if (tsc_disable && cpu_has_tsc) {
371 + printk(KERN_NOTICE "Disabling TSC...\n");
372 + /**** FIX-HPA: DOES THIS REALLY BELONG HERE? ****/
373 + clear_bit(X86_FEATURE_TSC, boot_cpu_data.x86_capability);
374 + set_in_cr4(X86_CR4_TSD);
375 + }
376
377 /*
378 * Set up and load the per-CPU TSS and LDT
379 */
380 atomic_inc(&init_mm.mm_count);
381 - current->active_mm = &init_mm;
382 - BUG_ON(current->mm);
383 - enter_lazy_tlb(&init_mm, current);
384 + curr->active_mm = &init_mm;
385 + if (curr->mm)
386 + BUG();
387 + enter_lazy_tlb(&init_mm, curr);
388
389 load_esp0(t, thread);
390
391 @@ -719,8 +775,8 @@ old_gdt:
392 __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss);
393 #endif
394
395 - /* Clear %fs and %gs. */
396 - asm volatile ("movl %0, %%fs; movl %0, %%gs" : : "r" (0));
397 + /* Clear %fs. */
398 + asm volatile ("mov %0, %%fs" : : "r" (0));
399
400 /* Clear all 6 debug registers: */
401 set_debugreg(0, 0);
402 @@ -738,6 +794,38 @@ old_gdt:
403 mxcsr_feature_mask_init();
404 }
405
406 +/* Entrypoint to initialize secondary CPU */
407 +void __cpuinit secondary_cpu_init(void)
408 +{
409 + int cpu = smp_processor_id();
410 + struct task_struct *curr = current;
411 +
412 + _cpu_init(cpu, curr);
413 +}
414 +
415 +/*
416 + * cpu_init() initializes state that is per-CPU. Some data is already
417 + * initialized (naturally) in the bootstrap process, such as the GDT
418 + * and IDT. We reload them nevertheless, this function acts as a
419 + * 'CPU state barrier', nothing should get across.
420 + */
421 +void __cpuinit cpu_init(void)
422 +{
423 + int cpu = smp_processor_id();
424 + struct task_struct *curr = current;
425 +
426 + /* Set up the real GDT and PDA, so we can transition from the
427 + boot versions. */
428 + if (!init_gdt(cpu, curr)) {
429 + /* failed to allocate something; not much we can do... */
430 + for (;;)
431 + local_irq_enable();
432 + }
433 +
434 + cpu_set_gdt(cpu);
435 + _cpu_init(cpu, curr);
436 +}
437 +
438 #ifdef CONFIG_HOTPLUG_CPU
439 void __cpuinit cpu_uninit(void)
440 {
441 --- sle11-2009-06-29.orig/arch/x86/kernel/cpu/mtrr/main-xen.c 2009-06-29 15:14:52.000000000 +0200
442 +++ sle11-2009-06-29/arch/x86/kernel/cpu/mtrr/main-xen.c 2008-12-15 11:26:44.000000000 +0100
443 @@ -12,7 +12,7 @@
444 static DEFINE_MUTEX(mtrr_mutex);
445
446 void generic_get_mtrr(unsigned int reg, unsigned long *base,
447 - unsigned int *size, mtrr_type * type)
448 + unsigned long *size, mtrr_type * type)
449 {
450 struct xen_platform_op op;
451
452 @@ -115,8 +115,7 @@ int mtrr_del_page(int reg, unsigned long
453 {
454 unsigned i;
455 mtrr_type ltype;
456 - unsigned long lbase;
457 - unsigned int lsize;
458 + unsigned long lbase, lsize;
459 int error = -EINVAL;
460 struct xen_platform_op op;
461
462 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
463 +++ sle11-2009-06-29/arch/x86/kernel/e820_32-xen.c 2008-12-15 11:26:44.000000000 +0100
464 @@ -0,0 +1,1015 @@
465 +#include <linux/kernel.h>
466 +#include <linux/types.h>
467 +#include <linux/init.h>
468 +#include <linux/bootmem.h>
469 +#include <linux/ioport.h>
470 +#include <linux/string.h>
471 +#include <linux/kexec.h>
472 +#include <linux/module.h>
473 +#include <linux/mm.h>
474 +#include <linux/efi.h>
475 +#include <linux/pfn.h>
476 +#include <linux/uaccess.h>
477 +
478 +#include <asm/pgtable.h>
479 +#include <asm/page.h>
480 +#include <asm/e820.h>
481 +#include <xen/interface/memory.h>
482 +
483 +#ifdef CONFIG_EFI
484 +int efi_enabled = 0;
485 +EXPORT_SYMBOL(efi_enabled);
486 +#endif
487 +
488 +struct e820map e820;
489 +struct change_member {
490 + struct e820entry *pbios; /* pointer to original bios entry */
491 + unsigned long long addr; /* address for this change point */
492 +};
493 +static struct change_member change_point_list[2*E820MAX] __initdata;
494 +static struct change_member *change_point[2*E820MAX] __initdata;
495 +static struct e820entry *overlap_list[E820MAX] __initdata;
496 +static struct e820entry new_bios[E820MAX] __initdata;
497 +/* For PCI or other memory-mapped resources */
498 +unsigned long pci_mem_start = 0x10000000;
499 +#ifdef CONFIG_PCI
500 +EXPORT_SYMBOL(pci_mem_start);
501 +#endif
502 +extern int user_defined_memmap;
503 +struct resource data_resource = {
504 + .name = "Kernel data",
505 + .start = 0,
506 + .end = 0,
507 + .flags = IORESOURCE_BUSY | IORESOURCE_MEM
508 +};
509 +
510 +struct resource code_resource = {
511 + .name = "Kernel code",
512 + .start = 0,
513 + .end = 0,
514 + .flags = IORESOURCE_BUSY | IORESOURCE_MEM
515 +};
516 +
517 +static struct resource system_rom_resource = {
518 + .name = "System ROM",
519 + .start = 0xf0000,
520 + .end = 0xfffff,
521 + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
522 +};
523 +
524 +static struct resource extension_rom_resource = {
525 + .name = "Extension ROM",
526 + .start = 0xe0000,
527 + .end = 0xeffff,
528 + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
529 +};
530 +
531 +static struct resource adapter_rom_resources[] = { {
532 + .name = "Adapter ROM",
533 + .start = 0xc8000,
534 + .end = 0,
535 + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
536 +}, {
537 + .name = "Adapter ROM",
538 + .start = 0,
539 + .end = 0,
540 + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
541 +}, {
542 + .name = "Adapter ROM",
543 + .start = 0,
544 + .end = 0,
545 + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
546 +}, {
547 + .name = "Adapter ROM",
548 + .start = 0,
549 + .end = 0,
550 + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
551 +}, {
552 + .name = "Adapter ROM",
553 + .start = 0,
554 + .end = 0,
555 + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
556 +}, {
557 + .name = "Adapter ROM",
558 + .start = 0,
559 + .end = 0,
560 + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
561 +} };
562 +
563 +static struct resource video_rom_resource = {
564 + .name = "Video ROM",
565 + .start = 0xc0000,
566 + .end = 0xc7fff,
567 + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
568 +};
569 +
570 +static struct resource video_ram_resource = {
571 + .name = "Video RAM area",
572 + .start = 0xa0000,
573 + .end = 0xbffff,
574 + .flags = IORESOURCE_BUSY | IORESOURCE_MEM
575 +};
576 +
577 +static struct resource standard_io_resources[] = { {
578 + .name = "dma1",
579 + .start = 0x0000,
580 + .end = 0x001f,
581 + .flags = IORESOURCE_BUSY | IORESOURCE_IO
582 +}, {
583 + .name = "pic1",
584 + .start = 0x0020,
585 + .end = 0x0021,
586 + .flags = IORESOURCE_BUSY | IORESOURCE_IO
587 +}, {
588 + .name = "timer0",
589 + .start = 0x0040,
590 + .end = 0x0043,
591 + .flags = IORESOURCE_BUSY | IORESOURCE_IO
592 +}, {
593 + .name = "timer1",
594 + .start = 0x0050,
595 + .end = 0x0053,
596 + .flags = IORESOURCE_BUSY | IORESOURCE_IO
597 +}, {
598 + .name = "keyboard",
599 + .start = 0x0060,
600 + .end = 0x006f,
601 + .flags = IORESOURCE_BUSY | IORESOURCE_IO
602 +}, {
603 + .name = "dma page reg",
604 + .start = 0x0080,
605 + .end = 0x008f,
606 + .flags = IORESOURCE_BUSY | IORESOURCE_IO
607 +}, {
608 + .name = "pic2",
609 + .start = 0x00a0,
610 + .end = 0x00a1,
611 + .flags = IORESOURCE_BUSY | IORESOURCE_IO
612 +}, {
613 + .name = "dma2",
614 + .start = 0x00c0,
615 + .end = 0x00df,
616 + .flags = IORESOURCE_BUSY | IORESOURCE_IO
617 +}, {
618 + .name = "fpu",
619 + .start = 0x00f0,
620 + .end = 0x00ff,
621 + .flags = IORESOURCE_BUSY | IORESOURCE_IO
622 +} };
623 +
624 +static int romsignature(const unsigned char *x)
625 +{
626 + unsigned short sig;
627 + int ret = 0;
628 + if (probe_kernel_address((const unsigned short *)x, sig) == 0)
629 + ret = (sig == 0xaa55);
630 + return ret;
631 +}
632 +
633 +static int __init romchecksum(unsigned char *rom, unsigned long length)
634 +{
635 + unsigned char *p, sum = 0;
636 +
637 + for (p = rom; p < rom + length; p++)
638 + sum += *p;
639 + return sum == 0;
640 +}
641 +
642 +static void __init probe_roms(void)
643 +{
644 + unsigned long start, length, upper;
645 + unsigned char *rom;
646 + int i;
647 +
648 +#ifdef CONFIG_XEN
649 + /* Nothing to do if not running in dom0. */
650 + if (!is_initial_xendomain())
651 + return;
652 +#endif
653 +
654 + /* video rom */
655 + upper = adapter_rom_resources[0].start;
656 + for (start = video_rom_resource.start; start < upper; start += 2048) {
657 + rom = isa_bus_to_virt(start);
658 + if (!romsignature(rom))
659 + continue;
660 +
661 + video_rom_resource.start = start;
662 +
663 + /* 0 < length <= 0x7f * 512, historically */
664 + length = rom[2] * 512;
665 +
666 + /* if checksum okay, trust length byte */
667 + if (length && romchecksum(rom, length))
668 + video_rom_resource.end = start + length - 1;
669 +
670 + request_resource(&iomem_resource, &video_rom_resource);
671 + break;
672 + }
673 +
674 + start = (video_rom_resource.end + 1 + 2047) & ~2047UL;
675 + if (start < upper)
676 + start = upper;
677 +
678 + /* system rom */
679 + request_resource(&iomem_resource, &system_rom_resource);
680 + upper = system_rom_resource.start;
681 +
682 + /* check for extension rom (ignore length byte!) */
683 + rom = isa_bus_to_virt((unsigned long)extension_rom_resource.start);
684 + if (romsignature(rom)) {
685 + length = extension_rom_resource.end - extension_rom_resource.start + 1;
686 + if (romchecksum(rom, length)) {
687 + request_resource(&iomem_resource, &extension_rom_resource);
688 + upper = extension_rom_resource.start;
689 + }
690 + }
691 +
692 + /* check for adapter roms on 2k boundaries */
693 + for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; start += 2048) {
694 + rom = isa_bus_to_virt(start);
695 + if (!romsignature(rom))
696 + continue;
697 +
698 + /* 0 < length <= 0x7f * 512, historically */
699 + length = rom[2] * 512;
700 +
701 + /* but accept any length that fits if checksum okay */
702 + if (!length || start + length > upper || !romchecksum(rom, length))
703 + continue;
704 +
705 + adapter_rom_resources[i].start = start;
706 + adapter_rom_resources[i].end = start + length - 1;
707 + request_resource(&iomem_resource, &adapter_rom_resources[i]);
708 +
709 + start = adapter_rom_resources[i++].end & ~2047UL;
710 + }
711 +}
712 +
713 +#ifdef CONFIG_XEN
714 +static struct e820map machine_e820 __initdata;
715 +#define e820 machine_e820
716 +#endif
717 +
718 +/*
719 + * Request address space for all standard RAM and ROM resources
720 + * and also for regions reported as reserved by the e820.
721 + */
722 +static void __init
723 +legacy_init_iomem_resources(struct resource *code_resource, struct resource *data_resource)
724 +{
725 + int i;
726 +
727 + probe_roms();
728 + for (i = 0; i < e820.nr_map; i++) {
729 + struct resource *res;
730 +#ifndef CONFIG_RESOURCES_64BIT
731 + if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL)
732 + continue;
733 +#endif
734 + res = kzalloc(sizeof(struct resource), GFP_ATOMIC);
735 + switch (e820.map[i].type) {
736 + case E820_RAM: res->name = "System RAM"; break;
737 + case E820_ACPI: res->name = "ACPI Tables"; break;
738 + case E820_NVS: res->name = "ACPI Non-volatile Storage"; break;
739 + default: res->name = "reserved";
740 + }
741 + res->start = e820.map[i].addr;
742 + res->end = res->start + e820.map[i].size - 1;
743 + res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
744 + if (request_resource(&iomem_resource, res)) {
745 + kfree(res);
746 + continue;
747 + }
748 + if (e820.map[i].type == E820_RAM) {
749 + /*
750 + * We don't know which RAM region contains kernel data,
751 + * so we try it repeatedly and let the resource manager
752 + * test it.
753 + */
754 +#ifndef CONFIG_XEN
755 + request_resource(res, code_resource);
756 + request_resource(res, data_resource);
757 +#endif
758 +#ifdef CONFIG_KEXEC
759 + request_resource(res, &crashk_res);
760 +#ifdef CONFIG_XEN
761 + xen_machine_kexec_register_resources(res);
762 +#endif
763 +#endif
764 + }
765 + }
766 +}
767 +
768 +#undef e820
769 +
770 +/*
771 + * Request address space for all standard resources
772 + *
773 + * This is called just before pcibios_init(), which is also a
774 + * subsys_initcall, but is linked in later (in arch/i386/pci/common.c).
775 + */
776 +static int __init request_standard_resources(void)
777 +{
778 + int i;
779 +
780 + /* Nothing to do if not running in dom0. */
781 + if (!is_initial_xendomain())
782 + return 0;
783 +
784 + printk("Setting up standard PCI resources\n");
785 + if (efi_enabled)
786 + efi_initialize_iomem_resources(&code_resource, &data_resource);
787 + else
788 + legacy_init_iomem_resources(&code_resource, &data_resource);
789 +
790 + /* EFI systems may still have VGA */
791 + request_resource(&iomem_resource, &video_ram_resource);
792 +
793 + /* request I/O space for devices used on all i[345]86 PCs */
794 + for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++)
795 + request_resource(&ioport_resource, &standard_io_resources[i]);
796 + return 0;
797 +}
798 +
799 +subsys_initcall(request_standard_resources);
800 +
801 +void __init add_memory_region(unsigned long long start,
802 + unsigned long long size, int type)
803 +{
804 + int x;
805 +
806 + if (!efi_enabled) {
807 + x = e820.nr_map;
808 +
809 + if (x == E820MAX) {
810 + printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
811 + return;
812 + }
813 +
814 + e820.map[x].addr = start;
815 + e820.map[x].size = size;
816 + e820.map[x].type = type;
817 + e820.nr_map++;
818 + }
819 +} /* add_memory_region */
820 +
821 +/*
822 + * Sanitize the BIOS e820 map.
823 + *
824 + * Some e820 responses include overlapping entries. The following
825 + * replaces the original e820 map with a new one, removing overlaps.
826 + *
827 + */
828 +int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
829 +{
830 + struct change_member *change_tmp;
831 + unsigned long current_type, last_type;
832 + unsigned long long last_addr;
833 + int chgidx, still_changing;
834 + int overlap_entries;
835 + int new_bios_entry;
836 + int old_nr, new_nr, chg_nr;
837 + int i;
838 +
839 + /*
840 + Visually we're performing the following (1,2,3,4 = memory types)...
841 +
842 + Sample memory map (w/overlaps):
843 + ____22__________________
844 + ______________________4_
845 + ____1111________________
846 + _44_____________________
847 + 11111111________________
848 + ____________________33__
849 + ___________44___________
850 + __________33333_________
851 + ______________22________
852 + ___________________2222_
853 + _________111111111______
854 + _____________________11_
855 + _________________4______
856 +
857 + Sanitized equivalent (no overlap):
858 + 1_______________________
859 + _44_____________________
860 + ___1____________________
861 + ____22__________________
862 + ______11________________
863 + _________1______________
864 + __________3_____________
865 + ___________44___________
866 + _____________33_________
867 + _______________2________
868 + ________________1_______
869 + _________________4______
870 + ___________________2____
871 + ____________________33__
872 + ______________________4_
873 + */
874 + printk("sanitize start\n");
875 + /* if there's only one memory region, don't bother */
876 + if (*pnr_map < 2) {
877 + printk("sanitize bail 0\n");
878 + return -1;
879 + }
880 +
881 + old_nr = *pnr_map;
882 +
883 + /* bail out if we find any unreasonable addresses in bios map */
884 + for (i=0; i<old_nr; i++)
885 + if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr) {
886 + printk("sanitize bail 1\n");
887 + return -1;
888 + }
889 +
890 + /* create pointers for initial change-point information (for sorting) */
891 + for (i=0; i < 2*old_nr; i++)
892 + change_point[i] = &change_point_list[i];
893 +
894 + /* record all known change-points (starting and ending addresses),
895 + omitting those that are for empty memory regions */
896 + chgidx = 0;
897 + for (i=0; i < old_nr; i++) {
898 + if (biosmap[i].size != 0) {
899 + change_point[chgidx]->addr = biosmap[i].addr;
900 + change_point[chgidx++]->pbios = &biosmap[i];
901 + change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size;
902 + change_point[chgidx++]->pbios = &biosmap[i];
903 + }
904 + }
905 + chg_nr = chgidx; /* true number of change-points */
906 +
907 + /* sort change-point list by memory addresses (low -> high) */
908 + still_changing = 1;
909 + while (still_changing) {
910 + still_changing = 0;
911 + for (i=1; i < chg_nr; i++) {
912 + /* if <current_addr> > <last_addr>, swap */
913 + /* or, if current=<start_addr> & last=<end_addr>, swap */
914 + if ((change_point[i]->addr < change_point[i-1]->addr) ||
915 + ((change_point[i]->addr == change_point[i-1]->addr) &&
916 + (change_point[i]->addr == change_point[i]->pbios->addr) &&
917 + (change_point[i-1]->addr != change_point[i-1]->pbios->addr))
918 + )
919 + {
920 + change_tmp = change_point[i];
921 + change_point[i] = change_point[i-1];
922 + change_point[i-1] = change_tmp;
923 + still_changing=1;
924 + }
925 + }
926 + }
927 +
928 + /* create a new bios memory map, removing overlaps */
929 + overlap_entries=0; /* number of entries in the overlap table */
930 + new_bios_entry=0; /* index for creating new bios map entries */
931 + last_type = 0; /* start with undefined memory type */
932 + last_addr = 0; /* start with 0 as last starting address */
933 + /* loop through change-points, determining affect on the new bios map */
934 + for (chgidx=0; chgidx < chg_nr; chgidx++)
935 + {
936 + /* keep track of all overlapping bios entries */
937 + if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr)
938 + {
939 + /* add map entry to overlap list (> 1 entry implies an overlap) */
940 + overlap_list[overlap_entries++]=change_point[chgidx]->pbios;
941 + }
942 + else
943 + {
944 + /* remove entry from list (order independent, so swap with last) */
945 + for (i=0; i<overlap_entries; i++)
946 + {
947 + if (overlap_list[i] == change_point[chgidx]->pbios)
948 + overlap_list[i] = overlap_list[overlap_entries-1];
949 + }
950 + overlap_entries--;
951 + }
952 + /* if there are overlapping entries, decide which "type" to use */
953 + /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */
954 + current_type = 0;
955 + for (i=0; i<overlap_entries; i++)
956 + if (overlap_list[i]->type > current_type)
957 + current_type = overlap_list[i]->type;
958 + /* continue building up new bios map based on this information */
959 + if (current_type != last_type) {
960 + if (last_type != 0) {
961 + new_bios[new_bios_entry].size =
962 + change_point[chgidx]->addr - last_addr;
963 + /* move forward only if the new size was non-zero */
964 + if (new_bios[new_bios_entry].size != 0)
965 + if (++new_bios_entry >= E820MAX)
966 + break; /* no more space left for new bios entries */
967 + }
968 + if (current_type != 0) {
969 + new_bios[new_bios_entry].addr = change_point[chgidx]->addr;
970 + new_bios[new_bios_entry].type = current_type;
971 + last_addr=change_point[chgidx]->addr;
972 + }
973 + last_type = current_type;
974 + }
975 + }
976 + new_nr = new_bios_entry; /* retain count for new bios entries */
977 +
978 + /* copy new bios mapping into original location */
979 + memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry));
980 + *pnr_map = new_nr;
981 +
982 + printk("sanitize end\n");
983 + return 0;
984 +}
985 +
986 +/*
987 + * Copy the BIOS e820 map into a safe place.
988 + *
989 + * Sanity-check it while we're at it..
990 + *
991 + * If we're lucky and live on a modern system, the setup code
992 + * will have given us a memory map that we can use to properly
993 + * set up memory. If we aren't, we'll fake a memory map.
994 + *
995 + * We check to see that the memory map contains at least 2 elements
996 + * before we'll use it, because the detection code in setup.S may
997 + * not be perfect and most every PC known to man has two memory
998 + * regions: one from 0 to 640k, and one from 1mb up. (The IBM
999 + * thinkpad 560x, for example, does not cooperate with the memory
1000 + * detection code.)
1001 + */
1002 +int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
1003 +{
1004 +#ifndef CONFIG_XEN
1005 + /* Only one memory region (or negative)? Ignore it */
1006 + if (nr_map < 2)
1007 + return -1;
1008 +#else
1009 + BUG_ON(nr_map < 1);
1010 +#endif
1011 +
1012 + do {
1013 + unsigned long long start = biosmap->addr;
1014 + unsigned long long size = biosmap->size;
1015 + unsigned long long end = start + size;
1016 + unsigned long type = biosmap->type;
1017 + printk("copy_e820_map() start: %016Lx size: %016Lx end: %016Lx type: %ld\n", start, size, end, type);
1018 +
1019 + /* Overflow in 64 bits? Ignore the memory map. */
1020 + if (start > end)
1021 + return -1;
1022 +
1023 +#ifndef CONFIG_XEN
1024 + /*
1025 + * Some BIOSes claim RAM in the 640k - 1M region.
1026 + * Not right. Fix it up.
1027 + */
1028 + if (type == E820_RAM) {
1029 + printk("copy_e820_map() type is E820_RAM\n");
1030 + if (start < 0x100000ULL && end > 0xA0000ULL) {
1031 + printk("copy_e820_map() lies in range...\n");
1032 + if (start < 0xA0000ULL) {
1033 + printk("copy_e820_map() start < 0xA0000ULL\n");
1034 + add_memory_region(start, 0xA0000ULL-start, type);
1035 + }
1036 + if (end <= 0x100000ULL) {
1037 + printk("copy_e820_map() end <= 0x100000ULL\n");
1038 + continue;
1039 + }
1040 + start = 0x100000ULL;
1041 + size = end - start;
1042 + }
1043 + }
1044 +#endif
1045 + add_memory_region(start, size, type);
1046 + } while (biosmap++,--nr_map);
1047 +
1048 +#ifdef CONFIG_XEN
1049 + if (is_initial_xendomain()) {
1050 + struct xen_memory_map memmap;
1051 +
1052 + memmap.nr_entries = E820MAX;
1053 + set_xen_guest_handle(memmap.buffer, machine_e820.map);
1054 +
1055 + if (HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap))
1056 + BUG();
1057 + machine_e820.nr_map = memmap.nr_entries;
1058 + } else
1059 + machine_e820 = e820;
1060 +#endif
1061 +
1062 + return 0;
1063 +}
1064 +
1065 +/*
1066 + * Callback for efi_memory_walk.
1067 + */
1068 +static int __init
1069 +efi_find_max_pfn(unsigned long start, unsigned long end, void *arg)
1070 +{
1071 + unsigned long *max_pfn = arg, pfn;
1072 +
1073 + if (start < end) {
1074 + pfn = PFN_UP(end -1);
1075 + if (pfn > *max_pfn)
1076 + *max_pfn = pfn;
1077 + }
1078 + return 0;
1079 +}
1080 +
1081 +static int __init
1082 +efi_memory_present_wrapper(unsigned long start, unsigned long end, void *arg)
1083 +{
1084 + memory_present(0, PFN_UP(start), PFN_DOWN(end));
1085 + return 0;
1086 +}
1087 +
1088 +/*
1089 + * Find the highest page frame number we have available
1090 + */
1091 +void __init find_max_pfn(void)
1092 +{
1093 + int i;
1094 +
1095 + max_pfn = 0;
1096 + if (efi_enabled) {
1097 + efi_memmap_walk(efi_find_max_pfn, &max_pfn);
1098 + efi_memmap_walk(efi_memory_present_wrapper, NULL);
1099 + return;
1100 + }
1101 +
1102 + for (i = 0; i < e820.nr_map; i++) {
1103 + unsigned long start, end;
1104 + /* RAM? */
1105 + if (e820.map[i].type != E820_RAM)
1106 + continue;
1107 + start = PFN_UP(e820.map[i].addr);
1108 + end = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
1109 + if (start >= end)
1110 + continue;
1111 + if (end > max_pfn)
1112 + max_pfn = end;
1113 + memory_present(0, start, end);
1114 + }
1115 +}
1116 +
1117 +/*
1118 + * Free all available memory for boot time allocation. Used
1119 + * as a callback function by efi_memory_walk()
1120 + */
1121 +
1122 +static int __init
1123 +free_available_memory(unsigned long start, unsigned long end, void *arg)
1124 +{
1125 + /* check max_low_pfn */
1126 + if (start >= (max_low_pfn << PAGE_SHIFT))
1127 + return 0;
1128 + if (end >= (max_low_pfn << PAGE_SHIFT))
1129 + end = max_low_pfn << PAGE_SHIFT;
1130 + if (start < end)
1131 + free_bootmem(start, end - start);
1132 +
1133 + return 0;
1134 +}
1135 +/*
1136 + * Register fully available low RAM pages with the bootmem allocator.
1137 + */
1138 +void __init register_bootmem_low_pages(unsigned long max_low_pfn)
1139 +{
1140 + int i;
1141 +
1142 + if (efi_enabled) {
1143 + efi_memmap_walk(free_available_memory, NULL);
1144 + return;
1145 + }
1146 + for (i = 0; i < e820.nr_map; i++) {
1147 + unsigned long curr_pfn, last_pfn, size;
1148 + /*
1149 + * Reserve usable low memory
1150 + */
1151 + if (e820.map[i].type != E820_RAM)
1152 + continue;
1153 + /*
1154 + * We are rounding up the start address of usable memory:
1155 + */
1156 + curr_pfn = PFN_UP(e820.map[i].addr);
1157 + if (curr_pfn >= max_low_pfn)
1158 + continue;
1159 + /*
1160 + * ... and at the end of the usable range downwards:
1161 + */
1162 + last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
1163 +
1164 +#ifdef CONFIG_XEN
1165 + /*
1166 + * Truncate to the number of actual pages currently
1167 + * present.
1168 + */
1169 + if (last_pfn > xen_start_info->nr_pages)
1170 + last_pfn = xen_start_info->nr_pages;
1171 +#endif
1172 +
1173 + if (last_pfn > max_low_pfn)
1174 + last_pfn = max_low_pfn;
1175 +
1176 + /*
1177 + * .. finally, did all the rounding and playing
1178 + * around just make the area go away?
1179 + */
1180 + if (last_pfn <= curr_pfn)
1181 + continue;
1182 +
1183 + size = last_pfn - curr_pfn;
1184 + free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size));
1185 + }
1186 +}
1187 +
1188 +void __init e820_register_memory(void)
1189 +{
1190 + unsigned long gapstart, gapsize, round;
1191 + unsigned long long last;
1192 + int i;
1193 +
1194 +#ifdef CONFIG_XEN
1195 + if (is_initial_xendomain()) {
1196 + struct xen_memory_map memmap;
1197 +
1198 + memmap.nr_entries = E820MAX;
1199 + set_xen_guest_handle(memmap.buffer, machine_e820.map);
1200 +
1201 + if (HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap))
1202 + BUG();
1203 + machine_e820.nr_map = memmap.nr_entries;
1204 + }
1205 + else
1206 + machine_e820 = e820;
1207 +#define e820 machine_e820
1208 +#endif
1209 +
1210 + /*
1211 + * Search for the bigest gap in the low 32 bits of the e820
1212 + * memory space.
1213 + */
1214 + last = 0x100000000ull;
1215 + gapstart = 0x10000000;
1216 + gapsize = 0x400000;
1217 + i = e820.nr_map;
1218 + while (--i >= 0) {
1219 + unsigned long long start = e820.map[i].addr;
1220 + unsigned long long end = start + e820.map[i].size;
1221 +
1222 + /*
1223 + * Since "last" is at most 4GB, we know we'll
1224 + * fit in 32 bits if this condition is true
1225 + */
1226 + if (last > end) {
1227 + unsigned long gap = last - end;
1228 +
1229 + if (gap > gapsize) {
1230 + gapsize = gap;
1231 + gapstart = end;
1232 + }
1233 + }
1234 + if (start < last)
1235 + last = start;
1236 + }
1237 +#undef e820
1238 +
1239 + /*
1240 + * See how much we want to round up: start off with
1241 + * rounding to the next 1MB area.
1242 + */
1243 + round = 0x100000;
1244 + while ((gapsize >> 4) > round)
1245 + round += round;
1246 + /* Fun with two's complement */
1247 + pci_mem_start = (gapstart + round) & -round;
1248 +
1249 + printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n",
1250 + pci_mem_start, gapstart, gapsize);
1251 +}
1252 +
1253 +void __init print_memory_map(char *who)
1254 +{
1255 + int i;
1256 +
1257 + for (i = 0; i < e820.nr_map; i++) {
1258 + printk(" %s: %016Lx - %016Lx ", who,
1259 + e820.map[i].addr,
1260 + e820.map[i].addr + e820.map[i].size);
1261 + switch (e820.map[i].type) {
1262 + case E820_RAM: printk("(usable)\n");
1263 + break;
1264 + case E820_RESERVED:
1265 + printk("(reserved)\n");
1266 + break;
1267 + case E820_ACPI:
1268 + printk("(ACPI data)\n");
1269 + break;
1270 + case E820_NVS:
1271 + printk("(ACPI NVS)\n");
1272 + break;
1273 + default: printk("type %lu\n", e820.map[i].type);
1274 + break;
1275 + }
1276 + }
1277 +}
1278 +
1279 +static __init __always_inline void efi_limit_regions(unsigned long long size)
1280 +{
1281 + unsigned long long current_addr = 0;
1282 + efi_memory_desc_t *md, *next_md;
1283 + void *p, *p1;
1284 + int i, j;
1285 +
1286 + j = 0;
1287 + p1 = memmap.map;
1288 + for (p = p1, i = 0; p < memmap.map_end; p += memmap.desc_size, i++) {
1289 + md = p;
1290 + next_md = p1;
1291 + current_addr = md->phys_addr +
1292 + PFN_PHYS(md->num_pages);
1293 + if (is_available_memory(md)) {
1294 + if (md->phys_addr >= size) continue;
1295 + memcpy(next_md, md, memmap.desc_size);
1296 + if (current_addr >= size) {
1297 + next_md->num_pages -=
1298 + PFN_UP(current_addr-size);
1299 + }
1300 + p1 += memmap.desc_size;
1301 + next_md = p1;
1302 + j++;
1303 + } else if ((md->attribute & EFI_MEMORY_RUNTIME) ==
1304 + EFI_MEMORY_RUNTIME) {
1305 + /* In order to make runtime services
1306 + * available we have to include runtime
1307 + * memory regions in memory map */
1308 + memcpy(next_md, md, memmap.desc_size);
1309 + p1 += memmap.desc_size;
1310 + next_md = p1;
1311 + j++;
1312 + }
1313 + }
1314 + memmap.nr_map = j;
1315 + memmap.map_end = memmap.map +
1316 + (memmap.nr_map * memmap.desc_size);
1317 +}
1318 +
1319 +void __init limit_regions(unsigned long long size)
1320 +{
1321 + unsigned long long current_addr = 0;
1322 + int i;
1323 +
1324 + print_memory_map("limit_regions start");
1325 + if (efi_enabled) {
1326 + efi_limit_regions(size);
1327 + return;
1328 + }
1329 + for (i = 0; i < e820.nr_map; i++) {
1330 + current_addr = e820.map[i].addr + e820.map[i].size;
1331 + if (current_addr < size)
1332 + continue;
1333 +
1334 + if (e820.map[i].type != E820_RAM)
1335 + continue;
1336 +
1337 + if (e820.map[i].addr >= size) {
1338 + /*
1339 + * This region starts past the end of the
1340 + * requested size, skip it completely.
1341 + */
1342 + e820.nr_map = i;
1343 + } else {
1344 + e820.nr_map = i + 1;
1345 + e820.map[i].size -= current_addr - size;
1346 + }
1347 + print_memory_map("limit_regions endfor");
1348 + return;
1349 + }
1350 +#ifdef CONFIG_XEN
1351 + if (current_addr < size) {
1352 + /*
1353 + * The e820 map finished before our requested size so
1354 + * extend the final entry to the requested address.
1355 + */
1356 + --i;
1357 + if (e820.map[i].type == E820_RAM)
1358 + e820.map[i].size -= current_addr - size;
1359 + else
1360 + add_memory_region(current_addr, size - current_addr, E820_RAM);
1361 + }
1362 +#endif
1363 + print_memory_map("limit_regions endfunc");
1364 +}
1365 +
1366 +/*
1367 + * This function checks if any part of the range <start,end> is mapped
1368 + * with type.
1369 + */
1370 +int
1371 +e820_any_mapped(u64 start, u64 end, unsigned type)
1372 +{
1373 + int i;
1374 +
1375 +#ifndef CONFIG_XEN
1376 + for (i = 0; i < e820.nr_map; i++) {
1377 + const struct e820entry *ei = &e820.map[i];
1378 +#else
1379 + if (!is_initial_xendomain())
1380 + return 0;
1381 + for (i = 0; i < machine_e820.nr_map; ++i) {
1382 + const struct e820entry *ei = &machine_e820.map[i];
1383 +#endif
1384 +
1385 + if (type && ei->type != type)
1386 + continue;
1387 + if (ei->addr >= end || ei->addr + ei->size <= start)
1388 + continue;
1389 + return 1;
1390 + }
1391 + return 0;
1392 +}
1393 +EXPORT_SYMBOL_GPL(e820_any_mapped);
1394 +
1395 + /*
1396 + * This function checks if the entire range <start,end> is mapped with type.
1397 + *
1398 + * Note: this function only works correct if the e820 table is sorted and
1399 + * not-overlapping, which is the case
1400 + */
1401 +int __init
1402 +e820_all_mapped(unsigned long s, unsigned long e, unsigned type)
1403 +{
1404 + u64 start = s;
1405 + u64 end = e;
1406 + int i;
1407 +
1408 +#ifndef CONFIG_XEN
1409 + for (i = 0; i < e820.nr_map; i++) {
1410 + struct e820entry *ei = &e820.map[i];
1411 +#else
1412 + if (!is_initial_xendomain())
1413 + return 0;
1414 + for (i = 0; i < machine_e820.nr_map; ++i) {
1415 + const struct e820entry *ei = &machine_e820.map[i];
1416 +#endif
1417 +
1418 + if (type && ei->type != type)
1419 + continue;
1420 + /* is the region (part) in overlap with the current region ?*/
1421 + if (ei->addr >= end || ei->addr + ei->size <= start)
1422 + continue;
1423 + /* if the region is at the beginning of <start,end> we move
1424 + * start to the end of the region since it's ok until there
1425 + */
1426 + if (ei->addr <= start)
1427 + start = ei->addr + ei->size;
1428 + /* if start is now at or beyond end, we're done, full
1429 + * coverage */
1430 + if (start >= end)
1431 + return 1; /* we're done */
1432 + }
1433 + return 0;
1434 +}
1435 +
1436 +static int __init parse_memmap(char *arg)
1437 +{
1438 + if (!arg)
1439 + return -EINVAL;
1440 +
1441 + if (strcmp(arg, "exactmap") == 0) {
1442 +#ifdef CONFIG_CRASH_DUMP
1443 + /* If we are doing a crash dump, we
1444 + * still need to know the real mem
1445 + * size before original memory map is
1446 + * reset.
1447 + */
1448 + find_max_pfn();
1449 + saved_max_pfn = max_pfn;
1450 +#endif
1451 + e820.nr_map = 0;
1452 + user_defined_memmap = 1;
1453 + } else {
1454 + /* If the user specifies memory size, we
1455 + * limit the BIOS-provided memory map to
1456 + * that size. exactmap can be used to specify
1457 + * the exact map. mem=number can be used to
1458 + * trim the existing memory map.
1459 + */
1460 + unsigned long long start_at, mem_size;
1461 +
1462 + mem_size = memparse(arg, &arg);
1463 + if (*arg == '@') {
1464 + start_at = memparse(arg+1, &arg);
1465 + add_memory_region(start_at, mem_size, E820_RAM);
1466 + } else if (*arg == '#') {
1467 + start_at = memparse(arg+1, &arg);
1468 + add_memory_region(start_at, mem_size, E820_ACPI);
1469 + } else if (*arg == '$') {
1470 + start_at = memparse(arg+1, &arg);
1471 + add_memory_region(start_at, mem_size, E820_RESERVED);
1472 + } else {
1473 + limit_regions(mem_size);
1474 + user_defined_memmap = 1;
1475 + }
1476 + }
1477 + return 0;
1478 +}
1479 +early_param("memmap", parse_memmap);
1480 --- sle11-2009-06-29.orig/arch/x86/kernel/entry_32-xen.S 2009-05-14 11:07:47.000000000 +0200
1481 +++ sle11-2009-06-29/arch/x86/kernel/entry_32-xen.S 2009-05-14 11:08:06.000000000 +0200
1482 @@ -30,12 +30,13 @@
1483 * 18(%esp) - %eax
1484 * 1C(%esp) - %ds
1485 * 20(%esp) - %es
1486 - * 24(%esp) - orig_eax
1487 - * 28(%esp) - %eip
1488 - * 2C(%esp) - %cs
1489 - * 30(%esp) - %eflags
1490 - * 34(%esp) - %oldesp
1491 - * 38(%esp) - %oldss
1492 + * 24(%esp) - %gs
1493 + * 28(%esp) - orig_eax
1494 + * 2C(%esp) - %eip
1495 + * 30(%esp) - %cs
1496 + * 34(%esp) - %eflags
1497 + * 38(%esp) - %oldesp
1498 + * 3C(%esp) - %oldss
1499 *
1500 * "current" is in register %ebx during any slow entries.
1501 */
1502 @@ -48,27 +49,25 @@
1503 #include <asm/smp.h>
1504 #include <asm/page.h>
1505 #include <asm/desc.h>
1506 +#include <asm/percpu.h>
1507 #include <asm/dwarf2.h>
1508 #include "irq_vectors.h"
1509 #include <xen/interface/xen.h>
1510
1511 -#define nr_syscalls ((syscall_table_size)/4)
1512 +/*
1513 + * We use macros for low-level operations which need to be overridden
1514 + * for paravirtualization. The following will never clobber any registers:
1515 + * INTERRUPT_RETURN (aka. "iret")
1516 + * GET_CR0_INTO_EAX (aka. "movl %cr0, %eax")
1517 + * ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit").
1518 + *
1519 + * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must
1520 + * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY).
1521 + * Allowing a register to be clobbered can shrink the paravirt replacement
1522 + * enough to patch inline, increasing performance.
1523 + */
1524
1525 -EBX = 0x00
1526 -ECX = 0x04
1527 -EDX = 0x08
1528 -ESI = 0x0C
1529 -EDI = 0x10
1530 -EBP = 0x14
1531 -EAX = 0x18
1532 -DS = 0x1C
1533 -ES = 0x20
1534 -ORIG_EAX = 0x24
1535 -EIP = 0x28
1536 -CS = 0x2C
1537 -EFLAGS = 0x30
1538 -OLDESP = 0x34
1539 -OLDSS = 0x38
1540 +#define nr_syscalls ((syscall_table_size)/4)
1541
1542 CF_MASK = 0x00000001
1543 TF_MASK = 0x00000100
1544 @@ -79,61 +78,16 @@ VM_MASK = 0x00020000
1545 /* Pseudo-eflags. */
1546 NMI_MASK = 0x80000000
1547
1548 -#ifndef CONFIG_XEN
1549 -/* These are replaces for paravirtualization */
1550 -#define DISABLE_INTERRUPTS cli
1551 -#define ENABLE_INTERRUPTS sti
1552 -#define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit
1553 -#define INTERRUPT_RETURN iret
1554 -#define GET_CR0_INTO_EAX movl %cr0, %eax
1555 -#else
1556 -/* Offsets into shared_info_t. */
1557 -#define evtchn_upcall_pending /* 0 */
1558 -#define evtchn_upcall_mask 1
1559 -
1560 -#define sizeof_vcpu_shift 6
1561 -
1562 -#ifdef CONFIG_SMP
1563 -#define GET_VCPU_INFO movl TI_cpu(%ebp),%esi ; \
1564 - shl $sizeof_vcpu_shift,%esi ; \
1565 - addl HYPERVISOR_shared_info,%esi
1566 -#else
1567 -#define GET_VCPU_INFO movl HYPERVISOR_shared_info,%esi
1568 -#endif
1569 -
1570 -#define __DISABLE_INTERRUPTS movb $1,evtchn_upcall_mask(%esi)
1571 -#define __ENABLE_INTERRUPTS movb $0,evtchn_upcall_mask(%esi)
1572 -#define __TEST_PENDING testb $0xFF,evtchn_upcall_pending(%esi)
1573 -#define DISABLE_INTERRUPTS GET_VCPU_INFO ; \
1574 - __DISABLE_INTERRUPTS
1575 -#define ENABLE_INTERRUPTS GET_VCPU_INFO ; \
1576 - __ENABLE_INTERRUPTS
1577 -#define ENABLE_INTERRUPTS_SYSEXIT __ENABLE_INTERRUPTS ; \
1578 -sysexit_scrit: /**** START OF SYSEXIT CRITICAL REGION ****/ ; \
1579 - __TEST_PENDING ; \
1580 - jnz 14f # process more events if necessary... ; \
1581 - movl ESI(%esp), %esi ; \
1582 - sysexit ; \
1583 -14: __DISABLE_INTERRUPTS ; \
1584 - TRACE_IRQS_OFF ; \
1585 -sysexit_ecrit: /**** END OF SYSEXIT CRITICAL REGION ****/ ; \
1586 - push %esp ; \
1587 - call evtchn_do_upcall ; \
1588 - add $4,%esp ; \
1589 - jmp ret_from_intr
1590 -#define INTERRUPT_RETURN iret
1591 -#endif
1592 -
1593 #ifdef CONFIG_PREEMPT
1594 -#define preempt_stop DISABLE_INTERRUPTS; TRACE_IRQS_OFF
1595 +#define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
1596 #else
1597 -#define preempt_stop
1598 +#define preempt_stop(clobbers)
1599 #define resume_kernel restore_nocheck
1600 #endif
1601
1602 .macro TRACE_IRQS_IRET
1603 #ifdef CONFIG_TRACE_IRQFLAGS
1604 - testl $IF_MASK,EFLAGS(%esp) # interrupts off?
1605 + testl $IF_MASK,PT_EFLAGS(%esp) # interrupts off?
1606 jz 1f
1607 TRACE_IRQS_ON
1608 1:
1609 @@ -148,6 +102,9 @@ sysexit_ecrit: /**** END OF SYSEXIT CRIT
1610
1611 #define SAVE_ALL \
1612 cld; \
1613 + pushl %gs; \
1614 + CFI_ADJUST_CFA_OFFSET 4;\
1615 + /*CFI_REL_OFFSET gs, 0;*/\
1616 pushl %es; \
1617 CFI_ADJUST_CFA_OFFSET 4;\
1618 /*CFI_REL_OFFSET es, 0;*/\
1619 @@ -177,7 +134,9 @@ sysexit_ecrit: /**** END OF SYSEXIT CRIT
1620 CFI_REL_OFFSET ebx, 0;\
1621 movl $(__USER_DS), %edx; \
1622 movl %edx, %ds; \
1623 - movl %edx, %es;
1624 + movl %edx, %es; \
1625 + movl $(__KERNEL_PDA), %edx; \
1626 + movl %edx, %gs
1627
1628 #define RESTORE_INT_REGS \
1629 popl %ebx; \
1630 @@ -210,17 +169,22 @@ sysexit_ecrit: /**** END OF SYSEXIT CRIT
1631 2: popl %es; \
1632 CFI_ADJUST_CFA_OFFSET -4;\
1633 /*CFI_RESTORE es;*/\
1634 -.section .fixup,"ax"; \
1635 -3: movl $0,(%esp); \
1636 - jmp 1b; \
1637 +3: popl %gs; \
1638 + CFI_ADJUST_CFA_OFFSET -4;\
1639 + /*CFI_RESTORE gs;*/\
1640 +.pushsection .fixup,"ax"; \
1641 4: movl $0,(%esp); \
1642 + jmp 1b; \
1643 +5: movl $0,(%esp); \
1644 jmp 2b; \
1645 -.previous; \
1646 +6: movl $0,(%esp); \
1647 + jmp 3b; \
1648 .section __ex_table,"a";\
1649 .align 4; \
1650 - .long 1b,3b; \
1651 - .long 2b,4b; \
1652 -.previous
1653 + .long 1b,4b; \
1654 + .long 2b,5b; \
1655 + .long 3b,6b; \
1656 +.popsection
1657
1658 #define RING0_INT_FRAME \
1659 CFI_STARTPROC simple;\
1660 @@ -239,18 +203,18 @@ sysexit_ecrit: /**** END OF SYSEXIT CRIT
1661 #define RING0_PTREGS_FRAME \
1662 CFI_STARTPROC simple;\
1663 CFI_SIGNAL_FRAME;\
1664 - CFI_DEF_CFA esp, OLDESP-EBX;\
1665 - /*CFI_OFFSET cs, CS-OLDESP;*/\
1666 - CFI_OFFSET eip, EIP-OLDESP;\
1667 - /*CFI_OFFSET es, ES-OLDESP;*/\
1668 - /*CFI_OFFSET ds, DS-OLDESP;*/\
1669 - CFI_OFFSET eax, EAX-OLDESP;\
1670 - CFI_OFFSET ebp, EBP-OLDESP;\
1671 - CFI_OFFSET edi, EDI-OLDESP;\
1672 - CFI_OFFSET esi, ESI-OLDESP;\
1673 - CFI_OFFSET edx, EDX-OLDESP;\
1674 - CFI_OFFSET ecx, ECX-OLDESP;\
1675 - CFI_OFFSET ebx, EBX-OLDESP
1676 + CFI_DEF_CFA esp, PT_OLDESP-PT_EBX;\
1677 + /*CFI_OFFSET cs, PT_CS-PT_OLDESP;*/\
1678 + CFI_OFFSET eip, PT_EIP-PT_OLDESP;\
1679 + /*CFI_OFFSET es, PT_ES-PT_OLDESP;*/\
1680 + /*CFI_OFFSET ds, PT_DS-PT_OLDESP;*/\
1681 + CFI_OFFSET eax, PT_EAX-PT_OLDESP;\
1682 + CFI_OFFSET ebp, PT_EBP-PT_OLDESP;\
1683 + CFI_OFFSET edi, PT_EDI-PT_OLDESP;\
1684 + CFI_OFFSET esi, PT_ESI-PT_OLDESP;\
1685 + CFI_OFFSET edx, PT_EDX-PT_OLDESP;\
1686 + CFI_OFFSET ecx, PT_ECX-PT_OLDESP;\
1687 + CFI_OFFSET ebx, PT_EBX-PT_OLDESP
1688
1689 ENTRY(ret_from_fork)
1690 CFI_STARTPROC
1691 @@ -278,17 +242,18 @@ ENTRY(ret_from_fork)
1692 ALIGN
1693 RING0_PTREGS_FRAME
1694 ret_from_exception:
1695 - preempt_stop
1696 + preempt_stop(CLBR_ANY)
1697 ret_from_intr:
1698 GET_THREAD_INFO(%ebp)
1699 check_userspace:
1700 - movl EFLAGS(%esp), %eax # mix EFLAGS and CS
1701 - movb CS(%esp), %al
1702 + movl PT_EFLAGS(%esp), %eax # mix EFLAGS and CS
1703 + movb PT_CS(%esp), %al
1704 andl $(VM_MASK | SEGMENT_RPL_MASK), %eax
1705 cmpl $USER_RPL, %eax
1706 jb resume_kernel # not returning to v8086 or userspace
1707 +
1708 ENTRY(resume_userspace)
1709 - DISABLE_INTERRUPTS # make sure we don't miss an interrupt
1710 + DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt
1711 # setting need_resched or sigpending
1712 # between sampling and the iret
1713 movl TI_flags(%ebp), %ecx
1714 @@ -299,14 +264,14 @@ ENTRY(resume_userspace)
1715
1716 #ifdef CONFIG_PREEMPT
1717 ENTRY(resume_kernel)
1718 - DISABLE_INTERRUPTS
1719 + DISABLE_INTERRUPTS(CLBR_ANY)
1720 cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ?
1721 jnz restore_nocheck
1722 need_resched:
1723 movl TI_flags(%ebp), %ecx # need_resched set ?
1724 testb $_TIF_NEED_RESCHED, %cl
1725 jz restore_all
1726 - testl $IF_MASK,EFLAGS(%esp) # interrupts off (exception path) ?
1727 + testl $IF_MASK,PT_EFLAGS(%esp) # interrupts off (exception path) ?
1728 jz restore_all
1729 call preempt_schedule_irq
1730 jmp need_resched
1731 @@ -328,7 +293,7 @@ sysenter_past_esp:
1732 * No need to follow this irqs on/off section: the syscall
1733 * disabled irqs and here we enable it straight after entry:
1734 */
1735 - ENABLE_INTERRUPTS
1736 + ENABLE_INTERRUPTS(CLBR_NONE)
1737 pushl $(__USER_DS)
1738 CFI_ADJUST_CFA_OFFSET 4
1739 /*CFI_REL_OFFSET ss, 0*/
1740 @@ -340,12 +305,16 @@ sysenter_past_esp:
1741 pushl $(__USER_CS)
1742 CFI_ADJUST_CFA_OFFSET 4
1743 /*CFI_REL_OFFSET cs, 0*/
1744 +#ifndef CONFIG_COMPAT_VDSO
1745 /*
1746 * Push current_thread_info()->sysenter_return to the stack.
1747 * A tiny bit of offset fixup is necessary - 4*4 means the 4 words
1748 * pushed above; +8 corresponds to copy_thread's esp0 setting.
1749 */
1750 pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp)
1751 +#else
1752 + pushl $SYSENTER_RETURN
1753 +#endif
1754 CFI_ADJUST_CFA_OFFSET 4
1755 CFI_REL_OFFSET eip, 0
1756
1757 @@ -372,19 +341,27 @@ sysenter_past_esp:
1758 cmpl $(nr_syscalls), %eax
1759 jae syscall_badsys
1760 call *sys_call_table(,%eax,4)
1761 - movl %eax,EAX(%esp)
1762 - DISABLE_INTERRUPTS
1763 + movl %eax,PT_EAX(%esp)
1764 + DISABLE_INTERRUPTS(CLBR_ECX|CLBR_EDX)
1765 TRACE_IRQS_OFF
1766 movl TI_flags(%ebp), %ecx
1767 testw $_TIF_ALLWORK_MASK, %cx
1768 jne syscall_exit_work
1769 /* if something modifies registers it must also disable sysexit */
1770 - movl EIP(%esp), %edx
1771 - movl OLDESP(%esp), %ecx
1772 + movl PT_EIP(%esp), %edx
1773 + movl PT_OLDESP(%esp), %ecx
1774 xorl %ebp,%ebp
1775 TRACE_IRQS_ON
1776 +1: mov PT_GS(%esp), %gs
1777 ENABLE_INTERRUPTS_SYSEXIT
1778 CFI_ENDPROC
1779 +.pushsection .fixup,"ax"
1780 +2: movl $0,PT_GS(%esp)
1781 + jmp 1b
1782 +.section __ex_table,"a"
1783 + .align 4
1784 + .long 1b,2b
1785 +.popsection
1786
1787 # pv sysenter call handler stub
1788 ENTRY(sysenter_entry_pv)
1789 @@ -419,7 +396,7 @@ ENTRY(system_call)
1790 CFI_ADJUST_CFA_OFFSET 4
1791 SAVE_ALL
1792 GET_THREAD_INFO(%ebp)
1793 - testl $TF_MASK,EFLAGS(%esp)
1794 + testl $TF_MASK,PT_EFLAGS(%esp)
1795 jz no_singlestep
1796 orl $_TIF_SINGLESTEP,TI_flags(%ebp)
1797 no_singlestep:
1798 @@ -431,9 +408,9 @@ no_singlestep:
1799 jae syscall_badsys
1800 syscall_call:
1801 call *sys_call_table(,%eax,4)
1802 - movl %eax,EAX(%esp) # store the return value
1803 + movl %eax,PT_EAX(%esp) # store the return value
1804 syscall_exit:
1805 - DISABLE_INTERRUPTS # make sure we don't miss an interrupt
1806 + DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt
1807 # setting need_resched or sigpending
1808 # between sampling and the iret
1809 TRACE_IRQS_OFF
1810 @@ -443,12 +420,12 @@ syscall_exit:
1811
1812 restore_all:
1813 #ifndef CONFIG_XEN
1814 - movl EFLAGS(%esp), %eax # mix EFLAGS, SS and CS
1815 - # Warning: OLDSS(%esp) contains the wrong/random values if we
1816 + movl PT_EFLAGS(%esp), %eax # mix EFLAGS, SS and CS
1817 + # Warning: PT_OLDSS(%esp) contains the wrong/random values if we
1818 # are returning to the kernel.
1819 # See comments in process.c:copy_thread() for details.
1820 - movb OLDSS(%esp), %ah
1821 - movb CS(%esp), %al
1822 + movb PT_OLDSS(%esp), %ah
1823 + movb PT_CS(%esp), %al
1824 andl $(VM_MASK | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
1825 cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax
1826 CFI_REMEMBER_STATE
1827 @@ -456,7 +433,7 @@ restore_all:
1828 restore_nocheck:
1829 #else
1830 restore_nocheck:
1831 - movl EFLAGS(%esp), %eax
1832 + movl PT_EFLAGS(%esp), %eax
1833 testl $(VM_MASK|NMI_MASK), %eax
1834 CFI_REMEMBER_STATE
1835 jnz hypervisor_iret
1836 @@ -470,13 +447,13 @@ restore_nocheck:
1837 TRACE_IRQS_IRET
1838 restore_nocheck_notrace:
1839 RESTORE_REGS
1840 - addl $4, %esp
1841 + addl $4, %esp # skip orig_eax/error_code
1842 CFI_ADJUST_CFA_OFFSET -4
1843 1: INTERRUPT_RETURN
1844 .section .fixup,"ax"
1845 iret_exc:
1846 #ifndef CONFIG_XEN
1847 - ENABLE_INTERRUPTS
1848 + ENABLE_INTERRUPTS(CLBR_NONE)
1849 #endif
1850 pushl $0 # no error code
1851 pushl $do_iret_error
1852 @@ -490,33 +467,42 @@ iret_exc:
1853 CFI_RESTORE_STATE
1854 #ifndef CONFIG_XEN
1855 ldt_ss:
1856 - larl OLDSS(%esp), %eax
1857 + larl PT_OLDSS(%esp), %eax
1858 jnz restore_nocheck
1859 testl $0x00400000, %eax # returning to 32bit stack?
1860 jnz restore_nocheck # allright, normal return
1861 +
1862 +#ifdef CONFIG_PARAVIRT
1863 + /*
1864 + * The kernel can't run on a non-flat stack if paravirt mode
1865 + * is active. Rather than try to fixup the high bits of
1866 + * ESP, bypass this code entirely. This may break DOSemu
1867 + * and/or Wine support in a paravirt VM, although the option
1868 + * is still available to implement the setting of the high
1869 + * 16-bits in the INTERRUPT_RETURN paravirt-op.
1870 + */
1871 + cmpl $0, paravirt_ops+PARAVIRT_enabled
1872 + jne restore_nocheck
1873 +#endif
1874 +
1875 /* If returning to userspace with 16bit stack,
1876 * try to fix the higher word of ESP, as the CPU
1877 * won't restore it.
1878 * This is an "official" bug of all the x86-compatible
1879 * CPUs, which we can try to work around to make
1880 * dosemu and wine happy. */
1881 - subl $8, %esp # reserve space for switch16 pointer
1882 - CFI_ADJUST_CFA_OFFSET 8
1883 - DISABLE_INTERRUPTS
1884 + movl PT_OLDESP(%esp), %eax
1885 + movl %esp, %edx
1886 + call patch_espfix_desc
1887 + pushl $__ESPFIX_SS
1888 + CFI_ADJUST_CFA_OFFSET 4
1889 + pushl %eax
1890 + CFI_ADJUST_CFA_OFFSET 4
1891 + DISABLE_INTERRUPTS(CLBR_EAX)
1892 TRACE_IRQS_OFF
1893 - movl %esp, %eax
1894 - /* Set up the 16bit stack frame with switch32 pointer on top,
1895 - * and a switch16 pointer on top of the current frame. */
1896 - call setup_x86_bogus_stack
1897 - CFI_ADJUST_CFA_OFFSET -8 # frame has moved
1898 - TRACE_IRQS_IRET
1899 - RESTORE_REGS
1900 - lss 20+4(%esp), %esp # switch to 16bit stack
1901 -1: INTERRUPT_RETURN
1902 -.section __ex_table,"a"
1903 - .align 4
1904 - .long 1b,iret_exc
1905 -.previous
1906 + lss (%esp), %esp
1907 + CFI_ADJUST_CFA_OFFSET -8
1908 + jmp restore_nocheck
1909 #else
1910 ALIGN
1911 restore_all_enable_events:
1912 @@ -540,7 +526,7 @@ ecrit: /**** END OF CRITICAL REGION ***
1913
1914 CFI_RESTORE_STATE
1915 hypervisor_iret:
1916 - andl $~NMI_MASK, EFLAGS(%esp)
1917 + andl $~NMI_MASK, PT_EFLAGS(%esp)
1918 RESTORE_REGS
1919 addl $4, %esp
1920 CFI_ADJUST_CFA_OFFSET -4
1921 @@ -556,7 +542,7 @@ work_pending:
1922 jz work_notifysig
1923 work_resched:
1924 call schedule
1925 - DISABLE_INTERRUPTS # make sure we don't miss an interrupt
1926 + DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt
1927 # setting need_resched or sigpending
1928 # between sampling and the iret
1929 TRACE_IRQS_OFF
1930 @@ -569,7 +555,8 @@ work_resched:
1931
1932 work_notifysig: # deal with pending signals and
1933 # notify-resume requests
1934 - testl $VM_MASK, EFLAGS(%esp)
1935 +#ifdef CONFIG_VM86
1936 + testl $VM_MASK, PT_EFLAGS(%esp)
1937 movl %esp, %eax
1938 jne work_notifysig_v86 # returning to kernel-space or
1939 # vm86-space
1940 @@ -579,29 +566,30 @@ work_notifysig: # deal with pending s
1941
1942 ALIGN
1943 work_notifysig_v86:
1944 -#ifdef CONFIG_VM86
1945 pushl %ecx # save ti_flags for do_notify_resume
1946 CFI_ADJUST_CFA_OFFSET 4
1947 call save_v86_state # %eax contains pt_regs pointer
1948 popl %ecx
1949 CFI_ADJUST_CFA_OFFSET -4
1950 movl %eax, %esp
1951 +#else
1952 + movl %esp, %eax
1953 +#endif
1954 xorl %edx, %edx
1955 call do_notify_resume
1956 jmp resume_userspace_sig
1957 -#endif
1958
1959 # perform syscall exit tracing
1960 ALIGN
1961 syscall_trace_entry:
1962 - movl $-ENOSYS,EAX(%esp)
1963 + movl $-ENOSYS,PT_EAX(%esp)
1964 movl %esp, %eax
1965 xorl %edx,%edx
1966 call do_syscall_trace
1967 cmpl $0, %eax
1968 jne resume_userspace # ret != 0 -> running under PTRACE_SYSEMU,
1969 # so must skip actual syscall
1970 - movl ORIG_EAX(%esp), %eax
1971 + movl PT_ORIG_EAX(%esp), %eax
1972 cmpl $(nr_syscalls), %eax
1973 jnae syscall_call
1974 jmp syscall_exit
1975 @@ -612,7 +600,7 @@ syscall_exit_work:
1976 testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl
1977 jz work_pending
1978 TRACE_IRQS_ON
1979 - ENABLE_INTERRUPTS # could let do_syscall_trace() call
1980 + ENABLE_INTERRUPTS(CLBR_ANY) # could let do_syscall_trace() call
1981 # schedule() instead
1982 movl %esp, %eax
1983 movl $1, %edx
1984 @@ -626,40 +614,39 @@ syscall_fault:
1985 CFI_ADJUST_CFA_OFFSET 4
1986 SAVE_ALL
1987 GET_THREAD_INFO(%ebp)
1988 - movl $-EFAULT,EAX(%esp)
1989 + movl $-EFAULT,PT_EAX(%esp)
1990 jmp resume_userspace
1991
1992 syscall_badsys:
1993 - movl $-ENOSYS,EAX(%esp)
1994 + movl $-ENOSYS,PT_EAX(%esp)
1995 jmp resume_userspace
1996 CFI_ENDPROC
1997
1998 #ifndef CONFIG_XEN
1999 #define FIXUP_ESPFIX_STACK \
2000 - movl %esp, %eax; \
2001 - /* switch to 32bit stack using the pointer on top of 16bit stack */ \
2002 - lss %ss:CPU_16BIT_STACK_SIZE-8, %esp; \
2003 - /* copy data from 16bit stack to 32bit stack */ \
2004 - call fixup_x86_bogus_stack; \
2005 - /* put ESP to the proper location */ \
2006 - movl %eax, %esp;
2007 -#define UNWIND_ESPFIX_STACK \
2008 + /* since we are on a wrong stack, we cant make it a C code :( */ \
2009 + movl %gs:PDA_cpu, %ebx; \
2010 + PER_CPU(cpu_gdt_descr, %ebx); \
2011 + movl GDS_address(%ebx), %ebx; \
2012 + GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \
2013 + addl %esp, %eax; \
2014 + pushl $__KERNEL_DS; \
2015 + CFI_ADJUST_CFA_OFFSET 4; \
2016 pushl %eax; \
2017 CFI_ADJUST_CFA_OFFSET 4; \
2018 + lss (%esp), %esp; \
2019 + CFI_ADJUST_CFA_OFFSET -8;
2020 +#define UNWIND_ESPFIX_STACK \
2021 movl %ss, %eax; \
2022 - /* see if on 16bit stack */ \
2023 + /* see if on espfix stack */ \
2024 cmpw $__ESPFIX_SS, %ax; \
2025 - je 28f; \
2026 -27: popl %eax; \
2027 - CFI_ADJUST_CFA_OFFSET -4; \
2028 -.section .fixup,"ax"; \
2029 -28: movl $__KERNEL_DS, %eax; \
2030 + jne 27f; \
2031 + movl $__KERNEL_DS, %eax; \
2032 movl %eax, %ds; \
2033 movl %eax, %es; \
2034 - /* switch to 32bit stack */ \
2035 + /* switch to normal stack */ \
2036 FIXUP_ESPFIX_STACK; \
2037 - jmp 27b; \
2038 -.previous
2039 +27:;
2040
2041 /*
2042 * Build the entry stubs and pointer table with
2043 @@ -723,13 +710,16 @@ KPROBE_ENTRY(page_fault)
2044 CFI_ADJUST_CFA_OFFSET 4
2045 ALIGN
2046 error_code:
2047 + /* the function address is in %gs's slot on the stack */
2048 + pushl %es
2049 + CFI_ADJUST_CFA_OFFSET 4
2050 + /*CFI_REL_OFFSET es, 0*/
2051 pushl %ds
2052 CFI_ADJUST_CFA_OFFSET 4
2053 /*CFI_REL_OFFSET ds, 0*/
2054 pushl %eax
2055 CFI_ADJUST_CFA_OFFSET 4
2056 CFI_REL_OFFSET eax, 0
2057 - xorl %eax, %eax
2058 pushl %ebp
2059 CFI_ADJUST_CFA_OFFSET 4
2060 CFI_REL_OFFSET ebp, 0
2061 @@ -742,7 +732,6 @@ error_code:
2062 pushl %edx
2063 CFI_ADJUST_CFA_OFFSET 4
2064 CFI_REL_OFFSET edx, 0
2065 - decl %eax # eax = -1
2066 pushl %ecx
2067 CFI_ADJUST_CFA_OFFSET 4
2068 CFI_REL_OFFSET ecx, 0
2069 @@ -750,18 +739,20 @@ error_code:
2070 CFI_ADJUST_CFA_OFFSET 4
2071 CFI_REL_OFFSET ebx, 0
2072 cld
2073 - pushl %es
2074 + pushl %gs
2075 CFI_ADJUST_CFA_OFFSET 4
2076 - /*CFI_REL_OFFSET es, 0*/
2077 + /*CFI_REL_OFFSET gs, 0*/
2078 + movl $(__KERNEL_PDA), %ecx
2079 + movl %ecx, %gs
2080 UNWIND_ESPFIX_STACK
2081 popl %ecx
2082 CFI_ADJUST_CFA_OFFSET -4
2083 /*CFI_REGISTER es, ecx*/
2084 - movl ES(%esp), %edi # get the function address
2085 - movl ORIG_EAX(%esp), %edx # get the error code
2086 - movl %eax, ORIG_EAX(%esp)
2087 - movl %ecx, ES(%esp)
2088 - /*CFI_REL_OFFSET es, ES*/
2089 + movl PT_GS(%esp), %edi # get the function address
2090 + movl PT_ORIG_EAX(%esp), %edx # get the error code
2091 + movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart
2092 + mov %ecx, PT_GS(%esp)
2093 + /*CFI_REL_OFFSET gs, ES*/
2094 movl $(__USER_DS), %ecx
2095 movl %ecx, %ds
2096 movl %ecx, %es
2097 @@ -793,8 +784,8 @@ ENTRY(hypervisor_callback)
2098 pushl %eax
2099 CFI_ADJUST_CFA_OFFSET 4
2100 SAVE_ALL
2101 - movl CS(%esp),%ecx
2102 - movl EIP(%esp),%eax
2103 + movl PT_CS(%esp),%ecx
2104 + movl PT_EIP(%esp),%eax
2105 andl $SEGMENT_RPL_MASK,%ecx
2106 cmpl $USER_RPL,%ecx
2107 jae .Ldo_upcall
2108 @@ -808,7 +799,7 @@ ENTRY(hypervisor_callback)
2109 jb .Ldo_upcall
2110 cmpl $sysexit_ecrit,%eax
2111 ja .Ldo_upcall
2112 - addl $OLDESP,%esp # Remove eflags...ebx from stack frame.
2113 + addl $PT_OLDESP,%esp # Remove eflags...ebx from stack frame.
2114 #endif
2115 .Ldo_upcall:
2116 push %esp
2117 @@ -830,7 +821,7 @@ critical_region_fixup:
2118 movsbl critical_fixup_table-scrit(%eax),%ecx # %ecx contains num slots popped
2119 testl %ecx,%ecx
2120 leal (%esp,%ecx,4),%esi # %esi points at end of src region
2121 - leal OLDESP(%esp),%edi # %edi points at end of dst region
2122 + leal PT_OLDESP(%esp),%edi # %edi points at end of dst region
2123 jle 17f # skip loop if nothing to copy
2124 16: subl $4,%esi # pre-decrementing copy loop
2125 subl $4,%edi
2126 @@ -853,8 +844,9 @@ critical_fixup_table:
2127 .byte 6 # pop %eax
2128 .byte 7 # pop %ds
2129 .byte 8 # pop %es
2130 - .byte 9,9,9 # add $4,%esp
2131 - .byte 10 # iret
2132 + .byte 9,9 # pop %gs
2133 + .byte 10,10,10 # add $4,%esp
2134 + .byte 11 # iret
2135 .byte -1,-1,-1,-1 # movb $1,1(%esi) = __DISABLE_INTERRUPTS
2136 .previous
2137
2138 @@ -944,7 +936,7 @@ ENTRY(device_not_available)
2139 jmp ret_from_exception
2140 device_available_emulate:
2141 #endif
2142 - preempt_stop
2143 + preempt_stop(CLBR_ANY)
2144 call math_state_restore
2145 jmp ret_from_exception
2146 CFI_ENDPROC
2147 @@ -1014,7 +1006,7 @@ KPROBE_ENTRY(nmi)
2148 cmpw $__ESPFIX_SS, %ax
2149 popl %eax
2150 CFI_ADJUST_CFA_OFFSET -4
2151 - je nmi_16bit_stack
2152 + je nmi_espfix_stack
2153 cmpl $sysenter_entry,(%esp)
2154 je nmi_stack_fixup
2155 pushl %eax
2156 @@ -1057,7 +1049,7 @@ nmi_debug_stack_check:
2157 FIX_STACK(24,nmi_stack_correct, 1)
2158 jmp nmi_stack_correct
2159
2160 -nmi_16bit_stack:
2161 +nmi_espfix_stack:
2162 /* We have a RING0_INT_FRAME here.
2163 *
2164 * create the pointer to lss back
2165 @@ -1066,7 +1058,6 @@ nmi_16bit_stack:
2166 CFI_ADJUST_CFA_OFFSET 4
2167 pushl %esp
2168 CFI_ADJUST_CFA_OFFSET 4
2169 - movzwl %sp, %esp
2170 addw $4, (%esp)
2171 /* copy the iret frame of 12 bytes */
2172 .rept 3
2173 @@ -1077,11 +1068,11 @@ nmi_16bit_stack:
2174 CFI_ADJUST_CFA_OFFSET 4
2175 SAVE_ALL
2176 FIXUP_ESPFIX_STACK # %eax == %esp
2177 - CFI_ADJUST_CFA_OFFSET -20 # the frame has now moved
2178 xorl %edx,%edx # zero error code
2179 call do_nmi
2180 RESTORE_REGS
2181 - lss 12+4(%esp), %esp # back to 16bit stack
2182 + lss 12+4(%esp), %esp # back to espfix stack
2183 + CFI_ADJUST_CFA_OFFSET -24
2184 1: INTERRUPT_RETURN
2185 CFI_ENDPROC
2186 .section __ex_table,"a"
2187 @@ -1097,12 +1088,25 @@ KPROBE_ENTRY(nmi)
2188 xorl %edx,%edx # zero error code
2189 movl %esp,%eax # pt_regs pointer
2190 call do_nmi
2191 - orl $NMI_MASK, EFLAGS(%esp)
2192 + orl $NMI_MASK, PT_EFLAGS(%esp)
2193 jmp restore_all
2194 CFI_ENDPROC
2195 #endif
2196 KPROBE_END(nmi)
2197
2198 +#ifdef CONFIG_PARAVIRT
2199 +ENTRY(native_iret)
2200 +1: iret
2201 +.section __ex_table,"a"
2202 + .align 4
2203 + .long 1b,iret_exc
2204 +.previous
2205 +
2206 +ENTRY(native_irq_enable_sysexit)
2207 + sti
2208 + sysexit
2209 +#endif
2210 +
2211 KPROBE_ENTRY(int3)
2212 RING0_INT_FRAME
2213 pushl $-1 # mark this as an int
2214 @@ -1218,37 +1222,6 @@ ENTRY(spurious_interrupt_bug)
2215 CFI_ENDPROC
2216 #endif /* !CONFIG_XEN */
2217
2218 -#ifdef CONFIG_STACK_UNWIND
2219 -ENTRY(arch_unwind_init_running)
2220 - CFI_STARTPROC
2221 - movl 4(%esp), %edx
2222 - movl (%esp), %ecx
2223 - leal 4(%esp), %eax
2224 - movl %ebx, EBX(%edx)
2225 - xorl %ebx, %ebx
2226 - movl %ebx, ECX(%edx)
2227 - movl %ebx, EDX(%edx)
2228 - movl %esi, ESI(%edx)
2229 - movl %edi, EDI(%edx)
2230 - movl %ebp, EBP(%edx)
2231 - movl %ebx, EAX(%edx)
2232 - movl $__USER_DS, DS(%edx)
2233 - movl $__USER_DS, ES(%edx)
2234 - movl %ebx, ORIG_EAX(%edx)
2235 - movl %ecx, EIP(%edx)
2236 - movl 12(%esp), %ecx
2237 - movl $__KERNEL_CS, CS(%edx)
2238 - movl %ebx, EFLAGS(%edx)
2239 - movl %eax, OLDESP(%edx)
2240 - movl 8(%esp), %eax
2241 - movl %ecx, 8(%esp)
2242 - movl EBX(%edx), %ebx
2243 - movl $__KERNEL_DS, OLDSS(%edx)
2244 - jmpl *%eax
2245 - CFI_ENDPROC
2246 -ENDPROC(arch_unwind_init_running)
2247 -#endif
2248 -
2249 ENTRY(fixup_4gb_segment)
2250 RING0_EC_FRAME
2251 pushl $do_fixup_4gb_segment
2252 --- sle11-2009-06-29.orig/arch/x86/kernel/head_32-xen.S 2009-03-04 11:28:34.000000000 +0100
2253 +++ sle11-2009-06-29/arch/x86/kernel/head_32-xen.S 2008-12-15 11:26:44.000000000 +0100
2254 @@ -9,6 +9,7 @@
2255 #include <asm/cache.h>
2256 #include <asm/thread_info.h>
2257 #include <asm/asm-offsets.h>
2258 +#include <asm/boot.h>
2259 #include <asm/dwarf2.h>
2260 #include <xen/interface/xen.h>
2261 #include <xen/interface/elfnote.h>
2262 @@ -35,6 +36,8 @@ ENTRY(startup_32)
2263 /* Set up the stack pointer */
2264 movl $(init_thread_union+THREAD_SIZE),%esp
2265
2266 + call setup_pda
2267 +
2268 /* get vendor info */
2269 xorl %eax,%eax # call CPUID with 0 -> return vendor ID
2270 XEN_CPUID
2271 @@ -57,14 +60,58 @@ ENTRY(startup_32)
2272
2273 movb $1,X86_HARD_MATH
2274
2275 - xorl %eax,%eax # Clear FS/GS and LDT
2276 + xorl %eax,%eax # Clear FS
2277 movl %eax,%fs
2278 - movl %eax,%gs
2279 +
2280 + movl $(__KERNEL_PDA),%eax
2281 + mov %eax,%gs
2282 +
2283 cld # gcc2 wants the direction flag cleared at all times
2284
2285 pushl $0 # fake return address for unwinder
2286 jmp start_kernel
2287
2288 +/*
2289 + * Point the GDT at this CPU's PDA. This will be
2290 + * cpu_gdt_table and boot_pda.
2291 + */
2292 +setup_pda:
2293 + /* get the PDA pointer */
2294 + movl $boot_pda, %eax
2295 +
2296 + /* slot the PDA address into the GDT */
2297 + mov $cpu_gdt_table, %ecx
2298 + mov %ax, (__KERNEL_PDA+0+2)(%ecx) /* base & 0x0000ffff */
2299 + shr $16, %eax
2300 + mov %al, (__KERNEL_PDA+4+0)(%ecx) /* base & 0x00ff0000 */
2301 + mov %ah, (__KERNEL_PDA+4+3)(%ecx) /* base & 0xff000000 */
2302 +
2303 + # %esi still points to start_info, and no registers
2304 + # need to be preserved.
2305 +
2306 + movl XEN_START_mfn_list(%esi), %ebx
2307 + movl $(cpu_gdt_table - __PAGE_OFFSET), %eax
2308 + shrl $PAGE_SHIFT, %eax
2309 + movl (%ebx,%eax,4), %ecx
2310 + pushl %ecx # frame number for set_gdt below
2311 +
2312 + xorl %esi, %esi
2313 + xorl %edx, %edx
2314 + shldl $PAGE_SHIFT, %ecx, %edx
2315 + shll $PAGE_SHIFT, %ecx
2316 + orl $0x61, %ecx
2317 + movl $cpu_gdt_table, %ebx
2318 + movl $__HYPERVISOR_update_va_mapping, %eax
2319 + int $0x82
2320 +
2321 + movl $(PAGE_SIZE_asm / 8), %ecx
2322 + movl %esp, %ebx
2323 + movl $__HYPERVISOR_set_gdt, %eax
2324 + int $0x82
2325 +
2326 + popl %ecx
2327 + ret
2328 +
2329 #define HYPERCALL_PAGE_OFFSET 0x1000
2330 .org HYPERCALL_PAGE_OFFSET
2331 ENTRY(hypercall_page)
2332 @@ -93,7 +140,8 @@ ENTRY(empty_zero_page)
2333 /*
2334 * The Global Descriptor Table contains 28 quadwords, per-CPU.
2335 */
2336 - .align L1_CACHE_BYTES
2337 + .section .data.page_aligned, "aw"
2338 + .align PAGE_SIZE_asm
2339 ENTRY(cpu_gdt_table)
2340 .quad 0x0000000000000000 /* NULL descriptor */
2341 .quad 0x0000000000000000 /* 0x0b reserved */
2342 @@ -135,12 +183,13 @@ ENTRY(cpu_gdt_table)
2343 .quad 0x0000000000000000 /* 0xc0 APM CS 16 code (16 bit) */
2344 .quad 0x0000000000000000 /* 0xc8 APM DS data */
2345
2346 - .quad 0x0000000000000000 /* 0xd0 - ESPFIX 16-bit SS */
2347 - .quad 0x0000000000000000 /* 0xd8 - unused */
2348 + .quad 0x0000000000000000 /* 0xd0 - ESPFIX SS */
2349 + .quad 0x00cf92000000ffff /* 0xd8 - PDA */
2350 .quad 0x0000000000000000 /* 0xe0 - unused */
2351 .quad 0x0000000000000000 /* 0xe8 - unused */
2352 .quad 0x0000000000000000 /* 0xf0 - unused */
2353 .quad 0x0000000000000000 /* 0xf8 - GDT entry 31: double-fault TSS */
2354 + .align PAGE_SIZE_asm
2355
2356 #if CONFIG_XEN_COMPAT <= 0x030002
2357 /*
2358 @@ -165,9 +214,9 @@ ENTRY(cpu_gdt_table)
2359 .ascii ",ELF_PADDR_OFFSET=0x"
2360 utoa __PAGE_OFFSET
2361 .ascii ",VIRT_ENTRY=0x"
2362 - utoa (__PAGE_OFFSET + __PHYSICAL_START + VIRT_ENTRY_OFFSET)
2363 + utoa (__PAGE_OFFSET + LOAD_PHYSICAL_ADDR + VIRT_ENTRY_OFFSET)
2364 .ascii ",HYPERCALL_PAGE=0x"
2365 - utoa ((__PHYSICAL_START+HYPERCALL_PAGE_OFFSET)>>PAGE_SHIFT)
2366 + utoa ((LOAD_PHYSICAL_ADDR+HYPERCALL_PAGE_OFFSET)>>PAGE_SHIFT)
2367 .ascii ",FEATURES=writable_page_tables"
2368 .ascii "|writable_descriptor_tables"
2369 .ascii "|auto_translated_physmap"
2370 --- sle11-2009-06-29.orig/arch/x86/kernel/io_apic_32-xen.c 2009-03-04 11:28:34.000000000 +0100
2371 +++ sle11-2009-06-29/arch/x86/kernel/io_apic_32-xen.c 2008-12-15 11:26:44.000000000 +0100
2372 @@ -34,6 +34,7 @@
2373 #include <linux/pci.h>
2374 #include <linux/msi.h>
2375 #include <linux/htirq.h>
2376 +#include <linux/freezer.h>
2377
2378 #include <asm/io.h>
2379 #include <asm/smp.h>
2380 @@ -199,14 +200,20 @@ static struct IO_APIC_route_entry ioapic
2381 * the interrupt, and we need to make sure the entry is fully populated
2382 * before that happens.
2383 */
2384 -static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
2385 +static void
2386 +__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
2387 {
2388 - unsigned long flags;
2389 union entry_union eu;
2390 eu.entry = e;
2391 - spin_lock_irqsave(&ioapic_lock, flags);
2392 io_apic_write(apic, 0x11 + 2*pin, eu.w2);
2393 io_apic_write(apic, 0x10 + 2*pin, eu.w1);
2394 +}
2395 +
2396 +static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
2397 +{
2398 + unsigned long flags;
2399 + spin_lock_irqsave(&ioapic_lock, flags);
2400 + __ioapic_write_entry(apic, pin, e);
2401 spin_unlock_irqrestore(&ioapic_lock, flags);
2402 }
2403
2404 @@ -889,8 +896,7 @@ static int __init find_isa_irq_pin(int i
2405
2406 if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
2407 mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
2408 - mp_bus_id_to_type[lbus] == MP_BUS_MCA ||
2409 - mp_bus_id_to_type[lbus] == MP_BUS_NEC98
2410 + mp_bus_id_to_type[lbus] == MP_BUS_MCA
2411 ) &&
2412 (mp_irqs[i].mpc_irqtype == type) &&
2413 (mp_irqs[i].mpc_srcbusirq == irq))
2414 @@ -909,8 +915,7 @@ static int __init find_isa_irq_apic(int
2415
2416 if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
2417 mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
2418 - mp_bus_id_to_type[lbus] == MP_BUS_MCA ||
2419 - mp_bus_id_to_type[lbus] == MP_BUS_NEC98
2420 + mp_bus_id_to_type[lbus] == MP_BUS_MCA
2421 ) &&
2422 (mp_irqs[i].mpc_irqtype == type) &&
2423 (mp_irqs[i].mpc_srcbusirq == irq))
2424 @@ -1043,12 +1048,6 @@ static int EISA_ELCR(unsigned int irq)
2425 #define default_MCA_trigger(idx) (1)
2426 #define default_MCA_polarity(idx) (0)
2427
2428 -/* NEC98 interrupts are always polarity zero edge triggered,
2429 - * when listed as conforming in the MP table. */
2430 -
2431 -#define default_NEC98_trigger(idx) (0)
2432 -#define default_NEC98_polarity(idx) (0)
2433 -
2434 static int __init MPBIOS_polarity(int idx)
2435 {
2436 int bus = mp_irqs[idx].mpc_srcbus;
2437 @@ -1083,11 +1082,6 @@ static int __init MPBIOS_polarity(int id
2438 polarity = default_MCA_polarity(idx);
2439 break;
2440 }
2441 - case MP_BUS_NEC98: /* NEC 98 pin */
2442 - {
2443 - polarity = default_NEC98_polarity(idx);
2444 - break;
2445 - }
2446 default:
2447 {
2448 printk(KERN_WARNING "broken BIOS!!\n");
2449 @@ -1157,11 +1151,6 @@ static int MPBIOS_trigger(int idx)
2450 trigger = default_MCA_trigger(idx);
2451 break;
2452 }
2453 - case MP_BUS_NEC98: /* NEC 98 pin */
2454 - {
2455 - trigger = default_NEC98_trigger(idx);
2456 - break;
2457 - }
2458 default:
2459 {
2460 printk(KERN_WARNING "broken BIOS!!\n");
2461 @@ -1223,7 +1212,6 @@ static int pin_2_irq(int idx, int apic,
2462 case MP_BUS_ISA: /* ISA pin */
2463 case MP_BUS_EISA:
2464 case MP_BUS_MCA:
2465 - case MP_BUS_NEC98:
2466 {
2467 irq = mp_irqs[idx].mpc_srcbusirq;
2468 break;
2469 @@ -1291,7 +1279,7 @@ static inline int IO_APIC_irq_trigger(in
2470 }
2471
2472 /* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
2473 -u8 irq_vector[NR_IRQ_VECTORS] __read_mostly; /* = { FIRST_DEVICE_VECTOR , 0 }; */
2474 +static u8 irq_vector[NR_IRQ_VECTORS] __read_mostly; /* = { FIRST_DEVICE_VECTOR , 0 }; */
2475
2476 static int __assign_irq_vector(int irq)
2477 {
2478 @@ -1417,8 +1405,8 @@ static void __init setup_IO_APIC_irqs(vo
2479 if (!apic && (irq < 16))
2480 disable_8259A_irq(irq);
2481 }
2482 - ioapic_write_entry(apic, pin, entry);
2483 spin_lock_irqsave(&ioapic_lock, flags);
2484 + __ioapic_write_entry(apic, pin, entry);
2485 set_native_irq_info(irq, TARGET_CPUS);
2486 spin_unlock_irqrestore(&ioapic_lock, flags);
2487 }
2488 @@ -1988,6 +1976,15 @@ static void __init setup_ioapic_ids_from
2489 #endif
2490
2491 #ifndef CONFIG_XEN
2492 +static int no_timer_check __initdata;
2493 +
2494 +static int __init notimercheck(char *s)
2495 +{
2496 + no_timer_check = 1;
2497 + return 1;
2498 +}
2499 +__setup("no_timer_check", notimercheck);
2500 +
2501 /*
2502 * There is a nasty bug in some older SMP boards, their mptable lies
2503 * about the timer IRQ. We do the following to work around the situation:
2504 @@ -1996,10 +1993,13 @@ static void __init setup_ioapic_ids_from
2505 * - if this function detects that timer IRQs are defunct, then we fall
2506 * back to ISA timer IRQs
2507 */
2508 -static int __init timer_irq_works(void)
2509 +int __init timer_irq_works(void)
2510 {
2511 unsigned long t1 = jiffies;
2512
2513 + if (no_timer_check)
2514 + return 1;
2515 +
2516 local_irq_enable();
2517 /* Let ten ticks pass... */
2518 mdelay((10 * 1000) / HZ);
2519 @@ -2226,9 +2226,15 @@ static inline void unlock_ExtINT_logic(v
2520 unsigned char save_control, save_freq_select;
2521
2522 pin = find_isa_irq_pin(8, mp_INT);
2523 + if (pin == -1) {
2524 + WARN_ON_ONCE(1);
2525 + return;
2526 + }
2527 apic = find_isa_irq_apic(8, mp_INT);
2528 - if (pin == -1)
2529 + if (apic == -1) {
2530 + WARN_ON_ONCE(1);
2531 return;
2532 + }
2533
2534 entry0 = ioapic_read_entry(apic, pin);
2535 clear_IO_APIC_pin(apic, pin);
2536 @@ -2273,7 +2279,7 @@ int timer_uses_ioapic_pin_0;
2537 * is so screwy. Thanks to Brian Perkins for testing/hacking this beast
2538 * fanatically on his truly buggy board.
2539 */
2540 -static inline void check_timer(void)
2541 +static inline void __init check_timer(void)
2542 {
2543 int apic1, pin1, apic2, pin2;
2544 int vector;
2545 @@ -2558,7 +2564,7 @@ device_initcall(ioapic_init_sysfs);
2546 int create_irq(void)
2547 {
2548 /* Allocate an unused irq */
2549 - int irq, new, vector;
2550 + int irq, new, vector = 0;
2551 unsigned long flags;
2552
2553 irq = -ENOSPC;
2554 @@ -2939,8 +2945,8 @@ int io_apic_set_pci_routing (int ioapic,
2555 if (!ioapic && (irq < 16))
2556 disable_8259A_irq(irq);
2557
2558 - ioapic_write_entry(ioapic, pin, entry);
2559 spin_lock_irqsave(&ioapic_lock, flags);
2560 + __ioapic_write_entry(ioapic, pin, entry);
2561 set_native_irq_info(irq, TARGET_CPUS);
2562 spin_unlock_irqrestore(&ioapic_lock, flags);
2563
2564 --- sle11-2009-06-29.orig/arch/x86/kernel/ldt_32-xen.c 2009-03-04 11:28:34.000000000 +0100
2565 +++ sle11-2009-06-29/arch/x86/kernel/ldt_32-xen.c 2008-12-15 11:26:44.000000000 +0100
2566 @@ -177,16 +177,14 @@ static int read_default_ldt(void __user
2567 {
2568 int err;
2569 unsigned long size;
2570 - void *address;
2571
2572 err = 0;
2573 - address = &default_ldt[0];
2574 size = 5*sizeof(struct desc_struct);
2575 if (size > bytecount)
2576 size = bytecount;
2577
2578 err = size;
2579 - if (copy_to_user(ptr, address, size))
2580 + if (clear_user(ptr, size))
2581 err = -EFAULT;
2582
2583 return err;
2584 --- sle11-2009-06-29.orig/arch/x86/kernel/microcode-xen.c 2009-03-04 11:28:34.000000000 +0100
2585 +++ sle11-2009-06-29/arch/x86/kernel/microcode-xen.c 2008-12-15 11:26:44.000000000 +0100
2586 @@ -1,7 +1,7 @@
2587 /*
2588 * Intel CPU Microcode Update Driver for Linux
2589 *
2590 - * Copyright (C) 2000-2004 Tigran Aivazian
2591 + * Copyright (C) 2000-2006 Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
2592 * 2006 Shaohua Li <shaohua.li@intel.com>
2593 *
2594 * This driver allows to upgrade microcode on Intel processors
2595 @@ -43,7 +43,7 @@
2596 #include <asm/processor.h>
2597
2598 MODULE_DESCRIPTION("Intel CPU (IA-32) Microcode Update Driver");
2599 -MODULE_AUTHOR("Tigran Aivazian <tigran@veritas.com>");
2600 +MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>");
2601 MODULE_LICENSE("GPL");
2602
2603 static int verbose;
2604 @@ -195,7 +195,7 @@ static int __init microcode_init (void)
2605 request_microcode();
2606
2607 printk(KERN_INFO
2608 - "IA-32 Microcode Update Driver: v" MICROCODE_VERSION " <tigran@veritas.com>\n");
2609 + "IA-32 Microcode Update Driver: v" MICROCODE_VERSION " <tigran@aivazian.fsnet.co.uk>\n");
2610 return 0;
2611 }
2612
2613 --- sle11-2009-06-29.orig/arch/x86/kernel/mpparse_32-xen.c 2009-03-04 11:28:34.000000000 +0100
2614 +++ sle11-2009-06-29/arch/x86/kernel/mpparse_32-xen.c 2008-12-15 11:26:44.000000000 +0100
2615 @@ -36,7 +36,7 @@
2616
2617 /* Have we found an MP table */
2618 int smp_found_config;
2619 -unsigned int __initdata maxcpus = NR_CPUS;
2620 +unsigned int __cpuinitdata maxcpus = NR_CPUS;
2621
2622 /*
2623 * Various Linux-internal data structures created from the
2624 @@ -102,10 +102,10 @@ static int __init mpf_checksum(unsigned
2625 */
2626
2627 static int mpc_record;
2628 -static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] __initdata;
2629 +static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] __cpuinitdata;
2630
2631 #ifndef CONFIG_XEN
2632 -static void __devinit MP_processor_info (struct mpc_config_processor *m)
2633 +static void __cpuinit MP_processor_info (struct mpc_config_processor *m)
2634 {
2635 int ver, apicid;
2636 physid_mask_t phys_cpu;
2637 @@ -221,7 +221,7 @@ static void __devinit MP_processor_info
2638 bios_cpu_apicid[num_processors - 1] = m->mpc_apicid;
2639 }
2640 #else
2641 -void __init MP_processor_info (struct mpc_config_processor *m)
2642 +static void __cpuinit MP_processor_info (struct mpc_config_processor *m)
2643 {
2644 num_processors++;
2645 }
2646 @@ -256,8 +256,6 @@ static void __init MP_bus_info (struct m
2647 mp_current_pci_id++;
2648 } else if (strncmp(str, BUSTYPE_MCA, sizeof(BUSTYPE_MCA)-1) == 0) {
2649 mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA;
2650 - } else if (strncmp(str, BUSTYPE_NEC98, sizeof(BUSTYPE_NEC98)-1) == 0) {
2651 - mp_bus_id_to_type[m->mpc_busid] = MP_BUS_NEC98;
2652 } else {
2653 printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str);
2654 }
2655 @@ -842,7 +840,7 @@ void __init mp_register_lapic_address(u6
2656 #endif
2657 }
2658
2659 -void __devinit mp_register_lapic (u8 id, u8 enabled)
2660 +void __cpuinit mp_register_lapic (u8 id, u8 enabled)
2661 {
2662 struct mpc_config_processor processor;
2663 int boot_cpu = 0;
2664 --- sle11-2009-06-29.orig/arch/x86/kernel/pci-dma-xen.c 2009-03-04 11:28:34.000000000 +0100
2665 +++ sle11-2009-06-29/arch/x86/kernel/pci-dma-xen.c 2008-12-15 11:26:44.000000000 +0100
2666 @@ -276,7 +276,7 @@ EXPORT_SYMBOL(dma_free_coherent);
2667 int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
2668 dma_addr_t device_addr, size_t size, int flags)
2669 {
2670 - void __iomem *mem_base;
2671 + void __iomem *mem_base = NULL;
2672 int pages = size >> PAGE_SHIFT;
2673 int bitmap_size = (pages + 31)/32;
2674
2675 @@ -293,14 +293,12 @@ int dma_declare_coherent_memory(struct d
2676 if (!mem_base)
2677 goto out;
2678
2679 - dev->dma_mem = kmalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL);
2680 + dev->dma_mem = kzalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL);
2681 if (!dev->dma_mem)
2682 goto out;
2683 - memset(dev->dma_mem, 0, sizeof(struct dma_coherent_mem));
2684 - dev->dma_mem->bitmap = kmalloc(bitmap_size, GFP_KERNEL);
2685 + dev->dma_mem->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
2686 if (!dev->dma_mem->bitmap)
2687 goto free1_out;
2688 - memset(dev->dma_mem->bitmap, 0, bitmap_size);
2689
2690 dev->dma_mem->virt_base = mem_base;
2691 dev->dma_mem->device_base = device_addr;
2692 @@ -315,6 +313,8 @@ int dma_declare_coherent_memory(struct d
2693 free1_out:
2694 kfree(dev->dma_mem->bitmap);
2695 out:
2696 + if (mem_base)
2697 + iounmap(mem_base);
2698 return 0;
2699 }
2700 EXPORT_SYMBOL(dma_declare_coherent_memory);
2701 --- sle11-2009-06-29.orig/arch/x86/kernel/process_32-xen.c 2009-03-04 11:28:34.000000000 +0100
2702 +++ sle11-2009-06-29/arch/x86/kernel/process_32-xen.c 2008-12-15 11:26:44.000000000 +0100
2703 @@ -60,6 +60,7 @@
2704
2705 #include <asm/tlbflush.h>
2706 #include <asm/cpu.h>
2707 +#include <asm/pda.h>
2708
2709 asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
2710
2711 @@ -104,28 +105,24 @@ EXPORT_SYMBOL(enable_hlt);
2712 */
2713 static void poll_idle (void)
2714 {
2715 - local_irq_enable();
2716 -
2717 - asm volatile(
2718 - "2:"
2719 - "testl %0, %1;"
2720 - "rep; nop;"
2721 - "je 2b;"
2722 - : : "i"(_TIF_NEED_RESCHED), "m" (current_thread_info()->flags));
2723 + cpu_relax();
2724 }
2725
2726 static void xen_idle(void)
2727 {
2728 - local_irq_disable();
2729 + current_thread_info()->status &= ~TS_POLLING;
2730 + /*
2731 + * TS_POLLING-cleared state must be visible before we
2732 + * test NEED_RESCHED:
2733 + */
2734 + smp_mb();
2735
2736 - if (need_resched())
2737 + local_irq_disable();
2738 + if (!need_resched())
2739 + safe_halt(); /* enables interrupts racelessly */
2740 + else
2741 local_irq_enable();
2742 - else {
2743 - current_thread_info()->status &= ~TS_POLLING;
2744 - smp_mb__after_clear_bit();
2745 - safe_halt();
2746 - current_thread_info()->status |= TS_POLLING;
2747 - }
2748 + current_thread_info()->status |= TS_POLLING;
2749 }
2750 #ifdef CONFIG_APM_MODULE
2751 EXPORT_SYMBOL(default_idle);
2752 @@ -250,8 +247,8 @@ void show_regs(struct pt_regs * regs)
2753 regs->eax,regs->ebx,regs->ecx,regs->edx);
2754 printk("ESI: %08lx EDI: %08lx EBP: %08lx",
2755 regs->esi, regs->edi, regs->ebp);
2756 - printk(" DS: %04x ES: %04x\n",
2757 - 0xffff & regs->xds,0xffff & regs->xes);
2758 + printk(" DS: %04x ES: %04x GS: %04x\n",
2759 + 0xffff & regs->xds,0xffff & regs->xes, 0xffff & regs->xgs);
2760
2761 cr0 = read_cr0();
2762 cr2 = read_cr2();
2763 @@ -282,6 +279,7 @@ int kernel_thread(int (*fn)(void *), voi
2764
2765 regs.xds = __USER_DS;
2766 regs.xes = __USER_DS;
2767 + regs.xgs = __KERNEL_PDA;
2768 regs.orig_eax = -1;
2769 regs.eip = (unsigned long) kernel_thread_helper;
2770 regs.xcs = __KERNEL_CS | get_kernel_rpl();
2771 @@ -359,7 +357,6 @@ int copy_thread(int nr, unsigned long cl
2772 p->thread.eip = (unsigned long) ret_from_fork;
2773
2774 savesegment(fs,p->thread.fs);
2775 - savesegment(gs,p->thread.gs);
2776
2777 tsk = current;
2778 if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
2779 @@ -438,7 +435,7 @@ void dump_thread(struct pt_regs * regs,
2780 dump->regs.ds = regs->xds;
2781 dump->regs.es = regs->xes;
2782 savesegment(fs,dump->regs.fs);
2783 - savesegment(gs,dump->regs.gs);
2784 + dump->regs.gs = regs->xgs;
2785 dump->regs.orig_eax = regs->orig_eax;
2786 dump->regs.eip = regs->eip;
2787 dump->regs.cs = regs->xcs;
2788 @@ -635,17 +632,19 @@ struct task_struct fastcall * __switch_t
2789 if (unlikely(HYPERVISOR_multicall_check(_mcl, mcl - _mcl, NULL)))
2790 BUG();
2791
2792 + /* we're going to use this soon, after a few expensive things */
2793 + if (next_p->fpu_counter > 5)
2794 + prefetch(&next->i387.fxsave);
2795 +
2796 /*
2797 - * Restore %fs and %gs if needed.
2798 + * Restore %fs if needed.
2799 *
2800 - * Glibc normally makes %fs be zero, and %gs is one of
2801 - * the TLS segments.
2802 + * Glibc normally makes %fs be zero.
2803 */
2804 if (unlikely(next->fs))
2805 loadsegment(fs, next->fs);
2806
2807 - if (next->gs)
2808 - loadsegment(gs, next->gs);
2809 + write_pda(pcurrent, next_p);
2810
2811 /*
2812 * Now maybe handle debug registers
2813 @@ -655,6 +654,13 @@ struct task_struct fastcall * __switch_t
2814
2815 disable_tsc(prev_p, next_p);
2816
2817 + /* If the task has used fpu the last 5 timeslices, just do a full
2818 + * restore of the math state immediately to avoid the trap; the
2819 + * chances of needing FPU soon are obviously high now
2820 + */
2821 + if (next_p->fpu_counter > 5)
2822 + math_state_restore();
2823 +
2824 return prev_p;
2825 }
2826
2827 --- sle11-2009-06-29.orig/arch/x86/kernel/quirks-xen.c 2009-06-29 15:14:52.000000000 +0200
2828 +++ sle11-2009-06-29/arch/x86/kernel/quirks-xen.c 2008-12-15 11:26:44.000000000 +0100
2829 @@ -3,10 +3,12 @@
2830 */
2831 #include <linux/pci.h>
2832 #include <linux/irq.h>
2833 +#include <asm/pci-direct.h>
2834 +#include <asm/genapic.h>
2835 +#include <asm/cpu.h>
2836
2837 #if defined(CONFIG_X86_IO_APIC) && (defined(CONFIG_SMP) || defined(CONFIG_XEN)) && defined(CONFIG_PCI)
2838 -
2839 -static void __devinit quirk_intel_irqbalance(struct pci_dev *dev)
2840 +static void __devinit verify_quirk_intel_irqbalance(struct pci_dev *dev)
2841 {
2842 u8 config, rev;
2843 u32 word;
2844 @@ -14,14 +16,12 @@ static void __devinit quirk_intel_irqbal
2845 /* BIOS may enable hardware IRQ balancing for
2846 * E7520/E7320/E7525(revision ID 0x9 and below)
2847 * based platforms.
2848 - * Disable SW irqbalance/affinity on those platforms.
2849 + * For those platforms, make sure that the genapic is set to 'flat'
2850 */
2851 pci_read_config_byte(dev, PCI_CLASS_REVISION, &rev);
2852 if (rev > 0x9)
2853 return;
2854
2855 - printk(KERN_INFO "Intel E7520/7320/7525 detected.");
2856 -
2857 /* enable access to config space*/
2858 pci_read_config_byte(dev, 0xf4, &config);
2859 pci_write_config_byte(dev, 0xf4, config|0x2);
2860 @@ -30,6 +30,46 @@ static void __devinit quirk_intel_irqbal
2861 raw_pci_ops->read(0, 0, 0x40, 0x4c, 2, &word);
2862
2863 if (!(word & (1 << 13))) {
2864 +#ifndef CONFIG_XEN
2865 +#ifdef CONFIG_X86_64
2866 + if (genapic != &apic_flat)
2867 + panic("APIC mode must be flat on this system\n");
2868 +#elif defined(CONFIG_X86_GENERICARCH)
2869 + if (genapic != &apic_default)
2870 + panic("APIC mode must be default(flat) on this system. Use apic=default\n");
2871 +#endif
2872 +#endif
2873 + }
2874 +
2875 + /* put back the original value for config space*/
2876 + if (!(config & 0x2))
2877 + pci_write_config_byte(dev, 0xf4, config);
2878 +}
2879 +
2880 +void __init quirk_intel_irqbalance(void)
2881 +{
2882 + u8 config, rev;
2883 + u32 word;
2884 +
2885 + /* BIOS may enable hardware IRQ balancing for
2886 + * E7520/E7320/E7525(revision ID 0x9 and below)
2887 + * based platforms.
2888 + * Disable SW irqbalance/affinity on those platforms.
2889 + */
2890 + rev = read_pci_config_byte(0, 0, 0, PCI_CLASS_REVISION);
2891 + if (rev > 0x9)
2892 + return;
2893 +
2894 + printk(KERN_INFO "Intel E7520/7320/7525 detected.");
2895 +
2896 + /* enable access to config space */
2897 + config = read_pci_config_byte(0, 0, 0, 0xf4);
2898 + write_pci_config_byte(0, 0, 0, 0xf4, config|0x2);
2899 +
2900 + /* read xTPR register */
2901 + word = read_pci_config_16(0, 0, 0x40, 0x4c);
2902 +
2903 + if (!(word & (1 << 13))) {
2904 struct xen_platform_op op;
2905 printk(KERN_INFO "Disabling irq balancing and affinity\n");
2906 op.cmd = XENPF_platform_quirk;
2907 @@ -37,11 +77,12 @@ static void __devinit quirk_intel_irqbal
2908 WARN_ON(HYPERVISOR_platform_op(&op));
2909 }
2910
2911 - /* put back the original value for config space*/
2912 + /* put back the original value for config space */
2913 if (!(config & 0x2))
2914 - pci_write_config_byte(dev, 0xf4, config);
2915 + write_pci_config_byte(0, 0, 0, 0xf4, config);
2916 }
2917 -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7320_MCH, quirk_intel_irqbalance);
2918 -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7525_MCH, quirk_intel_irqbalance);
2919 -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH, quirk_intel_irqbalance);
2920 +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7320_MCH, verify_quirk_intel_irqbalance);
2921 +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7525_MCH, verify_quirk_intel_irqbalance);
2922 +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH, verify_quirk_intel_irqbalance);
2923 +
2924 #endif
2925 --- sle11-2009-06-29.orig/arch/x86/kernel/setup_32-xen.c 2009-03-04 11:28:34.000000000 +0100
2926 +++ sle11-2009-06-29/arch/x86/kernel/setup_32-xen.c 2008-12-15 11:26:44.000000000 +0100
2927 @@ -76,9 +76,6 @@
2928 #include <xen/interface/kexec.h>
2929 #endif
2930
2931 -/* Forward Declaration. */
2932 -void __init find_max_pfn(void);
2933 -
2934 static int xen_panic_event(struct notifier_block *, unsigned long, void *);
2935 static struct notifier_block xen_panic_block = {
2936 xen_panic_event, NULL, 0 /* try to go last */
2937 @@ -89,14 +86,11 @@ int disable_pse __devinitdata = 0;
2938 /*
2939 * Machine setup..
2940 */
2941 -
2942 -#ifdef CONFIG_EFI
2943 -int efi_enabled = 0;
2944 -EXPORT_SYMBOL(efi_enabled);
2945 -#endif
2946 +extern struct resource code_resource;
2947 +extern struct resource data_resource;
2948
2949 /* cpu data as detected by the assembly code in head.S */
2950 -struct cpuinfo_x86 new_cpu_data __initdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
2951 +struct cpuinfo_x86 new_cpu_data __cpuinitdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
2952 /* common cpu data for all cpus */
2953 struct cpuinfo_x86 boot_cpu_data __read_mostly = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
2954 EXPORT_SYMBOL(boot_cpu_data);
2955 @@ -112,12 +106,6 @@ unsigned int machine_submodel_id;
2956 unsigned int BIOS_revision;
2957 unsigned int mca_pentium_flag;
2958
2959 -/* For PCI or other memory-mapped resources */
2960 -unsigned long pci_mem_start = 0x10000000;
2961 -#ifdef CONFIG_PCI
2962 -EXPORT_SYMBOL(pci_mem_start);
2963 -#endif
2964 -
2965 /* Boot loader ID as an integer, for the benefit of proc_dointvec */
2966 int bootloader_type;
2967
2968 @@ -150,10 +138,6 @@ struct ist_info ist_info;
2969 defined(CONFIG_X86_SPEEDSTEP_SMI_MODULE)
2970 EXPORT_SYMBOL(ist_info);
2971 #endif
2972 -struct e820map e820;
2973 -#ifdef CONFIG_XEN
2974 -struct e820map machine_e820;
2975 -#endif
2976
2977 extern void early_cpu_init(void);
2978 extern int root_mountflags;
2979 @@ -168,209 +152,6 @@ static char command_line[COMMAND_LINE_SI
2980
2981 unsigned char __initdata boot_params[PARAM_SIZE];
2982
2983 -static struct resource data_resource = {
2984 - .name = "Kernel data",
2985 - .start = 0,
2986 - .end = 0,
2987 - .flags = IORESOURCE_BUSY | IORESOURCE_MEM
2988 -};
2989 -
2990 -static struct resource code_resource = {
2991 - .name = "Kernel code",
2992 - .start = 0,
2993 - .end = 0,
2994 - .flags = IORESOURCE_BUSY | IORESOURCE_MEM
2995 -};
2996 -
2997 -static struct resource system_rom_resource = {
2998 - .name = "System ROM",
2999 - .start = 0xf0000,
3000 - .end = 0xfffff,
3001 - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
3002 -};
3003 -
3004 -static struct resource extension_rom_resource = {
3005 - .name = "Extension ROM",
3006 - .start = 0xe0000,
3007 - .end = 0xeffff,
3008 - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
3009 -};
3010 -
3011 -static struct resource adapter_rom_resources[] = { {
3012 - .name = "Adapter ROM",
3013 - .start = 0xc8000,
3014 - .end = 0,
3015 - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
3016 -}, {
3017 - .name = "Adapter ROM",
3018 - .start = 0,
3019 - .end = 0,
3020 - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
3021 -}, {
3022 - .name = "Adapter ROM",
3023 - .start = 0,
3024 - .end = 0,
3025 - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
3026 -}, {
3027 - .name = "Adapter ROM",
3028 - .start = 0,
3029 - .end = 0,
3030 - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
3031 -}, {
3032 - .name = "Adapter ROM",
3033 - .start = 0,
3034 - .end = 0,
3035 - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
3036 -}, {
3037 - .name = "Adapter ROM",
3038 - .start = 0,
3039 - .end = 0,
3040 - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
3041 -} };
3042 -
3043 -static struct resource video_rom_resource = {
3044 - .name = "Video ROM",
3045 - .start = 0xc0000,
3046 - .end = 0xc7fff,
3047 - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
3048 -};
3049 -
3050 -static struct resource video_ram_resource = {
3051 - .name = "Video RAM area",
3052 - .start = 0xa0000,
3053 - .end = 0xbffff,
3054 - .flags = IORESOURCE_BUSY | IORESOURCE_MEM
3055 -};
3056 -
3057 -static struct resource standard_io_resources[] = { {
3058 - .name = "dma1",
3059 - .start = 0x0000,
3060 - .end = 0x001f,
3061 - .flags = IORESOURCE_BUSY | IORESOURCE_IO
3062 -}, {
3063 - .name = "pic1",
3064 - .start = 0x0020,
3065 - .end = 0x0021,
3066 - .flags = IORESOURCE_BUSY | IORESOURCE_IO
3067 -}, {
3068 - .name = "timer0",
3069 - .start = 0x0040,
3070 - .end = 0x0043,
3071 - .flags = IORESOURCE_BUSY | IORESOURCE_IO
3072 -}, {
3073 - .name = "timer1",
3074 - .start = 0x0050,
3075 - .end = 0x0053,
3076 - .flags = IORESOURCE_BUSY | IORESOURCE_IO
3077 -}, {
3078 - .name = "keyboard",
3079 - .start = 0x0060,
3080 - .end = 0x006f,
3081 - .flags = IORESOURCE_BUSY | IORESOURCE_IO
3082 -}, {
3083 - .name = "dma page reg",
3084 - .start = 0x0080,
3085 - .end = 0x008f,
3086 - .flags = IORESOURCE_BUSY | IORESOURCE_IO
3087 -}, {
3088 - .name = "pic2",
3089 - .start = 0x00a0,
3090 - .end = 0x00a1,
3091 - .flags = IORESOURCE_BUSY | IORESOURCE_IO
3092 -}, {
3093 - .name = "dma2",
3094 - .start = 0x00c0,
3095 - .end = 0x00df,
3096 - .flags = IORESOURCE_BUSY | IORESOURCE_IO
3097 -}, {
3098 - .name = "fpu",
3099 - .start = 0x00f0,
3100 - .end = 0x00ff,
3101 - .flags = IORESOURCE_BUSY | IORESOURCE_IO
3102 -} };
3103 -
3104 -#define romsignature(x) (*(unsigned short *)(x) == 0xaa55)
3105 -
3106 -static int __init romchecksum(unsigned char *rom, unsigned long length)
3107 -{
3108 - unsigned char *p, sum = 0;
3109 -
3110 - for (p = rom; p < rom + length; p++)
3111 - sum += *p;
3112 - return sum == 0;
3113 -}
3114 -
3115 -static void __init probe_roms(void)
3116 -{
3117 - unsigned long start, length, upper;
3118 - unsigned char *rom;
3119 - int i;
3120 -
3121 -#ifdef CONFIG_XEN
3122 - /* Nothing to do if not running in dom0. */
3123 - if (!is_initial_xendomain())
3124 - return;
3125 -#endif
3126 -
3127 - /* video rom */
3128 - upper = adapter_rom_resources[0].start;
3129 - for (start = video_rom_resource.start; start < upper; start += 2048) {
3130 - rom = isa_bus_to_virt(start);
3131 - if (!romsignature(rom))
3132 - continue;
3133 -
3134 - video_rom_resource.start = start;
3135 -
3136 - /* 0 < length <= 0x7f * 512, historically */
3137 - length = rom[2] * 512;
3138 -
3139 - /* if checksum okay, trust length byte */
3140 - if (length && romchecksum(rom, length))
3141 - video_rom_resource.end = start + length - 1;
3142 -
3143 - request_resource(&iomem_resource, &video_rom_resource);
3144 - break;
3145 - }
3146 -
3147 - start = (video_rom_resource.end + 1 + 2047) & ~2047UL;
3148 - if (start < upper)
3149 - start = upper;
3150 -
3151 - /* system rom */
3152 - request_resource(&iomem_resource, &system_rom_resource);
3153 - upper = system_rom_resource.start;
3154 -
3155 - /* check for extension rom (ignore length byte!) */
3156 - rom = isa_bus_to_virt(extension_rom_resource.start);
3157 - if (romsignature(rom)) {
3158 - length = extension_rom_resource.end - extension_rom_resource.start + 1;
3159 - if (romchecksum(rom, length)) {
3160 - request_resource(&iomem_resource, &extension_rom_resource);
3161 - upper = extension_rom_resource.start;
3162 - }
3163 - }
3164 -
3165 - /* check for adapter roms on 2k boundaries */
3166 - for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; start += 2048) {
3167 - rom = isa_bus_to_virt(start);
3168 - if (!romsignature(rom))
3169 - continue;
3170 -
3171 - /* 0 < length <= 0x7f * 512, historically */
3172 - length = rom[2] * 512;
3173 -
3174 - /* but accept any length that fits if checksum okay */
3175 - if (!length || start + length > upper || !romchecksum(rom, length))
3176 - continue;
3177 -
3178 - adapter_rom_resources[i].start = start;
3179 - adapter_rom_resources[i].end = start + length - 1;
3180 - request_resource(&iomem_resource, &adapter_rom_resources[i]);
3181 -
3182 - start = adapter_rom_resources[i++].end & ~2047UL;
3183 - }
3184 -}
3185 -
3186 /*
3187 * Point at the empty zero page to start with. We map the real shared_info
3188 * page as soon as fixmap is up and running.
3189 @@ -386,353 +167,6 @@ EXPORT_SYMBOL(phys_to_machine_mapping);
3190 start_info_t *xen_start_info;
3191 EXPORT_SYMBOL(xen_start_info);
3192
3193 -void __init add_memory_region(unsigned long long start,
3194 - unsigned long long size, int type)
3195 -{
3196 - int x;
3197 -
3198 - if (!efi_enabled) {
3199 - x = e820.nr_map;
3200 -
3201 - if (x == E820MAX) {
3202 - printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
3203 - return;
3204 - }
3205 -
3206 - e820.map[x].addr = start;
3207 - e820.map[x].size = size;
3208 - e820.map[x].type = type;
3209 - e820.nr_map++;
3210 - }
3211 -} /* add_memory_region */
3212 -
3213 -static void __init limit_regions(unsigned long long size)
3214 -{
3215 - unsigned long long current_addr = 0;
3216 - int i;
3217 -
3218 - if (efi_enabled) {
3219 - efi_memory_desc_t *md;
3220 - void *p;
3221 -
3222 - for (p = memmap.map, i = 0; p < memmap.map_end;
3223 - p += memmap.desc_size, i++) {
3224 - md = p;
3225 - current_addr = md->phys_addr + (md->num_pages << 12);
3226 - if (md->type == EFI_CONVENTIONAL_MEMORY) {
3227 - if (current_addr >= size) {
3228 - md->num_pages -=
3229 - (((current_addr-size) + PAGE_SIZE-1) >> PAGE_SHIFT);
3230 - memmap.nr_map = i + 1;
3231 - return;
3232 - }
3233 - }
3234 - }
3235 - }
3236 - for (i = 0; i < e820.nr_map; i++) {
3237 - current_addr = e820.map[i].addr + e820.map[i].size;
3238 - if (current_addr < size)
3239 - continue;
3240 -
3241 - if (e820.map[i].type != E820_RAM)
3242 - continue;
3243 -
3244 - if (e820.map[i].addr >= size) {
3245 - /*
3246 - * This region starts past the end of the
3247 - * requested size, skip it completely.
3248 - */
3249 - e820.nr_map = i;
3250 - } else {
3251 - e820.nr_map = i + 1;
3252 - e820.map[i].size -= current_addr - size;
3253 - }
3254 - return;
3255 - }
3256 -#ifdef CONFIG_XEN
3257 - if (i==e820.nr_map && current_addr < size) {
3258 - /*
3259 - * The e820 map finished before our requested size so
3260 - * extend the final entry to the requested address.
3261 - */
3262 - --i;
3263 - if (e820.map[i].type == E820_RAM)
3264 - e820.map[i].size -= current_addr - size;
3265 - else
3266 - add_memory_region(current_addr, size - current_addr, E820_RAM);
3267 - }
3268 -#endif
3269 -}
3270 -
3271 -#define E820_DEBUG 1
3272 -
3273 -static void __init print_memory_map(char *who)
3274 -{
3275 - int i;
3276 -
3277 - for (i = 0; i < e820.nr_map; i++) {
3278 - printk(" %s: %016Lx - %016Lx ", who,
3279 - e820.map[i].addr,
3280 - e820.map[i].addr + e820.map[i].size);
3281 - switch (e820.map[i].type) {
3282 - case E820_RAM: printk("(usable)\n");
3283 - break;
3284 - case E820_RESERVED:
3285 - printk("(reserved)\n");
3286 - break;
3287 - case E820_ACPI:
3288 - printk("(ACPI data)\n");
3289 - break;
3290 - case E820_NVS:
3291 - printk("(ACPI NVS)\n");
3292 - break;
3293 - default: printk("type %lu\n", e820.map[i].type);
3294 - break;
3295 - }
3296 - }
3297 -}
3298 -
3299 -/*
3300 - * Sanitize the BIOS e820 map.
3301 - *
3302 - * Some e820 responses include overlapping entries. The following
3303 - * replaces the original e820 map with a new one, removing overlaps.
3304 - *
3305 - */
3306 -struct change_member {
3307 - struct e820entry *pbios; /* pointer to original bios entry */
3308 - unsigned long long addr; /* address for this change point */
3309 -};
3310 -static struct change_member change_point_list[2*E820MAX] __initdata;
3311 -static struct change_member *change_point[2*E820MAX] __initdata;
3312 -static struct e820entry *overlap_list[E820MAX] __initdata;
3313 -static struct e820entry new_bios[E820MAX] __initdata;
3314 -
3315 -int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
3316 -{
3317 - struct change_member *change_tmp;
3318 - unsigned long current_type, last_type;
3319 - unsigned long long last_addr;
3320 - int chgidx, still_changing;
3321 - int overlap_entries;
3322 - int new_bios_entry;
3323 - int old_nr, new_nr, chg_nr;
3324 - int i;
3325 -
3326 - /*
3327 - Visually we're performing the following (1,2,3,4 = memory types)...
3328 -
3329 - Sample memory map (w/overlaps):
3330 - ____22__________________
3331 - ______________________4_
3332 - ____1111________________
3333 - _44_____________________
3334 - 11111111________________
3335 - ____________________33__
3336 - ___________44___________
3337 - __________33333_________
3338 - ______________22________
3339 - ___________________2222_
3340 - _________111111111______
3341 - _____________________11_
3342 - _________________4______
3343 -
3344 - Sanitized equivalent (no overlap):
3345 - 1_______________________
3346 - _44_____________________
3347 - ___1____________________
3348 - ____22__________________
3349 - ______11________________
3350 - _________1______________
3351 - __________3_____________
3352 - ___________44___________
3353 - _____________33_________
3354 - _______________2________
3355 - ________________1_______
3356 - _________________4______
3357 - ___________________2____
3358 - ____________________33__
3359 - ______________________4_
3360 - */
3361 -
3362 - /* if there's only one memory region, don't bother */
3363 - if (*pnr_map < 2)
3364 - return -1;
3365 -
3366 - old_nr = *pnr_map;
3367 -
3368 - /* bail out if we find any unreasonable addresses in bios map */
3369 - for (i=0; i<old_nr; i++)
3370 - if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
3371 - return -1;
3372 -
3373 - /* create pointers for initial change-point information (for sorting) */
3374 - for (i=0; i < 2*old_nr; i++)
3375 - change_point[i] = &change_point_list[i];
3376 -
3377 - /* record all known change-points (starting and ending addresses),
3378 - omitting those that are for empty memory regions */
3379 - chgidx = 0;
3380 - for (i=0; i < old_nr; i++) {
3381 - if (biosmap[i].size != 0) {
3382 - change_point[chgidx]->addr = biosmap[i].addr;
3383 - change_point[chgidx++]->pbios = &biosmap[i];
3384 - change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size;
3385 - change_point[chgidx++]->pbios = &biosmap[i];
3386 - }
3387 - }
3388 - chg_nr = chgidx; /* true number of change-points */
3389 -
3390 - /* sort change-point list by memory addresses (low -> high) */
3391 - still_changing = 1;
3392 - while (still_changing) {
3393 - still_changing = 0;
3394 - for (i=1; i < chg_nr; i++) {
3395 - /* if <current_addr> > <last_addr>, swap */
3396 - /* or, if current=<start_addr> & last=<end_addr>, swap */
3397 - if ((change_point[i]->addr < change_point[i-1]->addr) ||
3398 - ((change_point[i]->addr == change_point[i-1]->addr) &&
3399 - (change_point[i]->addr == change_point[i]->pbios->addr) &&
3400 - (change_point[i-1]->addr != change_point[i-1]->pbios->addr))
3401 - )
3402 - {
3403 - change_tmp = change_point[i];
3404 - change_point[i] = change_point[i-1];
3405 - change_point[i-1] = change_tmp;
3406 - still_changing=1;
3407 - }
3408 - }
3409 - }
3410 -
3411 - /* create a new bios memory map, removing overlaps */
3412 - overlap_entries=0; /* number of entries in the overlap table */
3413 - new_bios_entry=0; /* index for creating new bios map entries */
3414 - last_type = 0; /* start with undefined memory type */
3415 - last_addr = 0; /* start with 0 as last starting address */
3416 - /* loop through change-points, determining affect on the new bios map */
3417 - for (chgidx=0; chgidx < chg_nr; chgidx++)
3418 - {
3419 - /* keep track of all overlapping bios entries */
3420 - if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr)
3421 - {
3422 - /* add map entry to overlap list (> 1 entry implies an overlap) */
3423 - overlap_list[overlap_entries++]=change_point[chgidx]->pbios;
3424 - }
3425 - else
3426 - {
3427 - /* remove entry from list (order independent, so swap with last) */
3428 - for (i=0; i<overlap_entries; i++)
3429 - {
3430 - if (overlap_list[i] == change_point[chgidx]->pbios)
3431 - overlap_list[i] = overlap_list[overlap_entries-1];
3432 - }
3433 - overlap_entries--;
3434 - }
3435 - /* if there are overlapping entries, decide which "type" to use */
3436 - /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */
3437 - current_type = 0;
3438 - for (i=0; i<overlap_entries; i++)
3439 - if (overlap_list[i]->type > current_type)
3440 - current_type = overlap_list[i]->type;
3441 - /* continue building up new bios map based on this information */
3442 - if (current_type != last_type) {
3443 - if (last_type != 0) {
3444 - new_bios[new_bios_entry].size =
3445 - change_point[chgidx]->addr - last_addr;
3446 - /* move forward only if the new size was non-zero */
3447 - if (new_bios[new_bios_entry].size != 0)
3448 - if (++new_bios_entry >= E820MAX)
3449 - break; /* no more space left for new bios entries */
3450 - }
3451 - if (current_type != 0) {
3452 - new_bios[new_bios_entry].addr = change_point[chgidx]->addr;
3453 - new_bios[new_bios_entry].type = current_type;
3454 - last_addr=change_point[chgidx]->addr;
3455 - }
3456 - last_type = current_type;
3457 - }
3458 - }
3459 - new_nr = new_bios_entry; /* retain count for new bios entries */
3460 -
3461 - /* copy new bios mapping into original location */
3462 - memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry));
3463 - *pnr_map = new_nr;
3464 -
3465 - return 0;
3466 -}
3467 -
3468 -/*
3469 - * Copy the BIOS e820 map into a safe place.
3470 - *
3471 - * Sanity-check it while we're at it..
3472 - *
3473 - * If we're lucky and live on a modern system, the setup code
3474 - * will have given us a memory map that we can use to properly
3475 - * set up memory. If we aren't, we'll fake a memory map.
3476 - *
3477 - * We check to see that the memory map contains at least 2 elements
3478 - * before we'll use it, because the detection code in setup.S may
3479 - * not be perfect and most every PC known to man has two memory
3480 - * regions: one from 0 to 640k, and one from 1mb up. (The IBM
3481 - * thinkpad 560x, for example, does not cooperate with the memory
3482 - * detection code.)
3483 - */
3484 -int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
3485 -{
3486 -#ifndef CONFIG_XEN
3487 - /* Only one memory region (or negative)? Ignore it */
3488 - if (nr_map < 2)
3489 - return -1;
3490 -#else
3491 - BUG_ON(nr_map < 1);
3492 -#endif
3493 -
3494 - do {
3495 - unsigned long long start = biosmap->addr;
3496 - unsigned long long size = biosmap->size;
3497 - unsigned long long end = start + size;
3498 - unsigned long type = biosmap->type;
3499 -
3500 - /* Overflow in 64 bits? Ignore the memory map. */
3501 - if (start > end)
3502 - return -1;
3503 -
3504 -#ifndef CONFIG_XEN
3505 - /*
3506 - * Some BIOSes claim RAM in the 640k - 1M region.
3507 - * Not right. Fix it up.
3508 - */
3509 - if (type == E820_RAM) {
3510 - if (start < 0x100000ULL && end > 0xA0000ULL) {
3511 - if (start < 0xA0000ULL)
3512 - add_memory_region(start, 0xA0000ULL-start, type);
3513 - if (end <= 0x100000ULL)
3514 - continue;
3515 - start = 0x100000ULL;
3516 - size = end - start;
3517 - }
3518 - }
3519 -#endif
3520 - add_memory_region(start, size, type);
3521 - } while (biosmap++,--nr_map);
3522 -
3523 -#ifdef CONFIG_XEN
3524 - if (is_initial_xendomain()) {
3525 - struct xen_memory_map memmap;
3526 -
3527 - memmap.nr_entries = E820MAX;
3528 - set_xen_guest_handle(memmap.buffer, machine_e820.map);
3529 -
3530 - if (HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap))
3531 - BUG();
3532 - machine_e820.nr_map = memmap.nr_entries;
3533 - } else
3534 - machine_e820 = e820;
3535 -#endif
3536 -
3537 - return 0;
3538 -}
3539 -
3540 #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
3541 struct edd edd;
3542 #ifdef CONFIG_EDD_MODULE
3543 @@ -758,7 +192,7 @@ static inline void copy_edd(void)
3544 }
3545 #endif
3546
3547 -static int __initdata user_defined_memmap = 0;
3548 +int __initdata user_defined_memmap = 0;
3549
3550 /*
3551 * "mem=nopentium" disables the 4MB page tables.
3552 @@ -795,51 +229,6 @@ static int __init parse_mem(char *arg)
3553 }
3554 early_param("mem", parse_mem);
3555
3556 -static int __init parse_memmap(char *arg)
3557 -{
3558 - if (!arg)
3559 - return -EINVAL;
3560 -
3561 - if (strcmp(arg, "exactmap") == 0) {
3562 -#ifdef CONFIG_CRASH_DUMP
3563 - /* If we are doing a crash dump, we
3564 - * still need to know the real mem
3565 - * size before original memory map is
3566 - * reset.
3567 - */
3568 - find_max_pfn();
3569 - saved_max_pfn = max_pfn;
3570 -#endif
3571 - e820.nr_map = 0;
3572 - user_defined_memmap = 1;
3573 - } else {
3574 - /* If the user specifies memory size, we
3575 - * limit the BIOS-provided memory map to
3576 - * that size. exactmap can be used to specify
3577 - * the exact map. mem=number can be used to
3578 - * trim the existing memory map.
3579 - */
3580 - unsigned long long start_at, mem_size;
3581 -
3582 - mem_size = memparse(arg, &arg);
3583 - if (*arg == '@') {
3584 - start_at = memparse(arg+1, &arg);
3585 - add_memory_region(start_at, mem_size, E820_RAM);
3586 - } else if (*arg == '#') {
3587 - start_at = memparse(arg+1, &arg);
3588 - add_memory_region(start_at, mem_size, E820_ACPI);
3589 - } else if (*arg == '$') {
3590 - start_at = memparse(arg+1, &arg);
3591 - add_memory_region(start_at, mem_size, E820_RESERVED);
3592 - } else {
3593 - limit_regions(mem_size);
3594 - user_defined_memmap = 1;
3595 - }
3596 - }
3597 - return 0;
3598 -}
3599 -early_param("memmap", parse_memmap);
3600 -
3601 #ifdef CONFIG_PROC_VMCORE
3602 /* elfcorehdr= specifies the location of elf core header
3603 * stored by the crashed kernel.
3604 @@ -906,127 +295,6 @@ early_param("reservetop", parse_reservet
3605 #endif
3606
3607 /*
3608 - * Callback for efi_memory_walk.
3609 - */
3610 -static int __init
3611 -efi_find_max_pfn(unsigned long start, unsigned long end, void *arg)
3612 -{
3613 - unsigned long *max_pfn = arg, pfn;
3614 -
3615 - if (start < end) {
3616 - pfn = PFN_UP(end -1);
3617 - if (pfn > *max_pfn)
3618 - *max_pfn = pfn;
3619 - }
3620 - return 0;
3621 -}
3622 -
3623 -static int __init
3624 -efi_memory_present_wrapper(unsigned long start, unsigned long end, void *arg)
3625 -{
3626 - memory_present(0, PFN_UP(start), PFN_DOWN(end));
3627 - return 0;
3628 -}
3629 -
3630 -/*
3631 - * This function checks if any part of the range <start,end> is mapped
3632 - * with type.
3633 - */
3634 -int
3635 -e820_any_mapped(u64 start, u64 end, unsigned type)
3636 -{
3637 - int i;
3638 -
3639 -#ifndef CONFIG_XEN
3640 - for (i = 0; i < e820.nr_map; i++) {
3641 - const struct e820entry *ei = &e820.map[i];
3642 -#else
3643 - if (!is_initial_xendomain())
3644 - return 0;
3645 - for (i = 0; i < machine_e820.nr_map; ++i) {
3646 - const struct e820entry *ei = &machine_e820.map[i];
3647 -#endif
3648 -
3649 - if (type && ei->type != type)
3650 - continue;
3651 - if (ei->addr >= end || ei->addr + ei->size <= start)
3652 - continue;
3653 - return 1;
3654 - }
3655 - return 0;
3656 -}
3657 -EXPORT_SYMBOL_GPL(e820_any_mapped);
3658 -
3659 - /*
3660 - * This function checks if the entire range <start,end> is mapped with type.
3661 - *
3662 - * Note: this function only works correct if the e820 table is sorted and
3663 - * not-overlapping, which is the case
3664 - */
3665 -int __init
3666 -e820_all_mapped(unsigned long s, unsigned long e, unsigned type)
3667 -{
3668 - u64 start = s;
3669 - u64 end = e;
3670 - int i;
3671 -
3672 -#ifndef CONFIG_XEN
3673 - for (i = 0; i < e820.nr_map; i++) {
3674 - struct e820entry *ei = &e820.map[i];
3675 -#else
3676 - if (!is_initial_xendomain())
3677 - return 0;
3678 - for (i = 0; i < machine_e820.nr_map; ++i) {
3679 - const struct e820entry *ei = &machine_e820.map[i];
3680 -#endif
3681 - if (type && ei->type != type)
3682 - continue;
3683 - /* is the region (part) in overlap with the current region ?*/
3684 - if (ei->addr >= end || ei->addr + ei->size <= start)
3685 - continue;
3686 - /* if the region is at the beginning of <start,end> we move
3687 - * start to the end of the region since it's ok until there
3688 - */
3689 - if (ei->addr <= start)
3690 - start = ei->addr + ei->size;
3691 - /* if start is now at or beyond end, we're done, full
3692 - * coverage */
3693 - if (start >= end)
3694 - return 1; /* we're done */
3695 - }
3696 - return 0;
3697 -}
3698 -
3699 -/*
3700 - * Find the highest page frame number we have available
3701 - */
3702 -void __init find_max_pfn(void)
3703 -{
3704 - int i;
3705 -
3706 - max_pfn = 0;
3707 - if (efi_enabled) {
3708 - efi_memmap_walk(efi_find_max_pfn, &max_pfn);
3709 - efi_memmap_walk(efi_memory_present_wrapper, NULL);
3710 - return;
3711 - }
3712 -
3713 - for (i = 0; i < e820.nr_map; i++) {
3714 - unsigned long start, end;
3715 - /* RAM? */
3716 - if (e820.map[i].type != E820_RAM)
3717 - continue;
3718 - start = PFN_UP(e820.map[i].addr);
3719 - end = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
3720 - if (start >= end)
3721 - continue;
3722 - if (end > max_pfn)
3723 - max_pfn = end;
3724 - memory_present(0, start, end);
3725 - }
3726 -}
3727 -
3728 -/*
3729 * Determine low and high memory ranges:
3730 */
3731 unsigned long __init find_max_low_pfn(void)
3732 @@ -1085,77 +353,6 @@ unsigned long __init find_max_low_pfn(vo
3733 return max_low_pfn;
3734 }
3735
3736 -/*
3737 - * Free all available memory for boot time allocation. Used
3738 - * as a callback function by efi_memory_walk()
3739 - */
3740 -
3741 -static int __init
3742 -free_available_memory(unsigned long start, unsigned long end, void *arg)
3743 -{
3744 - /* check max_low_pfn */
3745 - if (start >= (max_low_pfn << PAGE_SHIFT))
3746 - return 0;
3747 - if (end >= (max_low_pfn << PAGE_SHIFT))
3748 - end = max_low_pfn << PAGE_SHIFT;
3749 - if (start < end)
3750 - free_bootmem(start, end - start);
3751 -
3752 - return 0;
3753 -}
3754 -/*
3755 - * Register fully available low RAM pages with the bootmem allocator.
3756 - */
3757 -static void __init register_bootmem_low_pages(unsigned long max_low_pfn)
3758 -{
3759 - int i;
3760 -
3761 - if (efi_enabled) {
3762 - efi_memmap_walk(free_available_memory, NULL);
3763 - return;
3764 - }
3765 - for (i = 0; i < e820.nr_map; i++) {
3766 - unsigned long curr_pfn, last_pfn, size;
3767 - /*
3768 - * Reserve usable low memory
3769 - */
3770 - if (e820.map[i].type != E820_RAM)
3771 - continue;
3772 - /*
3773 - * We are rounding up the start address of usable memory:
3774 - */
3775 - curr_pfn = PFN_UP(e820.map[i].addr);
3776 - if (curr_pfn >= max_low_pfn)
3777 - continue;
3778 - /*
3779 - * ... and at the end of the usable range downwards:
3780 - */
3781 - last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
3782 -
3783 -#ifdef CONFIG_XEN
3784 - /*
3785 - * Truncate to the number of actual pages currently
3786 - * present.
3787 - */
3788 - if (last_pfn > xen_start_info->nr_pages)
3789 - last_pfn = xen_start_info->nr_pages;
3790 -#endif
3791 -
3792 - if (last_pfn > max_low_pfn)
3793 - last_pfn = max_low_pfn;
3794 -
3795 - /*
3796 - * .. finally, did all the rounding and playing
3797 - * around just make the area go away?
3798 - */
3799 - if (last_pfn <= curr_pfn)
3800 - continue;
3801 -
3802 - size = last_pfn - curr_pfn;
3803 - free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size));
3804 - }
3805 -}
3806 -
3807 #ifndef CONFIG_XEN
3808 /*
3809 * workaround for Dell systems that neglect to reserve EBDA
3810 @@ -1245,8 +442,8 @@ void __init setup_bootmem_allocator(void
3811 * the (very unlikely) case of us accidentally initializing the
3812 * bootmem allocator with an invalid RAM area.
3813 */
3814 - reserve_bootmem(__PHYSICAL_START, (PFN_PHYS(min_low_pfn) +
3815 - bootmap_size + PAGE_SIZE-1) - (__PHYSICAL_START));
3816 + reserve_bootmem(__pa_symbol(_text), (PFN_PHYS(min_low_pfn) +
3817 + bootmap_size + PAGE_SIZE-1) - __pa_symbol(_text));
3818
3819 #ifndef CONFIG_XEN
3820 /*
3821 @@ -1328,160 +525,6 @@ void __init remapped_pgdat_init(void)
3822 }
3823 }
3824
3825 -/*
3826 - * Request address space for all standard RAM and ROM resources
3827 - * and also for regions reported as reserved by the e820.
3828 - */
3829 -static void __init
3830 -legacy_init_iomem_resources(struct e820entry *e820, int nr_map,
3831 - struct resource *code_resource,
3832 - struct resource *data_resource)
3833 -{
3834 - int i;
3835 -
3836 - probe_roms();
3837 -
3838 - for (i = 0; i < nr_map; i++) {
3839 - struct resource *res;
3840 -#ifndef CONFIG_RESOURCES_64BIT
3841 - if (e820[i].addr + e820[i].size > 0x100000000ULL)
3842 - continue;
3843 -#endif
3844 - res = kzalloc(sizeof(struct resource), GFP_ATOMIC);
3845 - switch (e820[i].type) {
3846 - case E820_RAM: res->name = "System RAM"; break;
3847 - case E820_ACPI: res->name = "ACPI Tables"; break;
3848 - case E820_NVS: res->name = "ACPI Non-volatile Storage"; break;
3849 - default: res->name = "reserved";
3850 - }
3851 - res->start = e820[i].addr;
3852 - res->end = res->start + e820[i].size - 1;
3853 - res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
3854 - if (request_resource(&iomem_resource, res)) {
3855 - kfree(res);
3856 - continue;
3857 - }
3858 - if (e820[i].type == E820_RAM) {
3859 - /*
3860 - * We don't know which RAM region contains kernel data,
3861 - * so we try it repeatedly and let the resource manager
3862 - * test it.
3863 - */
3864 -#ifndef CONFIG_XEN
3865 - request_resource(res, code_resource);
3866 - request_resource(res, data_resource);
3867 -#endif
3868 -#ifdef CONFIG_KEXEC
3869 - if (crashk_res.start != crashk_res.end)
3870 - request_resource(res, &crashk_res);
3871 -#ifdef CONFIG_XEN
3872 - xen_machine_kexec_register_resources(res);
3873 -#endif
3874 -#endif
3875 - }
3876 - }
3877 -}
3878 -
3879 -/*
3880 - * Locate a unused range of the physical address space below 4G which
3881 - * can be used for PCI mappings.
3882 - */
3883 -static void __init
3884 -e820_setup_gap(struct e820entry *e820, int nr_map)
3885 -{
3886 - unsigned long gapstart, gapsize, round;
3887 - unsigned long long last;
3888 - int i;
3889 -
3890 - /*
3891 - * Search for the bigest gap in the low 32 bits of the e820
3892 - * memory space.
3893 - */
3894 - last = 0x100000000ull;
3895 - gapstart = 0x10000000;
3896 - gapsize = 0x400000;
3897 - i = nr_map;
3898 - while (--i >= 0) {
3899 - unsigned long long start = e820[i].addr;
3900 - unsigned long long end = start + e820[i].size;
3901 -
3902 - /*
3903 - * Since "last" is at most 4GB, we know we'll
3904 - * fit in 32 bits if this condition is true
3905 - */
3906 - if (last > end) {
3907 - unsigned long gap = last - end;
3908 -
3909 - if (gap > gapsize) {
3910 - gapsize = gap;
3911 - gapstart = end;
3912 - }
3913 - }
3914 - if (start < last)
3915 - last = start;
3916 - }
3917 -
3918 - /*
3919 - * See how much we want to round up: start off with
3920 - * rounding to the next 1MB area.
3921 - */
3922 - round = 0x100000;
3923 - while ((gapsize >> 4) > round)
3924 - round += round;
3925 - /* Fun with two's complement */
3926 - pci_mem_start = (gapstart + round) & -round;
3927 -
3928 - printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n",
3929 - pci_mem_start, gapstart, gapsize);
3930 -}
3931 -
3932 -/*
3933 - * Request address space for all standard resources
3934 - *
3935 - * This is called just before pcibios_init(), which is also a
3936 - * subsys_initcall, but is linked in later (in arch/i386/pci/common.c).
3937 - */
3938 -static int __init request_standard_resources(void)
3939 -{
3940 - int i;
3941 -
3942 - /* Nothing to do if not running in dom0. */
3943 - if (!is_initial_xendomain())
3944 - return 0;
3945 -
3946 - printk("Setting up standard PCI resources\n");
3947 -#ifdef CONFIG_XEN
3948 - legacy_init_iomem_resources(machine_e820.map, machine_e820.nr_map,
3949 - &code_resource, &data_resource);
3950 -#else
3951 - if (efi_enabled)
3952 - efi_initialize_iomem_resources(&code_resource, &data_resource);
3953 - else
3954 - legacy_init_iomem_resources(e820.map, e820.nr_map,
3955 - &code_resource, &data_resource);
3956 -#endif
3957 -
3958 - /* EFI systems may still have VGA */
3959 - request_resource(&iomem_resource, &video_ram_resource);
3960 -
3961 - /* request I/O space for devices used on all i[345]86 PCs */
3962 - for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++)
3963 - request_resource(&ioport_resource, &standard_io_resources[i]);
3964 - return 0;
3965 -}
3966 -
3967 -subsys_initcall(request_standard_resources);
3968 -
3969 -static void __init register_memory(void)
3970 -{
3971 -#ifdef CONFIG_XEN
3972 - if (is_initial_xendomain())
3973 - e820_setup_gap(machine_e820.map, machine_e820.nr_map);
3974 - else
3975 -#endif
3976 - e820_setup_gap(e820.map, e820.nr_map);
3977 -}
3978 -
3979 #ifdef CONFIG_MCA
3980 static void set_mca_bus(int x)
3981 {
3982 @@ -1491,6 +534,12 @@ static void set_mca_bus(int x)
3983 static void set_mca_bus(int x) { }
3984 #endif
3985
3986 +/* Overridden in paravirt.c if CONFIG_PARAVIRT */
3987 +char * __init __attribute__((weak)) memory_setup(void)
3988 +{
3989 + return machine_specific_memory_setup();
3990 +}
3991 +
3992 /*
3993 * Determine if we were loaded by an EFI loader. If so, then we have also been
3994 * passed the efi memmap, systab, etc., so we should use these data structures
3995 @@ -1578,7 +627,7 @@ void __init setup_arch(char **cmdline_p)
3996 efi_init();
3997 else {
3998 printk(KERN_INFO "BIOS-provided physical RAM map:\n");
3999 - print_memory_map(machine_specific_memory_setup());
4000 + print_memory_map(memory_setup());
4001 }
4002
4003 copy_edd();
4004 @@ -1757,7 +806,7 @@ void __init setup_arch(char **cmdline_p)
4005 get_smp_config();
4006 #endif
4007
4008 - register_memory();
4009 + e820_register_memory();
4010
4011 if (is_initial_xendomain()) {
4012 #ifdef CONFIG_VT
4013 --- sle11-2009-06-29.orig/arch/x86/kernel/smp_32-xen.c 2009-03-04 11:28:34.000000000 +0100
4014 +++ sle11-2009-06-29/arch/x86/kernel/smp_32-xen.c 2008-12-15 11:26:44.000000000 +0100
4015 @@ -659,6 +659,10 @@ int smp_call_function_single(int cpu, vo
4016 put_cpu();
4017 return -EBUSY;
4018 }
4019 +
4020 + /* Can deadlock when called with interrupts disabled */
4021 + WARN_ON(irqs_disabled());
4022 +
4023 spin_lock_bh(&call_lock);
4024 __smp_call_function_single(cpu, func, info, nonatomic, wait);
4025 spin_unlock_bh(&call_lock);
4026 --- sle11-2009-06-29.orig/arch/x86/kernel/time_32-xen.c 2009-03-24 10:08:00.000000000 +0100
4027 +++ sle11-2009-06-29/arch/x86/kernel/time_32-xen.c 2009-03-24 10:08:30.000000000 +0100
4028 @@ -61,6 +61,7 @@
4029 #include <asm/uaccess.h>
4030 #include <asm/processor.h>
4031 #include <asm/timer.h>
4032 +#include <asm/time.h>
4033 #include <asm/sections.h>
4034
4035 #include "mach_time.h"
4036 @@ -129,11 +130,11 @@ static DEFINE_PER_CPU(struct vcpu_runsta
4037 /* Must be signed, as it's compared with s64 quantities which can be -ve. */
4038 #define NS_PER_TICK (1000000000LL/HZ)
4039
4040 -static void __clock_was_set(void *unused)
4041 +static void __clock_was_set(struct work_struct *unused)
4042 {
4043 clock_was_set();
4044 }
4045 -static DECLARE_WORK(clock_was_set_work, __clock_was_set, NULL);
4046 +static DECLARE_WORK(clock_was_set_work, __clock_was_set);
4047
4048 /*
4049 * GCC 4.3 can turn loops over an induction variable into division. We do
4050 @@ -543,10 +544,7 @@ static int set_rtc_mmss(unsigned long no
4051 /* gets recalled with irq locally disabled */
4052 /* XXX - does irqsave resolve this? -johnstul */
4053 spin_lock_irqsave(&rtc_lock, flags);
4054 - if (efi_enabled)
4055 - retval = efi_set_rtc_mmss(nowtime);
4056 - else
4057 - retval = mach_set_rtc_mmss(nowtime);
4058 + retval = set_wallclock(nowtime);
4059 spin_unlock_irqrestore(&rtc_lock, flags);
4060
4061 return retval;
4062 @@ -873,10 +871,7 @@ unsigned long get_cmos_time(void)
4063
4064 spin_lock_irqsave(&rtc_lock, flags);
4065
4066 - if (efi_enabled)
4067 - retval = efi_get_time();
4068 - else
4069 - retval = mach_get_cmos_time();
4070 + retval = get_wallclock();
4071
4072 spin_unlock_irqrestore(&rtc_lock, flags);
4073
4074 @@ -978,7 +973,7 @@ static void __init hpet_time_init(void)
4075 printk("Using HPET for base-timer\n");
4076 }
4077
4078 - time_init_hook();
4079 + do_time_init();
4080 }
4081 #endif
4082
4083 --- sle11-2009-06-29.orig/arch/x86/kernel/traps_32-xen.c 2009-03-04 11:28:34.000000000 +0100
4084 +++ sle11-2009-06-29/arch/x86/kernel/traps_32-xen.c 2008-12-15 11:26:44.000000000 +0100
4085 @@ -29,6 +29,8 @@
4086 #include <linux/kexec.h>
4087 #include <linux/unwind.h>
4088 #include <linux/uaccess.h>
4089 +#include <linux/nmi.h>
4090 +#include <linux/bug.h>
4091
4092 #ifdef CONFIG_EISA
4093 #include <linux/ioport.h>
4094 @@ -61,9 +63,6 @@ int panic_on_unrecovered_nmi;
4095
4096 asmlinkage int system_call(void);
4097
4098 -struct desc_struct default_ldt[] = { { 0, 0 }, { 0, 0 }, { 0, 0 },
4099 - { 0, 0 }, { 0, 0 } };
4100 -
4101 /* Do we ignore FPU interrupts ? */
4102 char ignore_fpu_irq = 0;
4103
4104 @@ -100,12 +99,7 @@ asmlinkage void fixup_4gb_segment(void);
4105 #endif
4106 asmlinkage void machine_check(void);
4107
4108 -static int kstack_depth_to_print = 24;
4109 -#ifdef CONFIG_STACK_UNWIND
4110 -static int call_trace = 1;
4111 -#else
4112 -#define call_trace (-1)
4113 -#endif
4114 +int kstack_depth_to_print = 24;
4115 ATOMIC_NOTIFIER_HEAD(i386die_chain);
4116
4117 int register_die_notifier(struct notifier_block *nb)
4118 @@ -159,25 +153,7 @@ static inline unsigned long print_contex
4119 return ebp;
4120 }
4121
4122 -struct ops_and_data {
4123 - struct stacktrace_ops *ops;
4124 - void *data;
4125 -};
4126 -
4127 -static asmlinkage int
4128 -dump_trace_unwind(struct unwind_frame_info *info, void *data)
4129 -{
4130 - struct ops_and_data *oad = (struct ops_and_data *)data;
4131 - int n = 0;
4132 -
4133 - while (unwind(info) == 0 && UNW_PC(info)) {
4134 - n++;
4135 - oad->ops->address(oad->data, UNW_PC(info));
4136 - if (arch_unw_user_mode(info))
4137 - break;
4138 - }
4139 - return n;
4140 -}
4141 +#define MSG(msg) ops->warning(data, msg)
4142
4143 void dump_trace(struct task_struct *task, struct pt_regs *regs,
4144 unsigned long *stack,
4145 @@ -188,39 +164,6 @@ void dump_trace(struct task_struct *task
4146 if (!task)
4147 task = current;
4148
4149 - if (call_trace >= 0) {
4150 - int unw_ret = 0;
4151 - struct unwind_frame_info info;
4152 - struct ops_and_data oad = { .ops = ops, .data = data };
4153 -
4154 - if (regs) {
4155 - if (unwind_init_frame_info(&info, task, regs) == 0)
4156 - unw_ret = dump_trace_unwind(&info, &oad);
4157 - } else if (task == current)
4158 - unw_ret = unwind_init_running(&info, dump_trace_unwind, &oad);
4159 - else {
4160 - if (unwind_init_blocked(&info, task) == 0)
4161 - unw_ret = dump_trace_unwind(&info, &oad);
4162 - }
4163 - if (unw_ret > 0) {
4164 - if (call_trace == 1 && !arch_unw_user_mode(&info)) {
4165 - ops->warning_symbol(data, "DWARF2 unwinder stuck at %s\n",
4166 - UNW_PC(&info));
4167 - if (UNW_SP(&info) >= PAGE_OFFSET) {
4168 - ops->warning(data, "Leftover inexact backtrace:\n");
4169 - stack = (void *)UNW_SP(&info);
4170 - if (!stack)
4171 - return;
4172 - ebp = UNW_FP(&info);
4173 - } else
4174 - ops->warning(data, "Full inexact backtrace again:\n");
4175 - } else if (call_trace >= 1)
4176 - return;
4177 - else
4178 - ops->warning(data, "Full inexact backtrace again:\n");
4179 - } else
4180 - ops->warning(data, "Inexact backtrace:\n");
4181 - }
4182 if (!stack) {
4183 unsigned long dummy;
4184 stack = &dummy;
4185 @@ -253,6 +196,7 @@ void dump_trace(struct task_struct *task
4186 stack = (unsigned long*)context->previous_esp;
4187 if (!stack)
4188 break;
4189 + touch_nmi_watchdog();
4190 }
4191 }
4192 EXPORT_SYMBOL(dump_trace);
4193 @@ -385,7 +329,7 @@ void show_registers(struct pt_regs *regs
4194 * time of the fault..
4195 */
4196 if (in_kernel) {
4197 - u8 __user *eip;
4198 + u8 *eip;
4199 int code_bytes = 64;
4200 unsigned char c;
4201
4202 @@ -394,18 +338,20 @@ void show_registers(struct pt_regs *regs
4203
4204 printk(KERN_EMERG "Code: ");
4205
4206 - eip = (u8 __user *)regs->eip - 43;
4207 - if (eip < (u8 __user *)PAGE_OFFSET || __get_user(c, eip)) {
4208 + eip = (u8 *)regs->eip - 43;
4209 + if (eip < (u8 *)PAGE_OFFSET ||
4210 + probe_kernel_address(eip, c)) {
4211 /* try starting at EIP */
4212 - eip = (u8 __user *)regs->eip;
4213 + eip = (u8 *)regs->eip;
4214 code_bytes = 32;
4215 }
4216 for (i = 0; i < code_bytes; i++, eip++) {
4217 - if (eip < (u8 __user *)PAGE_OFFSET || __get_user(c, eip)) {
4218 + if (eip < (u8 *)PAGE_OFFSET ||
4219 + probe_kernel_address(eip, c)) {
4220 printk(" Bad EIP value.");
4221 break;
4222 }
4223 - if (eip == (u8 __user *)regs->eip)
4224 + if (eip == (u8 *)regs->eip)
4225 printk("<%02x> ", c);
4226 else
4227 printk("%02x ", c);
4228 @@ -414,43 +360,22 @@ void show_registers(struct pt_regs *regs
4229 printk("\n");
4230 }
4231
4232 -static void handle_BUG(struct pt_regs *regs)
4233 +int is_valid_bugaddr(unsigned long eip)
4234 {
4235 - unsigned long eip = regs->eip;
4236 unsigned short ud2;
4237
4238 if (eip < PAGE_OFFSET)
4239 - return;
4240 - if (probe_kernel_address((unsigned short __user *)eip, ud2))
4241 - return;
4242 - if (ud2 != 0x0b0f)
4243 - return;
4244 + return 0;
4245 + if (probe_kernel_address((unsigned short *)eip, ud2))
4246 + return 0;
4247
4248 - printk(KERN_EMERG "------------[ cut here ]------------\n");
4249 -
4250 -#ifdef CONFIG_DEBUG_BUGVERBOSE
4251 - do {
4252 - unsigned short line;
4253 - char *file;
4254 - char c;
4255 -
4256 - if (probe_kernel_address((unsigned short __user *)(eip + 2),
4257 - line))
4258 - break;
4259 - if (__get_user(file, (char * __user *)(eip + 4)) ||
4260 - (unsigned long)file < PAGE_OFFSET || __get_user(c, file))
4261 - file = "<bad filename>";
4262 -
4263 - printk(KERN_EMERG "kernel BUG at %s:%d!\n", file, line);
4264 - return;
4265 - } while (0);
4266 -#endif
4267 - printk(KERN_EMERG "Kernel BUG at [verbose debug info unavailable]\n");
4268 + return ud2 == 0x0b0f;
4269 }
4270
4271 -/* This is gone through when something in the kernel
4272 - * has done something bad and is about to be terminated.
4273 -*/
4274 +/*
4275 + * This is gone through when something in the kernel has done something bad and
4276 + * is about to be terminated.
4277 + */
4278 void die(const char * str, struct pt_regs * regs, long err)
4279 {
4280 static struct {
4281 @@ -458,7 +383,7 @@ void die(const char * str, struct pt_reg
4282 u32 lock_owner;
4283 int lock_owner_depth;
4284 } die = {
4285 - .lock = SPIN_LOCK_UNLOCKED,
4286 + .lock = __SPIN_LOCK_UNLOCKED(die.lock),
4287 .lock_owner = -1,
4288 .lock_owner_depth = 0
4289 };
4290 @@ -482,7 +407,8 @@ void die(const char * str, struct pt_reg
4291 unsigned long esp;
4292 unsigned short ss;
4293
4294 - handle_BUG(regs);
4295 + report_bug(regs->eip);
4296 +
4297 printk(KERN_EMERG "%s: %04lx [#%d]\n", str, err & 0xffff, ++die_counter);
4298 #ifdef CONFIG_PREEMPT
4299 printk(KERN_EMERG "PREEMPT ");
4300 @@ -682,8 +608,7 @@ mem_parity_error(unsigned char reason, s
4301 {
4302 printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x on "
4303 "CPU %d.\n", reason, smp_processor_id());
4304 - printk(KERN_EMERG "You probably have a hardware problem with your RAM "
4305 - "chips\n");
4306 + printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n");
4307 if (panic_on_unrecovered_nmi)
4308 panic("NMI: Not continuing");
4309
4310 @@ -741,7 +666,6 @@ void __kprobes die_nmi(struct pt_regs *r
4311 printk(" on CPU%d, eip %08lx, registers:\n",
4312 smp_processor_id(), regs->eip);
4313 show_registers(regs);
4314 - printk(KERN_EMERG "console shuts up ...\n");
4315 console_silent();
4316 spin_unlock(&nmi_print_lock);
4317 bust_spinlocks(0);
4318 @@ -1057,49 +981,24 @@ fastcall void do_spurious_interrupt_bug(
4319 #endif
4320 }
4321
4322 -fastcall void setup_x86_bogus_stack(unsigned char * stk)
4323 +fastcall unsigned long patch_espfix_desc(unsigned long uesp,
4324 + unsigned long kesp)
4325 {
4326 - unsigned long *switch16_ptr, *switch32_ptr;
4327 - struct pt_regs *regs;
4328 - unsigned long stack_top, stack_bot;
4329 - unsigned short iret_frame16_off;
4330 - int cpu = smp_processor_id();
4331 - /* reserve the space on 32bit stack for the magic switch16 pointer */
4332 - memmove(stk, stk + 8, sizeof(struct pt_regs));
4333 - switch16_ptr = (unsigned long *)(stk + sizeof(struct pt_regs));
4334 - regs = (struct pt_regs *)stk;
4335 - /* now the switch32 on 16bit stack */
4336 - stack_bot = (unsigned long)&per_cpu(cpu_16bit_stack, cpu);
4337 - stack_top = stack_bot + CPU_16BIT_STACK_SIZE;
4338 - switch32_ptr = (unsigned long *)(stack_top - 8);
4339 - iret_frame16_off = CPU_16BIT_STACK_SIZE - 8 - 20;
4340 - /* copy iret frame on 16bit stack */
4341 - memcpy((void *)(stack_bot + iret_frame16_off), &regs->eip, 20);
4342 - /* fill in the switch pointers */
4343 - switch16_ptr[0] = (regs->esp & 0xffff0000) | iret_frame16_off;
4344 - switch16_ptr[1] = __ESPFIX_SS;
4345 - switch32_ptr[0] = (unsigned long)stk + sizeof(struct pt_regs) +
4346 - 8 - CPU_16BIT_STACK_SIZE;
4347 - switch32_ptr[1] = __KERNEL_DS;
4348 -}
4349 -
4350 -fastcall unsigned char * fixup_x86_bogus_stack(unsigned short sp)
4351 -{
4352 - unsigned long *switch32_ptr;
4353 - unsigned char *stack16, *stack32;
4354 - unsigned long stack_top, stack_bot;
4355 - int len;
4356 int cpu = smp_processor_id();
4357 - stack_bot = (unsigned long)&per_cpu(cpu_16bit_stack, cpu);
4358 - stack_top = stack_bot + CPU_16BIT_STACK_SIZE;
4359 - switch32_ptr = (unsigned long *)(stack_top - 8);
4360 - /* copy the data from 16bit stack to 32bit stack */
4361 - len = CPU_16BIT_STACK_SIZE - 8 - sp;
4362 - stack16 = (unsigned char *)(stack_bot + sp);
4363 - stack32 = (unsigned char *)
4364 - (switch32_ptr[0] + CPU_16BIT_STACK_SIZE - 8 - len);
4365 - memcpy(stack32, stack16, len);
4366 - return stack32;
4367 + struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
4368 + struct desc_struct *gdt = (struct desc_struct *)cpu_gdt_descr->address;
4369 + unsigned long base = (kesp - uesp) & -THREAD_SIZE;
4370 + unsigned long new_kesp = kesp - base;
4371 + unsigned long lim_pages = (new_kesp | (THREAD_SIZE - 1)) >> PAGE_SHIFT;
4372 + __u64 desc = *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS];
4373 + /* Set up base for espfix segment */
4374 + desc &= 0x00f0ff0000000000ULL;
4375 + desc |= ((((__u64)base) << 16) & 0x000000ffffff0000ULL) |
4376 + ((((__u64)base) << 32) & 0xff00000000000000ULL) |
4377 + ((((__u64)lim_pages) << 32) & 0x000f000000000000ULL) |
4378 + (lim_pages & 0xffff);
4379 + *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS] = desc;
4380 + return new_kesp;
4381 }
4382 #endif
4383
4384 @@ -1113,7 +1012,7 @@ fastcall unsigned char * fixup_x86_bogus
4385 * Must be called with kernel preemption disabled (in this case,
4386 * local interrupts are disabled at the call-site in entry.S).
4387 */
4388 -asmlinkage void math_state_restore(struct pt_regs regs)
4389 +asmlinkage void math_state_restore(void)
4390 {
4391 struct thread_info *thread = current_thread_info();
4392 struct task_struct *tsk = thread->task;
4393 @@ -1123,6 +1022,7 @@ asmlinkage void math_state_restore(struc
4394 init_fpu(tsk);
4395 restore_fpu(tsk);
4396 thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */
4397 + tsk->fpu_counter++;
4398 }
4399
4400 #ifndef CONFIG_MATH_EMULATION
4401 @@ -1234,19 +1134,3 @@ static int __init kstack_setup(char *s)
4402 return 1;
4403 }
4404 __setup("kstack=", kstack_setup);
4405 -
4406 -#ifdef CONFIG_STACK_UNWIND
4407 -static int __init call_trace_setup(char *s)
4408 -{
4409 - if (strcmp(s, "old") == 0)
4410 - call_trace = -1;
4411 - else if (strcmp(s, "both") == 0)
4412 - call_trace = 0;
4413 - else if (strcmp(s, "newfallback") == 0)
4414 - call_trace = 1;
4415 - else if (strcmp(s, "new") == 2)
4416 - call_trace = 2;
4417 - return 1;
4418 -}
4419 -__setup("call_trace=", call_trace_setup);
4420 -#endif
4421 --- sle11-2009-06-29.orig/arch/x86/kernel/vmlinux_32.lds.S 2009-06-29 15:14:52.000000000 +0200
4422 +++ sle11-2009-06-29/arch/x86/kernel/vmlinux_32.lds.S 2008-12-15 11:26:44.000000000 +0100
4423 @@ -29,6 +29,12 @@ PHDRS {
4424 SECTIONS
4425 {
4426 . = LOAD_OFFSET + LOAD_PHYSICAL_ADDR;
4427 +
4428 +#if defined(CONFIG_XEN) && CONFIG_XEN_COMPAT <= 0x030002
4429 +#undef LOAD_OFFSET
4430 +#define LOAD_OFFSET 0
4431 +#endif
4432 +
4433 phys_startup_32 = startup_32 - LOAD_OFFSET;
4434
4435 .text.head : AT(ADDR(.text.head) - LOAD_OFFSET) {
4436 --- sle11-2009-06-29.orig/arch/x86/kvm/Kconfig 2009-06-29 15:14:52.000000000 +0200
4437 +++ sle11-2009-06-29/arch/x86/kvm/Kconfig 2008-12-15 11:26:44.000000000 +0100
4438 @@ -7,6 +7,7 @@ config HAVE_KVM
4439 menuconfig VIRTUALIZATION
4440 bool "Virtualization"
4441 depends on HAVE_KVM || X86
4442 + depends on !XEN
4443 default y
4444 ---help---
4445 Say Y here to get to see options for using your Linux host to run other
4446 --- sle11-2009-06-29.orig/arch/x86/mm/fault_32-xen.c 2009-03-04 11:28:34.000000000 +0100
4447 +++ sle11-2009-06-29/arch/x86/mm/fault_32-xen.c 2008-12-15 11:26:44.000000000 +0100
4448 @@ -22,9 +22,9 @@
4449 #include <linux/highmem.h>
4450 #include <linux/module.h>
4451 #include <linux/kprobes.h>
4452 +#include <linux/uaccess.h>
4453
4454 #include <asm/system.h>
4455 -#include <asm/uaccess.h>
4456 #include <asm/desc.h>
4457 #include <asm/kdebug.h>
4458 #include <asm/segment.h>
4459 @@ -167,7 +167,7 @@ static inline unsigned long get_segment_
4460 static int __is_prefetch(struct pt_regs *regs, unsigned long addr)
4461 {
4462 unsigned long limit;
4463 - unsigned long instr = get_segment_eip (regs, &limit);
4464 + unsigned char *instr = (unsigned char *)get_segment_eip (regs, &limit);
4465 int scan_more = 1;
4466 int prefetch = 0;
4467 int i;
4468 @@ -177,9 +177,9 @@ static int __is_prefetch(struct pt_regs
4469 unsigned char instr_hi;
4470 unsigned char instr_lo;
4471
4472 - if (instr > limit)
4473 + if (instr > (unsigned char *)limit)
4474 break;
4475 - if (__get_user(opcode, (unsigned char __user *) instr))
4476 + if (probe_kernel_address(instr, opcode))
4477 break;
4478
4479 instr_hi = opcode & 0xf0;
4480 @@ -204,9 +204,9 @@ static int __is_prefetch(struct pt_regs
4481 case 0x00:
4482 /* Prefetch instruction is 0x0F0D or 0x0F18 */
4483 scan_more = 0;
4484 - if (instr > limit)
4485 + if (instr > (unsigned char *)limit)
4486 break;
4487 - if (__get_user(opcode, (unsigned char __user *) instr))
4488 + if (probe_kernel_address(instr, opcode))
4489 break;
4490 prefetch = (instr_lo == 0xF) &&
4491 (opcode == 0x0D || opcode == 0x18);
4492 --- sle11-2009-06-29.orig/arch/x86/mm/highmem_32-xen.c 2009-03-04 11:28:34.000000000 +0100
4493 +++ sle11-2009-06-29/arch/x86/mm/highmem_32-xen.c 2008-12-15 11:26:44.000000000 +0100
4494 @@ -32,7 +32,7 @@ static void *__kmap_atomic(struct page *
4495 unsigned long vaddr;
4496
4497 /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
4498 - inc_preempt_count();
4499 + pagefault_disable();
4500 if (!PageHighMem(page))
4501 return page_address(page);
4502
4503 @@ -63,26 +63,22 @@ void kunmap_atomic(void *kvaddr, enum km
4504 unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
4505 enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id();
4506
4507 -#ifdef CONFIG_DEBUG_HIGHMEM
4508 - if (vaddr >= PAGE_OFFSET && vaddr < (unsigned long)high_memory) {
4509 - dec_preempt_count();
4510 - preempt_check_resched();
4511 - return;
4512 - }
4513 -
4514 - if (vaddr != __fix_to_virt(FIX_KMAP_BEGIN+idx))
4515 - BUG();
4516 -#endif
4517 /*
4518 * Force other mappings to Oops if they'll try to access this pte
4519 * without first remap it. Keeping stale mappings around is a bad idea
4520 * also, in case the page changes cacheability attributes or becomes
4521 * a protected page in a hypervisor.
4522 */
4523 - kpte_clear_flush(kmap_pte-idx, vaddr);
4524 + if (vaddr == __fix_to_virt(FIX_KMAP_BEGIN+idx))
4525 + kpte_clear_flush(kmap_pte-idx, vaddr);
4526 + else {
4527 +#ifdef CONFIG_DEBUG_HIGHMEM
4528 + BUG_ON(vaddr < PAGE_OFFSET);
4529 + BUG_ON(vaddr >= (unsigned long)high_memory);
4530 +#endif
4531 + }
4532
4533 - dec_preempt_count();
4534 - preempt_check_resched();
4535 + pagefault_enable();
4536 }
4537
4538 /* This is the same as kmap_atomic() but can map memory that doesn't
4539 @@ -93,7 +89,7 @@ void *kmap_atomic_pfn(unsigned long pfn,
4540 enum fixed_addresses idx;
4541 unsigned long vaddr;
4542
4543 - inc_preempt_count();
4544 + pagefault_disable();
4545
4546 idx = type + KM_TYPE_NR*smp_processor_id();
4547 vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
4548 --- sle11-2009-06-29.orig/arch/x86/mm/init_32-xen.c 2009-03-04 11:28:34.000000000 +0100
4549 +++ sle11-2009-06-29/arch/x86/mm/init_32-xen.c 2008-12-15 11:26:44.000000000 +0100
4550 @@ -233,8 +233,6 @@ static inline int page_kills_ppro(unsign
4551
4552 #endif
4553
4554 -extern int is_available_memory(efi_memory_desc_t *);
4555 -
4556 int page_is_ram(unsigned long pagenr)
4557 {
4558 int i;
4559 @@ -327,7 +325,7 @@ void __init add_one_highpage_init(struct
4560 SetPageReserved(page);
4561 }
4562
4563 -static int add_one_highpage_hotplug(struct page *page, unsigned long pfn)
4564 +static int __meminit add_one_highpage_hotplug(struct page *page, unsigned long pfn)
4565 {
4566 free_new_highpage(page, pfn);
4567 totalram_pages++;
4568 @@ -344,7 +342,7 @@ static int add_one_highpage_hotplug(stru
4569 * has been added dynamically that would be
4570 * onlined here is in HIGHMEM
4571 */
4572 -void online_page(struct page *page)
4573 +void __meminit online_page(struct page *page)
4574 {
4575 ClearPageReserved(page);
4576 add_one_highpage_hotplug(page, page_to_pfn(page));
4577 @@ -732,16 +730,10 @@ void __init mem_init(void)
4578 set_bit(PG_pinned, &virt_to_page(init_mm.pgd)->flags);
4579 }
4580
4581 -/*
4582 - * this is for the non-NUMA, single node SMP system case.
4583 - * Specifically, in the case of x86, we will always add
4584 - * memory to the highmem for now.
4585 - */
4586 #ifdef CONFIG_MEMORY_HOTPLUG
4587 -#ifndef CONFIG_NEED_MULTIPLE_NODES
4588 int arch_add_memory(int nid, u64 start, u64 size)
4589 {
4590 - struct pglist_data *pgdata = &contig_page_data;
4591 + struct pglist_data *pgdata = NODE_DATA(nid);
4592 struct zone *zone = pgdata->node_zones + ZONE_HIGHMEM;
4593 unsigned long start_pfn = start >> PAGE_SHIFT;
4594 unsigned long nr_pages = size >> PAGE_SHIFT;
4595 @@ -753,11 +745,11 @@ int remove_memory(u64 start, u64 size)
4596 {
4597 return -EINVAL;
4598 }
4599 -#endif
4600 +EXPORT_SYMBOL_GPL(remove_memory);
4601 #endif
4602
4603 -kmem_cache_t *pgd_cache;
4604 -kmem_cache_t *pmd_cache;
4605 +struct kmem_cache *pgd_cache;
4606 +struct kmem_cache *pmd_cache;
4607
4608 void __init pgtable_cache_init(void)
4609 {
4610 --- sle11-2009-06-29.orig/arch/x86/mm/pgtable_32-xen.c 2009-03-04 11:28:34.000000000 +0100
4611 +++ sle11-2009-06-29/arch/x86/mm/pgtable_32-xen.c 2008-12-15 11:26:44.000000000 +0100
4612 @@ -203,7 +203,7 @@ void pte_free(struct page *pte)
4613 __free_page(pte);
4614 }
4615
4616 -void pmd_ctor(void *pmd, kmem_cache_t *cache, unsigned long flags)
4617 +void pmd_ctor(void *pmd, struct kmem_cache *cache, unsigned long flags)
4618 {
4619 memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t));
4620 }
4621 @@ -243,7 +243,7 @@ static inline void pgd_list_del(pgd_t *p
4622 set_page_private(next, (unsigned long)pprev);
4623 }
4624
4625 -void pgd_ctor(void *pgd, kmem_cache_t *cache, unsigned long unused)
4626 +void pgd_ctor(void *pgd, struct kmem_cache *cache, unsigned long unused)
4627 {
4628 unsigned long flags;
4629
4630 @@ -264,7 +264,7 @@ void pgd_ctor(void *pgd, kmem_cache_t *c
4631 }
4632
4633 /* never called when PTRS_PER_PMD > 1 */
4634 -void pgd_dtor(void *pgd, kmem_cache_t *cache, unsigned long unused)
4635 +void pgd_dtor(void *pgd, struct kmem_cache *cache, unsigned long unused)
4636 {
4637 unsigned long flags; /* can be called from interrupt context */
4638
4639 --- sle11-2009-06-29.orig/arch/x86/pci/irq-xen.c 2009-03-04 11:28:34.000000000 +0100
4640 +++ sle11-2009-06-29/arch/x86/pci/irq-xen.c 2008-12-15 11:26:44.000000000 +0100
4641 @@ -768,7 +768,7 @@ static void __init pirq_find_router(stru
4642 DBG(KERN_DEBUG "PCI: Attempting to find IRQ router for %04x:%04x\n",
4643 rt->rtr_vendor, rt->rtr_device);
4644
4645 - pirq_router_dev = pci_find_slot(rt->rtr_bus, rt->rtr_devfn);
4646 + pirq_router_dev = pci_get_bus_and_slot(rt->rtr_bus, rt->rtr_devfn);
4647 if (!pirq_router_dev) {
4648 DBG(KERN_DEBUG "PCI: Interrupt router not found at "
4649 "%02x:%02x\n", rt->rtr_bus, rt->rtr_devfn);
4650 @@ -788,6 +788,8 @@ static void __init pirq_find_router(stru
4651 pirq_router_dev->vendor,
4652 pirq_router_dev->device,
4653 pci_name(pirq_router_dev));
4654 +
4655 + /* The device remains referenced for the kernel lifetime */
4656 }
4657
4658 static struct irq_info *pirq_get_info(struct pci_dev *dev)
4659 --- sle11-2009-06-29.orig/arch/x86/kernel/entry_64-xen.S 2009-03-04 11:28:34.000000000 +0100
4660 +++ sle11-2009-06-29/arch/x86/kernel/entry_64-xen.S 2008-12-15 11:26:44.000000000 +0100
4661 @@ -261,7 +261,6 @@ ENTRY(system_call)
4662 movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
4663 GET_THREAD_INFO(%rcx)
4664 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
4665 - CFI_REMEMBER_STATE
4666 jnz tracesys
4667 cmpq $__NR_syscall_max,%rax
4668 ja badsys
4669 @@ -272,7 +271,6 @@ ENTRY(system_call)
4670 * Syscall return path ending with SYSRET (fast path)
4671 * Has incomplete stack frame and undefined top of stack.
4672 */
4673 - .globl ret_from_sys_call
4674 ret_from_sys_call:
4675 movl $_TIF_ALLWORK_MASK,%edi
4676 /* edi: flagmask */
4677 @@ -282,8 +280,8 @@ sysret_check:
4678 TRACE_IRQS_OFF
4679 movl threadinfo_flags(%rcx),%edx
4680 andl %edi,%edx
4681 - CFI_REMEMBER_STATE
4682 jnz sysret_careful
4683 + CFI_REMEMBER_STATE
4684 /*
4685 * sysretq will re-enable interrupts:
4686 */
4687 @@ -292,10 +290,10 @@ sysret_check:
4688 RESTORE_ARGS 0,8,0
4689 HYPERVISOR_IRET VGCF_IN_SYSCALL
4690
4691 + CFI_RESTORE_STATE
4692 /* Handle reschedules */
4693 /* edx: work, edi: workmask */
4694 sysret_careful:
4695 - CFI_RESTORE_STATE
4696 bt $TIF_NEED_RESCHED,%edx
4697 jnc sysret_signal
4698 TRACE_IRQS_ON
4699 @@ -334,7 +332,6 @@ badsys:
4700
4701 /* Do syscall tracing */
4702 tracesys:
4703 - CFI_RESTORE_STATE
4704 SAVE_REST
4705 movq $-ENOSYS,RAX(%rsp)
4706 FIXUP_TOP_OF_STACK %rdi
4707 @@ -350,32 +347,13 @@ tracesys:
4708 call *sys_call_table(,%rax,8)
4709 1: movq %rax,RAX-ARGOFFSET(%rsp)
4710 /* Use IRET because user could have changed frame */
4711 - jmp int_ret_from_sys_call
4712 - CFI_ENDPROC
4713 -END(system_call)
4714
4715 /*
4716 * Syscall return path ending with IRET.
4717 * Has correct top of stack, but partial stack frame.
4718 - */
4719 -ENTRY(int_ret_from_sys_call)
4720 - CFI_STARTPROC simple
4721 - CFI_SIGNAL_FRAME
4722 - CFI_DEF_CFA rsp,SS+8-ARGOFFSET
4723 - /*CFI_REL_OFFSET ss,SS-ARGOFFSET*/
4724 - CFI_REL_OFFSET rsp,RSP-ARGOFFSET
4725 - /*CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/
4726 - /*CFI_REL_OFFSET cs,CS-ARGOFFSET*/
4727 - CFI_REL_OFFSET rip,RIP-ARGOFFSET
4728 - CFI_REL_OFFSET rdx,RDX-ARGOFFSET
4729 - CFI_REL_OFFSET rcx,RCX-ARGOFFSET
4730 - CFI_REL_OFFSET rax,RAX-ARGOFFSET
4731 - CFI_REL_OFFSET rdi,RDI-ARGOFFSET
4732 - CFI_REL_OFFSET rsi,RSI-ARGOFFSET
4733 - CFI_REL_OFFSET r8,R8-ARGOFFSET
4734 - CFI_REL_OFFSET r9,R9-ARGOFFSET
4735 - CFI_REL_OFFSET r10,R10-ARGOFFSET
4736 - CFI_REL_OFFSET r11,R11-ARGOFFSET
4737 + */
4738 + .globl int_ret_from_sys_call
4739 +int_ret_from_sys_call:
4740 XEN_BLOCK_EVENTS(%rsi)
4741 TRACE_IRQS_OFF
4742 testb $3,CS-ARGOFFSET(%rsp)
4743 @@ -428,8 +406,6 @@ int_very_careful:
4744 popq %rdi
4745 CFI_ADJUST_CFA_OFFSET -8
4746 andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
4747 - XEN_BLOCK_EVENTS(%rsi)
4748 - TRACE_IRQS_OFF
4749 jmp int_restore_rest
4750
4751 int_signal:
4752 @@ -445,7 +421,7 @@ int_restore_rest:
4753 TRACE_IRQS_OFF
4754 jmp int_with_check
4755 CFI_ENDPROC
4756 -END(int_ret_from_sys_call)
4757 +END(system_call)
4758
4759 /*
4760 * Certain special system calls that need to save a complete full stack frame.
4761 @@ -1275,36 +1251,3 @@ ENTRY(call_softirq)
4762 ret
4763 CFI_ENDPROC
4764 ENDPROC(call_softirq)
4765 -
4766 -#ifdef CONFIG_STACK_UNWIND
4767 -ENTRY(arch_unwind_init_running)
4768 - CFI_STARTPROC
4769 - movq %r15, R15(%rdi)
4770 - movq %r14, R14(%rdi)
4771 - xchgq %rsi, %rdx
4772 - movq %r13, R13(%rdi)
4773 - movq %r12, R12(%rdi)
4774 - xorl %eax, %eax
4775 - movq %rbp, RBP(%rdi)
4776 - movq %rbx, RBX(%rdi)
4777 - movq (%rsp), %rcx
4778 - movq %rax, R11(%rdi)
4779 - movq %rax, R10(%rdi)
4780 - movq %rax, R9(%rdi)
4781 - movq %rax, R8(%rdi)
4782 - movq %rax, RAX(%rdi)
4783 - movq %rax, RCX(%rdi)
4784 - movq %rax, RDX(%rdi)
4785 - movq %rax, RSI(%rdi)
4786 - movq %rax, RDI(%rdi)
4787 - movq %rax, ORIG_RAX(%rdi)
4788 - movq %rcx, RIP(%rdi)
4789 - leaq 8(%rsp), %rcx
4790 - movq $__KERNEL_CS, CS(%rdi)
4791 - movq %rax, EFLAGS(%rdi)
4792 - movq %rcx, RSP(%rdi)
4793 - movq $__KERNEL_DS, SS(%rdi)
4794 - jmpq *%rdx
4795 - CFI_ENDPROC
4796 -ENDPROC(arch_unwind_init_running)
4797 -#endif
4798 --- sle11-2009-06-29.orig/arch/x86/kernel/genapic_64-xen.c 2009-06-29 15:14:52.000000000 +0200
4799 +++ sle11-2009-06-29/arch/x86/kernel/genapic_64-xen.c 2008-12-15 11:26:44.000000000 +0100
4800 @@ -34,6 +34,7 @@ extern struct genapic apic_physflat;
4801
4802 #ifndef CONFIG_XEN
4803 struct genapic *genapic = &apic_flat;
4804 +struct genapic *genapic_force;
4805 #else
4806 extern struct genapic apic_xen;
4807 struct genapic *genapic = &apic_xen;
4808 @@ -52,6 +53,13 @@ void __init clustered_apic_check(void)
4809 u8 cluster_cnt[NUM_APIC_CLUSTERS];
4810 int max_apic = 0;
4811
4812 + /* genapic selection can be forced because of certain quirks.
4813 + */
4814 + if (genapic_force) {
4815 + genapic = genapic_force;
4816 + goto print;
4817 + }
4818 +
4819 #if defined(CONFIG_ACPI)
4820 /*
4821 * Some x86_64 machines use physical APIC mode regardless of how many
4822 --- sle11-2009-06-29.orig/arch/x86/kernel/head64-xen.c 2009-03-04 11:28:34.000000000 +0100
4823 +++ sle11-2009-06-29/arch/x86/kernel/head64-xen.c 2008-12-15 11:26:44.000000000 +0100
4824 @@ -104,7 +104,10 @@ void __init x86_64_start_kernel(char * r
4825 machine_to_phys_order++;
4826
4827 #if 0
4828 - for (i = 0; i < 256; i++)
4829 + /* clear bss before set_intr_gate with early_idt_handler */
4830 + clear_bss();
4831 +
4832 + for (i = 0; i < IDT_ENTRIES; i++)
4833 set_intr_gate(i, early_idt_handler);
4834 asm volatile("lidt %0" :: "m" (idt_descr));
4835 #endif
4836 --- sle11-2009-06-29.orig/arch/x86/kernel/io_apic_64-xen.c 2009-03-04 11:28:34.000000000 +0100
4837 +++ sle11-2009-06-29/arch/x86/kernel/io_apic_64-xen.c 2008-12-15 11:26:44.000000000 +0100
4838 @@ -202,14 +202,20 @@ static struct IO_APIC_route_entry ioapic
4839 * the interrupt, and we need to make sure the entry is fully populated
4840 * before that happens.
4841 */
4842 -static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
4843 +static void
4844 +__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
4845 {
4846 - unsigned long flags;
4847 union entry_union eu;
4848 eu.entry = e;
4849 - spin_lock_irqsave(&ioapic_lock, flags);
4850 io_apic_write(apic, 0x11 + 2*pin, eu.w2);
4851 io_apic_write(apic, 0x10 + 2*pin, eu.w1);
4852 +}
4853 +
4854 +static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
4855 +{
4856 + unsigned long flags;
4857 + spin_lock_irqsave(&ioapic_lock, flags);
4858 + __ioapic_write_entry(apic, pin, e);
4859 spin_unlock_irqrestore(&ioapic_lock, flags);
4860 }
4861
4862 @@ -720,6 +726,22 @@ static int assign_irq_vector(int irq, cp
4863 }
4864
4865 #ifndef CONFIG_XEN
4866 +static void __clear_irq_vector(int irq)
4867 +{
4868 + cpumask_t mask;
4869 + int cpu, vector;
4870 +
4871 + BUG_ON(!irq_vector[irq]);
4872 +
4873 + vector = irq_vector[irq];
4874 + cpus_and(mask, irq_domain[irq], cpu_online_map);
4875 + for_each_cpu_mask(cpu, mask)
4876 + per_cpu(vector_irq, cpu)[vector] = -1;
4877 +
4878 + irq_vector[irq] = 0;
4879 + irq_domain[irq] = CPU_MASK_NONE;
4880 +}
4881 +
4882 void __setup_vector_irq(int cpu)
4883 {
4884 /* Initialize vector_irq on a new cpu */
4885 @@ -767,26 +789,65 @@ static void ioapic_register_intr(int irq
4886 #define ioapic_register_intr(irq, vector, trigger) evtchn_register_pirq(irq)
4887 #endif /* !CONFIG_XEN */
4888
4889 -static void __init setup_IO_APIC_irqs(void)
4890 +static void __init setup_IO_APIC_irq(int apic, int pin, int idx, int irq)
4891 {
4892 struct IO_APIC_route_entry entry;
4893 - int apic, pin, idx, irq, first_notcon = 1, vector;
4894 + int vector;
4895 unsigned long flags;
4896
4897 - apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
4898
4899 - for (apic = 0; apic < nr_ioapics; apic++) {
4900 - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
4901 + /*
4902 + * add it to the IO-APIC irq-routing table:
4903 + */
4904 + memset(&entry,0,sizeof(entry));
4905
4906 - /*
4907 - * add it to the IO-APIC irq-routing table:
4908 - */
4909 - memset(&entry,0,sizeof(entry));
4910 + entry.delivery_mode = INT_DELIVERY_MODE;
4911 + entry.dest_mode = INT_DEST_MODE;
4912 + entry.mask = 0; /* enable IRQ */
4913 + entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
4914
4915 - entry.delivery_mode = INT_DELIVERY_MODE;
4916 - entry.dest_mode = INT_DEST_MODE;
4917 - entry.mask = 0; /* enable IRQ */
4918 + entry.trigger = irq_trigger(idx);
4919 + entry.polarity = irq_polarity(idx);
4920 +
4921 + if (irq_trigger(idx)) {
4922 + entry.trigger = 1;
4923 + entry.mask = 1;
4924 entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
4925 + }
4926 +
4927 + if (/* !apic && */ !IO_APIC_IRQ(irq))
4928 + return;
4929 +
4930 + if (IO_APIC_IRQ(irq)) {
4931 + cpumask_t mask;
4932 + vector = assign_irq_vector(irq, TARGET_CPUS, &mask);
4933 + if (vector < 0)
4934 + return;
4935 +
4936 + entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask);
4937 + entry.vector = vector;
4938 +
4939 + ioapic_register_intr(irq, vector, IOAPIC_AUTO);
4940 + if (!apic && (irq < 16))
4941 + disable_8259A_irq(irq);
4942 + }
4943 +
4944 + ioapic_write_entry(apic, pin, entry);
4945 +
4946 + spin_lock_irqsave(&ioapic_lock, flags);
4947 + set_native_irq_info(irq, TARGET_CPUS);
4948 + spin_unlock_irqrestore(&ioapic_lock, flags);
4949 +
4950 +}
4951 +
4952 +static void __init setup_IO_APIC_irqs(void)
4953 +{
4954 + int apic, pin, idx, irq, first_notcon = 1;
4955 +
4956 + apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
4957 +
4958 + for (apic = 0; apic < nr_ioapics; apic++) {
4959 + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
4960
4961 idx = find_irq_entry(apic,pin,mp_INT);
4962 if (idx == -1) {
4963 @@ -798,39 +859,11 @@ static void __init setup_IO_APIC_irqs(vo
4964 continue;
4965 }
4966
4967 - entry.trigger = irq_trigger(idx);
4968 - entry.polarity = irq_polarity(idx);
4969 -
4970 - if (irq_trigger(idx)) {
4971 - entry.trigger = 1;
4972 - entry.mask = 1;
4973 - entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
4974 - }
4975 -
4976 irq = pin_2_irq(idx, apic, pin);
4977 add_pin_to_irq(irq, apic, pin);
4978
4979 - if (/* !apic && */ !IO_APIC_IRQ(irq))
4980 - continue;
4981 -
4982 - if (IO_APIC_IRQ(irq)) {
4983 - cpumask_t mask;
4984 - vector = assign_irq_vector(irq, TARGET_CPUS, &mask);
4985 - if (vector < 0)
4986 - continue;
4987 -
4988 - entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask);
4989 - entry.vector = vector;
4990 + setup_IO_APIC_irq(apic, pin, idx, irq);
4991
4992 - ioapic_register_intr(irq, vector, IOAPIC_AUTO);
4993 - if (!apic && (irq < 16))
4994 - disable_8259A_irq(irq);
4995 - }
4996 - ioapic_write_entry(apic, pin, entry);
4997 -
4998 - spin_lock_irqsave(&ioapic_lock, flags);
4999 - set_native_irq_info(irq, TARGET_CPUS);
5000 - spin_unlock_irqrestore(&ioapic_lock, flags);
5001 }
5002 }
5003
5004 @@ -1826,7 +1859,7 @@ void destroy_irq(unsigned int irq)
5005 dynamic_irq_cleanup(irq);
5006
5007 spin_lock_irqsave(&vector_lock, flags);
5008 - irq_vector[irq] = 0;
5009 + __clear_irq_vector(irq);
5010 spin_unlock_irqrestore(&vector_lock, flags);
5011 }
5012
5013 @@ -2131,7 +2164,15 @@ void __init setup_ioapic_dest(void)
5014 if (irq_entry == -1)
5015 continue;
5016 irq = pin_2_irq(irq_entry, ioapic, pin);
5017 - set_ioapic_affinity_irq(irq, TARGET_CPUS);
5018 +
5019 + /* setup_IO_APIC_irqs could fail to get vector for some device
5020 + * when you have too many devices, because at that time only boot
5021 + * cpu is online.
5022 + */
5023 + if(!irq_vector[irq])
5024 + setup_IO_APIC_irq(ioapic, pin, irq_entry, irq);
5025 + else
5026 + set_ioapic_affinity_irq(irq, TARGET_CPUS);
5027 }
5028
5029 }
5030 --- sle11-2009-06-29.orig/arch/x86/kernel/irq_64-xen.c 2009-03-04 11:28:34.000000000 +0100
5031 +++ sle11-2009-06-29/arch/x86/kernel/irq_64-xen.c 2008-12-15 11:26:44.000000000 +0100
5032 @@ -120,7 +120,7 @@ asmlinkage unsigned int do_IRQ(struct pt
5033
5034 if (likely(irq < NR_IRQS))
5035 generic_handle_irq(irq);
5036 - else
5037 + else if (printk_ratelimit())
5038 printk(KERN_EMERG "%s: %d.%d No irq handler for irq\n",
5039 __func__, smp_processor_id(), irq);
5040
5041 --- sle11-2009-06-29.orig/arch/x86/kernel/mpparse_64-xen.c 2009-03-04 11:28:34.000000000 +0100
5042 +++ sle11-2009-06-29/arch/x86/kernel/mpparse_64-xen.c 2008-12-15 11:26:44.000000000 +0100
5043 @@ -35,8 +35,6 @@
5044 int smp_found_config;
5045 unsigned int __initdata maxcpus = NR_CPUS;
5046
5047 -int acpi_found_madt;
5048 -
5049 /*
5050 * Various Linux-internal data structures created from the
5051 * MP-table.
5052 --- sle11-2009-06-29.orig/arch/x86/kernel/process_64-xen.c 2009-03-04 11:28:34.000000000 +0100
5053 +++ sle11-2009-06-29/arch/x86/kernel/process_64-xen.c 2008-12-15 11:26:44.000000000 +0100
5054 @@ -119,29 +119,23 @@ void exit_idle(void)
5055 static void poll_idle (void)
5056 {
5057 local_irq_enable();
5058 -
5059 - asm volatile(
5060 - "2:"
5061 - "testl %0,%1;"
5062 - "rep; nop;"
5063 - "je 2b;"
5064 - : :
5065 - "i" (_TIF_NEED_RESCHED),
5066 - "m" (current_thread_info()->flags));
5067 + cpu_relax();
5068 }
5069
5070 static void xen_idle(void)
5071 {
5072 + current_thread_info()->status &= ~TS_POLLING;
5073 + /*
5074 + * TS_POLLING-cleared state must be visible before we
5075 + * test NEED_RESCHED:
5076 + */
5077 + smp_mb();
5078 local_irq_disable();
5079 -
5080 - if (need_resched())
5081 - local_irq_enable();
5082 - else {
5083 - current_thread_info()->status &= ~TS_POLLING;
5084 - smp_mb__after_clear_bit();
5085 + if (!need_resched())
5086 safe_halt();
5087 - current_thread_info()->status |= TS_POLLING;
5088 - }
5089 + else
5090 + local_irq_enable();
5091 + current_thread_info()->status |= TS_POLLING;
5092 }
5093
5094 #ifdef CONFIG_HOTPLUG_CPU
5095 @@ -181,6 +175,12 @@ void cpu_idle (void)
5096 idle = xen_idle; /* no alternatives */
5097 if (cpu_is_offline(smp_processor_id()))
5098 play_dead();
5099 + /*
5100 + * Idle routines should keep interrupts disabled
5101 + * from here on, until they go to idle.
5102 + * Otherwise, idle callbacks can misfire.
5103 + */
5104 + local_irq_disable();
5105 enter_idle();
5106 idle();
5107 /* In many cases the interrupt that ended idle
5108 --- sle11-2009-06-29.orig/arch/x86/kernel/setup_64-xen.c 2009-03-04 11:28:34.000000000 +0100
5109 +++ sle11-2009-06-29/arch/x86/kernel/setup_64-xen.c 2008-12-15 11:26:44.000000000 +0100
5110 @@ -573,8 +573,7 @@ void __init setup_arch(char **cmdline_p)
5111 if (LOADER_TYPE && INITRD_START) {
5112 if (INITRD_START + INITRD_SIZE <= (end_pfn << PAGE_SHIFT)) {
5113 reserve_bootmem_generic(INITRD_START, INITRD_SIZE);
5114 - initrd_start =
5115 - INITRD_START ? INITRD_START + PAGE_OFFSET : 0;
5116 + initrd_start = INITRD_START + PAGE_OFFSET;
5117 initrd_end = initrd_start+INITRD_SIZE;
5118 }
5119 else {
5120 @@ -990,11 +989,8 @@ static void __cpuinit init_amd(struct cp
5121 /* Fix cpuid4 emulation for more */
5122 num_cache_leaves = 3;
5123
5124 - /* When there is only one core no need to synchronize RDTSC */
5125 - if (num_possible_cpus() == 1)
5126 - set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
5127 - else
5128 - clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
5129 + /* RDTSC can be speculated around */
5130 + clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
5131 }
5132
5133 static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
5134 @@ -1093,6 +1089,15 @@ static void __cpuinit init_intel(struct
5135 set_bit(X86_FEATURE_ARCH_PERFMON, &c->x86_capability);
5136 }
5137
5138 + if (cpu_has_ds) {
5139 + unsigned int l1, l2;
5140 + rdmsr(MSR_IA32_MISC_ENABLE, l1, l2);
5141 + if (!(l1 & (1<<11)))
5142 + set_bit(X86_FEATURE_BTS, c->x86_capability);
5143 + if (!(l1 & (1<<12)))
5144 + set_bit(X86_FEATURE_PEBS, c->x86_capability);
5145 + }
5146 +
5147 n = c->extended_cpuid_level;
5148 if (n >= 0x80000008) {
5149 unsigned eax = cpuid_eax(0x80000008);
5150 @@ -1112,7 +1117,10 @@ static void __cpuinit init_intel(struct
5151 set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
5152 if (c->x86 == 6)
5153 set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability);
5154 - set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
5155 + if (c->x86 == 15)
5156 + set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
5157 + else
5158 + clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
5159 c->x86_max_cores = intel_num_cpu_cores(c);
5160
5161 srat_detect_node();
5162 --- sle11-2009-06-29.orig/arch/x86/kernel/smp_64-xen.c 2009-03-04 11:28:34.000000000 +0100
5163 +++ sle11-2009-06-29/arch/x86/kernel/smp_64-xen.c 2008-12-15 11:26:44.000000000 +0100
5164 @@ -384,12 +384,17 @@ int smp_call_function_single (int cpu, v
5165 put_cpu();
5166 return 0;
5167 }
5168 +
5169 + /* Can deadlock when called with interrupts disabled */
5170 + WARN_ON(irqs_disabled());
5171 +
5172 spin_lock_bh(&call_lock);
5173 __smp_call_function_single(cpu, func, info, nonatomic, wait);
5174 spin_unlock_bh(&call_lock);
5175 put_cpu();
5176 return 0;
5177 }
5178 +EXPORT_SYMBOL(smp_call_function_single);
5179
5180 /*
5181 * this function sends a 'generic call function' IPI to all other CPUs
5182 --- sle11-2009-06-29.orig/arch/x86/kernel/traps_64-xen.c 2009-03-04 11:28:34.000000000 +0100
5183 +++ sle11-2009-06-29/arch/x86/kernel/traps_64-xen.c 2008-12-15 11:26:44.000000000 +0100
5184 @@ -30,9 +30,10 @@
5185 #include <linux/kprobes.h>
5186 #include <linux/kexec.h>
5187 #include <linux/unwind.h>
5188 +#include <linux/uaccess.h>
5189 +#include <linux/bug.h>
5190
5191 #include <asm/system.h>
5192 -#include <asm/uaccess.h>
5193 #include <asm/io.h>
5194 #include <asm/atomic.h>
5195 #include <asm/debugreg.h>
5196 @@ -108,12 +109,7 @@ static inline void preempt_conditional_c
5197 preempt_enable_no_resched();
5198 }
5199
5200 -static int kstack_depth_to_print = 12;
5201 -#ifdef CONFIG_STACK_UNWIND
5202 -static int call_trace = 1;
5203 -#else
5204 -#define call_trace (-1)
5205 -#endif
5206 +int kstack_depth_to_print = 12;
5207
5208 #ifdef CONFIG_KALLSYMS
5209 void printk_address(unsigned long address)
5210 @@ -218,24 +214,7 @@ static unsigned long *in_exception_stack
5211 return NULL;
5212 }
5213
5214 -struct ops_and_data {
5215 - struct stacktrace_ops *ops;
5216 - void *data;
5217 -};
5218 -
5219 -static int dump_trace_unwind(struct unwind_frame_info *info, void *context)
5220 -{
5221 - struct ops_and_data *oad = (struct ops_and_data *)context;
5222 - int n = 0;
5223 -
5224 - while (unwind(info) == 0 && UNW_PC(info)) {
5225 - n++;
5226 - oad->ops->address(oad->data, UNW_PC(info));
5227 - if (arch_unw_user_mode(info))
5228 - break;
5229 - }
5230 - return n;
5231 -}
5232 +#define MSG(txt) ops->warning(data, txt)
5233
5234 /*
5235 * x86-64 can have upto three kernel stacks:
5236 @@ -250,61 +229,24 @@ static inline int valid_stack_ptr(struct
5237 return p > t && p < t + THREAD_SIZE - 3;
5238 }
5239
5240 -void dump_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * stack,
5241 +void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
5242 + unsigned long *stack,
5243 struct stacktrace_ops *ops, void *data)
5244 {
5245 - const unsigned cpu = smp_processor_id();
5246 - unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr;
5247 + const unsigned cpu = get_cpu();
5248 + unsigned long *irqstack_end = (unsigned long*)cpu_pda(cpu)->irqstackptr;
5249 unsigned used = 0;
5250 struct thread_info *tinfo;
5251
5252 if (!tsk)
5253 tsk = current;
5254
5255 - if (call_trace >= 0) {
5256 - int unw_ret = 0;
5257 - struct unwind_frame_info info;
5258 - struct ops_and_data oad = { .ops = ops, .data = data };
5259 -
5260 - if (regs) {
5261 - if (unwind_init_frame_info(&info, tsk, regs) == 0)
5262 - unw_ret = dump_trace_unwind(&info, &oad);
5263 - } else if (tsk == current)
5264 - unw_ret = unwind_init_running(&info, dump_trace_unwind, &oad);
5265 - else {
5266 - if (unwind_init_blocked(&info, tsk) == 0)
5267 - unw_ret = dump_trace_unwind(&info, &oad);
5268 - }
5269 - if (unw_ret > 0) {
5270 - if (call_trace == 1 && !arch_unw_user_mode(&info)) {
5271 - ops->warning_symbol(data, "DWARF2 unwinder stuck at %s\n",
5272 - UNW_PC(&info));
5273 - if ((long)UNW_SP(&info) < 0) {
5274 - ops->warning(data, "Leftover inexact backtrace:\n");
5275 - stack = (unsigned long *)UNW_SP(&info);
5276 - if (!stack)
5277 - return;
5278 - } else
5279 - ops->warning(data, "Full inexact backtrace again:\n");
5280 - } else if (call_trace >= 1)
5281 - return;
5282 - else
5283 - ops->warning(data, "Full inexact backtrace again:\n");
5284 - } else
5285 - ops->warning(data, "Inexact backtrace:\n");
5286 - }
5287 if (!stack) {
5288 unsigned long dummy;
5289 stack = &dummy;
5290 if (tsk && tsk != current)
5291 stack = (unsigned long *)tsk->thread.rsp;
5292 }
5293 - /*
5294 - * Align the stack pointer on word boundary, later loops
5295 - * rely on that (and corruption / debug info bugs can cause
5296 - * unaligned values here):
5297 - */
5298 - stack = (unsigned long *)((unsigned long)stack & ~(sizeof(long)-1));
5299
5300 /*
5301 * Print function call entries within a stack. 'cond' is the
5302 @@ -314,9 +256,9 @@ void dump_trace(struct task_struct *tsk,
5303 #define HANDLE_STACK(cond) \
5304 do while (cond) { \
5305 unsigned long addr = *stack++; \
5306 - if (oops_in_progress ? \
5307 - __kernel_text_address(addr) : \
5308 - kernel_text_address(addr)) { \
5309 + /* Use unlocked access here because except for NMIs \
5310 + we should be already protected against module unloads */ \
5311 + if (__kernel_text_address(addr)) { \
5312 /* \
5313 * If the address is either in the text segment of the \
5314 * kernel, or in the region which contains vmalloc'ed \
5315 @@ -379,9 +321,10 @@ void dump_trace(struct task_struct *tsk,
5316 /*
5317 * This handles the process stack:
5318 */
5319 - tinfo = current_thread_info();
5320 + tinfo = task_thread_info(tsk);
5321 HANDLE_STACK (valid_stack_ptr(tinfo, stack));
5322 #undef HANDLE_STACK
5323 + put_cpu();
5324 }
5325 EXPORT_SYMBOL(dump_trace);
5326
5327 @@ -518,30 +461,15 @@ bad:
5328 printk("\n");
5329 }
5330
5331 -void handle_BUG(struct pt_regs *regs)
5332 -{
5333 - struct bug_frame f;
5334 - long len;
5335 - const char *prefix = "";
5336 +int is_valid_bugaddr(unsigned long rip)
5337 +{
5338 + unsigned short ud2;
5339
5340 - if (user_mode(regs))
5341 - return;
5342 - if (__copy_from_user(&f, (const void __user *) regs->rip,
5343 - sizeof(struct bug_frame)))
5344 - return;
5345 - if (f.filename >= 0 ||
5346 - f.ud2[0] != 0x0f || f.ud2[1] != 0x0b)
5347 - return;
5348 - len = __strnlen_user((char *)(long)f.filename, PATH_MAX) - 1;
5349 - if (len < 0 || len >= PATH_MAX)
5350 - f.filename = (int)(long)"unmapped filename";
5351 - else if (len > 50) {
5352 - f.filename += len - 50;
5353 - prefix = "...";
5354 - }
5355 - printk("----------- [cut here ] --------- [please bite here ] ---------\n");
5356 - printk(KERN_ALERT "Kernel BUG at %s%.50s:%d\n", prefix, (char *)(long)f.filename, f.line);
5357 -}
5358 + if (__copy_from_user(&ud2, (const void __user *) rip, sizeof(ud2)))
5359 + return 0;
5360 +
5361 + return ud2 == 0x0b0f;
5362 +}
5363
5364 #ifdef CONFIG_BUG
5365 void out_of_line_bug(void)
5366 @@ -621,7 +549,9 @@ void die(const char * str, struct pt_reg
5367 {
5368 unsigned long flags = oops_begin();
5369
5370 - handle_BUG(regs);
5371 + if (!user_mode(regs))
5372 + report_bug(regs->rip);
5373 +
5374 __die(str, regs, err);
5375 oops_end(flags);
5376 do_exit(SIGSEGV);
5377 @@ -790,8 +720,7 @@ mem_parity_error(unsigned char reason, s
5378 {
5379 printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n",
5380 reason);
5381 - printk(KERN_EMERG "You probably have a hardware problem with your "
5382 - "RAM chips\n");
5383 + printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n");
5384
5385 if (panic_on_unrecovered_nmi)
5386 panic("NMI: Not continuing");
5387 @@ -1227,21 +1156,3 @@ static int __init kstack_setup(char *s)
5388 return 0;
5389 }
5390 early_param("kstack", kstack_setup);
5391 -
5392 -#ifdef CONFIG_STACK_UNWIND
5393 -static int __init call_trace_setup(char *s)
5394 -{
5395 - if (!s)
5396 - return -EINVAL;
5397 - if (strcmp(s, "old") == 0)
5398 - call_trace = -1;
5399 - else if (strcmp(s, "both") == 0)
5400 - call_trace = 0;
5401 - else if (strcmp(s, "newfallback") == 0)
5402 - call_trace = 1;
5403 - else if (strcmp(s, "new") == 0)
5404 - call_trace = 2;
5405 - return 0;
5406 -}
5407 -early_param("call_trace", call_trace_setup);
5408 -#endif
5409 --- sle11-2009-06-29.orig/arch/x86/kernel/vsyscall_64-xen.c 2009-03-04 11:28:34.000000000 +0100
5410 +++ sle11-2009-06-29/arch/x86/kernel/vsyscall_64-xen.c 2008-12-15 11:26:44.000000000 +0100
5411 @@ -42,6 +42,7 @@
5412 #include <asm/topology.h>
5413
5414 #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
5415 +#define __syscall_clobber "r11","rcx","memory"
5416
5417 int __sysctl_vsyscall __section_sysctl_vsyscall = 1;
5418 seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED;
5419 @@ -224,8 +225,7 @@ out:
5420
5421 static int vsyscall_sysctl_nostrat(ctl_table *t, int __user *name, int nlen,
5422 void __user *oldval, size_t __user *oldlenp,
5423 - void __user *newval, size_t newlen,
5424 - void **context)
5425 + void __user *newval, size_t newlen)
5426 {
5427 return -ENOSYS;
5428 }
5429 @@ -277,7 +277,6 @@ static void __cpuinit cpu_vsyscall_init(
5430 vsyscall_set_cpu(raw_smp_processor_id());
5431 }
5432
5433 -#ifdef CONFIG_HOTPLUG_CPU
5434 static int __cpuinit
5435 cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg)
5436 {
5437 @@ -286,13 +285,13 @@ cpu_vsyscall_notifier(struct notifier_bl
5438 smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 0, 1);
5439 return NOTIFY_DONE;
5440 }
5441 -#endif
5442
5443 static void __init map_vsyscall(void)
5444 {
5445 extern char __vsyscall_0;
5446 unsigned long physaddr_page0 = __pa_symbol(&__vsyscall_0);
5447
5448 + /* Note that VSYSCALL_MAPPED_PAGES must agree with the code below. */
5449 __set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_page0, PAGE_KERNEL_VSYSCALL);
5450 }
5451
5452 --- sle11-2009-06-29.orig/arch/x86/mm/fault_64-xen.c 2009-03-04 11:28:34.000000000 +0100
5453 +++ sle11-2009-06-29/arch/x86/mm/fault_64-xen.c 2008-12-15 11:26:44.000000000 +0100
5454 @@ -23,9 +23,9 @@
5455 #include <linux/compiler.h>
5456 #include <linux/module.h>
5457 #include <linux/kprobes.h>
5458 +#include <linux/uaccess.h>
5459
5460 #include <asm/system.h>
5461 -#include <asm/uaccess.h>
5462 #include <asm/pgalloc.h>
5463 #include <asm/smp.h>
5464 #include <asm/tlbflush.h>
5465 @@ -96,7 +96,7 @@ void bust_spinlocks(int yes)
5466 static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr,
5467 unsigned long error_code)
5468 {
5469 - unsigned char __user *instr;
5470 + unsigned char *instr;
5471 int scan_more = 1;
5472 int prefetch = 0;
5473 unsigned char *max_instr;
5474 @@ -116,7 +116,7 @@ static noinline int is_prefetch(struct p
5475 unsigned char instr_hi;
5476 unsigned char instr_lo;
5477
5478 - if (__get_user(opcode, (char __user *)instr))
5479 + if (probe_kernel_address(instr, opcode))
5480 break;
5481
5482 instr_hi = opcode & 0xf0;
5483 @@ -154,7 +154,7 @@ static noinline int is_prefetch(struct p
5484 case 0x00:
5485 /* Prefetch instruction is 0x0F0D or 0x0F18 */
5486 scan_more = 0;
5487 - if (__get_user(opcode, (char __user *)instr))
5488 + if (probe_kernel_address(instr, opcode))
5489 break;
5490 prefetch = (instr_lo == 0xF) &&
5491 (opcode == 0x0D || opcode == 0x18);
5492 @@ -170,7 +170,7 @@ static noinline int is_prefetch(struct p
5493 static int bad_address(void *p)
5494 {
5495 unsigned long dummy;
5496 - return __get_user(dummy, (unsigned long __user *)p);
5497 + return probe_kernel_address((unsigned long *)p, dummy);
5498 }
5499
5500 void dump_pagetable(unsigned long address)
5501 --- sle11-2009-06-29.orig/arch/x86/mm/init_64-xen.c 2009-03-04 11:28:34.000000000 +0100
5502 +++ sle11-2009-06-29/arch/x86/mm/init_64-xen.c 2008-12-15 11:26:44.000000000 +0100
5503 @@ -1127,14 +1127,15 @@ static __init int x8664_sysctl_init(void
5504 __initcall(x8664_sysctl_init);
5505 #endif
5506
5507 -/* A pseudo VMAs to allow ptrace access for the vsyscall page. This only
5508 +/* A pseudo VMA to allow ptrace access for the vsyscall page. This only
5509 covers the 64bit vsyscall page now. 32bit has a real VMA now and does
5510 not need special handling anymore. */
5511
5512 static struct vm_area_struct gate_vma = {
5513 .vm_start = VSYSCALL_START,
5514 - .vm_end = VSYSCALL_END,
5515 - .vm_page_prot = PAGE_READONLY
5516 + .vm_end = VSYSCALL_START + (VSYSCALL_MAPPED_PAGES << PAGE_SHIFT),
5517 + .vm_page_prot = PAGE_READONLY_EXEC,
5518 + .vm_flags = VM_READ | VM_EXEC
5519 };
5520
5521 struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
5522 --- sle11-2009-06-29.orig/arch/x86/mm/pageattr_64-xen.c 2009-03-04 11:28:34.000000000 +0100
5523 +++ sle11-2009-06-29/arch/x86/mm/pageattr_64-xen.c 2008-12-15 11:26:44.000000000 +0100
5524 @@ -330,34 +330,40 @@ static struct page *split_large_page(uns
5525 return base;
5526 }
5527
5528 -
5529 -static void flush_kernel_map(void *address)
5530 +static void cache_flush_page(void *adr)
5531 {
5532 - if (0 && address && cpu_has_clflush) {
5533 - /* is this worth it? */
5534 - int i;
5535 - for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size)
5536 - asm volatile("clflush (%0)" :: "r" (address + i));
5537 - } else
5538 - asm volatile("wbinvd":::"memory");
5539 - if (address)
5540 - __flush_tlb_one(address);
5541 - else
5542 - __flush_tlb_all();
5543 + int i;
5544 + for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size)
5545 + asm volatile("clflush (%0)" :: "r" (adr + i));
5546 }
5547
5548 +static void flush_kernel_map(void *arg)
5549 +{
5550 + struct list_head *l = (struct list_head *)arg;
5551 + struct page *pg;
5552
5553 -static inline void flush_map(unsigned long address)
5554 + /* When clflush is available always use it because it is
5555 + much cheaper than WBINVD */
5556 + if (!cpu_has_clflush)
5557 + asm volatile("wbinvd" ::: "memory");
5558 + list_for_each_entry(pg, l, lru) {
5559 + void *adr = page_address(pg);
5560 + if (cpu_has_clflush)
5561 + cache_flush_page(adr);
5562 + __flush_tlb_one(adr);
5563 + }
5564 +}
5565 +
5566 +static inline void flush_map(struct list_head *l)
5567 {
5568 - on_each_cpu(flush_kernel_map, (void *)address, 1, 1);
5569 + on_each_cpu(flush_kernel_map, l, 1, 1);
5570 }
5571
5572 -static struct page *deferred_pages; /* protected by init_mm.mmap_sem */
5573 +static LIST_HEAD(deferred_pages); /* protected by init_mm.mmap_sem */
5574
5575 static inline void save_page(struct page *fpage)
5576 {
5577 - fpage->lru.next = (struct list_head *)deferred_pages;
5578 - deferred_pages = fpage;
5579 + list_add(&fpage->lru, &deferred_pages);
5580 }
5581
5582 /*
5583 @@ -487,18 +493,18 @@ int change_page_attr(struct page *page,
5584
5585 void global_flush_tlb(void)
5586 {
5587 - struct page *dpage;
5588 + struct page *pg, *next;
5589 + struct list_head l;
5590
5591 down_read(&init_mm.mmap_sem);
5592 - dpage = xchg(&deferred_pages, NULL);
5593 + list_replace_init(&deferred_pages, &l);
5594 up_read(&init_mm.mmap_sem);
5595
5596 - flush_map((dpage && !dpage->lru.next) ? (unsigned long)page_address(dpage) : 0);
5597 - while (dpage) {
5598 - struct page *tmp = dpage;
5599 - dpage = (struct page *)dpage->lru.next;
5600 - ClearPagePrivate(tmp);
5601 - __free_page(tmp);
5602 + flush_map(&l);
5603 +
5604 + list_for_each_entry_safe(pg, next, &l, lru) {
5605 + ClearPagePrivate(pg);
5606 + __free_page(pg);
5607 }
5608 }
5609
5610 --- sle11-2009-06-29.orig/drivers/pci/msi-xen.c 2009-04-24 13:31:56.000000000 +0200
5611 +++ sle11-2009-06-29/drivers/pci/msi-xen.c 2008-12-15 11:26:44.000000000 +0100
5612 @@ -263,10 +263,8 @@ void disable_msi_mode(struct pci_dev *de
5613 pci_write_config_word(dev, msi_control_reg(pos), control);
5614 dev->msix_enabled = 0;
5615 }
5616 - if (pci_find_capability(dev, PCI_CAP_ID_EXP)) {
5617 - /* PCI Express Endpoint device detected */
5618 - pci_intx(dev, 1); /* enable intx */
5619 - }
5620 +
5621 + pci_intx(dev, 1); /* enable intx */
5622 }
5623
5624 static void enable_msi_mode(struct pci_dev *dev, int pos, int type)
5625 @@ -284,10 +282,8 @@ static void enable_msi_mode(struct pci_d
5626 pci_write_config_word(dev, msi_control_reg(pos), control);
5627 dev->msix_enabled = 1;
5628 }
5629 - if (pci_find_capability(dev, PCI_CAP_ID_EXP)) {
5630 - /* PCI Express Endpoint device detected */
5631 - pci_intx(dev, 0); /* disable intx */
5632 - }
5633 +
5634 + pci_intx(dev, 0); /* disable intx */
5635 }
5636
5637 #ifdef CONFIG_PM
5638 --- sle11-2009-06-29.orig/drivers/xen/balloon/balloon.c 2009-03-04 11:28:34.000000000 +0100
5639 +++ sle11-2009-06-29/drivers/xen/balloon/balloon.c 2009-06-29 15:28:36.000000000 +0200
5640 @@ -97,8 +97,8 @@ extern unsigned long totalhigh_pages;
5641 static LIST_HEAD(ballooned_pages);
5642
5643 /* Main work function, always executed in process context. */
5644 -static void balloon_process(void *unused);
5645 -static DECLARE_WORK(balloon_worker, balloon_process, NULL);
5646 +static void balloon_process(struct work_struct *unused);
5647 +static DECLARE_WORK(balloon_worker, balloon_process);
5648 static struct timer_list balloon_timer;
5649
5650 /* When ballooning out (allocating memory to return to Xen) we don't really
5651 @@ -375,7 +375,7 @@ static int decrease_reservation(unsigned
5652 * by the balloon lock), or with changes to the Xen hard limit, but we will
5653 * recover from these in time.
5654 */
5655 -static void balloon_process(void *unused)
5656 +static void balloon_process(struct work_struct *unused)
5657 {
5658 int need_sleep = 0;
5659 long credit;
5660 --- sle11-2009-06-29.orig/drivers/xen/blkback/blkback.c 2009-03-04 11:28:34.000000000 +0100
5661 +++ sle11-2009-06-29/drivers/xen/blkback/blkback.c 2008-12-15 11:26:44.000000000 +0100
5662 @@ -37,6 +37,7 @@
5663
5664 #include <linux/spinlock.h>
5665 #include <linux/kthread.h>
5666 +#include <linux/freezer.h>
5667 #include <linux/list.h>
5668 #include <linux/delay.h>
5669 #include <xen/balloon.h>
5670 --- sle11-2009-06-29.orig/drivers/xen/blkback/interface.c 2009-06-29 15:14:52.000000000 +0200
5671 +++ sle11-2009-06-29/drivers/xen/blkback/interface.c 2008-12-15 11:26:44.000000000 +0100
5672 @@ -34,7 +34,7 @@
5673 #include <xen/evtchn.h>
5674 #include <linux/kthread.h>
5675
5676 -static kmem_cache_t *blkif_cachep;
5677 +static struct kmem_cache *blkif_cachep;
5678
5679 blkif_t *blkif_alloc(domid_t domid)
5680 {
5681 --- sle11-2009-06-29.orig/drivers/xen/blkfront/blkfront.c 2009-03-24 10:08:16.000000000 +0100
5682 +++ sle11-2009-06-29/drivers/xen/blkfront/blkfront.c 2009-03-24 10:08:27.000000000 +0100
5683 @@ -71,7 +71,7 @@ static int setup_blkring(struct xenbus_d
5684 static void kick_pending_request_queues(struct blkfront_info *);
5685
5686 static irqreturn_t blkif_int(int irq, void *dev_id);
5687 -static void blkif_restart_queue(void *arg);
5688 +static void blkif_restart_queue(struct work_struct *arg);
5689 static void blkif_recover(struct blkfront_info *);
5690 static void blkif_completion(struct blk_shadow *);
5691 static void blkif_free(struct blkfront_info *, int);
5692 @@ -111,7 +111,7 @@ static int blkfront_probe(struct xenbus_
5693 info->xbdev = dev;
5694 info->vdevice = vdevice;
5695 info->connected = BLKIF_STATE_DISCONNECTED;
5696 - INIT_WORK(&info->work, blkif_restart_queue, (void *)info);
5697 + INIT_WORK(&info->work, blkif_restart_queue);
5698
5699 for (i = 0; i < BLK_RING_SIZE; i++)
5700 info->shadow[i].req.id = i+1;
5701 @@ -462,9 +462,9 @@ static void kick_pending_request_queues(
5702 }
5703 }
5704
5705 -static void blkif_restart_queue(void *arg)
5706 +static void blkif_restart_queue(struct work_struct *arg)
5707 {
5708 - struct blkfront_info *info = (struct blkfront_info *)arg;
5709 + struct blkfront_info *info = container_of(arg, struct blkfront_info, work);
5710 spin_lock_irq(&blkif_io_lock);
5711 if (info->connected == BLKIF_STATE_CONNECTED)
5712 kick_pending_request_queues(info);
5713 --- sle11-2009-06-29.orig/drivers/xen/blktap/blktap.c 2009-04-20 11:37:34.000000000 +0200
5714 +++ sle11-2009-06-29/drivers/xen/blktap/blktap.c 2009-04-20 11:37:50.000000000 +0200
5715 @@ -40,6 +40,7 @@
5716
5717 #include <linux/spinlock.h>
5718 #include <linux/kthread.h>
5719 +#include <linux/freezer.h>
5720 #include <linux/list.h>
5721 #include <asm/hypervisor.h>
5722 #include "common.h"
5723 --- sle11-2009-06-29.orig/drivers/xen/blktap/interface.c 2009-06-29 15:14:52.000000000 +0200
5724 +++ sle11-2009-06-29/drivers/xen/blktap/interface.c 2008-12-15 11:26:44.000000000 +0100
5725 @@ -34,7 +34,7 @@
5726 #include "common.h"
5727 #include <xen/evtchn.h>
5728
5729 -static kmem_cache_t *blkif_cachep;
5730 +static struct kmem_cache *blkif_cachep;
5731
5732 blkif_t *tap_alloc_blkif(domid_t domid)
5733 {
5734 --- sle11-2009-06-29.orig/drivers/xen/char/mem.c 2009-06-29 15:14:52.000000000 +0200
5735 +++ sle11-2009-06-29/drivers/xen/char/mem.c 2008-12-15 11:26:44.000000000 +0100
5736 @@ -157,7 +157,7 @@ static loff_t memory_lseek(struct file *
5737 {
5738 loff_t ret;
5739
5740 - mutex_lock(&file->f_dentry->d_inode->i_mutex);
5741 + mutex_lock(&file->f_path.dentry->d_inode->i_mutex);
5742 switch (orig) {
5743 case 0:
5744 file->f_pos = offset;
5745 @@ -172,7 +172,7 @@ static loff_t memory_lseek(struct file *
5746 default:
5747 ret = -EINVAL;
5748 }
5749 - mutex_unlock(&file->f_dentry->d_inode->i_mutex);
5750 + mutex_unlock(&file->f_path.dentry->d_inode->i_mutex);
5751 return ret;
5752 }
5753
5754 --- sle11-2009-06-29.orig/drivers/xen/console/console.c 2009-03-04 11:28:34.000000000 +0100
5755 +++ sle11-2009-06-29/drivers/xen/console/console.c 2008-12-15 11:26:44.000000000 +0100
5756 @@ -85,11 +85,6 @@ static int xc_num = -1;
5757 #define XEN_HVC_MAJOR 229
5758 #define XEN_HVC_MINOR 0
5759
5760 -#ifdef CONFIG_MAGIC_SYSRQ
5761 -static unsigned long sysrq_requested;
5762 -extern int sysrq_enabled;
5763 -#endif
5764 -
5765 static int __init xencons_setup(char *str)
5766 {
5767 char *q;
5768 @@ -355,8 +350,8 @@ void __init dom0_init_screen_info(const
5769 #define DUMMY_TTY(_tty) ((xc_mode == XC_TTY) && \
5770 ((_tty)->index != (xc_num - 1)))
5771
5772 -static struct termios *xencons_termios[MAX_NR_CONSOLES];
5773 -static struct termios *xencons_termios_locked[MAX_NR_CONSOLES];
5774 +static struct ktermios *xencons_termios[MAX_NR_CONSOLES];
5775 +static struct ktermios *xencons_termios_locked[MAX_NR_CONSOLES];
5776 static struct tty_struct *xencons_tty;
5777 static int xencons_priv_irq;
5778 static char x_char;
5779 @@ -372,7 +367,9 @@ void xencons_rx(char *buf, unsigned len)
5780
5781 for (i = 0; i < len; i++) {
5782 #ifdef CONFIG_MAGIC_SYSRQ
5783 - if (sysrq_enabled) {
5784 + if (sysrq_on()) {
5785 + static unsigned long sysrq_requested;
5786 +
5787 if (buf[i] == '\x0f') { /* ^O */
5788 if (!sysrq_requested) {
5789 sysrq_requested = jiffies;
5790 --- sle11-2009-06-29.orig/drivers/xen/core/reboot.c 2009-03-04 11:28:34.000000000 +0100
5791 +++ sle11-2009-06-29/drivers/xen/core/reboot.c 2008-12-15 11:26:44.000000000 +0100
5792 @@ -34,8 +34,8 @@ static int suspend_cancelled;
5793 /* Can we leave APs online when we suspend? */
5794 static int fast_suspend;
5795
5796 -static void __shutdown_handler(void *unused);
5797 -static DECLARE_WORK(shutdown_work, __shutdown_handler, NULL);
5798 +static void __shutdown_handler(struct work_struct *unused);
5799 +static DECLARE_DELAYED_WORK(shutdown_work, __shutdown_handler);
5800
5801 static int setup_suspend_evtchn(void);
5802
5803 @@ -105,7 +105,7 @@ static int xen_suspend(void *__unused)
5804 case SHUTDOWN_RESUMING:
5805 break;
5806 default:
5807 - schedule_work(&shutdown_work);
5808 + schedule_delayed_work(&shutdown_work, 0);
5809 break;
5810 }
5811
5812 @@ -137,12 +137,12 @@ static void switch_shutdown_state(int ne
5813
5814 /* Either we kick off the work, or we leave it to xen_suspend(). */
5815 if (old_state == SHUTDOWN_INVALID)
5816 - schedule_work(&shutdown_work);
5817 + schedule_delayed_work(&shutdown_work, 0);
5818 else
5819 BUG_ON(old_state != SHUTDOWN_RESUMING);
5820 }
5821
5822 -static void __shutdown_handler(void *unused)
5823 +static void __shutdown_handler(struct work_struct *unused)
5824 {
5825 int err;
5826
5827 --- sle11-2009-06-29.orig/drivers/xen/core/smpboot.c 2009-03-04 11:28:34.000000000 +0100
5828 +++ sle11-2009-06-29/drivers/xen/core/smpboot.c 2008-12-15 11:26:44.000000000 +0100
5829 @@ -161,7 +161,12 @@ static void xen_smp_intr_exit(unsigned i
5830
5831 void __cpuinit cpu_bringup(void)
5832 {
5833 +#ifdef __i386__
5834 + cpu_set_gdt(current_thread_info()->cpu);
5835 + secondary_cpu_init();
5836 +#else
5837 cpu_init();
5838 +#endif
5839 identify_cpu(cpu_data + smp_processor_id());
5840 touch_softlockup_watchdog();
5841 preempt_disable();
5842 @@ -300,11 +305,12 @@ void __init smp_prepare_cpus(unsigned in
5843 if (cpu == 0)
5844 continue;
5845
5846 + idle = fork_idle(cpu);
5847 + if (IS_ERR(idle))
5848 + panic("failed fork for CPU %d", cpu);
5849 +
5850 #ifdef __x86_64__
5851 gdt_descr = &cpu_gdt_descr[cpu];
5852 -#else
5853 - gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
5854 -#endif
5855 gdt_descr->address = get_zeroed_page(GFP_KERNEL);
5856 if (unlikely(!gdt_descr->address)) {
5857 printk(KERN_CRIT "CPU%d failed to allocate GDT\n",
5858 @@ -313,6 +319,11 @@ void __init smp_prepare_cpus(unsigned in
5859 }
5860 gdt_descr->size = GDT_SIZE;
5861 memcpy((void *)gdt_descr->address, cpu_gdt_table, GDT_SIZE);
5862 +#else
5863 + if (unlikely(!init_gdt(cpu, idle)))
5864 + continue;
5865 + gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
5866 +#endif
5867 make_page_readonly(
5868 (void *)gdt_descr->address,
5869 XENFEAT_writable_descriptor_tables);
5870 @@ -332,10 +343,6 @@ void __init smp_prepare_cpus(unsigned in
5871 cpu_2_logical_apicid[cpu] = apicid;
5872 x86_cpu_to_apicid[cpu] = apicid;
5873
5874 - idle = fork_idle(cpu);
5875 - if (IS_ERR(idle))
5876 - panic("failed fork for CPU %d", cpu);
5877 -
5878 #ifdef __x86_64__
5879 cpu_pda(cpu)->pcurrent = idle;
5880 cpu_pda(cpu)->cpunumber = cpu;
5881 --- sle11-2009-06-29.orig/drivers/xen/fbfront/xenfb.c 2009-03-04 11:28:34.000000000 +0100
5882 +++ sle11-2009-06-29/drivers/xen/fbfront/xenfb.c 2008-12-15 11:26:44.000000000 +0100
5883 @@ -25,6 +25,7 @@
5884 #include <linux/vmalloc.h>
5885 #include <linux/mm.h>
5886 #include <linux/mutex.h>
5887 +#include <linux/freezer.h>
5888 #include <asm/hypervisor.h>
5889 #include <xen/evtchn.h>
5890 #include <xen/interface/io/fbif.h>
5891 --- sle11-2009-06-29.orig/drivers/xen/netback/loopback.c 2009-03-04 11:28:34.000000000 +0100
5892 +++ sle11-2009-06-29/drivers/xen/netback/loopback.c 2008-12-15 11:26:44.000000000 +0100
5893 @@ -54,6 +54,7 @@
5894 #include <net/dst.h>
5895 #include <net/xfrm.h> /* secpath_reset() */
5896 #include <asm/hypervisor.h> /* is_initial_xendomain() */
5897 +#include <../net/core/kmap_skb.h> /* k{,un}map_skb_frag() */
5898
5899 static int nloopbacks = -1;
5900 module_param(nloopbacks, int, 0);
5901 --- sle11-2009-06-29.orig/drivers/xen/pciback/conf_space_header.c 2009-06-29 15:14:52.000000000 +0200
5902 +++ sle11-2009-06-29/drivers/xen/pciback/conf_space_header.c 2008-12-15 11:26:44.000000000 +0100
5903 @@ -22,14 +22,14 @@ static int command_write(struct pci_dev
5904 {
5905 int err;
5906
5907 - if (!dev->is_enabled && is_enable_cmd(value)) {
5908 + if (!atomic_read(&dev->enable_cnt) && is_enable_cmd(value)) {
5909 if (unlikely(verbose_request))
5910 printk(KERN_DEBUG "pciback: %s: enable\n",
5911 pci_name(dev));
5912 err = pci_enable_device(dev);
5913 if (err)
5914 return err;
5915 - } else if (dev->is_enabled && !is_enable_cmd(value)) {
5916 + } else if (atomic_read(&dev->enable_cnt) && !is_enable_cmd(value)) {
5917 if (unlikely(verbose_request))
5918 printk(KERN_DEBUG "pciback: %s: disable\n",
5919 pci_name(dev));
5920 --- sle11-2009-06-29.orig/drivers/xen/pciback/pciback.h 2009-03-04 11:28:34.000000000 +0100
5921 +++ sle11-2009-06-29/drivers/xen/pciback/pciback.h 2008-12-15 11:26:44.000000000 +0100
5922 @@ -88,7 +88,7 @@ void pciback_release_devices(struct pcib
5923
5924 /* Handles events from front-end */
5925 irqreturn_t pciback_handle_event(int irq, void *dev_id);
5926 -void pciback_do_op(void *data);
5927 +void pciback_do_op(struct work_struct *work);
5928
5929 int pciback_xenbus_register(void);
5930 void pciback_xenbus_unregister(void);
5931 --- sle11-2009-06-29.orig/drivers/xen/pciback/pciback_ops.c 2009-03-04 11:28:34.000000000 +0100
5932 +++ sle11-2009-06-29/drivers/xen/pciback/pciback_ops.c 2008-12-15 11:26:44.000000000 +0100
5933 @@ -25,7 +25,7 @@ void pciback_reset_device(struct pci_dev
5934
5935 pci_write_config_word(dev, PCI_COMMAND, 0);
5936
5937 - dev->is_enabled = 0;
5938 + atomic_set(&dev->enable_cnt, 0);
5939 dev->is_busmaster = 0;
5940 } else {
5941 pci_read_config_word(dev, PCI_COMMAND, &cmd);
5942 @@ -51,9 +51,9 @@ static inline void test_and_schedule_op(
5943 * context because some of the pci_* functions can sleep (mostly due to ACPI
5944 * use of semaphores). This function is intended to be called from a work
5945 * queue in process context taking a struct pciback_device as a parameter */
5946 -void pciback_do_op(void *data)
5947 +void pciback_do_op(struct work_struct *work)
5948 {
5949 - struct pciback_device *pdev = data;
5950 + struct pciback_device *pdev = container_of(work, struct pciback_device, op_work);
5951 struct pci_dev *dev;
5952 struct xen_pci_op *op = &pdev->sh_info->op;
5953
5954 --- sle11-2009-06-29.orig/drivers/xen/pciback/xenbus.c 2009-06-29 15:14:52.000000000 +0200
5955 +++ sle11-2009-06-29/drivers/xen/pciback/xenbus.c 2008-12-15 11:26:44.000000000 +0100
5956 @@ -32,7 +32,7 @@ static struct pciback_device *alloc_pdev
5957 pdev->evtchn_irq = INVALID_EVTCHN_IRQ;
5958 pdev->be_watching = 0;
5959
5960 - INIT_WORK(&pdev->op_work, pciback_do_op, pdev);
5961 + INIT_WORK(&pdev->op_work, pciback_do_op);
5962
5963 if (pciback_init_devices(pdev)) {
5964 kfree(pdev);
5965 @@ -54,7 +54,6 @@ static void pciback_disconnect(struct pc
5966
5967 /* If the driver domain started an op, make sure we complete it or
5968 * delete it before releasing the shared memory */
5969 - cancel_delayed_work(&pdev->op_work);
5970 flush_scheduled_work();
5971
5972 if (pdev->sh_info != NULL) {
5973 --- sle11-2009-06-29.orig/drivers/xen/scsiback/interface.c 2009-06-29 15:14:52.000000000 +0200
5974 +++ sle11-2009-06-29/drivers/xen/scsiback/interface.c 2008-12-15 11:26:44.000000000 +0100
5975 @@ -39,7 +39,7 @@
5976 #include <linux/kthread.h>
5977
5978
5979 -static kmem_cache_t *scsiback_cachep;
5980 +static struct kmem_cache *scsiback_cachep;
5981
5982 struct vscsibk_info *vscsibk_info_alloc(domid_t domid)
5983 {
5984 --- sle11-2009-06-29.orig/drivers/xen/scsiback/scsiback.c 2009-03-04 11:28:34.000000000 +0100
5985 +++ sle11-2009-06-29/drivers/xen/scsiback/scsiback.c 2008-12-15 11:26:44.000000000 +0100
5986 @@ -322,13 +322,11 @@ static int scsiback_merge_bio(struct req
5987
5988 if (!rq->bio)
5989 blk_rq_bio_prep(q, rq, bio);
5990 - else if (!q->back_merge_fn(q, rq, bio))
5991 + else if (!ll_back_merge_fn(q, rq, bio))
5992 return -EINVAL;
5993 else {
5994 rq->biotail->bi_next = bio;
5995 rq->biotail = bio;
5996 - rq->hard_nr_sectors += bio_sectors(bio);
5997 - rq->nr_sectors = rq->hard_nr_sectors;
5998 }
5999
6000 return 0;
6001 --- sle11-2009-06-29.orig/drivers/xen/sfc_netfront/accel_vi.c 2009-03-30 16:35:11.000000000 +0200
6002 +++ sle11-2009-06-29/drivers/xen/sfc_netfront/accel_vi.c 2009-03-30 16:35:25.000000000 +0200
6003 @@ -463,7 +463,7 @@ netfront_accel_enqueue_skb_multi(netfron
6004
6005 if (skb->ip_summed == CHECKSUM_PARTIAL) {
6006 /* Set to zero to encourage falcon to work it out for us */
6007 - *(u16*)(skb->h.raw + skb->csum) = 0;
6008 + *(u16*)(skb->h.raw + skb->csum_offset) = 0;
6009 }
6010
6011 if (multi_post_start_new_buffer(vnic, &state)) {
6012 @@ -582,7 +582,7 @@ netfront_accel_enqueue_skb_single(netfro
6013
6014 if (skb->ip_summed == CHECKSUM_PARTIAL) {
6015 /* Set to zero to encourage falcon to work it out for us */
6016 - *(u16*)(skb->h.raw + skb->csum) = 0;
6017 + *(u16*)(skb->h.raw + skb->csum_offset) = 0;
6018 }
6019 NETFRONT_ACCEL_PKTBUFF_FOR_EACH_FRAGMENT
6020 (skb, idx, frag_data, frag_len, {
6021 --- sle11-2009-06-29.orig/drivers/xen/tpmback/interface.c 2009-06-29 15:14:52.000000000 +0200
6022 +++ sle11-2009-06-29/drivers/xen/tpmback/interface.c 2008-12-15 11:26:44.000000000 +0100
6023 @@ -15,7 +15,7 @@
6024 #include <xen/balloon.h>
6025 #include <xen/gnttab.h>
6026
6027 -static kmem_cache_t *tpmif_cachep;
6028 +static struct kmem_cache *tpmif_cachep;
6029 int num_frontends = 0;
6030
6031 LIST_HEAD(tpmif_list);
6032 --- sle11-2009-06-29.orig/drivers/xen/xenbus/xenbus_comms.c 2009-03-04 11:28:34.000000000 +0100
6033 +++ sle11-2009-06-29/drivers/xen/xenbus/xenbus_comms.c 2008-12-15 11:26:44.000000000 +0100
6034 @@ -49,9 +49,9 @@
6035
6036 static int xenbus_irq;
6037
6038 -extern void xenbus_probe(void *);
6039 +extern void xenbus_probe(struct work_struct *);
6040 extern int xenstored_ready;
6041 -static DECLARE_WORK(probe_work, xenbus_probe, NULL);
6042 +static DECLARE_WORK(probe_work, xenbus_probe);
6043
6044 static DECLARE_WAIT_QUEUE_HEAD(xb_waitq);
6045
6046 --- sle11-2009-06-29.orig/drivers/xen/xenbus/xenbus_probe.c 2008-11-25 12:35:56.000000000 +0100
6047 +++ sle11-2009-06-29/drivers/xen/xenbus/xenbus_probe.c 2008-12-15 11:26:44.000000000 +0100
6048 @@ -843,7 +843,7 @@ void unregister_xenstore_notifier(struct
6049 EXPORT_SYMBOL_GPL(unregister_xenstore_notifier);
6050
6051
6052 -void xenbus_probe(void *unused)
6053 +void xenbus_probe(struct work_struct *unused)
6054 {
6055 BUG_ON((xenstored_ready <= 0));
6056
6057 --- sle11-2009-06-29.orig/include/asm-x86/mach-xen/asm/desc_32.h 2009-03-04 11:28:34.000000000 +0100
6058 +++ sle11-2009-06-29/include/asm-x86/mach-xen/asm/desc_32.h 2008-12-15 11:26:44.000000000 +0100
6059 @@ -4,8 +4,6 @@
6060 #include <asm/ldt.h>
6061 #include <asm/segment.h>
6062
6063 -#define CPU_16BIT_STACK_SIZE 1024
6064 -
6065 #ifndef __ASSEMBLY__
6066
6067 #include <linux/preempt.h>
6068 @@ -15,8 +13,6 @@
6069
6070 extern struct desc_struct cpu_gdt_table[GDT_ENTRIES];
6071
6072 -DECLARE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]);
6073 -
6074 struct Xgt_desc_struct {
6075 unsigned short size;
6076 unsigned long address __attribute__((packed));
6077 @@ -32,11 +28,6 @@ static inline struct desc_struct *get_cp
6078 return (struct desc_struct *)per_cpu(cpu_gdt_descr, cpu).address;
6079 }
6080
6081 -/*
6082 - * This is the ldt that every process will get unless we need
6083 - * something other than this.
6084 - */
6085 -extern struct desc_struct default_ldt[];
6086 extern struct desc_struct idt_table[];
6087 extern void set_intr_gate(unsigned int irq, void * addr);
6088
6089 @@ -63,8 +54,8 @@ static inline void pack_gate(__u32 *a, _
6090 #define DESCTYPE_DPL3 0x60 /* DPL-3 */
6091 #define DESCTYPE_S 0x10 /* !system */
6092
6093 +#ifndef CONFIG_XEN
6094 #define load_TR_desc() __asm__ __volatile__("ltr %w0"::"q" (GDT_ENTRY_TSS*8))
6095 -#define load_LDT_desc() __asm__ __volatile__("lldt %w0"::"q" (GDT_ENTRY_LDT*8))
6096
6097 #define load_gdt(dtr) __asm__ __volatile("lgdt %0"::"m" (*dtr))
6098 #define load_idt(dtr) __asm__ __volatile("lidt %0"::"m" (*dtr))
6099 @@ -75,6 +66,7 @@ static inline void pack_gate(__u32 *a, _
6100 #define store_idt(dtr) __asm__ ("sidt %0":"=m" (*dtr))
6101 #define store_tr(tr) __asm__ ("str %0":"=m" (tr))
6102 #define store_ldt(ldt) __asm__ ("sldt %0":"=m" (ldt))
6103 +#endif
6104
6105 #if TLS_SIZE != 24
6106 # error update this code.
6107 @@ -90,22 +82,43 @@ static inline void load_TLS(struct threa
6108 }
6109
6110 #ifndef CONFIG_XEN
6111 +#define write_ldt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
6112 +#define write_gdt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
6113 +#define write_idt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
6114 +
6115 static inline void write_dt_entry(void *dt, int entry, __u32 entry_a, __u32 entry_b)
6116 {
6117 __u32 *lp = (__u32 *)((char *)dt + entry*8);
6118 *lp = entry_a;
6119 *(lp+1) = entry_b;
6120 }
6121 -
6122 -#define write_ldt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
6123 -#define write_gdt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
6124 +#define set_ldt native_set_ldt
6125 #else
6126 extern int write_ldt_entry(void *ldt, int entry, __u32 entry_a, __u32 entry_b);
6127 extern int write_gdt_entry(void *gdt, int entry, __u32 entry_a, __u32 entry_b);
6128 +#define set_ldt xen_set_ldt
6129 +#endif
6130 +
6131 +#ifndef CONFIG_XEN
6132 +static inline fastcall void native_set_ldt(const void *addr,
6133 + unsigned int entries)
6134 +{
6135 + if (likely(entries == 0))
6136 + __asm__ __volatile__("lldt %w0"::"q" (0));
6137 + else {
6138 + unsigned cpu = smp_processor_id();
6139 + __u32 a, b;
6140 +
6141 + pack_descriptor(&a, &b, (unsigned long)addr,
6142 + entries * sizeof(struct desc_struct) - 1,
6143 + DESCTYPE_LDT, 0);
6144 + write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT, a, b);
6145 + __asm__ __volatile__("lldt %w0"::"q" (GDT_ENTRY_LDT*8));
6146 + }
6147 +}
6148 #endif
6149 -#ifndef CONFIG_X86_NO_IDT
6150 -#define write_idt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
6151
6152 +#ifndef CONFIG_X86_NO_IDT
6153 static inline void _set_gate(int gate, unsigned int type, void *addr, unsigned short seg)
6154 {
6155 __u32 a, b;
6156 @@ -125,14 +138,6 @@ static inline void __set_tss_desc(unsign
6157 }
6158 #endif
6159
6160 -static inline void set_ldt_desc(unsigned int cpu, void *addr, unsigned int entries)
6161 -{
6162 - __u32 a, b;
6163 - pack_descriptor(&a, &b, (unsigned long)addr,
6164 - entries * sizeof(struct desc_struct) - 1,
6165 - DESCTYPE_LDT, 0);
6166 - write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT, a, b);
6167 -}
6168
6169 #define set_tss_desc(cpu,addr) __set_tss_desc(cpu, GDT_ENTRY_TSS, addr)
6170
6171 @@ -163,36 +168,22 @@ static inline void set_ldt_desc(unsigned
6172
6173 static inline void clear_LDT(void)
6174 {
6175 - int cpu = get_cpu();
6176 -
6177 - /*
6178 - * NB. We load the default_ldt for lcall7/27 handling on demand, as
6179 - * it slows down context switching. Noone uses it anyway.
6180 - */
6181 - cpu = cpu; /* XXX avoid compiler warning */
6182 - xen_set_ldt(NULL, 0);
6183 - put_cpu();
6184 + set_ldt(NULL, 0);
6185 }
6186
6187 /*
6188 * load one particular LDT into the current CPU
6189 */
6190 -static inline void load_LDT_nolock(mm_context_t *pc, int cpu)
6191 +static inline void load_LDT_nolock(mm_context_t *pc)
6192 {
6193 - void *segments = pc->ldt;
6194 - int count = pc->size;
6195 -
6196 - if (likely(!count))
6197 - segments = NULL;
6198 -
6199 - xen_set_ldt(segments, count);
6200 + set_ldt(pc->ldt, pc->size);
6201 }
6202
6203 static inline void load_LDT(mm_context_t *pc)
6204 {
6205 - int cpu = get_cpu();
6206 - load_LDT_nolock(pc, cpu);
6207 - put_cpu();
6208 + preempt_disable();
6209 + load_LDT_nolock(pc);
6210 + preempt_enable();
6211 }
6212
6213 static inline unsigned long get_desc_base(unsigned long *desc)
6214 @@ -204,6 +195,29 @@ static inline unsigned long get_desc_bas
6215 return base;
6216 }
6217
6218 +#else /* __ASSEMBLY__ */
6219 +
6220 +/*
6221 + * GET_DESC_BASE reads the descriptor base of the specified segment.
6222 + *
6223 + * Args:
6224 + * idx - descriptor index
6225 + * gdt - GDT pointer
6226 + * base - 32bit register to which the base will be written
6227 + * lo_w - lo word of the "base" register
6228 + * lo_b - lo byte of the "base" register
6229 + * hi_b - hi byte of the low word of the "base" register
6230 + *
6231 + * Example:
6232 + * GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah)
6233 + * Will read the base address of GDT_ENTRY_ESPFIX_SS and put it into %eax.
6234 + */
6235 +#define GET_DESC_BASE(idx, gdt, base, lo_w, lo_b, hi_b) \
6236 + movb idx*8+4(gdt), lo_b; \
6237 + movb idx*8+7(gdt), hi_b; \
6238 + shll $16, base; \
6239 + movw idx*8+2(gdt), lo_w;
6240 +
6241 #endif /* !__ASSEMBLY__ */
6242
6243 #endif
6244 --- sle11-2009-06-29.orig/include/asm-x86/mach-xen/asm/dma-mapping_32.h 2009-06-29 15:14:52.000000000 +0200
6245 +++ sle11-2009-06-29/include/asm-x86/mach-xen/asm/dma-mapping_32.h 2008-12-15 11:26:44.000000000 +0100
6246 @@ -127,10 +127,10 @@ dma_get_cache_alignment(void)
6247 return (1 << INTERNODE_CACHE_SHIFT);
6248 }
6249
6250 -#define dma_is_consistent(d) (1)
6251 +#define dma_is_consistent(d, h) (1)
6252
6253 static inline void
6254 -dma_cache_sync(void *vaddr, size_t size,
6255 +dma_cache_sync(struct device *dev, void *vaddr, size_t size,
6256 enum dma_data_direction direction)
6257 {
6258 flush_write_buffers();
6259 --- sle11-2009-06-29.orig/include/asm-x86/mach-xen/asm/fixmap_32.h 2009-03-04 11:28:34.000000000 +0100
6260 +++ sle11-2009-06-29/include/asm-x86/mach-xen/asm/fixmap_32.h 2008-12-15 11:26:44.000000000 +0100
6261 @@ -13,13 +13,16 @@
6262 #ifndef _ASM_FIXMAP_H
6263 #define _ASM_FIXMAP_H
6264
6265 -
6266 /* used by vmalloc.c, vsyscall.lds.S.
6267 *
6268 * Leave one empty page between vmalloc'ed areas and
6269 * the start of the fixmap.
6270 */
6271 extern unsigned long __FIXADDR_TOP;
6272 +#ifdef CONFIG_COMPAT_VDSO
6273 +#define FIXADDR_USER_START __fix_to_virt(FIX_VDSO)
6274 +#define FIXADDR_USER_END __fix_to_virt(FIX_VDSO - 1)
6275 +#endif
6276
6277 #ifndef __ASSEMBLY__
6278 #include <linux/kernel.h>
6279 --- sle11-2009-06-29.orig/include/asm-x86/mach-xen/asm/highmem.h 2009-06-29 15:14:52.000000000 +0200
6280 +++ sle11-2009-06-29/include/asm-x86/mach-xen/asm/highmem.h 2008-12-15 11:26:44.000000000 +0100
6281 @@ -85,7 +85,7 @@ static inline void clear_user_highpage(s
6282
6283 void copy_highpage(struct page *to, struct page *from);
6284 static inline void copy_user_highpage(struct page *to, struct page *from,
6285 - unsigned long vaddr)
6286 + unsigned long vaddr, struct vm_area_struct *vma)
6287 {
6288 copy_highpage(to, from);
6289 }
6290 --- sle11-2009-06-29.orig/include/asm-x86/mach-xen/asm/hypervisor.h 2009-03-04 11:28:34.000000000 +0100
6291 +++ sle11-2009-06-29/include/asm-x86/mach-xen/asm/hypervisor.h 2008-12-15 11:26:44.000000000 +0100
6292 @@ -46,15 +46,6 @@
6293 #include <asm/percpu.h>
6294 #include <asm/ptrace.h>
6295 #include <asm/page.h>
6296 -#if defined(__i386__)
6297 -# ifdef CONFIG_X86_PAE
6298 -# include <asm-generic/pgtable-nopud.h>
6299 -# else
6300 -# include <asm-generic/pgtable-nopmd.h>
6301 -# endif
6302 -#elif defined(__x86_64__) && LINUX_VERSION_CODE < KERNEL_VERSION(2,6,11)
6303 -# include <asm-generic/pgtable-nopud.h>
6304 -#endif
6305
6306 extern shared_info_t *HYPERVISOR_shared_info;
6307
6308 --- sle11-2009-06-29.orig/include/asm-x86/mach-xen/asm/irqflags_32.h 2009-06-29 15:14:52.000000000 +0200
6309 +++ sle11-2009-06-29/include/asm-x86/mach-xen/asm/irqflags_32.h 2008-12-15 11:26:44.000000000 +0100
6310 @@ -22,9 +22,6 @@
6311
6312 #define __raw_local_save_flags() (current_vcpu_info()->evtchn_upcall_mask)
6313
6314 -#define raw_local_save_flags(flags) \
6315 - do { (flags) = __raw_local_save_flags(); } while (0)
6316 -
6317 #define raw_local_irq_restore(x) \
6318 do { \
6319 vcpu_info_t *_vcpu; \
6320 @@ -66,18 +63,6 @@ void raw_safe_halt(void);
6321 */
6322 void halt(void);
6323
6324 -static inline int raw_irqs_disabled_flags(unsigned long flags)
6325 -{
6326 - return (flags != 0);
6327 -}
6328 -
6329 -#define raw_irqs_disabled() \
6330 -({ \
6331 - unsigned long flags = __raw_local_save_flags(); \
6332 - \
6333 - raw_irqs_disabled_flags(flags); \
6334 -})
6335 -
6336 /*
6337 * For spinlocks, etc:
6338 */
6339 @@ -90,9 +75,62 @@ static inline int raw_irqs_disabled_flag
6340 flags; \
6341 })
6342
6343 +#else
6344 +/* Offsets into shared_info_t. */
6345 +#define evtchn_upcall_pending /* 0 */
6346 +#define evtchn_upcall_mask 1
6347 +
6348 +#define sizeof_vcpu_shift 6
6349 +
6350 +#ifdef CONFIG_SMP
6351 +#define GET_VCPU_INFO movl TI_cpu(%ebp),%esi ; \
6352 + shl $sizeof_vcpu_shift,%esi ; \
6353 + addl HYPERVISOR_shared_info,%esi
6354 +#else
6355 +#define GET_VCPU_INFO movl HYPERVISOR_shared_info,%esi
6356 +#endif
6357 +
6358 +#define __DISABLE_INTERRUPTS movb $1,evtchn_upcall_mask(%esi)
6359 +#define __ENABLE_INTERRUPTS movb $0,evtchn_upcall_mask(%esi)
6360 +#define __TEST_PENDING testb $0xFF,evtchn_upcall_pending(%esi)
6361 +#define DISABLE_INTERRUPTS(clb) GET_VCPU_INFO ; \
6362 + __DISABLE_INTERRUPTS
6363 +#define ENABLE_INTERRUPTS(clb) GET_VCPU_INFO ; \
6364 + __ENABLE_INTERRUPTS
6365 +#define ENABLE_INTERRUPTS_SYSEXIT __ENABLE_INTERRUPTS ; \
6366 +sysexit_scrit: /**** START OF SYSEXIT CRITICAL REGION ****/ ; \
6367 + __TEST_PENDING ; \
6368 + jnz 14f /* process more events if necessary... */ ; \
6369 + movl PT_ESI(%esp), %esi ; \
6370 + sysexit ; \
6371 +14: __DISABLE_INTERRUPTS ; \
6372 + TRACE_IRQS_OFF ; \
6373 +sysexit_ecrit: /**** END OF SYSEXIT CRITICAL REGION ****/ ; \
6374 + push %esp ; \
6375 + call evtchn_do_upcall ; \
6376 + add $4,%esp ; \
6377 + jmp ret_from_intr
6378 +#define INTERRUPT_RETURN iret
6379 +#endif /* __ASSEMBLY__ */
6380 +
6381 +#ifndef __ASSEMBLY__
6382 +#define raw_local_save_flags(flags) \
6383 + do { (flags) = __raw_local_save_flags(); } while (0)
6384 +
6385 #define raw_local_irq_save(flags) \
6386 do { (flags) = __raw_local_irq_save(); } while (0)
6387
6388 +static inline int raw_irqs_disabled_flags(unsigned long flags)
6389 +{
6390 + return (flags != 0);
6391 +}
6392 +
6393 +#define raw_irqs_disabled() \
6394 +({ \
6395 + unsigned long flags = __raw_local_save_flags(); \
6396 + \
6397 + raw_irqs_disabled_flags(flags); \
6398 +})
6399 #endif /* __ASSEMBLY__ */
6400
6401 /*
6402 --- sle11-2009-06-29.orig/include/asm-x86/mach-xen/asm/mmu_context_32.h 2009-06-29 15:14:52.000000000 +0200
6403 +++ sle11-2009-06-29/include/asm-x86/mach-xen/asm/mmu_context_32.h 2008-12-15 11:26:44.000000000 +0100
6404 @@ -27,14 +27,13 @@ static inline void enter_lazy_tlb(struct
6405 static inline void __prepare_arch_switch(void)
6406 {
6407 /*
6408 - * Save away %fs and %gs. No need to save %es and %ds, as those
6409 - * are always kernel segments while inside the kernel. Must
6410 - * happen before reload of cr3/ldt (i.e., not in __switch_to).
6411 + * Save away %fs. No need to save %gs, as it was saved on the
6412 + * stack on entry. No need to save %es and %ds, as those are
6413 + * always kernel segments while inside the kernel.
6414 */
6415 - asm volatile ( "mov %%fs,%0 ; mov %%gs,%1"
6416 - : "=m" (current->thread.fs),
6417 - "=m" (current->thread.gs));
6418 - asm volatile ( "movl %0,%%fs ; movl %0,%%gs"
6419 + asm volatile ( "mov %%fs,%0"
6420 + : "=m" (current->thread.fs));
6421 + asm volatile ( "movl %0,%%fs"
6422 : : "r" (0) );
6423 }
6424
6425 @@ -89,14 +88,14 @@ static inline void switch_mm(struct mm_s
6426 * tlb flush IPI delivery. We must reload %cr3.
6427 */
6428 load_cr3(next->pgd);
6429 - load_LDT_nolock(&next->context, cpu);
6430 + load_LDT_nolock(&next->context);
6431 }
6432 }
6433 #endif
6434 }
6435
6436 -#define deactivate_mm(tsk, mm) \
6437 - asm("movl %0,%%fs ; movl %0,%%gs": :"r" (0))
6438 +#define deactivate_mm(tsk, mm) \
6439 + asm("movl %0,%%fs": :"r" (0));
6440
6441 static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next)
6442 {
6443 --- sle11-2009-06-29.orig/include/asm-x86/mach-xen/asm/pgtable-3level.h 2009-03-04 11:28:34.000000000 +0100
6444 +++ sle11-2009-06-29/include/asm-x86/mach-xen/asm/pgtable-3level.h 2008-12-15 11:26:44.000000000 +0100
6445 @@ -1,8 +1,6 @@
6446 #ifndef _I386_PGTABLE_3LEVEL_H
6447 #define _I386_PGTABLE_3LEVEL_H
6448
6449 -#include <asm-generic/pgtable-nopud.h>
6450 -
6451 /*
6452 * Intel Physical Address Extension (PAE) Mode - three-level page
6453 * tables on PPro+ CPUs.
6454 @@ -75,6 +73,23 @@ static inline void set_pte(pte_t *ptep,
6455 xen_l3_entry_update((pudptr), (pudval))
6456
6457 /*
6458 + * For PTEs and PDEs, we must clear the P-bit first when clearing a page table
6459 + * entry, so clear the bottom half first and enforce ordering with a compiler
6460 + * barrier.
6461 + */
6462 +static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
6463 +{
6464 + if ((mm != current->mm && mm != &init_mm)
6465 + || HYPERVISOR_update_va_mapping(addr, __pte(0), 0)) {
6466 + ptep->pte_low = 0;
6467 + smp_wmb();
6468 + ptep->pte_high = 0;
6469 + }
6470 +}
6471 +
6472 +#define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0)
6473 +
6474 +/*
6475 * Pentium-II erratum A13: in PAE mode we explicitly have to flush
6476 * the TLB via cr3 if the top-level pgd is changed...
6477 * We do not let the generic code free and clear pgd entries due to
6478 @@ -93,45 +108,16 @@ static inline void pud_clear (pud_t * pu
6479 #define pmd_offset(pud, address) ((pmd_t *) pud_page(*(pud)) + \
6480 pmd_index(address))
6481
6482 -static inline int pte_none(pte_t pte)
6483 -{
6484 - return !(pte.pte_low | pte.pte_high);
6485 -}
6486 -
6487 -/*
6488 - * For PTEs and PDEs, we must clear the P-bit first when clearing a page table
6489 - * entry, so clear the bottom half first and enforce ordering with a compiler
6490 - * barrier.
6491 - */
6492 -static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
6493 +static inline pte_t raw_ptep_get_and_clear(pte_t *ptep, pte_t res)
6494 {
6495 - if ((mm != current->mm && mm != &init_mm)
6496 - || HYPERVISOR_update_va_mapping(addr, __pte(0), 0)) {
6497 - ptep->pte_low = 0;
6498 - smp_wmb();
6499 + uint64_t val = __pte_val(res);
6500 + if (__cmpxchg64(ptep, val, 0) != val) {
6501 + /* xchg acts as a barrier before the setting of the high bits */
6502 + res.pte_low = xchg(&ptep->pte_low, 0);
6503 + res.pte_high = ptep->pte_high;
6504 ptep->pte_high = 0;
6505 }
6506 -}
6507 -
6508 -#define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0)
6509 -
6510 -#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
6511 -static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
6512 -{
6513 - pte_t pte = *ptep;
6514 - if (!pte_none(pte)) {
6515 - if ((mm != &init_mm) ||
6516 - HYPERVISOR_update_va_mapping(addr, __pte(0), 0)) {
6517 - uint64_t val = __pte_val(pte);
6518 - if (__cmpxchg64(ptep, val, 0) != val) {
6519 - /* xchg acts as a barrier before the setting of the high bits */
6520 - pte.pte_low = xchg(&ptep->pte_low, 0);
6521 - pte.pte_high = ptep->pte_high;
6522 - ptep->pte_high = 0;
6523 - }
6524 - }
6525 - }
6526 - return pte;
6527 + return res;
6528 }
6529
6530 #define __HAVE_ARCH_PTEP_CLEAR_FLUSH
6531 @@ -160,6 +146,11 @@ static inline int pte_same(pte_t a, pte_
6532
6533 #define pte_page(x) pfn_to_page(pte_pfn(x))
6534
6535 +static inline int pte_none(pte_t pte)
6536 +{
6537 + return !(pte.pte_low | pte.pte_high);
6538 +}
6539 +
6540 #define __pte_mfn(_pte) (((_pte).pte_low >> PAGE_SHIFT) | \
6541 ((_pte).pte_high << (32-PAGE_SHIFT)))
6542 #define pte_mfn(_pte) ((_pte).pte_low & _PAGE_PRESENT ? \
6543 --- sle11-2009-06-29.orig/include/asm-x86/mach-xen/asm/pgtable_32.h 2009-03-04 11:28:34.000000000 +0100
6544 +++ sle11-2009-06-29/include/asm-x86/mach-xen/asm/pgtable_32.h 2008-12-15 11:26:44.000000000 +0100
6545 @@ -38,14 +38,14 @@ struct vm_area_struct;
6546 #define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
6547 extern unsigned long empty_zero_page[1024];
6548 extern pgd_t *swapper_pg_dir;
6549 -extern kmem_cache_t *pgd_cache;
6550 -extern kmem_cache_t *pmd_cache;
6551 +extern struct kmem_cache *pgd_cache;
6552 +extern struct kmem_cache *pmd_cache;
6553 extern spinlock_t pgd_lock;
6554 extern struct page *pgd_list;
6555
6556 -void pmd_ctor(void *, kmem_cache_t *, unsigned long);
6557 -void pgd_ctor(void *, kmem_cache_t *, unsigned long);
6558 -void pgd_dtor(void *, kmem_cache_t *, unsigned long);
6559 +void pmd_ctor(void *, struct kmem_cache *, unsigned long);
6560 +void pgd_ctor(void *, struct kmem_cache *, unsigned long);
6561 +void pgd_dtor(void *, struct kmem_cache *, unsigned long);
6562 void pgtable_cache_init(void);
6563 void paging_init(void);
6564
6565 @@ -276,7 +276,6 @@ static inline pte_t pte_mkhuge(pte_t pte
6566 #define pte_update(mm, addr, ptep) do { } while (0)
6567 #define pte_update_defer(mm, addr, ptep) do { } while (0)
6568
6569 -
6570 /*
6571 * We only update the dirty/accessed state if we set
6572 * the dirty bit by hand in the kernel, since the hardware
6573 @@ -342,6 +341,19 @@ do { \
6574 __young; \
6575 })
6576
6577 +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
6578 +static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
6579 +{
6580 + pte_t pte = *ptep;
6581 + if (!pte_none(pte)
6582 + && (mm != &init_mm
6583 + || HYPERVISOR_update_va_mapping(addr, __pte(0), 0))) {
6584 + pte = raw_ptep_get_and_clear(ptep, pte);
6585 + pte_update(mm, addr, ptep);
6586 + }
6587 + return pte;
6588 +}
6589 +
6590 #define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
6591 #define ptep_get_and_clear_full(mm, addr, ptep, full) \
6592 ((full) ? ({ \
6593 --- sle11-2009-06-29.orig/include/asm-x86/mach-xen/asm/processor_32.h 2009-03-04 11:28:34.000000000 +0100
6594 +++ sle11-2009-06-29/include/asm-x86/mach-xen/asm/processor_32.h 2008-12-15 11:26:44.000000000 +0100
6595 @@ -20,6 +20,7 @@
6596 #include <linux/threads.h>
6597 #include <asm/percpu.h>
6598 #include <linux/cpumask.h>
6599 +#include <linux/init.h>
6600 #include <xen/interface/physdev.h>
6601
6602 /* flag for disabling the tsc */
6603 @@ -73,6 +74,7 @@ struct cpuinfo_x86 {
6604 #endif
6605 unsigned char x86_max_cores; /* cpuid returned max cores value */
6606 unsigned char apicid;
6607 + unsigned short x86_clflush_size;
6608 #ifdef CONFIG_SMP
6609 unsigned char booted_cores; /* number of cores as seen by OS */
6610 __u8 phys_proc_id; /* Physical processor id. */
6611 @@ -114,6 +116,8 @@ extern struct cpuinfo_x86 cpu_data[];
6612 extern int cpu_llc_id[NR_CPUS];
6613 extern char ignore_fpu_irq;
6614
6615 +void __init cpu_detect(struct cpuinfo_x86 *c);
6616 +
6617 extern void identify_cpu(struct cpuinfo_x86 *);
6618 extern void print_cpu_info(struct cpuinfo_x86 *);
6619 extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
6620 @@ -146,8 +150,8 @@ static inline void detect_ht(struct cpui
6621 #define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */
6622 #define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */
6623
6624 -static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
6625 - unsigned int *ecx, unsigned int *edx)
6626 +static inline fastcall void xen_cpuid(unsigned int *eax, unsigned int *ebx,
6627 + unsigned int *ecx, unsigned int *edx)
6628 {
6629 /* ecx is often an input as well as an output. */
6630 __asm__(XEN_CPUID
6631 @@ -158,59 +162,6 @@ static inline void __cpuid(unsigned int
6632 : "0" (*eax), "2" (*ecx));
6633 }
6634
6635 -/*
6636 - * Generic CPUID function
6637 - * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx
6638 - * resulting in stale register contents being returned.
6639 - */
6640 -static inline void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx)
6641 -{
6642 - *eax = op;
6643 - *ecx = 0;
6644 - __cpuid(eax, ebx, ecx, edx);
6645 -}
6646 -
6647 -/* Some CPUID calls want 'count' to be placed in ecx */
6648 -static inline void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx,
6649 - int *edx)
6650 -{
6651 - *eax = op;
6652 - *ecx = count;
6653 - __cpuid(eax, ebx, ecx, edx);
6654 -}
6655 -
6656 -/*
6657 - * CPUID functions returning a single datum
6658 - */
6659 -static inline unsigned int cpuid_eax(unsigned int op)
6660 -{
6661 - unsigned int eax, ebx, ecx, edx;
6662 -
6663 - cpuid(op, &eax, &ebx, &ecx, &edx);
6664 - return eax;
6665 -}
6666 -static inline unsigned int cpuid_ebx(unsigned int op)
6667 -{
6668 - unsigned int eax, ebx, ecx, edx;
6669 -
6670 - cpuid(op, &eax, &ebx, &ecx, &edx);
6671 - return ebx;
6672 -}
6673 -static inline unsigned int cpuid_ecx(unsigned int op)
6674 -{
6675 - unsigned int eax, ebx, ecx, edx;
6676 -
6677 - cpuid(op, &eax, &ebx, &ecx, &edx);
6678 - return ecx;
6679 -}
6680 -static inline unsigned int cpuid_edx(unsigned int op)
6681 -{
6682 - unsigned int eax, ebx, ecx, edx;
6683 -
6684 - cpuid(op, &eax, &ebx, &ecx, &edx);
6685 - return edx;
6686 -}
6687 -
6688 #define load_cr3(pgdir) write_cr3(__pa(pgdir))
6689
6690 /*
6691 @@ -480,9 +431,9 @@ struct thread_struct {
6692 .vm86_info = NULL, \
6693 .sysenter_cs = __KERNEL_CS, \
6694 .io_bitmap_ptr = NULL, \
6695 + .gs = __KERNEL_PDA, \
6696 }
6697
6698 -#ifndef CONFIG_X86_NO_TSS
6699 /*
6700 * Note that the .io_bitmap member must be extra-big. This is because
6701 * the CPU will access an additional byte beyond the end of the IO
6702 @@ -497,26 +448,9 @@ struct thread_struct {
6703 .io_bitmap = { [ 0 ... IO_BITMAP_LONGS] = ~0 }, \
6704 }
6705
6706 -static inline void __load_esp0(struct tss_struct *tss, struct thread_struct *thread)
6707 -{
6708 - tss->esp0 = thread->esp0;
6709 - /* This can only happen when SEP is enabled, no need to test "SEP"arately */
6710 - if (unlikely(tss->ss1 != thread->sysenter_cs)) {
6711 - tss->ss1 = thread->sysenter_cs;
6712 - wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
6713 - }
6714 -}
6715 -#define load_esp0(tss, thread) \
6716 - __load_esp0(tss, thread)
6717 -#else
6718 -#define load_esp0(tss, thread) do { \
6719 - if (HYPERVISOR_stack_switch(__KERNEL_DS, (thread)->esp0)) \
6720 - BUG(); \
6721 -} while (0)
6722 -#endif
6723 -
6724 #define start_thread(regs, new_eip, new_esp) do { \
6725 - __asm__("movl %0,%%fs ; movl %0,%%gs": :"r" (0)); \
6726 + __asm__("movl %0,%%fs": :"r" (0)); \
6727 + regs->xgs = 0; \
6728 set_fs(USER_DS); \
6729 regs->xds = __USER_DS; \
6730 regs->xes = __USER_DS; \
6731 @@ -526,26 +460,6 @@ static inline void __load_esp0(struct ts
6732 regs->esp = new_esp; \
6733 } while (0)
6734
6735 -/*
6736 - * These special macros can be used to get or set a debugging register
6737 - */
6738 -#define get_debugreg(var, register) \
6739 - (var) = HYPERVISOR_get_debugreg((register))
6740 -#define set_debugreg(value, register) \
6741 - WARN_ON(HYPERVISOR_set_debugreg((register), (value)))
6742 -
6743 -/*
6744 - * Set IOPL bits in EFLAGS from given mask
6745 - */
6746 -static inline void set_iopl_mask(unsigned mask)
6747 -{
6748 - struct physdev_set_iopl set_iopl;
6749 -
6750 - /* Force the change at ring 0. */
6751 - set_iopl.iopl = (mask == 0) ? 1 : (mask >> 12) & 3;
6752 - WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl));
6753 -}
6754 -
6755 /* Forward declaration, a strange C thing */
6756 struct task_struct;
6757 struct mm_struct;
6758 @@ -637,6 +551,105 @@ static inline void rep_nop(void)
6759
6760 #define cpu_relax() rep_nop()
6761
6762 +#define paravirt_enabled() 0
6763 +#define __cpuid xen_cpuid
6764 +
6765 +#ifndef CONFIG_X86_NO_TSS
6766 +static inline void __load_esp0(struct tss_struct *tss, struct thread_struct *thread)
6767 +{
6768 + tss->esp0 = thread->esp0;
6769 + /* This can only happen when SEP is enabled, no need to test "SEP"arately */
6770 + if (unlikely(tss->ss1 != thread->sysenter_cs)) {
6771 + tss->ss1 = thread->sysenter_cs;
6772 + wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
6773 + }
6774 +}
6775 +#define load_esp0(tss, thread) \
6776 + __load_esp0(tss, thread)
6777 +#else
6778 +#define load_esp0(tss, thread) do { \
6779 + if (HYPERVISOR_stack_switch(__KERNEL_DS, (thread)->esp0)) \
6780 + BUG(); \
6781 +} while (0)
6782 +#endif
6783 +
6784 +
6785 +/*
6786 + * These special macros can be used to get or set a debugging register
6787 + */
6788 +#define get_debugreg(var, register) \
6789 + (var) = HYPERVISOR_get_debugreg(register)
6790 +#define set_debugreg(value, register) \
6791 + WARN_ON(HYPERVISOR_set_debugreg(register, value))
6792 +
6793 +#define set_iopl_mask xen_set_iopl_mask
6794 +
6795 +/*
6796 + * Set IOPL bits in EFLAGS from given mask
6797 + */
6798 +static inline void xen_set_iopl_mask(unsigned mask)
6799 +{
6800 + struct physdev_set_iopl set_iopl;
6801 +
6802 + /* Force the change at ring 0. */
6803 + set_iopl.iopl = (mask == 0) ? 1 : (mask >> 12) & 3;
6804 + WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl));
6805 +}
6806 +
6807 +
6808 +/*
6809 + * Generic CPUID function
6810 + * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx
6811 + * resulting in stale register contents being returned.
6812 + */
6813 +static inline void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx)
6814 +{
6815 + *eax = op;
6816 + *ecx = 0;
6817 + __cpuid(eax, ebx, ecx, edx);
6818 +}
6819 +
6820 +/* Some CPUID calls want 'count' to be placed in ecx */
6821 +static inline void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx,
6822 + int *edx)
6823 +{
6824 + *eax = op;
6825 + *ecx = count;
6826 + __cpuid(eax, ebx, ecx, edx);
6827 +}
6828 +
6829 +/*
6830 + * CPUID functions returning a single datum
6831 + */
6832 +static inline unsigned int cpuid_eax(unsigned int op)
6833 +{
6834 + unsigned int eax, ebx, ecx, edx;
6835 +
6836 + cpuid(op, &eax, &ebx, &ecx, &edx);
6837 + return eax;
6838 +}
6839 +static inline unsigned int cpuid_ebx(unsigned int op)
6840 +{
6841 + unsigned int eax, ebx, ecx, edx;
6842 +
6843 + cpuid(op, &eax, &ebx, &ecx, &edx);
6844 + return ebx;
6845 +}
6846 +static inline unsigned int cpuid_ecx(unsigned int op)
6847 +{
6848 + unsigned int eax, ebx, ecx, edx;
6849 +
6850 + cpuid(op, &eax, &ebx, &ecx, &edx);
6851 + return ecx;
6852 +}
6853 +static inline unsigned int cpuid_edx(unsigned int op)
6854 +{
6855 + unsigned int eax, ebx, ecx, edx;
6856 +
6857 + cpuid(op, &eax, &ebx, &ecx, &edx);
6858 + return edx;
6859 +}
6860 +
6861 /* generic versions from gas */
6862 #define GENERIC_NOP1 ".byte 0x90\n"
6863 #define GENERIC_NOP2 ".byte 0x89,0xf6\n"
6864 @@ -736,4 +749,8 @@ extern unsigned long boot_option_idle_ov
6865 extern void enable_sep_cpu(void);
6866 extern int sysenter_setup(void);
6867
6868 +extern int init_gdt(int cpu, struct task_struct *idle);
6869 +extern void cpu_set_gdt(int);
6870 +extern void secondary_cpu_init(void);
6871 +
6872 #endif /* __ASM_I386_PROCESSOR_H */
6873 --- sle11-2009-06-29.orig/include/asm-x86/mach-xen/asm/segment_32.h 2009-03-04 11:28:34.000000000 +0100
6874 +++ sle11-2009-06-29/include/asm-x86/mach-xen/asm/segment_32.h 2008-12-15 11:26:44.000000000 +0100
6875 @@ -39,7 +39,7 @@
6876 * 25 - APM BIOS support
6877 *
6878 * 26 - ESPFIX small SS
6879 - * 27 - unused
6880 + * 27 - PDA [ per-cpu private data area ]
6881 * 28 - unused
6882 * 29 - unused
6883 * 30 - unused
6884 @@ -74,6 +74,9 @@
6885 #define GDT_ENTRY_ESPFIX_SS (GDT_ENTRY_KERNEL_BASE + 14)
6886 #define __ESPFIX_SS (GDT_ENTRY_ESPFIX_SS * 8)
6887
6888 +#define GDT_ENTRY_PDA (GDT_ENTRY_KERNEL_BASE + 15)
6889 +#define __KERNEL_PDA (GDT_ENTRY_PDA * 8)
6890 +
6891 #define GDT_ENTRY_DOUBLEFAULT_TSS 31
6892
6893 /*
6894 --- sle11-2009-06-29.orig/include/asm-x86/mach-xen/asm/smp_32.h 2009-03-04 11:28:34.000000000 +0100
6895 +++ sle11-2009-06-29/include/asm-x86/mach-xen/asm/smp_32.h 2008-12-15 11:26:44.000000000 +0100
6896 @@ -8,6 +8,7 @@
6897 #include <linux/kernel.h>
6898 #include <linux/threads.h>
6899 #include <linux/cpumask.h>
6900 +#include <asm/pda.h>
6901 #endif
6902
6903 #ifdef CONFIG_X86_LOCAL_APIC
6904 @@ -56,7 +57,7 @@ extern void cpu_uninit(void);
6905 * from the initial startup. We map APIC_BASE very early in page_setup(),
6906 * so this is correct in the x86 case.
6907 */
6908 -#define raw_smp_processor_id() (current_thread_info()->cpu)
6909 +#define raw_smp_processor_id() (read_pda(cpu_number))
6910
6911 extern cpumask_t cpu_possible_map;
6912 #define cpu_callin_map cpu_possible_map
6913 --- sle11-2009-06-29.orig/include/asm-x86/mach-xen/asm/system_32.h 2009-03-04 11:28:34.000000000 +0100
6914 +++ sle11-2009-06-29/include/asm-x86/mach-xen/asm/system_32.h 2008-12-15 11:26:44.000000000 +0100
6915 @@ -139,17 +139,17 @@ __asm__ __volatile__ ("movw %%dx,%1\n\t"
6916 #define write_cr4(x) \
6917 __asm__ __volatile__("movl %0,%%cr4": :"r" (x))
6918
6919 -/*
6920 - * Clear and set 'TS' bit respectively
6921 - */
6922 +#define wbinvd() \
6923 + __asm__ __volatile__ ("wbinvd": : :"memory")
6924 +
6925 +/* Clear the 'TS' bit */
6926 #define clts() (HYPERVISOR_fpu_taskswitch(0))
6927 +
6928 +/* Set the 'TS' bit */
6929 #define stts() (HYPERVISOR_fpu_taskswitch(1))
6930
6931 #endif /* __KERNEL__ */
6932
6933 -#define wbinvd() \
6934 - __asm__ __volatile__ ("wbinvd": : :"memory")
6935 -
6936 static inline unsigned long get_limit(unsigned long segment)
6937 {
6938 unsigned long __limit;
6939 --- sle11-2009-06-29.orig/include/asm-x86/mach-xen/asm/desc_64.h 2009-06-29 15:14:52.000000000 +0200
6940 +++ sle11-2009-06-29/include/asm-x86/mach-xen/asm/desc_64.h 2008-12-15 11:26:44.000000000 +0100
6941 @@ -9,62 +9,11 @@
6942
6943 #include <linux/string.h>
6944 #include <linux/smp.h>
6945 +#include <asm/desc_defs.h>
6946
6947 #include <asm/segment.h>
6948 #include <asm/mmu.h>
6949
6950 -// 8 byte segment descriptor
6951 -struct desc_struct {
6952 - u16 limit0;
6953 - u16 base0;
6954 - unsigned base1 : 8, type : 4, s : 1, dpl : 2, p : 1;
6955 - unsigned limit : 4, avl : 1, l : 1, d : 1, g : 1, base2 : 8;
6956 -} __attribute__((packed));
6957 -
6958 -struct n_desc_struct {
6959 - unsigned int a,b;
6960 -};
6961 -
6962 -enum {
6963 - GATE_INTERRUPT = 0xE,
6964 - GATE_TRAP = 0xF,
6965 - GATE_CALL = 0xC,
6966 -};
6967 -
6968 -// 16byte gate
6969 -struct gate_struct {
6970 - u16 offset_low;
6971 - u16 segment;
6972 - unsigned ist : 3, zero0 : 5, type : 5, dpl : 2, p : 1;
6973 - u16 offset_middle;
6974 - u32 offset_high;
6975 - u32 zero1;
6976 -} __attribute__((packed));
6977 -
6978 -#define PTR_LOW(x) ((unsigned long)(x) & 0xFFFF)
6979 -#define PTR_MIDDLE(x) (((unsigned long)(x) >> 16) & 0xFFFF)
6980 -#define PTR_HIGH(x) ((unsigned long)(x) >> 32)
6981 -
6982 -enum {
6983 - DESC_TSS = 0x9,
6984 - DESC_LDT = 0x2,
6985 -};
6986 -
6987 -// LDT or TSS descriptor in the GDT. 16 bytes.
6988 -struct ldttss_desc {
6989 - u16 limit0;
6990 - u16 base0;
6991 - unsigned base1 : 8, type : 5, dpl : 2, p : 1;
6992 - unsigned limit1 : 4, zero0 : 3, g : 1, base2 : 8;
6993 - u32 base3;
6994 - u32 zero1;
6995 -} __attribute__((packed));
6996 -
6997 -struct desc_ptr {
6998 - unsigned short size;
6999 - unsigned long address;
7000 -} __attribute__((packed)) ;
7001 -
7002 extern struct desc_ptr idt_descr, cpu_gdt_descr[NR_CPUS];
7003
7004 extern struct desc_struct cpu_gdt_table[GDT_ENTRIES];
7005 --- sle11-2009-06-29.orig/include/asm-x86/mach-xen/asm/dma-mapping_64.h 2009-03-04 11:28:34.000000000 +0100
7006 +++ sle11-2009-06-29/include/asm-x86/mach-xen/asm/dma-mapping_64.h 2008-12-15 11:26:44.000000000 +0100
7007 @@ -64,6 +64,9 @@ static inline int dma_mapping_error(dma_
7008 return (dma_addr == bad_dma_address);
7009 }
7010
7011 +#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
7012 +#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
7013 +
7014 extern void *dma_alloc_coherent(struct device *dev, size_t size,
7015 dma_addr_t *dma_handle, gfp_t gfp);
7016 extern void dma_free_coherent(struct device *dev, size_t size, void *vaddr,
7017 @@ -181,12 +184,13 @@ static inline int dma_get_cache_alignmen
7018 return boot_cpu_data.x86_clflush_size;
7019 }
7020
7021 -#define dma_is_consistent(h) 1
7022 +#define dma_is_consistent(d, h) 1
7023
7024 extern int dma_set_mask(struct device *dev, u64 mask);
7025
7026 static inline void
7027 -dma_cache_sync(void *vaddr, size_t size, enum dma_data_direction dir)
7028 +dma_cache_sync(struct device *dev, void *vaddr, size_t size,
7029 + enum dma_data_direction dir)
7030 {
7031 flush_write_buffers();
7032 }
7033 --- sle11-2009-06-29.orig/include/asm-x86/mach-xen/asm/pgtable_64.h 2009-03-04 11:28:34.000000000 +0100
7034 +++ sle11-2009-06-29/include/asm-x86/mach-xen/asm/pgtable_64.h 2008-12-15 11:26:44.000000000 +0100
7035 @@ -237,19 +237,18 @@ extern unsigned int __kernel_page_user;
7036
7037 static inline unsigned long pgd_bad(pgd_t pgd)
7038 {
7039 - unsigned long val = __pgd_val(pgd);
7040 - val &= ~PTE_MASK;
7041 - val &= ~(_PAGE_USER | _PAGE_DIRTY);
7042 - return val & ~(_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED);
7043 + return __pgd_val(pgd) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER);
7044 }
7045
7046 -static inline unsigned long pud_bad(pud_t pud)
7047 -{
7048 - unsigned long val = __pud_val(pud);
7049 - val &= ~PTE_MASK;
7050 - val &= ~(_PAGE_USER | _PAGE_DIRTY);
7051 - return val & ~(_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED);
7052 -}
7053 +static inline unsigned long pud_bad(pud_t pud)
7054 +{
7055 + return __pud_val(pud) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER);
7056 +}
7057 +
7058 +static inline unsigned long pmd_bad(pmd_t pmd)
7059 +{
7060 + return __pmd_val(pmd) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER);
7061 +}
7062
7063 #define set_pte_at(_mm,addr,ptep,pteval) do { \
7064 if (((_mm) != current->mm && (_mm) != &init_mm) || \
7065 @@ -404,8 +403,6 @@ static inline int pmd_large(pmd_t pte) {
7066 #define pmd_present(x) (__pmd_val(x) & _PAGE_PRESENT)
7067 #endif
7068 #define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0)
7069 -#define pmd_bad(x) ((__pmd_val(x) & ~(PTE_MASK | _PAGE_USER | _PAGE_PRESENT)) \
7070 - != (_KERNPG_TABLE & ~(_PAGE_USER | _PAGE_PRESENT)))
7071 #define pfn_pmd(nr,prot) (__pmd(((nr) << PAGE_SHIFT) | pgprot_val(prot)))
7072 #define pmd_pfn(x) ((pmd_val(x) & __PHYSICAL_MASK) >> PAGE_SHIFT)
7073
7074 --- sle11-2009-06-29.orig/include/asm-x86/mach-xen/asm/processor_64.h 2009-03-04 11:28:34.000000000 +0100
7075 +++ sle11-2009-06-29/include/asm-x86/mach-xen/asm/processor_64.h 2008-12-15 11:26:44.000000000 +0100
7076 @@ -484,6 +484,14 @@ static inline void __mwait(unsigned long
7077 : :"a" (eax), "c" (ecx));
7078 }
7079
7080 +static inline void __sti_mwait(unsigned long eax, unsigned long ecx)
7081 +{
7082 + /* "mwait %eax,%ecx;" */
7083 + asm volatile(
7084 + "sti; .byte 0x0f,0x01,0xc9;"
7085 + : :"a" (eax), "c" (ecx));
7086 +}
7087 +
7088 extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx);
7089
7090 #define stack_current() \
7091 --- sle11-2009-06-29.orig/include/asm-x86/mach-xen/asm/smp_64.h 2009-03-04 11:28:34.000000000 +0100
7092 +++ sle11-2009-06-29/include/asm-x86/mach-xen/asm/smp_64.h 2008-12-15 11:26:44.000000000 +0100
7093 @@ -88,11 +88,6 @@ extern u8 x86_cpu_to_log_apicid[NR_CPUS]
7094 extern u8 bios_cpu_apicid[];
7095
7096 #ifdef CONFIG_X86_LOCAL_APIC
7097 -static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
7098 -{
7099 - return cpus_addr(cpumask)[0];
7100 -}
7101 -
7102 static inline int cpu_present_to_apicid(int mps_cpu)
7103 {
7104 if (mps_cpu < NR_CPUS)
7105 @@ -127,13 +122,6 @@ static __inline int logical_smp_processo
7106 #define cpu_physical_id(cpu) x86_cpu_to_apicid[cpu]
7107 #else
7108 #define cpu_physical_id(cpu) boot_cpu_id
7109 -static inline int smp_call_function_single(int cpuid, void (*func) (void *info),
7110 - void *info, int retry, int wait)
7111 -{
7112 - /* Disable interrupts here? */
7113 - func(info);
7114 - return 0;
7115 -}
7116 #endif /* !CONFIG_SMP */
7117 #endif
7118
7119 --- sle11-2009-06-29.orig/kernel/kexec.c 2009-02-17 11:27:16.000000000 +0100
7120 +++ sle11-2009-06-29/kernel/kexec.c 2009-02-17 11:34:22.000000000 +0100
7121 @@ -374,7 +374,7 @@ static struct page *kimage_alloc_pages(g
7122 if (limit == ~0UL)
7123 address_bits = BITS_PER_LONG;
7124 else
7125 - address_bits = long_log2(limit);
7126 + address_bits = ilog2(limit);
7127
7128 if (xen_limit_pages_to_max_mfn(pages, order, address_bits) < 0) {
7129 __free_pages(pages, order);
7130 --- sle11-2009-06-29.orig/net/core/dev.c 2009-03-04 11:28:34.000000000 +0100
7131 +++ sle11-2009-06-29/net/core/dev.c 2008-12-15 11:26:44.000000000 +0100
7132 @@ -1751,10 +1751,10 @@ inline int skb_checksum_setup(struct sk_
7133 goto out;
7134 switch (skb->nh.iph->protocol) {
7135 case IPPROTO_TCP:
7136 - skb->csum = offsetof(struct tcphdr, check);
7137 + skb->csum_offset = offsetof(struct tcphdr, check);
7138 break;
7139 case IPPROTO_UDP:
7140 - skb->csum = offsetof(struct udphdr, check);
7141 + skb->csum_offset = offsetof(struct udphdr, check);
7142 break;
7143 default:
7144 if (net_ratelimit())
7145 @@ -1763,7 +1763,7 @@ inline int skb_checksum_setup(struct sk_
7146 " %d packet", skb->nh.iph->protocol);
7147 goto out;
7148 }
7149 - if ((skb->h.raw + skb->csum + 2) > skb->tail)
7150 + if ((skb->h.raw + skb->csum_offset + 2) > skb->tail)
7151 goto out;
7152 skb->ip_summed = CHECKSUM_PARTIAL;
7153 skb->proto_csum_blank = 0;