From: www.kernel.org
Subject: Linux 2.6.20
Patch-mainline: 2.6.20

Automatically created from "patches.kernel.org/patch-2.6.20" by xen-port-patches.py

Acked-by: jbeulich@novell.com

--- sle11-2009-06-29.orig/arch/x86/Kconfig 2009-03-04 11:28:34.000000000 +0100
+++ sle11-2009-06-29/arch/x86/Kconfig 2009-02-05 10:22:19.000000000 +0100
@@ -1431,7 +1431,7 @@ config PHYSICAL_START
 
 config RELOCATABLE
 bool "Build a relocatable kernel (EXPERIMENTAL)"
- depends on EXPERIMENTAL
+ depends on EXPERIMENTAL && !X86_XEN
 help
 This builds a kernel image that retains relocation information
 so it can be loaded someplace besides the default 1MB.
--- sle11-2009-06-29.orig/arch/x86/kernel/asm-offsets_32.c 2008-11-25 12:35:53.000000000 +0100
+++ sle11-2009-06-29/arch/x86/kernel/asm-offsets_32.c 2008-12-15 11:26:44.000000000 +0100
@@ -54,6 +54,7 @@ void foo(void)
 OFFSET(TI_exec_domain, thread_info, exec_domain);
 OFFSET(TI_flags, thread_info, flags);
 OFFSET(TI_status, thread_info, status);
+ OFFSET(TI_cpu, thread_info, cpu);
 OFFSET(TI_preempt_count, thread_info, preempt_count);
 OFFSET(TI_addr_limit, thread_info, addr_limit);
 OFFSET(TI_restart_block, thread_info, restart_block);
@@ -108,6 +109,11 @@ void foo(void)
 
 OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx);
 
+#ifdef CONFIG_XEN
+ BLANK();
+ OFFSET(XEN_START_mfn_list, start_info, mfn_list);
+#endif
+
 #ifdef CONFIG_PARAVIRT
 BLANK();
 OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled);
--- sle11-2009-06-29.orig/arch/x86/kernel/cpu/common-xen.c 2009-03-04 11:28:34.000000000 +0100
+++ sle11-2009-06-29/arch/x86/kernel/cpu/common-xen.c 2008-12-15 11:26:44.000000000 +0100
@@ -22,6 +22,7 @@
 #define phys_pkg_id(a,b) a
 #endif
 #endif
+#include <asm/pda.h>
 #include <asm/hypervisor.h>
 
 #include "cpu.h"
@@ -29,10 +30,8 @@
 DEFINE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr);
 EXPORT_PER_CPU_SYMBOL(cpu_gdt_descr);
 
-#ifndef CONFIG_XEN
-DEFINE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]);
-EXPORT_PER_CPU_SYMBOL(cpu_16bit_stack);
-#endif
+struct i386_pda *_cpu_pda[NR_CPUS] __read_mostly;
+EXPORT_SYMBOL(_cpu_pda);
 
 static int cachesize_override __cpuinitdata = -1;
 static int disable_x86_fxsr __cpuinitdata;
@@ -60,7 +59,7 @@ static struct cpu_dev __cpuinitdata defa
 .c_init = default_init,
 .c_vendor = "Unknown",
 };
-static struct cpu_dev * this_cpu = &default_cpu;
+static struct cpu_dev * this_cpu __cpuinitdata = &default_cpu;
 
 static int __init cachesize_setup(char *str)
 {
@@ -242,29 +241,14 @@ static int __cpuinit have_cpuid_p(void)
 return flag_is_changeable_p(X86_EFLAGS_ID);
 }
 
-/* Do minimum CPU detection early.
- Fields really needed: vendor, cpuid_level, family, model, mask, cache alignment.
- The others are not touched to avoid unwanted side effects.
-
- WARNING: this function is only called on the BP. Don't add code here
- that is supposed to run on all CPUs. */
-static void __init early_cpu_detect(void)
+void __init cpu_detect(struct cpuinfo_x86 *c)
 {
- struct cpuinfo_x86 *c = &boot_cpu_data;
-
- c->x86_cache_alignment = 32;
-
- if (!have_cpuid_p())
- return;
-
 /* Get vendor name */
 cpuid(0x00000000, &c->cpuid_level,
 (int *)&c->x86_vendor_id[0],
 (int *)&c->x86_vendor_id[8],
 (int *)&c->x86_vendor_id[4]);
 
- get_cpu_vendor(c, 1);
-
 c->x86 = 4;
 if (c->cpuid_level >= 0x00000001) {
 u32 junk, tfms, cap0, misc;
@@ -281,6 +265,26 @@ static void __init early_cpu_detect(void
 }
 }
 
+/* Do minimum CPU detection early.
+ Fields really needed: vendor, cpuid_level, family, model, mask, cache alignment.
+ The others are not touched to avoid unwanted side effects.
+
+ WARNING: this function is only called on the BP. Don't add code here
+ that is supposed to run on all CPUs. */
+static void __init early_cpu_detect(void)
+{
+ struct cpuinfo_x86 *c = &boot_cpu_data;
+
+ c->x86_cache_alignment = 32;
+
+ if (!have_cpuid_p())
+ return;
+
+ cpu_detect(c);
+
+ get_cpu_vendor(c, 1);
+}
+
 static void __cpuinit generic_identify(struct cpuinfo_x86 * c)
 {
 u32 tfms, xlvl;
@@ -315,6 +319,8 @@ static void __cpuinit generic_identify(s
 #else
 c->apicid = (ebx >> 24) & 0xFF;
 #endif
+ if (c->x86_capability[0] & (1<<19))
+ c->x86_clflush_size = ((ebx >> 8) & 0xff) * 8;
 } else {
 /* Have CPUID level 0 only - unheard of */
 c->x86 = 4;
@@ -379,6 +385,7 @@ void __cpuinit identify_cpu(struct cpuin
 c->x86_vendor_id[0] = '\0'; /* Unset */
 c->x86_model_id[0] = '\0'; /* Unset */
 c->x86_max_cores = 1;
+ c->x86_clflush_size = 32;
 memset(&c->x86_capability, 0, sizeof c->x86_capability);
 
 if (!have_cpuid_p()) {
@@ -599,61 +606,23 @@ void __init early_cpu_init(void)
 #endif
 }
 
-static void __cpuinit cpu_gdt_init(const struct Xgt_desc_struct *gdt_descr)
+/* Make sure %gs is initialized properly in idle threads */
+struct pt_regs * __devinit idle_regs(struct pt_regs *regs)
 {
- unsigned long frames[16];
- unsigned long va;
- int f;
-
- for (va = gdt_descr->address, f = 0;
- va < gdt_descr->address + gdt_descr->size;
- va += PAGE_SIZE, f++) {
- frames[f] = virt_to_mfn(va);
- make_lowmem_page_readonly(
- (void *)va, XENFEAT_writable_descriptor_tables);
- }
- if (HYPERVISOR_set_gdt(frames, (gdt_descr->size + 1) / 8))
- BUG();
+ memset(regs, 0, sizeof(struct pt_regs));
+ regs->xgs = __KERNEL_PDA;
+ return regs;
 }
 
-/*
- * cpu_init() initializes state that is per-CPU. Some data is already
- * initialized (naturally) in the bootstrap process, such as the GDT
- * and IDT. We reload them nevertheless, this function acts as a
- * 'CPU state barrier', nothing should get across.
- */
-void __cpuinit cpu_init(void)
+static __cpuinit int alloc_gdt(int cpu)
 {
- int cpu = smp_processor_id();
-#ifndef CONFIG_X86_NO_TSS
- struct tss_struct * t = &per_cpu(init_tss, cpu);
-#endif
- struct thread_struct *thread = &current->thread;
- struct desc_struct *gdt;
 struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
+ struct desc_struct *gdt;
+ struct i386_pda *pda;
 
- if (cpu_test_and_set(cpu, cpu_initialized)) {
- printk(KERN_WARNING "CPU#%d already initialized!\n", cpu);
- for (;;) local_irq_enable();
- }
- printk(KERN_INFO "Initializing CPU#%d\n", cpu);
-
- if (cpu_has_vme || cpu_has_de)
- clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
- if (tsc_disable && cpu_has_tsc) {
- printk(KERN_NOTICE "Disabling TSC...\n");
- /**** FIX-HPA: DOES THIS REALLY BELONG HERE? ****/
- clear_bit(X86_FEATURE_TSC, boot_cpu_data.x86_capability);
- set_in_cr4(X86_CR4_TSD);
- }
+ gdt = (struct desc_struct *)cpu_gdt_descr->address;
+ pda = cpu_pda(cpu);
 
-#ifndef CONFIG_XEN
- /* The CPU hotplug case */
- if (cpu_gdt_descr->address) {
- gdt = (struct desc_struct *)cpu_gdt_descr->address;
- memset(gdt, 0, PAGE_SIZE);
- goto old_gdt;
- }
 /*
 * This is a horrible hack to allocate the GDT. The problem
 * is that cpu_init() is called really early for the boot CPU
@@ -661,54 +630,141 @@ void __cpuinit cpu_init(void)
 * CPUs, when bootmem will have gone away
 */
 if (NODE_DATA(0)->bdata->node_bootmem_map) {
- gdt = (struct desc_struct *)alloc_bootmem_pages(PAGE_SIZE);
- /* alloc_bootmem_pages panics on failure, so no check */
+ BUG_ON(gdt != NULL || pda != NULL);
+
+ gdt = alloc_bootmem_pages(PAGE_SIZE);
+ pda = alloc_bootmem(sizeof(*pda));
+ /* alloc_bootmem(_pages) panics on failure, so no check */
+
 memset(gdt, 0, PAGE_SIZE);
+ memset(pda, 0, sizeof(*pda));
 } else {
- gdt = (struct desc_struct *)get_zeroed_page(GFP_KERNEL);
- if (unlikely(!gdt)) {
- printk(KERN_CRIT "CPU%d failed to allocate GDT\n", cpu);
- for (;;)
- local_irq_enable();
+ /* GDT and PDA might already have been allocated if
+ this is a CPU hotplug re-insertion. */
+ if (gdt == NULL)
+ gdt = (struct desc_struct *)get_zeroed_page(GFP_KERNEL);
+
+ if (pda == NULL)
+ pda = kmalloc_node(sizeof(*pda), GFP_KERNEL, cpu_to_node(cpu));
+
+ if (unlikely(!gdt || !pda)) {
+ free_pages((unsigned long)gdt, 0);
+ kfree(pda);
+ return 0;
 }
 }
-old_gdt:
+
+ cpu_gdt_descr->address = (unsigned long)gdt;
+ cpu_pda(cpu) = pda;
+
+ return 1;
+}
+
+/* Initial PDA used by boot CPU */
+struct i386_pda boot_pda = {
+ ._pda = &boot_pda,
+ .cpu_number = 0,
+ .pcurrent = &init_task,
+};
+
+static inline void set_kernel_gs(void)
+{
+ /* Set %gs for this CPU's PDA. Memory clobber is to create a
+ barrier with respect to any PDA operations, so the compiler
+ doesn't move any before here. */
+ asm volatile ("mov %0, %%gs" : : "r" (__KERNEL_PDA) : "memory");
+}
+
+/* Initialize the CPU's GDT and PDA. The boot CPU does this for
+ itself, but secondaries find this done for them. */
+__cpuinit int init_gdt(int cpu, struct task_struct *idle)
+{
+ struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
+ struct desc_struct *gdt;
+ struct i386_pda *pda;
+
+ /* For non-boot CPUs, the GDT and PDA should already have been
+ allocated. */
+ if (!alloc_gdt(cpu)) {
+ printk(KERN_CRIT "CPU%d failed to allocate GDT or PDA\n", cpu);
+ return 0;
+ }
+
+ gdt = (struct desc_struct *)cpu_gdt_descr->address;
+ pda = cpu_pda(cpu);
+
+ BUG_ON(gdt == NULL || pda == NULL);
+
 /*
 * Initialize the per-CPU GDT with the boot GDT,
 * and set up the GDT descriptor:
 */
 memcpy(gdt, cpu_gdt_table, GDT_SIZE);
+ cpu_gdt_descr->size = GDT_SIZE - 1;
 
- /* Set up GDT entry for 16bit stack */
- *(__u64 *)(&gdt[GDT_ENTRY_ESPFIX_SS]) |=
- ((((__u64)stk16_off) << 16) & 0x000000ffffff0000ULL) |
- ((((__u64)stk16_off) << 32) & 0xff00000000000000ULL) |
- (CPU_16BIT_STACK_SIZE - 1);
+ pack_descriptor((u32 *)&gdt[GDT_ENTRY_PDA].a,
+ (u32 *)&gdt[GDT_ENTRY_PDA].b,
+ (unsigned long)pda, sizeof(*pda) - 1,
+ 0x80 | DESCTYPE_S | 0x2, 0); /* present read-write data segment */
+
+ memset(pda, 0, sizeof(*pda));
+ pda->_pda = pda;
+ pda->cpu_number = cpu;
+ pda->pcurrent = idle;
 
- cpu_gdt_descr->size = GDT_SIZE - 1;
- cpu_gdt_descr->address = (unsigned long)gdt;
-#else
- if (cpu == 0 && cpu_gdt_descr->address == 0) {
- gdt = (struct desc_struct *)alloc_bootmem_pages(PAGE_SIZE);
- /* alloc_bootmem_pages panics on failure, so no check */
- memset(gdt, 0, PAGE_SIZE);
+ return 1;
+}
 
- memcpy(gdt, cpu_gdt_table, GDT_SIZE);
-
- cpu_gdt_descr->size = GDT_SIZE;
- cpu_gdt_descr->address = (unsigned long)gdt;
+void __cpuinit cpu_set_gdt(int cpu)
+{
+ struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
+ unsigned long va, frames[16];
+ int f;
+
+ for (va = cpu_gdt_descr->address, f = 0;
+ va < cpu_gdt_descr->address + cpu_gdt_descr->size;
+ va += PAGE_SIZE, f++) {
+ frames[f] = virt_to_mfn(va);
+ make_lowmem_page_readonly(
+ (void *)va, XENFEAT_writable_descriptor_tables);
 }
+ BUG_ON(HYPERVISOR_set_gdt(frames, (cpu_gdt_descr->size + 1) / 8));
+
+ set_kernel_gs();
+}
+
+/* Common CPU init for both boot and secondary CPUs */
+static void __cpuinit _cpu_init(int cpu, struct task_struct *curr)
+{
+#ifndef CONFIG_X86_NO_TSS
+ struct tss_struct * t = &per_cpu(init_tss, cpu);
 #endif
+ struct thread_struct *thread = &curr->thread;
+
+ if (cpu_test_and_set(cpu, cpu_initialized)) {
+ printk(KERN_WARNING "CPU#%d already initialized!\n", cpu);
+ for (;;) local_irq_enable();
+ }
 
- cpu_gdt_init(cpu_gdt_descr);
+ printk(KERN_INFO "Initializing CPU#%d\n", cpu);
+
+ if (cpu_has_vme || cpu_has_de)
+ clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
+ if (tsc_disable && cpu_has_tsc) {
+ printk(KERN_NOTICE "Disabling TSC...\n");
+ /**** FIX-HPA: DOES THIS REALLY BELONG HERE? ****/
+ clear_bit(X86_FEATURE_TSC, boot_cpu_data.x86_capability);
+ set_in_cr4(X86_CR4_TSD);
+ }
 
 /*
 * Set up and load the per-CPU TSS and LDT
 */
 atomic_inc(&init_mm.mm_count);
- current->active_mm = &init_mm;
- BUG_ON(current->mm);
- enter_lazy_tlb(&init_mm, current);
+ curr->active_mm = &init_mm;
+ if (curr->mm)
+ BUG();
+ enter_lazy_tlb(&init_mm, curr);
 
 load_esp0(t, thread);
 
@@ -719,8 +775,8 @@ old_gdt:
 __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss);
 #endif
 
- /* Clear %fs and %gs. */
- asm volatile ("movl %0, %%fs; movl %0, %%gs" : : "r" (0));
+ /* Clear %fs. */
+ asm volatile ("mov %0, %%fs" : : "r" (0));
 
 /* Clear all 6 debug registers: */
 set_debugreg(0, 0);
@@ -738,6 +794,38 @@ old_gdt:
 mxcsr_feature_mask_init();
 }
 
+/* Entrypoint to initialize secondary CPU */
+void __cpuinit secondary_cpu_init(void)
+{
+ int cpu = smp_processor_id();
+ struct task_struct *curr = current;
+
+ _cpu_init(cpu, curr);
+}
+
+/*
+ * cpu_init() initializes state that is per-CPU. Some data is already
+ * initialized (naturally) in the bootstrap process, such as the GDT
+ * and IDT. We reload them nevertheless, this function acts as a
+ * 'CPU state barrier', nothing should get across.
+ */
+void __cpuinit cpu_init(void)
+{
+ int cpu = smp_processor_id();
+ struct task_struct *curr = current;
+
+ /* Set up the real GDT and PDA, so we can transition from the
+ boot versions. */
+ if (!init_gdt(cpu, curr)) {
+ /* failed to allocate something; not much we can do... */
+ for (;;)
+ local_irq_enable();
+ }
+
+ cpu_set_gdt(cpu);
+ _cpu_init(cpu, curr);
+}
+
 #ifdef CONFIG_HOTPLUG_CPU
 void __cpuinit cpu_uninit(void)
 {
--- sle11-2009-06-29.orig/arch/x86/kernel/cpu/mtrr/main-xen.c 2009-06-29 15:14:52.000000000 +0200
+++ sle11-2009-06-29/arch/x86/kernel/cpu/mtrr/main-xen.c 2008-12-15 11:26:44.000000000 +0100
@@ -12,7 +12,7 @@
 static DEFINE_MUTEX(mtrr_mutex);
 
 void generic_get_mtrr(unsigned int reg, unsigned long *base,
- unsigned int *size, mtrr_type * type)
+ unsigned long *size, mtrr_type * type)
 {
 struct xen_platform_op op;
 
@@ -115,8 +115,7 @@ int mtrr_del_page(int reg, unsigned long
 {
 unsigned i;
 mtrr_type ltype;
- unsigned long lbase;
- unsigned int lsize;
+ unsigned long lbase, lsize;
 int error = -EINVAL;
 struct xen_platform_op op;
 
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ sle11-2009-06-29/arch/x86/kernel/e820_32-xen.c 2008-12-15 11:26:44.000000000 +0100
@@ -0,0 +1,1015 @@
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/bootmem.h>
+#include <linux/ioport.h>
+#include <linux/string.h>
+#include <linux/kexec.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/efi.h>
+#include <linux/pfn.h>
+#include <linux/uaccess.h>
+
+#include <asm/pgtable.h>
+#include <asm/page.h>
+#include <asm/e820.h>
+#include <xen/interface/memory.h>
+
+#ifdef CONFIG_EFI
+int efi_enabled = 0;
+EXPORT_SYMBOL(efi_enabled);
+#endif
+
+struct e820map e820;
+struct change_member {
+ struct e820entry *pbios; /* pointer to original bios entry */
+ unsigned long long addr; /* address for this change point */
+};
+static struct change_member change_point_list[2*E820MAX] __initdata;
+static struct change_member *change_point[2*E820MAX] __initdata;
+static struct e820entry *overlap_list[E820MAX] __initdata;
+static struct e820entry new_bios[E820MAX] __initdata;
+/* For PCI or other memory-mapped resources */
+unsigned long pci_mem_start = 0x10000000;
+#ifdef CONFIG_PCI
+EXPORT_SYMBOL(pci_mem_start);
+#endif
+extern int user_defined_memmap;
+struct resource data_resource = {
+ .name = "Kernel data",
+ .start = 0,
+ .end = 0,
+ .flags = IORESOURCE_BUSY | IORESOURCE_MEM
+};
+
+struct resource code_resource = {
+ .name = "Kernel code",
+ .start = 0,
+ .end = 0,
+ .flags = IORESOURCE_BUSY | IORESOURCE_MEM
+};
+
+static struct resource system_rom_resource = {
+ .name = "System ROM",
+ .start = 0xf0000,
+ .end = 0xfffff,
+ .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
+};
+
+static struct resource extension_rom_resource = {
+ .name = "Extension ROM",
+ .start = 0xe0000,
+ .end = 0xeffff,
+ .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
+};
+
+static struct resource adapter_rom_resources[] = { {
+ .name = "Adapter ROM",
+ .start = 0xc8000,
+ .end = 0,
+ .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
+}, {
+ .name = "Adapter ROM",
+ .start = 0,
+ .end = 0,
+ .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
+}, {
+ .name = "Adapter ROM",
+ .start = 0,
+ .end = 0,
+ .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
+}, {
+ .name = "Adapter ROM",
+ .start = 0,
+ .end = 0,
+ .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
+}, {
+ .name = "Adapter ROM",
+ .start = 0,
+ .end = 0,
+ .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
+}, {
+ .name = "Adapter ROM",
+ .start = 0,
+ .end = 0,
+ .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
+} };
+
+static struct resource video_rom_resource = {
+ .name = "Video ROM",
+ .start = 0xc0000,
+ .end = 0xc7fff,
+ .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
+};
+
+static struct resource video_ram_resource = {
+ .name = "Video RAM area",
+ .start = 0xa0000,
+ .end = 0xbffff,
+ .flags = IORESOURCE_BUSY | IORESOURCE_MEM
+};
+
+static struct resource standard_io_resources[] = { {
+ .name = "dma1",
+ .start = 0x0000,
+ .end = 0x001f,
+ .flags = IORESOURCE_BUSY | IORESOURCE_IO
+}, {
+ .name = "pic1",
+ .start = 0x0020,
+ .end = 0x0021,
+ .flags = IORESOURCE_BUSY | IORESOURCE_IO
+}, {
+ .name = "timer0",
+ .start = 0x0040,
+ .end = 0x0043,
+ .flags = IORESOURCE_BUSY | IORESOURCE_IO
+}, {
+ .name = "timer1",
+ .start = 0x0050,
+ .end = 0x0053,
+ .flags = IORESOURCE_BUSY | IORESOURCE_IO
+}, {
+ .name = "keyboard",
+ .start = 0x0060,
+ .end = 0x006f,
+ .flags = IORESOURCE_BUSY | IORESOURCE_IO
+}, {
+ .name = "dma page reg",
+ .start = 0x0080,
+ .end = 0x008f,
+ .flags = IORESOURCE_BUSY | IORESOURCE_IO
+}, {
+ .name = "pic2",
+ .start = 0x00a0,
+ .end = 0x00a1,
+ .flags = IORESOURCE_BUSY | IORESOURCE_IO
+}, {
+ .name = "dma2",
+ .start = 0x00c0,
+ .end = 0x00df,
+ .flags = IORESOURCE_BUSY | IORESOURCE_IO
+}, {
+ .name = "fpu",
+ .start = 0x00f0,
+ .end = 0x00ff,
+ .flags = IORESOURCE_BUSY | IORESOURCE_IO
+} };
+
+static int romsignature(const unsigned char *x)
+{
+ unsigned short sig;
+ int ret = 0;
+ if (probe_kernel_address((const unsigned short *)x, sig) == 0)
+ ret = (sig == 0xaa55);
+ return ret;
+}
+
+static int __init romchecksum(unsigned char *rom, unsigned long length)
+{
+ unsigned char *p, sum = 0;
+
+ for (p = rom; p < rom + length; p++)
+ sum += *p;
+ return sum == 0;
+}
+
+static void __init probe_roms(void)
+{
+ unsigned long start, length, upper;
+ unsigned char *rom;
+ int i;
+
+#ifdef CONFIG_XEN
+ /* Nothing to do if not running in dom0. */
+ if (!is_initial_xendomain())
+ return;
+#endif
+
+ /* video rom */
+ upper = adapter_rom_resources[0].start;
+ for (start = video_rom_resource.start; start < upper; start += 2048) {
+ rom = isa_bus_to_virt(start);
+ if (!romsignature(rom))
+ continue;
+
+ video_rom_resource.start = start;
+
+ /* 0 < length <= 0x7f * 512, historically */
+ length = rom[2] * 512;
+
+ /* if checksum okay, trust length byte */
+ if (length && romchecksum(rom, length))
+ video_rom_resource.end = start + length - 1;
+
+ request_resource(&iomem_resource, &video_rom_resource);
+ break;
+ }
+
+ start = (video_rom_resource.end + 1 + 2047) & ~2047UL;
+ if (start < upper)
+ start = upper;
+
+ /* system rom */
+ request_resource(&iomem_resource, &system_rom_resource);
+ upper = system_rom_resource.start;
+
+ /* check for extension rom (ignore length byte!) */
+ rom = isa_bus_to_virt((unsigned long)extension_rom_resource.start);
+ if (romsignature(rom)) {
+ length = extension_rom_resource.end - extension_rom_resource.start + 1;
+ if (romchecksum(rom, length)) {
+ request_resource(&iomem_resource, &extension_rom_resource);
+ upper = extension_rom_resource.start;
+ }
+ }
+
+ /* check for adapter roms on 2k boundaries */
+ for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; start += 2048) {
+ rom = isa_bus_to_virt(start);
+ if (!romsignature(rom))
+ continue;
+
+ /* 0 < length <= 0x7f * 512, historically */
+ length = rom[2] * 512;
+
+ /* but accept any length that fits if checksum okay */
+ if (!length || start + length > upper || !romchecksum(rom, length))
+ continue;
+
+ adapter_rom_resources[i].start = start;
+ adapter_rom_resources[i].end = start + length - 1;
+ request_resource(&iomem_resource, &adapter_rom_resources[i]);
+
+ start = adapter_rom_resources[i++].end & ~2047UL;
+ }
+}
+
+#ifdef CONFIG_XEN
+static struct e820map machine_e820 __initdata;
+#define e820 machine_e820
+#endif
+
+/*
+ * Request address space for all standard RAM and ROM resources
+ * and also for regions reported as reserved by the e820.
+ */
+static void __init
+legacy_init_iomem_resources(struct resource *code_resource, struct resource *data_resource)
+{
+ int i;
+
+ probe_roms();
+ for (i = 0; i < e820.nr_map; i++) {
+ struct resource *res;
+#ifndef CONFIG_RESOURCES_64BIT
+ if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL)
+ continue;
+#endif
+ res = kzalloc(sizeof(struct resource), GFP_ATOMIC);
+ switch (e820.map[i].type) {
+ case E820_RAM: res->name = "System RAM"; break;
+ case E820_ACPI: res->name = "ACPI Tables"; break;
+ case E820_NVS: res->name = "ACPI Non-volatile Storage"; break;
+ default: res->name = "reserved";
+ }
+ res->start = e820.map[i].addr;
+ res->end = res->start + e820.map[i].size - 1;
+ res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+ if (request_resource(&iomem_resource, res)) {
+ kfree(res);
+ continue;
+ }
+ if (e820.map[i].type == E820_RAM) {
+ /*
+ * We don't know which RAM region contains kernel data,
+ * so we try it repeatedly and let the resource manager
+ * test it.
+ */
+#ifndef CONFIG_XEN
+ request_resource(res, code_resource);
+ request_resource(res, data_resource);
+#endif
+#ifdef CONFIG_KEXEC
+ request_resource(res, &crashk_res);
+#ifdef CONFIG_XEN
+ xen_machine_kexec_register_resources(res);
+#endif
+#endif
+ }
+ }
+}
+
+#undef e820
+
+/*
+ * Request address space for all standard resources
+ *
+ * This is called just before pcibios_init(), which is also a
+ * subsys_initcall, but is linked in later (in arch/i386/pci/common.c).
+ */
+static int __init request_standard_resources(void)
+{
+ int i;
+
+ /* Nothing to do if not running in dom0. */
+ if (!is_initial_xendomain())
+ return 0;
+
+ printk("Setting up standard PCI resources\n");
+ if (efi_enabled)
+ efi_initialize_iomem_resources(&code_resource, &data_resource);
+ else
+ legacy_init_iomem_resources(&code_resource, &data_resource);
+
+ /* EFI systems may still have VGA */
+ request_resource(&iomem_resource, &video_ram_resource);
+
+ /* request I/O space for devices used on all i[345]86 PCs */
+ for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++)
+ request_resource(&ioport_resource, &standard_io_resources[i]);
+ return 0;
+}
+
+subsys_initcall(request_standard_resources);
+
+void __init add_memory_region(unsigned long long start,
+ unsigned long long size, int type)
+{
+ int x;
+
+ if (!efi_enabled) {
+ x = e820.nr_map;
+
+ if (x == E820MAX) {
+ printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
+ return;
+ }
+
+ e820.map[x].addr = start;
+ e820.map[x].size = size;
+ e820.map[x].type = type;
+ e820.nr_map++;
+ }
+} /* add_memory_region */
+
+/*
+ * Sanitize the BIOS e820 map.
+ *
+ * Some e820 responses include overlapping entries. The following
+ * replaces the original e820 map with a new one, removing overlaps.
+ *
+ */
+int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
+{
+ struct change_member *change_tmp;
+ unsigned long current_type, last_type;
+ unsigned long long last_addr;
+ int chgidx, still_changing;
+ int overlap_entries;
+ int new_bios_entry;
+ int old_nr, new_nr, chg_nr;
+ int i;
+
+ /*
+ Visually we're performing the following (1,2,3,4 = memory types)...
+
+ Sample memory map (w/overlaps):
+ ____22__________________
+ ______________________4_
+ ____1111________________
+ _44_____________________
+ 11111111________________
+ ____________________33__
+ ___________44___________
+ __________33333_________
+ ______________22________
+ ___________________2222_
+ _________111111111______
+ _____________________11_
+ _________________4______
+
+ Sanitized equivalent (no overlap):
+ 1_______________________
+ _44_____________________
+ ___1____________________
+ ____22__________________
+ ______11________________
+ _________1______________
+ __________3_____________
+ ___________44___________
+ _____________33_________
+ _______________2________
+ ________________1_______
+ _________________4______
+ ___________________2____
+ ____________________33__
+ ______________________4_
+ */
+ printk("sanitize start\n");
+ /* if there's only one memory region, don't bother */
+ if (*pnr_map < 2) {
+ printk("sanitize bail 0\n");
+ return -1;
+ }
+
+ old_nr = *pnr_map;
+
+ /* bail out if we find any unreasonable addresses in bios map */
+ for (i=0; i<old_nr; i++)
+ if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr) {
+ printk("sanitize bail 1\n");
+ return -1;
+ }
+
+ /* create pointers for initial change-point information (for sorting) */
+ for (i=0; i < 2*old_nr; i++)
+ change_point[i] = &change_point_list[i];
+
+ /* record all known change-points (starting and ending addresses),
+ omitting those that are for empty memory regions */
+ chgidx = 0;
+ for (i=0; i < old_nr; i++) {
+ if (biosmap[i].size != 0) {
+ change_point[chgidx]->addr = biosmap[i].addr;
+ change_point[chgidx++]->pbios = &biosmap[i];
+ change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size;
+ change_point[chgidx++]->pbios = &biosmap[i];
+ }
+ }
+ chg_nr = chgidx; /* true number of change-points */
+
+ /* sort change-point list by memory addresses (low -> high) */
+ still_changing = 1;
+ while (still_changing) {
+ still_changing = 0;
+ for (i=1; i < chg_nr; i++) {
+ /* if <current_addr> > <last_addr>, swap */
+ /* or, if current=<start_addr> & last=<end_addr>, swap */
+ if ((change_point[i]->addr < change_point[i-1]->addr) ||
+ ((change_point[i]->addr == change_point[i-1]->addr) &&
+ (change_point[i]->addr == change_point[i]->pbios->addr) &&
+ (change_point[i-1]->addr != change_point[i-1]->pbios->addr))
+ )
+ {
+ change_tmp = change_point[i];
+ change_point[i] = change_point[i-1];
+ change_point[i-1] = change_tmp;
+ still_changing=1;
+ }
+ }
+ }
+
+ /* create a new bios memory map, removing overlaps */
+ overlap_entries=0; /* number of entries in the overlap table */
+ new_bios_entry=0; /* index for creating new bios map entries */
+ last_type = 0; /* start with undefined memory type */
+ last_addr = 0; /* start with 0 as last starting address */
+ /* loop through change-points, determining affect on the new bios map */
+ for (chgidx=0; chgidx < chg_nr; chgidx++)
+ {
+ /* keep track of all overlapping bios entries */
+ if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr)
+ {
+ /* add map entry to overlap list (> 1 entry implies an overlap) */
+ overlap_list[overlap_entries++]=change_point[chgidx]->pbios;
+ }
+ else
+ {
+ /* remove entry from list (order independent, so swap with last) */
+ for (i=0; i<overlap_entries; i++)
+ {
+ if (overlap_list[i] == change_point[chgidx]->pbios)
+ overlap_list[i] = overlap_list[overlap_entries-1];
+ }
+ overlap_entries--;
+ }
+ /* if there are overlapping entries, decide which "type" to use */
+ /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */
+ current_type = 0;
+ for (i=0; i<overlap_entries; i++)
+ if (overlap_list[i]->type > current_type)
+ current_type = overlap_list[i]->type;
+ /* continue building up new bios map based on this information */
+ if (current_type != last_type) {
+ if (last_type != 0) {
+ new_bios[new_bios_entry].size =
+ change_point[chgidx]->addr - last_addr;
+ /* move forward only if the new size was non-zero */
+ if (new_bios[new_bios_entry].size != 0)
+ if (++new_bios_entry >= E820MAX)
+ break; /* no more space left for new bios entries */
+ }
+ if (current_type != 0) {
+ new_bios[new_bios_entry].addr = change_point[chgidx]->addr;
+ new_bios[new_bios_entry].type = current_type;
+ last_addr=change_point[chgidx]->addr;
+ }
+ last_type = current_type;
+ }
+ }
+ new_nr = new_bios_entry; /* retain count for new bios entries */
+
+ /* copy new bios mapping into original location */
+ memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry));
+ *pnr_map = new_nr;
+
+ printk("sanitize end\n");
+ return 0;
+}
+
+/*
+ * Copy the BIOS e820 map into a safe place.
+ *
+ * Sanity-check it while we're at it..
+ *
+ * If we're lucky and live on a modern system, the setup code
+ * will have given us a memory map that we can use to properly
+ * set up memory. If we aren't, we'll fake a memory map.
+ *
+ * We check to see that the memory map contains at least 2 elements
+ * before we'll use it, because the detection code in setup.S may
+ * not be perfect and most every PC known to man has two memory
+ * regions: one from 0 to 640k, and one from 1mb up. (The IBM
+ * thinkpad 560x, for example, does not cooperate with the memory
+ * detection code.)
+ */
+int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
+{
+#ifndef CONFIG_XEN
+ /* Only one memory region (or negative)? Ignore it */
+ if (nr_map < 2)
+ return -1;
+#else
+ BUG_ON(nr_map < 1);
+#endif
+
+ do {
+ unsigned long long start = biosmap->addr;
+ unsigned long long size = biosmap->size;
+ unsigned long long end = start + size;
+ unsigned long type = biosmap->type;
+ printk("copy_e820_map() start: %016Lx size: %016Lx end: %016Lx type: %ld\n", start, size, end, type);
+
+ /* Overflow in 64 bits? Ignore the memory map. */
+ if (start > end)
+ return -1;
+
+#ifndef CONFIG_XEN
+ /*
+ * Some BIOSes claim RAM in the 640k - 1M region.
+ * Not right. Fix it up.
+ */
+ if (type == E820_RAM) {
+ printk("copy_e820_map() type is E820_RAM\n");
+ if (start < 0x100000ULL && end > 0xA0000ULL) {
+ printk("copy_e820_map() lies in range...\n");
+ if (start < 0xA0000ULL) {
+ printk("copy_e820_map() start < 0xA0000ULL\n");
+ add_memory_region(start, 0xA0000ULL-start, type);
+ }
+ if (end <= 0x100000ULL) {
+ printk("copy_e820_map() end <= 0x100000ULL\n");
+ continue;
+ }
+ start = 0x100000ULL;
+ size = end - start;
+ }
+ }
+#endif
+ add_memory_region(start, size, type);
+ } while (biosmap++,--nr_map);
+
+#ifdef CONFIG_XEN
+ if (is_initial_xendomain()) {
+ struct xen_memory_map memmap;
+
+ memmap.nr_entries = E820MAX;
+ set_xen_guest_handle(memmap.buffer, machine_e820.map);
+
+ if (HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap))
+ BUG();
+ machine_e820.nr_map = memmap.nr_entries;
+ } else
+ machine_e820 = e820;
+#endif
+
+ return 0;
+}
+
+/*
+ * Callback for efi_memory_walk.
+ */
+static int __init
+efi_find_max_pfn(unsigned long start, unsigned long end, void *arg)
+{
+ unsigned long *max_pfn = arg, pfn;
+
+ if (start < end) {
+ pfn = PFN_UP(end -1);
+ if (pfn > *max_pfn)
+ *max_pfn = pfn;
+ }
+ return 0;
+}
+
+static int __init
+efi_memory_present_wrapper(unsigned long start, unsigned long end, void *arg)
+{
+ memory_present(0, PFN_UP(start), PFN_DOWN(end));
+ return 0;
+}
+
+/*
+ * Find the highest page frame number we have available
+ */
+void __init find_max_pfn(void)
+{
+ int i;
+
+ max_pfn = 0;
+ if (efi_enabled) {
+ efi_memmap_walk(efi_find_max_pfn, &max_pfn);
+ efi_memmap_walk(efi_memory_present_wrapper, NULL);
+ return;
+ }
+
+ for (i = 0; i < e820.nr_map; i++) {
+ unsigned long start, end;
+ /* RAM? */
+ if (e820.map[i].type != E820_RAM)
+ continue;
+ start = PFN_UP(e820.map[i].addr);
+ end = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
+ if (start >= end)
+ continue;
+ if (end > max_pfn)
+ max_pfn = end;
+ memory_present(0, start, end);
+ }
+}
+
+/*
+ * Free all available memory for boot time allocation. Used
+ * as a callback function by efi_memory_walk()
+ */
+
+static int __init
+free_available_memory(unsigned long start, unsigned long end, void *arg)
+{
+ /* check max_low_pfn */
+ if (start >= (max_low_pfn << PAGE_SHIFT))
+ return 0;
+ if (end >= (max_low_pfn << PAGE_SHIFT))
+ end = max_low_pfn << PAGE_SHIFT;
+ if (start < end)
+ free_bootmem(start, end - start);
+
+ return 0;
+}
+/*
+ * Register fully available low RAM pages with the bootmem allocator.
+ */
+void __init register_bootmem_low_pages(unsigned long max_low_pfn)
+{
+ int i;
+
+ if (efi_enabled) {
+ efi_memmap_walk(free_available_memory, NULL);
+ return;
+ }
+ for (i = 0; i < e820.nr_map; i++) {
+ unsigned long curr_pfn, last_pfn, size;
+ /*
+ * Reserve usable low memory
+ */
+ if (e820.map[i].type != E820_RAM)
+ continue;
+ /*
+ * We are rounding up the start address of usable memory:
+ */
+ curr_pfn = PFN_UP(e820.map[i].addr);
+ if (curr_pfn >= max_low_pfn)
+ continue;
+ /*
+ * ... and at the end of the usable range downwards:
+ */
+ last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
+
+#ifdef CONFIG_XEN
+ /*
+ * Truncate to the number of actual pages currently
+ * present.
+ */
+ if (last_pfn > xen_start_info->nr_pages)
+ last_pfn = xen_start_info->nr_pages;
+#endif
+
+ if (last_pfn > max_low_pfn)
+ last_pfn = max_low_pfn;
+
+ /*
+ * .. finally, did all the rounding and playing
+ * around just make the area go away?
+ */
+ if (last_pfn <= curr_pfn)
+ continue;
+
+ size = last_pfn - curr_pfn;
+ free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size));
+ }
+}
+
+void __init e820_register_memory(void)
+{
+ unsigned long gapstart, gapsize, round;
+ unsigned long long last;
+ int i;
+
+#ifdef CONFIG_XEN
+ if (is_initial_xendomain()) {
+ struct xen_memory_map memmap;
+
+ memmap.nr_entries = E820MAX;
+ set_xen_guest_handle(memmap.buffer, machine_e820.map);
+
+ if (HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap))
+ BUG();
+ machine_e820.nr_map = memmap.nr_entries;
+ }
+ else
+ machine_e820 = e820;
+#define e820 machine_e820
+#endif
+
+ /*
+ * Search for the bigest gap in the low 32 bits of the e820
+ * memory space.
+ */
+ last = 0x100000000ull;
+ gapstart = 0x10000000;
+ gapsize = 0x400000;
+ i = e820.nr_map;
+ while (--i >= 0) {
+ unsigned long long start = e820.map[i].addr;
+ unsigned long long end = start + e820.map[i].size;
+
+ /*
+ * Since "last" is at most 4GB, we know we'll
+ * fit in 32 bits if this condition is true
+ */
+ if (last > end) {
+ unsigned long gap = last - end;
+
+ if (gap > gapsize) {
+ gapsize = gap;
+ gapstart = end;
+ }
+ }
+ if (start < last)
+ last = start;
+ }
+#undef e820
+
+ /*
+ * See how much we want to round up: start off with
+ * rounding to the next 1MB area.
+ */
+ round = 0x100000;
+ while ((gapsize >> 4) > round)
+ round += round;
+ /* Fun with two's complement */
+ pci_mem_start = (gapstart + round) & -round;
+
+ printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n",
+ pci_mem_start, gapstart, gapsize);
+}
+
+void __init print_memory_map(char *who)
+{
+ int i;
+
+ for (i = 0; i < e820.nr_map; i++) {
+ printk(" %s: %016Lx - %016Lx ", who,
+ e820.map[i].addr,
+ e820.map[i].addr + e820.map[i].size);
+ switch (e820.map[i].type) {
+ case E820_RAM: printk("(usable)\n");
+ break;
+ case E820_RESERVED:
+ printk("(reserved)\n");
+ break;
+ case E820_ACPI:
+ printk("(ACPI data)\n");
+ break;
+ case E820_NVS:
+ printk("(ACPI NVS)\n");
+ break;
+ default: printk("type %lu\n", e820.map[i].type);
+ break;
+ }
+ }
+}
+
+static __init __always_inline void efi_limit_regions(unsigned long long size)
+{
+ unsigned long long current_addr = 0;
+ efi_memory_desc_t *md, *next_md;
+ void *p, *p1;
+ int i, j;
+
+ j = 0;
+ p1 = memmap.map;
+ for (p = p1, i = 0; p < memmap.map_end; p += memmap.desc_size, i++) {
+ md = p;
+ next_md = p1;
+ current_addr = md->phys_addr +
+ PFN_PHYS(md->num_pages);
+ if (is_available_memory(md)) {
+ if (md->phys_addr >= size) continue;
+ memcpy(next_md, md, memmap.desc_size);
+ if (current_addr >= size) {
+ next_md->num_pages -=
+ PFN_UP(current_addr-size);
+ }
+ p1 += memmap.desc_size;
+ next_md = p1;
+ j++;
+ } else if ((md->attribute & EFI_MEMORY_RUNTIME) ==
+ EFI_MEMORY_RUNTIME) {
+ /* In order to make runtime services
+ * available we have to include runtime
+ * memory regions in memory map */
+ memcpy(next_md, md, memmap.desc_size);
+ p1 += memmap.desc_size;
+ next_md = p1;
+ j++;
+ }
+ }
+ memmap.nr_map = j;
+ memmap.map_end = memmap.map +
+ (memmap.nr_map * memmap.desc_size);
+}
+
+void __init limit_regions(unsigned long long size)
+{
+ unsigned long long current_addr = 0;
+ int i;
+
+ print_memory_map("limit_regions start");
+ if (efi_enabled) {
+ efi_limit_regions(size);
+ return;
+ }
+ for (i = 0; i < e820.nr_map; i++) {
+ current_addr = e820.map[i].addr + e820.map[i].size;
+ if (current_addr < size)
+ continue;
+
+ if (e820.map[i].type != E820_RAM)
+ continue;
+
+ if (e820.map[i].addr >= size) {
+ /*
+ * This region starts past the end of the
+ * requested size, skip it completely.
+ */
+ e820.nr_map = i;
+ } else {
+ e820.nr_map = i + 1;
+ e820.map[i].size -= current_addr - size;
+ }
+ print_memory_map("limit_regions endfor");
+ return;
+ }
+#ifdef CONFIG_XEN
+ if (current_addr < size) {
+ /*
+ * The e820 map finished before our requested size so
+ * extend the final entry to the requested address.
+ */
+ --i;
+ if (e820.map[i].type == E820_RAM)
+ e820.map[i].size -= current_addr - size;
+ else
+ add_memory_region(current_addr, size - current_addr, E820_RAM);
+ }
+#endif
+ print_memory_map("limit_regions endfunc");
+}
+
+/*
+ * This function checks if any part of the range <start,end> is mapped
+ * with type.
+ */
+int
+e820_any_mapped(u64 start, u64 end, unsigned type)
+{
+ int i;
+
+#ifndef CONFIG_XEN
+ for (i = 0; i < e820.nr_map; i++) {
+ const struct e820entry *ei = &e820.map[i];
+#else
+ if (!is_initial_xendomain())
+ return 0;
+ for (i = 0; i < machine_e820.nr_map; ++i) {
+ const struct e820entry *ei = &machine_e820.map[i];
+#endif
+
+ if (type && ei->type != type)
+ continue;
+ if (ei->addr >= end || ei->addr + ei->size <= start)
+ continue;
+ return 1;
+ }
+ return 0;
+}
+EXPORT_SYMBOL_GPL(e820_any_mapped);
+
+ /*
+ * This function checks if the entire range <start,end> is mapped with type.
+ *
+ * Note: this function only works correct if the e820 table is sorted and
+ * not-overlapping, which is the case
+ */
+int __init
+e820_all_mapped(unsigned long s, unsigned long e, unsigned type)
+{
+ u64 start = s;
+ u64 end = e;
+ int i;
+
+#ifndef CONFIG_XEN
+ for (i = 0; i < e820.nr_map; i++) {
+ struct e820entry *ei = &e820.map[i];
+#else
+ if (!is_initial_xendomain())
+ return 0;
+ for (i = 0; i < machine_e820.nr_map; ++i) {
+ const struct e820entry *ei = &machine_e820.map[i];
+#endif
+
+ if (type && ei->type != type)
+ continue;
+ /* is the region (part) in overlap with the current region ?*/
+ if (ei->addr >= end || ei->addr + ei->size <= start)
+ continue;
+ /* if the region is at the beginning of <start,end> we move
+ * start to the end of the region since it's ok until there
+ */
+ if (ei->addr <= start)
+ start = ei->addr + ei->size;
+ /* if start is now at or beyond end, we're done, full
+ * coverage */
+ if (start >= end)
+ return 1; /* we're done */
+ }
+ return 0;
+}
+
+static int __init parse_memmap(char *arg)
+{
+ if (!arg)
+ return -EINVAL;
+
+ if (strcmp(arg, "exactmap") == 0) {
+#ifdef CONFIG_CRASH_DUMP
+ /* If we are doing a crash dump, we
+ * still need to know the real mem
+ * size before original memory map is
+ * reset.
+ */
+ find_max_pfn();
+ saved_max_pfn = max_pfn;
+#endif
+ e820.nr_map = 0;
+ user_defined_memmap = 1;
+ } else {
+ /* If the user specifies memory size, we
+ * limit the BIOS-provided memory map to
+ * that size. exactmap can be used to specify
+ * the exact map. mem=number can be used to
+ * trim the existing memory map.
+ */
+ unsigned long long start_at, mem_size;
+
+ mem_size = memparse(arg, &arg);
+ if (*arg == '@') {
+ start_at = memparse(arg+1, &arg);
+ add_memory_region(start_at, mem_size, E820_RAM);
+ } else if (*arg == '#') {
+ start_at = memparse(arg+1, &arg);
+ add_memory_region(start_at, mem_size, E820_ACPI);
+ } else if (*arg == '$') {
+ start_at = memparse(arg+1, &arg);
+ add_memory_region(start_at, mem_size, E820_RESERVED);
+ } else {
+ limit_regions(mem_size);
+ user_defined_memmap = 1;
+ }
+ }
+ return 0;
+}
+early_param("memmap", parse_memmap);
1480--- sle11-2009-06-29.orig/arch/x86/kernel/entry_32-xen.S 2009-05-14 11:07:47.000000000 +0200
1481+++ sle11-2009-06-29/arch/x86/kernel/entry_32-xen.S 2009-05-14 11:08:06.000000000 +0200
1482@@ -30,12 +30,13 @@
1483 * 18(%esp) - %eax
1484 * 1C(%esp) - %ds
1485 * 20(%esp) - %es
1486- * 24(%esp) - orig_eax
1487- * 28(%esp) - %eip
1488- * 2C(%esp) - %cs
1489- * 30(%esp) - %eflags
1490- * 34(%esp) - %oldesp
1491- * 38(%esp) - %oldss
1492+ * 24(%esp) - %gs
1493+ * 28(%esp) - orig_eax
1494+ * 2C(%esp) - %eip
1495+ * 30(%esp) - %cs
1496+ * 34(%esp) - %eflags
1497+ * 38(%esp) - %oldesp
1498+ * 3C(%esp) - %oldss
1499 *
1500 * "current" is in register %ebx during any slow entries.
1501 */
1502@@ -48,27 +49,25 @@
1503 #include <asm/smp.h>
1504 #include <asm/page.h>
1505 #include <asm/desc.h>
1506+#include <asm/percpu.h>
1507 #include <asm/dwarf2.h>
1508 #include "irq_vectors.h"
1509 #include <xen/interface/xen.h>
1510
1511-#define nr_syscalls ((syscall_table_size)/4)
1512+/*
1513+ * We use macros for low-level operations which need to be overridden
1514+ * for paravirtualization. The following will never clobber any registers:
1515+ * INTERRUPT_RETURN (aka. "iret")
1516+ * GET_CR0_INTO_EAX (aka. "movl %cr0, %eax")
1517+ * ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit").
1518+ *
1519+ * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must
1520+ * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY).
1521+ * Allowing a register to be clobbered can shrink the paravirt replacement
1522+ * enough to patch inline, increasing performance.
1523+ */
1524
1525-EBX = 0x00
1526-ECX = 0x04
1527-EDX = 0x08
1528-ESI = 0x0C
1529-EDI = 0x10
1530-EBP = 0x14
1531-EAX = 0x18
1532-DS = 0x1C
1533-ES = 0x20
1534-ORIG_EAX = 0x24
1535-EIP = 0x28
1536-CS = 0x2C
1537-EFLAGS = 0x30
1538-OLDESP = 0x34
1539-OLDSS = 0x38
1540+#define nr_syscalls ((syscall_table_size)/4)
1541
1542 CF_MASK = 0x00000001
1543 TF_MASK = 0x00000100
1544@@ -79,61 +78,16 @@ VM_MASK = 0x00020000
1545 /* Pseudo-eflags. */
1546 NMI_MASK = 0x80000000
1547
1548-#ifndef CONFIG_XEN
1549-/* These are replaces for paravirtualization */
1550-#define DISABLE_INTERRUPTS cli
1551-#define ENABLE_INTERRUPTS sti
1552-#define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit
1553-#define INTERRUPT_RETURN iret
1554-#define GET_CR0_INTO_EAX movl %cr0, %eax
1555-#else
1556-/* Offsets into shared_info_t. */
1557-#define evtchn_upcall_pending /* 0 */
1558-#define evtchn_upcall_mask 1
1559-
1560-#define sizeof_vcpu_shift 6
1561-
1562-#ifdef CONFIG_SMP
1563-#define GET_VCPU_INFO movl TI_cpu(%ebp),%esi ; \
1564- shl $sizeof_vcpu_shift,%esi ; \
1565- addl HYPERVISOR_shared_info,%esi
1566-#else
1567-#define GET_VCPU_INFO movl HYPERVISOR_shared_info,%esi
1568-#endif
1569-
1570-#define __DISABLE_INTERRUPTS movb $1,evtchn_upcall_mask(%esi)
1571-#define __ENABLE_INTERRUPTS movb $0,evtchn_upcall_mask(%esi)
1572-#define __TEST_PENDING testb $0xFF,evtchn_upcall_pending(%esi)
1573-#define DISABLE_INTERRUPTS GET_VCPU_INFO ; \
1574- __DISABLE_INTERRUPTS
1575-#define ENABLE_INTERRUPTS GET_VCPU_INFO ; \
1576- __ENABLE_INTERRUPTS
1577-#define ENABLE_INTERRUPTS_SYSEXIT __ENABLE_INTERRUPTS ; \
1578-sysexit_scrit: /**** START OF SYSEXIT CRITICAL REGION ****/ ; \
1579- __TEST_PENDING ; \
1580- jnz 14f # process more events if necessary... ; \
1581- movl ESI(%esp), %esi ; \
1582- sysexit ; \
1583-14: __DISABLE_INTERRUPTS ; \
1584- TRACE_IRQS_OFF ; \
1585-sysexit_ecrit: /**** END OF SYSEXIT CRITICAL REGION ****/ ; \
1586- push %esp ; \
1587- call evtchn_do_upcall ; \
1588- add $4,%esp ; \
1589- jmp ret_from_intr
1590-#define INTERRUPT_RETURN iret
1591-#endif
1592-
1593 #ifdef CONFIG_PREEMPT
1594-#define preempt_stop DISABLE_INTERRUPTS; TRACE_IRQS_OFF
1595+#define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
1596 #else
1597-#define preempt_stop
1598+#define preempt_stop(clobbers)
1599 #define resume_kernel restore_nocheck
1600 #endif
1601
1602 .macro TRACE_IRQS_IRET
1603 #ifdef CONFIG_TRACE_IRQFLAGS
1604- testl $IF_MASK,EFLAGS(%esp) # interrupts off?
1605+ testl $IF_MASK,PT_EFLAGS(%esp) # interrupts off?
1606 jz 1f
1607 TRACE_IRQS_ON
1608 1:
1609@@ -148,6 +102,9 @@ sysexit_ecrit: /**** END OF SYSEXIT CRIT
1610
1611 #define SAVE_ALL \
1612 cld; \
1613+ pushl %gs; \
1614+ CFI_ADJUST_CFA_OFFSET 4;\
1615+ /*CFI_REL_OFFSET gs, 0;*/\
1616 pushl %es; \
1617 CFI_ADJUST_CFA_OFFSET 4;\
1618 /*CFI_REL_OFFSET es, 0;*/\
1619@@ -177,7 +134,9 @@ sysexit_ecrit: /**** END OF SYSEXIT CRIT
1620 CFI_REL_OFFSET ebx, 0;\
1621 movl $(__USER_DS), %edx; \
1622 movl %edx, %ds; \
1623- movl %edx, %es;
1624+ movl %edx, %es; \
1625+ movl $(__KERNEL_PDA), %edx; \
1626+ movl %edx, %gs
1627
1628 #define RESTORE_INT_REGS \
1629 popl %ebx; \
1630@@ -210,17 +169,22 @@ sysexit_ecrit: /**** END OF SYSEXIT CRIT
1631 2: popl %es; \
1632 CFI_ADJUST_CFA_OFFSET -4;\
1633 /*CFI_RESTORE es;*/\
1634-.section .fixup,"ax"; \
1635-3: movl $0,(%esp); \
1636- jmp 1b; \
1637+3: popl %gs; \
1638+ CFI_ADJUST_CFA_OFFSET -4;\
1639+ /*CFI_RESTORE gs;*/\
1640+.pushsection .fixup,"ax"; \
1641 4: movl $0,(%esp); \
1642+ jmp 1b; \
1643+5: movl $0,(%esp); \
1644 jmp 2b; \
1645-.previous; \
1646+6: movl $0,(%esp); \
1647+ jmp 3b; \
1648 .section __ex_table,"a";\
1649 .align 4; \
1650- .long 1b,3b; \
1651- .long 2b,4b; \
1652-.previous
1653+ .long 1b,4b; \
1654+ .long 2b,5b; \
1655+ .long 3b,6b; \
1656+.popsection
1657
1658 #define RING0_INT_FRAME \
1659 CFI_STARTPROC simple;\
1660@@ -239,18 +203,18 @@ sysexit_ecrit: /**** END OF SYSEXIT CRIT
1661 #define RING0_PTREGS_FRAME \
1662 CFI_STARTPROC simple;\
1663 CFI_SIGNAL_FRAME;\
1664- CFI_DEF_CFA esp, OLDESP-EBX;\
1665- /*CFI_OFFSET cs, CS-OLDESP;*/\
1666- CFI_OFFSET eip, EIP-OLDESP;\
1667- /*CFI_OFFSET es, ES-OLDESP;*/\
1668- /*CFI_OFFSET ds, DS-OLDESP;*/\
1669- CFI_OFFSET eax, EAX-OLDESP;\
1670- CFI_OFFSET ebp, EBP-OLDESP;\
1671- CFI_OFFSET edi, EDI-OLDESP;\
1672- CFI_OFFSET esi, ESI-OLDESP;\
1673- CFI_OFFSET edx, EDX-OLDESP;\
1674- CFI_OFFSET ecx, ECX-OLDESP;\
1675- CFI_OFFSET ebx, EBX-OLDESP
1676+ CFI_DEF_CFA esp, PT_OLDESP-PT_EBX;\
1677+ /*CFI_OFFSET cs, PT_CS-PT_OLDESP;*/\
1678+ CFI_OFFSET eip, PT_EIP-PT_OLDESP;\
1679+ /*CFI_OFFSET es, PT_ES-PT_OLDESP;*/\
1680+ /*CFI_OFFSET ds, PT_DS-PT_OLDESP;*/\
1681+ CFI_OFFSET eax, PT_EAX-PT_OLDESP;\
1682+ CFI_OFFSET ebp, PT_EBP-PT_OLDESP;\
1683+ CFI_OFFSET edi, PT_EDI-PT_OLDESP;\
1684+ CFI_OFFSET esi, PT_ESI-PT_OLDESP;\
1685+ CFI_OFFSET edx, PT_EDX-PT_OLDESP;\
1686+ CFI_OFFSET ecx, PT_ECX-PT_OLDESP;\
1687+ CFI_OFFSET ebx, PT_EBX-PT_OLDESP
1688
1689 ENTRY(ret_from_fork)
1690 CFI_STARTPROC
1691@@ -278,17 +242,18 @@ ENTRY(ret_from_fork)
1692 ALIGN
1693 RING0_PTREGS_FRAME
1694 ret_from_exception:
1695- preempt_stop
1696+ preempt_stop(CLBR_ANY)
1697 ret_from_intr:
1698 GET_THREAD_INFO(%ebp)
1699 check_userspace:
1700- movl EFLAGS(%esp), %eax # mix EFLAGS and CS
1701- movb CS(%esp), %al
1702+ movl PT_EFLAGS(%esp), %eax # mix EFLAGS and CS
1703+ movb PT_CS(%esp), %al
1704 andl $(VM_MASK | SEGMENT_RPL_MASK), %eax
1705 cmpl $USER_RPL, %eax
1706 jb resume_kernel # not returning to v8086 or userspace
1707+
1708 ENTRY(resume_userspace)
1709- DISABLE_INTERRUPTS # make sure we don't miss an interrupt
1710+ DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt
1711 # setting need_resched or sigpending
1712 # between sampling and the iret
1713 movl TI_flags(%ebp), %ecx
1714@@ -299,14 +264,14 @@ ENTRY(resume_userspace)
1715
1716 #ifdef CONFIG_PREEMPT
1717 ENTRY(resume_kernel)
1718- DISABLE_INTERRUPTS
1719+ DISABLE_INTERRUPTS(CLBR_ANY)
1720 cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ?
1721 jnz restore_nocheck
1722 need_resched:
1723 movl TI_flags(%ebp), %ecx # need_resched set ?
1724 testb $_TIF_NEED_RESCHED, %cl
1725 jz restore_all
1726- testl $IF_MASK,EFLAGS(%esp) # interrupts off (exception path) ?
1727+ testl $IF_MASK,PT_EFLAGS(%esp) # interrupts off (exception path) ?
1728 jz restore_all
1729 call preempt_schedule_irq
1730 jmp need_resched
1731@@ -328,7 +293,7 @@ sysenter_past_esp:
1732 * No need to follow this irqs on/off section: the syscall
1733 * disabled irqs and here we enable it straight after entry:
1734 */
1735- ENABLE_INTERRUPTS
1736+ ENABLE_INTERRUPTS(CLBR_NONE)
1737 pushl $(__USER_DS)
1738 CFI_ADJUST_CFA_OFFSET 4
1739 /*CFI_REL_OFFSET ss, 0*/
1740@@ -340,12 +305,16 @@ sysenter_past_esp:
1741 pushl $(__USER_CS)
1742 CFI_ADJUST_CFA_OFFSET 4
1743 /*CFI_REL_OFFSET cs, 0*/
1744+#ifndef CONFIG_COMPAT_VDSO
1745 /*
1746 * Push current_thread_info()->sysenter_return to the stack.
1747 * A tiny bit of offset fixup is necessary - 4*4 means the 4 words
1748 * pushed above; +8 corresponds to copy_thread's esp0 setting.
1749 */
1750 pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp)
1751+#else
1752+ pushl $SYSENTER_RETURN
1753+#endif
1754 CFI_ADJUST_CFA_OFFSET 4
1755 CFI_REL_OFFSET eip, 0
1756
1757@@ -372,19 +341,27 @@ sysenter_past_esp:
1758 cmpl $(nr_syscalls), %eax
1759 jae syscall_badsys
1760 call *sys_call_table(,%eax,4)
1761- movl %eax,EAX(%esp)
1762- DISABLE_INTERRUPTS
1763+ movl %eax,PT_EAX(%esp)
1764+ DISABLE_INTERRUPTS(CLBR_ECX|CLBR_EDX)
1765 TRACE_IRQS_OFF
1766 movl TI_flags(%ebp), %ecx
1767 testw $_TIF_ALLWORK_MASK, %cx
1768 jne syscall_exit_work
1769 /* if something modifies registers it must also disable sysexit */
1770- movl EIP(%esp), %edx
1771- movl OLDESP(%esp), %ecx
1772+ movl PT_EIP(%esp), %edx
1773+ movl PT_OLDESP(%esp), %ecx
1774 xorl %ebp,%ebp
1775 TRACE_IRQS_ON
1776+1: mov PT_GS(%esp), %gs
1777 ENABLE_INTERRUPTS_SYSEXIT
1778 CFI_ENDPROC
1779+.pushsection .fixup,"ax"
1780+2: movl $0,PT_GS(%esp)
1781+ jmp 1b
1782+.section __ex_table,"a"
1783+ .align 4
1784+ .long 1b,2b
1785+.popsection
1786
1787 # pv sysenter call handler stub
1788 ENTRY(sysenter_entry_pv)
1789@@ -419,7 +396,7 @@ ENTRY(system_call)
1790 CFI_ADJUST_CFA_OFFSET 4
1791 SAVE_ALL
1792 GET_THREAD_INFO(%ebp)
1793- testl $TF_MASK,EFLAGS(%esp)
1794+ testl $TF_MASK,PT_EFLAGS(%esp)
1795 jz no_singlestep
1796 orl $_TIF_SINGLESTEP,TI_flags(%ebp)
1797 no_singlestep:
1798@@ -431,9 +408,9 @@ no_singlestep:
1799 jae syscall_badsys
1800 syscall_call:
1801 call *sys_call_table(,%eax,4)
1802- movl %eax,EAX(%esp) # store the return value
1803+ movl %eax,PT_EAX(%esp) # store the return value
1804 syscall_exit:
1805- DISABLE_INTERRUPTS # make sure we don't miss an interrupt
1806+ DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt
1807 # setting need_resched or sigpending
1808 # between sampling and the iret
1809 TRACE_IRQS_OFF
1810@@ -443,12 +420,12 @@ syscall_exit:
1811
1812 restore_all:
1813 #ifndef CONFIG_XEN
1814- movl EFLAGS(%esp), %eax # mix EFLAGS, SS and CS
1815- # Warning: OLDSS(%esp) contains the wrong/random values if we
1816+ movl PT_EFLAGS(%esp), %eax # mix EFLAGS, SS and CS
1817+ # Warning: PT_OLDSS(%esp) contains the wrong/random values if we
1818 # are returning to the kernel.
1819 # See comments in process.c:copy_thread() for details.
1820- movb OLDSS(%esp), %ah
1821- movb CS(%esp), %al
1822+ movb PT_OLDSS(%esp), %ah
1823+ movb PT_CS(%esp), %al
1824 andl $(VM_MASK | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
1825 cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax
1826 CFI_REMEMBER_STATE
1827@@ -456,7 +433,7 @@ restore_all:
1828 restore_nocheck:
1829 #else
1830 restore_nocheck:
1831- movl EFLAGS(%esp), %eax
1832+ movl PT_EFLAGS(%esp), %eax
1833 testl $(VM_MASK|NMI_MASK), %eax
1834 CFI_REMEMBER_STATE
1835 jnz hypervisor_iret
1836@@ -470,13 +447,13 @@ restore_nocheck:
1837 TRACE_IRQS_IRET
1838 restore_nocheck_notrace:
1839 RESTORE_REGS
1840- addl $4, %esp
1841+ addl $4, %esp # skip orig_eax/error_code
1842 CFI_ADJUST_CFA_OFFSET -4
1843 1: INTERRUPT_RETURN
1844 .section .fixup,"ax"
1845 iret_exc:
1846 #ifndef CONFIG_XEN
1847- ENABLE_INTERRUPTS
1848+ ENABLE_INTERRUPTS(CLBR_NONE)
1849 #endif
1850 pushl $0 # no error code
1851 pushl $do_iret_error
1852@@ -490,33 +467,42 @@ iret_exc:
1853 CFI_RESTORE_STATE
1854 #ifndef CONFIG_XEN
1855 ldt_ss:
1856- larl OLDSS(%esp), %eax
1857+ larl PT_OLDSS(%esp), %eax
1858 jnz restore_nocheck
1859 testl $0x00400000, %eax # returning to 32bit stack?
1860 	jnz restore_nocheck	# all right, normal return
1861+
1862+#ifdef CONFIG_PARAVIRT
1863+ /*
1864+ * The kernel can't run on a non-flat stack if paravirt mode
1865+ * is active. Rather than try to fixup the high bits of
1866+ * ESP, bypass this code entirely. This may break DOSemu
1867+ * and/or Wine support in a paravirt VM, although the option
1868+ * is still available to implement the setting of the high
1869+ * 16-bits in the INTERRUPT_RETURN paravirt-op.
1870+ */
1871+ cmpl $0, paravirt_ops+PARAVIRT_enabled
1872+ jne restore_nocheck
1873+#endif
1874+
1875 /* If returning to userspace with 16bit stack,
1876 * try to fix the higher word of ESP, as the CPU
1877 * won't restore it.
1878 * This is an "official" bug of all the x86-compatible
1879 * CPUs, which we can try to work around to make
1880 * dosemu and wine happy. */
1881- subl $8, %esp # reserve space for switch16 pointer
1882- CFI_ADJUST_CFA_OFFSET 8
1883- DISABLE_INTERRUPTS
1884+ movl PT_OLDESP(%esp), %eax
1885+ movl %esp, %edx
1886+ call patch_espfix_desc
1887+ pushl $__ESPFIX_SS
1888+ CFI_ADJUST_CFA_OFFSET 4
1889+ pushl %eax
1890+ CFI_ADJUST_CFA_OFFSET 4
1891+ DISABLE_INTERRUPTS(CLBR_EAX)
1892 TRACE_IRQS_OFF
1893- movl %esp, %eax
1894- /* Set up the 16bit stack frame with switch32 pointer on top,
1895- * and a switch16 pointer on top of the current frame. */
1896- call setup_x86_bogus_stack
1897- CFI_ADJUST_CFA_OFFSET -8 # frame has moved
1898- TRACE_IRQS_IRET
1899- RESTORE_REGS
1900- lss 20+4(%esp), %esp # switch to 16bit stack
1901-1: INTERRUPT_RETURN
1902-.section __ex_table,"a"
1903- .align 4
1904- .long 1b,iret_exc
1905-.previous
1906+ lss (%esp), %esp
1907+ CFI_ADJUST_CFA_OFFSET -8
1908+ jmp restore_nocheck
1909 #else
1910 ALIGN
1911 restore_all_enable_events:
1912@@ -540,7 +526,7 @@ ecrit: /**** END OF CRITICAL REGION ***
1913
1914 CFI_RESTORE_STATE
1915 hypervisor_iret:
1916- andl $~NMI_MASK, EFLAGS(%esp)
1917+ andl $~NMI_MASK, PT_EFLAGS(%esp)
1918 RESTORE_REGS
1919 addl $4, %esp
1920 CFI_ADJUST_CFA_OFFSET -4
1921@@ -556,7 +542,7 @@ work_pending:
1922 jz work_notifysig
1923 work_resched:
1924 call schedule
1925- DISABLE_INTERRUPTS # make sure we don't miss an interrupt
1926+ DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt
1927 # setting need_resched or sigpending
1928 # between sampling and the iret
1929 TRACE_IRQS_OFF
1930@@ -569,7 +555,8 @@ work_resched:
1931
1932 work_notifysig: # deal with pending signals and
1933 # notify-resume requests
1934- testl $VM_MASK, EFLAGS(%esp)
1935+#ifdef CONFIG_VM86
1936+ testl $VM_MASK, PT_EFLAGS(%esp)
1937 movl %esp, %eax
1938 jne work_notifysig_v86 # returning to kernel-space or
1939 # vm86-space
1940@@ -579,29 +566,30 @@ work_notifysig: # deal with pending s
1941
1942 ALIGN
1943 work_notifysig_v86:
1944-#ifdef CONFIG_VM86
1945 pushl %ecx # save ti_flags for do_notify_resume
1946 CFI_ADJUST_CFA_OFFSET 4
1947 call save_v86_state # %eax contains pt_regs pointer
1948 popl %ecx
1949 CFI_ADJUST_CFA_OFFSET -4
1950 movl %eax, %esp
1951+#else
1952+ movl %esp, %eax
1953+#endif
1954 xorl %edx, %edx
1955 call do_notify_resume
1956 jmp resume_userspace_sig
1957-#endif
1958
1959 # perform syscall exit tracing
1960 ALIGN
1961 syscall_trace_entry:
1962- movl $-ENOSYS,EAX(%esp)
1963+ movl $-ENOSYS,PT_EAX(%esp)
1964 movl %esp, %eax
1965 xorl %edx,%edx
1966 call do_syscall_trace
1967 cmpl $0, %eax
1968 jne resume_userspace # ret != 0 -> running under PTRACE_SYSEMU,
1969 # so must skip actual syscall
1970- movl ORIG_EAX(%esp), %eax
1971+ movl PT_ORIG_EAX(%esp), %eax
1972 cmpl $(nr_syscalls), %eax
1973 jnae syscall_call
1974 jmp syscall_exit
1975@@ -612,7 +600,7 @@ syscall_exit_work:
1976 testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl
1977 jz work_pending
1978 TRACE_IRQS_ON
1979- ENABLE_INTERRUPTS # could let do_syscall_trace() call
1980+ ENABLE_INTERRUPTS(CLBR_ANY) # could let do_syscall_trace() call
1981 # schedule() instead
1982 movl %esp, %eax
1983 movl $1, %edx
1984@@ -626,40 +614,39 @@ syscall_fault:
1985 CFI_ADJUST_CFA_OFFSET 4
1986 SAVE_ALL
1987 GET_THREAD_INFO(%ebp)
1988- movl $-EFAULT,EAX(%esp)
1989+ movl $-EFAULT,PT_EAX(%esp)
1990 jmp resume_userspace
1991
1992 syscall_badsys:
1993- movl $-ENOSYS,EAX(%esp)
1994+ movl $-ENOSYS,PT_EAX(%esp)
1995 jmp resume_userspace
1996 CFI_ENDPROC
1997
1998 #ifndef CONFIG_XEN
1999 #define FIXUP_ESPFIX_STACK \
2000- movl %esp, %eax; \
2001- /* switch to 32bit stack using the pointer on top of 16bit stack */ \
2002- lss %ss:CPU_16BIT_STACK_SIZE-8, %esp; \
2003- /* copy data from 16bit stack to 32bit stack */ \
2004- call fixup_x86_bogus_stack; \
2005- /* put ESP to the proper location */ \
2006- movl %eax, %esp;
2007-#define UNWIND_ESPFIX_STACK \
2008+	/* since we are on the wrong stack, we can't write this in C :( */ \
2009+ movl %gs:PDA_cpu, %ebx; \
2010+ PER_CPU(cpu_gdt_descr, %ebx); \
2011+ movl GDS_address(%ebx), %ebx; \
2012+ GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \
2013+ addl %esp, %eax; \
2014+ pushl $__KERNEL_DS; \
2015+ CFI_ADJUST_CFA_OFFSET 4; \
2016 pushl %eax; \
2017 CFI_ADJUST_CFA_OFFSET 4; \
2018+ lss (%esp), %esp; \
2019+ CFI_ADJUST_CFA_OFFSET -8;
2020+#define UNWIND_ESPFIX_STACK \
2021 movl %ss, %eax; \
2022- /* see if on 16bit stack */ \
2023+ /* see if on espfix stack */ \
2024 cmpw $__ESPFIX_SS, %ax; \
2025- je 28f; \
2026-27: popl %eax; \
2027- CFI_ADJUST_CFA_OFFSET -4; \
2028-.section .fixup,"ax"; \
2029-28: movl $__KERNEL_DS, %eax; \
2030+ jne 27f; \
2031+ movl $__KERNEL_DS, %eax; \
2032 movl %eax, %ds; \
2033 movl %eax, %es; \
2034- /* switch to 32bit stack */ \
2035+ /* switch to normal stack */ \
2036 FIXUP_ESPFIX_STACK; \
2037- jmp 27b; \
2038-.previous
2039+27:;
2040
2041 /*
2042 * Build the entry stubs and pointer table with
2043@@ -723,13 +710,16 @@ KPROBE_ENTRY(page_fault)
2044 CFI_ADJUST_CFA_OFFSET 4
2045 ALIGN
2046 error_code:
2047+ /* the function address is in %gs's slot on the stack */
2048+ pushl %es
2049+ CFI_ADJUST_CFA_OFFSET 4
2050+ /*CFI_REL_OFFSET es, 0*/
2051 pushl %ds
2052 CFI_ADJUST_CFA_OFFSET 4
2053 /*CFI_REL_OFFSET ds, 0*/
2054 pushl %eax
2055 CFI_ADJUST_CFA_OFFSET 4
2056 CFI_REL_OFFSET eax, 0
2057- xorl %eax, %eax
2058 pushl %ebp
2059 CFI_ADJUST_CFA_OFFSET 4
2060 CFI_REL_OFFSET ebp, 0
2061@@ -742,7 +732,6 @@ error_code:
2062 pushl %edx
2063 CFI_ADJUST_CFA_OFFSET 4
2064 CFI_REL_OFFSET edx, 0
2065- decl %eax # eax = -1
2066 pushl %ecx
2067 CFI_ADJUST_CFA_OFFSET 4
2068 CFI_REL_OFFSET ecx, 0
2069@@ -750,18 +739,20 @@ error_code:
2070 CFI_ADJUST_CFA_OFFSET 4
2071 CFI_REL_OFFSET ebx, 0
2072 cld
2073- pushl %es
2074+ pushl %gs
2075 CFI_ADJUST_CFA_OFFSET 4
2076- /*CFI_REL_OFFSET es, 0*/
2077+ /*CFI_REL_OFFSET gs, 0*/
2078+ movl $(__KERNEL_PDA), %ecx
2079+ movl %ecx, %gs
2080 UNWIND_ESPFIX_STACK
2081 popl %ecx
2082 CFI_ADJUST_CFA_OFFSET -4
2083 /*CFI_REGISTER es, ecx*/
2084- movl ES(%esp), %edi # get the function address
2085- movl ORIG_EAX(%esp), %edx # get the error code
2086- movl %eax, ORIG_EAX(%esp)
2087- movl %ecx, ES(%esp)
2088- /*CFI_REL_OFFSET es, ES*/
2089+ movl PT_GS(%esp), %edi # get the function address
2090+ movl PT_ORIG_EAX(%esp), %edx # get the error code
2091+ movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart
2092+ mov %ecx, PT_GS(%esp)
2093+ /*CFI_REL_OFFSET gs, ES*/
2094 movl $(__USER_DS), %ecx
2095 movl %ecx, %ds
2096 movl %ecx, %es
2097@@ -793,8 +784,8 @@ ENTRY(hypervisor_callback)
2098 pushl %eax
2099 CFI_ADJUST_CFA_OFFSET 4
2100 SAVE_ALL
2101- movl CS(%esp),%ecx
2102- movl EIP(%esp),%eax
2103+ movl PT_CS(%esp),%ecx
2104+ movl PT_EIP(%esp),%eax
2105 andl $SEGMENT_RPL_MASK,%ecx
2106 cmpl $USER_RPL,%ecx
2107 jae .Ldo_upcall
2108@@ -808,7 +799,7 @@ ENTRY(hypervisor_callback)
2109 jb .Ldo_upcall
2110 cmpl $sysexit_ecrit,%eax
2111 ja .Ldo_upcall
2112- addl $OLDESP,%esp # Remove eflags...ebx from stack frame.
2113+ addl $PT_OLDESP,%esp # Remove eflags...ebx from stack frame.
2114 #endif
2115 .Ldo_upcall:
2116 push %esp
2117@@ -830,7 +821,7 @@ critical_region_fixup:
2118 movsbl critical_fixup_table-scrit(%eax),%ecx # %ecx contains num slots popped
2119 testl %ecx,%ecx
2120 leal (%esp,%ecx,4),%esi # %esi points at end of src region
2121- leal OLDESP(%esp),%edi # %edi points at end of dst region
2122+ leal PT_OLDESP(%esp),%edi # %edi points at end of dst region
2123 jle 17f # skip loop if nothing to copy
2124 16: subl $4,%esi # pre-decrementing copy loop
2125 subl $4,%edi
2126@@ -853,8 +844,9 @@ critical_fixup_table:
2127 .byte 6 # pop %eax
2128 .byte 7 # pop %ds
2129 .byte 8 # pop %es
2130- .byte 9,9,9 # add $4,%esp
2131- .byte 10 # iret
2132+ .byte 9,9 # pop %gs
2133+ .byte 10,10,10 # add $4,%esp
2134+ .byte 11 # iret
2135 .byte -1,-1,-1,-1 # movb $1,1(%esi) = __DISABLE_INTERRUPTS
2136 .previous
2137
2138@@ -944,7 +936,7 @@ ENTRY(device_not_available)
2139 jmp ret_from_exception
2140 device_available_emulate:
2141 #endif
2142- preempt_stop
2143+ preempt_stop(CLBR_ANY)
2144 call math_state_restore
2145 jmp ret_from_exception
2146 CFI_ENDPROC
2147@@ -1014,7 +1006,7 @@ KPROBE_ENTRY(nmi)
2148 cmpw $__ESPFIX_SS, %ax
2149 popl %eax
2150 CFI_ADJUST_CFA_OFFSET -4
2151- je nmi_16bit_stack
2152+ je nmi_espfix_stack
2153 cmpl $sysenter_entry,(%esp)
2154 je nmi_stack_fixup
2155 pushl %eax
2156@@ -1057,7 +1049,7 @@ nmi_debug_stack_check:
2157 FIX_STACK(24,nmi_stack_correct, 1)
2158 jmp nmi_stack_correct
2159
2160-nmi_16bit_stack:
2161+nmi_espfix_stack:
2162 /* We have a RING0_INT_FRAME here.
2163 *
2164 * create the pointer to lss back
2165@@ -1066,7 +1058,6 @@ nmi_16bit_stack:
2166 CFI_ADJUST_CFA_OFFSET 4
2167 pushl %esp
2168 CFI_ADJUST_CFA_OFFSET 4
2169- movzwl %sp, %esp
2170 addw $4, (%esp)
2171 /* copy the iret frame of 12 bytes */
2172 .rept 3
2173@@ -1077,11 +1068,11 @@ nmi_16bit_stack:
2174 CFI_ADJUST_CFA_OFFSET 4
2175 SAVE_ALL
2176 FIXUP_ESPFIX_STACK # %eax == %esp
2177- CFI_ADJUST_CFA_OFFSET -20 # the frame has now moved
2178 xorl %edx,%edx # zero error code
2179 call do_nmi
2180 RESTORE_REGS
2181- lss 12+4(%esp), %esp # back to 16bit stack
2182+ lss 12+4(%esp), %esp # back to espfix stack
2183+ CFI_ADJUST_CFA_OFFSET -24
2184 1: INTERRUPT_RETURN
2185 CFI_ENDPROC
2186 .section __ex_table,"a"
2187@@ -1097,12 +1088,25 @@ KPROBE_ENTRY(nmi)
2188 xorl %edx,%edx # zero error code
2189 movl %esp,%eax # pt_regs pointer
2190 call do_nmi
2191- orl $NMI_MASK, EFLAGS(%esp)
2192+ orl $NMI_MASK, PT_EFLAGS(%esp)
2193 jmp restore_all
2194 CFI_ENDPROC
2195 #endif
2196 KPROBE_END(nmi)
2197
2198+#ifdef CONFIG_PARAVIRT
2199+ENTRY(native_iret)
2200+1: iret
2201+.section __ex_table,"a"
2202+ .align 4
2203+ .long 1b,iret_exc
2204+.previous
2205+
2206+ENTRY(native_irq_enable_sysexit)
2207+ sti
2208+ sysexit
2209+#endif
2210+
2211 KPROBE_ENTRY(int3)
2212 RING0_INT_FRAME
2213 pushl $-1 # mark this as an int
2214@@ -1218,37 +1222,6 @@ ENTRY(spurious_interrupt_bug)
2215 CFI_ENDPROC
2216 #endif /* !CONFIG_XEN */
2217
2218-#ifdef CONFIG_STACK_UNWIND
2219-ENTRY(arch_unwind_init_running)
2220- CFI_STARTPROC
2221- movl 4(%esp), %edx
2222- movl (%esp), %ecx
2223- leal 4(%esp), %eax
2224- movl %ebx, EBX(%edx)
2225- xorl %ebx, %ebx
2226- movl %ebx, ECX(%edx)
2227- movl %ebx, EDX(%edx)
2228- movl %esi, ESI(%edx)
2229- movl %edi, EDI(%edx)
2230- movl %ebp, EBP(%edx)
2231- movl %ebx, EAX(%edx)
2232- movl $__USER_DS, DS(%edx)
2233- movl $__USER_DS, ES(%edx)
2234- movl %ebx, ORIG_EAX(%edx)
2235- movl %ecx, EIP(%edx)
2236- movl 12(%esp), %ecx
2237- movl $__KERNEL_CS, CS(%edx)
2238- movl %ebx, EFLAGS(%edx)
2239- movl %eax, OLDESP(%edx)
2240- movl 8(%esp), %eax
2241- movl %ecx, 8(%esp)
2242- movl EBX(%edx), %ebx
2243- movl $__KERNEL_DS, OLDSS(%edx)
2244- jmpl *%eax
2245- CFI_ENDPROC
2246-ENDPROC(arch_unwind_init_running)
2247-#endif
2248-
2249 ENTRY(fixup_4gb_segment)
2250 RING0_EC_FRAME
2251 pushl $do_fixup_4gb_segment
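
For context on the ldt_ss/espfix rework above: iret to a 16-bit stack segment restores only the low 16 bits of %esp, so instead of copying the frame to a per-CPU 16-bit stack, the new code has patch_espfix_desc build the GDT_ENTRY_ESPFIX_SS descriptor with a base chosen so that the truncated offset still resolves to the live kernel stack. A minimal C sketch of that base computation follows; espfix_segment_base is a hypothetical name, and the real function additionally sizes the segment limit and writes the descriptor in place.

/* Illustrative sketch only -- not the kernel's patch_espfix_desc(). */
unsigned long espfix_segment_base(unsigned long user_esp,
                                  unsigned long kernel_esp)
{
        /* Only the low 16 bits of %esp survive the iret to a 16-bit SS. */
        unsigned long offset = user_esp & 0xffff;

        /* Pick the base so that base + offset == kernel_esp. */
        return kernel_esp - offset;
}
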
2252--- sle11-2009-06-29.orig/arch/x86/kernel/head_32-xen.S 2009-03-04 11:28:34.000000000 +0100
2253+++ sle11-2009-06-29/arch/x86/kernel/head_32-xen.S 2008-12-15 11:26:44.000000000 +0100
2254@@ -9,6 +9,7 @@
2255 #include <asm/cache.h>
2256 #include <asm/thread_info.h>
2257 #include <asm/asm-offsets.h>
2258+#include <asm/boot.h>
2259 #include <asm/dwarf2.h>
2260 #include <xen/interface/xen.h>
2261 #include <xen/interface/elfnote.h>
2262@@ -35,6 +36,8 @@ ENTRY(startup_32)
2263 /* Set up the stack pointer */
2264 movl $(init_thread_union+THREAD_SIZE),%esp
2265
2266+ call setup_pda
2267+
2268 /* get vendor info */
2269 xorl %eax,%eax # call CPUID with 0 -> return vendor ID
2270 XEN_CPUID
2271@@ -57,14 +60,58 @@ ENTRY(startup_32)
2272
2273 movb $1,X86_HARD_MATH
2274
2275- xorl %eax,%eax # Clear FS/GS and LDT
2276+ xorl %eax,%eax # Clear FS
2277 movl %eax,%fs
2278- movl %eax,%gs
2279+
2280+ movl $(__KERNEL_PDA),%eax
2281+ mov %eax,%gs
2282+
2283 cld # gcc2 wants the direction flag cleared at all times
2284
2285 pushl $0 # fake return address for unwinder
2286 jmp start_kernel
2287
2288+/*
2289+ * Point the GDT at this CPU's PDA. On boot, these will be
2290+ * cpu_gdt_table and boot_pda.
2291+ */
2292+setup_pda:
2293+ /* get the PDA pointer */
2294+ movl $boot_pda, %eax
2295+
2296+ /* slot the PDA address into the GDT */
2297+ mov $cpu_gdt_table, %ecx
2298+ mov %ax, (__KERNEL_PDA+0+2)(%ecx) /* base & 0x0000ffff */
2299+ shr $16, %eax
2300+ mov %al, (__KERNEL_PDA+4+0)(%ecx) /* base & 0x00ff0000 */
2301+ mov %ah, (__KERNEL_PDA+4+3)(%ecx) /* base & 0xff000000 */
2302+
2303+ # %esi still points to start_info, and no registers
2304+ # need to be preserved.
2305+
2306+ movl XEN_START_mfn_list(%esi), %ebx
2307+ movl $(cpu_gdt_table - __PAGE_OFFSET), %eax
2308+ shrl $PAGE_SHIFT, %eax
2309+ movl (%ebx,%eax,4), %ecx
2310+ pushl %ecx # frame number for set_gdt below
2311+
2312+ xorl %esi, %esi
2313+ xorl %edx, %edx
2314+ shldl $PAGE_SHIFT, %ecx, %edx
2315+ shll $PAGE_SHIFT, %ecx
2316+ orl $0x61, %ecx
2317+ movl $cpu_gdt_table, %ebx
2318+ movl $__HYPERVISOR_update_va_mapping, %eax
2319+ int $0x82
2320+
2321+ movl $(PAGE_SIZE_asm / 8), %ecx
2322+ movl %esp, %ebx
2323+ movl $__HYPERVISOR_set_gdt, %eax
2324+ int $0x82
2325+
2326+ popl %ecx
2327+ ret
2328+
2329 #define HYPERCALL_PAGE_OFFSET 0x1000
2330 .org HYPERCALL_PAGE_OFFSET
2331 ENTRY(hypercall_page)
2332@@ -93,7 +140,8 @@ ENTRY(empty_zero_page)
2333 /*
2334 * The Global Descriptor Table contains 28 quadwords, per-CPU.
2335 */
2336- .align L1_CACHE_BYTES
2337+ .section .data.page_aligned, "aw"
2338+ .align PAGE_SIZE_asm
2339 ENTRY(cpu_gdt_table)
2340 .quad 0x0000000000000000 /* NULL descriptor */
2341 .quad 0x0000000000000000 /* 0x0b reserved */
2342@@ -135,12 +183,13 @@ ENTRY(cpu_gdt_table)
2343 .quad 0x0000000000000000 /* 0xc0 APM CS 16 code (16 bit) */
2344 .quad 0x0000000000000000 /* 0xc8 APM DS data */
2345
2346- .quad 0x0000000000000000 /* 0xd0 - ESPFIX 16-bit SS */
2347- .quad 0x0000000000000000 /* 0xd8 - unused */
2348+ .quad 0x0000000000000000 /* 0xd0 - ESPFIX SS */
2349+ .quad 0x00cf92000000ffff /* 0xd8 - PDA */
2350 .quad 0x0000000000000000 /* 0xe0 - unused */
2351 .quad 0x0000000000000000 /* 0xe8 - unused */
2352 .quad 0x0000000000000000 /* 0xf0 - unused */
2353 .quad 0x0000000000000000 /* 0xf8 - GDT entry 31: double-fault TSS */
2354+ .align PAGE_SIZE_asm
2355
2356 #if CONFIG_XEN_COMPAT <= 0x030002
2357 /*
2358@@ -165,9 +214,9 @@ ENTRY(cpu_gdt_table)
2359 .ascii ",ELF_PADDR_OFFSET=0x"
2360 utoa __PAGE_OFFSET
2361 .ascii ",VIRT_ENTRY=0x"
2362- utoa (__PAGE_OFFSET + __PHYSICAL_START + VIRT_ENTRY_OFFSET)
2363+ utoa (__PAGE_OFFSET + LOAD_PHYSICAL_ADDR + VIRT_ENTRY_OFFSET)
2364 .ascii ",HYPERCALL_PAGE=0x"
2365- utoa ((__PHYSICAL_START+HYPERCALL_PAGE_OFFSET)>>PAGE_SHIFT)
2366+ utoa ((LOAD_PHYSICAL_ADDR+HYPERCALL_PAGE_OFFSET)>>PAGE_SHIFT)
2367 .ascii ",FEATURES=writable_page_tables"
2368 .ascii "|writable_descriptor_tables"
2369 .ascii "|auto_translated_physmap"
2370--- sle11-2009-06-29.orig/arch/x86/kernel/io_apic_32-xen.c 2009-03-04 11:28:34.000000000 +0100
2371+++ sle11-2009-06-29/arch/x86/kernel/io_apic_32-xen.c 2008-12-15 11:26:44.000000000 +0100
2372@@ -34,6 +34,7 @@
2373 #include <linux/pci.h>
2374 #include <linux/msi.h>
2375 #include <linux/htirq.h>
2376+#include <linux/freezer.h>
2377
2378 #include <asm/io.h>
2379 #include <asm/smp.h>
2380@@ -199,14 +200,20 @@ static struct IO_APIC_route_entry ioapic
2381 * the interrupt, and we need to make sure the entry is fully populated
2382 * before that happens.
2383 */
2384-static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
2385+static void
2386+__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
2387 {
2388- unsigned long flags;
2389 union entry_union eu;
2390 eu.entry = e;
2391- spin_lock_irqsave(&ioapic_lock, flags);
2392 io_apic_write(apic, 0x11 + 2*pin, eu.w2);
2393 io_apic_write(apic, 0x10 + 2*pin, eu.w1);
2394+}
2395+
2396+static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
2397+{
2398+ unsigned long flags;
2399+ spin_lock_irqsave(&ioapic_lock, flags);
2400+ __ioapic_write_entry(apic, pin, e);
2401 spin_unlock_irqrestore(&ioapic_lock, flags);
2402 }
2403
2404@@ -889,8 +896,7 @@ static int __init find_isa_irq_pin(int i
2405
2406 if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
2407 mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
2408- mp_bus_id_to_type[lbus] == MP_BUS_MCA ||
2409- mp_bus_id_to_type[lbus] == MP_BUS_NEC98
2410+ mp_bus_id_to_type[lbus] == MP_BUS_MCA
2411 ) &&
2412 (mp_irqs[i].mpc_irqtype == type) &&
2413 (mp_irqs[i].mpc_srcbusirq == irq))
2414@@ -909,8 +915,7 @@ static int __init find_isa_irq_apic(int
2415
2416 if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
2417 mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
2418- mp_bus_id_to_type[lbus] == MP_BUS_MCA ||
2419- mp_bus_id_to_type[lbus] == MP_BUS_NEC98
2420+ mp_bus_id_to_type[lbus] == MP_BUS_MCA
2421 ) &&
2422 (mp_irqs[i].mpc_irqtype == type) &&
2423 (mp_irqs[i].mpc_srcbusirq == irq))
2424@@ -1043,12 +1048,6 @@ static int EISA_ELCR(unsigned int irq)
2425 #define default_MCA_trigger(idx) (1)
2426 #define default_MCA_polarity(idx) (0)
2427
2428-/* NEC98 interrupts are always polarity zero edge triggered,
2429- * when listed as conforming in the MP table. */
2430-
2431-#define default_NEC98_trigger(idx) (0)
2432-#define default_NEC98_polarity(idx) (0)
2433-
2434 static int __init MPBIOS_polarity(int idx)
2435 {
2436 int bus = mp_irqs[idx].mpc_srcbus;
2437@@ -1083,11 +1082,6 @@ static int __init MPBIOS_polarity(int id
2438 polarity = default_MCA_polarity(idx);
2439 break;
2440 }
2441- case MP_BUS_NEC98: /* NEC 98 pin */
2442- {
2443- polarity = default_NEC98_polarity(idx);
2444- break;
2445- }
2446 default:
2447 {
2448 printk(KERN_WARNING "broken BIOS!!\n");
2449@@ -1157,11 +1151,6 @@ static int MPBIOS_trigger(int idx)
2450 trigger = default_MCA_trigger(idx);
2451 break;
2452 }
2453- case MP_BUS_NEC98: /* NEC 98 pin */
2454- {
2455- trigger = default_NEC98_trigger(idx);
2456- break;
2457- }
2458 default:
2459 {
2460 printk(KERN_WARNING "broken BIOS!!\n");
2461@@ -1223,7 +1212,6 @@ static int pin_2_irq(int idx, int apic,
2462 case MP_BUS_ISA: /* ISA pin */
2463 case MP_BUS_EISA:
2464 case MP_BUS_MCA:
2465- case MP_BUS_NEC98:
2466 {
2467 irq = mp_irqs[idx].mpc_srcbusirq;
2468 break;
2469@@ -1291,7 +1279,7 @@ static inline int IO_APIC_irq_trigger(in
2470 }
2471
2472 /* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
2473-u8 irq_vector[NR_IRQ_VECTORS] __read_mostly; /* = { FIRST_DEVICE_VECTOR , 0 }; */
2474+static u8 irq_vector[NR_IRQ_VECTORS] __read_mostly; /* = { FIRST_DEVICE_VECTOR , 0 }; */
2475
2476 static int __assign_irq_vector(int irq)
2477 {
2478@@ -1417,8 +1405,8 @@ static void __init setup_IO_APIC_irqs(vo
2479 if (!apic && (irq < 16))
2480 disable_8259A_irq(irq);
2481 }
2482- ioapic_write_entry(apic, pin, entry);
2483 spin_lock_irqsave(&ioapic_lock, flags);
2484+ __ioapic_write_entry(apic, pin, entry);
2485 set_native_irq_info(irq, TARGET_CPUS);
2486 spin_unlock_irqrestore(&ioapic_lock, flags);
2487 }
2488@@ -1988,6 +1976,15 @@ static void __init setup_ioapic_ids_from
2489 #endif
2490
2491 #ifndef CONFIG_XEN
2492+static int no_timer_check __initdata;
2493+
2494+static int __init notimercheck(char *s)
2495+{
2496+ no_timer_check = 1;
2497+ return 1;
2498+}
2499+__setup("no_timer_check", notimercheck);
2500+
2501 /*
2502 * There is a nasty bug in some older SMP boards, their mptable lies
2503 * about the timer IRQ. We do the following to work around the situation:
2504@@ -1996,10 +1993,13 @@ static void __init setup_ioapic_ids_from
2505 * - if this function detects that timer IRQs are defunct, then we fall
2506 * back to ISA timer IRQs
2507 */
2508-static int __init timer_irq_works(void)
2509+int __init timer_irq_works(void)
2510 {
2511 unsigned long t1 = jiffies;
2512
2513+ if (no_timer_check)
2514+ return 1;
2515+
2516 local_irq_enable();
2517 /* Let ten ticks pass... */
2518 mdelay((10 * 1000) / HZ);
2519@@ -2226,9 +2226,15 @@ static inline void unlock_ExtINT_logic(v
2520 unsigned char save_control, save_freq_select;
2521
2522 pin = find_isa_irq_pin(8, mp_INT);
2523+ if (pin == -1) {
2524+ WARN_ON_ONCE(1);
2525+ return;
2526+ }
2527 apic = find_isa_irq_apic(8, mp_INT);
2528- if (pin == -1)
2529+ if (apic == -1) {
2530+ WARN_ON_ONCE(1);
2531 return;
2532+ }
2533
2534 entry0 = ioapic_read_entry(apic, pin);
2535 clear_IO_APIC_pin(apic, pin);
2536@@ -2273,7 +2279,7 @@ int timer_uses_ioapic_pin_0;
2537 * is so screwy. Thanks to Brian Perkins for testing/hacking this beast
2538 * fanatically on his truly buggy board.
2539 */
2540-static inline void check_timer(void)
2541+static inline void __init check_timer(void)
2542 {
2543 int apic1, pin1, apic2, pin2;
2544 int vector;
2545@@ -2558,7 +2564,7 @@ device_initcall(ioapic_init_sysfs);
2546 int create_irq(void)
2547 {
2548 /* Allocate an unused irq */
2549- int irq, new, vector;
2550+ int irq, new, vector = 0;
2551 unsigned long flags;
2552
2553 irq = -ENOSPC;
2554@@ -2939,8 +2945,8 @@ int io_apic_set_pci_routing (int ioapic,
2555 if (!ioapic && (irq < 16))
2556 disable_8259A_irq(irq);
2557
2558- ioapic_write_entry(ioapic, pin, entry);
2559 spin_lock_irqsave(&ioapic_lock, flags);
2560+ __ioapic_write_entry(ioapic, pin, entry);
2561 set_native_irq_info(irq, TARGET_CPUS);
2562 spin_unlock_irqrestore(&ioapic_lock, flags);
2563
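
The io_apic changes above follow a common locking refactor: the RTE write is split into a raw __ioapic_write_entry() that assumes ioapic_lock is held, plus a locking wrapper, so setup_IO_APIC_irqs() and io_apic_set_pci_routing() can write the entry and call set_native_irq_info() inside one critical section instead of taking the lock twice. A generic userspace sketch of the pattern, using pthreads and invented names:

#include <pthread.h>

static pthread_mutex_t entry_lock = PTHREAD_MUTEX_INITIALIZER;
static int shared_entry;

/* Raw writer: the caller must already hold entry_lock. */
static void __write_entry(int v)
{
        shared_entry = v;
}

/* Locking wrapper for callers that arrive without the lock. */
static void write_entry(int v)
{
        pthread_mutex_lock(&entry_lock);
        __write_entry(v);
        pthread_mutex_unlock(&entry_lock);
}

/* A caller needing two steps to appear atomic takes the lock once and
 * uses the raw variant -- the shape setup_IO_APIC_irqs() now has. */
static void write_entry_and_update(int v)
{
        pthread_mutex_lock(&entry_lock);
        __write_entry(v);
        /* ...second operation under the same critical section... */
        pthread_mutex_unlock(&entry_lock);
}
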
2564--- sle11-2009-06-29.orig/arch/x86/kernel/ldt_32-xen.c 2009-03-04 11:28:34.000000000 +0100
2565+++ sle11-2009-06-29/arch/x86/kernel/ldt_32-xen.c 2008-12-15 11:26:44.000000000 +0100
2566@@ -177,16 +177,14 @@ static int read_default_ldt(void __user
2567 {
2568 int err;
2569 unsigned long size;
2570- void *address;
2571
2572 err = 0;
2573- address = &default_ldt[0];
2574 size = 5*sizeof(struct desc_struct);
2575 if (size > bytecount)
2576 size = bytecount;
2577
2578 err = size;
2579- if (copy_to_user(ptr, address, size))
2580+ if (clear_user(ptr, size))
2581 err = -EFAULT;
2582
2583 return err;
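
The read_default_ldt() change above works because the default LDT consists solely of null descriptors: zero-filling the user buffer with clear_user() produces the same bytes as copying from default_ldt[], without referencing the array. A trivial userspace check of that equivalence (illustrative only):

#include <assert.h>
#include <string.h>

int main(void)
{
        char default_ldt[40] = { 0 };   /* stand-in: five null descriptors */
        char copied[40], cleared[40];

        memcpy(copied, default_ldt, sizeof(copied));  /* old path */
        memset(cleared, 0, sizeof(cleared));          /* new path */
        assert(memcmp(copied, cleared, sizeof(copied)) == 0);
        return 0;
}
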
2584--- sle11-2009-06-29.orig/arch/x86/kernel/microcode-xen.c 2009-03-04 11:28:34.000000000 +0100
2585+++ sle11-2009-06-29/arch/x86/kernel/microcode-xen.c 2008-12-15 11:26:44.000000000 +0100
2586@@ -1,7 +1,7 @@
2587 /*
2588 * Intel CPU Microcode Update Driver for Linux
2589 *
2590- * Copyright (C) 2000-2004 Tigran Aivazian
2591+ * Copyright (C) 2000-2006 Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
2592 * 2006 Shaohua Li <shaohua.li@intel.com>
2593 *
2594 * This driver allows to upgrade microcode on Intel processors
2595@@ -43,7 +43,7 @@
2596 #include <asm/processor.h>
2597
2598 MODULE_DESCRIPTION("Intel CPU (IA-32) Microcode Update Driver");
2599-MODULE_AUTHOR("Tigran Aivazian <tigran@veritas.com>");
2600+MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>");
2601 MODULE_LICENSE("GPL");
2602
2603 static int verbose;
2604@@ -195,7 +195,7 @@ static int __init microcode_init (void)
2605 request_microcode();
2606
2607 printk(KERN_INFO
2608- "IA-32 Microcode Update Driver: v" MICROCODE_VERSION " <tigran@veritas.com>\n");
2609+ "IA-32 Microcode Update Driver: v" MICROCODE_VERSION " <tigran@aivazian.fsnet.co.uk>\n");
2610 return 0;
2611 }
2612
2613--- sle11-2009-06-29.orig/arch/x86/kernel/mpparse_32-xen.c 2009-03-04 11:28:34.000000000 +0100
2614+++ sle11-2009-06-29/arch/x86/kernel/mpparse_32-xen.c 2008-12-15 11:26:44.000000000 +0100
2615@@ -36,7 +36,7 @@
2616
2617 /* Have we found an MP table */
2618 int smp_found_config;
2619-unsigned int __initdata maxcpus = NR_CPUS;
2620+unsigned int __cpuinitdata maxcpus = NR_CPUS;
2621
2622 /*
2623 * Various Linux-internal data structures created from the
2624@@ -102,10 +102,10 @@ static int __init mpf_checksum(unsigned
2625 */
2626
2627 static int mpc_record;
2628-static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] __initdata;
2629+static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] __cpuinitdata;
2630
2631 #ifndef CONFIG_XEN
2632-static void __devinit MP_processor_info (struct mpc_config_processor *m)
2633+static void __cpuinit MP_processor_info (struct mpc_config_processor *m)
2634 {
2635 int ver, apicid;
2636 physid_mask_t phys_cpu;
2637@@ -221,7 +221,7 @@ static void __devinit MP_processor_info
2638 bios_cpu_apicid[num_processors - 1] = m->mpc_apicid;
2639 }
2640 #else
2641-void __init MP_processor_info (struct mpc_config_processor *m)
2642+static void __cpuinit MP_processor_info (struct mpc_config_processor *m)
2643 {
2644 num_processors++;
2645 }
2646@@ -256,8 +256,6 @@ static void __init MP_bus_info (struct m
2647 mp_current_pci_id++;
2648 } else if (strncmp(str, BUSTYPE_MCA, sizeof(BUSTYPE_MCA)-1) == 0) {
2649 mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA;
2650- } else if (strncmp(str, BUSTYPE_NEC98, sizeof(BUSTYPE_NEC98)-1) == 0) {
2651- mp_bus_id_to_type[m->mpc_busid] = MP_BUS_NEC98;
2652 } else {
2653 printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str);
2654 }
2655@@ -842,7 +840,7 @@ void __init mp_register_lapic_address(u6
2656 #endif
2657 }
2658
2659-void __devinit mp_register_lapic (u8 id, u8 enabled)
2660+void __cpuinit mp_register_lapic (u8 id, u8 enabled)
2661 {
2662 struct mpc_config_processor processor;
2663 int boot_cpu = 0;
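
The mpparse hunks above mostly relax __init/__initdata annotations to __cpuinit/__cpuinitdata: with CONFIG_HOTPLUG_CPU disabled these still collapse to init sections and are discarded after boot, but with hotplug enabled the helpers must stay resident because onlining a CPU re-enters them. A rough model of how such annotations work, using demo macros rather than the kernel's real ones:

/* Demo macros only -- the kernel's __init/__cpuinit are more involved.
 * The section name is what lets the linker script discard or keep the
 * code: init text is freed after boot; hotplug-safe code must not be. */
#define __init_demo    __attribute__((__section__(".init.text.demo")))
#define __cpuinit_demo /* kept resident when CPU hotplug is possible */

static void __init_demo boot_only_setup(void)  { /* freed after boot */ }
static void __cpuinit_demo onlining_path(void) { /* may run again later */ }
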
2664--- sle11-2009-06-29.orig/arch/x86/kernel/pci-dma-xen.c 2009-03-04 11:28:34.000000000 +0100
2665+++ sle11-2009-06-29/arch/x86/kernel/pci-dma-xen.c 2008-12-15 11:26:44.000000000 +0100
2666@@ -276,7 +276,7 @@ EXPORT_SYMBOL(dma_free_coherent);
2667 int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
2668 dma_addr_t device_addr, size_t size, int flags)
2669 {
2670- void __iomem *mem_base;
2671+ void __iomem *mem_base = NULL;
2672 int pages = size >> PAGE_SHIFT;
2673 int bitmap_size = (pages + 31)/32;
2674
2675@@ -293,14 +293,12 @@ int dma_declare_coherent_memory(struct d
2676 if (!mem_base)
2677 goto out;
2678
2679- dev->dma_mem = kmalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL);
2680+ dev->dma_mem = kzalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL);
2681 if (!dev->dma_mem)
2682 goto out;
2683- memset(dev->dma_mem, 0, sizeof(struct dma_coherent_mem));
2684- dev->dma_mem->bitmap = kmalloc(bitmap_size, GFP_KERNEL);
2685+ dev->dma_mem->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
2686 if (!dev->dma_mem->bitmap)
2687 goto free1_out;
2688- memset(dev->dma_mem->bitmap, 0, bitmap_size);
2689
2690 dev->dma_mem->virt_base = mem_base;
2691 dev->dma_mem->device_base = device_addr;
2692@@ -315,6 +313,8 @@ int dma_declare_coherent_memory(struct d
2693 free1_out:
2694 kfree(dev->dma_mem->bitmap);
2695 out:
2696+ if (mem_base)
2697+ iounmap(mem_base);
2698 return 0;
2699 }
2700 EXPORT_SYMBOL(dma_declare_coherent_memory);
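
Two independent fixes land in the pci-dma hunk above: the kmalloc()+memset() pairs become kzalloc(), and the error path gains an iounmap() so a half-initialized declaration no longer leaks the mapping. A userspace analogue of the corrected allocation/unwind shape (calloc standing in for kzalloc, free for the unmap):

#include <stdlib.h>

struct dma_mem {
        unsigned long *bitmap;
};

static struct dma_mem *declare_mem(size_t bitmap_size)
{
        struct dma_mem *m = calloc(1, sizeof(*m));    /* zeroed, one call */
        if (!m)
                return NULL;

        m->bitmap = calloc(1, bitmap_size);
        if (!m->bitmap) {
                free(m);        /* unwind everything acquired so far */
                return NULL;
        }
        return m;
}
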
2701--- sle11-2009-06-29.orig/arch/x86/kernel/process_32-xen.c 2009-03-04 11:28:34.000000000 +0100
2702+++ sle11-2009-06-29/arch/x86/kernel/process_32-xen.c 2008-12-15 11:26:44.000000000 +0100
2703@@ -60,6 +60,7 @@
2704
2705 #include <asm/tlbflush.h>
2706 #include <asm/cpu.h>
2707+#include <asm/pda.h>
2708
2709 asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
2710
2711@@ -104,28 +105,24 @@ EXPORT_SYMBOL(enable_hlt);
2712 */
2713 static void poll_idle (void)
2714 {
2715- local_irq_enable();
2716-
2717- asm volatile(
2718- "2:"
2719- "testl %0, %1;"
2720- "rep; nop;"
2721- "je 2b;"
2722- : : "i"(_TIF_NEED_RESCHED), "m" (current_thread_info()->flags));
2723+ cpu_relax();
2724 }
2725
2726 static void xen_idle(void)
2727 {
2728- local_irq_disable();
2729+ current_thread_info()->status &= ~TS_POLLING;
2730+ /*
2731+ * TS_POLLING-cleared state must be visible before we
2732+ * test NEED_RESCHED:
2733+ */
2734+ smp_mb();
2735
2736- if (need_resched())
2737+ local_irq_disable();
2738+ if (!need_resched())
2739+ safe_halt(); /* enables interrupts racelessly */
2740+ else
2741 local_irq_enable();
2742- else {
2743- current_thread_info()->status &= ~TS_POLLING;
2744- smp_mb__after_clear_bit();
2745- safe_halt();
2746- current_thread_info()->status |= TS_POLLING;
2747- }
2748+ current_thread_info()->status |= TS_POLLING;
2749 }
2750 #ifdef CONFIG_APM_MODULE
2751 EXPORT_SYMBOL(default_idle);
2752@@ -250,8 +247,8 @@ void show_regs(struct pt_regs * regs)
2753 regs->eax,regs->ebx,regs->ecx,regs->edx);
2754 printk("ESI: %08lx EDI: %08lx EBP: %08lx",
2755 regs->esi, regs->edi, regs->ebp);
2756- printk(" DS: %04x ES: %04x\n",
2757- 0xffff & regs->xds,0xffff & regs->xes);
2758+ printk(" DS: %04x ES: %04x GS: %04x\n",
2759+ 0xffff & regs->xds,0xffff & regs->xes, 0xffff & regs->xgs);
2760
2761 cr0 = read_cr0();
2762 cr2 = read_cr2();
2763@@ -282,6 +279,7 @@ int kernel_thread(int (*fn)(void *), voi
2764
2765 regs.xds = __USER_DS;
2766 regs.xes = __USER_DS;
2767+ regs.xgs = __KERNEL_PDA;
2768 regs.orig_eax = -1;
2769 regs.eip = (unsigned long) kernel_thread_helper;
2770 regs.xcs = __KERNEL_CS | get_kernel_rpl();
2771@@ -359,7 +357,6 @@ int copy_thread(int nr, unsigned long cl
2772 p->thread.eip = (unsigned long) ret_from_fork;
2773
2774 savesegment(fs,p->thread.fs);
2775- savesegment(gs,p->thread.gs);
2776
2777 tsk = current;
2778 if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
2779@@ -438,7 +435,7 @@ void dump_thread(struct pt_regs * regs,
2780 dump->regs.ds = regs->xds;
2781 dump->regs.es = regs->xes;
2782 savesegment(fs,dump->regs.fs);
2783- savesegment(gs,dump->regs.gs);
2784+ dump->regs.gs = regs->xgs;
2785 dump->regs.orig_eax = regs->orig_eax;
2786 dump->regs.eip = regs->eip;
2787 dump->regs.cs = regs->xcs;
2788@@ -635,17 +632,19 @@ struct task_struct fastcall * __switch_t
2789 if (unlikely(HYPERVISOR_multicall_check(_mcl, mcl - _mcl, NULL)))
2790 BUG();
2791
2792+ /* we're going to use this soon, after a few expensive things */
2793+ if (next_p->fpu_counter > 5)
2794+ prefetch(&next->i387.fxsave);
2795+
2796 /*
2797- * Restore %fs and %gs if needed.
2798+ * Restore %fs if needed.
2799 *
2800- * Glibc normally makes %fs be zero, and %gs is one of
2801- * the TLS segments.
2802+ * Glibc normally makes %fs be zero.
2803 */
2804 if (unlikely(next->fs))
2805 loadsegment(fs, next->fs);
2806
2807- if (next->gs)
2808- loadsegment(gs, next->gs);
2809+ write_pda(pcurrent, next_p);
2810
2811 /*
2812 * Now maybe handle debug registers
2813@@ -655,6 +654,13 @@ struct task_struct fastcall * __switch_t
2814
2815 disable_tsc(prev_p, next_p);
2816
2817+	/* If the task has used the FPU in the last 5 timeslices, just do a full
2818+ * restore of the math state immediately to avoid the trap; the
2819+ * chances of needing FPU soon are obviously high now
2820+ */
2821+ if (next_p->fpu_counter > 5)
2822+ math_state_restore();
2823+
2824 return prev_p;
2825 }
2826
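
The xen_idle() rewrite above encodes a classic sleep/wake protocol: clear TS_POLLING, issue a full barrier, and only then test need_resched() before halting, so a remote waker that sets the flag afterwards is guaranteed to see polling cleared and send an explicit IPI. A model of that ordering in C11 atomics (this is the idea, not the kernel's code; seq_cst fences stand in for smp_mb()):

#include <stdatomic.h>
#include <stdbool.h>

static atomic_bool polling = true;            /* TS_POLLING analogue */
static atomic_bool resched_pending = false;   /* need_resched() analogue */

/* Idle side: stop advertising polling *before* the final flag check. */
static void idle_step(void)
{
        atomic_store(&polling, false);
        atomic_thread_fence(memory_order_seq_cst);   /* smp_mb() analogue */
        if (!atomic_load(&resched_pending)) {
                /* Safe to halt: any waker from here on sees polling ==
                 * false and must deliver an explicit wake-up (IPI). */
        }
        atomic_store(&polling, true);
}

/* Waker side: publish the flag first, then decide whether an IPI is due. */
static void wake_idle_cpu(void)
{
        atomic_store(&resched_pending, true);
        atomic_thread_fence(memory_order_seq_cst);
        if (!atomic_load(&polling)) {
                /* send the IPI */
        }
}
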
2827--- sle11-2009-06-29.orig/arch/x86/kernel/quirks-xen.c 2009-06-29 15:14:52.000000000 +0200
2828+++ sle11-2009-06-29/arch/x86/kernel/quirks-xen.c 2008-12-15 11:26:44.000000000 +0100
2829@@ -3,10 +3,12 @@
2830 */
2831 #include <linux/pci.h>
2832 #include <linux/irq.h>
2833+#include <asm/pci-direct.h>
2834+#include <asm/genapic.h>
2835+#include <asm/cpu.h>
2836
2837 #if defined(CONFIG_X86_IO_APIC) && (defined(CONFIG_SMP) || defined(CONFIG_XEN)) && defined(CONFIG_PCI)
2838-
2839-static void __devinit quirk_intel_irqbalance(struct pci_dev *dev)
2840+static void __devinit verify_quirk_intel_irqbalance(struct pci_dev *dev)
2841 {
2842 u8 config, rev;
2843 u32 word;
2844@@ -14,14 +16,12 @@ static void __devinit quirk_intel_irqbal
2845 /* BIOS may enable hardware IRQ balancing for
2846 * E7520/E7320/E7525(revision ID 0x9 and below)
2847 * based platforms.
2848- * Disable SW irqbalance/affinity on those platforms.
2849+ * For those platforms, make sure that the genapic is set to 'flat'
2850 */
2851 pci_read_config_byte(dev, PCI_CLASS_REVISION, &rev);
2852 if (rev > 0x9)
2853 return;
2854
2855- printk(KERN_INFO "Intel E7520/7320/7525 detected.");
2856-
2857 /* enable access to config space*/
2858 pci_read_config_byte(dev, 0xf4, &config);
2859 pci_write_config_byte(dev, 0xf4, config|0x2);
2860@@ -30,6 +30,46 @@ static void __devinit quirk_intel_irqbal
2861 raw_pci_ops->read(0, 0, 0x40, 0x4c, 2, &word);
2862
2863 if (!(word & (1 << 13))) {
2864+#ifndef CONFIG_XEN
2865+#ifdef CONFIG_X86_64
2866+ if (genapic != &apic_flat)
2867+ panic("APIC mode must be flat on this system\n");
2868+#elif defined(CONFIG_X86_GENERICARCH)
2869+ if (genapic != &apic_default)
2870+ panic("APIC mode must be default(flat) on this system. Use apic=default\n");
2871+#endif
2872+#endif
2873+ }
2874+
2875+ /* put back the original value for config space*/
2876+ if (!(config & 0x2))
2877+ pci_write_config_byte(dev, 0xf4, config);
2878+}
2879+
2880+void __init quirk_intel_irqbalance(void)
2881+{
2882+ u8 config, rev;
2883+ u32 word;
2884+
2885+ /* BIOS may enable hardware IRQ balancing for
2886+ * E7520/E7320/E7525(revision ID 0x9 and below)
2887+ * based platforms.
2888+ * Disable SW irqbalance/affinity on those platforms.
2889+ */
2890+ rev = read_pci_config_byte(0, 0, 0, PCI_CLASS_REVISION);
2891+ if (rev > 0x9)
2892+ return;
2893+
2894+ printk(KERN_INFO "Intel E7520/7320/7525 detected.");
2895+
2896+ /* enable access to config space */
2897+ config = read_pci_config_byte(0, 0, 0, 0xf4);
2898+ write_pci_config_byte(0, 0, 0, 0xf4, config|0x2);
2899+
2900+ /* read xTPR register */
2901+ word = read_pci_config_16(0, 0, 0x40, 0x4c);
2902+
2903+ if (!(word & (1 << 13))) {
2904 struct xen_platform_op op;
2905 printk(KERN_INFO "Disabling irq balancing and affinity\n");
2906 op.cmd = XENPF_platform_quirk;
2907@@ -37,11 +77,12 @@ static void __devinit quirk_intel_irqbal
2908 WARN_ON(HYPERVISOR_platform_op(&op));
2909 }
2910
2911- /* put back the original value for config space*/
2912+ /* put back the original value for config space */
2913 if (!(config & 0x2))
2914- pci_write_config_byte(dev, 0xf4, config);
2915+ write_pci_config_byte(0, 0, 0, 0xf4, config);
2916 }
2917-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7320_MCH, quirk_intel_irqbalance);
2918-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7525_MCH, quirk_intel_irqbalance);
2919-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH, quirk_intel_irqbalance);
2920+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7320_MCH, verify_quirk_intel_irqbalance);
2921+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7525_MCH, verify_quirk_intel_irqbalance);
2922+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH, verify_quirk_intel_irqbalance);
2923+
2924 #endif
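
The quirk split above exists because the __init half runs before the PCI core has enumerated devices, so it must use direct type-1 configuration cycles (read_pci_config_byte() and friends) rather than pci_read_config_byte() on a struct pci_dev. A sketch of such a type-1 read via the 0xCF8/0xCFC port pair, assuming x86 port I/O from userspace with iopl(3) privileges:

#include <stdint.h>
#include <sys/io.h>     /* outl()/inl(); requires iopl(3) or root */

/* Type-1 PCI configuration read: program CONFIG_ADDRESS (0xCF8) with the
 * enable bit plus bus/device/function/register, then read CONFIG_DATA
 * (0xCFC) and shift out the requested 16-bit word. */
static uint16_t pci_conf1_read16(unsigned bus, unsigned dev,
                                 unsigned fn, unsigned reg)
{
        uint32_t addr = 0x80000000u | (bus << 16) | (dev << 11)
                        | (fn << 8) | (reg & 0xfc);
        outl(addr, 0xCF8);
        return (uint16_t)(inl(0xCFC) >> ((reg & 2) * 8));
}
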
2925--- sle11-2009-06-29.orig/arch/x86/kernel/setup_32-xen.c 2009-03-04 11:28:34.000000000 +0100
2926+++ sle11-2009-06-29/arch/x86/kernel/setup_32-xen.c 2008-12-15 11:26:44.000000000 +0100
2927@@ -76,9 +76,6 @@
2928 #include <xen/interface/kexec.h>
2929 #endif
2930
2931-/* Forward Declaration. */
2932-void __init find_max_pfn(void);
2933-
2934 static int xen_panic_event(struct notifier_block *, unsigned long, void *);
2935 static struct notifier_block xen_panic_block = {
2936 xen_panic_event, NULL, 0 /* try to go last */
2937@@ -89,14 +86,11 @@ int disable_pse __devinitdata = 0;
2938 /*
2939 * Machine setup..
2940 */
2941-
2942-#ifdef CONFIG_EFI
2943-int efi_enabled = 0;
2944-EXPORT_SYMBOL(efi_enabled);
2945-#endif
2946+extern struct resource code_resource;
2947+extern struct resource data_resource;
2948
2949 /* cpu data as detected by the assembly code in head.S */
2950-struct cpuinfo_x86 new_cpu_data __initdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
2951+struct cpuinfo_x86 new_cpu_data __cpuinitdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
2952 /* common cpu data for all cpus */
2953 struct cpuinfo_x86 boot_cpu_data __read_mostly = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
2954 EXPORT_SYMBOL(boot_cpu_data);
2955@@ -112,12 +106,6 @@ unsigned int machine_submodel_id;
2956 unsigned int BIOS_revision;
2957 unsigned int mca_pentium_flag;
2958
2959-/* For PCI or other memory-mapped resources */
2960-unsigned long pci_mem_start = 0x10000000;
2961-#ifdef CONFIG_PCI
2962-EXPORT_SYMBOL(pci_mem_start);
2963-#endif
2964-
2965 /* Boot loader ID as an integer, for the benefit of proc_dointvec */
2966 int bootloader_type;
2967
2968@@ -150,10 +138,6 @@ struct ist_info ist_info;
2969 defined(CONFIG_X86_SPEEDSTEP_SMI_MODULE)
2970 EXPORT_SYMBOL(ist_info);
2971 #endif
2972-struct e820map e820;
2973-#ifdef CONFIG_XEN
2974-struct e820map machine_e820;
2975-#endif
2976
2977 extern void early_cpu_init(void);
2978 extern int root_mountflags;
2979@@ -168,209 +152,6 @@ static char command_line[COMMAND_LINE_SI
2980
2981 unsigned char __initdata boot_params[PARAM_SIZE];
2982
2983-static struct resource data_resource = {
2984- .name = "Kernel data",
2985- .start = 0,
2986- .end = 0,
2987- .flags = IORESOURCE_BUSY | IORESOURCE_MEM
2988-};
2989-
2990-static struct resource code_resource = {
2991- .name = "Kernel code",
2992- .start = 0,
2993- .end = 0,
2994- .flags = IORESOURCE_BUSY | IORESOURCE_MEM
2995-};
2996-
2997-static struct resource system_rom_resource = {
2998- .name = "System ROM",
2999- .start = 0xf0000,
3000- .end = 0xfffff,
3001- .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
3002-};
3003-
3004-static struct resource extension_rom_resource = {
3005- .name = "Extension ROM",
3006- .start = 0xe0000,
3007- .end = 0xeffff,
3008- .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
3009-};
3010-
3011-static struct resource adapter_rom_resources[] = { {
3012- .name = "Adapter ROM",
3013- .start = 0xc8000,
3014- .end = 0,
3015- .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
3016-}, {
3017- .name = "Adapter ROM",
3018- .start = 0,
3019- .end = 0,
3020- .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
3021-}, {
3022- .name = "Adapter ROM",
3023- .start = 0,
3024- .end = 0,
3025- .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
3026-}, {
3027- .name = "Adapter ROM",
3028- .start = 0,
3029- .end = 0,
3030- .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
3031-}, {
3032- .name = "Adapter ROM",
3033- .start = 0,
3034- .end = 0,
3035- .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
3036-}, {
3037- .name = "Adapter ROM",
3038- .start = 0,
3039- .end = 0,
3040- .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
3041-} };
3042-
3043-static struct resource video_rom_resource = {
3044- .name = "Video ROM",
3045- .start = 0xc0000,
3046- .end = 0xc7fff,
3047- .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
3048-};
3049-
3050-static struct resource video_ram_resource = {
3051- .name = "Video RAM area",
3052- .start = 0xa0000,
3053- .end = 0xbffff,
3054- .flags = IORESOURCE_BUSY | IORESOURCE_MEM
3055-};
3056-
3057-static struct resource standard_io_resources[] = { {
3058- .name = "dma1",
3059- .start = 0x0000,
3060- .end = 0x001f,
3061- .flags = IORESOURCE_BUSY | IORESOURCE_IO
3062-}, {
3063- .name = "pic1",
3064- .start = 0x0020,
3065- .end = 0x0021,
3066- .flags = IORESOURCE_BUSY | IORESOURCE_IO
3067-}, {
3068- .name = "timer0",
3069- .start = 0x0040,
3070- .end = 0x0043,
3071- .flags = IORESOURCE_BUSY | IORESOURCE_IO
3072-}, {
3073- .name = "timer1",
3074- .start = 0x0050,
3075- .end = 0x0053,
3076- .flags = IORESOURCE_BUSY | IORESOURCE_IO
3077-}, {
3078- .name = "keyboard",
3079- .start = 0x0060,
3080- .end = 0x006f,
3081- .flags = IORESOURCE_BUSY | IORESOURCE_IO
3082-}, {
3083- .name = "dma page reg",
3084- .start = 0x0080,
3085- .end = 0x008f,
3086- .flags = IORESOURCE_BUSY | IORESOURCE_IO
3087-}, {
3088- .name = "pic2",
3089- .start = 0x00a0,
3090- .end = 0x00a1,
3091- .flags = IORESOURCE_BUSY | IORESOURCE_IO
3092-}, {
3093- .name = "dma2",
3094- .start = 0x00c0,
3095- .end = 0x00df,
3096- .flags = IORESOURCE_BUSY | IORESOURCE_IO
3097-}, {
3098- .name = "fpu",
3099- .start = 0x00f0,
3100- .end = 0x00ff,
3101- .flags = IORESOURCE_BUSY | IORESOURCE_IO
3102-} };
3103-
3104-#define romsignature(x) (*(unsigned short *)(x) == 0xaa55)
3105-
3106-static int __init romchecksum(unsigned char *rom, unsigned long length)
3107-{
3108- unsigned char *p, sum = 0;
3109-
3110- for (p = rom; p < rom + length; p++)
3111- sum += *p;
3112- return sum == 0;
3113-}
3114-
3115-static void __init probe_roms(void)
3116-{
3117- unsigned long start, length, upper;
3118- unsigned char *rom;
3119- int i;
3120-
3121-#ifdef CONFIG_XEN
3122- /* Nothing to do if not running in dom0. */
3123- if (!is_initial_xendomain())
3124- return;
3125-#endif
3126-
3127- /* video rom */
3128- upper = adapter_rom_resources[0].start;
3129- for (start = video_rom_resource.start; start < upper; start += 2048) {
3130- rom = isa_bus_to_virt(start);
3131- if (!romsignature(rom))
3132- continue;
3133-
3134- video_rom_resource.start = start;
3135-
3136- /* 0 < length <= 0x7f * 512, historically */
3137- length = rom[2] * 512;
3138-
3139- /* if checksum okay, trust length byte */
3140- if (length && romchecksum(rom, length))
3141- video_rom_resource.end = start + length - 1;
3142-
3143- request_resource(&iomem_resource, &video_rom_resource);
3144- break;
3145- }
3146-
3147- start = (video_rom_resource.end + 1 + 2047) & ~2047UL;
3148- if (start < upper)
3149- start = upper;
3150-
3151- /* system rom */
3152- request_resource(&iomem_resource, &system_rom_resource);
3153- upper = system_rom_resource.start;
3154-
3155- /* check for extension rom (ignore length byte!) */
3156- rom = isa_bus_to_virt(extension_rom_resource.start);
3157- if (romsignature(rom)) {
3158- length = extension_rom_resource.end - extension_rom_resource.start + 1;
3159- if (romchecksum(rom, length)) {
3160- request_resource(&iomem_resource, &extension_rom_resource);
3161- upper = extension_rom_resource.start;
3162- }
3163- }
3164-
3165- /* check for adapter roms on 2k boundaries */
3166- for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; start += 2048) {
3167- rom = isa_bus_to_virt(start);
3168- if (!romsignature(rom))
3169- continue;
3170-
3171- /* 0 < length <= 0x7f * 512, historically */
3172- length = rom[2] * 512;
3173-
3174- /* but accept any length that fits if checksum okay */
3175- if (!length || start + length > upper || !romchecksum(rom, length))
3176- continue;
3177-
3178- adapter_rom_resources[i].start = start;
3179- adapter_rom_resources[i].end = start + length - 1;
3180- request_resource(&iomem_resource, &adapter_rom_resources[i]);
3181-
3182- start = adapter_rom_resources[i++].end & ~2047UL;
3183- }
3184-}
3185-
3186 /*
3187 * Point at the empty zero page to start with. We map the real shared_info
3188 * page as soon as fixmap is up and running.
3189@@ -386,353 +167,6 @@ EXPORT_SYMBOL(phys_to_machine_mapping);
3190 start_info_t *xen_start_info;
3191 EXPORT_SYMBOL(xen_start_info);
3192
3193-void __init add_memory_region(unsigned long long start,
3194- unsigned long long size, int type)
3195-{
3196- int x;
3197-
3198- if (!efi_enabled) {
3199- x = e820.nr_map;
3200-
3201- if (x == E820MAX) {
3202- printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
3203- return;
3204- }
3205-
3206- e820.map[x].addr = start;
3207- e820.map[x].size = size;
3208- e820.map[x].type = type;
3209- e820.nr_map++;
3210- }
3211-} /* add_memory_region */
3212-
3213-static void __init limit_regions(unsigned long long size)
3214-{
3215- unsigned long long current_addr = 0;
3216- int i;
3217-
3218- if (efi_enabled) {
3219- efi_memory_desc_t *md;
3220- void *p;
3221-
3222- for (p = memmap.map, i = 0; p < memmap.map_end;
3223- p += memmap.desc_size, i++) {
3224- md = p;
3225- current_addr = md->phys_addr + (md->num_pages << 12);
3226- if (md->type == EFI_CONVENTIONAL_MEMORY) {
3227- if (current_addr >= size) {
3228- md->num_pages -=
3229- (((current_addr-size) + PAGE_SIZE-1) >> PAGE_SHIFT);
3230- memmap.nr_map = i + 1;
3231- return;
3232- }
3233- }
3234- }
3235- }
3236- for (i = 0; i < e820.nr_map; i++) {
3237- current_addr = e820.map[i].addr + e820.map[i].size;
3238- if (current_addr < size)
3239- continue;
3240-
3241- if (e820.map[i].type != E820_RAM)
3242- continue;
3243-
3244- if (e820.map[i].addr >= size) {
3245- /*
3246- * This region starts past the end of the
3247- * requested size, skip it completely.
3248- */
3249- e820.nr_map = i;
3250- } else {
3251- e820.nr_map = i + 1;
3252- e820.map[i].size -= current_addr - size;
3253- }
3254- return;
3255- }
3256-#ifdef CONFIG_XEN
3257- if (i==e820.nr_map && current_addr < size) {
3258- /*
3259- * The e820 map finished before our requested size so
3260- * extend the final entry to the requested address.
3261- */
3262- --i;
3263- if (e820.map[i].type == E820_RAM)
3264- e820.map[i].size -= current_addr - size;
3265- else
3266- add_memory_region(current_addr, size - current_addr, E820_RAM);
3267- }
3268-#endif
3269-}
3270-
3271-#define E820_DEBUG 1
3272-
3273-static void __init print_memory_map(char *who)
3274-{
3275- int i;
3276-
3277- for (i = 0; i < e820.nr_map; i++) {
3278- printk(" %s: %016Lx - %016Lx ", who,
3279- e820.map[i].addr,
3280- e820.map[i].addr + e820.map[i].size);
3281- switch (e820.map[i].type) {
3282- case E820_RAM: printk("(usable)\n");
3283- break;
3284- case E820_RESERVED:
3285- printk("(reserved)\n");
3286- break;
3287- case E820_ACPI:
3288- printk("(ACPI data)\n");
3289- break;
3290- case E820_NVS:
3291- printk("(ACPI NVS)\n");
3292- break;
3293- default: printk("type %lu\n", e820.map[i].type);
3294- break;
3295- }
3296- }
3297-}
3298-
3299-/*
3300- * Sanitize the BIOS e820 map.
3301- *
3302- * Some e820 responses include overlapping entries. The following
3303- * replaces the original e820 map with a new one, removing overlaps.
3304- *
3305- */
3306-struct change_member {
3307- struct e820entry *pbios; /* pointer to original bios entry */
3308- unsigned long long addr; /* address for this change point */
3309-};
3310-static struct change_member change_point_list[2*E820MAX] __initdata;
3311-static struct change_member *change_point[2*E820MAX] __initdata;
3312-static struct e820entry *overlap_list[E820MAX] __initdata;
3313-static struct e820entry new_bios[E820MAX] __initdata;
3314-
3315-int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
3316-{
3317- struct change_member *change_tmp;
3318- unsigned long current_type, last_type;
3319- unsigned long long last_addr;
3320- int chgidx, still_changing;
3321- int overlap_entries;
3322- int new_bios_entry;
3323- int old_nr, new_nr, chg_nr;
3324- int i;
3325-
3326- /*
3327- Visually we're performing the following (1,2,3,4 = memory types)...
3328-
3329- Sample memory map (w/overlaps):
3330- ____22__________________
3331- ______________________4_
3332- ____1111________________
3333- _44_____________________
3334- 11111111________________
3335- ____________________33__
3336- ___________44___________
3337- __________33333_________
3338- ______________22________
3339- ___________________2222_
3340- _________111111111______
3341- _____________________11_
3342- _________________4______
3343-
3344- Sanitized equivalent (no overlap):
3345- 1_______________________
3346- _44_____________________
3347- ___1____________________
3348- ____22__________________
3349- ______11________________
3350- _________1______________
3351- __________3_____________
3352- ___________44___________
3353- _____________33_________
3354- _______________2________
3355- ________________1_______
3356- _________________4______
3357- ___________________2____
3358- ____________________33__
3359- ______________________4_
3360- */
3361-
3362- /* if there's only one memory region, don't bother */
3363- if (*pnr_map < 2)
3364- return -1;
3365-
3366- old_nr = *pnr_map;
3367-
3368- /* bail out if we find any unreasonable addresses in bios map */
3369- for (i=0; i<old_nr; i++)
3370- if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
3371- return -1;
3372-
3373- /* create pointers for initial change-point information (for sorting) */
3374- for (i=0; i < 2*old_nr; i++)
3375- change_point[i] = &change_point_list[i];
3376-
3377- /* record all known change-points (starting and ending addresses),
3378- omitting those that are for empty memory regions */
3379- chgidx = 0;
3380- for (i=0; i < old_nr; i++) {
3381- if (biosmap[i].size != 0) {
3382- change_point[chgidx]->addr = biosmap[i].addr;
3383- change_point[chgidx++]->pbios = &biosmap[i];
3384- change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size;
3385- change_point[chgidx++]->pbios = &biosmap[i];
3386- }
3387- }
3388- chg_nr = chgidx; /* true number of change-points */
3389-
3390- /* sort change-point list by memory addresses (low -> high) */
3391- still_changing = 1;
3392- while (still_changing) {
3393- still_changing = 0;
3394- for (i=1; i < chg_nr; i++) {
3395- /* if <current_addr> > <last_addr>, swap */
3396- /* or, if current=<start_addr> & last=<end_addr>, swap */
3397- if ((change_point[i]->addr < change_point[i-1]->addr) ||
3398- ((change_point[i]->addr == change_point[i-1]->addr) &&
3399- (change_point[i]->addr == change_point[i]->pbios->addr) &&
3400- (change_point[i-1]->addr != change_point[i-1]->pbios->addr))
3401- )
3402- {
3403- change_tmp = change_point[i];
3404- change_point[i] = change_point[i-1];
3405- change_point[i-1] = change_tmp;
3406- still_changing=1;
3407- }
3408- }
3409- }
3410-
3411- /* create a new bios memory map, removing overlaps */
3412- overlap_entries=0; /* number of entries in the overlap table */
3413- new_bios_entry=0; /* index for creating new bios map entries */
3414- last_type = 0; /* start with undefined memory type */
3415- last_addr = 0; /* start with 0 as last starting address */
3416- /* loop through change-points, determining affect on the new bios map */
3417- for (chgidx=0; chgidx < chg_nr; chgidx++)
3418- {
3419- /* keep track of all overlapping bios entries */
3420- if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr)
3421- {
3422- /* add map entry to overlap list (> 1 entry implies an overlap) */
3423- overlap_list[overlap_entries++]=change_point[chgidx]->pbios;
3424- }
3425- else
3426- {
3427- /* remove entry from list (order independent, so swap with last) */
3428- for (i=0; i<overlap_entries; i++)
3429- {
3430- if (overlap_list[i] == change_point[chgidx]->pbios)
3431- overlap_list[i] = overlap_list[overlap_entries-1];
3432- }
3433- overlap_entries--;
3434- }
3435- /* if there are overlapping entries, decide which "type" to use */
3436- /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */
3437- current_type = 0;
3438- for (i=0; i<overlap_entries; i++)
3439- if (overlap_list[i]->type > current_type)
3440- current_type = overlap_list[i]->type;
3441- /* continue building up new bios map based on this information */
3442- if (current_type != last_type) {
3443- if (last_type != 0) {
3444- new_bios[new_bios_entry].size =
3445- change_point[chgidx]->addr - last_addr;
3446- /* move forward only if the new size was non-zero */
3447- if (new_bios[new_bios_entry].size != 0)
3448- if (++new_bios_entry >= E820MAX)
3449- break; /* no more space left for new bios entries */
3450- }
3451- if (current_type != 0) {
3452- new_bios[new_bios_entry].addr = change_point[chgidx]->addr;
3453- new_bios[new_bios_entry].type = current_type;
3454- last_addr=change_point[chgidx]->addr;
3455- }
3456- last_type = current_type;
3457- }
3458- }
3459- new_nr = new_bios_entry; /* retain count for new bios entries */
3460-
3461- /* copy new bios mapping into original location */
3462- memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry));
3463- *pnr_map = new_nr;
3464-
3465- return 0;
3466-}
3467-
3468-/*
3469- * Copy the BIOS e820 map into a safe place.
3470- *
3471- * Sanity-check it while we're at it..
3472- *
3473- * If we're lucky and live on a modern system, the setup code
3474- * will have given us a memory map that we can use to properly
3475- * set up memory. If we aren't, we'll fake a memory map.
3476- *
3477- * We check to see that the memory map contains at least 2 elements
3478- * before we'll use it, because the detection code in setup.S may
3479- * not be perfect and most every PC known to man has two memory
3480- * regions: one from 0 to 640k, and one from 1mb up. (The IBM
3481- * thinkpad 560x, for example, does not cooperate with the memory
3482- * detection code.)
3483- */
3484-int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
3485-{
3486-#ifndef CONFIG_XEN
3487- /* Only one memory region (or negative)? Ignore it */
3488- if (nr_map < 2)
3489- return -1;
3490-#else
3491- BUG_ON(nr_map < 1);
3492-#endif
3493-
3494- do {
3495- unsigned long long start = biosmap->addr;
3496- unsigned long long size = biosmap->size;
3497- unsigned long long end = start + size;
3498- unsigned long type = biosmap->type;
3499-
3500- /* Overflow in 64 bits? Ignore the memory map. */
3501- if (start > end)
3502- return -1;
3503-
3504-#ifndef CONFIG_XEN
3505- /*
3506- * Some BIOSes claim RAM in the 640k - 1M region.
3507- * Not right. Fix it up.
3508- */
3509- if (type == E820_RAM) {
3510- if (start < 0x100000ULL && end > 0xA0000ULL) {
3511- if (start < 0xA0000ULL)
3512- add_memory_region(start, 0xA0000ULL-start, type);
3513- if (end <= 0x100000ULL)
3514- continue;
3515- start = 0x100000ULL;
3516- size = end - start;
3517- }
3518- }
3519-#endif
3520- add_memory_region(start, size, type);
3521- } while (biosmap++,--nr_map);
3522-
3523-#ifdef CONFIG_XEN
3524- if (is_initial_xendomain()) {
3525- struct xen_memory_map memmap;
3526-
3527- memmap.nr_entries = E820MAX;
3528- set_xen_guest_handle(memmap.buffer, machine_e820.map);
3529-
3530- if (HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap))
3531- BUG();
3532- machine_e820.nr_map = memmap.nr_entries;
3533- } else
3534- machine_e820 = e820;
3535-#endif
3536-
3537- return 0;
3538-}
3539-
3540 #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
3541 struct edd edd;
3542 #ifdef CONFIG_EDD_MODULE
3543@@ -758,7 +192,7 @@ static inline void copy_edd(void)
3544 }
3545 #endif
3546
3547-static int __initdata user_defined_memmap = 0;
3548+int __initdata user_defined_memmap = 0;
3549
3550 /*
3551 * "mem=nopentium" disables the 4MB page tables.
3552@@ -795,51 +229,6 @@ static int __init parse_mem(char *arg)
3553 }
3554 early_param("mem", parse_mem);
3555
3556-static int __init parse_memmap(char *arg)
3557-{
3558- if (!arg)
3559- return -EINVAL;
3560-
3561- if (strcmp(arg, "exactmap") == 0) {
3562-#ifdef CONFIG_CRASH_DUMP
3563- /* If we are doing a crash dump, we
3564- * still need to know the real mem
3565- * size before original memory map is
3566- * reset.
3567- */
3568- find_max_pfn();
3569- saved_max_pfn = max_pfn;
3570-#endif
3571- e820.nr_map = 0;
3572- user_defined_memmap = 1;
3573- } else {
3574- /* If the user specifies memory size, we
3575- * limit the BIOS-provided memory map to
3576- * that size. exactmap can be used to specify
3577- * the exact map. mem=number can be used to
3578- * trim the existing memory map.
3579- */
3580- unsigned long long start_at, mem_size;
3581-
3582- mem_size = memparse(arg, &arg);
3583- if (*arg == '@') {
3584- start_at = memparse(arg+1, &arg);
3585- add_memory_region(start_at, mem_size, E820_RAM);
3586- } else if (*arg == '#') {
3587- start_at = memparse(arg+1, &arg);
3588- add_memory_region(start_at, mem_size, E820_ACPI);
3589- } else if (*arg == '$') {
3590- start_at = memparse(arg+1, &arg);
3591- add_memory_region(start_at, mem_size, E820_RESERVED);
3592- } else {
3593- limit_regions(mem_size);
3594- user_defined_memmap = 1;
3595- }
3596- }
3597- return 0;
3598-}
3599-early_param("memmap", parse_memmap);
3600-
3601 #ifdef CONFIG_PROC_VMCORE
3602 /* elfcorehdr= specifies the location of elf core header
3603 * stored by the crashed kernel.
3604@@ -906,127 +295,6 @@ early_param("reservetop", parse_reservet
3605 #endif
3606
3607 /*
3608- * Callback for efi_memory_walk.
3609- */
3610-static int __init
3611-efi_find_max_pfn(unsigned long start, unsigned long end, void *arg)
3612-{
3613- unsigned long *max_pfn = arg, pfn;
3614-
3615- if (start < end) {
3616- pfn = PFN_UP(end -1);
3617- if (pfn > *max_pfn)
3618- *max_pfn = pfn;
3619- }
3620- return 0;
3621-}
3622-
3623-static int __init
3624-efi_memory_present_wrapper(unsigned long start, unsigned long end, void *arg)
3625-{
3626- memory_present(0, PFN_UP(start), PFN_DOWN(end));
3627- return 0;
3628-}
3629-
3630-/*
3631- * This function checks if any part of the range <start,end> is mapped
3632- * with type.
3633- */
3634-int
3635-e820_any_mapped(u64 start, u64 end, unsigned type)
3636-{
3637- int i;
3638-
3639-#ifndef CONFIG_XEN
3640- for (i = 0; i < e820.nr_map; i++) {
3641- const struct e820entry *ei = &e820.map[i];
3642-#else
3643- if (!is_initial_xendomain())
3644- return 0;
3645- for (i = 0; i < machine_e820.nr_map; ++i) {
3646- const struct e820entry *ei = &machine_e820.map[i];
3647-#endif
3648-
3649- if (type && ei->type != type)
3650- continue;
3651- if (ei->addr >= end || ei->addr + ei->size <= start)
3652- continue;
3653- return 1;
3654- }
3655- return 0;
3656-}
3657-EXPORT_SYMBOL_GPL(e820_any_mapped);
3658-
3659- /*
3660- * This function checks if the entire range <start,end> is mapped with type.
3661- *
3662- * Note: this function only works correctly if the e820 table is sorted and
3663- * non-overlapping, which is the case
3664- */
3665-int __init
3666-e820_all_mapped(unsigned long s, unsigned long e, unsigned type)
3667-{
3668- u64 start = s;
3669- u64 end = e;
3670- int i;
3671-
3672-#ifndef CONFIG_XEN
3673- for (i = 0; i < e820.nr_map; i++) {
3674- struct e820entry *ei = &e820.map[i];
3675-#else
3676- if (!is_initial_xendomain())
3677- return 0;
3678- for (i = 0; i < machine_e820.nr_map; ++i) {
3679- const struct e820entry *ei = &machine_e820.map[i];
3680-#endif
3681- if (type && ei->type != type)
3682- continue;
3683- /* does the region (at least partly) overlap the current region? */
3684- if (ei->addr >= end || ei->addr + ei->size <= start)
3685- continue;
3686- /* if the region is at the beginning of <start,end> we move
3687- * start to the end of the region since it's ok until there
3688- */
3689- if (ei->addr <= start)
3690- start = ei->addr + ei->size;
3691- /* if start is now at or beyond end, we're done, full
3692- * coverage */
3693- if (start >= end)
3694- return 1; /* we're done */
3695- }
3696- return 0;
3697-}
3698-
3699-/*
3700- * Find the highest page frame number we have available
3701- */
3702-void __init find_max_pfn(void)
3703-{
3704- int i;
3705-
3706- max_pfn = 0;
3707- if (efi_enabled) {
3708- efi_memmap_walk(efi_find_max_pfn, &max_pfn);
3709- efi_memmap_walk(efi_memory_present_wrapper, NULL);
3710- return;
3711- }
3712-
3713- for (i = 0; i < e820.nr_map; i++) {
3714- unsigned long start, end;
3715- /* RAM? */
3716- if (e820.map[i].type != E820_RAM)
3717- continue;
3718- start = PFN_UP(e820.map[i].addr);
3719- end = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
3720- if (start >= end)
3721- continue;
3722- if (end > max_pfn)
3723- max_pfn = end;
3724- memory_present(0, start, end);
3725- }
3726-}
3727-
3728-/*
3729 * Determine low and high memory ranges:
3730 */
3731 unsigned long __init find_max_low_pfn(void)
3732@@ -1085,77 +353,6 @@ unsigned long __init find_max_low_pfn(vo
3733 return max_low_pfn;
3734 }
3735
3736-/*
3737- * Free all available memory for boot time allocation. Used
3738- * as a callback function by efi_memory_walk()
3739- */
3740-
3741-static int __init
3742-free_available_memory(unsigned long start, unsigned long end, void *arg)
3743-{
3744- /* check max_low_pfn */
3745- if (start >= (max_low_pfn << PAGE_SHIFT))
3746- return 0;
3747- if (end >= (max_low_pfn << PAGE_SHIFT))
3748- end = max_low_pfn << PAGE_SHIFT;
3749- if (start < end)
3750- free_bootmem(start, end - start);
3751-
3752- return 0;
3753-}
3754-/*
3755- * Register fully available low RAM pages with the bootmem allocator.
3756- */
3757-static void __init register_bootmem_low_pages(unsigned long max_low_pfn)
3758-{
3759- int i;
3760-
3761- if (efi_enabled) {
3762- efi_memmap_walk(free_available_memory, NULL);
3763- return;
3764- }
3765- for (i = 0; i < e820.nr_map; i++) {
3766- unsigned long curr_pfn, last_pfn, size;
3767- /*
3768- * Reserve usable low memory
3769- */
3770- if (e820.map[i].type != E820_RAM)
3771- continue;
3772- /*
3773- * We are rounding up the start address of usable memory:
3774- */
3775- curr_pfn = PFN_UP(e820.map[i].addr);
3776- if (curr_pfn >= max_low_pfn)
3777- continue;
3778- /*
3779- * ... and at the end of the usable range downwards:
3780- */
3781- last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
3782-
3783-#ifdef CONFIG_XEN
3784- /*
3785- * Truncate to the number of actual pages currently
3786- * present.
3787- */
3788- if (last_pfn > xen_start_info->nr_pages)
3789- last_pfn = xen_start_info->nr_pages;
3790-#endif
3791-
3792- if (last_pfn > max_low_pfn)
3793- last_pfn = max_low_pfn;
3794-
3795- /*
3796- * .. finally, did all the rounding and playing
3797- * around just make the area go away?
3798- */
3799- if (last_pfn <= curr_pfn)
3800- continue;
3801-
3802- size = last_pfn - curr_pfn;
3803- free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size));
3804- }
3805-}
3806-
3807 #ifndef CONFIG_XEN
3808 /*
3809 * workaround for Dell systems that neglect to reserve EBDA
3810@@ -1245,8 +442,8 @@ void __init setup_bootmem_allocator(void
3811 * the (very unlikely) case of us accidentally initializing the
3812 * bootmem allocator with an invalid RAM area.
3813 */
3814- reserve_bootmem(__PHYSICAL_START, (PFN_PHYS(min_low_pfn) +
3815- bootmap_size + PAGE_SIZE-1) - (__PHYSICAL_START));
3816+ reserve_bootmem(__pa_symbol(_text), (PFN_PHYS(min_low_pfn) +
3817+ bootmap_size + PAGE_SIZE-1) - __pa_symbol(_text));
3818
3819 #ifndef CONFIG_XEN
3820 /*
3821@@ -1328,160 +525,6 @@ void __init remapped_pgdat_init(void)
3822 }
3823 }
3824
3825-/*
3826- * Request address space for all standard RAM and ROM resources
3827- * and also for regions reported as reserved by the e820.
3828- */
3829-static void __init
3830-legacy_init_iomem_resources(struct e820entry *e820, int nr_map,
3831- struct resource *code_resource,
3832- struct resource *data_resource)
3833-{
3834- int i;
3835-
3836- probe_roms();
3837-
3838- for (i = 0; i < nr_map; i++) {
3839- struct resource *res;
3840-#ifndef CONFIG_RESOURCES_64BIT
3841- if (e820[i].addr + e820[i].size > 0x100000000ULL)
3842- continue;
3843-#endif
3844- res = kzalloc(sizeof(struct resource), GFP_ATOMIC);
3845- switch (e820[i].type) {
3846- case E820_RAM: res->name = "System RAM"; break;
3847- case E820_ACPI: res->name = "ACPI Tables"; break;
3848- case E820_NVS: res->name = "ACPI Non-volatile Storage"; break;
3849- default: res->name = "reserved";
3850- }
3851- res->start = e820[i].addr;
3852- res->end = res->start + e820[i].size - 1;
3853- res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
3854- if (request_resource(&iomem_resource, res)) {
3855- kfree(res);
3856- continue;
3857- }
3858- if (e820[i].type == E820_RAM) {
3859- /*
3860- * We don't know which RAM region contains kernel data,
3861- * so we try it repeatedly and let the resource manager
3862- * test it.
3863- */
3864-#ifndef CONFIG_XEN
3865- request_resource(res, code_resource);
3866- request_resource(res, data_resource);
3867-#endif
3868-#ifdef CONFIG_KEXEC
3869- if (crashk_res.start != crashk_res.end)
3870- request_resource(res, &crashk_res);
3871-#ifdef CONFIG_XEN
3872- xen_machine_kexec_register_resources(res);
3873-#endif
3874-#endif
3875- }
3876- }
3877-}
3878-
3879-/*
3880- * Locate an unused range of the physical address space below 4G which
3881- * can be used for PCI mappings.
3882- */
3883-static void __init
3884-e820_setup_gap(struct e820entry *e820, int nr_map)
3885-{
3886- unsigned long gapstart, gapsize, round;
3887- unsigned long long last;
3888- int i;
3889-
3890- /*
3891- * Search for the biggest gap in the low 32 bits of the e820
3892- * memory space.
3893- */
3894- last = 0x100000000ull;
3895- gapstart = 0x10000000;
3896- gapsize = 0x400000;
3897- i = nr_map;
3898- while (--i >= 0) {
3899- unsigned long long start = e820[i].addr;
3900- unsigned long long end = start + e820[i].size;
3901-
3902- /*
3903- * Since "last" is at most 4GB, we know we'll
3904- * fit in 32 bits if this condition is true
3905- */
3906- if (last > end) {
3907- unsigned long gap = last - end;
3908-
3909- if (gap > gapsize) {
3910- gapsize = gap;
3911- gapstart = end;
3912- }
3913- }
3914- if (start < last)
3915- last = start;
3916- }
3917-
3918- /*
3919- * See how much we want to round up: start off with
3920- * rounding to the next 1MB area.
3921- */
3922- round = 0x100000;
3923- while ((gapsize >> 4) > round)
3924- round += round;
3925- /* Fun with two's complement */
3926- pci_mem_start = (gapstart + round) & -round;
3927-
3928- printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n",
3929- pci_mem_start, gapstart, gapsize);
3930-}
3931-
3932-/*
3933- * Request address space for all standard resources
3934- *
3935- * This is called just before pcibios_init(), which is also a
3936- * subsys_initcall, but is linked in later (in arch/i386/pci/common.c).
3937- */
3938-static int __init request_standard_resources(void)
3939-{
3940- int i;
3941-
3942- /* Nothing to do if not running in dom0. */
3943- if (!is_initial_xendomain())
3944- return 0;
3945-
3946- printk("Setting up standard PCI resources\n");
3947-#ifdef CONFIG_XEN
3948- legacy_init_iomem_resources(machine_e820.map, machine_e820.nr_map,
3949- &code_resource, &data_resource);
3950-#else
3951- if (efi_enabled)
3952- efi_initialize_iomem_resources(&code_resource, &data_resource);
3953- else
3954- legacy_init_iomem_resources(e820.map, e820.nr_map,
3955- &code_resource, &data_resource);
3956-#endif
3957-
3958- /* EFI systems may still have VGA */
3959- request_resource(&iomem_resource, &video_ram_resource);
3960-
3961- /* request I/O space for devices used on all i[345]86 PCs */
3962- for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++)
3963- request_resource(&ioport_resource, &standard_io_resources[i]);
3964- return 0;
3965-}
3966-
3967-subsys_initcall(request_standard_resources);
3968-
3969-static void __init register_memory(void)
3970-{
3971-#ifdef CONFIG_XEN
3972- if (is_initial_xendomain())
3973- e820_setup_gap(machine_e820.map, machine_e820.nr_map);
3974- else
3975-#endif
3976- e820_setup_gap(e820.map, e820.nr_map);
3977-}
3978-
3979 #ifdef CONFIG_MCA
3980 static void set_mca_bus(int x)
3981 {
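The pci_mem_start computation in the removed e820_setup_gap() leans on a power-of-two identity: when round is a power of two, (x + round) & -round yields the smallest multiple of round strictly above x, because the two's-complement negation of round is exactly the alignment mask. A standalone arithmetic check (values invented):

#include <stdio.h>

int main(void)
{
        unsigned long round = 0x100000UL;               /* 1 MiB */
        unsigned long gapstart = 0x3fe00123UL;
        unsigned long start = (gapstart + round) & -round;

        printf("%#lx\n", start);                        /* prints 0x3ff00000 */
        return 0;
}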
3982@@ -1491,6 +534,12 @@ static void set_mca_bus(int x)
3983 static void set_mca_bus(int x) { }
3984 #endif
3985
3986+/* Overridden in paravirt.c if CONFIG_PARAVIRT */
3987+char * __init __attribute__((weak)) memory_setup(void)
3988+{
3989+ return machine_specific_memory_setup();
3990+}
3991+
3992 /*
3993 * Determine if we were loaded by an EFI loader. If so, then we have also been
3994 * passed the efi memmap, systab, etc., so we should use these data structures
3995@@ -1578,7 +627,7 @@ void __init setup_arch(char **cmdline_p)
3996 efi_init();
3997 else {
3998 printk(KERN_INFO "BIOS-provided physical RAM map:\n");
3999- print_memory_map(machine_specific_memory_setup());
4000+ print_memory_map(memory_setup());
4001 }
4002
4003 copy_edd();
4004@@ -1757,7 +806,7 @@ void __init setup_arch(char **cmdline_p)
4005 get_smp_config();
4006 #endif
4007
4008- register_memory();
4009+ e820_register_memory();
4010
4011 if (is_initial_xendomain()) {
4012 #ifdef CONFIG_VT
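The new memory_setup() above is declared __attribute__((weak)), so the generic definition is silently replaced at link time whenever paravirt code provides a strong one. The pattern in isolation (names invented, not from the patch):

#include <stdio.h>

/* Fallback; discarded if another object file defines a strong
 * demo_setup(). */
char * __attribute__((weak)) demo_setup(void)
{
        return "default memory map";
}

int main(void)
{
        printf("%s\n", demo_setup());
        return 0;
}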
4013--- sle11-2009-06-29.orig/arch/x86/kernel/smp_32-xen.c 2009-03-04 11:28:34.000000000 +0100
4014+++ sle11-2009-06-29/arch/x86/kernel/smp_32-xen.c 2008-12-15 11:26:44.000000000 +0100
4015@@ -659,6 +659,10 @@ int smp_call_function_single(int cpu, vo
4016 put_cpu();
4017 return -EBUSY;
4018 }
4019+
4020+ /* Can deadlock when called with interrupts disabled */
4021+ WARN_ON(irqs_disabled());
4022+
4023 spin_lock_bh(&call_lock);
4024 __smp_call_function_single(cpu, func, info, nonatomic, wait);
4025 spin_unlock_bh(&call_lock);
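The WARN_ON added above guards against a real deadlock: if two CPUs call smp_call_function_single() with interrupts disabled, each targeting the other, neither cross-IPI can be delivered and both spin forever. A hypothetical caller, assuming the 2.6.20-era five-argument signature used in this file:

#include <linux/kernel.h>
#include <linux/smp.h>
#include <linux/irqflags.h>

static void remote_tick(void *info)
{
        /* runs on the target CPU from IPI context */
}

static void poke_cpu(int cpu)
{
        WARN_ON(irqs_disabled());       /* same guard the hunk adds */
        smp_call_function_single(cpu, remote_tick, NULL, 0, 1);
}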
4026--- sle11-2009-06-29.orig/arch/x86/kernel/time_32-xen.c 2009-03-24 10:08:00.000000000 +0100
4027+++ sle11-2009-06-29/arch/x86/kernel/time_32-xen.c 2009-03-24 10:08:30.000000000 +0100
4028@@ -61,6 +61,7 @@
4029 #include <asm/uaccess.h>
4030 #include <asm/processor.h>
4031 #include <asm/timer.h>
4032+#include <asm/time.h>
4033 #include <asm/sections.h>
4034
4035 #include "mach_time.h"
4036@@ -129,11 +130,11 @@ static DEFINE_PER_CPU(struct vcpu_runsta
4037 /* Must be signed, as it's compared with s64 quantities which can be -ve. */
4038 #define NS_PER_TICK (1000000000LL/HZ)
4039
4040-static void __clock_was_set(void *unused)
4041+static void __clock_was_set(struct work_struct *unused)
4042 {
4043 clock_was_set();
4044 }
4045-static DECLARE_WORK(clock_was_set_work, __clock_was_set, NULL);
4046+static DECLARE_WORK(clock_was_set_work, __clock_was_set);
4047
4048 /*
4049 * GCC 4.3 can turn loops over an induction variable into division. We do
4050@@ -543,10 +544,7 @@ static int set_rtc_mmss(unsigned long no
4051 /* gets recalled with irq locally disabled */
4052 /* XXX - does irqsave resolve this? -johnstul */
4053 spin_lock_irqsave(&rtc_lock, flags);
4054- if (efi_enabled)
4055- retval = efi_set_rtc_mmss(nowtime);
4056- else
4057- retval = mach_set_rtc_mmss(nowtime);
4058+ retval = set_wallclock(nowtime);
4059 spin_unlock_irqrestore(&rtc_lock, flags);
4060
4061 return retval;
4062@@ -873,10 +871,7 @@ unsigned long get_cmos_time(void)
4063
4064 spin_lock_irqsave(&rtc_lock, flags);
4065
4066- if (efi_enabled)
4067- retval = efi_get_time();
4068- else
4069- retval = mach_get_cmos_time();
4070+ retval = get_wallclock();
4071
4072 spin_unlock_irqrestore(&rtc_lock, flags);
4073
4074@@ -978,7 +973,7 @@ static void __init hpet_time_init(void)
4075 printk("Using HPET for base-timer\n");
4076 }
4077
4078- time_init_hook();
4079+ do_time_init();
4080 }
4081 #endif
4082
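The DECLARE_WORK() change above tracks the 2.6.20 workqueue rework: handlers now receive the work_struct itself instead of a void * cookie, and per-instance state is recovered with container_of(). A minimal sketch with invented names:

#include <linux/workqueue.h>
#include <linux/kernel.h>

struct demo_ctx {
        int hits;
        struct work_struct work;
};

static void demo_handler(struct work_struct *work)
{
        struct demo_ctx *ctx = container_of(work, struct demo_ctx, work);

        ctx->hits++;
}

For static work with no private state, the two-argument form seen in the hunk applies: DECLARE_WORK(clock_was_set_work, __clock_was_set).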
4083--- sle11-2009-06-29.orig/arch/x86/kernel/traps_32-xen.c 2009-03-04 11:28:34.000000000 +0100
4084+++ sle11-2009-06-29/arch/x86/kernel/traps_32-xen.c 2008-12-15 11:26:44.000000000 +0100
4085@@ -29,6 +29,8 @@
4086 #include <linux/kexec.h>
4087 #include <linux/unwind.h>
4088 #include <linux/uaccess.h>
4089+#include <linux/nmi.h>
4090+#include <linux/bug.h>
4091
4092 #ifdef CONFIG_EISA
4093 #include <linux/ioport.h>
4094@@ -61,9 +63,6 @@ int panic_on_unrecovered_nmi;
4095
4096 asmlinkage int system_call(void);
4097
4098-struct desc_struct default_ldt[] = { { 0, 0 }, { 0, 0 }, { 0, 0 },
4099- { 0, 0 }, { 0, 0 } };
4100-
4101 /* Do we ignore FPU interrupts ? */
4102 char ignore_fpu_irq = 0;
4103
4104@@ -100,12 +99,7 @@ asmlinkage void fixup_4gb_segment(void);
4105 #endif
4106 asmlinkage void machine_check(void);
4107
4108-static int kstack_depth_to_print = 24;
4109-#ifdef CONFIG_STACK_UNWIND
4110-static int call_trace = 1;
4111-#else
4112-#define call_trace (-1)
4113-#endif
4114+int kstack_depth_to_print = 24;
4115 ATOMIC_NOTIFIER_HEAD(i386die_chain);
4116
4117 int register_die_notifier(struct notifier_block *nb)
4118@@ -159,25 +153,7 @@ static inline unsigned long print_contex
4119 return ebp;
4120 }
4121
4122-struct ops_and_data {
4123- struct stacktrace_ops *ops;
4124- void *data;
4125-};
4126-
4127-static asmlinkage int
4128-dump_trace_unwind(struct unwind_frame_info *info, void *data)
4129-{
4130- struct ops_and_data *oad = (struct ops_and_data *)data;
4131- int n = 0;
4132-
4133- while (unwind(info) == 0 && UNW_PC(info)) {
4134- n++;
4135- oad->ops->address(oad->data, UNW_PC(info));
4136- if (arch_unw_user_mode(info))
4137- break;
4138- }
4139- return n;
4140-}
4141+#define MSG(msg) ops->warning(data, msg)
4142
4143 void dump_trace(struct task_struct *task, struct pt_regs *regs,
4144 unsigned long *stack,
4145@@ -188,39 +164,6 @@ void dump_trace(struct task_struct *task
4146 if (!task)
4147 task = current;
4148
4149- if (call_trace >= 0) {
4150- int unw_ret = 0;
4151- struct unwind_frame_info info;
4152- struct ops_and_data oad = { .ops = ops, .data = data };
4153-
4154- if (regs) {
4155- if (unwind_init_frame_info(&info, task, regs) == 0)
4156- unw_ret = dump_trace_unwind(&info, &oad);
4157- } else if (task == current)
4158- unw_ret = unwind_init_running(&info, dump_trace_unwind, &oad);
4159- else {
4160- if (unwind_init_blocked(&info, task) == 0)
4161- unw_ret = dump_trace_unwind(&info, &oad);
4162- }
4163- if (unw_ret > 0) {
4164- if (call_trace == 1 && !arch_unw_user_mode(&info)) {
4165- ops->warning_symbol(data, "DWARF2 unwinder stuck at %s\n",
4166- UNW_PC(&info));
4167- if (UNW_SP(&info) >= PAGE_OFFSET) {
4168- ops->warning(data, "Leftover inexact backtrace:\n");
4169- stack = (void *)UNW_SP(&info);
4170- if (!stack)
4171- return;
4172- ebp = UNW_FP(&info);
4173- } else
4174- ops->warning(data, "Full inexact backtrace again:\n");
4175- } else if (call_trace >= 1)
4176- return;
4177- else
4178- ops->warning(data, "Full inexact backtrace again:\n");
4179- } else
4180- ops->warning(data, "Inexact backtrace:\n");
4181- }
4182 if (!stack) {
4183 unsigned long dummy;
4184 stack = &dummy;
4185@@ -253,6 +196,7 @@ void dump_trace(struct task_struct *task
4186 stack = (unsigned long*)context->previous_esp;
4187 if (!stack)
4188 break;
4189+ touch_nmi_watchdog();
4190 }
4191 }
4192 EXPORT_SYMBOL(dump_trace);
4193@@ -385,7 +329,7 @@ void show_registers(struct pt_regs *regs
4194 * time of the fault..
4195 */
4196 if (in_kernel) {
4197- u8 __user *eip;
4198+ u8 *eip;
4199 int code_bytes = 64;
4200 unsigned char c;
4201
4202@@ -394,18 +338,20 @@ void show_registers(struct pt_regs *regs
4203
4204 printk(KERN_EMERG "Code: ");
4205
4206- eip = (u8 __user *)regs->eip - 43;
4207- if (eip < (u8 __user *)PAGE_OFFSET || __get_user(c, eip)) {
4208+ eip = (u8 *)regs->eip - 43;
4209+ if (eip < (u8 *)PAGE_OFFSET ||
4210+ probe_kernel_address(eip, c)) {
4211 /* try starting at EIP */
4212- eip = (u8 __user *)regs->eip;
4213+ eip = (u8 *)regs->eip;
4214 code_bytes = 32;
4215 }
4216 for (i = 0; i < code_bytes; i++, eip++) {
4217- if (eip < (u8 __user *)PAGE_OFFSET || __get_user(c, eip)) {
4218+ if (eip < (u8 *)PAGE_OFFSET ||
4219+ probe_kernel_address(eip, c)) {
4220 printk(" Bad EIP value.");
4221 break;
4222 }
4223- if (eip == (u8 __user *)regs->eip)
4224+ if (eip == (u8 *)regs->eip)
4225 printk("<%02x> ", c);
4226 else
4227 printk("%02x ", c);
4228@@ -414,43 +360,22 @@ void show_registers(struct pt_regs *regs
4229 printk("\n");
4230 }
4231
4232-static void handle_BUG(struct pt_regs *regs)
4233+int is_valid_bugaddr(unsigned long eip)
4234 {
4235- unsigned long eip = regs->eip;
4236 unsigned short ud2;
4237
4238 if (eip < PAGE_OFFSET)
4239- return;
4240- if (probe_kernel_address((unsigned short __user *)eip, ud2))
4241- return;
4242- if (ud2 != 0x0b0f)
4243- return;
4244+ return 0;
4245+ if (probe_kernel_address((unsigned short *)eip, ud2))
4246+ return 0;
4247
4248- printk(KERN_EMERG "------------[ cut here ]------------\n");
4249-
4250-#ifdef CONFIG_DEBUG_BUGVERBOSE
4251- do {
4252- unsigned short line;
4253- char *file;
4254- char c;
4255-
4256- if (probe_kernel_address((unsigned short __user *)(eip + 2),
4257- line))
4258- break;
4259- if (__get_user(file, (char * __user *)(eip + 4)) ||
4260- (unsigned long)file < PAGE_OFFSET || __get_user(c, file))
4261- file = "<bad filename>";
4262-
4263- printk(KERN_EMERG "kernel BUG at %s:%d!\n", file, line);
4264- return;
4265- } while (0);
4266-#endif
4267- printk(KERN_EMERG "Kernel BUG at [verbose debug info unavailable]\n");
4268+ return ud2 == 0x0b0f;
4269 }
4270
4271-/* This is gone through when something in the kernel
4272- * has done something bad and is about to be terminated.
4273-*/
4274+/*
4275+ * This is gone through when something in the kernel has done something bad and
4276+ * is about to be terminated.
4277+ */
4278 void die(const char * str, struct pt_regs * regs, long err)
4279 {
4280 static struct {
4281@@ -458,7 +383,7 @@ void die(const char * str, struct pt_reg
4282 u32 lock_owner;
4283 int lock_owner_depth;
4284 } die = {
4285- .lock = SPIN_LOCK_UNLOCKED,
4286+ .lock = __SPIN_LOCK_UNLOCKED(die.lock),
4287 .lock_owner = -1,
4288 .lock_owner_depth = 0
4289 };
4290@@ -482,7 +407,8 @@ void die(const char * str, struct pt_reg
4291 unsigned long esp;
4292 unsigned short ss;
4293
4294- handle_BUG(regs);
4295+ report_bug(regs->eip);
4296+
4297 printk(KERN_EMERG "%s: %04lx [#%d]\n", str, err & 0xffff, ++die_counter);
4298 #ifdef CONFIG_PREEMPT
4299 printk(KERN_EMERG "PREEMPT ");
4300@@ -682,8 +608,7 @@ mem_parity_error(unsigned char reason, s
4301 {
4302 printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x on "
4303 "CPU %d.\n", reason, smp_processor_id());
4304- printk(KERN_EMERG "You probably have a hardware problem with your RAM "
4305- "chips\n");
4306+ printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n");
4307 if (panic_on_unrecovered_nmi)
4308 panic("NMI: Not continuing");
4309
4310@@ -741,7 +666,6 @@ void __kprobes die_nmi(struct pt_regs *r
4311 printk(" on CPU%d, eip %08lx, registers:\n",
4312 smp_processor_id(), regs->eip);
4313 show_registers(regs);
4314- printk(KERN_EMERG "console shuts up ...\n");
4315 console_silent();
4316 spin_unlock(&nmi_print_lock);
4317 bust_spinlocks(0);
4318@@ -1057,49 +981,24 @@ fastcall void do_spurious_interrupt_bug(
4319 #endif
4320 }
4321
4322-fastcall void setup_x86_bogus_stack(unsigned char * stk)
4323+fastcall unsigned long patch_espfix_desc(unsigned long uesp,
4324+ unsigned long kesp)
4325 {
4326- unsigned long *switch16_ptr, *switch32_ptr;
4327- struct pt_regs *regs;
4328- unsigned long stack_top, stack_bot;
4329- unsigned short iret_frame16_off;
4330- int cpu = smp_processor_id();
4331- /* reserve the space on 32bit stack for the magic switch16 pointer */
4332- memmove(stk, stk + 8, sizeof(struct pt_regs));
4333- switch16_ptr = (unsigned long *)(stk + sizeof(struct pt_regs));
4334- regs = (struct pt_regs *)stk;
4335- /* now the switch32 on 16bit stack */
4336- stack_bot = (unsigned long)&per_cpu(cpu_16bit_stack, cpu);
4337- stack_top = stack_bot + CPU_16BIT_STACK_SIZE;
4338- switch32_ptr = (unsigned long *)(stack_top - 8);
4339- iret_frame16_off = CPU_16BIT_STACK_SIZE - 8 - 20;
4340- /* copy iret frame on 16bit stack */
4341- memcpy((void *)(stack_bot + iret_frame16_off), &regs->eip, 20);
4342- /* fill in the switch pointers */
4343- switch16_ptr[0] = (regs->esp & 0xffff0000) | iret_frame16_off;
4344- switch16_ptr[1] = __ESPFIX_SS;
4345- switch32_ptr[0] = (unsigned long)stk + sizeof(struct pt_regs) +
4346- 8 - CPU_16BIT_STACK_SIZE;
4347- switch32_ptr[1] = __KERNEL_DS;
4348-}
4349-
4350-fastcall unsigned char * fixup_x86_bogus_stack(unsigned short sp)
4351-{
4352- unsigned long *switch32_ptr;
4353- unsigned char *stack16, *stack32;
4354- unsigned long stack_top, stack_bot;
4355- int len;
4356 int cpu = smp_processor_id();
4357- stack_bot = (unsigned long)&per_cpu(cpu_16bit_stack, cpu);
4358- stack_top = stack_bot + CPU_16BIT_STACK_SIZE;
4359- switch32_ptr = (unsigned long *)(stack_top - 8);
4360- /* copy the data from 16bit stack to 32bit stack */
4361- len = CPU_16BIT_STACK_SIZE - 8 - sp;
4362- stack16 = (unsigned char *)(stack_bot + sp);
4363- stack32 = (unsigned char *)
4364- (switch32_ptr[0] + CPU_16BIT_STACK_SIZE - 8 - len);
4365- memcpy(stack32, stack16, len);
4366- return stack32;
4367+ struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
4368+ struct desc_struct *gdt = (struct desc_struct *)cpu_gdt_descr->address;
4369+ unsigned long base = (kesp - uesp) & -THREAD_SIZE;
4370+ unsigned long new_kesp = kesp - base;
4371+ unsigned long lim_pages = (new_kesp | (THREAD_SIZE - 1)) >> PAGE_SHIFT;
4372+ __u64 desc = *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS];
4373+ /* Set up base for espfix segment */
4374+ desc &= 0x00f0ff0000000000ULL;
4375+ desc |= ((((__u64)base) << 16) & 0x000000ffffff0000ULL) |
4376+ ((((__u64)base) << 32) & 0xff00000000000000ULL) |
4377+ ((((__u64)lim_pages) << 32) & 0x000f000000000000ULL) |
4378+ (lim_pages & 0xffff);
4379+ *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS] = desc;
4380+ return new_kesp;
4381 }
4382 #endif
4383
4384@@ -1113,7 +1012,7 @@ fastcall unsigned char * fixup_x86_bogus
4385 * Must be called with kernel preemption disabled (in this case,
4386 * local interrupts are disabled at the call-site in entry.S).
4387 */
4388-asmlinkage void math_state_restore(struct pt_regs regs)
4389+asmlinkage void math_state_restore(void)
4390 {
4391 struct thread_info *thread = current_thread_info();
4392 struct task_struct *tsk = thread->task;
4393@@ -1123,6 +1022,7 @@ asmlinkage void math_state_restore(struc
4394 init_fpu(tsk);
4395 restore_fpu(tsk);
4396 thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */
4397+ tsk->fpu_counter++;
4398 }
4399
4400 #ifndef CONFIG_MATH_EMULATION
4401@@ -1234,19 +1134,3 @@ static int __init kstack_setup(char *s)
4402 return 1;
4403 }
4404 __setup("kstack=", kstack_setup);
4405-
4406-#ifdef CONFIG_STACK_UNWIND
4407-static int __init call_trace_setup(char *s)
4408-{
4409- if (strcmp(s, "old") == 0)
4410- call_trace = -1;
4411- else if (strcmp(s, "both") == 0)
4412- call_trace = 0;
4413- else if (strcmp(s, "newfallback") == 0)
4414- call_trace = 1;
4415- else if (strcmp(s, "new") == 2)
4416- call_trace = 2;
4417- return 1;
4418-}
4419-__setup("call_trace=", call_trace_setup);
4420-#endif
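patch_espfix_desc() above rebuilds a GDT entry by hand; the non-obvious part is how a 32-bit base and a 20-bit page-granular limit are scattered across the 8-byte descriptor. A standalone restatement with the masks copied verbatim from the hunk (everything else is illustrative):

#include <stdint.h>

static uint64_t pack_espfix_desc(uint64_t old, uint32_t base,
                                 uint32_t lim_pages)
{
        uint64_t d = old & 0x00f0ff0000000000ULL;       /* keep type/flags */

        d |= ((uint64_t)base << 16) & 0x000000ffffff0000ULL;      /* base 23..0   */
        d |= ((uint64_t)base << 32) & 0xff00000000000000ULL;      /* base 31..24  */
        d |= ((uint64_t)lim_pages << 32) & 0x000f000000000000ULL; /* limit 19..16 */
        d |= lim_pages & 0xffff;                                  /* limit 15..0  */
        return d;
}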
4421--- sle11-2009-06-29.orig/arch/x86/kernel/vmlinux_32.lds.S 2009-06-29 15:14:52.000000000 +0200
4422+++ sle11-2009-06-29/arch/x86/kernel/vmlinux_32.lds.S 2008-12-15 11:26:44.000000000 +0100
4423@@ -29,6 +29,12 @@ PHDRS {
4424 SECTIONS
4425 {
4426 . = LOAD_OFFSET + LOAD_PHYSICAL_ADDR;
4427+
4428+#if defined(CONFIG_XEN) && CONFIG_XEN_COMPAT <= 0x030002
4429+#undef LOAD_OFFSET
4430+#define LOAD_OFFSET 0
4431+#endif
4432+
4433 phys_startup_32 = startup_32 - LOAD_OFFSET;
4434
4435 .text.head : AT(ADDR(.text.head) - LOAD_OFFSET) {
4436--- sle11-2009-06-29.orig/arch/x86/kvm/Kconfig 2009-06-29 15:14:52.000000000 +0200
4437+++ sle11-2009-06-29/arch/x86/kvm/Kconfig 2008-12-15 11:26:44.000000000 +0100
4438@@ -7,6 +7,7 @@ config HAVE_KVM
4439 menuconfig VIRTUALIZATION
4440 bool "Virtualization"
4441 depends on HAVE_KVM || X86
4442+ depends on !XEN
4443 default y
4444 ---help---
4445 Say Y here to get to see options for using your Linux host to run other
4446--- sle11-2009-06-29.orig/arch/x86/mm/fault_32-xen.c 2009-03-04 11:28:34.000000000 +0100
4447+++ sle11-2009-06-29/arch/x86/mm/fault_32-xen.c 2008-12-15 11:26:44.000000000 +0100
4448@@ -22,9 +22,9 @@
4449 #include <linux/highmem.h>
4450 #include <linux/module.h>
4451 #include <linux/kprobes.h>
4452+#include <linux/uaccess.h>
4453
4454 #include <asm/system.h>
4455-#include <asm/uaccess.h>
4456 #include <asm/desc.h>
4457 #include <asm/kdebug.h>
4458 #include <asm/segment.h>
4459@@ -167,7 +167,7 @@ static inline unsigned long get_segment_
4460 static int __is_prefetch(struct pt_regs *regs, unsigned long addr)
4461 {
4462 unsigned long limit;
4463- unsigned long instr = get_segment_eip (regs, &limit);
4464+ unsigned char *instr = (unsigned char *)get_segment_eip (regs, &limit);
4465 int scan_more = 1;
4466 int prefetch = 0;
4467 int i;
4468@@ -177,9 +177,9 @@ static int __is_prefetch(struct pt_regs
4469 unsigned char instr_hi;
4470 unsigned char instr_lo;
4471
4472- if (instr > limit)
4473+ if (instr > (unsigned char *)limit)
4474 break;
4475- if (__get_user(opcode, (unsigned char __user *) instr))
4476+ if (probe_kernel_address(instr, opcode))
4477 break;
4478
4479 instr_hi = opcode & 0xf0;
4480@@ -204,9 +204,9 @@ static int __is_prefetch(struct pt_regs
4481 case 0x00:
4482 /* Prefetch instruction is 0x0F0D or 0x0F18 */
4483 scan_more = 0;
4484- if (instr > limit)
4485+ if (instr > (unsigned char *)limit)
4486 break;
4487- if (__get_user(opcode, (unsigned char __user *) instr))
4488+ if (probe_kernel_address(instr, opcode))
4489 break;
4490 prefetch = (instr_lo == 0xF) &&
4491 (opcode == 0x0D || opcode == 0x18);
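The recurring __get_user() -> probe_kernel_address() conversion in this patch replaces a user-space accessor, misused on kernel pointers, with the dedicated helper: probe_kernel_address(addr, val) fills val and returns 0, or returns nonzero when the kernel address is unmapped. Hypothetical usage:

#include <linux/errno.h>
#include <linux/uaccess.h>

static int peek_opcode(const unsigned char *instr, unsigned char *op)
{
        if (probe_kernel_address(instr, *op))
                return -EFAULT;         /* address not readable */
        return 0;
}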
4492--- sle11-2009-06-29.orig/arch/x86/mm/highmem_32-xen.c 2009-03-04 11:28:34.000000000 +0100
4493+++ sle11-2009-06-29/arch/x86/mm/highmem_32-xen.c 2008-12-15 11:26:44.000000000 +0100
4494@@ -32,7 +32,7 @@ static void *__kmap_atomic(struct page *
4495 unsigned long vaddr;
4496
4497 /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
4498- inc_preempt_count();
4499+ pagefault_disable();
4500 if (!PageHighMem(page))
4501 return page_address(page);
4502
4503@@ -63,26 +63,22 @@ void kunmap_atomic(void *kvaddr, enum km
4504 unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
4505 enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id();
4506
4507-#ifdef CONFIG_DEBUG_HIGHMEM
4508- if (vaddr >= PAGE_OFFSET && vaddr < (unsigned long)high_memory) {
4509- dec_preempt_count();
4510- preempt_check_resched();
4511- return;
4512- }
4513-
4514- if (vaddr != __fix_to_virt(FIX_KMAP_BEGIN+idx))
4515- BUG();
4516-#endif
4517 /*
4518 * Force other mappings to Oops if they try to access this pte
4519 * without first remapping it. Keeping stale mappings around is a bad idea
4520 * also, in case the page changes cacheability attributes or becomes
4521 * a protected page in a hypervisor.
4522 */
4523- kpte_clear_flush(kmap_pte-idx, vaddr);
4524+ if (vaddr == __fix_to_virt(FIX_KMAP_BEGIN+idx))
4525+ kpte_clear_flush(kmap_pte-idx, vaddr);
4526+ else {
4527+#ifdef CONFIG_DEBUG_HIGHMEM
4528+ BUG_ON(vaddr < PAGE_OFFSET);
4529+ BUG_ON(vaddr >= (unsigned long)high_memory);
4530+#endif
4531+ }
4532
4533- dec_preempt_count();
4534- preempt_check_resched();
4535+ pagefault_enable();
4536 }
4537
4538 /* This is the same as kmap_atomic() but can map memory that doesn't
4539@@ -93,7 +89,7 @@ void *kmap_atomic_pfn(unsigned long pfn,
4540 enum fixed_addresses idx;
4541 unsigned long vaddr;
4542
4543- inc_preempt_count();
4544+ pagefault_disable();
4545
4546 idx = type + KM_TYPE_NR*smp_processor_id();
4547 vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
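With the conversion above, pagefault_disable()/pagefault_enable() bracket the atomic-kmap window, so nothing between the pair may sleep. A usage sketch with an invented helper, assuming the 2.6.20-era two-argument kmap_atomic():

#include <linux/highmem.h>
#include <linux/string.h>

static void copy_from_page(void *dst, struct page *page, size_t len)
{
        char *vaddr = kmap_atomic(page, KM_USER0);

        memcpy(dst, vaddr, len);        /* must not sleep inside the pair */
        kunmap_atomic(vaddr, KM_USER0);
}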
4548--- sle11-2009-06-29.orig/arch/x86/mm/init_32-xen.c 2009-03-04 11:28:34.000000000 +0100
4549+++ sle11-2009-06-29/arch/x86/mm/init_32-xen.c 2008-12-15 11:26:44.000000000 +0100
4550@@ -233,8 +233,6 @@ static inline int page_kills_ppro(unsign
4551
4552 #endif
4553
4554-extern int is_available_memory(efi_memory_desc_t *);
4555-
4556 int page_is_ram(unsigned long pagenr)
4557 {
4558 int i;
4559@@ -327,7 +325,7 @@ void __init add_one_highpage_init(struct
4560 SetPageReserved(page);
4561 }
4562
4563-static int add_one_highpage_hotplug(struct page *page, unsigned long pfn)
4564+static int __meminit add_one_highpage_hotplug(struct page *page, unsigned long pfn)
4565 {
4566 free_new_highpage(page, pfn);
4567 totalram_pages++;
4568@@ -344,7 +342,7 @@ static int add_one_highpage_hotplug(stru
4569 * has been added dynamically that would be
4570 * onlined here is in HIGHMEM
4571 */
4572-void online_page(struct page *page)
4573+void __meminit online_page(struct page *page)
4574 {
4575 ClearPageReserved(page);
4576 add_one_highpage_hotplug(page, page_to_pfn(page));
4577@@ -732,16 +730,10 @@ void __init mem_init(void)
4578 set_bit(PG_pinned, &virt_to_page(init_mm.pgd)->flags);
4579 }
4580
4581-/*
4582- * this is for the non-NUMA, single node SMP system case.
4583- * Specifically, in the case of x86, we will always add
4584- * memory to the highmem for now.
4585- */
4586 #ifdef CONFIG_MEMORY_HOTPLUG
4587-#ifndef CONFIG_NEED_MULTIPLE_NODES
4588 int arch_add_memory(int nid, u64 start, u64 size)
4589 {
4590- struct pglist_data *pgdata = &contig_page_data;
4591+ struct pglist_data *pgdata = NODE_DATA(nid);
4592 struct zone *zone = pgdata->node_zones + ZONE_HIGHMEM;
4593 unsigned long start_pfn = start >> PAGE_SHIFT;
4594 unsigned long nr_pages = size >> PAGE_SHIFT;
4595@@ -753,11 +745,11 @@ int remove_memory(u64 start, u64 size)
4596 {
4597 return -EINVAL;
4598 }
4599-#endif
4600+EXPORT_SYMBOL_GPL(remove_memory);
4601 #endif
4602
4603-kmem_cache_t *pgd_cache;
4604-kmem_cache_t *pmd_cache;
4605+struct kmem_cache *pgd_cache;
4606+struct kmem_cache *pmd_cache;
4607
4608 void __init pgtable_cache_init(void)
4609 {
4610--- sle11-2009-06-29.orig/arch/x86/mm/pgtable_32-xen.c 2009-03-04 11:28:34.000000000 +0100
4611+++ sle11-2009-06-29/arch/x86/mm/pgtable_32-xen.c 2008-12-15 11:26:44.000000000 +0100
4612@@ -203,7 +203,7 @@ void pte_free(struct page *pte)
4613 __free_page(pte);
4614 }
4615
4616-void pmd_ctor(void *pmd, kmem_cache_t *cache, unsigned long flags)
4617+void pmd_ctor(void *pmd, struct kmem_cache *cache, unsigned long flags)
4618 {
4619 memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t));
4620 }
4621@@ -243,7 +243,7 @@ static inline void pgd_list_del(pgd_t *p
4622 set_page_private(next, (unsigned long)pprev);
4623 }
4624
4625-void pgd_ctor(void *pgd, kmem_cache_t *cache, unsigned long unused)
4626+void pgd_ctor(void *pgd, struct kmem_cache *cache, unsigned long unused)
4627 {
4628 unsigned long flags;
4629
4630@@ -264,7 +264,7 @@ void pgd_ctor(void *pgd, kmem_cache_t *c
4631 }
4632
4633 /* never called when PTRS_PER_PMD > 1 */
4634-void pgd_dtor(void *pgd, kmem_cache_t *cache, unsigned long unused)
4635+void pgd_dtor(void *pgd, struct kmem_cache *cache, unsigned long unused)
4636 {
4637 unsigned long flags; /* can be called from interrupt context */
4638
4639--- sle11-2009-06-29.orig/arch/x86/pci/irq-xen.c 2009-03-04 11:28:34.000000000 +0100
4640+++ sle11-2009-06-29/arch/x86/pci/irq-xen.c 2008-12-15 11:26:44.000000000 +0100
4641@@ -768,7 +768,7 @@ static void __init pirq_find_router(stru
4642 DBG(KERN_DEBUG "PCI: Attempting to find IRQ router for %04x:%04x\n",
4643 rt->rtr_vendor, rt->rtr_device);
4644
4645- pirq_router_dev = pci_find_slot(rt->rtr_bus, rt->rtr_devfn);
4646+ pirq_router_dev = pci_get_bus_and_slot(rt->rtr_bus, rt->rtr_devfn);
4647 if (!pirq_router_dev) {
4648 DBG(KERN_DEBUG "PCI: Interrupt router not found at "
4649 "%02x:%02x\n", rt->rtr_bus, rt->rtr_devfn);
4650@@ -788,6 +788,8 @@ static void __init pirq_find_router(stru
4651 pirq_router_dev->vendor,
4652 pirq_router_dev->device,
4653 pci_name(pirq_router_dev));
4654+
4655+ /* The device remains referenced for the kernel lifetime */
4656 }
4657
4658 static struct irq_info *pirq_get_info(struct pci_dev *dev)
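pci_get_bus_and_slot(), unlike the pci_find_slot() it replaces, returns a device with its reference count raised, so ordinary callers must balance the lookup with pci_dev_put(); the router lookup above deliberately keeps its reference for the kernel's lifetime, as the added comment notes. The usual pattern (invented helper):

#include <linux/errno.h>
#include <linux/pci.h>

static int read_vendor(unsigned int bus, unsigned int devfn, u16 *vendor)
{
        struct pci_dev *dev = pci_get_bus_and_slot(bus, devfn);

        if (!dev)
                return -ENODEV;
        *vendor = dev->vendor;
        pci_dev_put(dev);       /* drop the reference the lookup took */
        return 0;
}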
4659--- sle11-2009-06-29.orig/arch/x86/kernel/entry_64-xen.S 2009-03-04 11:28:34.000000000 +0100
4660+++ sle11-2009-06-29/arch/x86/kernel/entry_64-xen.S 2008-12-15 11:26:44.000000000 +0100
4661@@ -261,7 +261,6 @@ ENTRY(system_call)
4662 movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
4663 GET_THREAD_INFO(%rcx)
4664 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
4665- CFI_REMEMBER_STATE
4666 jnz tracesys
4667 cmpq $__NR_syscall_max,%rax
4668 ja badsys
4669@@ -272,7 +271,6 @@ ENTRY(system_call)
4670 * Syscall return path ending with SYSRET (fast path)
4671 * Has incomplete stack frame and undefined top of stack.
4672 */
4673- .globl ret_from_sys_call
4674 ret_from_sys_call:
4675 movl $_TIF_ALLWORK_MASK,%edi
4676 /* edi: flagmask */
4677@@ -282,8 +280,8 @@ sysret_check:
4678 TRACE_IRQS_OFF
4679 movl threadinfo_flags(%rcx),%edx
4680 andl %edi,%edx
4681- CFI_REMEMBER_STATE
4682 jnz sysret_careful
4683+ CFI_REMEMBER_STATE
4684 /*
4685 * sysretq will re-enable interrupts:
4686 */
4687@@ -292,10 +290,10 @@ sysret_check:
4688 RESTORE_ARGS 0,8,0
4689 HYPERVISOR_IRET VGCF_IN_SYSCALL
4690
4691+ CFI_RESTORE_STATE
4692 /* Handle reschedules */
4693 /* edx: work, edi: workmask */
4694 sysret_careful:
4695- CFI_RESTORE_STATE
4696 bt $TIF_NEED_RESCHED,%edx
4697 jnc sysret_signal
4698 TRACE_IRQS_ON
4699@@ -334,7 +332,6 @@ badsys:
4700
4701 /* Do syscall tracing */
4702 tracesys:
4703- CFI_RESTORE_STATE
4704 SAVE_REST
4705 movq $-ENOSYS,RAX(%rsp)
4706 FIXUP_TOP_OF_STACK %rdi
4707@@ -350,32 +347,13 @@ tracesys:
4708 call *sys_call_table(,%rax,8)
4709 1: movq %rax,RAX-ARGOFFSET(%rsp)
4710 /* Use IRET because user could have changed frame */
4711- jmp int_ret_from_sys_call
4712- CFI_ENDPROC
4713-END(system_call)
4714
4715 /*
4716 * Syscall return path ending with IRET.
4717 * Has correct top of stack, but partial stack frame.
4718- */
4719-ENTRY(int_ret_from_sys_call)
4720- CFI_STARTPROC simple
4721- CFI_SIGNAL_FRAME
4722- CFI_DEF_CFA rsp,SS+8-ARGOFFSET
4723- /*CFI_REL_OFFSET ss,SS-ARGOFFSET*/
4724- CFI_REL_OFFSET rsp,RSP-ARGOFFSET
4725- /*CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/
4726- /*CFI_REL_OFFSET cs,CS-ARGOFFSET*/
4727- CFI_REL_OFFSET rip,RIP-ARGOFFSET
4728- CFI_REL_OFFSET rdx,RDX-ARGOFFSET
4729- CFI_REL_OFFSET rcx,RCX-ARGOFFSET
4730- CFI_REL_OFFSET rax,RAX-ARGOFFSET
4731- CFI_REL_OFFSET rdi,RDI-ARGOFFSET
4732- CFI_REL_OFFSET rsi,RSI-ARGOFFSET
4733- CFI_REL_OFFSET r8,R8-ARGOFFSET
4734- CFI_REL_OFFSET r9,R9-ARGOFFSET
4735- CFI_REL_OFFSET r10,R10-ARGOFFSET
4736- CFI_REL_OFFSET r11,R11-ARGOFFSET
4737+ */
4738+ .globl int_ret_from_sys_call
4739+int_ret_from_sys_call:
4740 XEN_BLOCK_EVENTS(%rsi)
4741 TRACE_IRQS_OFF
4742 testb $3,CS-ARGOFFSET(%rsp)
4743@@ -428,8 +406,6 @@ int_very_careful:
4744 popq %rdi
4745 CFI_ADJUST_CFA_OFFSET -8
4746 andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
4747- XEN_BLOCK_EVENTS(%rsi)
4748- TRACE_IRQS_OFF
4749 jmp int_restore_rest
4750
4751 int_signal:
4752@@ -445,7 +421,7 @@ int_restore_rest:
4753 TRACE_IRQS_OFF
4754 jmp int_with_check
4755 CFI_ENDPROC
4756-END(int_ret_from_sys_call)
4757+END(system_call)
4758
4759 /*
4760 * Certain special system calls that need to save a complete full stack frame.
4761@@ -1275,36 +1251,3 @@ ENTRY(call_softirq)
4762 ret
4763 CFI_ENDPROC
4764 ENDPROC(call_softirq)
4765-
4766-#ifdef CONFIG_STACK_UNWIND
4767-ENTRY(arch_unwind_init_running)
4768- CFI_STARTPROC
4769- movq %r15, R15(%rdi)
4770- movq %r14, R14(%rdi)
4771- xchgq %rsi, %rdx
4772- movq %r13, R13(%rdi)
4773- movq %r12, R12(%rdi)
4774- xorl %eax, %eax
4775- movq %rbp, RBP(%rdi)
4776- movq %rbx, RBX(%rdi)
4777- movq (%rsp), %rcx
4778- movq %rax, R11(%rdi)
4779- movq %rax, R10(%rdi)
4780- movq %rax, R9(%rdi)
4781- movq %rax, R8(%rdi)
4782- movq %rax, RAX(%rdi)
4783- movq %rax, RCX(%rdi)
4784- movq %rax, RDX(%rdi)
4785- movq %rax, RSI(%rdi)
4786- movq %rax, RDI(%rdi)
4787- movq %rax, ORIG_RAX(%rdi)
4788- movq %rcx, RIP(%rdi)
4789- leaq 8(%rsp), %rcx
4790- movq $__KERNEL_CS, CS(%rdi)
4791- movq %rax, EFLAGS(%rdi)
4792- movq %rcx, RSP(%rdi)
4793- movq $__KERNEL_DS, SS(%rdi)
4794- jmpq *%rdx
4795- CFI_ENDPROC
4796-ENDPROC(arch_unwind_init_running)
4797-#endif
4798--- sle11-2009-06-29.orig/arch/x86/kernel/genapic_64-xen.c 2009-06-29 15:14:52.000000000 +0200
4799+++ sle11-2009-06-29/arch/x86/kernel/genapic_64-xen.c 2008-12-15 11:26:44.000000000 +0100
4800@@ -34,6 +34,7 @@ extern struct genapic apic_physflat;
4801
4802 #ifndef CONFIG_XEN
4803 struct genapic *genapic = &apic_flat;
4804+struct genapic *genapic_force;
4805 #else
4806 extern struct genapic apic_xen;
4807 struct genapic *genapic = &apic_xen;
4808@@ -52,6 +53,13 @@ void __init clustered_apic_check(void)
4809 u8 cluster_cnt[NUM_APIC_CLUSTERS];
4810 int max_apic = 0;
4811
4812+ /* genapic selection can be forced because of certain quirks.
4813+ */
4814+ if (genapic_force) {
4815+ genapic = genapic_force;
4816+ goto print;
4817+ }
4818+
4819 #if defined(CONFIG_ACPI)
4820 /*
4821 * Some x86_64 machines use physical APIC mode regardless of how many
4822--- sle11-2009-06-29.orig/arch/x86/kernel/head64-xen.c 2009-03-04 11:28:34.000000000 +0100
4823+++ sle11-2009-06-29/arch/x86/kernel/head64-xen.c 2008-12-15 11:26:44.000000000 +0100
4824@@ -104,7 +104,10 @@ void __init x86_64_start_kernel(char * r
4825 machine_to_phys_order++;
4826
4827 #if 0
4828- for (i = 0; i < 256; i++)
4829+ /* clear bss before set_intr_gate with early_idt_handler */
4830+ clear_bss();
4831+
4832+ for (i = 0; i < IDT_ENTRIES; i++)
4833 set_intr_gate(i, early_idt_handler);
4834 asm volatile("lidt %0" :: "m" (idt_descr));
4835 #endif
4836--- sle11-2009-06-29.orig/arch/x86/kernel/io_apic_64-xen.c 2009-03-04 11:28:34.000000000 +0100
4837+++ sle11-2009-06-29/arch/x86/kernel/io_apic_64-xen.c 2008-12-15 11:26:44.000000000 +0100
4838@@ -202,14 +202,20 @@ static struct IO_APIC_route_entry ioapic
4839 * the interrupt, and we need to make sure the entry is fully populated
4840 * before that happens.
4841 */
4842-static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
4843+static void
4844+__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
4845 {
4846- unsigned long flags;
4847 union entry_union eu;
4848 eu.entry = e;
4849- spin_lock_irqsave(&ioapic_lock, flags);
4850 io_apic_write(apic, 0x11 + 2*pin, eu.w2);
4851 io_apic_write(apic, 0x10 + 2*pin, eu.w1);
4852+}
4853+
4854+static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
4855+{
4856+ unsigned long flags;
4857+ spin_lock_irqsave(&ioapic_lock, flags);
4858+ __ioapic_write_entry(apic, pin, e);
4859 spin_unlock_irqrestore(&ioapic_lock, flags);
4860 }
4861
4862@@ -720,6 +726,22 @@ static int assign_irq_vector(int irq, cp
4863 }
4864
4865 #ifndef CONFIG_XEN
4866+static void __clear_irq_vector(int irq)
4867+{
4868+ cpumask_t mask;
4869+ int cpu, vector;
4870+
4871+ BUG_ON(!irq_vector[irq]);
4872+
4873+ vector = irq_vector[irq];
4874+ cpus_and(mask, irq_domain[irq], cpu_online_map);
4875+ for_each_cpu_mask(cpu, mask)
4876+ per_cpu(vector_irq, cpu)[vector] = -1;
4877+
4878+ irq_vector[irq] = 0;
4879+ irq_domain[irq] = CPU_MASK_NONE;
4880+}
4881+
4882 void __setup_vector_irq(int cpu)
4883 {
4884 /* Initialize vector_irq on a new cpu */
4885@@ -767,26 +789,65 @@ static void ioapic_register_intr(int irq
4886 #define ioapic_register_intr(irq, vector, trigger) evtchn_register_pirq(irq)
4887 #endif /* !CONFIG_XEN */
4888
4889-static void __init setup_IO_APIC_irqs(void)
4890+static void __init setup_IO_APIC_irq(int apic, int pin, int idx, int irq)
4891 {
4892 struct IO_APIC_route_entry entry;
4893- int apic, pin, idx, irq, first_notcon = 1, vector;
4894+ int vector;
4895 unsigned long flags;
4896
4897- apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
4898
4899- for (apic = 0; apic < nr_ioapics; apic++) {
4900- for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
4901+ /*
4902+ * add it to the IO-APIC irq-routing table:
4903+ */
4904+ memset(&entry,0,sizeof(entry));
4905
4906- /*
4907- * add it to the IO-APIC irq-routing table:
4908- */
4909- memset(&entry,0,sizeof(entry));
4910+ entry.delivery_mode = INT_DELIVERY_MODE;
4911+ entry.dest_mode = INT_DEST_MODE;
4912+ entry.mask = 0; /* enable IRQ */
4913+ entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
4914
4915- entry.delivery_mode = INT_DELIVERY_MODE;
4916- entry.dest_mode = INT_DEST_MODE;
4917- entry.mask = 0; /* enable IRQ */
4918+ entry.trigger = irq_trigger(idx);
4919+ entry.polarity = irq_polarity(idx);
4920+
4921+ if (irq_trigger(idx)) {
4922+ entry.trigger = 1;
4923+ entry.mask = 1;
4924 entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
4925+ }
4926+
4927+ if (/* !apic && */ !IO_APIC_IRQ(irq))
4928+ return;
4929+
4930+ if (IO_APIC_IRQ(irq)) {
4931+ cpumask_t mask;
4932+ vector = assign_irq_vector(irq, TARGET_CPUS, &mask);
4933+ if (vector < 0)
4934+ return;
4935+
4936+ entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask);
4937+ entry.vector = vector;
4938+
4939+ ioapic_register_intr(irq, vector, IOAPIC_AUTO);
4940+ if (!apic && (irq < 16))
4941+ disable_8259A_irq(irq);
4942+ }
4943+
4944+ ioapic_write_entry(apic, pin, entry);
4945+
4946+ spin_lock_irqsave(&ioapic_lock, flags);
4947+ set_native_irq_info(irq, TARGET_CPUS);
4948+ spin_unlock_irqrestore(&ioapic_lock, flags);
4949+
4950+}
4951+
4952+static void __init setup_IO_APIC_irqs(void)
4953+{
4954+ int apic, pin, idx, irq, first_notcon = 1;
4955+
4956+ apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
4957+
4958+ for (apic = 0; apic < nr_ioapics; apic++) {
4959+ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
4960
4961 idx = find_irq_entry(apic,pin,mp_INT);
4962 if (idx == -1) {
4963@@ -798,39 +859,11 @@ static void __init setup_IO_APIC_irqs(vo
4964 continue;
4965 }
4966
4967- entry.trigger = irq_trigger(idx);
4968- entry.polarity = irq_polarity(idx);
4969-
4970- if (irq_trigger(idx)) {
4971- entry.trigger = 1;
4972- entry.mask = 1;
4973- entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
4974- }
4975-
4976 irq = pin_2_irq(idx, apic, pin);
4977 add_pin_to_irq(irq, apic, pin);
4978
4979- if (/* !apic && */ !IO_APIC_IRQ(irq))
4980- continue;
4981-
4982- if (IO_APIC_IRQ(irq)) {
4983- cpumask_t mask;
4984- vector = assign_irq_vector(irq, TARGET_CPUS, &mask);
4985- if (vector < 0)
4986- continue;
4987-
4988- entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask);
4989- entry.vector = vector;
4990+ setup_IO_APIC_irq(apic, pin, idx, irq);
4991
4992- ioapic_register_intr(irq, vector, IOAPIC_AUTO);
4993- if (!apic && (irq < 16))
4994- disable_8259A_irq(irq);
4995- }
4996- ioapic_write_entry(apic, pin, entry);
4997-
4998- spin_lock_irqsave(&ioapic_lock, flags);
4999- set_native_irq_info(irq, TARGET_CPUS);
5000- spin_unlock_irqrestore(&ioapic_lock, flags);
5001 }
5002 }
5003
5004@@ -1826,7 +1859,7 @@ void destroy_irq(unsigned int irq)
5005 dynamic_irq_cleanup(irq);
5006
5007 spin_lock_irqsave(&vector_lock, flags);
5008- irq_vector[irq] = 0;
5009+ __clear_irq_vector(irq);
5010 spin_unlock_irqrestore(&vector_lock, flags);
5011 }
5012
5013@@ -2131,7 +2164,15 @@ void __init setup_ioapic_dest(void)
5014 if (irq_entry == -1)
5015 continue;
5016 irq = pin_2_irq(irq_entry, ioapic, pin);
5017- set_ioapic_affinity_irq(irq, TARGET_CPUS);
5018+
5019+ /* setup_IO_APIC_irqs() can fail to get a vector for some devices
5020+ * when there are too many of them, because at that time only the
5021+ * boot CPU is online.
5022+ */
5023+ if (!irq_vector[irq])
5024+ setup_IO_APIC_irq(ioapic, pin, irq_entry, irq);
5025+ else
5026+ set_ioapic_affinity_irq(irq, TARGET_CPUS);
5027 }
5028
5029 }
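The split of ioapic_write_entry() above follows a standard kernel idiom: a bare, double-underscore helper for callers that already hold the lock, wrapped by a variant that takes it. The shape in isolation, with invented names:

#include <linux/spinlock.h>

static DEFINE_SPINLOCK(demo_lock);
static unsigned int demo_reg;

static void __demo_write(unsigned int val)
{
        demo_reg = val;                 /* caller must hold demo_lock */
}

static void demo_write(unsigned int val)
{
        unsigned long flags;

        spin_lock_irqsave(&demo_lock, flags);
        __demo_write(val);
        spin_unlock_irqrestore(&demo_lock, flags);
}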
5030--- sle11-2009-06-29.orig/arch/x86/kernel/irq_64-xen.c 2009-03-04 11:28:34.000000000 +0100
5031+++ sle11-2009-06-29/arch/x86/kernel/irq_64-xen.c 2008-12-15 11:26:44.000000000 +0100
5032@@ -120,7 +120,7 @@ asmlinkage unsigned int do_IRQ(struct pt
5033
5034 if (likely(irq < NR_IRQS))
5035 generic_handle_irq(irq);
5036- else
5037+ else if (printk_ratelimit())
5038 printk(KERN_EMERG "%s: %d.%d No irq handler for irq\n",
5039 __func__, smp_processor_id(), irq);
5040
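Gating the message behind printk_ratelimit() above keeps a screaming, unhandled vector from flooding the log while still recording that it fired. The generic pattern (message invented):

#include <linux/kernel.h>

static void complain(int irq)
{
        if (printk_ratelimit())
                printk(KERN_EMERG "unexpected irq %d\n", irq);
}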
5041--- sle11-2009-06-29.orig/arch/x86/kernel/mpparse_64-xen.c 2009-03-04 11:28:34.000000000 +0100
5042+++ sle11-2009-06-29/arch/x86/kernel/mpparse_64-xen.c 2008-12-15 11:26:44.000000000 +0100
5043@@ -35,8 +35,6 @@
5044 int smp_found_config;
5045 unsigned int __initdata maxcpus = NR_CPUS;
5046
5047-int acpi_found_madt;
5048-
5049 /*
5050 * Various Linux-internal data structures created from the
5051 * MP-table.
5052--- sle11-2009-06-29.orig/arch/x86/kernel/process_64-xen.c 2009-03-04 11:28:34.000000000 +0100
5053+++ sle11-2009-06-29/arch/x86/kernel/process_64-xen.c 2008-12-15 11:26:44.000000000 +0100
5054@@ -119,29 +119,23 @@ void exit_idle(void)
5055 static void poll_idle (void)
5056 {
5057 local_irq_enable();
5058-
5059- asm volatile(
5060- "2:"
5061- "testl %0,%1;"
5062- "rep; nop;"
5063- "je 2b;"
5064- : :
5065- "i" (_TIF_NEED_RESCHED),
5066- "m" (current_thread_info()->flags));
5067+ cpu_relax();
5068 }
5069
5070 static void xen_idle(void)
5071 {
5072+ current_thread_info()->status &= ~TS_POLLING;
5073+ /*
5074+ * TS_POLLING-cleared state must be visible before we
5075+ * test NEED_RESCHED:
5076+ */
5077+ smp_mb();
5078 local_irq_disable();
5079-
5080- if (need_resched())
5081- local_irq_enable();
5082- else {
5083- current_thread_info()->status &= ~TS_POLLING;
5084- smp_mb__after_clear_bit();
5085+ if (!need_resched())
5086 safe_halt();
5087- current_thread_info()->status |= TS_POLLING;
5088- }
5089+ else
5090+ local_irq_enable();
5091+ current_thread_info()->status |= TS_POLLING;
5092 }
5093
5094 #ifdef CONFIG_HOTPLUG_CPU
5095@@ -181,6 +175,12 @@ void cpu_idle (void)
5096 idle = xen_idle; /* no alternatives */
5097 if (cpu_is_offline(smp_processor_id()))
5098 play_dead();
5099+ /*
5100+ * Idle routines should keep interrupts disabled
5101+ * from here on, until they go to idle.
5102+ * Otherwise, idle callbacks can misfire.
5103+ */
5104+ local_irq_disable();
5105 enter_idle();
5106 idle();
5107 /* In many cases the interrupt that ended idle
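The smp_mb() inserted in xen_idle() orders clearing TS_POLLING against the need_resched() test: the waking CPU checks TS_POLLING to decide whether the reschedule IPI can be skipped, so each side must publish its store before reading the other's. A reduced model of the two sides, with plain ints standing in for the task flags (illustrative only):

#include <asm/system.h>

static void do_halt(void) { }           /* stands in for safe_halt() */
static void send_ipi(void) { }          /* stands in for smp_send_reschedule() */

static int polling = 1;                 /* stands in for TS_POLLING */
static int flag;                        /* stands in for need_resched */

static void idler(void)
{
        polling = 0;
        smp_mb();                       /* publish the store before the load */
        if (!flag)
                do_halt();
}

static void waker(void)
{
        flag = 1;
        smp_mb();                       /* publish the store before the load */
        if (!polling)
                send_ipi();
}

Without the barriers, the idler can read a stale flag and halt at the very moment the waker concludes no IPI is needed.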
5108--- sle11-2009-06-29.orig/arch/x86/kernel/setup_64-xen.c 2009-03-04 11:28:34.000000000 +0100
5109+++ sle11-2009-06-29/arch/x86/kernel/setup_64-xen.c 2008-12-15 11:26:44.000000000 +0100
5110@@ -573,8 +573,7 @@ void __init setup_arch(char **cmdline_p)
5111 if (LOADER_TYPE && INITRD_START) {
5112 if (INITRD_START + INITRD_SIZE <= (end_pfn << PAGE_SHIFT)) {
5113 reserve_bootmem_generic(INITRD_START, INITRD_SIZE);
5114- initrd_start =
5115- INITRD_START ? INITRD_START + PAGE_OFFSET : 0;
5116+ initrd_start = INITRD_START + PAGE_OFFSET;
5117 initrd_end = initrd_start+INITRD_SIZE;
5118 }
5119 else {
5120@@ -990,11 +989,8 @@ static void __cpuinit init_amd(struct cp
5121 /* Fix cpuid4 emulation for more */
5122 num_cache_leaves = 3;
5123
5124- /* When there is only one core no need to synchronize RDTSC */
5125- if (num_possible_cpus() == 1)
5126- set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
5127- else
5128- clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
5129+ /* RDTSC can be speculated around */
5130+ clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
5131 }
5132
5133 static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
5134@@ -1093,6 +1089,15 @@ static void __cpuinit init_intel(struct
5135 set_bit(X86_FEATURE_ARCH_PERFMON, &c->x86_capability);
5136 }
5137
5138+ if (cpu_has_ds) {
5139+ unsigned int l1, l2;
5140+ rdmsr(MSR_IA32_MISC_ENABLE, l1, l2);
5141+ if (!(l1 & (1<<11)))
5142+ set_bit(X86_FEATURE_BTS, c->x86_capability);
5143+ if (!(l1 & (1<<12)))
5144+ set_bit(X86_FEATURE_PEBS, c->x86_capability);
5145+ }
5146+
5147 n = c->extended_cpuid_level;
5148 if (n >= 0x80000008) {
5149 unsigned eax = cpuid_eax(0x80000008);
5150@@ -1112,7 +1117,10 @@ static void __cpuinit init_intel(struct
5151 set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
5152 if (c->x86 == 6)
5153 set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability);
5154- set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
5155+ if (c->x86 == 15)
5156+ set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
5157+ else
5158+ clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
5159 c->x86_max_cores = intel_num_cpu_cores(c);
5160
5161 srat_detect_node();
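The init_intel() hunk above probes IA32_MISC_ENABLE: when bits 11 and 12 are clear, the CPU advertises BTS (branch trace store) and PEBS (precise event-based sampling). The probe in isolation, with an invented helper and the same rdmsr() convention the file uses:

#include <linux/kernel.h>
#include <asm/msr.h>

static void probe_debug_store(void)
{
        unsigned int lo, hi;
        int bts, pebs;

        rdmsr(MSR_IA32_MISC_ENABLE, lo, hi);
        bts  = !(lo & (1 << 11));       /* bit 11 clear: BTS usable */
        pebs = !(lo & (1 << 12));       /* bit 12 clear: PEBS usable */
        printk(KERN_INFO "bts=%d pebs=%d\n", bts, pebs);
}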
5162--- sle11-2009-06-29.orig/arch/x86/kernel/smp_64-xen.c 2009-03-04 11:28:34.000000000 +0100
5163+++ sle11-2009-06-29/arch/x86/kernel/smp_64-xen.c 2008-12-15 11:26:44.000000000 +0100
5164@@ -384,12 +384,17 @@ int smp_call_function_single (int cpu, v
5165 put_cpu();
5166 return 0;
5167 }
5168+
5169+ /* Can deadlock when called with interrupts disabled */
5170+ WARN_ON(irqs_disabled());
5171+
5172 spin_lock_bh(&call_lock);
5173 __smp_call_function_single(cpu, func, info, nonatomic, wait);
5174 spin_unlock_bh(&call_lock);
5175 put_cpu();
5176 return 0;
5177 }
5178+EXPORT_SYMBOL(smp_call_function_single);
5179
5180 /*
5181 * this function sends a 'generic call function' IPI to all other CPUs
5182--- sle11-2009-06-29.orig/arch/x86/kernel/traps_64-xen.c 2009-03-04 11:28:34.000000000 +0100
5183+++ sle11-2009-06-29/arch/x86/kernel/traps_64-xen.c 2008-12-15 11:26:44.000000000 +0100
5184@@ -30,9 +30,10 @@
5185 #include <linux/kprobes.h>
5186 #include <linux/kexec.h>
5187 #include <linux/unwind.h>
5188+#include <linux/uaccess.h>
5189+#include <linux/bug.h>
5190
5191 #include <asm/system.h>
5192-#include <asm/uaccess.h>
5193 #include <asm/io.h>
5194 #include <asm/atomic.h>
5195 #include <asm/debugreg.h>
5196@@ -108,12 +109,7 @@ static inline void preempt_conditional_c
5197 preempt_enable_no_resched();
5198 }
5199
5200-static int kstack_depth_to_print = 12;
5201-#ifdef CONFIG_STACK_UNWIND
5202-static int call_trace = 1;
5203-#else
5204-#define call_trace (-1)
5205-#endif
5206+int kstack_depth_to_print = 12;
5207
5208 #ifdef CONFIG_KALLSYMS
5209 void printk_address(unsigned long address)
5210@@ -218,24 +214,7 @@ static unsigned long *in_exception_stack
5211 return NULL;
5212 }
5213
5214-struct ops_and_data {
5215- struct stacktrace_ops *ops;
5216- void *data;
5217-};
5218-
5219-static int dump_trace_unwind(struct unwind_frame_info *info, void *context)
5220-{
5221- struct ops_and_data *oad = (struct ops_and_data *)context;
5222- int n = 0;
5223-
5224- while (unwind(info) == 0 && UNW_PC(info)) {
5225- n++;
5226- oad->ops->address(oad->data, UNW_PC(info));
5227- if (arch_unw_user_mode(info))
5228- break;
5229- }
5230- return n;
5231-}
5232+#define MSG(txt) ops->warning(data, txt)
5233
5234 /*
5235 * x86-64 can have up to three kernel stacks:
5236@@ -250,61 +229,24 @@ static inline int valid_stack_ptr(struct
5237 return p > t && p < t + THREAD_SIZE - 3;
5238 }
5239
5240-void dump_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * stack,
5241+void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
5242+ unsigned long *stack,
5243 struct stacktrace_ops *ops, void *data)
5244 {
5245- const unsigned cpu = smp_processor_id();
5246- unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr;
5247+ const unsigned cpu = get_cpu();
5248+ unsigned long *irqstack_end = (unsigned long*)cpu_pda(cpu)->irqstackptr;
5249 unsigned used = 0;
5250 struct thread_info *tinfo;
5251
5252 if (!tsk)
5253 tsk = current;
5254
5255- if (call_trace >= 0) {
5256- int unw_ret = 0;
5257- struct unwind_frame_info info;
5258- struct ops_and_data oad = { .ops = ops, .data = data };
5259-
5260- if (regs) {
5261- if (unwind_init_frame_info(&info, tsk, regs) == 0)
5262- unw_ret = dump_trace_unwind(&info, &oad);
5263- } else if (tsk == current)
5264- unw_ret = unwind_init_running(&info, dump_trace_unwind, &oad);
5265- else {
5266- if (unwind_init_blocked(&info, tsk) == 0)
5267- unw_ret = dump_trace_unwind(&info, &oad);
5268- }
5269- if (unw_ret > 0) {
5270- if (call_trace == 1 && !arch_unw_user_mode(&info)) {
5271- ops->warning_symbol(data, "DWARF2 unwinder stuck at %s\n",
5272- UNW_PC(&info));
5273- if ((long)UNW_SP(&info) < 0) {
5274- ops->warning(data, "Leftover inexact backtrace:\n");
5275- stack = (unsigned long *)UNW_SP(&info);
5276- if (!stack)
5277- return;
5278- } else
5279- ops->warning(data, "Full inexact backtrace again:\n");
5280- } else if (call_trace >= 1)
5281- return;
5282- else
5283- ops->warning(data, "Full inexact backtrace again:\n");
5284- } else
5285- ops->warning(data, "Inexact backtrace:\n");
5286- }
5287 if (!stack) {
5288 unsigned long dummy;
5289 stack = &dummy;
5290 if (tsk && tsk != current)
5291 stack = (unsigned long *)tsk->thread.rsp;
5292 }
5293- /*
5294- * Align the stack pointer on word boundary, later loops
5295- * rely on that (and corruption / debug info bugs can cause
5296- * unaligned values here):
5297- */
5298- stack = (unsigned long *)((unsigned long)stack & ~(sizeof(long)-1));
5299
5300 /*
5301 * Print function call entries within a stack. 'cond' is the
5302@@ -314,9 +256,9 @@ void dump_trace(struct task_struct *tsk,
5303 #define HANDLE_STACK(cond) \
5304 do while (cond) { \
5305 unsigned long addr = *stack++; \
5306- if (oops_in_progress ? \
5307- __kernel_text_address(addr) : \
5308- kernel_text_address(addr)) { \
5309+ /* Use unlocked access here because except for NMIs \
5310+ we should already be protected against module unloads */ \
5311+ if (__kernel_text_address(addr)) { \
5312 /* \
5313 * If the address is either in the text segment of the \
5314 * kernel, or in the region which contains vmalloc'ed \
5315@@ -379,9 +321,10 @@ void dump_trace(struct task_struct *tsk,
5316 /*
5317 * This handles the process stack:
5318 */
5319- tinfo = current_thread_info();
5320+ tinfo = task_thread_info(tsk);
5321 HANDLE_STACK (valid_stack_ptr(tinfo, stack));
5322 #undef HANDLE_STACK
5323+ put_cpu();
5324 }
5325 EXPORT_SYMBOL(dump_trace);
5326
5327@@ -518,30 +461,15 @@ bad:
5328 printk("\n");
5329 }
5330
5331-void handle_BUG(struct pt_regs *regs)
5332-{
5333- struct bug_frame f;
5334- long len;
5335- const char *prefix = "";
5336+int is_valid_bugaddr(unsigned long rip)
5337+{
5338+ unsigned short ud2;
5339
5340- if (user_mode(regs))
5341- return;
5342- if (__copy_from_user(&f, (const void __user *) regs->rip,
5343- sizeof(struct bug_frame)))
5344- return;
5345- if (f.filename >= 0 ||
5346- f.ud2[0] != 0x0f || f.ud2[1] != 0x0b)
5347- return;
5348- len = __strnlen_user((char *)(long)f.filename, PATH_MAX) - 1;
5349- if (len < 0 || len >= PATH_MAX)
5350- f.filename = (int)(long)"unmapped filename";
5351- else if (len > 50) {
5352- f.filename += len - 50;
5353- prefix = "...";
5354- }
5355- printk("----------- [cut here ] --------- [please bite here ] ---------\n");
5356- printk(KERN_ALERT "Kernel BUG at %s%.50s:%d\n", prefix, (char *)(long)f.filename, f.line);
5357-}
5358+ if (__copy_from_user(&ud2, (const void __user *) rip, sizeof(ud2)))
5359+ return 0;
5360+
5361+ return ud2 == 0x0b0f;
5362+}
5363
5364 #ifdef CONFIG_BUG
5365 void out_of_line_bug(void)
5366@@ -621,7 +549,9 @@ void die(const char * str, struct pt_reg
5367 {
5368 unsigned long flags = oops_begin();
5369
5370- handle_BUG(regs);
5371+ if (!user_mode(regs))
5372+ report_bug(regs->rip);
5373+
5374 __die(str, regs, err);
5375 oops_end(flags);
5376 do_exit(SIGSEGV);
5377@@ -790,8 +720,7 @@ mem_parity_error(unsigned char reason, s
5378 {
5379 printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n",
5380 reason);
5381- printk(KERN_EMERG "You probably have a hardware problem with your "
5382- "RAM chips\n");
5383+ printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n");
5384
5385 if (panic_on_unrecovered_nmi)
5386 panic("NMI: Not continuing");
5387@@ -1227,21 +1156,3 @@ static int __init kstack_setup(char *s)
5388 return 0;
5389 }
5390 early_param("kstack", kstack_setup);
5391-
5392-#ifdef CONFIG_STACK_UNWIND
5393-static int __init call_trace_setup(char *s)
5394-{
5395- if (!s)
5396- return -EINVAL;
5397- if (strcmp(s, "old") == 0)
5398- call_trace = -1;
5399- else if (strcmp(s, "both") == 0)
5400- call_trace = 0;
5401- else if (strcmp(s, "newfallback") == 0)
5402- call_trace = 1;
5403- else if (strcmp(s, "new") == 0)
5404- call_trace = 2;
5405- return 0;
5406-}
5407-early_param("call_trace", call_trace_setup);
5408-#endif
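
The hunk above replaces the old handle_BUG() parser with is_valid_bugaddr(): with the generic report_bug() infrastructure introduced in 2.6.20, the trap handler only has to confirm that the faulting instruction really is ud2, whose opcode bytes 0x0f 0x0b read back as the 16-bit little-endian value 0x0b0f. A minimal user-space sketch of that check, assuming a little-endian x86 host:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* ud2 is the two bytes 0x0f 0x0b; loaded as a little-endian 16-bit
 * value they read back as 0x0b0f, the constant compared against in
 * the is_valid_bugaddr() hunk above. */
static int looks_like_ud2(const uint8_t *insn)
{
	uint16_t w;

	memcpy(&w, insn, sizeof(w));   /* mirrors the kernel's u16 load */
	return w == 0x0b0f;
}

int main(void)
{
	const uint8_t bug[2] = { 0x0f, 0x0b };   /* ud2 */
	const uint8_t nop[2] = { 0x90, 0x90 };   /* nop; nop */

	printf("%d %d\n", looks_like_ud2(bug), looks_like_ud2(nop));   /* 1 0 */
	return 0;
}
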
5409--- sle11-2009-06-29.orig/arch/x86/kernel/vsyscall_64-xen.c 2009-03-04 11:28:34.000000000 +0100
5410+++ sle11-2009-06-29/arch/x86/kernel/vsyscall_64-xen.c 2008-12-15 11:26:44.000000000 +0100
5411@@ -42,6 +42,7 @@
5412 #include <asm/topology.h>
5413
5414 #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
5415+#define __syscall_clobber "r11","rcx","memory"
5416
5417 int __sysctl_vsyscall __section_sysctl_vsyscall = 1;
5418 seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED;
5419@@ -224,8 +225,7 @@ out:
5420
5421 static int vsyscall_sysctl_nostrat(ctl_table *t, int __user *name, int nlen,
5422 void __user *oldval, size_t __user *oldlenp,
5423- void __user *newval, size_t newlen,
5424- void **context)
5425+ void __user *newval, size_t newlen)
5426 {
5427 return -ENOSYS;
5428 }
5429@@ -277,7 +277,6 @@ static void __cpuinit cpu_vsyscall_init(
5430 vsyscall_set_cpu(raw_smp_processor_id());
5431 }
5432
5433-#ifdef CONFIG_HOTPLUG_CPU
5434 static int __cpuinit
5435 cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg)
5436 {
5437@@ -286,13 +285,13 @@ cpu_vsyscall_notifier(struct notifier_bl
5438 smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 0, 1);
5439 return NOTIFY_DONE;
5440 }
5441-#endif
5442
5443 static void __init map_vsyscall(void)
5444 {
5445 extern char __vsyscall_0;
5446 unsigned long physaddr_page0 = __pa_symbol(&__vsyscall_0);
5447
5448+ /* Note that VSYSCALL_MAPPED_PAGES must agree with the code below. */
5449 __set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_page0, PAGE_KERNEL_VSYSCALL);
5450 }
5451
5452--- sle11-2009-06-29.orig/arch/x86/mm/fault_64-xen.c 2009-03-04 11:28:34.000000000 +0100
5453+++ sle11-2009-06-29/arch/x86/mm/fault_64-xen.c 2008-12-15 11:26:44.000000000 +0100
5454@@ -23,9 +23,9 @@
5455 #include <linux/compiler.h>
5456 #include <linux/module.h>
5457 #include <linux/kprobes.h>
5458+#include <linux/uaccess.h>
5459
5460 #include <asm/system.h>
5461-#include <asm/uaccess.h>
5462 #include <asm/pgalloc.h>
5463 #include <asm/smp.h>
5464 #include <asm/tlbflush.h>
5465@@ -96,7 +96,7 @@ void bust_spinlocks(int yes)
5466 static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr,
5467 unsigned long error_code)
5468 {
5469- unsigned char __user *instr;
5470+ unsigned char *instr;
5471 int scan_more = 1;
5472 int prefetch = 0;
5473 unsigned char *max_instr;
5474@@ -116,7 +116,7 @@ static noinline int is_prefetch(struct p
5475 unsigned char instr_hi;
5476 unsigned char instr_lo;
5477
5478- if (__get_user(opcode, (char __user *)instr))
5479+ if (probe_kernel_address(instr, opcode))
5480 break;
5481
5482 instr_hi = opcode & 0xf0;
5483@@ -154,7 +154,7 @@ static noinline int is_prefetch(struct p
5484 case 0x00:
5485 /* Prefetch instruction is 0x0F0D or 0x0F18 */
5486 scan_more = 0;
5487- if (__get_user(opcode, (char __user *)instr))
5488+ if (probe_kernel_address(instr, opcode))
5489 break;
5490 prefetch = (instr_lo == 0xF) &&
5491 (opcode == 0x0D || opcode == 0x18);
5492@@ -170,7 +170,7 @@ static noinline int is_prefetch(struct p
5493 static int bad_address(void *p)
5494 {
5495 unsigned long dummy;
5496- return __get_user(dummy, (unsigned long __user *)p);
5497+ return probe_kernel_address((unsigned long *)p, dummy);
5498 }
5499
5500 void dump_pagetable(unsigned long address)
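
is_prefetch() above now fetches the instruction bytes with probe_kernel_address(), which disables page faults around the copy and reports failure instead of recursing into the fault handler. The opcode test it feeds is simple: the AMD/SSE prefetches are the two-byte sequences 0x0f 0x0d and 0x0f 0x18 followed by a ModRM byte. A rough user-space sketch of just the recognition step, assuming the bytes were already copied out safely (the real loop also skips prefix bytes):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* A prefetch starts with the escape byte 0x0f followed by 0x0d
 * (3DNow! prefetch) or 0x18 (SSE prefetchNTA/T0/T1/T2). */
static int starts_with_prefetch(const uint8_t *insn, size_t len)
{
	if (len < 2)
		return 0;
	return insn[0] == 0x0f && (insn[1] == 0x0d || insn[1] == 0x18);
}

int main(void)
{
	const uint8_t pf[]  = { 0x0f, 0x18, 0x06 };   /* prefetchnta (%rsi) */
	const uint8_t mov[] = { 0x48, 0x89, 0xc8 };   /* mov %rcx,%rax */

	printf("%d %d\n",
	       starts_with_prefetch(pf, sizeof(pf)),
	       starts_with_prefetch(mov, sizeof(mov)));   /* 1 0 */
	return 0;
}
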
5501--- sle11-2009-06-29.orig/arch/x86/mm/init_64-xen.c 2009-03-04 11:28:34.000000000 +0100
5502+++ sle11-2009-06-29/arch/x86/mm/init_64-xen.c 2008-12-15 11:26:44.000000000 +0100
5503@@ -1127,14 +1127,15 @@ static __init int x8664_sysctl_init(void
5504 __initcall(x8664_sysctl_init);
5505 #endif
5506
5507-/* A pseudo VMAs to allow ptrace access for the vsyscall page. This only
5508+/* A pseudo VMA to allow ptrace access for the vsyscall page. This only
5509 covers the 64bit vsyscall page now. 32bit has a real VMA now and does
5510 not need special handling anymore. */
5511
5512 static struct vm_area_struct gate_vma = {
5513 .vm_start = VSYSCALL_START,
5514- .vm_end = VSYSCALL_END,
5515- .vm_page_prot = PAGE_READONLY
5516+ .vm_end = VSYSCALL_START + (VSYSCALL_MAPPED_PAGES << PAGE_SHIFT),
5517+ .vm_page_prot = PAGE_READONLY_EXEC,
5518+ .vm_flags = VM_READ | VM_EXEC
5519 };
5520
5521 struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
5522--- sle11-2009-06-29.orig/arch/x86/mm/pageattr_64-xen.c 2009-03-04 11:28:34.000000000 +0100
5523+++ sle11-2009-06-29/arch/x86/mm/pageattr_64-xen.c 2008-12-15 11:26:44.000000000 +0100
5524@@ -330,34 +330,40 @@ static struct page *split_large_page(uns
5525 return base;
5526 }
5527
5528-
5529-static void flush_kernel_map(void *address)
5530+static void cache_flush_page(void *adr)
5531 {
5532- if (0 && address && cpu_has_clflush) {
5533- /* is this worth it? */
5534- int i;
5535- for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size)
5536- asm volatile("clflush (%0)" :: "r" (address + i));
5537- } else
5538- asm volatile("wbinvd":::"memory");
5539- if (address)
5540- __flush_tlb_one(address);
5541- else
5542- __flush_tlb_all();
5543+ int i;
5544+ for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size)
5545+ asm volatile("clflush (%0)" :: "r" (adr + i));
5546 }
5547
5548+static void flush_kernel_map(void *arg)
5549+{
5550+ struct list_head *l = (struct list_head *)arg;
5551+ struct page *pg;
5552
5553-static inline void flush_map(unsigned long address)
5554+ /* When clflush is available always use it because it is
5555+ much cheaper than WBINVD */
5556+ if (!cpu_has_clflush)
5557+ asm volatile("wbinvd" ::: "memory");
5558+ list_for_each_entry(pg, l, lru) {
5559+ void *adr = page_address(pg);
5560+ if (cpu_has_clflush)
5561+ cache_flush_page(adr);
5562+ __flush_tlb_one(adr);
5563+ }
5564+}
5565+
5566+static inline void flush_map(struct list_head *l)
5567 {
5568- on_each_cpu(flush_kernel_map, (void *)address, 1, 1);
5569+ on_each_cpu(flush_kernel_map, l, 1, 1);
5570 }
5571
5572-static struct page *deferred_pages; /* protected by init_mm.mmap_sem */
5573+static LIST_HEAD(deferred_pages); /* protected by init_mm.mmap_sem */
5574
5575 static inline void save_page(struct page *fpage)
5576 {
5577- fpage->lru.next = (struct list_head *)deferred_pages;
5578- deferred_pages = fpage;
5579+ list_add(&fpage->lru, &deferred_pages);
5580 }
5581
5582 /*
5583@@ -487,18 +493,18 @@ int change_page_attr(struct page *page,
5584
5585 void global_flush_tlb(void)
5586 {
5587- struct page *dpage;
5588+ struct page *pg, *next;
5589+ struct list_head l;
5590
5591 down_read(&init_mm.mmap_sem);
5592- dpage = xchg(&deferred_pages, NULL);
5593+ list_replace_init(&deferred_pages, &l);
5594 up_read(&init_mm.mmap_sem);
5595
5596- flush_map((dpage && !dpage->lru.next) ? (unsigned long)page_address(dpage) : 0);
5597- while (dpage) {
5598- struct page *tmp = dpage;
5599- dpage = (struct page *)dpage->lru.next;
5600- ClearPagePrivate(tmp);
5601- __free_page(tmp);
5602+ flush_map(&l);
5603+
5604+ list_for_each_entry_safe(pg, next, &l, lru) {
5605+ ClearPagePrivate(pg);
5606+ __free_page(pg);
5607 }
5608 }
5609
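
The rewritten flush_kernel_map() above prefers clflush, walking the 4 KiB page one cache line at a time, and only falls back to the far more expensive global WBINVD when clflush is unavailable; the stride comes from the CPU's reported clflush line size. A minimal sketch of the per-page loop, assuming an x86 compiler and a 64-byte line size:

#include <stddef.h>

#define PAGE_SIZE_BYTES   4096
#define CLFLUSH_LINE_SIZE 64   /* boot_cpu_data.x86_clflush_size on most parts */

/* Flush every cache line backing one page, as cache_flush_page() does;
 * clflush accepts any byte address within the line. x86 only. */
static void cache_flush_page_sketch(void *page)
{
	size_t i;

	for (i = 0; i < PAGE_SIZE_BYTES; i += CLFLUSH_LINE_SIZE)
		__asm__ volatile("clflush (%0)"
				 :: "r"((char *)page + i) : "memory");
}

int main(void)
{
	static char page[PAGE_SIZE_BYTES] __attribute__((aligned(4096)));

	cache_flush_page_sketch(page);
	return 0;
}
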
5610--- sle11-2009-06-29.orig/drivers/pci/msi-xen.c 2009-04-24 13:31:56.000000000 +0200
5611+++ sle11-2009-06-29/drivers/pci/msi-xen.c 2008-12-15 11:26:44.000000000 +0100
5612@@ -263,10 +263,8 @@ void disable_msi_mode(struct pci_dev *de
5613 pci_write_config_word(dev, msi_control_reg(pos), control);
5614 dev->msix_enabled = 0;
5615 }
5616- if (pci_find_capability(dev, PCI_CAP_ID_EXP)) {
5617- /* PCI Express Endpoint device detected */
5618- pci_intx(dev, 1); /* enable intx */
5619- }
5620+
5621+ pci_intx(dev, 1); /* enable intx */
5622 }
5623
5624 static void enable_msi_mode(struct pci_dev *dev, int pos, int type)
5625@@ -284,10 +282,8 @@ static void enable_msi_mode(struct pci_d
5626 pci_write_config_word(dev, msi_control_reg(pos), control);
5627 dev->msix_enabled = 1;
5628 }
5629- if (pci_find_capability(dev, PCI_CAP_ID_EXP)) {
5630- /* PCI Express Endpoint device detected */
5631- pci_intx(dev, 0); /* disable intx */
5632- }
5633+
5634+ pci_intx(dev, 0); /* disable intx */
5635 }
5636
5637 #ifdef CONFIG_PM
5638--- sle11-2009-06-29.orig/drivers/xen/balloon/balloon.c 2009-03-04 11:28:34.000000000 +0100
5639+++ sle11-2009-06-29/drivers/xen/balloon/balloon.c 2009-06-29 15:28:36.000000000 +0200
5640@@ -97,8 +97,8 @@ extern unsigned long totalhigh_pages;
5641 static LIST_HEAD(ballooned_pages);
5642
5643 /* Main work function, always executed in process context. */
5644-static void balloon_process(void *unused);
5645-static DECLARE_WORK(balloon_worker, balloon_process, NULL);
5646+static void balloon_process(struct work_struct *unused);
5647+static DECLARE_WORK(balloon_worker, balloon_process);
5648 static struct timer_list balloon_timer;
5649
5650 /* When ballooning out (allocating memory to return to Xen) we don't really
5651@@ -375,7 +375,7 @@ static int decrease_reservation(unsigned
5652 * by the balloon lock), or with changes to the Xen hard limit, but we will
5653 * recover from these in time.
5654 */
5655-static void balloon_process(void *unused)
5656+static void balloon_process(struct work_struct *unused)
5657 {
5658 int need_sleep = 0;
5659 long credit;
5660--- sle11-2009-06-29.orig/drivers/xen/blkback/blkback.c 2009-03-04 11:28:34.000000000 +0100
5661+++ sle11-2009-06-29/drivers/xen/blkback/blkback.c 2008-12-15 11:26:44.000000000 +0100
5662@@ -37,6 +37,7 @@
5663
5664 #include <linux/spinlock.h>
5665 #include <linux/kthread.h>
5666+#include <linux/freezer.h>
5667 #include <linux/list.h>
5668 #include <linux/delay.h>
5669 #include <xen/balloon.h>
5670--- sle11-2009-06-29.orig/drivers/xen/blkback/interface.c 2009-06-29 15:14:52.000000000 +0200
5671+++ sle11-2009-06-29/drivers/xen/blkback/interface.c 2008-12-15 11:26:44.000000000 +0100
5672@@ -34,7 +34,7 @@
5673 #include <xen/evtchn.h>
5674 #include <linux/kthread.h>
5675
5676-static kmem_cache_t *blkif_cachep;
5677+static struct kmem_cache *blkif_cachep;
5678
5679 blkif_t *blkif_alloc(domid_t domid)
5680 {
5681--- sle11-2009-06-29.orig/drivers/xen/blkfront/blkfront.c 2009-03-24 10:08:16.000000000 +0100
5682+++ sle11-2009-06-29/drivers/xen/blkfront/blkfront.c 2009-03-24 10:08:27.000000000 +0100
5683@@ -71,7 +71,7 @@ static int setup_blkring(struct xenbus_d
5684 static void kick_pending_request_queues(struct blkfront_info *);
5685
5686 static irqreturn_t blkif_int(int irq, void *dev_id);
5687-static void blkif_restart_queue(void *arg);
5688+static void blkif_restart_queue(struct work_struct *arg);
5689 static void blkif_recover(struct blkfront_info *);
5690 static void blkif_completion(struct blk_shadow *);
5691 static void blkif_free(struct blkfront_info *, int);
5692@@ -111,7 +111,7 @@ static int blkfront_probe(struct xenbus_
5693 info->xbdev = dev;
5694 info->vdevice = vdevice;
5695 info->connected = BLKIF_STATE_DISCONNECTED;
5696- INIT_WORK(&info->work, blkif_restart_queue, (void *)info);
5697+ INIT_WORK(&info->work, blkif_restart_queue);
5698
5699 for (i = 0; i < BLK_RING_SIZE; i++)
5700 info->shadow[i].req.id = i+1;
5701@@ -462,9 +462,9 @@ static void kick_pending_request_queues(
5702 }
5703 }
5704
5705-static void blkif_restart_queue(void *arg)
5706+static void blkif_restart_queue(struct work_struct *arg)
5707 {
5708- struct blkfront_info *info = (struct blkfront_info *)arg;
5709+ struct blkfront_info *info = container_of(arg, struct blkfront_info, work);
5710 spin_lock_irq(&blkif_io_lock);
5711 if (info->connected == BLKIF_STATE_CONNECTED)
5712 kick_pending_request_queues(info);
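
These driver hunks track the 2.6.20 workqueue API change: a handler now receives the struct work_struct * that fired instead of an opaque void *, and recovers its owning object with container_of(), exactly as blkif_restart_queue() does above for struct blkfront_info. The container_of() idiom is plain pointer arithmetic; a self-contained sketch with a hypothetical stand-in structure:

#include <stddef.h>
#include <stdio.h>

/* Simplified container_of(): step back from an embedded member to the
 * enclosing object (the kernel version adds a type check via typeof). */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct work_struct { int pending; };      /* stand-in for the kernel type */

struct blkfront_info_sketch {             /* hypothetical example struct */
	int connected;
	struct work_struct work;
};

static void restart_queue(struct work_struct *w)
{
	struct blkfront_info_sketch *info =
		container_of(w, struct blkfront_info_sketch, work);

	printf("connected=%d\n", info->connected);
}

int main(void)
{
	struct blkfront_info_sketch info = { .connected = 1 };

	restart_queue(&info.work);   /* handler sees only the member */
	return 0;
}
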
5713--- sle11-2009-06-29.orig/drivers/xen/blktap/blktap.c 2009-04-20 11:37:34.000000000 +0200
5714+++ sle11-2009-06-29/drivers/xen/blktap/blktap.c 2009-04-20 11:37:50.000000000 +0200
5715@@ -40,6 +40,7 @@
5716
5717 #include <linux/spinlock.h>
5718 #include <linux/kthread.h>
5719+#include <linux/freezer.h>
5720 #include <linux/list.h>
5721 #include <asm/hypervisor.h>
5722 #include "common.h"
5723--- sle11-2009-06-29.orig/drivers/xen/blktap/interface.c 2009-06-29 15:14:52.000000000 +0200
5724+++ sle11-2009-06-29/drivers/xen/blktap/interface.c 2008-12-15 11:26:44.000000000 +0100
5725@@ -34,7 +34,7 @@
5726 #include "common.h"
5727 #include <xen/evtchn.h>
5728
5729-static kmem_cache_t *blkif_cachep;
5730+static struct kmem_cache *blkif_cachep;
5731
5732 blkif_t *tap_alloc_blkif(domid_t domid)
5733 {
5734--- sle11-2009-06-29.orig/drivers/xen/char/mem.c 2009-06-29 15:14:52.000000000 +0200
5735+++ sle11-2009-06-29/drivers/xen/char/mem.c 2008-12-15 11:26:44.000000000 +0100
5736@@ -157,7 +157,7 @@ static loff_t memory_lseek(struct file *
5737 {
5738 loff_t ret;
5739
5740- mutex_lock(&file->f_dentry->d_inode->i_mutex);
5741+ mutex_lock(&file->f_path.dentry->d_inode->i_mutex);
5742 switch (orig) {
5743 case 0:
5744 file->f_pos = offset;
5745@@ -172,7 +172,7 @@ static loff_t memory_lseek(struct file *
5746 default:
5747 ret = -EINVAL;
5748 }
5749- mutex_unlock(&file->f_dentry->d_inode->i_mutex);
5750+ mutex_unlock(&file->f_path.dentry->d_inode->i_mutex);
5751 return ret;
5752 }
5753
5754--- sle11-2009-06-29.orig/drivers/xen/console/console.c 2009-03-04 11:28:34.000000000 +0100
5755+++ sle11-2009-06-29/drivers/xen/console/console.c 2008-12-15 11:26:44.000000000 +0100
5756@@ -85,11 +85,6 @@ static int xc_num = -1;
5757 #define XEN_HVC_MAJOR 229
5758 #define XEN_HVC_MINOR 0
5759
5760-#ifdef CONFIG_MAGIC_SYSRQ
5761-static unsigned long sysrq_requested;
5762-extern int sysrq_enabled;
5763-#endif
5764-
5765 static int __init xencons_setup(char *str)
5766 {
5767 char *q;
5768@@ -355,8 +350,8 @@ void __init dom0_init_screen_info(const
5769 #define DUMMY_TTY(_tty) ((xc_mode == XC_TTY) && \
5770 ((_tty)->index != (xc_num - 1)))
5771
5772-static struct termios *xencons_termios[MAX_NR_CONSOLES];
5773-static struct termios *xencons_termios_locked[MAX_NR_CONSOLES];
5774+static struct ktermios *xencons_termios[MAX_NR_CONSOLES];
5775+static struct ktermios *xencons_termios_locked[MAX_NR_CONSOLES];
5776 static struct tty_struct *xencons_tty;
5777 static int xencons_priv_irq;
5778 static char x_char;
5779@@ -372,7 +367,9 @@ void xencons_rx(char *buf, unsigned len)
5780
5781 for (i = 0; i < len; i++) {
5782 #ifdef CONFIG_MAGIC_SYSRQ
5783- if (sysrq_enabled) {
5784+ if (sysrq_on()) {
5785+ static unsigned long sysrq_requested;
5786+
5787 if (buf[i] == '\x0f') { /* ^O */
5788 if (!sysrq_requested) {
5789 sysrq_requested = jiffies;
5790--- sle11-2009-06-29.orig/drivers/xen/core/reboot.c 2009-03-04 11:28:34.000000000 +0100
5791+++ sle11-2009-06-29/drivers/xen/core/reboot.c 2008-12-15 11:26:44.000000000 +0100
5792@@ -34,8 +34,8 @@ static int suspend_cancelled;
5793 /* Can we leave APs online when we suspend? */
5794 static int fast_suspend;
5795
5796-static void __shutdown_handler(void *unused);
5797-static DECLARE_WORK(shutdown_work, __shutdown_handler, NULL);
5798+static void __shutdown_handler(struct work_struct *unused);
5799+static DECLARE_DELAYED_WORK(shutdown_work, __shutdown_handler);
5800
5801 static int setup_suspend_evtchn(void);
5802
5803@@ -105,7 +105,7 @@ static int xen_suspend(void *__unused)
5804 case SHUTDOWN_RESUMING:
5805 break;
5806 default:
5807- schedule_work(&shutdown_work);
5808+ schedule_delayed_work(&shutdown_work, 0);
5809 break;
5810 }
5811
5812@@ -137,12 +137,12 @@ static void switch_shutdown_state(int ne
5813
5814 /* Either we kick off the work, or we leave it to xen_suspend(). */
5815 if (old_state == SHUTDOWN_INVALID)
5816- schedule_work(&shutdown_work);
5817+ schedule_delayed_work(&shutdown_work, 0);
5818 else
5819 BUG_ON(old_state != SHUTDOWN_RESUMING);
5820 }
5821
5822-static void __shutdown_handler(void *unused)
5823+static void __shutdown_handler(struct work_struct *unused)
5824 {
5825 int err;
5826
5827--- sle11-2009-06-29.orig/drivers/xen/core/smpboot.c 2009-03-04 11:28:34.000000000 +0100
5828+++ sle11-2009-06-29/drivers/xen/core/smpboot.c 2008-12-15 11:26:44.000000000 +0100
5829@@ -161,7 +161,12 @@ static void xen_smp_intr_exit(unsigned i
5830
5831 void __cpuinit cpu_bringup(void)
5832 {
5833+#ifdef __i386__
5834+ cpu_set_gdt(current_thread_info()->cpu);
5835+ secondary_cpu_init();
5836+#else
5837 cpu_init();
5838+#endif
5839 identify_cpu(cpu_data + smp_processor_id());
5840 touch_softlockup_watchdog();
5841 preempt_disable();
5842@@ -300,11 +305,12 @@ void __init smp_prepare_cpus(unsigned in
5843 if (cpu == 0)
5844 continue;
5845
5846+ idle = fork_idle(cpu);
5847+ if (IS_ERR(idle))
5848+ panic("failed fork for CPU %d", cpu);
5849+
5850 #ifdef __x86_64__
5851 gdt_descr = &cpu_gdt_descr[cpu];
5852-#else
5853- gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
5854-#endif
5855 gdt_descr->address = get_zeroed_page(GFP_KERNEL);
5856 if (unlikely(!gdt_descr->address)) {
5857 printk(KERN_CRIT "CPU%d failed to allocate GDT\n",
5858@@ -313,6 +319,11 @@ void __init smp_prepare_cpus(unsigned in
5859 }
5860 gdt_descr->size = GDT_SIZE;
5861 memcpy((void *)gdt_descr->address, cpu_gdt_table, GDT_SIZE);
5862+#else
5863+ if (unlikely(!init_gdt(cpu, idle)))
5864+ continue;
5865+ gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
5866+#endif
5867 make_page_readonly(
5868 (void *)gdt_descr->address,
5869 XENFEAT_writable_descriptor_tables);
5870@@ -332,10 +343,6 @@ void __init smp_prepare_cpus(unsigned in
5871 cpu_2_logical_apicid[cpu] = apicid;
5872 x86_cpu_to_apicid[cpu] = apicid;
5873
5874- idle = fork_idle(cpu);
5875- if (IS_ERR(idle))
5876- panic("failed fork for CPU %d", cpu);
5877-
5878 #ifdef __x86_64__
5879 cpu_pda(cpu)->pcurrent = idle;
5880 cpu_pda(cpu)->cpunumber = cpu;
5881--- sle11-2009-06-29.orig/drivers/xen/fbfront/xenfb.c 2009-03-04 11:28:34.000000000 +0100
5882+++ sle11-2009-06-29/drivers/xen/fbfront/xenfb.c 2008-12-15 11:26:44.000000000 +0100
5883@@ -25,6 +25,7 @@
5884 #include <linux/vmalloc.h>
5885 #include <linux/mm.h>
5886 #include <linux/mutex.h>
5887+#include <linux/freezer.h>
5888 #include <asm/hypervisor.h>
5889 #include <xen/evtchn.h>
5890 #include <xen/interface/io/fbif.h>
5891--- sle11-2009-06-29.orig/drivers/xen/netback/loopback.c 2009-03-04 11:28:34.000000000 +0100
5892+++ sle11-2009-06-29/drivers/xen/netback/loopback.c 2008-12-15 11:26:44.000000000 +0100
5893@@ -54,6 +54,7 @@
5894 #include <net/dst.h>
5895 #include <net/xfrm.h> /* secpath_reset() */
5896 #include <asm/hypervisor.h> /* is_initial_xendomain() */
5897+#include <../net/core/kmap_skb.h> /* k{,un}map_skb_frag() */
5898
5899 static int nloopbacks = -1;
5900 module_param(nloopbacks, int, 0);
5901--- sle11-2009-06-29.orig/drivers/xen/pciback/conf_space_header.c 2009-06-29 15:14:52.000000000 +0200
5902+++ sle11-2009-06-29/drivers/xen/pciback/conf_space_header.c 2008-12-15 11:26:44.000000000 +0100
5903@@ -22,14 +22,14 @@ static int command_write(struct pci_dev
5904 {
5905 int err;
5906
5907- if (!dev->is_enabled && is_enable_cmd(value)) {
5908+ if (!atomic_read(&dev->enable_cnt) && is_enable_cmd(value)) {
5909 if (unlikely(verbose_request))
5910 printk(KERN_DEBUG "pciback: %s: enable\n",
5911 pci_name(dev));
5912 err = pci_enable_device(dev);
5913 if (err)
5914 return err;
5915- } else if (dev->is_enabled && !is_enable_cmd(value)) {
5916+ } else if (atomic_read(&dev->enable_cnt) && !is_enable_cmd(value)) {
5917 if (unlikely(verbose_request))
5918 printk(KERN_DEBUG "pciback: %s: disable\n",
5919 pci_name(dev));
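
command_write() above switches from the removed pci_dev.is_enabled flag to the enable_cnt counter that 2.6.20 introduced so nested pci_enable_device()/pci_disable_device() calls balance; only the 0 to 1 and 1 to 0 edges should touch hardware state. A toy model of that edge rule using C11 atomics (the function names are hypothetical, not the kernel's):

#include <stdatomic.h>
#include <stdio.h>

static atomic_int enable_cnt;

static void enable_device(void)
{
	/* atomic_fetch_add returns the old count: act only on 0 -> 1. */
	if (atomic_fetch_add(&enable_cnt, 1) == 0)
		puts("first user: really enable");
}

static void disable_device(void)
{
	/* Act only on 1 -> 0, when the last user goes away. */
	if (atomic_fetch_sub(&enable_cnt, 1) == 1)
		puts("last user: really disable");
}

int main(void)
{
	enable_device();
	enable_device();    /* nested: no hardware action */
	disable_device();
	disable_device();   /* count hits 0: hardware action */
	return 0;
}
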
5920--- sle11-2009-06-29.orig/drivers/xen/pciback/pciback.h 2009-03-04 11:28:34.000000000 +0100
5921+++ sle11-2009-06-29/drivers/xen/pciback/pciback.h 2008-12-15 11:26:44.000000000 +0100
5922@@ -88,7 +88,7 @@ void pciback_release_devices(struct pcib
5923
5924 /* Handles events from front-end */
5925 irqreturn_t pciback_handle_event(int irq, void *dev_id);
5926-void pciback_do_op(void *data);
5927+void pciback_do_op(struct work_struct *work);
5928
5929 int pciback_xenbus_register(void);
5930 void pciback_xenbus_unregister(void);
5931--- sle11-2009-06-29.orig/drivers/xen/pciback/pciback_ops.c 2009-03-04 11:28:34.000000000 +0100
5932+++ sle11-2009-06-29/drivers/xen/pciback/pciback_ops.c 2008-12-15 11:26:44.000000000 +0100
5933@@ -25,7 +25,7 @@ void pciback_reset_device(struct pci_dev
5934
5935 pci_write_config_word(dev, PCI_COMMAND, 0);
5936
5937- dev->is_enabled = 0;
5938+ atomic_set(&dev->enable_cnt, 0);
5939 dev->is_busmaster = 0;
5940 } else {
5941 pci_read_config_word(dev, PCI_COMMAND, &cmd);
5942@@ -51,9 +51,9 @@ static inline void test_and_schedule_op(
5943 * context because some of the pci_* functions can sleep (mostly due to ACPI
5944 * use of semaphores). This function is intended to be called from a work
5945 * queue in process context taking a struct pciback_device as a parameter */
5946-void pciback_do_op(void *data)
5947+void pciback_do_op(struct work_struct *work)
5948 {
5949- struct pciback_device *pdev = data;
5950+ struct pciback_device *pdev = container_of(work, struct pciback_device, op_work);
5951 struct pci_dev *dev;
5952 struct xen_pci_op *op = &pdev->sh_info->op;
5953
5954--- sle11-2009-06-29.orig/drivers/xen/pciback/xenbus.c 2009-06-29 15:14:52.000000000 +0200
5955+++ sle11-2009-06-29/drivers/xen/pciback/xenbus.c 2008-12-15 11:26:44.000000000 +0100
5956@@ -32,7 +32,7 @@ static struct pciback_device *alloc_pdev
5957 pdev->evtchn_irq = INVALID_EVTCHN_IRQ;
5958 pdev->be_watching = 0;
5959
5960- INIT_WORK(&pdev->op_work, pciback_do_op, pdev);
5961+ INIT_WORK(&pdev->op_work, pciback_do_op);
5962
5963 if (pciback_init_devices(pdev)) {
5964 kfree(pdev);
5965@@ -54,7 +54,6 @@ static void pciback_disconnect(struct pc
5966
5967 /* If the driver domain started an op, make sure we complete it or
5968 * delete it before releasing the shared memory */
5969- cancel_delayed_work(&pdev->op_work);
5970 flush_scheduled_work();
5971
5972 if (pdev->sh_info != NULL) {
5973--- sle11-2009-06-29.orig/drivers/xen/scsiback/interface.c 2009-06-29 15:14:52.000000000 +0200
5974+++ sle11-2009-06-29/drivers/xen/scsiback/interface.c 2008-12-15 11:26:44.000000000 +0100
5975@@ -39,7 +39,7 @@
5976 #include <linux/kthread.h>
5977
5978
5979-static kmem_cache_t *scsiback_cachep;
5980+static struct kmem_cache *scsiback_cachep;
5981
5982 struct vscsibk_info *vscsibk_info_alloc(domid_t domid)
5983 {
5984--- sle11-2009-06-29.orig/drivers/xen/scsiback/scsiback.c 2009-03-04 11:28:34.000000000 +0100
5985+++ sle11-2009-06-29/drivers/xen/scsiback/scsiback.c 2008-12-15 11:26:44.000000000 +0100
5986@@ -322,13 +322,11 @@ static int scsiback_merge_bio(struct req
5987
5988 if (!rq->bio)
5989 blk_rq_bio_prep(q, rq, bio);
5990- else if (!q->back_merge_fn(q, rq, bio))
5991+ else if (!ll_back_merge_fn(q, rq, bio))
5992 return -EINVAL;
5993 else {
5994 rq->biotail->bi_next = bio;
5995 rq->biotail = bio;
5996- rq->hard_nr_sectors += bio_sectors(bio);
5997- rq->nr_sectors = rq->hard_nr_sectors;
5998 }
5999
6000 return 0;
6001--- sle11-2009-06-29.orig/drivers/xen/sfc_netfront/accel_vi.c 2009-03-30 16:35:11.000000000 +0200
6002+++ sle11-2009-06-29/drivers/xen/sfc_netfront/accel_vi.c 2009-03-30 16:35:25.000000000 +0200
6003@@ -463,7 +463,7 @@ netfront_accel_enqueue_skb_multi(netfron
6004
6005 if (skb->ip_summed == CHECKSUM_PARTIAL) {
6006 /* Set to zero to encourage falcon to work it out for us */
6007- *(u16*)(skb->h.raw + skb->csum) = 0;
6008+ *(u16*)(skb->h.raw + skb->csum_offset) = 0;
6009 }
6010
6011 if (multi_post_start_new_buffer(vnic, &state)) {
6012@@ -582,7 +582,7 @@ netfront_accel_enqueue_skb_single(netfro
6013
6014 if (skb->ip_summed == CHECKSUM_PARTIAL) {
6015 /* Set to zero to encourage falcon to work it out for us */
6016- *(u16*)(skb->h.raw + skb->csum) = 0;
6017+ *(u16*)(skb->h.raw + skb->csum_offset) = 0;
6018 }
6019 NETFRONT_ACCEL_PKTBUFF_FOR_EACH_FRAGMENT
6020 (skb, idx, frag_data, frag_len, {
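
The two hunks above follow the 2.6.20 change that turned skb->csum into a union with csum_offset: for CHECKSUM_PARTIAL packets the field now gives the offset from skb->h.raw at which the device must store the checksum, and the driver zeroes that slot before handing the frame to the NIC. What eventually lands there is a standard RFC 1071 ones'-complement sum; a self-contained sketch of the fold:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* RFC 1071 ones'-complement checksum; the result is what ends up at
 * h.raw + csum_offset when the checksum is computed in software. */
static uint16_t csum_fold(const uint8_t *data, size_t len)
{
	uint32_t sum = 0;
	size_t i;

	for (i = 0; i + 1 < len; i += 2)
		sum += (uint32_t)data[i] << 8 | data[i + 1];
	if (len & 1)
		sum += (uint32_t)data[len - 1] << 8;
	while (sum >> 16)                       /* fold carries back in */
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;
}

int main(void)
{
	/* The worked example from RFC 1071: folded sum 0xddf2, so the
	 * transmitted checksum is its complement, 0x220d. */
	const uint8_t pkt[] = { 0x00, 0x01, 0xf2, 0x03, 0xf4, 0xf5, 0xf6, 0xf7 };

	printf("0x%04x\n", csum_fold(pkt, sizeof(pkt)));   /* 0x220d */
	return 0;
}
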
6021--- sle11-2009-06-29.orig/drivers/xen/tpmback/interface.c 2009-06-29 15:14:52.000000000 +0200
6022+++ sle11-2009-06-29/drivers/xen/tpmback/interface.c 2008-12-15 11:26:44.000000000 +0100
6023@@ -15,7 +15,7 @@
6024 #include <xen/balloon.h>
6025 #include <xen/gnttab.h>
6026
6027-static kmem_cache_t *tpmif_cachep;
6028+static struct kmem_cache *tpmif_cachep;
6029 int num_frontends = 0;
6030
6031 LIST_HEAD(tpmif_list);
6032--- sle11-2009-06-29.orig/drivers/xen/xenbus/xenbus_comms.c 2009-03-04 11:28:34.000000000 +0100
6033+++ sle11-2009-06-29/drivers/xen/xenbus/xenbus_comms.c 2008-12-15 11:26:44.000000000 +0100
6034@@ -49,9 +49,9 @@
6035
6036 static int xenbus_irq;
6037
6038-extern void xenbus_probe(void *);
6039+extern void xenbus_probe(struct work_struct *);
6040 extern int xenstored_ready;
6041-static DECLARE_WORK(probe_work, xenbus_probe, NULL);
6042+static DECLARE_WORK(probe_work, xenbus_probe);
6043
6044 static DECLARE_WAIT_QUEUE_HEAD(xb_waitq);
6045
6046--- sle11-2009-06-29.orig/drivers/xen/xenbus/xenbus_probe.c 2008-11-25 12:35:56.000000000 +0100
6047+++ sle11-2009-06-29/drivers/xen/xenbus/xenbus_probe.c 2008-12-15 11:26:44.000000000 +0100
6048@@ -843,7 +843,7 @@ void unregister_xenstore_notifier(struct
6049 EXPORT_SYMBOL_GPL(unregister_xenstore_notifier);
6050
6051
6052-void xenbus_probe(void *unused)
6053+void xenbus_probe(struct work_struct *unused)
6054 {
6055 BUG_ON((xenstored_ready <= 0));
6056
6057--- sle11-2009-06-29.orig/include/asm-x86/mach-xen/asm/desc_32.h 2009-03-04 11:28:34.000000000 +0100
6058+++ sle11-2009-06-29/include/asm-x86/mach-xen/asm/desc_32.h 2008-12-15 11:26:44.000000000 +0100
6059@@ -4,8 +4,6 @@
6060 #include <asm/ldt.h>
6061 #include <asm/segment.h>
6062
6063-#define CPU_16BIT_STACK_SIZE 1024
6064-
6065 #ifndef __ASSEMBLY__
6066
6067 #include <linux/preempt.h>
6068@@ -15,8 +13,6 @@
6069
6070 extern struct desc_struct cpu_gdt_table[GDT_ENTRIES];
6071
6072-DECLARE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]);
6073-
6074 struct Xgt_desc_struct {
6075 unsigned short size;
6076 unsigned long address __attribute__((packed));
6077@@ -32,11 +28,6 @@ static inline struct desc_struct *get_cp
6078 return (struct desc_struct *)per_cpu(cpu_gdt_descr, cpu).address;
6079 }
6080
6081-/*
6082- * This is the ldt that every process will get unless we need
6083- * something other than this.
6084- */
6085-extern struct desc_struct default_ldt[];
6086 extern struct desc_struct idt_table[];
6087 extern void set_intr_gate(unsigned int irq, void * addr);
6088
6089@@ -63,8 +54,8 @@ static inline void pack_gate(__u32 *a, _
6090 #define DESCTYPE_DPL3 0x60 /* DPL-3 */
6091 #define DESCTYPE_S 0x10 /* !system */
6092
6093+#ifndef CONFIG_XEN
6094 #define load_TR_desc() __asm__ __volatile__("ltr %w0"::"q" (GDT_ENTRY_TSS*8))
6095-#define load_LDT_desc() __asm__ __volatile__("lldt %w0"::"q" (GDT_ENTRY_LDT*8))
6096
6097 #define load_gdt(dtr) __asm__ __volatile("lgdt %0"::"m" (*dtr))
6098 #define load_idt(dtr) __asm__ __volatile("lidt %0"::"m" (*dtr))
6099@@ -75,6 +66,7 @@ static inline void pack_gate(__u32 *a, _
6100 #define store_idt(dtr) __asm__ ("sidt %0":"=m" (*dtr))
6101 #define store_tr(tr) __asm__ ("str %0":"=m" (tr))
6102 #define store_ldt(ldt) __asm__ ("sldt %0":"=m" (ldt))
6103+#endif
6104
6105 #if TLS_SIZE != 24
6106 # error update this code.
6107@@ -90,22 +82,43 @@ static inline void load_TLS(struct threa
6108 }
6109
6110 #ifndef CONFIG_XEN
6111+#define write_ldt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
6112+#define write_gdt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
6113+#define write_idt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
6114+
6115 static inline void write_dt_entry(void *dt, int entry, __u32 entry_a, __u32 entry_b)
6116 {
6117 __u32 *lp = (__u32 *)((char *)dt + entry*8);
6118 *lp = entry_a;
6119 *(lp+1) = entry_b;
6120 }
6121-
6122-#define write_ldt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
6123-#define write_gdt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
6124+#define set_ldt native_set_ldt
6125 #else
6126 extern int write_ldt_entry(void *ldt, int entry, __u32 entry_a, __u32 entry_b);
6127 extern int write_gdt_entry(void *gdt, int entry, __u32 entry_a, __u32 entry_b);
6128+#define set_ldt xen_set_ldt
6129+#endif
6130+
6131+#ifndef CONFIG_XEN
6132+static inline fastcall void native_set_ldt(const void *addr,
6133+ unsigned int entries)
6134+{
6135+ if (likely(entries == 0))
6136+ __asm__ __volatile__("lldt %w0"::"q" (0));
6137+ else {
6138+ unsigned cpu = smp_processor_id();
6139+ __u32 a, b;
6140+
6141+ pack_descriptor(&a, &b, (unsigned long)addr,
6142+ entries * sizeof(struct desc_struct) - 1,
6143+ DESCTYPE_LDT, 0);
6144+ write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT, a, b);
6145+ __asm__ __volatile__("lldt %w0"::"q" (GDT_ENTRY_LDT*8));
6146+ }
6147+}
6148 #endif
6149-#ifndef CONFIG_X86_NO_IDT
6150-#define write_idt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
6151
6152+#ifndef CONFIG_X86_NO_IDT
6153 static inline void _set_gate(int gate, unsigned int type, void *addr, unsigned short seg)
6154 {
6155 __u32 a, b;
6156@@ -125,14 +138,6 @@ static inline void __set_tss_desc(unsign
6157 }
6158 #endif
6159
6160-static inline void set_ldt_desc(unsigned int cpu, void *addr, unsigned int entries)
6161-{
6162- __u32 a, b;
6163- pack_descriptor(&a, &b, (unsigned long)addr,
6164- entries * sizeof(struct desc_struct) - 1,
6165- DESCTYPE_LDT, 0);
6166- write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT, a, b);
6167-}
6168
6169 #define set_tss_desc(cpu,addr) __set_tss_desc(cpu, GDT_ENTRY_TSS, addr)
6170
6171@@ -163,36 +168,22 @@ static inline void set_ldt_desc(unsigned
6172
6173 static inline void clear_LDT(void)
6174 {
6175- int cpu = get_cpu();
6176-
6177- /*
6178- * NB. We load the default_ldt for lcall7/27 handling on demand, as
6179- * it slows down context switching. Noone uses it anyway.
6180- */
6181- cpu = cpu; /* XXX avoid compiler warning */
6182- xen_set_ldt(NULL, 0);
6183- put_cpu();
6184+ set_ldt(NULL, 0);
6185 }
6186
6187 /*
6188 * load one particular LDT into the current CPU
6189 */
6190-static inline void load_LDT_nolock(mm_context_t *pc, int cpu)
6191+static inline void load_LDT_nolock(mm_context_t *pc)
6192 {
6193- void *segments = pc->ldt;
6194- int count = pc->size;
6195-
6196- if (likely(!count))
6197- segments = NULL;
6198-
6199- xen_set_ldt(segments, count);
6200+ set_ldt(pc->ldt, pc->size);
6201 }
6202
6203 static inline void load_LDT(mm_context_t *pc)
6204 {
6205- int cpu = get_cpu();
6206- load_LDT_nolock(pc, cpu);
6207- put_cpu();
6208+ preempt_disable();
6209+ load_LDT_nolock(pc);
6210+ preempt_enable();
6211 }
6212
6213 static inline unsigned long get_desc_base(unsigned long *desc)
6214@@ -204,6 +195,29 @@ static inline unsigned long get_desc_bas
6215 return base;
6216 }
6217
6218+#else /* __ASSEMBLY__ */
6219+
6220+/*
6221+ * GET_DESC_BASE reads the descriptor base of the specified segment.
6222+ *
6223+ * Args:
6224+ * idx - descriptor index
6225+ * gdt - GDT pointer
6226+ * base - 32bit register to which the base will be written
6227+ * lo_w - lo word of the "base" register
6228+ * lo_b - lo byte of the "base" register
6229+ * hi_b - hi byte of the low word of the "base" register
6230+ *
6231+ * Example:
6232+ * GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah)
6233+ * Will read the base address of GDT_ENTRY_ESPFIX_SS and put it into %eax.
6234+ */
6235+#define GET_DESC_BASE(idx, gdt, base, lo_w, lo_b, hi_b) \
6236+ movb idx*8+4(gdt), lo_b; \
6237+ movb idx*8+7(gdt), hi_b; \
6238+ shll $16, base; \
6239+ movw idx*8+2(gdt), lo_w;
6240+
6241 #endif /* !__ASSEMBLY__ */
6242
6243 #endif
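
The GET_DESC_BASE assembly macro added above reassembles a segment base that the i386 descriptor format scatters across bytes 2-4 and byte 7 of the 8-byte GDT entry. The same reconstruction in C, handy for checking the byte offsets; the descriptor is assumed to be available as a plain byte array:

#include <stdint.h>
#include <stdio.h>

/* An i386 segment descriptor stores base[15:0] in bytes 2-3,
 * base[23:16] in byte 4 and base[31:24] in byte 7. */
static uint32_t desc_base(const uint8_t d[8])
{
	return (uint32_t)d[2]
	     | (uint32_t)d[3] << 8
	     | (uint32_t)d[4] << 16
	     | (uint32_t)d[7] << 24;
}

int main(void)
{
	/* Descriptor with base 0x12345678, all other fields zeroed. */
	const uint8_t d[8] = { 0, 0, 0x78, 0x56, 0x34, 0, 0, 0x12 };

	printf("0x%08x\n", desc_base(d));   /* 0x12345678 */
	return 0;
}
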
6244--- sle11-2009-06-29.orig/include/asm-x86/mach-xen/asm/dma-mapping_32.h 2009-06-29 15:14:52.000000000 +0200
6245+++ sle11-2009-06-29/include/asm-x86/mach-xen/asm/dma-mapping_32.h 2008-12-15 11:26:44.000000000 +0100
6246@@ -127,10 +127,10 @@ dma_get_cache_alignment(void)
6247 return (1 << INTERNODE_CACHE_SHIFT);
6248 }
6249
6250-#define dma_is_consistent(d) (1)
6251+#define dma_is_consistent(d, h) (1)
6252
6253 static inline void
6254-dma_cache_sync(void *vaddr, size_t size,
6255+dma_cache_sync(struct device *dev, void *vaddr, size_t size,
6256 enum dma_data_direction direction)
6257 {
6258 flush_write_buffers();
6259--- sle11-2009-06-29.orig/include/asm-x86/mach-xen/asm/fixmap_32.h 2009-03-04 11:28:34.000000000 +0100
6260+++ sle11-2009-06-29/include/asm-x86/mach-xen/asm/fixmap_32.h 2008-12-15 11:26:44.000000000 +0100
6261@@ -13,13 +13,16 @@
6262 #ifndef _ASM_FIXMAP_H
6263 #define _ASM_FIXMAP_H
6264
6265-
6266 /* used by vmalloc.c, vsyscall.lds.S.
6267 *
6268 * Leave one empty page between vmalloc'ed areas and
6269 * the start of the fixmap.
6270 */
6271 extern unsigned long __FIXADDR_TOP;
6272+#ifdef CONFIG_COMPAT_VDSO
6273+#define FIXADDR_USER_START __fix_to_virt(FIX_VDSO)
6274+#define FIXADDR_USER_END __fix_to_virt(FIX_VDSO - 1)
6275+#endif
6276
6277 #ifndef __ASSEMBLY__
6278 #include <linux/kernel.h>
6279--- sle11-2009-06-29.orig/include/asm-x86/mach-xen/asm/highmem.h 2009-06-29 15:14:52.000000000 +0200
6280+++ sle11-2009-06-29/include/asm-x86/mach-xen/asm/highmem.h 2008-12-15 11:26:44.000000000 +0100
6281@@ -85,7 +85,7 @@ static inline void clear_user_highpage(s
6282
6283 void copy_highpage(struct page *to, struct page *from);
6284 static inline void copy_user_highpage(struct page *to, struct page *from,
6285- unsigned long vaddr)
6286+ unsigned long vaddr, struct vm_area_struct *vma)
6287 {
6288 copy_highpage(to, from);
6289 }
6290--- sle11-2009-06-29.orig/include/asm-x86/mach-xen/asm/hypervisor.h 2009-03-04 11:28:34.000000000 +0100
6291+++ sle11-2009-06-29/include/asm-x86/mach-xen/asm/hypervisor.h 2008-12-15 11:26:44.000000000 +0100
6292@@ -46,15 +46,6 @@
6293 #include <asm/percpu.h>
6294 #include <asm/ptrace.h>
6295 #include <asm/page.h>
6296-#if defined(__i386__)
6297-# ifdef CONFIG_X86_PAE
6298-# include <asm-generic/pgtable-nopud.h>
6299-# else
6300-# include <asm-generic/pgtable-nopmd.h>
6301-# endif
6302-#elif defined(__x86_64__) && LINUX_VERSION_CODE < KERNEL_VERSION(2,6,11)
6303-# include <asm-generic/pgtable-nopud.h>
6304-#endif
6305
6306 extern shared_info_t *HYPERVISOR_shared_info;
6307
6308--- sle11-2009-06-29.orig/include/asm-x86/mach-xen/asm/irqflags_32.h 2009-06-29 15:14:52.000000000 +0200
6309+++ sle11-2009-06-29/include/asm-x86/mach-xen/asm/irqflags_32.h 2008-12-15 11:26:44.000000000 +0100
6310@@ -22,9 +22,6 @@
6311
6312 #define __raw_local_save_flags() (current_vcpu_info()->evtchn_upcall_mask)
6313
6314-#define raw_local_save_flags(flags) \
6315- do { (flags) = __raw_local_save_flags(); } while (0)
6316-
6317 #define raw_local_irq_restore(x) \
6318 do { \
6319 vcpu_info_t *_vcpu; \
6320@@ -66,18 +63,6 @@ void raw_safe_halt(void);
6321 */
6322 void halt(void);
6323
6324-static inline int raw_irqs_disabled_flags(unsigned long flags)
6325-{
6326- return (flags != 0);
6327-}
6328-
6329-#define raw_irqs_disabled() \
6330-({ \
6331- unsigned long flags = __raw_local_save_flags(); \
6332- \
6333- raw_irqs_disabled_flags(flags); \
6334-})
6335-
6336 /*
6337 * For spinlocks, etc:
6338 */
6339@@ -90,9 +75,62 @@ static inline int raw_irqs_disabled_flag
6340 flags; \
6341 })
6342
6343+#else
6344+/* Offsets into shared_info_t. */
6345+#define evtchn_upcall_pending /* 0 */
6346+#define evtchn_upcall_mask 1
6347+
6348+#define sizeof_vcpu_shift 6
6349+
6350+#ifdef CONFIG_SMP
6351+#define GET_VCPU_INFO movl TI_cpu(%ebp),%esi ; \
6352+ shl $sizeof_vcpu_shift,%esi ; \
6353+ addl HYPERVISOR_shared_info,%esi
6354+#else
6355+#define GET_VCPU_INFO movl HYPERVISOR_shared_info,%esi
6356+#endif
6357+
6358+#define __DISABLE_INTERRUPTS movb $1,evtchn_upcall_mask(%esi)
6359+#define __ENABLE_INTERRUPTS movb $0,evtchn_upcall_mask(%esi)
6360+#define __TEST_PENDING testb $0xFF,evtchn_upcall_pending(%esi)
6361+#define DISABLE_INTERRUPTS(clb) GET_VCPU_INFO ; \
6362+ __DISABLE_INTERRUPTS
6363+#define ENABLE_INTERRUPTS(clb) GET_VCPU_INFO ; \
6364+ __ENABLE_INTERRUPTS
6365+#define ENABLE_INTERRUPTS_SYSEXIT __ENABLE_INTERRUPTS ; \
6366+sysexit_scrit: /**** START OF SYSEXIT CRITICAL REGION ****/ ; \
6367+ __TEST_PENDING ; \
6368+ jnz 14f /* process more events if necessary... */ ; \
6369+ movl PT_ESI(%esp), %esi ; \
6370+ sysexit ; \
6371+14: __DISABLE_INTERRUPTS ; \
6372+ TRACE_IRQS_OFF ; \
6373+sysexit_ecrit: /**** END OF SYSEXIT CRITICAL REGION ****/ ; \
6374+ push %esp ; \
6375+ call evtchn_do_upcall ; \
6376+ add $4,%esp ; \
6377+ jmp ret_from_intr
6378+#define INTERRUPT_RETURN iret
6379+#endif /* __ASSEMBLY__ */
6380+
6381+#ifndef __ASSEMBLY__
6382+#define raw_local_save_flags(flags) \
6383+ do { (flags) = __raw_local_save_flags(); } while (0)
6384+
6385 #define raw_local_irq_save(flags) \
6386 do { (flags) = __raw_local_irq_save(); } while (0)
6387
6388+static inline int raw_irqs_disabled_flags(unsigned long flags)
6389+{
6390+ return (flags != 0);
6391+}
6392+
6393+#define raw_irqs_disabled() \
6394+({ \
6395+ unsigned long flags = __raw_local_save_flags(); \
6396+ \
6397+ raw_irqs_disabled_flags(flags); \
6398+})
6399 #endif /* __ASSEMBLY__ */
6400
6401 /*
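
In the assembly branch added above, "interrupts" are Xen event-channel upcalls: each vcpu_info slot in the shared info page is 64 bytes (hence sizeof_vcpu_shift = 6) and carries an evtchn_upcall_pending byte at offset 0 and an evtchn_upcall_mask byte at offset 1, which is all the macros poke at. A small C model of the enable path, with a hypothetical struct mirroring just those offsets:

#include <stdint.h>
#include <stdio.h>

struct vcpu_info_sketch {             /* stand-in for the shared-info slot */
	uint8_t evtchn_upcall_pending;  /* offset 0, as in the asm above */
	uint8_t evtchn_upcall_mask;     /* offset 1; 1 = "interrupts off" */
	uint8_t pad[62];                /* pad to 64 bytes (shift of 6) */
};

/* Mirrors ENABLE_INTERRUPTS followed by __TEST_PENDING: unmask, then
 * see whether an upcall arrived while we were masked. */
static int enable_and_check(struct vcpu_info_sketch *v)
{
	v->evtchn_upcall_mask = 0;
	return v->evtchn_upcall_pending != 0;  /* caller would run the upcall */
}

int main(void)
{
	struct vcpu_info_sketch v = { .evtchn_upcall_pending = 1,
	                              .evtchn_upcall_mask = 1 };

	printf("pending after unmask: %d\n", enable_and_check(&v));   /* 1 */
	return 0;
}
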
6402--- sle11-2009-06-29.orig/include/asm-x86/mach-xen/asm/mmu_context_32.h 2009-06-29 15:14:52.000000000 +0200
6403+++ sle11-2009-06-29/include/asm-x86/mach-xen/asm/mmu_context_32.h 2008-12-15 11:26:44.000000000 +0100
6404@@ -27,14 +27,13 @@ static inline void enter_lazy_tlb(struct
6405 static inline void __prepare_arch_switch(void)
6406 {
6407 /*
6408- * Save away %fs and %gs. No need to save %es and %ds, as those
6409- * are always kernel segments while inside the kernel. Must
6410- * happen before reload of cr3/ldt (i.e., not in __switch_to).
6411+ * Save away %fs. No need to save %gs, as it was saved on the
6412+ * stack on entry. No need to save %es and %ds, as those are
6413+ * always kernel segments while inside the kernel.
6414 */
6415- asm volatile ( "mov %%fs,%0 ; mov %%gs,%1"
6416- : "=m" (current->thread.fs),
6417- "=m" (current->thread.gs));
6418- asm volatile ( "movl %0,%%fs ; movl %0,%%gs"
6419+ asm volatile ( "mov %%fs,%0"
6420+ : "=m" (current->thread.fs));
6421+ asm volatile ( "movl %0,%%fs"
6422 : : "r" (0) );
6423 }
6424
6425@@ -89,14 +88,14 @@ static inline void switch_mm(struct mm_s
6426 * tlb flush IPI delivery. We must reload %cr3.
6427 */
6428 load_cr3(next->pgd);
6429- load_LDT_nolock(&next->context, cpu);
6430+ load_LDT_nolock(&next->context);
6431 }
6432 }
6433 #endif
6434 }
6435
6436-#define deactivate_mm(tsk, mm) \
6437- asm("movl %0,%%fs ; movl %0,%%gs": :"r" (0))
6438+#define deactivate_mm(tsk, mm) \
6439+ asm("movl %0,%%fs": :"r" (0));
6440
6441 static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next)
6442 {
6443--- sle11-2009-06-29.orig/include/asm-x86/mach-xen/asm/pgtable-3level.h 2009-03-04 11:28:34.000000000 +0100
6444+++ sle11-2009-06-29/include/asm-x86/mach-xen/asm/pgtable-3level.h 2008-12-15 11:26:44.000000000 +0100
6445@@ -1,8 +1,6 @@
6446 #ifndef _I386_PGTABLE_3LEVEL_H
6447 #define _I386_PGTABLE_3LEVEL_H
6448
6449-#include <asm-generic/pgtable-nopud.h>
6450-
6451 /*
6452 * Intel Physical Address Extension (PAE) Mode - three-level page
6453 * tables on PPro+ CPUs.
6454@@ -75,6 +73,23 @@ static inline void set_pte(pte_t *ptep,
6455 xen_l3_entry_update((pudptr), (pudval))
6456
6457 /*
6458+ * For PTEs and PDEs, we must clear the P-bit first when clearing a page table
6459+ * entry, so clear the bottom half first and enforce ordering with a compiler
6460+ * barrier.
6461+ */
6462+static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
6463+{
6464+ if ((mm != current->mm && mm != &init_mm)
6465+ || HYPERVISOR_update_va_mapping(addr, __pte(0), 0)) {
6466+ ptep->pte_low = 0;
6467+ smp_wmb();
6468+ ptep->pte_high = 0;
6469+ }
6470+}
6471+
6472+#define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0)
6473+
6474+/*
6475 * Pentium-II erratum A13: in PAE mode we explicitly have to flush
6476 * the TLB via cr3 if the top-level pgd is changed...
6477 * We do not let the generic code free and clear pgd entries due to
6478@@ -93,45 +108,16 @@ static inline void pud_clear (pud_t * pu
6479 #define pmd_offset(pud, address) ((pmd_t *) pud_page(*(pud)) + \
6480 pmd_index(address))
6481
6482-static inline int pte_none(pte_t pte)
6483-{
6484- return !(pte.pte_low | pte.pte_high);
6485-}
6486-
6487-/*
6488- * For PTEs and PDEs, we must clear the P-bit first when clearing a page table
6489- * entry, so clear the bottom half first and enforce ordering with a compiler
6490- * barrier.
6491- */
6492-static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
6493+static inline pte_t raw_ptep_get_and_clear(pte_t *ptep, pte_t res)
6494 {
6495- if ((mm != current->mm && mm != &init_mm)
6496- || HYPERVISOR_update_va_mapping(addr, __pte(0), 0)) {
6497- ptep->pte_low = 0;
6498- smp_wmb();
6499+ uint64_t val = __pte_val(res);
6500+ if (__cmpxchg64(ptep, val, 0) != val) {
6501+ /* xchg acts as a barrier before the setting of the high bits */
6502+ res.pte_low = xchg(&ptep->pte_low, 0);
6503+ res.pte_high = ptep->pte_high;
6504 ptep->pte_high = 0;
6505 }
6506-}
6507-
6508-#define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0)
6509-
6510-#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
6511-static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
6512-{
6513- pte_t pte = *ptep;
6514- if (!pte_none(pte)) {
6515- if ((mm != &init_mm) ||
6516- HYPERVISOR_update_va_mapping(addr, __pte(0), 0)) {
6517- uint64_t val = __pte_val(pte);
6518- if (__cmpxchg64(ptep, val, 0) != val) {
6519- /* xchg acts as a barrier before the setting of the high bits */
6520- pte.pte_low = xchg(&ptep->pte_low, 0);
6521- pte.pte_high = ptep->pte_high;
6522- ptep->pte_high = 0;
6523- }
6524- }
6525- }
6526- return pte;
6527+ return res;
6528 }
6529
6530 #define __HAVE_ARCH_PTEP_CLEAR_FLUSH
6531@@ -160,6 +146,11 @@ static inline int pte_same(pte_t a, pte_
6532
6533 #define pte_page(x) pfn_to_page(pte_pfn(x))
6534
6535+static inline int pte_none(pte_t pte)
6536+{
6537+ return !(pte.pte_low | pte.pte_high);
6538+}
6539+
6540 #define __pte_mfn(_pte) (((_pte).pte_low >> PAGE_SHIFT) | \
6541 ((_pte).pte_high << (32-PAGE_SHIFT)))
6542 #define pte_mfn(_pte) ((_pte).pte_low & _PAGE_PRESENT ? \
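
The reshuffled pte_clear() above encodes the PAE ordering rule spelled out in its comment: the pte is 64 bits wide but is stored 32 bits at a time while the MMU may be walking it, so the low word (which carries the present bit) must be cleared first, with a write barrier before the high word. raw_ptep_get_and_clear() additionally uses cmpxchg64/xchg so dirty/accessed bits set concurrently by hardware are captured rather than lost. A sketch of both shapes, with GCC atomic builtins standing in for the kernel primitives:

#include <stdint.h>

struct pae_pte_sketch {        /* two 32-bit halves of one 64-bit PAE pte */
	uint32_t pte_low;      /* carries the present bit */
	uint32_t pte_high;
};

/* Mirrors pte_clear(): drop the present bit first and order the two
 * stores, so a concurrent walk never sees new-low with old-high. */
static void pte_clear_sketch(struct pae_pte_sketch *p)
{
	p->pte_low = 0;
	__atomic_thread_fence(__ATOMIC_RELEASE);    /* kernel: smp_wmb() */
	p->pte_high = 0;
}

/* Mirrors raw_ptep_get_and_clear(): swap the low word atomically so a
 * racing hardware dirty/accessed update is captured, not dropped. */
static struct pae_pte_sketch pte_get_and_clear_sketch(struct pae_pte_sketch *p)
{
	struct pae_pte_sketch res;

	res.pte_low  = __atomic_exchange_n(&p->pte_low, 0, __ATOMIC_ACQ_REL);
	res.pte_high = p->pte_high;
	p->pte_high  = 0;
	return res;
}

int main(void)
{
	struct pae_pte_sketch pte = { 0x27, 0x1 };
	struct pae_pte_sketch old = pte_get_and_clear_sketch(&pte);

	pte_clear_sketch(&pte);            /* already zero; ordering demo only */
	return (old.pte_low == 0x27 && pte.pte_high == 0) ? 0 : 1;
}
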
6543--- sle11-2009-06-29.orig/include/asm-x86/mach-xen/asm/pgtable_32.h 2009-03-04 11:28:34.000000000 +0100
6544+++ sle11-2009-06-29/include/asm-x86/mach-xen/asm/pgtable_32.h 2008-12-15 11:26:44.000000000 +0100
6545@@ -38,14 +38,14 @@ struct vm_area_struct;
6546 #define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
6547 extern unsigned long empty_zero_page[1024];
6548 extern pgd_t *swapper_pg_dir;
6549-extern kmem_cache_t *pgd_cache;
6550-extern kmem_cache_t *pmd_cache;
6551+extern struct kmem_cache *pgd_cache;
6552+extern struct kmem_cache *pmd_cache;
6553 extern spinlock_t pgd_lock;
6554 extern struct page *pgd_list;
6555
6556-void pmd_ctor(void *, kmem_cache_t *, unsigned long);
6557-void pgd_ctor(void *, kmem_cache_t *, unsigned long);
6558-void pgd_dtor(void *, kmem_cache_t *, unsigned long);
6559+void pmd_ctor(void *, struct kmem_cache *, unsigned long);
6560+void pgd_ctor(void *, struct kmem_cache *, unsigned long);
6561+void pgd_dtor(void *, struct kmem_cache *, unsigned long);
6562 void pgtable_cache_init(void);
6563 void paging_init(void);
6564
6565@@ -276,7 +276,6 @@ static inline pte_t pte_mkhuge(pte_t pte
6566 #define pte_update(mm, addr, ptep) do { } while (0)
6567 #define pte_update_defer(mm, addr, ptep) do { } while (0)
6568
6569-
6570 /*
6571 * We only update the dirty/accessed state if we set
6572 * the dirty bit by hand in the kernel, since the hardware
6573@@ -342,6 +341,19 @@ do { \
6574 __young; \
6575 })
6576
6577+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
6578+static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
6579+{
6580+ pte_t pte = *ptep;
6581+ if (!pte_none(pte)
6582+ && (mm != &init_mm
6583+ || HYPERVISOR_update_va_mapping(addr, __pte(0), 0))) {
6584+ pte = raw_ptep_get_and_clear(ptep, pte);
6585+ pte_update(mm, addr, ptep);
6586+ }
6587+ return pte;
6588+}
6589+
6590 #define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
6591 #define ptep_get_and_clear_full(mm, addr, ptep, full) \
6592 ((full) ? ({ \
6593--- sle11-2009-06-29.orig/include/asm-x86/mach-xen/asm/processor_32.h 2009-03-04 11:28:34.000000000 +0100
6594+++ sle11-2009-06-29/include/asm-x86/mach-xen/asm/processor_32.h 2008-12-15 11:26:44.000000000 +0100
6595@@ -20,6 +20,7 @@
6596 #include <linux/threads.h>
6597 #include <asm/percpu.h>
6598 #include <linux/cpumask.h>
6599+#include <linux/init.h>
6600 #include <xen/interface/physdev.h>
6601
6602 /* flag for disabling the tsc */
6603@@ -73,6 +74,7 @@ struct cpuinfo_x86 {
6604 #endif
6605 unsigned char x86_max_cores; /* cpuid returned max cores value */
6606 unsigned char apicid;
6607+ unsigned short x86_clflush_size;
6608 #ifdef CONFIG_SMP
6609 unsigned char booted_cores; /* number of cores as seen by OS */
6610 __u8 phys_proc_id; /* Physical processor id. */
6611@@ -114,6 +116,8 @@ extern struct cpuinfo_x86 cpu_data[];
6612 extern int cpu_llc_id[NR_CPUS];
6613 extern char ignore_fpu_irq;
6614
6615+void __init cpu_detect(struct cpuinfo_x86 *c);
6616+
6617 extern void identify_cpu(struct cpuinfo_x86 *);
6618 extern void print_cpu_info(struct cpuinfo_x86 *);
6619 extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
6620@@ -146,8 +150,8 @@ static inline void detect_ht(struct cpui
6621 #define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */
6622 #define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */
6623
6624-static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
6625- unsigned int *ecx, unsigned int *edx)
6626+static inline fastcall void xen_cpuid(unsigned int *eax, unsigned int *ebx,
6627+ unsigned int *ecx, unsigned int *edx)
6628 {
6629 /* ecx is often an input as well as an output. */
6630 __asm__(XEN_CPUID
6631@@ -158,59 +162,6 @@ static inline void __cpuid(unsigned int
6632 : "0" (*eax), "2" (*ecx));
6633 }
6634
6635-/*
6636- * Generic CPUID function
6637- * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx
6638- * resulting in stale register contents being returned.
6639- */
6640-static inline void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx)
6641-{
6642- *eax = op;
6643- *ecx = 0;
6644- __cpuid(eax, ebx, ecx, edx);
6645-}
6646-
6647-/* Some CPUID calls want 'count' to be placed in ecx */
6648-static inline void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx,
6649- int *edx)
6650-{
6651- *eax = op;
6652- *ecx = count;
6653- __cpuid(eax, ebx, ecx, edx);
6654-}
6655-
6656-/*
6657- * CPUID functions returning a single datum
6658- */
6659-static inline unsigned int cpuid_eax(unsigned int op)
6660-{
6661- unsigned int eax, ebx, ecx, edx;
6662-
6663- cpuid(op, &eax, &ebx, &ecx, &edx);
6664- return eax;
6665-}
6666-static inline unsigned int cpuid_ebx(unsigned int op)
6667-{
6668- unsigned int eax, ebx, ecx, edx;
6669-
6670- cpuid(op, &eax, &ebx, &ecx, &edx);
6671- return ebx;
6672-}
6673-static inline unsigned int cpuid_ecx(unsigned int op)
6674-{
6675- unsigned int eax, ebx, ecx, edx;
6676-
6677- cpuid(op, &eax, &ebx, &ecx, &edx);
6678- return ecx;
6679-}
6680-static inline unsigned int cpuid_edx(unsigned int op)
6681-{
6682- unsigned int eax, ebx, ecx, edx;
6683-
6684- cpuid(op, &eax, &ebx, &ecx, &edx);
6685- return edx;
6686-}
6687-
6688 #define load_cr3(pgdir) write_cr3(__pa(pgdir))
6689
6690 /*
6691@@ -480,9 +431,9 @@ struct thread_struct {
6692 .vm86_info = NULL, \
6693 .sysenter_cs = __KERNEL_CS, \
6694 .io_bitmap_ptr = NULL, \
6695+ .gs = __KERNEL_PDA, \
6696 }
6697
6698-#ifndef CONFIG_X86_NO_TSS
6699 /*
6700 * Note that the .io_bitmap member must be extra-big. This is because
6701 * the CPU will access an additional byte beyond the end of the IO
6702@@ -497,26 +448,9 @@ struct thread_struct {
6703 .io_bitmap = { [ 0 ... IO_BITMAP_LONGS] = ~0 }, \
6704 }
6705
6706-static inline void __load_esp0(struct tss_struct *tss, struct thread_struct *thread)
6707-{
6708- tss->esp0 = thread->esp0;
6709- /* This can only happen when SEP is enabled, no need to test "SEP"arately */
6710- if (unlikely(tss->ss1 != thread->sysenter_cs)) {
6711- tss->ss1 = thread->sysenter_cs;
6712- wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
6713- }
6714-}
6715-#define load_esp0(tss, thread) \
6716- __load_esp0(tss, thread)
6717-#else
6718-#define load_esp0(tss, thread) do { \
6719- if (HYPERVISOR_stack_switch(__KERNEL_DS, (thread)->esp0)) \
6720- BUG(); \
6721-} while (0)
6722-#endif
6723-
6724 #define start_thread(regs, new_eip, new_esp) do { \
6725- __asm__("movl %0,%%fs ; movl %0,%%gs": :"r" (0)); \
6726+ __asm__("movl %0,%%fs": :"r" (0)); \
6727+ regs->xgs = 0; \
6728 set_fs(USER_DS); \
6729 regs->xds = __USER_DS; \
6730 regs->xes = __USER_DS; \
6731@@ -526,26 +460,6 @@ static inline void __load_esp0(struct ts
6732 regs->esp = new_esp; \
6733 } while (0)
6734
6735-/*
6736- * These special macros can be used to get or set a debugging register
6737- */
6738-#define get_debugreg(var, register) \
6739- (var) = HYPERVISOR_get_debugreg((register))
6740-#define set_debugreg(value, register) \
6741- WARN_ON(HYPERVISOR_set_debugreg((register), (value)))
6742-
6743-/*
6744- * Set IOPL bits in EFLAGS from given mask
6745- */
6746-static inline void set_iopl_mask(unsigned mask)
6747-{
6748- struct physdev_set_iopl set_iopl;
6749-
6750- /* Force the change at ring 0. */
6751- set_iopl.iopl = (mask == 0) ? 1 : (mask >> 12) & 3;
6752- WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl));
6753-}
6754-
6755 /* Forward declaration, a strange C thing */
6756 struct task_struct;
6757 struct mm_struct;
6758@@ -637,6 +551,105 @@ static inline void rep_nop(void)
6759
6760 #define cpu_relax() rep_nop()
6761
6762+#define paravirt_enabled() 0
6763+#define __cpuid xen_cpuid
6764+
6765+#ifndef CONFIG_X86_NO_TSS
6766+static inline void __load_esp0(struct tss_struct *tss, struct thread_struct *thread)
6767+{
6768+ tss->esp0 = thread->esp0;
6769+ /* This can only happen when SEP is enabled, no need to test "SEP"arately */
6770+ if (unlikely(tss->ss1 != thread->sysenter_cs)) {
6771+ tss->ss1 = thread->sysenter_cs;
6772+ wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
6773+ }
6774+}
6775+#define load_esp0(tss, thread) \
6776+ __load_esp0(tss, thread)
6777+#else
6778+#define load_esp0(tss, thread) do { \
6779+ if (HYPERVISOR_stack_switch(__KERNEL_DS, (thread)->esp0)) \
6780+ BUG(); \
6781+} while (0)
6782+#endif
6783+
6784+
6785+/*
6786+ * These special macros can be used to get or set a debugging register
6787+ */
6788+#define get_debugreg(var, register) \
6789+ (var) = HYPERVISOR_get_debugreg(register)
6790+#define set_debugreg(value, register) \
6791+ WARN_ON(HYPERVISOR_set_debugreg(register, value))
6792+
6793+#define set_iopl_mask xen_set_iopl_mask
6794+
6795+/*
6796+ * Set IOPL bits in EFLAGS from given mask
6797+ */
6798+static inline void xen_set_iopl_mask(unsigned mask)
6799+{
6800+ struct physdev_set_iopl set_iopl;
6801+
6802+ /* Force the change at ring 0. */
6803+ set_iopl.iopl = (mask == 0) ? 1 : (mask >> 12) & 3;
6804+ WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl));
6805+}
6806+
6807+
6808+/*
6809+ * Generic CPUID function
6810+ * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx
6811+ * resulting in stale register contents being returned.
6812+ */
6813+static inline void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx)
6814+{
6815+ *eax = op;
6816+ *ecx = 0;
6817+ __cpuid(eax, ebx, ecx, edx);
6818+}
6819+
6820+/* Some CPUID calls want 'count' to be placed in ecx */
6821+static inline void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx,
6822+ int *edx)
6823+{
6824+ *eax = op;
6825+ *ecx = count;
6826+ __cpuid(eax, ebx, ecx, edx);
6827+}
6828+
6829+/*
6830+ * CPUID functions returning a single datum
6831+ */
6832+static inline unsigned int cpuid_eax(unsigned int op)
6833+{
6834+ unsigned int eax, ebx, ecx, edx;
6835+
6836+ cpuid(op, &eax, &ebx, &ecx, &edx);
6837+ return eax;
6838+}
6839+static inline unsigned int cpuid_ebx(unsigned int op)
6840+{
6841+ unsigned int eax, ebx, ecx, edx;
6842+
6843+ cpuid(op, &eax, &ebx, &ecx, &edx);
6844+ return ebx;
6845+}
6846+static inline unsigned int cpuid_ecx(unsigned int op)
6847+{
6848+ unsigned int eax, ebx, ecx, edx;
6849+
6850+ cpuid(op, &eax, &ebx, &ecx, &edx);
6851+ return ecx;
6852+}
6853+static inline unsigned int cpuid_edx(unsigned int op)
6854+{
6855+ unsigned int eax, ebx, ecx, edx;
6856+
6857+ cpuid(op, &eax, &ebx, &ecx, &edx);
6858+ return edx;
6859+}
6860+
6861 /* generic versions from gas */
6862 #define GENERIC_NOP1 ".byte 0x90\n"
6863 #define GENERIC_NOP2 ".byte 0x89,0xf6\n"
6864@@ -736,4 +749,8 @@ extern unsigned long boot_option_idle_ov
6865 extern void enable_sep_cpu(void);
6866 extern int sysenter_setup(void);
6867
6868+extern int init_gdt(int cpu, struct task_struct *idle);
6869+extern void cpu_set_gdt(int);
6870+extern void secondary_cpu_init(void);
6871+
6872 #endif /* __ASM_I386_PROCESSOR_H */
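
The cpuid() helper family moved below the Xen-aware __cpuid above, but the pattern is unchanged: load the leaf into EAX, clear ECX (some CPUs, like the Cyrix MII, otherwise return stale register contents), execute cpuid, and pick out the register you need. A user-space equivalent reading the vendor string from leaf 0, assuming GCC's cpuid.h on an x86 host:

#include <cpuid.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;
	char vendor[13];

	/* Leaf 0: maximum leaf in EAX, vendor string in EBX, EDX, ECX. */
	if (!__get_cpuid(0, &eax, &ebx, &ecx, &edx))
		return 1;
	memcpy(vendor + 0, &ebx, 4);
	memcpy(vendor + 4, &edx, 4);
	memcpy(vendor + 8, &ecx, 4);
	vendor[12] = '\0';
	printf("max leaf %u, vendor %s\n", eax, vendor);  /* e.g. GenuineIntel */
	return 0;
}
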
6873--- sle11-2009-06-29.orig/include/asm-x86/mach-xen/asm/segment_32.h 2009-03-04 11:28:34.000000000 +0100
6874+++ sle11-2009-06-29/include/asm-x86/mach-xen/asm/segment_32.h 2008-12-15 11:26:44.000000000 +0100
6875@@ -39,7 +39,7 @@
6876 * 25 - APM BIOS support
6877 *
6878 * 26 - ESPFIX small SS
6879- * 27 - unused
6880+ * 27 - PDA [ per-cpu private data area ]
6881 * 28 - unused
6882 * 29 - unused
6883 * 30 - unused
6884@@ -74,6 +74,9 @@
6885 #define GDT_ENTRY_ESPFIX_SS (GDT_ENTRY_KERNEL_BASE + 14)
6886 #define __ESPFIX_SS (GDT_ENTRY_ESPFIX_SS * 8)
6887
6888+#define GDT_ENTRY_PDA (GDT_ENTRY_KERNEL_BASE + 15)
6889+#define __KERNEL_PDA (GDT_ENTRY_PDA * 8)
6890+
6891 #define GDT_ENTRY_DOUBLEFAULT_TSS 31
6892
6893 /*
6894--- sle11-2009-06-29.orig/include/asm-x86/mach-xen/asm/smp_32.h 2009-03-04 11:28:34.000000000 +0100
6895+++ sle11-2009-06-29/include/asm-x86/mach-xen/asm/smp_32.h 2008-12-15 11:26:44.000000000 +0100
6896@@ -8,6 +8,7 @@
6897 #include <linux/kernel.h>
6898 #include <linux/threads.h>
6899 #include <linux/cpumask.h>
6900+#include <asm/pda.h>
6901 #endif
6902
6903 #ifdef CONFIG_X86_LOCAL_APIC
6904@@ -56,7 +57,7 @@ extern void cpu_uninit(void);
6905 * from the initial startup. We map APIC_BASE very early in page_setup(),
6906 * so this is correct in the x86 case.
6907 */
6908-#define raw_smp_processor_id() (current_thread_info()->cpu)
6909+#define raw_smp_processor_id() (read_pda(cpu_number))
6910
6911 extern cpumask_t cpu_possible_map;
6912 #define cpu_callin_map cpu_possible_map
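
With the PDA backport above, raw_smp_processor_id() becomes a single %gs-relative load of cpu_number from the per-CPU PDA (the segment set up through the new GDT_ENTRY_PDA descriptor) instead of dereferencing current_thread_info(). User space has no segment-based PDA, but thread-local storage gives the same one-load shape; a loose analogy only, with pda_sketch standing in for struct i386_pda:

#include <pthread.h>
#include <stdio.h>

struct pda_sketch { int cpu_number; };        /* stand-in for i386_pda */

static __thread struct pda_sketch pda;        /* kernel: %gs-relative area */

static int my_cpu(void)
{
	return pda.cpu_number;                /* kernel: read_pda(cpu_number) */
}

static void *worker(void *arg)
{
	pda.cpu_number = (int)(long)arg;      /* kernel: set at CPU bringup */
	printf("running on 'cpu' %d\n", my_cpu());
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, worker, (void *)1L);
	pthread_join(t, NULL);
	return 0;
}
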
6913--- sle11-2009-06-29.orig/include/asm-x86/mach-xen/asm/system_32.h 2009-03-04 11:28:34.000000000 +0100
6914+++ sle11-2009-06-29/include/asm-x86/mach-xen/asm/system_32.h 2008-12-15 11:26:44.000000000 +0100
6915@@ -139,17 +139,17 @@ __asm__ __volatile__ ("movw %%dx,%1\n\t"
6916 #define write_cr4(x) \
6917 __asm__ __volatile__("movl %0,%%cr4": :"r" (x))
6918
6919-/*
6920- * Clear and set 'TS' bit respectively
6921- */
6922+#define wbinvd() \
6923+ __asm__ __volatile__ ("wbinvd": : :"memory")
6924+
6925+/* Clear the 'TS' bit */
6926 #define clts() (HYPERVISOR_fpu_taskswitch(0))
6927+
6928+/* Set the 'TS' bit */
6929 #define stts() (HYPERVISOR_fpu_taskswitch(1))
6930
6931 #endif /* __KERNEL__ */
6932
6933-#define wbinvd() \
6934- __asm__ __volatile__ ("wbinvd": : :"memory")
6935-
6936 static inline unsigned long get_limit(unsigned long segment)
6937 {
6938 unsigned long __limit;
--- sle11-2009-06-29.orig/include/asm-x86/mach-xen/asm/desc_64.h	2009-06-29 15:14:52.000000000 +0200
+++ sle11-2009-06-29/include/asm-x86/mach-xen/asm/desc_64.h	2008-12-15 11:26:44.000000000 +0100
@@ -9,62 +9,11 @@

 #include <linux/string.h>
 #include <linux/smp.h>
+#include <asm/desc_defs.h>

 #include <asm/segment.h>
 #include <asm/mmu.h>

-// 8 byte segment descriptor
-struct desc_struct {
-	u16 limit0;
-	u16 base0;
-	unsigned base1 : 8, type : 4, s : 1, dpl : 2, p : 1;
-	unsigned limit : 4, avl : 1, l : 1, d : 1, g : 1, base2 : 8;
-} __attribute__((packed));
-
-struct n_desc_struct {
-	unsigned int a,b;
-};
-
-enum {
-	GATE_INTERRUPT = 0xE,
-	GATE_TRAP = 0xF,
-	GATE_CALL = 0xC,
-};
-
-// 16byte gate
-struct gate_struct {
-	u16 offset_low;
-	u16 segment;
-	unsigned ist : 3, zero0 : 5, type : 5, dpl : 2, p : 1;
-	u16 offset_middle;
-	u32 offset_high;
-	u32 zero1;
-} __attribute__((packed));
-
-#define PTR_LOW(x) ((unsigned long)(x) & 0xFFFF)
-#define PTR_MIDDLE(x) (((unsigned long)(x) >> 16) & 0xFFFF)
-#define PTR_HIGH(x) ((unsigned long)(x) >> 32)
-
-enum {
-	DESC_TSS = 0x9,
-	DESC_LDT = 0x2,
-};
-
-// LDT or TSS descriptor in the GDT. 16 bytes.
-struct ldttss_desc {
-	u16 limit0;
-	u16 base0;
-	unsigned base1 : 8, type : 5, dpl : 2, p : 1;
-	unsigned limit1 : 4, zero0 : 3, g : 1, base2 : 8;
-	u32 base3;
-	u32 zero1;
-} __attribute__((packed));
-
-struct desc_ptr {
-	unsigned short size;
-	unsigned long address;
-} __attribute__((packed)) ;
-
 extern struct desc_ptr idt_descr, cpu_gdt_descr[NR_CPUS];

 extern struct desc_struct cpu_gdt_table[GDT_ENTRIES];
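The deleted descriptor definitions are not dropped but consolidated: they now arrive via the new <asm/desc_defs.h> include, shared between the native and Xen headers. The PTR_LOW/PTR_MIDDLE/PTR_HIGH helpers split a 64-bit handler address across the three offset fields of a 16-byte gate; a standalone round-trip check of that split (build as 64-bit; the address is an arbitrary example):

#include <stdio.h>

#define PTR_LOW(x)    ((unsigned long)(x) & 0xFFFF)
#define PTR_MIDDLE(x) (((unsigned long)(x) >> 16) & 0xFFFF)
#define PTR_HIGH(x)   ((unsigned long)(x) >> 32)

int main(void)
{
	unsigned long addr = 0xffffffff81234567UL;	/* arbitrary example */
	/* Reassemble to confirm the three fields cover all 64 bits. */
	unsigned long back = PTR_LOW(addr)
			   | (PTR_MIDDLE(addr) << 16)
			   | (PTR_HIGH(addr) << 32);

	printf("%#lx -> %#lx%s\n", addr, back,
	       addr == back ? " (round-trips)" : " (MISMATCH)");
	return 0;
}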
--- sle11-2009-06-29.orig/include/asm-x86/mach-xen/asm/dma-mapping_64.h	2009-03-04 11:28:34.000000000 +0100
+++ sle11-2009-06-29/include/asm-x86/mach-xen/asm/dma-mapping_64.h	2008-12-15 11:26:44.000000000 +0100
@@ -64,6 +64,9 @@ static inline int dma_mapping_error(dma_
	return (dma_addr == bad_dma_address);
 }

+#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
+#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
+
 extern void *dma_alloc_coherent(struct device *dev, size_t size,
				dma_addr_t *dma_handle, gfp_t gfp);
 extern void dma_free_coherent(struct device *dev, size_t size, void *vaddr,
@@ -181,12 +184,13 @@ static inline int dma_get_cache_alignmen
	return boot_cpu_data.x86_clflush_size;
 }

-#define dma_is_consistent(h) 1
+#define dma_is_consistent(d, h) 1

 extern int dma_set_mask(struct device *dev, u64 mask);

 static inline void
-dma_cache_sync(void *vaddr, size_t size, enum dma_data_direction dir)
+dma_cache_sync(struct device *dev, void *vaddr, size_t size,
+	       enum dma_data_direction dir)
 {
	flush_write_buffers();
 }
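This hunk tracks two 2.6.20 DMA API changes: dma_alloc_noncoherent()/dma_free_noncoherent() become trivial aliases of the coherent calls (DMA memory on this architecture is always coherent), and dma_is_consistent()/dma_cache_sync() gain a struct device argument. A hedged driver-side fragment showing the post-change signatures; the function itself is illustrative and assumes the usual kernel context:

static int dma_usage_sketch(struct device *dev, size_t size)
{
	dma_addr_t handle;
	void *buf = dma_alloc_noncoherent(dev, size, &handle, GFP_KERNEL);

	if (!buf)
		return -ENOMEM;
	/* CPU fills buf, then flushes write buffers before device access;
	 * per the hunk above, dma_cache_sync() here is just
	 * flush_write_buffers(). */
	dma_cache_sync(dev, buf, size, DMA_TO_DEVICE);
	/* ... hand 'handle' to the device, wait for completion ... */
	dma_free_noncoherent(dev, size, buf, handle);
	return 0;
}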
--- sle11-2009-06-29.orig/include/asm-x86/mach-xen/asm/pgtable_64.h	2009-03-04 11:28:34.000000000 +0100
+++ sle11-2009-06-29/include/asm-x86/mach-xen/asm/pgtable_64.h	2008-12-15 11:26:44.000000000 +0100
@@ -237,19 +237,18 @@ extern unsigned int __kernel_page_user;

 static inline unsigned long pgd_bad(pgd_t pgd)
 {
-	unsigned long val = __pgd_val(pgd);
-	val &= ~PTE_MASK;
-	val &= ~(_PAGE_USER | _PAGE_DIRTY);
-	return val & ~(_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED);
+	return __pgd_val(pgd) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER);
 }

-static inline unsigned long pud_bad(pud_t pud)
-{
-	unsigned long val = __pud_val(pud);
-	val &= ~PTE_MASK;
-	val &= ~(_PAGE_USER | _PAGE_DIRTY);
-	return val & ~(_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED);
-}
+static inline unsigned long pud_bad(pud_t pud)
+{
+	return __pud_val(pud) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER);
+}
+
+static inline unsigned long pmd_bad(pmd_t pmd)
+{
+	return __pmd_val(pmd) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER);
+}

 #define set_pte_at(_mm,addr,ptep,pteval) do { \
	if (((_mm) != current->mm && (_mm) != &init_mm) || \
@@ -404,8 +403,6 @@ static inline int pmd_large(pmd_t pte) {
 #define pmd_present(x) (__pmd_val(x) & _PAGE_PRESENT)
 #endif
 #define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0)
-#define pmd_bad(x) ((__pmd_val(x) & ~(PTE_MASK | _PAGE_USER | _PAGE_PRESENT)) \
-		!= (_KERNPG_TABLE & ~(_PAGE_USER | _PAGE_PRESENT)))
 #define pfn_pmd(nr,prot) (__pmd(((nr) << PAGE_SHIFT) | pgprot_val(prot)))
 #define pmd_pfn(x) ((pmd_val(x) & __PHYSICAL_MASK) >> PAGE_SHIFT)

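The rewritten predicates fold the old mask-and-clear sequence into one expression: an entry is "bad" if any bit is set outside the physical-address bits (PTE_MASK), the _KERNPG_TABLE flags (present, writable, accessed, dirty), and _PAGE_USER. For pgd_bad()/pud_bad() this is a pure simplification; pmd_bad() is also relaxed, since the deleted macro at the bottom additionally required the _KERNPG_TABLE bits to be set, while the new function only rejects unknown bits. A standalone check that old and new pgd_bad() accept exactly the same values (flag constants mirror the x86 page-table bits):

#include <stdio.h>

#define _PAGE_PRESENT	0x001
#define _PAGE_RW	0x002
#define _PAGE_USER	0x004
#define _PAGE_ACCESSED	0x020
#define _PAGE_DIRTY	0x040
#define _KERNPG_TABLE	(_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
#define PTE_MASK	0x000ffffffffff000UL	/* physical-address bits */

static unsigned long bad_old(unsigned long val)
{
	val &= ~PTE_MASK;
	val &= ~(_PAGE_USER | _PAGE_DIRTY);
	return val & ~(_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED);
}

static unsigned long bad_new(unsigned long val)
{
	return val & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER);
}

int main(void)
{
	unsigned long v;

	for (v = 0; v < 0x2000; v++)	/* sweep the low flag-bit patterns */
		if (!bad_old(v) != !bad_new(v)) {
			printf("diverge at %#lx\n", v);
			return 1;
		}
	printf("old and new pgd_bad() agree on all low-bit patterns\n");
	return 0;
}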
--- sle11-2009-06-29.orig/include/asm-x86/mach-xen/asm/processor_64.h	2009-03-04 11:28:34.000000000 +0100
+++ sle11-2009-06-29/include/asm-x86/mach-xen/asm/processor_64.h	2008-12-15 11:26:44.000000000 +0100
@@ -484,6 +484,14 @@ static inline void __mwait(unsigned long
		: :"a" (eax), "c" (ecx));
 }

+static inline void __sti_mwait(unsigned long eax, unsigned long ecx)
+{
+	/* "mwait %eax,%ecx;" */
+	asm volatile(
+		"sti; .byte 0x0f,0x01,0xc9;"
+		: :"a" (eax), "c" (ecx));
+}
+
 extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx);

 #define stack_current() \
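__sti_mwait() exists to close a wakeup race in the idle loop: the caller checks need_resched() with interrupts disabled, and the sti immediately before mwait re-enables them inside sti's one-instruction interrupt shadow, so no interrupt can land between the check and the wait. The .byte 0x0f,0x01,0xc9 sequence is the MWAIT opcode, spelled out for assemblers that predate the mnemonic. A sketch of the consuming idle loop, simplified from the 2.6.20-era pattern rather than copied from it:

static void mwait_idle_sketch(void)
{
	local_irq_disable();
	if (!need_resched()) {
		/* arm the monitor on this CPU's flags word */
		__monitor(&current_thread_info()->flags, 0, 0);
		smp_mb();
		if (!need_resched())
			__sti_mwait(0, 0);	/* enables IRQs, then waits */
		else
			local_irq_enable();
	} else {
		local_irq_enable();
	}
}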
--- sle11-2009-06-29.orig/include/asm-x86/mach-xen/asm/smp_64.h	2009-03-04 11:28:34.000000000 +0100
+++ sle11-2009-06-29/include/asm-x86/mach-xen/asm/smp_64.h	2008-12-15 11:26:44.000000000 +0100
@@ -88,11 +88,6 @@ extern u8 x86_cpu_to_log_apicid[NR_CPUS]
 extern u8 bios_cpu_apicid[];

 #ifdef CONFIG_X86_LOCAL_APIC
-static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
-{
-	return cpus_addr(cpumask)[0];
-}
-
 static inline int cpu_present_to_apicid(int mps_cpu)
 {
	if (mps_cpu < NR_CPUS)
@@ -127,13 +122,6 @@ static __inline int logical_smp_processo
 #define cpu_physical_id(cpu) x86_cpu_to_apicid[cpu]
 #else
 #define cpu_physical_id(cpu) boot_cpu_id
-static inline int smp_call_function_single(int cpuid, void (*func) (void *info),
-				void *info, int retry, int wait)
-{
-	/* Disable interrupts here? */
-	func(info);
-	return 0;
-}
 #endif /* !CONFIG_SMP */
 #endif

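Both removals delete code that had become redundant: on x86-64, cpu_mask_to_apicid() is supplied per APIC mode through the genapic vector, and 2.6.20's generic headers provide a common uniprocessor smp_call_function_single() which, unlike the stub deleted here, does disable interrupts around the call, settling the question in the old comment. For the record, what the deleted helper computed: in flat logical-APIC mode the destination bitmap is simply the low word of the cpumask. A standalone illustration with a plain bitmap:

#include <stdio.h>

int main(void)
{
	/* a cpumask is a bitmap; here CPUs 0 and 2 are targeted */
	unsigned long mask_bits[1] = { (1UL << 0) | (1UL << 2) };
	/* what the deleted helper did: cpus_addr(cpumask)[0] */
	unsigned int apicid = (unsigned int)mask_bits[0];

	printf("flat-mode APIC destination mask: %#x\n", apicid);	/* 0x5 */
	return 0;
}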
--- sle11-2009-06-29.orig/kernel/kexec.c	2009-02-17 11:27:16.000000000 +0100
+++ sle11-2009-06-29/kernel/kexec.c	2009-02-17 11:34:22.000000000 +0100
@@ -374,7 +374,7 @@ static struct page *kimage_alloc_pages(g
		if (limit == ~0UL)
			address_bits = BITS_PER_LONG;
		else
-			address_bits = long_log2(limit);
+			address_bits = ilog2(limit);

		if (xen_limit_pages_to_max_mfn(pages, order, address_bits) < 0) {
			__free_pages(pages, order);
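long_log2() gives way to ilog2() from <linux/log2.h>, new in 2.6.20; both return floor(log2(x)), but ilog2() constant-folds when the argument is known at compile time. Here it converts a byte-address limit into the address-bit count passed to xen_limit_pages_to_max_mfn(). A standalone model of the shared semantics (assumes 64-bit unsigned long):

#include <stdio.h>

/* floor(log2(v)) for v > 0, the contract shared by long_log2()/ilog2() */
static int ilog2_model(unsigned long v)
{
	int bits = -1;

	while (v) {
		v >>= 1;
		bits++;
	}
	return bits;
}

int main(void)
{
	/* e.g. a 4 GiB limit: pages must stay below 2^32 */
	printf("ilog2(0x100000000) = %d\n", ilog2_model(0x100000000UL));
	return 0;
}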
--- sle11-2009-06-29.orig/net/core/dev.c	2009-03-04 11:28:34.000000000 +0100
+++ sle11-2009-06-29/net/core/dev.c	2008-12-15 11:26:44.000000000 +0100
@@ -1751,10 +1751,10 @@ inline int skb_checksum_setup(struct sk_
			goto out;
		switch (skb->nh.iph->protocol) {
		case IPPROTO_TCP:
-			skb->csum = offsetof(struct tcphdr, check);
+			skb->csum_offset = offsetof(struct tcphdr, check);
			break;
		case IPPROTO_UDP:
-			skb->csum = offsetof(struct udphdr, check);
+			skb->csum_offset = offsetof(struct udphdr, check);
			break;
		default:
			if (net_ratelimit())
@@ -1763,7 +1763,7 @@ inline int skb_checksum_setup(struct sk_
				" %d packet", skb->nh.iph->protocol);
			goto out;
		}
-		if ((skb->h.raw + skb->csum + 2) > skb->tail)
+		if ((skb->h.raw + skb->csum_offset + 2) > skb->tail)
			goto out;
		skb->ip_summed = CHECKSUM_PARTIAL;
		skb->proto_csum_blank = 0;
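This tracks the 2.6.20 sk_buff change that split the old dual-purpose skb->csum into a union with skb->csum_offset: for CHECKSUM_PARTIAL packets the field now names where, relative to skb->h.raw, the device must store the computed checksum, and the "+ 2" in the bounds check is the size of that 16-bit field. The values stored are plain offsetof() results; a standalone check against abbreviated header layouts that follow the kernel's field order:

#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

struct tcphdr_demo {		/* abbreviated layout, bitfield flags merged */
	uint16_t source, dest;
	uint32_t seq, ack_seq;
	uint16_t flags_doff;	/* stands in for the doff/flags bitfields */
	uint16_t window;
	uint16_t check;		/* offset 16, as in the kernel's tcphdr */
	uint16_t urg_ptr;
};

struct udphdr_demo {
	uint16_t source, dest, len;
	uint16_t check;		/* offset 6, as in the kernel's udphdr */
};

int main(void)
{
	printf("tcp check offset = %zu, udp check offset = %zu\n",
	       offsetof(struct tcphdr_demo, check),
	       offsetof(struct udphdr_demo, check));	/* prints 16 and 6 */
	return 0;
}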