1From: www.kernel.org
2Subject: Update to 2.6.22
3Patch-mainline: 2.6.22
4
5Automatically created from "patches.kernel.org/patch-2.6.22" by xen-port-patches.py
6
7Acked-by: jbeulich@novell.com
8
9--- sle11-2009-04-20.orig/arch/x86/Kconfig 2009-03-04 11:25:55.000000000 +0100
10+++ sle11-2009-04-20/arch/x86/Kconfig 2009-02-05 10:22:38.000000000 +0100
11@@ -1433,7 +1433,7 @@ config PHYSICAL_START
12
13 config RELOCATABLE
14 bool "Build a relocatable kernel (EXPERIMENTAL)"
15- depends on EXPERIMENTAL && !X86_XEN
16+ depends on EXPERIMENTAL && !X86_XEN && !X86_64_XEN
17 help
18 This builds a kernel image that retains relocation information
19 so it can be loaded someplace besides the default 1MB.
20@@ -1487,7 +1487,6 @@ config COMPAT_VDSO
21 def_bool y
22 prompt "Compat VDSO support"
23 depends on X86_32 || IA32_EMULATION
24- depends on !X86_XEN
25 help
26 Map the 32-bit VDSO to the predictable old-style address too.
27 ---help---
28@@ -1666,6 +1665,7 @@ config PCI
29 bool "PCI support"
30 default y
31 select ARCH_SUPPORTS_MSI if (X86_LOCAL_APIC && X86_IO_APIC)
32+ select ARCH_SUPPORTS_MSI if (XEN_UNPRIVILEGED_GUEST && XEN_PCIDEV_FRONTEND)
33 help
34 Find out whether you have a PCI motherboard. PCI is the name of a
35 bus system, i.e. the way the CPU talks to the other stuff inside
36--- sle11-2009-04-20.orig/arch/x86/kernel/Makefile 2009-03-04 11:25:55.000000000 +0100
37+++ sle11-2009-04-20/arch/x86/kernel/Makefile 2008-12-15 11:27:22.000000000 +0100
38@@ -127,4 +127,4 @@ endif
39 disabled-obj-$(CONFIG_XEN) := early-quirks.o hpet.o i8253.o i8259_$(BITS).o reboot.o \
40 smpboot_$(BITS).o tsc_$(BITS).o tsc_sync.o
41 disabled-obj-$(CONFIG_XEN_UNPRIVILEGED_GUEST) += mpparse_64.o
42-%/head_$(BITS).o %/head_$(BITS).s: $(if $(CONFIG_XEN),EXTRA_AFLAGS,dummy) :=
43+%/head_64.o %/head_64.s: $(if $(CONFIG_XEN),EXTRA_AFLAGS,dummy) :=
44--- sle11-2009-04-20.orig/arch/x86/kernel/apic_32-xen.c 2009-03-04 11:25:55.000000000 +0100
45+++ sle11-2009-04-20/arch/x86/kernel/apic_32-xen.c 2008-12-15 11:27:22.000000000 +0100
46@@ -19,7 +19,6 @@
47 #include <linux/mm.h>
48 #include <linux/delay.h>
49 #include <linux/bootmem.h>
50-#include <linux/smp_lock.h>
51 #include <linux/interrupt.h>
52 #include <linux/mc146818rtc.h>
53 #include <linux/kernel_stat.h>
54--- sle11-2009-04-20.orig/arch/x86/kernel/asm-offsets_32.c 2008-12-15 11:26:44.000000000 +0100
55+++ sle11-2009-04-20/arch/x86/kernel/asm-offsets_32.c 2008-12-15 11:27:22.000000000 +0100
56@@ -109,11 +109,6 @@ void foo(void)
57
58 OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx);
59
60-#ifdef CONFIG_XEN
61- BLANK();
62- OFFSET(XEN_START_mfn_list, start_info, mfn_list);
63-#endif
64-
65 #ifdef CONFIG_PARAVIRT
66 BLANK();
67 OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled);
68--- sle11-2009-04-20.orig/arch/x86/kernel/cpu/common-xen.c 2009-03-04 11:25:55.000000000 +0100
69+++ sle11-2009-04-20/arch/x86/kernel/cpu/common-xen.c 2008-12-15 11:27:22.000000000 +0100
70@@ -22,16 +22,40 @@
71 #define phys_pkg_id(a,b) a
72 #endif
73 #endif
74-#include <asm/pda.h>
75 #include <asm/hypervisor.h>
76
77 #include "cpu.h"
78
79-DEFINE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr);
80-EXPORT_PER_CPU_SYMBOL(cpu_gdt_descr);
81+DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = {
82+ [GDT_ENTRY_KERNEL_CS] = { 0x0000ffff, 0x00cf9a00 },
83+ [GDT_ENTRY_KERNEL_DS] = { 0x0000ffff, 0x00cf9200 },
84+ [GDT_ENTRY_DEFAULT_USER_CS] = { 0x0000ffff, 0x00cffa00 },
85+ [GDT_ENTRY_DEFAULT_USER_DS] = { 0x0000ffff, 0x00cff200 },
86+#ifndef CONFIG_XEN
87+ /*
88+ * Segments used for calling PnP BIOS have byte granularity.
89+ * They code segments and data segments have fixed 64k limits,
90+ * the transfer segment sizes are set at run time.
91+ */
92+ [GDT_ENTRY_PNPBIOS_CS32] = { 0x0000ffff, 0x00409a00 },/* 32-bit code */
93+ [GDT_ENTRY_PNPBIOS_CS16] = { 0x0000ffff, 0x00009a00 },/* 16-bit code */
94+ [GDT_ENTRY_PNPBIOS_DS] = { 0x0000ffff, 0x00009200 }, /* 16-bit data */
95+ [GDT_ENTRY_PNPBIOS_TS1] = { 0x00000000, 0x00009200 },/* 16-bit data */
96+ [GDT_ENTRY_PNPBIOS_TS2] = { 0x00000000, 0x00009200 },/* 16-bit data */
97+ /*
98+ * The APM segments have byte granularity and their bases
99+ * are set at run time. All have 64k limits.
100+ */
101+ [GDT_ENTRY_APMBIOS_BASE] = { 0x0000ffff, 0x00409a00 },/* 32-bit code */
102+ /* 16-bit code */
103+ [GDT_ENTRY_APMBIOS_BASE+1] = { 0x0000ffff, 0x00009a00 },
104+ [GDT_ENTRY_APMBIOS_BASE+2] = { 0x0000ffff, 0x00409200 }, /* data */
105
106-struct i386_pda *_cpu_pda[NR_CPUS] __read_mostly;
107-EXPORT_SYMBOL(_cpu_pda);
108+ [GDT_ENTRY_ESPFIX_SS] = { 0x00000000, 0x00c09200 },
109+#endif
110+ [GDT_ENTRY_PERCPU] = { 0x00000000, 0x00000000 },
111+} };
112+EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
113
114 static int cachesize_override __cpuinitdata = -1;
115 static int disable_x86_fxsr __cpuinitdata;
116@@ -373,7 +397,7 @@ __setup("serialnumber", x86_serial_nr_se
117 /*
118 * This does the hard work of actually picking apart the CPU stuff...
119 */
120-void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
121+static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
122 {
123 int i;
124
125@@ -484,15 +508,22 @@ void __cpuinit identify_cpu(struct cpuin
126
127 /* Init Machine Check Exception if available. */
128 mcheck_init(c);
129+}
130
131- if (c == &boot_cpu_data)
132- sysenter_setup();
133+void __init identify_boot_cpu(void)
134+{
135+ identify_cpu(&boot_cpu_data);
136+ sysenter_setup();
137 enable_sep_cpu();
138+ mtrr_bp_init();
139+}
140
141- if (c == &boot_cpu_data)
142- mtrr_bp_init();
143- else
144- mtrr_ap_init();
145+void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
146+{
147+ BUG_ON(c == &boot_cpu_data);
148+ identify_cpu(c);
149+ enable_sep_cpu();
150+ mtrr_ap_init();
151 }
152
153 #ifdef CONFIG_X86_HT
154@@ -606,136 +637,47 @@ void __init early_cpu_init(void)
155 #endif
156 }
157
158-/* Make sure %gs is initialized properly in idle threads */
159+/* Make sure %fs is initialized properly in idle threads */
160 struct pt_regs * __devinit idle_regs(struct pt_regs *regs)
161 {
162 memset(regs, 0, sizeof(struct pt_regs));
163- regs->xfs = __KERNEL_PDA;
164+ regs->xfs = __KERNEL_PERCPU;
165 return regs;
166 }
167
168-static __cpuinit int alloc_gdt(int cpu)
169+/* Current gdt points %fs at the "master" per-cpu area: after this,
170+ * it's on the real one. */
171+void switch_to_new_gdt(void)
172 {
173- struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
174- struct desc_struct *gdt;
175- struct i386_pda *pda;
176-
177- gdt = (struct desc_struct *)cpu_gdt_descr->address;
178- pda = cpu_pda(cpu);
179-
180- /*
181- * This is a horrible hack to allocate the GDT. The problem
182- * is that cpu_init() is called really early for the boot CPU
183- * (and hence needs bootmem) but much later for the secondary
184- * CPUs, when bootmem will have gone away
185- */
186- if (NODE_DATA(0)->bdata->node_bootmem_map) {
187- BUG_ON(gdt != NULL || pda != NULL);
188-
189- gdt = alloc_bootmem_pages(PAGE_SIZE);
190- pda = alloc_bootmem(sizeof(*pda));
191- /* alloc_bootmem(_pages) panics on failure, so no check */
192-
193- memset(gdt, 0, PAGE_SIZE);
194- memset(pda, 0, sizeof(*pda));
195- } else {
196- /* GDT and PDA might already have been allocated if
197- this is a CPU hotplug re-insertion. */
198- if (gdt == NULL)
199- gdt = (struct desc_struct *)get_zeroed_page(GFP_KERNEL);
200-
201- if (pda == NULL)
202- pda = kmalloc_node(sizeof(*pda), GFP_KERNEL, cpu_to_node(cpu));
203-
204- if (unlikely(!gdt || !pda)) {
205- free_pages((unsigned long)gdt, 0);
206- kfree(pda);
207- return 0;
208- }
209- }
210-
211- cpu_gdt_descr->address = (unsigned long)gdt;
212- cpu_pda(cpu) = pda;
213-
214- return 1;
215-}
216-
217-/* Initial PDA used by boot CPU */
218-struct i386_pda boot_pda = {
219- ._pda = &boot_pda,
220- .cpu_number = 0,
221- .pcurrent = &init_task,
222-};
223-
224-static inline void set_kernel_fs(void)
225-{
226- /* Set %fs for this CPU's PDA. Memory clobber is to create a
227- barrier with respect to any PDA operations, so the compiler
228- doesn't move any before here. */
229- asm volatile ("mov %0, %%fs" : : "r" (__KERNEL_PDA) : "memory");
230-}
231-
232-/* Initialize the CPU's GDT and PDA. The boot CPU does this for
233- itself, but secondaries find this done for them. */
234-__cpuinit int init_gdt(int cpu, struct task_struct *idle)
235-{
236- struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
237- struct desc_struct *gdt;
238- struct i386_pda *pda;
239-
240- /* For non-boot CPUs, the GDT and PDA should already have been
241- allocated. */
242- if (!alloc_gdt(cpu)) {
243- printk(KERN_CRIT "CPU%d failed to allocate GDT or PDA\n", cpu);
244- return 0;
245- }
246-
247- gdt = (struct desc_struct *)cpu_gdt_descr->address;
248- pda = cpu_pda(cpu);
249-
250- BUG_ON(gdt == NULL || pda == NULL);
251-
252- /*
253- * Initialize the per-CPU GDT with the boot GDT,
254- * and set up the GDT descriptor:
255- */
256- memcpy(gdt, cpu_gdt_table, GDT_SIZE);
257- cpu_gdt_descr->size = GDT_SIZE - 1;
258-
259- pack_descriptor((u32 *)&gdt[GDT_ENTRY_PDA].a,
260- (u32 *)&gdt[GDT_ENTRY_PDA].b,
261- (unsigned long)pda, sizeof(*pda) - 1,
262- 0x80 | DESCTYPE_S | 0x2, 0); /* present read-write data segment */
263-
264- memset(pda, 0, sizeof(*pda));
265- pda->_pda = pda;
266- pda->cpu_number = cpu;
267- pda->pcurrent = idle;
268-
269- return 1;
270-}
271-
272-void __cpuinit cpu_set_gdt(int cpu)
273-{
274- struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
275+ struct Xgt_desc_struct gdt_descr;
276 unsigned long va, frames[16];
277 int f;
278
279- for (va = cpu_gdt_descr->address, f = 0;
280- va < cpu_gdt_descr->address + cpu_gdt_descr->size;
281+ gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id());
282+ gdt_descr.size = GDT_SIZE - 1;
283+
284+ for (va = gdt_descr.address, f = 0;
285+ va < gdt_descr.address + gdt_descr.size;
286 va += PAGE_SIZE, f++) {
287 frames[f] = virt_to_mfn(va);
288 make_lowmem_page_readonly(
289 (void *)va, XENFEAT_writable_descriptor_tables);
290 }
291- BUG_ON(HYPERVISOR_set_gdt(frames, (cpu_gdt_descr->size + 1) / 8));
292-
293- set_kernel_fs();
294+ if (HYPERVISOR_set_gdt(frames, (gdt_descr.size + 1) / 8))
295+ BUG();
296+ asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory");
297 }
298
299-/* Common CPU init for both boot and secondary CPUs */
300-static void __cpuinit _cpu_init(int cpu, struct task_struct *curr)
301+/*
302+ * cpu_init() initializes state that is per-CPU. Some data is already
303+ * initialized (naturally) in the bootstrap process, such as the GDT
304+ * and IDT. We reload them nevertheless, this function acts as a
305+ * 'CPU state barrier', nothing should get across.
306+ */
307+void __cpuinit cpu_init(void)
308 {
309+ int cpu = smp_processor_id();
310+ struct task_struct *curr = current;
311 #ifndef CONFIG_X86_NO_TSS
312 struct tss_struct * t = &per_cpu(init_tss, cpu);
313 #endif
314@@ -757,6 +699,8 @@ static void __cpuinit _cpu_init(int cpu,
315 set_in_cr4(X86_CR4_TSD);
316 }
317
318+ switch_to_new_gdt();
319+
320 /*
321 * Set up and load the per-CPU TSS and LDT
322 */
323@@ -794,38 +738,6 @@ static void __cpuinit _cpu_init(int cpu,
324 mxcsr_feature_mask_init();
325 }
326
327-/* Entrypoint to initialize secondary CPU */
328-void __cpuinit secondary_cpu_init(void)
329-{
330- int cpu = smp_processor_id();
331- struct task_struct *curr = current;
332-
333- _cpu_init(cpu, curr);
334-}
335-
336-/*
337- * cpu_init() initializes state that is per-CPU. Some data is already
338- * initialized (naturally) in the bootstrap process, such as the GDT
339- * and IDT. We reload them nevertheless, this function acts as a
340- * 'CPU state barrier', nothing should get across.
341- */
342-void __cpuinit cpu_init(void)
343-{
344- int cpu = smp_processor_id();
345- struct task_struct *curr = current;
346-
347- /* Set up the real GDT and PDA, so we can transition from the
348- boot versions. */
349- if (!init_gdt(cpu, curr)) {
350- /* failed to allocate something; not much we can do... */
351- for (;;)
352- local_irq_enable();
353- }
354-
355- cpu_set_gdt(cpu);
356- _cpu_init(cpu, curr);
357-}
358-
359 #ifdef CONFIG_HOTPLUG_CPU
360 void __cpuinit cpu_uninit(void)
361 {
362--- sle11-2009-04-20.orig/arch/x86/kernel/cpu/mtrr/main-xen.c 2008-12-15 11:26:44.000000000 +0100
363+++ sle11-2009-04-20/arch/x86/kernel/cpu/mtrr/main-xen.c 2008-12-15 11:27:22.000000000 +0100
364@@ -167,7 +167,7 @@ mtrr_del(int reg, unsigned long base, un
365 EXPORT_SYMBOL(mtrr_add);
366 EXPORT_SYMBOL(mtrr_del);
367
368-void __init mtrr_bp_init(void)
369+__init void mtrr_bp_init(void)
370 {
371 }
372
373--- sle11-2009-04-20.orig/arch/x86/kernel/e820_32-xen.c 2009-03-04 11:25:55.000000000 +0100
374+++ sle11-2009-04-20/arch/x86/kernel/e820_32-xen.c 2008-12-15 11:27:22.000000000 +0100
375@@ -162,26 +162,27 @@ static struct resource standard_io_resou
376
377 static int __init romsignature(const unsigned char *rom)
378 {
379+ const unsigned short * const ptr = (const unsigned short *)rom;
380 unsigned short sig;
381
382- return probe_kernel_address((const unsigned short *)rom, sig) == 0 &&
383- sig == ROMSIGNATURE;
384+ return probe_kernel_address(ptr, sig) == 0 && sig == ROMSIGNATURE;
385 }
386
387-static int __init romchecksum(unsigned char *rom, unsigned long length)
388+static int __init romchecksum(const unsigned char *rom, unsigned long length)
389 {
390- unsigned char sum;
391+ unsigned char sum, c;
392
393- for (sum = 0; length; length--)
394- sum += *rom++;
395- return sum == 0;
396+ for (sum = 0; length && probe_kernel_address(rom++, c) == 0; length--)
397+ sum += c;
398+ return !length && !sum;
399 }
400
401 static void __init probe_roms(void)
402 {
403+ const unsigned char *rom;
404 unsigned long start, length, upper;
405- unsigned char *rom;
406- int i;
407+ unsigned char c;
408+ int i;
409
410 #ifdef CONFIG_XEN
411 /* Nothing to do if not running in dom0. */
412@@ -198,8 +199,11 @@ static void __init probe_roms(void)
413
414 video_rom_resource.start = start;
415
416+ if (probe_kernel_address(rom + 2, c) != 0)
417+ continue;
418+
419 /* 0 < length <= 0x7f * 512, historically */
420- length = rom[2] * 512;
421+ length = c * 512;
422
423 /* if checksum okay, trust length byte */
424 if (length && romchecksum(rom, length))
425@@ -233,8 +237,11 @@ static void __init probe_roms(void)
426 if (!romsignature(rom))
427 continue;
428
429+ if (probe_kernel_address(rom + 2, c) != 0)
430+ continue;
431+
432 /* 0 < length <= 0x7f * 512, historically */
433- length = rom[2] * 512;
434+ length = c * 512;
435
436 /* but accept any length that fits if checksum okay */
437 if (!length || start + length > upper || !romchecksum(rom, length))
438@@ -249,7 +256,7 @@ static void __init probe_roms(void)
439 }
440
441 #ifdef CONFIG_XEN
442-static struct e820map machine_e820 __initdata;
443+static struct e820map machine_e820;
444 #define e820 machine_e820
445 #endif
446
447@@ -409,10 +416,8 @@ int __init sanitize_e820_map(struct e820
448 ____________________33__
449 ______________________4_
450 */
451- printk("sanitize start\n");
452 /* if there's only one memory region, don't bother */
453 if (*pnr_map < 2) {
454- printk("sanitize bail 0\n");
455 return -1;
456 }
457
458@@ -421,7 +426,6 @@ int __init sanitize_e820_map(struct e820
459 /* bail out if we find any unreasonable addresses in bios map */
460 for (i=0; i<old_nr; i++)
461 if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr) {
462- printk("sanitize bail 1\n");
463 return -1;
464 }
465
466@@ -517,7 +521,6 @@ int __init sanitize_e820_map(struct e820
467 memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry));
468 *pnr_map = new_nr;
469
470- printk("sanitize end\n");
471 return 0;
472 }
473
474@@ -552,7 +555,6 @@ int __init copy_e820_map(struct e820entr
475 unsigned long long size = biosmap->size;
476 unsigned long long end = start + size;
477 unsigned long type = biosmap->type;
478- printk("copy_e820_map() start: %016Lx size: %016Lx end: %016Lx type: %ld\n", start, size, end, type);
479
480 /* Overflow in 64 bits? Ignore the memory map. */
481 if (start > end)
482@@ -564,17 +566,11 @@ int __init copy_e820_map(struct e820entr
483 * Not right. Fix it up.
484 */
485 if (type == E820_RAM) {
486- printk("copy_e820_map() type is E820_RAM\n");
487 if (start < 0x100000ULL && end > 0xA0000ULL) {
488- printk("copy_e820_map() lies in range...\n");
489- if (start < 0xA0000ULL) {
490- printk("copy_e820_map() start < 0xA0000ULL\n");
491+ if (start < 0xA0000ULL)
492 add_memory_region(start, 0xA0000ULL-start, type);
493- }
494- if (end <= 0x100000ULL) {
495- printk("copy_e820_map() end <= 0x100000ULL\n");
496+ if (end <= 0x100000ULL)
497 continue;
498- }
499 start = 0x100000ULL;
500 size = end - start;
501 }
502--- sle11-2009-04-20.orig/arch/x86/kernel/entry_32-xen.S 2009-03-04 11:25:55.000000000 +0100
503+++ sle11-2009-04-20/arch/x86/kernel/entry_32-xen.S 2008-12-15 11:27:22.000000000 +0100
504@@ -15,7 +15,7 @@
505 * I changed all the .align's to 4 (16 byte alignment), as that's faster
506 * on a 486.
507 *
508- * Stack layout in 'ret_from_system_call':
509+ * Stack layout in 'syscall_exit':
510 * ptrace needs to have all regs on the stack.
511 * if the order here is changed, it needs to be
512 * updated in fork.c:copy_process, signal.c:do_signal,
513@@ -135,7 +135,7 @@ NMI_MASK = 0x80000000
514 movl $(__USER_DS), %edx; \
515 movl %edx, %ds; \
516 movl %edx, %es; \
517- movl $(__KERNEL_PDA), %edx; \
518+ movl $(__KERNEL_PERCPU), %edx; \
519 movl %edx, %fs
520
521 #define RESTORE_INT_REGS \
522@@ -308,16 +308,12 @@ sysenter_past_esp:
523 pushl $(__USER_CS)
524 CFI_ADJUST_CFA_OFFSET 4
525 /*CFI_REL_OFFSET cs, 0*/
526-#ifndef CONFIG_COMPAT_VDSO
527 /*
528 * Push current_thread_info()->sysenter_return to the stack.
529 * A tiny bit of offset fixup is necessary - 4*4 means the 4 words
530 * pushed above; +8 corresponds to copy_thread's esp0 setting.
531 */
532 pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp)
533-#else
534- pushl $SYSENTER_RETURN
535-#endif
536 CFI_ADJUST_CFA_OFFSET 4
537 CFI_REL_OFFSET eip, 0
538
539@@ -345,7 +341,7 @@ sysenter_past_esp:
540 jae syscall_badsys
541 call *sys_call_table(,%eax,4)
542 movl %eax,PT_EAX(%esp)
543- DISABLE_INTERRUPTS(CLBR_ECX|CLBR_EDX)
544+ DISABLE_INTERRUPTS(CLBR_ANY)
545 TRACE_IRQS_OFF
546 movl TI_flags(%ebp), %ecx
547 testw $_TIF_ALLWORK_MASK, %cx
548@@ -400,10 +396,6 @@ ENTRY(system_call)
549 CFI_ADJUST_CFA_OFFSET 4
550 SAVE_ALL
551 GET_THREAD_INFO(%ebp)
552- testl $TF_MASK,PT_EFLAGS(%esp)
553- jz no_singlestep
554- orl $_TIF_SINGLESTEP,TI_flags(%ebp)
555-no_singlestep:
556 # system call tracing in operation / emulation
557 /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
558 testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
559@@ -418,6 +410,10 @@ syscall_exit:
560 # setting need_resched or sigpending
561 # between sampling and the iret
562 TRACE_IRQS_OFF
563+ testl $TF_MASK,PT_EFLAGS(%esp) # If tracing set singlestep flag on exit
564+ jz no_singlestep
565+ orl $_TIF_SINGLESTEP,TI_flags(%ebp)
566+no_singlestep:
567 movl TI_flags(%ebp), %ecx
568 testw $_TIF_ALLWORK_MASK, %cx # current->work
569 jne syscall_exit_work
570@@ -635,9 +631,7 @@ END(syscall_badsys)
571 #ifndef CONFIG_XEN
572 #define FIXUP_ESPFIX_STACK \
573 /* since we are on a wrong stack, we cant make it a C code :( */ \
574- movl %fs:PDA_cpu, %ebx; \
575- PER_CPU(cpu_gdt_descr, %ebx); \
576- movl GDS_address(%ebx), %ebx; \
577+ PER_CPU(gdt_page, %ebx); \
578 GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \
579 addl %esp, %eax; \
580 pushl $__KERNEL_DS; \
581@@ -710,7 +704,7 @@ ENTRY(name) \
582 SAVE_ALL; \
583 TRACE_IRQS_OFF \
584 movl %esp,%eax; \
585- call smp_/**/name; \
586+ call smp_##name; \
587 jmp ret_from_intr; \
588 CFI_ENDPROC; \
589 ENDPROC(name)
590@@ -718,10 +712,6 @@ ENDPROC(name)
591 /* The include is where all of the SMP etc. interrupts come from */
592 #include "entry_arch.h"
593
594-/* This alternate entry is needed because we hijack the apic LVTT */
595-#if defined(CONFIG_VMI) && defined(CONFIG_X86_LOCAL_APIC)
596-BUILD_INTERRUPT(apic_vmi_timer_interrupt,LOCAL_TIMER_VECTOR)
597-#endif
598 #else
599 #define UNWIND_ESPFIX_STACK
600 #endif
601@@ -764,7 +754,7 @@ error_code:
602 pushl %fs
603 CFI_ADJUST_CFA_OFFSET 4
604 /*CFI_REL_OFFSET fs, 0*/
605- movl $(__KERNEL_PDA), %ecx
606+ movl $(__KERNEL_PERCPU), %ecx
607 movl %ecx, %fs
608 UNWIND_ESPFIX_STACK
609 popl %ecx
610--- sle11-2009-04-20.orig/arch/x86/kernel/head_32-xen.S 2009-03-04 11:25:55.000000000 +0100
611+++ sle11-2009-04-20/arch/x86/kernel/head_32-xen.S 2008-12-15 11:27:22.000000000 +0100
612@@ -37,7 +37,8 @@ ENTRY(startup_32)
613 /* Set up the stack pointer */
614 movl $(init_thread_union+THREAD_SIZE),%esp
615
616- call setup_pda
617+ movl %ss,%eax
618+ movl %eax,%fs # gets reset once there's real percpu
619
620 /* get vendor info */
621 xorl %eax,%eax # call CPUID with 0 -> return vendor ID
622@@ -64,55 +65,11 @@ ENTRY(startup_32)
623 xorl %eax,%eax # Clear GS
624 movl %eax,%gs
625
626- movl $(__KERNEL_PDA),%eax
627- mov %eax,%fs
628-
629 cld # gcc2 wants the direction flag cleared at all times
630
631 pushl $0 # fake return address for unwinder
632 jmp start_kernel
633
634-/*
635- * Point the GDT at this CPU's PDA. This will be
636- * cpu_gdt_table and boot_pda.
637- */
638-ENTRY(setup_pda)
639- /* get the PDA pointer */
640- movl $boot_pda, %eax
641-
642- /* slot the PDA address into the GDT */
643- mov $cpu_gdt_table, %ecx
644- mov %ax, (__KERNEL_PDA+0+2)(%ecx) /* base & 0x0000ffff */
645- shr $16, %eax
646- mov %al, (__KERNEL_PDA+4+0)(%ecx) /* base & 0x00ff0000 */
647- mov %ah, (__KERNEL_PDA+4+3)(%ecx) /* base & 0xff000000 */
648-
649- # %esi still points to start_info, and no registers
650- # need to be preserved.
651-
652- movl XEN_START_mfn_list(%esi), %ebx
653- movl $(cpu_gdt_table - __PAGE_OFFSET), %eax
654- shrl $PAGE_SHIFT, %eax
655- movl (%ebx,%eax,4), %ecx
656- pushl %ecx # frame number for set_gdt below
657-
658- xorl %esi, %esi
659- xorl %edx, %edx
660- shldl $PAGE_SHIFT, %ecx, %edx
661- shll $PAGE_SHIFT, %ecx
662- orl $0x61, %ecx
663- movl $cpu_gdt_table, %ebx
664- movl $__HYPERVISOR_update_va_mapping, %eax
665- int $0x82
666-
667- movl $(PAGE_SIZE_asm / 8), %ecx
668- movl %esp, %ebx
669- movl $__HYPERVISOR_set_gdt, %eax
670- int $0x82
671-
672- popl %ecx
673- ret
674-
675 #define HYPERCALL_PAGE_OFFSET 0x1000
676 .org HYPERCALL_PAGE_OFFSET
677 ENTRY(hypercall_page)
678@@ -138,60 +95,6 @@ ENTRY(empty_zero_page)
679 */
680 .data
681
682-/*
683- * The Global Descriptor Table contains 28 quadwords, per-CPU.
684- */
685- .section .data.page_aligned, "aw"
686- .align PAGE_SIZE_asm
687-ENTRY(cpu_gdt_table)
688- .quad 0x0000000000000000 /* NULL descriptor */
689- .quad 0x0000000000000000 /* 0x0b reserved */
690- .quad 0x0000000000000000 /* 0x13 reserved */
691- .quad 0x0000000000000000 /* 0x1b reserved */
692- .quad 0x0000000000000000 /* 0x20 unused */
693- .quad 0x0000000000000000 /* 0x28 unused */
694- .quad 0x0000000000000000 /* 0x33 TLS entry 1 */
695- .quad 0x0000000000000000 /* 0x3b TLS entry 2 */
696- .quad 0x0000000000000000 /* 0x43 TLS entry 3 */
697- .quad 0x0000000000000000 /* 0x4b reserved */
698- .quad 0x0000000000000000 /* 0x53 reserved */
699- .quad 0x0000000000000000 /* 0x5b reserved */
700-
701- .quad 0x00cf9a000000ffff /* 0x60 kernel 4GB code at 0x00000000 */
702- .quad 0x00cf92000000ffff /* 0x68 kernel 4GB data at 0x00000000 */
703- .quad 0x00cffa000000ffff /* 0x73 user 4GB code at 0x00000000 */
704- .quad 0x00cff2000000ffff /* 0x7b user 4GB data at 0x00000000 */
705-
706- .quad 0x0000000000000000 /* 0x80 TSS descriptor */
707- .quad 0x0000000000000000 /* 0x88 LDT descriptor */
708-
709- /*
710- * Segments used for calling PnP BIOS have byte granularity.
711- * They code segments and data segments have fixed 64k limits,
712- * the transfer segment sizes are set at run time.
713- */
714- .quad 0x0000000000000000 /* 0x90 32-bit code */
715- .quad 0x0000000000000000 /* 0x98 16-bit code */
716- .quad 0x0000000000000000 /* 0xa0 16-bit data */
717- .quad 0x0000000000000000 /* 0xa8 16-bit data */
718- .quad 0x0000000000000000 /* 0xb0 16-bit data */
719-
720- /*
721- * The APM segments have byte granularity and their bases
722- * are set at run time. All have 64k limits.
723- */
724- .quad 0x0000000000000000 /* 0xb8 APM CS code */
725- .quad 0x0000000000000000 /* 0xc0 APM CS 16 code (16 bit) */
726- .quad 0x0000000000000000 /* 0xc8 APM DS data */
727-
728- .quad 0x0000000000000000 /* 0xd0 - ESPFIX SS */
729- .quad 0x00cf92000000ffff /* 0xd8 - PDA */
730- .quad 0x0000000000000000 /* 0xe0 - unused */
731- .quad 0x0000000000000000 /* 0xe8 - unused */
732- .quad 0x0000000000000000 /* 0xf0 - unused */
733- .quad 0x0000000000000000 /* 0xf8 - GDT entry 31: double-fault TSS */
734- .align PAGE_SIZE_asm
735-
736 #if CONFIG_XEN_COMPAT <= 0x030002
737 /*
738 * __xen_guest information
739--- sle11-2009-04-20.orig/arch/x86/kernel/io_apic_32-xen.c 2009-03-04 11:25:55.000000000 +0100
740+++ sle11-2009-04-20/arch/x86/kernel/io_apic_32-xen.c 2008-12-15 11:27:22.000000000 +0100
741@@ -25,7 +25,6 @@
742 #include <linux/init.h>
743 #include <linux/delay.h>
744 #include <linux/sched.h>
745-#include <linux/smp_lock.h>
746 #include <linux/mc146818rtc.h>
747 #include <linux/compiler.h>
748 #include <linux/acpi.h>
749@@ -35,6 +34,7 @@
750 #include <linux/msi.h>
751 #include <linux/htirq.h>
752 #include <linux/freezer.h>
753+#include <linux/kthread.h>
754
755 #include <asm/io.h>
756 #include <asm/smp.h>
757@@ -710,8 +710,6 @@ static int balanced_irq(void *unused)
758 unsigned long prev_balance_time = jiffies;
759 long time_remaining = balanced_irq_interval;
760
761- daemonize("kirqd");
762-
763 /* push everything to CPU 0 to give us a starting point. */
764 for (i = 0 ; i < NR_IRQS ; i++) {
765 irq_desc[i].pending_mask = cpumask_of_cpu(0);
766@@ -771,10 +769,9 @@ static int __init balanced_irq_init(void
767 }
768
769 printk(KERN_INFO "Starting balanced_irq\n");
770- if (kernel_thread(balanced_irq, NULL, CLONE_KERNEL) >= 0)
771+ if (!IS_ERR(kthread_run(balanced_irq, NULL, "kirqd")))
772 return 0;
773- else
774- printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq");
775+ printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq");
776 failed:
777 for_each_possible_cpu(i) {
778 kfree(irq_cpu_data[i].irq_delta);
779@@ -1455,10 +1452,6 @@ static void __init setup_ExtINT_IRQ0_pin
780 enable_8259A_irq(0);
781 }
782
783-static inline void UNEXPECTED_IO_APIC(void)
784-{
785-}
786-
787 void __init print_IO_APIC(void)
788 {
789 int apic, i;
790@@ -1498,34 +1491,12 @@ void __init print_IO_APIC(void)
791 printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID);
792 printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type);
793 printk(KERN_DEBUG "....... : LTS : %X\n", reg_00.bits.LTS);
794- if (reg_00.bits.ID >= get_physical_broadcast())
795- UNEXPECTED_IO_APIC();
796- if (reg_00.bits.__reserved_1 || reg_00.bits.__reserved_2)
797- UNEXPECTED_IO_APIC();
798
799 printk(KERN_DEBUG ".... register #01: %08X\n", reg_01.raw);
800 printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries);
801- if ( (reg_01.bits.entries != 0x0f) && /* older (Neptune) boards */
802- (reg_01.bits.entries != 0x17) && /* typical ISA+PCI boards */
803- (reg_01.bits.entries != 0x1b) && /* Compaq Proliant boards */
804- (reg_01.bits.entries != 0x1f) && /* dual Xeon boards */
805- (reg_01.bits.entries != 0x22) && /* bigger Xeon boards */
806- (reg_01.bits.entries != 0x2E) &&
807- (reg_01.bits.entries != 0x3F)
808- )
809- UNEXPECTED_IO_APIC();
810
811 printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ);
812 printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version);
813- if ( (reg_01.bits.version != 0x01) && /* 82489DX IO-APICs */
814- (reg_01.bits.version != 0x10) && /* oldest IO-APICs */
815- (reg_01.bits.version != 0x11) && /* Pentium/Pro IO-APICs */
816- (reg_01.bits.version != 0x13) && /* Xeon IO-APICs */
817- (reg_01.bits.version != 0x20) /* Intel P64H (82806 AA) */
818- )
819- UNEXPECTED_IO_APIC();
820- if (reg_01.bits.__reserved_1 || reg_01.bits.__reserved_2)
821- UNEXPECTED_IO_APIC();
822
823 /*
824 * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02,
825@@ -1535,8 +1506,6 @@ void __init print_IO_APIC(void)
826 if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) {
827 printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
828 printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration);
829- if (reg_02.bits.__reserved_1 || reg_02.bits.__reserved_2)
830- UNEXPECTED_IO_APIC();
831 }
832
833 /*
834@@ -1548,8 +1517,6 @@ void __init print_IO_APIC(void)
835 reg_03.raw != reg_01.raw) {
836 printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw);
837 printk(KERN_DEBUG "....... : Boot DT : %X\n", reg_03.bits.boot_DT);
838- if (reg_03.bits.__reserved_1)
839- UNEXPECTED_IO_APIC();
840 }
841
842 printk(KERN_DEBUG ".... IRQ redirection table:\n");
843@@ -2686,19 +2653,19 @@ int arch_setup_msi_irq(struct pci_dev *d
844 if (irq < 0)
845 return irq;
846
847- set_irq_msi(irq, desc);
848 ret = msi_compose_msg(dev, irq, &msg);
849 if (ret < 0) {
850 destroy_irq(irq);
851 return ret;
852 }
853
854+ set_irq_msi(irq, desc);
855 write_msi_msg(irq, &msg);
856
857 set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq,
858 "edge");
859
860- return irq;
861+ return 0;
862 }
863
864 void arch_teardown_msi_irq(unsigned int irq)
865--- sle11-2009-04-20.orig/arch/x86/kernel/ioport_32-xen.c 2009-04-29 08:44:31.000000000 +0200
866+++ sle11-2009-04-20/arch/x86/kernel/ioport_32-xen.c 2008-12-15 11:27:22.000000000 +0100
867@@ -12,10 +12,10 @@
868 #include <linux/types.h>
869 #include <linux/ioport.h>
870 #include <linux/smp.h>
871-#include <linux/smp_lock.h>
872 #include <linux/stddef.h>
873 #include <linux/slab.h>
874 #include <linux/thread_info.h>
875+#include <linux/syscalls.h>
876 #include <xen/interface/physdev.h>
877
878 /* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */
879--- sle11-2009-04-20.orig/arch/x86/kernel/irq_32-xen.c 2009-03-04 11:25:55.000000000 +0100
880+++ sle11-2009-04-20/arch/x86/kernel/irq_32-xen.c 2008-12-15 11:27:22.000000000 +0100
881@@ -24,6 +24,9 @@
882 DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_internodealigned_in_smp;
883 EXPORT_PER_CPU_SYMBOL(irq_stat);
884
885+DEFINE_PER_CPU(struct pt_regs *, irq_regs);
886+EXPORT_PER_CPU_SYMBOL(irq_regs);
887+
888 /*
889 * 'what should we do if we get a hw irq event on an illegal vector'.
890 * each architecture has to answer this themselves.
891--- sle11-2009-04-20.orig/arch/x86/kernel/ldt_32-xen.c 2008-12-15 11:26:44.000000000 +0100
892+++ sle11-2009-04-20/arch/x86/kernel/ldt_32-xen.c 2008-12-15 11:27:22.000000000 +0100
893@@ -10,7 +10,6 @@
894 #include <linux/string.h>
895 #include <linux/mm.h>
896 #include <linux/smp.h>
897-#include <linux/smp_lock.h>
898 #include <linux/vmalloc.h>
899 #include <linux/slab.h>
900
901--- sle11-2009-04-20.orig/arch/x86/kernel/microcode-xen.c 2009-03-04 11:25:55.000000000 +0100
902+++ sle11-2009-04-20/arch/x86/kernel/microcode-xen.c 2008-12-15 11:27:22.000000000 +0100
903@@ -135,7 +135,7 @@ static int __init microcode_dev_init (vo
904 return 0;
905 }
906
907-static void __exit microcode_dev_exit (void)
908+static void microcode_dev_exit (void)
909 {
910 misc_deregister(&microcode_dev);
911 }
912--- sle11-2009-04-20.orig/arch/x86/kernel/mpparse_32-xen.c 2009-03-04 11:25:55.000000000 +0100
913+++ sle11-2009-04-20/arch/x86/kernel/mpparse_32-xen.c 2008-12-15 11:27:22.000000000 +0100
914@@ -18,7 +18,6 @@
915 #include <linux/acpi.h>
916 #include <linux/delay.h>
917 #include <linux/bootmem.h>
918-#include <linux/smp_lock.h>
919 #include <linux/kernel_stat.h>
920 #include <linux/mc146818rtc.h>
921 #include <linux/bitops.h>
922@@ -484,7 +483,7 @@ static int __init smp_read_mpc(struct mp
923 }
924 ++mpc_record;
925 }
926- clustered_apic_check();
927+ setup_apic_routing();
928 if (!num_processors)
929 printk(KERN_ERR "SMP mptable: no processors registered!\n");
930 return num_processors;
931--- sle11-2009-04-20.orig/arch/x86/kernel/pci-dma-xen.c 2009-03-04 11:25:55.000000000 +0100
932+++ sle11-2009-04-20/arch/x86/kernel/pci-dma-xen.c 2008-12-15 11:27:22.000000000 +0100
933@@ -13,6 +13,7 @@
934 #include <linux/pci.h>
935 #include <linux/module.h>
936 #include <linux/version.h>
937+#include <linux/pci.h>
938 #include <asm/io.h>
939 #include <xen/balloon.h>
940 #include <xen/gnttab.h>
941@@ -278,7 +279,7 @@ int dma_declare_coherent_memory(struct d
942 {
943 void __iomem *mem_base = NULL;
944 int pages = size >> PAGE_SHIFT;
945- int bitmap_size = (pages + 31)/32;
946+ int bitmap_size = BITS_TO_LONGS(pages) * sizeof(long);
947
948 if ((flags & (DMA_MEMORY_MAP | DMA_MEMORY_IO)) == 0)
949 goto out;
950@@ -351,6 +352,32 @@ void *dma_mark_declared_memory_occupied(
951 EXPORT_SYMBOL(dma_mark_declared_memory_occupied);
952 #endif /* ARCH_HAS_DMA_DECLARE_COHERENT_MEMORY */
953
954+#if defined(CONFIG_PCI) && !defined(CONFIG_XEN)
955+/* Many VIA bridges seem to corrupt data for DAC. Disable it here */
956+
957+int forbid_dac;
958+EXPORT_SYMBOL(forbid_dac);
959+
960+static __devinit void via_no_dac(struct pci_dev *dev)
961+{
962+ if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) {
963+ printk(KERN_INFO "PCI: VIA PCI bridge detected. Disabling DAC.\n");
964+ forbid_dac = 1;
965+ }
966+}
967+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID, via_no_dac);
968+
969+static int check_iommu(char *s)
970+{
971+ if (!strcmp(s, "usedac")) {
972+ forbid_dac = -1;
973+ return 1;
974+ }
975+ return 0;
976+}
977+__setup("iommu=", check_iommu);
978+#endif
979+
980 dma_addr_t
981 dma_map_single(struct device *dev, void *ptr, size_t size,
982 enum dma_data_direction direction)
983--- sle11-2009-04-20.orig/arch/x86/kernel/process_32-xen.c 2009-03-04 11:25:55.000000000 +0100
984+++ sle11-2009-04-20/arch/x86/kernel/process_32-xen.c 2008-12-15 11:27:22.000000000 +0100
985@@ -21,7 +21,6 @@
986 #include <linux/mm.h>
987 #include <linux/elfcore.h>
988 #include <linux/smp.h>
989-#include <linux/smp_lock.h>
990 #include <linux/stddef.h>
991 #include <linux/slab.h>
992 #include <linux/vmalloc.h>
993@@ -39,6 +38,7 @@
994 #include <linux/random.h>
995 #include <linux/personality.h>
996 #include <linux/tick.h>
997+#include <linux/percpu.h>
998
999 #include <asm/uaccess.h>
1000 #include <asm/pgtable.h>
1001@@ -61,7 +61,6 @@
1002
1003 #include <asm/tlbflush.h>
1004 #include <asm/cpu.h>
1005-#include <asm/pda.h>
1006
1007 asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
1008
1009@@ -70,6 +69,12 @@ static int hlt_counter;
1010 unsigned long boot_option_idle_override = 0;
1011 EXPORT_SYMBOL(boot_option_idle_override);
1012
1013+DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
1014+EXPORT_PER_CPU_SYMBOL(current_task);
1015+
1016+DEFINE_PER_CPU(int, cpu_number);
1017+EXPORT_PER_CPU_SYMBOL(cpu_number);
1018+
1019 /*
1020 * Return saved PC of a blocked thread.
1021 */
1022@@ -168,6 +173,7 @@ void cpu_idle(void)
1023 if (__get_cpu_var(cpu_idle_state))
1024 __get_cpu_var(cpu_idle_state) = 0;
1025
1026+ check_pgt_cache();
1027 rmb();
1028 idle = xen_idle; /* no alternatives */
1029
1030@@ -218,18 +224,19 @@ void __devinit select_idle_routine(const
1031 {
1032 }
1033
1034-static int __init idle_setup (char *str)
1035+static int __init idle_setup(char *str)
1036 {
1037- if (!strncmp(str, "poll", 4)) {
1038+ if (!strcmp(str, "poll")) {
1039 printk("using polling idle threads.\n");
1040 pm_idle = poll_idle;
1041 }
1042+ else
1043+ return -1;
1044
1045 boot_option_idle_override = 1;
1046- return 1;
1047+ return 0;
1048 }
1049-
1050-__setup("idle=", idle_setup);
1051+early_param("idle", idle_setup);
1052
1053 void show_regs(struct pt_regs * regs)
1054 {
1055@@ -282,7 +289,7 @@ int kernel_thread(int (*fn)(void *), voi
1056
1057 regs.xds = __USER_DS;
1058 regs.xes = __USER_DS;
1059- regs.xfs = __KERNEL_PDA;
1060+ regs.xfs = __KERNEL_PERCPU;
1061 regs.orig_eax = -1;
1062 regs.eip = (unsigned long) kernel_thread_helper;
1063 regs.xcs = __KERNEL_CS | get_kernel_rpl();
1064@@ -562,7 +569,7 @@ struct task_struct fastcall * __switch_t
1065 * multicall to indicate FPU task switch, rather than
1066 * synchronously trapping to Xen.
1067 */
1068- if (prev_p->thread_info->status & TS_USEDFPU) {
1069+ if (task_thread_info(prev_p)->status & TS_USEDFPU) {
1070 __save_init_fpu(prev_p); /* _not_ save_init_fpu() */
1071 mcl->op = __HYPERVISOR_fpu_taskswitch;
1072 mcl->args[0] = 1;
1073@@ -669,7 +676,7 @@ struct task_struct fastcall * __switch_t
1074 if (prev->gs | next->gs)
1075 loadsegment(gs, next->gs);
1076
1077- write_pda(pcurrent, next_p);
1078+ x86_write_percpu(current_task, next_p);
1079
1080 return prev_p;
1081 }
1082--- sle11-2009-04-20.orig/arch/x86/kernel/quirks-xen.c 2008-12-15 11:26:44.000000000 +0100
1083+++ sle11-2009-04-20/arch/x86/kernel/quirks-xen.c 2008-12-15 11:27:22.000000000 +0100
1084@@ -3,12 +3,10 @@
1085 */
1086 #include <linux/pci.h>
1087 #include <linux/irq.h>
1088-#include <asm/pci-direct.h>
1089-#include <asm/genapic.h>
1090-#include <asm/cpu.h>
1091
1092 #if defined(CONFIG_X86_IO_APIC) && (defined(CONFIG_SMP) || defined(CONFIG_XEN)) && defined(CONFIG_PCI)
1093-static void __devinit verify_quirk_intel_irqbalance(struct pci_dev *dev)
1094+
1095+static void __devinit quirk_intel_irqbalance(struct pci_dev *dev)
1096 {
1097 u8 config, rev;
1098 u32 word;
1099@@ -16,7 +14,7 @@ static void __devinit verify_quirk_intel
1100 /* BIOS may enable hardware IRQ balancing for
1101 * E7520/E7320/E7525(revision ID 0x9 and below)
1102 * based platforms.
1103- * For those platforms, make sure that the genapic is set to 'flat'
1104+ * Disable SW irqbalance/affinity on those platforms.
1105 */
1106 pci_read_config_byte(dev, PCI_CLASS_REVISION, &rev);
1107 if (rev > 0x9)
1108@@ -30,59 +28,20 @@ static void __devinit verify_quirk_intel
1109 raw_pci_ops->read(0, 0, 0x40, 0x4c, 2, &word);
1110
1111 if (!(word & (1 << 13))) {
1112-#ifndef CONFIG_XEN
1113-#ifdef CONFIG_X86_64
1114- if (genapic != &apic_flat)
1115- panic("APIC mode must be flat on this system\n");
1116-#elif defined(CONFIG_X86_GENERICARCH)
1117- if (genapic != &apic_default)
1118- panic("APIC mode must be default(flat) on this system. Use apic=default\n");
1119-#endif
1120-#endif
1121- }
1122-
1123- /* put back the original value for config space*/
1124- if (!(config & 0x2))
1125- pci_write_config_byte(dev, 0xf4, config);
1126-}
1127-
1128-void __init quirk_intel_irqbalance(void)
1129-{
1130- u8 config, rev;
1131- u32 word;
1132-
1133- /* BIOS may enable hardware IRQ balancing for
1134- * E7520/E7320/E7525(revision ID 0x9 and below)
1135- * based platforms.
1136- * Disable SW irqbalance/affinity on those platforms.
1137- */
1138- rev = read_pci_config_byte(0, 0, 0, PCI_CLASS_REVISION);
1139- if (rev > 0x9)
1140- return;
1141-
1142- printk(KERN_INFO "Intel E7520/7320/7525 detected.");
1143-
1144- /* enable access to config space */
1145- config = read_pci_config_byte(0, 0, 0, 0xf4);
1146- write_pci_config_byte(0, 0, 0, 0xf4, config|0x2);
1147-
1148- /* read xTPR register */
1149- word = read_pci_config_16(0, 0, 0x40, 0x4c);
1150-
1151- if (!(word & (1 << 13))) {
1152 struct xen_platform_op op;
1153- printk(KERN_INFO "Disabling irq balancing and affinity\n");
1154+
1155+ printk(KERN_INFO "Intel E7520/7320/7525 detected. "
1156+ "Disabling irq balancing and affinity\n");
1157 op.cmd = XENPF_platform_quirk;
1158 op.u.platform_quirk.quirk_id = QUIRK_NOIRQBALANCING;
1159 WARN_ON(HYPERVISOR_platform_op(&op));
1160 }
1161
1162- /* put back the original value for config space */
1163+ /* put back the original value for config space*/
1164 if (!(config & 0x2))
1165- write_pci_config_byte(0, 0, 0, 0xf4, config);
1166+ pci_write_config_byte(dev, 0xf4, config);
1167 }
1168-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7320_MCH, verify_quirk_intel_irqbalance);
1169-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7525_MCH, verify_quirk_intel_irqbalance);
1170-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH, verify_quirk_intel_irqbalance);
1171-
1172+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7320_MCH, quirk_intel_irqbalance);
1173+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7525_MCH, quirk_intel_irqbalance);
1174+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH, quirk_intel_irqbalance);
1175 #endif
1176--- sle11-2009-04-20.orig/arch/x86/kernel/smp_32-xen.c 2009-03-04 11:25:55.000000000 +0100
1177+++ sle11-2009-04-20/arch/x86/kernel/smp_32-xen.c 2008-12-15 11:27:22.000000000 +0100
1178@@ -13,7 +13,6 @@
1179 #include <linux/mm.h>
1180 #include <linux/delay.h>
1181 #include <linux/spinlock.h>
1182-#include <linux/smp_lock.h>
1183 #include <linux/kernel_stat.h>
1184 #include <linux/mc146818rtc.h>
1185 #include <linux/cache.h>
1186@@ -216,7 +215,6 @@ static cpumask_t flush_cpumask;
1187 static struct mm_struct * flush_mm;
1188 static unsigned long flush_va;
1189 static DEFINE_SPINLOCK(tlbstate_lock);
1190-#define FLUSH_ALL 0xffffffff
1191
1192 /*
1193 * We cannot call mmdrop() because we are in interrupt context,
1194@@ -298,7 +296,7 @@ irqreturn_t smp_invalidate_interrupt(int
1195
1196 if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) {
1197 if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) {
1198- if (flush_va == FLUSH_ALL)
1199+ if (flush_va == TLB_FLUSH_ALL)
1200 local_flush_tlb();
1201 else
1202 __flush_tlb_one(flush_va);
1203@@ -314,9 +312,11 @@ out:
1204 return IRQ_HANDLED;
1205 }
1206
1207-static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
1208- unsigned long va)
1209+void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
1210+ unsigned long va)
1211 {
1212+ cpumask_t cpumask = *cpumaskp;
1213+
1214 /*
1215 * A couple of (to be removed) sanity checks:
1216 *
1217@@ -327,10 +327,12 @@ static void flush_tlb_others(cpumask_t c
1218 BUG_ON(cpu_isset(smp_processor_id(), cpumask));
1219 BUG_ON(!mm);
1220
1221+#ifdef CONFIG_HOTPLUG_CPU
1222 /* If a CPU which we ran on has gone down, OK. */
1223 cpus_and(cpumask, cpumask, cpu_online_map);
1224- if (cpus_empty(cpumask))
1225+ if (unlikely(cpus_empty(cpumask)))
1226 return;
1227+#endif
1228
1229 /*
1230 * i'm not happy about this global shared spinlock in the
1231@@ -341,17 +343,7 @@ static void flush_tlb_others(cpumask_t c
1232
1233 flush_mm = mm;
1234 flush_va = va;
1235-#if NR_CPUS <= BITS_PER_LONG
1236- atomic_set_mask(cpumask, &flush_cpumask);
1237-#else
1238- {
1239- int k;
1240- unsigned long *flush_mask = (unsigned long *)&flush_cpumask;
1241- unsigned long *cpu_mask = (unsigned long *)&cpumask;
1242- for (k = 0; k < BITS_TO_LONGS(NR_CPUS); ++k)
1243- atomic_set_mask(cpu_mask[k], &flush_mask[k]);
1244- }
1245-#endif
1246+ cpus_or(flush_cpumask, cpumask, flush_cpumask);
1247 /*
1248 * We have to send the IPI only to
1249 * CPUs affected.
1250@@ -378,7 +370,7 @@ void flush_tlb_current_task(void)
1251
1252 local_flush_tlb();
1253 if (!cpus_empty(cpu_mask))
1254- flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
1255+ flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
1256 preempt_enable();
1257 }
1258
1259@@ -397,7 +389,7 @@ void flush_tlb_mm (struct mm_struct * mm
1260 leave_mm(smp_processor_id());
1261 }
1262 if (!cpus_empty(cpu_mask))
1263- flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
1264+ flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
1265
1266 preempt_enable();
1267 }
1268@@ -446,7 +438,7 @@ void flush_tlb_all(void)
1269 * it goes straight through and wastes no time serializing
1270 * anything. Worst case is that we lose a reschedule ...
1271 */
1272-void smp_send_reschedule(int cpu)
1273+void xen_smp_send_reschedule(int cpu)
1274 {
1275 WARN_ON(cpu_is_offline(cpu));
1276 send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
1277@@ -478,36 +470,79 @@ void unlock_ipi_call_lock(void)
1278
1279 static struct call_data_struct *call_data;
1280
1281+static void __smp_call_function(void (*func) (void *info), void *info,
1282+ int nonatomic, int wait)
1283+{
1284+ struct call_data_struct data;
1285+ int cpus = num_online_cpus() - 1;
1286+
1287+ if (!cpus)
1288+ return;
1289+
1290+ data.func = func;
1291+ data.info = info;
1292+ atomic_set(&data.started, 0);
1293+ data.wait = wait;
1294+ if (wait)
1295+ atomic_set(&data.finished, 0);
1296+
1297+ call_data = &data;
1298+ mb();
1299+
1300+ /* Send a message to all other CPUs and wait for them to respond */
1301+ send_IPI_allbutself(CALL_FUNCTION_VECTOR);
1302+
1303+ /* Wait for response */
1304+ while (atomic_read(&data.started) != cpus)
1305+ cpu_relax();
1306+
1307+ if (wait)
1308+ while (atomic_read(&data.finished) != cpus)
1309+ cpu_relax();
1310+}
1311+
1312+
1313 /**
1314- * smp_call_function(): Run a function on all other CPUs.
1315+ * smp_call_function_mask(): Run a function on a set of other CPUs.
1316+ * @mask: The set of cpus to run on. Must not include the current cpu.
1317 * @func: The function to run. This must be fast and non-blocking.
1318 * @info: An arbitrary pointer to pass to the function.
1319- * @nonatomic: currently unused.
1320 * @wait: If true, wait (atomically) until function has completed on other CPUs.
1321 *
1322- * Returns 0 on success, else a negative status code. Does not return until
1323- * remote CPUs are nearly ready to execute <<func>> or are or have executed.
1324+ * Returns 0 on success, else a negative status code.
1325+ *
1326+ * If @wait is true, then returns once @func has returned; otherwise
1327+ * it returns just before the target cpu calls @func.
1328 *
1329 * You must not call this function with disabled interrupts or from a
1330 * hardware interrupt handler or from a bottom half handler.
1331 */
1332-int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
1333- int wait)
1334+int
1335+xen_smp_call_function_mask(cpumask_t mask,
1336+ void (*func)(void *), void *info,
1337+ int wait)
1338 {
1339 struct call_data_struct data;
1340+ cpumask_t allbutself;
1341 int cpus;
1342
1343+ /* Can deadlock when called with interrupts disabled */
1344+ WARN_ON(irqs_disabled());
1345+
1346 /* Holding any lock stops cpus from going down. */
1347 spin_lock(&call_lock);
1348- cpus = num_online_cpus() - 1;
1349+
1350+ allbutself = cpu_online_map;
1351+ cpu_clear(smp_processor_id(), allbutself);
1352+
1353+ cpus_and(mask, mask, allbutself);
1354+ cpus = cpus_weight(mask);
1355+
1356 if (!cpus) {
1357 spin_unlock(&call_lock);
1358 return 0;
1359 }
1360
1361- /* Can deadlock when called with interrupts disabled */
1362- WARN_ON(irqs_disabled());
1363-
1364 data.func = func;
1365 data.info = info;
1366 atomic_set(&data.started, 0);
1367@@ -517,9 +552,12 @@ int smp_call_function (void (*func) (voi
1368
1369 call_data = &data;
1370 mb();
1371-
1372- /* Send a message to all other CPUs and wait for them to respond */
1373- send_IPI_allbutself(CALL_FUNCTION_VECTOR);
1374+
1375+ /* Send a message to other CPUs */
1376+ if (cpus_equal(mask, allbutself))
1377+ send_IPI_allbutself(CALL_FUNCTION_VECTOR);
1378+ else
1379+ send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
1380
1381 /* Wait for response */
1382 while (atomic_read(&data.started) != cpus)
1383@@ -532,15 +570,14 @@ int smp_call_function (void (*func) (voi
1384
1385 return 0;
1386 }
1387-EXPORT_SYMBOL(smp_call_function);
1388
1389 static void stop_this_cpu (void * dummy)
1390 {
1391+ local_irq_disable();
1392 /*
1393 * Remove this CPU:
1394 */
1395 cpu_clear(smp_processor_id(), cpu_online_map);
1396- local_irq_disable();
1397 disable_all_local_evtchn();
1398 if (cpu_data[smp_processor_id()].hlt_works_ok)
1399 for(;;) halt();
1400@@ -551,13 +588,18 @@ static void stop_this_cpu (void * dummy)
1401 * this function calls the 'stop' function on all other CPUs in the system.
1402 */
1403
1404-void smp_send_stop(void)
1405+void xen_smp_send_stop(void)
1406 {
1407- smp_call_function(stop_this_cpu, NULL, 1, 0);
1408+ /* Don't deadlock on the call lock in panic */
1409+ int nolock = !spin_trylock(&call_lock);
1410+ unsigned long flags;
1411
1412- local_irq_disable();
1413+ local_irq_save(flags);
1414+ __smp_call_function(stop_this_cpu, NULL, 0, 0);
1415+ if (!nolock)
1416+ spin_unlock(&call_lock);
1417 disable_all_local_evtchn();
1418- local_irq_enable();
1419+ local_irq_restore(flags);
1420 }
1421
1422 /*
1423@@ -598,74 +640,3 @@ irqreturn_t smp_call_function_interrupt(
1424
1425 return IRQ_HANDLED;
1426 }
1427-
1428-/*
1429- * this function sends a 'generic call function' IPI to one other CPU
1430- * in the system.
1431- *
1432- * cpu is a standard Linux logical CPU number.
1433- */
1434-static void
1435-__smp_call_function_single(int cpu, void (*func) (void *info), void *info,
1436- int nonatomic, int wait)
1437-{
1438- struct call_data_struct data;
1439- int cpus = 1;
1440-
1441- data.func = func;
1442- data.info = info;
1443- atomic_set(&data.started, 0);
1444- data.wait = wait;
1445- if (wait)
1446- atomic_set(&data.finished, 0);
1447-
1448- call_data = &data;
1449- wmb();
1450- /* Send a message to all other CPUs and wait for them to respond */
1451- send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_VECTOR);
1452-
1453- /* Wait for response */
1454- while (atomic_read(&data.started) != cpus)
1455- cpu_relax();
1456-
1457- if (!wait)
1458- return;
1459-
1460- while (atomic_read(&data.finished) != cpus)
1461- cpu_relax();
1462-}
1463-
1464-/*
1465- * smp_call_function_single - Run a function on another CPU
1466- * @func: The function to run. This must be fast and non-blocking.
1467- * @info: An arbitrary pointer to pass to the function.
1468- * @nonatomic: Currently unused.
1469- * @wait: If true, wait until function has completed on other CPUs.
1470- *
1471- * Retrurns 0 on success, else a negative status code.
1472- *
1473- * Does not return until the remote CPU is nearly ready to execute <func>
1474- * or is or has executed.
1475- */
1476-
1477-int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
1478- int nonatomic, int wait)
1479-{
1480- /* prevent preemption and reschedule on another processor */
1481- int me = get_cpu();
1482- if (cpu == me) {
1483- WARN_ON(1);
1484- put_cpu();
1485- return -EBUSY;
1486- }
1487-
1488- /* Can deadlock when called with interrupts disabled */
1489- WARN_ON(irqs_disabled());
1490-
1491- spin_lock_bh(&call_lock);
1492- __smp_call_function_single(cpu, func, info, nonatomic, wait);
1493- spin_unlock_bh(&call_lock);
1494- put_cpu();
1495- return 0;
1496-}
1497-EXPORT_SYMBOL(smp_call_function_single);
1498--- sle11-2009-04-20.orig/arch/x86/kernel/time_32-xen.c 2009-03-24 10:11:08.000000000 +0100
1499+++ sle11-2009-04-20/arch/x86/kernel/time_32-xen.c 2009-03-24 10:11:31.000000000 +0100
1500@@ -80,7 +80,6 @@
1501 #include <asm/i8253.h>
1502 DEFINE_SPINLOCK(i8253_lock);
1503 EXPORT_SYMBOL(i8253_lock);
1504-int pit_latch_buggy; /* extern */
1505 #else
1506 volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
1507 #endif
1508@@ -218,6 +217,26 @@ static inline u64 scale_delta(u64 delta,
1509 return product;
1510 }
1511
1512+static inline u64 get64(volatile u64 *ptr)
1513+{
1514+#ifndef CONFIG_64BIT
1515+ return cmpxchg64(ptr, 0, 0);
1516+#else
1517+ return *ptr;
1518+#define cmpxchg64 cmpxchg
1519+#endif
1520+}
1521+
1522+static inline u64 get64_local(volatile u64 *ptr)
1523+{
1524+#ifndef CONFIG_64BIT
1525+ return cmpxchg64_local(ptr, 0, 0);
1526+#else
1527+ return *ptr;
1528+#define cmpxchg64_local cmpxchg_local
1529+#endif
1530+}
1531+
1532 static void init_cpu_khz(void)
1533 {
1534 u64 __cpu_khz = 1000000ULL << 32;
1535@@ -397,7 +416,7 @@ static int set_rtc_mmss(unsigned long no
1536 return retval;
1537 }
1538
1539-unsigned long long sched_clock(void)
1540+static unsigned long long local_clock(void)
1541 {
1542 unsigned int cpu = get_cpu();
1543 struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu);
1544@@ -418,6 +437,61 @@ unsigned long long sched_clock(void)
1545 return time;
1546 }
1547
1548+/*
1549+ * Runstate accounting
1550+ */
1551+static void get_runstate_snapshot(struct vcpu_runstate_info *res)
1552+{
1553+ u64 state_time;
1554+ struct vcpu_runstate_info *state;
1555+
1556+ BUG_ON(preemptible());
1557+
1558+ state = &__get_cpu_var(runstate);
1559+
1560+ do {
1561+ state_time = get64_local(&state->state_entry_time);
1562+ *res = *state;
1563+ } while (get64_local(&state->state_entry_time) != state_time);
1564+
1565+ WARN_ON_ONCE(res->state != RUNSTATE_running);
1566+}
1567+
1568+/*
1569+ * Xen sched_clock implementation. Returns the number of unstolen
1570+ * nanoseconds, which is nanoseconds the VCPU spent in RUNNING+BLOCKED
1571+ * states.
1572+ */
1573+unsigned long long sched_clock(void)
1574+{
1575+ struct vcpu_runstate_info runstate;
1576+ cycle_t now;
1577+ u64 ret;
1578+ s64 offset;
1579+
1580+ /*
1581+ * Ideally sched_clock should be called on a per-cpu basis
1582+ * anyway, so preempt should already be disabled, but that's
1583+ * not current practice at the moment.
1584+ */
1585+ preempt_disable();
1586+
1587+ now = local_clock();
1588+
1589+ get_runstate_snapshot(&runstate);
1590+
1591+ offset = now - runstate.state_entry_time;
1592+ if (offset < 0)
1593+ offset = 0;
1594+
1595+ ret = offset + runstate.time[RUNSTATE_running]
1596+ + runstate.time[RUNSTATE_blocked];
1597+
1598+ preempt_enable();
1599+
1600+ return ret;
1601+}
1602+
1603 unsigned long profile_pc(struct pt_regs *regs)
1604 {
1605 unsigned long pc = instruction_pointer(regs);
1606@@ -465,10 +539,9 @@ EXPORT_SYMBOL(profile_pc);
1607 irqreturn_t timer_interrupt(int irq, void *dev_id)
1608 {
1609 s64 delta, delta_cpu, stolen, blocked;
1610- u64 sched_time;
1611 unsigned int i, cpu = smp_processor_id();
1612 struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu);
1613- struct vcpu_runstate_info *runstate = &per_cpu(runstate, cpu);
1614+ struct vcpu_runstate_info runstate;
1615
1616 /*
1617 * Here we are in the timer irq handler. We just have irqs locally
1618@@ -488,20 +561,7 @@ irqreturn_t timer_interrupt(int irq, voi
1619 delta -= processed_system_time;
1620 delta_cpu -= per_cpu(processed_system_time, cpu);
1621
1622- /*
1623- * Obtain a consistent snapshot of stolen/blocked cycles. We
1624- * can use state_entry_time to detect if we get preempted here.
1625- */
1626- do {
1627- sched_time = runstate->state_entry_time;
1628- barrier();
1629- stolen = runstate->time[RUNSTATE_runnable] +
1630- runstate->time[RUNSTATE_offline] -
1631- per_cpu(processed_stolen_time, cpu);
1632- blocked = runstate->time[RUNSTATE_blocked] -
1633- per_cpu(processed_blocked_time, cpu);
1634- barrier();
1635- } while (sched_time != runstate->state_entry_time);
1636+ get_runstate_snapshot(&runstate);
1637 } while (!time_values_up_to_date(cpu));
1638
1639 if ((unlikely(delta < -(s64)permitted_clock_jitter) ||
1640@@ -543,6 +603,9 @@ irqreturn_t timer_interrupt(int irq, voi
1641 * HACK: Passing NULL to account_steal_time()
1642 * ensures that the ticks are accounted as stolen.
1643 */
1644+ stolen = runstate.time[RUNSTATE_runnable]
1645+ + runstate.time[RUNSTATE_offline]
1646+ - per_cpu(processed_stolen_time, cpu);
1647 if ((stolen > 0) && (delta_cpu > 0)) {
1648 delta_cpu -= stolen;
1649 if (unlikely(delta_cpu < 0))
1650@@ -558,6 +621,8 @@ irqreturn_t timer_interrupt(int irq, voi
1651 * HACK: Passing idle_task to account_steal_time()
1652 * ensures that the ticks are accounted as idle/wait.
1653 */
1654+ blocked = runstate.time[RUNSTATE_blocked]
1655+ - per_cpu(processed_blocked_time, cpu);
1656 if ((blocked > 0) && (delta_cpu > 0)) {
1657 delta_cpu -= blocked;
1658 if (unlikely(delta_cpu < 0))
1659@@ -594,7 +659,7 @@ irqreturn_t timer_interrupt(int irq, voi
1660 return IRQ_HANDLED;
1661 }
1662
1663-void mark_tsc_unstable(void)
1664+void mark_tsc_unstable(char *reason)
1665 {
1666 #ifndef CONFIG_XEN /* XXX Should tell the hypervisor about this fact. */
1667 tsc_unstable = 1;
1668@@ -602,17 +667,13 @@ void mark_tsc_unstable(void)
1669 }
1670 EXPORT_SYMBOL_GPL(mark_tsc_unstable);
1671
1672+static cycle_t cs_last;
1673+
1674 static cycle_t xen_clocksource_read(void)
1675 {
1676 #ifdef CONFIG_SMP
1677- static cycle_t last_ret;
1678-#ifndef CONFIG_64BIT
1679- cycle_t last = cmpxchg64(&last_ret, 0, 0);
1680-#else
1681- cycle_t last = last_ret;
1682-#define cmpxchg64 cmpxchg
1683-#endif
1684- cycle_t ret = sched_clock();
1685+ cycle_t last = get64(&cs_last);
1686+ cycle_t ret = local_clock();
1687
1688 if (unlikely((s64)(ret - last) < 0)) {
1689 if (last - ret > permitted_clock_jitter
1690@@ -631,17 +692,25 @@ static cycle_t xen_clocksource_read(void
1691 }
1692
1693 for (;;) {
1694- cycle_t cur = cmpxchg64(&last_ret, last, ret);
1695+ cycle_t cur = cmpxchg64(&cs_last, last, ret);
1696
1697 if (cur == last || (s64)(ret - cur) < 0)
1698 return ret;
1699 last = cur;
1700 }
1701 #else
1702- return sched_clock();
1703+ return local_clock();
1704 #endif
1705 }
1706
1707+static void xen_clocksource_resume(void)
1708+{
1709+ extern void time_resume(void);
1710+
1711+ time_resume();
1712+ cs_last = local_clock();
1713+}
1714+
1715 static struct clocksource clocksource_xen = {
1716 .name = "xen",
1717 .rating = 400,
1718@@ -650,6 +719,7 @@ static struct clocksource clocksource_xe
1719 .mult = 1 << XEN_SHIFT, /* time directly in nanoseconds */
1720 .shift = XEN_SHIFT,
1721 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
1722+ .resume = xen_clocksource_resume,
1723 };
1724
1725 static void init_missing_ticks_accounting(unsigned int cpu)
1726@@ -738,35 +808,6 @@ void notify_arch_cmos_timer(void)
1727 mod_timer(&sync_xen_wallclock_timer, jiffies + 1);
1728 }
1729
1730-static int timer_resume(struct sys_device *dev)
1731-{
1732- extern void time_resume(void);
1733- time_resume();
1734- return 0;
1735-}
1736-
1737-static struct sysdev_class timer_sysclass = {
1738- .resume = timer_resume,
1739- set_kset_name("timer"),
1740-};
1741-
1742-
1743-/* XXX this driverfs stuff should probably go elsewhere later -john */
1744-static struct sys_device device_timer = {
1745- .id = 0,
1746- .cls = &timer_sysclass,
1747-};
1748-
1749-static int time_init_device(void)
1750-{
1751- int error = sysdev_class_register(&timer_sysclass);
1752- if (!error)
1753- error = sysdev_register(&device_timer);
1754- return error;
1755-}
1756-
1757-device_initcall(time_init_device);
1758-
1759 extern void (*late_time_init)(void);
1760
1761 /* Dynamically-mapped IRQ. */
1762@@ -897,21 +938,21 @@ static void start_hz_timer(void)
1763 cpu_clear(smp_processor_id(), nohz_cpu_mask);
1764 }
1765
1766-void raw_safe_halt(void)
1767+void xen_safe_halt(void)
1768 {
1769 stop_hz_timer();
1770 /* Blocking includes an implicit local_irq_enable(). */
1771 HYPERVISOR_block();
1772 start_hz_timer();
1773 }
1774-EXPORT_SYMBOL(raw_safe_halt);
1775+EXPORT_SYMBOL(xen_safe_halt);
1776
1777-void halt(void)
1778+void xen_halt(void)
1779 {
1780 if (irqs_disabled())
1781 VOID(HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL));
1782 }
1783-EXPORT_SYMBOL(halt);
1784+EXPORT_SYMBOL(xen_halt);
1785
1786 /* No locking required. Interrupts are disabled on all CPUs. */
1787 void time_resume(void)
1788--- sle11-2009-04-20.orig/arch/x86/kernel/traps_32-xen.c 2009-03-04 11:25:55.000000000 +0100
1789+++ sle11-2009-04-20/arch/x86/kernel/traps_32-xen.c 2008-12-15 11:27:22.000000000 +0100
1790@@ -52,7 +52,7 @@
1791 #include <asm/unwind.h>
1792 #include <asm/smp.h>
1793 #include <asm/arch_hooks.h>
1794-#include <asm/kdebug.h>
1795+#include <linux/kdebug.h>
1796 #include <asm/stacktrace.h>
1797
1798 #include <linux/module.h>
1799@@ -101,20 +101,6 @@ asmlinkage void machine_check(void);
1800
1801 int kstack_depth_to_print = 24;
1802 static unsigned int code_bytes = 64;
1803-ATOMIC_NOTIFIER_HEAD(i386die_chain);
1804-
1805-int register_die_notifier(struct notifier_block *nb)
1806-{
1807- vmalloc_sync_all();
1808- return atomic_notifier_chain_register(&i386die_chain, nb);
1809-}
1810-EXPORT_SYMBOL(register_die_notifier); /* used modular by kdb */
1811-
1812-int unregister_die_notifier(struct notifier_block *nb)
1813-{
1814- return atomic_notifier_chain_unregister(&i386die_chain, nb);
1815-}
1816-EXPORT_SYMBOL(unregister_die_notifier); /* used modular by kdb */
1817
1818 static inline int valid_stack_ptr(struct thread_info *tinfo, void *p)
1819 {
1820@@ -325,7 +311,7 @@ void show_registers(struct pt_regs *regs
1821 regs->xds & 0xffff, regs->xes & 0xffff, regs->xfs & 0xffff, gs, ss);
1822 printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)",
1823 TASK_COMM_LEN, current->comm, current->pid,
1824- current_thread_info(), current, current->thread_info);
1825+ current_thread_info(), current, task_thread_info(current));
1826 /*
1827 * When in-kernel, we also print out the stack and code at the
1828 * time of the fault..
1829@@ -482,8 +468,6 @@ static void __kprobes do_trap(int trapnr
1830 siginfo_t *info)
1831 {
1832 struct task_struct *tsk = current;
1833- tsk->thread.error_code = error_code;
1834- tsk->thread.trap_no = trapnr;
1835
1836 if (regs->eflags & VM_MASK) {
1837 if (vm86)
1838@@ -495,6 +479,18 @@ static void __kprobes do_trap(int trapnr
1839 goto kernel_trap;
1840
1841 trap_signal: {
1842+ /*
1843+ * We want error_code and trap_no set for userspace faults and
1844+ * kernelspace faults which result in die(), but not
1845+ * kernelspace faults which are fixed up. die() gives the
1846+ * process no chance to handle the signal and notice the
1847+ * kernel fault information, so that won't result in polluting
1848+ * the information about previously queued, but not yet
1849+ * delivered, faults. See also do_general_protection below.
1850+ */
1851+ tsk->thread.error_code = error_code;
1852+ tsk->thread.trap_no = trapnr;
1853+
1854 if (info)
1855 force_sig_info(signr, info, tsk);
1856 else
1857@@ -503,8 +499,11 @@ static void __kprobes do_trap(int trapnr
1858 }
1859
1860 kernel_trap: {
1861- if (!fixup_exception(regs))
1862+ if (!fixup_exception(regs)) {
1863+ tsk->thread.error_code = error_code;
1864+ tsk->thread.trap_no = trapnr;
1865 die(str, regs, error_code);
1866+ }
1867 return;
1868 }
1869
1870@@ -578,9 +577,6 @@ DO_ERROR_INFO(32, SIGSEGV, "iret excepti
1871 fastcall void __kprobes do_general_protection(struct pt_regs * regs,
1872 long error_code)
1873 {
1874- current->thread.error_code = error_code;
1875- current->thread.trap_no = 13;
1876-
1877 if (regs->eflags & VM_MASK)
1878 goto gp_in_vm86;
1879
1880@@ -599,6 +595,8 @@ gp_in_vm86:
1881
1882 gp_in_kernel:
1883 if (!fixup_exception(regs)) {
1884+ current->thread.error_code = error_code;
1885+ current->thread.trap_no = 13;
1886 if (notify_die(DIE_GPF, "general protection fault", regs,
1887 error_code, 13, SIGSEGV) == NOTIFY_STOP)
1888 return;
1889@@ -987,9 +985,7 @@ fastcall void do_spurious_interrupt_bug(
1890 fastcall unsigned long patch_espfix_desc(unsigned long uesp,
1891 unsigned long kesp)
1892 {
1893- int cpu = smp_processor_id();
1894- struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
1895- struct desc_struct *gdt = (struct desc_struct *)cpu_gdt_descr->address;
1896+ struct desc_struct *gdt = __get_cpu_var(gdt_page).gdt;
1897 unsigned long base = (kesp - uesp) & -THREAD_SIZE;
1898 unsigned long new_kesp = kesp - base;
1899 unsigned long lim_pages = (new_kesp | (THREAD_SIZE - 1)) >> PAGE_SHIFT;
1900--- sle11-2009-04-20.orig/arch/x86/mm/fault_32-xen.c 2009-03-04 11:25:55.000000000 +0100
1901+++ sle11-2009-04-20/arch/x86/mm/fault_32-xen.c 2008-12-15 11:27:22.000000000 +0100
1902@@ -14,19 +14,20 @@
1903 #include <linux/mman.h>
1904 #include <linux/mm.h>
1905 #include <linux/smp.h>
1906-#include <linux/smp_lock.h>
1907 #include <linux/interrupt.h>
1908 #include <linux/init.h>
1909 #include <linux/tty.h>
1910 #include <linux/vt_kern.h> /* For unblank_screen() */
1911 #include <linux/highmem.h>
1912+#include <linux/bootmem.h> /* for max_low_pfn */
1913+#include <linux/vmalloc.h>
1914 #include <linux/module.h>
1915 #include <linux/kprobes.h>
1916 #include <linux/uaccess.h>
1917+#include <linux/kdebug.h>
1918
1919 #include <asm/system.h>
1920 #include <asm/desc.h>
1921-#include <asm/kdebug.h>
1922 #include <asm/segment.h>
1923
1924 extern void die(const char *,struct pt_regs *,long);
1925@@ -259,25 +260,20 @@ static void dump_fault_path(unsigned lon
1926 unsigned long page;
1927
1928 page = read_cr3();
1929- page = ((unsigned long *) __va(page))[address >> 22];
1930- if (oops_may_print())
1931- printk(KERN_ALERT "*pde = ma %08lx pa %08lx\n", page,
1932- machine_to_phys(page));
1933+ page = ((unsigned long *) __va(page))[address >> PGDIR_SHIFT];
1934+ printk(KERN_ALERT "*pde = ma %08lx pa %08lx\n", page,
1935+ machine_to_phys(page));
1936 /*
1937 * We must not directly access the pte in the highpte
1938 * case if the page table is located in highmem.
1939 * And lets rather not kmap-atomic the pte, just in case
1940 * it's allocated already.
1941 */
1942-#ifdef CONFIG_HIGHPTE
1943- if ((page >> PAGE_SHIFT) >= highstart_pfn)
1944- return;
1945-#endif
1946- if ((page & 1) && oops_may_print()) {
1947- page &= PAGE_MASK;
1948- address &= 0x003ff000;
1949- page = machine_to_phys(page);
1950- page = ((unsigned long *) __va(page))[address >> PAGE_SHIFT];
1951+ if ((machine_to_phys(page) >> PAGE_SHIFT) < max_low_pfn
1952+ && (page & _PAGE_PRESENT)) {
1953+ page = machine_to_phys(page & PAGE_MASK);
1954+ page = ((unsigned long *) __va(page))[(address >> PAGE_SHIFT)
1955+ & (PTRS_PER_PTE - 1)];
1956 printk(KERN_ALERT "*pte = ma %08lx pa %08lx\n", page,
1957 machine_to_phys(page));
1958 }
1959@@ -581,6 +577,11 @@ bad_area:
1960 bad_area_nosemaphore:
1961 /* User mode accesses just cause a SIGSEGV */
1962 if (error_code & 4) {
1963+ /*
1964+ * It's possible to have interrupts off here.
1965+ */
1966+ local_irq_enable();
1967+
1968 /*
1969 * Valid to do another page fault here because this one came
1970 * from user space.
1971@@ -633,7 +634,7 @@ no_context:
1972 bust_spinlocks(1);
1973
1974 if (oops_may_print()) {
1975- #ifdef CONFIG_X86_PAE
1976+#ifdef CONFIG_X86_PAE
1977 if (error_code & 16) {
1978 pte_t *pte = lookup_address(address);
1979
1980@@ -642,7 +643,7 @@ no_context:
1981 "NX-protected page - exploit attempt? "
1982 "(uid: %d)\n", current->uid);
1983 }
1984- #endif
1985+#endif
1986 if (address < PAGE_SIZE)
1987 printk(KERN_ALERT "BUG: unable to handle kernel NULL "
1988 "pointer dereference");
1989@@ -652,8 +653,8 @@ no_context:
1990 printk(" at virtual address %08lx\n",address);
1991 printk(KERN_ALERT " printing eip:\n");
1992 printk("%08lx\n", regs->eip);
1993+ dump_fault_path(address);
1994 }
1995- dump_fault_path(address);
1996 tsk->thread.cr2 = address;
1997 tsk->thread.trap_no = 14;
1998 tsk->thread.error_code = error_code;
1999@@ -694,7 +695,6 @@ do_sigbus:
2000 force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
2001 }
2002
2003-#if !HAVE_SHARED_KERNEL_PMD
2004 void vmalloc_sync_all(void)
2005 {
2006 /*
2007@@ -710,6 +710,9 @@ void vmalloc_sync_all(void)
2008 static unsigned long start = TASK_SIZE;
2009 unsigned long address;
2010
2011+ if (SHARED_KERNEL_PMD)
2012+ return;
2013+
2014 BUILD_BUG_ON(TASK_SIZE & ~PGDIR_MASK);
2015 for (address = start;
2016 address >= TASK_SIZE && address < hypervisor_virt_start;
2017@@ -739,4 +742,3 @@ void vmalloc_sync_all(void)
2018 start = address + (1UL << PMD_SHIFT);
2019 }
2020 }
2021-#endif
2022--- sle11-2009-04-20.orig/arch/x86/mm/highmem_32-xen.c 2009-03-04 11:25:55.000000000 +0100
2023+++ sle11-2009-04-20/arch/x86/mm/highmem_32-xen.c 2008-12-15 11:27:22.000000000 +0100
2024@@ -26,7 +26,7 @@ void kunmap(struct page *page)
2025 * However when holding an atomic kmap is is not legal to sleep, so atomic
2026 * kmaps are appropriate for short, tight code paths only.
2027 */
2028-static void *__kmap_atomic(struct page *page, enum km_type type, pgprot_t prot)
2029+void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot)
2030 {
2031 enum fixed_addresses idx;
2032 unsigned long vaddr;
2033@@ -49,15 +49,7 @@ static void *__kmap_atomic(struct page *
2034
2035 void *kmap_atomic(struct page *page, enum km_type type)
2036 {
2037- return __kmap_atomic(page, type, kmap_prot);
2038-}
2039-
2040-/* Same as kmap_atomic but with PAGE_KERNEL_RO page protection. */
2041-void *kmap_atomic_pte(struct page *page, enum km_type type)
2042-{
2043- return __kmap_atomic(page, type,
2044- test_bit(PG_pinned, &page->flags)
2045- ? PAGE_KERNEL_RO : kmap_prot);
2046+ return kmap_atomic_prot(page, type, kmap_prot);
2047 }
2048
2049 void kunmap_atomic(void *kvaddr, enum km_type type)
2050@@ -80,6 +72,7 @@ void kunmap_atomic(void *kvaddr, enum km
2051 #endif
2052 }
2053
2054+ /*arch_flush_lazy_mmu_mode();*/
2055 pagefault_enable();
2056 }
2057
2058@@ -162,7 +155,6 @@ void copy_highpage(struct page *to, stru
2059 EXPORT_SYMBOL(kmap);
2060 EXPORT_SYMBOL(kunmap);
2061 EXPORT_SYMBOL(kmap_atomic);
2062-EXPORT_SYMBOL(kmap_atomic_pte);
2063 EXPORT_SYMBOL(kunmap_atomic);
2064 EXPORT_SYMBOL(kmap_atomic_to_page);
2065 EXPORT_SYMBOL(clear_highpage);
2066--- sle11-2009-04-20.orig/arch/x86/mm/init_32-xen.c 2009-03-04 11:25:55.000000000 +0100
2067+++ sle11-2009-04-20/arch/x86/mm/init_32-xen.c 2008-12-15 11:27:22.000000000 +0100
2068@@ -22,6 +22,7 @@
2069 #include <linux/init.h>
2070 #include <linux/highmem.h>
2071 #include <linux/pagemap.h>
2072+#include <linux/pfn.h>
2073 #include <linux/poison.h>
2074 #include <linux/bootmem.h>
2075 #include <linux/slab.h>
2076@@ -65,17 +66,19 @@ static pmd_t * __init one_md_table_init(
2077 pmd_t *pmd_table;
2078
2079 #ifdef CONFIG_X86_PAE
2080- pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
2081- paravirt_alloc_pd(__pa(pmd_table) >> PAGE_SHIFT);
2082- make_lowmem_page_readonly(pmd_table, XENFEAT_writable_page_tables);
2083- set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
2084- pud = pud_offset(pgd, 0);
2085- if (pmd_table != pmd_offset(pud, 0))
2086- BUG();
2087-#else
2088+ if (!(__pgd_val(*pgd) & _PAGE_PRESENT)) {
2089+ pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
2090+
2091+ paravirt_alloc_pd(__pa(pmd_table) >> PAGE_SHIFT);
2092+ make_lowmem_page_readonly(pmd_table, XENFEAT_writable_page_tables);
2093+ set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
2094+ pud = pud_offset(pgd, 0);
2095+ if (pmd_table != pmd_offset(pud, 0))
2096+ BUG();
2097+ }
2098+#endif
2099 pud = pud_offset(pgd, 0);
2100 pmd_table = pmd_offset(pud, 0);
2101-#endif
2102
2103 return pmd_table;
2104 }
2105@@ -86,16 +89,18 @@ static pmd_t * __init one_md_table_init(
2106 */
2107 static pte_t * __init one_page_table_init(pmd_t *pmd)
2108 {
2109+#if CONFIG_XEN_COMPAT <= 0x030002
2110 if (pmd_none(*pmd)) {
2111+#else
2112+ if (!(__pmd_val(*pmd) & _PAGE_PRESENT)) {
2113+#endif
2114 pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
2115+
2116 paravirt_alloc_pt(__pa(page_table) >> PAGE_SHIFT);
2117 make_lowmem_page_readonly(page_table,
2118 XENFEAT_writable_page_tables);
2119 set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
2120- if (page_table != pte_offset_kernel(pmd, 0))
2121- BUG();
2122-
2123- return page_table;
2124+ BUG_ON(page_table != pte_offset_kernel(pmd, 0));
2125 }
2126
2127 return pte_offset_kernel(pmd, 0);
2128@@ -115,7 +120,6 @@ static pte_t * __init one_page_table_ini
2129 static void __init page_table_range_init (unsigned long start, unsigned long end, pgd_t *pgd_base)
2130 {
2131 pgd_t *pgd;
2132- pud_t *pud;
2133 pmd_t *pmd;
2134 int pgd_idx, pmd_idx;
2135 unsigned long vaddr;
2136@@ -126,12 +130,10 @@ static void __init page_table_range_init
2137 pgd = pgd_base + pgd_idx;
2138
2139 for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) {
2140- if (pgd_none(*pgd))
2141- one_md_table_init(pgd);
2142- pud = pud_offset(pgd, vaddr);
2143- pmd = pmd_offset(pud, vaddr);
2144+ pmd = one_md_table_init(pgd);
2145+ pmd = pmd + pmd_index(vaddr);
2146 for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); pmd++, pmd_idx++) {
2147- if (vaddr < hypervisor_virt_start && pmd_none(*pmd))
2148+ if (vaddr < hypervisor_virt_start)
2149 one_page_table_init(pmd);
2150
2151 vaddr += PMD_SIZE;
2152@@ -194,24 +196,25 @@ static void __init kernel_physical_mappi
2153 /* Map with big pages if possible, otherwise create normal page tables. */
2154 if (cpu_has_pse) {
2155 unsigned int address2 = (pfn + PTRS_PER_PTE - 1) * PAGE_SIZE + PAGE_OFFSET + PAGE_SIZE-1;
2156-
2157 if (is_kernel_text(address) || is_kernel_text(address2))
2158 set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE_EXEC));
2159 else
2160 set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE));
2161+
2162 pfn += PTRS_PER_PTE;
2163 } else {
2164 pte = one_page_table_init(pmd);
2165
2166- pte += pte_ofs;
2167- for (; pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn; pte++, pfn++, pte_ofs++) {
2168- /* XEN: Only map initial RAM allocation. */
2169- if ((pfn >= max_ram_pfn) || pte_present(*pte))
2170- continue;
2171- if (is_kernel_text(address))
2172- set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
2173- else
2174- set_pte(pte, pfn_pte(pfn, PAGE_KERNEL));
2175+ for (pte += pte_ofs;
2176+ pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn;
2177+ pte++, pfn++, pte_ofs++, address += PAGE_SIZE) {
2178+ /* XEN: Only map initial RAM allocation. */
2179+ if ((pfn >= max_ram_pfn) || pte_present(*pte))
2180+ continue;
2181+ if (is_kernel_text(address))
2182+ set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
2183+ else
2184+ set_pte(pte, pfn_pte(pfn, PAGE_KERNEL));
2185 }
2186 pte_ofs = 0;
2187 }
2188@@ -381,15 +384,44 @@ extern void __init remap_numa_kva(void);
2189
2190 pgd_t *swapper_pg_dir;
2191
2192+static void __init xen_pagetable_setup_start(pgd_t *base)
2193+{
2194+}
2195+
2196+static void __init xen_pagetable_setup_done(pgd_t *base)
2197+{
2198+}
2199+
2200+/*
2201+ * Build a proper pagetable for the kernel mappings. Up until this
2202+ * point, we've been running on some set of pagetables constructed by
2203+ * the boot process.
2204+ *
2205+ * If we're booting on native hardware, this will be a pagetable
2206+ * constructed in arch/i386/kernel/head.S, and not running in PAE mode
2207+ * (even if we'll end up running in PAE). The root of the pagetable
2208+ * will be swapper_pg_dir.
2209+ *
2210+ * If we're booting paravirtualized under a hypervisor, then there are
2211+ * more options: we may already be running PAE, and the pagetable may
2212+ * or may not be based in swapper_pg_dir. In any case,
2213+ * paravirt_pagetable_setup_start() will set up swapper_pg_dir
2214+ * appropriately for the rest of the initialization to work.
2215+ *
2216+ * In general, pagetable_init() assumes that the pagetable may already
2217+ * be partially populated, and so it avoids stomping on any existing
2218+ * mappings.
2219+ */
2220 static void __init pagetable_init (void)
2221 {
2222- unsigned long vaddr;
2223+ unsigned long vaddr, end;
2224 pgd_t *pgd_base = (pgd_t *)xen_start_info->pt_base;
2225
2226+ xen_pagetable_setup_start(pgd_base);
2227+
2228 /* Enable PSE if available */
2229- if (cpu_has_pse) {
2230+ if (cpu_has_pse)
2231 set_in_cr4(X86_CR4_PSE);
2232- }
2233
2234 /* Enable PGE if available */
2235 if (cpu_has_pge) {
2236@@ -406,9 +438,12 @@ static void __init pagetable_init (void)
2237 * created - mappings will be set by set_fixmap():
2238 */
2239 vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
2240- page_table_range_init(vaddr, hypervisor_virt_start, pgd_base);
2241+ end = (FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK;
2242+ page_table_range_init(vaddr, end, pgd_base);
2243
2244 permanent_kmaps_init(pgd_base);
2245+
2246+ xen_pagetable_setup_done(pgd_base);
2247 }
2248
2249 #if defined(CONFIG_SOFTWARE_SUSPEND) || defined(CONFIG_ACPI_SLEEP)
2250@@ -750,34 +785,29 @@ int remove_memory(u64 start, u64 size)
2251 EXPORT_SYMBOL_GPL(remove_memory);
2252 #endif
2253
2254-struct kmem_cache *pgd_cache;
2255 struct kmem_cache *pmd_cache;
2256
2257 void __init pgtable_cache_init(void)
2258 {
2259+ size_t pgd_size = PTRS_PER_PGD*sizeof(pgd_t);
2260+
2261 if (PTRS_PER_PMD > 1) {
2262 pmd_cache = kmem_cache_create("pmd",
2263 PTRS_PER_PMD*sizeof(pmd_t),
2264 PTRS_PER_PMD*sizeof(pmd_t),
2265- 0,
2266+ SLAB_PANIC,
2267 pmd_ctor,
2268 NULL);
2269- if (!pmd_cache)
2270- panic("pgtable_cache_init(): cannot create pmd cache");
2271+ if (!SHARED_KERNEL_PMD) {
2272+ /* If we're in PAE mode and have a non-shared
2273+ kernel pmd, then the pgd size must be a
2274+ page size. This is because the pgd_list
2275+ links through the page structure, so there
2276+ can only be one pgd per page for this to
2277+ work. */
2278+ pgd_size = PAGE_SIZE;
2279+ }
2280 }
2281- pgd_cache = kmem_cache_create("pgd",
2282-#ifndef CONFIG_XEN
2283- PTRS_PER_PGD*sizeof(pgd_t),
2284- PTRS_PER_PGD*sizeof(pgd_t),
2285-#else
2286- PAGE_SIZE,
2287- PAGE_SIZE,
2288-#endif
2289- 0,
2290- pgd_ctor,
2291- PTRS_PER_PMD == 1 ? pgd_dtor : NULL);
2292- if (!pgd_cache)
2293- panic("pgtable_cache_init(): Cannot create pgd cache");
2294 }
2295
2296 /*
2297@@ -811,13 +841,26 @@ static int noinline do_test_wp_bit(void)
2298
2299 void mark_rodata_ro(void)
2300 {
2301- unsigned long addr = (unsigned long)__start_rodata;
2302-
2303- for (; addr < (unsigned long)__end_rodata; addr += PAGE_SIZE)
2304- change_page_attr(virt_to_page(addr), 1, PAGE_KERNEL_RO);
2305+ unsigned long start = PFN_ALIGN(_text);
2306+ unsigned long size = PFN_ALIGN(_etext) - start;
2307
2308- printk("Write protecting the kernel read-only data: %uk\n",
2309- (__end_rodata - __start_rodata) >> 10);
2310+#ifndef CONFIG_KPROBES
2311+#ifdef CONFIG_HOTPLUG_CPU
2312+ /* It must still be possible to apply SMP alternatives. */
2313+ if (num_possible_cpus() <= 1)
2314+#endif
2315+ {
2316+ change_page_attr(virt_to_page(start),
2317+ size >> PAGE_SHIFT, PAGE_KERNEL_RX);
2318+ printk("Write protecting the kernel text: %luk\n", size >> 10);
2319+ }
2320+#endif
2321+ start += size;
2322+ size = (unsigned long)__end_rodata - start;
2323+ change_page_attr(virt_to_page(start),
2324+ size >> PAGE_SHIFT, PAGE_KERNEL_RO);
2325+ printk("Write protecting the kernel read-only data: %luk\n",
2326+ size >> 10);
2327
2328 /*
2329 * change_page_attr() requires a global_flush_tlb() call after it.
2330@@ -840,7 +883,7 @@ void free_init_pages(char *what, unsigne
2331 free_page(addr);
2332 totalram_pages++;
2333 }
2334- printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10);
2335+ printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);
2336 }
2337
2338 void free_initmem(void)
2339--- sle11-2009-04-20.orig/arch/x86/mm/ioremap_32-xen.c 2009-03-04 11:28:34.000000000 +0100
2340+++ sle11-2009-04-20/arch/x86/mm/ioremap_32-xen.c 2008-12-15 11:27:22.000000000 +0100
2341@@ -13,6 +13,7 @@
2342 #include <linux/slab.h>
2343 #include <linux/module.h>
2344 #include <linux/io.h>
2345+#include <linux/sched.h>
2346 #include <asm/fixmap.h>
2347 #include <asm/cacheflush.h>
2348 #include <asm/tlbflush.h>
2349--- sle11-2009-04-20.orig/arch/x86/mm/pgtable_32-xen.c 2009-03-04 11:25:55.000000000 +0100
2350+++ sle11-2009-04-20/arch/x86/mm/pgtable_32-xen.c 2008-12-15 11:27:22.000000000 +0100
2351@@ -13,6 +13,7 @@
2352 #include <linux/pagemap.h>
2353 #include <linux/spinlock.h>
2354 #include <linux/module.h>
2355+#include <linux/quicklist.h>
2356
2357 #include <asm/system.h>
2358 #include <asm/pgtable.h>
2359@@ -218,8 +219,6 @@ void pmd_ctor(void *pmd, struct kmem_cac
2360 * against pageattr.c; it is the unique case in which a valid change
2361 * of kernel pagetables can't be lazily synchronized by vmalloc faults.
2362 * vmalloc faults work because attached pagetables are never freed.
2363- * The locking scheme was chosen on the basis of manfred's
2364- * recommendations and having no core impact whatsoever.
2365 * -- wli
2366 */
2367 DEFINE_SPINLOCK(pgd_lock);
2368@@ -245,37 +244,54 @@ static inline void pgd_list_del(pgd_t *p
2369 set_page_private(next, (unsigned long)pprev);
2370 }
2371
2372-void pgd_ctor(void *pgd, struct kmem_cache *cache, unsigned long unused)
2373+
2374+
2375+#if (PTRS_PER_PMD == 1)
2376+/* Non-PAE pgd constructor */
2377+void pgd_ctor(void *pgd)
2378 {
2379 unsigned long flags;
2380
2381- if (PTRS_PER_PMD > 1) {
2382- if (HAVE_SHARED_KERNEL_PMD)
2383- clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
2384- swapper_pg_dir + USER_PTRS_PER_PGD,
2385- KERNEL_PGD_PTRS);
2386- } else {
2387- spin_lock_irqsave(&pgd_lock, flags);
2388+ /* !PAE, no pagetable sharing */
2389+ memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
2390+
2391+ spin_lock_irqsave(&pgd_lock, flags);
2392+
2393+ /* must happen under lock */
2394+ clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
2395+ swapper_pg_dir + USER_PTRS_PER_PGD,
2396+ KERNEL_PGD_PTRS);
2397+
2398+ paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT,
2399+ __pa(swapper_pg_dir) >> PAGE_SHIFT,
2400+ USER_PTRS_PER_PGD,
2401+ KERNEL_PGD_PTRS);
2402+ pgd_list_add(pgd);
2403+ spin_unlock_irqrestore(&pgd_lock, flags);
2404+}
2405+#else /* PTRS_PER_PMD > 1 */
2406+/* PAE pgd constructor */
2407+void pgd_ctor(void *pgd)
2408+{
2409+ /* PAE, kernel PMD may be shared */
2410+
2411+ if (SHARED_KERNEL_PMD) {
2412 clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
2413 swapper_pg_dir + USER_PTRS_PER_PGD,
2414 KERNEL_PGD_PTRS);
2415+ } else {
2416 memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
2417-
2418- /* must happen under lock */
2419- paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT,
2420- __pa(swapper_pg_dir) >> PAGE_SHIFT,
2421- USER_PTRS_PER_PGD, PTRS_PER_PGD - USER_PTRS_PER_PGD);
2422-
2423- pgd_list_add(pgd);
2424- spin_unlock_irqrestore(&pgd_lock, flags);
2425 }
2426 }
2427+#endif /* PTRS_PER_PMD */
2428
2429-/* never called when PTRS_PER_PMD > 1 */
2430-void pgd_dtor(void *pgd, struct kmem_cache *cache, unsigned long unused)
2431+void pgd_dtor(void *pgd)
2432 {
2433 unsigned long flags; /* can be called from interrupt context */
2434
2435+ if (SHARED_KERNEL_PMD)
2436+ return;
2437+
2438 paravirt_release_pd(__pa(pgd) >> PAGE_SHIFT);
2439 spin_lock_irqsave(&pgd_lock, flags);
2440 pgd_list_del(pgd);
2441@@ -284,11 +300,46 @@ void pgd_dtor(void *pgd, struct kmem_cac
2442 pgd_test_and_unpin(pgd);
2443 }
2444
2445+#define UNSHARED_PTRS_PER_PGD \
2446+ (SHARED_KERNEL_PMD ? USER_PTRS_PER_PGD : PTRS_PER_PGD)
2447+
2448+/* If we allocate a pmd for part of the kernel address space, then
2449+ make sure its initialized with the appropriate kernel mappings.
2450+ Otherwise use a cached zeroed pmd. */
2451+static pmd_t *pmd_cache_alloc(int idx)
2452+{
2453+ pmd_t *pmd;
2454+
2455+ if (idx >= USER_PTRS_PER_PGD) {
2456+ pmd = (pmd_t *)__get_free_page(GFP_KERNEL);
2457+
2458+#ifndef CONFIG_XEN
2459+ if (pmd)
2460+ memcpy(pmd,
2461+ (void *)pgd_page_vaddr(swapper_pg_dir[idx]),
2462+ sizeof(pmd_t) * PTRS_PER_PMD);
2463+#endif
2464+ } else
2465+ pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
2466+
2467+ return pmd;
2468+}
2469+
2470+static void pmd_cache_free(pmd_t *pmd, int idx)
2471+{
2472+ if (idx >= USER_PTRS_PER_PGD) {
2473+ make_lowmem_page_writable(pmd, XENFEAT_writable_page_tables);
2474+ memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t));
2475+ free_page((unsigned long)pmd);
2476+ } else
2477+ kmem_cache_free(pmd_cache, pmd);
2478+}
2479+
2480 pgd_t *pgd_alloc(struct mm_struct *mm)
2481 {
2482 int i;
2483- pgd_t *pgd = kmem_cache_alloc(pgd_cache, GFP_KERNEL);
2484- pmd_t **pmd;
2485+ pgd_t *pgd = quicklist_alloc(0, GFP_KERNEL, pgd_ctor);
2486+ pmd_t **pmds = NULL;
2487 unsigned long flags;
2488
2489 pgd_test_and_unpin(pgd);
2490@@ -296,37 +347,40 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
2491 if (PTRS_PER_PMD == 1 || !pgd)
2492 return pgd;
2493
2494- if (HAVE_SHARED_KERNEL_PMD) {
2495- for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
2496- pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
2497- if (!pmd)
2498- goto out_oom;
2499- paravirt_alloc_pd(__pa(pmd) >> PAGE_SHIFT);
2500- set_pgd(&pgd[i], __pgd(1 + __pa(pmd)));
2501+#ifdef CONFIG_XEN
2502+ if (!SHARED_KERNEL_PMD) {
2503+ /*
2504+ * We can race save/restore (if we sleep during a GFP_KERNEL memory
2505+ * allocation). We therefore store virtual addresses of pmds as they
2506+ * do not change across save/restore, and poke the machine addresses
2507+ * into the pgdir under the pgd_lock.
2508+ */
2509+ pmds = kmalloc(PTRS_PER_PGD * sizeof(pmd_t *), GFP_KERNEL);
2510+ if (!pmds) {
2511+ quicklist_free(0, pgd_dtor, pgd);
2512+ return NULL;
2513 }
2514- return pgd;
2515- }
2516-
2517- /*
2518- * We can race save/restore (if we sleep during a GFP_KERNEL memory
2519- * allocation). We therefore store virtual addresses of pmds as they
2520- * do not change across save/restore, and poke the machine addresses
2521- * into the pgdir under the pgd_lock.
2522- */
2523- pmd = kmalloc(PTRS_PER_PGD * sizeof(pmd_t *), GFP_KERNEL);
2524- if (!pmd) {
2525- kmem_cache_free(pgd_cache, pgd);
2526- return NULL;
cc90b958 2527 }
2528+#endif
2529
2530 /* Allocate pmds, remember virtual addresses. */
2531- for (i = 0; i < PTRS_PER_PGD; ++i) {
2532- pmd[i] = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
2533- if (!pmd[i])
2534+ for (i = 0; i < UNSHARED_PTRS_PER_PGD; ++i) {
2535+ pmd_t *pmd = pmd_cache_alloc(i);
2536+
2537+ if (!pmd)
2538 goto out_oom;
2539+
2540 paravirt_alloc_pd(__pa(pmd) >> PAGE_SHIFT);
2541+ if (pmds)
2542+ pmds[i] = pmd;
2543+ else
2544+ set_pgd(&pgd[i], __pgd(1 + __pa(pmd)));
2545 }
2546
2547+#ifdef CONFIG_XEN
2548+ if (SHARED_KERNEL_PMD)
2549+ return pgd;
2550+
2551 spin_lock_irqsave(&pgd_lock, flags);
2552
2553 /* Protect against save/restore: move below 4GB under pgd_lock. */
2554@@ -341,44 +395,43 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
2555
2556 /* Copy kernel pmd contents and write-protect the new pmds. */
2557 for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) {
2558- unsigned long v = (unsigned long)i << PGDIR_SHIFT;
2559- pgd_t *kpgd = pgd_offset_k(v);
2560- pud_t *kpud = pud_offset(kpgd, v);
2561- pmd_t *kpmd = pmd_offset(kpud, v);
2562- memcpy(pmd[i], kpmd, PAGE_SIZE);
2563+ memcpy(pmds[i],
2564+ (void *)pgd_page_vaddr(swapper_pg_dir[i]),
2565+ sizeof(pmd_t) * PTRS_PER_PMD);
2566 make_lowmem_page_readonly(
2567- pmd[i], XENFEAT_writable_page_tables);
2568+ pmds[i], XENFEAT_writable_page_tables);
2569 }
2570
2571 /* It is safe to poke machine addresses of pmds under the pmd_lock. */
2572 for (i = 0; i < PTRS_PER_PGD; i++)
2573- set_pgd(&pgd[i], __pgd(1 + __pa(pmd[i])));
2574+ set_pgd(&pgd[i], __pgd(1 + __pa(pmds[i])));
2575
2576 /* Ensure this pgd gets picked up and pinned on save/restore. */
2577 pgd_list_add(pgd);
2578
2579 spin_unlock_irqrestore(&pgd_lock, flags);
2580
2581- kfree(pmd);
2582+ kfree(pmds);
2583+#endif
2584
2585 return pgd;
2586
2587 out_oom:
2588- if (HAVE_SHARED_KERNEL_PMD) {
2589+ if (!pmds) {
2590 for (i--; i >= 0; i--) {
2591 pgd_t pgdent = pgd[i];
2592 void* pmd = (void *)__va(pgd_val(pgdent)-1);
2593 paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
2594- kmem_cache_free(pmd_cache, pmd);
2595+ pmd_cache_free(pmd, i);
2596 }
2597 } else {
2598 for (i--; i >= 0; i--) {
2599- paravirt_release_pd(__pa(pmd[i]) >> PAGE_SHIFT);
2600- kmem_cache_free(pmd_cache, pmd[i]);
2601+ paravirt_release_pd(__pa(pmds[i]) >> PAGE_SHIFT);
2602+ pmd_cache_free(pmds[i], i);
2603 }
2604- kfree(pmd);
2605+ kfree(pmds);
2606 }
2607- kmem_cache_free(pgd_cache, pgd);
2608+ quicklist_free(0, pgd_dtor, pgd);
2609 return NULL;
2610 }
2611
2612@@ -398,35 +451,24 @@ void pgd_free(pgd_t *pgd)
2613
2614 /* in the PAE case user pgd entries are overwritten before usage */
2615 if (PTRS_PER_PMD > 1) {
2616- for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
2617+ for (i = 0; i < UNSHARED_PTRS_PER_PGD; ++i) {
2618 pgd_t pgdent = pgd[i];
2619 void* pmd = (void *)__va(pgd_val(pgdent)-1);
2620 paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
2621- kmem_cache_free(pmd_cache, pmd);
2622+ pmd_cache_free(pmd, i);
2623 }
2624
2625- if (!HAVE_SHARED_KERNEL_PMD) {
2626- unsigned long flags;
2627- spin_lock_irqsave(&pgd_lock, flags);
2628- pgd_list_del(pgd);
2629- spin_unlock_irqrestore(&pgd_lock, flags);
2630-
2631- for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) {
2632- pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1);
2633- make_lowmem_page_writable(
2634- pmd, XENFEAT_writable_page_tables);
2635- memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t));
2636- kmem_cache_free(pmd_cache, pmd);
2637- }
2638-
2639- if (!xen_feature(XENFEAT_pae_pgdir_above_4gb))
2640- xen_destroy_contiguous_region(
2641- (unsigned long)pgd, 0);
2642- }
2643+ if (!xen_feature(XENFEAT_pae_pgdir_above_4gb))
2644+ xen_destroy_contiguous_region((unsigned long)pgd, 0);
2645 }
2646
2647 /* in the non-PAE case, free_pgtables() clears user pgd entries */
2648- kmem_cache_free(pgd_cache, pgd);
2649+ quicklist_free(0, pgd_dtor, pgd);
2650+}
2651+
2652+void check_pgt_cache(void)
2653+{
2654+ quicklist_trim(0, pgd_dtor, 25, 16);
2655 }
2656
2657 void make_lowmem_page_readonly(void *va, unsigned int feature)
2658@@ -723,13 +765,13 @@ void mm_pin_all(void)
2659 spin_unlock_irqrestore(&pgd_lock, flags);
2660 }
2661
2662-void _arch_dup_mmap(struct mm_struct *mm)
2663+void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
2664 {
2665 if (!test_bit(PG_pinned, &virt_to_page(mm->pgd)->flags))
2666 mm_pin(mm);
2667 }
cc90b958 2668
00e5a55c
BS
2669-void _arch_exit_mmap(struct mm_struct *mm)
2670+void arch_exit_mmap(struct mm_struct *mm)
2671 {
2672 struct task_struct *tsk = current;
2673
2674--- sle11-2009-04-20.orig/arch/x86/ia32/ia32entry-xen.S 2009-03-04 11:25:55.000000000 +0100
2675+++ sle11-2009-04-20/arch/x86/ia32/ia32entry-xen.S 2008-12-15 11:27:22.000000000 +0100
2676@@ -431,11 +431,7 @@ ia32_sys_call_table:
2677 .quad sys_symlink
2678 .quad sys_lstat
2679 .quad sys_readlink /* 85 */
2680-#ifdef CONFIG_IA32_AOUT
2681 .quad sys_uselib
2682-#else
2683- .quad quiet_ni_syscall
2684-#endif
2685 .quad sys_swapon
2686 .quad sys_reboot
2687 .quad compat_sys_old_readdir
2688@@ -574,7 +570,7 @@ ia32_sys_call_table:
2689 .quad quiet_ni_syscall /* tux */
2690 .quad quiet_ni_syscall /* security */
2691 .quad sys_gettid
2692- .quad sys_readahead /* 225 */
2693+ .quad sys32_readahead /* 225 */
2694 .quad sys_setxattr
2695 .quad sys_lsetxattr
2696 .quad sys_fsetxattr
2697@@ -599,7 +595,7 @@ ia32_sys_call_table:
2698 .quad compat_sys_io_getevents
2699 .quad compat_sys_io_submit
2700 .quad sys_io_cancel
2701- .quad sys_fadvise64 /* 250 */
2702+ .quad sys32_fadvise64 /* 250 */
2703 .quad quiet_ni_syscall /* free_huge_pages */
2704 .quad sys_exit_group
2705 .quad sys32_lookup_dcookie
2706@@ -663,10 +659,14 @@ ia32_sys_call_table:
2707 .quad compat_sys_set_robust_list
2708 .quad compat_sys_get_robust_list
2709 .quad sys_splice
2710- .quad sys_sync_file_range
2711- .quad sys_tee
2712+ .quad sys32_sync_file_range
2713+ .quad sys_tee /* 315 */
2714 .quad compat_sys_vmsplice
2715 .quad compat_sys_move_pages
2716 .quad sys_getcpu
2717 .quad sys_epoll_pwait
2718-ia32_syscall_end:
2719+ .quad compat_sys_utimensat /* 320 */
2720+ .quad compat_sys_signalfd
2721+ .quad compat_sys_timerfd
2722+ .quad sys_eventfd
2723+ia32_syscall_end:
2724--- sle11-2009-04-20.orig/arch/x86/kernel/acpi/sleep_64-xen.c 2009-03-04 11:25:55.000000000 +0100
2725+++ sle11-2009-04-20/arch/x86/kernel/acpi/sleep_64-xen.c 2008-12-15 11:27:22.000000000 +0100
2726@@ -60,19 +60,6 @@ unsigned long acpi_video_flags;
2727 extern char wakeup_start, wakeup_end;
2728
2729 extern unsigned long acpi_copy_wakeup_routine(unsigned long);
2730-
2731-static pgd_t low_ptr;
2732-
2733-static void init_low_mapping(void)
2734-{
2735- pgd_t *slot0 = pgd_offset(current->mm, 0UL);
2736- low_ptr = *slot0;
2737- /* FIXME: We're playing with the current task's page tables here, which
2738- * is potentially dangerous on SMP systems.
2739- */
2740- set_pgd(slot0, *pgd_offset(current->mm, PAGE_OFFSET));
2741- local_flush_tlb();
2742-}
2743 #endif
2744
2745 /**
2746@@ -84,8 +71,6 @@ static void init_low_mapping(void)
2747 int acpi_save_state_mem(void)
2748 {
2749 #ifndef CONFIG_ACPI_PV_SLEEP
2750- init_low_mapping();
2751-
2752 memcpy((void *)acpi_wakeup_address, &wakeup_start,
2753 &wakeup_end - &wakeup_start);
2754 acpi_copy_wakeup_routine(acpi_wakeup_address);
2755@@ -98,10 +83,6 @@ int acpi_save_state_mem(void)
2756 */
2757 void acpi_restore_state_mem(void)
2758 {
2759-#ifndef CONFIG_ACPI_PV_SLEEP
2760- set_pgd(pgd_offset(current->mm, 0UL), low_ptr);
2761- local_flush_tlb();
2762-#endif
2763 }
2764
2765 /**
2766@@ -115,10 +96,11 @@ void acpi_restore_state_mem(void)
2767 void __init acpi_reserve_bootmem(void)
2768 {
2769 #ifndef CONFIG_ACPI_PV_SLEEP
2770- acpi_wakeup_address = (unsigned long)alloc_bootmem_low(PAGE_SIZE);
2771- if ((&wakeup_end - &wakeup_start) > PAGE_SIZE)
2772+ acpi_wakeup_address = (unsigned long)alloc_bootmem_low(PAGE_SIZE*2);
2773+ if ((&wakeup_end - &wakeup_start) > (PAGE_SIZE*2))
2774 printk(KERN_CRIT
2775- "ACPI: Wakeup code way too big, will crash on attempt to suspend\n");
2776+ "ACPI: Wakeup code way too big, will crash on attempt"
2777+ " to suspend\n");
2778 #endif
2779 }
2780
2781--- sle11-2009-04-20.orig/arch/x86/kernel/apic_64-xen.c 2009-03-04 11:28:34.000000000 +0100
2782+++ sle11-2009-04-20/arch/x86/kernel/apic_64-xen.c 2008-12-15 11:27:22.000000000 +0100
2783@@ -19,7 +19,6 @@
2784 #include <linux/mm.h>
2785 #include <linux/delay.h>
2786 #include <linux/bootmem.h>
2787-#include <linux/smp_lock.h>
2788 #include <linux/interrupt.h>
2789 #include <linux/mc146818rtc.h>
2790 #include <linux/kernel_stat.h>
2791--- sle11-2009-04-20.orig/arch/x86/kernel/e820_64-xen.c 2009-03-04 11:25:55.000000000 +0100
2792+++ sle11-2009-04-20/arch/x86/kernel/e820_64-xen.c 2008-12-15 11:27:22.000000000 +0100
2793@@ -17,6 +17,8 @@
2794 #include <linux/kexec.h>
2795 #include <linux/module.h>
2796 #include <linux/mm.h>
2797+#include <linux/suspend.h>
2798+#include <linux/pfn.h>
2799
2800 #include <asm/pgtable.h>
2801 #include <asm/page.h>
2802@@ -28,7 +30,7 @@
2803
2804 struct e820map e820 __initdata;
2805 #ifdef CONFIG_XEN
2806-struct e820map machine_e820 __initdata;
2807+struct e820map machine_e820;
2808 #endif
2809
2810 /*
2811@@ -291,22 +293,6 @@ void __init e820_reserve_resources(struc
2812 }
2813
2814 #ifndef CONFIG_XEN
2815-/* Mark pages corresponding to given address range as nosave */
2816-static void __init
2817-e820_mark_nosave_range(unsigned long start, unsigned long end)
2818-{
2819- unsigned long pfn, max_pfn;
2820-
2821- if (start >= end)
2822- return;
2823-
2824- printk("Nosave address range: %016lx - %016lx\n", start, end);
2825- max_pfn = end >> PAGE_SHIFT;
2826- for (pfn = start >> PAGE_SHIFT; pfn < max_pfn; pfn++)
2827- if (pfn_valid(pfn))
2828- SetPageNosave(pfn_to_page(pfn));
2829-}
2830-
2831 /*
2832 * Find the ranges of physical addresses that do not correspond to
2833 * e820 RAM areas and mark the corresponding pages as nosave for software
2834@@ -325,13 +311,13 @@ void __init e820_mark_nosave_regions(voi
2835 struct e820entry *ei = &e820.map[i];
2836
2837 if (paddr < ei->addr)
2838- e820_mark_nosave_range(paddr,
2839- round_up(ei->addr, PAGE_SIZE));
2840+ register_nosave_region(PFN_DOWN(paddr),
2841+ PFN_UP(ei->addr));
2842
2843 paddr = round_down(ei->addr + ei->size, PAGE_SIZE);
2844 if (ei->type != E820_RAM)
2845- e820_mark_nosave_range(round_up(ei->addr, PAGE_SIZE),
2846- paddr);
2847+ register_nosave_region(PFN_UP(ei->addr),
2848+ PFN_DOWN(paddr));
2849
2850 if (paddr >= (end_pfn << PAGE_SHIFT))
2851 break;
2852--- sle11-2009-04-20.orig/arch/x86/kernel/early_printk-xen.c 2009-03-04 11:28:34.000000000 +0100
2853+++ sle11-2009-04-20/arch/x86/kernel/early_printk-xen.c 2008-12-15 11:27:22.000000000 +0100
2854@@ -11,11 +11,10 @@
2855
2856 #ifdef __i386__
2857 #include <asm/setup.h>
2858-#define VGABASE (__ISA_IO_base + 0xb8000)
2859 #else
2860 #include <asm/bootsetup.h>
2861-#define VGABASE ((void __iomem *)0xffffffff800b8000UL)
2862 #endif
2863+#define VGABASE (__ISA_IO_base + 0xb8000)
2864
2865 #ifndef CONFIG_XEN
2866 static int max_ypos = 25, max_xpos = 80;
2867@@ -93,9 +92,9 @@ static int early_serial_putc(unsigned ch
2868 static void early_serial_write(struct console *con, const char *s, unsigned n)
2869 {
2870 while (*s && n-- > 0) {
2871- early_serial_putc(*s);
2872 if (*s == '\n')
2873 early_serial_putc('\r');
2874+ early_serial_putc(*s);
2875 s++;
2876 }
2877 }
2878@@ -205,7 +204,7 @@ static noinline long simnow(long cmd, lo
2879 return ret;
2880 }
2881
2882-void __init simnow_init(char *str)
2883+static void __init simnow_init(char *str)
2884 {
2885 char *fn = "klog";
2886 if (*str == '=')
2887@@ -277,22 +276,12 @@ static int __init setup_early_printk(cha
2888 early_console = &simnow_console;
2889 keep_early = 1;
2890 }
2891+
2892+ if (keep_early)
2893+ early_console->flags &= ~CON_BOOT;
2894+ else
2895+ early_console->flags |= CON_BOOT;
2896 register_console(early_console);
2897 return 0;
2898 }
2899-
2900 early_param("earlyprintk", setup_early_printk);
2901-
2902-void __init disable_early_printk(void)
2903-{
2904- if (!early_console_initialized || !early_console)
2905- return;
2906- if (!keep_early) {
2907- printk("disabling early console\n");
2908- unregister_console(early_console);
2909- early_console_initialized = 0;
2910- } else {
2911- printk("keeping early console\n");
2912- }
2913-}
2914-
2915--- sle11-2009-04-20.orig/arch/x86/kernel/entry_64-xen.S 2009-03-04 11:25:55.000000000 +0100
2916+++ sle11-2009-04-20/arch/x86/kernel/entry_64-xen.S 2008-12-15 11:27:22.000000000 +0100
2917@@ -1254,3 +1254,10 @@ ENTRY(call_softirq)
2918 ret
2919 CFI_ENDPROC
2920 ENDPROC(call_softirq)
2921+
2922+KPROBE_ENTRY(ignore_sysret)
2923+ CFI_STARTPROC
2924+ mov $-ENOSYS,%eax
2925+ HYPERVISOR_IRET 0
2926+ CFI_ENDPROC
2927+ENDPROC(ignore_sysret)
2928--- sle11-2009-04-20.orig/arch/x86/kernel/genapic_64-xen.c 2009-03-04 11:25:55.000000000 +0100
2929+++ sle11-2009-04-20/arch/x86/kernel/genapic_64-xen.c 2008-12-15 11:27:22.000000000 +0100
2930@@ -11,123 +11,57 @@
2931 #include <linux/threads.h>
2932 #include <linux/cpumask.h>
2933 #include <linux/string.h>
2934+#include <linux/module.h>
2935 #include <linux/kernel.h>
2936 #include <linux/ctype.h>
2937 #include <linux/init.h>
2938-#include <linux/module.h>
2939
2940 #include <asm/smp.h>
2941 #include <asm/ipi.h>
2942+#include <asm/genapic.h>
2943
2944-#if defined(CONFIG_ACPI)
2945+#ifdef CONFIG_ACPI
2946 #include <acpi/acpi_bus.h>
2947 #endif
2948
2949 /* which logical CPU number maps to which CPU (physical APIC ID) */
2950-u8 x86_cpu_to_apicid[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID };
2951+u8 x86_cpu_to_apicid[NR_CPUS] __read_mostly
2952+ = { [0 ... NR_CPUS-1] = BAD_APICID };
2953 EXPORT_SYMBOL(x86_cpu_to_apicid);
2954-u8 x86_cpu_to_log_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
2955
2956-extern struct genapic apic_cluster;
2957-extern struct genapic apic_flat;
2958-extern struct genapic apic_physflat;
2959+u8 x86_cpu_to_log_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
2960
2961 #ifndef CONFIG_XEN
2962-struct genapic *genapic = &apic_flat;
2963-struct genapic *genapic_force;
2964+struct genapic __read_mostly *genapic = &apic_flat;
2965 #else
2966 extern struct genapic apic_xen;
2967-struct genapic *genapic = &apic_xen;
2968+struct genapic __read_mostly *genapic = &apic_xen;
2969 #endif
2970
2971
2972 /*
2973 * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode.
2974 */
2975-void __init clustered_apic_check(void)
2976+void __init setup_apic_routing(void)
2977 {
2978 #ifndef CONFIG_XEN
2979- long i;
2980- u8 clusters, max_cluster;
2981- u8 id;
2982- u8 cluster_cnt[NUM_APIC_CLUSTERS];
2983- int max_apic = 0;
2984-
2985- /* genapic selection can be forced because of certain quirks.
2986- */
2987- if (genapic_force) {
2988- genapic = genapic_force;
2989- goto print;
2990- }
2991-
2992-#if defined(CONFIG_ACPI)
2993+#ifdef CONFIG_ACPI
2994 /*
2995- * Some x86_64 machines use physical APIC mode regardless of how many
2996- * procs/clusters are present (x86_64 ES7000 is an example).
2997+ * Quirk: some x86_64 machines can only use physical APIC mode
2998+ * regardless of how many processors are present (x86_64 ES7000
2999+ * is an example).
3000 */
3001- if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID)
3002- if (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL) {
3003- genapic = &apic_cluster;
3004- goto print;
3005- }
3006-#endif
3007-
3008- memset(cluster_cnt, 0, sizeof(cluster_cnt));
3009- for (i = 0; i < NR_CPUS; i++) {
3010- id = bios_cpu_apicid[i];
3011- if (id == BAD_APICID)
3012- continue;
3013- if (id > max_apic)
3014- max_apic = id;
3015- cluster_cnt[APIC_CLUSTERID(id)]++;
3016- }
3017-
3018- /* Don't use clustered mode on AMD platforms. */
3019- if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
3020+ if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID &&
3021+ (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL))
3022 genapic = &apic_physflat;
3023-#ifndef CONFIG_HOTPLUG_CPU
3024- /* In the CPU hotplug case we cannot use broadcast mode
3025- because that opens a race when a CPU is removed.
3026- Stay at physflat mode in this case.
3027- It is bad to do this unconditionally though. Once
3028- we have ACPI platform support for CPU hotplug
3029- we should detect hotplug capablity from ACPI tables and
3030- only do this when really needed. -AK */
3031- if (max_apic <= 8)
3032- genapic = &apic_flat;
3033-#endif
3034- goto print;
3035- }
3036-
3037- clusters = 0;
3038- max_cluster = 0;
3039-
3040- for (i = 0; i < NUM_APIC_CLUSTERS; i++) {
3041- if (cluster_cnt[i] > 0) {
3042- ++clusters;
3043- if (cluster_cnt[i] > max_cluster)
3044- max_cluster = cluster_cnt[i];
3045- }
3046- }
3047+ else
3048+#endif
3049
3050- /*
3051- * If we have clusters <= 1 and CPUs <= 8 in cluster 0, then flat mode,
3052- * else if max_cluster <= 4 and cluster_cnt[15] == 0, clustered logical
3053- * else physical mode.
3054- * (We don't use lowest priority delivery + HW APIC IRQ steering, so
3055- * can ignore the clustered logical case and go straight to physical.)
3056- */
3057- if (clusters <= 1 && max_cluster <= 8 && cluster_cnt[0] == max_cluster) {
3058-#ifdef CONFIG_HOTPLUG_CPU
3059- /* Don't use APIC shortcuts in CPU hotplug to avoid races */
3060- genapic = &apic_physflat;
3061-#else
3062+ if (cpus_weight(cpu_possible_map) <= 8)
3063 genapic = &apic_flat;
3064-#endif
3065- } else
3066- genapic = &apic_cluster;
3067+ else
3068+ genapic = &apic_physflat;
3069
3070-print:
3071 #else
3072 /* hardcode to xen apic functions */
3073 genapic = &apic_xen;
3074@@ -135,7 +69,7 @@ print:
3075 printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name);
3076 }
3077
3078-/* Same for both flat and clustered. */
3079+/* Same for both flat and physical. */
3080
3081 #ifdef CONFIG_XEN
3082 extern void xen_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest);
3083--- sle11-2009-04-20.orig/arch/x86/kernel/genapic_xen_64.c 2009-03-04 11:28:34.000000000 +0100
3084+++ sle11-2009-04-20/arch/x86/kernel/genapic_xen_64.c 2008-12-15 11:27:22.000000000 +0100
3085@@ -21,9 +21,8 @@
3086 #include <asm/ipi.h>
3087 #else
3088 #include <asm/apic.h>
3089-#include <asm/apicdef.h>
3090-#include <asm/genapic.h>
3091 #endif
3092+#include <asm/genapic.h>
3093 #include <xen/evtchn.h>
3094
3095 DECLARE_PER_CPU(int, ipi_to_irq[NR_IPIS]);
3096--- sle11-2009-04-20.orig/arch/x86/kernel/head_64-xen.S 2009-03-04 11:28:34.000000000 +0100
3097+++ sle11-2009-04-20/arch/x86/kernel/head_64-xen.S 2008-12-15 11:27:22.000000000 +0100
3098@@ -5,6 +5,7 @@
3099 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
3100 * Copyright (C) 2000 Karsten Keil <kkeil@suse.de>
3101 * Copyright (C) 2001,2002 Andi Kleen <ak@suse.de>
3102+ * Copyright (C) 2005 Eric Biederman <ebiederm@xmission.com>
3103 * Jun Nakajima <jun.nakajima@intel.com>
3104 * Modified for Xen
3105 */
3106@@ -34,27 +35,15 @@ startup_64:
3107 pushq $0 # fake return address
3108 jmp x86_64_start_kernel
3109
3110-#ifdef CONFIG_ACPI_SLEEP
3111-.org 0xf00
3112- .globl pGDT32
3113-pGDT32:
3114- .word gdt_end-cpu_gdt_table-1
3115- .long cpu_gdt_table-__START_KERNEL_map
3116-#endif
3117-ENTRY(stext)
3118-ENTRY(_stext)
3119+.balign PAGE_SIZE
3120
3121- $page = 0
3122 #define NEXT_PAGE(name) \
3123- $page = $page + 1; \
3124- .org $page * 0x1000; \
3125- phys_##name = $page * 0x1000 + __PHYSICAL_START; \
3126+ .balign PAGE_SIZE; \
3127+ phys_##name = . - .bootstrap.text; \
3128 ENTRY(name)
3129
3130 NEXT_PAGE(init_level4_pgt)
3131- /* This gets initialized in x86_64_start_kernel */
3132 .fill 512,8,0
3133-NEXT_PAGE(init_level4_user_pgt)
3134 /*
3135 * We update two pgd entries to make kernel and user pgd consistent
3136 * at pgd_populate(). It can be used for kernel modules. So we place
3137@@ -101,14 +90,6 @@ NEXT_PAGE(hypercall_page)
3138 #undef NEXT_PAGE
3139
3140 .data
3141-/* Just dummy symbol to allow compilation. Not used in sleep path */
3142-#ifdef CONFIG_ACPI_SLEEP
3143- .align PAGE_SIZE
3144-ENTRY(wakeup_level4_pgt)
3145- .fill 512,8,0
3146-#endif
cc90b958 3147-
3148- .data
3149
3150 .align 16
3151 .globl cpu_gdt_descr
3152@@ -136,13 +117,13 @@ gdt:
3153
3154 ENTRY(cpu_gdt_table)
3155 .quad 0x0000000000000000 /* NULL descriptor */
3156+ .quad 0x00cf9b000000ffff /* __KERNEL32_CS */
3157+ .quad 0x00af9b000000ffff /* __KERNEL_CS */
3158+ .quad 0x00cf93000000ffff /* __KERNEL_DS */
3159+ .quad 0x00cffb000000ffff /* __USER32_CS */
3160+ .quad 0x00cff3000000ffff /* __USER_DS, __USER32_DS */
3161+ .quad 0x00affb000000ffff /* __USER_CS */
3162 .quad 0x0 /* unused */
3163- .quad 0x00af9a000000ffff /* __KERNEL_CS */
3164- .quad 0x00cf92000000ffff /* __KERNEL_DS */
3165- .quad 0x00cffa000000ffff /* __USER32_CS */
3166- .quad 0x00cff2000000ffff /* __USER_DS, __USER32_DS */
3167- .quad 0x00affa000000ffff /* __USER_CS */
3168- .quad 0x00cf9a000000ffff /* __KERNEL32_CS */
3169 .quad 0,0 /* TSS */
3170 .quad 0,0 /* LDT */
3171 .quad 0,0,0 /* three TLS descriptors */
3172@@ -165,14 +146,11 @@ ENTRY(empty_zero_page)
3173 * __xen_guest information
3174 */
3175 .macro utoh value
3176- .if (\value) < 0 || (\value) >= 0x10
3177- utoh (((\value)>>4)&0x0fffffffffffffff)
3178- .endif
3179- .if ((\value) & 0xf) < 10
3180- .byte '0' + ((\value) & 0xf)
3181- .else
3182- .byte 'A' + ((\value) & 0xf) - 10
3183- .endif
3184+ i = 64
3185+ .rept 16
3186+ i = i - 4
3187+ .byte '0' + ((((\value) >> i) & 0xf) > 9) * ('0' - 'A' + 10) + (((\value) >> i) & 0xf)
3188+ .endr
3189 .endm
3190
3191 .section __xen_guest
3192--- sle11-2009-04-20.orig/arch/x86/kernel/head64-xen.c 2009-03-04 11:25:55.000000000 +0100
3193+++ sle11-2009-04-20/arch/x86/kernel/head64-xen.c 2008-12-15 11:27:22.000000000 +0100
3194@@ -25,13 +25,21 @@
3195 #include <asm/setup.h>
3196 #include <asm/desc.h>
3197 #include <asm/pgtable.h>
3198+#include <asm/tlbflush.h>
3199 #include <asm/sections.h>
3200
3201 unsigned long start_pfn;
3202
3203+#ifndef CONFIG_XEN
3204+static void __init zap_identity_mappings(void)
3205+{
3206+ pgd_t *pgd = pgd_offset_k(0UL);
3207+ pgd_clear(pgd);
3208+ __flush_tlb();
3209+}
3210+
3211 /* Don't add a printk in there. printk relies on the PDA which is not initialized
3212 yet. */
3213-#if 0
3214 static void __init clear_bss(void)
3215 {
3216 memset(__bss_start, 0,
3217@@ -40,26 +48,25 @@ static void __init clear_bss(void)
3218 #endif
3219
3220 #define NEW_CL_POINTER 0x228 /* Relative to real mode data */
3221-#define OLD_CL_MAGIC_ADDR 0x90020
3222+#define OLD_CL_MAGIC_ADDR 0x20
3223 #define OLD_CL_MAGIC 0xA33F
3224-#define OLD_CL_BASE_ADDR 0x90000
3225-#define OLD_CL_OFFSET 0x90022
3226+#define OLD_CL_OFFSET 0x22
3227
3228 static void __init copy_bootdata(char *real_mode_data)
3229 {
3230 #ifndef CONFIG_XEN
3231- int new_data;
3232+ unsigned long new_data;
3233 char * command_line;
3234
3235 memcpy(x86_boot_params, real_mode_data, BOOT_PARAM_SIZE);
3236- new_data = *(int *) (x86_boot_params + NEW_CL_POINTER);
3237+ new_data = *(u32 *) (x86_boot_params + NEW_CL_POINTER);
3238 if (!new_data) {
3239- if (OLD_CL_MAGIC != * (u16 *) OLD_CL_MAGIC_ADDR) {
3240+ if (OLD_CL_MAGIC != *(u16 *)(real_mode_data + OLD_CL_MAGIC_ADDR)) {
3241 return;
3242 }
3243- new_data = OLD_CL_BASE_ADDR + * (u16 *) OLD_CL_OFFSET;
3244+ new_data = __pa(real_mode_data) + *(u16 *)(real_mode_data + OLD_CL_OFFSET);
3245 }
3246- command_line = (char *) ((u64)(new_data));
3247+ command_line = __va(new_data);
3248 memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE);
3249 #else
3250 int max_cmdline;
3251@@ -101,10 +108,13 @@ void __init x86_64_start_kernel(char * r
3252 while ((1UL << machine_to_phys_order) < machine_to_phys_nr_ents )
3253 machine_to_phys_order++;
3254
3255-#if 0
3256+#ifndef CONFIG_XEN
3257 /* clear bss before set_intr_gate with early_idt_handler */
3258 clear_bss();
3259
3260+ /* Make NULL pointers segfault */
3261+ zap_identity_mappings();
3262+
3263 for (i = 0; i < IDT_ENTRIES; i++)
3264 set_intr_gate(i, early_idt_handler);
3265 asm volatile("lidt %0" :: "m" (idt_descr));
3266@@ -116,7 +126,7 @@ void __init x86_64_start_kernel(char * r
3267 cpu_pda(i) = &boot_cpu_pda[i];
3268
3269 pda_init(0);
3270- copy_bootdata(real_mode_data);
3271+ copy_bootdata(__va(real_mode_data));
3272 #ifdef CONFIG_SMP
3273 cpu_set(0, cpu_online_map);
3274 #endif
3275--- sle11-2009-04-20.orig/arch/x86/kernel/io_apic_64-xen.c 2009-03-04 11:25:55.000000000 +0100
3276+++ sle11-2009-04-20/arch/x86/kernel/io_apic_64-xen.c 2008-12-15 11:27:22.000000000 +0100
3277@@ -25,7 +25,6 @@
3278 #include <linux/init.h>
3279 #include <linux/delay.h>
3280 #include <linux/sched.h>
3281-#include <linux/smp_lock.h>
3282 #include <linux/pci.h>
3283 #include <linux/mc146818rtc.h>
3284 #include <linux/acpi.h>
3285@@ -904,10 +903,6 @@ static void __init setup_ExtINT_IRQ0_pin
3286 enable_8259A_irq(0);
3287 }
3288
3289-void __init UNEXPECTED_IO_APIC(void)
3290-{
3291-}
3292-
3293 void __apicdebuginit print_IO_APIC(void)
3294 {
3295 int apic, i;
3296@@ -943,40 +938,16 @@ void __apicdebuginit print_IO_APIC(void)
3297 printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid);
3298 printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
3299 printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID);
3300- if (reg_00.bits.__reserved_1 || reg_00.bits.__reserved_2)
3301- UNEXPECTED_IO_APIC();
3302
3303 printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)&reg_01);
3304 printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries);
3305- if ( (reg_01.bits.entries != 0x0f) && /* older (Neptune) boards */
3306- (reg_01.bits.entries != 0x17) && /* typical ISA+PCI boards */
3307- (reg_01.bits.entries != 0x1b) && /* Compaq Proliant boards */
3308- (reg_01.bits.entries != 0x1f) && /* dual Xeon boards */
3309- (reg_01.bits.entries != 0x22) && /* bigger Xeon boards */
3310- (reg_01.bits.entries != 0x2E) &&
3311- (reg_01.bits.entries != 0x3F) &&
3312- (reg_01.bits.entries != 0x03)
3313- )
3314- UNEXPECTED_IO_APIC();
3315
3316 printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ);
3317 printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version);
3318- if ( (reg_01.bits.version != 0x01) && /* 82489DX IO-APICs */
3319- (reg_01.bits.version != 0x02) && /* 82801BA IO-APICs (ICH2) */
3320- (reg_01.bits.version != 0x10) && /* oldest IO-APICs */
3321- (reg_01.bits.version != 0x11) && /* Pentium/Pro IO-APICs */
3322- (reg_01.bits.version != 0x13) && /* Xeon IO-APICs */
3323- (reg_01.bits.version != 0x20) /* Intel P64H (82806 AA) */
3324- )
3325- UNEXPECTED_IO_APIC();
3326- if (reg_01.bits.__reserved_1 || reg_01.bits.__reserved_2)
3327- UNEXPECTED_IO_APIC();
3328
3329 if (reg_01.bits.version >= 0x10) {
3330 printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
3331 printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration);
3332- if (reg_02.bits.__reserved_1 || reg_02.bits.__reserved_2)
3333- UNEXPECTED_IO_APIC();
3334 }
3335
3336 printk(KERN_DEBUG ".... IRQ redirection table:\n");
3337@@ -1408,8 +1379,7 @@ static void irq_complete_move(unsigned i
3338
3339 vector = ~get_irq_regs()->orig_rax;
3340 me = smp_processor_id();
3341- if ((vector == cfg->vector) &&
3342- cpu_isset(smp_processor_id(), cfg->domain)) {
3343+ if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) {
3344 cpumask_t cleanup_mask;
3345
3346 cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
3347@@ -1444,7 +1414,7 @@ static void ack_apic_level(unsigned int
3348
3349 /*
3350 * We must acknowledge the irq before we move it or the acknowledge will
3351- * not propogate properly.
3352+ * not propagate properly.
3353 */
3354 ack_APIC_irq();
3355
3356@@ -1527,6 +1497,7 @@ static void ack_lapic_irq (unsigned int
3357 static void end_lapic_irq (unsigned int i) { /* nothing */ }
3358
3359 static struct hw_interrupt_type lapic_irq_type __read_mostly = {
3360+ .name = "local-APIC",
3361 .typename = "local-APIC-edge",
3362 .startup = NULL, /* startup_irq() not used for IRQ0 */
3363 .shutdown = NULL, /* shutdown_irq() not used for IRQ0 */
3364@@ -1998,18 +1969,18 @@ int arch_setup_msi_irq(struct pci_dev *d
3365 if (irq < 0)
3366 return irq;
3367
3368- set_irq_msi(irq, desc);
3369 ret = msi_compose_msg(dev, irq, &msg);
3370 if (ret < 0) {
3371 destroy_irq(irq);
3372 return ret;
3373 }
3374
3375+ set_irq_msi(irq, desc);
3376 write_msi_msg(irq, &msg);
3377
3378 set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
3379
3380- return irq;
3381+ return 0;
3382 }
3383
3384 void arch_teardown_msi_irq(unsigned int irq)
3385--- sle11-2009-04-20.orig/arch/x86/kernel/ioport_64-xen.c 2009-03-04 11:28:34.000000000 +0100
3386+++ sle11-2009-04-20/arch/x86/kernel/ioport_64-xen.c 2008-12-15 11:27:22.000000000 +0100
3387@@ -13,10 +13,10 @@
3388 #include <linux/ioport.h>
3389 #include <linux/mm.h>
3390 #include <linux/smp.h>
3391-#include <linux/smp_lock.h>
3392 #include <linux/stddef.h>
3393 #include <linux/slab.h>
3394 #include <linux/thread_info.h>
3395+#include <linux/syscalls.h>
3396 #include <xen/interface/physdev.h>
3397
3398 /* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */
3399--- sle11-2009-04-20.orig/arch/x86/kernel/irq_64-xen.c 2009-03-04 11:25:55.000000000 +0100
3400+++ sle11-2009-04-20/arch/x86/kernel/irq_64-xen.c 2008-12-15 11:27:22.000000000 +0100
3401@@ -32,7 +32,7 @@ atomic_t irq_err_count;
3402 */
3403 static inline void stack_overflow_check(struct pt_regs *regs)
3404 {
3405- u64 curbase = (u64) current->thread_info;
3406+ u64 curbase = (u64)task_stack_page(current);
3407 static unsigned long warned = -60*HZ;
3408
3409 if (regs->rsp >= curbase && regs->rsp <= curbase + THREAD_SIZE &&
3410@@ -145,17 +145,43 @@ void fixup_irqs(cpumask_t map)
3411
3412 for (irq = 0; irq < NR_IRQS; irq++) {
3413 cpumask_t mask;
3414+ int break_affinity = 0;
3415+ int set_affinity = 1;
3416+
3417 if (irq == 2)
3418 continue;
3419
3420+ /* interrupt's are disabled at this point */
3421+ spin_lock(&irq_desc[irq].lock);
3422+
3423+ if (!irq_has_action(irq) ||
3424+ cpus_equal(irq_desc[irq].affinity, map)) {
3425+ spin_unlock(&irq_desc[irq].lock);
3426+ continue;
3427+ }
3428+
3429 cpus_and(mask, irq_desc[irq].affinity, map);
3430- if (any_online_cpu(mask) == NR_CPUS) {
3431- /*printk("Breaking affinity for irq %i\n", irq);*/
3432+ if (cpus_empty(mask)) {
3433+ break_affinity = 1;
3434 mask = map;
3435 }
3436+
3437+ if (irq_desc[irq].chip->mask)
3438+ irq_desc[irq].chip->mask(irq);
3439+
3440 if (irq_desc[irq].chip->set_affinity)
3441 irq_desc[irq].chip->set_affinity(irq, mask);
3442- else if (irq_desc[irq].action && !(warned++))
3443+ else if (!(warned++))
3444+ set_affinity = 0;
3445+
3446+ if (irq_desc[irq].chip->unmask)
3447+ irq_desc[irq].chip->unmask(irq);
3448+
3449+ spin_unlock(&irq_desc[irq].lock);
3450+
3451+ if (break_affinity && set_affinity)
3452+ /*printk("Broke affinity for irq %i\n", irq)*/;
3453+ else if (!set_affinity)
3454 printk("Cannot set affinity for irq %i\n", irq);
3455 }
3456
3457--- sle11-2009-04-20.orig/arch/x86/kernel/ldt_64-xen.c 2009-04-29 08:44:31.000000000 +0200
3458+++ sle11-2009-04-20/arch/x86/kernel/ldt_64-xen.c 2008-12-15 11:27:22.000000000 +0100
3459@@ -13,7 +13,6 @@
3460 #include <linux/string.h>
3461 #include <linux/mm.h>
3462 #include <linux/smp.h>
3463-#include <linux/smp_lock.h>
3464 #include <linux/vmalloc.h>
3465 #include <linux/slab.h>
3466
3467--- sle11-2009-04-20.orig/arch/x86/kernel/mpparse_64-xen.c 2009-03-04 11:25:55.000000000 +0100
3468+++ sle11-2009-04-20/arch/x86/kernel/mpparse_64-xen.c 2008-12-15 11:27:22.000000000 +0100
3469@@ -17,7 +17,6 @@
3470 #include <linux/init.h>
3471 #include <linux/delay.h>
3472 #include <linux/bootmem.h>
3473-#include <linux/smp_lock.h>
3474 #include <linux/kernel_stat.h>
3475 #include <linux/mc146818rtc.h>
3476 #include <linux/acpi.h>
3477@@ -307,7 +306,7 @@ static int __init smp_read_mpc(struct mp
3478 }
3479 }
3480 }
3481- clustered_apic_check();
3482+ setup_apic_routing();
3483 if (!num_processors)
3484 printk(KERN_ERR "MPTABLE: no processors registered!\n");
3485 return num_processors;
3486--- sle11-2009-04-20.orig/arch/x86/kernel/process_64-xen.c 2009-03-04 11:25:55.000000000 +0100
3487+++ sle11-2009-04-20/arch/x86/kernel/process_64-xen.c 2008-12-15 11:27:22.000000000 +0100
3488@@ -39,6 +39,7 @@
3489 #include <linux/random.h>
3490 #include <linux/notifier.h>
3491 #include <linux/kprobes.h>
3492+#include <linux/kdebug.h>
3493
3494 #include <asm/uaccess.h>
3495 #include <asm/pgtable.h>
3496@@ -49,7 +50,6 @@
3497 #include <asm/mmu_context.h>
3498 #include <asm/pda.h>
3499 #include <asm/prctl.h>
3500-#include <asm/kdebug.h>
3501 #include <xen/interface/platform.h>
3502 #include <xen/interface/physdev.h>
3503 #include <xen/interface/vcpu.h>
3504@@ -232,16 +232,18 @@ void __cpuinit select_idle_routine(const
3505
3506 static int __init idle_setup (char *str)
3507 {
3508- if (!strncmp(str, "poll", 4)) {
3509+ if (!strcmp(str, "poll")) {
3510 printk("using polling idle threads.\n");
3511 pm_idle = poll_idle;
3512- }
3513+ } else if (!strcmp(str, "mwait"))
3514+ force_mwait = 1;
3515+ else
3516+ return -1;
3517
3518 boot_option_idle_override = 1;
3519- return 1;
3520+ return 0;
3521 }
3522-
3523-__setup("idle=", idle_setup);
3524+early_param("idle", idle_setup);
3525
3526 /* Prints also some state that isn't saved in the pt_regs */
3527 void __show_regs(struct pt_regs * regs)
3528@@ -546,7 +548,7 @@ __switch_to(struct task_struct *prev_p,
3529 * The AMD workaround requires it to be after DS reload, or
3530 * after DS has been cleared, which we do in __prepare_arch_switch.
3531 */
3532- if (prev_p->thread_info->status & TS_USEDFPU) {
3533+ if (task_thread_info(prev_p)->status & TS_USEDFPU) {
3534 __save_init_fpu(prev_p); /* _not_ save_init_fpu() */
3535 mcl->op = __HYPERVISOR_fpu_taskswitch;
3536 mcl->args[0] = 1;
3537--- sle11-2009-04-20.orig/arch/x86/kernel/setup_64-xen.c 2009-03-04 11:25:55.000000000 +0100
3538+++ sle11-2009-04-20/arch/x86/kernel/setup_64-xen.c 2008-12-15 11:27:22.000000000 +0100
3539@@ -120,6 +120,8 @@ int bootloader_type;
3540
3541 unsigned long saved_video_mode;
3542
3543+int force_mwait __cpuinitdata;
3544+
3545 /*
3546 * Early DMI memory
3547 */
3548@@ -253,10 +255,10 @@ static void discover_ebda(void)
3549 * there is a real-mode segmented pointer pointing to the
3550 * 4K EBDA area at 0x40E
3551 */
3552- ebda_addr = *(unsigned short *)EBDA_ADDR_POINTER;
3553+ ebda_addr = *(unsigned short *)__va(EBDA_ADDR_POINTER);
3554 ebda_addr <<= 4;
3555
3556- ebda_size = *(unsigned short *)(unsigned long)ebda_addr;
3557+ ebda_size = *(unsigned short *)__va(ebda_addr);
3558
3559 /* Round EBDA up to pages */
3560 if (ebda_size == 0)
3561@@ -410,15 +412,8 @@ void __init setup_arch(char **cmdline_p)
3562 #endif
3563
3564 #ifdef CONFIG_SMP
3565- /*
3566- * But first pinch a few for the stack/trampoline stuff
3567- * FIXME: Don't need the extra page at 4K, but need to fix
3568- * trampoline before removing it. (see the GDT stuff)
3569- */
3570- reserve_bootmem_generic(PAGE_SIZE, PAGE_SIZE);
3571-
3572 /* Reserve SMP trampoline */
3573- reserve_bootmem_generic(SMP_TRAMPOLINE_BASE, PAGE_SIZE);
3574+ reserve_bootmem_generic(SMP_TRAMPOLINE_BASE, 2*PAGE_SIZE);
3575 #endif
3576 #endif
3577
3578@@ -570,8 +565,6 @@ void __init setup_arch(char **cmdline_p)
3579 early_quirks();
3580 #endif
3581
3582- zap_low_mappings(0);
3583-
3584 /*
3585 * set this early, so we dont allocate cpu0
3586 * if MADT list doesnt list BSP first
3587@@ -864,6 +857,10 @@ static void __cpuinit init_amd(struct cp
3588
3589 /* RDTSC can be speculated around */
3590 clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
3591+
3592+ /* Family 10 doesn't support C states in MWAIT so don't use it */
3593+ if (c->x86 == 0x10 && !force_mwait)
3594+ clear_bit(X86_FEATURE_MWAIT, &c->x86_capability);
3595 }
3596
3597 static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
3598@@ -1146,9 +1143,7 @@ void __cpuinit identify_cpu(struct cpuin
3599 #ifdef CONFIG_X86_MCE
3600 mcheck_init(c);
3601 #endif
3602- if (c == &boot_cpu_data)
3603- mtrr_bp_init();
3604- else
3605+ if (c != &boot_cpu_data)
3606 mtrr_ap_init();
3607 #ifdef CONFIG_NUMA
3608 numa_add_cpu(smp_processor_id());
3609@@ -1239,9 +1234,8 @@ static int show_cpuinfo(struct seq_file
3610 "stc",
3611 "100mhzsteps",
3612 "hwpstate",
3613- NULL, /* tsc invariant mapped to constant_tsc */
3614- NULL,
3615- /* nothing */ /* constant_tsc - moved to flags */
3616+ "", /* tsc invariant mapped to constant_tsc */
3617+ /* nothing */
3618 };
3619
3620
3621--- sle11-2009-04-20.orig/arch/x86/kernel/setup64-xen.c 2009-03-04 11:28:34.000000000 +0100
3622+++ sle11-2009-04-20/arch/x86/kernel/setup64-xen.c 2008-12-15 11:27:22.000000000 +0100
3623@@ -113,9 +113,9 @@ void __init setup_per_cpu_areas(void)
3624 if (!NODE_DATA(cpu_to_node(i))) {
3625 printk("cpu with no node %d, num_online_nodes %d\n",
3626 i, num_online_nodes());
3627- ptr = alloc_bootmem(size);
3628+ ptr = alloc_bootmem_pages(size);
3629 } else {
3630- ptr = alloc_bootmem_node(NODE_DATA(cpu_to_node(i)), size);
3631+ ptr = alloc_bootmem_pages_node(NODE_DATA(cpu_to_node(i)), size);
3632 }
3633 if (!ptr)
3634 panic("Cannot allocate cpu data for CPU %d\n", i);
3635@@ -208,6 +208,8 @@ char boot_exception_stacks[(N_EXCEPTION_
3636 __attribute__((section(".bss.page_aligned")));
3637 #endif
3638
3639+extern asmlinkage void ignore_sysret(void);
3640+
3641 /* May not be marked __init: used by software suspend */
3642 void syscall_init(void)
3643 {
3644@@ -219,12 +221,22 @@ void syscall_init(void)
3645 */
3646 wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32);
3647 wrmsrl(MSR_LSTAR, system_call);
3648+ wrmsrl(MSR_CSTAR, ignore_sysret);
3649
3650 /* Flags to clear on syscall */
3651 wrmsrl(MSR_SYSCALL_MASK, EF_TF|EF_DF|EF_IE|0x3000);
3652 #endif
3653 #ifdef CONFIG_IA32_EMULATION
3654 syscall32_cpu_init ();
3655+#else
3656+ {
3657+ static const struct callback_register cstar = {
3658+ .type = CALLBACKTYPE_syscall32,
3659+ .address = (unsigned long)ignore_sysret
3660+ };
3661+ if (HYPERVISOR_callback_op(CALLBACKOP_register, &cstar))
3662+ printk(KERN_WARN "Unable to register CSTAR callback\n");
3663+ }
3664 #endif
3665 }
3666
3667@@ -262,7 +274,6 @@ void __cpuinit cpu_init (void)
3668 /* CPU 0 is initialised in head64.c */
3669 if (cpu != 0) {
3670 pda_init(cpu);
3671- zap_low_mappings(cpu);
3672 }
3673 #ifndef CONFIG_X86_NO_TSS
3674 else
3675--- sle11-2009-04-20.orig/arch/x86/kernel/smp_64-xen.c 2008-12-15 11:26:44.000000000 +0100
3676+++ sle11-2009-04-20/arch/x86/kernel/smp_64-xen.c 2008-12-15 11:27:22.000000000 +0100
3677@@ -14,7 +14,6 @@
3678 #include <linux/mm.h>
3679 #include <linux/delay.h>
3680 #include <linux/spinlock.h>
3681-#include <linux/smp_lock.h>
3682 #include <linux/smp.h>
3683 #include <linux/kernel_stat.h>
3684 #include <linux/mc146818rtc.h>
3685@@ -457,44 +456,36 @@ int smp_call_function (void (*func) (voi
3686 }
3687 EXPORT_SYMBOL(smp_call_function);
3688
3689-void smp_stop_cpu(void)
3690+static void stop_this_cpu(void *dummy)
3691 {
3692- unsigned long flags;
3693+ local_irq_disable();
3694 /*
3695 * Remove this CPU:
3696 */
3697 cpu_clear(smp_processor_id(), cpu_online_map);
3698- local_irq_save(flags);
3699 disable_all_local_evtchn();
3700- local_irq_restore(flags);
3701-}
3702-
3703-static void smp_really_stop_cpu(void *dummy)
3704-{
3705- smp_stop_cpu();
3706 for (;;)
3707 halt();
3708 }
3709
3710 void smp_send_stop(void)
3711 {
3712- int nolock = 0;
3713+ int nolock;
3714+ unsigned long flags;
3715+
3716 #ifndef CONFIG_XEN
3717 if (reboot_force)
3718 return;
3719 #endif
3720+
3721 /* Don't deadlock on the call lock in panic */
3722- if (!spin_trylock(&call_lock)) {
3723- /* ignore locking because we have panicked anyways */
3724- nolock = 1;
3725- }
3726- __smp_call_function(smp_really_stop_cpu, NULL, 0, 0);
3727+ nolock = !spin_trylock(&call_lock);
3728+ local_irq_save(flags);
3729+ __smp_call_function(stop_this_cpu, NULL, 0, 0);
3730 if (!nolock)
3731 spin_unlock(&call_lock);
3732-
3733- local_irq_disable();
3734 disable_all_local_evtchn();
3735- local_irq_enable();
3736+ local_irq_restore(flags);
3737 }
3738
3739 /*
3740--- sle11-2009-04-20.orig/arch/x86/kernel/traps_64-xen.c 2008-12-15 11:26:44.000000000 +0100
3741+++ sle11-2009-04-20/arch/x86/kernel/traps_64-xen.c 2008-12-15 11:27:22.000000000 +0100
3742@@ -32,6 +32,7 @@
3743 #include <linux/unwind.h>
3744 #include <linux/uaccess.h>
3745 #include <linux/bug.h>
3746+#include <linux/kdebug.h>
3747
3748 #include <asm/system.h>
3749 #include <asm/io.h>
3750@@ -39,7 +40,6 @@
3751 #include <asm/debugreg.h>
3752 #include <asm/desc.h>
3753 #include <asm/i387.h>
3754-#include <asm/kdebug.h>
3755 #include <asm/processor.h>
3756 #include <asm/unwind.h>
3757 #include <asm/smp.h>
3758@@ -71,22 +71,6 @@ asmlinkage void alignment_check(void);
3759 asmlinkage void machine_check(void);
3760 asmlinkage void spurious_interrupt_bug(void);
3761
3762-ATOMIC_NOTIFIER_HEAD(die_chain);
3763-EXPORT_SYMBOL(die_chain);
3764-
3765-int register_die_notifier(struct notifier_block *nb)
3766-{
3767- vmalloc_sync_all();
3768- return atomic_notifier_chain_register(&die_chain, nb);
3769-}
3770-EXPORT_SYMBOL(register_die_notifier); /* used modular by kdb */
3771-
3772-int unregister_die_notifier(struct notifier_block *nb)
3773-{
3774- return atomic_notifier_chain_unregister(&die_chain, nb);
3775-}
3776-EXPORT_SYMBOL(unregister_die_notifier); /* used modular by kdb */
3777-
3778 static inline void conditional_sti(struct pt_regs *regs)
3779 {
3780 if (regs->eflags & X86_EFLAGS_IF)
3781@@ -428,8 +412,7 @@ void show_registers(struct pt_regs *regs
3782 const int cpu = smp_processor_id();
3783 struct task_struct *cur = cpu_pda(cpu)->pcurrent;
3784
3785- rsp = regs->rsp;
3786-
3787+ rsp = regs->rsp;
3788 printk("CPU %d ", cpu);
3789 __show_regs(regs);
3790 printk("Process %s (pid: %d, threadinfo %p, task %p)\n",
3791@@ -440,7 +423,6 @@ void show_registers(struct pt_regs *regs
3792 * time of the fault..
3793 */
3794 if (in_kernel) {
3795-
3796 printk("Stack: ");
3797 _show_stack(NULL, regs, (unsigned long*)rsp);
3798
3799@@ -485,13 +467,14 @@ static unsigned int die_nest_count;
3800
3801 unsigned __kprobes long oops_begin(void)
3802 {
3803- int cpu = smp_processor_id();
3804+ int cpu;
3805 unsigned long flags;
3806
3807 oops_enter();
3808
3809 /* racy, but better than risking deadlock. */
3810 local_irq_save(flags);
3811+ cpu = smp_processor_id();
3812 if (!spin_trylock(&die_lock)) {
3813 if (cpu == die_owner)
3814 /* nested oops. should stop eventually */;
3815@@ -585,10 +568,20 @@ static void __kprobes do_trap(int trapnr
3816 {
3817 struct task_struct *tsk = current;
3818
3819- tsk->thread.error_code = error_code;
3820- tsk->thread.trap_no = trapnr;
3821-
3822 if (user_mode(regs)) {
3823+ /*
3824+ * We want error_code and trap_no set for userspace
3825+ * faults and kernelspace faults which result in
3826+ * die(), but not kernelspace faults which are fixed
3827+ * up. die() gives the process no chance to handle
3828+ * the signal and notice the kernel fault information,
3829+ * so that won't result in polluting the information
3830+ * about previously queued, but not yet delivered,
3831+ * faults. See also do_general_protection below.
3832+ */
3833+ tsk->thread.error_code = error_code;
3834+ tsk->thread.trap_no = trapnr;
3835+
3836 if (exception_trace && unhandled_signal(tsk, signr))
3837 printk(KERN_INFO
3838 "%s[%d] trap %s rip:%lx rsp:%lx error:%lx\n",
3839@@ -609,8 +602,11 @@ static void __kprobes do_trap(int trapnr
3840 fixup = search_exception_tables(regs->rip);
3841 if (fixup)
3842 regs->rip = fixup->fixup;
3843- else
3844+ else {
3845+ tsk->thread.error_code = error_code;
3846+ tsk->thread.trap_no = trapnr;
3847 die(str, regs, error_code);
3848+ }
3849 return;
3850 }
3851 }
3852@@ -686,10 +682,10 @@ asmlinkage void __kprobes do_general_pro
3853
3854 conditional_sti(regs);
3855
3856- tsk->thread.error_code = error_code;
3857- tsk->thread.trap_no = 13;
3858-
3859 if (user_mode(regs)) {
3860+ tsk->thread.error_code = error_code;
3861+ tsk->thread.trap_no = 13;
3862+
3863 if (exception_trace && unhandled_signal(tsk, SIGSEGV))
3864 printk(KERN_INFO
3865 "%s[%d] general protection rip:%lx rsp:%lx error:%lx\n",
3866@@ -708,6 +704,9 @@ asmlinkage void __kprobes do_general_pro
3867 regs->rip = fixup->fixup;
3868 return;
3869 }
3870+
3871+ tsk->thread.error_code = error_code;
3872+ tsk->thread.trap_no = 13;
3873 if (notify_die(DIE_GPF, "general protection fault", regs,
3874 error_code, 13, SIGSEGV) == NOTIFY_STOP)
3875 return;
3876--- sle11-2009-04-20.orig/arch/x86/kernel/vsyscall_64-xen.c 2009-03-04 11:25:55.000000000 +0100
3877+++ sle11-2009-04-20/arch/x86/kernel/vsyscall_64-xen.c 2008-12-15 11:27:22.000000000 +0100
3878@@ -45,14 +45,34 @@
3879
3880 #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
3881 #define __syscall_clobber "r11","rcx","memory"
3882+#define __pa_vsymbol(x) \
3883+ ({unsigned long v; \
3884+ extern char __vsyscall_0; \
3885+ asm("" : "=r" (v) : "0" (x)); \
3886+ ((v - VSYSCALL_FIRST_PAGE) + __pa_symbol(&__vsyscall_0)); })
3887
3888+/*
3889+ * vsyscall_gtod_data contains data that is :
3890+ * - readonly from vsyscalls
3891+ * - writen by timer interrupt or systcl (/proc/sys/kernel/vsyscall64)
3892+ * Try to keep this structure as small as possible to avoid cache line ping pongs
3893+ */
3894 struct vsyscall_gtod_data_t {
3895- seqlock_t lock;
3896- int sysctl_enabled;
3897- struct timeval wall_time_tv;
3898+ seqlock_t lock;
3899+
3900+ /* open coded 'struct timespec' */
3901+ time_t wall_time_sec;
3902+ u32 wall_time_nsec;
3903+
3904+ int sysctl_enabled;
3905 struct timezone sys_tz;
3906- cycle_t offset_base;
3907- struct clocksource clock;
3908+ struct { /* extract of a clocksource struct */
3909+ cycle_t (*vread)(void);
3910+ cycle_t cycle_last;
3911+ cycle_t mask;
3912+ u32 mult;
3913+ u32 shift;
3914+ } clock;
3915 };
3916 int __vgetcpu_mode __section_vgetcpu_mode;
3917
3918@@ -68,9 +88,13 @@ void update_vsyscall(struct timespec *wa
3919
3920 write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags);
3921 /* copy vsyscall data */
3922- vsyscall_gtod_data.clock = *clock;
3923- vsyscall_gtod_data.wall_time_tv.tv_sec = wall_time->tv_sec;
3924- vsyscall_gtod_data.wall_time_tv.tv_usec = wall_time->tv_nsec/1000;
3925+ vsyscall_gtod_data.clock.vread = clock->vread;
3926+ vsyscall_gtod_data.clock.cycle_last = clock->cycle_last;
3927+ vsyscall_gtod_data.clock.mask = clock->mask;
3928+ vsyscall_gtod_data.clock.mult = clock->mult;
3929+ vsyscall_gtod_data.clock.shift = clock->shift;
3930+ vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec;
3931+ vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec;
3932 vsyscall_gtod_data.sys_tz = sys_tz;
3933 write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
3934 }
3935@@ -105,7 +129,8 @@ static __always_inline long time_syscall
3936 static __always_inline void do_vgettimeofday(struct timeval * tv)
3937 {
3938 cycle_t now, base, mask, cycle_delta;
3939- unsigned long seq, mult, shift, nsec_delta;
3940+ unsigned seq;
3941+ unsigned long mult, shift, nsec;
3942 cycle_t (*vread)(void);
3943 do {
3944 seq = read_seqbegin(&__vsyscall_gtod_data.lock);
3945@@ -121,21 +146,20 @@ static __always_inline void do_vgettimeo
3946 mult = __vsyscall_gtod_data.clock.mult;
3947 shift = __vsyscall_gtod_data.clock.shift;
3948
3949- *tv = __vsyscall_gtod_data.wall_time_tv;
3950-
3951+ tv->tv_sec = __vsyscall_gtod_data.wall_time_sec;
3952+ nsec = __vsyscall_gtod_data.wall_time_nsec;
3953 } while (read_seqretry(&__vsyscall_gtod_data.lock, seq));
3954
3955 /* calculate interval: */
3956 cycle_delta = (now - base) & mask;
3957 /* convert to nsecs: */
3958- nsec_delta = (cycle_delta * mult) >> shift;
3959+ nsec += (cycle_delta * mult) >> shift;
3960
3961- /* convert to usecs and add to timespec: */
3962- tv->tv_usec += nsec_delta / NSEC_PER_USEC;
3963- while (tv->tv_usec > USEC_PER_SEC) {
3964+ while (nsec >= NSEC_PER_SEC) {
3965 tv->tv_sec += 1;
3966- tv->tv_usec -= USEC_PER_SEC;
3967+ nsec -= NSEC_PER_SEC;
3968 }
3969+ tv->tv_usec = nsec / NSEC_PER_USEC;
3970 }
3971
3972 int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz)
3973@@ -151,11 +175,16 @@ int __vsyscall(0) vgettimeofday(struct t
3974 * unlikely */
3975 time_t __vsyscall(1) vtime(time_t *t)
3976 {
3977+ struct timeval tv;
3978+ time_t result;
3979 if (unlikely(!__vsyscall_gtod_data.sysctl_enabled))
3980 return time_syscall(t);
3981- else if (t)
3982- *t = __vsyscall_gtod_data.wall_time_tv.tv_sec;
3983- return __vsyscall_gtod_data.wall_time_tv.tv_sec;
3984+
3985+ vgettimeofday(&tv, 0);
3986+ result = tv.tv_sec;
3987+ if (t)
3988+ *t = result;
3989+ return result;
3990 }
3991
3992 /* Fast way to get current CPU and node.
3993@@ -224,10 +253,10 @@ static int vsyscall_sysctl_change(ctl_ta
3994 return ret;
3995 /* gcc has some trouble with __va(__pa()), so just do it this
3996 way. */
3997- map1 = ioremap(__pa_symbol(&vsysc1), 2);
3998+ map1 = ioremap(__pa_vsymbol(&vsysc1), 2);
3999 if (!map1)
4000 return -ENOMEM;
4001- map2 = ioremap(__pa_symbol(&vsysc2), 2);
4002+ map2 = ioremap(__pa_vsymbol(&vsysc2), 2);
4003 if (!map2) {
4004 ret = -ENOMEM;
4005 goto out;
4006@@ -304,7 +333,7 @@ static int __cpuinit
4007 cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg)
4008 {
4009 long cpu = (long)arg;
4010- if (action == CPU_ONLINE)
4011+ if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN)
4012 smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 0, 1);
4013 return NOTIFY_DONE;
4014 }
4015--- sle11-2009-04-20.orig/arch/x86/mm/fault_64-xen.c 2009-03-04 11:25:55.000000000 +0100
4016+++ sle11-2009-04-20/arch/x86/mm/fault_64-xen.c 2008-12-15 11:27:22.000000000 +0100
4017@@ -15,22 +15,22 @@
4018 #include <linux/mman.h>
4019 #include <linux/mm.h>
4020 #include <linux/smp.h>
4021-#include <linux/smp_lock.h>
4022 #include <linux/interrupt.h>
4023 #include <linux/init.h>
4024 #include <linux/tty.h>
4025 #include <linux/vt_kern.h> /* For unblank_screen() */
4026 #include <linux/compiler.h>
4027+#include <linux/vmalloc.h>
4028 #include <linux/module.h>
4029 #include <linux/kprobes.h>
4030 #include <linux/uaccess.h>
4031+#include <linux/kdebug.h>
4032
4033 #include <asm/system.h>
4034 #include <asm/pgalloc.h>
4035 #include <asm/smp.h>
4036 #include <asm/tlbflush.h>
4037 #include <asm/proto.h>
4038-#include <asm/kdebug.h>
4039 #include <asm-generic/sections.h>
4040
4041 /* Page fault error code bits */
4042@@ -537,6 +537,12 @@ bad_area:
4043 bad_area_nosemaphore:
4044 /* User mode accesses just cause a SIGSEGV */
4045 if (error_code & PF_USER) {
4046+
4047+ /*
4048+ * It's possible to have interrupts off here.
4049+ */
4050+ local_irq_enable();
4051+
4052 if (is_prefetch(regs, address, error_code))
4053 return;
4054
4055@@ -646,7 +652,7 @@ do_sigbus:
4056 }
4057
4058 DEFINE_SPINLOCK(pgd_lock);
4059-struct page *pgd_list;
4060+LIST_HEAD(pgd_list);
4061
4062 void vmalloc_sync_all(void)
4063 {
4064@@ -666,8 +672,7 @@ void vmalloc_sync_all(void)
4065 if (pgd_none(*pgd_ref))
4066 continue;
4067 spin_lock(&pgd_lock);
4068- for (page = pgd_list; page;
4069- page = (struct page *)page->index) {
4070+ list_for_each_entry(page, &pgd_list, lru) {
4071 pgd_t *pgd;
4072 pgd = (pgd_t *)page_address(page) + pgd_index(address);
4073 if (pgd_none(*pgd))
4074--- sle11-2009-04-20.orig/arch/x86/mm/init_64-xen.c 2009-03-04 11:25:55.000000000 +0100
4075+++ sle11-2009-04-20/arch/x86/mm/init_64-xen.c 2008-12-15 11:27:22.000000000 +0100
4076@@ -25,10 +25,12 @@
4077 #include <linux/bootmem.h>
4078 #include <linux/proc_fs.h>
4079 #include <linux/pci.h>
4080+#include <linux/pfn.h>
4081 #include <linux/poison.h>
4082 #include <linux/dma-mapping.h>
4083 #include <linux/module.h>
4084 #include <linux/memory_hotplug.h>
4085+#include <linux/nmi.h>
4086
4087 #include <asm/processor.h>
4088 #include <asm/system.h>
4089@@ -51,7 +53,7 @@
4090 #define Dprintk(x...)
4091 #endif
4092
4093-struct dma_mapping_ops* dma_ops;
4094+const struct dma_mapping_ops* dma_ops;
4095 EXPORT_SYMBOL(dma_ops);
4096
4097 #if CONFIG_XEN_COMPAT <= 0x030002
4098@@ -191,6 +193,13 @@ void show_mem(void)
4099
4100 for_each_online_pgdat(pgdat) {
4101 for (i = 0; i < pgdat->node_spanned_pages; ++i) {
4102+ /* this loop can take a while with 256 GB and 4k pages
4103+ so update the NMI watchdog */
4104+ if (unlikely(i % MAX_ORDER_NR_PAGES == 0)) {
4105+ touch_nmi_watchdog();
4106+ }
4107+ if (!pfn_valid(pgdat->node_start_pfn + i))
4108+ continue;
4109 page = pfn_to_page(pgdat->node_start_pfn + i);
4110 total++;
4111 if (PageReserved(page))
4112@@ -359,7 +368,7 @@ __set_fixmap (enum fixed_addresses idx,
4113 }
4114 }
4115
4116-unsigned long __initdata table_start, table_end;
4117+unsigned long __meminitdata table_start, table_end;
4118
4119 static __meminit void *alloc_static_page(unsigned long *phys)
4120 {
4121@@ -376,7 +385,7 @@ static __meminit void *alloc_static_page
4122 start_pfn++;
4123 memset((void *)va, 0, PAGE_SIZE);
4124 return (void *)va;
4125-}
4126+}
4127
4128 #define PTE_SIZE PAGE_SIZE
4129
4130@@ -412,28 +421,46 @@ static inline int make_readonly(unsigned
4131
4132 #ifndef CONFIG_XEN
4133 /* Must run before zap_low_mappings */
4134-__init void *early_ioremap(unsigned long addr, unsigned long size)
4135+__meminit void *early_ioremap(unsigned long addr, unsigned long size)
4136 {
4137- unsigned long map = round_down(addr, LARGE_PAGE_SIZE);
4138-
4139- /* actually usually some more */
4140- if (size >= LARGE_PAGE_SIZE) {
4141- return NULL;
4142+ unsigned long vaddr;
4143+ pmd_t *pmd, *last_pmd;
4144+ int i, pmds;
4145+
4146+ pmds = ((addr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE;
4147+ vaddr = __START_KERNEL_map;
4148+ pmd = level2_kernel_pgt;
4149+ last_pmd = level2_kernel_pgt + PTRS_PER_PMD - 1;
4150+ for (; pmd <= last_pmd; pmd++, vaddr += PMD_SIZE) {
4151+ for (i = 0; i < pmds; i++) {
4152+ if (pmd_present(pmd[i]))
4153+ goto next;
4154+ }
4155+ vaddr += addr & ~PMD_MASK;
4156+ addr &= PMD_MASK;
4157+ for (i = 0; i < pmds; i++, addr += PMD_SIZE)
4158+ set_pmd(pmd + i,__pmd(addr | _KERNPG_TABLE | _PAGE_PSE));
4159+ __flush_tlb();
4160+ return (void *)vaddr;
4161+ next:
4162+ ;
4163 }
4164- set_pmd(temp_mappings[0].pmd, __pmd(map | _KERNPG_TABLE | _PAGE_PSE));
4165- map += LARGE_PAGE_SIZE;
4166- set_pmd(temp_mappings[1].pmd, __pmd(map | _KERNPG_TABLE | _PAGE_PSE));
4167- __flush_tlb();
4168- return temp_mappings[0].address + (addr & (LARGE_PAGE_SIZE-1));
4169+ printk("early_ioremap(0x%lx, %lu) failed\n", addr, size);
4170+ return NULL;
4171 }
4172
4173 /* To avoid virtual aliases later */
4174-__init void early_iounmap(void *addr, unsigned long size)
4175+__meminit void early_iounmap(void *addr, unsigned long size)
4176 {
4177- if ((void *)round_down((unsigned long)addr, LARGE_PAGE_SIZE) != temp_mappings[0].address)
4178- printk("early_iounmap: bad address %p\n", addr);
4179- set_pmd(temp_mappings[0].pmd, __pmd(0));
4180- set_pmd(temp_mappings[1].pmd, __pmd(0));
4181+ unsigned long vaddr;
4182+ pmd_t *pmd;
4183+ int i, pmds;
4184+
4185+ vaddr = (unsigned long)addr;
4186+ pmds = ((vaddr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE;
4187+ pmd = level2_kernel_pgt + pmd_index(vaddr);
4188+ for (i = 0; i < pmds; i++)
4189+ pmd_clear(pmd + i);
4190 __flush_tlb();
4191 }
4192 #endif
4193@@ -767,14 +794,6 @@ void __meminit init_memory_mapping(unsig
4194 __flush_tlb_all();
4195 }
4196
4197-void __cpuinit zap_low_mappings(int cpu)
4198-{
4199- /* this is not required for Xen */
4200-#if 0
4201- swap_low_mappings();
4202-#endif
4203-}
4204-
4205 #ifndef CONFIG_NUMA
4206 void __init paging_init(void)
4207 {
4208@@ -960,17 +979,6 @@ void __init mem_init(void)
4209 reservedpages << (PAGE_SHIFT-10),
4210 datasize >> 10,
4211 initsize >> 10);
4212-
4213-#ifndef CONFIG_XEN
4214-#ifdef CONFIG_SMP
4215- /*
4216- * Sync boot_level4_pgt mappings with the init_level4_pgt
4217- * except for the low identity mappings which are already zapped
4218- * in init_level4_pgt. This sync-up is essential for AP's bringup
4219- */
4220- memcpy(boot_level4_pgt+1, init_level4_pgt+1, (PTRS_PER_PGD-1)*sizeof(pgd_t));
4221-#endif
4222-#endif
4223 }
4224
4225 void free_init_pages(char *what, unsigned long begin, unsigned long end)
4226@@ -980,7 +988,7 @@ void free_init_pages(char *what, unsigne
4227 if (begin >= end)
4228 return;
4229
4230- printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10);
4231+ printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);
4232 for (addr = begin; addr < end; addr += PAGE_SIZE) {
4233 ClearPageReserved(virt_to_page(addr));
4234 init_page_count(virt_to_page(addr));
4235@@ -989,24 +997,17 @@ void free_init_pages(char *what, unsigne
4236 if (addr >= __START_KERNEL_map) {
4237 /* make_readonly() reports all kernel addresses. */
4238 __make_page_writable(__va(__pa(addr)));
4239- if (HYPERVISOR_update_va_mapping(addr, __pte(0), 0)) {
4240- pgd_t *pgd = pgd_offset_k(addr);
4241- pud_t *pud = pud_offset(pgd, addr);
4242- pmd_t *pmd = pmd_offset(pud, addr);
4243- pte_t *pte = pte_offset_kernel(pmd, addr);
4244-
4245- xen_l1_entry_update(pte, __pte(0)); /* fallback */
4246- }
4247+ change_page_attr_addr(addr, 1, __pgprot(0));
4248 }
4249 free_page(addr);
4250 totalram_pages++;
4251 }
4252+ if (addr > __START_KERNEL_map)
4253+ global_flush_tlb();
4254 }
4255
4256 void free_initmem(void)
4257 {
4258- memset(__initdata_begin, POISON_FREE_INITDATA,
4259- __initdata_end - __initdata_begin);
4260 free_init_pages("unused kernel memory",
4261 (unsigned long)(&__init_begin),
4262 (unsigned long)(&__init_end));
4263@@ -1016,13 +1017,28 @@ void free_initmem(void)
4264
4265 void mark_rodata_ro(void)
4266 {
4267- unsigned long addr = (unsigned long)__start_rodata;
4268+ unsigned long start = (unsigned long)_stext, end;
4269+
4270+#ifdef CONFIG_HOTPLUG_CPU
4271+ /* It must still be possible to apply SMP alternatives. */
4272+ if (num_possible_cpus() > 1)
4273+ start = (unsigned long)_etext;
4274+#endif
4275+
4276+#ifdef CONFIG_KPROBES
4277+ start = (unsigned long)__start_rodata;
4278+#endif
4279+
4280+ end = (unsigned long)__end_rodata;
4281+ start = (start + PAGE_SIZE - 1) & PAGE_MASK;
4282+ end &= PAGE_MASK;
4283+ if (end <= start)
4284+ return;
4285
4286- for (; addr < (unsigned long)__end_rodata; addr += PAGE_SIZE)
4287- change_page_attr_addr(addr, 1, PAGE_KERNEL_RO);
4288+ change_page_attr_addr(start, (end - start) >> PAGE_SHIFT, PAGE_KERNEL_RO);
4289
4290- printk ("Write protecting the kernel read-only data: %luk\n",
4291- (__end_rodata - __start_rodata) >> 10);
4292+ printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
4293+ (end - start) >> 10);
4294
4295 /*
4296 * change_page_attr_addr() requires a global_flush_tlb() call after it.
4297@@ -1173,3 +1189,11 @@ int in_gate_area_no_task(unsigned long a
4298 {
4299 return (addr >= VSYSCALL_START) && (addr < VSYSCALL_END);
4300 }
4301+
4302+#ifndef CONFIG_XEN
4303+void *alloc_bootmem_high_node(pg_data_t *pgdat, unsigned long size)
4304+{
4305+ return __alloc_bootmem_core(pgdat->bdata, size,
4306+ SMP_CACHE_BYTES, (4UL*1024*1024*1024), 0);
4307+}
4308+#endif
4309--- sle11-2009-04-20.orig/arch/x86/mm/pageattr_64-xen.c 2009-03-04 11:25:55.000000000 +0100
4310+++ sle11-2009-04-20/arch/x86/mm/pageattr_64-xen.c 2008-12-15 11:27:22.000000000 +0100
4311@@ -215,13 +215,13 @@ void mm_pin_all(void)
4312 preempt_enable();
4313 }
4314
4315-void _arch_dup_mmap(struct mm_struct *mm)
4316+void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
4317 {
4318 if (!mm->context.pinned)
4319 mm_pin(mm);
4320 }
4321
4322-void _arch_exit_mmap(struct mm_struct *mm)
4323+void arch_exit_mmap(struct mm_struct *mm)
4324 {
4325 struct task_struct *tsk = current;
4326
4327@@ -343,10 +343,11 @@ static void flush_kernel_map(void *arg)
4328 struct page *pg;
4329
4330 /* When clflush is available always use it because it is
4331- much cheaper than WBINVD */
4332- if (!cpu_has_clflush)
4333+ much cheaper than WBINVD. Disable clflush for now because
4334+ the high level code is not ready yet */
4335+ if (1 || !cpu_has_clflush)
4336 asm volatile("wbinvd" ::: "memory");
4337- list_for_each_entry(pg, l, lru) {
4338+ else list_for_each_entry(pg, l, lru) {
4339 void *adr = page_address(pg);
4340 if (cpu_has_clflush)
4341 cache_flush_page(adr);
4342@@ -460,16 +461,24 @@ __change_page_attr(unsigned long address
4343 */
4344 int change_page_attr_addr(unsigned long address, int numpages, pgprot_t prot)
4345 {
4346- int err = 0;
4347+ int err = 0, kernel_map = 0;
4348 int i;
4349
4350+ if (address >= __START_KERNEL_map
4351+ && address < __START_KERNEL_map + KERNEL_TEXT_SIZE) {
4352+ address = (unsigned long)__va(__pa(address));
4353+ kernel_map = 1;
4354+ }
4355+
4356 down_write(&init_mm.mmap_sem);
4357 for (i = 0; i < numpages; i++, address += PAGE_SIZE) {
4358 unsigned long pfn = __pa(address) >> PAGE_SHIFT;
4359
4360- err = __change_page_attr(address, pfn, prot, PAGE_KERNEL);
4361- if (err)
4362- break;
4363+ if (!kernel_map || pte_present(pfn_pte(0, prot))) {
4364+ err = __change_page_attr(address, pfn, prot, PAGE_KERNEL);
4365+ if (err)
4366+ break;
4367+ }
4368 /* Handle kernel mapping too which aliases part of the
4369 * lowmem */
4370 if (__pa(address) < KERNEL_TEXT_SIZE) {
4371--- sle11-2009-04-20.orig/drivers/char/tpm/tpm_xen.c 2009-03-04 11:25:55.000000000 +0100
4372+++ sle11-2009-04-20/drivers/char/tpm/tpm_xen.c 2008-12-15 11:27:22.000000000 +0100
4373@@ -463,7 +463,7 @@ static int tpmif_connect(struct xenbus_d
4374 tp->backend_id = domid;
4375
4376 err = bind_listening_port_to_irqhandler(
4377- domid, tpmif_int, SA_SAMPLE_RANDOM, "tpmif", tp);
4378+ domid, tpmif_int, IRQF_SAMPLE_RANDOM, "tpmif", tp);
4379 if (err <= 0) {
4380 WPRINTK("bind_listening_port_to_irqhandler failed "
4381 "(err=%d)\n", err);
4382--- sle11-2009-04-20.orig/drivers/pci/msi-xen.c 2009-03-04 11:25:55.000000000 +0100
4383+++ sle11-2009-04-20/drivers/pci/msi-xen.c 2008-12-15 11:27:22.000000000 +0100
4384@@ -12,16 +12,15 @@
4385 #include <linux/interrupt.h>
4386 #include <linux/init.h>
4387 #include <linux/ioport.h>
4388-#include <linux/smp_lock.h>
4389 #include <linux/pci.h>
4390 #include <linux/proc_fs.h>
4391 #include <linux/msi.h>
4392+#include <linux/smp.h>
4393
4394 #include <xen/evtchn.h>
4395
4396 #include <asm/errno.h>
4397 #include <asm/io.h>
4398-#include <asm/smp.h>
4399
4400 #include "pci.h"
4401 #include "msi.h"
4402@@ -154,6 +153,7 @@ int register_msi_get_owner(int (*func)(s
4403 get_owner = func;
4404 return 0;
4405 }
4406+EXPORT_SYMBOL(register_msi_get_owner);
4407
4408 int unregister_msi_get_owner(int (*func)(struct pci_dev *dev))
4409 {
4410@@ -162,6 +162,7 @@ int unregister_msi_get_owner(int (*func)
4411 get_owner = NULL;
4412 return 0;
4413 }
4414+EXPORT_SYMBOL(unregister_msi_get_owner);
4415
4416 static int msi_get_dev_owner(struct pci_dev *dev)
4417 {
4418@@ -263,11 +264,6 @@ static int msi_map_vector(struct pci_dev
4419 return msi_map_pirq_to_vector(dev, -1, entry_nr, table_base);
4420 }
4421
4422-static int msi_init(void)
4423-{
4424- return 0;
4425-}
4426-
4427 #ifdef CONFIG_PM
4428 static void __pci_restore_msi_state(struct pci_dev *dev)
4429 {
4430@@ -434,21 +430,32 @@ static int msix_capability_init(struct p
4431 }
4432
4433 /**
4434- * pci_msi_supported - check whether MSI may be enabled on device
4435+ * pci_msi_check_device - check whether MSI may be enabled on a device
4436 * @dev: pointer to the pci_dev data structure of MSI device function
4437+ * @nvec: how many MSIs have been requested ?
4438+ * @type: are we checking for MSI or MSI-X ?
4439 *
4440 * Look at global flags, the device itself, and its parent busses
4441- * to return 0 if MSI are supported for the device.
4442+ * to determine if MSI/-X are supported for the device. If MSI/-X is
4443+ * supported return 0, else return an error code.
4444 **/
4445-static
4446-int pci_msi_supported(struct pci_dev * dev)
4447+static int pci_msi_check_device(struct pci_dev* dev, int nvec, int type)
4448 {
4449 struct pci_bus *bus;
4450+ int ret;
4451
4452 /* MSI must be globally enabled and supported by the device */
4453 if (!pci_msi_enable || !dev || dev->no_msi)
4454 return -EINVAL;
4455
4456+ /*
4457+ * You can't ask to have 0 or less MSIs configured.
4458+ * a) it's stupid ..
4459+ * b) the list manipulation code assumes nvec >= 1.
4460+ */
4461+ if (nvec < 1)
4462+ return -ERANGE;
4463+
4464 /* Any bridge which does NOT route MSI transactions from it's
4465 * secondary bus to it's primary bus must set NO_MSI flag on
4466 * the secondary pci_bus.
4467@@ -459,6 +466,13 @@ int pci_msi_supported(struct pci_dev * d
4468 if (bus->bus_flags & PCI_BUS_FLAGS_NO_MSI)
4469 return -EINVAL;
4470
4471+ ret = arch_msi_check_device(dev, nvec, type);
4472+ if (ret)
4473+ return ret;
4474+
4475+ if (!pci_find_capability(dev, type))
4476+ return -EINVAL;
4477+
4478 return 0;
4479 }
4480
4481@@ -475,14 +489,11 @@ int pci_msi_supported(struct pci_dev * d
4482 extern int pci_frontend_enable_msi(struct pci_dev *dev);
4483 int pci_enable_msi(struct pci_dev* dev)
4484 {
4485- int pos, temp, status;
4486+ int temp, status;
4487
4488- if (pci_msi_supported(dev) < 0)
4489- return -EINVAL;
4490-
4491- status = msi_init();
4492- if (status < 0)
4493- return status;
4494+ status = pci_msi_check_device(dev, 1, PCI_CAP_ID_MSI);
4495+ if (status)
4496+ return status;
4497
4498 #ifdef CONFIG_XEN_PCIDEV_FRONTEND
4499 if (!is_initial_xendomain())
4500@@ -503,10 +514,6 @@ int pci_enable_msi(struct pci_dev* dev)
4501
4502 temp = dev->irq;
4503
4504- pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
4505- if (!pos)
4506- return -EINVAL;
4507-
4508 /* Check whether driver already requested for MSI-X irqs */
4509 if (dev->msix_enabled) {
4510 printk(KERN_INFO "PCI: %s: Can't enable MSI. "
4511@@ -521,15 +528,14 @@ int pci_enable_msi(struct pci_dev* dev)
4512
4513 return status;
4514 }
4515+EXPORT_SYMBOL(pci_enable_msi);
4516
4517 extern void pci_frontend_disable_msi(struct pci_dev* dev);
4518 void pci_disable_msi(struct pci_dev* dev)
4519 {
4520 int pirq;
4521
4522- if (!pci_msi_enable)
4523- return;
4524- if (!dev)
4525+ if (!pci_msi_enable || !dev)
4526 return;
4527
4528 #ifdef CONFIG_XEN_PCIDEV_FRONTEND
4529@@ -554,6 +560,7 @@ void pci_disable_msi(struct pci_dev* dev
4530 pci_intx(dev, 1); /* enable intx */
4531 dev->msi_enabled = 0;
4532 }
4533+EXPORT_SYMBOL(pci_disable_msi);
4534
4535 /**
4536 * pci_enable_msix - configure device's MSI-X capability structure
4537@@ -578,7 +585,7 @@ int pci_enable_msix(struct pci_dev* dev,
4538 int i, j, temp;
4539 u16 control;
4540
4541- if (!entries || pci_msi_supported(dev) < 0)
4542+ if (!entries)
4543 return -EINVAL;
4544
4545 #ifdef CONFIG_XEN_PCIDEV_FRONTEND
4546@@ -616,14 +623,11 @@ int pci_enable_msix(struct pci_dev* dev,
4547 }
4548 #endif
4549
4550- status = msi_init();
4551- if (status < 0)
4552+ status = pci_msi_check_device(dev, nvec, PCI_CAP_ID_MSIX);
4553+ if (status)
4554 return status;
4555
4556 pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
4557- if (!pos)
4558- return -EINVAL;
4559-
4560 pci_read_config_word(dev, msi_control_reg(pos), &control);
4561 nr_entries = multi_msix_capable(control);
4562 if (nvec > nr_entries)
4563@@ -655,6 +659,7 @@ int pci_enable_msix(struct pci_dev* dev,
4564
4565 return status;
4566 }
4567+EXPORT_SYMBOL(pci_enable_msix);
4568
4569 extern void pci_frontend_disable_msix(struct pci_dev* dev);
4570 void pci_disable_msix(struct pci_dev* dev)
4571@@ -694,6 +699,7 @@ void pci_disable_msix(struct pci_dev* de
4572 pci_intx(dev, 1); /* enable intx */
4573 dev->msix_enabled = 0;
4574 }
4575+EXPORT_SYMBOL(pci_disable_msix);
4576
4577 /**
4578 * msi_remove_pci_irq_vectors - reclaim MSI(X) irqs to unused state
4579@@ -737,12 +743,57 @@ void pci_no_msi(void)
4580 pci_msi_enable = 0;
4581 }
4582
4583-EXPORT_SYMBOL(pci_enable_msi);
4584-EXPORT_SYMBOL(pci_disable_msi);
4585-EXPORT_SYMBOL(pci_enable_msix);
4586-EXPORT_SYMBOL(pci_disable_msix);
4587-#ifdef CONFIG_XEN
4588-EXPORT_SYMBOL(register_msi_get_owner);
4589-EXPORT_SYMBOL(unregister_msi_get_owner);
4590+void pci_msi_init_pci_dev(struct pci_dev *dev)
4591+{
4592+#ifndef CONFIG_XEN
4593+ INIT_LIST_HEAD(&dev->msi_list);
4594 #endif
4595+}
4596+
4597+
4598+/* Arch hooks */
4599+
4600+int __attribute__ ((weak))
4601+arch_msi_check_device(struct pci_dev* dev, int nvec, int type)
4602+{
4603+ return 0;
4604+}
4605+
4606+#ifndef CONFIG_XEN
4607+int __attribute__ ((weak))
4608+arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *entry)
4609+{
4610+ return 0;
4611+}
4612+
4613+int __attribute__ ((weak))
4614+arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
4615+{
4616+ struct msi_desc *entry;
4617+ int ret;
4618
4619+ list_for_each_entry(entry, &dev->msi_list, list) {
4620+ ret = arch_setup_msi_irq(dev, entry);
4621+ if (ret)
4622+ return ret;
4623+ }
4624+
4625+ return 0;
4626+}
4627+
4628+void __attribute__ ((weak)) arch_teardown_msi_irq(unsigned int irq)
4629+{
4630+ return;
4631+}
4632+
4633+void __attribute__ ((weak))
4634+arch_teardown_msi_irqs(struct pci_dev *dev)
4635+{
4636+ struct msi_desc *entry;
4637+
4638+ list_for_each_entry(entry, &dev->msi_list, list) {
4639+ if (entry->irq != 0)
4640+ arch_teardown_msi_irq(entry->irq);
4641+ }
4642+}
4643+#endif
4644--- sle11-2009-04-20.orig/drivers/xen/blkfront/blkfront.c 2009-03-24 10:08:49.000000000 +0100
4645+++ sle11-2009-04-20/drivers/xen/blkfront/blkfront.c 2009-03-24 10:11:24.000000000 +0100
4646@@ -244,7 +244,7 @@ static int setup_blkring(struct xenbus_d
4647 info->ring_ref = err;
4648
4649 err = bind_listening_port_to_irqhandler(
4650- dev->otherend_id, blkif_int, SA_SAMPLE_RANDOM, "blkif", info);
4651+ dev->otherend_id, blkif_int, IRQF_SAMPLE_RANDOM, "blkif", info);
4652 if (err <= 0) {
4653 xenbus_dev_fatal(dev, err,
4654 "bind_listening_port_to_irqhandler");
4655--- sle11-2009-04-20.orig/drivers/xen/char/mem.c 2008-12-15 11:26:44.000000000 +0100
4656+++ sle11-2009-04-20/drivers/xen/char/mem.c 2008-12-15 11:27:22.000000000 +0100
4657@@ -18,7 +18,6 @@
4658 #include <linux/raw.h>
4659 #include <linux/tty.h>
4660 #include <linux/capability.h>
4661-#include <linux/smp_lock.h>
4662 #include <linux/ptrace.h>
4663 #include <linux/device.h>
4664 #include <asm/pgalloc.h>
4665--- sle11-2009-04-20.orig/drivers/xen/core/hypervisor_sysfs.c 2009-04-29 08:44:31.000000000 +0200
4666+++ sle11-2009-04-20/drivers/xen/core/hypervisor_sysfs.c 2008-12-15 11:27:22.000000000 +0100
4667@@ -50,7 +50,7 @@ static int __init hypervisor_subsys_init
4668 if (!is_running_on_xen())
4669 return -ENODEV;
4670
4671- hypervisor_subsys.kset.kobj.ktype = &hyp_sysfs_kobj_type;
4672+ hypervisor_subsys.kobj.ktype = &hyp_sysfs_kobj_type;
4673 return 0;
4674 }
4675
4676--- sle11-2009-04-20.orig/drivers/xen/core/smpboot.c 2009-03-04 11:25:55.000000000 +0100
4677+++ sle11-2009-04-20/drivers/xen/core/smpboot.c 2008-12-15 11:27:22.000000000 +0100
4678@@ -161,13 +161,12 @@ static void xen_smp_intr_exit(unsigned i
4679
4680 void __cpuinit cpu_bringup(void)
4681 {
4682+ cpu_init();
4683 #ifdef __i386__
4684- cpu_set_gdt(current_thread_info()->cpu);
4685- secondary_cpu_init();
4686+ identify_secondary_cpu(cpu_data + smp_processor_id());
4687 #else
4688- cpu_init();
4689-#endif
4690 identify_cpu(cpu_data + smp_processor_id());
4691+#endif
4692 touch_softlockup_watchdog();
4693 preempt_disable();
4694 local_irq_enable();
4695@@ -187,11 +186,6 @@ static void __cpuinit cpu_initialize_con
4696 static DEFINE_SPINLOCK(ctxt_lock);
4697
4698 struct task_struct *idle = idle_task(cpu);
4699-#ifdef __x86_64__
4700- struct desc_ptr *gdt_descr = &cpu_gdt_descr[cpu];
4701-#else
4702- struct Xgt_desc_struct *gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
4703-#endif
4704
4705 if (cpu_test_and_set(cpu, cpu_initialized_map))
4706 return;
4707@@ -214,11 +208,11 @@ static void __cpuinit cpu_initialize_con
4708 smp_trap_init(ctxt.trap_ctxt);
4709
4710 ctxt.ldt_ents = 0;
4711-
4712- ctxt.gdt_frames[0] = virt_to_mfn(gdt_descr->address);
4713- ctxt.gdt_ents = gdt_descr->size / 8;
4714+ ctxt.gdt_ents = GDT_SIZE / 8;
4715
4716 #ifdef __i386__
4717+ ctxt.gdt_frames[0] = virt_to_mfn(get_cpu_gdt_table(cpu));
4718+
4719 ctxt.user_regs.cs = __KERNEL_CS;
4720 ctxt.user_regs.esp = idle->thread.esp0 - sizeof(struct pt_regs);
4721
4722@@ -231,7 +225,11 @@ static void __cpuinit cpu_initialize_con
4723 ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
4724
4725 ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));
4726+
4727+ ctxt.user_regs.fs = __KERNEL_PERCPU;
4728 #else /* __x86_64__ */
4729+ ctxt.gdt_frames[0] = virt_to_mfn(cpu_gdt_descr[cpu].address);
4730+
4731 ctxt.user_regs.cs = __KERNEL_CS;
4732 ctxt.user_regs.esp = idle->thread.rsp0 - sizeof(struct pt_regs);
4733
4734@@ -261,9 +259,8 @@ void __init smp_prepare_cpus(unsigned in
4735 struct vcpu_get_physid cpu_id;
4736 #ifdef __x86_64__
4737 struct desc_ptr *gdt_descr;
4738-#else
4739- struct Xgt_desc_struct *gdt_descr;
4740 #endif
4741+ void *gdt_addr;
4742
4743 apicid = 0;
4744 if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, 0, &cpu_id) == 0)
4745@@ -313,14 +310,12 @@ void __init smp_prepare_cpus(unsigned in
4746 }
4747 gdt_descr->size = GDT_SIZE;
4748 memcpy((void *)gdt_descr->address, cpu_gdt_table, GDT_SIZE);
4749+ gdt_addr = (void *)gdt_descr->address;
4750 #else
4751- if (unlikely(!init_gdt(cpu, idle)))
4752- continue;
4753- gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
4754+ init_gdt(cpu);
4755+ gdt_addr = get_cpu_gdt_table(cpu);
4756 #endif
4757- make_page_readonly(
4758- (void *)gdt_descr->address,
4759- XENFEAT_writable_descriptor_tables);
4760+ make_page_readonly(gdt_addr, XENFEAT_writable_descriptor_tables);
4761
4762 apicid = cpu;
4763 if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, cpu, &cpu_id) == 0)
4764@@ -334,7 +329,9 @@ void __init smp_prepare_cpus(unsigned in
4765 #ifdef __x86_64__
4766 cpu_pda(cpu)->pcurrent = idle;
4767 cpu_pda(cpu)->cpunumber = cpu;
4768- clear_ti_thread_flag(idle->thread_info, TIF_FORK);
4769+ clear_ti_thread_flag(task_thread_info(idle), TIF_FORK);
4770+#else
4771+ per_cpu(current_task, cpu) = idle;
4772 #endif
4773
4774 irq_ctx_init(cpu);
4775@@ -359,8 +356,12 @@ void __init smp_prepare_cpus(unsigned in
4776 #endif
4777 }
4778
4779-void __devinit smp_prepare_boot_cpu(void)
4780+void __init smp_prepare_boot_cpu(void)
4781 {
4782+#ifdef __i386__
4783+ init_gdt(smp_processor_id());
4784+ switch_to_new_gdt();
4785+#endif
4786 prefill_possible_map();
4787 }
4788
4789--- sle11-2009-04-20.orig/drivers/xen/core/xen_sysfs.c 2009-04-29 08:44:31.000000000 +0200
4790+++ sle11-2009-04-20/drivers/xen/core/xen_sysfs.c 2008-12-15 11:27:22.000000000 +0100
4791@@ -29,12 +29,12 @@ HYPERVISOR_ATTR_RO(type);
4792
4793 static int __init xen_sysfs_type_init(void)
4794 {
4795- return sysfs_create_file(&hypervisor_subsys.kset.kobj, &type_attr.attr);
4796+ return sysfs_create_file(&hypervisor_subsys.kobj, &type_attr.attr);
4797 }
4798
4799 static void xen_sysfs_type_destroy(void)
4800 {
4801- sysfs_remove_file(&hypervisor_subsys.kset.kobj, &type_attr.attr);
4802+ sysfs_remove_file(&hypervisor_subsys.kobj, &type_attr.attr);
4803 }
4804
4805 /* xen version attributes */
4806@@ -90,13 +90,13 @@ static struct attribute_group version_gr
4807
4808 static int __init xen_sysfs_version_init(void)
4809 {
4810- return sysfs_create_group(&hypervisor_subsys.kset.kobj,
4811+ return sysfs_create_group(&hypervisor_subsys.kobj,
4812 &version_group);
4813 }
4814
4815 static void xen_sysfs_version_destroy(void)
4816 {
4817- sysfs_remove_group(&hypervisor_subsys.kset.kobj, &version_group);
4818+ sysfs_remove_group(&hypervisor_subsys.kobj, &version_group);
4819 }
4820
4821 /* UUID */
4822@@ -126,12 +126,12 @@ HYPERVISOR_ATTR_RO(uuid);
4823
4824 static int __init xen_sysfs_uuid_init(void)
4825 {
4826- return sysfs_create_file(&hypervisor_subsys.kset.kobj, &uuid_attr.attr);
4827+ return sysfs_create_file(&hypervisor_subsys.kobj, &uuid_attr.attr);
4828 }
4829
4830 static void xen_sysfs_uuid_destroy(void)
4831 {
4832- sysfs_remove_file(&hypervisor_subsys.kset.kobj, &uuid_attr.attr);
4833+ sysfs_remove_file(&hypervisor_subsys.kobj, &uuid_attr.attr);
4834 }
4835
4836 /* xen compilation attributes */
4837@@ -204,13 +204,13 @@ static struct attribute_group xen_compil
4838
4839 int __init static xen_compilation_init(void)
4840 {
4841- return sysfs_create_group(&hypervisor_subsys.kset.kobj,
4842+ return sysfs_create_group(&hypervisor_subsys.kobj,
4843 &xen_compilation_group);
4844 }
4845
4846 static void xen_compilation_destroy(void)
4847 {
4848- sysfs_remove_group(&hypervisor_subsys.kset.kobj,
4849+ sysfs_remove_group(&hypervisor_subsys.kobj,
4850 &xen_compilation_group);
4851 }
4852
4853@@ -325,13 +325,13 @@ static struct attribute_group xen_proper
4854
4855 static int __init xen_properties_init(void)
4856 {
4857- return sysfs_create_group(&hypervisor_subsys.kset.kobj,
4858+ return sysfs_create_group(&hypervisor_subsys.kobj,
4859 &xen_properties_group);
4860 }
4861
4862 static void xen_properties_destroy(void)
4863 {
4864- sysfs_remove_group(&hypervisor_subsys.kset.kobj,
4865+ sysfs_remove_group(&hypervisor_subsys.kobj,
4866 &xen_properties_group);
4867 }
4868
4869@@ -350,13 +350,13 @@ HYPERVISOR_ATTR_RO(vmcoreinfo);
4870
4871 static int __init xen_sysfs_vmcoreinfo_init(void)
4872 {
4873- return sysfs_create_file(&hypervisor_subsys.kset.kobj,
4874+ return sysfs_create_file(&hypervisor_subsys.kobj,
4875 &vmcoreinfo_attr.attr);
4876 }
4877
4878 static void xen_sysfs_vmcoreinfo_destroy(void)
4879 {
4880- sysfs_remove_file(&hypervisor_subsys.kset.kobj, &vmcoreinfo_attr.attr);
4881+ sysfs_remove_file(&hypervisor_subsys.kobj, &vmcoreinfo_attr.attr);
4882 }
4883
4884 #endif
4885--- sle11-2009-04-20.orig/drivers/xen/netback/netback.c 2009-03-04 11:28:34.000000000 +0100
4886+++ sle11-2009-04-20/drivers/xen/netback/netback.c 2008-12-23 09:33:22.000000000 +0100
4887@@ -195,7 +195,7 @@ static struct sk_buff *netbk_copy_skb(st
4888 goto err;
4889
4890 skb_reserve(nskb, 16 + NET_IP_ALIGN);
4891- headlen = nskb->end - nskb->data;
4892+ headlen = skb_end_pointer(nskb) - nskb->data;
4893 if (headlen > skb_headlen(skb))
4894 headlen = skb_headlen(skb);
4895 ret = skb_copy_bits(skb, 0, __skb_put(nskb, headlen), headlen);
4896@@ -241,11 +241,15 @@ static struct sk_buff *netbk_copy_skb(st
4897 len -= copy;
4898 }
4899
4900+#ifdef NET_SKBUFF_DATA_USES_OFFSET
4901+ offset = 0;
4902+#else
4903 offset = nskb->data - skb->data;
4904+#endif
4905
4906- nskb->h.raw = skb->h.raw + offset;
4907- nskb->nh.raw = skb->nh.raw + offset;
4908- nskb->mac.raw = skb->mac.raw + offset;
4909+ nskb->transport_header = skb->transport_header + offset;
4910+ nskb->network_header = skb->network_header + offset;
4911+ nskb->mac_header = skb->mac_header + offset;
4912
4913 return nskb;
4914
4915@@ -1619,7 +1623,7 @@ static int __init netback_init(void)
4916 (void)bind_virq_to_irqhandler(VIRQ_DEBUG,
4917 0,
4918 netif_be_dbg,
4919- SA_SHIRQ,
4920+ IRQF_SHARED,
4921 "net-be-dbg",
4922 &netif_be_dbg);
4923 #endif
4924--- sle11-2009-04-20.orig/drivers/xen/netfront/netfront.c 2009-03-30 16:35:44.000000000 +0200
4925+++ sle11-2009-04-20/drivers/xen/netfront/netfront.c 2009-03-30 16:36:30.000000000 +0200
4926@@ -513,7 +513,7 @@ static int setup_device(struct xenbus_de
4927 memcpy(netdev->dev_addr, info->mac, ETH_ALEN);
4928
4929 err = bind_listening_port_to_irqhandler(
4930- dev->otherend_id, netif_int, SA_SAMPLE_RANDOM, netdev->name,
4931+ dev->otherend_id, netif_int, IRQF_SAMPLE_RANDOM, netdev->name,
4932 netdev);
4933 if (err < 0)
4934 goto fail;
4935--- sle11-2009-04-20.orig/drivers/xen/pciback/xenbus.c 2009-03-04 11:25:55.000000000 +0100
4936+++ sle11-2009-04-20/drivers/xen/pciback/xenbus.c 2008-12-15 11:27:22.000000000 +0100
4937@@ -99,7 +99,7 @@ static int pciback_do_attach(struct pcib
4938
4939 err = bind_interdomain_evtchn_to_irqhandler(
4940 pdev->xdev->otherend_id, remote_evtchn, pciback_handle_event,
4941- SA_SAMPLE_RANDOM, "pciback", pdev);
4942+ IRQF_SAMPLE_RANDOM, "pciback", pdev);
4943 if (err < 0) {
4944 xenbus_dev_fatal(pdev->xdev, err,
4945 "Error binding event channel to IRQ");
4946--- sle11-2009-04-20.orig/drivers/xen/pcifront/xenbus.c 2009-03-04 11:25:55.000000000 +0100
4947+++ sle11-2009-04-20/drivers/xen/pcifront/xenbus.c 2008-12-15 11:27:22.000000000 +0100
4948@@ -10,10 +10,6 @@
4949 #include <xen/gnttab.h>
4950 #include "pcifront.h"
4951
4952-#ifndef __init_refok
4953-#define __init_refok
4954-#endif
4955-
4956 #define INVALID_GRANT_REF (0)
4957 #define INVALID_EVTCHN (-1)
4958
4959--- sle11-2009-04-20.orig/drivers/xen/scsifront/xenbus.c 2009-03-04 11:25:55.000000000 +0100
4960+++ sle11-2009-04-20/drivers/xen/scsifront/xenbus.c 2008-12-15 11:27:22.000000000 +0100
4961@@ -96,7 +96,7 @@ static int scsifront_alloc_ring(struct v
4962
4963 err = bind_listening_port_to_irqhandler(
4964 dev->otherend_id, scsifront_intr,
4965- SA_SAMPLE_RANDOM, "scsifront", info);
4966+ IRQF_SAMPLE_RANDOM, "scsifront", info);
4967
4968 if (err <= 0) {
4969 xenbus_dev_fatal(dev, err, "bind_listening_port_to_irqhandler");
4970--- sle11-2009-04-20.orig/drivers/xen/sfc_netback/accel_fwd.c 2009-04-29 08:44:31.000000000 +0200
4971+++ sle11-2009-04-20/drivers/xen/sfc_netback/accel_fwd.c 2008-12-15 11:27:22.000000000 +0100
4972@@ -308,7 +308,7 @@ static struct netback_accel *for_a_vnic(
4973 static inline int packet_is_arp_reply(struct sk_buff *skb)
4974 {
4975 return skb->protocol == ntohs(ETH_P_ARP)
4976- && skb->nh.arph->ar_op == ntohs(ARPOP_REPLY);
4977+ && arp_hdr(skb)->ar_op == ntohs(ARPOP_REPLY);
4978 }
4979
4980
4981@@ -392,12 +392,13 @@ void netback_accel_tx_packet(struct sk_b
4982
4983 BUG_ON(fwd_priv == NULL);
4984
4985- if (is_broadcast_ether_addr(skb->mac.raw) && packet_is_arp_reply(skb)) {
4986+ if (is_broadcast_ether_addr(skb_mac_header(skb))
4987+ && packet_is_arp_reply(skb)) {
4988 /*
4989 * update our fast path forwarding to reflect this
4990 * gratuitous ARP
4991 */
4992- mac = skb->mac.raw+ETH_ALEN;
4993+ mac = skb_mac_header(skb)+ETH_ALEN;
4994
4995 DPRINTK("%s: found gratuitous ARP for " MAC_FMT "\n",
4996 __FUNCTION__, MAC_ARG(mac));
4997--- sle11-2009-04-20.orig/drivers/xen/sfc_netback/accel_solarflare.c 2009-04-29 08:44:31.000000000 +0200
4998+++ sle11-2009-04-20/drivers/xen/sfc_netback/accel_solarflare.c 2009-03-30 16:36:16.000000000 +0200
4999@@ -113,7 +113,7 @@ bend_dl_tx_packet(struct efx_dl_device *
5000 BUG_ON(port == NULL);
5001
5002 NETBACK_ACCEL_STATS_OP(global_stats.dl_tx_packets++);
5003- if (skb->mac.raw != NULL)
5004+ if (skb_mac_header_was_set(skb))
5005 netback_accel_tx_packet(skb, port->fwd_priv);
5006 else {
5007 DPRINTK("Ignoring packet with missing mac address\n");
5008--- sle11-2009-04-20.orig/drivers/xen/sfc_netfront/accel_tso.c 2009-03-04 11:28:34.000000000 +0100
5009+++ sle11-2009-04-20/drivers/xen/sfc_netfront/accel_tso.c 2008-12-15 11:27:22.000000000 +0100
5010@@ -33,10 +33,9 @@
cc90b958 5011
5012 #include "accel_tso.h"
5013
5014-#define PTR_DIFF(p1, p2) ((u8*)(p1) - (u8*)(p2))
5015-#define ETH_HDR_LEN(skb) ((skb)->nh.raw - (skb)->data)
5016-#define SKB_TCP_OFF(skb) PTR_DIFF ((skb)->h.th, (skb)->data)
5017-#define SKB_IP_OFF(skb) PTR_DIFF ((skb)->nh.iph, (skb)->data)
5018+#define ETH_HDR_LEN(skb) skb_network_offset(skb)
5019+#define SKB_TCP_OFF(skb) skb_transport_offset(skb)
5020+#define SKB_IP_OFF(skb) skb_network_offset(skb)
5021
5022 /*
5023 * Set a maximum number of buffers in each output packet to make life
5024@@ -114,9 +113,8 @@ struct netfront_accel_tso_state {
5025 static inline void tso_check_safe(struct sk_buff *skb) {
5026 EPRINTK_ON(skb->protocol != htons (ETH_P_IP));
5027 EPRINTK_ON(((struct ethhdr*) skb->data)->h_proto != htons (ETH_P_IP));
5028- EPRINTK_ON(skb->nh.iph->protocol != IPPROTO_TCP);
5029- EPRINTK_ON((SKB_TCP_OFF(skb)
5030- + (skb->h.th->doff << 2u)) > skb_headlen(skb));
5031+ EPRINTK_ON(ip_hdr(skb)->protocol != IPPROTO_TCP);
5032+ EPRINTK_ON((SKB_TCP_OFF(skb) + tcp_hdrlen(skb)) > skb_headlen(skb));
5033 }
5034
5035
5036@@ -129,17 +127,17 @@ static inline void tso_start(struct netf
5037 * All ethernet/IP/TCP headers combined size is TCP header size
5038 * plus offset of TCP header relative to start of packet.
5039 */
5040- st->p.header_length = (skb->h.th->doff << 2u) + SKB_TCP_OFF(skb);
5041+ st->p.header_length = tcp_hdrlen(skb) + SKB_TCP_OFF(skb);
5042 st->p.full_packet_size = (st->p.header_length
5043 + skb_shinfo(skb)->gso_size);
5044 st->p.gso_size = skb_shinfo(skb)->gso_size;
5045
5046- st->p.ip_id = htons(skb->nh.iph->id);
5047- st->seqnum = ntohl(skb->h.th->seq);
5048+ st->p.ip_id = htons(ip_hdr(skb)->id);
5049+ st->seqnum = ntohl(tcp_hdr(skb)->seq);
5050
5051- EPRINTK_ON(skb->h.th->urg);
5052- EPRINTK_ON(skb->h.th->syn);
5053- EPRINTK_ON(skb->h.th->rst);
5054+ EPRINTK_ON(tcp_hdr(skb)->urg);
5055+ EPRINTK_ON(tcp_hdr(skb)->syn);
5056+ EPRINTK_ON(tcp_hdr(skb)->rst);
5057
5058 st->remaining_len = skb->len - st->p.header_length;
5059
5060@@ -258,8 +256,8 @@ int tso_start_new_packet(netfront_accel_
5061 /* This packet will be the last in the TSO burst. */
5062 ip_length = (st->p.header_length - ETH_HDR_LEN(skb)
5063 + st->remaining_len);
5064- tsoh_th->fin = skb->h.th->fin;
5065- tsoh_th->psh = skb->h.th->psh;
5066+ tsoh_th->fin = tcp_hdr(skb)->fin;
5067+ tsoh_th->psh = tcp_hdr(skb)->psh;
5068 }
5069
5070 tsoh_iph->tot_len = htons(ip_length);
5071--- sle11-2009-04-20.orig/drivers/xen/sfc_netfront/accel_vi.c 2009-03-30 16:35:25.000000000 +0200
5072+++ sle11-2009-04-20/drivers/xen/sfc_netfront/accel_vi.c 2009-03-30 16:36:26.000000000 +0200
5073@@ -463,7 +463,7 @@ netfront_accel_enqueue_skb_multi(netfron
5074
5075 if (skb->ip_summed == CHECKSUM_PARTIAL) {
5076 /* Set to zero to encourage falcon to work it out for us */
5077- *(u16*)(skb->h.raw + skb->csum_offset) = 0;
5078+ *(u16*)(skb->head + skb->csum_start + skb->csum_offset) = 0;
5079 }
5080
5081 if (multi_post_start_new_buffer(vnic, &state)) {
5082@@ -582,7 +582,7 @@ netfront_accel_enqueue_skb_single(netfro
5083
5084 if (skb->ip_summed == CHECKSUM_PARTIAL) {
5085 /* Set to zero to encourage falcon to work it out for us */
5086- *(u16*)(skb->h.raw + skb->csum_offset) = 0;
5087+ *(u16*)(skb->head + skb->csum_start + skb->csum_offset) = 0;
5088 }
5089 NETFRONT_ACCEL_PKTBUFF_FOR_EACH_FRAGMENT
5090 (skb, idx, frag_data, frag_len, {
5091--- sle11-2009-04-20.orig/drivers/xen/sfc_netfront/accel_xenbus.c 2009-04-29 08:44:31.000000000 +0200
5092+++ sle11-2009-04-20/drivers/xen/sfc_netfront/accel_xenbus.c 2008-12-15 11:27:22.000000000 +0100
5093@@ -356,7 +356,7 @@ static int vnic_setup_domU_shared_state(
5094 /* Create xenbus msg event channel */
5095 err = bind_listening_port_to_irqhandler
5096 (dev->otherend_id, netfront_accel_msg_channel_irq_from_bend,
5097- SA_SAMPLE_RANDOM, "vnicctrl", vnic);
5098+ IRQF_SAMPLE_RANDOM, "vnicctrl", vnic);
5099 if (err < 0) {
5100 EPRINTK("Couldn't bind msg event channel\n");
5101 goto fail_msg_irq;
5102@@ -367,7 +367,7 @@ static int vnic_setup_domU_shared_state(
5103 /* Create xenbus net event channel */
5104 err = bind_listening_port_to_irqhandler
5105 (dev->otherend_id, netfront_accel_net_channel_irq_from_bend,
5106- SA_SAMPLE_RANDOM, "vnicfront", vnic);
5107+ IRQF_SAMPLE_RANDOM, "vnicfront", vnic);
5108 if (err < 0) {
5109 EPRINTK("Couldn't bind net event channel\n");
5110 goto fail_net_irq;
5111--- sle11-2009-04-20.orig/fs/aio.c 2009-04-29 08:44:31.000000000 +0200
5112+++ sle11-2009-04-20/fs/aio.c 2009-03-24 10:11:37.000000000 +0100
5113@@ -38,7 +38,7 @@
5114
5115 #ifdef CONFIG_EPOLL
5116 #include <linux/poll.h>
5117-#include <linux/eventpoll.h>
5118+#include <linux/anon_inodes.h>
5119 #endif
5120
5121 #if DEBUG > 1
5122@@ -1325,7 +1325,7 @@ static const struct file_operations aioq
5123
5124 /* make_aio_fd:
5125 * Create a file descriptor that can be used to poll the event queue.
5126- * Based and piggybacked on the excellent epoll code.
5127+ * Based on the excellent epoll code.
5128 */
5129
5130 static int make_aio_fd(struct kioctx *ioctx)
5131@@ -1334,7 +1334,8 @@ static int make_aio_fd(struct kioctx *io
5132 struct inode *inode;
5133 struct file *file;
5134
5135- error = ep_getfd(&fd, &inode, &file, NULL, &aioq_fops);
5136+ error = anon_inode_getfd(&fd, &inode, &file, "[aioq]",
5137+ &aioq_fops, ioctx);
5138 if (error)
5139 return error;
5140
5141--- sle11-2009-04-20.orig/include/asm-x86/mach-xen/asm/desc_32.h 2009-03-04 11:25:55.000000000 +0100
5142+++ sle11-2009-04-20/include/asm-x86/mach-xen/asm/desc_32.h 2008-12-15 11:27:22.000000000 +0100
5143@@ -11,23 +11,24 @@
5144
5145 #include <asm/mmu.h>
5146
5147-extern struct desc_struct cpu_gdt_table[GDT_ENTRIES];
5148-
5149 struct Xgt_desc_struct {
5150 unsigned short size;
5151 unsigned long address __attribute__((packed));
5152 unsigned short pad;
5153 } __attribute__ ((packed));
5154
5155-extern struct Xgt_desc_struct idt_descr;
5156-DECLARE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr);
5157-extern struct Xgt_desc_struct early_gdt_descr;
5158+struct gdt_page
5159+{
5160+ struct desc_struct gdt[GDT_ENTRIES];
5161+} __attribute__((aligned(PAGE_SIZE)));
5162+DECLARE_PER_CPU(struct gdt_page, gdt_page);
5163
5164 static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu)
5165 {
5166- return (struct desc_struct *)per_cpu(cpu_gdt_descr, cpu).address;
5167+ return per_cpu(gdt_page, cpu).gdt;
5168 }
5169
5170+extern struct Xgt_desc_struct idt_descr;
5171 extern struct desc_struct idt_table[];
5172 extern void set_intr_gate(unsigned int irq, void * addr);
5173
5174@@ -55,53 +56,32 @@ static inline void pack_gate(__u32 *a, _
5175 #define DESCTYPE_S 0x10 /* !system */
5176
5177 #ifndef CONFIG_XEN
5178-#define load_TR_desc() __asm__ __volatile__("ltr %w0"::"q" (GDT_ENTRY_TSS*8))
5179-
5180-#define load_gdt(dtr) __asm__ __volatile("lgdt %0"::"m" (*dtr))
5181-#define load_idt(dtr) __asm__ __volatile("lidt %0"::"m" (*dtr))
5182+#define load_TR_desc() native_load_tr_desc()
5183+#define load_gdt(dtr) native_load_gdt(dtr)
5184+#define load_idt(dtr) native_load_idt(dtr)
5185 #define load_tr(tr) __asm__ __volatile("ltr %0"::"m" (tr))
5186 #define load_ldt(ldt) __asm__ __volatile("lldt %0"::"m" (ldt))
5187
5188-#define store_gdt(dtr) __asm__ ("sgdt %0":"=m" (*dtr))
5189-#define store_idt(dtr) __asm__ ("sidt %0":"=m" (*dtr))
5190-#define store_tr(tr) __asm__ ("str %0":"=m" (tr))
5191+#define store_gdt(dtr) native_store_gdt(dtr)
5192+#define store_idt(dtr) native_store_idt(dtr)
5193+#define store_tr(tr) (tr = native_store_tr())
5194 #define store_ldt(ldt) __asm__ ("sldt %0":"=m" (ldt))
5195-#endif
5196
5197-#if TLS_SIZE != 24
5198-# error update this code.
5199-#endif
5200-
5201-static inline void load_TLS(struct thread_struct *t, unsigned int cpu)
5202-{
5203-#define C(i) if (HYPERVISOR_update_descriptor(virt_to_machine(&get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i]), \
5204- *(u64 *)&t->tls_array[i]) \
5205- BUG()
5206- C(0); C(1); C(2);
5207-#undef C
5208-}
5209+#define load_TLS(t, cpu) native_load_tls(t, cpu)
5210+#define set_ldt native_set_ldt
5211
5212-#ifndef CONFIG_XEN
5213 #define write_ldt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
5214 #define write_gdt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
5215 #define write_idt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
5216
5217-static inline void write_dt_entry(void *dt, int entry, __u32 entry_a, __u32 entry_b)
5218+static inline void write_dt_entry(struct desc_struct *dt,
5219+ int entry, u32 entry_low, u32 entry_high)
5220 {
5221- __u32 *lp = (__u32 *)((char *)dt + entry*8);
5222- *lp = entry_a;
5223- *(lp+1) = entry_b;
5224+ dt[entry].a = entry_low;
5225+ dt[entry].b = entry_high;
5226 }
5227-#define set_ldt native_set_ldt
5228-#else
5229-extern int write_ldt_entry(void *ldt, int entry, __u32 entry_a, __u32 entry_b);
5230-extern int write_gdt_entry(void *gdt, int entry, __u32 entry_a, __u32 entry_b);
5231-#define set_ldt xen_set_ldt
5232-#endif
5233
5234-#ifndef CONFIG_XEN
5235-static inline fastcall void native_set_ldt(const void *addr,
5236- unsigned int entries)
5237+static inline void native_set_ldt(const void *addr, unsigned int entries)
5238 {
5239 if (likely(entries == 0))
5240 __asm__ __volatile__("lldt %w0"::"q" (0));
5241@@ -116,6 +96,65 @@ static inline fastcall void native_set_l
5242 __asm__ __volatile__("lldt %w0"::"q" (GDT_ENTRY_LDT*8));
5243 }
5244 }
5245+
5246+
5247+static inline void native_load_tr_desc(void)
5248+{
5249+ asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8));
5250+}
5251+
5252+static inline void native_load_gdt(const struct Xgt_desc_struct *dtr)
5253+{
5254+ asm volatile("lgdt %0"::"m" (*dtr));
5255+}
5256+
5257+static inline void native_load_idt(const struct Xgt_desc_struct *dtr)
5258+{
5259+ asm volatile("lidt %0"::"m" (*dtr));
5260+}
5261+
5262+static inline void native_store_gdt(struct Xgt_desc_struct *dtr)
5263+{
5264+ asm ("sgdt %0":"=m" (*dtr));
5265+}
5266+
5267+static inline void native_store_idt(struct Xgt_desc_struct *dtr)
5268+{
5269+ asm ("sidt %0":"=m" (*dtr));
5270+}
5271+
5272+static inline unsigned long native_store_tr(void)
5273+{
5274+ unsigned long tr;
5275+ asm ("str %0":"=r" (tr));
5276+ return tr;
5277+}
5278+
5279+static inline void native_load_tls(struct thread_struct *t, unsigned int cpu)
5280+{
5281+ unsigned int i;
5282+ struct desc_struct *gdt = get_cpu_gdt_table(cpu);
5283+
5284+ for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++)
5285+ gdt[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i];
5286+}
5287+#else
5288+#define load_TLS(t, cpu) xen_load_tls(t, cpu)
5289+#define set_ldt xen_set_ldt
5290+
5291+extern int write_ldt_entry(void *ldt, int entry, __u32 entry_a, __u32 entry_b);
5292+extern int write_gdt_entry(void *gdt, int entry, __u32 entry_a, __u32 entry_b);
5293+
5294+static inline void xen_load_tls(struct thread_struct *t, unsigned int cpu)
5295+{
5296+ unsigned int i;
5297+ struct desc_struct *gdt = get_cpu_gdt_table(cpu) + GDT_ENTRY_TLS_MIN;
5298+
5299+ for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++)
5300+ if (HYPERVISOR_update_descriptor(virt_to_machine(&gdt[i]),
5301+ *(u64 *)&t->tls_array[i]))
5302+ BUG();
5303+}
5304 #endif
5305
5306 #ifndef CONFIG_X86_NO_IDT
5307--- sle11-2009-04-20.orig/include/asm-x86/mach-xen/asm/fixmap_32.h 2008-12-15 11:26:44.000000000 +0100
5308+++ sle11-2009-04-20/include/asm-x86/mach-xen/asm/fixmap_32.h 2008-12-15 11:27:22.000000000 +0100
5309@@ -19,10 +19,8 @@
5310 * the start of the fixmap.
5311 */
5312 extern unsigned long __FIXADDR_TOP;
5313-#ifdef CONFIG_COMPAT_VDSO
5314-#define FIXADDR_USER_START __fix_to_virt(FIX_VDSO)
5315-#define FIXADDR_USER_END __fix_to_virt(FIX_VDSO - 1)
5316-#endif
5317+#define FIXADDR_USER_START __fix_to_virt(FIX_VDSO)
5318+#define FIXADDR_USER_END __fix_to_virt(FIX_VDSO - 1)
5319
5320 #ifndef __ASSEMBLY__
5321 #include <linux/kernel.h>
5322@@ -85,6 +83,9 @@ enum fixed_addresses {
5323 #ifdef CONFIG_PCI_MMCONFIG
5324 FIX_PCIE_MCFG,
5325 #endif
5326+#ifdef CONFIG_PARAVIRT
5327+ FIX_PARAVIRT_BOOTMAP,
5328+#endif
5329 FIX_SHARED_INFO,
5330 #define NR_FIX_ISAMAPS 256
5331 FIX_ISAMAP_END,
5332--- sle11-2009-04-20.orig/include/asm-x86/mach-xen/asm/highmem.h 2008-12-15 11:26:44.000000000 +0100
5333+++ sle11-2009-04-20/include/asm-x86/mach-xen/asm/highmem.h 2008-12-15 11:27:22.000000000 +0100
5334@@ -67,12 +67,18 @@ extern void FASTCALL(kunmap_high(struct
5335
5336 void *kmap(struct page *page);
5337 void kunmap(struct page *page);
5338+void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot);
5339 void *kmap_atomic(struct page *page, enum km_type type);
5340 void *kmap_atomic_pte(struct page *page, enum km_type type);
5341 void kunmap_atomic(void *kvaddr, enum km_type type);
5342 void *kmap_atomic_pfn(unsigned long pfn, enum km_type type);
5343 struct page *kmap_atomic_to_page(void *ptr);
5344
5345+#define kmap_atomic_pte(page, type) \
5346+ kmap_atomic_prot(page, type, \
5347+ test_bit(PG_pinned, &(page)->flags) \
5348+ ? PAGE_KERNEL_RO : kmap_prot)
5349+
5350 #define flush_cache_kmaps() do { } while (0)
5351
5352 void clear_highpage(struct page *);
5353--- sle11-2009-04-20.orig/include/asm-x86/mach-xen/asm/irqflags_32.h 2008-12-15 11:26:44.000000000 +0100
5354+++ sle11-2009-04-20/include/asm-x86/mach-xen/asm/irqflags_32.h 2008-12-15 11:27:22.000000000 +0100
5355@@ -11,6 +11,40 @@
5356 #define _ASM_IRQFLAGS_H
5357
5358 #ifndef __ASSEMBLY__
5359+#define xen_save_fl(void) (current_vcpu_info()->evtchn_upcall_mask)
5360+
5361+#define xen_restore_fl(f) \
5362+do { \
5363+ vcpu_info_t *_vcpu; \
5364+ barrier(); \
5365+ _vcpu = current_vcpu_info(); \
5366+ if ((_vcpu->evtchn_upcall_mask = (f)) == 0) { \
5367+ barrier(); /* unmask then check (avoid races) */\
5368+ if (unlikely(_vcpu->evtchn_upcall_pending)) \
5369+ force_evtchn_callback(); \
5370+ } \
5371+} while (0)
5372+
5373+#define xen_irq_disable() \
5374+do { \
5375+ current_vcpu_info()->evtchn_upcall_mask = 1; \
5376+ barrier(); \
5377+} while (0)
5378+
5379+#define xen_irq_enable() \
5380+do { \
5381+ vcpu_info_t *_vcpu; \
5382+ barrier(); \
5383+ _vcpu = current_vcpu_info(); \
5384+ _vcpu->evtchn_upcall_mask = 0; \
5385+ barrier(); /* unmask then check (avoid races) */ \
5386+ if (unlikely(_vcpu->evtchn_upcall_pending)) \
5387+ force_evtchn_callback(); \
5388+} while (0)
5389+
5390+void xen_safe_halt(void);
5391+
5392+void xen_halt(void);
5393
5394 /*
5395 * The use of 'barrier' in the following reflects their use as local-lock
5396@@ -20,48 +54,31 @@
5397 * includes these barriers, for example.
5398 */
5399
5400-#define __raw_local_save_flags() (current_vcpu_info()->evtchn_upcall_mask)
5401+#define __raw_local_save_flags() xen_save_fl()
5402
5403-#define raw_local_irq_restore(x) \
5404-do { \
5405- vcpu_info_t *_vcpu; \
5406- barrier(); \
5407- _vcpu = current_vcpu_info(); \
5408- if ((_vcpu->evtchn_upcall_mask = (x)) == 0) { \
5409- barrier(); /* unmask then check (avoid races) */ \
5410- if (unlikely(_vcpu->evtchn_upcall_pending)) \
5411- force_evtchn_callback(); \
5412- } \
5413-} while (0)
5414+#define raw_local_irq_restore(flags) xen_restore_fl(flags)
5415
5416-#define raw_local_irq_disable() \
5417-do { \
5418- current_vcpu_info()->evtchn_upcall_mask = 1; \
5419- barrier(); \
5420-} while (0)
5421+#define raw_local_irq_disable() xen_irq_disable()
5422
5423-#define raw_local_irq_enable() \
5424-do { \
5425- vcpu_info_t *_vcpu; \
5426- barrier(); \
5427- _vcpu = current_vcpu_info(); \
5428- _vcpu->evtchn_upcall_mask = 0; \
5429- barrier(); /* unmask then check (avoid races) */ \
5430- if (unlikely(_vcpu->evtchn_upcall_pending)) \
5431- force_evtchn_callback(); \
5432-} while (0)
5433+#define raw_local_irq_enable() xen_irq_enable()
5434
5435 /*
5436 * Used in the idle loop; sti takes one instruction cycle
5437 * to complete:
5438 */
5439-void raw_safe_halt(void);
5440+static inline void raw_safe_halt(void)
5441+{
5442+ xen_safe_halt();
5443+}
5444
5445 /*
5446 * Used when interrupts are already enabled or to
5447 * shutdown the processor:
5448 */
5449-void halt(void);
5450+static inline void halt(void)
5451+{
5452+ xen_halt();
5453+}
5454
5455 /*
5456 * For spinlocks, etc:
5457--- sle11-2009-04-20.orig/include/asm-x86/mach-xen/asm/mmu_context_32.h 2009-03-04 11:25:55.000000000 +0100
5458+++ sle11-2009-04-20/include/asm-x86/mach-xen/asm/mmu_context_32.h 2008-12-15 11:27:22.000000000 +0100
5459@@ -6,6 +6,20 @@
5460 #include <asm/pgalloc.h>
5461 #include <asm/tlbflush.h>
5462
5463+void arch_exit_mmap(struct mm_struct *mm);
5464+void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm);
5465+
5466+void mm_pin(struct mm_struct *mm);
5467+void mm_unpin(struct mm_struct *mm);
5468+void mm_pin_all(void);
5469+
5470+static inline void xen_activate_mm(struct mm_struct *prev,
5471+ struct mm_struct *next)
5472+{
5473+ if (!test_bit(PG_pinned, &virt_to_page(next->pgd)->flags))
5474+ mm_pin(next);
5475+}
5476+
5477 /*
5478 * Used for LDT copy/destruction.
5479 */
5480@@ -37,10 +51,6 @@ static inline void __prepare_arch_switch
5481 : : "r" (0) );
5482 }
5483
5484-extern void mm_pin(struct mm_struct *mm);
5485-extern void mm_unpin(struct mm_struct *mm);
5486-void mm_pin_all(void);
5487-
5488 static inline void switch_mm(struct mm_struct *prev,
5489 struct mm_struct *next,
5490 struct task_struct *tsk)
5491@@ -97,11 +107,10 @@ static inline void switch_mm(struct mm_s
5492 #define deactivate_mm(tsk, mm) \
5493 asm("movl %0,%%gs": :"r" (0));
5494
5495-static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next)
5496-{
5497- if (!test_bit(PG_pinned, &virt_to_page(next->pgd)->flags))
5498- mm_pin(next);
5499- switch_mm(prev, next, NULL);
5500-}
5501+#define activate_mm(prev, next) \
5502+ do { \
5503+ xen_activate_mm(prev, next); \
5504+ switch_mm((prev),(next),NULL); \
5505+ } while(0)
5506
5507 #endif
5508--- sle11-2009-04-20.orig/include/asm-x86/mach-xen/asm/pgalloc_32.h 2009-03-04 11:25:55.000000000 +0100
5509+++ sle11-2009-04-20/include/asm-x86/mach-xen/asm/pgalloc_32.h 2008-12-15 11:27:22.000000000 +0100
5510@@ -1,7 +1,6 @@
5511 #ifndef _I386_PGALLOC_H
5512 #define _I386_PGALLOC_H
5513
5514-#include <asm/fixmap.h>
5515 #include <linux/threads.h>
5516 #include <linux/mm.h> /* for struct page */
5517 #include <asm/io.h> /* for phys_to_virt and page_to_pseudophys */
5518@@ -69,6 +68,4 @@ do { \
5519 #define pud_populate(mm, pmd, pte) BUG()
5520 #endif
5521
5522-#define check_pgt_cache() do { } while (0)
5523-
5524 #endif /* _I386_PGALLOC_H */
5525--- sle11-2009-04-20.orig/include/asm-x86/mach-xen/asm/pgtable-3level.h 2008-12-15 11:26:44.000000000 +0100
5526+++ sle11-2009-04-20/include/asm-x86/mach-xen/asm/pgtable-3level.h 2008-12-15 11:27:22.000000000 +0100
5527@@ -52,32 +52,40 @@ static inline int pte_exec_kernel(pte_t
5528 * value and then use set_pte to update it. -ben
5529 */
5530
5531-static inline void set_pte(pte_t *ptep, pte_t pte)
5532+static inline void xen_set_pte(pte_t *ptep, pte_t pte)
5533 {
5534 ptep->pte_high = pte.pte_high;
5535 smp_wmb();
5536 ptep->pte_low = pte.pte_low;
5537 }
5538-#define set_pte_atomic(pteptr,pteval) \
5539- set_64bit((unsigned long long *)(pteptr),__pte_val(pteval))
5540
5541-#define set_pte_at(_mm,addr,ptep,pteval) do { \
5542- if (((_mm) != current->mm && (_mm) != &init_mm) || \
5543- HYPERVISOR_update_va_mapping((addr), (pteval), 0)) \
5544- set_pte((ptep), (pteval)); \
5545-} while (0)
5546-
5547-#define set_pmd(pmdptr,pmdval) \
5548- xen_l2_entry_update((pmdptr), (pmdval))
5549-#define set_pud(pudptr,pudval) \
5550- xen_l3_entry_update((pudptr), (pudval))
5551+static inline void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
5552+ pte_t *ptep , pte_t pte)
5553+{
5554+ if ((mm != current->mm && mm != &init_mm) ||
5555+ HYPERVISOR_update_va_mapping(addr, pte, 0))
5556+ xen_set_pte(ptep, pte);
5557+}
5558+
5559+static inline void xen_set_pte_atomic(pte_t *ptep, pte_t pte)
5560+{
5561+ set_64bit((unsigned long long *)(ptep),__pte_val(pte));
5562+}
5563+static inline void xen_set_pmd(pmd_t *pmdp, pmd_t pmd)
5564+{
5565+ xen_l2_entry_update(pmdp, pmd);
5566+}
5567+static inline void xen_set_pud(pud_t *pudp, pud_t pud)
5568+{
5569+ xen_l3_entry_update(pudp, pud);
5570+}
5571
5572 /*
5573 * For PTEs and PDEs, we must clear the P-bit first when clearing a page table
5574 * entry, so clear the bottom half first and enforce ordering with a compiler
5575 * barrier.
5576 */
5577-static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
5578+static inline void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
5579 {
5580 if ((mm != current->mm && mm != &init_mm)
5581 || HYPERVISOR_update_va_mapping(addr, __pte(0), 0)) {
5582@@ -87,7 +95,18 @@ static inline void pte_clear(struct mm_s
5583 }
5584 }
5585
5586-#define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0)
5587+static inline void xen_pmd_clear(pmd_t *pmd)
5588+{
5589+ xen_l2_entry_update(pmd, __pmd(0));
5590+}
5591+
5592+#define set_pte(ptep, pte) xen_set_pte(ptep, pte)
5593+#define set_pte_at(mm, addr, ptep, pte) xen_set_pte_at(mm, addr, ptep, pte)
5594+#define set_pte_atomic(ptep, pte) xen_set_pte_atomic(ptep, pte)
5595+#define set_pmd(pmdp, pmd) xen_set_pmd(pmdp, pmd)
5596+#define set_pud(pudp, pud) xen_set_pud(pudp, pud)
5597+#define pte_clear(mm, addr, ptep) xen_pte_clear(mm, addr, ptep)
5598+#define pmd_clear(pmd) xen_pmd_clear(pmd)
5599
5600 /*
5601 * Pentium-II erratum A13: in PAE mode we explicitly have to flush
5602@@ -108,7 +127,8 @@ static inline void pud_clear (pud_t * pu
5603 #define pmd_offset(pud, address) ((pmd_t *) pud_page(*(pud)) + \
5604 pmd_index(address))
5605
5606-static inline pte_t raw_ptep_get_and_clear(pte_t *ptep, pte_t res)
5607+#ifdef CONFIG_SMP
5608+static inline pte_t xen_ptep_get_and_clear(pte_t *ptep, pte_t res)
5609 {
5610 uint64_t val = __pte_val(res);
5611 if (__cmpxchg64(ptep, val, 0) != val) {
5612@@ -119,6 +139,9 @@ static inline pte_t raw_ptep_get_and_cle
5613 }
5614 return res;
5615 }
5616+#else
5617+#define xen_ptep_get_and_clear(xp, pte) xen_local_ptep_get_and_clear(xp, pte)
5618+#endif
5619
5620 #define __HAVE_ARCH_PTEP_CLEAR_FLUSH
5621 #define ptep_clear_flush(vma, addr, ptep) \
5622@@ -165,13 +188,13 @@ extern unsigned long long __supported_pt
5623 static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot)
5624 {
5625 return __pte((((unsigned long long)page_nr << PAGE_SHIFT) |
5626- pgprot_val(pgprot)) & __supported_pte_mask);
5627+ pgprot_val(pgprot)) & __supported_pte_mask);
5628 }
5629
5630 static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot)
5631 {
5632 return __pmd((((unsigned long long)page_nr << PAGE_SHIFT) |
5633- pgprot_val(pgprot)) & __supported_pte_mask);
5634+ pgprot_val(pgprot)) & __supported_pte_mask);
5635 }
5636
5637 /*
5638@@ -191,6 +214,4 @@ static inline pmd_t pfn_pmd(unsigned lon
5639
5640 #define __pmd_free_tlb(tlb, x) do { } while (0)
5641
5642-void vmalloc_sync_all(void);
5643-
5644 #endif /* _I386_PGTABLE_3LEVEL_H */
5645--- sle11-2009-04-20.orig/include/asm-x86/mach-xen/asm/pgtable-3level-defs.h 2009-04-29 08:44:31.000000000 +0200
5646+++ sle11-2009-04-20/include/asm-x86/mach-xen/asm/pgtable-3level-defs.h 2008-12-15 11:27:22.000000000 +0100
5647@@ -1,7 +1,7 @@
5648 #ifndef _I386_PGTABLE_3LEVEL_DEFS_H
5649 #define _I386_PGTABLE_3LEVEL_DEFS_H
5650
5651-#define HAVE_SHARED_KERNEL_PMD 0
5652+#define SHARED_KERNEL_PMD 0
5653
5654 /*
5655 * PGDIR_SHIFT determines what a top-level page table entry can map
5656--- sle11-2009-04-20.orig/include/asm-x86/mach-xen/asm/pgtable_32.h 2009-03-04 11:25:55.000000000 +0100
5657+++ sle11-2009-04-20/include/asm-x86/mach-xen/asm/pgtable_32.h 2008-12-15 11:27:22.000000000 +0100
5658@@ -24,11 +24,11 @@
5659 #include <linux/slab.h>
5660 #include <linux/list.h>
5661 #include <linux/spinlock.h>
5662+#include <linux/sched.h>
5663
5664 /* Is this pagetable pinned? */
5665 #define PG_pinned PG_arch_1
5666
5667-struct mm_struct;
5668 struct vm_area_struct;
5669
5670 /*
5671@@ -38,17 +38,16 @@ struct vm_area_struct;
5672 #define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
5673 extern unsigned long empty_zero_page[1024];
5674 extern pgd_t *swapper_pg_dir;
5675-extern struct kmem_cache *pgd_cache;
5676 extern struct kmem_cache *pmd_cache;
5677 extern spinlock_t pgd_lock;
5678 extern struct page *pgd_list;
5679+void check_pgt_cache(void);
5680
5681 void pmd_ctor(void *, struct kmem_cache *, unsigned long);
5682-void pgd_ctor(void *, struct kmem_cache *, unsigned long);
5683-void pgd_dtor(void *, struct kmem_cache *, unsigned long);
5684 void pgtable_cache_init(void);
5685 void paging_init(void);
5686
5687+
5688 /*
5689 * The Linux x86 paging architecture is 'compile-time dual-mode', it
5690 * implements both the traditional 2-level x86 page tables and the
5691@@ -165,6 +164,7 @@ void paging_init(void);
5692
5693 extern unsigned long long __PAGE_KERNEL, __PAGE_KERNEL_EXEC;
5694 #define __PAGE_KERNEL_RO (__PAGE_KERNEL & ~_PAGE_RW)
5695+#define __PAGE_KERNEL_RX (__PAGE_KERNEL_EXEC & ~_PAGE_RW)
5696 #define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_PCD)
5697 #define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE)
5698 #define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE)
5699@@ -172,6 +172,7 @@ extern unsigned long long __PAGE_KERNEL,
5700 #define PAGE_KERNEL __pgprot(__PAGE_KERNEL)
5701 #define PAGE_KERNEL_RO __pgprot(__PAGE_KERNEL_RO)
5702 #define PAGE_KERNEL_EXEC __pgprot(__PAGE_KERNEL_EXEC)
5703+#define PAGE_KERNEL_RX __pgprot(__PAGE_KERNEL_RX)
5704 #define PAGE_KERNEL_NOCACHE __pgprot(__PAGE_KERNEL_NOCACHE)
5705 #define PAGE_KERNEL_LARGE __pgprot(__PAGE_KERNEL_LARGE)
5706 #define PAGE_KERNEL_LARGE_EXEC __pgprot(__PAGE_KERNEL_LARGE_EXEC)
5707@@ -275,7 +276,13 @@ static inline pte_t pte_mkhuge(pte_t pte
5708 */
5709 #define pte_update(mm, addr, ptep) do { } while (0)
5710 #define pte_update_defer(mm, addr, ptep) do { } while (0)
5711-#define paravirt_map_pt_hook(slot, va, pfn) do { } while (0)
5712+
5713+/* local pte updates need not use xchg for locking */
5714+static inline pte_t xen_local_ptep_get_and_clear(pte_t *ptep, pte_t res)
5715+{
5716+ xen_set_pte(ptep, __pte(0));
5717+ return res;
5718+}
5719
5720 /*
5721 * We only update the dirty/accessed state if we set
5722@@ -286,17 +293,34 @@ static inline pte_t pte_mkhuge(pte_t pte
5723 */
5724 #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
5725 #define ptep_set_access_flags(vma, address, ptep, entry, dirty) \
5726-do { \
5727- if (dirty) \
5728+({ \
5729+ int __changed = !pte_same(*(ptep), entry); \
5730+ if (__changed && (dirty)) \
5731 ptep_establish(vma, address, ptep, entry); \
5732-} while (0)
5733+ __changed; \
5734+})
5735
5736-/*
5737- * We don't actually have these, but we want to advertise them so that
5738- * we can encompass the flush here.
5739- */
5740 #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
5741+#define ptep_test_and_clear_dirty(vma, addr, ptep) ({ \
5742+ int __ret = 0; \
5743+ if (pte_dirty(*(ptep))) \
5744+ __ret = test_and_clear_bit(_PAGE_BIT_DIRTY, \
5745+ &(ptep)->pte_low); \
5746+ if (__ret) \
5747+ pte_update((vma)->vm_mm, addr, ptep); \
5748+ __ret; \
5749+})
5750+
5751 #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
5752+#define ptep_test_and_clear_young(vma, addr, ptep) ({ \
5753+ int __ret = 0; \
5754+ if (pte_young(*(ptep))) \
5755+ __ret = test_and_clear_bit(_PAGE_BIT_ACCESSED, \
5756+ &(ptep)->pte_low); \
5757+ if (__ret) \
5758+ pte_update((vma)->vm_mm, addr, ptep); \
5759+ __ret; \
5760+})
5761
5762 /*
5763 * Rules for using ptep_establish: the pte MUST be a user pte, and
5764@@ -323,7 +347,7 @@ do { \
5765 int __dirty = pte_dirty(__pte); \
5766 __pte = pte_mkclean(__pte); \
5767 if (test_bit(PG_pinned, &virt_to_page((vma)->vm_mm->pgd)->flags)) \
5768- ptep_set_access_flags(vma, address, ptep, __pte, __dirty); \
5769+ (void)ptep_set_access_flags(vma, address, ptep, __pte, __dirty); \
5770 else if (__dirty) \
5771 (ptep)->pte_low = __pte.pte_low; \
5772 __dirty; \
5773@@ -336,7 +360,7 @@ do { \
5774 int __young = pte_young(__pte); \
5775 __pte = pte_mkold(__pte); \
5776 if (test_bit(PG_pinned, &virt_to_page((vma)->vm_mm->pgd)->flags)) \
5777- ptep_set_access_flags(vma, address, ptep, __pte, __young); \
5778+ (void)ptep_set_access_flags(vma, address, ptep, __pte, __young); \
5779 else if (__young) \
5780 (ptep)->pte_low = __pte.pte_low; \
5781 __young; \
5782@@ -349,7 +373,7 @@ static inline pte_t ptep_get_and_clear(s
5783 if (!pte_none(pte)
5784 && (mm != &init_mm
5785 || HYPERVISOR_update_va_mapping(addr, __pte(0), 0))) {
5786- pte = raw_ptep_get_and_clear(ptep, pte);
5787+ pte = xen_ptep_get_and_clear(ptep, pte);
5788 pte_update(mm, addr, ptep);
5789 }
5790 return pte;
5791@@ -491,24 +515,10 @@ extern pte_t *lookup_address(unsigned lo
5792 #endif
5793
5794 #if defined(CONFIG_HIGHPTE)
5795-#define pte_offset_map(dir, address) \
5796-({ \
5797- pte_t *__ptep; \
5798- unsigned pfn = pmd_val(*(dir)) >> PAGE_SHIFT; \
5799- __ptep = (pte_t *)kmap_atomic_pte(pfn_to_page(pfn),KM_PTE0); \
5800- paravirt_map_pt_hook(KM_PTE0,__ptep, pfn); \
5801- __ptep = __ptep + pte_index(address); \
5802- __ptep; \
5803-})
5804-#define pte_offset_map_nested(dir, address) \
5805-({ \
5806- pte_t *__ptep; \
5807- unsigned pfn = pmd_val(*(dir)) >> PAGE_SHIFT; \
5808- __ptep = (pte_t *)kmap_atomic_pte(pfn_to_page(pfn),KM_PTE1); \
5809- paravirt_map_pt_hook(KM_PTE1,__ptep, pfn); \
5810- __ptep = __ptep + pte_index(address); \
5811- __ptep; \
5812-})
5813+#define pte_offset_map(dir, address) \
5814+ ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE0) + pte_index(address))
5815+#define pte_offset_map_nested(dir, address) \
5816+ ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE1) + pte_index(address))
5817 #define pte_unmap(pte) kunmap_atomic(pte, KM_PTE0)
5818 #define pte_unmap_nested(pte) kunmap_atomic(pte, KM_PTE1)
5819 #else
5820@@ -597,10 +607,6 @@ int xen_change_pte_range(struct mm_struc
5821 #define io_remap_pfn_range(vma,from,pfn,size,prot) \
5822 direct_remap_pfn_range(vma,from,pfn,size,prot,DOMID_IO)
5823
5824-#define MK_IOSPACE_PFN(space, pfn) (pfn)
5825-#define GET_IOSPACE(pfn) 0
5826-#define GET_PFN(pfn) (pfn)
5827-
5828 #include <asm-generic/pgtable.h>
5829
5830 #endif /* _I386_PGTABLE_H */
5831--- sle11-2009-04-20.orig/include/asm-x86/mach-xen/asm/processor_32.h 2009-03-04 11:25:55.000000000 +0100
5832+++ sle11-2009-04-20/include/asm-x86/mach-xen/asm/processor_32.h 2008-12-15 11:27:22.000000000 +0100
5833@@ -21,6 +21,7 @@
5834 #include <asm/percpu.h>
5835 #include <linux/cpumask.h>
5836 #include <linux/init.h>
5837+#include <asm/processor-flags.h>
5838 #include <xen/interface/physdev.h>
5839
5840 /* flag for disabling the tsc */
5841@@ -118,7 +119,8 @@ extern char ignore_fpu_irq;
5842
5843 void __init cpu_detect(struct cpuinfo_x86 *c);
5844
5845-extern void identify_cpu(struct cpuinfo_x86 *);
5846+extern void identify_boot_cpu(void);
5847+extern void identify_secondary_cpu(struct cpuinfo_x86 *);
5848 extern void print_cpu_info(struct cpuinfo_x86 *);
5849 extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
5850 extern unsigned short num_cache_leaves;
5851@@ -129,29 +131,8 @@ extern void detect_ht(struct cpuinfo_x86
5852 static inline void detect_ht(struct cpuinfo_x86 *c) {}
5853 #endif
5854
5855-/*
5856- * EFLAGS bits
5857- */
5858-#define X86_EFLAGS_CF 0x00000001 /* Carry Flag */
5859-#define X86_EFLAGS_PF 0x00000004 /* Parity Flag */
5860-#define X86_EFLAGS_AF 0x00000010 /* Auxillary carry Flag */
5861-#define X86_EFLAGS_ZF 0x00000040 /* Zero Flag */
5862-#define X86_EFLAGS_SF 0x00000080 /* Sign Flag */
5863-#define X86_EFLAGS_TF 0x00000100 /* Trap Flag */
5864-#define X86_EFLAGS_IF 0x00000200 /* Interrupt Flag */
5865-#define X86_EFLAGS_DF 0x00000400 /* Direction Flag */
5866-#define X86_EFLAGS_OF 0x00000800 /* Overflow Flag */
5867-#define X86_EFLAGS_IOPL 0x00003000 /* IOPL mask */
5868-#define X86_EFLAGS_NT 0x00004000 /* Nested Task */
5869-#define X86_EFLAGS_RF 0x00010000 /* Resume Flag */
5870-#define X86_EFLAGS_VM 0x00020000 /* Virtual Mode */
5871-#define X86_EFLAGS_AC 0x00040000 /* Alignment Check */
5872-#define X86_EFLAGS_VIF 0x00080000 /* Virtual Interrupt Flag */
5873-#define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */
5874-#define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */
5875-
5876-static inline fastcall void xen_cpuid(unsigned int *eax, unsigned int *ebx,
5877- unsigned int *ecx, unsigned int *edx)
5878+static inline void xen_cpuid(unsigned int *eax, unsigned int *ebx,
5879+ unsigned int *ecx, unsigned int *edx)
5880 {
5881 /* ecx is often an input as well as an output. */
5882 __asm__(XEN_CPUID
5883@@ -165,21 +146,6 @@ static inline fastcall void xen_cpuid(un
5884 #define load_cr3(pgdir) write_cr3(__pa(pgdir))
5885
5886 /*
5887- * Intel CPU features in CR4
5888- */
5889-#define X86_CR4_VME 0x0001 /* enable vm86 extensions */
5890-#define X86_CR4_PVI 0x0002 /* virtual interrupts flag enable */
5891-#define X86_CR4_TSD 0x0004 /* disable time stamp at ipl 3 */
5892-#define X86_CR4_DE 0x0008 /* enable debugging extensions */
5893-#define X86_CR4_PSE 0x0010 /* enable page size extensions */
5894-#define X86_CR4_PAE 0x0020 /* enable physical address extensions */
5895-#define X86_CR4_MCE 0x0040 /* Machine check enable */
5896-#define X86_CR4_PGE 0x0080 /* enable global pages */
5897-#define X86_CR4_PCE 0x0100 /* enable performance counters at ipl 3 */
5898-#define X86_CR4_OSFXSR 0x0200 /* enable fast FPU save and restore */
5899-#define X86_CR4_OSXMMEXCPT 0x0400 /* enable unmasked SSE exceptions */
5900-
5901-/*
5902 * Save the cr4 feature set we're using (ie
5903 * Pentium 4MB enable and PPro Global page
5904 * enable), so that any CPU's that boot up
5905@@ -206,26 +172,6 @@ static inline void clear_in_cr4 (unsigne
5906 }
5907
5908 /*
5909- * NSC/Cyrix CPU configuration register indexes
5910- */
5911-
5912-#define CX86_PCR0 0x20
5913-#define CX86_GCR 0xb8
5914-#define CX86_CCR0 0xc0
5915-#define CX86_CCR1 0xc1
5916-#define CX86_CCR2 0xc2
5917-#define CX86_CCR3 0xc3
5918-#define CX86_CCR4 0xe8
5919-#define CX86_CCR5 0xe9
5920-#define CX86_CCR6 0xea
5921-#define CX86_CCR7 0xeb
5922-#define CX86_PCR1 0xf0
5923-#define CX86_DIR0 0xfe
5924-#define CX86_DIR1 0xff
5925-#define CX86_ARR_BASE 0xc4
5926-#define CX86_RCR_BASE 0xdc
5927-
5928-/*
5929 * NSC/Cyrix CPU indexed register access macros
5930 */
5931
5932@@ -351,7 +297,8 @@ typedef struct {
5933 struct thread_struct;
5934
5935 #ifndef CONFIG_X86_NO_TSS
5936-struct tss_struct {
5937+/* This is the TSS defined by the hardware. */
5938+struct i386_hw_tss {
5939 unsigned short back_link,__blh;
5940 unsigned long esp0;
5941 unsigned short ss0,__ss0h;
5942@@ -375,6 +322,11 @@ struct tss_struct {
5943 unsigned short gs, __gsh;
5944 unsigned short ldt, __ldth;
5945 unsigned short trace, io_bitmap_base;
5946+} __attribute__((packed));
5947+
5948+struct tss_struct {
5949+ struct i386_hw_tss x86_tss;
5950+
5951 /*
5952 * The extra 1 is there because the CPU will access an
5953 * additional byte beyond the end of the IO permission
5954@@ -428,10 +380,11 @@ struct thread_struct {
5955 };
5956
5957 #define INIT_THREAD { \
5958+ .esp0 = sizeof(init_stack) + (long)&init_stack, \
5959 .vm86_info = NULL, \
5960 .sysenter_cs = __KERNEL_CS, \
5961 .io_bitmap_ptr = NULL, \
5962- .fs = __KERNEL_PDA, \
5963+ .fs = __KERNEL_PERCPU, \
5964 }
5965
5966 /*
5967@@ -441,10 +394,12 @@ struct thread_struct {
5968 * be within the limit.
5969 */
5970 #define INIT_TSS { \
5971- .esp0 = sizeof(init_stack) + (long)&init_stack, \
5972- .ss0 = __KERNEL_DS, \
5973- .ss1 = __KERNEL_CS, \
5974- .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, \
5975+ .x86_tss = { \
5976+ .esp0 = sizeof(init_stack) + (long)&init_stack, \
5977+ .ss0 = __KERNEL_DS, \
5978+ .ss1 = __KERNEL_CS, \
5979+ .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, \
5980+ }, \
5981 .io_bitmap = { [ 0 ... IO_BITMAP_LONGS] = ~0 }, \
5982 }
5983
5984@@ -551,38 +506,33 @@ static inline void rep_nop(void)
5985
5986 #define cpu_relax() rep_nop()
5987
5988-#define paravirt_enabled() 0
5989-#define __cpuid xen_cpuid
5990-
5991 #ifndef CONFIG_X86_NO_TSS
5992-static inline void __load_esp0(struct tss_struct *tss, struct thread_struct *thread)
5993+static inline void native_load_esp0(struct tss_struct *tss, struct thread_struct *thread)
5994 {
5995- tss->esp0 = thread->esp0;
5996+ tss->x86_tss.esp0 = thread->esp0;
5997 /* This can only happen when SEP is enabled, no need to test "SEP"arately */
5998- if (unlikely(tss->ss1 != thread->sysenter_cs)) {
5999- tss->ss1 = thread->sysenter_cs;
6000+ if (unlikely(tss->x86_tss.ss1 != thread->sysenter_cs)) {
6001+ tss->x86_tss.ss1 = thread->sysenter_cs;
6002 wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
6003 }
6004 }
6005-#define load_esp0(tss, thread) \
6006- __load_esp0(tss, thread)
6007 #else
6008-#define load_esp0(tss, thread) do { \
6009+#define xen_load_esp0(tss, thread) do { \
6010 if (HYPERVISOR_stack_switch(__KERNEL_DS, (thread)->esp0)) \
6011 BUG(); \
6012 } while (0)
6013 #endif
6014
6015
6016-/*
6017- * These special macros can be used to get or set a debugging register
6018- */
6019-#define get_debugreg(var, register) \
6020- (var) = HYPERVISOR_get_debugreg(register)
6021-#define set_debugreg(value, register) \
6022- WARN_ON(HYPERVISOR_set_debugreg(register, value))
6023+static inline unsigned long xen_get_debugreg(int regno)
6024+{
6025+ return HYPERVISOR_get_debugreg(regno);
6026+}
6027
6028-#define set_iopl_mask xen_set_iopl_mask
6029+static inline void xen_set_debugreg(int regno, unsigned long value)
6030+{
6031+ WARN_ON(HYPERVISOR_set_debugreg(regno, value));
6032+}
6033
6034 /*
6035 * Set IOPL bits in EFLAGS from given mask
6036@@ -597,6 +547,21 @@ static inline void xen_set_iopl_mask(uns
6037 }
6038
6039
6040+#define paravirt_enabled() 0
6041+#define __cpuid xen_cpuid
6042+
6043+#define load_esp0 xen_load_esp0
6044+
6045+/*
6046+ * These special macros can be used to get or set a debugging register
6047+ */
6048+#define get_debugreg(var, register) \
6049+ (var) = xen_get_debugreg(register)
6050+#define set_debugreg(value, register) \
6051+ xen_set_debugreg(register, value)
6052+
6053+#define set_iopl_mask xen_set_iopl_mask
6054+
6055 /*
6056 * Generic CPUID function
6057 * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx
6058@@ -749,8 +714,14 @@ extern unsigned long boot_option_idle_ov
6059 extern void enable_sep_cpu(void);
6060 extern int sysenter_setup(void);
6061
6062-extern int init_gdt(int cpu, struct task_struct *idle);
6063+/* Defined in head.S */
6064+extern struct Xgt_desc_struct early_gdt_descr;
6065+
6066 extern void cpu_set_gdt(int);
6067-extern void secondary_cpu_init(void);
6068+extern void switch_to_new_gdt(void);
6069+extern void cpu_init(void);
6070+extern void init_gdt(int cpu);
6071+
6072+extern int force_mwait;
6073
6074 #endif /* __ASM_I386_PROCESSOR_H */
6075--- sle11-2009-04-20.orig/include/asm-x86/mach-xen/asm/segment_32.h 2009-03-04 11:25:55.000000000 +0100
6076+++ sle11-2009-04-20/include/asm-x86/mach-xen/asm/segment_32.h 2008-12-15 11:27:22.000000000 +0100
6077@@ -39,7 +39,7 @@
6078 * 25 - APM BIOS support
6079 *
6080 * 26 - ESPFIX small SS
6081- * 27 - PDA [ per-cpu private data area ]
6082+ * 27 - per-cpu [ offset to per-cpu data area ]
6083 * 28 - unused
6084 * 29 - unused
6085 * 30 - unused
6086@@ -74,8 +74,12 @@
6087 #define GDT_ENTRY_ESPFIX_SS (GDT_ENTRY_KERNEL_BASE + 14)
6088 #define __ESPFIX_SS (GDT_ENTRY_ESPFIX_SS * 8)
6089
6090-#define GDT_ENTRY_PDA (GDT_ENTRY_KERNEL_BASE + 15)
6091-#define __KERNEL_PDA (GDT_ENTRY_PDA * 8)
6092+#define GDT_ENTRY_PERCPU (GDT_ENTRY_KERNEL_BASE + 15)
6093+#ifdef CONFIG_SMP
6094+#define __KERNEL_PERCPU (GDT_ENTRY_PERCPU * 8)
6095+#else
6096+#define __KERNEL_PERCPU 0
6097+#endif
6098
6099 #define GDT_ENTRY_DOUBLEFAULT_TSS 31
6100
6101--- sle11-2009-04-20.orig/include/asm-x86/mach-xen/asm/smp_32.h 2009-03-04 11:25:55.000000000 +0100
6102+++ sle11-2009-04-20/include/asm-x86/mach-xen/asm/smp_32.h 2008-12-15 11:27:22.000000000 +0100
6103@@ -8,19 +8,15 @@
6104 #include <linux/kernel.h>
6105 #include <linux/threads.h>
6106 #include <linux/cpumask.h>
6107-#include <asm/pda.h>
6108 #endif
6109
6110-#ifdef CONFIG_X86_LOCAL_APIC
6111-#ifndef __ASSEMBLY__
6112-#include <asm/fixmap.h>
6113+#if defined(CONFIG_X86_LOCAL_APIC) && !defined(__ASSEMBLY__)
6114 #include <asm/bitops.h>
6115 #include <asm/mpspec.h>
6116+#include <asm/apic.h>
6117 #ifdef CONFIG_X86_IO_APIC
6118 #include <asm/io_apic.h>
6119 #endif
6120-#include <asm/apic.h>
6121-#endif
6122 #endif
6123
6124 #define BAD_APICID 0xFFu
6125@@ -52,9 +48,76 @@ extern void cpu_exit_clear(void);
6126 extern void cpu_uninit(void);
6127 #endif
6128
6129-#ifndef CONFIG_PARAVIRT
6130+#ifndef CONFIG_XEN
6131+struct smp_ops
6132+{
6133+ void (*smp_prepare_boot_cpu)(void);
6134+ void (*smp_prepare_cpus)(unsigned max_cpus);
6135+ int (*cpu_up)(unsigned cpu);
6136+ void (*smp_cpus_done)(unsigned max_cpus);
6137+
6138+ void (*smp_send_stop)(void);
6139+ void (*smp_send_reschedule)(int cpu);
6140+ int (*smp_call_function_mask)(cpumask_t mask,
6141+ void (*func)(void *info), void *info,
6142+ int wait);
6143+};
6144+
6145+extern struct smp_ops smp_ops;
6146+
6147+static inline void smp_prepare_boot_cpu(void)
6148+{
6149+ smp_ops.smp_prepare_boot_cpu();
6150+}
6151+static inline void smp_prepare_cpus(unsigned int max_cpus)
6152+{
6153+ smp_ops.smp_prepare_cpus(max_cpus);
6154+}
6155+static inline int __cpu_up(unsigned int cpu)
6156+{
6157+ return smp_ops.cpu_up(cpu);
6158+}
6159+static inline void smp_cpus_done(unsigned int max_cpus)
6160+{
6161+ smp_ops.smp_cpus_done(max_cpus);
6162+}
6163+
6164+static inline void smp_send_stop(void)
6165+{
6166+ smp_ops.smp_send_stop();
6167+}
6168+static inline void smp_send_reschedule(int cpu)
6169+{
6170+ smp_ops.smp_send_reschedule(cpu);
6171+}
6172+static inline int smp_call_function_mask(cpumask_t mask,
6173+ void (*func) (void *info), void *info,
6174+ int wait)
6175+{
6176+ return smp_ops.smp_call_function_mask(mask, func, info, wait);
6177+}
6178+
6179+void native_smp_prepare_boot_cpu(void);
6180+void native_smp_prepare_cpus(unsigned int max_cpus);
6181+int native_cpu_up(unsigned int cpunum);
6182+void native_smp_cpus_done(unsigned int max_cpus);
6183+
6184 #define startup_ipi_hook(phys_apicid, start_eip, start_esp) \
6185 do { } while (0)
6186+
6187+#else
6188+
6189+
6190+void xen_smp_send_stop(void);
6191+void xen_smp_send_reschedule(int cpu);
6192+int xen_smp_call_function_mask(cpumask_t mask,
6193+ void (*func) (void *info), void *info,
6194+ int wait);
6195+
6196+#define smp_send_stop xen_smp_send_stop
6197+#define smp_send_reschedule xen_smp_send_reschedule
6198+#define smp_call_function_mask xen_smp_call_function_mask
6199+
6200 #endif
6201
6202 /*
6203@@ -62,7 +125,8 @@ do { } while (0)
6204 * from the initial startup. We map APIC_BASE very early in page_setup(),
6205 * so this is correct in the x86 case.
6206 */
6207-#define raw_smp_processor_id() (read_pda(cpu_number))
6208+DECLARE_PER_CPU(int, cpu_number);
6209+#define raw_smp_processor_id() (x86_read_percpu(cpu_number))
6210
6211 extern cpumask_t cpu_possible_map;
6212 #define cpu_callin_map cpu_possible_map
6213@@ -73,20 +137,6 @@ static inline int num_booting_cpus(void)
6214 return cpus_weight(cpu_possible_map);
6215 }
6216
6217-#ifdef CONFIG_X86_LOCAL_APIC
6218-
6219-#ifdef APIC_DEFINITION
6220-extern int hard_smp_processor_id(void);
6221-#else
6222-#include <mach_apicdef.h>
6223-static inline int hard_smp_processor_id(void)
6224-{
6225- /* we don't want to mark this access volatile - bad code generation */
6226- return GET_APIC_ID(*(unsigned long *)(APIC_BASE+APIC_ID));
6227-}
6228-#endif
6229-#endif
6230-
6231 #define safe_smp_processor_id() smp_processor_id()
6232 extern int __cpu_disable(void);
6233 extern void __cpu_die(unsigned int cpu);
6234@@ -102,10 +152,31 @@ extern unsigned int num_processors;
6235
6236 #define NO_PROC_ID 0xFF /* No processor magic marker */
6237
6238-#endif
6239+#endif /* CONFIG_SMP */
6240
6241 #ifndef __ASSEMBLY__
6242
6243+#ifdef CONFIG_X86_LOCAL_APIC
6244+
6245+#ifdef APIC_DEFINITION
6246+extern int hard_smp_processor_id(void);
6247+#else
6248+#include <mach_apicdef.h>
6249+static inline int hard_smp_processor_id(void)
6250+{
6251+ /* we don't want to mark this access volatile - bad code generation */
6252+ return GET_APIC_ID(*(unsigned long *)(APIC_BASE+APIC_ID));
6253+}
6254+#endif /* APIC_DEFINITION */
6255+
6256+#else /* CONFIG_X86_LOCAL_APIC */
6257+
6258+#ifndef CONFIG_SMP
6259+#define hard_smp_processor_id() 0
6260+#endif
6261+
6262+#endif /* CONFIG_X86_LOCAL_APIC */
6263+
6264 extern u8 apicid_2_node[];
6265
6266 #ifdef CONFIG_X86_LOCAL_APIC
6267--- sle11-2009-04-20.orig/include/asm-x86/mach-xen/asm/system_32.h 2008-12-15 11:26:44.000000000 +0100
6268+++ sle11-2009-04-20/include/asm-x86/mach-xen/asm/system_32.h 2008-12-15 11:27:22.000000000 +0100
6269@@ -4,7 +4,7 @@
6270 #include <linux/kernel.h>
6271 #include <asm/segment.h>
6272 #include <asm/cpufeature.h>
6273-#include <linux/bitops.h> /* for LOCK_PREFIX */
6274+#include <asm/cmpxchg.h>
6275 #include <asm/synch_bitops.h>
6276 #include <asm/hypervisor.h>
6277
6278@@ -90,308 +90,102 @@ __asm__ __volatile__ ("movw %%dx,%1\n\t"
6279 #define savesegment(seg, value) \
6280 asm volatile("mov %%" #seg ",%0":"=rm" (value))
6281
6282-#define read_cr0() ({ \
6283- unsigned int __dummy; \
6284- __asm__ __volatile__( \
6285- "movl %%cr0,%0\n\t" \
6286- :"=r" (__dummy)); \
6287- __dummy; \
6288-})
6289-#define write_cr0(x) \
6290- __asm__ __volatile__("movl %0,%%cr0": :"r" (x))
6291-
6292-#define read_cr2() (current_vcpu_info()->arch.cr2)
6293-#define write_cr2(x) \
6294- __asm__ __volatile__("movl %0,%%cr2": :"r" (x))
6295-
6296-#define read_cr3() ({ \
6297- unsigned int __dummy; \
6298- __asm__ ( \
6299- "movl %%cr3,%0\n\t" \
6300- :"=r" (__dummy)); \
6301- __dummy = xen_cr3_to_pfn(__dummy); \
6302- mfn_to_pfn(__dummy) << PAGE_SHIFT; \
6303-})
6304-#define write_cr3(x) ({ \
6305- unsigned int __dummy = pfn_to_mfn((x) >> PAGE_SHIFT); \
6306- __dummy = xen_pfn_to_cr3(__dummy); \
6307- __asm__ __volatile__("movl %0,%%cr3": :"r" (__dummy)); \
6308-})
6309-#define read_cr4() ({ \
6310- unsigned int __dummy; \
6311- __asm__( \
6312- "movl %%cr4,%0\n\t" \
6313- :"=r" (__dummy)); \
6314- __dummy; \
6315-})
6316-#define read_cr4_safe() ({ \
6317- unsigned int __dummy; \
6318- /* This could fault if %cr4 does not exist */ \
6319- __asm__("1: movl %%cr4, %0 \n" \
6320- "2: \n" \
6321- ".section __ex_table,\"a\" \n" \
6322- ".long 1b,2b \n" \
6323- ".previous \n" \
6324- : "=r" (__dummy): "0" (0)); \
6325- __dummy; \
6326-})
6327-
6328-#define write_cr4(x) \
6329- __asm__ __volatile__("movl %0,%%cr4": :"r" (x))
6330-
6331-#define wbinvd() \
6332- __asm__ __volatile__ ("wbinvd": : :"memory")
6333-
6334-/* Clear the 'TS' bit */
6335-#define clts() (HYPERVISOR_fpu_taskswitch(0))
6336-
6337-/* Set the 'TS' bit */
6338-#define stts() (HYPERVISOR_fpu_taskswitch(1))
6339-
6340-#endif /* __KERNEL__ */
6341-
6342-static inline unsigned long get_limit(unsigned long segment)
6343+static inline void xen_clts(void)
6344 {
6345- unsigned long __limit;
6346- __asm__("lsll %1,%0"
6347- :"=r" (__limit):"r" (segment));
6348- return __limit+1;
6349+ HYPERVISOR_fpu_taskswitch(0);
6350 }
6351
6352-#define nop() __asm__ __volatile__ ("nop")
6353-
6354-#define xchg(ptr,v) ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr))))
6355-
6356-#define tas(ptr) (xchg((ptr),1))
6357-
6358-struct __xchg_dummy { unsigned long a[100]; };
6359-#define __xg(x) ((struct __xchg_dummy *)(x))
6360+static inline unsigned long xen_read_cr0(void)
6361+{
6362+ unsigned long val;
6363+ asm volatile("movl %%cr0,%0\n\t" :"=r" (val));
6364+ return val;
6365+}
6366
6367+static inline void xen_write_cr0(unsigned long val)
6368+{
6369+ asm volatile("movl %0,%%cr0": :"r" (val));
6370+}
6371
6372-#ifdef CONFIG_X86_CMPXCHG64
6373+#define xen_read_cr2() (current_vcpu_info()->arch.cr2)
6374
6375-/*
6376- * The semantics of XCHGCMP8B are a bit strange, this is why
6377- * there is a loop and the loading of %%eax and %%edx has to
6378- * be inside. This inlines well in most cases, the cached
6379- * cost is around ~38 cycles. (in the future we might want
6380- * to do an SIMD/3DNOW!/MMX/FPU 64-bit store here, but that
6381- * might have an implicit FPU-save as a cost, so it's not
6382- * clear which path to go.)
6383- *
6384- * cmpxchg8b must be used with the lock prefix here to allow
6385- * the instruction to be executed atomically, see page 3-102
6386- * of the instruction set reference 24319102.pdf. We need
6387- * the reader side to see the coherent 64bit value.
6388- */
6389-static inline void __set_64bit (unsigned long long * ptr,
6390- unsigned int low, unsigned int high)
6391+static inline void xen_write_cr2(unsigned long val)
6392 {
6393- __asm__ __volatile__ (
6394- "\n1:\t"
6395- "movl (%0), %%eax\n\t"
6396- "movl 4(%0), %%edx\n\t"
6397- "lock cmpxchg8b (%0)\n\t"
6398- "jnz 1b"
6399- : /* no outputs */
6400- : "D"(ptr),
6401- "b"(low),
6402- "c"(high)
6403- : "ax","dx","memory");
6404+ asm volatile("movl %0,%%cr2": :"r" (val));
6405 }
6406
6407-static inline void __set_64bit_constant (unsigned long long *ptr,
6408- unsigned long long value)
6409+static inline unsigned long xen_read_cr3(void)
6410 {
6411- __set_64bit(ptr,(unsigned int)(value), (unsigned int)((value)>>32ULL));
6412+ unsigned long val;
6413+ asm volatile("movl %%cr3,%0\n\t" :"=r" (val));
6414+ return mfn_to_pfn(xen_cr3_to_pfn(val)) << PAGE_SHIFT;
6415 }
6416-#define ll_low(x) *(((unsigned int*)&(x))+0)
6417-#define ll_high(x) *(((unsigned int*)&(x))+1)
6418
6419-static inline void __set_64bit_var (unsigned long long *ptr,
6420- unsigned long long value)
6421+static inline void xen_write_cr3(unsigned long val)
6422 {
6423- __set_64bit(ptr,ll_low(value), ll_high(value));
6424+ val = xen_pfn_to_cr3(pfn_to_mfn(val >> PAGE_SHIFT));
6425+ asm volatile("movl %0,%%cr3": :"r" (val));
6426 }
6427
6428-#define set_64bit(ptr,value) \
6429-(__builtin_constant_p(value) ? \
6430- __set_64bit_constant(ptr, value) : \
6431- __set_64bit_var(ptr, value) )
6432-
6433-#define _set_64bit(ptr,value) \
6434-(__builtin_constant_p(value) ? \
6435- __set_64bit(ptr, (unsigned int)(value), (unsigned int)((value)>>32ULL) ) : \
6436- __set_64bit(ptr, ll_low(value), ll_high(value)) )
6437-
6438-#endif
6439-
6440-/*
6441- * Note: no "lock" prefix even on SMP: xchg always implies lock anyway
6442- * Note 2: xchg has side effect, so that attribute volatile is necessary,
6443- * but generally the primitive is invalid, *ptr is output argument. --ANK
6444- */
6445-static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int size)
6446+static inline unsigned long xen_read_cr4(void)
6447 {
6448- switch (size) {
6449- case 1:
6450- __asm__ __volatile__("xchgb %b0,%1"
6451- :"=q" (x)
6452- :"m" (*__xg(ptr)), "0" (x)
6453- :"memory");
6454- break;
6455- case 2:
6456- __asm__ __volatile__("xchgw %w0,%1"
6457- :"=r" (x)
6458- :"m" (*__xg(ptr)), "0" (x)
6459- :"memory");
6460- break;
6461- case 4:
6462- __asm__ __volatile__("xchgl %0,%1"
6463- :"=r" (x)
6464- :"m" (*__xg(ptr)), "0" (x)
6465- :"memory");
6466- break;
6467- }
6468- return x;
6469+ unsigned long val;
6470+ asm volatile("movl %%cr4,%0\n\t" :"=r" (val));
6471+ return val;
6472 }
6473
6474-/*
6475- * Atomic compare and exchange. Compare OLD with MEM, if identical,
6476- * store NEW in MEM. Return the initial value in MEM. Success is
6477- * indicated by comparing RETURN with OLD.
6478- */
6479-
6480-#ifdef CONFIG_X86_CMPXCHG
6481-#define __HAVE_ARCH_CMPXCHG 1
6482-#define cmpxchg(ptr,o,n)\
6483- ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\
6484- (unsigned long)(n),sizeof(*(ptr))))
6485-#define sync_cmpxchg(ptr,o,n)\
6486- ((__typeof__(*(ptr)))__sync_cmpxchg((ptr),(unsigned long)(o),\
6487- (unsigned long)(n),sizeof(*(ptr))))
6488-#endif
6489+static inline unsigned long xen_read_cr4_safe(void)
6490+{
6491+ unsigned long val;
6492+ /* This could fault if %cr4 does not exist */
6493+ asm("1: movl %%cr4, %0 \n"
6494+ "2: \n"
6495+ ".section __ex_table,\"a\" \n"
6496+ ".long 1b,2b \n"
6497+ ".previous \n"
6498+ : "=r" (val): "0" (0));
6499+ return val;
6500+}
6501
6502-static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
6503- unsigned long new, int size)
6504+static inline void xen_write_cr4(unsigned long val)
6505 {
6506- unsigned long prev;
6507- switch (size) {
6508- case 1:
6509- __asm__ __volatile__(LOCK_PREFIX "cmpxchgb %b1,%2"
6510- : "=a"(prev)
6511- : "q"(new), "m"(*__xg(ptr)), "0"(old)
6512- : "memory");
6513- return prev;
6514- case 2:
6515- __asm__ __volatile__(LOCK_PREFIX "cmpxchgw %w1,%2"
6516- : "=a"(prev)
6517- : "r"(new), "m"(*__xg(ptr)), "0"(old)
6518- : "memory");
6519- return prev;
6520- case 4:
6521- __asm__ __volatile__(LOCK_PREFIX "cmpxchgl %1,%2"
6522- : "=a"(prev)
6523- : "r"(new), "m"(*__xg(ptr)), "0"(old)
6524- : "memory");
6525- return prev;
6526- }
6527- return old;
6528+ asm volatile("movl %0,%%cr4": :"r" (val));
6529 }
6530
6531-/*
6532- * Always use locked operations when touching memory shared with a
6533- * hypervisor, since the system may be SMP even if the guest kernel
6534- * isn't.
6535- */
6536-static inline unsigned long __sync_cmpxchg(volatile void *ptr,
6537- unsigned long old,
6538- unsigned long new, int size)
6539-{
6540- unsigned long prev;
6541- switch (size) {
6542- case 1:
6543- __asm__ __volatile__("lock; cmpxchgb %b1,%2"
6544- : "=a"(prev)
6545- : "q"(new), "m"(*__xg(ptr)), "0"(old)
6546- : "memory");
6547- return prev;
6548- case 2:
6549- __asm__ __volatile__("lock; cmpxchgw %w1,%2"
6550- : "=a"(prev)
6551- : "r"(new), "m"(*__xg(ptr)), "0"(old)
6552- : "memory");
6553- return prev;
6554- case 4:
6555- __asm__ __volatile__("lock; cmpxchgl %1,%2"
6556- : "=a"(prev)
6557- : "r"(new), "m"(*__xg(ptr)), "0"(old)
6558- : "memory");
6559- return prev;
6560- }
6561- return old;
6562+static inline void xen_wbinvd(void)
6563+{
6564+ asm volatile("wbinvd": : :"memory");
6565 }
6566
6567-#ifndef CONFIG_X86_CMPXCHG
6568-/*
6569- * Building a kernel capable running on 80386. It may be necessary to
6570- * simulate the cmpxchg on the 80386 CPU. For that purpose we define
6571- * a function for each of the sizes we support.
6572- */
6573+#define read_cr0() (xen_read_cr0())
6574+#define write_cr0(x) (xen_write_cr0(x))
6575+#define read_cr2() (xen_read_cr2())
6576+#define write_cr2(x) (xen_write_cr2(x))
6577+#define read_cr3() (xen_read_cr3())
6578+#define write_cr3(x) (xen_write_cr3(x))
6579+#define read_cr4() (xen_read_cr4())
6580+#define read_cr4_safe() (xen_read_cr4_safe())
6581+#define write_cr4(x) (xen_write_cr4(x))
6582+#define wbinvd() (xen_wbinvd())
6583
6584-extern unsigned long cmpxchg_386_u8(volatile void *, u8, u8);
6585-extern unsigned long cmpxchg_386_u16(volatile void *, u16, u16);
6586-extern unsigned long cmpxchg_386_u32(volatile void *, u32, u32);
6587-
6588-static inline unsigned long cmpxchg_386(volatile void *ptr, unsigned long old,
6589- unsigned long new, int size)
6590-{
6591- switch (size) {
6592- case 1:
6593- return cmpxchg_386_u8(ptr, old, new);
6594- case 2:
6595- return cmpxchg_386_u16(ptr, old, new);
6596- case 4:
6597- return cmpxchg_386_u32(ptr, old, new);
6598- }
6599- return old;
6600-}
6601-
6602-#define cmpxchg(ptr,o,n) \
6603-({ \
6604- __typeof__(*(ptr)) __ret; \
6605- if (likely(boot_cpu_data.x86 > 3)) \
6606- __ret = __cmpxchg((ptr), (unsigned long)(o), \
6607- (unsigned long)(n), sizeof(*(ptr))); \
6608- else \
6609- __ret = cmpxchg_386((ptr), (unsigned long)(o), \
6610- (unsigned long)(n), sizeof(*(ptr))); \
6611- __ret; \
6612-})
6613-#endif
6614+/* Clear the 'TS' bit */
6615+#define clts() (xen_clts())
6616
6617-#ifdef CONFIG_X86_CMPXCHG64
6618+/* Set the 'TS' bit */
6619+#define stts() (HYPERVISOR_fpu_taskswitch(1))
6620
6621-static inline unsigned long long __cmpxchg64(volatile void *ptr, unsigned long long old,
6622- unsigned long long new)
6623+#endif /* __KERNEL__ */
6624+
6625+static inline unsigned long get_limit(unsigned long segment)
6626 {
6627- unsigned long long prev;
6628- __asm__ __volatile__(LOCK_PREFIX "cmpxchg8b %3"
6629- : "=A"(prev)
6630- : "b"((unsigned long)new),
6631- "c"((unsigned long)(new >> 32)),
6632- "m"(*__xg(ptr)),
6633- "0"(old)
6634- : "memory");
6635- return prev;
6636-}
6637-
6638-#define cmpxchg64(ptr,o,n)\
6639- ((__typeof__(*(ptr)))__cmpxchg64((ptr),(unsigned long long)(o),\
6640- (unsigned long long)(n)))
6641+ unsigned long __limit;
6642+ __asm__("lsll %1,%0"
6643+ :"=r" (__limit):"r" (segment));
6644+ return __limit+1;
6645+}
6646+
6647+#define nop() __asm__ __volatile__ ("nop")
6648
6649-#endif
6650-
6651 /*
6652 * Force strict CPU ordering.
6653 * And yes, this is required on UP too when we're talking
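The xen_read_cr3()/xen_write_cr3() helpers added above have to translate between the guest's pseudo-physical frame numbers and the machine frame numbers that really sit in %cr3. A rough userspace sketch of that round trip follows; the tiny p2m[]/m2p[] tables and the pfn_to_mfn()/mfn_to_pfn() helpers are made-up stand-ins for the real Xen translation tables, not the actual interfaces.

    #include <stdio.h>
    #include <assert.h>

    #define PAGE_SHIFT 12

    /* toy guest-physical <-> machine frame tables */
    static unsigned long p2m[8] = { 5, 3, 7, 1, 0, 6, 2, 4 };
    static unsigned long m2p[8];

    static unsigned long pfn_to_mfn(unsigned long pfn) { return p2m[pfn]; }
    static unsigned long mfn_to_pfn(unsigned long mfn) { return m2p[mfn]; }

    static unsigned long machine_cr3;       /* stands in for the real %cr3 */

    static void toy_write_cr3(unsigned long guest_phys)
    {
        /* the guest must load a machine frame, so translate before writing */
        machine_cr3 = pfn_to_mfn(guest_phys >> PAGE_SHIFT) << PAGE_SHIFT;
    }

    static unsigned long toy_read_cr3(void)
    {
        /* reading translates back into the guest's pseudo-physical view */
        return mfn_to_pfn(machine_cr3 >> PAGE_SHIFT) << PAGE_SHIFT;
    }

    int main(void)
    {
        unsigned long pfn;
        for (pfn = 0; pfn < 8; pfn++)
            m2p[p2m[pfn]] = pfn;            /* build the inverse table */
        toy_write_cr3(3UL << PAGE_SHIFT);
        assert(toy_read_cr3() == (3UL << PAGE_SHIFT));
        printf("cr3 round trip ok: %#lx\n", toy_read_cr3());
        return 0;
    }
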
6654--- sle11-2009-04-20.orig/include/asm-x86/mach-xen/asm/tlbflush_32.h 2009-03-04 11:28:34.000000000 +0100
6655+++ sle11-2009-04-20/include/asm-x86/mach-xen/asm/tlbflush_32.h 2008-12-15 11:27:22.000000000 +0100
6656@@ -29,8 +29,13 @@
6657 * and page-granular flushes are available only on i486 and up.
6658 */
6659
6660+#define TLB_FLUSH_ALL 0xffffffff
6661+
6662+
6663 #ifndef CONFIG_SMP
6664
6665+#include <linux/sched.h>
6666+
6667 #define flush_tlb() __flush_tlb()
6668 #define flush_tlb_all() __flush_tlb_all()
6669 #define local_flush_tlb() __flush_tlb()
6670@@ -55,7 +60,7 @@ static inline void flush_tlb_range(struc
6671 __flush_tlb();
6672 }
6673
6674-#else
6675+#else /* SMP */
6676
6677 #include <asm/smp.h>
6678
6679@@ -84,9 +89,7 @@ struct tlb_state
6680 char __cacheline_padding[L1_CACHE_BYTES-8];
6681 };
6682 DECLARE_PER_CPU(struct tlb_state, cpu_tlbstate);
6683-
6684-
6685-#endif
6686+#endif /* SMP */
6687
6688 #define flush_tlb_kernel_range(start, end) flush_tlb_all()
6689
6690--- sle11-2009-04-20.orig/include/asm-x86/mach-xen/asm/desc_64.h 2008-12-15 11:26:44.000000000 +0100
6691+++ sle11-2009-04-20/include/asm-x86/mach-xen/asm/desc_64.h 2008-12-15 11:27:22.000000000 +0100
6692@@ -127,16 +127,6 @@ static inline void set_ldt_desc(unsigned
6693 DESC_LDT, size * 8 - 1);
6694 }
6695
6696-static inline void set_seg_base(unsigned cpu, int entry, void *base)
6697-{
6698- struct desc_struct *d = &cpu_gdt(cpu)[entry];
6699- u32 addr = (u32)(u64)base;
6700- BUG_ON((u64)base >> 32);
6701- d->base0 = addr & 0xffff;
6702- d->base1 = (addr >> 16) & 0xff;
6703- d->base2 = (addr >> 24) & 0xff;
6704-}
6705-
6706 #define LDT_entry_a(info) \
6707 ((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff))
6708 /* Don't allow setting of the lm bit. It is useless anyways because
6709@@ -165,25 +155,15 @@ static inline void set_seg_base(unsigned
6710 (info)->useable == 0 && \
6711 (info)->lm == 0)
6712
6713-#if TLS_SIZE != 24
6714-# error update this code.
6715-#endif
6716-
6717 static inline void load_TLS(struct thread_struct *t, unsigned int cpu)
6718 {
6719-#if 0
6720+ unsigned int i;
6721 u64 *gdt = (u64 *)(cpu_gdt(cpu) + GDT_ENTRY_TLS_MIN);
6722- gdt[0] = t->tls_array[0];
6723- gdt[1] = t->tls_array[1];
6724- gdt[2] = t->tls_array[2];
6725-#endif
6726-#define C(i) \
6727- if (HYPERVISOR_update_descriptor(virt_to_machine(&cpu_gdt(cpu)[GDT_ENTRY_TLS_MIN + i]), \
6728- t->tls_array[i])) \
6729- BUG();
6730
6731- C(0); C(1); C(2);
6732-#undef C
6733+ for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++)
6734+ if (HYPERVISOR_update_descriptor(virt_to_machine(&gdt[i]),
6735+ t->tls_array[i]))
6736+ BUG();
6737 }
6738
6739 /*
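In the load_TLS() change above, the repeated C(0); C(1); C(2) macro expansion becomes an ordinary loop over GDT_ENTRY_TLS_ENTRIES, with the hypercall result still checked per entry. A minimal userspace sketch of that shape, with hypervisor_update_descriptor() as a stub standing in for HYPERVISOR_update_descriptor() and arbitrary descriptor values:

    #include <stdio.h>
    #include <stdint.h>

    #define GDT_ENTRY_TLS_ENTRIES 3

    /* stub: the real call traps into the hypervisor and may fail */
    static int hypervisor_update_descriptor(uint64_t *slot, uint64_t val)
    {
        *slot = val;
        return 0;                           /* 0 means success, as for the hypercall */
    }

    int main(void)
    {
        uint64_t gdt[GDT_ENTRY_TLS_ENTRIES] = { 0 };
        uint64_t tls_array[GDT_ENTRY_TLS_ENTRIES] = { 0x11, 0x22, 0x33 };
        unsigned int i;

        for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++)
            if (hypervisor_update_descriptor(&gdt[i], tls_array[i]))
                return 1;                   /* the kernel BUG()s at this point */

        for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++)
            printf("gdt[%u] = %#llx\n", i, (unsigned long long)gdt[i]);
        return 0;
    }
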
6740--- sle11-2009-04-20.orig/include/asm-x86/mach-xen/asm/dma-mapping_64.h 2009-03-04 11:25:55.000000000 +0100
6741+++ sle11-2009-04-20/include/asm-x86/mach-xen/asm/dma-mapping_64.h 2008-12-15 11:27:22.000000000 +0100
6742@@ -51,7 +51,7 @@ struct dma_mapping_ops {
6743 };
6744
6745 extern dma_addr_t bad_dma_address;
6746-extern struct dma_mapping_ops* dma_ops;
6747+extern const struct dma_mapping_ops* dma_ops;
6748 extern int iommu_merge;
6749
6750 #if 0
6751--- sle11-2009-04-20.orig/include/asm-x86/mach-xen/asm/fixmap_64.h 2009-03-04 11:28:34.000000000 +0100
6752+++ sle11-2009-04-20/include/asm-x86/mach-xen/asm/fixmap_64.h 2008-12-15 11:27:22.000000000 +0100
6753@@ -15,7 +15,6 @@
6754 #include <asm/apicdef.h>
6755 #include <asm/page.h>
6756 #include <asm/vsyscall.h>
6757-#include <asm/vsyscall32.h>
6758 #include <asm/acpi.h>
6759
6760 /*
6761--- sle11-2009-04-20.orig/include/asm-x86/mach-xen/asm/irqflags_64.h 2009-04-29 08:44:31.000000000 +0200
6762+++ sle11-2009-04-20/include/asm-x86/mach-xen/asm/irqflags_64.h 2008-12-15 11:27:22.000000000 +0100
6763@@ -9,6 +9,7 @@
6764 */
6765 #ifndef _ASM_IRQFLAGS_H
6766 #define _ASM_IRQFLAGS_H
6767+#include <asm/processor-flags.h>
6768
6769 #ifndef __ASSEMBLY__
6770 /*
6771@@ -50,19 +51,19 @@ static inline void raw_local_irq_disable
6772 {
6773 unsigned long flags = __raw_local_save_flags();
6774
6775- raw_local_irq_restore((flags & ~(1 << 9)) | (1 << 18));
6776+ raw_local_irq_restore((flags & ~X86_EFLAGS_IF) | X86_EFLAGS_AC);
6777 }
6778
6779 static inline void raw_local_irq_enable(void)
6780 {
6781 unsigned long flags = __raw_local_save_flags();
6782
6783- raw_local_irq_restore((flags | (1 << 9)) & ~(1 << 18));
6784+ raw_local_irq_restore((flags | X86_EFLAGS_IF) & (~X86_EFLAGS_AC));
6785 }
6786
6787 static inline int raw_irqs_disabled_flags(unsigned long flags)
6788 {
6789- return !(flags & (1<<9)) || (flags & (1 << 18));
6790+ return !(flags & X86_EFLAGS_IF) || (flags & X86_EFLAGS_AC);
6791 }
6792
6793 #else /* CONFIG_X86_VSMP */
6794@@ -118,13 +119,21 @@ static inline int raw_irqs_disabled_flag
6795 * Used in the idle loop; sti takes one instruction cycle
6796 * to complete:
6797 */
6798-void raw_safe_halt(void);
6799+void xen_safe_halt(void);
6800+static inline void raw_safe_halt(void)
6801+{
6802+ xen_safe_halt();
6803+}
6804
6805 /*
6806 * Used when interrupts are already enabled or to
6807 * shutdown the processor:
6808 */
6809-void halt(void);
6810+void xen_halt(void);
6811+static inline void halt(void)
6812+{
6813+ xen_halt();
6814+}
6815
6816 #else /* __ASSEMBLY__: */
6817 # ifdef CONFIG_TRACE_IRQFLAGS
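The CONFIG_X86_VSMP branch above stops open-coding bit 9 and bit 18 of EFLAGS and uses the named X86_EFLAGS_IF/X86_EFLAGS_AC constants pulled in from <asm/processor-flags.h>. A small userspace sketch of the same encoding (interrupts count as disabled when IF is clear or AC is set); the constant values are the architectural EFLAGS bits, everything else is illustrative only:

    #include <stdio.h>

    #define X86_EFLAGS_IF 0x00000200        /* Interrupt Flag */
    #define X86_EFLAGS_AC 0x00040000        /* Alignment Check */

    static unsigned long flags;             /* stands in for the saved EFLAGS word */

    static void toy_irq_disable(void)
    {
        flags = (flags & ~X86_EFLAGS_IF) | X86_EFLAGS_AC;
    }

    static void toy_irq_enable(void)
    {
        flags = (flags | X86_EFLAGS_IF) & ~X86_EFLAGS_AC;
    }

    static int toy_irqs_disabled(unsigned long f)
    {
        return !(f & X86_EFLAGS_IF) || (f & X86_EFLAGS_AC);
    }

    int main(void)
    {
        toy_irq_enable();
        printf("after enable : disabled=%d\n", toy_irqs_disabled(flags));
        toy_irq_disable();
        printf("after disable: disabled=%d\n", toy_irqs_disabled(flags));
        return 0;
    }
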
6818--- sle11-2009-04-20.orig/include/asm-x86/mach-xen/asm/mmu_context_64.h 2009-04-29 08:44:31.000000000 +0200
6819+++ sle11-2009-04-20/include/asm-x86/mach-xen/asm/mmu_context_64.h 2008-12-15 11:27:22.000000000 +0100
6820@@ -9,6 +9,9 @@
6821 #include <asm/pgtable.h>
6822 #include <asm/tlbflush.h>
6823
6824+void arch_exit_mmap(struct mm_struct *mm);
6825+void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm);
6826+
6827 /*
6828 * possibly do the LDT unload here?
6829 */
6830--- sle11-2009-04-20.orig/include/asm-x86/mach-xen/asm/page_64.h 2009-04-29 08:44:31.000000000 +0200
6831+++ sle11-2009-04-20/include/asm-x86/mach-xen/asm/page_64.h 2008-12-15 11:27:22.000000000 +0100
6832@@ -7,6 +7,7 @@
6833 #include <linux/types.h>
6834 #include <asm/bug.h>
6835 #endif
6836+#include <linux/const.h>
6837 #include <xen/interface/xen.h>
6838
6839 /*
00e5a55c 6840@@ -19,18 +20,14 @@
6841
6842 /* PAGE_SHIFT determines the page size */
6843 #define PAGE_SHIFT 12
6844-#ifdef __ASSEMBLY__
6845-#define PAGE_SIZE (0x1 << PAGE_SHIFT)
6846-#else
6847-#define PAGE_SIZE (1UL << PAGE_SHIFT)
6848-#endif
6849+#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT)
6850 #define PAGE_MASK (~(PAGE_SIZE-1))
6851
6852 /* See Documentation/x86_64/mm.txt for a description of the memory map. */
6853 #define __PHYSICAL_MASK_SHIFT 46
6854-#define __PHYSICAL_MASK ((1UL << __PHYSICAL_MASK_SHIFT) - 1)
6855+#define __PHYSICAL_MASK ((_AC(1,UL) << __PHYSICAL_MASK_SHIFT) - 1)
6856 #define __VIRTUAL_MASK_SHIFT 48
6857-#define __VIRTUAL_MASK ((1UL << __VIRTUAL_MASK_SHIFT) - 1)
6858+#define __VIRTUAL_MASK ((_AC(1,UL) << __VIRTUAL_MASK_SHIFT) - 1)
6859
6860 #define PHYSICAL_PAGE_MASK (~(PAGE_SIZE-1) & __PHYSICAL_MASK)
6861
6862@@ -55,10 +52,10 @@
6863 #define N_EXCEPTION_STACKS 5 /* hw limit: 7 */
6864
6865 #define LARGE_PAGE_MASK (~(LARGE_PAGE_SIZE-1))
6866-#define LARGE_PAGE_SIZE (1UL << PMD_SHIFT)
6867+#define LARGE_PAGE_SIZE (_AC(1,UL) << PMD_SHIFT)
6868
6869 #define HPAGE_SHIFT PMD_SHIFT
6870-#define HPAGE_SIZE ((1UL) << HPAGE_SHIFT)
6871+#define HPAGE_SIZE (_AC(1,UL) << HPAGE_SHIFT)
6872 #define HPAGE_MASK (~(HPAGE_SIZE - 1))
6873 #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
6874
6875@@ -152,17 +149,23 @@ static inline pgd_t __pgd(unsigned long
6876
6877 #define __pgprot(x) ((pgprot_t) { (x) } )
6878
6879-#define __PHYSICAL_START ((unsigned long)CONFIG_PHYSICAL_START)
6880-#define __START_KERNEL (__START_KERNEL_map + __PHYSICAL_START)
6881-#define __START_KERNEL_map 0xffffffff80000000UL
6882-#define __PAGE_OFFSET 0xffff880000000000UL
6883+#endif /* !__ASSEMBLY__ */
6884
6885-#else
6886 #define __PHYSICAL_START CONFIG_PHYSICAL_START
6887+#define __KERNEL_ALIGN 0x200000
6888+
6889+/*
6890+ * Make sure kernel is aligned to 2MB address. Catching it at compile
6891+ * time is better. Change your config file and compile the kernel
6892+ * for a 2MB aligned address (CONFIG_PHYSICAL_START)
6893+ */
6894+#if (CONFIG_PHYSICAL_START % __KERNEL_ALIGN) != 0
6895+#error "CONFIG_PHYSICAL_START must be a multiple of 2MB"
6896+#endif
6897+
6898 #define __START_KERNEL (__START_KERNEL_map + __PHYSICAL_START)
6899-#define __START_KERNEL_map 0xffffffff80000000
6900-#define __PAGE_OFFSET 0xffff880000000000
6901-#endif /* !__ASSEMBLY__ */
6902+#define __START_KERNEL_map _AC(0xffffffff80000000, UL)
6903+#define __PAGE_OFFSET _AC(0xffff880000000000, UL)
6904
6905 #if CONFIG_XEN_COMPAT <= 0x030002
6906 #undef LOAD_OFFSET
6907@@ -172,20 +175,20 @@ static inline pgd_t __pgd(unsigned long
6908 /* to align the pointer to the (next) page boundary */
6909 #define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK)
6910
6911-#define KERNEL_TEXT_SIZE (40UL*1024*1024)
6912-#define KERNEL_TEXT_START 0xffffffff80000000UL
6913+#define KERNEL_TEXT_SIZE (40*1024*1024)
6914+#define KERNEL_TEXT_START _AC(0xffffffff80000000, UL)
6915+
6916+#define PAGE_OFFSET __PAGE_OFFSET
6917
6918-#define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET)
6919+#ifndef __ASSEMBLY__
6920+static inline unsigned long __phys_addr(unsigned long x)
6921+{
6922+ return x - (x >= __START_KERNEL_map ? __START_KERNEL_map : PAGE_OFFSET);
6923+}
6924+#endif
6925
6926-/* Note: __pa(&symbol_visible_to_c) should be always replaced with __pa_symbol.
6927- Otherwise you risk miscompilation. */
6928-#define __pa(x) (((unsigned long)(x)>=__START_KERNEL_map)?(unsigned long)(x) - (unsigned long)__START_KERNEL_map:(unsigned long)(x) - PAGE_OFFSET)
6929-/* __pa_symbol should be used for C visible symbols.
6930- This seems to be the official gcc blessed way to do such arithmetic. */
6931-#define __pa_symbol(x) \
6932- ({unsigned long v; \
6933- asm("" : "=r" (v) : "0" (x)); \
6934- __pa(v); })
6935+#define __pa(x) __phys_addr((unsigned long)(x))
6936+#define __pa_symbol(x) __phys_addr((unsigned long)(x))
6937
6938 #define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET))
6939 #define __boot_va(x) __va(x)
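The page_64.h constants above switch to the _AC() macro from <linux/const.h>, so one definition works for both C (where the UL suffix is pasted on) and assembly (where it is dropped). A cut-down, compile-and-run copy of that trick, for illustration only:

    #include <stdio.h>

    #ifdef __ASSEMBLY__
    #define _AC(X, Y)  X                    /* assembler: no type suffix */
    #else
    #define __AC(X, Y) (X##Y)
    #define _AC(X, Y)  __AC(X, Y)           /* C: paste the suffix on */
    #endif

    #define PAGE_SHIFT 12
    #define PAGE_SIZE  (_AC(1, UL) << PAGE_SHIFT)   /* (1UL << 12) in C */
    #define PAGE_MASK  (~(PAGE_SIZE - 1))

    int main(void)
    {
        printf("PAGE_SIZE = %lu\n", PAGE_SIZE);
        printf("PAGE_MASK = %#lx\n", PAGE_MASK);
        return 0;
    }
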
6940--- sle11-2009-04-20.orig/include/asm-x86/mach-xen/asm/pgalloc_64.h 2009-04-29 08:44:31.000000000 +0200
6941+++ sle11-2009-04-20/include/asm-x86/mach-xen/asm/pgalloc_64.h 2008-12-15 11:27:22.000000000 +0100
6942@@ -1,7 +1,6 @@
6943 #ifndef _X86_64_PGALLOC_H
6944 #define _X86_64_PGALLOC_H
6945
6946-#include <asm/fixmap.h>
6947 #include <asm/pda.h>
6948 #include <linux/threads.h>
6949 #include <linux/mm.h>
6950@@ -100,24 +99,16 @@ static inline void pgd_list_add(pgd_t *p
6951 struct page *page = virt_to_page(pgd);
6952
6953 spin_lock(&pgd_lock);
6954- page->index = (pgoff_t)pgd_list;
6955- if (pgd_list)
6956- pgd_list->private = (unsigned long)&page->index;
6957- pgd_list = page;
6958- page->private = (unsigned long)&pgd_list;
6959+ list_add(&page->lru, &pgd_list);
6960 spin_unlock(&pgd_lock);
6961 }
6962
6963 static inline void pgd_list_del(pgd_t *pgd)
6964 {
6965- struct page *next, **pprev, *page = virt_to_page(pgd);
6966+ struct page *page = virt_to_page(pgd);
6967
6968 spin_lock(&pgd_lock);
6969- next = (struct page *)page->index;
6970- pprev = (struct page **)page->private;
6971- *pprev = next;
6972- if (next)
6973- next->private = (unsigned long)pprev;
6974+ list_del(&page->lru);
6975 spin_unlock(&pgd_lock);
6976 }
6977
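pgd_list_add()/pgd_list_del() above drop the hand-rolled pointer chain threaded through page->index and page->private in favour of the standard list_head on page->lru, so removal no longer has to juggle a back-pointer. A minimal userspace copy of that idiom with a toy struct page; list_add()/list_del() below mirror the kernel helpers but are local re-implementations:

    #include <stdio.h>

    struct list_head { struct list_head *next, *prev; };
    #define LIST_HEAD_INIT(name) { &(name), &(name) }

    static void list_add(struct list_head *new, struct list_head *head)
    {
        new->next = head->next;
        new->prev = head;
        head->next->prev = new;
        head->next = new;
    }

    static void list_del(struct list_head *entry)
    {
        entry->prev->next = entry->next;
        entry->next->prev = entry->prev;
    }

    struct page { struct list_head lru; int id; };  /* lru first, as used below */

    static struct list_head pgd_list = LIST_HEAD_INIT(pgd_list);

    int main(void)
    {
        struct page a = { .id = 1 }, b = { .id = 2 };
        struct list_head *pos;

        list_add(&a.lru, &pgd_list);
        list_add(&b.lru, &pgd_list);
        list_del(&a.lru);                   /* no pprev bookkeeping needed */

        for (pos = pgd_list.next; pos != &pgd_list; pos = pos->next)
            printf("page %d still on pgd_list\n",
                   ((struct page *)pos)->id);       /* ok: lru is the first member */
        return 0;
    }
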
6978--- sle11-2009-04-20.orig/include/asm-x86/mach-xen/asm/pgtable_64.h 2009-03-04 11:25:55.000000000 +0100
6979+++ sle11-2009-04-20/include/asm-x86/mach-xen/asm/pgtable_64.h 2008-12-15 11:27:22.000000000 +0100
6980@@ -1,12 +1,14 @@
6981 #ifndef _X86_64_PGTABLE_H
6982 #define _X86_64_PGTABLE_H
6983
6984+#include <linux/const.h>
6985+#ifndef __ASSEMBLY__
6986+
6987 /*
6988 * This file contains the functions and defines necessary to modify and use
6989 * the x86-64 page table tree.
6990 */
6991 #include <asm/processor.h>
6992-#include <asm/fixmap.h>
6993 #include <asm/bitops.h>
6994 #include <linux/threads.h>
6995 #include <linux/sched.h>
6996@@ -35,11 +37,9 @@ extern pte_t *lookup_address(unsigned lo
6997 #endif
6998
6999 extern pud_t level3_kernel_pgt[512];
7000-extern pud_t level3_physmem_pgt[512];
7001 extern pud_t level3_ident_pgt[512];
7002 extern pmd_t level2_kernel_pgt[512];
7003 extern pgd_t init_level4_pgt[];
7004-extern pgd_t boot_level4_pgt[];
7005 extern unsigned long __supported_pte_mask;
7006
7007 #define swapper_pg_dir init_level4_pgt
7008@@ -54,6 +54,8 @@ extern void clear_kernel_mapping(unsigne
7009 extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)];
7010 #define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
7011
7012+#endif /* !__ASSEMBLY__ */
7013+
7014 /*
7015 * PGDIR_SHIFT determines what a top-level page table entry can map
7016 */
7017@@ -78,6 +80,8 @@ extern unsigned long empty_zero_page[PAG
7018 */
7019 #define PTRS_PER_PTE 512
7020
7021+#ifndef __ASSEMBLY__
7022+
7023 #define pte_ERROR(e) \
7024 printk("%s:%d: bad pte %p(%016lx pfn %010lx).\n", __FILE__, __LINE__, \
7025 &(e), __pte_val(e), pte_pfn(e))
7026@@ -120,22 +124,23 @@ static inline void pgd_clear (pgd_t * pg
7027
7028 #define pte_pgprot(a) (__pgprot((a).pte & ~PHYSICAL_PAGE_MASK))
7029
7030-#define PMD_SIZE (1UL << PMD_SHIFT)
7031+#endif /* !__ASSEMBLY__ */
7032+
7033+#define PMD_SIZE (_AC(1,UL) << PMD_SHIFT)
7034 #define PMD_MASK (~(PMD_SIZE-1))
7035-#define PUD_SIZE (1UL << PUD_SHIFT)
7036+#define PUD_SIZE (_AC(1,UL) << PUD_SHIFT)
7037 #define PUD_MASK (~(PUD_SIZE-1))
7038-#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
7039+#define PGDIR_SIZE (_AC(1,UL) << PGDIR_SHIFT)
7040 #define PGDIR_MASK (~(PGDIR_SIZE-1))
7041
7042 #define USER_PTRS_PER_PGD ((TASK_SIZE-1)/PGDIR_SIZE+1)
7043 #define FIRST_USER_ADDRESS 0
7044
7045-#ifndef __ASSEMBLY__
7046-#define MAXMEM 0x3fffffffffffUL
7047-#define VMALLOC_START 0xffffc20000000000UL
7048-#define VMALLOC_END 0xffffe1ffffffffffUL
7049-#define MODULES_VADDR 0xffffffff88000000UL
7050-#define MODULES_END 0xfffffffffff00000UL
7051+#define MAXMEM _AC(0x3fffffffffff, UL)
7052+#define VMALLOC_START _AC(0xffffc20000000000, UL)
7053+#define VMALLOC_END _AC(0xffffe1ffffffffff, UL)
7054+#define MODULES_VADDR _AC(0xffffffff88000000, UL)
7055+#define MODULES_END _AC(0xfffffffffff00000, UL)
7056 #define MODULES_LEN (MODULES_END - MODULES_VADDR)
7057
7058 #define _PAGE_BIT_PRESENT 0
7059@@ -161,16 +166,18 @@ static inline void pgd_clear (pgd_t * pg
7060 #define _PAGE_GLOBAL 0x100 /* Global TLB entry */
7061
7062 #define _PAGE_PROTNONE 0x080 /* If not present */
7063-#define _PAGE_NX (1UL<<_PAGE_BIT_NX)
7064+#define _PAGE_NX (_AC(1,UL)<<_PAGE_BIT_NX)
7065
7066 /* Mapped page is I/O or foreign and has no associated page struct. */
7067 #define _PAGE_IO 0x200
7068
7069+#ifndef __ASSEMBLY__
7070 #if CONFIG_XEN_COMPAT <= 0x030002
7071 extern unsigned int __kernel_page_user;
7072 #else
7073 #define __kernel_page_user 0
7074 #endif
7075+#endif
7076
7077 #define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY)
7078 #define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY | __kernel_page_user)
7079@@ -235,6 +242,8 @@ extern unsigned int __kernel_page_user;
7080 #define __S110 PAGE_SHARED_EXEC
7081 #define __S111 PAGE_SHARED_EXEC
7082
7083+#ifndef __ASSEMBLY__
7084+
7085 static inline unsigned long pgd_bad(pgd_t pgd)
7086 {
7087 return __pgd_val(pgd) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER);
7088@@ -346,6 +355,20 @@ static inline pte_t pte_mkwrite(pte_t pt
7089 static inline pte_t pte_mkhuge(pte_t pte) { __pte_val(pte) |= _PAGE_PSE; return pte; }
7090 static inline pte_t pte_clrhuge(pte_t pte) { __pte_val(pte) &= ~_PAGE_PSE; return pte; }
7091
7092+static inline int ptep_test_and_clear_dirty(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
7093+{
7094+ if (!pte_dirty(*ptep))
7095+ return 0;
7096+ return test_and_clear_bit(_PAGE_BIT_DIRTY, &ptep->pte);
7097+}
7098+
7099+static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
7100+{
7101+ if (!pte_young(*ptep))
7102+ return 0;
7103+ return test_and_clear_bit(_PAGE_BIT_ACCESSED, &ptep->pte);
7104+}
7105+
7106 static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
7107 {
7108 pte_t pte = *ptep;
7109@@ -470,18 +493,12 @@ static inline pte_t pte_modify(pte_t pte
7110 * bit at the same time. */
7111 #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
7112 #define ptep_set_access_flags(vma, address, ptep, entry, dirty) \
7113- do { \
7114- if (dirty) \
7115- ptep_establish(vma, address, ptep, entry); \
7116- } while (0)
7117-
7118-
7119-/*
7120- * i386 says: We don't actually have these, but we want to advertise
7121- * them so that we can encompass the flush here.
7122- */
7123-#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
7124-#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
7125+({ \
7126+ int __changed = !pte_same(*(ptep), entry); \
7127+ if (__changed && (dirty)) \
7128+ ptep_establish(vma, address, ptep, entry); \
7129+ __changed; \
7130+})
7131
7132 #define __HAVE_ARCH_PTEP_CLEAR_DIRTY_FLUSH
7133 #define ptep_clear_flush_dirty(vma, address, ptep) \
7134@@ -490,7 +507,7 @@ static inline pte_t pte_modify(pte_t pte
7135 int __dirty = pte_dirty(__pte); \
7136 __pte = pte_mkclean(__pte); \
7137 if ((vma)->vm_mm->context.pinned) \
7138- ptep_set_access_flags(vma, address, ptep, __pte, __dirty); \
7139+ (void)ptep_set_access_flags(vma, address, ptep, __pte, __dirty); \
7140 else if (__dirty) \
7141 set_pte(ptep, __pte); \
7142 __dirty; \
7143@@ -503,7 +520,7 @@ static inline pte_t pte_modify(pte_t pte
7144 int __young = pte_young(__pte); \
7145 __pte = pte_mkold(__pte); \
7146 if ((vma)->vm_mm->context.pinned) \
7147- ptep_set_access_flags(vma, address, ptep, __pte, __young); \
7148+ (void)ptep_set_access_flags(vma, address, ptep, __pte, __young); \
7149 else if (__young) \
7150 set_pte(ptep, __pte); \
7151 __young; \
7152@@ -517,10 +534,7 @@ static inline pte_t pte_modify(pte_t pte
7153 #define __swp_entry_to_pte(x) ((pte_t) { (x).val })
7154
7155 extern spinlock_t pgd_lock;
7156-extern struct page *pgd_list;
7157-void vmalloc_sync_all(void);
7158-
7159-#endif /* !__ASSEMBLY__ */
7160+extern struct list_head pgd_list;
7161
7162 extern int kern_addr_valid(unsigned long addr);
7163
7164@@ -559,10 +573,6 @@ int xen_change_pte_range(struct mm_struc
7165 #define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \
7166 direct_remap_pfn_range(vma,vaddr,pfn,size,prot,DOMID_IO)
7167
7168-#define MK_IOSPACE_PFN(space, pfn) (pfn)
7169-#define GET_IOSPACE(pfn) 0
7170-#define GET_PFN(pfn) (pfn)
7171-
7172 #define HAVE_ARCH_UNMAPPED_AREA
7173
7174 #define pgtable_cache_init() do { } while (0)
7175@@ -576,11 +586,14 @@ int xen_change_pte_range(struct mm_struc
7176 #define kc_offset_to_vaddr(o) \
7177 (((o) & (1UL << (__VIRTUAL_MASK_SHIFT-1))) ? ((o) | (~__VIRTUAL_MASK)) : (o))
7178
7179+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
7180+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
7181 #define __HAVE_ARCH_PTEP_GET_AND_CLEAR
7182 #define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
7183 #define __HAVE_ARCH_PTEP_CLEAR_FLUSH
7184 #define __HAVE_ARCH_PTEP_SET_WRPROTECT
7185 #define __HAVE_ARCH_PTE_SAME
7186 #include <asm-generic/pgtable.h>
7187+#endif /* !__ASSEMBLY__ */
7188
7189 #endif /* _X86_64_PGTABLE_H */
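The reworked ptep_set_access_flags() above turns the macro into a GCC statement expression, so it now evaluates to whether the PTE actually changed; the ptep_clear_flush_dirty()/_young() macros in the same hunk cast that result to (void) where only the side effect matters. A toy userspace version of the pattern (pte_t, pte_same() and establish() are stand-ins; statement expressions need GCC or clang):

    #include <stdio.h>

    typedef struct { unsigned long val; } pte_t;

    static int pte_same(pte_t a, pte_t b) { return a.val == b.val; }
    static void establish(pte_t *ptep, pte_t entry) { *ptep = entry; }

    #define set_access_flags(ptep, entry, dirty)        \
    ({                                                  \
        int __changed = !pte_same(*(ptep), entry);      \
        if (__changed && (dirty))                       \
            establish(ptep, entry);                     \
        __changed;                                      \
    })

    int main(void)
    {
        pte_t pte = { 0x1 }, entry = { 0x3 };
        int changed;

        changed = set_access_flags(&pte, entry, 1);
        printf("first call : changed=%d pte=%#lx\n", changed, pte.val);

        changed = set_access_flags(&pte, entry, 1);     /* already equal now */
        printf("second call: changed=%d pte=%#lx\n", changed, pte.val);
        return 0;
    }
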
7190--- sle11-2009-04-20.orig/include/asm-x86/mach-xen/asm/processor_64.h 2008-12-15 11:26:44.000000000 +0100
7191+++ sle11-2009-04-20/include/asm-x86/mach-xen/asm/processor_64.h 2008-12-15 11:27:22.000000000 +0100
7192@@ -20,6 +20,7 @@
7193 #include <asm/percpu.h>
7194 #include <linux/personality.h>
7195 #include <linux/cpumask.h>
7196+#include <asm/processor-flags.h>
7197
7198 #define TF_MASK 0x00000100
7199 #define IF_MASK 0x00000200
7200@@ -103,42 +104,6 @@ extern unsigned int init_intel_cacheinfo
7201 extern unsigned short num_cache_leaves;
7202
7203 /*
7204- * EFLAGS bits
7205- */
7206-#define X86_EFLAGS_CF 0x00000001 /* Carry Flag */
7207-#define X86_EFLAGS_PF 0x00000004 /* Parity Flag */
7208-#define X86_EFLAGS_AF 0x00000010 /* Auxillary carry Flag */
7209-#define X86_EFLAGS_ZF 0x00000040 /* Zero Flag */
7210-#define X86_EFLAGS_SF 0x00000080 /* Sign Flag */
7211-#define X86_EFLAGS_TF 0x00000100 /* Trap Flag */
7212-#define X86_EFLAGS_IF 0x00000200 /* Interrupt Flag */
7213-#define X86_EFLAGS_DF 0x00000400 /* Direction Flag */
7214-#define X86_EFLAGS_OF 0x00000800 /* Overflow Flag */
7215-#define X86_EFLAGS_IOPL 0x00003000 /* IOPL mask */
7216-#define X86_EFLAGS_NT 0x00004000 /* Nested Task */
7217-#define X86_EFLAGS_RF 0x00010000 /* Resume Flag */
7218-#define X86_EFLAGS_VM 0x00020000 /* Virtual Mode */
7219-#define X86_EFLAGS_AC 0x00040000 /* Alignment Check */
7220-#define X86_EFLAGS_VIF 0x00080000 /* Virtual Interrupt Flag */
7221-#define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */
7222-#define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */
7223-
7224-/*
7225- * Intel CPU features in CR4
7226- */
7227-#define X86_CR4_VME 0x0001 /* enable vm86 extensions */
7228-#define X86_CR4_PVI 0x0002 /* virtual interrupts flag enable */
7229-#define X86_CR4_TSD 0x0004 /* disable time stamp at ipl 3 */
7230-#define X86_CR4_DE 0x0008 /* enable debugging extensions */
7231-#define X86_CR4_PSE 0x0010 /* enable page size extensions */
7232-#define X86_CR4_PAE 0x0020 /* enable physical address extensions */
7233-#define X86_CR4_MCE 0x0040 /* Machine check enable */
7234-#define X86_CR4_PGE 0x0080 /* enable global pages */
7235-#define X86_CR4_PCE 0x0100 /* enable performance counters at ipl 3 */
7236-#define X86_CR4_OSFXSR 0x0200 /* enable fast FPU save and restore */
7237-#define X86_CR4_OSXMMEXCPT 0x0400 /* enable unmasked SSE exceptions */
7238-
7239-/*
7240 * Save the cr4 feature set we're using (ie
7241 * Pentium 4MB enable and PPro Global page
7242 * enable), so that any CPU's that boot up
7243@@ -203,7 +168,7 @@ struct i387_fxsave_struct {
7244 u32 mxcsr;
7245 u32 mxcsr_mask;
7246 u32 st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */
7247- u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg = 128 bytes */
7248+ u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg = 256 bytes */
7249 u32 padding[24];
7250 } __attribute__ ((aligned (16)));
7251
7252@@ -436,22 +401,6 @@ static inline void prefetchw(void *x)
7253 #define cpu_relax() rep_nop()
7254
7255 /*
7256- * NSC/Cyrix CPU configuration register indexes
7257- */
7258-#define CX86_CCR0 0xc0
7259-#define CX86_CCR1 0xc1
7260-#define CX86_CCR2 0xc2
7261-#define CX86_CCR3 0xc3
7262-#define CX86_CCR4 0xe8
7263-#define CX86_CCR5 0xe9
7264-#define CX86_CCR6 0xea
7265-#define CX86_CCR7 0xeb
7266-#define CX86_DIR0 0xfe
7267-#define CX86_DIR1 0xff
7268-#define CX86_ARR_BASE 0xc4
7269-#define CX86_RCR_BASE 0xdc
7270-
7271-/*
7272 * NSC/Cyrix CPU indexed register access macros
7273 */
7274
7275--- sle11-2009-04-20.orig/include/asm-x86/mach-xen/asm/smp_64.h 2009-03-04 11:25:55.000000000 +0100
7276+++ sle11-2009-04-20/include/asm-x86/mach-xen/asm/smp_64.h 2008-12-15 11:27:22.000000000 +0100
7277@@ -11,12 +11,11 @@
7278 extern int disable_apic;
7279
7280 #ifdef CONFIG_X86_LOCAL_APIC
7281-#include <asm/fixmap.h>
7282 #include <asm/mpspec.h>
7283+#include <asm/apic.h>
7284 #ifdef CONFIG_X86_IO_APIC
7285 #include <asm/io_apic.h>
7286 #endif
7287-#include <asm/apic.h>
7288 #include <asm/thread_info.h>
7289 #endif
7290
7291@@ -41,7 +40,6 @@ extern void lock_ipi_call_lock(void);
7292 extern void unlock_ipi_call_lock(void);
7293 extern int smp_num_siblings;
7294 extern void smp_send_reschedule(int cpu);
7295-void smp_stop_cpu(void);
7296
7297 extern cpumask_t cpu_sibling_map[NR_CPUS];
7298 extern cpumask_t cpu_core_map[NR_CPUS];
7299@@ -62,14 +60,6 @@ static inline int num_booting_cpus(void)
7300
7301 #define raw_smp_processor_id() read_pda(cpunumber)
7302
7303-#ifdef CONFIG_X86_LOCAL_APIC
7304-static inline int hard_smp_processor_id(void)
7305-{
7306- /* we don't want to mark this access volatile - bad code generation */
7307- return GET_APIC_ID(*(unsigned int *)(APIC_BASE+APIC_ID));
7308-}
7309-#endif
7310-
7311 extern int __cpu_disable(void);
7312 extern void __cpu_die(unsigned int cpu);
7313 extern void prefill_possible_map(void);
7314@@ -78,6 +68,14 @@ extern unsigned __cpuinitdata disabled_c
7315
7316 #define NO_PROC_ID 0xFF /* No processor magic marker */
7317
7318+#endif /* CONFIG_SMP */
7319+
7320+#ifdef CONFIG_X86_LOCAL_APIC
7321+static inline int hard_smp_processor_id(void)
7322+{
7323+ /* we don't want to mark this access volatile - bad code generation */
7324+ return GET_APIC_ID(*(unsigned int *)(APIC_BASE+APIC_ID));
7325+}
7326 #endif
7327
7328 /*
7329--- sle11-2009-04-20.orig/include/asm-x86/mach-xen/asm/system_64.h 2009-03-04 11:28:34.000000000 +0100
7330+++ sle11-2009-04-20/include/asm-x86/mach-xen/asm/system_64.h 2008-12-15 11:27:22.000000000 +0100
7331@@ -3,7 +3,7 @@
7332
7333 #include <linux/kernel.h>
7334 #include <asm/segment.h>
7335-#include <asm/alternative.h>
7336+#include <asm/cmpxchg.h>
7337
7338 #include <asm/synch_bitops.h>
7339 #include <asm/hypervisor.h>
7340@@ -43,7 +43,7 @@
7341 [threadrsp] "i" (offsetof(struct task_struct, thread.rsp)), \
7342 [ti_flags] "i" (offsetof(struct thread_info, flags)),\
7343 [tif_fork] "i" (TIF_FORK), \
7344- [thread_info] "i" (offsetof(struct task_struct, thread_info)), \
7345+ [thread_info] "i" (offsetof(struct task_struct, stack)), \
7346 [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \
7347 : "memory", "cc" __EXTRA_CLOBBER)
7348
7349@@ -92,6 +92,12 @@ static inline void write_cr0(unsigned lo
7350 machine_to_phys(__dummy); \
7351 })
7352
7353+static inline void write_cr3(unsigned long val)
7354+{
7355+ val = phys_to_machine(val);
7356+ asm volatile("movq %0,%%cr3" :: "r" (val) : "memory");
7357+}
7358+
7359 static inline unsigned long read_cr4(void)
7360 {
7361 unsigned long cr4;
7362@@ -101,7 +107,7 @@ static inline unsigned long read_cr4(voi
7363
7364 static inline void write_cr4(unsigned long val)
7365 {
7366- asm volatile("movq %0,%%cr4" :: "r" (val));
7367+ asm volatile("movq %0,%%cr4" :: "r" (val) : "memory");
7368 }
7369
7370 #define stts() (HYPERVISOR_fpu_taskswitch(1))
7371@@ -122,100 +128,6 @@ static inline void sched_cacheflush(void
7372
7373 #define nop() __asm__ __volatile__ ("nop")
7374
7375-#define xchg(ptr,v) ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr))))
7376-
7377-#define tas(ptr) (xchg((ptr),1))
7378-
7379-#define __xg(x) ((volatile long *)(x))
7380-
7381-static inline void set_64bit(volatile unsigned long *ptr, unsigned long val)
7382-{
7383- *ptr = val;
7384-}
7385-
7386-#define _set_64bit set_64bit
7387-
7388-/*
7389- * Note: no "lock" prefix even on SMP: xchg always implies lock anyway
7390- * Note 2: xchg has side effect, so that attribute volatile is necessary,
7391- * but generally the primitive is invalid, *ptr is output argument. --ANK
7392- */
7393-static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int size)
7394-{
7395- switch (size) {
7396- case 1:
7397- __asm__ __volatile__("xchgb %b0,%1"
7398- :"=q" (x)
7399- :"m" (*__xg(ptr)), "0" (x)
7400- :"memory");
7401- break;
7402- case 2:
7403- __asm__ __volatile__("xchgw %w0,%1"
7404- :"=r" (x)
7405- :"m" (*__xg(ptr)), "0" (x)
7406- :"memory");
7407- break;
7408- case 4:
7409- __asm__ __volatile__("xchgl %k0,%1"
7410- :"=r" (x)
7411- :"m" (*__xg(ptr)), "0" (x)
7412- :"memory");
7413- break;
7414- case 8:
7415- __asm__ __volatile__("xchgq %0,%1"
7416- :"=r" (x)
7417- :"m" (*__xg(ptr)), "0" (x)
7418- :"memory");
7419- break;
7420- }
7421- return x;
7422-}
7423-
7424-/*
7425- * Atomic compare and exchange. Compare OLD with MEM, if identical,
7426- * store NEW in MEM. Return the initial value in MEM. Success is
7427- * indicated by comparing RETURN with OLD.
7428- */
7429-
7430-#define __HAVE_ARCH_CMPXCHG 1
7431-
7432-static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
7433- unsigned long new, int size)
7434-{
7435- unsigned long prev;
7436- switch (size) {
7437- case 1:
7438- __asm__ __volatile__(LOCK_PREFIX "cmpxchgb %b1,%2"
7439- : "=a"(prev)
7440- : "q"(new), "m"(*__xg(ptr)), "0"(old)
7441- : "memory");
7442- return prev;
7443- case 2:
7444- __asm__ __volatile__(LOCK_PREFIX "cmpxchgw %w1,%2"
7445- : "=a"(prev)
7446- : "r"(new), "m"(*__xg(ptr)), "0"(old)
7447- : "memory");
7448- return prev;
7449- case 4:
7450- __asm__ __volatile__(LOCK_PREFIX "cmpxchgl %k1,%2"
7451- : "=a"(prev)
7452- : "r"(new), "m"(*__xg(ptr)), "0"(old)
7453- : "memory");
7454- return prev;
7455- case 8:
7456- __asm__ __volatile__(LOCK_PREFIX "cmpxchgq %1,%2"
7457- : "=a"(prev)
7458- : "r"(new), "m"(*__xg(ptr)), "0"(old)
7459- : "memory");
7460- return prev;
7461- }
7462- return old;
7463-}
7464-
7465-#define cmpxchg(ptr,o,n)\
7466- ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\
7467- (unsigned long)(n),sizeof(*(ptr))))
7468-
7469 #ifdef CONFIG_SMP
7470 #define smp_mb() mb()
7471 #define smp_rmb() rmb()
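The system_64.h hunk above deletes the open-coded xchg()/cmpxchg() helpers now that <asm/cmpxchg.h> is included at the top of the file. As a reminder of the semantics cmpxchg() provides (compare OLD with *PTR, store NEW only on a match, return the previous value), here is the equivalent done in userspace with the GCC __sync builtin; the variable names are arbitrary:

    #include <stdio.h>

    int main(void)
    {
        unsigned long v = 5;
        unsigned long prev;

        prev = __sync_val_compare_and_swap(&v, 5UL, 9UL);  /* match: v becomes 9 */
        printf("prev=%lu v=%lu\n", prev, v);

        prev = __sync_val_compare_and_swap(&v, 5UL, 7UL);  /* no match: v stays 9 */
        printf("prev=%lu v=%lu\n", prev, v);
        return 0;
    }
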
7472--- sle11-2009-04-20.orig/include/asm-x86/mach-xen/asm/tlbflush_64.h 2009-03-04 11:28:34.000000000 +0100
7473+++ sle11-2009-04-20/include/asm-x86/mach-xen/asm/tlbflush_64.h 2008-12-15 11:27:22.000000000 +0100
7474@@ -2,7 +2,9 @@
7475 #define _X8664_TLBFLUSH_H
7476
7477 #include <linux/mm.h>
7478+#include <linux/sched.h>
7479 #include <asm/processor.h>
7480+#include <asm/system.h>
7481
7482 #define __flush_tlb() xen_tlb_flush()
7483
7484--- sle11-2009-04-20.orig/include/linux/pci.h 2009-04-29 08:44:31.000000000 +0200
7485+++ sle11-2009-04-20/include/linux/pci.h 2008-12-15 11:27:22.000000000 +0100
7486@@ -239,7 +239,7 @@ struct pci_dev {
7487 int rom_attr_enabled; /* has display of the rom attribute been enabled? */
7488 struct bin_attribute *res_attr[DEVICE_COUNT_RESOURCE]; /* sysfs file for resources */
7489 struct bin_attribute *res_attr_wc[DEVICE_COUNT_RESOURCE]; /* sysfs file for WC mapping of resources */
7490-#ifdef CONFIG_PCI_MSI
7491+#if defined(CONFIG_PCI_MSI) && !defined(CONFIG_XEN)
7492 struct list_head msi_list;
7493 #endif
7494 struct pci_vpd *vpd;
7495--- sle11-2009-04-20.orig/lib/swiotlb-xen.c 2009-03-04 11:25:55.000000000 +0100
7496+++ sle11-2009-04-20/lib/swiotlb-xen.c 2009-02-05 11:16:51.000000000 +0100
7497@@ -723,7 +723,6 @@ swiotlb_dma_supported (struct device *hw
7498 return (mask >= ((1UL << dma_bits) - 1));
7499 }
7500
7501-EXPORT_SYMBOL(swiotlb_init);
7502 EXPORT_SYMBOL(swiotlb_map_single);
7503 EXPORT_SYMBOL(swiotlb_unmap_single);
7504 EXPORT_SYMBOL(swiotlb_map_sg);
7505--- sle11-2009-04-20.orig/net/core/dev.c 2008-12-15 11:26:44.000000000 +0100
7506+++ sle11-2009-04-20/net/core/dev.c 2008-12-15 11:27:22.000000000 +0100
7507@@ -1744,12 +1744,17 @@ static struct netdev_queue *dev_pick_tx(
7508 inline int skb_checksum_setup(struct sk_buff *skb)
7509 {
7510 if (skb->proto_csum_blank) {
7511+ struct iphdr *iph;
7512+ unsigned char *th;
7513+
7514 if (skb->protocol != htons(ETH_P_IP))
7515 goto out;
7516- skb->h.raw = (unsigned char *)skb->nh.iph + 4*skb->nh.iph->ihl;
7517- if (skb->h.raw >= skb->tail)
7518+ iph = ip_hdr(skb);
7519+ th = skb_network_header(skb) + 4 * iph->ihl;
7520+ if (th >= skb_tail_pointer(skb))
7521 goto out;
7522- switch (skb->nh.iph->protocol) {
7523+ skb->csum_start = th - skb->head;
7524+ switch (iph->protocol) {
7525 case IPPROTO_TCP:
7526 skb->csum_offset = offsetof(struct tcphdr, check);
7527 break;
7528@@ -1760,10 +1765,10 @@ inline int skb_checksum_setup(struct sk_
7529 if (net_ratelimit())
7530 printk(KERN_ERR "Attempting to checksum a non-"
7531 "TCP/UDP packet, dropping a protocol"
7532- " %d packet", skb->nh.iph->protocol);
7533+ " %d packet", iph->protocol);
7534 goto out;
7535 }
7536- if ((skb->h.raw + skb->csum_offset + 2) > skb->tail)
7537+ if ((th + skb->csum_offset + 2) > skb_tail_pointer(skb))
7538 goto out;
7539 skb->ip_summed = CHECKSUM_PARTIAL;
7540 skb->proto_csum_blank = 0;
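The skb_checksum_setup() rework above derives the transport header as 4 * iph->ihl bytes past the network header and records it as skb->csum_start = th - skb->head, instead of poking skb->h.raw directly. The offset arithmetic, shown on a hand-built 20-byte IPv4/TCP header (not real traffic; the checksum field offsets are those of struct tcphdr and struct udphdr):

    #include <stdio.h>

    int main(void)
    {
        /* minimal IPv4 header: version 4, ihl 5 (20 bytes), protocol 6 (TCP) */
        unsigned char pkt[20] = { 0x45, 0, 0, 40, 0, 0, 0, 0, 64, 6 };
        unsigned int ihl = pkt[0] & 0x0f;
        unsigned int transport_off = 4 * ihl;   /* transport header offset from the IP header */
        unsigned int csum_off;

        if (pkt[9] == 6)                        /* IPPROTO_TCP */
            csum_off = 16;                      /* offsetof(struct tcphdr, check) */
        else
            csum_off = 6;                       /* offsetof(struct udphdr, check) */

        printf("transport header at byte %u, checksum field at byte %u\n",
               transport_off, transport_off + csum_off);
        return 0;
    }
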
7541--- sle11-2009-04-20.orig/scripts/Makefile.xen.awk 2009-04-29 08:44:31.000000000 +0200
7542+++ sle11-2009-04-20/scripts/Makefile.xen.awk 2008-12-15 11:27:22.000000000 +0100
7543@@ -13,7 +13,7 @@ BEGIN {
7544 next
7545 }
7546
7547-/:[[:space:]]*%\.[cS][[:space:]]/ {
7548+/:[[:space:]]*\$\(src\)\/%\.[cS][[:space:]]/ {
7549 line = gensub(/%.([cS])/, "%-xen.\\1", "g", $0)
7550 line = gensub(/(single-used-m)/, "xen-\\1", "g", line)
7551 print line