From: www.kernel.org
Subject: Linux 2.6.19
Patch-mainline: 2.6.19

Automatically created from "patches.kernel.org/patch-2.6.19" by xen-port-patches.py

Acked-by: jbeulich@novell.com

--- sle11-2009-05-14.orig/arch/x86/Kconfig 2009-04-20 11:36:10.000000000 +0200
+++ sle11-2009-05-14/arch/x86/Kconfig 2009-03-04 11:28:34.000000000 +0100
@@ -415,6 +415,7 @@ config SCHED_NO_NO_OMIT_FRAME_POINTER

 menuconfig PARAVIRT_GUEST
 bool "Paravirtualized guest support"
+ depends on !X86_XEN && !X86_64_XEN
 help
 Say Y here to get to see options related to running Linux under
 various hypervisors. This option alone does not add any kernel code.
--- sle11-2009-05-14.orig/arch/x86/kernel/apic_32-xen.c 2009-05-14 11:02:43.000000000 +0200
+++ sle11-2009-05-14/arch/x86/kernel/apic_32-xen.c 2009-03-04 11:28:34.000000000 +0100
@@ -54,7 +54,6 @@ static cpumask_t timer_bcast_ipi;
 /*
 * Knob to control our willingness to enable the local APIC.
 */
-int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */

 /*
 * Debug level
@@ -102,7 +101,7 @@ int get_physical_broadcast(void)

 #ifndef CONFIG_XEN
 #ifndef CONFIG_SMP
-static void up_apic_timer_interrupt_call(struct pt_regs *regs)
+static void up_apic_timer_interrupt_call(void)
 {
 int cpu = smp_processor_id();

@@ -111,11 +110,11 @@ static void up_apic_timer_interrupt_call
 */
 per_cpu(irq_stat, cpu).apic_timer_irqs++;

- smp_local_timer_interrupt(regs);
+ smp_local_timer_interrupt();
 }
 #endif

-void smp_send_timer_broadcast_ipi(struct pt_regs *regs)
+void smp_send_timer_broadcast_ipi(void)
 {
 cpumask_t mask;

@@ -128,7 +127,7 @@ void smp_send_timer_broadcast_ipi(struct
 * We can directly call the apic timer interrupt handler
 * in UP case. Minus all irq related functions
 */
- up_apic_timer_interrupt_call(regs);
+ up_apic_timer_interrupt_call();
 #endif
 }
 }
--- sle11-2009-05-14.orig/arch/x86/kernel/cpu/common-xen.c 2009-05-14 11:02:43.000000000 +0200
+++ sle11-2009-05-14/arch/x86/kernel/cpu/common-xen.c 2009-03-04 11:28:34.000000000 +0100
@@ -43,7 +43,7 @@ struct cpu_dev * cpu_devs[X86_VENDOR_NUM

 extern int disable_pse;

-static void default_init(struct cpuinfo_x86 * c)
+static void __cpuinit default_init(struct cpuinfo_x86 * c)
 {
 /* Not much we can do here... */
 /* Check if at least it has cpuid */
@@ -56,7 +56,7 @@ static void default_init(struct cpuinfo_
 }
 }

-static struct cpu_dev default_cpu = {
+static struct cpu_dev __cpuinitdata default_cpu = {
 .c_init = default_init,
 .c_vendor = "Unknown",
 };
@@ -191,7 +191,16 @@ static void __cpuinit get_cpu_vendor(str

 static int __init x86_fxsr_setup(char * s)
 {
+ /* Tell all the other CPU's to not use it... */
 disable_x86_fxsr = 1;
+
+ /*
+ * ... and clear the bits early in the boot_cpu_data
+ * so that the bootup process doesn't try to do this
+ * either.
+ */
+ clear_bit(X86_FEATURE_FXSR, boot_cpu_data.x86_capability);
+ clear_bit(X86_FEATURE_XMM, boot_cpu_data.x86_capability);
 return 1;
 }
 __setup("nofxsr", x86_fxsr_setup);
@@ -272,7 +281,7 @@ static void __init early_cpu_detect(void
 }
 }

-void __cpuinit generic_identify(struct cpuinfo_x86 * c)
+static void __cpuinit generic_identify(struct cpuinfo_x86 * c)
 {
 u32 tfms, xlvl;
 int ebx;
@@ -698,8 +707,7 @@ old_gdt:
 */
 atomic_inc(&init_mm.mm_count);
 current->active_mm = &init_mm;
- if (current->mm)
- BUG();
+ BUG_ON(current->mm);
 enter_lazy_tlb(&init_mm, current);

 load_esp0(t, thread);
@@ -712,7 +720,7 @@ old_gdt:
 #endif

 /* Clear %fs and %gs. */
- asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs");
+ asm volatile ("movl %0, %%fs; movl %0, %%gs" : : "r" (0));

 /* Clear all 6 debug registers: */
 set_debugreg(0, 0);
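
The "nofxsr" hunk above follows a common boot-parameter idiom: record the override in a flag for CPUs brought up later, then immediately scrub the already-detected capability bits on the boot CPU so nothing earlier in bringup trusts them. A minimal, compilable sketch of that idiom follows; every name in it (cpu_caps, fxsr_setup, FEATURE_*) is an illustrative stand-in, not one of the kernel's own symbols.

#include <stdio.h>

/* Stand-ins for boot_cpu_data.x86_capability and the X86_FEATURE_*
 * bit numbers; these are not the kernel's definitions. */
#define FEATURE_FXSR 0
#define FEATURE_XMM  1

static unsigned long cpu_caps = (1UL << FEATURE_FXSR) | (1UL << FEATURE_XMM);
static int disable_fxsr;

static void clear_cap(int bit)
{
    cpu_caps &= ~(1UL << bit);
}

/* Shape of the "nofxsr" handler: set the flag for later CPUs, and
 * scrub the boot CPU's already-detected bits right away so the rest
 * of bringup never acts on FXSR/XMM. */
static int fxsr_setup(void)
{
    disable_fxsr = 1;
    clear_cap(FEATURE_FXSR);
    clear_cap(FEATURE_XMM);
    return 1;
}

int main(void)
{
    fxsr_setup();
    printf("caps after nofxsr: %#lx\n", cpu_caps); /* prints 0 */
    return 0;
}

Clearing the bits at parse time, rather than only setting the flag, is what keeps code that runs before the per-CPU identify pass from using the feature.
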
--- sle11-2009-05-14.orig/arch/x86/kernel/entry_32-xen.S 2009-05-14 11:01:46.000000000 +0200
+++ sle11-2009-05-14/arch/x86/kernel/entry_32-xen.S 2009-05-14 11:07:47.000000000 +0200
@@ -80,8 +80,12 @@ VM_MASK = 0x00020000
 NMI_MASK = 0x80000000

 #ifndef CONFIG_XEN
-#define DISABLE_INTERRUPTS cli
-#define ENABLE_INTERRUPTS sti
+/* These are replaces for paravirtualization */
+#define DISABLE_INTERRUPTS cli
+#define ENABLE_INTERRUPTS sti
+#define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit
+#define INTERRUPT_RETURN iret
+#define GET_CR0_INTO_EAX movl %cr0, %eax
 #else
 /* Offsets into shared_info_t. */
 #define evtchn_upcall_pending /* 0 */
@@ -99,15 +103,29 @@ NMI_MASK = 0x80000000

 #define __DISABLE_INTERRUPTS movb $1,evtchn_upcall_mask(%esi)
 #define __ENABLE_INTERRUPTS movb $0,evtchn_upcall_mask(%esi)
+#define __TEST_PENDING testb $0xFF,evtchn_upcall_pending(%esi)
 #define DISABLE_INTERRUPTS GET_VCPU_INFO ; \
 __DISABLE_INTERRUPTS
 #define ENABLE_INTERRUPTS GET_VCPU_INFO ; \
 __ENABLE_INTERRUPTS
-#define __TEST_PENDING testb $0xFF,evtchn_upcall_pending(%esi)
+#define ENABLE_INTERRUPTS_SYSEXIT __ENABLE_INTERRUPTS ; \
+sysexit_scrit: /**** START OF SYSEXIT CRITICAL REGION ****/ ; \
+ __TEST_PENDING ; \
+ jnz 14f # process more events if necessary... ; \
+ movl ESI(%esp), %esi ; \
+ sysexit ; \
+14: __DISABLE_INTERRUPTS ; \
+ TRACE_IRQS_OFF ; \
+sysexit_ecrit: /**** END OF SYSEXIT CRITICAL REGION ****/ ; \
+ push %esp ; \
+ call evtchn_do_upcall ; \
+ add $4,%esp ; \
+ jmp ret_from_intr
+#define INTERRUPT_RETURN iret
 #endif

 #ifdef CONFIG_PREEMPT
-#define preempt_stop cli; TRACE_IRQS_OFF
+#define preempt_stop DISABLE_INTERRUPTS; TRACE_IRQS_OFF
 #else
 #define preempt_stop
 #define resume_kernel restore_nocheck
@@ -206,18 +224,21 @@ NMI_MASK = 0x80000000

 #define RING0_INT_FRAME \
 CFI_STARTPROC simple;\
+ CFI_SIGNAL_FRAME;\
 CFI_DEF_CFA esp, 3*4;\
 /*CFI_OFFSET cs, -2*4;*/\
 CFI_OFFSET eip, -3*4

 #define RING0_EC_FRAME \
 CFI_STARTPROC simple;\
+ CFI_SIGNAL_FRAME;\
 CFI_DEF_CFA esp, 4*4;\
 /*CFI_OFFSET cs, -2*4;*/\
 CFI_OFFSET eip, -3*4

 #define RING0_PTREGS_FRAME \
 CFI_STARTPROC simple;\
+ CFI_SIGNAL_FRAME;\
 CFI_DEF_CFA esp, OLDESP-EBX;\
 /*CFI_OFFSET cs, CS-OLDESP;*/\
 CFI_OFFSET eip, EIP-OLDESP;\
@@ -263,8 +284,9 @@ ret_from_intr:
 check_userspace:
 movl EFLAGS(%esp), %eax # mix EFLAGS and CS
 movb CS(%esp), %al
- testl $(VM_MASK | 2), %eax
- jz resume_kernel
+ andl $(VM_MASK | SEGMENT_RPL_MASK), %eax
+ cmpl $USER_RPL, %eax
+ jb resume_kernel # not returning to v8086 or userspace
 ENTRY(resume_userspace)
 DISABLE_INTERRUPTS # make sure we don't miss an interrupt
 # setting need_resched or sigpending
@@ -277,7 +299,7 @@ ENTRY(resume_userspace)

 #ifdef CONFIG_PREEMPT
 ENTRY(resume_kernel)
- cli
+ DISABLE_INTERRUPTS
 cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ?
 jnz restore_nocheck
 need_resched:
@@ -297,6 +319,7 @@ need_resched:
 # sysenter call handler stub
 ENTRY(sysenter_entry)
 CFI_STARTPROC simple
+ CFI_SIGNAL_FRAME
 CFI_DEF_CFA esp, 0
 CFI_REGISTER esp, ebp
 movl SYSENTER_stack_esp0(%esp),%esp
@@ -305,7 +328,7 @@ sysenter_past_esp:
 * No need to follow this irqs on/off section: the syscall
 * disabled irqs and here we enable it straight after entry:
 */
- sti
+ ENABLE_INTERRUPTS
 pushl $(__USER_DS)
 CFI_ADJUST_CFA_OFFSET 4
 /*CFI_REL_OFFSET ss, 0*/
@@ -359,26 +382,8 @@ sysenter_past_esp:
 movl EIP(%esp), %edx
 movl OLDESP(%esp), %ecx
 xorl %ebp,%ebp
-#ifdef CONFIG_XEN
 TRACE_IRQS_ON
- __ENABLE_INTERRUPTS
-sysexit_scrit: /**** START OF SYSEXIT CRITICAL REGION ****/
- __TEST_PENDING
- jnz 14f # process more events if necessary...
- movl ESI(%esp), %esi
- sysexit
-14: __DISABLE_INTERRUPTS
- TRACE_IRQS_OFF
-sysexit_ecrit: /**** END OF SYSEXIT CRITICAL REGION ****/
- push %esp
- call evtchn_do_upcall
- add $4,%esp
- jmp ret_from_intr
-#else
- TRACE_IRQS_ON
- sti
- sysexit
-#endif /* !CONFIG_XEN */
+ ENABLE_INTERRUPTS_SYSEXIT
 CFI_ENDPROC

 # pv sysenter call handler stub
@@ -444,8 +449,8 @@ restore_all:
 # See comments in process.c:copy_thread() for details.
 movb OLDSS(%esp), %ah
 movb CS(%esp), %al
- andl $(VM_MASK | (4 << 8) | 3), %eax
- cmpl $((4 << 8) | 3), %eax
+ andl $(VM_MASK | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
+ cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax
 CFI_REMEMBER_STATE
 je ldt_ss # returning to user-space with LDT SS
 restore_nocheck:
@@ -467,12 +472,11 @@ restore_nocheck_notrace:
 RESTORE_REGS
 addl $4, %esp
 CFI_ADJUST_CFA_OFFSET -4
-1: iret
+1: INTERRUPT_RETURN
 .section .fixup,"ax"
 iret_exc:
 #ifndef CONFIG_XEN
- TRACE_IRQS_ON
- sti
+ ENABLE_INTERRUPTS
 #endif
 pushl $0 # no error code
 pushl $do_iret_error
@@ -498,7 +502,7 @@ ldt_ss:
 * dosemu and wine happy. */
 subl $8, %esp # reserve space for switch16 pointer
 CFI_ADJUST_CFA_OFFSET 8
- cli
+ DISABLE_INTERRUPTS
 TRACE_IRQS_OFF
 movl %esp, %eax
 /* Set up the 16bit stack frame with switch32 pointer on top,
@@ -508,7 +512,7 @@ ldt_ss:
 TRACE_IRQS_IRET
 RESTORE_REGS
 lss 20+4(%esp), %esp # switch to 16bit stack
-1: iret
+1: INTERRUPT_RETURN
 .section __ex_table,"a"
 .align 4
 .long 1b,iret_exc
@@ -524,7 +528,7 @@ scrit: /**** START OF CRITICAL REGION **
 RESTORE_REGS
 addl $4, %esp
 CFI_ADJUST_CFA_OFFSET -4
-1: iret
+1: INTERRUPT_RETURN
 .section __ex_table,"a"
 .align 4
 .long 1b,iret_exc
@@ -713,11 +717,9 @@ ENTRY(name) \
 #define UNWIND_ESPFIX_STACK
 #endif

-ENTRY(divide_error)
- RING0_INT_FRAME
- pushl $0 # no error code
- CFI_ADJUST_CFA_OFFSET 4
- pushl $do_divide_error
+KPROBE_ENTRY(page_fault)
+ RING0_EC_FRAME
+ pushl $do_page_fault
 CFI_ADJUST_CFA_OFFSET 4
 ALIGN
 error_code:
@@ -767,6 +769,7 @@ error_code:
 call *%edi
 jmp ret_from_exception
 CFI_ENDPROC
+KPROBE_END(page_fault)

 #ifdef CONFIG_XEN
 # A note on the "critical region" in our callback handler.
@@ -790,9 +793,11 @@ ENTRY(hypervisor_callback)
 pushl %eax
 CFI_ADJUST_CFA_OFFSET 4
 SAVE_ALL
- testb $2,CS(%esp)
+ movl CS(%esp),%ecx
 movl EIP(%esp),%eax
- jnz .Ldo_upcall
+ andl $SEGMENT_RPL_MASK,%ecx
+ cmpl $USER_RPL,%ecx
+ jae .Ldo_upcall
 cmpl $scrit,%eax
 jb 0f
 cmpl $ecrit,%eax
@@ -928,7 +933,7 @@ ENTRY(device_not_available)
 CFI_ADJUST_CFA_OFFSET 4
 SAVE_ALL
 #ifndef CONFIG_XEN
- movl %cr0, %eax
+ GET_CR0_INTO_EAX
 testl $0x4, %eax # EM (math emulation bit)
 je device_available_emulate
 pushl $0 # temporary storage for ORIG_EIP
@@ -963,9 +968,15 @@ device_available_emulate:
 jne ok; \
 label: \
 movl SYSENTER_stack_esp0+offset(%esp),%esp; \
+ CFI_DEF_CFA esp, 0; \
+ CFI_UNDEFINED eip; \
 pushfl; \
+ CFI_ADJUST_CFA_OFFSET 4; \
 pushl $__KERNEL_CS; \
- pushl $sysenter_past_esp
+ CFI_ADJUST_CFA_OFFSET 4; \
+ pushl $sysenter_past_esp; \
+ CFI_ADJUST_CFA_OFFSET 4; \
+ CFI_REL_OFFSET eip, 0
 #endif /* CONFIG_XEN */

 KPROBE_ENTRY(debug)
@@ -984,7 +995,8 @@ debug_stack_correct:
 call do_debug
 jmp ret_from_exception
 CFI_ENDPROC
- .previous .text
+KPROBE_END(debug)
+
 #ifndef CONFIG_XEN
 /*
 * NMI is doubly nasty. It can happen _while_ we're handling
@@ -994,7 +1006,7 @@ debug_stack_correct:
 * check whether we got an NMI on the debug path where the debug
 * fault happened on the sysenter path.
 */
-ENTRY(nmi)
+KPROBE_ENTRY(nmi)
 RING0_INT_FRAME
 pushl %eax
 CFI_ADJUST_CFA_OFFSET 4
@@ -1019,6 +1031,7 @@ ENTRY(nmi)
 cmpl $sysenter_entry,12(%esp)
 je nmi_debug_stack_check
 nmi_stack_correct:
+ /* We have a RING0_INT_FRAME here */
 pushl %eax
 CFI_ADJUST_CFA_OFFSET 4
 SAVE_ALL
@@ -1029,9 +1042,12 @@ nmi_stack_correct:
 CFI_ENDPROC

 nmi_stack_fixup:
+ RING0_INT_FRAME
 FIX_STACK(12,nmi_stack_correct, 1)
 jmp nmi_stack_correct
+
 nmi_debug_stack_check:
+ /* We have a RING0_INT_FRAME here */
 cmpw $__KERNEL_CS,16(%esp)
 jne nmi_stack_correct
 cmpl $debug,(%esp)
@@ -1042,8 +1058,10 @@ nmi_debug_stack_check:
 jmp nmi_stack_correct

 nmi_16bit_stack:
- RING0_INT_FRAME
- /* create the pointer to lss back */
+ /* We have a RING0_INT_FRAME here.
+ *
+ * create the pointer to lss back
+ */
 pushl %ss
 CFI_ADJUST_CFA_OFFSET 4
 pushl %esp
@@ -1064,14 +1082,14 @@ nmi_16bit_stack:
 call do_nmi
 RESTORE_REGS
 lss 12+4(%esp), %esp # back to 16bit stack
-1: iret
+1: INTERRUPT_RETURN
 CFI_ENDPROC
 .section __ex_table,"a"
 .align 4
 .long 1b,iret_exc
 .previous
 #else
-ENTRY(nmi)
+KPROBE_ENTRY(nmi)
 RING0_INT_FRAME
 pushl %eax
 CFI_ADJUST_CFA_OFFSET 4
@@ -1083,6 +1101,7 @@ ENTRY(nmi)
 jmp restore_all
 CFI_ENDPROC
 #endif
+KPROBE_END(nmi)

 KPROBE_ENTRY(int3)
 RING0_INT_FRAME
@@ -1094,7 +1113,7 @@ KPROBE_ENTRY(int3)
 call do_int3
 jmp ret_from_exception
 CFI_ENDPROC
- .previous .text
+KPROBE_END(int3)

 ENTRY(overflow)
 RING0_INT_FRAME
@@ -1159,7 +1178,7 @@ KPROBE_ENTRY(general_protection)
 CFI_ADJUST_CFA_OFFSET 4
 jmp error_code
 CFI_ENDPROC
- .previous .text
+KPROBE_END(general_protection)

 ENTRY(alignment_check)
 RING0_EC_FRAME
@@ -1168,13 +1187,14 @@ ENTRY(alignment_check)
 jmp error_code
 CFI_ENDPROC

-KPROBE_ENTRY(page_fault)
- RING0_EC_FRAME
- pushl $do_page_fault
+ENTRY(divide_error)
+ RING0_INT_FRAME
+ pushl $0 # no error code
+ CFI_ADJUST_CFA_OFFSET 4
+ pushl $do_divide_error
 CFI_ADJUST_CFA_OFFSET 4
 jmp error_code
 CFI_ENDPROC
- .previous .text

 #ifdef CONFIG_X86_MCE
 ENTRY(machine_check)
@@ -1236,6 +1256,19 @@ ENTRY(fixup_4gb_segment)
 jmp error_code
 CFI_ENDPROC

+ENTRY(kernel_thread_helper)
+ pushl $0 # fake return address for unwinder
+ CFI_STARTPROC
+ movl %edx,%eax
+ push %edx
+ CFI_ADJUST_CFA_OFFSET 4
+ call *%ebx
+ push %eax
+ CFI_ADJUST_CFA_OFFSET 4
+ call do_exit
+ CFI_ENDPROC
+ENDPROC(kernel_thread_helper)
+
 .section .rodata,"a"
 #include "syscall_table.S"

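In the entry_32-xen.S section above, DISABLE_INTERRUPTS/ENABLE_INTERRUPTS stop meaning cli/sti under Xen: an unprivileged guest toggles the per-vCPU event-channel mask in shared memory instead, and the ENABLE_INTERRUPTS_SYSEXIT critical region re-checks for events that arrived while the mask was set. A rough C rendering of what those assembly macros do follows; the two field names match the Xen shared-info layout referenced above, while the helper and struct names are invented for illustration.

#include <stdint.h>
#include <stdio.h>

struct vcpu_info_sketch {
    uint8_t evtchn_upcall_pending;  /* "an event (virtual IRQ) is waiting" */
    uint8_t evtchn_upcall_mask;     /* "virtual interrupts are disabled" */
};

/* DISABLE_INTERRUPTS: movb $1,evtchn_upcall_mask(%esi) */
static void guest_irq_disable(struct vcpu_info_sketch *v)
{
    v->evtchn_upcall_mask = 1;
}

/* ENABLE_INTERRUPTS plus the __TEST_PENDING check the SYSEXIT critical
 * region performs: after unmasking, events that arrived while masked
 * must still be processed before returning to userspace. */
static int guest_irq_enable(struct vcpu_info_sketch *v)
{
    v->evtchn_upcall_mask = 0;
    return v->evtchn_upcall_pending; /* nonzero: take the upcall path */
}

int main(void)
{
    struct vcpu_info_sketch v = { .evtchn_upcall_pending = 1 };
    guest_irq_disable(&v);
    if (guest_irq_enable(&v))
        puts("pending event: would branch to evtchn_do_upcall()");
    return 0;
}

The region between sysexit_scrit and sysexit_ecrit is "critical" because an upcall landing there would see a half-restored frame; the hypervisor_callback fixup code above handles exactly that window.
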
--- sle11-2009-05-14.orig/arch/x86/kernel/head_32-xen.S 2009-05-14 11:02:43.000000000 +0200
+++ sle11-2009-05-14/arch/x86/kernel/head_32-xen.S 2009-03-04 11:28:34.000000000 +0100
@@ -62,7 +62,7 @@ ENTRY(startup_32)
 movl %eax,%gs
 cld # gcc2 wants the direction flag cleared at all times

- pushl %eax # fake return address
+ pushl $0 # fake return address for unwinder
 jmp start_kernel

 #define HYPERCALL_PAGE_OFFSET 0x1000
--- sle11-2009-05-14.orig/arch/x86/kernel/io_apic_32-xen.c 2009-03-16 16:13:45.000000000 +0100
+++ sle11-2009-05-14/arch/x86/kernel/io_apic_32-xen.c 2009-03-04 11:28:34.000000000 +0100
@@ -31,6 +31,9 @@
 #include <linux/acpi.h>
 #include <linux/module.h>
 #include <linux/sysdev.h>
+#include <linux/pci.h>
+#include <linux/msi.h>
+#include <linux/htirq.h>

 #include <asm/io.h>
 #include <asm/smp.h>
@@ -38,13 +41,15 @@
 #include <asm/timer.h>
 #include <asm/i8259.h>
 #include <asm/nmi.h>
+#include <asm/msidef.h>
+#include <asm/hypertransport.h>

 #include <mach_apic.h>
+#include <mach_apicdef.h>

 #include "io_ports.h"

 #ifdef CONFIG_XEN
-
 #include <xen/interface/xen.h>
 #include <xen/interface/physdev.h>
 #include <xen/evtchn.h>
@@ -56,32 +61,7 @@

 unsigned long io_apic_irqs;

-static inline unsigned int xen_io_apic_read(unsigned int apic, unsigned int reg)
-{
- struct physdev_apic apic_op;
- int ret;
-
- apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr;
- apic_op.reg = reg;
- ret = HYPERVISOR_physdev_op(PHYSDEVOP_apic_read, &apic_op);
- if (ret)
- return ret;
- return apic_op.value;
-}
-
-static inline void xen_io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
-{
- struct physdev_apic apic_op;
-
- apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr;
- apic_op.reg = reg;
- apic_op.value = value;
- WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic_op));
-}
-
-#define io_apic_read(a,r) xen_io_apic_read(a,r)
-#define io_apic_write(a,r,v) xen_io_apic_write(a,r,v)
-
+#define clear_IO_APIC() ((void)0)
 #endif /* CONFIG_XEN */

 int (*ioapic_renumber_irq)(int ioapic, int irq);
@@ -108,7 +88,7 @@ int sis_apic_bug = -1;
 */
 int nr_ioapic_registers[MAX_IO_APICS];

-int disable_timer_pin_1 __initdata;
+static int disable_timer_pin_1 __initdata;

 /*
 * Rough estimation of how many shared IRQs there are, can
@@ -128,12 +108,124 @@ static struct irq_pin_list {
 int apic, pin, next;
 } irq_2_pin[PIN_MAP_SIZE];

-int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1};
-#ifdef CONFIG_PCI_MSI
-#define vector_to_irq(vector) \
- (platform_legacy_irq(vector) ? vector : vector_irq[vector])
+#ifndef CONFIG_XEN
+struct io_apic {
+ unsigned int index;
+ unsigned int unused[3];
+ unsigned int data;
+};
+
+static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
+{
+ return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
+ + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK);
+}
+#endif
+
+static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
+{
+#ifndef CONFIG_XEN
+ struct io_apic __iomem *io_apic = io_apic_base(apic);
+ writel(reg, &io_apic->index);
+ return readl(&io_apic->data);
+#else
+ struct physdev_apic apic_op;
+ int ret;
+
+ apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr;
+ apic_op.reg = reg;
+ ret = HYPERVISOR_physdev_op(PHYSDEVOP_apic_read, &apic_op);
+ if (ret)
+ return ret;
+ return apic_op.value;
+#endif
+}
+
+static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
+{
+#ifndef CONFIG_XEN
+ struct io_apic __iomem *io_apic = io_apic_base(apic);
+ writel(reg, &io_apic->index);
+ writel(value, &io_apic->data);
+#else
+ struct physdev_apic apic_op;
+
+ apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr;
+ apic_op.reg = reg;
+ apic_op.value = value;
+ WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic_op));
+#endif
+}
+
+#ifndef CONFIG_XEN
+/*
+ * Re-write a value: to be used for read-modify-write
+ * cycles where the read already set up the index register.
+ *
+ * Older SiS APIC requires we rewrite the index register
+ */
+static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
+{
+ volatile struct io_apic *io_apic = io_apic_base(apic);
+ if (sis_apic_bug)
+ writel(reg, &io_apic->index);
+ writel(value, &io_apic->data);
+}
 #else
-#define vector_to_irq(vector) (vector)
+#define io_apic_modify io_apic_write
+#endif
+
+union entry_union {
+ struct { u32 w1, w2; };
+ struct IO_APIC_route_entry entry;
+};
+
+#ifndef CONFIG_XEN
+static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
+{
+ union entry_union eu;
+ unsigned long flags;
+ spin_lock_irqsave(&ioapic_lock, flags);
+ eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
+ eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+ return eu.entry;
+}
+#endif
+
+/*
+ * When we write a new IO APIC routing entry, we need to write the high
+ * word first! If the mask bit in the low word is clear, we will enable
+ * the interrupt, and we need to make sure the entry is fully populated
+ * before that happens.
+ */
+static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
+{
+ unsigned long flags;
+ union entry_union eu;
+ eu.entry = e;
+ spin_lock_irqsave(&ioapic_lock, flags);
+ io_apic_write(apic, 0x11 + 2*pin, eu.w2);
+ io_apic_write(apic, 0x10 + 2*pin, eu.w1);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+#ifndef CONFIG_XEN
+/*
+ * When we mask an IO APIC routing entry, we need to write the low
+ * word first, in order to set the mask bit before we change the
+ * high bits!
+ */
+static void ioapic_mask_entry(int apic, int pin)
+{
+ unsigned long flags;
+ union entry_union eu = { .entry.mask = 1 };
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ io_apic_write(apic, 0x10 + 2*pin, eu.w1);
+ io_apic_write(apic, 0x11 + 2*pin, eu.w2);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+}
 #endif

 /*
@@ -159,9 +251,7 @@ static void add_pin_to_irq(unsigned int
 entry->pin = pin;
 }

-#ifdef CONFIG_XEN
-#define clear_IO_APIC() ((void)0)
-#else
+#ifndef CONFIG_XEN
 /*
 * Reroute an IRQ to a different pin.
 */
@@ -246,25 +336,16 @@ static void unmask_IO_APIC_irq (unsigned
 static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
 {
 struct IO_APIC_route_entry entry;
- unsigned long flags;

 /* Check delivery_mode to be sure we're not clearing an SMI pin */
- spin_lock_irqsave(&ioapic_lock, flags);
- *(((int*)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
- *(((int*)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ entry = ioapic_read_entry(apic, pin);
 if (entry.delivery_mode == dest_SMI)
 return;

 /*
 * Disable it in the IO-APIC irq-routing table:
 */
- memset(&entry, 0, sizeof(entry));
- entry.mask = 1;
- spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry) + 0));
- io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry) + 1));
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ ioapic_mask_entry(apic, pin);
 }

 static void clear_IO_APIC (void)
@@ -304,7 +385,7 @@ static void set_ioapic_affinity_irq(unsi
 break;
 entry = irq_2_pin + entry->next;
 }
- set_irq_info(irq, cpumask);
+ set_native_irq_info(irq, cpumask);
 spin_unlock_irqrestore(&ioapic_lock, flags);
 }

@@ -1212,43 +1293,43 @@ static inline int IO_APIC_irq_trigger(in
 /* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
 u8 irq_vector[NR_IRQ_VECTORS] __read_mostly; /* = { FIRST_DEVICE_VECTOR , 0 }; */

-int assign_irq_vector(int irq)
+static int __assign_irq_vector(int irq)
 {
- unsigned long flags;
 int vector;
 struct physdev_irq irq_op;

- BUG_ON(irq != AUTO_ASSIGN && (unsigned)irq >= NR_IRQ_VECTORS);
+ BUG_ON((unsigned)irq >= NR_IRQ_VECTORS);

 if (irq < PIRQ_BASE || irq - PIRQ_BASE >= NR_PIRQS)
 return -EINVAL;

- spin_lock_irqsave(&vector_lock, flags);
-
- if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) {
- spin_unlock_irqrestore(&vector_lock, flags);
- return IO_APIC_VECTOR(irq);
- }
+ if (irq_vector[irq] > 0)
+ return irq_vector[irq];

 irq_op.irq = irq;
- if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) {
- spin_unlock_irqrestore(&vector_lock, flags);
+ if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op))
 return -ENOSPC;
- }

 vector = irq_op.vector;
- vector_irq[vector] = irq;
- if (irq != AUTO_ASSIGN)
- IO_APIC_VECTOR(irq) = vector;
+ irq_vector[irq] = vector;
+
+ return vector;
+}
+
+static int assign_irq_vector(int irq)
+{
+ unsigned long flags;
+ int vector;

+ spin_lock_irqsave(&vector_lock, flags);
+ vector = __assign_irq_vector(irq);
 spin_unlock_irqrestore(&vector_lock, flags);

 return vector;
 }

 #ifndef CONFIG_XEN
-static struct hw_interrupt_type ioapic_level_type;
-static struct hw_interrupt_type ioapic_edge_type;
+static struct irq_chip ioapic_chip;

 #define IOAPIC_AUTO -1
 #define IOAPIC_EDGE 0
@@ -1256,16 +1337,16 @@ static struct hw_interrupt_type ioapic_e

 static void ioapic_register_intr(int irq, int vector, unsigned long trigger)
 {
- unsigned idx;
-
- idx = use_pci_vector() && !platform_legacy_irq(irq) ? vector : irq;
-
 if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
 trigger == IOAPIC_LEVEL)
- irq_desc[idx].chip = &ioapic_level_type;
- else
- irq_desc[idx].chip = &ioapic_edge_type;
- set_intr_gate(vector, interrupt[idx]);
+ set_irq_chip_and_handler_name(irq, &ioapic_chip,
+ handle_fasteoi_irq, "fasteoi");
+ else {
+ irq_desc[irq].status |= IRQ_DELAYED_DISABLE;
+ set_irq_chip_and_handler_name(irq, &ioapic_chip,
+ handle_edge_irq, "edge");
+ }
+ set_intr_gate(vector, interrupt[irq]);
 }
 #else
 #define ioapic_register_intr(irq, vector, trigger) evtchn_register_pirq(irq)
@@ -1336,9 +1417,8 @@ static void __init setup_IO_APIC_irqs(vo
 if (!apic && (irq < 16))
 disable_8259A_irq(irq);
 }
+ ioapic_write_entry(apic, pin, entry);
 spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
- io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
 set_native_irq_info(irq, TARGET_CPUS);
 spin_unlock_irqrestore(&ioapic_lock, flags);
 }
@@ -1355,7 +1435,6 @@ static void __init setup_IO_APIC_irqs(vo
 static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, int vector)
 {
 struct IO_APIC_route_entry entry;
- unsigned long flags;

 memset(&entry,0,sizeof(entry));

@@ -1380,15 +1459,13 @@ static void __init setup_ExtINT_IRQ0_pin
 * The timer IRQ doesn't have to know that behind the
 * scene we have a 8259A-master in AEOI mode ...
 */
- irq_desc[0].chip = &ioapic_edge_type;
+ irq_desc[0].chip = &ioapic_chip;
+ set_irq_handler(0, handle_edge_irq);

 /*
 * Add it to the IO-APIC irq-routing table:
 */
- spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
- io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ ioapic_write_entry(apic, pin, entry);

 enable_8259A_irq(0);
 }
@@ -1498,10 +1575,7 @@ void __init print_IO_APIC(void)
 for (i = 0; i <= reg_01.bits.entries; i++) {
 struct IO_APIC_route_entry entry;

- spin_lock_irqsave(&ioapic_lock, flags);
- *(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2);
- *(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2);
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ entry = ioapic_read_entry(apic, i);

 printk(KERN_DEBUG " %02x %03X %02X ",
 i,
@@ -1521,17 +1595,12 @@ void __init print_IO_APIC(void)
 );
 }
 }
- if (use_pci_vector())
- printk(KERN_INFO "Using vector-based indexing\n");
 printk(KERN_DEBUG "IRQ to pin mappings:\n");
 for (i = 0; i < NR_IRQS; i++) {
 struct irq_pin_list *entry = irq_2_pin + i;
 if (entry->pin < 0)
 continue;
- if (use_pci_vector() && !platform_legacy_irq(i))
- printk(KERN_DEBUG "IRQ%d ", IO_APIC_VECTOR(i));
- else
- printk(KERN_DEBUG "IRQ%d ", i);
+ printk(KERN_DEBUG "IRQ%d ", i);
 for (;;) {
 printk("-> %d:%d", entry->apic, entry->pin);
 if (!entry->next)
@@ -1720,10 +1789,7 @@ static void __init enable_IO_APIC(void)
 /* See if any of the pins is in ExtINT mode */
 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
 struct IO_APIC_route_entry entry;
- spin_lock_irqsave(&ioapic_lock, flags);
- *(((int *)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
- *(((int *)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ entry = ioapic_read_entry(apic, pin);


 /* If the interrupt line is enabled and in ExtInt mode
@@ -1782,7 +1848,6 @@ void disable_IO_APIC(void)
 */
 if (ioapic_i8259.pin != -1) {
 struct IO_APIC_route_entry entry;
- unsigned long flags;

 memset(&entry, 0, sizeof(entry));
 entry.mask = 0; /* Enabled */
@@ -1799,12 +1864,7 @@ void disable_IO_APIC(void)
 /*
 * Add it to the IO-APIC irq-routing table:
 */
- spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(ioapic_i8259.apic, 0x11+2*ioapic_i8259.pin,
- *(((int *)&entry)+1));
- io_apic_write(ioapic_i8259.apic, 0x10+2*ioapic_i8259.pin,
- *(((int *)&entry)+0));
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
 }
 disconnect_bsp_APIC(ioapic_i8259.pin != -1);
 #endif
@@ -1971,6 +2031,8 @@ static int __init timer_irq_works(void)
 */

 /*
+ * Startup quirk:
+ *
 * Starting up a edge-triggered IO-APIC interrupt is
 * nasty - we need to make sure that we get the edge.
 * If it is already asserted for some reason, we need
@@ -1978,8 +2040,10 @@ static int __init timer_irq_works(void)
 *
 * This is not complete - we should be able to fake
 * an edge even if it isn't on the 8259A...
+ *
+ * (We do this for level-triggered IRQs too - it cannot hurt.)
 */
-static unsigned int startup_edge_ioapic_irq(unsigned int irq)
+static unsigned int startup_ioapic_irq(unsigned int irq)
 {
 int was_pending = 0;
 unsigned long flags;
@@ -1996,47 +2060,18 @@ static unsigned int startup_edge_ioapic_
 return was_pending;
 }

-/*
- * Once we have recorded IRQ_PENDING already, we can mask the
- * interrupt for real. This prevents IRQ storms from unhandled
- * devices.
- */
-static void ack_edge_ioapic_irq(unsigned int irq)
-{
- move_irq(irq);
- if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED))
- == (IRQ_PENDING | IRQ_DISABLED))
- mask_IO_APIC_irq(irq);
- ack_APIC_irq();
-}
-
-/*
- * Level triggered interrupts can just be masked,
- * and shutting down and starting up the interrupt
- * is the same as enabling and disabling them -- except
- * with a startup need to return a "was pending" value.
- *
- * Level triggered interrupts are special because we
- * do not touch any IO-APIC register while handling
- * them. We ack the APIC in the end-IRQ handler, not
- * in the start-IRQ-handler. Protection against reentrance
- * from the same interrupt is still provided, both by the
- * generic IRQ layer and by the fact that an unacked local
- * APIC does not accept IRQs.
- */
-static unsigned int startup_level_ioapic_irq (unsigned int irq)
+static void ack_ioapic_irq(unsigned int irq)
 {
- unmask_IO_APIC_irq(irq);
-
- return 0; /* don't check for pending */
+ move_native_irq(irq);
+ ack_APIC_irq();
 }

-static void end_level_ioapic_irq (unsigned int irq)
+static void ack_ioapic_quirk_irq(unsigned int irq)
 {
 unsigned long v;
 int i;

- move_irq(irq);
+ move_native_irq(irq);
 /*
 * It appears there is an erratum which affects at least version 0x11
 * of I/O APIC (that's the 82093AA and cores integrated into various
@@ -2056,7 +2091,7 @@ static void end_level_ioapic_irq (unsign
 * operation to prevent an edge-triggered interrupt escaping meanwhile.
 * The idea is from Manfred Spraul. --macro
 */
- i = IO_APIC_VECTOR(irq);
+ i = irq_vector[irq];

 v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));

@@ -2071,104 +2106,24 @@ static void end_level_ioapic_irq (unsign
 }
 }

-#ifdef CONFIG_PCI_MSI
-static unsigned int startup_edge_ioapic_vector(unsigned int vector)
-{
- int irq = vector_to_irq(vector);
-
- return startup_edge_ioapic_irq(irq);
-}
-
-static void ack_edge_ioapic_vector(unsigned int vector)
-{
- int irq = vector_to_irq(vector);
-
- move_native_irq(vector);
- ack_edge_ioapic_irq(irq);
-}
-
-static unsigned int startup_level_ioapic_vector (unsigned int vector)
-{
- int irq = vector_to_irq(vector);
-
- return startup_level_ioapic_irq (irq);
-}
-
-static void end_level_ioapic_vector (unsigned int vector)
-{
- int irq = vector_to_irq(vector);
-
- move_native_irq(vector);
- end_level_ioapic_irq(irq);
-}
-
-static void mask_IO_APIC_vector (unsigned int vector)
-{
- int irq = vector_to_irq(vector);
-
- mask_IO_APIC_irq(irq);
-}
-
-static void unmask_IO_APIC_vector (unsigned int vector)
+static int ioapic_retrigger_irq(unsigned int irq)
 {
- int irq = vector_to_irq(vector);
-
- unmask_IO_APIC_irq(irq);
-}
-
-#ifdef CONFIG_SMP
-static void set_ioapic_affinity_vector (unsigned int vector,
- cpumask_t cpu_mask)
-{
- int irq = vector_to_irq(vector);
-
- set_native_irq_info(vector, cpu_mask);
- set_ioapic_affinity_irq(irq, cpu_mask);
-}
-#endif
-#endif
-
-static int ioapic_retrigger(unsigned int irq)
-{
- send_IPI_self(IO_APIC_VECTOR(irq));
+ send_IPI_self(irq_vector[irq]);

 return 1;
 }

-/*
- * Level and edge triggered IO-APIC interrupts need different handling,
- * so we use two separate IRQ descriptors. Edge triggered IRQs can be
- * handled with the level-triggered descriptor, but that one has slightly
- * more overhead. Level-triggered interrupts cannot be handled with the
- * edge-triggered handler, without risking IRQ storms and other ugly
- * races.
- */
-static struct hw_interrupt_type ioapic_edge_type __read_mostly = {
- .typename = "IO-APIC-edge",
- .startup = startup_edge_ioapic,
- .shutdown = shutdown_edge_ioapic,
- .enable = enable_edge_ioapic,
- .disable = disable_edge_ioapic,
- .ack = ack_edge_ioapic,
- .end = end_edge_ioapic,
-#ifdef CONFIG_SMP
- .set_affinity = set_ioapic_affinity,
-#endif
- .retrigger = ioapic_retrigger,
-};
-
-static struct hw_interrupt_type ioapic_level_type __read_mostly = {
- .typename = "IO-APIC-level",
- .startup = startup_level_ioapic,
- .shutdown = shutdown_level_ioapic,
- .enable = enable_level_ioapic,
- .disable = disable_level_ioapic,
- .ack = mask_and_ack_level_ioapic,
- .end = end_level_ioapic,
+static struct irq_chip ioapic_chip __read_mostly = {
+ .name = "IO-APIC",
+ .startup = startup_ioapic_irq,
+ .mask = mask_IO_APIC_irq,
+ .unmask = unmask_IO_APIC_irq,
+ .ack = ack_ioapic_irq,
+ .eoi = ack_ioapic_quirk_irq,
 #ifdef CONFIG_SMP
- .set_affinity = set_ioapic_affinity,
+ .set_affinity = set_ioapic_affinity_irq,
 #endif
- .retrigger = ioapic_retrigger,
+ .retrigger = ioapic_retrigger_irq,
 };
 #endif /* !CONFIG_XEN */

@@ -2189,12 +2144,7 @@ static inline void init_IO_APIC_traps(vo
 */
 for (irq = 0; irq < NR_IRQS ; irq++) {
 int tmp = irq;
- if (use_pci_vector()) {
- if (!platform_legacy_irq(tmp))
- if ((tmp = vector_to_irq(tmp)) == -1)
- continue;
- }
- if (IO_APIC_IRQ(tmp) && !IO_APIC_VECTOR(tmp)) {
+ if (IO_APIC_IRQ(tmp) && !irq_vector[tmp]) {
 /*
 * Hmm.. We don't have an entry for this,
 * so default to an old-fashioned 8259
@@ -2205,22 +2155,23 @@ static inline void init_IO_APIC_traps(vo
 #ifndef CONFIG_XEN
 else
 /* Strange. Oh, well.. */
- irq_desc[irq].chip = &no_irq_type;
+ irq_desc[irq].chip = &no_irq_chip;
 #endif
 }
 }
 }

 #ifndef CONFIG_XEN
-static void enable_lapic_irq (unsigned int irq)
-{
- unsigned long v;
+/*
+ * The local APIC irq-chip implementation:
+ */

- v = apic_read(APIC_LVT0);
- apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED);
+static void ack_apic(unsigned int irq)
+{
+ ack_APIC_irq();
 }

-static void disable_lapic_irq (unsigned int irq)
+static void mask_lapic_irq (unsigned int irq)
 {
 unsigned long v;

@@ -2228,21 +2179,19 @@ static void disable_lapic_irq (unsigned
 apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED);
 }

-static void ack_lapic_irq (unsigned int irq)
+static void unmask_lapic_irq (unsigned int irq)
 {
- ack_APIC_irq();
-}
+ unsigned long v;

-static void end_lapic_irq (unsigned int i) { /* nothing */ }
+ v = apic_read(APIC_LVT0);
+ apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED);
+}

-static struct hw_interrupt_type lapic_irq_type __read_mostly = {
- .typename = "local-APIC-edge",
- .startup = NULL, /* startup_irq() not used for IRQ0 */
- .shutdown = NULL, /* shutdown_irq() not used for IRQ0 */
- .enable = enable_lapic_irq,
- .disable = disable_lapic_irq,
- .ack = ack_lapic_irq,
- .end = end_lapic_irq
+static struct irq_chip lapic_chip __read_mostly = {
+ .name = "local-APIC-edge",
+ .mask = mask_lapic_irq,
+ .unmask = unmask_lapic_irq,
+ .eoi = ack_apic,
 };

 static void setup_nmi (void)
@@ -2275,17 +2224,13 @@ static inline void unlock_ExtINT_logic(v
 int apic, pin, i;
 struct IO_APIC_route_entry entry0, entry1;
 unsigned char save_control, save_freq_select;
- unsigned long flags;

 pin = find_isa_irq_pin(8, mp_INT);
 apic = find_isa_irq_apic(8, mp_INT);
 if (pin == -1)
 return;

- spin_lock_irqsave(&ioapic_lock, flags);
- *(((int *)&entry0) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
- *(((int *)&entry0) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ entry0 = ioapic_read_entry(apic, pin);
 clear_IO_APIC_pin(apic, pin);

 memset(&entry1, 0, sizeof(entry1));
@@ -2298,10 +2243,7 @@ static inline void unlock_ExtINT_logic(v
 entry1.trigger = 0;
 entry1.vector = 0;

- spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry1) + 1));
- io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry1) + 0));
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ ioapic_write_entry(apic, pin, entry1);

 save_control = CMOS_READ(RTC_CONTROL);
 save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
@@ -2320,10 +2262,7 @@ static inline void unlock_ExtINT_logic(v
 CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
 clear_IO_APIC_pin(apic, pin);

- spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry0) + 1));
- io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry0) + 0));
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ ioapic_write_entry(apic, pin, entry0);
 }

 int timer_uses_ioapic_pin_0;
@@ -2423,7 +2362,8 @@ static inline void check_timer(void)
 printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ...");

 disable_8259A_irq(0);
- irq_desc[0].chip = &lapic_irq_type;
+ set_irq_chip_and_handler_name(0, &lapic_chip, handle_fasteoi_irq,
+ "fasteio");
 apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */
 enable_8259A_irq(0);

@@ -2537,17 +2477,12 @@ static int ioapic_suspend(struct sys_dev
 {
 struct IO_APIC_route_entry *entry;
 struct sysfs_ioapic_data *data;
- unsigned long flags;
 int i;

 data = container_of(dev, struct sysfs_ioapic_data, dev);
 entry = data->entry;
- spin_lock_irqsave(&ioapic_lock, flags);
- for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) {
- *(((int *)entry) + 1) = io_apic_read(dev->id, 0x11 + 2 * i);
- *(((int *)entry) + 0) = io_apic_read(dev->id, 0x10 + 2 * i);
- }
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ for (i = 0; i < nr_ioapic_registers[dev->id]; i ++)
+ entry[i] = ioapic_read_entry(dev->id, i);

 return 0;
 }
@@ -2569,11 +2504,9 @@ static int ioapic_resume(struct sys_devi
 reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid;
 io_apic_write(dev->id, 0, reg_00.raw);
 }
- for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) {
- io_apic_write(dev->id, 0x11+2*i, *(((int *)entry)+1));
- io_apic_write(dev->id, 0x10+2*i, *(((int *)entry)+0));
- }
 spin_unlock_irqrestore(&ioapic_lock, flags);
+ for (i = 0; i < nr_ioapic_registers[dev->id]; i ++)
+ ioapic_write_entry(dev->id, i, entry[i]);

 return 0;
 }
@@ -2619,8 +2552,240 @@ static int __init ioapic_init_sysfs(void

 device_initcall(ioapic_init_sysfs);

+/*
+ * Dynamic irq allocate and deallocation
+ */
+int create_irq(void)
+{
+ /* Allocate an unused irq */
+ int irq, new, vector;
+ unsigned long flags;
+
+ irq = -ENOSPC;
+ spin_lock_irqsave(&vector_lock, flags);
+ for (new = (NR_IRQS - 1); new >= 0; new--) {
+ if (platform_legacy_irq(new))
+ continue;
+ if (irq_vector[new] != 0)
+ continue;
+ vector = __assign_irq_vector(new);
+ if (likely(vector > 0))
+ irq = new;
+ break;
+ }
+ spin_unlock_irqrestore(&vector_lock, flags);
+
+ if (irq >= 0) {
+ set_intr_gate(vector, interrupt[irq]);
+ dynamic_irq_init(irq);
+ }
+ return irq;
+}
+
+void destroy_irq(unsigned int irq)
+{
+ unsigned long flags;
+
+ dynamic_irq_cleanup(irq);
+
+ spin_lock_irqsave(&vector_lock, flags);
+ irq_vector[irq] = 0;
+ spin_unlock_irqrestore(&vector_lock, flags);
+}
+
 #endif /* CONFIG_XEN */

+/*
+ * MSI mesage composition
+ */
+#if defined(CONFIG_PCI_MSI) && !defined(CONFIG_XEN)
+static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
+{
+ int vector;
+ unsigned dest;
+
+ vector = assign_irq_vector(irq);
+ if (vector >= 0) {
+ dest = cpu_mask_to_apicid(TARGET_CPUS);
+
+ msg->address_hi = MSI_ADDR_BASE_HI;
+ msg->address_lo =
+ MSI_ADDR_BASE_LO |
+ ((INT_DEST_MODE == 0) ?
+ MSI_ADDR_DEST_MODE_PHYSICAL:
+ MSI_ADDR_DEST_MODE_LOGICAL) |
+ ((INT_DELIVERY_MODE != dest_LowestPrio) ?
+ MSI_ADDR_REDIRECTION_CPU:
+ MSI_ADDR_REDIRECTION_LOWPRI) |
+ MSI_ADDR_DEST_ID(dest);
+
+ msg->data =
+ MSI_DATA_TRIGGER_EDGE |
+ MSI_DATA_LEVEL_ASSERT |
+ ((INT_DELIVERY_MODE != dest_LowestPrio) ?
+ MSI_DATA_DELIVERY_FIXED:
+ MSI_DATA_DELIVERY_LOWPRI) |
+ MSI_DATA_VECTOR(vector);
+ }
+ return vector;
+}
+
+#ifdef CONFIG_SMP
+static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
+{
+ struct msi_msg msg;
+ unsigned int dest;
+ cpumask_t tmp;
+ int vector;
+
+ cpus_and(tmp, mask, cpu_online_map);
+ if (cpus_empty(tmp))
+ tmp = TARGET_CPUS;
+
+ vector = assign_irq_vector(irq);
+ if (vector < 0)
+ return;
+
+ dest = cpu_mask_to_apicid(mask);
+
+ read_msi_msg(irq, &msg);
+
+ msg.data &= ~MSI_DATA_VECTOR_MASK;
+ msg.data |= MSI_DATA_VECTOR(vector);
+ msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
+ msg.address_lo |= MSI_ADDR_DEST_ID(dest);
+
+ write_msi_msg(irq, &msg);
+ set_native_irq_info(irq, mask);
+}
+#endif /* CONFIG_SMP */
+
+/*
+ * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
+ * which implement the MSI or MSI-X Capability Structure.
+ */
+static struct irq_chip msi_chip = {
+ .name = "PCI-MSI",
+ .unmask = unmask_msi_irq,
+ .mask = mask_msi_irq,
+ .ack = ack_ioapic_irq,
+#ifdef CONFIG_SMP
+ .set_affinity = set_msi_irq_affinity,
+#endif
+ .retrigger = ioapic_retrigger_irq,
+};
+
+int arch_setup_msi_irq(unsigned int irq, struct pci_dev *dev)
+{
+ struct msi_msg msg;
+ int ret;
+ ret = msi_compose_msg(dev, irq, &msg);
+ if (ret < 0)
+ return ret;
+
+ write_msi_msg(irq, &msg);
+
+ set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq,
+ "edge");
+
+ return 0;
+}
+
+void arch_teardown_msi_irq(unsigned int irq)
+{
+ return;
+}
+
+#endif /* CONFIG_PCI_MSI */
+
+/*
+ * Hypertransport interrupt support
+ */
+#ifdef CONFIG_HT_IRQ
+
+#ifdef CONFIG_SMP
+
+static void target_ht_irq(unsigned int irq, unsigned int dest)
+{
+ struct ht_irq_msg msg;
+ fetch_ht_irq_msg(irq, &msg);
+
+ msg.address_lo &= ~(HT_IRQ_LOW_DEST_ID_MASK);
+ msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK);
+
+ msg.address_lo |= HT_IRQ_LOW_DEST_ID(dest);
+ msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest);
+
+ write_ht_irq_msg(irq, &msg);
+}
+
+static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
+{
+ unsigned int dest;
+ cpumask_t tmp;
+
+ cpus_and(tmp, mask, cpu_online_map);
+ if (cpus_empty(tmp))
+ tmp = TARGET_CPUS;
+
+ cpus_and(mask, tmp, CPU_MASK_ALL);
+
+ dest = cpu_mask_to_apicid(mask);
+
+ target_ht_irq(irq, dest);
+ set_native_irq_info(irq, mask);
+}
+#endif
+
+static struct irq_chip ht_irq_chip = {
+ .name = "PCI-HT",
+ .mask = mask_ht_irq,
+ .unmask = unmask_ht_irq,
+ .ack = ack_ioapic_irq,
+#ifdef CONFIG_SMP
+ .set_affinity = set_ht_irq_affinity,
+#endif
+ .retrigger = ioapic_retrigger_irq,
+};
+
+int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
+{
+ int vector;
+
+ vector = assign_irq_vector(irq);
+ if (vector >= 0) {
+ struct ht_irq_msg msg;
+ unsigned dest;
+ cpumask_t tmp;
+
+ cpus_clear(tmp);
+ cpu_set(vector >> 8, tmp);
+ dest = cpu_mask_to_apicid(tmp);
+
+ msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
+
+ msg.address_lo =
+ HT_IRQ_LOW_BASE |
+ HT_IRQ_LOW_DEST_ID(dest) |
+ HT_IRQ_LOW_VECTOR(vector) |
+ ((INT_DEST_MODE == 0) ?
+ HT_IRQ_LOW_DM_PHYSICAL :
+ HT_IRQ_LOW_DM_LOGICAL) |
+ HT_IRQ_LOW_RQEOI_EDGE |
+ ((INT_DELIVERY_MODE != dest_LowestPrio) ?
+ HT_IRQ_LOW_MT_FIXED :
+ HT_IRQ_LOW_MT_ARBITRATED) |
+ HT_IRQ_LOW_IRQ_MASKED;
+
+ write_ht_irq_msg(irq, &msg);
+
+ set_irq_chip_and_handler_name(irq, &ht_irq_chip,
+ handle_edge_irq, "edge");
+ }
+ return vector;
+}
+#endif /* CONFIG_HT_IRQ */
+
 /* --------------------------------------------------------------------------
 ACPI-based IOAPIC Configuration
 -------------------------------------------------------------------------- */
@@ -2774,13 +2939,34 @@ int io_apic_set_pci_routing (int ioapic,
 if (!ioapic && (irq < 16))
 disable_8259A_irq(irq);

+ ioapic_write_entry(ioapic, pin, entry);
 spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1));
- io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0));
- set_native_irq_info(use_pci_vector() ? entry.vector : irq, TARGET_CPUS);
+ set_native_irq_info(irq, TARGET_CPUS);
 spin_unlock_irqrestore(&ioapic_lock, flags);

 return 0;
 }

 #endif /* CONFIG_ACPI */
+
+static int __init parse_disable_timer_pin_1(char *arg)
+{
+ disable_timer_pin_1 = 1;
+ return 0;
+}
+early_param("disable_timer_pin_1", parse_disable_timer_pin_1);
+
+static int __init parse_enable_timer_pin_1(char *arg)
+{
+ disable_timer_pin_1 = -1;
+ return 0;
+}
+early_param("enable_timer_pin_1", parse_enable_timer_pin_1);
+
+static int __init parse_noapic(char *arg)
+{
+ /* disable IO-APIC */
+ disable_ioapic_setup();
+ return 0;
+}
+early_param("noapic", parse_noapic);
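
The ioapic_write_entry()/ioapic_mask_entry() pair introduced in the io_apic section above encodes an ordering rule: a routing entry is two 32-bit registers, and the word containing the mask bit must be written in whichever order guarantees the entry is never live while half-populated (high word first when programming an unmasked entry, low word first when masking). A compilable sketch of the union trick and both write orders follows; the struct layout is simplified and the register I/O is stubbed out, so treat it as an illustration rather than the kernel's definitions.

#include <stdio.h>
#include <stdint.h>

struct route_entry {        /* simplified stand-in for IO_APIC_route_entry */
    uint64_t vector : 8, delivery_mode : 3, dest_mode : 1,
             delivery_status : 1, polarity : 1, irr : 1,
             trigger : 1, mask : 1, __reserved : 39, dest : 8;
};

union entry_union {
    struct { uint32_t w1, w2; };   /* w1 = low word (mask bit), w2 = high */
    struct route_entry entry;
};

static uint32_t regs[2];           /* fake index 0 = 0x10+2*pin, 1 = 0x11+2*pin */
static void reg_write(int idx, uint32_t v) { regs[idx] = v; }

/* Programming an (unmasked) entry: high word first, so the still-masked
 * low word guards the entry until the final write. */
static void write_entry(struct route_entry e)
{
    union entry_union eu = { .entry = e };
    reg_write(1, eu.w2);
    reg_write(0, eu.w1);
}

/* Masking: low word first, so the mask bit is set before the high
 * bits change. */
static void mask_entry(void)
{
    union entry_union eu = { .entry = { .mask = 1 } };
    reg_write(0, eu.w1);
    reg_write(1, eu.w2);
}

int main(void)
{
    struct route_entry e = { .vector = 0x31 };
    write_entry(e);
    mask_entry();
    printf("low word after mask: %#x\n", regs[0]);
    return 0;
}

The union replaces the old *(((int *)&entry) + 0) casts that the hunks above delete, which also made it easy to get the two words in the wrong order at individual call sites.
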
--- sle11-2009-05-14.orig/arch/x86/kernel/irq_32-xen.c 2009-05-14 11:02:43.000000000 +0200
+++ sle11-2009-05-14/arch/x86/kernel/irq_32-xen.c 2009-03-04 11:28:34.000000000 +0100
@@ -53,8 +53,10 @@ static union irq_ctx *softirq_ctx[NR_CPU
 */
 fastcall unsigned int do_IRQ(struct pt_regs *regs)
 {
+ struct pt_regs *old_regs;
 /* high bit used in ret_from_ code */
 int irq = ~regs->orig_eax;
+ struct irq_desc *desc = irq_desc + irq;
 #ifdef CONFIG_4KSTACKS
 union irq_ctx *curctx, *irqctx;
 u32 *isp;
@@ -66,6 +68,7 @@ fastcall unsigned int do_IRQ(struct pt_r
 BUG();
 }

+ old_regs = set_irq_regs(regs);
 /*irq_enter();*/
 #ifdef CONFIG_DEBUG_STACKOVERFLOW
 /* Debugging check for stack overflow: is there less than 1KB free? */
@@ -110,19 +113,20 @@ fastcall unsigned int do_IRQ(struct pt_r
 (curctx->tinfo.preempt_count & SOFTIRQ_MASK);

 asm volatile(
- " xchgl %%ebx,%%esp \n"
- " call __do_IRQ \n"
+ " xchgl %%ebx,%%esp \n"
+ " call *%%edi \n"
 " movl %%ebx,%%esp \n"
 : "=a" (arg1), "=d" (arg2), "=b" (ebx)
- : "0" (irq), "1" (regs), "2" (isp)
- : "memory", "cc", "ecx"
+ : "0" (irq), "1" (desc), "2" (isp),
+ "D" (desc->handle_irq)
+ : "memory", "cc"
 );
 } else
 #endif
- __do_IRQ(irq, regs);
+ desc->handle_irq(irq, desc);

 /*irq_exit();*/
-
+ set_irq_regs(old_regs);
 return 1;
 }

@@ -253,7 +257,8 @@ int show_interrupts(struct seq_file *p,
 for_each_online_cpu(j)
 seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
 #endif
- seq_printf(p, " %14s", irq_desc[i].chip->typename);
+ seq_printf(p, " %8s", irq_desc[i].chip->name);
+ seq_printf(p, "-%-8s", irq_desc[i].name);
 seq_printf(p, " %s", action->name);

 for (action=action->next; action; action = action->next)
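
The do_IRQ() change above is part of the 2.6.19 rework that stopped threading struct pt_regs through every interrupt handler: the entry path stashes the register pointer in a per-CPU slot, handlers fetch it on demand, and nesting is handled by saving and restoring the previous value around each invocation. A small single-CPU sketch of that save/restore discipline follows; the names are made up for illustration.

#include <stdio.h>

struct pt_regs_sketch { unsigned long ip; };

/* One slot per CPU in the kernel; a single global here. */
static struct pt_regs_sketch *current_regs;

static struct pt_regs_sketch *set_irq_regs_sketch(struct pt_regs_sketch *r)
{
    struct pt_regs_sketch *old = current_regs;
    current_regs = r;
    return old;
}

static void handler(void)
{
    /* Handlers read the per-CPU slot instead of taking regs as an arg. */
    printf("interrupted at %#lx\n", current_regs->ip);
}

static void do_irq(struct pt_regs_sketch *regs)
{
    struct pt_regs_sketch *old = set_irq_regs_sketch(regs);
    handler();
    set_irq_regs_sketch(old);   /* restore for the nested-interrupt case */
}

int main(void)
{
    struct pt_regs_sketch r = { .ip = 0xc0100000UL };
    do_irq(&r);
    return 0;
}

This is why the hunk above can call desc->handle_irq(irq, desc) with no regs argument, and why the 4KSTACKS inline assembly now passes desc in %edx instead of regs.
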
--- sle11-2009-05-14.orig/arch/x86/kernel/ldt_32-xen.c 2009-05-14 11:02:43.000000000 +0200
+++ sle11-2009-05-14/arch/x86/kernel/ldt_32-xen.c 2009-03-04 11:28:34.000000000 +0100
@@ -1,5 +1,5 @@
 /*
- * linux/kernel/ldt.c
+ * linux/arch/i386/kernel/ldt.c
 *
 * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds
 * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
--- sle11-2009-05-14.orig/arch/x86/kernel/microcode-xen.c 2009-05-14 11:02:43.000000000 +0200
+++ sle11-2009-05-14/arch/x86/kernel/microcode-xen.c 2009-03-04 11:28:34.000000000 +0100
@@ -2,6 +2,7 @@
 * Intel CPU Microcode Update Driver for Linux
 *
 * Copyright (C) 2000-2004 Tigran Aivazian
+ * 2006 Shaohua Li <shaohua.li@intel.com>
 *
 * This driver allows to upgrade microcode on Intel processors
 * belonging to IA-32 family - PentiumPro, Pentium II,
@@ -33,7 +34,9 @@
 #include <linux/spinlock.h>
 #include <linux/mm.h>
 #include <linux/mutex.h>
-#include <linux/syscalls.h>
+#include <linux/cpu.h>
+#include <linux/firmware.h>
+#include <linux/platform_device.h>

 #include <asm/msr.h>
 #include <asm/uaccess.h>
@@ -55,12 +58,7 @@ module_param(verbose, int, 0644);
 /* no concurrent ->write()s are allowed on /dev/cpu/microcode */
 static DEFINE_MUTEX(microcode_mutex);

-static int microcode_open (struct inode *unused1, struct file *unused2)
-{
- return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
-}
-
-
+#ifdef CONFIG_MICROCODE_OLD_INTERFACE
 static int do_microcode_update (const void __user *ubuf, size_t len)
 {
 int err;
@@ -85,6 +83,11 @@ static int do_microcode_update (const vo
 return err;
 }

+static int microcode_open (struct inode *unused1, struct file *unused2)
+{
+ return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
+}
+
 static ssize_t microcode_write (struct file *file, const char __user *buf, size_t len, loff_t *ppos)
 {
 ssize_t ret;
@@ -117,7 +120,7 @@ static struct miscdevice microcode_dev =
 .fops = &microcode_fops,
 };

-static int __init microcode_init (void)
+static int __init microcode_dev_init (void)
 {
 int error;

@@ -129,6 +132,68 @@ static int __init microcode_init (void)
 return error;
 }

+ return 0;
+}
+
+static void __exit microcode_dev_exit (void)
+{
+ misc_deregister(&microcode_dev);
+}
+
+MODULE_ALIAS_MISCDEV(MICROCODE_MINOR);
+#else
+#define microcode_dev_init() 0
+#define microcode_dev_exit() do { } while(0)
+#endif
+
+/* fake device for request_firmware */
+static struct platform_device *microcode_pdev;
+
+static int request_microcode(void)
+{
+ char name[30];
+ const struct cpuinfo_x86 *c = &boot_cpu_data;
+ const struct firmware *firmware;
+ int error;
+ struct xen_platform_op op;
+
+ sprintf(name,"intel-ucode/%02x-%02x-%02x",
+ c->x86, c->x86_model, c->x86_mask);
+ error = request_firmware(&firmware, name, &microcode_pdev->dev);
+ if (error) {
+ pr_debug("ucode data file %s load failed\n", name);
+ return error;
+ }
+
+ op.cmd = XENPF_microcode_update;
+ set_xen_guest_handle(op.u.microcode.data, (void *)firmware->data);
+ op.u.microcode.length = firmware->size;
+ error = HYPERVISOR_platform_op(&op);
+
+ release_firmware(firmware);
+
+ if (error)
+ pr_debug("ucode load failed\n");
+
+ return error;
+}
+
+static int __init microcode_init (void)
+{
+ int error;
+
+ error = microcode_dev_init();
+ if (error)
+ return error;
+ microcode_pdev = platform_device_register_simple("microcode", -1,
+ NULL, 0);
+ if (IS_ERR(microcode_pdev)) {
+ microcode_dev_exit();
+ return PTR_ERR(microcode_pdev);
+ }
+
+ request_microcode();
+
 printk(KERN_INFO
 "IA-32 Microcode Update Driver: v" MICROCODE_VERSION " <tigran@veritas.com>\n");
 return 0;
@@ -136,9 +201,9 @@ static int __init microcode_init (void)

 static void __exit microcode_exit (void)
 {
- misc_deregister(&microcode_dev);
+ microcode_dev_exit();
+ platform_device_unregister(microcode_pdev);
 }

 module_init(microcode_init)
 module_exit(microcode_exit)
-MODULE_ALIAS_MISCDEV(MICROCODE_MINOR);
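
The rewritten microcode driver above swaps the old write()-based interface for the request_firmware() pattern: register a throwaway platform device, ask the firmware loader for a blob named after the CPU's family/model/stepping, and hand the result to the hypervisor. The naming step is the part most easily gotten wrong; a small sketch of it follows, with the intel-ucode scheme taken from the hunk above and the helper name invented for illustration.

#include <stdio.h>
#include <stddef.h>

/* Builds the firmware file name the driver requests, e.g.
 * "intel-ucode/06-0f-0b" for family 6, model 15, stepping 11. */
static void ucode_name(char *buf, size_t len,
                       int family, int model, int stepping)
{
    snprintf(buf, len, "intel-ucode/%02x-%02x-%02x",
             family, model, stepping);
}

int main(void)
{
    char name[32];
    ucode_name(name, sizeof(name), 6, 15, 11);
    printf("%s\n", name);   /* prints intel-ucode/06-0f-0b */
    return 0;
}

Because the fields are printed in hex with zero padding, the on-disk firmware tree must use the same convention; a decimal or unpadded name would never be found.
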
1810--- sle11-2009-05-14.orig/arch/x86/kernel/mpparse_32-xen.c 2009-05-14 11:02:43.000000000 +0200
1811+++ sle11-2009-05-14/arch/x86/kernel/mpparse_32-xen.c 2009-03-04 11:28:34.000000000 +0100
1812@@ -30,6 +30,7 @@
1813 #include <asm/io_apic.h>
1814
1815 #include <mach_apic.h>
1816+#include <mach_apicdef.h>
1817 #include <mach_mpparse.h>
1818 #include <bios_ebda.h>
1819
1820@@ -68,7 +69,7 @@ unsigned int def_to_bigsmp = 0;
1821 /* Processor that is doing the boot up */
1822 unsigned int boot_cpu_physical_apicid = -1U;
1823 /* Internal processor count */
1824-static unsigned int __devinitdata num_processors;
1825+unsigned int __cpuinitdata num_processors;
1826
1827 /* Bitmask of physically existing CPUs */
1828 physid_mask_t phys_cpu_present_map;
1829@@ -235,12 +236,14 @@ static void __init MP_bus_info (struct m
1830
1831 mpc_oem_bus_info(m, str, translation_table[mpc_record]);
1832
1833+#if MAX_MP_BUSSES < 256
1834 if (m->mpc_busid >= MAX_MP_BUSSES) {
1835 printk(KERN_WARNING "MP table busid value (%d) for bustype %s "
1836 " is too large, max. supported is %d\n",
1837 m->mpc_busid, str, MAX_MP_BUSSES - 1);
1838 return;
1839 }
1840+#endif
1841
1842 if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA)-1) == 0) {
1843 mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
1844@@ -300,19 +303,6 @@ static void __init MP_lintsrc_info (stru
1845 m->mpc_irqtype, m->mpc_irqflag & 3,
1846 (m->mpc_irqflag >> 2) &3, m->mpc_srcbusid,
1847 m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
1848- /*
1849- * Well it seems all SMP boards in existence
1850- * use ExtINT/LVT1 == LINT0 and
1851- * NMI/LVT2 == LINT1 - the following check
1852- * will show us if this assumptions is false.
1853- * Until then we do not have to add baggage.
1854- */
1855- if ((m->mpc_irqtype == mp_ExtINT) &&
1856- (m->mpc_destapiclint != 0))
1857- BUG();
1858- if ((m->mpc_irqtype == mp_NMI) &&
1859- (m->mpc_destapiclint != 1))
1860- BUG();
1861 }
1862
1863 #ifdef CONFIG_X86_NUMAQ
1864@@ -838,8 +828,7 @@ int es7000_plat;
1865
1866 #ifdef CONFIG_ACPI
1867
1868-void __init mp_register_lapic_address (
1869- u64 address)
1870+void __init mp_register_lapic_address(u64 address)
1871 {
1872 #ifndef CONFIG_XEN
1873 mp_lapic_addr = (unsigned long) address;
1874@@ -853,13 +842,10 @@ void __init mp_register_lapic_address (
1875 #endif
1876 }
1877
1878-
1879-void __devinit mp_register_lapic (
1880- u8 id,
1881- u8 enabled)
1882+void __devinit mp_register_lapic (u8 id, u8 enabled)
1883 {
1884 struct mpc_config_processor processor;
1885- int boot_cpu = 0;
1886+ int boot_cpu = 0;
1887
1888 if (MAX_APICS - id <= 0) {
1889 printk(KERN_WARNING "Processor #%d invalid (max %d)\n",
1890@@ -898,11 +884,9 @@ static struct mp_ioapic_routing {
1891 u32 pin_programmed[4];
1892 } mp_ioapic_routing[MAX_IO_APICS];
1893
1894-
1895-static int mp_find_ioapic (
1896- int gsi)
1897+static int mp_find_ioapic (int gsi)
1898 {
1899- int i = 0;
1900+ int i = 0;
1901
1902 /* Find the IOAPIC that manages this GSI. */
1903 for (i = 0; i < nr_ioapics; i++) {
1904@@ -915,15 +899,11 @@ static int mp_find_ioapic (
1905
1906 return -1;
1907 }
1908-
1909
1910-void __init mp_register_ioapic (
1911- u8 id,
1912- u32 address,
1913- u32 gsi_base)
1914+void __init mp_register_ioapic(u8 id, u32 address, u32 gsi_base)
1915 {
1916- int idx = 0;
1917- int tmpid;
1918+ int idx = 0;
1919+ int tmpid;
1920
1921 if (nr_ioapics >= MAX_IO_APICS) {
1922 printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
1923@@ -971,16 +951,10 @@ void __init mp_register_ioapic (
1924 mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr,
1925 mp_ioapic_routing[idx].gsi_base,
1926 mp_ioapic_routing[idx].gsi_end);
1927-
1928- return;
1929 }
1930
1931-
1932-void __init mp_override_legacy_irq (
1933- u8 bus_irq,
1934- u8 polarity,
1935- u8 trigger,
1936- u32 gsi)
1937+void __init
1938+mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
1939 {
1940 struct mpc_config_intsrc intsrc;
1941 int ioapic = -1;
1942@@ -1018,15 +992,13 @@ void __init mp_override_legacy_irq (
1943 mp_irqs[mp_irq_entries] = intsrc;
1944 if (++mp_irq_entries == MAX_IRQ_SOURCES)
1945 panic("Max # of irq sources exceeded!\n");
1946-
1947- return;
1948 }
1949
1950 void __init mp_config_acpi_legacy_irqs (void)
1951 {
1952 struct mpc_config_intsrc intsrc;
1953- int i = 0;
1954- int ioapic = -1;
1955+ int i = 0;
1956+ int ioapic = -1;
1957
1958 /*
1959 * Fabricate the legacy ISA bus (bus #31).
1960@@ -1095,12 +1067,12 @@ void __init mp_config_acpi_legacy_irqs (
1961
1962 #define MAX_GSI_NUM 4096
1963
1964-int mp_register_gsi (u32 gsi, int triggering, int polarity)
1965+int mp_register_gsi(u32 gsi, int triggering, int polarity)
1966 {
1967- int ioapic = -1;
1968- int ioapic_pin = 0;
1969- int idx, bit = 0;
1970- static int pci_irq = 16;
1971+ int ioapic = -1;
1972+ int ioapic_pin = 0;
1973+ int idx, bit = 0;
1974+ static int pci_irq = 16;
1975 /*
1976 	 * Mapping between Global System Interrupts, which
1977 * represent all possible interrupts, and IRQs
1978--- sle11-2009-05-14.orig/arch/x86/kernel/pci-dma-xen.c 2009-05-14 11:02:43.000000000 +0200
1979+++ sle11-2009-05-14/arch/x86/kernel/pci-dma-xen.c 2009-03-04 11:28:34.000000000 +0100
1980@@ -110,8 +110,7 @@ dma_map_sg(struct device *hwdev, struct
1981 {
1982 int i, rc;
1983
1984- if (direction == DMA_NONE)
1985- BUG();
1986+ BUG_ON(!valid_dma_direction(direction));
1987 WARN_ON(nents == 0 || sg[0].length == 0);
1988
1989 if (swiotlb) {
1990@@ -142,7 +141,7 @@ dma_unmap_sg(struct device *hwdev, struc
1991 {
1992 int i;
1993
1994- BUG_ON(direction == DMA_NONE);
1995+ BUG_ON(!valid_dma_direction(direction));
1996 if (swiotlb)
1997 swiotlb_unmap_sg(hwdev, sg, nents, direction);
1998 else {
1999@@ -159,8 +158,7 @@ dma_map_page(struct device *dev, struct
2000 {
2001 dma_addr_t dma_addr;
2002
2003- BUG_ON(direction == DMA_NONE);
2004-
2005+ BUG_ON(!valid_dma_direction(direction));
2006 if (swiotlb) {
2007 dma_addr = swiotlb_map_page(
2008 dev, page, offset, size, direction);
2009@@ -177,7 +175,7 @@ void
2010 dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
2011 enum dma_data_direction direction)
2012 {
2013- BUG_ON(direction == DMA_NONE);
2014+ BUG_ON(!valid_dma_direction(direction));
2015 if (swiotlb)
2016 swiotlb_unmap_page(dev, dma_address, size, direction);
2017 else
2018@@ -359,8 +357,7 @@ dma_map_single(struct device *dev, void
2019 {
2020 dma_addr_t dma;
2021
2022- if (direction == DMA_NONE)
2023- BUG();
2024+ BUG_ON(!valid_dma_direction(direction));
2025 WARN_ON(size == 0);
2026
2027 if (swiotlb) {
2028@@ -381,8 +378,7 @@ void
2029 dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
2030 enum dma_data_direction direction)
2031 {
2032- if (direction == DMA_NONE)
2033- BUG();
2034+ BUG_ON(!valid_dma_direction(direction));
2035 if (swiotlb)
2036 swiotlb_unmap_single(dev, dma_addr, size, direction);
2037 else
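The valid_dma_direction() helper that the BUG_ON() conversions above rely on was introduced in 2.6.19; a sketch of its definition, per include/linux/dma-mapping.h of that era:

	static inline int valid_dma_direction(int dma_direction)
	{
		return ((dma_direction == DMA_BIDIRECTIONAL) ||
			(dma_direction == DMA_TO_DEVICE) ||
			(dma_direction == DMA_FROM_DEVICE));
	}

The net effect is stricter checking: the old test only caught DMA_NONE, while the new one also traps completely bogus direction values.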
2038--- sle11-2009-05-14.orig/arch/x86/kernel/process_32-xen.c 2009-05-14 11:02:43.000000000 +0200
2039+++ sle11-2009-05-14/arch/x86/kernel/process_32-xen.c 2009-03-04 11:28:34.000000000 +0100
2040@@ -37,6 +37,7 @@
2041 #include <linux/kallsyms.h>
2042 #include <linux/ptrace.h>
2043 #include <linux/random.h>
2044+#include <linux/personality.h>
2045
2046 #include <asm/uaccess.h>
2047 #include <asm/pgtable.h>
2048@@ -186,7 +187,7 @@ void cpu_idle(void)
2049 void cpu_idle_wait(void)
2050 {
2051 unsigned int cpu, this_cpu = get_cpu();
2052- cpumask_t map;
2053+ cpumask_t map, tmp = current->cpus_allowed;
2054
2055 set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
2056 put_cpu();
2057@@ -208,6 +209,8 @@ void cpu_idle_wait(void)
2058 }
2059 cpus_and(map, map, cpu_online_map);
2060 } while (!cpus_empty(map));
2061+
2062+ set_cpus_allowed(current, tmp);
2063 }
2064 EXPORT_SYMBOL_GPL(cpu_idle_wait);
2065
2066@@ -240,9 +243,9 @@ void show_regs(struct pt_regs * regs)
2067 if (user_mode_vm(regs))
2068 printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp);
2069 printk(" EFLAGS: %08lx %s (%s %.*s)\n",
2070- regs->eflags, print_tainted(), system_utsname.release,
2071- (int)strcspn(system_utsname.version, " "),
2072- system_utsname.version);
2073+ regs->eflags, print_tainted(), init_utsname()->release,
2074+ (int)strcspn(init_utsname()->version, " "),
2075+ init_utsname()->version);
2076 printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
2077 regs->eax,regs->ebx,regs->ecx,regs->edx);
2078 printk("ESI: %08lx EDI: %08lx EBP: %08lx",
2079@@ -264,15 +267,6 @@ void show_regs(struct pt_regs * regs)
2080 * the "args".
2081 */
2082 extern void kernel_thread_helper(void);
2083-__asm__(".section .text\n"
2084- ".align 4\n"
2085- "kernel_thread_helper:\n\t"
2086- "movl %edx,%eax\n\t"
2087- "pushl %edx\n\t"
2088- "call *%ebx\n\t"
2089- "pushl %eax\n\t"
2090- "call do_exit\n"
2091- ".previous");
2092
2093 /*
2094 * Create a kernel thread
2095@@ -290,7 +284,7 @@ int kernel_thread(int (*fn)(void *), voi
2096 regs.xes = __USER_DS;
2097 regs.orig_eax = -1;
2098 regs.eip = (unsigned long) kernel_thread_helper;
2099- regs.xcs = GET_KERNEL_CS();
2100+ regs.xcs = __KERNEL_CS | get_kernel_rpl();
2101 regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2;
2102
2103 /* Ok, create the new process.. */
2104@@ -369,13 +363,12 @@ int copy_thread(int nr, unsigned long cl
2105
2106 tsk = current;
2107 if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
2108- p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
2109+ p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr,
2110+ IO_BITMAP_BYTES, GFP_KERNEL);
2111 if (!p->thread.io_bitmap_ptr) {
2112 p->thread.io_bitmap_max = 0;
2113 return -ENOMEM;
2114 }
2115- memcpy(p->thread.io_bitmap_ptr, tsk->thread.io_bitmap_ptr,
2116- IO_BITMAP_BYTES);
2117 set_tsk_thread_flag(p, TIF_IO_BITMAP);
2118 }
2119
2120@@ -871,7 +864,7 @@ asmlinkage int sys_get_thread_area(struc
2121
2122 unsigned long arch_align_stack(unsigned long sp)
2123 {
2124- if (randomize_va_space)
2125+ if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
2126 sp -= get_random_int() % 8192;
2127 return sp & ~0xf;
2128 }
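kmemdup(), used above in copy_thread() to replace the open-coded kmalloc()+memcpy() pair, also first appeared in 2.6.19. Its behaviour is roughly:

	void *kmemdup(const void *src, size_t len, gfp_t gfp)
	{
		void *p;

		p = kmalloc(len, gfp);	/* mainline tracks the real caller here */
		if (p)
			memcpy(p, src, len);
		return p;
	}

The conversion is behaviour-preserving: on allocation failure io_bitmap_ptr stays NULL and -ENOMEM is returned exactly as before.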
2129--- sle11-2009-05-14.orig/arch/x86/kernel/setup_32-xen.c 2009-05-14 11:02:43.000000000 +0200
2130+++ sle11-2009-05-14/arch/x86/kernel/setup_32-xen.c 2009-03-04 11:28:34.000000000 +0100
2131@@ -56,6 +56,7 @@
2132 #include <asm/apic.h>
2133 #include <asm/e820.h>
2134 #include <asm/mpspec.h>
2135+#include <asm/mmzone.h>
2136 #include <asm/setup.h>
2137 #include <asm/arch_hooks.h>
2138 #include <asm/sections.h>
2139@@ -83,9 +84,6 @@ static struct notifier_block xen_panic_b
2140 xen_panic_event, NULL, 0 /* try to go last */
2141 };
2142
2143-extern char hypercall_page[PAGE_SIZE];
2144-EXPORT_SYMBOL(hypercall_page);
2145-
2146 int disable_pse __devinitdata = 0;
2147
2148 /*
2149@@ -105,18 +103,6 @@ EXPORT_SYMBOL(boot_cpu_data);
2150
2151 unsigned long mmu_cr4_features;
2152
2153-#ifdef CONFIG_ACPI
2154- int acpi_disabled = 0;
2155-#else
2156- int acpi_disabled = 1;
2157-#endif
2158-EXPORT_SYMBOL(acpi_disabled);
2159-
2160-#ifdef CONFIG_ACPI
2161-int __initdata acpi_force = 0;
2162-extern acpi_interrupt_flags acpi_sci_flags;
2163-#endif
2164-
2165 /* for MCA, but anyone else can use it if they want */
2166 unsigned int machine_id;
2167 #ifdef CONFIG_MCA
2168@@ -170,7 +156,6 @@ struct e820map machine_e820;
2169 #endif
2170
2171 extern void early_cpu_init(void);
2172-extern void generic_apic_probe(char *);
2173 extern int root_mountflags;
2174
2175 unsigned long saved_videomode;
2176@@ -243,9 +228,6 @@ static struct resource adapter_rom_resou
2177 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
2178 } };
2179
2180-#define ADAPTER_ROM_RESOURCES \
2181- (sizeof adapter_rom_resources / sizeof adapter_rom_resources[0])
2182-
2183 static struct resource video_rom_resource = {
2184 .name = "Video ROM",
2185 .start = 0xc0000,
2186@@ -307,9 +289,6 @@ static struct resource standard_io_resou
2187 .flags = IORESOURCE_BUSY | IORESOURCE_IO
2188 } };
2189
2190-#define STANDARD_IO_RESOURCES \
2191- (sizeof standard_io_resources / sizeof standard_io_resources[0])
2192-
2193 #define romsignature(x) (*(unsigned short *)(x) == 0xaa55)
2194
2195 static int __init romchecksum(unsigned char *rom, unsigned long length)
2196@@ -372,7 +351,7 @@ static void __init probe_roms(void)
2197 }
2198
2199 /* check for adapter roms on 2k boundaries */
2200- for (i = 0; i < ADAPTER_ROM_RESOURCES && start < upper; start += 2048) {
2201+ for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; start += 2048) {
2202 rom = isa_bus_to_virt(start);
2203 if (!romsignature(rom))
2204 continue;
2205@@ -779,246 +758,152 @@ static inline void copy_edd(void)
2206 }
2207 #endif
2208
2209-static void __init parse_cmdline_early (char ** cmdline_p)
2210+static int __initdata user_defined_memmap = 0;
2211+
2212+/*
2213+ * "mem=nopentium" disables the 4MB page tables.
2214+ * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
2215+ * to <mem>, overriding the bios size.
2216+ * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from
2217+ * <start> to <start>+<mem>, overriding the bios size.
2218+ *
2219+ * HPA tells me bootloaders need to parse mem=, so no new
2220+ * option should be mem= [also see Documentation/i386/boot.txt]
2221+ */
2222+static int __init parse_mem(char *arg)
2223 {
2224- char c = ' ', *to = command_line, *from = saved_command_line;
2225- int len = 0, max_cmdline;
2226- int userdef = 0;
2227-
2228- if ((max_cmdline = MAX_GUEST_CMDLINE) > COMMAND_LINE_SIZE)
2229- max_cmdline = COMMAND_LINE_SIZE;
2230- memcpy(saved_command_line, xen_start_info->cmd_line, max_cmdline);
2231- /* Save unparsed command line copy for /proc/cmdline */
2232- saved_command_line[max_cmdline-1] = '\0';
2233-
2234- for (;;) {
2235- if (c != ' ')
2236- goto next_char;
2237- /*
2238- * "mem=nopentium" disables the 4MB page tables.
2239- * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
2240- * to <mem>, overriding the bios size.
2241- * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from
2242- * <start> to <start>+<mem>, overriding the bios size.
2243- *
2244- * HPA tells me bootloaders need to parse mem=, so no new
2245- * option should be mem= [also see Documentation/i386/boot.txt]
2246- */
2247- if (!memcmp(from, "mem=", 4)) {
2248- if (to != command_line)
2249- to--;
2250- if (!memcmp(from+4, "nopentium", 9)) {
2251- from += 9+4;
2252- clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
2253- disable_pse = 1;
2254- } else {
2255- /* If the user specifies memory size, we
2256- * limit the BIOS-provided memory map to
2257- * that size. exactmap can be used to specify
2258- * the exact map. mem=number can be used to
2259- * trim the existing memory map.
2260- */
2261- unsigned long long mem_size;
2262-
2263- mem_size = memparse(from+4, &from);
2264- limit_regions(mem_size);
2265- userdef=1;
2266- }
2267- }
2268+ if (!arg)
2269+ return -EINVAL;
2270
2271- else if (!memcmp(from, "memmap=", 7)) {
2272- if (to != command_line)
2273- to--;
2274- if (!memcmp(from+7, "exactmap", 8)) {
2275-#ifdef CONFIG_CRASH_DUMP
2276- /* If we are doing a crash dump, we
2277- * still need to know the real mem
2278- * size before original memory map is
2279- * reset.
2280- */
2281- find_max_pfn();
2282- saved_max_pfn = max_pfn;
2283-#endif
2284- from += 8+7;
2285- e820.nr_map = 0;
2286- userdef = 1;
2287- } else {
2288- /* If the user specifies memory size, we
2289- * limit the BIOS-provided memory map to
2290- * that size. exactmap can be used to specify
2291- * the exact map. mem=number can be used to
2292- * trim the existing memory map.
2293- */
2294- unsigned long long start_at, mem_size;
2295+ if (strcmp(arg, "nopentium") == 0) {
2296+ clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
2297+ disable_pse = 1;
2298+ } else {
2299+ /* If the user specifies memory size, we
2300+ * limit the BIOS-provided memory map to
2301+ * that size. exactmap can be used to specify
2302+ * the exact map. mem=number can be used to
2303+ * trim the existing memory map.
2304+ */
2305+ unsigned long long mem_size;
2306
2307- mem_size = memparse(from+7, &from);
2308- if (*from == '@') {
2309- start_at = memparse(from+1, &from);
2310- add_memory_region(start_at, mem_size, E820_RAM);
2311- } else if (*from == '#') {
2312- start_at = memparse(from+1, &from);
2313- add_memory_region(start_at, mem_size, E820_ACPI);
2314- } else if (*from == '$') {
2315- start_at = memparse(from+1, &from);
2316- add_memory_region(start_at, mem_size, E820_RESERVED);
2317- } else {
2318- limit_regions(mem_size);
2319- userdef=1;
2320- }
2321- }
2322- }
2323-
2324- else if (!memcmp(from, "noexec=", 7))
2325- noexec_setup(from + 7);
2326+ mem_size = memparse(arg, &arg);
2327+ limit_regions(mem_size);
2328+ user_defined_memmap = 1;
2329+ }
2330+ return 0;
2331+}
2332+early_param("mem", parse_mem);
2333
2334+static int __init parse_memmap(char *arg)
2335+{
2336+ if (!arg)
2337+ return -EINVAL;
2338
2339-#ifdef CONFIG_X86_MPPARSE
2340- /*
2341- * If the BIOS enumerates physical processors before logical,
2342- * maxcpus=N at enumeration-time can be used to disable HT.
2343+ if (strcmp(arg, "exactmap") == 0) {
2344+#ifdef CONFIG_CRASH_DUMP
2345+ /* If we are doing a crash dump, we
2346+ * still need to know the real mem
2347+ * size before original memory map is
2348+ * reset.
2349 */
2350- else if (!memcmp(from, "maxcpus=", 8)) {
2351- extern unsigned int maxcpus;
2352-
2353- maxcpus = simple_strtoul(from + 8, NULL, 0);
2354- }
2355+ find_max_pfn();
2356+ saved_max_pfn = max_pfn;
2357 #endif
2358+ e820.nr_map = 0;
2359+ user_defined_memmap = 1;
2360+ } else {
2361+ /* If the user specifies memory size, we
2362+ * limit the BIOS-provided memory map to
2363+ * that size. exactmap can be used to specify
2364+ * the exact map. mem=number can be used to
2365+ * trim the existing memory map.
2366+ */
2367+ unsigned long long start_at, mem_size;
2368
2369-#ifdef CONFIG_ACPI
2370- /* "acpi=off" disables both ACPI table parsing and interpreter */
2371- else if (!memcmp(from, "acpi=off", 8)) {
2372- disable_acpi();
2373- }
2374-
2375- /* acpi=force to over-ride black-list */
2376- else if (!memcmp(from, "acpi=force", 10)) {
2377- acpi_force = 1;
2378- acpi_ht = 1;
2379- acpi_disabled = 0;
2380- }
2381-
2382- /* acpi=strict disables out-of-spec workarounds */
2383- else if (!memcmp(from, "acpi=strict", 11)) {
2384- acpi_strict = 1;
2385- }
2386-
2387- /* Limit ACPI just to boot-time to enable HT */
2388- else if (!memcmp(from, "acpi=ht", 7)) {
2389- if (!acpi_force)
2390- disable_acpi();
2391- acpi_ht = 1;
2392- }
2393-
2394- /* "pci=noacpi" disable ACPI IRQ routing and PCI scan */
2395- else if (!memcmp(from, "pci=noacpi", 10)) {
2396- acpi_disable_pci();
2397- }
2398- /* "acpi=noirq" disables ACPI interrupt routing */
2399- else if (!memcmp(from, "acpi=noirq", 10)) {
2400- acpi_noirq_set();
2401+ mem_size = memparse(arg, &arg);
2402+ if (*arg == '@') {
2403+ start_at = memparse(arg+1, &arg);
2404+ add_memory_region(start_at, mem_size, E820_RAM);
2405+ } else if (*arg == '#') {
2406+ start_at = memparse(arg+1, &arg);
2407+ add_memory_region(start_at, mem_size, E820_ACPI);
2408+ } else if (*arg == '$') {
2409+ start_at = memparse(arg+1, &arg);
2410+ add_memory_region(start_at, mem_size, E820_RESERVED);
2411+ } else {
2412+ limit_regions(mem_size);
2413+ user_defined_memmap = 1;
2414 }
2415+ }
2416+ return 0;
2417+}
2418+early_param("memmap", parse_memmap);
2419
2420- else if (!memcmp(from, "acpi_sci=edge", 13))
2421- acpi_sci_flags.trigger = 1;
2422-
2423- else if (!memcmp(from, "acpi_sci=level", 14))
2424- acpi_sci_flags.trigger = 3;
2425+#ifdef CONFIG_PROC_VMCORE
2426+/* elfcorehdr= specifies the location of elf core header
2427+ * stored by the crashed kernel.
2428+ */
2429+static int __init parse_elfcorehdr(char *arg)
2430+{
2431+ if (!arg)
2432+ return -EINVAL;
2433
2434- else if (!memcmp(from, "acpi_sci=high", 13))
2435- acpi_sci_flags.polarity = 1;
2436+ elfcorehdr_addr = memparse(arg, &arg);
2437+ return 0;
2438+}
2439+early_param("elfcorehdr", parse_elfcorehdr);
2440+#endif /* CONFIG_PROC_VMCORE */
2441
2442- else if (!memcmp(from, "acpi_sci=low", 12))
2443- acpi_sci_flags.polarity = 3;
2444+/*
2445+ * highmem=size forces highmem to be exactly 'size' bytes.
2446+ * This works even on boxes that have no highmem otherwise.
2447+ * This also works to reduce highmem size on bigger boxes.
2448+ */
2449+static int __init parse_highmem(char *arg)
2450+{
2451+ if (!arg)
2452+ return -EINVAL;
2453
2454-#ifdef CONFIG_X86_IO_APIC
2455- else if (!memcmp(from, "acpi_skip_timer_override", 24))
2456- acpi_skip_timer_override = 1;
2457+ highmem_pages = memparse(arg, &arg) >> PAGE_SHIFT;
2458+ return 0;
2459+}
2460+early_param("highmem", parse_highmem);
2461
2462- if (!memcmp(from, "disable_timer_pin_1", 19))
2463- disable_timer_pin_1 = 1;
2464- if (!memcmp(from, "enable_timer_pin_1", 18))
2465- disable_timer_pin_1 = -1;
2466-
2467- /* disable IO-APIC */
2468- else if (!memcmp(from, "noapic", 6))
2469- disable_ioapic_setup();
2470-#endif /* CONFIG_X86_IO_APIC */
2471-#endif /* CONFIG_ACPI */
2472+/*
2473+ * vmalloc=size forces the vmalloc area to be exactly 'size'
2474+ * bytes. This can be used to increase (or decrease) the
2475+ * vmalloc area - the default is 128m.
2476+ */
2477+static int __init parse_vmalloc(char *arg)
2478+{
2479+ if (!arg)
2480+ return -EINVAL;
2481
2482-#ifdef CONFIG_X86_LOCAL_APIC
2483- /* enable local APIC */
2484- else if (!memcmp(from, "lapic", 5))
2485- lapic_enable();
2486-
2487- /* disable local APIC */
2488- else if (!memcmp(from, "nolapic", 6))
2489- lapic_disable();
2490-#endif /* CONFIG_X86_LOCAL_APIC */
2491+ __VMALLOC_RESERVE = memparse(arg, &arg);
2492+ return 0;
2493+}
2494+early_param("vmalloc", parse_vmalloc);
2495
2496-#ifdef CONFIG_KEXEC
2497- /* crashkernel=size@addr specifies the location to reserve for
2498- * a crash kernel. By reserving this memory we guarantee
2499- * that linux never set's it up as a DMA target.
2500- * Useful for holding code to do something appropriate
2501- * after a kernel panic.
2502- */
2503- else if (!memcmp(from, "crashkernel=", 12)) {
2504 #ifndef CONFIG_XEN
2505- unsigned long size, base;
2506- size = memparse(from+12, &from);
2507- if (*from == '@') {
2508- base = memparse(from+1, &from);
2509- /* FIXME: Do I want a sanity check
2510- * to validate the memory range?
2511- */
2512- crashk_res.start = base;
2513- crashk_res.end = base + size - 1;
2514- }
2515-#else
2516- printk("Ignoring crashkernel command line, "
2517- "parameter will be supplied by xen\n");
2518-#endif
2519- }
2520-#endif
2521-#ifdef CONFIG_PROC_VMCORE
2522- /* elfcorehdr= specifies the location of elf core header
2523- * stored by the crashed kernel.
2524- */
2525- else if (!memcmp(from, "elfcorehdr=", 11))
2526- elfcorehdr_addr = memparse(from+11, &from);
2527-#endif
2528+/*
2529+ * reservetop=size reserves a hole at the top of the kernel address space which
2530+ * a hypervisor can load into later. Needed for dynamically loaded hypervisors,
2531+ * so relocating the fixmap can be done before paging initialization.
2532+ */
2533+static int __init parse_reservetop(char *arg)
2534+{
2535+ unsigned long address;
2536
2537- /*
2538- * highmem=size forces highmem to be exactly 'size' bytes.
2539- * This works even on boxes that have no highmem otherwise.
2540- * This also works to reduce highmem size on bigger boxes.
2541- */
2542- else if (!memcmp(from, "highmem=", 8))
2543- highmem_pages = memparse(from+8, &from) >> PAGE_SHIFT;
2544-
2545- /*
2546- * vmalloc=size forces the vmalloc area to be exactly 'size'
2547- * bytes. This can be used to increase (or decrease) the
2548- * vmalloc area - the default is 128m.
2549- */
2550- else if (!memcmp(from, "vmalloc=", 8))
2551- __VMALLOC_RESERVE = memparse(from+8, &from);
2552+ if (!arg)
2553+ return -EINVAL;
2554
2555- next_char:
2556- c = *(from++);
2557- if (!c)
2558- break;
2559- if (COMMAND_LINE_SIZE <= ++len)
2560- break;
2561- *(to++) = c;
2562- }
2563- *to = '\0';
2564- *cmdline_p = command_line;
2565- if (userdef) {
2566- printk(KERN_INFO "user-defined physical RAM map:\n");
2567- print_memory_map("user");
2568- }
2569+ address = memparse(arg, &arg);
2570+ reserve_top_address(address);
2571+ return 0;
2572 }
2573+early_param("reservetop", parse_reservetop);
2574+#endif
2575
2576 /*
2577 * Callback for efi_memory_walk.
2578@@ -1039,7 +924,7 @@ efi_find_max_pfn(unsigned long start, un
2579 static int __init
2580 efi_memory_present_wrapper(unsigned long start, unsigned long end, void *arg)
2581 {
2582- memory_present(0, start, end);
2583+ memory_present(0, PFN_UP(start), PFN_DOWN(end));
2584 return 0;
2585 }
2586
2587@@ -1306,6 +1191,14 @@ static unsigned long __init setup_memory
2588 }
2589 printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
2590 pages_to_mb(highend_pfn - highstart_pfn));
2591+ num_physpages = highend_pfn;
2592+ high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
2593+#else
2594+ num_physpages = max_low_pfn;
2595+ high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
2596+#endif
2597+#ifdef CONFIG_FLATMEM
2598+ max_mapnr = num_physpages;
2599 #endif
2600 printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
2601 pages_to_mb(max_low_pfn));
2602@@ -1317,22 +1210,19 @@ static unsigned long __init setup_memory
2603
2604 void __init zone_sizes_init(void)
2605 {
2606- unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
2607- unsigned int max_dma, low;
2608-
2609- max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
2610- low = max_low_pfn;
2611-
2612- if (low < max_dma)
2613- zones_size[ZONE_DMA] = low;
2614- else {
2615- zones_size[ZONE_DMA] = max_dma;
2616- zones_size[ZONE_NORMAL] = low - max_dma;
2617+ unsigned long max_zone_pfns[MAX_NR_ZONES];
2618+ memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
2619+ max_zone_pfns[ZONE_DMA] =
2620+ virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
2621+ max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
2622 #ifdef CONFIG_HIGHMEM
2623- zones_size[ZONE_HIGHMEM] = highend_pfn - low;
2624+ max_zone_pfns[ZONE_HIGHMEM] = highend_pfn;
2625+ add_active_range(0, 0, highend_pfn);
2626+#else
2627+ add_active_range(0, 0, max_low_pfn);
2628 #endif
2629- }
2630- free_area_init(zones_size);
2631+
2632+ free_area_init_nodes(max_zone_pfns);
2633 }
2634 #else
2635 extern unsigned long __init setup_memory(void);
2636@@ -1389,6 +1279,7 @@ void __init setup_bootmem_allocator(void
2637 */
2638 acpi_reserve_bootmem();
2639 #endif
2640+ numa_kva_reserve();
2641 #endif /* !CONFIG_XEN */
2642
2643 #ifdef CONFIG_BLK_DEV_INITRD
2644@@ -1574,7 +1465,7 @@ static int __init request_standard_resou
2645 request_resource(&iomem_resource, &video_ram_resource);
2646
2647 /* request I/O space for devices used on all i[345]86 PCs */
2648- for (i = 0; i < STANDARD_IO_RESOURCES; i++)
2649+ for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++)
2650 request_resource(&ioport_resource, &standard_io_resources[i]);
2651 return 0;
2652 }
2653@@ -1705,17 +1596,19 @@ void __init setup_arch(char **cmdline_p)
2654 data_resource.start = virt_to_phys(_etext);
2655 data_resource.end = virt_to_phys(_edata)-1;
2656
2657- parse_cmdline_early(cmdline_p);
2658+ if ((i = MAX_GUEST_CMDLINE) > COMMAND_LINE_SIZE)
2659+ i = COMMAND_LINE_SIZE;
2660+ memcpy(saved_command_line, xen_start_info->cmd_line, i);
2661+ saved_command_line[i - 1] = '\0';
2662+ parse_early_param();
2663
2664-#ifdef CONFIG_EARLY_PRINTK
2665- {
2666- char *s = strstr(*cmdline_p, "earlyprintk=");
2667- if (s) {
2668- setup_early_printk(strchr(s, '=') + 1);
2669- printk("early console enabled\n");
2670- }
2671+ if (user_defined_memmap) {
2672+ printk(KERN_INFO "user-defined physical RAM map:\n");
2673+ print_memory_map("user");
2674 }
2675-#endif
2676+
2677+ strlcpy(command_line, saved_command_line, COMMAND_LINE_SIZE);
2678+ *cmdline_p = command_line;
2679
2680 max_low_pfn = setup_memory();
2681
2682@@ -1822,7 +1715,7 @@ void __init setup_arch(char **cmdline_p)
2683 dmi_scan_machine();
2684
2685 #ifdef CONFIG_X86_GENERICARCH
2686- generic_apic_probe(*cmdline_p);
2687+ generic_apic_probe();
2688 #endif
2689 if (efi_enabled)
2690 efi_map_memmap();
2691@@ -1843,9 +1736,11 @@ void __init setup_arch(char **cmdline_p)
2692 acpi_boot_table_init();
2693 #endif
2694
2695+#ifdef CONFIG_PCI
2696 #ifdef CONFIG_X86_IO_APIC
2697 check_acpi_pci(); /* Checks more than just ACPI actually */
2698 #endif
2699+#endif
2700
2701 #ifdef CONFIG_ACPI
2702 acpi_boot_init();
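The large setup_32 hunk above replaces the hand-rolled parse_cmdline_early() scanner with the early_param() machinery introduced in 2.6.19. A sketch of how registration works, per include/linux/init.h (abridged; the macro actually generates a unique symbol name):

	struct obs_kernel_param {
		const char *str;
		int (*setup_func)(char *);
		int early;
	};

	/* early_param(str, fn) expands to roughly: */
	static struct obs_kernel_param __setup_fn
		__attribute__((__section__(".init.setup")))
		= { str, fn, 1 };

parse_early_param() walks only the entries with .early set against the saved command line, which is why setup_arch() can now simply copy xen_start_info->cmd_line into saved_command_line and let generic code handle mem=, memmap=, vmalloc= and friends.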
2703--- sle11-2009-05-14.orig/arch/x86/kernel/smp_32-xen.c 2009-05-14 11:02:43.000000000 +0200
2704+++ sle11-2009-05-14/arch/x86/kernel/smp_32-xen.c 2009-03-04 11:28:34.000000000 +0100
2705@@ -279,8 +279,7 @@ static inline void leave_mm (unsigned lo
2706 * 2) Leave the mm if we are in the lazy tlb mode.
2707 */
2708
2709-irqreturn_t smp_invalidate_interrupt(int irq, void *dev_id,
2710- struct pt_regs *regs)
2711+irqreturn_t smp_invalidate_interrupt(int irq, void *dev_id)
2712 {
2713 unsigned long cpu;
2714
2715@@ -567,16 +566,14 @@ void smp_send_stop(void)
2716 * all the work is done automatically when
2717 * we return from the interrupt.
2718 */
2719-irqreturn_t smp_reschedule_interrupt(int irq, void *dev_id,
2720- struct pt_regs *regs)
2721+irqreturn_t smp_reschedule_interrupt(int irq, void *dev_id)
2722 {
2723
2724 return IRQ_HANDLED;
2725 }
2726
2727 #include <linux/kallsyms.h>
2728-irqreturn_t smp_call_function_interrupt(int irq, void *dev_id,
2729- struct pt_regs *regs)
2730+irqreturn_t smp_call_function_interrupt(int irq, void *dev_id)
2731 {
2732 void (*func) (void *info) = call_data->func;
2733 void *info = call_data->info;
2734@@ -603,3 +600,69 @@ irqreturn_t smp_call_function_interrupt(
2735 return IRQ_HANDLED;
2736 }
2737
2738+/*
2739+ * this function sends a 'generic call function' IPI to one other CPU
2740+ * in the system.
2741+ *
2742+ * cpu is a standard Linux logical CPU number.
2743+ */
2744+static void
2745+__smp_call_function_single(int cpu, void (*func) (void *info), void *info,
2746+ int nonatomic, int wait)
2747+{
2748+ struct call_data_struct data;
2749+ int cpus = 1;
2750+
2751+ data.func = func;
2752+ data.info = info;
2753+ atomic_set(&data.started, 0);
2754+ data.wait = wait;
2755+ if (wait)
2756+ atomic_set(&data.finished, 0);
2757+
2758+ call_data = &data;
2759+ wmb();
2760+ /* Send a message to all other CPUs and wait for them to respond */
2761+ send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_VECTOR);
2762+
2763+ /* Wait for response */
2764+ while (atomic_read(&data.started) != cpus)
2765+ cpu_relax();
2766+
2767+ if (!wait)
2768+ return;
2769+
2770+ while (atomic_read(&data.finished) != cpus)
2771+ cpu_relax();
2772+}
2773+
2774+/*
2775+ * smp_call_function_single - Run a function on another CPU
2776+ * @func: The function to run. This must be fast and non-blocking.
2777+ * @info: An arbitrary pointer to pass to the function.
2778+ * @nonatomic: Currently unused.
2779+ * @wait: If true, wait until function has completed on other CPUs.
2780+ *
2781+ * Retrurns 0 on success, else a negative status code.
2782+ *
2783+ * Does not return until the remote CPU is nearly ready to execute <func>
2784+ * or is or has executed.
2785+ */
2786+
2787+int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
2788+ int nonatomic, int wait)
2789+{
2790+ /* prevent preemption and reschedule on another processor */
2791+ int me = get_cpu();
2792+ if (cpu == me) {
2793+ WARN_ON(1);
2794+ put_cpu();
2795+ return -EBUSY;
2796+ }
2797+ spin_lock_bh(&call_lock);
2798+ __smp_call_function_single(cpu, func, info, nonatomic, wait);
2799+ spin_unlock_bh(&call_lock);
2800+ put_cpu();
2801+ return 0;
2802+}
2803+EXPORT_SYMBOL(smp_call_function_single);
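A hypothetical caller of the smp_call_function_single() interface added above, running a function on CPU 1 and waiting for completion (names are illustrative only):

	static void remote_hello(void *info)
	{
		printk(KERN_INFO "hello from CPU %d\n", smp_processor_id());
	}

	/* from process context on some CPU other than 1: */
	if (smp_call_function_single(1, remote_hello, NULL, 0, 1))
		printk(KERN_WARNING "cross-call to CPU 1 failed\n");

Note that, per the code above, calling it for the current CPU is rejected with -EBUSY rather than executing the function locally.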
2804--- sle11-2009-05-14.orig/arch/x86/kernel/time_32-xen.c 2009-04-20 11:36:10.000000000 +0200
2805+++ sle11-2009-05-14/arch/x86/kernel/time_32-xen.c 2009-03-24 10:08:00.000000000 +0100
2806@@ -89,7 +89,6 @@ int pit_latch_buggy; /* ext
2807 unsigned long vxtime_hz = PIT_TICK_RATE;
2808 struct vxtime_data __vxtime __section_vxtime; /* for vsyscalls */
2809 volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
2810-unsigned long __wall_jiffies __section_wall_jiffies = INITIAL_JIFFIES;
2811 struct timespec __xtime __section_xtime;
2812 struct timezone __sys_tz __section_sys_tz;
2813 #endif
2814@@ -97,8 +96,6 @@ struct timezone __sys_tz __section_sys_t
2815 unsigned int cpu_khz; /* Detected as we calibrate the TSC */
2816 EXPORT_SYMBOL(cpu_khz);
2817
2818-extern unsigned long wall_jiffies;
2819-
2820 DEFINE_SPINLOCK(rtc_lock);
2821 EXPORT_SYMBOL(rtc_lock);
2822
2823@@ -265,11 +262,10 @@ static void __update_wallclock(time_t se
2824 time_t wtm_sec, xtime_sec;
2825 u64 tmp, wc_nsec;
2826
2827- /* Adjust wall-clock time base based on wall_jiffies ticks. */
2828+ /* Adjust wall-clock time base. */
2829 wc_nsec = processed_system_time;
2830 wc_nsec += sec * (u64)NSEC_PER_SEC;
2831 wc_nsec += nsec;
2832- wc_nsec -= (jiffies - wall_jiffies) * (u64)NS_PER_TICK;
2833
2834 /* Split wallclock base into seconds and nanoseconds. */
2835 tmp = wc_nsec;
2836@@ -392,16 +388,10 @@ void do_gettimeofday(struct timeval *tv)
2837 shadow = &per_cpu(shadow_time, cpu);
2838
2839 do {
2840- unsigned long lost;
2841-
2842 local_time_version = shadow->version;
2843 seq = read_seqbegin(&xtime_lock);
2844
2845 usec = get_usec_offset(shadow);
2846- lost = jiffies - wall_jiffies;
2847-
2848- if (unlikely(lost))
2849- usec += lost * (USEC_PER_SEC / HZ);
2850
2851 sec = xtime.tv_sec;
2852 usec += (xtime.tv_nsec / NSEC_PER_USEC);
2853@@ -525,7 +515,7 @@ static void sync_xen_wallclock(unsigned
2854 write_seqlock_irq(&xtime_lock);
2855
2856 sec = xtime.tv_sec;
2857- nsec = xtime.tv_nsec + ((jiffies - wall_jiffies) * (u64)NS_PER_TICK);
2858+ nsec = xtime.tv_nsec;
2859 __normalize_time(&sec, &nsec);
2860
2861 op.cmd = XENPF_settime;
2862@@ -599,42 +589,49 @@ unsigned long long sched_clock(void)
2863 }
2864 #endif
2865
2866-#if defined(CONFIG_SMP) && defined(CONFIG_FRAME_POINTER)
2867 unsigned long profile_pc(struct pt_regs *regs)
2868 {
2869 unsigned long pc = instruction_pointer(regs);
2870
2871-#ifdef __x86_64__
2872- /* Assume the lock function has either no stack frame or only a single word.
2873- This checks if the address on the stack looks like a kernel text address.
2874- There is a small window for false hits, but in that case the tick
2875- is just accounted to the spinlock function.
2876- Better would be to write these functions in assembler again
2877- and check exactly. */
2878+#if defined(CONFIG_SMP) || defined(__x86_64__)
2879 if (!user_mode_vm(regs) && in_lock_functions(pc)) {
2880- char *v = *(char **)regs->rsp;
2881- if ((v >= _stext && v <= _etext) ||
2882- (v >= _sinittext && v <= _einittext) ||
2883- (v >= (char *)MODULES_VADDR && v <= (char *)MODULES_END))
2884- return (unsigned long)v;
2885- return ((unsigned long *)regs->rsp)[1];
2886+# ifdef CONFIG_FRAME_POINTER
2887+# ifdef __i386__
2888+ return ((unsigned long *)regs->ebp)[1];
2889+# else
2890+ return ((unsigned long *)regs->rbp)[1];
2891+# endif
2892+# else
2893+# ifdef __i386__
2894+ unsigned long *sp;
2895+ if ((regs->xcs & 2) == 0)
2896+ sp = (unsigned long *)&regs->esp;
2897+ else
2898+ sp = (unsigned long *)regs->esp;
2899+# else
2900+ unsigned long *sp = (unsigned long *)regs->rsp;
2901+# endif
2902+ /* Return address is either directly at stack pointer
2903+ or above a saved eflags. Eflags has bits 22-31 zero,
2904+ kernel addresses don't. */
2905+ if (sp[0] >> 22)
2906+ return sp[0];
2907+ if (sp[1] >> 22)
2908+ return sp[1];
2909+# endif
2910 }
2911-#else
2912- if (!user_mode_vm(regs) && in_lock_functions(pc))
2913- return *(unsigned long *)(regs->ebp + 4);
2914 #endif
2915
2916 return pc;
2917 }
2918 EXPORT_SYMBOL(profile_pc);
2919-#endif
2920
2921 /*
2922 * This is the same as the above, except we _also_ save the current
2923 * Time Stamp Counter value at the time of the timer interrupt, so that
2924 * we later on can estimate the time of day more exactly.
2925 */
2926-irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
2927+irqreturn_t timer_interrupt(int irq, void *dev_id)
2928 {
2929 s64 delta, delta_cpu, stolen, blocked;
2930 u64 sched_time;
2931@@ -692,10 +689,14 @@ irqreturn_t timer_interrupt(int irq, voi
2932 }
2933
2934 /* System-wide jiffy work. */
2935- while (delta >= NS_PER_TICK) {
2936- delta -= NS_PER_TICK;
2937- processed_system_time += NS_PER_TICK;
2938- do_timer(regs);
2939+ if (delta >= NS_PER_TICK) {
2940+ do_div(delta, NS_PER_TICK);
2941+ processed_system_time += delta * NS_PER_TICK;
2942+ while (delta > HZ) {
2943+ do_timer(HZ);
2944+ delta -= HZ;
2945+ }
2946+ do_timer(delta);
2947 }
2948
2949 if (shadow_tv_version != HYPERVISOR_shared_info->wc_version) {
2950@@ -740,7 +741,7 @@ irqreturn_t timer_interrupt(int irq, voi
2951 if (delta_cpu > 0) {
2952 do_div(delta_cpu, NS_PER_TICK);
2953 per_cpu(processed_system_time, cpu) += delta_cpu * NS_PER_TICK;
2954- if (user_mode_vm(regs))
2955+ if (user_mode_vm(get_irq_regs()))
2956 account_user_time(current, (cputime_t)delta_cpu);
2957 else
2958 account_system_time(current, HARDIRQ_OFFSET,
2959@@ -754,10 +755,10 @@ irqreturn_t timer_interrupt(int irq, voi
2960 /* Local timer processing (see update_process_times()). */
2961 run_local_timers();
2962 if (rcu_pending(cpu))
2963- rcu_check_callbacks(cpu, user_mode_vm(regs));
2964+ rcu_check_callbacks(cpu, user_mode_vm(get_irq_regs()));
2965 scheduler_tick();
2966 run_posix_cpu_timers(current);
2967- profile_tick(CPU_PROFILING, regs);
2968+ profile_tick(CPU_PROFILING);
2969
2970 return IRQ_HANDLED;
2971 }
2972@@ -967,10 +968,11 @@ extern void (*late_time_init)(void);
2973 /* Duplicate of time_init() below, with hpet_enable part added */
2974 static void __init hpet_time_init(void)
2975 {
2976- xtime.tv_sec = get_cmos_time();
2977- xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ);
2978- set_normalized_timespec(&wall_to_monotonic,
2979- -xtime.tv_sec, -xtime.tv_nsec);
2980+ struct timespec ts;
2981+ ts.tv_sec = get_cmos_time();
2982+ ts.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ);
2983+
2984+ do_settimeofday(&ts);
2985
2986 if ((hpet_enable() >= 0) && hpet_use_timer) {
2987 printk("Using HPET for base-timer\n");
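timer_interrupt() above is converted to the 2.6.19 interrupt-handler prototype, which no longer receives a pt_regs argument; handlers that need the interrupted register frame fetch it via get_irq_regs(). The generic implementation is roughly (include/asm-generic/irq_regs.h):

	DECLARE_PER_CPU(struct pt_regs *, __irq_regs);

	static inline struct pt_regs *get_irq_regs(void)
	{
		return __get_cpu_var(__irq_regs);
	}

	static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs)
	{
		struct pt_regs *old_regs, **pp_regs = &__get_cpu_var(__irq_regs);

		old_regs = *pp_regs;
		*pp_regs = new_regs;
		return old_regs;
	}

The low-level IRQ entry code calls set_irq_regs() around the handler, so user_mode_vm(get_irq_regs()) above sees the same frame the old regs parameter used to carry.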
2988--- sle11-2009-05-14.orig/arch/x86/kernel/traps_32-xen.c 2009-05-14 11:02:43.000000000 +0200
2989+++ sle11-2009-05-14/arch/x86/kernel/traps_32-xen.c 2009-03-04 11:28:34.000000000 +0100
2990@@ -28,6 +28,7 @@
2991 #include <linux/kprobes.h>
2992 #include <linux/kexec.h>
2993 #include <linux/unwind.h>
2994+#include <linux/uaccess.h>
2995
2996 #ifdef CONFIG_EISA
2997 #include <linux/ioport.h>
2998@@ -40,7 +41,6 @@
2999
3000 #include <asm/processor.h>
3001 #include <asm/system.h>
3002-#include <asm/uaccess.h>
3003 #include <asm/io.h>
3004 #include <asm/atomic.h>
3005 #include <asm/debugreg.h>
3006@@ -51,11 +51,14 @@
3007 #include <asm/smp.h>
3008 #include <asm/arch_hooks.h>
3009 #include <asm/kdebug.h>
3010+#include <asm/stacktrace.h>
3011
3012 #include <linux/module.h>
3013
3014 #include "mach_traps.h"
3015
3016+int panic_on_unrecovered_nmi;
3017+
3018 asmlinkage int system_call(void);
3019
3020 struct desc_struct default_ldt[] = { { 0, 0 }, { 0, 0 }, { 0, 0 },
3021@@ -124,62 +127,63 @@ static inline int valid_stack_ptr(struct
3022 p < (void *)tinfo + THREAD_SIZE - 3;
3023 }
3024
3025-/*
3026- * Print one address/symbol entries per line.
3027- */
3028-static inline void print_addr_and_symbol(unsigned long addr, char *log_lvl)
3029-{
3030- printk(" [<%08lx>] ", addr);
3031-
3032- print_symbol("%s\n", addr);
3033-}
3034-
3035 static inline unsigned long print_context_stack(struct thread_info *tinfo,
3036 unsigned long *stack, unsigned long ebp,
3037- char *log_lvl)
3038+ struct stacktrace_ops *ops, void *data)
3039 {
3040 unsigned long addr;
3041
3042 #ifdef CONFIG_FRAME_POINTER
3043 while (valid_stack_ptr(tinfo, (void *)ebp)) {
3044+ unsigned long new_ebp;
3045 addr = *(unsigned long *)(ebp + 4);
3046- print_addr_and_symbol(addr, log_lvl);
3047+ ops->address(data, addr);
3048 /*
3049 * break out of recursive entries (such as
3050- * end_of_stack_stop_unwind_function):
3051+ * end_of_stack_stop_unwind_function). Also,
3052+ * we can never allow a frame pointer to
3053+ * move downwards!
3054 */
3055- if (ebp == *(unsigned long *)ebp)
3056+ new_ebp = *(unsigned long *)ebp;
3057+ if (new_ebp <= ebp)
3058 break;
3059- ebp = *(unsigned long *)ebp;
3060+ ebp = new_ebp;
3061 }
3062 #else
3063 while (valid_stack_ptr(tinfo, stack)) {
3064 addr = *stack++;
3065 if (__kernel_text_address(addr))
3066- print_addr_and_symbol(addr, log_lvl);
3067+ ops->address(data, addr);
3068 }
3069 #endif
3070 return ebp;
3071 }
3072
3073+struct ops_and_data {
3074+ struct stacktrace_ops *ops;
3075+ void *data;
3076+};
3077+
3078 static asmlinkage int
3079-show_trace_unwind(struct unwind_frame_info *info, void *log_lvl)
3080+dump_trace_unwind(struct unwind_frame_info *info, void *data)
3081 {
3082+ struct ops_and_data *oad = (struct ops_and_data *)data;
3083 int n = 0;
3084
3085 while (unwind(info) == 0 && UNW_PC(info)) {
3086 n++;
3087- print_addr_and_symbol(UNW_PC(info), log_lvl);
3088+ oad->ops->address(oad->data, UNW_PC(info));
3089 if (arch_unw_user_mode(info))
3090 break;
3091 }
3092 return n;
3093 }
3094
3095-static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
3096- unsigned long *stack, char *log_lvl)
3097+void dump_trace(struct task_struct *task, struct pt_regs *regs,
3098+ unsigned long *stack,
3099+ struct stacktrace_ops *ops, void *data)
3100 {
3101- unsigned long ebp;
3102+ unsigned long ebp = 0;
3103
3104 if (!task)
3105 task = current;
3106@@ -187,54 +191,116 @@ static void show_trace_log_lvl(struct ta
3107 if (call_trace >= 0) {
3108 int unw_ret = 0;
3109 struct unwind_frame_info info;
3110+ struct ops_and_data oad = { .ops = ops, .data = data };
3111
3112 if (regs) {
3113 if (unwind_init_frame_info(&info, task, regs) == 0)
3114- unw_ret = show_trace_unwind(&info, log_lvl);
3115+ unw_ret = dump_trace_unwind(&info, &oad);
3116 } else if (task == current)
3117- unw_ret = unwind_init_running(&info, show_trace_unwind, log_lvl);
3118+ unw_ret = unwind_init_running(&info, dump_trace_unwind, &oad);
3119 else {
3120 if (unwind_init_blocked(&info, task) == 0)
3121- unw_ret = show_trace_unwind(&info, log_lvl);
3122+ unw_ret = dump_trace_unwind(&info, &oad);
3123 }
3124 if (unw_ret > 0) {
3125 if (call_trace == 1 && !arch_unw_user_mode(&info)) {
3126- print_symbol("DWARF2 unwinder stuck at %s\n",
3127+ ops->warning_symbol(data, "DWARF2 unwinder stuck at %s\n",
3128 UNW_PC(&info));
3129 if (UNW_SP(&info) >= PAGE_OFFSET) {
3130- printk("Leftover inexact backtrace:\n");
3131+ ops->warning(data, "Leftover inexact backtrace:\n");
3132 stack = (void *)UNW_SP(&info);
3133+ if (!stack)
3134+ return;
3135+ ebp = UNW_FP(&info);
3136 } else
3137- printk("Full inexact backtrace again:\n");
3138+ ops->warning(data, "Full inexact backtrace again:\n");
3139 } else if (call_trace >= 1)
3140 return;
3141 else
3142- printk("Full inexact backtrace again:\n");
3143+ ops->warning(data, "Full inexact backtrace again:\n");
3144 } else
3145- printk("Inexact backtrace:\n");
3146+ ops->warning(data, "Inexact backtrace:\n");
3147 }
3148-
3149- if (task == current) {
3150- /* Grab ebp right from our regs */
3151- asm ("movl %%ebp, %0" : "=r" (ebp) : );
3152- } else {
3153- /* ebp is the last reg pushed by switch_to */
3154- ebp = *(unsigned long *) task->thread.esp;
3155+ if (!stack) {
3156+ unsigned long dummy;
3157+ stack = &dummy;
3158+ if (task && task != current)
3159+ stack = (unsigned long *)task->thread.esp;
3160+ }
3161+
3162+#ifdef CONFIG_FRAME_POINTER
3163+ if (!ebp) {
3164+ if (task == current) {
3165+ /* Grab ebp right from our regs */
3166+ asm ("movl %%ebp, %0" : "=r" (ebp) : );
3167+ } else {
3168+ /* ebp is the last reg pushed by switch_to */
3169+ ebp = *(unsigned long *) task->thread.esp;
3170+ }
3171 }
3172+#endif
3173
3174 while (1) {
3175 struct thread_info *context;
3176 context = (struct thread_info *)
3177 ((unsigned long)stack & (~(THREAD_SIZE - 1)));
3178- ebp = print_context_stack(context, stack, ebp, log_lvl);
3179+ ebp = print_context_stack(context, stack, ebp, ops, data);
3180+ /* Should be after the line below, but somewhere
3181+ in early boot the context comes out corrupted and we
3182+ can't reference it -AK */
3183+ if (ops->stack(data, "IRQ") < 0)
3184+ break;
3185 stack = (unsigned long*)context->previous_esp;
3186 if (!stack)
3187 break;
3188- printk("%s =======================\n", log_lvl);
3189 }
3190 }
3191+EXPORT_SYMBOL(dump_trace);
3192+
3193+static void
3194+print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
3195+{
3196+ printk(data);
3197+ print_symbol(msg, symbol);
3198+ printk("\n");
3199+}
3200+
3201+static void print_trace_warning(void *data, char *msg)
3202+{
3203+ printk("%s%s\n", (char *)data, msg);
3204+}
3205
3206-void show_trace(struct task_struct *task, struct pt_regs *regs, unsigned long * stack)
3207+static int print_trace_stack(void *data, char *name)
3208+{
3209+ return 0;
3210+}
3211+
3212+/*
3213+ * Print one address/symbol entry per line.
3214+ */
3215+static void print_trace_address(void *data, unsigned long addr)
3216+{
3217+ printk("%s [<%08lx>] ", (char *)data, addr);
3218+ print_symbol("%s\n", addr);
3219+}
3220+
3221+static struct stacktrace_ops print_trace_ops = {
3222+ .warning = print_trace_warning,
3223+ .warning_symbol = print_trace_warning_symbol,
3224+ .stack = print_trace_stack,
3225+ .address = print_trace_address,
3226+};
3227+
3228+static void
3229+show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
3230+ unsigned long * stack, char *log_lvl)
3231+{
3232+ dump_trace(task, regs, stack, &print_trace_ops, log_lvl);
3233+ printk("%s =======================\n", log_lvl);
3234+}
3235+
3236+void show_trace(struct task_struct *task, struct pt_regs *regs,
3237+ unsigned long * stack)
3238 {
3239 show_trace_log_lvl(task, regs, stack, "");
3240 }
3241@@ -297,12 +363,13 @@ void show_registers(struct pt_regs *regs
3242 ss = regs->xss & 0xffff;
3243 }
3244 print_modules();
3245- printk(KERN_EMERG "CPU: %d\nEIP: %04x:[<%08lx>] %s VLI\n"
3246- "EFLAGS: %08lx (%s %.*s) \n",
3247+ printk(KERN_EMERG "CPU: %d\n"
3248+ KERN_EMERG "EIP: %04x:[<%08lx>] %s VLI\n"
3249+ KERN_EMERG "EFLAGS: %08lx (%s %.*s)\n",
3250 smp_processor_id(), 0xffff & regs->xcs, regs->eip,
3251- print_tainted(), regs->eflags, system_utsname.release,
3252- (int)strcspn(system_utsname.version, " "),
3253- system_utsname.version);
3254+ print_tainted(), regs->eflags, init_utsname()->release,
3255+ (int)strcspn(init_utsname()->version, " "),
3256+ init_utsname()->version);
3257 print_symbol(KERN_EMERG "EIP is at %s\n", regs->eip);
3258 printk(KERN_EMERG "eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n",
3259 regs->eax, regs->ebx, regs->ecx, regs->edx);
3260@@ -319,6 +386,8 @@ void show_registers(struct pt_regs *regs
3261 */
3262 if (in_kernel) {
3263 u8 __user *eip;
3264+ int code_bytes = 64;
3265+ unsigned char c;
3266
3267 printk("\n" KERN_EMERG "Stack: ");
3268 show_stack_log_lvl(NULL, regs, (unsigned long *)esp, KERN_EMERG);
3269@@ -326,9 +395,12 @@ void show_registers(struct pt_regs *regs
3270 printk(KERN_EMERG "Code: ");
3271
3272 eip = (u8 __user *)regs->eip - 43;
3273- for (i = 0; i < 64; i++, eip++) {
3274- unsigned char c;
3275-
3276+ if (eip < (u8 __user *)PAGE_OFFSET || __get_user(c, eip)) {
3277+ /* try starting at EIP */
3278+ eip = (u8 __user *)regs->eip;
3279+ code_bytes = 32;
3280+ }
3281+ for (i = 0; i < code_bytes; i++, eip++) {
3282 if (eip < (u8 __user *)PAGE_OFFSET || __get_user(c, eip)) {
3283 printk(" Bad EIP value.");
3284 break;
3285@@ -349,7 +421,7 @@ static void handle_BUG(struct pt_regs *r
3286
3287 if (eip < PAGE_OFFSET)
3288 return;
3289- if (__get_user(ud2, (unsigned short __user *)eip))
3290+ if (probe_kernel_address((unsigned short __user *)eip, ud2))
3291 return;
3292 if (ud2 != 0x0b0f)
3293 return;
3294@@ -362,7 +434,8 @@ static void handle_BUG(struct pt_regs *r
3295 char *file;
3296 char c;
3297
3298- if (__get_user(line, (unsigned short __user *)(eip + 2)))
3299+ if (probe_kernel_address((unsigned short __user *)(eip + 2),
3300+ line))
3301 break;
3302 if (__get_user(file, (char * __user *)(eip + 4)) ||
3303 (unsigned long)file < PAGE_OFFSET || __get_user(c, file))
3304@@ -604,18 +677,24 @@ gp_in_kernel:
3305 }
3306 }
3307
3308-static void mem_parity_error(unsigned char reason, struct pt_regs * regs)
3309+static __kprobes void
3310+mem_parity_error(unsigned char reason, struct pt_regs * regs)
3311 {
3312- printk(KERN_EMERG "Uhhuh. NMI received. Dazed and confused, but trying "
3313- "to continue\n");
3314+ printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x on "
3315+ "CPU %d.\n", reason, smp_processor_id());
3316 printk(KERN_EMERG "You probably have a hardware problem with your RAM "
3317 "chips\n");
3318+ if (panic_on_unrecovered_nmi)
3319+ panic("NMI: Not continuing");
3320+
3321+ printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
3322
3323 /* Clear and disable the memory parity error line. */
3324 clear_mem_error(reason);
3325 }
3326
3327-static void io_check_error(unsigned char reason, struct pt_regs * regs)
3328+static __kprobes void
3329+io_check_error(unsigned char reason, struct pt_regs * regs)
3330 {
3331 printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n");
3332 show_registers(regs);
3333@@ -624,7 +703,8 @@ static void io_check_error(unsigned char
3334 clear_io_check_error(reason);
3335 }
3336
3337-static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
3338+static __kprobes void
3339+unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
3340 {
3341 #ifdef CONFIG_MCA
3342 /* Might actually be able to figure out what the guilty party
3343@@ -634,15 +714,18 @@ static void unknown_nmi_error(unsigned c
3344 return;
3345 }
3346 #endif
3347- printk("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
3348- reason, smp_processor_id());
3349- printk("Dazed and confused, but trying to continue\n");
3350- printk("Do you have a strange power saving mode enabled?\n");
3351+ printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x on "
3352+ "CPU %d.\n", reason, smp_processor_id());
3353+ printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n");
3354+ if (panic_on_unrecovered_nmi)
3355+ panic("NMI: Not continuing");
3356+
3357+ printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
3358 }
3359
3360 static DEFINE_SPINLOCK(nmi_print_lock);
3361
3362-void die_nmi (struct pt_regs *regs, const char *msg)
3363+void __kprobes die_nmi(struct pt_regs *regs, const char *msg)
3364 {
3365 if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 2, SIGINT) ==
3366 NOTIFY_STOP)
3367@@ -674,7 +757,7 @@ void die_nmi (struct pt_regs *regs, cons
3368 do_exit(SIGSEGV);
3369 }
3370
3371-static void default_do_nmi(struct pt_regs * regs)
3372+static __kprobes void default_do_nmi(struct pt_regs * regs)
3373 {
3374 unsigned char reason = 0;
3375
3376@@ -691,12 +774,12 @@ static void default_do_nmi(struct pt_reg
3377 * Ok, so this is none of the documented NMI sources,
3378 * so it must be the NMI watchdog.
3379 */
3380- if (nmi_watchdog) {
3381- nmi_watchdog_tick(regs);
3382+ if (nmi_watchdog_tick(regs, reason))
3383 return;
3384- }
3385+ if (!do_nmi_callback(regs, smp_processor_id()))
3386 #endif
3387- unknown_nmi_error(reason, regs);
3388+ unknown_nmi_error(reason, regs);
3389+
3390 return;
3391 }
3392 if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
3393@@ -712,14 +795,7 @@ static void default_do_nmi(struct pt_reg
3394 reassert_nmi();
3395 }
3396
3397-static int dummy_nmi_callback(struct pt_regs * regs, int cpu)
3398-{
3399- return 0;
3400-}
3401-
3402-static nmi_callback_t nmi_callback = dummy_nmi_callback;
3403-
3404-fastcall void do_nmi(struct pt_regs * regs, long error_code)
3405+fastcall __kprobes void do_nmi(struct pt_regs * regs, long error_code)
3406 {
3407 int cpu;
3408
3409@@ -729,25 +805,11 @@ fastcall void do_nmi(struct pt_regs * re
3410
3411 ++nmi_count(cpu);
3412
3413- if (!rcu_dereference(nmi_callback)(regs, cpu))
3414- default_do_nmi(regs);
3415+ default_do_nmi(regs);
3416
3417 nmi_exit();
3418 }
3419
3420-void set_nmi_callback(nmi_callback_t callback)
3421-{
3422- vmalloc_sync_all();
3423- rcu_assign_pointer(nmi_callback, callback);
3424-}
3425-EXPORT_SYMBOL_GPL(set_nmi_callback);
3426-
3427-void unset_nmi_callback(void)
3428-{
3429- nmi_callback = dummy_nmi_callback;
3430-}
3431-EXPORT_SYMBOL_GPL(unset_nmi_callback);
3432-
3433 #ifdef CONFIG_KPROBES
3434 fastcall void __kprobes do_int3(struct pt_regs *regs, long error_code)
3435 {
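The dump_trace()/stacktrace_ops split above decouples stack walking from printing. A hypothetical second consumer, counting return addresses instead of printing them (illustrative names, matching the callback signatures this patch defines):

	static void count_warning(void *data, char *msg) { }
	static void count_warning_symbol(void *data, char *msg, unsigned long sym) { }
	static int count_stack(void *data, char *name)
	{
		return 0;	/* keep walking across stack boundaries */
	}
	static void count_address(void *data, unsigned long addr)
	{
		++*(unsigned long *)data;
	}

	static struct stacktrace_ops count_ops = {
		.warning	= count_warning,
		.warning_symbol	= count_warning_symbol,
		.stack		= count_stack,
		.address	= count_address,
	};

	unsigned long depth = 0;
	dump_trace(current, NULL, NULL, &count_ops, &depth);

This is the pattern the mainline stacktrace and oprofile code adopted in 2.6.19.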
3436--- sle11-2009-05-14.orig/arch/x86/mach-xen/setup.c 2009-05-14 11:02:43.000000000 +0200
3437+++ sle11-2009-05-14/arch/x86/mach-xen/setup.c 2009-03-04 11:28:34.000000000 +0100
3438@@ -103,8 +103,10 @@ void __init pre_setup_arch_hook(void)
3439
3440 setup_xen_features();
3441
3442- if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0)
3443- set_fixaddr_top(pp.virt_start);
3444+ if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0) {
3445+ hypervisor_virt_start = pp.virt_start;
3446+ reserve_top_address(0UL - pp.virt_start);
3447+ }
3448
3449 if (HYPERVISOR_memory_op(XENMEM_machphys_mapping, &mapping) == 0) {
3450 machine_to_phys_mapping = (unsigned long *)mapping.v_start;
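reserve_top_address(), called above with the distance from the hypervisor hole to the top of the address space, replaces the earlier set_fixaddr_top() approach. Its mainline 2.6.19 implementation is roughly (arch/i386/mm/pgtable.c, abridged):

	void __init reserve_top_address(unsigned long reserve)
	{
		BUG_ON(fixmaps > 0);	/* must run before any fixmap is used */
		__FIXADDR_TOP = -reserve - PAGE_SIZE;
		__VMALLOC_RESERVE += reserve;
	}

Doing this in pre_setup_arch_hook() ensures the fixmap is relocated below the Xen-reserved region before paging is initialised.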
3451--- sle11-2009-05-14.orig/arch/x86/mm/fault_32-xen.c 2009-05-14 11:02:43.000000000 +0200
3452+++ sle11-2009-05-14/arch/x86/mm/fault_32-xen.c 2009-03-04 11:28:34.000000000 +0100
3453@@ -27,21 +27,24 @@
3454 #include <asm/uaccess.h>
3455 #include <asm/desc.h>
3456 #include <asm/kdebug.h>
3457+#include <asm/segment.h>
3458
3459 extern void die(const char *,struct pt_regs *,long);
3460
3461-#ifdef CONFIG_KPROBES
3462-ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
3463+static ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
3464+
3465 int register_page_fault_notifier(struct notifier_block *nb)
3466 {
3467 vmalloc_sync_all();
3468 return atomic_notifier_chain_register(&notify_page_fault_chain, nb);
3469 }
3470+EXPORT_SYMBOL_GPL(register_page_fault_notifier);
3471
3472 int unregister_page_fault_notifier(struct notifier_block *nb)
3473 {
3474 return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb);
3475 }
3476+EXPORT_SYMBOL_GPL(unregister_page_fault_notifier);
3477
3478 static inline int notify_page_fault(enum die_val val, const char *str,
3479 struct pt_regs *regs, long err, int trap, int sig)
3480@@ -55,14 +58,6 @@ static inline int notify_page_fault(enum
3481 };
3482 return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args);
3483 }
3484-#else
3485-static inline int notify_page_fault(enum die_val val, const char *str,
3486- struct pt_regs *regs, long err, int trap, int sig)
3487-{
3488- return NOTIFY_DONE;
3489-}
3490-#endif
3491-
3492
3493 /*
3494 * Unlock any spinlocks which will prevent us from getting the
3495@@ -119,10 +114,10 @@ static inline unsigned long get_segment_
3496 }
3497
3498 /* The standard kernel/user address space limit. */
3499- *eip_limit = (seg & 2) ? USER_DS.seg : KERNEL_DS.seg;
3500+ *eip_limit = user_mode(regs) ? USER_DS.seg : KERNEL_DS.seg;
3501
3502 /* By far the most common cases. */
3503- if (likely(seg == __USER_CS || seg == GET_KERNEL_CS()))
3504+ if (likely(SEGMENT_IS_FLAT_CODE(seg)))
3505 return eip;
3506
3507 /* Check the segment exists, is within the current LDT/GDT size,
3508@@ -559,11 +554,7 @@ good_area:
3509 write = 0;
3510 switch (error_code & 3) {
3511 default: /* 3: write, present */
3512-#ifdef TEST_VERIFY_AREA
3513- if (regs->cs == GET_KERNEL_CS())
3514- printk("WP fault at %08lx\n", regs->eip);
3515-#endif
3516- /* fall through */
3517+ /* fall through */
3518 case 2: /* write, not present */
3519 if (!(vma->vm_flags & VM_WRITE))
3520 goto bad_area;
3521@@ -572,7 +563,7 @@ good_area:
3522 case 1: /* read, present */
3523 goto bad_area;
3524 case 0: /* read, not present */
3525- if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
3526+ if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
3527 goto bad_area;
3528 }
3529
3530@@ -704,7 +695,7 @@ no_context:
3531 */
3532 out_of_memory:
3533 up_read(&mm->mmap_sem);
3534- if (tsk->pid == 1) {
3535+ if (is_init(tsk)) {
3536 yield();
3537 down_read(&mm->mmap_sem);
3538 goto survive;
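is_init(), substituted above for the open-coded PID test in the out-of-memory path, is another 2.6.19 helper; roughly (include/linux/sched.h):

	static inline int is_init(struct task_struct *tsk)
	{
		return tsk->pid == 1;
	}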
3539--- sle11-2009-05-14.orig/arch/x86/mm/highmem_32-xen.c 2009-05-14 11:02:43.000000000 +0200
3540+++ sle11-2009-05-14/arch/x86/mm/highmem_32-xen.c 2009-03-04 11:28:34.000000000 +0100
3541@@ -38,11 +38,9 @@ static void *__kmap_atomic(struct page *
3542
3543 idx = type + KM_TYPE_NR*smp_processor_id();
3544 vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
3545-#ifdef CONFIG_DEBUG_HIGHMEM
3546 if (!pte_none(*(kmap_pte-idx)))
3547 BUG();
3548-#endif
3549- set_pte_at_sync(&init_mm, vaddr, kmap_pte-idx, mk_pte(page, prot));
3550+ set_pte_at(&init_mm, vaddr, kmap_pte-idx, mk_pte(page, prot));
3551
3552 return (void*) vaddr;
3553 }
3554@@ -62,36 +60,26 @@ void *kmap_atomic_pte(struct page *page,
3555
3556 void kunmap_atomic(void *kvaddr, enum km_type type)
3557 {
3558-#if defined(CONFIG_DEBUG_HIGHMEM) || defined(CONFIG_XEN)
3559 unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
3560 enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id();
3561
3562- if (vaddr < FIXADDR_START) { // FIXME
3563+#ifdef CONFIG_DEBUG_HIGHMEM
3564+ if (vaddr >= PAGE_OFFSET && vaddr < (unsigned long)high_memory) {
3565 dec_preempt_count();
3566 preempt_check_resched();
3567 return;
3568 }
3569-#endif
3570
3571-#if defined(CONFIG_DEBUG_HIGHMEM)
3572 if (vaddr != __fix_to_virt(FIX_KMAP_BEGIN+idx))
3573 BUG();
3574-
3575- /*
3576- * force other mappings to Oops if they'll try to access
3577- * this pte without first remap it
3578- */
3579- pte_clear(&init_mm, vaddr, kmap_pte-idx);
3580- __flush_tlb_one(vaddr);
3581-#elif defined(CONFIG_XEN)
3582+#endif
3583 /*
3584- * We must ensure there are no dangling pagetable references when
3585- * returning memory to Xen (decrease_reservation).
3586- * XXX TODO: We could make this faster by only zapping when
3587- * kmap_flush_unused is called but that is trickier and more invasive.
3588+ * Force other mappings to Oops if they'll try to access this pte
3589+ * without first remapping it. Keeping stale mappings around is a bad idea
3590+ * also, in case the page changes cacheability attributes or becomes
3591+ * a protected page in a hypervisor.
3592 */
3593- pte_clear(&init_mm, vaddr, kmap_pte-idx);
3594-#endif
3595+ kpte_clear_flush(kmap_pte-idx, vaddr);
3596
3597 dec_preempt_count();
3598 preempt_check_resched();
3599@@ -110,7 +98,6 @@ void *kmap_atomic_pfn(unsigned long pfn,
3600 idx = type + KM_TYPE_NR*smp_processor_id();
3601 vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
3602 set_pte(kmap_pte-idx, pfn_pte(pfn, kmap_prot));
3603- __flush_tlb_one(vaddr);
3604
3605 return (void*) vaddr;
3606 }
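
With this hunk kunmap_atomic() handles the debug and the Xen case the same way: the fixmap pte is torn down through kpte_clear_flush() instead of an #ifdef maze, and kmap_atomic_pfn() can drop its explicit __flush_tlb_one() because the unmap path now flushes. A sketch of what that helper amounts to (the real definition lives in the pgtable headers; this is from memory, shown for orientation only):

        /* Sketch of kpte_clear_flush(): clear one fixmap pte and flush just
         * that TLB entry, so any stale use of the mapping faults immediately. */
        #define kpte_clear_flush(ptep, vaddr)                   \
        do {                                                    \
                pte_clear(&init_mm, (vaddr), (ptep));           \
                __flush_tlb_one(vaddr);                         \
        } while (0)
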
3607--- sle11-2009-05-14.orig/arch/x86/mm/hypervisor.c 2008-12-15 11:13:45.000000000 +0100
3608+++ sle11-2009-05-14/arch/x86/mm/hypervisor.c 2009-03-04 11:28:34.000000000 +0100
3609@@ -31,6 +31,7 @@
3610 */
3611
3612 #include <linux/sched.h>
3613+#include <linux/hardirq.h>
3614 #include <linux/mm.h>
3615 #include <linux/vmalloc.h>
3616 #include <asm/page.h>
3617@@ -44,6 +45,302 @@
3618 #include <asm/tlbflush.h>
3619 #include <linux/highmem.h>
3620
3621+EXPORT_SYMBOL(hypercall_page);
3622+
3623+#define NR_MC BITS_PER_LONG
3624+#define NR_MMU BITS_PER_LONG
3625+#define NR_MMUEXT (BITS_PER_LONG / 4)
3626+
3627+DEFINE_PER_CPU(bool, xen_lazy_mmu);
3628+EXPORT_PER_CPU_SYMBOL(xen_lazy_mmu);
3629+struct lazy_mmu {
3630+ unsigned int nr_mc, nr_mmu, nr_mmuext;
3631+ multicall_entry_t mc[NR_MC];
3632+ mmu_update_t mmu[NR_MMU];
3633+ struct mmuext_op mmuext[NR_MMUEXT];
3634+};
3635+static DEFINE_PER_CPU(struct lazy_mmu, lazy_mmu);
3636+
3637+static inline bool use_lazy_mmu_mode(void)
3638+{
3639+#ifdef CONFIG_PREEMPT
3640+ if (!preempt_count())
3641+ return false;
3642+#endif
3643+ return !irq_count();
3644+}
3645+
3646+static void multicall_failed(const multicall_entry_t *mc, int rc)
3647+{
3648+ printk(KERN_EMERG "hypercall#%lu(%lx, %lx, %lx, %lx)"
3649+ " failed: %d (caller %lx)\n",
3650+ mc->op, mc->args[0], mc->args[1], mc->args[2], mc->args[3],
3651+ rc, mc->args[5]);
3652+ BUG();
3653+}
3654+
3655+int xen_multicall_flush(bool ret_last)
3655+{
3656+ struct lazy_mmu *lazy = &__get_cpu_var(lazy_mmu);
3657+ multicall_entry_t *mc = lazy->mc;
3658+ unsigned int count = lazy->nr_mc;
3659+
3660+ if (!count || !use_lazy_mmu_mode())
3661+ return 0;
3662+
3663+ lazy->nr_mc = 0;
3664+ lazy->nr_mmu = 0;
3665+ lazy->nr_mmuext = 0;
3666+
3667+ if (count == 1) {
3668+ int rc = _hypercall(int, mc->op, mc->args[0], mc->args[1],
3669+ mc->args[2], mc->args[3], mc->args[4]);
3670+
3671+ if (unlikely(rc)) {
3672+ if (ret_last)
3673+ return rc;
3674+ multicall_failed(mc, rc);
3675+ }
3676+ } else {
3677+ if (HYPERVISOR_multicall(mc, count))
3678+ BUG();
3679+ while (count-- > ret_last)
3680+ if (unlikely(mc++->result))
3681+ multicall_failed(mc - 1, mc[-1].result);
3682+ if (ret_last)
3683+ return mc->result;
3684+ }
3685+
3686+ return 0;
3687+}
3688+EXPORT_SYMBOL(xen_multicall_flush);
3689+
3690+int xen_multi_update_va_mapping(unsigned long va, pte_t pte,
3691+ unsigned long uvmf)
3692+{
3693+ struct lazy_mmu *lazy = &__get_cpu_var(lazy_mmu);
3694+ multicall_entry_t *mc;
3695+
3696+ if (unlikely(!use_lazy_mmu_mode()))
3697+#ifdef CONFIG_X86_PAE
3698+ return _hypercall4(int, update_va_mapping, va,
3699+ pte.pte_low, pte.pte_high, uvmf);
3700+#else
3701+ return _hypercall3(int, update_va_mapping, va,
3702+ pte.pte, uvmf);
3703+#endif
3704+
3705+ if (unlikely(lazy->nr_mc == NR_MC))
3706+ xen_multicall_flush(false);
3707+
3708+ mc = lazy->mc + lazy->nr_mc++;
3709+ mc->op = __HYPERVISOR_update_va_mapping;
3710+ mc->args[0] = va;
3711+#ifndef CONFIG_X86_PAE
3712+ mc->args[1] = pte.pte;
3713+#else
3714+ mc->args[1] = pte.pte_low;
3715+ mc->args[2] = pte.pte_high;
3716+#endif
3717+ mc->args[MULTI_UVMFLAGS_INDEX] = uvmf;
3718+ mc->args[5] = (long)__builtin_return_address(0);
3719+
3720+ return 0;
3721+}
3722+
3723+static inline bool mmu_may_merge(const multicall_entry_t *mc,
3724+ unsigned int op, domid_t domid)
3725+{
3726+ return mc->op == op && !mc->args[2] && mc->args[3] == domid;
3727+}
3728+
3729+int xen_multi_mmu_update(mmu_update_t *src, unsigned int count,
3730+ unsigned int *success_count, domid_t domid)
3731+{
3732+ struct lazy_mmu *lazy = &__get_cpu_var(lazy_mmu);
3733+ multicall_entry_t *mc = lazy->mc + lazy->nr_mc;
3734+ mmu_update_t *dst;
3735+ bool commit, merge;
3736+
3737+ if (unlikely(!use_lazy_mmu_mode()))
3738+ return _hypercall4(int, mmu_update, src, count,
3739+ success_count, domid);
3740+
3741+ commit = (lazy->nr_mmu + count) > NR_MMU || success_count;
3742+ merge = lazy->nr_mc && !commit
3743+ && mmu_may_merge(mc - 1, __HYPERVISOR_mmu_update, domid);
3744+ if (unlikely(lazy->nr_mc == NR_MC) && !merge) {
3745+ xen_multicall_flush(false);
3746+ mc = lazy->mc;
3747+ commit = count > NR_MMU || success_count;
3748+ }
3749+
3750+ if (!lazy->nr_mc && unlikely(commit))
3751+ return _hypercall4(int, mmu_update, src, count,
3752+ success_count, domid);
3753+
3754+ dst = lazy->mmu + lazy->nr_mmu;
3755+ lazy->nr_mmu += count;
3756+ if (merge) {
3757+ mc[-1].args[1] += count;
3758+ memcpy(dst, src, count * sizeof(*src));
3759+ } else {
3760+ ++lazy->nr_mc;
3761+ mc->op = __HYPERVISOR_mmu_update;
3762+ if (!commit) {
3763+ mc->args[0] = (unsigned long)dst;
3764+ memcpy(dst, src, count * sizeof(*src));
3765+ } else
3766+ mc->args[0] = (unsigned long)src;
3767+ mc->args[1] = count;
3768+ mc->args[2] = (unsigned long)success_count;
3769+ mc->args[3] = domid;
3770+ mc->args[5] = (long)__builtin_return_address(0);
3771+ }
3772+
3773+ while (!commit && count--)
3774+ switch (src++->ptr & (sizeof(pteval_t) - 1)) {
3775+ case MMU_NORMAL_PT_UPDATE:
3776+ case MMU_PT_UPDATE_PRESERVE_AD:
3777+ break;
3778+ default:
3779+ commit = true;
3780+ break;
3781+ }
3782+
3783+ return commit ? xen_multicall_flush(true) : 0;
3784+}
3785+
3786+int xen_multi_mmuext_op(struct mmuext_op *src, unsigned int count,
3787+ unsigned int *success_count, domid_t domid)
3788+{
3789+ struct lazy_mmu *lazy = &__get_cpu_var(lazy_mmu);
3790+ multicall_entry_t *mc;
3791+ struct mmuext_op *dst;
3792+ bool commit, merge;
3793+
3794+ if (unlikely(!use_lazy_mmu_mode()))
3795+ return _hypercall4(int, mmuext_op, src, count,
3796+ success_count, domid);
3797+
3798+ /*
3799+ * While it could be useful in theory, I've never seen the body of
3800+ * this conditional be reached, so it seems more reasonable to
3801+ * disable it for the time being.
3802+ */
3803+ if (0 && likely(count)
3804+ && likely(!success_count)
3805+ && likely(domid == DOMID_SELF)
3806+ && likely(lazy->nr_mc)
3807+ && lazy->mc[lazy->nr_mc - 1].op == __HYPERVISOR_update_va_mapping) {
3808+ unsigned long oldf, newf = UVMF_NONE;
3809+
3810+ switch (src->cmd) {
3811+ case MMUEXT_TLB_FLUSH_ALL:
3812+ newf = UVMF_TLB_FLUSH | UVMF_ALL;
3813+ break;
3814+ case MMUEXT_INVLPG_ALL:
3815+ newf = UVMF_INVLPG | UVMF_ALL;
3816+ break;
3817+ case MMUEXT_TLB_FLUSH_MULTI:
3818+ newf = UVMF_TLB_FLUSH | UVMF_MULTI
3819+ | (unsigned long)src->arg2.vcpumask.p;
3820+ break;
3821+ case MMUEXT_INVLPG_MULTI:
3822+ newf = UVMF_INVLPG | UVMF_MULTI
3823+ | (unsigned long)src->arg2.vcpumask.p;
3824+ break;
3825+ case MMUEXT_TLB_FLUSH_LOCAL:
3826+ newf = UVMF_TLB_FLUSH | UVMF_LOCAL;
3827+ break;
3828+ case MMUEXT_INVLPG_LOCAL:
3829+ newf = UVMF_INVLPG | UVMF_LOCAL;
3830+ break;
3831+ }
3832+ mc = lazy->mc + lazy->nr_mc - 1;
3833+ oldf = mc->args[MULTI_UVMFLAGS_INDEX];
3834+ if (newf == UVMF_NONE || oldf == UVMF_NONE
3835+ || newf == (UVMF_TLB_FLUSH | UVMF_ALL))
3836+ ;
3837+ else if (oldf == (UVMF_TLB_FLUSH | UVMF_ALL))
3838+ newf = UVMF_TLB_FLUSH | UVMF_ALL;
3839+ else if ((newf & UVMF_FLUSHTYPE_MASK) == UVMF_INVLPG
3840+ && (oldf & UVMF_FLUSHTYPE_MASK) == UVMF_INVLPG
3841+ && ((src->arg1.linear_addr ^ mc->args[0])
3842+ >> PAGE_SHIFT))
3843+ newf = UVMF_NONE;
3844+ else if (((oldf | newf) & UVMF_ALL)
3845+ && !((oldf ^ newf) & UVMF_FLUSHTYPE_MASK))
3846+ newf |= UVMF_ALL;
3847+ else if ((oldf ^ newf) & ~UVMF_FLUSHTYPE_MASK)
3848+ newf = UVMF_NONE;
3849+ else if ((oldf & UVMF_FLUSHTYPE_MASK) == UVMF_TLB_FLUSH)
3850+ newf = (newf & ~UVMF_FLUSHTYPE_MASK) | UVMF_TLB_FLUSH;
3851+ else if ((newf & UVMF_FLUSHTYPE_MASK) != UVMF_TLB_FLUSH
3852+ && ((newf ^ oldf) & UVMF_FLUSHTYPE_MASK))
3853+ newf = UVMF_NONE;
3854+ if (newf != UVMF_NONE) {
3855+ mc->args[MULTI_UVMFLAGS_INDEX] = newf;
3856+ ++src;
3857+ if (!--count)
3858+ return 0;
3859+ }
3860+ }
3861+
3862+ mc = lazy->mc + lazy->nr_mc;
3863+ commit = (lazy->nr_mmuext + count) > NR_MMUEXT || success_count;
3864+ merge = lazy->nr_mc && !commit
3865+ && mmu_may_merge(mc - 1, __HYPERVISOR_mmuext_op, domid);
3866+ if (unlikely(lazy->nr_mc == NR_MC) && !merge) {
3867+ xen_multicall_flush(false);
3868+ mc = lazy->mc;
3869+ commit = count > NR_MMUEXT || success_count;
3870+ }
3871+
3872+ if (!lazy->nr_mc && unlikely(commit))
3873+ return _hypercall4(int, mmuext_op, src, count,
3874+ success_count, domid);
3875+
3876+ dst = lazy->mmuext + lazy->nr_mmuext;
3877+ lazy->nr_mmuext += count;
3878+ if (merge) {
3879+ mc[-1].args[1] += count;
3880+ memcpy(dst, src, count * sizeof(*src));
3881+ } else {
3882+ ++lazy->nr_mc;
3883+ mc->op = __HYPERVISOR_mmuext_op;
3884+ if (!commit) {
3885+ mc->args[0] = (unsigned long)dst;
3886+ memcpy(dst, src, count * sizeof(*src));
3887+ } else
3888+ mc->args[0] = (unsigned long)src;
3889+ mc->args[1] = count;
3890+ mc->args[2] = (unsigned long)success_count;
3891+ mc->args[3] = domid;
3892+ mc->args[5] = (long)__builtin_return_address(0);
3893+ }
3894+
3895+ while (!commit && count--)
3896+ switch (src++->cmd) {
3897+ case MMUEXT_PIN_L1_TABLE:
3898+ case MMUEXT_PIN_L2_TABLE:
3899+ case MMUEXT_PIN_L3_TABLE:
3900+ case MMUEXT_PIN_L4_TABLE:
3901+ case MMUEXT_UNPIN_TABLE:
3902+ case MMUEXT_TLB_FLUSH_LOCAL:
3903+ case MMUEXT_INVLPG_LOCAL:
3904+ case MMUEXT_TLB_FLUSH_MULTI:
3905+ case MMUEXT_INVLPG_MULTI:
3906+ case MMUEXT_TLB_FLUSH_ALL:
3907+ case MMUEXT_INVLPG_ALL:
3908+ break;
3909+ default:
3910+ commit = true;
3911+ break;
3912+ }
3913+
3914+ return commit ? xen_multicall_flush(true) : 0;
3915+}
3916+
3917 void xen_l1_entry_update(pte_t *ptr, pte_t val)
3918 {
3919 mmu_update_t u;
3920@@ -542,7 +839,8 @@ int write_ldt_entry(void *ldt, int entry
3921 #define MAX_BATCHED_FULL_PTES 32
3922
3923 int xen_change_pte_range(struct mm_struct *mm, pmd_t *pmd,
3924- unsigned long addr, unsigned long end, pgprot_t newprot)
3925+ unsigned long addr, unsigned long end, pgprot_t newprot,
3926+ int dirty_accountable)
3927 {
3928 int rc = 0, i = 0;
3929 mmu_update_t u[MAX_BATCHED_FULL_PTES];
3930@@ -555,10 +853,14 @@ int xen_change_pte_range(struct mm_struc
3931 pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
3932 do {
3933 if (pte_present(*pte)) {
3934+ pte_t ptent = pte_modify(*pte, newprot);
3935+
3936+ if (dirty_accountable && pte_dirty(ptent))
3937+ ptent = pte_mkwrite(ptent);
3938 u[i].ptr = (__pmd_val(*pmd) & PHYSICAL_PAGE_MASK)
3939 | ((unsigned long)pte & ~PAGE_MASK)
3940 | MMU_PT_UPDATE_PRESERVE_AD;
3941- u[i].val = __pte_val(pte_modify(*pte, newprot));
3942+ u[i].val = __pte_val(ptent);
3943 if (++i == MAX_BATCHED_FULL_PTES) {
3944 if ((rc = HYPERVISOR_mmu_update(
3945 &u[0], i, NULL, DOMID_SELF)) != 0)
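
The new hypervisor.c code above batches MMU hypercalls per CPU: update_va_mapping, mmu_update and mmuext_op requests accumulate in the per-CPU lazy_mmu buffers, adjacent compatible entries are merged, and everything is issued as one HYPERVISOR_multicall when a buffer fills, when an operation needs a synchronous result (a success_count pointer, or a command outside the safe-to-defer set), or when xen_multicall_flush() is called explicitly. How a caller would use it, as an illustrative sketch (not part of the patch):

        /* Illustrative only: batch n PTE updates into one hypercall.
         * UVMF_NONE defers TLB maintenance; a later mmuext op or an
         * explicit flush flag completes it. */
        static void update_ptes(unsigned long *va, pte_t *pte, unsigned int n)
        {
                unsigned int i;

                for (i = 0; i < n; i++)
                        xen_multi_update_va_mapping(va[i], pte[i], UVMF_NONE);
                xen_multicall_flush(false);     /* one multicall instead of n traps */
        }
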
3946--- sle11-2009-05-14.orig/arch/x86/mm/init_32-xen.c 2009-05-14 11:02:43.000000000 +0200
3947+++ sle11-2009-05-14/arch/x86/mm/init_32-xen.c 2009-03-04 11:28:34.000000000 +0100
3948@@ -462,16 +462,22 @@ EXPORT_SYMBOL(__supported_pte_mask);
3949 * on Enable
3950 * off Disable
3951 */
3952-void __init noexec_setup(const char *str)
3953+static int __init noexec_setup(char *str)
3954 {
3955- if (!strncmp(str, "on",2) && cpu_has_nx) {
3956- __supported_pte_mask |= _PAGE_NX;
3957- disable_nx = 0;
3958- } else if (!strncmp(str,"off",3)) {
3959+ if (!str || !strcmp(str, "on")) {
3960+ if (cpu_has_nx) {
3961+ __supported_pte_mask |= _PAGE_NX;
3962+ disable_nx = 0;
3963+ }
3964+ } else if (!strcmp(str,"off")) {
3965 disable_nx = 1;
3966 __supported_pte_mask &= ~_PAGE_NX;
3967- }
3968+ } else
3969+ return -EINVAL;
3970+
3971+ return 0;
3972 }
3973+early_param("noexec", noexec_setup);
3974
3975 int nx_enabled = 0;
3976 #ifdef CONFIG_X86_PAE
3977@@ -514,6 +520,7 @@ int __init set_kernel_exec(unsigned long
3978 pte->pte_high &= ~(1 << (_PAGE_BIT_NX - 32));
3979 else
3980 pte->pte_high |= 1 << (_PAGE_BIT_NX - 32);
3981+ pte_update_defer(&init_mm, vaddr, pte);
3982 __flush_tlb_all();
3983 out:
3984 return ret;
3985@@ -596,18 +603,6 @@ static void __init test_wp_bit(void)
3986 }
3987 }
3988
3989-static void __init set_max_mapnr_init(void)
3990-{
3991-#ifdef CONFIG_HIGHMEM
3992- num_physpages = highend_pfn;
3993-#else
3994- num_physpages = max_low_pfn;
3995-#endif
3996-#ifdef CONFIG_FLATMEM
3997- max_mapnr = num_physpages;
3998-#endif
3999-}
4000-
4001 static struct kcore_list kcore_mem, kcore_vmalloc;
4002
4003 void __init mem_init(void)
4004@@ -623,8 +618,7 @@ void __init mem_init(void)
4005 #endif
4006
4007 #ifdef CONFIG_FLATMEM
4008- if (!mem_map)
4009- BUG();
4010+ BUG_ON(!mem_map);
4011 #endif
4012
4013 bad_ppro = ppro_with_ram_bug();
4014@@ -639,17 +633,6 @@ void __init mem_init(void)
4015 }
4016 #endif
4017
4018- set_max_mapnr_init();
4019-
4020-#ifdef CONFIG_HIGHMEM
4021- high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
4022-#else
4023- high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
4024-#endif
4025- printk("vmalloc area: %lx-%lx, maxmem %lx\n",
4026- VMALLOC_START,VMALLOC_END,MAXMEM);
4027- BUG_ON(VMALLOC_START > VMALLOC_END);
4028-
4029 /* this will put all low memory onto the freelists */
4030 totalram_pages += free_all_bootmem();
4031 /* XEN: init and count low-mem pages outside initial allocation. */
4032@@ -687,6 +670,48 @@ void __init mem_init(void)
4033 (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))
4034 );
4035
4036+#if 1 /* double-sanity-check paranoia */
4037+ printk("virtual kernel memory layout:\n"
4038+ " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
4039+#ifdef CONFIG_HIGHMEM
4040+ " pkmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
4041+#endif
4042+ " vmalloc : 0x%08lx - 0x%08lx (%4ld MB)\n"
4043+ " lowmem : 0x%08lx - 0x%08lx (%4ld MB)\n"
4044+ " .init : 0x%08lx - 0x%08lx (%4ld kB)\n"
4045+ " .data : 0x%08lx - 0x%08lx (%4ld kB)\n"
4046+ " .text : 0x%08lx - 0x%08lx (%4ld kB)\n",
4047+ FIXADDR_START, FIXADDR_TOP,
4048+ (FIXADDR_TOP - FIXADDR_START) >> 10,
4049+
4050+#ifdef CONFIG_HIGHMEM
4051+ PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE,
4052+ (LAST_PKMAP*PAGE_SIZE) >> 10,
4053+#endif
4054+
4055+ VMALLOC_START, VMALLOC_END,
4056+ (VMALLOC_END - VMALLOC_START) >> 20,
4057+
4058+ (unsigned long)__va(0), (unsigned long)high_memory,
4059+ ((unsigned long)high_memory - (unsigned long)__va(0)) >> 20,
4060+
4061+ (unsigned long)&__init_begin, (unsigned long)&__init_end,
4062+ ((unsigned long)&__init_end - (unsigned long)&__init_begin) >> 10,
4063+
4064+ (unsigned long)&_etext, (unsigned long)&_edata,
4065+ ((unsigned long)&_edata - (unsigned long)&_etext) >> 10,
4066+
4067+ (unsigned long)&_text, (unsigned long)&_etext,
4068+ ((unsigned long)&_etext - (unsigned long)&_text) >> 10);
4069+
4070+#ifdef CONFIG_HIGHMEM
4071+ BUG_ON(PKMAP_BASE+LAST_PKMAP*PAGE_SIZE > FIXADDR_START);
4072+ BUG_ON(VMALLOC_END > PKMAP_BASE);
4073+#endif
4074+ BUG_ON(VMALLOC_START > VMALLOC_END);
4075+ BUG_ON((unsigned long)high_memory > VMALLOC_START);
4076+#endif /* double-sanity-check paranoia */
4077+
4078 #ifdef CONFIG_X86_PAE
4079 if (!cpu_has_pae)
4080 panic("cannot execute a PAE-enabled kernel on a PAE-less CPU!");
4081@@ -717,7 +742,7 @@ void __init mem_init(void)
4082 int arch_add_memory(int nid, u64 start, u64 size)
4083 {
4084 struct pglist_data *pgdata = &contig_page_data;
4085- struct zone *zone = pgdata->node_zones + MAX_NR_ZONES-1;
4086+ struct zone *zone = pgdata->node_zones + ZONE_HIGHMEM;
4087 unsigned long start_pfn = start >> PAGE_SHIFT;
4088 unsigned long nr_pages = size >> PAGE_SHIFT;
4089
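
The noexec hunk converts the option from the old pointer-scanning style to the 2.6.19 early_param() mechanism: handlers registered this way run from parse_early_param() before memory setup, receive only the text after the '=', and report bad values by returning -EINVAL. The same pattern, reduced to a minimal sketch with a hypothetical option name:

        #include <linux/init.h>
        #include <linux/string.h>
        #include <linux/errno.h>

        /* Minimal early_param() sketch; "foo" and foo_enabled are made up. */
        static int foo_enabled;

        static int __init foo_setup(char *str)
        {
                if (!str || !strcmp(str, "on"))         /* bare "foo" means on */
                        foo_enabled = 1;
                else if (!strcmp(str, "off"))
                        foo_enabled = 0;
                else
                        return -EINVAL;                 /* flagged as a bad value */
                return 0;
        }
        early_param("foo", foo_setup);
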
4090--- sle11-2009-05-14.orig/arch/x86/mm/ioremap_32-xen.c 2009-05-14 11:02:43.000000000 +0200
4091+++ sle11-2009-05-14/arch/x86/mm/ioremap_32-xen.c 2009-03-04 11:28:34.000000000 +0100
4092@@ -12,7 +12,7 @@
4093 #include <linux/init.h>
4094 #include <linux/slab.h>
4095 #include <linux/module.h>
4096-#include <asm/io.h>
4097+#include <linux/io.h>
4098 #include <asm/fixmap.h>
4099 #include <asm/cacheflush.h>
4100 #include <asm/tlbflush.h>
4101@@ -118,7 +118,7 @@ int direct_remap_pfn_range(struct vm_are
4102 if (domid == DOMID_SELF)
4103 return -EINVAL;
4104
4105- vma->vm_flags |= VM_IO | VM_RESERVED;
4106+ vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP;
4107
4108 vma->vm_mm->context.has_foreign_mappings = 1;
4109
4110@@ -203,6 +203,7 @@ void __iomem * __ioremap(unsigned long p
4111 void __iomem * addr;
4112 struct vm_struct * area;
4113 unsigned long offset, last_addr;
4114+ pgprot_t prot;
4115 domid_t domid = DOMID_IO;
4116
4117 /* Don't allow wraparound or zero size */
4118@@ -234,6 +235,8 @@ void __iomem * __ioremap(unsigned long p
4119 domid = DOMID_SELF;
4120 }
4121
4122+ prot = __pgprot(_KERNPG_TABLE | flags);
4123+
4124 /*
4125 * Mappings have to be page-aligned
4126 */
4127@@ -249,10 +252,9 @@ void __iomem * __ioremap(unsigned long p
4128 return NULL;
4129 area->phys_addr = phys_addr;
4130 addr = (void __iomem *) area->addr;
4131- flags |= _KERNPG_TABLE;
4132 if (__direct_remap_pfn_range(&init_mm, (unsigned long)addr,
4133 phys_addr>>PAGE_SHIFT,
4134- size, __pgprot(flags), domid)) {
4135+ size, prot, domid)) {
4136 vunmap((void __force *) addr);
4137 return NULL;
4138 }
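
Besides hoisting the pgprot computation ahead of the remap, the ioremap hunks add VM_PFNMAP to foreign mappings: the flag tells the core VM that the range is raw page frames with no struct page behind them, so get_user_pages() and the unmap path must not touch refcounts. For comparison, a conventional driver mmap of device memory in the same era looks roughly like this (foo_base_pfn is a made-up device base frame; remap_pfn_range() sets these flags itself, they are spelled out for symmetry with the Xen variant above):

        #include <linux/fs.h>
        #include <linux/mm.h>

        /* Illustrative mmap of a PFN range (not from this patch). */
        static unsigned long foo_base_pfn;

        static int foo_mmap(struct file *file, struct vm_area_struct *vma)
        {
                vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP;
                return remap_pfn_range(vma, vma->vm_start, foo_base_pfn,
                                       vma->vm_end - vma->vm_start,
                                       vma->vm_page_prot);
        }
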
4139--- sle11-2009-05-14.orig/arch/x86/mm/pgtable_32-xen.c 2008-12-01 11:25:57.000000000 +0100
4140+++ sle11-2009-05-14/arch/x86/mm/pgtable_32-xen.c 2009-03-04 11:28:34.000000000 +0100
4141@@ -68,7 +68,9 @@ void show_mem(void)
4142 printk(KERN_INFO "%lu pages writeback\n",
4143 global_page_state(NR_WRITEBACK));
4144 printk(KERN_INFO "%lu pages mapped\n", global_page_state(NR_FILE_MAPPED));
4145- printk(KERN_INFO "%lu pages slab\n", global_page_state(NR_SLAB));
4146+ printk(KERN_INFO "%lu pages slab\n",
4147+ global_page_state(NR_SLAB_RECLAIMABLE) +
4148+ global_page_state(NR_SLAB_UNRECLAIMABLE));
4149 printk(KERN_INFO "%lu pages pagetables\n",
4150 global_page_state(NR_PAGETABLE));
4151 }
4152@@ -108,18 +110,11 @@ void set_pmd_pfn(unsigned long vaddr, un
4153 __flush_tlb_one(vaddr);
4154 }
4155
4156-static int nr_fixmaps = 0;
4157+static int fixmaps;
4158 unsigned long hypervisor_virt_start = HYPERVISOR_VIRT_START;
4159-unsigned long __FIXADDR_TOP = (HYPERVISOR_VIRT_START - 2 * PAGE_SIZE);
4160+unsigned long __FIXADDR_TOP = (HYPERVISOR_VIRT_START - PAGE_SIZE);
4161 EXPORT_SYMBOL(__FIXADDR_TOP);
4162
4163-void __init set_fixaddr_top(unsigned long top)
4164-{
4165- BUG_ON(nr_fixmaps > 0);
4166- hypervisor_virt_start = top;
4167- __FIXADDR_TOP = hypervisor_virt_start - 2 * PAGE_SIZE;
4168-}
4169-
4170 void __set_fixmap (enum fixed_addresses idx, maddr_t phys, pgprot_t flags)
4171 {
4172 unsigned long address = __fix_to_virt(idx);
4173@@ -141,7 +136,21 @@ void __set_fixmap (enum fixed_addresses
4174 if (HYPERVISOR_update_va_mapping(address, pte,
4175 UVMF_INVLPG|UVMF_ALL))
4176 BUG();
4177- nr_fixmaps++;
4178+ fixmaps++;
4179+}
4180+
4181+/**
4182+ * reserve_top_address - reserves a hole in the top of kernel address space
4183+ * @reserve - size of hole to reserve
4184+ *
4185+ * Can be used to relocate the fixmap area and poke a hole in the top
4186+ * of kernel address space to make room for a hypervisor.
4187+ */
4188+void __init reserve_top_address(unsigned long reserve)
4189+{
4190+ BUG_ON(fixmaps > 0);
4191+ __FIXADDR_TOP = -reserve - PAGE_SIZE;
4192+ __VMALLOC_RESERVE += reserve;
4193 }
4194
4195 pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
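
The pgtable hunk replaces the Xen-private set_fixaddr_top() with mainline's reserve_top_address(), which also grows __VMALLOC_RESERVE by the reserved amount. A hypervisor stub must call it exactly once, before the first __set_fixmap(), or the BUG_ON(fixmaps > 0) trips; a hypothetical caller:

        /* Hypothetical paravirt init: give the hypervisor the top 64 MB. */
        void __init example_hv_reserve(void)
        {
                reserve_top_address(64 << 20);  /* must precede any fixmap use */
        }
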
4196--- sle11-2009-05-14.orig/arch/x86/pci/irq-xen.c 2009-05-14 11:02:43.000000000 +0200
4197+++ sle11-2009-05-14/arch/x86/pci/irq-xen.c 2009-03-04 11:28:34.000000000 +0100
4198@@ -991,10 +991,6 @@ static void __init pcibios_fixup_irqs(vo
4199 pci_name(bridge), 'A' + pin, irq);
4200 }
4201 if (irq >= 0) {
4202- if (use_pci_vector() &&
4203- !platform_legacy_irq(irq))
4204- irq = IO_APIC_VECTOR(irq);
4205-
4206 printk(KERN_INFO "PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n",
4207 pci_name(dev), 'A' + pin, irq);
4208 dev->irq = irq;
4209@@ -1155,10 +1151,6 @@ static int pirq_enable_irq(struct pci_de
4210 }
4211 dev = temp_dev;
4212 if (irq >= 0) {
4213-#ifdef CONFIG_PCI_MSI
4214- if (!platform_legacy_irq(irq))
4215- irq = IO_APIC_VECTOR(irq);
4216-#endif
4217 printk(KERN_INFO "PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n",
4218 pci_name(dev), 'A' + pin, irq);
4219 dev->irq = irq;
4220@@ -1179,33 +1171,3 @@ static int pirq_enable_irq(struct pci_de
4221 }
4222 return 0;
4223 }
4224-
4225-int pci_vector_resources(int last, int nr_released)
4226-{
4227- int count = nr_released;
4228-
4229- int next = last;
4230- int offset = (last % 8);
4231-
4232- while (next < FIRST_SYSTEM_VECTOR) {
4233- next += 8;
4234-#ifdef CONFIG_X86_64
4235- if (next == IA32_SYSCALL_VECTOR)
4236- continue;
4237-#else
4238- if (next == SYSCALL_VECTOR)
4239- continue;
4240-#endif
4241- count++;
4242- if (next >= FIRST_SYSTEM_VECTOR) {
4243- if (offset%8) {
4244- next = FIRST_DEVICE_VECTOR + offset;
4245- offset++;
4246- continue;
4247- }
4248- count--;
4249- }
4250- }
4251-
4252- return count;
4253-}
4254--- sle11-2009-05-14.orig/arch/x86/ia32/ia32entry-xen.S 2009-05-14 11:02:43.000000000 +0200
4255+++ sle11-2009-05-14/arch/x86/ia32/ia32entry-xen.S 2009-03-04 11:28:34.000000000 +0100
4256@@ -83,6 +83,7 @@
4257 */
4258 ENTRY(ia32_sysenter_target)
4259 CFI_STARTPROC32 simple
4260+ CFI_SIGNAL_FRAME
4261 CFI_DEF_CFA rsp,SS+8-RIP+16
4262 /*CFI_REL_OFFSET ss,SS-RIP+16*/
4263 CFI_REL_OFFSET rsp,RSP-RIP+16
4264@@ -164,6 +165,7 @@ ENDPROC(ia32_sysenter_target)
4265 */
4266 ENTRY(ia32_cstar_target)
4267 CFI_STARTPROC32 simple
4268+ CFI_SIGNAL_FRAME
4269 CFI_DEF_CFA rsp,SS+8-RIP+16
4270 /*CFI_REL_OFFSET ss,SS-RIP+16*/
4271 CFI_REL_OFFSET rsp,RSP-RIP+16
4272@@ -243,6 +245,7 @@ ia32_badarg:
4273
4274 ENTRY(ia32_syscall)
4275 CFI_STARTPROC simple
4276+ CFI_SIGNAL_FRAME
4277 CFI_DEF_CFA rsp,SS+8-RIP+16
4278 /*CFI_REL_OFFSET ss,SS-RIP+16*/
4279 CFI_REL_OFFSET rsp,RSP-RIP+16
4280@@ -320,6 +323,7 @@ ENTRY(ia32_ptregs_common)
4281 popq %r11
4282 CFI_ENDPROC
4283 CFI_STARTPROC32 simple
4284+ CFI_SIGNAL_FRAME
4285 CFI_DEF_CFA rsp,SS+8-ARGOFFSET
4286 CFI_REL_OFFSET rax,RAX-ARGOFFSET
4287 CFI_REL_OFFSET rcx,RCX-ARGOFFSET
4288@@ -653,8 +657,8 @@ ia32_sys_call_table:
4289 .quad sys_readlinkat /* 305 */
4290 .quad sys_fchmodat
4291 .quad sys_faccessat
4292- .quad quiet_ni_syscall /* pselect6 for now */
4293- .quad quiet_ni_syscall /* ppoll for now */
4294+ .quad compat_sys_pselect6
4295+ .quad compat_sys_ppoll
4296 .quad sys_unshare /* 310 */
4297 .quad compat_sys_set_robust_list
4298 .quad compat_sys_get_robust_list
4299@@ -663,4 +667,5 @@ ia32_sys_call_table:
4300 .quad sys_tee
4301 .quad compat_sys_vmsplice
4302 .quad compat_sys_move_pages
4303+ .quad sys_getcpu
4304 ia32_syscall_end:
4305--- sle11-2009-05-14.orig/arch/x86/kernel/Makefile 2009-04-20 11:36:10.000000000 +0200
4306+++ sle11-2009-05-14/arch/x86/kernel/Makefile 2009-03-04 11:28:34.000000000 +0100
4307@@ -104,9 +104,9 @@ obj-$(CONFIG_X86_XEN) += fixup.o
4308 ###
4309 # 64 bit specific files
4310 ifeq ($(CONFIG_X86_64),y)
4311- obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o
4312- obj-$(CONFIG_X86_XEN_GENAPIC) += genapic_xen_64.o
4313- obj-y += uv_sysfs.o
4314+ obj-$(CONFIG_X86_LOCAL_APIC) += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o
4315+ obj-$(CONFIG_X86_XEN_GENAPIC) += genapic_64.o genapic_xen_64.o
4316+ obj-y += tlb_uv.o uv_sysfs.o
4317 obj-y += genx2apic_cluster.o
4318 obj-y += genx2apic_phys.o
4319 obj-y += bios_uv.o uv_irq.o
4320@@ -124,5 +124,7 @@ ifeq ($(CONFIG_X86_64),y)
4321 pci-dma_64-$(CONFIG_XEN) += pci-dma_32.o
4322 endif
4323
4324-disabled-obj-$(CONFIG_XEN) := i8253.o i8259_$(BITS).o reboot.o smpboot_$(BITS).o tsc_$(BITS).o
4325+disabled-obj-$(CONFIG_XEN) := early-quirks.o i8253.o i8259_$(BITS).o reboot.o \
4326+ smpboot_$(BITS).o tsc_$(BITS).o
4327+disabled-obj-$(CONFIG_XEN_UNPRIVILEGED_GUEST) += mpparse_64.o
4328 %/head_$(BITS).o %/head_$(BITS).s: $(if $(CONFIG_XEN),EXTRA_AFLAGS,dummy) :=
4329--- sle11-2009-05-14.orig/arch/x86/kernel/apic_64-xen.c 2009-05-14 11:02:43.000000000 +0200
4330+++ sle11-2009-05-14/arch/x86/kernel/apic_64-xen.c 2009-03-04 11:28:34.000000000 +0100
4331@@ -43,7 +43,7 @@ int apic_verbosity;
4332 */
4333 void ack_bad_irq(unsigned int irq)
4334 {
4335- printk("unexpected IRQ trap at vector %02x\n", irq);
4336+ printk("unexpected IRQ trap at irq %02x\n", irq);
4337 /*
4338 * Currently unexpected vectors happen only on SMP and APIC.
4339 * We _must_ ack these because every local APIC has only N
4340@@ -62,19 +62,19 @@ int setup_profiling_timer(unsigned int m
4341 return -EINVAL;
4342 }
4343
4344-void smp_local_timer_interrupt(struct pt_regs *regs)
4345+void smp_local_timer_interrupt(void)
4346 {
4347- profile_tick(CPU_PROFILING, regs);
4348+ profile_tick(CPU_PROFILING);
4349 #ifndef CONFIG_XEN
4350 #ifdef CONFIG_SMP
4351- update_process_times(user_mode(regs));
4352+ update_process_times(user_mode(get_irq_regs()));
4353 #endif
4354 #endif
4355 /*
4356 * We take the 'long' return path, and there every subsystem
4357 * grabs the appropriate locks (kernel lock/ irq lock).
4358 *
4359- * we might want to decouple profiling from the 'long path',
4360+ * We might want to decouple profiling from the 'long path',
4361 * and do the profiling totally in assembly.
4362 *
4363 * Currently this isn't too much of an issue (performance wise),
4364@@ -92,6 +92,8 @@ void smp_local_timer_interrupt(struct pt
4365 */
4366 void smp_apic_timer_interrupt(struct pt_regs *regs)
4367 {
4368+ struct pt_regs *old_regs = set_irq_regs(regs);
4369+
4370 /*
4371 * the NMI deadlock-detector uses this.
4372 */
4373@@ -109,8 +111,9 @@ void smp_apic_timer_interrupt(struct pt_
4374 */
4375 exit_idle();
4376 irq_enter();
4377- smp_local_timer_interrupt(regs);
4378+ smp_local_timer_interrupt();
4379 irq_exit();
4380+ set_irq_regs(old_regs);
4381 }
4382
4383 /*
4384@@ -188,9 +191,8 @@ int disable_apic;
4385 int __init APIC_init_uniprocessor (void)
4386 {
4387 #ifdef CONFIG_X86_IO_APIC
4388- if (smp_found_config)
4389- if (!skip_ioapic_setup && nr_ioapics)
4390- setup_IO_APIC();
4391+ if (smp_found_config && !skip_ioapic_setup && nr_ioapics)
4392+ setup_IO_APIC();
4393 #endif
4394
4395 return 1;
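
These apic_64 hunks are the standard 2.6.19 conversion away from threading pt_regs through the interrupt call chain: the entry point stashes the register frame with set_irq_regs(), callees that still need it (profile_tick(), update_process_times()) fetch it via get_irq_regs(), and the previous pointer is restored on the way out so nested interrupts behave. Schematically, with an illustrative handler name:

        /* Schematic 2.6.19-style handler; names are illustrative. */
        static void example_work(void) { /* ... per-interrupt work ... */ }

        void smp_example_interrupt(struct pt_regs *regs)
        {
                struct pt_regs *old_regs = set_irq_regs(regs);

                irq_enter();
                example_work();                 /* callees may use get_irq_regs() */
                irq_exit();
                set_irq_regs(old_regs);         /* restore for nested entries */
        }
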
4396--- sle11-2009-05-14.orig/arch/x86/kernel/e820_64-xen.c 2009-05-14 11:02:43.000000000 +0200
4397+++ sle11-2009-05-14/arch/x86/kernel/e820_64-xen.c 2009-03-04 11:28:34.000000000 +0100
4398@@ -16,6 +16,7 @@
4399 #include <linux/string.h>
4400 #include <linux/kexec.h>
4401 #include <linux/module.h>
4402+#include <linux/mm.h>
4403
4404 #include <asm/pgtable.h>
4405 #include <asm/page.h>
4406@@ -25,6 +26,11 @@
4407 #include <asm/sections.h>
4408 #include <xen/interface/memory.h>
4409
4410+struct e820map e820 __initdata;
4411+#ifdef CONFIG_XEN
4412+struct e820map machine_e820 __initdata;
4413+#endif
4414+
4415 /*
4416 * PFN of last memory page.
4417 */
4418@@ -41,14 +47,10 @@ unsigned long end_pfn_map;
4419 /*
4420 * Last pfn which the user wants to use.
4421 */
4422-unsigned long end_user_pfn = MAXMEM>>PAGE_SHIFT;
4423+static unsigned long __initdata end_user_pfn = MAXMEM>>PAGE_SHIFT;
4424
4425 extern struct resource code_resource, data_resource;
4426
4427-#ifdef CONFIG_XEN
4428-extern struct e820map machine_e820;
4429-#endif
4430-
4431 /* Check for some hardcoded bad areas that early boot is not allowed to touch */
4432 static inline int bad_addr(unsigned long *addrp, unsigned long size)
4433 {
4434@@ -57,13 +59,13 @@ static inline int bad_addr(unsigned long
4435 #ifndef CONFIG_XEN
4436 /* various gunk below that needed for SMP startup */
4437 if (addr < 0x8000) {
4438- *addrp = 0x8000;
4439+ *addrp = PAGE_ALIGN(0x8000);
4440 return 1;
4441 }
4442
4443 /* direct mapping tables of the kernel */
4444 if (last >= table_start<<PAGE_SHIFT && addr < table_end<<PAGE_SHIFT) {
4445- *addrp = table_end << PAGE_SHIFT;
4446+ *addrp = PAGE_ALIGN(table_end << PAGE_SHIFT);
4447 return 1;
4448 }
4449
4450@@ -71,23 +73,18 @@ static inline int bad_addr(unsigned long
4451 #ifdef CONFIG_BLK_DEV_INITRD
4452 if (LOADER_TYPE && INITRD_START && last >= INITRD_START &&
4453 addr < INITRD_START+INITRD_SIZE) {
4454- *addrp = INITRD_START + INITRD_SIZE;
4455+ *addrp = PAGE_ALIGN(INITRD_START + INITRD_SIZE);
4456 return 1;
4457 }
4458 #endif
4459- /* kernel code + 640k memory hole (later should not be needed, but
4460- be paranoid for now) */
4461- if (last >= 640*1024 && addr < 1024*1024) {
4462- *addrp = 1024*1024;
4463- return 1;
4464- }
4465- if (last >= __pa_symbol(&_text) && last < __pa_symbol(&_end)) {
4466- *addrp = __pa_symbol(&_end);
4467+ /* kernel code */
4468+ if (last >= __pa_symbol(&_text) && addr < __pa_symbol(&_end)) {
4469+ *addrp = PAGE_ALIGN(__pa_symbol(&_end));
4470 return 1;
4471 }
4472
4473 if (last >= ebda_addr && addr < ebda_addr + ebda_size) {
4474- *addrp = ebda_addr + ebda_size;
4475+ *addrp = PAGE_ALIGN(ebda_addr + ebda_size);
4476 return 1;
4477 }
4478
4479@@ -184,7 +181,7 @@ unsigned long __init find_e820_area(unsi
4480 continue;
4481 while (bad_addr(&addr, size) && addr+size <= ei->addr+ei->size)
4482 ;
4483- last = addr + size;
4484+ last = PAGE_ALIGN(addr) + size;
4485 if (last > ei->addr + ei->size)
4486 continue;
4487 if (last > end)
4488@@ -194,59 +191,14 @@ unsigned long __init find_e820_area(unsi
4489 return -1UL;
4490 }
4491
4492-/*
4493- * Free bootmem based on the e820 table for a node.
4494- */
4495-void __init e820_bootmem_free(pg_data_t *pgdat, unsigned long start,unsigned long end)
4496-{
4497- int i;
4498- for (i = 0; i < e820.nr_map; i++) {
4499- struct e820entry *ei = &e820.map[i];
4500- unsigned long last, addr;
4501-
4502- if (ei->type != E820_RAM ||
4503- ei->addr+ei->size <= start ||
4504- ei->addr >= end)
4505- continue;
4506-
4507- addr = round_up(ei->addr, PAGE_SIZE);
4508- if (addr < start)
4509- addr = start;
4510-
4511- last = round_down(ei->addr + ei->size, PAGE_SIZE);
4512- if (last >= end)
4513- last = end;
4514-
4515- if (last > addr && last-addr >= PAGE_SIZE)
4516- free_bootmem_node(pgdat, addr, last-addr);
4517- }
4518-}
4519-
4520 /*
4521 * Find the highest page frame number we have available
4522 */
4523 unsigned long __init e820_end_of_ram(void)
4524 {
4525- int i;
4526 unsigned long end_pfn = 0;
4527+ end_pfn = find_max_pfn_with_active_regions();
4528
4529- for (i = 0; i < e820.nr_map; i++) {
4530- struct e820entry *ei = &e820.map[i];
4531- unsigned long start, end;
4532-
4533- start = round_up(ei->addr, PAGE_SIZE);
4534- end = round_down(ei->addr + ei->size, PAGE_SIZE);
4535- if (start >= end)
4536- continue;
4537- if (ei->type == E820_RAM) {
4538- if (end > end_pfn<<PAGE_SHIFT)
4539- end_pfn = end>>PAGE_SHIFT;
4540- } else {
4541- if (end > end_pfn_map<<PAGE_SHIFT)
4542- end_pfn_map = end>>PAGE_SHIFT;
4543- }
4544- }
4545-
4546 if (end_pfn > end_pfn_map)
4547 end_pfn_map = end_pfn;
4548 if (end_pfn_map > MAXMEM>>PAGE_SHIFT)
4549@@ -256,43 +208,10 @@ unsigned long __init e820_end_of_ram(voi
4550 if (end_pfn > end_pfn_map)
4551 end_pfn = end_pfn_map;
4552
4553+ printk("end_pfn_map = %lu\n", end_pfn_map);
4554 return end_pfn;
4555 }
4556
4557-/*
4558- * Compute how much memory is missing in a range.
4559- * Unlike the other functions in this file the arguments are in page numbers.
4560- */
4561-unsigned long __init
4562-e820_hole_size(unsigned long start_pfn, unsigned long end_pfn)
4563-{
4564- unsigned long ram = 0;
4565- unsigned long start = start_pfn << PAGE_SHIFT;
4566- unsigned long end = end_pfn << PAGE_SHIFT;
4567- int i;
4568- for (i = 0; i < e820.nr_map; i++) {
4569- struct e820entry *ei = &e820.map[i];
4570- unsigned long last, addr;
4571-
4572- if (ei->type != E820_RAM ||
4573- ei->addr+ei->size <= start ||
4574- ei->addr >= end)
4575- continue;
4576-
4577- addr = round_up(ei->addr, PAGE_SIZE);
4578- if (addr < start)
4579- addr = start;
4580-
4581- last = round_down(ei->addr + ei->size, PAGE_SIZE);
4582- if (last >= end)
4583- last = end;
4584-
4585- if (last > addr)
4586- ram += last - addr;
4587- }
4588- return ((end - start) - ram) >> PAGE_SHIFT;
4589-}
4590-
4591 /*
4592 * Mark e820 reserved areas as busy for the resource manager.
4593 */
4594@@ -333,6 +252,98 @@ void __init e820_reserve_resources(struc
4595 }
4596 }
4597
4598+#ifndef CONFIG_XEN
4599+/* Mark pages corresponding to given address range as nosave */
4600+static void __init
4601+e820_mark_nosave_range(unsigned long start, unsigned long end)
4602+{
4603+ unsigned long pfn, max_pfn;
4604+
4605+ if (start >= end)
4606+ return;
4607+
4608+ printk("Nosave address range: %016lx - %016lx\n", start, end);
4609+ max_pfn = end >> PAGE_SHIFT;
4610+ for (pfn = start >> PAGE_SHIFT; pfn < max_pfn; pfn++)
4611+ if (pfn_valid(pfn))
4612+ SetPageNosave(pfn_to_page(pfn));
4613+}
4614+
4615+/*
4616+ * Find the ranges of physical addresses that do not correspond to
4617+ * e820 RAM areas and mark the corresponding pages as nosave for software
4618+ * suspend and suspend to RAM.
4619+ *
4620+ * This function requires the e820 map to be sorted and without any
4621+ * overlapping entries and assumes the first e820 area to be RAM.
4622+ */
4623+void __init e820_mark_nosave_regions(void)
4624+{
4625+ int i;
4626+ unsigned long paddr;
4627+
4628+ paddr = round_down(e820.map[0].addr + e820.map[0].size, PAGE_SIZE);
4629+ for (i = 1; i < e820.nr_map; i++) {
4630+ struct e820entry *ei = &e820.map[i];
4631+
4632+ if (paddr < ei->addr)
4633+ e820_mark_nosave_range(paddr,
4634+ round_up(ei->addr, PAGE_SIZE));
4635+
4636+ paddr = round_down(ei->addr + ei->size, PAGE_SIZE);
4637+ if (ei->type != E820_RAM)
4638+ e820_mark_nosave_range(round_up(ei->addr, PAGE_SIZE),
4639+ paddr);
4640+
4641+ if (paddr >= (end_pfn << PAGE_SHIFT))
4642+ break;
4643+ }
4644+}
4645+#endif
4646+
4647+/* Walk the e820 map and register active regions within a node */
4648+void __init
4649+e820_register_active_regions(int nid, unsigned long start_pfn,
4650+ unsigned long end_pfn)
4651+{
4652+ int i;
4653+ unsigned long ei_startpfn, ei_endpfn;
4654+ for (i = 0; i < e820.nr_map; i++) {
4655+ struct e820entry *ei = &e820.map[i];
4656+ ei_startpfn = round_up(ei->addr, PAGE_SIZE) >> PAGE_SHIFT;
4657+ ei_endpfn = round_down(ei->addr + ei->size, PAGE_SIZE)
4658+ >> PAGE_SHIFT;
4659+
4660+ /* Skip map entries smaller than a page */
4661+ if (ei_startpfn >= ei_endpfn)
4662+ continue;
4663+
4664+ /* Check if end_pfn_map should be updated */
4665+ if (ei->type != E820_RAM && ei_endpfn > end_pfn_map)
4666+ end_pfn_map = ei_endpfn;
4667+
4668+ /* Skip if map is outside the node */
4669+ if (ei->type != E820_RAM ||
4670+ ei_endpfn <= start_pfn ||
4671+ ei_startpfn >= end_pfn)
4672+ continue;
4673+
4674+ /* Check for overlaps */
4675+ if (ei_startpfn < start_pfn)
4676+ ei_startpfn = start_pfn;
4677+ if (ei_endpfn > end_pfn)
4678+ ei_endpfn = end_pfn;
4679+
4680+ /* Obey end_user_pfn to save on memmap */
4681+ if (ei_startpfn >= end_user_pfn)
4682+ continue;
4683+ if (ei_endpfn > end_user_pfn)
4684+ ei_endpfn = end_user_pfn;
4685+
4686+ add_active_range(nid, ei_startpfn, ei_endpfn);
4687+ }
4688+}
4689+
4690 /*
4691 * Add a memory region to the kernel e820 map.
4692 */
4693@@ -553,13 +564,6 @@ static int __init sanitize_e820_map(stru
4694 * If we're lucky and live on a modern system, the setup code
4695 * will have given us a memory map that we can use to properly
4696 * set up memory. If we aren't, we'll fake a memory map.
4697- *
4698- * We check to see that the memory map contains at least 2 elements
4699- * before we'll use it, because the detection code in setup.S may
4700- * not be perfect and most every PC known to man has two memory
4701- * regions: one from 0 to 640k, and one from 1mb up. (The IBM
4702- * thinkpad 560x, for example, does not cooperate with the memory
4703- * detection code.)
4704 */
4705 static int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
4706 {
4707@@ -581,27 +585,6 @@ static int __init copy_e820_map(struct e
4708 if (start > end)
4709 return -1;
4710
4711-#ifndef CONFIG_XEN
4712- /*
4713- * Some BIOSes claim RAM in the 640k - 1M region.
4714- * Not right. Fix it up.
4715- *
4716- * This should be removed on Hammer which is supposed to not
4717- * have non e820 covered ISA mappings there, but I had some strange
4718- * problems so it stays for now. -AK
4719- */
4720- if (type == E820_RAM) {
4721- if (start < 0x100000ULL && end > 0xA0000ULL) {
4722- if (start < 0xA0000ULL)
4723- add_memory_region(start, 0xA0000ULL-start, type);
4724- if (end <= 0x100000ULL)
4725- continue;
4726- start = 0x100000ULL;
4727- size = end - start;
4728- }
4729- }
4730-#endif
4731-
4732 add_memory_region(start, size, type);
4733 } while (biosmap++,--nr_map);
4734
4735@@ -622,11 +605,15 @@ static int __init copy_e820_map(struct e
4736 return 0;
4737 }
4738
4739+void early_panic(char *msg)
4740+{
4741+ early_printk(msg);
4742+ panic(msg);
4743+}
4744+
4745 #ifndef CONFIG_XEN
4746 void __init setup_memory_region(void)
4747 {
4748- char *who = "BIOS-e820";
4749-
4750 /*
4751 * Try to copy the BIOS-supplied E820-map.
4752 *
4753@@ -634,24 +621,10 @@ void __init setup_memory_region(void)
4754 * the next section from 1mb->appropriate_mem_k
4755 */
4756 sanitize_e820_map(E820_MAP, &E820_MAP_NR);
4757- if (copy_e820_map(E820_MAP, E820_MAP_NR) < 0) {
4758- unsigned long mem_size;
4759-
4760- /* compare results from other methods and take the greater */
4761- if (ALT_MEM_K < EXT_MEM_K) {
4762- mem_size = EXT_MEM_K;
4763- who = "BIOS-88";
4764- } else {
4765- mem_size = ALT_MEM_K;
4766- who = "BIOS-e801";
4767- }
4768-
4769- e820.nr_map = 0;
4770- add_memory_region(0, LOWMEMSIZE(), E820_RAM);
4771- add_memory_region(HIGH_MEMORY, mem_size << 10, E820_RAM);
4772- }
4773+ if (copy_e820_map(E820_MAP, E820_MAP_NR) < 0)
4774+ early_panic("Cannot find a valid memory map");
4775 printk(KERN_INFO "BIOS-provided physical RAM map:\n");
4776- e820_print_map(who);
4777+ e820_print_map("BIOS-e820");
4778 }
4779
4780 #else /* CONFIG_XEN */
4781@@ -683,20 +656,23 @@ void __init setup_memory_region(void)
4782
4783 sanitize_e820_map(map, (char *)&memmap.nr_entries);
4784
4785- BUG_ON(copy_e820_map(map, (char)memmap.nr_entries) < 0);
4786+ if (copy_e820_map(map, (char)memmap.nr_entries) < 0)
4787+ early_panic("Cannot find a valid memory map");
4788
4789 printk(KERN_INFO "BIOS-provided physical RAM map:\n");
4790 e820_print_map("Xen");
4791 }
4792 #endif
4793
4794-void __init parse_memopt(char *p, char **from)
4795-{
4796+static int __init parse_memopt(char *p)
4797+{
4798 int i;
4799 unsigned long current_end;
4800 unsigned long end;
4801
4802- end_user_pfn = memparse(p, from);
4803+ if (!p)
4804+ return -EINVAL;
4805+ end_user_pfn = memparse(p, &p);
4806 end_user_pfn >>= PAGE_SHIFT;
4807
4808 end = end_user_pfn<<PAGE_SHIFT;
4809@@ -713,27 +689,61 @@ void __init parse_memopt(char *p, char *
4810 else
4811 add_memory_region(current_end, end - current_end, E820_RAM);
4812 }
4813+
4814+ return 0;
4815 }
4816+early_param("mem", parse_memopt);
4817+
4818+static int userdef __initdata;
4819
4820-void __init parse_memmapopt(char *p, char **from)
4821+static int __init parse_memmap_opt(char *p)
4822 {
4823+ char *oldp;
4824 unsigned long long start_at, mem_size;
4825
4826- mem_size = memparse(p, from);
4827- p = *from;
4828+ if (!strcmp(p, "exactmap")) {
4829+#ifdef CONFIG_CRASH_DUMP
4830+ /* If we are doing a crash dump, we
4831+ * still need to know the real memory
4832+ * size before the original memory map
4833+ * is reset.
4834+ */
4835+ e820_register_active_regions(0, 0, -1UL);
4836+ saved_max_pfn = e820_end_of_ram();
4837+ remove_all_active_ranges();
4838+#endif
4839+ end_pfn_map = 0;
4840+ e820.nr_map = 0;
4841+ userdef = 1;
4842+ return 0;
4843+ }
4844+
4845+ oldp = p;
4846+ mem_size = memparse(p, &p);
4847+ if (p == oldp)
4848+ return -EINVAL;
4849 if (*p == '@') {
4850- start_at = memparse(p+1, from);
4851+ start_at = memparse(p+1, &p);
4852 add_memory_region(start_at, mem_size, E820_RAM);
4853 } else if (*p == '#') {
4854- start_at = memparse(p+1, from);
4855+ start_at = memparse(p+1, &p);
4856 add_memory_region(start_at, mem_size, E820_ACPI);
4857 } else if (*p == '$') {
4858- start_at = memparse(p+1, from);
4859+ start_at = memparse(p+1, &p);
4860 add_memory_region(start_at, mem_size, E820_RESERVED);
4861 } else {
4862 end_user_pfn = (mem_size >> PAGE_SHIFT);
4863 }
4864- p = *from;
4865+ return *p == '\0' ? 0 : -EINVAL;
4866+}
4867+early_param("memmap", parse_memmap_opt);
4868+
4869+void finish_e820_parsing(void)
4870+{
4871+ if (userdef) {
4872+ printk(KERN_INFO "user-defined physical RAM map:\n");
4873+ e820_print_map("user");
4874+ }
4875 }
4876
4877 unsigned long pci_mem_start = 0xaeedbabe;
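
The e820 rework has two threads: the bootmem-era helpers (e820_bootmem_free(), e820_hole_size()) give way to e820_register_active_regions()/add_active_range() so zone sizing is computed by generic code, and the mem=/memmap= options become early_params, with memmap=exactmap wiping the firmware map so that later memmap= entries rebuild it from scratch ('@' marks RAM, '#' ACPI data, '$' reserved). An illustrative boot line (values made up):

        memmap=exactmap memmap=640K@0 memmap=1023M@1M
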
4878--- sle11-2009-05-14.orig/arch/x86/kernel/early_printk-xen.c 2009-05-14 11:02:43.000000000 +0200
4879+++ sle11-2009-05-14/arch/x86/kernel/early_printk-xen.c 2009-03-04 11:28:34.000000000 +0100
4880@@ -244,20 +244,16 @@ void early_printk(const char *fmt, ...)
4881
4882 static int __initdata keep_early;
4883
4884-int __init setup_early_printk(char *opt)
4885+static int __init setup_early_printk(char *buf)
4886 {
4887- char *space;
4888- char buf[256];
4889+ if (!buf)
4890+ return 0;
4891
4892 if (early_console_initialized)
4893- return 1;
4894-
4895- strlcpy(buf,opt,sizeof(buf));
4896- space = strchr(buf, ' ');
4897- if (space)
4898- *space = 0;
4899+ return 0;
4900+ early_console_initialized = 1;
4901
4902- if (strstr(buf,"keep"))
4903+ if (strstr(buf, "keep"))
4904 keep_early = 1;
4905
4906 if (!strncmp(buf, "serial", 6)) {
4907@@ -281,11 +277,12 @@ int __init setup_early_printk(char *opt)
4908 early_console = &simnow_console;
4909 keep_early = 1;
4910 }
4911- early_console_initialized = 1;
4912 register_console(early_console);
4913 return 0;
4914 }
4915
4916+early_param("earlyprintk", setup_early_printk);
4917+
4918 void __init disable_early_printk(void)
4919 {
4920 if (!early_console_initialized || !early_console)
4921@@ -299,4 +296,3 @@ void __init disable_early_printk(void)
4922 }
4923 }
4924
4925-__setup("earlyprintk=", setup_early_printk);
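
earlyprintk= follows the same early_param() migration: the core parser now hands setup_early_printk() just the option value, so the old 256-byte copy and space truncation can go, and early_console_initialized is set up front so a second earlyprintk= is ignored. Typical usage on the boot line (illustrative):

        earlyprintk=serial,ttyS0,115200,keep
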
4926--- sle11-2009-05-14.orig/arch/x86/kernel/entry_64-xen.S 2009-05-14 11:02:43.000000000 +0200
4927+++ sle11-2009-05-14/arch/x86/kernel/entry_64-xen.S 2009-03-04 11:28:34.000000000 +0100
4928@@ -4,9 +4,6 @@
4929 * Copyright (C) 1991, 1992 Linus Torvalds
4930 * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
4931 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
4932- *
4933- * $Id$
4934- *
4935 * Jun Nakajima <jun.nakajima@intel.com>
4936 * Asit Mallick <asit.k.mallick@intel.com>
4937 * Modified for Xen
4938@@ -26,15 +23,25 @@
4939 * at the top of the kernel process stack.
4940 * - partial stack frame: partially saved registers up to R11.
4941 * - full stack frame: like the partial stack frame, but with all registers saved.
4942- *
4943- * TODO:
4944- * - schedule it carefully for the final hardware.
4945+ *
4946+ * Some macro usage:
4947+ * - CFI macros are used to generate dwarf2 unwind information for better
4948+ * backtraces. They don't change any code.
4949+ * - SAVE_ALL/RESTORE_ALL - Save/restore all registers
4950+ * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify.
4951+ * There are unfortunately lots of special cases where some registers
4952+ * are not touched. The macro is a big mess that should be cleaned up.
4953+ * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS.
4954+ * Gives a full stack frame.
4955+ * - ENTRY/END - Define functions in the symbol table.
4956+ * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
4957+ * frame that is otherwise undefined after a SYSCALL
4958+ * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging.
4959+ * - errorentry/paranoidentry/zeroentry - Define exception entry points.
4960 */
4961
4962-#define ASSEMBLY 1
4963 #include <linux/linkage.h>
4964 #include <asm/segment.h>
4965-#include <asm/smp.h>
4966 #include <asm/cache.h>
4967 #include <asm/errno.h>
4968 #include <asm/dwarf2.h>
4969@@ -117,6 +124,7 @@ NMI_MASK = 0x80000000
4970 .macro CFI_DEFAULT_STACK start=1,adj=0
4971 .if \start
4972 CFI_STARTPROC simple
4973+ CFI_SIGNAL_FRAME
4974 CFI_DEF_CFA rsp,SS+8 - \adj*ARGOFFSET
4975 .else
4976 CFI_DEF_CFA_OFFSET SS+8 - \adj*ARGOFFSET
4977@@ -207,6 +215,7 @@ END(ret_from_fork)
4978 */
4979 .macro _frame ref
4980 CFI_STARTPROC simple
4981+ CFI_SIGNAL_FRAME
4982 CFI_DEF_CFA rsp,SS+8-\ref
4983 /*CFI_REL_OFFSET ss,SS-\ref*/
4984 CFI_REL_OFFSET rsp,RSP-\ref
4985@@ -334,6 +343,8 @@ tracesys:
4986 LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */
4987 RESTORE_REST
4988 cmpq $__NR_syscall_max,%rax
4989+ movq $-ENOSYS,%rcx
4990+ cmova %rcx,%rax
4991 ja 1f
4992 movq %r10,%rcx /* fixup for C */
4993 call *sys_call_table(,%rax,8)
4994@@ -349,6 +360,7 @@ END(system_call)
4995 */
4996 ENTRY(int_ret_from_sys_call)
4997 CFI_STARTPROC simple
4998+ CFI_SIGNAL_FRAME
4999 CFI_DEF_CFA rsp,SS+8-ARGOFFSET
5000 /*CFI_REL_OFFSET ss,SS-ARGOFFSET*/
5001 CFI_REL_OFFSET rsp,RSP-ARGOFFSET
5002@@ -583,8 +595,7 @@ retint_signal:
5003 #ifdef CONFIG_PREEMPT
5004 /* Returning to kernel space. Check if we need preemption */
5005 /* rcx: threadinfo. interrupts off. */
5006- .p2align
5007-retint_kernel:
5008+ENTRY(retint_kernel)
5009 cmpl $0,threadinfo_preempt_count(%rcx)
5010 jnz retint_restore_args
5011 bt $TIF_NEED_RESCHED,threadinfo_flags(%rcx)
5012@@ -644,7 +655,6 @@ ENTRY(call_function_interrupt)
5013 END(call_function_interrupt)
5014 #endif
5015
5016-#ifdef CONFIG_X86_LOCAL_APIC
5017 ENTRY(apic_timer_interrupt)
5018 apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
5019 END(apic_timer_interrupt)
5020@@ -656,7 +666,6 @@ END(error_interrupt)
5021 ENTRY(spurious_interrupt)
5022 apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
5023 END(spurious_interrupt)
5024-#endif
5025 #endif /* !CONFIG_XEN */
5026
5027 /*
5028@@ -755,7 +764,9 @@ paranoid_exit\trace:
5029 testl $3,CS(%rsp)
5030 jnz paranoid_userspace\trace
5031 paranoid_swapgs\trace:
5032+ .if \trace
5033 TRACE_IRQS_IRETQ 0
5034+ .endif
5035 swapgs
5036 paranoid_restore\trace:
5037 RESTORE_ALL 8
5038@@ -802,7 +813,7 @@ paranoid_schedule\trace:
5039 * Exception entry point. This expects an error code/orig_rax on the stack
5040 * and the exception handler in %rax.
5041 */
5042-ENTRY(error_entry)
5043+KPROBE_ENTRY(error_entry)
5044 _frame RDI
5045 CFI_REL_OFFSET rax,0
5046 /* rdi slot contains rax, oldrax contains error code */
5047@@ -896,7 +907,7 @@ error_kernelspace:
5048 jmp error_sti
5049 #endif
5050 CFI_ENDPROC
5051-END(error_entry)
5052+KPROBE_END(error_entry)
5053
5054 ENTRY(hypervisor_callback)
5055 zeroentry do_hypervisor_callback
5056@@ -936,26 +947,6 @@ ENTRY(do_hypervisor_callback) # do_hyp
5057 CFI_ENDPROC
5058 END(do_hypervisor_callback)
5059
5060-#ifdef CONFIG_X86_LOCAL_APIC
5061-KPROBE_ENTRY(nmi)
5062- zeroentry do_nmi_callback
5063-ENTRY(do_nmi_callback)
5064- CFI_STARTPROC
5065- addq $8, %rsp
5066- CFI_ENDPROC
5067- CFI_DEFAULT_STACK
5068- call do_nmi
5069- orl $NMI_MASK,EFLAGS(%rsp)
5070- RESTORE_REST
5071- XEN_BLOCK_EVENTS(%rsi)
5072- TRACE_IRQS_OFF
5073- GET_THREAD_INFO(%rcx)
5074- jmp retint_restore_args
5075- CFI_ENDPROC
5076- .previous .text
5077-END(nmi)
5078-#endif
5079-
5080 ALIGN
5081 restore_all_enable_events:
5082 CFI_DEFAULT_STACK adj=1
5083@@ -1121,7 +1112,7 @@ ENDPROC(child_rip)
5084 * do_sys_execve asm fallback arguments:
5085 * rdi: name, rsi: argv, rdx: envp, fake frame on the stack
5086 */
5087-ENTRY(execve)
5088+ENTRY(kernel_execve)
5089 CFI_STARTPROC
5090 FAKE_STACK_FRAME $0
5091 SAVE_ALL
5092@@ -1135,12 +1126,11 @@ ENTRY(execve)
5093 UNFAKE_STACK_FRAME
5094 ret
5095 CFI_ENDPROC
5096-ENDPROC(execve)
5097+ENDPROC(kernel_execve)
5098
5099 KPROBE_ENTRY(page_fault)
5100 errorentry do_page_fault
5101-END(page_fault)
5102- .previous .text
5103+KPROBE_END(page_fault)
5104
5105 ENTRY(coprocessor_error)
5106 zeroentry do_coprocessor_error
5107@@ -1162,25 +1152,25 @@ KPROBE_ENTRY(debug)
5108 zeroentry do_debug
5109 /* paranoidexit
5110 CFI_ENDPROC */
5111-END(debug)
5112- .previous .text
5113+KPROBE_END(debug)
5114
5115-#if 0
5116- /* runs on exception stack */
5117 KPROBE_ENTRY(nmi)
5118- INTR_FRAME
5119- pushq $-1
5120- CFI_ADJUST_CFA_OFFSET 8
5121- paranoidentry do_nmi, 0, 0
5122-#ifdef CONFIG_TRACE_IRQFLAGS
5123- paranoidexit 0
5124-#else
5125- jmp paranoid_exit1
5126- CFI_ENDPROC
5127-#endif
5128-END(nmi)
5129- .previous .text
5130-#endif
5131+ zeroentry do_nmi_callback
5132+KPROBE_END(nmi)
5133+do_nmi_callback:
5134+ CFI_STARTPROC
5135+ addq $8, %rsp
5136+ CFI_ENDPROC
5137+ CFI_DEFAULT_STACK
5138+ call do_nmi
5139+ orl $NMI_MASK,EFLAGS(%rsp)
5140+ RESTORE_REST
5141+ XEN_BLOCK_EVENTS(%rsi)
5142+ TRACE_IRQS_OFF
5143+ GET_THREAD_INFO(%rcx)
5144+ jmp retint_restore_args
5145+ CFI_ENDPROC
5146+END(do_nmi_callback)
5147
5148 KPROBE_ENTRY(int3)
5149 /* INTR_FRAME
5150@@ -1189,8 +1179,7 @@ KPROBE_ENTRY(int3)
5151 zeroentry do_int3
5152 /* jmp paranoid_exit1
5153 CFI_ENDPROC */
5154-END(int3)
5155- .previous .text
5156+KPROBE_END(int3)
5157
5158 ENTRY(overflow)
5159 zeroentry do_overflow
5160@@ -1241,8 +1230,7 @@ END(stack_segment)
5161
5162 KPROBE_ENTRY(general_protection)
5163 errorentry do_general_protection
5164-END(general_protection)
5165- .previous .text
5166+KPROBE_END(general_protection)
5167
5168 ENTRY(alignment_check)
5169 errorentry do_alignment_check
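
Beyond the CFI_SIGNAL_FRAME annotations and the KPROBE_ENTRY/KPROBE_END pairing (which replaces the fragile hand-written ".previous .text" epilogues), the notable fix in entry_64 is in tracesys: -ENOSYS is preloaded and kept via cmova, so a syscall number that a ptrace tracer pushed out of range returns -ENOSYS instead of stale register contents. In rough C (call_syscall() is a made-up stand-in for the indirect table dispatch):

        /* Rough C rendering of the tracesys fix; names are illustrative. */
        extern long call_syscall(unsigned long nr, struct pt_regs *regs);

        static long dispatch_syscall(unsigned long nr, struct pt_regs *regs)
        {
                long ret = -ENOSYS;     /* movq $-ENOSYS,%rcx; cmova %rcx,%rax */

                if (nr <= __NR_syscall_max)
                        ret = call_syscall(nr, regs);   /* call *sys_call_table(,%rax,8) */
                return ret;
        }
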
5170--- sle11-2009-05-14.orig/arch/x86/kernel/genapic_xen_64.c 2009-05-14 11:02:43.000000000 +0200
5171+++ sle11-2009-05-14/arch/x86/kernel/genapic_xen_64.c 2009-03-04 11:28:34.000000000 +0100
5172@@ -71,6 +71,13 @@ static cpumask_t xen_target_cpus(void)
5173 return cpu_online_map;
5174 }
5175
5176+static cpumask_t xen_vector_allocation_domain(int cpu)
5177+{
5178+ cpumask_t domain = CPU_MASK_NONE;
5179+ cpu_set(cpu, domain);
5180+ return domain;
5181+}
5182+
5183 /*
5184 * Set up the logical destination ID.
5185 * Do nothing, not called now.
5186@@ -147,8 +154,8 @@ struct genapic apic_xen = {
5187 .int_delivery_mode = dest_LowestPrio,
5188 #endif
5189 .int_dest_mode = (APIC_DEST_LOGICAL != 0),
5190- .int_delivery_dest = APIC_DEST_LOGICAL | APIC_DM_LOWEST,
5191 .target_cpus = xen_target_cpus,
5192+ .vector_allocation_domain = xen_vector_allocation_domain,
5193 #ifdef CONFIG_XEN_PRIVILEGED_GUEST
5194 .apic_id_registered = xen_apic_id_registered,
5195 #endif
5196--- sle11-2009-05-14.orig/arch/x86/kernel/head_64-xen.S 2009-05-14 11:02:43.000000000 +0200
5197+++ sle11-2009-05-14/arch/x86/kernel/head_64-xen.S 2009-03-04 11:28:34.000000000 +0100
5198@@ -5,9 +5,6 @@
5199 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
5200 * Copyright (C) 2000 Karsten Keil <kkeil@suse.de>
5201 * Copyright (C) 2001,2002 Andi Kleen <ak@suse.de>
5202- *
5203- * $Id: head.S,v 1.49 2002/03/19 17:39:25 ak Exp $
5204- *
5205 * Jun Nakajima <jun.nakajima@intel.com>
5206 * Modified for Xen
5207 */
5208@@ -149,7 +146,7 @@ ENTRY(cpu_gdt_table)
5209 .quad 0,0 /* TSS */
5210 .quad 0,0 /* LDT */
5211 .quad 0,0,0 /* three TLS descriptors */
5212- .quad 0 /* unused */
5213+ .quad 0x0000f40000000000 /* node/CPU stored in limit */
5214 gdt_end:
5215 /* asm/segment.h:GDT_ENTRIES must match this */
5216 /* This should be a multiple of the cache line size */
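
The GDT slot that used to be zero becomes 0x0000f40000000000: a present, DPL-3, readable descriptor whose limit field each CPU rewrites at boot to carry its CPU and node numbers. That is what lets 2.6.19's sys_getcpu()/vgetcpu read both with a single unprivileged lsl. A sketch of the read side (the __PER_CPU_SEG selector name and the 12-bit split are assumptions from memory, not taken from this patch):

        /* Sketch: lsl-based getcpu read (selector and bit split assumed). */
        static void read_cpu_node(unsigned int *cpu, unsigned int *node)
        {
                unsigned int p;

                asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
                *cpu  = p & 0xfff;      /* low 12 limit bits: CPU number */
                *node = p >> 12;        /* remaining limit bits: NUMA node */
        }
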
5217--- sle11-2009-05-14.orig/arch/x86/kernel/head64-xen.c 2009-05-14 11:02:43.000000000 +0200
5218+++ sle11-2009-05-14/arch/x86/kernel/head64-xen.c 2009-03-04 11:28:34.000000000 +0100
5219@@ -7,6 +7,9 @@
5220 * Modified for Xen.
5221 */
5222
5223+/* PDA is not ready to be used until the end of x86_64_start_kernel(). */
5224+#define arch_use_lazy_mmu_mode() false
5225+
5226 #include <linux/init.h>
5227 #include <linux/linkage.h>
5228 #include <linux/types.h>
5229@@ -54,11 +57,9 @@ static void __init copy_bootdata(char *r
5230 new_data = *(int *) (x86_boot_params + NEW_CL_POINTER);
5231 if (!new_data) {
5232 if (OLD_CL_MAGIC != * (u16 *) OLD_CL_MAGIC_ADDR) {
5233- printk("so old bootloader that it does not support commandline?!\n");
5234 return;
5235 }
5236 new_data = OLD_CL_BASE_ADDR + * (u16 *) OLD_CL_OFFSET;
5237- printk("old bootloader convention, maybe loadlin?\n");
5238 }
5239 command_line = (char *) ((u64)(new_data));
5240 memcpy(saved_command_line, command_line, COMMAND_LINE_SIZE);
5241@@ -70,25 +71,6 @@ static void __init copy_bootdata(char *r
5242 memcpy(saved_command_line, xen_start_info->cmd_line, max_cmdline);
5243 saved_command_line[max_cmdline-1] = '\0';
5244 #endif
5245- printk("Bootdata ok (command line is %s)\n", saved_command_line);
5246-}
5247-
5248-static void __init setup_boot_cpu_data(void)
5249-{
5250- unsigned int dummy, eax;
5251-
5252- /* get vendor info */
5253- cpuid(0, (unsigned int *)&boot_cpu_data.cpuid_level,
5254- (unsigned int *)&boot_cpu_data.x86_vendor_id[0],
5255- (unsigned int *)&boot_cpu_data.x86_vendor_id[8],
5256- (unsigned int *)&boot_cpu_data.x86_vendor_id[4]);
5257-
5258- /* get cpu type */
5259- cpuid(1, &eax, &dummy, &dummy,
5260- (unsigned int *) &boot_cpu_data.x86_capability);
5261- boot_cpu_data.x86 = (eax >> 8) & 0xf;
5262- boot_cpu_data.x86_model = (eax >> 4) & 0xf;
5263- boot_cpu_data.x86_mask = eax & 0xf;
5264 }
5265
5266 #include <xen/interface/memory.h>
5267@@ -101,7 +83,6 @@ void __init x86_64_start_kernel(char * r
5268 {
5269 struct xen_machphys_mapping mapping;
5270 unsigned long machine_to_phys_nr_ents;
5271- char *s;
5272 int i;
5273
5274 setup_xen_features();
5275@@ -128,10 +109,7 @@ void __init x86_64_start_kernel(char * r
5276 asm volatile("lidt %0" :: "m" (idt_descr));
5277 #endif
5278
5279- /*
5280- * This must be called really, really early:
5281- */
5282- lockdep_init();
5283+ early_printk("Kernel alive\n");
5284
5285 for (i = 0; i < NR_CPUS; i++)
5286 cpu_pda(i) = &boot_cpu_pda[i];
5287@@ -141,22 +119,5 @@ void __init x86_64_start_kernel(char * r
5288 #ifdef CONFIG_SMP
5289 cpu_set(0, cpu_online_map);
5290 #endif
5291- s = strstr(saved_command_line, "earlyprintk=");
5292- if (s != NULL)
5293- setup_early_printk(strchr(s, '=') + 1);
5294-#ifdef CONFIG_NUMA
5295- s = strstr(saved_command_line, "numa=");
5296- if (s != NULL)
5297- numa_setup(s+5);
5298-#endif
5299-#ifdef CONFIG_X86_IO_APIC
5300- if (strstr(saved_command_line, "disableapic"))
5301- disable_apic = 1;
5302-#endif
5303- /* You need early console to see that */
5304- if (__pa_symbol(&_end) >= KERNEL_TEXT_SIZE)
5305- panic("Kernel too big for kernel mapping\n");
5306-
5307- setup_boot_cpu_data();
5308 start_kernel();
5309 }
5310--- sle11-2009-05-14.orig/arch/x86/kernel/io_apic_64-xen.c 2009-03-16 16:13:45.000000000 +0100
5311+++ sle11-2009-05-14/arch/x86/kernel/io_apic_64-xen.c 2009-03-04 11:28:34.000000000 +0100
5312@@ -26,9 +26,12 @@
5313 #include <linux/delay.h>
5314 #include <linux/sched.h>
5315 #include <linux/smp_lock.h>
5316+#include <linux/pci.h>
5317 #include <linux/mc146818rtc.h>
5318 #include <linux/acpi.h>
5319 #include <linux/sysdev.h>
5320+#include <linux/msi.h>
5321+#include <linux/htirq.h>
5322 #ifdef CONFIG_ACPI
5323 #include <acpi/acpi_bus.h>
5324 #endif
5325@@ -41,6 +44,10 @@
5326 #include <asm/acpi.h>
5327 #include <asm/dma.h>
5328 #include <asm/nmi.h>
5329+#include <asm/msidef.h>
5330+#include <asm/hypertransport.h>
5331+
5332+static int assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result);
5333
5334 #define __apicdebuginit __init
5335
5336@@ -48,17 +55,30 @@ int sis_apic_bug; /* not actually suppor
5337
5338 static int no_timer_check;
5339
5340-int disable_timer_pin_1 __initdata;
5341+static int disable_timer_pin_1 __initdata;
5342
5343-#ifndef CONFIG_XEN
5344-int timer_over_8254 __initdata = 0;
5345+#ifdef CONFIG_XEN
5346+#include <xen/interface/xen.h>
5347+#include <xen/interface/physdev.h>
5348+#include <xen/evtchn.h>
5349+
5350+/* Fake i8259 */
5351+#define make_8259A_irq(_irq) (io_apic_irqs &= ~(1UL<<(_irq)))
5352+#define disable_8259A_irq(_irq) ((void)0)
5353+#define i8259A_irq_pending(_irq) (0)
5354+
5355+unsigned long io_apic_irqs;
5356+
5357+#define clear_IO_APIC() ((void)0)
5358+#else
5359+int timer_over_8254 __initdata = 1;
5360
5361 /* Where if anywhere is the i8259 connect in external int mode */
5362 static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
5363 #endif
5364
5365 static DEFINE_SPINLOCK(ioapic_lock);
5366-static DEFINE_SPINLOCK(vector_lock);
5367+DEFINE_SPINLOCK(vector_lock);
5368
5369 /*
5370 * # of IRQ routing registers
5371@@ -83,29 +103,27 @@ static struct irq_pin_list {
5372 short apic, pin, next;
5373 } irq_2_pin[PIN_MAP_SIZE];
5374
5375-int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1};
5376-#ifdef CONFIG_PCI_MSI
5377-#define vector_to_irq(vector) \
5378- (platform_legacy_irq(vector) ? vector : vector_irq[vector])
5379-#else
5380-#define vector_to_irq(vector) (vector)
5381-#endif
5382-
5383-#ifdef CONFIG_XEN
5384-
5385-#include <xen/interface/xen.h>
5386-#include <xen/interface/physdev.h>
5387-#include <xen/evtchn.h>
5388-
5389-/* Fake i8259 */
5390-#define make_8259A_irq(_irq) (io_apic_irqs &= ~(1UL<<(_irq)))
5391-#define disable_8259A_irq(_irq) ((void)0)
5392-#define i8259A_irq_pending(_irq) (0)
5393+#ifndef CONFIG_XEN
5394+struct io_apic {
5395+ unsigned int index;
5396+ unsigned int unused[3];
5397+ unsigned int data;
5398+};
5399
5400-unsigned long io_apic_irqs;
5401+static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
5402+{
5403+ return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
5404+ + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK);
5405+}
5406+#endif
5407
5408-static inline unsigned int xen_io_apic_read(unsigned int apic, unsigned int reg)
5409+static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
5410 {
5411+#ifndef CONFIG_XEN
5412+ struct io_apic __iomem *io_apic = io_apic_base(apic);
5413+ writel(reg, &io_apic->index);
5414+ return readl(&io_apic->data);
5415+#else
5416 struct physdev_apic apic_op;
5417 int ret;
5418
5419@@ -115,31 +133,133 @@ static inline unsigned int xen_io_apic_r
5420 if (ret)
5421 return ret;
5422 return apic_op.value;
5423+#endif
5424 }
5425
5426-static inline void xen_io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
5427+static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
5428 {
5429+#ifndef CONFIG_XEN
5430+ struct io_apic __iomem *io_apic = io_apic_base(apic);
5431+ writel(reg, &io_apic->index);
5432+ writel(value, &io_apic->data);
5433+#else
5434 struct physdev_apic apic_op;
5435
5436 apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr;
5437 apic_op.reg = reg;
5438 apic_op.value = value;
5439 WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic_op));
5440+#endif
5441+}
5442+
5443+#ifndef CONFIG_XEN
5444+/*
5445+ * Re-write a value: to be used for read-modify-write
5446+ * cycles where the read already set up the index register.
5447+ */
5448+static inline void io_apic_modify(unsigned int apic, unsigned int value)
5449+{
5450+ struct io_apic __iomem *io_apic = io_apic_base(apic);
5451+ writel(value, &io_apic->data);
5452 }
5453+#else
5454+#define io_apic_modify io_apic_write
5455+#endif
5456
5457-#define io_apic_read(a,r) xen_io_apic_read(a,r)
5458-#define io_apic_write(a,r,v) xen_io_apic_write(a,r,v)
5459+/*
5460+ * Synchronize the IO-APIC and the CPU by doing
5461+ * a dummy read from the IO-APIC
5462+ */
5463+static inline void io_apic_sync(unsigned int apic)
5464+{
5465+#ifndef CONFIG_XEN
5466+ struct io_apic __iomem *io_apic = io_apic_base(apic);
5467+ readl(&io_apic->data);
5468+#endif
5469+}
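Writes to the I/O APIC can be posted, and the dummy read above flushes them. A hedged usage sketch for the native build (example_mask_and_flush is hypothetical; 0x00010000 is assumed to be the RTE mask bit, bit 16 of the low word):

static inline void example_mask_and_flush(unsigned int apic,
                                          unsigned int pin)
{
        unsigned int reg = io_apic_read(apic, 0x10 + 2 * pin);

        io_apic_modify(apic, reg | 0x00010000); /* set the mask bit */
        io_apic_sync(apic);                     /* flush the posted write */
}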
5470
5471-#define clear_IO_APIC() ((void)0)
5472+union entry_union {
5473+ struct { u32 w1, w2; };
5474+ struct IO_APIC_route_entry entry;
5475+};
5476
5477-#else
5478+#ifndef CONFIG_XEN
5479+static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
5480+{
5481+ union entry_union eu;
5482+ unsigned long flags;
5483+ spin_lock_irqsave(&ioapic_lock, flags);
5484+ eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
5485+ eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
5486+ spin_unlock_irqrestore(&ioapic_lock, flags);
5487+ return eu.entry;
5488+}
5489+#endif
5490+
5491+/*
5492+ * When we write a new IO APIC routing entry, we need to write the high
5493+ * word first! If the mask bit in the low word is clear, we will enable
5494+ * the interrupt, and we need to make sure the entry is fully populated
5495+ * before that happens.
5496+ */
5497+static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
5498+{
5499+ unsigned long flags;
5500+ union entry_union eu;
5501+ eu.entry = e;
5502+ spin_lock_irqsave(&ioapic_lock, flags);
5503+ io_apic_write(apic, 0x11 + 2*pin, eu.w2);
5504+ io_apic_write(apic, 0x10 + 2*pin, eu.w1);
5505+ spin_unlock_irqrestore(&ioapic_lock, flags);
5506+}
5507+
5508+#ifndef CONFIG_XEN
5509+/*
5510+ * When we mask an IO APIC routing entry, we need to write the low
5511+ * word first, in order to set the mask bit before we change the
5512+ * high bits!
5513+ */
5514+static void ioapic_mask_entry(int apic, int pin)
5515+{
5516+ unsigned long flags;
5517+ union entry_union eu = { .entry.mask = 1 };
5518+
5519+ spin_lock_irqsave(&ioapic_lock, flags);
5520+ io_apic_write(apic, 0x10 + 2*pin, eu.w1);
5521+ io_apic_write(apic, 0x11 + 2*pin, eu.w2);
5522+ spin_unlock_irqrestore(&ioapic_lock, flags);
5523+}
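The two helpers above encode opposite write orders for the same reason: an unmasked, half-written entry must never become live. A sketch under that assumption, for the native build where both helpers are compiled (example_program_pin is hypothetical):

static void example_program_pin(int apic, int pin, u8 vector)
{
        struct IO_APIC_route_entry e = { .mask = 1, .vector = vector };

        e.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
        ioapic_write_entry(apic, pin, e);       /* high word lands first */
        /* ... and when tearing the pin down again: */
        ioapic_mask_entry(apic, pin);           /* mask (low word) first */
}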
5524
5525 #ifdef CONFIG_SMP
5526+static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
5527+{
5528+ int apic, pin;
5529+ struct irq_pin_list *entry = irq_2_pin + irq;
5530+
5531+ BUG_ON(irq >= NR_IRQS);
5532+ for (;;) {
5533+ unsigned int reg;
5534+ apic = entry->apic;
5535+ pin = entry->pin;
5536+ if (pin == -1)
5537+ break;
5538+ io_apic_write(apic, 0x11 + pin*2, dest);
5539+ reg = io_apic_read(apic, 0x10 + pin*2);
5540+ reg &= ~0x000000ff;
5541+ reg |= vector;
5542+ io_apic_modify(apic, reg);
5543+ if (!entry->next)
5544+ break;
5545+ entry = irq_2_pin + entry->next;
5546+ }
5547+}
5548+
5549 static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
5550 {
5551 unsigned long flags;
5552 unsigned int dest;
5553 cpumask_t tmp;
5554+ int vector;
5555
5556 cpus_and(tmp, mask, cpu_online_map);
5557 if (cpus_empty(tmp))
5558@@ -147,7 +267,11 @@ static void set_ioapic_affinity_irq(unsi
5559
5560 cpus_and(mask, tmp, CPU_MASK_ALL);
5561
5562- dest = cpu_mask_to_apicid(mask);
5563+ vector = assign_irq_vector(irq, mask, &tmp);
5564+ if (vector < 0)
5565+ return;
5566+
5567+ dest = cpu_mask_to_apicid(tmp);
5568
5569 /*
5570 * Only the high 8 bits are valid.
5571@@ -155,13 +279,12 @@ static void set_ioapic_affinity_irq(unsi
5572 dest = SET_APIC_LOGICAL_ID(dest);
5573
5574 spin_lock_irqsave(&ioapic_lock, flags);
5575- __DO_ACTION(1, = dest, )
5576- set_irq_info(irq, mask);
5577+ __target_IO_APIC_irq(irq, dest, vector);
5578+ set_native_irq_info(irq, mask);
5579 spin_unlock_irqrestore(&ioapic_lock, flags);
5580 }
5581 #endif
5582-
5583-#endif /* !CONFIG_XEN */
5584+#endif
5585
5586 /*
5587 * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
5588@@ -241,24 +364,15 @@ static void unmask_IO_APIC_irq (unsigned
5589 static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
5590 {
5591 struct IO_APIC_route_entry entry;
5592- unsigned long flags;
5593
5594 /* Check delivery_mode to be sure we're not clearing an SMI pin */
5595- spin_lock_irqsave(&ioapic_lock, flags);
5596- *(((int*)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
5597- *(((int*)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
5598- spin_unlock_irqrestore(&ioapic_lock, flags);
5599+ entry = ioapic_read_entry(apic, pin);
5600 if (entry.delivery_mode == dest_SMI)
5601 return;
5602 /*
5603 * Disable it in the IO-APIC irq-routing table:
5604 */
5605- memset(&entry, 0, sizeof(entry));
5606- entry.mask = 1;
5607- spin_lock_irqsave(&ioapic_lock, flags);
5608- io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry) + 0));
5609- io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry) + 1));
5610- spin_unlock_irqrestore(&ioapic_lock, flags);
5611+ ioapic_mask_entry(apic, pin);
5612 }
5613
5614 static void clear_IO_APIC (void)
5615@@ -272,16 +386,6 @@ static void clear_IO_APIC (void)
5616
5617 #endif /* !CONFIG_XEN */
5618
5619-static u8 gsi_2_irq[NR_IRQ_VECTORS] = { [0 ... NR_IRQ_VECTORS-1] = 0xFF };
5620-
5621-/*
5622- * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
5623- * specific CPU-side IRQs.
5624- */
5625-
5626-#define MAX_PIRQS 8
5627-static int pirq_entries [MAX_PIRQS];
5628-static int pirqs_enabled;
5629 int skip_ioapic_setup;
5630 int ioapic_force;
5631
5632@@ -290,18 +394,17 @@ int ioapic_force;
5633 static int __init disable_ioapic_setup(char *str)
5634 {
5635 skip_ioapic_setup = 1;
5636- return 1;
5637+ return 0;
5638 }
5639+early_param("noapic", disable_ioapic_setup);
5640
5641-static int __init enable_ioapic_setup(char *str)
5642+/* Actually the next is obsolete, but keep it for paranoid reasons -AK */
5643+static int __init disable_timer_pin_setup(char *arg)
5644 {
5645- ioapic_force = 1;
5646- skip_ioapic_setup = 0;
5647+ disable_timer_pin_1 = 1;
5648 return 1;
5649 }
5650-
5651-__setup("noapic", disable_ioapic_setup);
5652-__setup("apic", enable_ioapic_setup);
5653+__setup("disable_timer_pin_1", disable_timer_pin_setup);
5654
5655 #ifndef CONFIG_XEN
5656 static int __init setup_disable_8254_timer(char *s)
5657@@ -319,137 +422,6 @@ __setup("disable_8254_timer", setup_disa
5658 __setup("enable_8254_timer", setup_enable_8254_timer);
5659 #endif /* !CONFIG_XEN */
5660
5661-#include <asm/pci-direct.h>
5662-#include <linux/pci_ids.h>
5663-#include <linux/pci.h>
5664-
5665-
5666-#ifdef CONFIG_ACPI
5667-
5668-static int nvidia_hpet_detected __initdata;
5669-
5670-static int __init nvidia_hpet_check(unsigned long phys, unsigned long size)
5671-{
5672- nvidia_hpet_detected = 1;
5673- return 0;
5674-}
5675-#endif
5676-
5677-/* Temporary Hack. Nvidia and VIA boards currently only work with IO-APIC
5678- off. Check for an Nvidia or VIA PCI bridge and turn it off.
5679- Use pci direct infrastructure because this runs before the PCI subsystem.
5680-
5681- Can be overwritten with "apic"
5682-
5683- And another hack to disable the IOMMU on VIA chipsets.
5684-
5685- ... and others. Really should move this somewhere else.
5686-
5687- Kludge-O-Rama. */
5688-void __init check_ioapic(void)
5689-{
5690- int num,slot,func;
5691- /* Poor man's PCI discovery */
5692- for (num = 0; num < 32; num++) {
5693- for (slot = 0; slot < 32; slot++) {
5694- for (func = 0; func < 8; func++) {
5695- u32 class;
5696- u32 vendor;
5697- u8 type;
5698- class = read_pci_config(num,slot,func,
5699- PCI_CLASS_REVISION);
5700- if (class == 0xffffffff)
5701- break;
5702-
5703- if ((class >> 16) != PCI_CLASS_BRIDGE_PCI)
5704- continue;
5705-
5706- vendor = read_pci_config(num, slot, func,
5707- PCI_VENDOR_ID);
5708- vendor &= 0xffff;
5709- switch (vendor) {
5710- case PCI_VENDOR_ID_VIA:
5711-#ifdef CONFIG_IOMMU
5712- if ((end_pfn > MAX_DMA32_PFN ||
5713- force_iommu) &&
5714- !iommu_aperture_allowed) {
5715- printk(KERN_INFO
5716- "Looks like a VIA chipset. Disabling IOMMU. Override with \"iommu=allowed\"\n");
5717- iommu_aperture_disabled = 1;
5718- }
5719-#endif
5720- return;
5721- case PCI_VENDOR_ID_NVIDIA:
5722-#ifdef CONFIG_ACPI
5723- /*
5724- * All timer overrides on Nvidia are
5725- * wrong unless HPET is enabled.
5726- */
5727- nvidia_hpet_detected = 0;
5728- acpi_table_parse(ACPI_HPET,
5729- nvidia_hpet_check);
5730- if (nvidia_hpet_detected == 0) {
5731- acpi_skip_timer_override = 1;
5732- printk(KERN_INFO "Nvidia board "
5733- "detected. Ignoring ACPI "
5734- "timer override.\n");
5735- }
5736-#endif
5737- /* RED-PEN skip them on mptables too? */
5738- return;
5739- case PCI_VENDOR_ID_ATI:
5740-
5741- /* This should be actually default, but
5742- for 2.6.16 let's do it for ATI only where
5743- it's really needed. */
5744-#ifndef CONFIG_XEN
5745- if (timer_over_8254 == 1) {
5746- timer_over_8254 = 0;
5747- printk(KERN_INFO
5748- "ATI board detected. Disabling timer routing over 8254.\n");
5749- }
5750-#endif
5751- return;
5752- }
5753-
5754-
5755- /* No multi-function device? */
5756- type = read_pci_config_byte(num,slot,func,
5757- PCI_HEADER_TYPE);
5758- if (!(type & 0x80))
5759- break;
5760- }
5761- }
5762- }
5763-}
5764-
5765-static int __init ioapic_pirq_setup(char *str)
5766-{
5767- int i, max;
5768- int ints[MAX_PIRQS+1];
5769-
5770- get_options(str, ARRAY_SIZE(ints), ints);
5771-
5772- for (i = 0; i < MAX_PIRQS; i++)
5773- pirq_entries[i] = -1;
5774-
5775- pirqs_enabled = 1;
5776- apic_printk(APIC_VERBOSE, "PIRQ redirection, working around broken MP-BIOS.\n");
5777- max = MAX_PIRQS;
5778- if (ints[0] < MAX_PIRQS)
5779- max = ints[0];
5780-
5781- for (i = 0; i < max; i++) {
5782- apic_printk(APIC_VERBOSE, "... PIRQ%d -> IRQ %d\n", i, ints[i+1]);
5783- /*
5784- * PIRQs are mapped upside down, usually.
5785- */
5786- pirq_entries[MAX_PIRQS-i-1] = ints[i+1];
5787- }
5788- return 1;
5789-}
5790-
5791-__setup("pirq=", ioapic_pirq_setup);
5792
5793 /*
5794 * Find the IRQ entry number of a certain pin.
5795@@ -479,9 +451,7 @@ static int __init find_isa_irq_pin(int i
5796 for (i = 0; i < mp_irq_entries; i++) {
5797 int lbus = mp_irqs[i].mpc_srcbus;
5798
5799- if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
5800- mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
5801- mp_bus_id_to_type[lbus] == MP_BUS_MCA) &&
5802+ if (test_bit(lbus, mp_bus_not_pci) &&
5803 (mp_irqs[i].mpc_irqtype == type) &&
5804 (mp_irqs[i].mpc_srcbusirq == irq))
5805
5806@@ -497,9 +467,7 @@ static int __init find_isa_irq_apic(int
5807 for (i = 0; i < mp_irq_entries; i++) {
5808 int lbus = mp_irqs[i].mpc_srcbus;
5809
5810- if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
5811- mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
5812- mp_bus_id_to_type[lbus] == MP_BUS_MCA) &&
5813+ if (test_bit(lbus, mp_bus_not_pci) &&
5814 (mp_irqs[i].mpc_irqtype == type) &&
5815 (mp_irqs[i].mpc_srcbusirq == irq))
5816 break;
5817@@ -540,7 +508,7 @@ int IO_APIC_get_PCI_irq_vector(int bus,
5818 mp_irqs[i].mpc_dstapic == MP_APIC_ALL)
5819 break;
5820
5821- if ((mp_bus_id_to_type[lbus] == MP_BUS_PCI) &&
5822+ if (!test_bit(lbus, mp_bus_not_pci) &&
5823 !mp_irqs[i].mpc_irqtype &&
5824 (bus == lbus) &&
5825 (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) {
5826@@ -563,27 +531,6 @@ int IO_APIC_get_PCI_irq_vector(int bus,
5827 return best_guess;
5828 }
5829
5830-/*
5831- * EISA Edge/Level control register, ELCR
5832- */
5833-static int EISA_ELCR(unsigned int irq)
5834-{
5835- if (irq < 16) {
5836- unsigned int port = 0x4d0 + (irq >> 3);
5837- return (inb(port) >> (irq & 7)) & 1;
5838- }
5839- apic_printk(APIC_VERBOSE, "Broken MPtable reports ISA irq %d\n", irq);
5840- return 0;
5841-}
5842-
5843-/* EISA interrupts are always polarity zero and can be edge or level
5844- * trigger depending on the ELCR value. If an interrupt is listed as
5845- * EISA conforming in the MP table, that means its trigger type must
5846- * be read in from the ELCR */
5847-
5848-#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mpc_srcbusirq))
5849-#define default_EISA_polarity(idx) (0)
5850-
5851 /* ISA interrupts are always polarity zero edge triggered,
5852 * when listed as conforming in the MP table. */
5853
5854@@ -596,12 +543,6 @@ static int EISA_ELCR(unsigned int irq)
5855 #define default_PCI_trigger(idx) (1)
5856 #define default_PCI_polarity(idx) (1)
5857
5858-/* MCA interrupts are always polarity zero level triggered,
5859- * when listed as conforming in the MP table. */
5860-
5861-#define default_MCA_trigger(idx) (1)
5862-#define default_MCA_polarity(idx) (0)
5863-
5864 static int __init MPBIOS_polarity(int idx)
5865 {
5866 int bus = mp_irqs[idx].mpc_srcbus;
5867@@ -613,38 +554,11 @@ static int __init MPBIOS_polarity(int id
5868 switch (mp_irqs[idx].mpc_irqflag & 3)
5869 {
5870 case 0: /* conforms, ie. bus-type dependent polarity */
5871- {
5872- switch (mp_bus_id_to_type[bus])
5873- {
5874- case MP_BUS_ISA: /* ISA pin */
5875- {
5876- polarity = default_ISA_polarity(idx);
5877- break;
5878- }
5879- case MP_BUS_EISA: /* EISA pin */
5880- {
5881- polarity = default_EISA_polarity(idx);
5882- break;
5883- }
5884- case MP_BUS_PCI: /* PCI pin */
5885- {
5886- polarity = default_PCI_polarity(idx);
5887- break;
5888- }
5889- case MP_BUS_MCA: /* MCA pin */
5890- {
5891- polarity = default_MCA_polarity(idx);
5892- break;
5893- }
5894- default:
5895- {
5896- printk(KERN_WARNING "broken BIOS!!\n");
5897- polarity = 1;
5898- break;
5899- }
5900- }
5901+ if (test_bit(bus, mp_bus_not_pci))
5902+ polarity = default_ISA_polarity(idx);
5903+ else
5904+ polarity = default_PCI_polarity(idx);
5905 break;
5906- }
5907 case 1: /* high active */
5908 {
5909 polarity = 0;
5910@@ -682,38 +596,11 @@ static int MPBIOS_trigger(int idx)
5911 switch ((mp_irqs[idx].mpc_irqflag>>2) & 3)
5912 {
5913 case 0: /* conforms, ie. bus-type dependent */
5914- {
5915- switch (mp_bus_id_to_type[bus])
5916- {
5917- case MP_BUS_ISA: /* ISA pin */
5918- {
5919- trigger = default_ISA_trigger(idx);
5920- break;
5921- }
5922- case MP_BUS_EISA: /* EISA pin */
5923- {
5924- trigger = default_EISA_trigger(idx);
5925- break;
5926- }
5927- case MP_BUS_PCI: /* PCI pin */
5928- {
5929- trigger = default_PCI_trigger(idx);
5930- break;
5931- }
5932- case MP_BUS_MCA: /* MCA pin */
5933- {
5934- trigger = default_MCA_trigger(idx);
5935- break;
5936- }
5937- default:
5938- {
5939- printk(KERN_WARNING "broken BIOS!!\n");
5940- trigger = 1;
5941- break;
5942- }
5943- }
5944+ if (test_bit(bus, mp_bus_not_pci))
5945+ trigger = default_ISA_trigger(idx);
5946+ else
5947+ trigger = default_PCI_trigger(idx);
5948 break;
5949- }
5950 case 1: /* edge */
5951 {
5952 trigger = 0;
5953@@ -750,64 +637,6 @@ static inline int irq_trigger(int idx)
5954 return MPBIOS_trigger(idx);
5955 }
5956
5957-static int next_irq = 16;
5958-
5959-/*
5960- * gsi_irq_sharing -- Name overload! "irq" can be either a legacy IRQ
5961- * in the range 0-15, a linux IRQ in the range 0-223, or a GSI number
5962- * from ACPI, which can reach 800 in large boxen.
5963- *
5964- * Compact the sparse GSI space into a sequential IRQ series and reuse
5965- * vectors if possible.
5966- */
5967-int gsi_irq_sharing(int gsi)
5968-{
5969- int i, tries, vector;
5970-
5971- BUG_ON(gsi >= NR_IRQ_VECTORS);
5972-
5973- if (platform_legacy_irq(gsi))
5974- return gsi;
5975-
5976- if (gsi_2_irq[gsi] != 0xFF)
5977- return (int)gsi_2_irq[gsi];
5978-
5979- tries = NR_IRQS;
5980- try_again:
5981- vector = assign_irq_vector(gsi);
5982-
5983- /*
5984- * Sharing vectors means sharing IRQs, so scan irq_vectors for previous
5985- * use of vector and if found, return that IRQ. However, we never want
5986- * to share legacy IRQs, which usually have a different trigger mode
5987- * than PCI.
5988- */
5989- for (i = 0; i < NR_IRQS; i++)
5990- if (IO_APIC_VECTOR(i) == vector)
5991- break;
5992- if (platform_legacy_irq(i)) {
5993- if (--tries >= 0) {
5994- IO_APIC_VECTOR(i) = 0;
5995- goto try_again;
5996- }
5997- panic("gsi_irq_sharing: didn't find an IRQ using vector 0x%02X for GSI %d", vector, gsi);
5998- }
5999- if (i < NR_IRQS) {
6000- gsi_2_irq[gsi] = i;
6001- printk(KERN_INFO "GSI %d sharing vector 0x%02X and IRQ %d\n",
6002- gsi, vector, i);
6003- return i;
6004- }
6005-
6006- i = next_irq++;
6007- BUG_ON(i >= NR_IRQS);
6008- gsi_2_irq[gsi] = i;
6009- IO_APIC_VECTOR(i) = vector;
6010- printk(KERN_INFO "GSI %d assigned vector 0x%02X and IRQ %d\n",
6011- gsi, vector, i);
6012- return i;
6013-}
6014-
6015 static int pin_2_irq(int idx, int apic, int pin)
6016 {
6017 int irq, i;
6018@@ -819,49 +648,16 @@ static int pin_2_irq(int idx, int apic,
6019 if (mp_irqs[idx].mpc_dstirq != pin)
6020 printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
6021
6022- switch (mp_bus_id_to_type[bus])
6023- {
6024- case MP_BUS_ISA: /* ISA pin */
6025- case MP_BUS_EISA:
6026- case MP_BUS_MCA:
6027- {
6028- irq = mp_irqs[idx].mpc_srcbusirq;
6029- break;
6030- }
6031- case MP_BUS_PCI: /* PCI pin */
6032- {
6033- /*
6034- * PCI IRQs are mapped in order
6035- */
6036- i = irq = 0;
6037- while (i < apic)
6038- irq += nr_ioapic_registers[i++];
6039- irq += pin;
6040- irq = gsi_irq_sharing(irq);
6041- break;
6042- }
6043- default:
6044- {
6045- printk(KERN_ERR "unknown bus type %d.\n",bus);
6046- irq = 0;
6047- break;
6048- }
6049- }
6050- BUG_ON(irq >= NR_IRQS);
6051-
6052- /*
6053- * PCI IRQ command line redirection. Yes, limits are hardcoded.
6054- */
6055- if ((pin >= 16) && (pin <= 23)) {
6056- if (pirq_entries[pin-16] != -1) {
6057- if (!pirq_entries[pin-16]) {
6058- apic_printk(APIC_VERBOSE, "disabling PIRQ%d\n", pin-16);
6059- } else {
6060- irq = pirq_entries[pin-16];
6061- apic_printk(APIC_VERBOSE, "using PIRQ%d -> IRQ %d\n",
6062- pin-16, irq);
6063- }
6064- }
6065+ if (test_bit(bus, mp_bus_not_pci)) {
6066+ irq = mp_irqs[idx].mpc_srcbusirq;
6067+ } else {
6068+ /*
6069+ * PCI IRQs are mapped in order
6070+ */
6071+ i = irq = 0;
6072+ while (i < apic)
6073+ irq += nr_ioapic_registers[i++];
6074+ irq += pin;
6075 }
6076 BUG_ON(irq >= NR_IRQS);
6077 return irq;
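A worked example of the simplified mapping (annotation): with nr_ioapic_registers[] = { 24, 24 }, a PCI interrupt on apic 1, pin 3 resolves to IRQ 24 + 3 = 27, since pins are numbered sequentially across the I/O APICs, while legacy buses keep their source-bus IRQ. The hypothetical example_pci_gsi restates the loop:

static inline int example_pci_gsi(int apic, int pin)
{
        int i = 0, irq = 0;

        while (i < apic)
                irq += nr_ioapic_registers[i++]; /* pins on earlier APICs */
        return irq + pin;                        /* apic 1, pin 3 -> 27 */
}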
6078@@ -885,46 +681,71 @@ static inline int IO_APIC_irq_trigger(in
6079 }
6080
6081 /* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
6082-u8 irq_vector[NR_IRQ_VECTORS] __read_mostly;
6083+static u8 irq_vector[NR_IRQ_VECTORS] __read_mostly;
6084
6085-int assign_irq_vector(int irq)
6086+static int __assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result)
6087 {
6088- unsigned long flags;
6089 int vector;
6090 struct physdev_irq irq_op;
6091
6092- BUG_ON(irq != AUTO_ASSIGN && (unsigned)irq >= NR_IRQ_VECTORS);
6093+ BUG_ON((unsigned)irq >= NR_IRQ_VECTORS);
6094
6095 if (irq < PIRQ_BASE || irq - PIRQ_BASE >= NR_PIRQS)
6096 return -EINVAL;
6097
6098- spin_lock_irqsave(&vector_lock, flags);
6099+ cpus_and(*result, mask, cpu_online_map);
6100
6101- if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) {
6102- spin_unlock_irqrestore(&vector_lock, flags);
6103- return IO_APIC_VECTOR(irq);
6104- }
6105+ if (irq_vector[irq] > 0)
6106+ return irq_vector[irq];
6107
6108 irq_op.irq = irq;
6109- if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) {
6110- spin_unlock_irqrestore(&vector_lock, flags);
6111+ if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op))
6112 return -ENOSPC;
6113- }
6114
6115 vector = irq_op.vector;
6116- vector_irq[vector] = irq;
6117- if (irq != AUTO_ASSIGN)
6118- IO_APIC_VECTOR(irq) = vector;
6119+ irq_vector[irq] = vector;
6120
6121- spin_unlock_irqrestore(&vector_lock, flags);
6122+ return vector;
6123+}
6124+
6125+static int assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result)
6126+{
6127+ int vector;
6128+ unsigned long flags;
6129
6130+ spin_lock_irqsave(&vector_lock, flags);
6131+ vector = __assign_irq_vector(irq, mask, result);
6132+ spin_unlock_irqrestore(&vector_lock, flags);
6133 return vector;
6134 }
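A hedged usage sketch of the new two-step contract (example_assign is hypothetical): on Xen the vector comes from a PHYSDEVOP_alloc_irq_vector hypercall and is cached in irq_vector[], and the caller gets the effective target mask back through *result — the same shape setup_IO_APIC_irqs() uses below:

static void example_assign(int irq)
{
        cpumask_t mask;
        int vector = assign_irq_vector(irq, TARGET_CPUS, &mask);

        if (vector < 0)
                return; /* -EINVAL or -ENOSPC from the hypercall path */
        /* program vector and cpu_mask_to_apicid(mask) into the RTE */
}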
6135
6136-extern void (*interrupt[NR_IRQS])(void);
6137 #ifndef CONFIG_XEN
6138-static struct hw_interrupt_type ioapic_level_type;
6139-static struct hw_interrupt_type ioapic_edge_type;
6140+void __setup_vector_irq(int cpu)
6141+{
6142+ /* Initialize vector_irq on a new cpu */
6143+ /* This function must be called with vector_lock held */
6144+ int irq, vector;
6145+
6146+ /* Mark the inuse vectors */
6147+ for (irq = 0; irq < NR_IRQ_VECTORS; ++irq) {
6148+ if (!cpu_isset(cpu, irq_domain[irq]))
6149+ continue;
6150+ vector = irq_vector[irq];
6151+ per_cpu(vector_irq, cpu)[vector] = irq;
6152+ }
6153+ /* Mark the free vectors */
6154+ for (vector = 0; vector < NR_VECTORS; ++vector) {
6155+ irq = per_cpu(vector_irq, cpu)[vector];
6156+ if (irq < 0)
6157+ continue;
6158+ if (!cpu_isset(cpu, irq_domain[irq]))
6159+ per_cpu(vector_irq, cpu)[vector] = -1;
6160+ }
6161+}
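Per the comment above, callers must hold vector_lock. A minimal native-build bring-up sketch under that assumption (example_cpu_up is hypothetical):

static void example_cpu_up(int cpu)
{
        unsigned long flags;

        spin_lock_irqsave(&vector_lock, flags);
        __setup_vector_irq(cpu);        /* fill per_cpu(vector_irq, cpu) */
        spin_unlock_irqrestore(&vector_lock, flags);
}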
6162+
6163+extern void (*interrupt[NR_IRQS])(void);
6164+
6165+static struct irq_chip ioapic_chip;
6166
6167 #define IOAPIC_AUTO -1
6168 #define IOAPIC_EDGE 0
6169@@ -932,16 +753,15 @@ static struct hw_interrupt_type ioapic_e
6170
6171 static void ioapic_register_intr(int irq, int vector, unsigned long trigger)
6172 {
6173- unsigned idx;
6174-
6175- idx = use_pci_vector() && !platform_legacy_irq(irq) ? vector : irq;
6176-
6177 if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
6178 trigger == IOAPIC_LEVEL)
6179- irq_desc[idx].chip = &ioapic_level_type;
6180- else
6181- irq_desc[idx].chip = &ioapic_edge_type;
6182- set_intr_gate(vector, interrupt[idx]);
6183+ set_irq_chip_and_handler_name(irq, &ioapic_chip,
6184+ handle_fasteoi_irq, "fasteoi");
6185+ else {
6186+ irq_desc[irq].status |= IRQ_DELAYED_DISABLE;
6187+ set_irq_chip_and_handler_name(irq, &ioapic_chip,
6188+ handle_edge_irq, "edge");
6189+ }
6190 }
6191 #else
6192 #define ioapic_register_intr(irq, vector, trigger) evtchn_register_pirq(irq)
6193@@ -994,16 +814,21 @@ static void __init setup_IO_APIC_irqs(vo
6194 continue;
6195
6196 if (IO_APIC_IRQ(irq)) {
6197- vector = assign_irq_vector(irq);
6198+ cpumask_t mask;
6199+ vector = assign_irq_vector(irq, TARGET_CPUS, &mask);
6200+ if (vector < 0)
6201+ continue;
6202+
6203+ entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask);
6204 entry.vector = vector;
6205
6206 ioapic_register_intr(irq, vector, IOAPIC_AUTO);
6207 if (!apic && (irq < 16))
6208 disable_8259A_irq(irq);
6209 }
6210+ ioapic_write_entry(apic, pin, entry);
6211+
6212 spin_lock_irqsave(&ioapic_lock, flags);
6213- io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
6214- io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
6215 set_native_irq_info(irq, TARGET_CPUS);
6216 spin_unlock_irqrestore(&ioapic_lock, flags);
6217 }
6218@@ -1046,7 +871,7 @@ static void __init setup_ExtINT_IRQ0_pin
6219 * The timer IRQ doesn't have to know that behind the
6220 * scene we have a 8259A-master in AEOI mode ...
6221 */
6222- irq_desc[0].chip = &ioapic_edge_type;
6223+ set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge");
6224
6225 /*
6226 * Add it to the IO-APIC irq-routing table:
6227@@ -1142,10 +967,7 @@ void __apicdebuginit print_IO_APIC(void)
6228 for (i = 0; i <= reg_01.bits.entries; i++) {
6229 struct IO_APIC_route_entry entry;
6230
6231- spin_lock_irqsave(&ioapic_lock, flags);
6232- *(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2);
6233- *(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2);
6234- spin_unlock_irqrestore(&ioapic_lock, flags);
6235+ entry = ioapic_read_entry(apic, i);
6236
6237 printk(KERN_DEBUG " %02x %03X %02X ",
6238 i,
6239@@ -1165,17 +987,12 @@ void __apicdebuginit print_IO_APIC(void)
6240 );
6241 }
6242 }
6243- if (use_pci_vector())
6244- printk(KERN_INFO "Using vector-based indexing\n");
6245 printk(KERN_DEBUG "IRQ to pin mappings:\n");
6246 for (i = 0; i < NR_IRQS; i++) {
6247 struct irq_pin_list *entry = irq_2_pin + i;
6248 if (entry->pin < 0)
6249 continue;
6250- if (use_pci_vector() && !platform_legacy_irq(i))
6251- printk(KERN_DEBUG "IRQ%d ", IO_APIC_VECTOR(i));
6252- else
6253- printk(KERN_DEBUG "IRQ%d ", i);
6254+ printk(KERN_DEBUG "IRQ%d ", i);
6255 for (;;) {
6256 printk("-> %d:%d", entry->apic, entry->pin);
6257 if (!entry->next)
6258@@ -1339,9 +1156,6 @@ static void __init enable_IO_APIC(void)
6259 irq_2_pin[i].pin = -1;
6260 irq_2_pin[i].next = 0;
6261 }
6262- if (!pirqs_enabled)
6263- for (i = 0; i < MAX_PIRQS; i++)
6264- pirq_entries[i] = -1;
6265
6266 /*
6267 * The number of IO-APIC IRQ registers (== #pins):
6268@@ -1358,11 +1172,7 @@ static void __init enable_IO_APIC(void)
6269 /* See if any of the pins is in ExtINT mode */
6270 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
6271 struct IO_APIC_route_entry entry;
6272- spin_lock_irqsave(&ioapic_lock, flags);
6273- *(((int *)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
6274- *(((int *)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
6275- spin_unlock_irqrestore(&ioapic_lock, flags);
6276-
6277+ entry = ioapic_read_entry(apic, pin);
6278
6279 /* If the interrupt line is enabled and in ExtInt mode
6280 * I have found the pin where the i8259 is connected.
6281@@ -1416,7 +1226,6 @@ void disable_IO_APIC(void)
6282 */
6283 if (ioapic_i8259.pin != -1) {
6284 struct IO_APIC_route_entry entry;
6285- unsigned long flags;
6286
6287 memset(&entry, 0, sizeof(entry));
6288 entry.mask = 0; /* Enabled */
6289@@ -1433,12 +1242,7 @@ void disable_IO_APIC(void)
6290 /*
6291 * Add it to the IO-APIC irq-routing table:
6292 */
6293- spin_lock_irqsave(&ioapic_lock, flags);
6294- io_apic_write(ioapic_i8259.apic, 0x11+2*ioapic_i8259.pin,
6295- *(((int *)&entry)+1));
6296- io_apic_write(ioapic_i8259.apic, 0x10+2*ioapic_i8259.pin,
6297- *(((int *)&entry)+0));
6298- spin_unlock_irqrestore(&ioapic_lock, flags);
6299+ ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
6300 }
6301
6302 disconnect_bsp_APIC(ioapic_i8259.pin != -1);
6303@@ -1446,76 +1250,6 @@ void disable_IO_APIC(void)
6304 }
6305
6306 /*
6307- * function to set the IO-APIC physical IDs based on the
6308- * values stored in the MPC table.
6309- *
6310- * by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999
6311- */
6312-
6313-#ifndef CONFIG_XEN
6314-static void __init setup_ioapic_ids_from_mpc (void)
6315-{
6316- union IO_APIC_reg_00 reg_00;
6317- int apic;
6318- int i;
6319- unsigned char old_id;
6320- unsigned long flags;
6321-
6322- /*
6323- * Set the IOAPIC ID to the value stored in the MPC table.
6324- */
6325- for (apic = 0; apic < nr_ioapics; apic++) {
6326-
6327- /* Read the register 0 value */
6328- spin_lock_irqsave(&ioapic_lock, flags);
6329- reg_00.raw = io_apic_read(apic, 0);
6330- spin_unlock_irqrestore(&ioapic_lock, flags);
6331-
6332- old_id = mp_ioapics[apic].mpc_apicid;
6333-
6334-
6335- printk(KERN_INFO "Using IO-APIC %d\n", mp_ioapics[apic].mpc_apicid);
6336-
6337-
6338- /*
6339- * We need to adjust the IRQ routing table
6340- * if the ID changed.
6341- */
6342- if (old_id != mp_ioapics[apic].mpc_apicid)
6343- for (i = 0; i < mp_irq_entries; i++)
6344- if (mp_irqs[i].mpc_dstapic == old_id)
6345- mp_irqs[i].mpc_dstapic
6346- = mp_ioapics[apic].mpc_apicid;
6347-
6348- /*
6349- * Read the right value from the MPC table and
6350- * write it into the ID register.
6351- */
6352- apic_printk(APIC_VERBOSE,KERN_INFO "...changing IO-APIC physical APIC ID to %d ...",
6353- mp_ioapics[apic].mpc_apicid);
6354-
6355- reg_00.bits.ID = mp_ioapics[apic].mpc_apicid;
6356- spin_lock_irqsave(&ioapic_lock, flags);
6357- io_apic_write(apic, 0, reg_00.raw);
6358- spin_unlock_irqrestore(&ioapic_lock, flags);
6359-
6360- /*
6361- * Sanity check
6362- */
6363- spin_lock_irqsave(&ioapic_lock, flags);
6364- reg_00.raw = io_apic_read(apic, 0);
6365- spin_unlock_irqrestore(&ioapic_lock, flags);
6366- if (reg_00.bits.ID != mp_ioapics[apic].mpc_apicid)
6367- printk("could not set ID!\n");
6368- else
6369- apic_printk(APIC_VERBOSE," ok.\n");
6370- }
6371-}
6372-#else
6373-static void __init setup_ioapic_ids_from_mpc(void) { }
6374-#endif
6375-
6376-/*
6377 * There is a nasty bug in some older SMP boards, their mptable lies
6378 * about the timer IRQ. We do the following to work around the situation:
6379 *
6380@@ -1569,7 +1303,7 @@ static int __init timer_irq_works(void)
6381 * an edge even if it isn't on the 8259A...
6382 */
6383
6384-static unsigned int startup_edge_ioapic_irq(unsigned int irq)
6385+static unsigned int startup_ioapic_irq(unsigned int irq)
6386 {
6387 int was_pending = 0;
6388 unsigned long flags;
6389@@ -1586,107 +1320,19 @@ static unsigned int startup_edge_ioapic_
6390 return was_pending;
6391 }
6392
6393-/*
6394- * Once we have recorded IRQ_PENDING already, we can mask the
6395- * interrupt for real. This prevents IRQ storms from unhandled
6396- * devices.
6397- */
6398-static void ack_edge_ioapic_irq(unsigned int irq)
6399-{
6400- move_irq(irq);
6401- if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED))
6402- == (IRQ_PENDING | IRQ_DISABLED))
6403- mask_IO_APIC_irq(irq);
6404- ack_APIC_irq();
6405-}
6406-
6407-/*
6408- * Level triggered interrupts can just be masked,
6409- * and shutting down and starting up the interrupt
6410- * is the same as enabling and disabling them -- except
6411- * with a startup need to return a "was pending" value.
6412- *
6413- * Level triggered interrupts are special because we
6414- * do not touch any IO-APIC register while handling
6415- * them. We ack the APIC in the end-IRQ handler, not
6416- * in the start-IRQ-handler. Protection against reentrance
6417- * from the same interrupt is still provided, both by the
6418- * generic IRQ layer and by the fact that an unacked local
6419- * APIC does not accept IRQs.
6420- */
6421-static unsigned int startup_level_ioapic_irq (unsigned int irq)
6422-{
6423- unmask_IO_APIC_irq(irq);
6424-
6425- return 0; /* don't check for pending */
6426-}
6427-
6428-static void end_level_ioapic_irq (unsigned int irq)
6429-{
6430- move_irq(irq);
6431- ack_APIC_irq();
6432-}
6433-
6434-#ifdef CONFIG_PCI_MSI
6435-static unsigned int startup_edge_ioapic_vector(unsigned int vector)
6436-{
6437- int irq = vector_to_irq(vector);
6438-
6439- return startup_edge_ioapic_irq(irq);
6440-}
6441-
6442-static void ack_edge_ioapic_vector(unsigned int vector)
6443-{
6444- int irq = vector_to_irq(vector);
6445-
6446- move_native_irq(vector);
6447- ack_edge_ioapic_irq(irq);
6448-}
6449-
6450-static unsigned int startup_level_ioapic_vector (unsigned int vector)
6451-{
6452- int irq = vector_to_irq(vector);
6453-
6454- return startup_level_ioapic_irq (irq);
6455-}
6456-
6457-static void end_level_ioapic_vector (unsigned int vector)
6458-{
6459- int irq = vector_to_irq(vector);
6460-
6461- move_native_irq(vector);
6462- end_level_ioapic_irq(irq);
6463-}
6464-
6465-static void mask_IO_APIC_vector (unsigned int vector)
6466-{
6467- int irq = vector_to_irq(vector);
6468-
6469- mask_IO_APIC_irq(irq);
6470-}
6471-
6472-static void unmask_IO_APIC_vector (unsigned int vector)
6473+static int ioapic_retrigger_irq(unsigned int irq)
6474 {
6475- int irq = vector_to_irq(vector);
6476-
6477- unmask_IO_APIC_irq(irq);
6478-}
6479-
6480-#ifdef CONFIG_SMP
6481-static void set_ioapic_affinity_vector (unsigned int vector,
6482- cpumask_t cpu_mask)
6483-{
6484- int irq = vector_to_irq(vector);
6485+ cpumask_t mask;
6486+ unsigned vector;
6487+ unsigned long flags;
6488
6489- set_native_irq_info(vector, cpu_mask);
6490- set_ioapic_affinity_irq(irq, cpu_mask);
6491-}
6492-#endif // CONFIG_SMP
6493-#endif // CONFIG_PCI_MSI
6494+ spin_lock_irqsave(&vector_lock, flags);
6495+ vector = irq_vector[irq];
6496+ cpus_clear(mask);
6497+ cpu_set(first_cpu(irq_domain[irq]), mask);
6498
6499-static int ioapic_retrigger(unsigned int irq)
6500-{
6501- send_IPI_self(IO_APIC_VECTOR(irq));
6502+ send_IPI_mask(mask, vector);
6503+ spin_unlock_irqrestore(&vector_lock, flags);
6504
6505 return 1;
6506 }
6507@@ -1700,32 +1346,47 @@ static int ioapic_retrigger(unsigned int
6508 * races.
6509 */
6510
6511-static struct hw_interrupt_type ioapic_edge_type __read_mostly = {
6512- .typename = "IO-APIC-edge",
6513- .startup = startup_edge_ioapic,
6514- .shutdown = shutdown_edge_ioapic,
6515- .enable = enable_edge_ioapic,
6516- .disable = disable_edge_ioapic,
6517- .ack = ack_edge_ioapic,
6518- .end = end_edge_ioapic,
6519-#ifdef CONFIG_SMP
6520- .set_affinity = set_ioapic_affinity,
6521+static void ack_apic_edge(unsigned int irq)
6522+{
6523+ move_native_irq(irq);
6524+ ack_APIC_irq();
6525+}
6526+
6527+static void ack_apic_level(unsigned int irq)
6528+{
6529+ int do_unmask_irq = 0;
6530+
6531+#if defined(CONFIG_GENERIC_PENDING_IRQ) || defined(CONFIG_IRQBALANCE)
6532+ /* If we are moving the irq we need to mask it */
6533+ if (unlikely(irq_desc[irq].status & IRQ_MOVE_PENDING)) {
6534+ do_unmask_irq = 1;
6535+ mask_IO_APIC_irq(irq);
6536+ }
6537 #endif
6538- .retrigger = ioapic_retrigger,
6539-};
6540
6541-static struct hw_interrupt_type ioapic_level_type __read_mostly = {
6542- .typename = "IO-APIC-level",
6543- .startup = startup_level_ioapic,
6544- .shutdown = shutdown_level_ioapic,
6545- .enable = enable_level_ioapic,
6546- .disable = disable_level_ioapic,
6547- .ack = mask_and_ack_level_ioapic,
6548- .end = end_level_ioapic,
6549+ /*
6550+ * We must acknowledge the irq before we move it or the acknowledge will
6551+	 * not propagate properly.
6552+ */
6553+ ack_APIC_irq();
6554+
6555+	/* Now we can move and re-enable the irq */
6556+ move_masked_irq(irq);
6557+ if (unlikely(do_unmask_irq))
6558+ unmask_IO_APIC_irq(irq);
6559+}
6560+
6561+static struct irq_chip ioapic_chip __read_mostly = {
6562+ .name = "IO-APIC",
6563+ .startup = startup_ioapic_irq,
6564+ .mask = mask_IO_APIC_irq,
6565+ .unmask = unmask_IO_APIC_irq,
6566+ .ack = ack_apic_edge,
6567+ .eoi = ack_apic_level,
6568 #ifdef CONFIG_SMP
6569- .set_affinity = set_ioapic_affinity,
6570+ .set_affinity = set_ioapic_affinity_irq,
6571 #endif
6572- .retrigger = ioapic_retrigger,
6573+ .retrigger = ioapic_retrigger_irq,
6574 };
6575 #endif /* !CONFIG_XEN */
6576
6577@@ -1746,12 +1407,7 @@ static inline void init_IO_APIC_traps(vo
6578 */
6579 for (irq = 0; irq < NR_IRQS ; irq++) {
6580 int tmp = irq;
6581- if (use_pci_vector()) {
6582- if (!platform_legacy_irq(tmp))
6583- if ((tmp = vector_to_irq(tmp)) == -1)
6584- continue;
6585- }
6586- if (IO_APIC_IRQ(tmp) && !IO_APIC_VECTOR(tmp)) {
6587+ if (IO_APIC_IRQ(tmp) && !irq_vector[tmp]) {
6588 /*
6589 * Hmm.. We don't have an entry for this,
6590 * so default to an old-fashioned 8259
6591@@ -1762,7 +1418,7 @@ static inline void init_IO_APIC_traps(vo
6592 #ifndef CONFIG_XEN
6593 else
6594 /* Strange. Oh, well.. */
6595- irq_desc[irq].chip = &no_irq_type;
6596+ irq_desc[irq].chip = &no_irq_chip;
6597 #endif
6598 }
6599 }
6600@@ -1883,8 +1539,6 @@ static inline void unlock_ExtINT_logic(v
6601 spin_unlock_irqrestore(&ioapic_lock, flags);
6602 }
6603
6604-int timer_uses_ioapic_pin_0;
6605-
6606 /*
6607 * This code may look a bit paranoid, but it's supposed to cooperate with
6608 * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ
6609@@ -1897,13 +1551,13 @@ static inline void check_timer(void)
6610 {
6611 int apic1, pin1, apic2, pin2;
6612 int vector;
6613+ cpumask_t mask;
6614
6615 /*
6616 * get/set the timer IRQ vector:
6617 */
6618 disable_8259A_irq(0);
6619- vector = assign_irq_vector(0);
6620- set_intr_gate(vector, interrupt[0]);
6621+ vector = assign_irq_vector(0, TARGET_CPUS, &mask);
6622
6623 /*
6624 * Subtle, code in do_timer_interrupt() expects an AEOI
6625@@ -1922,9 +1576,6 @@ static inline void check_timer(void)
6626 pin2 = ioapic_i8259.pin;
6627 apic2 = ioapic_i8259.apic;
6628
6629- if (pin1 == 0)
6630- timer_uses_ioapic_pin_0 = 1;
6631-
6632 apic_printk(APIC_VERBOSE,KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n",
6633 vector, apic1, pin1, apic2, pin2);
6634
6635@@ -2039,11 +1690,6 @@ void __init setup_IO_APIC(void)
6636
6637 apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");
6638
6639- /*
6640- * Set up the IO-APIC IRQ routing table.
6641- */
6642- if (!acpi_ioapic)
6643- setup_ioapic_ids_from_mpc();
6644 #ifndef CONFIG_XEN
6645 sync_Arb_IDs();
6646 #endif /* !CONFIG_XEN */
6647@@ -2066,17 +1712,12 @@ static int ioapic_suspend(struct sys_dev
6648 {
6649 struct IO_APIC_route_entry *entry;
6650 struct sysfs_ioapic_data *data;
6651- unsigned long flags;
6652 int i;
6653
6654 data = container_of(dev, struct sysfs_ioapic_data, dev);
6655 entry = data->entry;
6656- spin_lock_irqsave(&ioapic_lock, flags);
6657- for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) {
6658- *(((int *)entry) + 1) = io_apic_read(dev->id, 0x11 + 2 * i);
6659- *(((int *)entry) + 0) = io_apic_read(dev->id, 0x10 + 2 * i);
6660- }
6661- spin_unlock_irqrestore(&ioapic_lock, flags);
6662+ for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ )
6663+ *entry = ioapic_read_entry(dev->id, i);
6664
6665 return 0;
6666 }
6667@@ -2098,11 +1739,9 @@ static int ioapic_resume(struct sys_devi
6668 reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid;
6669 io_apic_write(dev->id, 0, reg_00.raw);
6670 }
6671- for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) {
6672- io_apic_write(dev->id, 0x11+2*i, *(((int *)entry)+1));
6673- io_apic_write(dev->id, 0x10+2*i, *(((int *)entry)+0));
6674- }
6675 spin_unlock_irqrestore(&ioapic_lock, flags);
6676+ for (i = 0; i < nr_ioapic_registers[dev->id]; i++)
6677+ ioapic_write_entry(dev->id, i, entry[i]);
6678
6679 return 0;
6680 }
6681@@ -2148,28 +1787,254 @@ static int __init ioapic_init_sysfs(void
6682
6683 device_initcall(ioapic_init_sysfs);
6684
6685+/*
6686+ * Dynamic irq allocate and deallocation
6687+ */
6688+int create_irq(void)
6689+{
6690+ /* Allocate an unused irq */
6691+ int irq;
6692+ int new;
6693+ int vector = 0;
6694+ unsigned long flags;
6695+ cpumask_t mask;
6696+
6697+ irq = -ENOSPC;
6698+ spin_lock_irqsave(&vector_lock, flags);
6699+ for (new = (NR_IRQS - 1); new >= 0; new--) {
6700+ if (platform_legacy_irq(new))
6701+ continue;
6702+ if (irq_vector[new] != 0)
6703+ continue;
6704+ vector = __assign_irq_vector(new, TARGET_CPUS, &mask);
6705+ if (likely(vector > 0))
6706+ irq = new;
6707+ break;
6708+ }
6709+ spin_unlock_irqrestore(&vector_lock, flags);
6710+
6711+ if (irq >= 0) {
6712+ dynamic_irq_init(irq);
6713+ }
6714+ return irq;
6715+}
6716+
6717+void destroy_irq(unsigned int irq)
6718+{
6719+ unsigned long flags;
6720+
6721+ dynamic_irq_cleanup(irq);
6722+
6723+ spin_lock_irqsave(&vector_lock, flags);
6724+ irq_vector[irq] = 0;
6725+ spin_unlock_irqrestore(&vector_lock, flags);
6726+}
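A short usage sketch (annotation) of the allocate/wire/free life cycle that the MSI code below builds on; example_dynamic_irq is hypothetical:

static int example_dynamic_irq(void)
{
        int irq = create_irq();

        if (irq < 0)
                return irq;     /* -ENOSPC: no free vector */
        /* set_irq_chip_and_handler_name(irq, ...); request_irq(...); */
        destroy_irq(irq);       /* releases the vector again */
        return 0;
}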
6727+
6728 #endif /* CONFIG_XEN */
6729
6730-/* --------------------------------------------------------------------------
6731- ACPI-based IOAPIC Configuration
6732- -------------------------------------------------------------------------- */
6733+/*
5734+ * MSI message composition
6735+ */
6736+#if defined(CONFIG_PCI_MSI) && !defined(CONFIG_XEN)
6737+static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
6738+{
6739+ int vector;
6740+ unsigned dest;
6741+ cpumask_t tmp;
6742
6743-#ifdef CONFIG_ACPI
6744+ vector = assign_irq_vector(irq, TARGET_CPUS, &tmp);
6745+ if (vector >= 0) {
6746+ dest = cpu_mask_to_apicid(tmp);
6747+
6748+ msg->address_hi = MSI_ADDR_BASE_HI;
6749+ msg->address_lo =
6750+ MSI_ADDR_BASE_LO |
6751+ ((INT_DEST_MODE == 0) ?
6752+ MSI_ADDR_DEST_MODE_PHYSICAL:
6753+ MSI_ADDR_DEST_MODE_LOGICAL) |
6754+ ((INT_DELIVERY_MODE != dest_LowestPrio) ?
6755+ MSI_ADDR_REDIRECTION_CPU:
6756+ MSI_ADDR_REDIRECTION_LOWPRI) |
6757+ MSI_ADDR_DEST_ID(dest);
6758+
6759+ msg->data =
6760+ MSI_DATA_TRIGGER_EDGE |
6761+ MSI_DATA_LEVEL_ASSERT |
6762+ ((INT_DELIVERY_MODE != dest_LowestPrio) ?
6763+ MSI_DATA_DELIVERY_FIXED:
6764+ MSI_DATA_DELIVERY_LOWPRI) |
6765+ MSI_DATA_VECTOR(vector);
6766+ }
6767+ return vector;
6768+}
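A worked composition (annotation), specializing the branches above to logical destination mode with lowest-priority delivery. example_compose is hypothetical and uses only the MSI_* macros the patch itself references:

static void example_compose(struct msi_msg *msg, unsigned dest, int vector)
{
        msg->address_hi = MSI_ADDR_BASE_HI;
        msg->address_lo = MSI_ADDR_BASE_LO |
                          MSI_ADDR_DEST_MODE_LOGICAL |
                          MSI_ADDR_REDIRECTION_LOWPRI |
                          MSI_ADDR_DEST_ID(dest);
        msg->data = MSI_DATA_TRIGGER_EDGE |
                    MSI_DATA_LEVEL_ASSERT |
                    MSI_DATA_DELIVERY_LOWPRI |
                    MSI_DATA_VECTOR(vector);
}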
6769
6770-#define IO_APIC_MAX_ID 0xFE
6771+#ifdef CONFIG_SMP
6772+static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
6773+{
6774+ struct msi_msg msg;
6775+ unsigned int dest;
6776+ cpumask_t tmp;
6777+ int vector;
6778+
6779+ cpus_and(tmp, mask, cpu_online_map);
6780+ if (cpus_empty(tmp))
6781+ tmp = TARGET_CPUS;
6782+
6783+ cpus_and(mask, tmp, CPU_MASK_ALL);
6784+
6785+ vector = assign_irq_vector(irq, mask, &tmp);
6786+ if (vector < 0)
6787+ return;
6788+
6789+ dest = cpu_mask_to_apicid(tmp);
6790+
6791+ read_msi_msg(irq, &msg);
6792+
6793+ msg.data &= ~MSI_DATA_VECTOR_MASK;
6794+ msg.data |= MSI_DATA_VECTOR(vector);
6795+ msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
6796+ msg.address_lo |= MSI_ADDR_DEST_ID(dest);
6797
6798-int __init io_apic_get_version (int ioapic)
6799+ write_msi_msg(irq, &msg);
6800+ set_native_irq_info(irq, mask);
6801+}
6802+#endif /* CONFIG_SMP */
6803+
6804+/*
6805+ * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
6806+ * which implement the MSI or MSI-X Capability Structure.
6807+ */
6808+static struct irq_chip msi_chip = {
6809+ .name = "PCI-MSI",
6810+ .unmask = unmask_msi_irq,
6811+ .mask = mask_msi_irq,
6812+ .ack = ack_apic_edge,
6813+#ifdef CONFIG_SMP
6814+ .set_affinity = set_msi_irq_affinity,
6815+#endif
6816+ .retrigger = ioapic_retrigger_irq,
6817+};
6818+
6819+int arch_setup_msi_irq(unsigned int irq, struct pci_dev *dev)
6820 {
6821- union IO_APIC_reg_01 reg_01;
6822- unsigned long flags;
6823+ struct msi_msg msg;
6824+ int ret;
6825+ ret = msi_compose_msg(dev, irq, &msg);
6826+ if (ret < 0)
6827+ return ret;
6828
6829- spin_lock_irqsave(&ioapic_lock, flags);
6830- reg_01.raw = io_apic_read(ioapic, 1);
6831- spin_unlock_irqrestore(&ioapic_lock, flags);
6832+ write_msi_msg(irq, &msg);
6833
6834- return reg_01.bits.version;
6835+ set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
6836+
6837+ return 0;
6838+}
6839+
6840+void arch_teardown_msi_irq(unsigned int irq)
6841+{
6842+ return;
6843 }
6844
6845+#endif /* CONFIG_PCI_MSI */
6846+
6847+/*
6848+ * Hypertransport interrupt support
6849+ */
6850+#ifdef CONFIG_HT_IRQ
6851+
6852+#ifdef CONFIG_SMP
6853+
6854+static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
6855+{
6856+ struct ht_irq_msg msg;
6857+ fetch_ht_irq_msg(irq, &msg);
6858+
6859+ msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK | HT_IRQ_LOW_DEST_ID_MASK);
6860+ msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK);
6861+
6862+ msg.address_lo |= HT_IRQ_LOW_VECTOR(vector) | HT_IRQ_LOW_DEST_ID(dest);
6863+ msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest);
6864+
6865+ write_ht_irq_msg(irq, &msg);
6866+}
6867+
6868+static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
6869+{
6870+ unsigned int dest;
6871+ cpumask_t tmp;
6872+ int vector;
6873+
6874+ cpus_and(tmp, mask, cpu_online_map);
6875+ if (cpus_empty(tmp))
6876+ tmp = TARGET_CPUS;
6877+
6878+ cpus_and(mask, tmp, CPU_MASK_ALL);
6879+
6880+ vector = assign_irq_vector(irq, mask, &tmp);
6881+ if (vector < 0)
6882+ return;
6883+
6884+ dest = cpu_mask_to_apicid(tmp);
6885+
6886+ target_ht_irq(irq, dest, vector);
6887+ set_native_irq_info(irq, mask);
6888+}
6889+#endif
6890+
6891+static struct irq_chip ht_irq_chip = {
6892+ .name = "PCI-HT",
6893+ .mask = mask_ht_irq,
6894+ .unmask = unmask_ht_irq,
6895+ .ack = ack_apic_edge,
6896+#ifdef CONFIG_SMP
6897+ .set_affinity = set_ht_irq_affinity,
6898+#endif
6899+ .retrigger = ioapic_retrigger_irq,
6900+};
6901+
6902+int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
6903+{
6904+ int vector;
6905+ cpumask_t tmp;
6906+
6907+ vector = assign_irq_vector(irq, TARGET_CPUS, &tmp);
6908+ if (vector >= 0) {
6909+ struct ht_irq_msg msg;
6910+ unsigned dest;
6911+
6912+ dest = cpu_mask_to_apicid(tmp);
6913+
6914+ msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
6915+
6916+ msg.address_lo =
6917+ HT_IRQ_LOW_BASE |
6918+ HT_IRQ_LOW_DEST_ID(dest) |
6919+ HT_IRQ_LOW_VECTOR(vector) |
6920+ ((INT_DEST_MODE == 0) ?
6921+ HT_IRQ_LOW_DM_PHYSICAL :
6922+ HT_IRQ_LOW_DM_LOGICAL) |
6923+ HT_IRQ_LOW_RQEOI_EDGE |
6924+ ((INT_DELIVERY_MODE != dest_LowestPrio) ?
6925+ HT_IRQ_LOW_MT_FIXED :
6926+ HT_IRQ_LOW_MT_ARBITRATED) |
6927+ HT_IRQ_LOW_IRQ_MASKED;
6928+
6929+ write_ht_irq_msg(irq, &msg);
6930+
6931+ set_irq_chip_and_handler_name(irq, &ht_irq_chip,
6932+ handle_edge_irq, "edge");
6933+ }
6934+ return vector;
6935+}
6936+#endif /* CONFIG_HT_IRQ */
6937+
6938+/* --------------------------------------------------------------------------
6939+ ACPI-based IOAPIC Configuration
6940+ -------------------------------------------------------------------------- */
6941+
6942+#ifdef CONFIG_ACPI
6943+
6944+#define IO_APIC_MAX_ID 0xFE
6945
6946 int __init io_apic_get_redir_entries (int ioapic)
6947 {
6948@@ -2188,6 +2053,8 @@ int io_apic_set_pci_routing (int ioapic,
6949 {
6950 struct IO_APIC_route_entry entry;
6951 unsigned long flags;
6952+ int vector;
6953+ cpumask_t mask;
6954
6955 if (!IO_APIC_IRQ(irq)) {
6956 apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
6957@@ -2196,6 +2063,17 @@ int io_apic_set_pci_routing (int ioapic,
6958 }
6959
6960 /*
6961+ * IRQs < 16 are already in the irq_2_pin[] map
6962+ */
6963+ if (irq >= 16)
6964+ add_pin_to_irq(irq, ioapic, pin);
6965+
6966+
6967+ vector = assign_irq_vector(irq, TARGET_CPUS, &mask);
6968+ if (vector < 0)
6969+ return vector;
6970+
6971+ /*
6972 * Generate a PCI IRQ routing entry and program the IOAPIC accordingly.
6973 * Note that we mask (disable) IRQs now -- these get enabled when the
6974 * corresponding device driver registers for this IRQ.
6975@@ -2205,19 +2083,11 @@ int io_apic_set_pci_routing (int ioapic,
6976
6977 entry.delivery_mode = INT_DELIVERY_MODE;
6978 entry.dest_mode = INT_DEST_MODE;
6979- entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
6980+ entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask);
6981 entry.trigger = edge_level;
6982 entry.polarity = active_high_low;
6983 entry.mask = 1; /* Disabled (masked) */
6984-
6985- irq = gsi_irq_sharing(irq);
6986- /*
6987- * IRQs < 16 are already in the irq_2_pin[] map
6988- */
6989- if (irq >= 16)
6990- add_pin_to_irq(irq, ioapic, pin);
6991-
6992- entry.vector = assign_irq_vector(irq);
6993+ entry.vector = vector & 0xff;
6994
6995 apic_printk(APIC_VERBOSE,KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry (%d-%d -> 0x%x -> "
6996 "IRQ %d Mode:%i Active:%i)\n", ioapic,
6997@@ -2229,10 +2099,10 @@ int io_apic_set_pci_routing (int ioapic,
6998 if (!ioapic && (irq < 16))
6999 disable_8259A_irq(irq);
7000
7001+ ioapic_write_entry(ioapic, pin, entry);
7002+
7003 spin_lock_irqsave(&ioapic_lock, flags);
7004- io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1));
7005- io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0));
7006- set_native_irq_info(use_pci_vector() ? entry.vector : irq, TARGET_CPUS);
7007+ set_native_irq_info(irq, TARGET_CPUS);
7008 spin_unlock_irqrestore(&ioapic_lock, flags);
7009
7010 return 0;
7011--- sle11-2009-05-14.orig/arch/x86/kernel/ioport_64-xen.c 2009-05-14 11:02:43.000000000 +0200
7012+++ sle11-2009-05-14/arch/x86/kernel/ioport_64-xen.c 2009-03-04 11:28:34.000000000 +0100
7013@@ -58,6 +58,7 @@ asmlinkage long sys_ioperm(unsigned long
7014
7015 memset(bitmap, 0xff, IO_BITMAP_BYTES);
7016 t->io_bitmap_ptr = bitmap;
7017+ set_thread_flag(TIF_IO_BITMAP);
7018
7019 set_xen_guest_handle(set_iobitmap.bitmap, (char *)bitmap);
7020 set_iobitmap.nr_ports = IO_BITMAP_BITS;
7021--- sle11-2009-05-14.orig/arch/x86/kernel/irq_64-xen.c 2009-05-14 11:02:43.000000000 +0200
7022+++ sle11-2009-05-14/arch/x86/kernel/irq_64-xen.c 2009-03-04 11:28:34.000000000 +0100
7023@@ -20,11 +20,6 @@
7024 #include <asm/idle.h>
7025
7026 atomic_t irq_err_count;
7027-#ifdef CONFIG_X86_IO_APIC
7028-#ifdef APIC_MISMATCH_DEBUG
7029-atomic_t irq_mis_count;
7030-#endif
7031-#endif
7032
7033 #ifdef CONFIG_DEBUG_STACKOVERFLOW
7034 /*
7035@@ -79,7 +74,8 @@ int show_interrupts(struct seq_file *p,
7036 for_each_online_cpu(j)
7037 seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
7038 #endif
7039- seq_printf(p, " %14s", irq_desc[i].chip->typename);
7040+ seq_printf(p, " %8s", irq_desc[i].chip->name);
7041+ seq_printf(p, "-%-8s", irq_desc[i].name);
7042
7043 seq_printf(p, " %s", action->name);
7044 for (action=action->next; action; action = action->next)
7045@@ -99,11 +95,6 @@ skip:
7046 seq_putc(p, '\n');
7047 #endif
7048 seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
7049-#ifdef CONFIG_X86_IO_APIC
7050-#ifdef APIC_MISMATCH_DEBUG
7051- seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count));
7052-#endif
7053-#endif
7054 }
7055 return 0;
7056 }
7057@@ -114,24 +105,28 @@ skip:
7058 * handlers).
7059 */
7060 asmlinkage unsigned int do_IRQ(struct pt_regs *regs)
7061-{
7062+{
7063+ struct pt_regs *old_regs = set_irq_regs(regs);
7064+
7065 /* high bit used in ret_from_ code */
7066 unsigned irq = ~regs->orig_rax;
7067
7068- if (unlikely(irq >= NR_IRQS)) {
7069- printk(KERN_EMERG "%s: cannot handle IRQ %d\n",
7070- __FUNCTION__, irq);
7071- BUG();
7072- }
7073-
7074 /*exit_idle();*/
7075 /*irq_enter();*/
7076+
7077 #ifdef CONFIG_DEBUG_STACKOVERFLOW
7078 stack_overflow_check(regs);
7079 #endif
7080- __do_IRQ(irq, regs);
7081+
7082+ if (likely(irq < NR_IRQS))
7083+ generic_handle_irq(irq);
7084+ else
7085+ printk(KERN_EMERG "%s: %d.%d No irq handler for irq\n",
7086+ __func__, smp_processor_id(), irq);
7087+
7088 /*irq_exit();*/
7089
7090+ set_irq_regs(old_regs);
7091 return 1;
7092 }
7093
7094@@ -192,6 +187,6 @@ EXPORT_SYMBOL(do_softirq);
7095 */
7096 void ack_bad_irq(unsigned int irq)
7097 {
7098- printk("unexpected IRQ trap at vector %02x\n", irq);
7099+ printk("unexpected IRQ trap at irq %02x\n", irq);
7100 }
7101 #endif
7102--- sle11-2009-05-14.orig/arch/x86/kernel/mpparse_64-xen.c 2009-05-14 11:02:43.000000000 +0200
7103+++ sle11-2009-05-14/arch/x86/kernel/mpparse_64-xen.c 2009-03-04 11:28:34.000000000 +0100
7104@@ -41,8 +41,7 @@ int acpi_found_madt;
7105 * Various Linux-internal data structures created from the
7106 * MP-table.
7107 */
7108-unsigned char apic_version [MAX_APICS];
7109-unsigned char mp_bus_id_to_type [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
7110+DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
7111 int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
7112
7113 static int mp_current_pci_id = 0;
7114@@ -56,7 +55,6 @@ struct mpc_config_intsrc mp_irqs[MAX_IRQ
7115 int mp_irq_entries;
7116
7117 int nr_ioapics;
7118-int pic_mode;
7119 unsigned long mp_lapic_addr = 0;
7120
7121
7122@@ -71,19 +69,6 @@ unsigned disabled_cpus __initdata;
7123 /* Bitmask of physically existing CPUs */
7124 physid_mask_t phys_cpu_present_map = PHYSID_MASK_NONE;
7125
7126-/* ACPI MADT entry parsing functions */
7127-#ifdef CONFIG_ACPI
7128-extern struct acpi_boot_flags acpi_boot;
7129-#ifdef CONFIG_X86_LOCAL_APIC
7130-extern int acpi_parse_lapic (acpi_table_entry_header *header);
7131-extern int acpi_parse_lapic_addr_ovr (acpi_table_entry_header *header);
7132-extern int acpi_parse_lapic_nmi (acpi_table_entry_header *header);
7133-#endif /*CONFIG_X86_LOCAL_APIC*/
7134-#ifdef CONFIG_X86_IO_APIC
7135-extern int acpi_parse_ioapic (acpi_table_entry_header *header);
7136-#endif /*CONFIG_X86_IO_APIC*/
7137-#endif /*CONFIG_ACPI*/
7138-
7139 u8 bios_cpu_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
7140
7141
7142@@ -109,24 +94,20 @@ static int __init mpf_checksum(unsigned
7143 static void __cpuinit MP_processor_info (struct mpc_config_processor *m)
7144 {
7145 int cpu;
7146- unsigned char ver;
7147 cpumask_t tmp_map;
7148+ char *bootup_cpu = "";
7149
7150 if (!(m->mpc_cpuflag & CPU_ENABLED)) {
7151 disabled_cpus++;
7152 return;
7153 }
7154-
7155- printk(KERN_INFO "Processor #%d %d:%d APIC version %d\n",
7156- m->mpc_apicid,
7157- (m->mpc_cpufeature & CPU_FAMILY_MASK)>>8,
7158- (m->mpc_cpufeature & CPU_MODEL_MASK)>>4,
7159- m->mpc_apicver);
7160-
7161 if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
7162- Dprintk(" Bootup CPU\n");
7163+ bootup_cpu = " (Bootup-CPU)";
7164 boot_cpu_id = m->mpc_apicid;
7165 }
7166+
7167+ printk(KERN_INFO "Processor #%d%s\n", m->mpc_apicid, bootup_cpu);
7168+
7169 if (num_processors >= NR_CPUS) {
7170 printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
7171 " Processor ignored.\n", NR_CPUS);
7172@@ -137,24 +118,7 @@ static void __cpuinit MP_processor_info
7173 cpus_complement(tmp_map, cpu_present_map);
7174 cpu = first_cpu(tmp_map);
7175
7176-#if MAX_APICS < 255
7177- if ((int)m->mpc_apicid > MAX_APICS) {
7178- printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n",
7179- m->mpc_apicid, MAX_APICS);
7180- return;
7181- }
7182-#endif
7183- ver = m->mpc_apicver;
7184-
7185 physid_set(m->mpc_apicid, phys_cpu_present_map);
7186- /*
7187- * Validate version
7188- */
7189- if (ver == 0x0) {
7190- printk(KERN_ERR "BIOS bug, APIC version is 0 for CPU#%d! fixing up to 0x10. (tell your hw vendor)\n", m->mpc_apicid);
7191- ver = 0x10;
7192- }
7193- apic_version[m->mpc_apicid] = ver;
7194 if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
7195 /*
7196 * bios_cpu_apicid is required to have processors listed
7197@@ -185,37 +149,42 @@ static void __init MP_bus_info (struct m
7198 Dprintk("Bus #%d is %s\n", m->mpc_busid, str);
7199
7200 if (strncmp(str, "ISA", 3) == 0) {
7201- mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
7202- } else if (strncmp(str, "EISA", 4) == 0) {
7203- mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA;
7204+ set_bit(m->mpc_busid, mp_bus_not_pci);
7205 } else if (strncmp(str, "PCI", 3) == 0) {
7206- mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI;
7207+ clear_bit(m->mpc_busid, mp_bus_not_pci);
7208 mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id;
7209 mp_current_pci_id++;
7210- } else if (strncmp(str, "MCA", 3) == 0) {
7211- mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA;
7212 } else {
7213 printk(KERN_ERR "Unknown bustype %s\n", str);
7214 }
7215 }
7216
7217+static int bad_ioapic(unsigned long address)
7218+{
7219+ if (nr_ioapics >= MAX_IO_APICS) {
7220+ printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
7221+ "(found %d)\n", MAX_IO_APICS, nr_ioapics);
7222+ panic("Recompile kernel with bigger MAX_IO_APICS!\n");
7223+ }
7224+ if (!address) {
7225+ printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address"
7226+ " found in table, skipping!\n");
7227+ return 1;
7228+ }
7229+ return 0;
7230+}
7231+
7232 static void __init MP_ioapic_info (struct mpc_config_ioapic *m)
7233 {
7234 if (!(m->mpc_flags & MPC_APIC_USABLE))
7235 return;
7236
7237- printk("I/O APIC #%d Version %d at 0x%X.\n",
7238- m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr);
7239- if (nr_ioapics >= MAX_IO_APICS) {
7240- printk(KERN_ERR "Max # of I/O APICs (%d) exceeded (found %d).\n",
7241- MAX_IO_APICS, nr_ioapics);
7242- panic("Recompile kernel with bigger MAX_IO_APICS!.\n");
7243- }
7244- if (!m->mpc_apicaddr) {
7245- printk(KERN_ERR "WARNING: bogus zero I/O APIC address"
7246- " found in MP table, skipping!\n");
7247+ printk("I/O APIC #%d at 0x%X.\n",
7248+ m->mpc_apicid, m->mpc_apicaddr);
7249+
7250+ if (bad_ioapic(m->mpc_apicaddr))
7251 return;
7252- }
7253+
7254 mp_ioapics[nr_ioapics] = *m;
7255 nr_ioapics++;
7256 }
7257@@ -239,19 +208,6 @@ static void __init MP_lintsrc_info (stru
7258 m->mpc_irqtype, m->mpc_irqflag & 3,
7259 (m->mpc_irqflag >> 2) &3, m->mpc_srcbusid,
7260 m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
7261- /*
7262- * Well it seems all SMP boards in existence
7263- * use ExtINT/LVT1 == LINT0 and
7264- * NMI/LVT2 == LINT1 - the following check
7265- * will show us if this assumptions is false.
7266- * Until then we do not have to add baggage.
7267- */
7268- if ((m->mpc_irqtype == mp_ExtINT) &&
7269- (m->mpc_destapiclint != 0))
7270- BUG();
7271- if ((m->mpc_irqtype == mp_NMI) &&
7272- (m->mpc_destapiclint != 1))
7273- BUG();
7274 }
7275
7276 /*
7277@@ -265,7 +221,7 @@ static int __init smp_read_mpc(struct mp
7278 unsigned char *mpt=((unsigned char *)mpc)+count;
7279
7280 if (memcmp(mpc->mpc_signature,MPC_SIGNATURE,4)) {
7281- printk("SMP mptable: bad signature [%c%c%c%c]!\n",
7282+ printk("MPTABLE: bad signature [%c%c%c%c]!\n",
7283 mpc->mpc_signature[0],
7284 mpc->mpc_signature[1],
7285 mpc->mpc_signature[2],
7286@@ -273,31 +229,31 @@ static int __init smp_read_mpc(struct mp
7287 return 0;
7288 }
7289 if (mpf_checksum((unsigned char *)mpc,mpc->mpc_length)) {
7290- printk("SMP mptable: checksum error!\n");
7291+ printk("MPTABLE: checksum error!\n");
7292 return 0;
7293 }
7294 if (mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04) {
7295- printk(KERN_ERR "SMP mptable: bad table version (%d)!!\n",
7296+ printk(KERN_ERR "MPTABLE: bad table version (%d)!!\n",
7297 mpc->mpc_spec);
7298 return 0;
7299 }
7300 if (!mpc->mpc_lapic) {
7301- printk(KERN_ERR "SMP mptable: null local APIC address!\n");
7302+ printk(KERN_ERR "MPTABLE: null local APIC address!\n");
7303 return 0;
7304 }
7305 memcpy(str,mpc->mpc_oem,8);
7306- str[8]=0;
7307- printk(KERN_INFO "OEM ID: %s ",str);
7308+ str[8] = 0;
7309+ printk(KERN_INFO "MPTABLE: OEM ID: %s ",str);
7310
7311 memcpy(str,mpc->mpc_productid,12);
7312- str[12]=0;
7313- printk("Product ID: %s ",str);
7314+ str[12] = 0;
7315+ printk("MPTABLE: Product ID: %s ",str);
7316
7317- printk("APIC at: 0x%X\n",mpc->mpc_lapic);
7318+ printk("MPTABLE: APIC at: 0x%X\n",mpc->mpc_lapic);
7319
7320 /* save the local APIC address, it might be non-default */
7321 if (!acpi_lapic)
7322- mp_lapic_addr = mpc->mpc_lapic;
7323+ mp_lapic_addr = mpc->mpc_lapic;
7324
7325 /*
7326 * Now process the configuration blocks.
7327@@ -309,7 +265,7 @@ static int __init smp_read_mpc(struct mp
7328 struct mpc_config_processor *m=
7329 (struct mpc_config_processor *)mpt;
7330 if (!acpi_lapic)
7331- MP_processor_info(m);
7332+ MP_processor_info(m);
7333 mpt += sizeof(*m);
7334 count += sizeof(*m);
7335 break;
7336@@ -328,8 +284,8 @@ static int __init smp_read_mpc(struct mp
7337 struct mpc_config_ioapic *m=
7338 (struct mpc_config_ioapic *)mpt;
7339 MP_ioapic_info(m);
7340- mpt+=sizeof(*m);
7341- count+=sizeof(*m);
7342+ mpt += sizeof(*m);
7343+ count += sizeof(*m);
7344 break;
7345 }
7346 case MP_INTSRC:
7347@@ -338,8 +294,8 @@ static int __init smp_read_mpc(struct mp
7348 (struct mpc_config_intsrc *)mpt;
7349
7350 MP_intsrc_info(m);
7351- mpt+=sizeof(*m);
7352- count+=sizeof(*m);
7353+ mpt += sizeof(*m);
7354+ count += sizeof(*m);
7355 break;
7356 }
7357 case MP_LINTSRC:
7358@@ -347,15 +303,15 @@ static int __init smp_read_mpc(struct mp
7359 struct mpc_config_lintsrc *m=
7360 (struct mpc_config_lintsrc *)mpt;
7361 MP_lintsrc_info(m);
7362- mpt+=sizeof(*m);
7363- count+=sizeof(*m);
7364+ mpt += sizeof(*m);
7365+ count += sizeof(*m);
7366 break;
7367 }
7368 }
7369 }
7370 clustered_apic_check();
7371 if (!num_processors)
7372- printk(KERN_ERR "SMP mptable: no processors registered!\n");
7373+ printk(KERN_ERR "MPTABLE: no processors registered!\n");
7374 return num_processors;
7375 }
7376
7377@@ -451,13 +407,10 @@ static inline void __init construct_defa
7378 * 2 CPUs, numbered 0 & 1.
7379 */
7380 processor.mpc_type = MP_PROCESSOR;
7381- /* Either an integrated APIC or a discrete 82489DX. */
7382- processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
7383+ processor.mpc_apicver = 0;
7384 processor.mpc_cpuflag = CPU_ENABLED;
7385- processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
7386- (boot_cpu_data.x86_model << 4) |
7387- boot_cpu_data.x86_mask;
7388- processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
7389+ processor.mpc_cpufeature = 0;
7390+ processor.mpc_featureflag = 0;
7391 processor.mpc_reserved[0] = 0;
7392 processor.mpc_reserved[1] = 0;
7393 for (i = 0; i < 2; i++) {
7394@@ -476,14 +429,6 @@ static inline void __init construct_defa
7395 case 5:
7396 memcpy(bus.mpc_bustype, "ISA ", 6);
7397 break;
7398- case 2:
7399- case 6:
7400- case 3:
7401- memcpy(bus.mpc_bustype, "EISA ", 6);
7402- break;
7403- case 4:
7404- case 7:
7405- memcpy(bus.mpc_bustype, "MCA ", 6);
7406 }
7407 MP_bus_info(&bus);
7408 if (mpc_default_type > 4) {
7409@@ -494,7 +439,7 @@ static inline void __init construct_defa
7410
7411 ioapic.mpc_type = MP_IOAPIC;
7412 ioapic.mpc_apicid = 2;
7413- ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
7414+ ioapic.mpc_apicver = 0;
7415 ioapic.mpc_flags = MPC_APIC_USABLE;
7416 ioapic.mpc_apicaddr = 0xFEC00000;
7417 MP_ioapic_info(&ioapic);
7418@@ -537,13 +482,6 @@ void __init get_smp_config (void)
7419 printk(KERN_INFO "Using ACPI for processor (LAPIC) configuration information\n");
7420
7421 printk("Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification);
7422- if (mpf->mpf_feature2 & (1<<7)) {
7423- printk(KERN_INFO " IMCR and PIC compatibility mode.\n");
7424- pic_mode = 1;
7425- } else {
7426- printk(KERN_INFO " Virtual Wire compatibility mode.\n");
7427- pic_mode = 0;
7428- }
7429
7430 /*
7431 * Now see if we need to read further.
7432@@ -620,7 +558,7 @@ static int __init smp_scan_config (unsig
7433 return 0;
7434 }
7435
7436-void __init find_intel_smp (void)
7437+void __init find_smp_config(void)
7438 {
7439 unsigned int address;
7440
7441@@ -637,9 +575,7 @@ void __init find_intel_smp (void)
7442 smp_scan_config(0xF0000,0x10000))
7443 return;
7444 /*
7445- * If it is an SMP machine we should know now, unless the
7446- * configuration is in an EISA/MCA bus machine with an
7447- * extended bios data area.
7448+ * If it is an SMP machine we should know now.
7449 *
7450 * there is a real-mode segmented pointer pointing to the
7451 * 4K EBDA area at 0x40E, calculate and scan it here.
7452@@ -660,64 +596,38 @@ void __init find_intel_smp (void)
7453 printk(KERN_INFO "No mptable found.\n");
7454 }
7455
7456-/*
7457- * - Intel MP Configuration Table
7458- */
7459-void __init find_smp_config (void)
7460-{
7461-#ifdef CONFIG_X86_LOCAL_APIC
7462- find_intel_smp();
7463-#endif
7464-}
7465-
7466-
7467 /* --------------------------------------------------------------------------
7468 ACPI-based MP Configuration
7469 -------------------------------------------------------------------------- */
7470
7471 #ifdef CONFIG_ACPI
7472
7473-void __init mp_register_lapic_address (
7474- u64 address)
7475+void __init mp_register_lapic_address(u64 address)
7476 {
7477 #ifndef CONFIG_XEN
7478 mp_lapic_addr = (unsigned long) address;
7479-
7480 set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr);
7481-
7482 if (boot_cpu_id == -1U)
7483 boot_cpu_id = GET_APIC_ID(apic_read(APIC_ID));
7484-
7485- Dprintk("Boot CPU = %d\n", boot_cpu_physical_apicid);
7486 #endif
7487 }
7488
7489-
7490-void __cpuinit mp_register_lapic (
7491- u8 id,
7492- u8 enabled)
7493+void __cpuinit mp_register_lapic (u8 id, u8 enabled)
7494 {
7495 struct mpc_config_processor processor;
7496 int boot_cpu = 0;
7497
7498- if (id >= MAX_APICS) {
7499- printk(KERN_WARNING "Processor #%d invalid (max %d)\n",
7500- id, MAX_APICS);
7501- return;
7502- }
7503-
7504- if (id == boot_cpu_physical_apicid)
7505+ if (id == boot_cpu_id)
7506 boot_cpu = 1;
7507
7508 #ifndef CONFIG_XEN
7509 processor.mpc_type = MP_PROCESSOR;
7510 processor.mpc_apicid = id;
7511- processor.mpc_apicver = GET_APIC_VERSION(apic_read(APIC_LVR));
7512+ processor.mpc_apicver = 0;
7513 processor.mpc_cpuflag = (enabled ? CPU_ENABLED : 0);
7514 processor.mpc_cpuflag |= (boot_cpu ? CPU_BOOTPROCESSOR : 0);
7515- processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
7516- (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask;
7517- processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
7518+ processor.mpc_cpufeature = 0;
7519+ processor.mpc_featureflag = 0;
7520 processor.mpc_reserved[0] = 0;
7521 processor.mpc_reserved[1] = 0;
7522 #endif
7523@@ -725,8 +635,6 @@ void __cpuinit mp_register_lapic (
7524 MP_processor_info(&processor);
7525 }
7526
7527-#ifdef CONFIG_X86_IO_APIC
7528-
7529 #define MP_ISA_BUS 0
7530 #define MP_MAX_IOAPIC_PIN 127
7531
7532@@ -737,11 +645,9 @@ static struct mp_ioapic_routing {
7533 u32 pin_programmed[4];
7534 } mp_ioapic_routing[MAX_IO_APICS];
7535
7536-
7537-static int mp_find_ioapic (
7538- int gsi)
7539+static int mp_find_ioapic(int gsi)
7540 {
7541- int i = 0;
7542+ int i = 0;
7543
7544 /* Find the IOAPIC that manages this GSI. */
7545 for (i = 0; i < nr_ioapics; i++) {
7546@@ -751,28 +657,15 @@ static int mp_find_ioapic (
7547 }
7548
7549 printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi);
7550-
7551 return -1;
7552 }
7553-
7554
7555-void __init mp_register_ioapic (
7556- u8 id,
7557- u32 address,
7558- u32 gsi_base)
7559+void __init mp_register_ioapic(u8 id, u32 address, u32 gsi_base)
7560 {
7561- int idx = 0;
7562+ int idx = 0;
7563
7564- if (nr_ioapics >= MAX_IO_APICS) {
7565- printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
7566- "(found %d)\n", MAX_IO_APICS, nr_ioapics);
7567- panic("Recompile kernel with bigger MAX_IO_APICS!\n");
7568- }
7569- if (!address) {
7570- printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address"
7571- " found in MADT table, skipping!\n");
7572+ if (bad_ioapic(address))
7573 return;
7574- }
7575
7576 idx = nr_ioapics++;
7577
7578@@ -784,7 +677,7 @@ void __init mp_register_ioapic (
7579 set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
7580 #endif
7581 mp_ioapics[idx].mpc_apicid = id;
7582- mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx);
7583+ mp_ioapics[idx].mpc_apicver = 0;
7584
7585 /*
7586 * Build basic IRQ lookup table to facilitate gsi->io_apic lookups
7587@@ -795,21 +688,15 @@ void __init mp_register_ioapic (
7588 mp_ioapic_routing[idx].gsi_end = gsi_base +
7589 io_apic_get_redir_entries(idx);
7590
7591- printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, "
7592+ printk(KERN_INFO "IOAPIC[%d]: apic_id %d, address 0x%x, "
7593 "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid,
7594- mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr,
7595+ mp_ioapics[idx].mpc_apicaddr,
7596 mp_ioapic_routing[idx].gsi_start,
7597 mp_ioapic_routing[idx].gsi_end);
7598-
7599- return;
7600 }
7601
7602-
7603-void __init mp_override_legacy_irq (
7604- u8 bus_irq,
7605- u8 polarity,
7606- u8 trigger,
7607- u32 gsi)
7608+void __init
7609+mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
7610 {
7611 struct mpc_config_intsrc intsrc;
7612 int ioapic = -1;
7613@@ -847,22 +734,18 @@ void __init mp_override_legacy_irq (
7614 mp_irqs[mp_irq_entries] = intsrc;
7615 if (++mp_irq_entries == MAX_IRQ_SOURCES)
7616 panic("Max # of irq sources exceeded!\n");
7617-
7618- return;
7619 }
7620
7621-
7622-void __init mp_config_acpi_legacy_irqs (void)
7623+void __init mp_config_acpi_legacy_irqs(void)
7624 {
7625 struct mpc_config_intsrc intsrc;
7626- int i = 0;
7627- int ioapic = -1;
7628+ int i = 0;
7629+ int ioapic = -1;
7630
7631 /*
7632 * Fabricate the legacy ISA bus (bus #31).
7633 */
7634- mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA;
7635- Dprintk("Bus #%d is ISA\n", MP_ISA_BUS);
7636+ set_bit(MP_ISA_BUS, mp_bus_not_pci);
7637
7638 /*
7639 * Locate the IOAPIC that manages the ISA IRQs (0-15).
7640@@ -915,24 +798,13 @@ void __init mp_config_acpi_legacy_irqs (
7641 if (++mp_irq_entries == MAX_IRQ_SOURCES)
7642 panic("Max # of irq sources exceeded!\n");
7643 }
7644-
7645- return;
7646 }
7647
7648-#define MAX_GSI_NUM 4096
7649-
7650 int mp_register_gsi(u32 gsi, int triggering, int polarity)
7651 {
7652- int ioapic = -1;
7653- int ioapic_pin = 0;
7654- int idx, bit = 0;
7655- static int pci_irq = 16;
7656- /*
7657- * Mapping between Global System Interrupts, which
7658- * represent all possible interrupts, to the IRQs
7659- * assigned to actual devices.
7660- */
7661- static int gsi_to_irq[MAX_GSI_NUM];
7662+ int ioapic = -1;
7663+ int ioapic_pin = 0;
7664+ int idx, bit = 0;
7665
7666 if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC)
7667 return gsi;
7668@@ -965,47 +837,14 @@ int mp_register_gsi(u32 gsi, int trigger
7669 if ((1<<bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) {
7670 Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n",
7671 mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
7672- return gsi_to_irq[gsi];
7673+ return gsi;
7674 }
7675
7676 mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1<<bit);
7677
7678- if (triggering == ACPI_LEVEL_SENSITIVE) {
7679- /*
7680- * For PCI devices assign IRQs in order, avoiding gaps
7681- * due to unused I/O APIC pins.
7682- */
7683- int irq = gsi;
7684- if (gsi < MAX_GSI_NUM) {
7685- /*
7686- * Retain the VIA chipset work-around (gsi > 15), but
7687- * avoid a problem where the 8254 timer (IRQ0) is setup
7688- * via an override (so it's not on pin 0 of the ioapic),
7689- * and at the same time, the pin 0 interrupt is a PCI
7690- * type. The gsi > 15 test could cause these two pins
7691- * to be shared as IRQ0, and they are not shareable.
7692- * So test for this condition, and if necessary, avoid
7693- * the pin collision.
7694- */
7695- if (gsi > 15 || (gsi == 0 && !timer_uses_ioapic_pin_0))
7696- gsi = pci_irq++;
7697- /*
7698- * Don't assign IRQ used by ACPI SCI
7699- */
7700- if (gsi == acpi_fadt.sci_int)
7701- gsi = pci_irq++;
7702- gsi_to_irq[irq] = gsi;
7703- } else {
7704- printk(KERN_ERR "GSI %u is too high\n", gsi);
7705- return gsi;
7706- }
7707- }
7708-
7709 io_apic_set_pci_routing(ioapic, ioapic_pin, gsi,
7710 triggering == ACPI_EDGE_SENSITIVE ? 0 : 1,
7711 polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
7712 return gsi;
7713 }
7714-
7715-#endif /*CONFIG_X86_IO_APIC*/
7716 #endif /*CONFIG_ACPI*/
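
The biggest simplification in this file sits at the end: mp_register_gsi() used to maintain a 4096-entry gsi_to_irq remapping table and a moving pci_irq allocator; after this patch a GSI is handed back as the IRQ number directly, and the only per-I/O-APIC state left is the pin_programmed bitmap (four 32-bit words, one bit per pin). A sketch of that bookkeeping, with hypothetical names:

    #include <stdint.h>

    #define PINS_PER_IOAPIC 128   /* matches u32 pin_programmed[4] above */

    struct pin_state {
            uint32_t programmed[PINS_PER_IOAPIC / 32];
    };

    /* Mirror of the idx/bit bookkeeping in mp_register_gsi(): return
     * nonzero and do nothing if the pin was already routed, otherwise
     * mark it programmed. */
    static int pin_test_and_set(struct pin_state *s, unsigned pin)
    {
            unsigned idx = pin / 32;
            uint32_t bit = UINT32_C(1) << (pin % 32);

            if (s->programmed[idx] & bit)
                    return 1;
            s->programmed[idx] |= bit;
            return 0;
    }
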
7717--- sle11-2009-05-14.orig/arch/x86/kernel/process_64-xen.c 2009-05-14 11:02:43.000000000 +0200
7718+++ sle11-2009-05-14/arch/x86/kernel/process_64-xen.c 2009-03-04 11:28:34.000000000 +0100
7719@@ -89,25 +89,24 @@ void idle_notifier_unregister(struct not
7720 }
7721 EXPORT_SYMBOL(idle_notifier_unregister);
7722
7723-enum idle_state { CPU_IDLE, CPU_NOT_IDLE };
7724-static DEFINE_PER_CPU(enum idle_state, idle_state) = CPU_NOT_IDLE;
7725-
7726 void enter_idle(void)
7727 {
7728- __get_cpu_var(idle_state) = CPU_IDLE;
7729+ write_pda(isidle, 1);
7730 atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
7731 }
7732
7733 static void __exit_idle(void)
7734 {
7735- __get_cpu_var(idle_state) = CPU_NOT_IDLE;
7736+ if (test_and_clear_bit_pda(0, isidle) == 0)
7737+ return;
7738 atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
7739 }
7740
7741 /* Called from interrupts to signify idle end */
7742 void exit_idle(void)
7743 {
7744- if (current->pid | read_pda(irqcount))
7745+ /* idle loop has pid 0 */
7746+ if (current->pid)
7747 return;
7748 __exit_idle();
7749 }
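
The per-CPU idle_state enum becomes a single isidle flag in the PDA, and __exit_idle() clears it with an atomic test-and-clear so the IDLE_END notification fires at most once per idle period even though, as the comment added to cpu_idle() below notes, both the waking interrupt and the idle loop itself may call it. The idempotent-exit shape, sketched with GCC atomic builtins standing in for the PDA accessors:

    static int cpu_is_idle;   /* stand-in for the pda 'isidle' bit */

    static void sketch_enter_idle(void)
    {
            __atomic_store_n(&cpu_is_idle, 1, __ATOMIC_RELAXED);
            /* ... run IDLE_START notifiers ... */
    }

    static void sketch_exit_idle(void)
    {
            /* Only the first caller after sketch_enter_idle() sees 1
             * here, so IDLE_END cannot be delivered twice. */
            if (!__atomic_exchange_n(&cpu_is_idle, 0, __ATOMIC_RELAXED))
                    return;
            /* ... run IDLE_END notifiers ... */
    }
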
7750@@ -184,6 +183,9 @@ void cpu_idle (void)
7751 play_dead();
7752 enter_idle();
7753 idle();
7754+ /* In many cases the interrupt that ended idle
7755+ has already called exit_idle. But some idle
7756+ loops can be woken up without interrupt. */
7757 __exit_idle();
7758 }
7759
7760@@ -196,7 +198,7 @@ void cpu_idle (void)
7761 void cpu_idle_wait(void)
7762 {
7763 unsigned int cpu, this_cpu = get_cpu();
7764- cpumask_t map;
7765+ cpumask_t map, tmp = current->cpus_allowed;
7766
7767 set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
7768 put_cpu();
7769@@ -219,6 +221,8 @@ void cpu_idle_wait(void)
7770 }
7771 cpus_and(map, map, cpu_online_map);
7772 } while (!cpus_empty(map));
7773+
7774+ set_cpus_allowed(current, tmp);
7775 }
7776 EXPORT_SYMBOL_GPL(cpu_idle_wait);
7777
7778@@ -250,9 +254,9 @@ void __show_regs(struct pt_regs * regs)
7779 print_modules();
7780 printk("Pid: %d, comm: %.20s %s %s %.*s\n",
7781 current->pid, current->comm, print_tainted(),
7782- system_utsname.release,
7783- (int)strcspn(system_utsname.version, " "),
7784- system_utsname.version);
7785+ init_utsname()->release,
7786+ (int)strcspn(init_utsname()->version, " "),
7787+ init_utsname()->version);
7788 printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip);
7789 printk_address(regs->rip);
7790 printk("RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp,
7791@@ -310,6 +314,7 @@ void exit_thread(void)
7792
7793 kfree(t->io_bitmap_ptr);
7794 t->io_bitmap_ptr = NULL;
7795+ clear_thread_flag(TIF_IO_BITMAP);
7796 /*
7797 * Careful, clear this in the TSS too:
7798 */
7799@@ -340,6 +345,7 @@ void flush_thread(void)
7800 if (t->flags & _TIF_IA32)
7801 current_thread_info()->status |= TS_COMPAT;
7802 }
7803+ t->flags &= ~_TIF_DEBUG;
7804
7805 tsk->thread.debugreg0 = 0;
7806 tsk->thread.debugreg1 = 0;
7807@@ -432,7 +438,7 @@ int copy_thread(int nr, unsigned long cl
7808 asm("mov %%es,%0" : "=m" (p->thread.es));
7809 asm("mov %%ds,%0" : "=m" (p->thread.ds));
7810
7811- if (unlikely(me->thread.io_bitmap_ptr != NULL)) {
7812+ if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
7813 p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
7814 if (!p->thread.io_bitmap_ptr) {
7815 p->thread.io_bitmap_max = 0;
7816@@ -440,6 +446,7 @@ int copy_thread(int nr, unsigned long cl
7817 }
7818 memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
7819 IO_BITMAP_BYTES);
7820+ set_tsk_thread_flag(p, TIF_IO_BITMAP);
7821 }
7822
7823 /*
7824@@ -474,6 +481,30 @@ static inline void __save_init_fpu( stru
7825 }
7826
7827 /*
7828+ * This special macro can be used to load a debugging register
7829+ */
7830+#define loaddebug(thread,r) set_debugreg(thread->debugreg ## r, r)
7831+
7832+static inline void __switch_to_xtra(struct task_struct *prev_p,
7833+ struct task_struct *next_p)
7834+{
7835+ struct thread_struct *prev, *next;
7836+
7837+ prev = &prev_p->thread,
7838+ next = &next_p->thread;
7839+
7840+ if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
7841+ loaddebug(next, 0);
7842+ loaddebug(next, 1);
7843+ loaddebug(next, 2);
7844+ loaddebug(next, 3);
7845+ /* no 4 and 5 */
7846+ loaddebug(next, 6);
7847+ loaddebug(next, 7);
7848+ }
7849+}
7850+
7851+/*
7852 * switch_to(x,y) should switch tasks from x to y.
7853 *
7854 * This could still be optimized:
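
Reloading the debug registers moves out of the common context-switch path into __switch_to_xtra(), which the hunk further down only calls when the incoming task's thread-info flags intersect _TIF_WORK_CTXSW. The loaddebug() macro leans on preprocessor token pasting to select the debugregN field; the trick in isolation:

    struct thread_sketch {
            unsigned long debugreg0, debugreg1, debugreg6, debugreg7;
    };

    static void set_debugreg_sketch(unsigned long val, int num)
    {
            (void)val;
            (void)num;      /* stand-in for the real debug-register write */
    }

    /* thread->debugreg ## r pastes the register number onto the field
     * name, so loaddebug_sketch(t, 7) expands to
     * set_debugreg_sketch((t)->debugreg7, 7). */
    #define loaddebug_sketch(thread, r) \
            set_debugreg_sketch((thread)->debugreg ## r, r)

    static void reload_debug_regs(struct thread_sketch *t)
    {
            loaddebug_sketch(t, 0);
            loaddebug_sketch(t, 1);
            /* registers 4 and 5 do not exist, as the original notes */
            loaddebug_sketch(t, 6);
            loaddebug_sketch(t, 7);
    }
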
7855@@ -501,6 +532,10 @@ __switch_to(struct task_struct *prev_p,
7856 #endif
7857 multicall_entry_t _mcl[8], *mcl = _mcl;
7858
7859+ /* we're going to use this soon, after a few expensive things */
7860+ if (next_p->fpu_counter>5)
7861+ prefetch(&next->i387.fxsave);
7862+
7863 /*
7864 * This is basically '__unlazy_fpu', except that we queue a
7865 * multicall to indicate FPU task switch, rather than
7866@@ -513,7 +548,8 @@ __switch_to(struct task_struct *prev_p,
7867 mcl->op = __HYPERVISOR_fpu_taskswitch;
7868 mcl->args[0] = 1;
7869 mcl++;
7870- }
7871+ } else
7872+ prev_p->fpu_counter = 0;
7873
7874 /*
7875 * Reload esp0, LDT and the page table pointer:
7876@@ -608,21 +644,29 @@ __switch_to(struct task_struct *prev_p,
7877 write_pda(oldrsp, next->userrsp);
7878 write_pda(pcurrent, next_p);
7879 write_pda(kernelstack,
7880- task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET);
7881+ (unsigned long)task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET);
7882+#ifdef CONFIG_CC_STACKPROTECTOR
7883+ write_pda(stack_canary, next_p->stack_canary);
7884+
7885+ /*
7886+ * Build time only check to make sure the stack_canary is at
7887+ * offset 40 in the pda; this is a gcc ABI requirement
7888+ */
7889+ BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
7890+#endif
7891
7892 /*
7893 * Now maybe reload the debug registers
7894 */
7895- if (unlikely(next->debugreg7)) {
7896- set_debugreg(next->debugreg0, 0);
7897- set_debugreg(next->debugreg1, 1);
7898- set_debugreg(next->debugreg2, 2);
7899- set_debugreg(next->debugreg3, 3);
7900- /* no 4 and 5 */
7901- set_debugreg(next->debugreg6, 6);
7902- set_debugreg(next->debugreg7, 7);
7903- }
7904+ if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW))
7905+ __switch_to_xtra(prev_p, next_p);
7906
7907+ /* If the task has used fpu the last 5 timeslices, just do a full
7908+ * restore of the math state immediately to avoid the trap; the
7909+ * chances of needing FPU soon are obviously high now
7910+ */
7911+ if (next_p->fpu_counter>5)
7912+ math_state_restore();
7913 return prev_p;
7914 }
7915
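
Two related hunks above implement a small heuristic: fpu_counter counts consecutive timeslices in which a task used the FPU (math_state_restore() increments it; see the traps_64-xen.c hunk later in this patch), and once it exceeds five, __switch_to() prefetches the save area early and restores the math state eagerly rather than paying for the device-not-available trap. Condensed, with illustrative names:

    struct task_sketch {
            int  fpu_counter;   /* consecutive timeslices with FPU use */
            char fxsave[512];   /* stand-in for thread.i387.fxsave     */
    };

    static void switch_in_fpu(struct task_sketch *next)
    {
            if (next->fpu_counter > 5)
                    __builtin_prefetch(next->fxsave); /* hide the miss */

            /* ... the expensive parts of the context switch ... */

            if (next->fpu_counter > 5) {
                    /* restore math state now: the task will almost
                     * surely touch the FPU again, so skip the #NM
                     * trap round-trip */
            }
            /* otherwise stay lazy: the first FPU insn traps, and the
             * handler restores state and bumps fpu_counter */
    }
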
7916@@ -842,7 +886,7 @@ int dump_task_regs(struct task_struct *t
7917
7918 unsigned long arch_align_stack(unsigned long sp)
7919 {
7920- if (randomize_va_space)
7921+ if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
7922 sp -= get_random_int() % 8192;
7923 return sp & ~0xf;
7924 }
7925--- sle11-2009-05-14.orig/arch/x86/kernel/setup_64-xen.c 2009-05-14 11:02:43.000000000 +0200
7926+++ sle11-2009-05-14/arch/x86/kernel/setup_64-xen.c 2009-03-04 11:28:34.000000000 +0100
7927@@ -88,9 +88,6 @@ extern struct edid_info edid_info;
7928 shared_info_t *HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page;
7929 EXPORT_SYMBOL(HYPERVISOR_shared_info);
7930
7931-extern char hypercall_page[PAGE_SIZE];
7932-EXPORT_SYMBOL(hypercall_page);
7933-
7934 static int xen_panic_event(struct notifier_block *, unsigned long, void *);
7935 static struct notifier_block xen_panic_block = {
7936 xen_panic_event, NULL, 0 /* try to go last */
7937@@ -118,16 +115,6 @@ EXPORT_SYMBOL(boot_cpu_data);
7938
7939 unsigned long mmu_cr4_features;
7940
7941-int acpi_disabled;
7942-EXPORT_SYMBOL(acpi_disabled);
7943-#ifdef CONFIG_ACPI
7944-extern int __initdata acpi_ht;
7945-extern acpi_interrupt_flags acpi_sci_flags;
7946-int __initdata acpi_force = 0;
7947-#endif
7948-
7949-int acpi_numa __initdata;
7950-
7951 /* Boot loader ID as an integer, for the benefit of proc_dointvec */
7952 int bootloader_type;
7953
7954@@ -151,10 +138,6 @@ struct sys_desc_table_struct {
7955
7956 struct edid_info edid_info;
7957 EXPORT_SYMBOL_GPL(edid_info);
7958-struct e820map e820;
7959-#ifdef CONFIG_XEN
7960-struct e820map machine_e820;
7961-#endif
7962
7963 extern int root_mountflags;
7964
7965@@ -181,9 +164,6 @@ struct resource standard_io_resources[]
7966 .flags = IORESOURCE_BUSY | IORESOURCE_IO }
7967 };
7968
7969-#define STANDARD_IO_RESOURCES \
7970- (sizeof standard_io_resources / sizeof standard_io_resources[0])
7971-
7972 #define IORESOURCE_RAM (IORESOURCE_BUSY | IORESOURCE_MEM)
7973
7974 struct resource data_resource = {
7975@@ -230,9 +210,6 @@ static struct resource adapter_rom_resou
7976 .flags = IORESOURCE_ROM }
7977 };
7978
7979-#define ADAPTER_ROM_RESOURCES \
7980- (sizeof adapter_rom_resources / sizeof adapter_rom_resources[0])
7981-
7982 static struct resource video_rom_resource = {
7983 .name = "Video ROM",
7984 .start = 0xc0000,
7985@@ -309,7 +286,8 @@ static void __init probe_roms(void)
7986 }
7987
7988 /* check for adapter roms on 2k boundaries */
7989- for (i = 0; i < ADAPTER_ROM_RESOURCES && start < upper; start += 2048) {
7990+ for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper;
7991+ start += 2048) {
7992 rom = isa_bus_to_virt(start);
7993 if (!romsignature(rom))
7994 continue;
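
Both hand-rolled element-count macros (ADAPTER_ROM_RESOURCES here, STANDARD_IO_RESOURCES earlier, both deleted above) give way to ARRAY_SIZE(), which is derived from the array itself and cannot silently go stale if the definition changes. Its core is just:

    #include <stdio.h>

    #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))

    int main(void)
    {
            int resources[7];

            /* Prints 7.  This only works on a true array in scope:
             * once 'resources' decays to a pointer, sizeof lies. */
            printf("%zu\n", ARRAY_SIZE(resources));
            return 0;
    }
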
7995@@ -329,187 +307,22 @@ static void __init probe_roms(void)
7996 }
7997 }
7998
7999-/* Check for full argument with no trailing characters */
8000-static int fullarg(char *p, char *arg)
8001+#ifdef CONFIG_PROC_VMCORE
8002+/* elfcorehdr= specifies the location of elf core header
8003+ * stored by the crashed kernel. This option will be passed
8004+ * by kexec loader to the capture kernel.
8005+ */
8006+static int __init setup_elfcorehdr(char *arg)
8007 {
8008- int l = strlen(arg);
8009- return !memcmp(p, arg, l) && (p[l] == 0 || isspace(p[l]));
8010+ char *end;
8011+ if (!arg)
8012+ return -EINVAL;
8013+ elfcorehdr_addr = memparse(arg, &end);
8014+ return end > arg ? 0 : -EINVAL;
8015 }
8016-
8017-static __init void parse_cmdline_early (char ** cmdline_p)
8018-{
8019- char c = ' ', *to = command_line, *from = COMMAND_LINE;
8020- int len = 0;
8021- int userdef = 0;
8022-
8023- for (;;) {
8024- if (c != ' ')
8025- goto next_char;
8026-
8027-#ifdef CONFIG_SMP
8028- /*
8029- * If the BIOS enumerates physical processors before logical,
8030- * maxcpus=N at enumeration-time can be used to disable HT.
8031- */
8032- else if (!memcmp(from, "maxcpus=", 8)) {
8033- extern unsigned int maxcpus;
8034-
8035- maxcpus = simple_strtoul(from + 8, NULL, 0);
8036- }
8037-#endif
8038-#ifdef CONFIG_ACPI
8039- /* "acpi=off" disables both ACPI table parsing and interpreter init */
8040- if (fullarg(from,"acpi=off"))
8041- disable_acpi();
8042-
8043- if (fullarg(from, "acpi=force")) {
8044- /* add later when we do DMI horrors: */
8045- acpi_force = 1;
8046- acpi_disabled = 0;
8047- }
8048-
8049- /* acpi=ht just means: do ACPI MADT parsing
8050- at bootup, but don't enable the full ACPI interpreter */
8051- if (fullarg(from, "acpi=ht")) {
8052- if (!acpi_force)
8053- disable_acpi();
8054- acpi_ht = 1;
8055- }
8056- else if (fullarg(from, "pci=noacpi"))
8057- acpi_disable_pci();
8058- else if (fullarg(from, "acpi=noirq"))
8059- acpi_noirq_set();
8060-
8061- else if (fullarg(from, "acpi_sci=edge"))
8062- acpi_sci_flags.trigger = 1;
8063- else if (fullarg(from, "acpi_sci=level"))
8064- acpi_sci_flags.trigger = 3;
8065- else if (fullarg(from, "acpi_sci=high"))
8066- acpi_sci_flags.polarity = 1;
8067- else if (fullarg(from, "acpi_sci=low"))
8068- acpi_sci_flags.polarity = 3;
8069-
8070- /* acpi=strict disables out-of-spec workarounds */
8071- else if (fullarg(from, "acpi=strict")) {
8072- acpi_strict = 1;
8073- }
8074-#ifdef CONFIG_X86_IO_APIC
8075- else if (fullarg(from, "acpi_skip_timer_override"))
8076- acpi_skip_timer_override = 1;
8077-#endif
8078-#endif
8079-
8080-#ifndef CONFIG_XEN
8081- if (fullarg(from, "nolapic") || fullarg(from, "disableapic")) {
8082- clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
8083- disable_apic = 1;
8084- }
8085-
8086- if (fullarg(from, "noapic"))
8087- skip_ioapic_setup = 1;
8088-
8089- if (fullarg(from,"apic")) {
8090- skip_ioapic_setup = 0;
8091- ioapic_force = 1;
8092- }
8093-#endif
8094-
8095- if (!memcmp(from, "mem=", 4))
8096- parse_memopt(from+4, &from);
8097-
8098- if (!memcmp(from, "memmap=", 7)) {
8099- /* exactmap option is for used defined memory */
8100- if (!memcmp(from+7, "exactmap", 8)) {
8101-#ifdef CONFIG_CRASH_DUMP
8102- /* If we are doing a crash dump, we
8103- * still need to know the real mem
8104- * size before original memory map is
8105- * reset.
8106- */
8107- saved_max_pfn = e820_end_of_ram();
8108-#endif
8109- from += 8+7;
8110- end_pfn_map = 0;
8111- e820.nr_map = 0;
8112- userdef = 1;
8113- }
8114- else {
8115- parse_memmapopt(from+7, &from);
8116- userdef = 1;
8117- }
8118- }
8119-
8120-#ifdef CONFIG_NUMA
8121- if (!memcmp(from, "numa=", 5))
8122- numa_setup(from+5);
8123+early_param("elfcorehdr", setup_elfcorehdr);
8124 #endif
8125
8126- if (!memcmp(from,"iommu=",6)) {
8127- iommu_setup(from+6);
8128- }
8129-
8130- if (fullarg(from,"oops=panic"))
8131- panic_on_oops = 1;
8132-
8133- if (!memcmp(from, "noexec=", 7))
8134- nonx_setup(from + 7);
8135-
8136-#ifdef CONFIG_KEXEC
8137- /* crashkernel=size@addr specifies the location to reserve for
8138- * a crash kernel. By reserving this memory we guarantee
8139- * that linux never set's it up as a DMA target.
8140- * Useful for holding code to do something appropriate
8141- * after a kernel panic.
8142- */
8143- else if (!memcmp(from, "crashkernel=", 12)) {
8144-#ifndef CONFIG_XEN
8145- unsigned long size, base;
8146- size = memparse(from+12, &from);
8147- if (*from == '@') {
8148- base = memparse(from+1, &from);
8149- /* FIXME: Do I want a sanity check
8150- * to validate the memory range?
8151- */
8152- crashk_res.start = base;
8153- crashk_res.end = base + size - 1;
8154- }
8155-#else
8156- printk("Ignoring crashkernel command line, "
8157- "parameter will be supplied by xen\n");
8158-#endif
8159- }
8160-#endif
8161-
8162-#ifdef CONFIG_PROC_VMCORE
8163- /* elfcorehdr= specifies the location of elf core header
8164- * stored by the crashed kernel. This option will be passed
8165- * by kexec loader to the capture kernel.
8166- */
8167- else if(!memcmp(from, "elfcorehdr=", 11))
8168- elfcorehdr_addr = memparse(from+11, &from);
8169-#endif
8170-
8171-#if defined(CONFIG_HOTPLUG_CPU) && !defined(CONFIG_XEN)
8172- else if (!memcmp(from, "additional_cpus=", 16))
8173- setup_additional_cpus(from+16);
8174-#endif
8175-
8176- next_char:
8177- c = *(from++);
8178- if (!c)
8179- break;
8180- if (COMMAND_LINE_SIZE <= ++len)
8181- break;
8182- *(to++) = c;
8183- }
8184- if (userdef) {
8185- printk(KERN_INFO "user-defined physical RAM map:\n");
8186- e820_print_map("user");
8187- }
8188- *to = '\0';
8189- *cmdline_p = command_line;
8190-}
8191-
8192 #ifndef CONFIG_NUMA
8193 static void __init
8194 contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
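
This is the heart of the file's churn: the entire hand-rolled parse_cmdline_early() scanner is deleted, each option becomes a small registered handler like setup_elfcorehdr() above, and setup_arch() simply calls parse_early_param() (visible in the setup_arch() hunks below). For illustration only, not part of this patch, a handler for a hypothetical size@addr option in the same style, using memparse() the way the removed crashkernel branch did:

    /* Hypothetical "reserve=SIZE@ADDR" option, shown only to illustrate
     * the early_param() + memparse() idiom adopted above. */
    static unsigned long reserve_base, reserve_size;

    static int __init setup_reserve(char *arg)
    {
            char *end;

            if (!arg)
                    return -EINVAL;
            reserve_size = memparse(arg, &end);  /* takes K/M/G suffixes */
            if (*end == '@')
                    reserve_base = memparse(end + 1, &end);
            return *end ? -EINVAL : 0;           /* 0 = handled, as above */
    }
    early_param("reserve", setup_reserve);
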
8195@@ -521,10 +334,11 @@ contig_initmem_init(unsigned long start_
8196 if (bootmap == -1L)
8197 panic("Cannot find bootmem map of size %ld\n",bootmap_size);
8198 bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn);
8199+ e820_register_active_regions(0, start_pfn, end_pfn);
8200 #ifdef CONFIG_XEN
8201- e820_bootmem_free(NODE_DATA(0), 0, xen_start_info->nr_pages<<PAGE_SHIFT);
8202+ free_bootmem_with_active_regions(0, xen_start_info->nr_pages);
8203 #else
8204- e820_bootmem_free(NODE_DATA(0), 0, end_pfn << PAGE_SHIFT);
8205+ free_bootmem_with_active_regions(0, end_pfn);
8206 #endif
8207 reserve_bootmem(bootmap, bootmap_size);
8208 }
8209@@ -587,6 +401,10 @@ static void discover_ebda(void)
8210 void __init setup_arch(char **cmdline_p)
8211 {
8212 #ifdef CONFIG_XEN
8213+ extern struct e820map machine_e820;
8214+
8215+ printk(KERN_INFO "Command line: %s\n", saved_command_line);
8216+
8217 /* Register a call for panic conditions. */
8218 atomic_notifier_chain_register(&panic_notifier_list, &xen_panic_block);
8219
8220@@ -612,6 +430,8 @@ void __init setup_arch(char **cmdline_p)
8221
8222 ARCH_SETUP
8223 #else
8224+ printk(KERN_INFO "Command line: %s\n", saved_command_line);
8225+
8226 ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV);
8227 screen_info = SCREEN_INFO;
8228 edid_info = EDID_INFO;
8229@@ -639,16 +459,22 @@ void __init setup_arch(char **cmdline_p)
8230 data_resource.start = virt_to_phys(&_etext);
8231 data_resource.end = virt_to_phys(&_edata)-1;
8232
8233- parse_cmdline_early(cmdline_p);
8234-
8235 early_identify_cpu(&boot_cpu_data);
8236
8237+ strlcpy(command_line, saved_command_line, COMMAND_LINE_SIZE);
8238+ *cmdline_p = command_line;
8239+
8240+ parse_early_param();
8241+
8242+ finish_e820_parsing();
8243+
8244+ e820_register_active_regions(0, 0, -1UL);
8245 /*
8246 * partially used pages are not usable - thus
8247 * we are rounding upwards:
8248 */
8249 end_pfn = e820_end_of_ram();
8250- num_physpages = end_pfn; /* for pfn_valid */
8251+ num_physpages = end_pfn;
8252
8253 check_efer();
8254
8255@@ -659,6 +485,14 @@ void __init setup_arch(char **cmdline_p)
8256 if (is_initial_xendomain())
8257 dmi_scan_machine();
8258
8259+ /* How many end-of-memory variables you have, grandma! */
8260+ max_low_pfn = end_pfn;
8261+ max_pfn = end_pfn;
8262+ high_memory = (void *)__va(end_pfn * PAGE_SIZE - 1) + 1;
8263+
8264+ /* Remove active ranges so rediscovery with NUMA-awareness happens */
8265+ remove_all_active_ranges();
8266+
8267 #ifdef CONFIG_ACPI_NUMA
8268 /*
8269 * Parse SRAT to discover nodes.
8270@@ -848,16 +682,16 @@ void __init setup_arch(char **cmdline_p)
8271 BUG();
8272 }
8273
8274+#ifdef CONFIG_ACPI
8275 if (!is_initial_xendomain()) {
8276 acpi_disabled = 1;
8277-#ifdef CONFIG_ACPI
8278 acpi_ht = 0;
8279-#endif
8280 }
8281 #endif
8282+#endif
8283
8284-#ifndef CONFIG_XEN
8285- check_ioapic();
8286+#if defined(CONFIG_PCI) && !defined(CONFIG_XEN)
8287+ early_quirks();
8288 #endif
8289
8290 zap_low_mappings(0);
8291@@ -907,6 +741,7 @@ void __init setup_arch(char **cmdline_p)
8292 e820_reserve_resources(machine_e820.map, machine_e820.nr_map);
8293 #else
8294 e820_reserve_resources(e820.map, e820.nr_map);
8295+ e820_mark_nosave_regions();
8296 #endif
8297
8298 request_resource(&iomem_resource, &video_ram_resource);
8299@@ -914,7 +749,7 @@ void __init setup_arch(char **cmdline_p)
8300 {
8301 unsigned i;
8302 /* request I/O space for devices used on all i[345]86 PCs */
8303- for (i = 0; i < STANDARD_IO_RESOURCES; i++)
8304+ for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++)
8305 request_resource(&ioport_resource, &standard_io_resources[i]);
8306 }
8307
8308@@ -1098,7 +933,7 @@ static void __init amd_detect_cmp(struct
8309 #endif
8310 }
8311
8312-static void __init init_amd(struct cpuinfo_x86 *c)
8313+static void __cpuinit init_amd(struct cpuinfo_x86 *c)
8314 {
8315 unsigned level;
8316
8317@@ -1154,6 +989,12 @@ static void __init init_amd(struct cpuin
8318
8319 /* Fix cpuid4 emulation for more */
8320 num_cache_leaves = 3;
8321+
8322+ /* When there is only one core no need to synchronize RDTSC */
8323+ if (num_possible_cpus() == 1)
8324+ set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
8325+ else
8326+ clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
8327 }
8328
8329 static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
8330@@ -1235,8 +1076,7 @@ static void srat_detect_node(void)
8331 node = first_node(node_online_map);
8332 numa_set_node(cpu, node);
8333
8334- if (acpi_numa > 0)
8335- printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
8336+ printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
8337 #endif
8338 }
8339
8340@@ -1270,6 +1110,8 @@ static void __cpuinit init_intel(struct
8341 if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
8342 (c->x86 == 0x6 && c->x86_model >= 0x0e))
8343 set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
8344+ if (c->x86 == 6)
8345+ set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability);
8346 set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
8347 c->x86_max_cores = intel_num_cpu_cores(c);
8348
8349@@ -1488,8 +1330,8 @@ static int show_cpuinfo(struct seq_file
8350
8351 /* Intel-defined (#2) */
8352 "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est",
8353- "tm2", NULL, "cid", NULL, NULL, "cx16", "xtpr", NULL,
8354- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
8355+ "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL,
8356+ NULL, NULL, "dca", NULL, NULL, NULL, NULL, NULL,
8357 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
8358
8359 /* VIA/Cyrix/Centaur-defined */
8360--- sle11-2009-05-14.orig/arch/x86/kernel/setup64-xen.c 2009-04-20 11:36:10.000000000 +0200
8361+++ sle11-2009-05-14/arch/x86/kernel/setup64-xen.c 2009-03-04 11:28:34.000000000 +0100
8362@@ -31,7 +31,7 @@
8363 #include <asm/hypervisor.h>
8364 #endif
8365
8366-char x86_boot_params[BOOT_PARAM_SIZE] __initdata = {0,};
8367+char x86_boot_params[BOOT_PARAM_SIZE] __initdata;
8368
8369 cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
8370
8371@@ -55,8 +55,10 @@ Control non executable mappings for 64bi
8372 on Enable(default)
8373 off Disable
8374 */
8375-int __init nonx_setup(char *str)
8376+static int __init nonx_setup(char *str)
8377 {
8378+ if (!str)
8379+ return -EINVAL;
8380 if (!strncmp(str, "on", 2)) {
8381 __supported_pte_mask |= _PAGE_NX;
8382 do_not_nx = 0;
8383@@ -64,9 +66,9 @@ int __init nonx_setup(char *str)
8384 do_not_nx = 1;
8385 __supported_pte_mask &= ~_PAGE_NX;
8386 }
8387- return 1;
8388+ return 0;
8389 }
8390-__setup("noexec=", nonx_setup); /* parsed early actually */
8391+early_param("noexec", nonx_setup);
8392
8393 int force_personality32 = 0;
8394
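
The nonx_setup() conversion shows the convention change that comes with early_param(): a classic __setup() handler returns 1 to say "consumed", whereas an early_param() handler runs much earlier in boot, may be handed a NULL argument, and returns 0 on success or a negative errno. Side by side, schematically:

    /* Old style, registered with __setup("noexec=", ...): return 1
     * to tell the caller the option was consumed. */
    static int __init old_style(char *str)
    {
            /* ... parse str ... */
            return 1;
    }

    /* New style, registered with early_param("noexec", ...): runs
     * before most of setup_arch(), must tolerate a NULL argument,
     * and returns 0 on success or a negative errno. */
    static int __init new_style(char *str)
    {
            if (!str)
                    return -EINVAL;
            /* ... parse str ... */
            return 0;
    }
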
8395@@ -102,12 +104,9 @@ void __init setup_per_cpu_areas(void)
8396 #endif
8397
8398 /* Copy section for each CPU (we discard the original) */
8399- size = ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES);
8400-#ifdef CONFIG_MODULES
8401- if (size < PERCPU_ENOUGH_ROOM)
8402- size = PERCPU_ENOUGH_ROOM;
8403-#endif
8404+ size = PERCPU_ENOUGH_ROOM;
8405
8406+ printk(KERN_INFO "PERCPU: Allocating %lu bytes of per cpu data\n", size);
8407 for_each_cpu_mask (i, cpu_possible_map) {
8408 char *ptr;
8409
8410@@ -169,7 +168,10 @@ void pda_init(int cpu)
8411 /* Setup up data that may be needed in __get_free_pages early */
8412 asm volatile("movl %0,%%fs ; movl %0,%%gs" :: "r" (0));
8413 #ifndef CONFIG_XEN
8414+ /* Memory clobbers used to order PDA accessed */
8415+ mb();
8416 wrmsrl(MSR_GS_BASE, pda);
8417+ mb();
8418 #else
8419 if (HYPERVISOR_set_segment_base(SEGBASE_GS_KERNEL,
8420 (unsigned long)pda))
8421@@ -302,28 +304,17 @@ void __cpuinit cpu_init (void)
8422 * set up and load the per-CPU TSS
8423 */
8424 for (v = 0; v < N_EXCEPTION_STACKS; v++) {
8425+ static const unsigned int order[N_EXCEPTION_STACKS] = {
8426+ [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
8427+ [DEBUG_STACK - 1] = DEBUG_STACK_ORDER
8428+ };
8429 if (cpu) {
8430- static const unsigned int order[N_EXCEPTION_STACKS] = {
8431- [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
8432- [DEBUG_STACK - 1] = DEBUG_STACK_ORDER
8433- };
8434-
8435 estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]);
8436 if (!estacks)
8437 panic("Cannot allocate exception stack %ld %d\n",
8438 v, cpu);
8439 }
8440- switch (v + 1) {
8441-#if DEBUG_STKSZ > EXCEPTION_STKSZ
8442- case DEBUG_STACK:
8443- cpu_pda(cpu)->debugstack = (unsigned long)estacks;
8444- estacks += DEBUG_STKSZ;
8445- break;
8446-#endif
8447- default:
8448- estacks += EXCEPTION_STKSZ;
8449- break;
8450- }
8451+ estacks += PAGE_SIZE << order[v];
8452 orig_ist->ist[v] = t->ist[v] = (unsigned long)estacks;
8453 }
8454
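
The per-stack switch statement in cpu_init() collapses into a table built with GCC's designated and range initializers: every slot defaults to EXCEPTION_STACK_ORDER via [0 ... N-1], and the later [DEBUG_STACK - 1] designator overrides just that slot. The construct in isolation:

    enum { N = 5, DEBUG_SLOT = 3 };

    static const unsigned int order[N] = {
            [0 ... N - 1] = 1,  /* GNU range initializer: default all slots */
            [DEBUG_SLOT]  = 2,  /* a later designator wins for its slot     */
    };
    /* order is { 1, 1, 1, 2, 1 } */
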
8455--- sle11-2009-05-14.orig/arch/x86/kernel/smp_64-xen.c 2009-05-14 11:02:43.000000000 +0200
8456+++ sle11-2009-05-14/arch/x86/kernel/smp_64-xen.c 2009-03-04 11:28:34.000000000 +0100
8457@@ -381,9 +381,8 @@ int smp_call_function_single (int cpu, v
8458 /* prevent preemption and reschedule on another processor */
8459 int me = get_cpu();
8460 if (cpu == me) {
8461- WARN_ON(1);
8462 put_cpu();
8463- return -EBUSY;
8464+ return 0;
8465 }
8466 spin_lock_bh(&call_lock);
8467 __smp_call_function_single(cpu, func, info, nonatomic, wait);
8468@@ -501,7 +500,7 @@ void smp_send_stop(void)
8469 #ifndef CONFIG_XEN
8470 asmlinkage void smp_reschedule_interrupt(void)
8471 #else
8472-asmlinkage irqreturn_t smp_reschedule_interrupt(void)
8473+asmlinkage irqreturn_t smp_reschedule_interrupt(int irq, void *ctx)
8474 #endif
8475 {
8476 #ifndef CONFIG_XEN
8477@@ -514,7 +513,7 @@ asmlinkage irqreturn_t smp_reschedule_in
8478 #ifndef CONFIG_XEN
8479 asmlinkage void smp_call_function_interrupt(void)
8480 #else
8481-asmlinkage irqreturn_t smp_call_function_interrupt(void)
8482+asmlinkage irqreturn_t smp_call_function_interrupt(int irq, void *ctx)
8483 #endif
8484 {
8485 void (*func) (void *info) = call_data->func;
8486@@ -545,31 +544,3 @@ asmlinkage irqreturn_t smp_call_function
8487 return IRQ_HANDLED;
8488 #endif
8489 }
8490-
8491-int safe_smp_processor_id(void)
8492-{
8493-#ifdef CONFIG_XEN
8494- return smp_processor_id();
8495-#else
8496- unsigned apicid, i;
8497-
8498- if (disable_apic)
8499- return 0;
8500-
8501- apicid = hard_smp_processor_id();
8502- if (apicid < NR_CPUS && x86_cpu_to_apicid[apicid] == apicid)
8503- return apicid;
8504-
8505- for (i = 0; i < NR_CPUS; ++i) {
8506- if (x86_cpu_to_apicid[i] == apicid)
8507- return i;
8508- }
8509-
8510- /* No entries in x86_cpu_to_apicid? Either no MPS|ACPI,
8511- * or called too early. Either way, we must be CPU 0. */
8512- if (x86_cpu_to_apicid[0] == BAD_APICID)
8513- return 0;
8514-
8515- return 0; /* Should not happen */
8516-#endif
8517-}
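
Two small things happen here. The Xen flavours of these IPI entry points gain the (int irq, void *ctx) arguments because, in a Xen guest, they are ordinary event-channel interrupt handlers registered through the request_irq() machinery, and 2.6.19 dropped the struct pt_regs parameter from that prototype. And safe_smp_processor_id() disappears outright, its callers switched to plain smp_processor_id() (see the traps_64-xen.c hunks below). A handler skeleton with the new prototype; the registration line is schematic, since the real code binds through the Xen event-channel layer:

    #include <linux/interrupt.h>

    static irqreturn_t sketch_ipi_handler(int irq, void *dev_id)
    {
            /* pt_regs is no longer passed in; 2.6.19 code uses
             * get_irq_regs() when it actually needs the registers */
            return IRQ_HANDLED;
    }

    /* e.g.: request_irq(irq, sketch_ipi_handler, IRQF_DISABLED,
     *                   "ipi", NULL); */
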
8518--- sle11-2009-05-14.orig/arch/x86/kernel/traps_64-xen.c 2009-05-14 11:02:43.000000000 +0200
8519+++ sle11-2009-05-14/arch/x86/kernel/traps_64-xen.c 2009-03-04 11:28:34.000000000 +0100
8520@@ -23,6 +23,7 @@
8521 #include <linux/delay.h>
8522 #include <linux/spinlock.h>
8523 #include <linux/interrupt.h>
8524+#include <linux/kallsyms.h>
8525 #include <linux/module.h>
8526 #include <linux/moduleparam.h>
8527 #include <linux/nmi.h>
8528@@ -45,6 +46,7 @@
8529 #include <asm/pda.h>
8530 #include <asm/proto.h>
8531 #include <asm/nmi.h>
8532+#include <asm/stacktrace.h>
8533
8534 asmlinkage void divide_error(void);
8535 asmlinkage void debug(void);
8536@@ -114,7 +116,6 @@ static int call_trace = 1;
8537 #endif
8538
8539 #ifdef CONFIG_KALLSYMS
8540-# include <linux/kallsyms.h>
8541 void printk_address(unsigned long address)
8542 {
8543 unsigned long offset = 0, symsize;
8544@@ -142,7 +143,7 @@ void printk_address(unsigned long addres
8545 #endif
8546
8547 static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
8548- unsigned *usedp, const char **idp)
8549+ unsigned *usedp, char **idp)
8550 {
8551 #ifndef CONFIG_X86_NO_TSS
8552 static char ids[][8] = {
8553@@ -162,26 +163,7 @@ static unsigned long *in_exception_stack
8554 * 'stack' is in one of them:
8555 */
8556 for (k = 0; k < N_EXCEPTION_STACKS; k++) {
8557- unsigned long end;
8558-
8559- /*
8560- * set 'end' to the end of the exception stack.
8561- */
8562- switch (k + 1) {
8563- /*
8564- * TODO: this block is not needed i think, because
8565- * setup64.c:cpu_init() sets up t->ist[DEBUG_STACK]
8566- * properly too.
8567- */
8568-#if DEBUG_STKSZ > EXCEPTION_STKSZ
8569- case DEBUG_STACK:
8570- end = cpu_pda(cpu)->debugstack + DEBUG_STKSZ;
8571- break;
8572-#endif
8573- default:
8574- end = per_cpu(orig_ist, cpu).ist[k];
8575- break;
8576- }
8577+ unsigned long end = per_cpu(orig_ist, cpu).ist[k];
8578 /*
8579 * Is 'stack' above this exception frame's end?
8580 * If yes then skip to the next frame.
8581@@ -236,13 +218,19 @@ static unsigned long *in_exception_stack
8582 return NULL;
8583 }
8584
8585-static int show_trace_unwind(struct unwind_frame_info *info, void *context)
8586+struct ops_and_data {
8587+ struct stacktrace_ops *ops;
8588+ void *data;
8589+};
8590+
8591+static int dump_trace_unwind(struct unwind_frame_info *info, void *context)
8592 {
8593+ struct ops_and_data *oad = (struct ops_and_data *)context;
8594 int n = 0;
8595
8596 while (unwind(info) == 0 && UNW_PC(info)) {
8597 n++;
8598- printk_address(UNW_PC(info));
8599+ oad->ops->address(oad->data, UNW_PC(info));
8600 if (arch_unw_user_mode(info))
8601 break;
8602 }
8603@@ -256,13 +244,19 @@ static int show_trace_unwind(struct unwi
8604 * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
8605 */
8606
8607-void show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * stack)
8608+static inline int valid_stack_ptr(struct thread_info *tinfo, void *p)
8609 {
8610- const unsigned cpu = safe_smp_processor_id();
8611+ void *t = (void *)tinfo;
8612+ return p > t && p < t + THREAD_SIZE - 3;
8613+}
8614+
8615+void dump_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * stack,
8616+ struct stacktrace_ops *ops, void *data)
8617+{
8618+ const unsigned cpu = smp_processor_id();
8619 unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr;
8620 unsigned used = 0;
8621-
8622- printk("\nCall Trace:\n");
8623+ struct thread_info *tinfo;
8624
8625 if (!tsk)
8626 tsk = current;
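
dump_trace() now refuses to dereference a candidate frame address unless valid_stack_ptr() places it inside the task's stack region (the thread_info sits at the base of the stack, so the valid window is strictly between tinfo and tinfo + THREAD_SIZE - 3). That keeps the walker from chasing corrupt frame pointers off the stack. The check in isolation:

    #define THREAD_SIZE 8192   /* illustrative; arch/config dependent */

    /* Mirrors valid_stack_ptr() above: strictly inside the stack
     * area, with a little headroom before the very top. */
    static int sketch_valid_stack_ptr(const void *tinfo, const void *p)
    {
            const char *t = tinfo;
            const char *c = p;

            return c > t && c < t + THREAD_SIZE - 3;
    }
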
8627@@ -270,32 +264,47 @@ void show_trace(struct task_struct *tsk,
8628 if (call_trace >= 0) {
8629 int unw_ret = 0;
8630 struct unwind_frame_info info;
8631+ struct ops_and_data oad = { .ops = ops, .data = data };
8632
8633 if (regs) {
8634 if (unwind_init_frame_info(&info, tsk, regs) == 0)
8635- unw_ret = show_trace_unwind(&info, NULL);
8636+ unw_ret = dump_trace_unwind(&info, &oad);
8637 } else if (tsk == current)
8638- unw_ret = unwind_init_running(&info, show_trace_unwind, NULL);
8639+ unw_ret = unwind_init_running(&info, dump_trace_unwind, &oad);
8640 else {
8641 if (unwind_init_blocked(&info, tsk) == 0)
8642- unw_ret = show_trace_unwind(&info, NULL);
8643+ unw_ret = dump_trace_unwind(&info, &oad);
8644 }
8645 if (unw_ret > 0) {
8646 if (call_trace == 1 && !arch_unw_user_mode(&info)) {
8647- print_symbol("DWARF2 unwinder stuck at %s\n",
8648+ ops->warning_symbol(data, "DWARF2 unwinder stuck at %s\n",
8649 UNW_PC(&info));
8650 if ((long)UNW_SP(&info) < 0) {
8651- printk("Leftover inexact backtrace:\n");
8652+ ops->warning(data, "Leftover inexact backtrace:\n");
8653 stack = (unsigned long *)UNW_SP(&info);
8654+ if (!stack)
8655+ return;
8656 } else
8657- printk("Full inexact backtrace again:\n");
8658+ ops->warning(data, "Full inexact backtrace again:\n");
8659 } else if (call_trace >= 1)
8660 return;
8661 else
8662- printk("Full inexact backtrace again:\n");
8663+ ops->warning(data, "Full inexact backtrace again:\n");
8664 } else
8665- printk("Inexact backtrace:\n");
8666+ ops->warning(data, "Inexact backtrace:\n");
8667 }
8668+ if (!stack) {
8669+ unsigned long dummy;
8670+ stack = &dummy;
8671+ if (tsk && tsk != current)
8672+ stack = (unsigned long *)tsk->thread.rsp;
8673+ }
8674+ /*
8675+ * Align the stack pointer on word boundary, later loops
8676+ * rely on that (and corruption / debug info bugs can cause
8677+ * unaligned values here):
8678+ */
8679+ stack = (unsigned long *)((unsigned long)stack & ~(sizeof(long)-1));
8680
8681 /*
8682 * Print function call entries within a stack. 'cond' is the
8683@@ -305,7 +314,9 @@ void show_trace(struct task_struct *tsk,
8684 #define HANDLE_STACK(cond) \
8685 do while (cond) { \
8686 unsigned long addr = *stack++; \
8687- if (kernel_text_address(addr)) { \
8688+ if (oops_in_progress ? \
8689+ __kernel_text_address(addr) : \
8690+ kernel_text_address(addr)) { \
8691 /* \
8692 * If the address is either in the text segment of the \
8693 * kernel, or in the region which contains vmalloc'ed \
8694@@ -314,7 +325,7 @@ void show_trace(struct task_struct *tsk,
8695 * down the cause of the crash will be able to figure \
8696 * out the call path that was taken. \
8697 */ \
8698- printk_address(addr); \
8699+ ops->address(data, addr); \
8700 } \
8701 } while (0)
8702
8703@@ -323,16 +334,17 @@ void show_trace(struct task_struct *tsk,
8704 * current stack address. If the stacks consist of nested
8705 * exceptions
8706 */
8707- for ( ; ; ) {
8708- const char *id;
8709+ for (;;) {
8710+ char *id;
8711 unsigned long *estack_end;
8712 estack_end = in_exception_stack(cpu, (unsigned long)stack,
8713 &used, &id);
8714
8715 if (estack_end) {
8716- printk(" <%s>", id);
8717+ if (ops->stack(data, id) < 0)
8718+ break;
8719 HANDLE_STACK (stack < estack_end);
8720- printk(" <EOE>");
8721+ ops->stack(data, "<EOE>");
8722 /*
8723 * We link to the next stack via the
8724 * second-to-last pointer (index -2 to end) in the
8725@@ -347,7 +359,8 @@ void show_trace(struct task_struct *tsk,
8726 (IRQSTACKSIZE - 64) / sizeof(*irqstack);
8727
8728 if (stack >= irqstack && stack < irqstack_end) {
8729- printk(" <IRQ>");
8730+ if (ops->stack(data, "IRQ") < 0)
8731+ break;
8732 HANDLE_STACK (stack < irqstack_end);
8733 /*
8734 * We link to the next stack (which would be
8735@@ -356,7 +369,7 @@ void show_trace(struct task_struct *tsk,
8736 */
8737 stack = (unsigned long *) (irqstack_end[-1]);
8738 irqstack_end = NULL;
8739- printk(" <EOI>");
8740+ ops->stack(data, "EOI");
8741 continue;
8742 }
8743 }
8744@@ -364,19 +377,58 @@ void show_trace(struct task_struct *tsk,
8745 }
8746
8747 /*
8748- * This prints the process stack:
8749+ * This handles the process stack:
8750 */
8751- HANDLE_STACK (((long) stack & (THREAD_SIZE-1)) != 0);
8752+ tinfo = current_thread_info();
8753+ HANDLE_STACK (valid_stack_ptr(tinfo, stack));
8754 #undef HANDLE_STACK
8755+}
8756+EXPORT_SYMBOL(dump_trace);
8757
8758+static void
8759+print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
8760+{
8761+ print_symbol(msg, symbol);
8762 printk("\n");
8763 }
8764
8765-static void _show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long * rsp)
8766+static void print_trace_warning(void *data, char *msg)
8767+{
8768+ printk("%s\n", msg);
8769+}
8770+
8771+static int print_trace_stack(void *data, char *name)
8772+{
8773+ printk(" <%s> ", name);
8774+ return 0;
8775+}
8776+
8777+static void print_trace_address(void *data, unsigned long addr)
8778+{
8779+ printk_address(addr);
8780+}
8781+
8782+static struct stacktrace_ops print_trace_ops = {
8783+ .warning = print_trace_warning,
8784+ .warning_symbol = print_trace_warning_symbol,
8785+ .stack = print_trace_stack,
8786+ .address = print_trace_address,
8787+};
8788+
8789+void
8790+show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long *stack)
8791+{
8792+ printk("\nCall Trace:\n");
8793+ dump_trace(tsk, regs, stack, &print_trace_ops, NULL);
8794+ printk("\n");
8795+}
8796+
8797+static void
8798+_show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long *rsp)
8799 {
8800 unsigned long *stack;
8801 int i;
8802- const int cpu = safe_smp_processor_id();
8803+ const int cpu = smp_processor_id();
8804 unsigned long *irqstack_end = (unsigned long *) (cpu_pda(cpu)->irqstackptr);
8805 unsigned long *irqstack = (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE);
8806
8807@@ -430,7 +482,7 @@ void show_registers(struct pt_regs *regs
8808 int i;
8809 int in_kernel = !user_mode(regs);
8810 unsigned long rsp;
8811- const int cpu = safe_smp_processor_id();
8812+ const int cpu = smp_processor_id();
8813 struct task_struct *cur = cpu_pda(cpu)->pcurrent;
8814
8815 rsp = regs->rsp;
8816@@ -505,9 +557,11 @@ static unsigned int die_nest_count;
8817
8818 unsigned __kprobes long oops_begin(void)
8819 {
8820- int cpu = safe_smp_processor_id();
8821+ int cpu = smp_processor_id();
8822 unsigned long flags;
8823
8824+ oops_enter();
8825+
8826 /* racy, but better than risking deadlock. */
8827 local_irq_save(flags);
8828 if (!spin_trylock(&die_lock)) {
8829@@ -536,6 +590,7 @@ void __kprobes oops_end(unsigned long fl
8830 spin_unlock_irqrestore(&die_lock, flags);
8831 if (panic_on_oops)
8832 panic("Fatal exception");
8833+ oops_exit();
8834 }
8835
8836 void __kprobes __die(const char * str, struct pt_regs * regs, long err)
8837@@ -572,8 +627,8 @@ void die(const char * str, struct pt_reg
8838 do_exit(SIGSEGV);
8839 }
8840
8841-#ifdef CONFIG_X86_LOCAL_APIC
8842-void __kprobes die_nmi(char *str, struct pt_regs *regs)
8843+#if defined(CONFIG_X86_LOCAL_APIC) || defined(CONFIG_SYSCTL)
8844+void __kprobes die_nmi(char *str, struct pt_regs *regs, int do_panic)
8845 {
8846 unsigned long flags = oops_begin();
8847
8848@@ -581,13 +636,12 @@ void __kprobes die_nmi(char *str, struct
8849 * We are in trouble anyway, lets at least try
8850 * to get a message out.
8851 */
8852- printk(str, safe_smp_processor_id());
8853+ printk(str, smp_processor_id());
8854 show_registers(regs);
8855 if (kexec_should_crash(current))
8856 crash_kexec(regs);
8857- if (panic_on_timeout || panic_on_oops)
8858- panic("nmi watchdog");
8859- printk("console shuts up ...\n");
8860+ if (do_panic || panic_on_oops)
8861+ panic("Non maskable interrupt");
8862 oops_end(flags);
8863 nmi_exit();
8864 local_irq_enable();
8865@@ -734,8 +788,15 @@ asmlinkage void __kprobes do_general_pro
8866 static __kprobes void
8867 mem_parity_error(unsigned char reason, struct pt_regs * regs)
8868 {
8869- printk("Uhhuh. NMI received. Dazed and confused, but trying to continue\n");
8870- printk("You probably have a hardware problem with your RAM chips\n");
8871+ printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n",
8872+ reason);
8873+ printk(KERN_EMERG "You probably have a hardware problem with your "
8874+ "RAM chips\n");
8875+
8876+ if (panic_on_unrecovered_nmi)
8877+ panic("NMI: Not continuing");
8878+
8879+ printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
8880
8881 #if 0 /* XEN */
8882 /* Clear and disable the memory parity error line. */
8883@@ -762,9 +823,15 @@ io_check_error(unsigned char reason, str
8884
8885 static __kprobes void
8886 unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
8887-{ printk("Uhhuh. NMI received for unknown reason %02x.\n", reason);
8888- printk("Dazed and confused, but trying to continue\n");
8889- printk("Do you have a strange power saving mode enabled?\n");
8890+{
8891+ printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n",
8892+ reason);
8893+ printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n");
8894+
8895+ if (panic_on_unrecovered_nmi)
8896+ panic("NMI: Not continuing");
8897+
8898+ printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
8899 }
8900
8901 /* Runs on IST stack. This code must keep interrupts off all the time.
8902@@ -789,12 +856,12 @@ asmlinkage __kprobes void default_do_nmi
8903 * Ok, so this is none of the documented NMI sources,
8904 * so it must be the NMI watchdog.
8905 */
8906- if (nmi_watchdog > 0) {
8907- nmi_watchdog_tick(regs,reason);
8908+ if (nmi_watchdog_tick(regs,reason))
8909 return;
8910- }
8911 #endif
8912- unknown_nmi_error(reason, regs);
8913+ if (!do_nmi_callback(regs,cpu))
8914+ unknown_nmi_error(reason, regs);
8915+
8916 return;
8917 }
8918 if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
8919@@ -1081,6 +1148,7 @@ asmlinkage void math_state_restore(void)
8920 init_fpu(me);
8921 restore_fpu_checking(&me->thread.i387.fxsave);
8922 task_thread_info(me)->status |= TS_USEDFPU;
8923+ me->fpu_counter++;
8924 }
8925
8926
8927@@ -1141,24 +1209,30 @@ void __cpuinit smp_trap_init(trap_info_t
8928 }
8929
8930
8931-/* Actual parsing is done early in setup.c. */
8932-static int __init oops_dummy(char *s)
8933+static int __init oops_setup(char *s)
8934 {
8935- panic_on_oops = 1;
8936- return 1;
8937+ if (!s)
8938+ return -EINVAL;
8939+ if (!strcmp(s, "panic"))
8940+ panic_on_oops = 1;
8941+ return 0;
8942 }
8943-__setup("oops=", oops_dummy);
8944+early_param("oops", oops_setup);
8945
8946 static int __init kstack_setup(char *s)
8947 {
8948+ if (!s)
8949+ return -EINVAL;
8950 kstack_depth_to_print = simple_strtoul(s,NULL,0);
8951- return 1;
8952+ return 0;
8953 }
8954-__setup("kstack=", kstack_setup);
8955+early_param("kstack", kstack_setup);
8956
8957 #ifdef CONFIG_STACK_UNWIND
8958 static int __init call_trace_setup(char *s)
8959 {
8960+ if (!s)
8961+ return -EINVAL;
8962 if (strcmp(s, "old") == 0)
8963 call_trace = -1;
8964 else if (strcmp(s, "both") == 0)
8965@@ -1167,7 +1241,7 @@ static int __init call_trace_setup(char
8966 call_trace = 1;
8967 else if (strcmp(s, "new") == 0)
8968 call_trace = 2;
8969- return 1;
8970+ return 0;
8971 }
8972-__setup("call_trace=", call_trace_setup);
8973+early_param("call_trace", call_trace_setup);
8974 #endif
8975--- sle11-2009-05-14.orig/arch/x86/kernel/vsyscall_64-xen.c 2009-05-14 11:02:43.000000000 +0200
8976+++ sle11-2009-05-14/arch/x86/kernel/vsyscall_64-xen.c 2009-03-04 11:28:34.000000000 +0100
8977@@ -26,6 +26,10 @@
8978 #include <linux/seqlock.h>
8979 #include <linux/jiffies.h>
8980 #include <linux/sysctl.h>
8981+#include <linux/getcpu.h>
8982+#include <linux/cpu.h>
8983+#include <linux/smp.h>
8984+#include <linux/notifier.h>
8985
8986 #include <asm/vsyscall.h>
8987 #include <asm/pgtable.h>
8988@@ -33,11 +37,15 @@
8989 #include <asm/fixmap.h>
8990 #include <asm/errno.h>
8991 #include <asm/io.h>
8992+#include <asm/segment.h>
8993+#include <asm/desc.h>
8994+#include <asm/topology.h>
8995
8996 #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
8997
8998 int __sysctl_vsyscall __section_sysctl_vsyscall = 1;
8999 seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED;
9000+int __vgetcpu_mode __section_vgetcpu_mode;
9001
9002 #include <asm/unistd.h>
9003
9004@@ -61,8 +69,7 @@ static __always_inline void do_vgettimeo
9005 sequence = read_seqbegin(&__xtime_lock);
9006
9007 sec = __xtime.tv_sec;
9008- usec = (__xtime.tv_nsec / 1000) +
9009- (__jiffies - __wall_jiffies) * (1000000 / HZ);
9010+ usec = __xtime.tv_nsec / 1000;
9011
9012 if (__vxtime.mode != VXTIME_HPET) {
9013 t = get_cycles_sync();
9014@@ -72,7 +79,8 @@ static __always_inline void do_vgettimeo
9015 __vxtime.tsc_quot) >> 32;
9016 /* See comment in x86_64 do_gettimeofday. */
9017 } else {
9018- usec += ((readl((void *)fix_to_virt(VSYSCALL_HPET) + 0xf0) -
9019+ usec += ((readl((void __iomem *)
9020+ fix_to_virt(VSYSCALL_HPET) + 0xf0) -
9021 __vxtime.last) * __vxtime.quot) >> 32;
9022 }
9023 } while (read_seqretry(&__xtime_lock, sequence));
9024@@ -127,9 +135,46 @@ time_t __vsyscall(1) vtime(time_t *t)
9025 return __xtime.tv_sec;
9026 }
9027
9028-long __vsyscall(2) venosys_0(void)
9029-{
9030- return -ENOSYS;
9031+/* Fast way to get current CPU and node.
9032+ This helps to do per node and per CPU caches in user space.
9033+ The result is not guaranteed without CPU affinity, but usually
9034+ works out because the scheduler tries to keep a thread on the same
9035+ CPU.
9036+
9037+ tcache must point to a two element sized long array.
9038+ All arguments can be NULL. */
9039+long __vsyscall(2)
9040+vgetcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache)
9041+{
9042+ unsigned int dummy, p;
9043+ unsigned long j = 0;
9044+
9045+ /* Fast cache - only recompute value once per jiffies and avoid
9046+ relatively costly rdtscp/cpuid otherwise.
9047+ This works because the scheduler usually keeps the process
9048+ on the same CPU and this syscall doesn't guarantee its
9049+ results anyways.
9050+ We do this here because otherwise user space would do it on
9051+ its own in a likely inferior way (no access to jiffies).
9052+ If you don't like it pass NULL. */
9053+ if (tcache && tcache->blob[0] == (j = __jiffies)) {
9054+ p = tcache->blob[1];
9055+ } else if (__vgetcpu_mode == VGETCPU_RDTSCP) {
9056+ /* Load per CPU data from RDTSCP */
9057+ rdtscp(dummy, dummy, p);
9058+ } else {
9059+ /* Load per CPU data from GDT */
9060+ asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
9061+ }
9062+ if (tcache) {
9063+ tcache->blob[0] = j;
9064+ tcache->blob[1] = p;
9065+ }
9066+ if (cpu)
9067+ *cpu = p & 0xfff;
9068+ if (node)
9069+ *node = p >> 12;
9070+ return 0;
9071 }
9072
9073 long __vsyscall(3) venosys_1(void)
9074@@ -149,7 +194,8 @@ static int vsyscall_sysctl_change(ctl_ta
9075 void __user *buffer, size_t *lenp, loff_t *ppos)
9076 {
9077 extern u16 vsysc1, vsysc2;
9078- u16 *map1, *map2;
9079+ u16 __iomem *map1;
9080+ u16 __iomem *map2;
9081 int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
9082 if (!write)
9083 return ret;
9084@@ -164,11 +210,11 @@ static int vsyscall_sysctl_change(ctl_ta
9085 goto out;
9086 }
9087 if (!sysctl_vsyscall) {
9088- *map1 = SYSCALL;
9089- *map2 = SYSCALL;
9090+ writew(SYSCALL, map1);
9091+ writew(SYSCALL, map2);
9092 } else {
9093- *map1 = NOP2;
9094- *map2 = NOP2;
9095+ writew(NOP2, map1);
9096+ writew(NOP2, map2);
9097 }
9098 iounmap(map2);
9099 out:
9100@@ -200,6 +246,48 @@ static ctl_table kernel_root_table2[] =
9101
9102 #endif
9103
9104+/* Assume __initcall executes before all user space. Hopefully kmod
9105+ doesn't violate that. We'll find out if it does. */
9106+static void __cpuinit vsyscall_set_cpu(int cpu)
9107+{
9108+ unsigned long d;
9109+ unsigned long node = 0;
9110+#ifdef CONFIG_NUMA
9111+ node = cpu_to_node[cpu];
9112+#endif
9113+ if (cpu_has(&cpu_data[cpu], X86_FEATURE_RDTSCP))
9114+ write_rdtscp_aux((node << 12) | cpu);
9115+
9116+ /* Store cpu number in limit so that it can be loaded quickly
9117+ in user space in vgetcpu.
9118+ 12 bits for the CPU and 8 bits for the node. */
9119+ d = 0x0f40000000000ULL;
9120+ d |= cpu;
9121+ d |= (node & 0xf) << 12;
9122+ d |= (node >> 4) << 48;
9123+ if (HYPERVISOR_update_descriptor(virt_to_machine(cpu_gdt(cpu)
9124+ + GDT_ENTRY_PER_CPU),
9125+ d))
9126+ BUG();
9127+}
9128+
9129+static void __cpuinit cpu_vsyscall_init(void *arg)
9130+{
9131+ /* preemption should be already off */
9132+ vsyscall_set_cpu(raw_smp_processor_id());
9133+}
9134+
9135+#ifdef CONFIG_HOTPLUG_CPU
9136+static int __cpuinit
9137+cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg)
9138+{
9139+ long cpu = (long)arg;
9140+ if (action == CPU_ONLINE)
9141+ smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 0, 1);
9142+ return NOTIFY_DONE;
9143+}
9144+#endif
9145+
9146 static void __init map_vsyscall(void)
9147 {
9148 extern char __vsyscall_0;
9149@@ -214,13 +302,20 @@ static int __init vsyscall_init(void)
9150 VSYSCALL_ADDR(__NR_vgettimeofday)));
9151 BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime));
9152 BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE)));
9153+ BUG_ON((unsigned long) &vgetcpu != VSYSCALL_ADDR(__NR_vgetcpu));
9154 map_vsyscall();
9155 #ifdef CONFIG_XEN
9156 	sysctl_vsyscall = 0; /* disable vgettimeofday() */
9157+ if (boot_cpu_has(X86_FEATURE_RDTSCP))
9158+ vgetcpu_mode = VGETCPU_RDTSCP;
9159+ else
9160+ vgetcpu_mode = VGETCPU_LSL;
9161 #endif
9162 #ifdef CONFIG_SYSCTL
9163 register_sysctl_table(kernel_root_table2, 0);
9164 #endif
9165+ on_each_cpu(cpu_vsyscall_init, NULL, 0, 1);
9166+ hotcpu_notifier(cpu_vsyscall_notifier, 0);
9167 return 0;
9168 }
9169
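
The vgetcpu slot added above packs the CPU number into bits 0-11 and the node into the bits above, read either from the TSC_AUX MSR via rdtscp or from the limit field of the per-CPU GDT descriptor via lsl, which is what vsyscall_set_cpu() programs through HYPERVISOR_update_descriptor here. A user-space sketch of calling the new slot directly (not part of the patch; on x86-64 kernels of this era slot 2 sits at the vsyscall base 0xffffffffff600000 plus 2 * 1024, and the local getcpu_cache mirrors linux/getcpu.h):

#include <stdio.h>

struct getcpu_cache { unsigned long blob[128 / sizeof(long)]; };

typedef long (*vgetcpu_t)(unsigned *cpu, unsigned *node,
                          struct getcpu_cache *tcache);

int main(void)
{
        /* vsyscall slot 2: fixed architectural address, 1024-byte slots */
        vgetcpu_t vgetcpu = (vgetcpu_t)0xffffffffff600800UL;
        unsigned cpu = 0, node = 0;

        vgetcpu(&cpu, &node, NULL);     /* cpu: bits 0-11, node: bits 12+ */
        printf("cpu=%u node=%u\n", cpu, node);
        return 0;
}
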
9170--- sle11-2009-05-14.orig/arch/x86/mm/fault_64-xen.c 2009-05-14 11:02:43.000000000 +0200
9171+++ sle11-2009-05-14/arch/x86/mm/fault_64-xen.c 2009-03-04 11:28:34.000000000 +0100
9172@@ -40,8 +40,7 @@
9173 #define PF_RSVD (1<<3)
9174 #define PF_INSTR (1<<4)
9175
9176-#ifdef CONFIG_KPROBES
9177-ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
9178+static ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
9179
9180 /* Hook to register for page fault notifications */
9181 int register_page_fault_notifier(struct notifier_block *nb)
9182@@ -49,11 +48,13 @@ int register_page_fault_notifier(struct
9183 vmalloc_sync_all();
9184 return atomic_notifier_chain_register(&notify_page_fault_chain, nb);
9185 }
9186+EXPORT_SYMBOL_GPL(register_page_fault_notifier);
9187
9188 int unregister_page_fault_notifier(struct notifier_block *nb)
9189 {
9190 return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb);
9191 }
9192+EXPORT_SYMBOL_GPL(unregister_page_fault_notifier);
9193
9194 static inline int notify_page_fault(enum die_val val, const char *str,
9195 struct pt_regs *regs, long err, int trap, int sig)
9196@@ -67,13 +68,6 @@ static inline int notify_page_fault(enum
9197 };
9198 return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args);
9199 }
9200-#else
9201-static inline int notify_page_fault(enum die_val val, const char *str,
9202- struct pt_regs *regs, long err, int trap, int sig)
9203-{
9204- return NOTIFY_DONE;
9205-}
9206-#endif
9207
9208 void bust_spinlocks(int yes)
9209 {
9210@@ -102,7 +96,7 @@ void bust_spinlocks(int yes)
9211 static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr,
9212 unsigned long error_code)
9213 {
9214- unsigned char *instr;
9215+ unsigned char __user *instr;
9216 int scan_more = 1;
9217 int prefetch = 0;
9218 unsigned char *max_instr;
9219@@ -111,7 +105,7 @@ static noinline int is_prefetch(struct p
9220 if (error_code & PF_INSTR)
9221 return 0;
9222
9223- instr = (unsigned char *)convert_rip_to_linear(current, regs);
9224+ instr = (unsigned char __user *)convert_rip_to_linear(current, regs);
9225 max_instr = instr + 15;
9226
9227 if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE)
9228@@ -122,7 +116,7 @@ static noinline int is_prefetch(struct p
9229 unsigned char instr_hi;
9230 unsigned char instr_lo;
9231
9232- if (__get_user(opcode, instr))
9233+ if (__get_user(opcode, (char __user *)instr))
9234 break;
9235
9236 instr_hi = opcode & 0xf0;
9237@@ -160,7 +154,7 @@ static noinline int is_prefetch(struct p
9238 case 0x00:
9239 /* Prefetch instruction is 0x0F0D or 0x0F18 */
9240 scan_more = 0;
9241- if (__get_user(opcode, instr))
9242+ if (__get_user(opcode, (char __user *)instr))
9243 break;
9244 prefetch = (instr_lo == 0xF) &&
9245 (opcode == 0x0D || opcode == 0x18);
9246@@ -176,7 +170,7 @@ static noinline int is_prefetch(struct p
9247 static int bad_address(void *p)
9248 {
9249 unsigned long dummy;
9250- return __get_user(dummy, (unsigned long *)p);
9251+ return __get_user(dummy, (unsigned long __user *)p);
9252 }
9253
9254 void dump_pagetable(unsigned long address)
9255@@ -248,7 +242,7 @@ static int is_errata93(struct pt_regs *r
9256
9257 int unhandled_signal(struct task_struct *tsk, int sig)
9258 {
9259- if (tsk->pid == 1)
9260+ if (is_init(tsk))
9261 return 1;
9262 if (tsk->ptrace & PT_PTRACED)
9263 return 0;
9264@@ -300,7 +294,7 @@ static int vmalloc_fault(unsigned long a
9265 if (pgd_none(*pgd))
9266 set_pgd(pgd, *pgd_ref);
9267 else
9268- BUG_ON(pgd_page(*pgd) != pgd_page(*pgd_ref));
9269+ BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
9270
9271 /* Below here mismatches are bugs because these lower tables
9272 are shared */
9273@@ -309,7 +303,7 @@ static int vmalloc_fault(unsigned long a
9274 pud_ref = pud_offset(pgd_ref, address);
9275 if (pud_none(*pud_ref))
9276 return -1;
9277- if (pud_none(*pud) || pud_page(*pud) != pud_page(*pud_ref))
9278+ if (pud_none(*pud) || pud_page_vaddr(*pud) != pud_page_vaddr(*pud_ref))
9279 BUG();
9280 pmd = pmd_offset(pud, address);
9281 pmd_ref = pmd_offset(pud_ref, address);
9282@@ -531,7 +525,7 @@ good_area:
9283 case PF_PROT: /* read, present */
9284 goto bad_area;
9285 case 0: /* read, not present */
9286- if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
9287+ if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
9288 goto bad_area;
9289 }
9290
9291@@ -647,7 +641,7 @@ no_context:
9292 */
9293 out_of_memory:
9294 up_read(&mm->mmap_sem);
9295- if (current->pid == 1) {
9296+ if (is_init(current)) {
9297 yield();
9298 goto again;
9299 }
9300@@ -702,7 +696,7 @@ void vmalloc_sync_all(void)
9301 if (pgd_none(*pgd))
9302 set_pgd(pgd, *pgd_ref);
9303 else
9304- BUG_ON(pgd_page(*pgd) != pgd_page(*pgd_ref));
9305+ BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
9306 }
9307 spin_unlock(&pgd_lock);
9308 set_bit(pgd_index(address), insync);
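
With the CONFIG_KPROBES conditional dropped, the page-fault notifier chain above is always built and its register/unregister entry points are now exported GPL, so modules can subscribe to fault events. A minimal sketch of a subscriber, assuming the 2.6.19-era x86-64 die_args interface (the module and all "my_" names are hypothetical):

#include <linux/module.h>
#include <linux/notifier.h>
#include <asm/kdebug.h>

static int my_fault_event(struct notifier_block *nb, unsigned long val,
                          void *data)
{
        struct die_args *args = data;

        if (val == DIE_PAGE_FAULT)
                printk(KERN_DEBUG "fault at rip %lx\n", args->regs->rip);
        return NOTIFY_DONE;
}

static struct notifier_block my_fault_nb = {
        .notifier_call = my_fault_event,
};

static int __init my_init(void)
{
        /* also triggers vmalloc_sync_all(), per the hunk above */
        return register_page_fault_notifier(&my_fault_nb);
}

static void __exit my_exit(void)
{
        unregister_page_fault_notifier(&my_fault_nb);
}

module_init(my_init);
module_exit(my_exit);
MODULE_LICENSE("GPL");
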
9309--- sle11-2009-05-14.orig/arch/x86/mm/init_64-xen.c 2009-05-14 11:02:43.000000000 +0200
9310+++ sle11-2009-05-14/arch/x86/mm/init_64-xen.c 2009-03-04 11:28:34.000000000 +0100
9311@@ -61,8 +61,6 @@ EXPORT_SYMBOL(__kernel_page_user);
9312
9313 int after_bootmem;
9314
9315-static unsigned long dma_reserve __initdata;
9316-
9317 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
9318 extern unsigned long start_pfn;
9319
9320@@ -420,7 +418,6 @@ __init void *early_ioremap(unsigned long
9321
9322 /* actually usually some more */
9323 if (size >= LARGE_PAGE_SIZE) {
9324- printk("SMBIOS area too long %lu\n", size);
9325 return NULL;
9326 }
9327 set_pmd(temp_mappings[0].pmd, __pmd(map | _KERNPG_TABLE | _PAGE_PSE));
9328@@ -442,16 +439,24 @@ __init void early_iounmap(void *addr, un
9329 #endif
9330
9331 static void __meminit
9332-phys_pmd_init(pmd_t *pmd, unsigned long address, unsigned long end)
9333+phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
9334 {
9335- int i, k;
9336+ int i = pmd_index(address);
9337
9338- for (i = 0; i < PTRS_PER_PMD; pmd++, i++) {
9339+ for (; i < PTRS_PER_PMD; i++) {
9340 unsigned long pte_phys;
9341+ pmd_t *pmd = pmd_page + i;
9342 pte_t *pte, *pte_save;
9343+ int k;
9344
9345 if (address >= end)
9346 break;
9347+
9348+ if (__pmd_val(*pmd)) {
9349+ address += PMD_SIZE;
9350+ continue;
9351+ }
9352+
9353 pte = alloc_static_page(&pte_phys);
9354 pte_save = pte;
9355 for (k = 0; k < PTRS_PER_PTE; pte++, k++, address += PTE_SIZE) {
9356@@ -478,40 +483,35 @@ phys_pmd_init(pmd_t *pmd, unsigned long
9357 static void __meminit
9358 phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end)
9359 {
9360- pmd_t *pmd = pmd_offset(pud, (unsigned long)__va(address));
9361-
9362- if (pmd_none(*pmd)) {
9363- spin_lock(&init_mm.page_table_lock);
9364- phys_pmd_init(pmd, address, end);
9365- spin_unlock(&init_mm.page_table_lock);
9366- __flush_tlb_all();
9367- }
9368+ pmd_t *pmd = pmd_offset(pud,0);
9369+ spin_lock(&init_mm.page_table_lock);
9370+ phys_pmd_init(pmd, address, end);
9371+ spin_unlock(&init_mm.page_table_lock);
9372+ __flush_tlb_all();
9373 }
9374
9375-static void __meminit phys_pud_init(pud_t *pud, unsigned long address, unsigned long end)
9376+static void __meminit phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end)
9377 {
9378- long i = pud_index(address);
9379-
9380- pud = pud + i;
9381-
9382- if (after_bootmem && pud_val(*pud)) {
9383- phys_pmd_update(pud, address, end);
9384- return;
9385- }
9386+ int i = pud_index(addr);
9387
9388- for (; i < PTRS_PER_PUD; pud++, i++) {
9389- unsigned long paddr, pmd_phys;
9390+ for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE ) {
9391+ unsigned long pmd_phys;
9392+ pud_t *pud = pud_page + pud_index(addr);
9393 pmd_t *pmd;
9394
9395- paddr = (address & PGDIR_MASK) + i*PUD_SIZE;
9396- if (paddr >= end)
9397+ if (addr >= end)
9398 break;
9399
9400+ if (__pud_val(*pud)) {
9401+ phys_pmd_update(pud, addr, end);
9402+ continue;
9403+ }
9404+
9405 pmd = alloc_static_page(&pmd_phys);
9406
9407 spin_lock(&init_mm.page_table_lock);
9408 *pud = __pud(pmd_phys | _KERNPG_TABLE);
9409- phys_pmd_init(pmd, paddr, end);
9410+ phys_pmd_init(pmd, addr, end);
9411 spin_unlock(&init_mm.page_table_lock);
9412
9413 early_make_page_readonly(pmd, XENFEAT_writable_page_tables);
9414@@ -775,69 +775,18 @@ void __cpuinit zap_low_mappings(int cpu)
9415 #endif
9416 }
9417
9418-/* Compute zone sizes for the DMA and DMA32 zones in a node. */
9419-__init void
9420-size_zones(unsigned long *z, unsigned long *h,
9421- unsigned long start_pfn, unsigned long end_pfn)
9422-{
9423- int i;
9424- unsigned long w;
9425-
9426- for (i = 0; i < MAX_NR_ZONES; i++)
9427- z[i] = 0;
9428-
9429- if (start_pfn < MAX_DMA_PFN)
9430- z[ZONE_DMA] = MAX_DMA_PFN - start_pfn;
9431- if (start_pfn < MAX_DMA32_PFN) {
9432- unsigned long dma32_pfn = MAX_DMA32_PFN;
9433- if (dma32_pfn > end_pfn)
9434- dma32_pfn = end_pfn;
9435- z[ZONE_DMA32] = dma32_pfn - start_pfn;
9436- }
9437- z[ZONE_NORMAL] = end_pfn - start_pfn;
9438-
9439- /* Remove lower zones from higher ones. */
9440- w = 0;
9441- for (i = 0; i < MAX_NR_ZONES; i++) {
9442- if (z[i])
9443- z[i] -= w;
9444- w += z[i];
9445- }
9446-
9447- /* Compute holes */
9448- w = start_pfn;
9449- for (i = 0; i < MAX_NR_ZONES; i++) {
9450- unsigned long s = w;
9451- w += z[i];
9452- h[i] = e820_hole_size(s, w);
9453- }
9454-
9455- /* Add the space pace needed for mem_map to the holes too. */
9456- for (i = 0; i < MAX_NR_ZONES; i++)
9457- h[i] += (z[i] * sizeof(struct page)) / PAGE_SIZE;
9458-
9459- /* The 16MB DMA zone has the kernel and other misc mappings.
9460- Account them too */
9461- if (h[ZONE_DMA]) {
9462- h[ZONE_DMA] += dma_reserve;
9463- if (h[ZONE_DMA] >= z[ZONE_DMA]) {
9464- printk(KERN_WARNING
9465- "Kernel too large and filling up ZONE_DMA?\n");
9466- h[ZONE_DMA] = z[ZONE_DMA];
9467- }
9468- }
9469-}
9470-
9471 #ifndef CONFIG_NUMA
9472 void __init paging_init(void)
9473 {
9474- unsigned long zones[MAX_NR_ZONES], holes[MAX_NR_ZONES];
9475+ unsigned long max_zone_pfns[MAX_NR_ZONES];
9476+ memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
9477+ max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;
9478+ max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
9479+ max_zone_pfns[ZONE_NORMAL] = end_pfn;
9480
9481 memory_present(0, 0, end_pfn);
9482 sparse_init();
9483- size_zones(zones, holes, 0, end_pfn);
9484- free_area_init_node(0, NODE_DATA(0), zones,
9485- __pa(PAGE_OFFSET) >> PAGE_SHIFT, holes);
9486+ free_area_init_nodes(max_zone_pfns);
9487
9488 init_mm.context.pinned = 1;
9489 }
9490@@ -891,36 +840,23 @@ void online_page(struct page *page)
9491
9492 #ifdef CONFIG_MEMORY_HOTPLUG
9493 /*
9494- * XXX: memory_add_physaddr_to_nid() is to find node id from physical address
9495- * via probe interface of sysfs. If acpi notifies hot-add event, then it
9496- * can tell node id by searching dsdt. But, probe interface doesn't have
9497- * node id. So, return 0 as node id at this time.
9498- */
9499-#ifdef CONFIG_NUMA
9500-int memory_add_physaddr_to_nid(u64 start)
9501-{
9502- return 0;
9503-}
9504-#endif
9505-
9506-/*
9507 * Memory is added always to NORMAL zone. This means you will never get
9508 * additional DMA/DMA32 memory.
9509 */
9510 int arch_add_memory(int nid, u64 start, u64 size)
9511 {
9512 struct pglist_data *pgdat = NODE_DATA(nid);
9513- struct zone *zone = pgdat->node_zones + MAX_NR_ZONES-2;
9514+ struct zone *zone = pgdat->node_zones + ZONE_NORMAL;
9515 unsigned long start_pfn = start >> PAGE_SHIFT;
9516 unsigned long nr_pages = size >> PAGE_SHIFT;
9517 int ret;
9518
9519+ init_memory_mapping(start, (start + size -1));
9520+
9521 ret = __add_pages(zone, start_pfn, nr_pages);
9522 if (ret)
9523 goto error;
9524
9525- init_memory_mapping(start, (start + size -1));
9526-
9527 return ret;
9528 error:
9529 printk("%s: Problem encountered in __add_pages!\n", __func__);
9530@@ -934,7 +870,17 @@ int remove_memory(u64 start, u64 size)
9531 }
9532 EXPORT_SYMBOL_GPL(remove_memory);
9533
9534-#else /* CONFIG_MEMORY_HOTPLUG */
9535+#if !defined(CONFIG_ACPI_NUMA) && defined(CONFIG_NUMA)
9536+int memory_add_physaddr_to_nid(u64 start)
9537+{
9538+ return 0;
9539+}
9540+EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
9541+#endif
9542+
9543+#endif /* CONFIG_MEMORY_HOTPLUG */
9544+
9545+#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
9546 /*
9547 * Memory Hotadd without sparsemem. The mem_maps have been allocated in advance,
9548 * just online the pages.
9549@@ -960,7 +906,7 @@ int __add_pages(struct zone *z, unsigned
9550 }
9551 return err;
9552 }
9553-#endif /* CONFIG_MEMORY_HOTPLUG */
9554+#endif
9555
9556 static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules,
9557 kcore_vsyscall;
9558@@ -972,12 +918,6 @@ void __init mem_init(void)
9559
9560 pci_iommu_alloc();
9561
9562- /* How many end-of-memory variables you have, grandma! */
9563- max_low_pfn = end_pfn;
9564- max_pfn = end_pfn;
9565- num_physpages = end_pfn;
9566- high_memory = (void *) __va(end_pfn * PAGE_SIZE);
9567-
9568 /* clear the zero-page */
9569 memset(empty_zero_page, 0, PAGE_SIZE);
9570
9571@@ -995,7 +935,8 @@ void __init mem_init(void)
9572 init_page_count(pfn_to_page(pfn));
9573 totalram_pages++;
9574 }
9575- reservedpages = end_pfn - totalram_pages - e820_hole_size(0, end_pfn);
9576+ reservedpages = end_pfn - totalram_pages -
9577+ absent_pages_in_range(0, end_pfn);
9578
9579 after_bootmem = 1;
9580
9581@@ -1102,15 +1043,32 @@ void free_initrd_mem(unsigned long start
9582
9583 void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
9584 {
9585- /* Should check here against the e820 map to avoid double free */
9586 #ifdef CONFIG_NUMA
9587 int nid = phys_to_nid(phys);
9588+#endif
9589+ unsigned long pfn = phys >> PAGE_SHIFT;
9590+ if (pfn >= end_pfn) {
9591+ /* This can happen with kdump kernels when accessing firmware
9592+ tables. */
9593+ if (pfn < end_pfn_map)
9594+ return;
9595+ printk(KERN_ERR "reserve_bootmem: illegal reserve %lx %u\n",
9596+ phys, len);
9597+ return;
9598+ }
9599+
9600+ /* Should check here against the e820 map to avoid double free */
9601+#ifdef CONFIG_NUMA
9602 reserve_bootmem_node(NODE_DATA(nid), phys, len);
9603 #else
9604 reserve_bootmem(phys, len);
9605 #endif
9606- if (phys+len <= MAX_DMA_PFN*PAGE_SIZE)
9607+#ifndef CONFIG_XEN
9608+ if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) {
9609 dma_reserve += len / PAGE_SIZE;
9610+ set_dma_reserve(dma_reserve);
9611+ }
9612+#endif
9613 }
9614
9615 int kern_addr_valid(unsigned long addr)
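
The init_64 rework above retires the private size_zones()/e820_hole_size() logic in favour of the arch-neutral memory map introduced in 2.6.19: the architecture reports which PFN ranges are populated plus the maximum PFN of each zone, and the core derives zone sizes and holes itself (which is also why mem_init() now uses absent_pages_in_range()). A sketch of the generic pattern, assuming CONFIG_ARCH_POPULATES_NODE_MAP and a single node; the real range registration happens from the e820/setup code, and example_paging_init is a made-up name:

#include <linux/mm.h>
#include <linux/string.h>
#include <asm/dma.h>

void __init example_paging_init(unsigned long start_pfn, unsigned long end_pfn)
{
        unsigned long max_zone_pfns[MAX_NR_ZONES];

        memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
        max_zone_pfns[ZONE_DMA]    = MAX_DMA_PFN;
        max_zone_pfns[ZONE_DMA32]  = MAX_DMA32_PFN;
        max_zone_pfns[ZONE_NORMAL] = end_pfn;

        /* Tell the core which PFNs are actually backed by RAM ... */
        add_active_range(0, start_pfn, end_pfn);
        /* ... and let it size all zones and account the holes. */
        free_area_init_nodes(max_zone_pfns);
}
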
9616--- sle11-2009-05-14.orig/arch/x86/mm/pageattr_64-xen.c 2008-12-01 11:25:57.000000000 +0100
9617+++ sle11-2009-05-14/arch/x86/mm/pageattr_64-xen.c 2009-03-04 11:28:34.000000000 +0100
9618@@ -377,8 +377,8 @@ static void revert_page(unsigned long ad
9619 BUG_ON(pud_none(*pud));
9620 pmd = pmd_offset(pud, address);
9621 BUG_ON(__pmd_val(*pmd) & _PAGE_PSE);
9622- pgprot_val(ref_prot) |= _PAGE_PSE;
9623 large_pte = mk_pte_phys(__pa(address) & LARGE_PAGE_MASK, ref_prot);
9624+ large_pte = pte_mkhuge(large_pte);
9625 set_pte((pte_t *)pmd, large_pte);
9626 }
9627
9628@@ -388,32 +388,28 @@ __change_page_attr(unsigned long address
9629 {
9630 pte_t *kpte;
9631 struct page *kpte_page;
9632- unsigned kpte_flags;
9633 pgprot_t ref_prot2;
9634 kpte = lookup_address(address);
9635 if (!kpte) return 0;
9636 kpte_page = virt_to_page(((unsigned long)kpte) & PAGE_MASK);
9637- kpte_flags = pte_val(*kpte);
9638 if (pgprot_val(prot) != pgprot_val(ref_prot)) {
9639- if ((kpte_flags & _PAGE_PSE) == 0) {
9640+ if (!pte_huge(*kpte)) {
9641 set_pte(kpte, pfn_pte(pfn, prot));
9642 } else {
9643 /*
9644 * split_large_page will take the reference for this
9645 * change_page_attr on the split page.
9646 */
9647-
9648 struct page *split;
9649- ref_prot2 = __pgprot(pgprot_val(pte_pgprot(*lookup_address(address))) & ~(1<<_PAGE_BIT_PSE));
9650-
9651+ ref_prot2 = pte_pgprot(pte_clrhuge(*kpte));
9652 split = split_large_page(address, prot, ref_prot2);
9653 if (!split)
9654 return -ENOMEM;
9655- set_pte(kpte,mk_pte(split, ref_prot2));
9656+ set_pte(kpte, mk_pte(split, ref_prot2));
9657 kpte_page = split;
9658- }
9659+ }
9660 page_private(kpte_page)++;
9661- } else if ((kpte_flags & _PAGE_PSE) == 0) {
9662+ } else if (!pte_huge(*kpte)) {
9663 set_pte(kpte, pfn_pte(pfn, ref_prot));
9664 BUG_ON(page_private(kpte_page) == 0);
9665 page_private(kpte_page)--;
9666@@ -470,10 +466,12 @@ int change_page_attr_addr(unsigned long
9667 * lowmem */
9668 if (__pa(address) < KERNEL_TEXT_SIZE) {
9669 unsigned long addr2;
9670- pgprot_t prot2 = prot;
9671+ pgprot_t prot2;
9672 addr2 = __START_KERNEL_map + __pa(address);
9673- pgprot_val(prot2) &= ~_PAGE_NX;
9674- err = __change_page_attr(addr2, pfn, prot2, PAGE_KERNEL_EXEC);
9675+ /* Make sure the kernel mappings stay executable */
9676+ prot2 = pte_pgprot(pte_mkexec(pfn_pte(0, prot)));
9677+ err = __change_page_attr(addr2, pfn, prot2,
9678+ PAGE_KERNEL_EXEC);
9679 }
9680 }
9681 up_write(&init_mm.mmap_sem);
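
The pageattr hunks above replace open-coded _PAGE_PSE manipulation with the pte accessor family: pte_huge() tests the bit, pte_mkhuge() sets it, pte_clrhuge() clears it, keeping the PSE encoding inside pgtable.h. Building a large-page PTE then reads as below (a sketch using mk_pte_phys() and LARGE_PAGE_MASK as the surrounding Xen code does; make_large_pte is a made-up helper):

#include <asm/pgtable.h>

static inline pte_t make_large_pte(unsigned long address, pgprot_t ref_prot)
{
        /* equivalent of the old "pgprot_val(ref_prot) |= _PAGE_PSE" dance */
        return pte_mkhuge(mk_pte_phys(__pa(address) & LARGE_PAGE_MASK,
                                      ref_prot));
}
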
9682--- sle11-2009-05-14.orig/drivers/char/tpm/tpm_xen.c 2009-02-16 15:58:14.000000000 +0100
9683+++ sle11-2009-05-14/drivers/char/tpm/tpm_xen.c 2009-03-04 11:28:34.000000000 +0100
9684@@ -85,8 +85,7 @@ static struct tpm_private *my_priv;
9685
9686 /* local function prototypes */
9687 static irqreturn_t tpmif_int(int irq,
9688- void *tpm_priv,
9689- struct pt_regs *ptregs);
9690+ void *tpm_priv);
9691 static void tpmif_rx_action(unsigned long unused);
9692 static int tpmif_connect(struct xenbus_device *dev,
9693 struct tpm_private *tp,
9694@@ -559,7 +558,7 @@ static void tpmif_rx_action(unsigned lon
9695 }
9696
9697
9698-static irqreturn_t tpmif_int(int irq, void *tpm_priv, struct pt_regs *ptregs)
9699+static irqreturn_t tpmif_int(int irq, void *tpm_priv)
9700 {
9701 struct tpm_private *tp = tpm_priv;
9702 unsigned long flags;
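
The tpm_xen change above is the first of many identical conversions below: the 2.6.19 genirq rework dropped the struct pt_regs * argument from interrupt handlers, so every handler shrinks to (int irq, void *dev_id). A handler that still needs the interrupted register state fetches it with get_irq_regs(). Minimal sketch of the new form (mydev_int is a hypothetical handler):

#include <linux/interrupt.h>
#include <asm/irq_regs.h>

static irqreturn_t mydev_int(int irq, void *dev_id)
{
        /* only needed if the handler actually wants the register state */
        struct pt_regs *regs = get_irq_regs();

        (void)regs;
        return IRQ_HANDLED;
}
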
9703--- sle11-2009-05-14.orig/drivers/pci/Kconfig 2009-05-14 11:02:43.000000000 +0200
9704+++ sle11-2009-05-14/drivers/pci/Kconfig 2009-03-04 11:28:34.000000000 +0100
9705@@ -48,7 +48,7 @@ config PCI_DEBUG
9706 config HT_IRQ
9707 bool "Interrupts on hypertransport devices"
9708 default y
9709- depends on PCI && X86_LOCAL_APIC && X86_IO_APIC
9710+ depends on PCI && X86_LOCAL_APIC && X86_IO_APIC && !XEN
9711 help
9712 This allows native hypertransport devices to use interrupts.
9713
9714--- sle11-2009-05-14.orig/drivers/pci/msi-xen.c 2009-05-14 11:02:43.000000000 +0200
9715+++ sle11-2009-05-14/drivers/pci/msi-xen.c 2009-04-24 13:31:56.000000000 +0200
9716@@ -6,6 +6,7 @@
9717 * Copyright (C) Tom Long Nguyen (tom.l.nguyen@intel.com)
9718 */
9719
9720+#include <linux/err.h>
9721 #include <linux/mm.h>
9722 #include <linux/irq.h>
9723 #include <linux/interrupt.h>
9724@@ -14,6 +15,7 @@
9725 #include <linux/smp_lock.h>
9726 #include <linux/pci.h>
9727 #include <linux/proc_fs.h>
9728+#include <linux/msi.h>
9729
9730 #include <xen/evtchn.h>
9731
9732@@ -26,14 +28,6 @@
9733
9734 static int pci_msi_enable = 1;
9735
9736-static struct msi_ops *msi_ops;
9737-
9738-int msi_register(struct msi_ops *ops)
9739-{
9740- msi_ops = ops;
9741- return 0;
9742-}
9743-
9744 static LIST_HEAD(msi_dev_head);
9745 DEFINE_SPINLOCK(msi_dev_lock);
9746
9747@@ -406,9 +400,9 @@ void pci_restore_msix_state(struct pci_d
9748 * @dev: pointer to the pci_dev data structure of MSI device function
9749 *
9750 * Setup the MSI capability structure of device function with a single
9751- * MSI vector, regardless of device function is capable of handling
9752+ * MSI irq, regardless of whether the device function is capable of handling
9753 * multiple messages. A return of zero indicates the successful setup
9754- * of an entry zero with the new MSI vector or non-zero for otherwise.
9755+ * of an entry zero with the new MSI irq, or non-zero otherwise.
9756 **/
9757 static int msi_capability_init(struct pci_dev *dev)
9758 {
9759@@ -422,11 +416,11 @@ static int msi_capability_init(struct pc
9760 if (pirq < 0)
9761 return -EBUSY;
9762
9763- dev->irq = pirq;
9764 /* Set MSI enabled bits */
9765 enable_msi_mode(dev, pos, PCI_CAP_ID_MSI);
9766 dev->msi_enabled = 1;
9767
9768+ dev->irq = pirq;
9769 return 0;
9770 }
9771
9772@@ -437,8 +431,8 @@ static int msi_capability_init(struct pc
9773 * @nvec: number of @entries
9774 *
9775 * Setup the MSI-X capability structure of device function with a
9776- * single MSI-X vector. A return of zero indicates the successful setup of
9777- * requested MSI-X entries with allocated vectors or non-zero for otherwise.
9778+ * single MSI-X irq. A return of zero indicates the successful setup of
9779+ * requested MSI-X entries with allocated irqs or non-zero for otherwise.
9780 **/
9781 static int msix_capability_init(struct pci_dev *dev,
9782 struct msix_entry *entries, int nvec)
9783@@ -480,12 +474,18 @@ static int msix_capability_init(struct p
9784 }
9785
9786 if (i != nvec) {
9787+ int avail = i - 1;
9788 for (j = --i; j >= 0; j--) {
9789 msi_unmap_pirq(dev, entries[j].vector);
9790 detach_pirq_entry(entries[j].entry, msi_dev_entry);
9791 entries[j].vector = 0;
9792 }
9793- return -EBUSY;
9794+		/* If we had some success, report the number of irqs
9795+ * we succeeded in setting up.
9796+ */
9797+ if (avail <= 0)
9798+ avail = -EBUSY;
9799+ return avail;
9800 }
9801
9802 enable_msi_mode(dev, pos, PCI_CAP_ID_MSIX);
9803@@ -495,11 +495,40 @@ static int msix_capability_init(struct p
9804 }
9805
9806 /**
9807+ * pci_msi_supported - check whether MSI may be enabled on device
9808+ * @dev: pointer to the pci_dev data structure of MSI device function
9809+ *
9810+ * Look at global flags, the device itself, and its parent busses
9811+ * to return 0 if MSI is supported for the device.
9812+ **/
9813+static
9814+int pci_msi_supported(struct pci_dev * dev)
9815+{
9816+ struct pci_bus *bus;
9817+
9818+ /* MSI must be globally enabled and supported by the device */
9819+ if (!pci_msi_enable || !dev || dev->no_msi)
9820+ return -EINVAL;
9821+
9822+	/* Any bridge which does NOT route MSI transactions from its
9823+	 * secondary bus to its primary bus must set NO_MSI flag on
9824+ * the secondary pci_bus.
9825+ * We expect only arch-specific PCI host bus controller driver
9826+ * or quirks for specific PCI bridges to be setting NO_MSI.
9827+ */
9828+ for (bus = dev->bus; bus; bus = bus->parent)
9829+ if (bus->bus_flags & PCI_BUS_FLAGS_NO_MSI)
9830+ return -EINVAL;
9831+
9832+ return 0;
9833+}
9834+
9835+/**
9836 * pci_enable_msi - configure device's MSI capability structure
9837 * @dev: pointer to the pci_dev data structure of MSI device function
9838 *
9839 * Setup the MSI capability structure of device function with
9840- * a single MSI vector upon its software driver call to request for
9841+ * a single MSI irq upon its software driver call to request for
9842 * MSI mode enabled on its hardware device function. A return of zero
9843 * indicates the successful setup of an entry zero with the new MSI
9844 * vector or non-zero for otherwise.
9845@@ -507,18 +536,10 @@ static int msix_capability_init(struct p
9846 extern int pci_frontend_enable_msi(struct pci_dev *dev);
9847 int pci_enable_msi(struct pci_dev* dev)
9848 {
9849- struct pci_bus *bus;
9850- int pos, temp, status = -EINVAL;
9851+ int pos, temp, status;
9852
9853- if (!pci_msi_enable || !dev)
9854- return status;
9855-
9856- if (dev->no_msi)
9857- return status;
9858-
9859- for (bus = dev->bus; bus; bus = bus->parent)
9860- if (bus->bus_flags & PCI_BUS_FLAGS_NO_MSI)
9861- return -EINVAL;
9862+ if (pci_msi_supported(dev) < 0)
9863+ return -EINVAL;
9864
9865 status = msi_init();
9866 if (status < 0)
9867@@ -547,10 +568,10 @@ int pci_enable_msi(struct pci_dev* dev)
9868 if (!pos)
9869 return -EINVAL;
9870
9871- /* Check whether driver already requested for MSI-X vectors */
9872+ /* Check whether driver already requested for MSI-X irqs */
9873 if (dev->msix_enabled) {
9874 printk(KERN_INFO "PCI: %s: Can't enable MSI. "
9875- "Device already has MSI-X vectors assigned\n",
9876+ "Device already has MSI-X irq assigned\n",
9877 pci_name(dev));
9878 dev->irq = temp;
9879 return -EINVAL;
9880@@ -602,36 +623,28 @@ void pci_disable_msi(struct pci_dev* dev
9881 * pci_enable_msix - configure device's MSI-X capability structure
9882 * @dev: pointer to the pci_dev data structure of MSI-X device function
9883 * @entries: pointer to an array of MSI-X entries
9884- * @nvec: number of MSI-X vectors requested for allocation by device driver
9885+ * @nvec: number of MSI-X irqs requested for allocation by device driver
9886 *
9887 * Setup the MSI-X capability structure of device function with the number
9888- * of requested vectors upon its software driver call to request for
9889+ * of requested irqs upon its software driver call to request for
9890 * MSI-X mode enabled on its hardware device function. A return of zero
9891 * indicates the successful configuration of MSI-X capability structure
9892- * with new allocated MSI-X vectors. A return of < 0 indicates a failure.
9893+ * with new allocated MSI-X irqs. A return of < 0 indicates a failure.
9894 * Or a return of > 0 indicates that driver request is exceeding the number
9895- * of vectors available. Driver should use the returned value to re-send
9896+ * of irqs available. Driver should use the returned value to re-send
9897 * its request.
9898 **/
9899 extern int pci_frontend_enable_msix(struct pci_dev *dev,
9900 struct msix_entry *entries, int nvec);
9901 int pci_enable_msix(struct pci_dev* dev, struct msix_entry *entries, int nvec)
9902 {
9903- struct pci_bus *bus;
9904 int status, pos, nr_entries;
9905 int i, j, temp;
9906 u16 control;
9907
9908- if (!pci_msi_enable || !dev || !entries)
9909+ if (!entries || pci_msi_supported(dev) < 0)
9910 return -EINVAL;
9911
9912- if (dev->no_msi)
9913- return -EINVAL;
9914-
9915- for (bus = dev->bus; bus; bus = bus->parent)
9916- if (bus->bus_flags & PCI_BUS_FLAGS_NO_MSI)
9917- return -EINVAL;
9918-
9919 #ifdef CONFIG_XEN_PCIDEV_FRONTEND
9920 if (!is_initial_xendomain()) {
9921 struct msi_dev_list *msi_dev_entry;
9922@@ -694,7 +707,7 @@ int pci_enable_msix(struct pci_dev* dev,
9923 /* Check whether driver already requested for MSI vector */
9924 if (dev->msi_enabled) {
9925 printk(KERN_INFO "PCI: %s: Can't enable MSI-X. "
9926- "Device already has an MSI vector assigned\n",
9927+ "Device already has an MSI irq assigned\n",
9928 pci_name(dev));
9929 dev->irq = temp;
9930 return -EINVAL;
9931@@ -757,11 +770,11 @@ void pci_disable_msix(struct pci_dev* de
9932 }
9933
9934 /**
9935- * msi_remove_pci_irq_vectors - reclaim MSI(X) vectors to unused state
9936+ * msi_remove_pci_irq_vectors - reclaim MSI(X) irqs to unused state
9937 * @dev: pointer to the pci_dev data structure of MSI(X) device function
9938 *
9939 * Being called during hotplug remove, from which the device function
9940- * is hot-removed. All previous assigned MSI/MSI-X vectors, if
9941+ * is hot-removed. All previous assigned MSI/MSI-X irqs, if
9942 * allocated for this device function, are reclaimed to unused state,
9943 * which may be used later on.
9944 **/
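
Two consumer-visible points in the msi-xen.c hunks above: pci_msi_supported() centralises the global-flag, per-device and bridge NO_MSI checks, and msix_capability_init() now returns the number of IRQs it could set up instead of a flat -EBUSY, so pci_enable_msix() can report partial availability (a return > 0) and let the driver retry. The classic retry idiom, sketched for a hypothetical driver whose entries/nvec come from its own probe path:

#include <linux/pci.h>

static int mydev_setup_msix(struct pci_dev *pdev,
                            struct msix_entry *entries, int nvec)
{
        int rc;

        /* > 0 means "only this many irqs are available": retry smaller */
        while ((rc = pci_enable_msix(pdev, entries, nvec)) > 0)
                nvec = rc;

        return rc;      /* 0 on success, < 0 on hard failure */
}
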
9945--- sle11-2009-05-14.orig/drivers/xen/Kconfig 2008-12-05 08:43:56.000000000 +0100
9946+++ sle11-2009-05-14/drivers/xen/Kconfig 2009-03-04 11:28:34.000000000 +0100
9947@@ -287,6 +287,9 @@ endmenu
9948 config HAVE_IRQ_IGNORE_UNHANDLED
9949 def_bool y
9950
9951+config GENERIC_HARDIRQS_NO__DO_IRQ
9952+ def_bool y
9953+
9954 config NO_IDLE_HZ
9955 def_bool y
9956
9957--- sle11-2009-05-14.orig/drivers/xen/balloon/balloon.c 2008-11-25 13:31:07.000000000 +0100
9958+++ sle11-2009-05-14/drivers/xen/balloon/balloon.c 2009-03-04 11:28:34.000000000 +0100
9959@@ -84,7 +84,7 @@ static unsigned long frame_list[PAGE_SIZ
9960 /* VM /proc information for memory */
9961 extern unsigned long totalram_pages;
9962
9963-#ifndef MODULE
9964+#if !defined(MODULE) && defined(CONFIG_HIGHMEM)
9965 extern unsigned long totalhigh_pages;
9966 #define inc_totalhigh_pages() (totalhigh_pages++)
9967 #define dec_totalhigh_pages() (totalhigh_pages--)
9968--- sle11-2009-05-14.orig/drivers/xen/blkback/blkback.c 2008-12-01 11:21:10.000000000 +0100
9969+++ sle11-2009-05-14/drivers/xen/blkback/blkback.c 2009-03-04 11:28:34.000000000 +0100
9970@@ -288,7 +288,7 @@ static void blkif_notify_work(blkif_t *b
9971 wake_up(&blkif->wq);
9972 }
9973
9974-irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs)
9975+irqreturn_t blkif_be_int(int irq, void *dev_id)
9976 {
9977 blkif_notify_work(dev_id);
9978 return IRQ_HANDLED;
9979--- sle11-2009-05-14.orig/drivers/xen/blkback/common.h 2009-05-14 11:02:43.000000000 +0200
9980+++ sle11-2009-05-14/drivers/xen/blkback/common.h 2009-03-04 11:28:34.000000000 +0100
9981@@ -130,7 +130,7 @@ void blkif_interface_init(void);
9982
9983 void blkif_xenbus_init(void);
9984
9985-irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs);
9986+irqreturn_t blkif_be_int(int irq, void *dev_id);
9987 int blkif_schedule(void *arg);
9988
9989 int blkback_barrier(struct xenbus_transaction xbt,
9990--- sle11-2009-05-14.orig/drivers/xen/blkfront/blkfront.c 2009-03-05 15:42:00.000000000 +0100
9991+++ sle11-2009-05-14/drivers/xen/blkfront/blkfront.c 2009-03-24 10:08:16.000000000 +0100
9992@@ -70,7 +70,7 @@ static int setup_blkring(struct xenbus_d
9993
9994 static void kick_pending_request_queues(struct blkfront_info *);
9995
9996-static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs);
9997+static irqreturn_t blkif_int(int irq, void *dev_id);
9998 static void blkif_restart_queue(void *arg);
9999 static void blkif_recover(struct blkfront_info *);
10000 static void blkif_completion(struct blk_shadow *);
10001@@ -707,7 +707,7 @@ void do_blkif_request(request_queue_t *r
10002 }
10003
10004
10005-static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs)
10006+static irqreturn_t blkif_int(int irq, void *dev_id)
10007 {
10008 struct request *req;
10009 blkif_response_t *bret;
10010--- sle11-2009-05-14.orig/drivers/xen/blktap/blktap.c 2009-04-20 11:36:10.000000000 +0200
10011+++ sle11-2009-05-14/drivers/xen/blktap/blktap.c 2009-04-20 11:37:34.000000000 +0200
10012@@ -1222,7 +1222,7 @@ static void blkif_notify_work(blkif_t *b
10013 wake_up(&blkif->wq);
10014 }
10015
10016-irqreturn_t tap_blkif_be_int(int irq, void *dev_id, struct pt_regs *regs)
10017+irqreturn_t tap_blkif_be_int(int irq, void *dev_id)
10018 {
10019 blkif_notify_work(dev_id);
10020 return IRQ_HANDLED;
10021--- sle11-2009-05-14.orig/drivers/xen/blktap/common.h 2009-05-14 11:02:43.000000000 +0200
10022+++ sle11-2009-05-14/drivers/xen/blktap/common.h 2009-03-04 11:28:34.000000000 +0100
10023@@ -113,7 +113,7 @@ void tap_blkif_interface_init(void);
10024
10025 void tap_blkif_xenbus_init(void);
10026
10027-irqreturn_t tap_blkif_be_int(int irq, void *dev_id, struct pt_regs *regs);
10028+irqreturn_t tap_blkif_be_int(int irq, void *dev_id);
10029 int tap_blkif_schedule(void *arg);
10030
10031 int dom_to_devid(domid_t domid, int xenbus_id, blkif_t *blkif);
10032--- sle11-2009-05-14.orig/drivers/xen/console/console.c 2008-12-15 11:13:47.000000000 +0100
10033+++ sle11-2009-05-14/drivers/xen/console/console.c 2009-03-04 11:28:34.000000000 +0100
10034@@ -361,7 +361,7 @@ static struct tty_struct *xencons_tty;
10035 static int xencons_priv_irq;
10036 static char x_char;
10037
10038-void xencons_rx(char *buf, unsigned len, struct pt_regs *regs)
10039+void xencons_rx(char *buf, unsigned len)
10040 {
10041 int i;
10042 unsigned long flags;
10043@@ -386,8 +386,7 @@ void xencons_rx(char *buf, unsigned len,
10044 if (time_before(jiffies, sysrq_timeout)) {
10045 spin_unlock_irqrestore(
10046 &xencons_lock, flags);
10047- handle_sysrq(
10048- buf[i], regs, xencons_tty);
10049+ handle_sysrq(buf[i], xencons_tty);
10050 spin_lock_irqsave(
10051 &xencons_lock, flags);
10052 continue;
10053@@ -452,14 +451,13 @@ void xencons_tx(void)
10054 }
10055
10056 /* Privileged receive callback and transmit kicker. */
10057-static irqreturn_t xencons_priv_interrupt(int irq, void *dev_id,
10058- struct pt_regs *regs)
10059+static irqreturn_t xencons_priv_interrupt(int irq, void *dev_id)
10060 {
10061 static char rbuf[16];
10062 int l;
10063
10064 while ((l = HYPERVISOR_console_io(CONSOLEIO_read, 16, rbuf)) > 0)
10065- xencons_rx(rbuf, l, regs);
10066+ xencons_rx(rbuf, l);
10067
10068 xencons_tx();
10069
10070@@ -647,7 +645,7 @@ static void xencons_close(struct tty_str
10071 spin_unlock_irqrestore(&xencons_lock, flags);
10072 }
10073
10074-static struct tty_operations xencons_ops = {
10075+static const struct tty_operations xencons_ops = {
10076 .open = xencons_open,
10077 .close = xencons_close,
10078 .write = xencons_write,
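
Alongside the tty_operations table becoming const, the console hunks track another 2.6.19 signature change: handle_sysrq() lost its pt_regs argument and is now handle_sysrq(int key, struct tty_struct *tty), with a NULL tty allowed for non-tty callers (the xenbus sysrq handler further below relies on that). A tiny sketch, force_emergency_sync being a made-up caller:

#include <linux/sysrq.h>

static void force_emergency_sync(void)
{
        /* 's' = emergency sync; NULL tty is fine for non-tty callers */
        handle_sysrq('s', NULL);
}
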
10079--- sle11-2009-05-14.orig/drivers/xen/console/xencons_ring.c 2009-05-14 11:02:43.000000000 +0200
10080+++ sle11-2009-05-14/drivers/xen/console/xencons_ring.c 2009-03-04 11:28:34.000000000 +0100
10081@@ -83,7 +83,7 @@ int xencons_ring_send(const char *data,
10082 return sent;
10083 }
10084
10085-static irqreturn_t handle_input(int irq, void *unused, struct pt_regs *regs)
10086+static irqreturn_t handle_input(int irq, void *unused)
10087 {
10088 struct xencons_interface *intf = xencons_interface();
10089 XENCONS_RING_IDX cons, prod;
10090@@ -94,7 +94,7 @@ static irqreturn_t handle_input(int irq,
10091 BUG_ON((prod - cons) > sizeof(intf->in));
10092
10093 while (cons != prod) {
10094- xencons_rx(intf->in+MASK_XENCONS_IDX(cons,intf->in), 1, regs);
10095+ xencons_rx(intf->in+MASK_XENCONS_IDX(cons,intf->in), 1);
10096 cons++;
10097 }
10098
10099--- sle11-2009-05-14.orig/drivers/xen/core/evtchn.c 2009-03-16 16:14:12.000000000 +0100
10100+++ sle11-2009-05-14/drivers/xen/core/evtchn.c 2009-03-04 11:28:34.000000000 +0100
10101@@ -516,7 +516,7 @@ static void unbind_from_irq(unsigned int
10102
10103 int bind_caller_port_to_irqhandler(
10104 unsigned int caller_port,
10105- irqreturn_t (*handler)(int, void *, struct pt_regs *),
10106+ irq_handler_t handler,
10107 unsigned long irqflags,
10108 const char *devname,
10109 void *dev_id)
10110@@ -539,7 +539,7 @@ EXPORT_SYMBOL_GPL(bind_caller_port_to_ir
10111
10112 int bind_listening_port_to_irqhandler(
10113 unsigned int remote_domain,
10114- irqreturn_t (*handler)(int, void *, struct pt_regs *),
10115+ irq_handler_t handler,
10116 unsigned long irqflags,
10117 const char *devname,
10118 void *dev_id)
10119@@ -563,7 +563,7 @@ EXPORT_SYMBOL_GPL(bind_listening_port_to
10120 int bind_interdomain_evtchn_to_irqhandler(
10121 unsigned int remote_domain,
10122 unsigned int remote_port,
10123- irqreturn_t (*handler)(int, void *, struct pt_regs *),
10124+ irq_handler_t handler,
10125 unsigned long irqflags,
10126 const char *devname,
10127 void *dev_id)
10128@@ -587,7 +587,7 @@ EXPORT_SYMBOL_GPL(bind_interdomain_evtch
10129 int bind_virq_to_irqhandler(
10130 unsigned int virq,
10131 unsigned int cpu,
10132- irqreturn_t (*handler)(int, void *, struct pt_regs *),
10133+ irq_handler_t handler,
10134 unsigned long irqflags,
10135 const char *devname,
10136 void *dev_id)
10137@@ -611,7 +611,7 @@ EXPORT_SYMBOL_GPL(bind_virq_to_irqhandle
10138 int bind_ipi_to_irqhandler(
10139 unsigned int ipi,
10140 unsigned int cpu,
10141- irqreturn_t (*handler)(int, void *, struct pt_regs *),
10142+ irq_handler_t handler,
10143 unsigned long irqflags,
10144 const char *devname,
10145 void *dev_id)
10146@@ -696,15 +696,7 @@ static unsigned int startup_dynirq(unsig
10147 return 0;
10148 }
10149
10150-static void shutdown_dynirq(unsigned int irq)
10151-{
10152- int evtchn = evtchn_from_irq(irq);
10153-
10154- if (VALID_EVTCHN(evtchn))
10155- mask_evtchn(evtchn);
10156-}
10157-
10158-static void enable_dynirq(unsigned int irq)
10159+static void unmask_dynirq(unsigned int irq)
10160 {
10161 int evtchn = evtchn_from_irq(irq);
10162
10163@@ -712,7 +704,7 @@ static void enable_dynirq(unsigned int i
10164 unmask_evtchn(evtchn);
10165 }
10166
10167-static void disable_dynirq(unsigned int irq)
10168+static void mask_dynirq(unsigned int irq)
10169 {
10170 int evtchn = evtchn_from_irq(irq);
10171
10172@@ -740,12 +732,13 @@ static void end_dynirq(unsigned int irq)
10173 unmask_evtchn(evtchn);
10174 }
10175
10176-static struct hw_interrupt_type dynirq_type = {
10177- .typename = "Dynamic-irq",
10178+static struct irq_chip dynirq_chip = {
10179+ .name = "Dynamic",
10180 .startup = startup_dynirq,
10181- .shutdown = shutdown_dynirq,
10182- .enable = enable_dynirq,
10183- .disable = disable_dynirq,
10184+ .shutdown = mask_dynirq,
10185+ .mask = mask_dynirq,
10186+ .unmask = unmask_dynirq,
10187+ .mask_ack = ack_dynirq,
10188 .ack = ack_dynirq,
10189 .end = end_dynirq,
10190 #ifdef CONFIG_SMP
10191@@ -859,12 +852,12 @@ static void shutdown_pirq(unsigned int i
10192 irq_info[irq] = mk_irq_info(IRQT_PIRQ, index_from_irq(irq), 0);
10193 }
10194
10195-static void enable_pirq(unsigned int irq)
10196+static void unmask_pirq(unsigned int irq)
10197 {
10198 startup_pirq(irq);
10199 }
10200
10201-static void disable_pirq(unsigned int irq)
10202+static void mask_pirq(unsigned int irq)
10203 {
10204 }
10205
10206@@ -891,12 +884,13 @@ static void end_pirq(unsigned int irq)
10207 pirq_unmask_and_notify(evtchn, irq);
10208 }
10209
10210-static struct hw_interrupt_type pirq_type = {
10211- .typename = "Phys-irq",
10212+static struct irq_chip pirq_chip = {
10213+ .name = "Phys",
10214 .startup = startup_pirq,
10215 .shutdown = shutdown_pirq,
10216- .enable = enable_pirq,
10217- .disable = disable_pirq,
10218+ .mask = mask_pirq,
10219+ .unmask = unmask_pirq,
10220+ .mask_ack = ack_pirq,
10221 .ack = ack_pirq,
10222 .end = end_pirq,
10223 #ifdef CONFIG_SMP
10224@@ -1081,7 +1075,8 @@ void evtchn_register_pirq(int irq)
10225 if (identity_mapped_irq(irq) || type_from_irq(irq) != IRQT_UNBOUND)
10226 return;
10227 irq_info[irq] = mk_irq_info(IRQT_PIRQ, irq, 0);
10228- irq_desc[irq].chip = &pirq_type;
10229+ set_irq_chip_and_handler_name(irq, &pirq_chip, handle_level_irq,
10230+ "level");
10231 }
10232
10233 int evtchn_map_pirq(int irq, int xen_pirq)
10234@@ -1104,11 +1099,18 @@ int evtchn_map_pirq(int irq, int xen_pir
10235 spin_unlock(&irq_alloc_lock);
10236 if (irq < PIRQ_BASE)
10237 return -ENOSPC;
10238- irq_desc[irq].chip = &pirq_type;
10239+ set_irq_chip_and_handler_name(irq, &pirq_chip,
10240+ handle_level_irq, "level");
10241 } else if (!xen_pirq) {
10242 if (unlikely(type_from_irq(irq) != IRQT_PIRQ))
10243 return -EINVAL;
10244- irq_desc[irq].chip = &no_irq_type;
10245+ /*
10246+ * dynamic_irq_cleanup(irq) would seem to be the correct thing
10247+ * here, but cannot be used as we get here also during shutdown
10248+ * when a driver didn't free_irq() its MSI(-X) IRQ(s), which
10249+ * then causes a warning in dynamic_irq_cleanup().
10250+ */
10251+ set_irq_chip_and_handler(irq, NULL, NULL);
10252 irq_info[irq] = IRQ_UNBOUND;
10253 return 0;
10254 } else if (type_from_irq(irq) != IRQT_PIRQ
10255@@ -1154,10 +1156,9 @@ void __init xen_init_IRQ(void)
10256 for (i = DYNIRQ_BASE; i < (DYNIRQ_BASE + NR_DYNIRQS); i++) {
10257 irq_bindcount[i] = 0;
10258
10259- irq_desc[i].status = IRQ_DISABLED|IRQ_NOPROBE;
10260- irq_desc[i].action = NULL;
10261- irq_desc[i].depth = 1;
10262- irq_desc[i].chip = &dynirq_type;
10263+ irq_desc[i].status |= IRQ_NOPROBE;
10264+ set_irq_chip_and_handler_name(i, &dynirq_chip,
10265+ handle_level_irq, "level");
10266 }
10267
10268 /* Phys IRQ space is statically bound (1:1 mapping). Nail refcnts. */
10269@@ -1173,9 +1174,7 @@ void __init xen_init_IRQ(void)
10270 continue;
10271 #endif
10272
10273- irq_desc[i].status = IRQ_DISABLED;
10274- irq_desc[i].action = NULL;
10275- irq_desc[i].depth = 1;
10276- irq_desc[i].chip = &pirq_type;
10277+ set_irq_chip_and_handler_name(i, &pirq_chip,
10278+ handle_level_irq, "level");
10279 }
10280 }
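
The evtchn conversion above is the standard 2.6.19 migration from struct hw_interrupt_type (typename/enable/disable) to struct irq_chip (name/mask/unmask/mask_ack), registered together with a flow handler through set_irq_chip_and_handler_name() instead of poking irq_desc[] fields directly. The general shape for a hypothetical controller (all "mychip" names are made up; the stub bodies stand in for real hardware accesses):

#include <linux/irq.h>

static void mychip_mask(unsigned int irq)       { /* mask the line */ }
static void mychip_unmask(unsigned int irq)     { /* unmask the line */ }
static void mychip_ack(unsigned int irq)        { /* ack the line */ }

static void mychip_mask_ack(unsigned int irq)
{
        mychip_mask(irq);
        mychip_ack(irq);
}

static struct irq_chip my_chip = {
        .name           = "mychip",
        .mask           = mychip_mask,
        .unmask         = mychip_unmask,
        .ack            = mychip_ack,
        .mask_ack       = mychip_mask_ack,
};

static void __init my_irq_init(unsigned int irq)
{
        set_irq_chip_and_handler_name(irq, &my_chip, handle_level_irq,
                                      "level");
}
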
10281--- sle11-2009-05-14.orig/drivers/xen/core/reboot.c 2009-05-14 11:02:43.000000000 +0200
10282+++ sle11-2009-05-14/drivers/xen/core/reboot.c 2009-03-04 11:28:34.000000000 +0100
10283@@ -14,6 +14,7 @@
10284
10285 #ifdef HAVE_XEN_PLATFORM_COMPAT_H
10286 #include <xen/platform-compat.h>
10287+#undef handle_sysrq
10288 #endif
10289
10290 MODULE_LICENSE("Dual BSD/GPL");
10291@@ -231,7 +232,7 @@ static void sysrq_handler(struct xenbus_
10292
10293 #ifdef CONFIG_MAGIC_SYSRQ
10294 if (sysrq_key != '\0')
10295- handle_sysrq(sysrq_key, NULL, NULL);
10296+ handle_sysrq(sysrq_key, NULL);
10297 #endif
10298 }
10299
10300@@ -245,7 +246,7 @@ static struct xenbus_watch sysrq_watch =
10301 .callback = sysrq_handler
10302 };
10303
10304-static irqreturn_t suspend_int(int irq, void* dev_id, struct pt_regs *ptregs)
10305+static irqreturn_t suspend_int(int irq, void* dev_id)
10306 {
10307 switch_shutdown_state(SHUTDOWN_SUSPEND);
10308 return IRQ_HANDLED;
10309--- sle11-2009-05-14.orig/drivers/xen/core/smpboot.c 2009-04-28 16:02:07.000000000 +0200
10310+++ sle11-2009-05-14/drivers/xen/core/smpboot.c 2009-03-04 11:28:34.000000000 +0100
10311@@ -25,8 +25,8 @@
10312 #include <xen/cpu_hotplug.h>
10313 #include <xen/xenbus.h>
10314
10315-extern irqreturn_t smp_reschedule_interrupt(int, void *, struct pt_regs *);
10316-extern irqreturn_t smp_call_function_interrupt(int, void *, struct pt_regs *);
10317+extern irqreturn_t smp_reschedule_interrupt(int, void *);
10318+extern irqreturn_t smp_call_function_interrupt(int, void *);
10319
10320 extern int local_setup_timer(unsigned int cpu);
10321 extern void local_teardown_timer(unsigned int cpu);
10322@@ -62,8 +62,6 @@ EXPORT_SYMBOL(cpu_core_map);
10323 #if defined(__i386__)
10324 u8 x86_cpu_to_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = 0xff };
10325 EXPORT_SYMBOL(x86_cpu_to_apicid);
10326-#elif !defined(CONFIG_X86_IO_APIC)
10327-unsigned int maxcpus = NR_CPUS;
10328 #endif
10329
10330 void __init prefill_possible_map(void)
10331--- sle11-2009-05-14.orig/drivers/xen/fbfront/xenfb.c 2009-02-16 15:59:55.000000000 +0100
10332+++ sle11-2009-05-14/drivers/xen/fbfront/xenfb.c 2009-03-04 11:28:34.000000000 +0100
10333@@ -524,8 +524,7 @@ static struct fb_ops xenfb_fb_ops = {
10334 .fb_set_par = xenfb_set_par,
10335 };
10336
10337-static irqreturn_t xenfb_event_handler(int rq, void *dev_id,
10338- struct pt_regs *regs)
10339+static irqreturn_t xenfb_event_handler(int rq, void *dev_id)
10340 {
10341 /*
10342 * No in events recognized, simply ignore them all.
10343--- sle11-2009-05-14.orig/drivers/xen/fbfront/xenkbd.c 2009-05-14 11:02:43.000000000 +0200
10344+++ sle11-2009-05-14/drivers/xen/fbfront/xenkbd.c 2009-03-04 11:28:34.000000000 +0100
10345@@ -46,7 +46,7 @@ static void xenkbd_disconnect_backend(st
10346 * to do that.
10347 */
10348
10349-static irqreturn_t input_handler(int rq, void *dev_id, struct pt_regs *regs)
10350+static irqreturn_t input_handler(int rq, void *dev_id)
10351 {
10352 struct xenkbd_info *info = dev_id;
10353 struct xenkbd_page *page = info->page;
10354--- sle11-2009-05-14.orig/drivers/xen/gntdev/gntdev.c 2008-12-15 11:13:45.000000000 +0100
10355+++ sle11-2009-05-14/drivers/xen/gntdev/gntdev.c 2009-03-04 11:28:34.000000000 +0100
10356@@ -752,9 +752,6 @@ static pte_t gntdev_clear_pte(struct vm_
10357 BUG();
10358 }
10359
10360- /* Copy the existing value of the PTE for returning. */
10361- copy = *ptep;
10362-
10363 /* Calculate the grant relating to this PTE. */
10364 slot_index = vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT);
10365
10366@@ -769,6 +766,10 @@ static pte_t gntdev_clear_pte(struct vm_
10367 GNTDEV_INVALID_HANDLE &&
10368 !xen_feature(XENFEAT_auto_translated_physmap)) {
10369 /* NOT USING SHADOW PAGE TABLES. */
10370+
10371+ /* Copy the existing value of the PTE for returning. */
10372+ copy = *ptep;
10373+
10374 gnttab_set_unmap_op(&op, ptep_to_machine(ptep),
10375 GNTMAP_contains_pte,
10376 private_data->grants[slot_index]
10377@@ -781,7 +782,7 @@ static pte_t gntdev_clear_pte(struct vm_
10378 op.status);
10379 } else {
10380 /* USING SHADOW PAGE TABLES. */
10381- pte_clear_full(vma->vm_mm, addr, ptep, is_fullmm);
10382+ copy = ptep_get_and_clear_full(vma->vm_mm, addr, ptep, is_fullmm);
10383 }
10384
10385 /* Finally, we unmap the grant from kernel space. */
10386@@ -809,7 +810,7 @@ static pte_t gntdev_clear_pte(struct vm_
10387 >> PAGE_SHIFT, INVALID_P2M_ENTRY);
10388
10389 } else {
10390- pte_clear_full(vma->vm_mm, addr, ptep, is_fullmm);
10391+ copy = ptep_get_and_clear_full(vma->vm_mm, addr, ptep, is_fullmm);
10392 }
10393
10394 return copy;
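
The gntdev change above is a correctness fix, not churn: pte_clear_full() throws the old entry away, yet gntdev_clear_pte() must return it, and snapshotting *ptep before the clear can go stale. ptep_get_and_clear_full() clears the PTE and hands back the value that was live at that moment. The shape of the fix, as a sketch (clear_and_return is a made-up wrapper):

#include <linux/mm.h>
#include <asm/pgtable.h>

static pte_t clear_and_return(struct mm_struct *mm, unsigned long addr,
                              pte_t *ptep, int fullmm)
{
        /* old shape: copy = *ptep; ... pte_clear_full(...); - the early
         * snapshot can be stale.  new shape: take the value from the
         * atomic clear itself. */
        return ptep_get_and_clear_full(mm, addr, ptep, fullmm);
}
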
10395--- sle11-2009-05-14.orig/drivers/xen/netback/accel.c 2009-05-14 11:02:43.000000000 +0200
10396+++ sle11-2009-05-14/drivers/xen/netback/accel.c 2009-03-04 11:28:34.000000000 +0100
10397@@ -65,7 +65,7 @@ static int match_accelerator(struct xenb
10398
10399 if (IS_ERR(eth_name)) {
10400 /* Probably means not present */
10401- DPRINTK("%s: no match due to xenbus_read accel error %d\n",
10402+ DPRINTK("%s: no match due to xenbus_read accel error %ld\n",
10403 __FUNCTION__, PTR_ERR(eth_name));
10404 return 0;
10405 } else {
10406--- sle11-2009-05-14.orig/drivers/xen/netback/common.h 2009-05-14 11:02:43.000000000 +0200
10407+++ sle11-2009-05-14/drivers/xen/netback/common.h 2009-03-04 11:28:34.000000000 +0100
10408@@ -200,7 +200,7 @@ void netif_deschedule_work(netif_t *neti
10409
10410 int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev);
10411 struct net_device_stats *netif_be_get_stats(struct net_device *dev);
10412-irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs);
10413+irqreturn_t netif_be_int(int irq, void *dev_id);
10414
10415 static inline int netbk_can_queue(struct net_device *dev)
10416 {
10417--- sle11-2009-05-14.orig/drivers/xen/netback/loopback.c 2009-05-14 11:02:43.000000000 +0200
10418+++ sle11-2009-05-14/drivers/xen/netback/loopback.c 2009-03-04 11:28:34.000000000 +0100
10419@@ -151,7 +151,7 @@ static int loopback_start_xmit(struct sk
10420 np->stats.rx_bytes += skb->len;
10421 np->stats.rx_packets++;
10422
10423- if (skb->ip_summed == CHECKSUM_HW) {
10424+ if (skb->ip_summed == CHECKSUM_PARTIAL) {
10425 /* Defer checksum calculation. */
10426 skb->proto_csum_blank = 1;
10427 /* Must be a local packet: assert its integrity. */
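
The CHECKSUM_HW renames here and in the netback/netfront hunks below follow 2.6.19's split of the old overloaded value: on transmit it became CHECKSUM_PARTIAL ("checksum still has to be filled in"), on receive CHECKSUM_COMPLETE ("hardware already summed the payload"). The tx-side test these drivers rely on, as a sketch (tx_needs_csum is a made-up helper):

#include <linux/skbuff.h>

static inline int tx_needs_csum(const struct sk_buff *skb)
{
        /* same meaning as the old skb->ip_summed == CHECKSUM_HW on tx */
        return skb->ip_summed == CHECKSUM_PARTIAL;
}
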
10428--- sle11-2009-05-14.orig/drivers/xen/netback/netback.c 2008-12-23 09:31:07.000000000 +0100
10429+++ sle11-2009-05-14/drivers/xen/netback/netback.c 2009-03-04 11:28:34.000000000 +0100
10430@@ -692,7 +692,7 @@ static void net_rx_action(unsigned long
10431 id = meta[npo.meta_cons].id;
10432 flags = nr_frags ? NETRXF_more_data : 0;
10433
10434- if (skb->ip_summed == CHECKSUM_HW) /* local packet? */
10435+ if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
10436 flags |= NETRXF_csum_blank | NETRXF_data_validated;
10437 else if (skb->proto_data_valid) /* remote but checksummed? */
10438 flags |= NETRXF_data_validated;
10439@@ -1459,7 +1459,7 @@ static void netif_page_release(struct pa
10440 netif_idx_release(idx);
10441 }
10442
10443-irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs)
10444+irqreturn_t netif_be_int(int irq, void *dev_id)
10445 {
10446 netif_t *netif = dev_id;
10447
10448@@ -1526,7 +1526,7 @@ static netif_rx_response_t *make_rx_resp
10449 }
10450
10451 #ifdef NETBE_DEBUG_INTERRUPT
10452-static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs)
10453+static irqreturn_t netif_be_dbg(int irq, void *dev_id)
10454 {
10455 struct list_head *ent;
10456 netif_t *netif;
10457--- sle11-2009-05-14.orig/drivers/xen/netfront/netfront.c 2009-04-09 14:41:33.000000000 +0200
10458+++ sle11-2009-05-14/drivers/xen/netfront/netfront.c 2009-03-30 16:34:59.000000000 +0200
10459@@ -136,7 +136,7 @@ static inline int netif_needs_gso(struct
10460 {
10461 return skb_is_gso(skb) &&
10462 (!skb_gso_ok(skb, dev->features) ||
10463- unlikely(skb->ip_summed != CHECKSUM_HW));
10464+ unlikely(skb->ip_summed != CHECKSUM_PARTIAL));
10465 }
10466 #else
10467 #define HAVE_GSO 0
10468@@ -222,7 +222,7 @@ static void network_tx_buf_gc(struct net
10469 static void network_alloc_rx_buffers(struct net_device *);
10470 static void send_fake_arp(struct net_device *);
10471
10472-static irqreturn_t netif_int(int irq, void *dev_id, struct pt_regs *ptregs);
10473+static irqreturn_t netif_int(int irq, void *dev_id);
10474
10475 #ifdef CONFIG_SYSFS
10476 static int xennet_sysfs_addif(struct net_device *netdev);
10477@@ -992,7 +992,7 @@ static int network_start_xmit(struct sk_
10478 tx->flags = 0;
10479 extra = NULL;
10480
10481- if (skb->ip_summed == CHECKSUM_HW) /* local packet? */
10482+ if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
10483 tx->flags |= NETTXF_csum_blank | NETTXF_data_validated;
10484 #ifdef CONFIG_XEN
10485 if (skb->proto_data_valid) /* remote but checksummed? */
10486@@ -1049,7 +1049,7 @@ static int network_start_xmit(struct sk_
10487 return 0;
10488 }
10489
10490-static irqreturn_t netif_int(int irq, void *dev_id, struct pt_regs *ptregs)
10491+static irqreturn_t netif_int(int irq, void *dev_id)
10492 {
10493 struct net_device *dev = dev_id;
10494 struct netfront_info *np = netdev_priv(dev);
10495--- sle11-2009-05-14.orig/drivers/xen/pciback/pciback.h 2009-05-14 11:02:43.000000000 +0200
10496+++ sle11-2009-05-14/drivers/xen/pciback/pciback.h 2009-03-04 11:28:34.000000000 +0100
10497@@ -87,7 +87,7 @@ int pciback_publish_pci_roots(struct pci
10498 void pciback_release_devices(struct pciback_device *pdev);
10499
10500 /* Handles events from front-end */
10501-irqreturn_t pciback_handle_event(int irq, void *dev_id, struct pt_regs *regs);
10502+irqreturn_t pciback_handle_event(int irq, void *dev_id);
10503 void pciback_do_op(void *data);
10504
10505 int pciback_xenbus_register(void);
10506--- sle11-2009-05-14.orig/drivers/xen/pciback/pciback_ops.c 2009-05-14 11:02:43.000000000 +0200
10507+++ sle11-2009-05-14/drivers/xen/pciback/pciback_ops.c 2009-03-04 11:28:34.000000000 +0100
10508@@ -107,7 +107,7 @@ void pciback_do_op(void *data)
10509 test_and_schedule_op(pdev);
10510 }
10511
10512-irqreturn_t pciback_handle_event(int irq, void *dev_id, struct pt_regs *regs)
10513+irqreturn_t pciback_handle_event(int irq, void *dev_id)
10514 {
10515 struct pciback_device *pdev = dev_id;
10516
10517--- sle11-2009-05-14.orig/drivers/xen/pcifront/pci_op.c 2009-05-14 11:02:43.000000000 +0200
10518+++ sle11-2009-05-14/drivers/xen/pcifront/pci_op.c 2009-03-04 11:28:34.000000000 +0100
10519@@ -508,10 +508,16 @@ int __devinit pcifront_rescan_root(struc
10520
10521 d = pci_scan_single_device(b, devfn);
10522 if (d) {
10523+ int err;
10524+
10525 dev_info(&pdev->xdev->dev, "New device on "
10526 "%04x:%02x:%02x.%02x found.\n", domain, bus,
10527 PCI_SLOT(devfn), PCI_FUNC(devfn));
10528- pci_bus_add_device(d);
10529+ err = pci_bus_add_device(d);
10530+ if (err)
10531+ dev_err(&pdev->xdev->dev,
10532+ "error %d adding device, continuing.\n",
10533+ err);
10534 }
10535 }
10536
10537--- sle11-2009-05-14.orig/drivers/xen/privcmd/compat_privcmd.c 2009-05-14 11:02:43.000000000 +0200
10538+++ sle11-2009-05-14/drivers/xen/privcmd/compat_privcmd.c 2009-03-04 11:28:34.000000000 +0100
10539@@ -18,7 +18,6 @@
10540 * Authors: Jimi Xenidis <jimix@watson.ibm.com>
10541 */
10542
10543-#include <linux/config.h>
10544 #include <linux/compat.h>
10545 #include <linux/ioctl.h>
10546 #include <linux/syscalls.h>
10547--- sle11-2009-05-14.orig/drivers/xen/privcmd/privcmd.c 2009-05-14 11:02:43.000000000 +0200
10548+++ sle11-2009-05-14/drivers/xen/privcmd/privcmd.c 2009-03-04 11:28:34.000000000 +0100
10549@@ -40,7 +40,7 @@ static int privcmd_enforce_singleshot_ma
10550 static long privcmd_ioctl(struct file *file,
10551 unsigned int cmd, unsigned long data)
10552 {
10553- int ret = -ENOSYS;
10554+ long ret = -ENOSYS;
10555 void __user *udata = (void __user *) data;
10556
10557 switch (cmd) {
10558@@ -50,42 +50,15 @@ static long privcmd_ioctl(struct file *f
10559 if (copy_from_user(&hypercall, udata, sizeof(hypercall)))
10560 return -EFAULT;
10561
10562-#if defined(__i386__)
10563+#ifdef CONFIG_X86
10564 if (hypercall.op >= (PAGE_SIZE >> 5))
10565 break;
10566- __asm__ __volatile__ (
10567- "pushl %%ebx; pushl %%ecx; pushl %%edx; "
10568- "pushl %%esi; pushl %%edi; "
10569- "movl 8(%%eax),%%ebx ;"
10570- "movl 16(%%eax),%%ecx ;"
10571- "movl 24(%%eax),%%edx ;"
10572- "movl 32(%%eax),%%esi ;"
10573- "movl 40(%%eax),%%edi ;"
10574- "movl (%%eax),%%eax ;"
10575- "shll $5,%%eax ;"
10576- "addl $hypercall_page,%%eax ;"
10577- "call *%%eax ;"
10578- "popl %%edi; popl %%esi; popl %%edx; "
10579- "popl %%ecx; popl %%ebx"
10580- : "=a" (ret) : "0" (&hypercall) : "memory" );
10581-#elif defined (__x86_64__)
10582- if (hypercall.op < (PAGE_SIZE >> 5)) {
10583- long ign1, ign2, ign3;
10584- __asm__ __volatile__ (
10585- "movq %8,%%r10; movq %9,%%r8;"
10586- "shll $5,%%eax ;"
10587- "addq $hypercall_page,%%rax ;"
10588- "call *%%rax"
10589- : "=a" (ret), "=D" (ign1),
10590- "=S" (ign2), "=d" (ign3)
10591- : "0" ((unsigned int)hypercall.op),
10592- "1" (hypercall.arg[0]),
10593- "2" (hypercall.arg[1]),
10594- "3" (hypercall.arg[2]),
10595- "g" (hypercall.arg[3]),
10596- "g" (hypercall.arg[4])
10597- : "r8", "r10", "memory" );
10598- }
10599+ ret = _hypercall(long, (unsigned int)hypercall.op,
10600+ (unsigned long)hypercall.arg[0],
10601+ (unsigned long)hypercall.arg[1],
10602+ (unsigned long)hypercall.arg[2],
10603+ (unsigned long)hypercall.arg[3],
10604+ (unsigned long)hypercall.arg[4]);
10605 #else
10606 ret = privcmd_hypercall(&hypercall);
10607 #endif
10608@@ -306,7 +279,7 @@ static int privcmd_mmap(struct file * fi
10609 return -ENOSYS;
10610
10611 /* DONTCOPY is essential for Xen as copy_page_range is broken. */
10612- vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY;
10613+ vma->vm_flags |= VM_RESERVED | VM_IO | VM_PFNMAP | VM_DONTCOPY;
10614 vma->vm_ops = &privcmd_vm_ops;
10615 vma->vm_private_data = NULL;
10616
10617--- sle11-2009-05-14.orig/drivers/xen/scsiback/common.h 2009-05-14 11:02:43.000000000 +0200
10618+++ sle11-2009-05-14/drivers/xen/scsiback/common.h 2009-03-04 11:28:34.000000000 +0100
10619@@ -142,7 +142,7 @@ typedef struct {
10620 #define VSCSIIF_TIMEOUT (900*HZ)
10621
10622
10623-irqreturn_t scsiback_intr(int, void *, struct pt_regs *);
10624+irqreturn_t scsiback_intr(int, void *);
10625 int scsiback_init_sring(struct vscsibk_info *info,
10626 unsigned long ring_ref, unsigned int evtchn);
10627 int scsiback_schedule(void *data);
10628--- sle11-2009-05-14.orig/drivers/xen/scsiback/scsiback.c 2009-05-14 11:02:43.000000000 +0200
10629+++ sle11-2009-05-14/drivers/xen/scsiback/scsiback.c 2009-03-04 11:28:34.000000000 +0100
10630@@ -440,7 +440,7 @@ void scsiback_cmd_exec(pending_req_t *pe
10631 write = (data_dir == DMA_TO_DEVICE);
10632 rq = blk_get_request(pending_req->sdev->request_queue, write, GFP_KERNEL);
10633
10634- rq->flags |= REQ_BLOCK_PC;
10635+ rq->cmd_type = REQ_TYPE_BLOCK_PC;
10636 rq->cmd_len = cmd_len;
10637 memcpy(rq->cmd, pending_req->cmnd, cmd_len);
10638
10639@@ -484,7 +484,7 @@ static void scsiback_device_reset_exec(p
10640 }
10641
10642
10643-irqreturn_t scsiback_intr(int irq, void *dev_id, struct pt_regs *regs)
10644+irqreturn_t scsiback_intr(int irq, void *dev_id)
10645 {
10646 scsiback_notify_work((struct vscsibk_info *)dev_id);
10647 return IRQ_HANDLED;
10648--- sle11-2009-05-14.orig/drivers/xen/scsifront/common.h 2009-05-14 11:02:43.000000000 +0200
10649+++ sle11-2009-05-14/drivers/xen/scsifront/common.h 2009-03-04 11:28:34.000000000 +0100
10650@@ -122,7 +122,7 @@ struct vscsifrnt_info {
10651 int scsifront_xenbus_init(void);
10652 void scsifront_xenbus_unregister(void);
10653 int scsifront_schedule(void *data);
10654-irqreturn_t scsifront_intr(int irq, void *dev_id, struct pt_regs *ptregs);
10655+irqreturn_t scsifront_intr(int irq, void *dev_id);
10656 int scsifront_cmd_done(struct vscsifrnt_info *info);
10657
10658
10659--- sle11-2009-05-14.orig/drivers/xen/scsifront/scsifront.c 2009-05-14 11:02:43.000000000 +0200
10660+++ sle11-2009-05-14/drivers/xen/scsifront/scsifront.c 2009-03-04 11:28:34.000000000 +0100
10661@@ -100,7 +100,7 @@ static void scsifront_do_request(struct
10662 notify_remote_via_irq(irq);
10663 }
10664
10665-irqreturn_t scsifront_intr(int irq, void *dev_id, struct pt_regs *ptregs)
10666+irqreturn_t scsifront_intr(int irq, void *dev_id)
10667 {
10668 scsifront_notify_work((struct vscsifrnt_info *)dev_id);
10669 return IRQ_HANDLED;
10670--- sle11-2009-05-14.orig/drivers/xen/sfc_netback/accel_xenbus.c 2009-05-14 11:02:43.000000000 +0200
10671+++ sle11-2009-05-14/drivers/xen/sfc_netback/accel_xenbus.c 2009-03-04 11:28:34.000000000 +0100
10672@@ -68,8 +68,7 @@ static void unlink_bend(struct netback_a
10673
10674
10675 /* Demultiplex a message IRQ from the frontend driver. */
10676-static irqreturn_t msgirq_from_frontend(int irq, void *context,
10677- struct pt_regs *unused)
10678+static irqreturn_t msgirq_from_frontend(int irq, void *context)
10679 {
10680 struct xenbus_device *dev = context;
10681 struct netback_accel *bend = NETBACK_ACCEL_FROM_XENBUS_DEVICE(dev);
10682@@ -84,8 +83,7 @@ static irqreturn_t msgirq_from_frontend(
10683 * functionally, but we need it to pass to the bind function, and may
10684 * get called spuriously
10685 */
10686-static irqreturn_t netirq_from_frontend(int irq, void *context,
10687- struct pt_regs *unused)
10688+static irqreturn_t netirq_from_frontend(int irq, void *context)
10689 {
10690 VPRINTK("netirq %d from device %s\n", irq,
10691 ((struct xenbus_device *)context)->nodename);
10692--- sle11-2009-05-14.orig/drivers/xen/sfc_netfront/accel.h 2009-04-09 14:41:38.000000000 +0200
10693+++ sle11-2009-05-14/drivers/xen/sfc_netfront/accel.h 2009-03-30 16:34:56.000000000 +0200
10694@@ -451,10 +451,8 @@ void netfront_accel_msg_tx_fastpath(netf
10695 u32 ip, u16 port, u8 protocol);
10696
10697 /* Process an IRQ received from back end driver */
10698-irqreturn_t netfront_accel_msg_channel_irq_from_bend(int irq, void *context,
10699- struct pt_regs *unused);
10700-irqreturn_t netfront_accel_net_channel_irq_from_bend(int irq, void *context,
10701- struct pt_regs *unused);
10702+irqreturn_t netfront_accel_msg_channel_irq_from_bend(int irq, void *context);
10703+irqreturn_t netfront_accel_net_channel_irq_from_bend(int irq, void *context);
10704
10705 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)
10706 extern void netfront_accel_msg_from_bend(struct work_struct *context);
10707--- sle11-2009-05-14.orig/drivers/xen/sfc_netfront/accel_msg.c 2009-05-14 11:02:43.000000000 +0200
10708+++ sle11-2009-05-14/drivers/xen/sfc_netfront/accel_msg.c 2009-03-04 11:28:34.000000000 +0100
10709@@ -490,8 +490,7 @@ void netfront_accel_msg_from_bend(void *
10710 }
10711
10712
10713-irqreturn_t netfront_accel_msg_channel_irq_from_bend(int irq, void *context,
10714- struct pt_regs *unused)
10715+irqreturn_t netfront_accel_msg_channel_irq_from_bend(int irq, void *context)
10716 {
10717 netfront_accel_vnic *vnic = (netfront_accel_vnic *)context;
10718 VPRINTK("irq %d from device %s\n", irq, vnic->dev->nodename);
10719@@ -502,8 +501,7 @@ irqreturn_t netfront_accel_msg_channel_i
10720 }
10721
10722 /* Process an interrupt received from the NIC via backend */
10723-irqreturn_t netfront_accel_net_channel_irq_from_bend(int irq, void *context,
10724- struct pt_regs *unused)
10725+irqreturn_t netfront_accel_net_channel_irq_from_bend(int irq, void *context)
10726 {
10727 netfront_accel_vnic *vnic = (netfront_accel_vnic *)context;
10728 struct net_device *net_dev = vnic->net_dev;
10729--- sle11-2009-05-14.orig/drivers/xen/sfc_netfront/accel_tso.c 2009-05-14 11:02:43.000000000 +0200
10730+++ sle11-2009-05-14/drivers/xen/sfc_netfront/accel_tso.c 2009-03-04 11:28:34.000000000 +0100
10731@@ -363,7 +363,7 @@ int netfront_accel_enqueue_skb_tso(netfr
10732
10733 tso_check_safe(skb);
10734
10735- if (skb->ip_summed != CHECKSUM_HW)
10736+ if (skb->ip_summed != CHECKSUM_PARTIAL)
10737 EPRINTK("Trying to TSO send a packet without HW checksum\n");
10738
10739 tso_start(&state, skb);
10740--- sle11-2009-05-14.orig/drivers/xen/sfc_netfront/accel_vi.c 2009-04-09 14:41:38.000000000 +0200
10741+++ sle11-2009-05-14/drivers/xen/sfc_netfront/accel_vi.c 2009-03-30 16:35:11.000000000 +0200
10742@@ -461,7 +461,7 @@ netfront_accel_enqueue_skb_multi(netfron
10743
10744 frag_i = -1;
10745
10746- if (skb->ip_summed == CHECKSUM_HW) {
10747+ if (skb->ip_summed == CHECKSUM_PARTIAL) {
10748 /* Set to zero to encourage falcon to work it out for us */
10749 *(u16*)(skb->h.raw + skb->csum) = 0;
10750 }
10751@@ -580,7 +580,7 @@ netfront_accel_enqueue_skb_single(netfro
10752
10753 kva = buf->pkt_kva;
10754
10755- if (skb->ip_summed == CHECKSUM_HW) {
10756+ if (skb->ip_summed == CHECKSUM_PARTIAL) {
10757 /* Set to zero to encourage falcon to work it out for us */
10758 *(u16*)(skb->h.raw + skb->csum) = 0;
10759 }
10760--- sle11-2009-05-14.orig/drivers/xen/tpmback/common.h 2009-05-14 11:02:43.000000000 +0200
10761+++ sle11-2009-05-14/drivers/xen/tpmback/common.h 2009-03-04 11:28:34.000000000 +0100
10762@@ -61,7 +61,7 @@ void tpmif_deschedule_work(tpmif_t * tpm
10763 void tpmif_xenbus_init(void);
10764 void tpmif_xenbus_exit(void);
10765 int tpmif_map(tpmif_t *tpmif, unsigned long shared_page, unsigned int evtchn);
10766-irqreturn_t tpmif_be_int(int irq, void *dev_id, struct pt_regs *regs);
10767+irqreturn_t tpmif_be_int(int irq, void *dev_id);
10768
10769 long int tpmback_get_instance(struct backend_info *bi);
10770
10771--- sle11-2009-05-14.orig/drivers/xen/tpmback/tpmback.c 2009-05-14 11:02:43.000000000 +0200
10772+++ sle11-2009-05-14/drivers/xen/tpmback/tpmback.c 2009-03-04 11:28:34.000000000 +0100
10773@@ -502,7 +502,7 @@ static ssize_t vtpm_op_read(struct file
10774 list_del(&pak->next);
10775 write_unlock_irqrestore(&dataex.pak_lock, flags);
10776
10777- DPRINTK("size given by app: %d, available: %d\n", size, left);
10778+ DPRINTK("size given by app: %zu, available: %u\n", size, left);
10779
10780 ret_size = min_t(size_t, size, left);
10781
10782@@ -899,7 +899,7 @@ static void tpm_tx_action(unsigned long
10783 }
10784 }
10785
10786-irqreturn_t tpmif_be_int(int irq, void *dev_id, struct pt_regs *regs)
10787+irqreturn_t tpmif_be_int(int irq, void *dev_id)
10788 {
10789 tpmif_t *tpmif = (tpmif_t *) dev_id;
10790
10791--- sle11-2009-05-14.orig/drivers/xen/xenbus/xenbus_comms.c 2008-11-25 12:35:56.000000000 +0100
10792+++ sle11-2009-05-14/drivers/xen/xenbus/xenbus_comms.c 2009-03-04 11:28:34.000000000 +0100
10793@@ -55,7 +55,7 @@ static DECLARE_WORK(probe_work, xenbus_p
10794
10795 static DECLARE_WAIT_QUEUE_HEAD(xb_waitq);
10796
10797-static irqreturn_t wake_waiting(int irq, void *unused, struct pt_regs *regs)
10798+static irqreturn_t wake_waiting(int irq, void *unused)
10799 {
10800 if (unlikely(xenstored_ready == 0)) {
10801 xenstored_ready = 1;
10802--- sle11-2009-05-14.orig/drivers/xen/xenoprof/xenoprofile.c 2009-05-14 11:02:43.000000000 +0200
10803+++ sle11-2009-05-14/drivers/xen/xenoprof/xenoprofile.c 2009-03-04 11:28:34.000000000 +0100
10804@@ -194,8 +194,7 @@ done:
10805 oprofile_add_domain_switch(COORDINATOR_DOMAIN);
10806 }
10807
10808-static irqreturn_t
10809-xenoprof_ovf_interrupt(int irq, void * dev_id, struct pt_regs * regs)
10810+static irqreturn_t xenoprof_ovf_interrupt(int irq, void *dev_id)
10811 {
10812 struct xenoprof_buf * buf;
10813 static unsigned long flag;
10814--- sle11-2009-05-14.orig/include/asm-generic/pgtable.h 2009-02-16 15:58:14.000000000 +0100
10815+++ sle11-2009-05-14/include/asm-generic/pgtable.h 2009-03-04 11:28:34.000000000 +0100
10816@@ -100,7 +100,7 @@ static inline void ptep_set_wrprotect(st
10817 #endif
10818
10819 #ifndef arch_change_pte_range
10820-#define arch_change_pte_range(mm, pmd, addr, end, newprot) 0
10821+#define arch_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable) 0
10822 #endif
10823
10824 #ifndef __HAVE_ARCH_PTE_SAME
10825--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/desc_32.h 2009-05-14 11:02:43.000000000 +0200
10826+++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/desc_32.h 2009-03-04 11:28:34.000000000 +0100
10827@@ -32,52 +32,110 @@ static inline struct desc_struct *get_cp
10828 return (struct desc_struct *)per_cpu(cpu_gdt_descr, cpu).address;
10829 }
10830
10831+/*
10832+ * This is the ldt that every process will get unless we need
10833+ * something other than this.
10834+ */
10835+extern struct desc_struct default_ldt[];
10836+extern struct desc_struct idt_table[];
10837+extern void set_intr_gate(unsigned int irq, void * addr);
10838+
10839+static inline void pack_descriptor(__u32 *a, __u32 *b,
10840+ unsigned long base, unsigned long limit, unsigned char type, unsigned char flags)
10841+{
10842+ *a = ((base & 0xffff) << 16) | (limit & 0xffff);
10843+ *b = (base & 0xff000000) | ((base & 0xff0000) >> 16) |
10844+ (limit & 0x000f0000) | ((type & 0xff) << 8) | ((flags & 0xf) << 20);
10845+}
10846+
10847+static inline void pack_gate(__u32 *a, __u32 *b,
10848+ unsigned long base, unsigned short seg, unsigned char type, unsigned char flags)
10849+{
10850+ *a = (seg << 16) | (base & 0xffff);
10851+ *b = (base & 0xffff0000) | ((type & 0xff) << 8) | (flags & 0xff);
10852+}
10853+
10854+#define DESCTYPE_LDT 0x82 /* present, system, DPL-0, LDT */
10855+#define DESCTYPE_TSS 0x89 /* present, system, DPL-0, 32-bit TSS */
10856+#define DESCTYPE_TASK 0x85 /* present, system, DPL-0, task gate */
10857+#define DESCTYPE_INT 0x8e /* present, system, DPL-0, interrupt gate */
10858+#define DESCTYPE_TRAP 0x8f /* present, system, DPL-0, trap gate */
10859+#define DESCTYPE_DPL3 0x60 /* DPL-3 */
10860+#define DESCTYPE_S 0x10 /* !system */
10861+
10862 #define load_TR_desc() __asm__ __volatile__("ltr %w0"::"q" (GDT_ENTRY_TSS*8))
10863 #define load_LDT_desc() __asm__ __volatile__("lldt %w0"::"q" (GDT_ENTRY_LDT*8))
10864
10865 #define load_gdt(dtr) __asm__ __volatile("lgdt %0"::"m" (*dtr))
10866 #define load_idt(dtr) __asm__ __volatile("lidt %0"::"m" (*dtr))
10867-#define load_tr(tr) __asm__ __volatile("ltr %0"::"mr" (tr))
10868-#define load_ldt(ldt) __asm__ __volatile("lldt %0"::"mr" (ldt))
10869+#define load_tr(tr) __asm__ __volatile("ltr %0"::"m" (tr))
10870+#define load_ldt(ldt) __asm__ __volatile("lldt %0"::"m" (ldt))
10871
10872 #define store_gdt(dtr) __asm__ ("sgdt %0":"=m" (*dtr))
10873 #define store_idt(dtr) __asm__ ("sidt %0":"=m" (*dtr))
10874-#define store_tr(tr) __asm__ ("str %0":"=mr" (tr))
10875-#define store_ldt(ldt) __asm__ ("sldt %0":"=mr" (ldt))
10876+#define store_tr(tr) __asm__ ("str %0":"=m" (tr))
10877+#define store_ldt(ldt) __asm__ ("sldt %0":"=m" (ldt))
10878
10879-/*
10880- * This is the ldt that every process will get unless we need
10881- * something other than this.
10882- */
10883-extern struct desc_struct default_ldt[];
10884-extern void set_intr_gate(unsigned int irq, void * addr);
10885+#if TLS_SIZE != 24
10886+# error update this code.
10887+#endif
10888+
10889+static inline void load_TLS(struct thread_struct *t, unsigned int cpu)
10890+{
10891+#define C(i) if (HYPERVISOR_update_descriptor(virt_to_machine(&get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i]), \
10892+ *(u64 *)&t->tls_array[i])) \
10893+ BUG();
10894+ C(0); C(1); C(2);
10895+#undef C
10896+}
10897
10898-#define _set_tssldt_desc(n,addr,limit,type) \
10899-__asm__ __volatile__ ("movw %w3,0(%2)\n\t" \
10900- "movw %w1,2(%2)\n\t" \
10901- "rorl $16,%1\n\t" \
10902- "movb %b1,4(%2)\n\t" \
10903- "movb %4,5(%2)\n\t" \
10904- "movb $0,6(%2)\n\t" \
10905- "movb %h1,7(%2)\n\t" \
10906- "rorl $16,%1" \
10907- : "=m"(*(n)) : "q" (addr), "r"(n), "ir"(limit), "i"(type))
10908+#ifndef CONFIG_XEN
10909+static inline void write_dt_entry(void *dt, int entry, __u32 entry_a, __u32 entry_b)
10910+{
10911+ __u32 *lp = (__u32 *)((char *)dt + entry*8);
10912+ *lp = entry_a;
10913+ *(lp+1) = entry_b;
10914+}
10915
10916-#ifndef CONFIG_X86_NO_TSS
10917-static inline void __set_tss_desc(unsigned int cpu, unsigned int entry, void *addr)
10918+#define write_ldt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
10919+#define write_gdt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
10920+#else
10921+extern int write_ldt_entry(void *ldt, int entry, __u32 entry_a, __u32 entry_b);
10922+extern int write_gdt_entry(void *gdt, int entry, __u32 entry_a, __u32 entry_b);
10923+#endif
10924+#ifndef CONFIG_X86_NO_IDT
10925+#define write_idt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
10926+
10927+static inline void _set_gate(int gate, unsigned int type, void *addr, unsigned short seg)
10928 {
10929- _set_tssldt_desc(&get_cpu_gdt_table(cpu)[entry], (int)addr,
10930- offsetof(struct tss_struct, __cacheline_filler) - 1, 0x89);
10931+ __u32 a, b;
10932+ pack_gate(&a, &b, (unsigned long)addr, seg, type, 0);
10933+ write_idt_entry(idt_table, gate, a, b);
10934 }
10935+#endif
10936
10937-#define set_tss_desc(cpu,addr) __set_tss_desc(cpu, GDT_ENTRY_TSS, addr)
10938+#ifndef CONFIG_X86_NO_TSS
10939+static inline void __set_tss_desc(unsigned int cpu, unsigned int entry, const void *addr)
10940+{
10941+ __u32 a, b;
10942+ pack_descriptor(&a, &b, (unsigned long)addr,
10943+ offsetof(struct tss_struct, __cacheline_filler) - 1,
10944+ DESCTYPE_TSS, 0);
10945+ write_gdt_entry(get_cpu_gdt_table(cpu), entry, a, b);
10946+}
10947 #endif
10948
10949-static inline void set_ldt_desc(unsigned int cpu, void *addr, unsigned int size)
10950+static inline void set_ldt_desc(unsigned int cpu, void *addr, unsigned int entries)
10951 {
10952- _set_tssldt_desc(&get_cpu_gdt_table(cpu)[GDT_ENTRY_LDT], (int)addr, ((size << 3)-1), 0x82);
10953+ __u32 a, b;
10954+ pack_descriptor(&a, &b, (unsigned long)addr,
10955+ entries * sizeof(struct desc_struct) - 1,
10956+ DESCTYPE_LDT, 0);
10957+ write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT, a, b);
10958 }
10959
10960+#define set_tss_desc(cpu,addr) __set_tss_desc(cpu, GDT_ENTRY_TSS, addr)
10961+
10962 #define LDT_entry_a(info) \
10963 ((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff))
10964
10965@@ -103,21 +161,6 @@ static inline void set_ldt_desc(unsigned
10966 (info)->seg_not_present == 1 && \
10967 (info)->useable == 0 )
10968
10969-extern int write_ldt_entry(void *ldt, int entry, __u32 entry_a, __u32 entry_b);
10970-
10971-#if TLS_SIZE != 24
10972-# error update this code.
10973-#endif
10974-
10975-static inline void load_TLS(struct thread_struct *t, unsigned int cpu)
10976-{
10977-#define C(i) if (HYPERVISOR_update_descriptor(virt_to_machine(&get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i]), \
10978- *(u64 *)&t->tls_array[i])) \
10979- BUG();
10980- C(0); C(1); C(2);
10981-#undef C
10982-}
10983-
10984 static inline void clear_LDT(void)
10985 {
10986 int cpu = get_cpu();
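
The pack_descriptor()/pack_gate() helpers added above replace the old _set_tssldt_desc() inline asm with plain C that composes the two 32-bit words of a GDT entry. A standalone illustration of the same bit layout (userspace C, not kernel code; the values are an arbitrary worked example):

#include <stdio.h>
#include <stdint.h>

/* Same word layout as pack_descriptor() in the hunk above. */
static void pack(uint32_t *a, uint32_t *b, uint32_t base,
		 uint32_t limit, uint8_t type, uint8_t flags)
{
	*a = ((base & 0xffff) << 16) | (limit & 0xffff);
	*b = (base & 0xff000000) | ((base & 0xff0000) >> 16) |
	     (limit & 0x000f0000) | ((uint32_t)type << 8) |
	     ((uint32_t)(flags & 0xf) << 20);
}

int main(void)
{
	uint32_t a, b;

	/* An LDT descriptor (DESCTYPE_LDT, 0x82) for a table at
	 * 0x12345678 holding 3 eight-byte entries: limit = 3*8 - 1. */
	pack(&a, &b, 0x12345678, 3 * 8 - 1, 0x82, 0);
	printf("a=%08x b=%08x\n", (unsigned)a, (unsigned)b);
	/* prints: a=56780017 b=12008234 */
	return 0;
}
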
10987--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/fixmap_32.h 2009-05-14 11:02:43.000000000 +0200
10988+++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/fixmap_32.h 2009-03-04 11:28:34.000000000 +0100
10989@@ -55,7 +55,7 @@ enum fixed_addresses {
10990 #ifdef CONFIG_X86_LOCAL_APIC
10991 FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */
10992 #endif
10993-#ifdef CONFIG_X86_IO_APIC
10994+#if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_XEN)
10995 FIX_IO_APIC_BASE_0,
10996 FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS-1,
10997 #endif
10998@@ -95,10 +95,9 @@ enum fixed_addresses {
10999 __end_of_fixed_addresses
11000 };
11001
11002-extern void set_fixaddr_top(unsigned long top);
11003-
11004 extern void __set_fixmap(enum fixed_addresses idx,
11005 maddr_t phys, pgprot_t flags);
11006+extern void reserve_top_address(unsigned long reserve);
11007
11008 #define set_fixmap(idx, phys) \
11009 __set_fixmap(idx, phys, PAGE_KERNEL)
11010--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/hypercall_32.h 2009-05-14 11:02:43.000000000 +0200
11011+++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/hypercall_32.h 2009-03-04 11:28:34.000000000 +0100
11012@@ -128,6 +128,23 @@
11013 __res; \
11014 })
11015
11016+#define _hypercall(type, op, a1, a2, a3, a4, a5) \
11017+({ \
11018+ type __res; \
11019+ register typeof((a1)+0) __arg1 asm("ebx") = (a1); \
11020+ register typeof((a2)+0) __arg2 asm("ecx") = (a2); \
11021+ register typeof((a3)+0) __arg3 asm("edx") = (a3); \
11022+ register typeof((a4)+0) __arg4 asm("esi") = (a4); \
11023+ register typeof((a5)+0) __arg5 asm("edi") = (a5); \
11024+ asm volatile ( \
11025+ "call *%6" \
11026+ : "=a" (__res), "+r" (__arg1), "+r" (__arg2), \
11027+ "+r" (__arg3), "+r" (__arg4), "+r" (__arg5) \
11028+ : "0" (hypercall_page + (op) * 32) \
11029+ : "memory" ); \
11030+ __res; \
11031+})
11032+
11033 static inline int __must_check
11034 HYPERVISOR_set_trap_table(
11035 const trap_info_t *table)
11036@@ -140,6 +157,8 @@ HYPERVISOR_mmu_update(
11037 mmu_update_t *req, unsigned int count, unsigned int *success_count,
11038 domid_t domid)
11039 {
11040+ if (arch_use_lazy_mmu_mode())
11041+ return xen_multi_mmu_update(req, count, success_count, domid);
11042 return _hypercall4(int, mmu_update, req, count, success_count, domid);
11043 }
11044
11045@@ -148,6 +167,8 @@ HYPERVISOR_mmuext_op(
11046 struct mmuext_op *op, unsigned int count, unsigned int *success_count,
11047 domid_t domid)
11048 {
11049+ if (arch_use_lazy_mmu_mode())
11050+ return xen_multi_mmuext_op(op, count, success_count, domid);
11051 return _hypercall4(int, mmuext_op, op, count, success_count, domid);
11052 }
11053
11054@@ -238,6 +259,8 @@ static inline int __must_check
11055 HYPERVISOR_memory_op(
11056 unsigned int cmd, void *arg)
11057 {
11058+ if (arch_use_lazy_mmu_mode())
11059+ xen_multicall_flush(false);
11060 return _hypercall2(int, memory_op, cmd, arg);
11061 }
11062
11063@@ -253,6 +276,9 @@ HYPERVISOR_update_va_mapping(
11064 unsigned long va, pte_t new_val, unsigned long flags)
11065 {
11066 unsigned long pte_hi = 0;
11067+
11068+ if (arch_use_lazy_mmu_mode())
11069+ return xen_multi_update_va_mapping(va, new_val, flags);
11070 #ifdef CONFIG_X86_PAE
11071 pte_hi = new_val.pte_high;
11072 #endif
11073@@ -316,6 +342,8 @@ static inline int __must_check
11074 HYPERVISOR_grant_table_op(
11075 unsigned int cmd, void *uop, unsigned int count)
11076 {
11077+ if (arch_use_lazy_mmu_mode())
11078+ xen_multicall_flush(false);
11079 return _hypercall3(int, grant_table_op, cmd, uop, count);
11080 }
11081
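
For orientation: the bound `hypercall.op >= (PAGE_SIZE >> 5)` checked in the privcmd hunk earlier and the `hypercall_page + (op) * 32` operand used here encode the same fact: the hypercall page holds PAGE_SIZE/32 = 128 stubs of 32 bytes each, and the macro calls through the stub for the requested op. A minimal arithmetic check (standalone C; the page address is hypothetical):

#include <stdio.h>

#define PAGE_SIZE 4096
#define STUB_SIZE 32	/* each hypercall trampoline is 32 bytes */

int main(void)
{
	unsigned long page = 0xc0100000UL;	/* hypothetical hypercall_page */
	unsigned int op = 26;			/* some hypercall number */

	printf("stubs per page: %lu\n", (unsigned long)(PAGE_SIZE >> 5));
	if (op < (PAGE_SIZE >> 5))
		printf("stub for op %u at %#lx\n", op, page + op * STUB_SIZE);
	return 0;
}
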
11082--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/hypercall_64.h 2009-05-14 11:02:43.000000000 +0200
11083+++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/hypercall_64.h 2009-03-04 11:28:34.000000000 +0100
11084@@ -134,6 +134,23 @@
11085 __res; \
11086 })
11087
11088+#define _hypercall(type, op, a1, a2, a3, a4, a5) \
11089+({ \
11090+ type __res; \
11091+ register typeof((a1)+0) __arg1 asm("rdi") = (a1); \
11092+ register typeof((a2)+0) __arg2 asm("rsi") = (a2); \
11093+ register typeof((a3)+0) __arg3 asm("rdx") = (a3); \
11094+ register typeof((a4)+0) __arg4 asm("r10") = (a4); \
11095+ register typeof((a5)+0) __arg5 asm("r8") = (a5); \
11096+ asm volatile ( \
11097+ "call *%6" \
11098+ : "=a" (__res), "+r" (__arg1), "+r" (__arg2), \
11099+ "+r" (__arg3), "+r" (__arg4), "+r" (__arg5) \
11100+ : "0" (hypercall_page + (op) * 32) \
11101+ : "memory" ); \
11102+ __res; \
11103+})
11104+
11105 static inline int __must_check
11106 HYPERVISOR_set_trap_table(
11107 const trap_info_t *table)
11108@@ -146,6 +163,8 @@ HYPERVISOR_mmu_update(
11109 mmu_update_t *req, unsigned int count, unsigned int *success_count,
11110 domid_t domid)
11111 {
11112+ if (arch_use_lazy_mmu_mode())
11113+ return xen_multi_mmu_update(req, count, success_count, domid);
11114 return _hypercall4(int, mmu_update, req, count, success_count, domid);
11115 }
11116
11117@@ -154,6 +173,8 @@ HYPERVISOR_mmuext_op(
11118 struct mmuext_op *op, unsigned int count, unsigned int *success_count,
11119 domid_t domid)
11120 {
11121+ if (arch_use_lazy_mmu_mode())
11122+ return xen_multi_mmuext_op(op, count, success_count, domid);
11123 return _hypercall4(int, mmuext_op, op, count, success_count, domid);
11124 }
11125
11126@@ -241,6 +262,8 @@ static inline int __must_check
11127 HYPERVISOR_memory_op(
11128 unsigned int cmd, void *arg)
11129 {
11130+ if (arch_use_lazy_mmu_mode())
11131+ xen_multicall_flush(false);
11132 return _hypercall2(int, memory_op, cmd, arg);
11133 }
11134
11135@@ -255,6 +278,8 @@ static inline int __must_check
11136 HYPERVISOR_update_va_mapping(
11137 unsigned long va, pte_t new_val, unsigned long flags)
11138 {
11139+ if (arch_use_lazy_mmu_mode())
11140+ return xen_multi_update_va_mapping(va, new_val, flags);
11141 return _hypercall3(int, update_va_mapping, va, new_val.pte, flags);
11142 }
11143
11144@@ -314,6 +339,8 @@ static inline int __must_check
11145 HYPERVISOR_grant_table_op(
11146 unsigned int cmd, void *uop, unsigned int count)
11147 {
11148+ if (arch_use_lazy_mmu_mode())
11149+ xen_multicall_flush(false);
11150 return _hypercall3(int, grant_table_op, cmd, uop, count);
11151 }
11152
11153--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/hypervisor.h 2009-05-14 11:02:43.000000000 +0200
11154+++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/hypervisor.h 2009-03-04 11:28:34.000000000 +0100
11155@@ -43,6 +43,7 @@
11156 #include <xen/interface/physdev.h>
11157 #include <xen/interface/sched.h>
11158 #include <xen/interface/nmi.h>
11159+#include <asm/percpu.h>
11160 #include <asm/ptrace.h>
11161 #include <asm/page.h>
11162 #if defined(__i386__)
11163@@ -135,7 +136,46 @@ void scrub_pages(void *, unsigned int);
11164 #define scrub_pages(_p,_n) ((void)0)
11165 #endif
11166
11167-#include <xen/hypercall.h>
11168+#if defined(CONFIG_XEN) && !defined(MODULE)
11169+
11170+DECLARE_PER_CPU(bool, xen_lazy_mmu);
11171+
11172+int xen_multicall_flush(bool);
11173+
11174+int __must_check xen_multi_update_va_mapping(unsigned long va, pte_t,
11175+ unsigned long flags);
11176+int __must_check xen_multi_mmu_update(mmu_update_t *, unsigned int count,
11177+ unsigned int *success_count, domid_t);
11178+int __must_check xen_multi_mmuext_op(struct mmuext_op *, unsigned int count,
11179+ unsigned int *success_count, domid_t);
11180+
11181+#define __HAVE_ARCH_ENTER_LAZY_MMU_MODE
11182+static inline void arch_enter_lazy_mmu_mode(void)
11183+{
11184+ __get_cpu_var(xen_lazy_mmu) = true;
11185+}
11186+
11187+static inline void arch_leave_lazy_mmu_mode(void)
11188+{
11189+ __get_cpu_var(xen_lazy_mmu) = false;
11190+ xen_multicall_flush(false);
11191+}
11192+
11193+#if defined(CONFIG_X86_32)
11194+#define arch_use_lazy_mmu_mode() unlikely(x86_read_percpu(xen_lazy_mmu))
11195+#elif !defined(arch_use_lazy_mmu_mode)
11196+#define arch_use_lazy_mmu_mode() unlikely(__get_cpu_var(xen_lazy_mmu))
11197+#endif
11198+
11199+#else /* !CONFIG_XEN || MODULE */
11200+
11201+static inline void xen_multicall_flush(bool ignore) {}
11202+#define arch_use_lazy_mmu_mode() false
11203+#define xen_multi_update_va_mapping(...) ({ BUG(); -ENOSYS; })
11204+#define xen_multi_mmu_update(...) ({ BUG(); -ENOSYS; })
11205+#define xen_multi_mmuext_op(...) ({ BUG(); -ENOSYS; })
11206+
11207+#endif /* CONFIG_XEN && !MODULE */
11208
11209 #if defined(CONFIG_X86_64)
11210 #define MULTI_UVMFLAGS_INDEX 2
11211@@ -147,11 +187,15 @@ void scrub_pages(void *, unsigned int);
11212
11213 #ifdef CONFIG_XEN
11214 #define is_running_on_xen() 1
11215+extern char hypercall_page[PAGE_SIZE];
11216 #else
11217 extern char *hypercall_stubs;
11218+#define hypercall_page hypercall_stubs
11219 #define is_running_on_xen() (!!hypercall_stubs)
11220 #endif
11221
11222+#include <xen/hypercall.h>
11223+
11224 static inline int
11225 HYPERVISOR_yield(
11226 void)
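
The lazy-MMU hooks introduced above let page-table updates batch instead of trapping one by one: arch_enter_lazy_mmu_mode() raises the per-CPU xen_lazy_mmu flag, the HYPERVISOR_* wrappers then detour into the xen_multi_*() queueing paths, and arch_leave_lazy_mmu_mode() flushes everything in one multicall. A toy single-threaded model of that enter/queue/flush shape (plain C; the flag, queue and names here are stand-ins, not the kernel implementation):

#include <stdbool.h>
#include <stdio.h>

static bool lazy;	/* stands in for the per-CPU xen_lazy_mmu flag */
static int queued;	/* stands in for the pending multicall batch */

static void flush(void)	/* stands in for xen_multicall_flush() */
{
	if (queued)
		printf("flushing %d batched updates in one multicall\n", queued);
	queued = 0;
}

static void update(int n)	/* stands in for HYPERVISOR_mmu_update() */
{
	if (lazy) {		/* arch_use_lazy_mmu_mode() */
		queued++;	/* xen_multi_mmu_update(): just queue it */
		return;
	}
	printf("immediate hypercall for update %d\n", n);
}

int main(void)
{
	lazy = true;			/* arch_enter_lazy_mmu_mode() */
	for (int i = 0; i < 4; i++)
		update(i);
	lazy = false;			/* arch_leave_lazy_mmu_mode()... */
	flush();			/* ...which ends in a flush */
	return 0;
}
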
11227--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/pgtable-3level.h 2009-05-14 11:02:43.000000000 +0200
11228+++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/pgtable-3level.h 2009-03-04 11:28:34.000000000 +0100
11229@@ -53,7 +53,6 @@ static inline int pte_exec_kernel(pte_t
11230 * not possible, use pte_get_and_clear to obtain the old pte
11231 * value and then use set_pte to update it. -ben
11232 */
11233-#define __HAVE_ARCH_SET_PTE_ATOMIC
11234
11235 static inline void set_pte(pte_t *ptep, pte_t pte)
11236 {
11237@@ -70,14 +69,6 @@ static inline void set_pte(pte_t *ptep,
11238 set_pte((ptep), (pteval)); \
11239 } while (0)
11240
11241-#define set_pte_at_sync(_mm,addr,ptep,pteval) do { \
11242- if (((_mm) != current->mm && (_mm) != &init_mm) || \
11243- HYPERVISOR_update_va_mapping((addr), (pteval), UVMF_INVLPG)) { \
11244- set_pte((ptep), (pteval)); \
11245- xen_invlpg((addr)); \
11246- } \
11247-} while (0)
11248-
11249 #define set_pmd(pmdptr,pmdval) \
11250 xen_l2_entry_update((pmdptr), (pmdval))
11251 #define set_pud(pudptr,pudval) \
11252@@ -94,7 +85,7 @@ static inline void pud_clear (pud_t * pu
11253 #define pud_page(pud) \
11254 ((struct page *) __va(pud_val(pud) & PAGE_MASK))
11255
11256-#define pud_page_kernel(pud) \
11257+#define pud_page_vaddr(pud) \
11258 ((unsigned long) __va(pud_val(pud) & PAGE_MASK))
11259
11260
11261@@ -124,6 +115,7 @@ static inline void pte_clear(struct mm_s
11262
11263 #define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0)
11264
11265+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
11266 static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
11267 {
11268 pte_t pte = *ptep;
11269@@ -142,6 +134,7 @@ static inline pte_t ptep_get_and_clear(s
11270 return pte;
11271 }
11272
11273+#define __HAVE_ARCH_PTEP_CLEAR_FLUSH
11274 #define ptep_clear_flush(vma, addr, ptep) \
11275 ({ \
11276 pte_t *__ptep = (ptep); \
11277@@ -159,6 +152,7 @@ static inline pte_t ptep_get_and_clear(s
11278 __res; \
11279 })
11280
11281+#define __HAVE_ARCH_PTE_SAME
11282 static inline int pte_same(pte_t a, pte_t b)
11283 {
11284 return a.pte_low == b.pte_low && a.pte_high == b.pte_high;
11285--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/pgtable_32.h 2008-12-15 11:13:45.000000000 +0100
11286+++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/pgtable_32.h 2009-03-04 11:28:34.000000000 +0100
11287@@ -260,31 +260,89 @@ static inline pte_t pte_mkhuge(pte_t pte
11288 # include <asm/pgtable-2level.h>
11289 #endif
11290
11291-#define ptep_test_and_clear_dirty(vma, addr, ptep) \
11292+/*
11293+ * Rules for using pte_update - it must be called after any PTE update which
11294+ * has not been done using the set_pte / clear_pte interfaces. It is used by
11295+ * shadow mode hypervisors to resynchronize the shadow page tables. Kernel PTE
11296+ * updates should either be sets, clears, or set_pte_atomic for P->P
11297+ * transitions, which means this hook should only be called for user PTEs.
11298+ * This hook implies a P->P protection or access change has taken place, which
11299+ * requires a subsequent TLB flush. The notification can optionally be delayed
11300+ * until the TLB flush event by using the pte_update_defer form of the
11301+ * interface, but care must be taken to assure that the flush happens while
11302+ * still holding the same page table lock so that the shadow and primary pages
11303+ * do not become out of sync on SMP.
11304+ */
11305+#define pte_update(mm, addr, ptep) do { } while (0)
11306+#define pte_update_defer(mm, addr, ptep) do { } while (0)
11307+
11308+
11309+/*
11310+ * We only update the dirty/accessed state if we set
11311+ * the dirty bit by hand in the kernel, since the hardware
11312+ * will do the accessed bit for us, and we don't want to
11313+ * race with other CPU's that might be updating the dirty
11314+ * bit at the same time.
11315+ */
11316+#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
11317+#define ptep_set_access_flags(vma, address, ptep, entry, dirty) \
11318+do { \
11319+ if (dirty) \
11320+ ptep_establish(vma, address, ptep, entry); \
11321+} while (0)
11322+
11323+/*
11324+ * We don't actually have these, but we want to advertise them so that
11325+ * we can encompass the flush here.
11326+ */
11327+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
11328+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
11329+
11330+/*
11331+ * Rules for using ptep_establish: the pte MUST be a user pte, and
11332+ * must be a present->present transition.
11333+ */
11334+#define __HAVE_ARCH_PTEP_ESTABLISH
11335+#define ptep_establish(vma, address, ptep, pteval) \
11336+do { \
11337+ if ( likely((vma)->vm_mm == current->mm) ) { \
11338+ BUG_ON(HYPERVISOR_update_va_mapping(address, \
11339+ pteval, \
11340+ (unsigned long)(vma)->vm_mm->cpu_vm_mask.bits| \
11341+ UVMF_INVLPG|UVMF_MULTI)); \
11342+ } else { \
11343+ xen_l1_entry_update(ptep, pteval); \
11344+ flush_tlb_page(vma, address); \
11345+ } \
11346+} while (0)
11347+
11348+#define __HAVE_ARCH_PTEP_CLEAR_DIRTY_FLUSH
11349+#define ptep_clear_flush_dirty(vma, address, ptep) \
11350 ({ \
11351 pte_t __pte = *(ptep); \
11352- int __ret = pte_dirty(__pte); \
11353- if (__ret) { \
11354- __pte = pte_mkclean(__pte); \
11355- if ((vma)->vm_mm != current->mm || \
11356- HYPERVISOR_update_va_mapping(addr, __pte, 0)) \
11357- (ptep)->pte_low = __pte.pte_low; \
11358- } \
11359- __ret; \
11360+ int __dirty = pte_dirty(__pte); \
11361+ __pte = pte_mkclean(__pte); \
11362+ if (test_bit(PG_pinned, &virt_to_page((vma)->vm_mm->pgd)->flags)) \
11363+ ptep_set_access_flags(vma, address, ptep, __pte, __dirty); \
11364+ else if (__dirty) \
11365+ (ptep)->pte_low = __pte.pte_low; \
11366+ __dirty; \
11367 })
11368
11369-#define ptep_test_and_clear_young(vma, addr, ptep) \
11370+#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
11371+#define ptep_clear_flush_young(vma, address, ptep) \
11372 ({ \
11373 pte_t __pte = *(ptep); \
11374- int __ret = pte_young(__pte); \
11375- if (__ret) \
11376- __pte = pte_mkold(__pte); \
11377- if ((vma)->vm_mm != current->mm || \
11378- HYPERVISOR_update_va_mapping(addr, __pte, 0)) \
11379- (ptep)->pte_low = __pte.pte_low; \
11380- __ret; \
11381+ int __young = pte_young(__pte); \
11382+ __pte = pte_mkold(__pte); \
11383+ if (test_bit(PG_pinned, &virt_to_page((vma)->vm_mm->pgd)->flags)) \
11384+ ptep_set_access_flags(vma, address, ptep, __pte, __young); \
11385+ else if (__young) \
11386+ (ptep)->pte_low = __pte.pte_low; \
11387+ __young; \
11388 })
11389
11390+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
11391 #define ptep_get_and_clear_full(mm, addr, ptep, full) \
11392 ((full) ? ({ \
11393 pte_t __res = *(ptep); \
11394@@ -296,6 +354,7 @@ static inline pte_t pte_mkhuge(pte_t pte
11395 }) : \
11396 ptep_get_and_clear(mm, addr, ptep))
11397
11398+#define __HAVE_ARCH_PTEP_SET_WRPROTECT
11399 static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
11400 {
11401 pte_t pte = *ptep;
11402@@ -391,11 +450,11 @@ static inline pte_t pte_modify(pte_t pte
11403 #define pte_index(address) \
11404 (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
11405 #define pte_offset_kernel(dir, address) \
11406- ((pte_t *) pmd_page_kernel(*(dir)) + pte_index(address))
11407+ ((pte_t *) pmd_page_vaddr(*(dir)) + pte_index(address))
11408
11409 #define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT))
11410
11411-#define pmd_page_kernel(pmd) \
11412+#define pmd_page_vaddr(pmd) \
11413 ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
11414
11415 /*
11416@@ -418,8 +477,6 @@ extern pte_t *lookup_address(unsigned lo
11417 static inline int set_kernel_exec(unsigned long vaddr, int enable) { return 0;}
11418 #endif
11419
11420-extern void noexec_setup(const char *str);
11421-
11422 #if defined(CONFIG_HIGHPTE)
11423 #define pte_offset_map(dir, address) \
11424 ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE0) + \
11425@@ -437,37 +494,17 @@ extern void noexec_setup(const char *str
11426 #define pte_unmap_nested(pte) do { } while (0)
11427 #endif
11428
11429-#define __HAVE_ARCH_PTEP_ESTABLISH
11430-#define ptep_establish(vma, address, ptep, pteval) \
11431- do { \
11432- if ( likely((vma)->vm_mm == current->mm) ) { \
11433- BUG_ON(HYPERVISOR_update_va_mapping(address, \
11434- pteval, \
11435- (unsigned long)(vma)->vm_mm->cpu_vm_mask.bits| \
11436- UVMF_INVLPG|UVMF_MULTI)); \
11437- } else { \
11438- xen_l1_entry_update(ptep, pteval); \
11439- flush_tlb_page(vma, address); \
11440- } \
11441- } while (0)
11442+/* Clear a kernel PTE and flush it from the TLB */
11443+#define kpte_clear_flush(ptep, vaddr) do { \
11444+ if (HYPERVISOR_update_va_mapping(vaddr, __pte(0), UVMF_INVLPG)) \
11445+ BUG(); \
11446+} while (0)
11447
11448 /*
11449 * The i386 doesn't have any external MMU info: the kernel page
11450 * tables contain all the necessary information.
11451- *
11452- * Also, we only update the dirty/accessed state if we set
11453- * the dirty bit by hand in the kernel, since the hardware
11454- * will do the accessed bit for us, and we don't want to
11455- * race with other CPU's that might be updating the dirty
11456- * bit at the same time.
11457 */
11458 #define update_mmu_cache(vma,address,pte) do { } while (0)
11459-#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
11460-#define ptep_set_access_flags(vma, address, ptep, entry, dirty) \
11461- do { \
11462- if (dirty) \
11463- ptep_establish(vma, address, ptep, entry); \
11464- } while (0)
11465
11466 #include <xen/features.h>
11467 void make_lowmem_page_readonly(void *va, unsigned int feature);
11468@@ -526,10 +563,11 @@ int touch_pte_range(struct mm_struct *mm
11469 unsigned long size);
11470
11471 int xen_change_pte_range(struct mm_struct *mm, pmd_t *pmd,
11472- unsigned long addr, unsigned long end, pgprot_t newprot);
11473+ unsigned long addr, unsigned long end, pgprot_t newprot,
11474+ int dirty_accountable);
11475
11476-#define arch_change_pte_range(mm, pmd, addr, end, newprot) \
11477- xen_change_pte_range(mm, pmd, addr, end, newprot)
11478+#define arch_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable) \
11479+ xen_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable)
11480
11481 #define io_remap_pfn_range(vma,from,pfn,size,prot) \
11482 direct_remap_pfn_range(vma,from,pfn,size,prot,DOMID_IO)
11483@@ -538,13 +576,6 @@ direct_remap_pfn_range(vma,from,pfn,size
11484 #define GET_IOSPACE(pfn) 0
11485 #define GET_PFN(pfn) (pfn)
11486
11487-#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
11488-#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
11489-#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
11490-#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
11491-#define __HAVE_ARCH_PTEP_CLEAR_FLUSH
11492-#define __HAVE_ARCH_PTEP_SET_WRPROTECT
11493-#define __HAVE_ARCH_PTE_SAME
11494 #include <asm-generic/pgtable.h>
11495
11496 #endif /* _I386_PGTABLE_H */
11497--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/processor_32.h 2009-04-20 11:36:10.000000000 +0200
11498+++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/processor_32.h 2009-03-04 11:28:34.000000000 +0100
11499@@ -146,6 +146,18 @@ static inline void detect_ht(struct cpui
11500 #define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */
11501 #define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */
11502
11503+static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
11504+ unsigned int *ecx, unsigned int *edx)
11505+{
11506+ /* ecx is often an input as well as an output. */
11507+ __asm__(XEN_CPUID
11508+ : "=a" (*eax),
11509+ "=b" (*ebx),
11510+ "=c" (*ecx),
11511+ "=d" (*edx)
11512+ : "0" (*eax), "2" (*ecx));
11513+}
11514+
11515 /*
11516 * Generic CPUID function
11517 * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx
11518@@ -153,24 +165,18 @@ static inline void detect_ht(struct cpui
11519 */
11520 static inline void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx)
11521 {
11522- __asm__(XEN_CPUID
11523- : "=a" (*eax),
11524- "=b" (*ebx),
11525- "=c" (*ecx),
11526- "=d" (*edx)
11527- : "0" (op), "c"(0));
11528+ *eax = op;
11529+ *ecx = 0;
11530+ __cpuid(eax, ebx, ecx, edx);
11531 }
11532
11533 /* Some CPUID calls want 'count' to be placed in ecx */
11534 static inline void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx,
11535- int *edx)
11536+ int *edx)
11537 {
11538- __asm__(XEN_CPUID
11539- : "=a" (*eax),
11540- "=b" (*ebx),
11541- "=c" (*ecx),
11542- "=d" (*edx)
11543- : "0" (op), "c" (count));
11544+ *eax = op;
11545+ *ecx = count;
11546+ __cpuid(eax, ebx, ecx, edx);
11547 }
11548
11549 /*
11550@@ -178,42 +184,30 @@ static inline void cpuid_count(int op, i
11551 */
11552 static inline unsigned int cpuid_eax(unsigned int op)
11553 {
11554- unsigned int eax;
11555+ unsigned int eax, ebx, ecx, edx;
11556
11557- __asm__(XEN_CPUID
11558- : "=a" (eax)
11559- : "0" (op)
11560- : "bx", "cx", "dx");
11561+ cpuid(op, &eax, &ebx, &ecx, &edx);
11562 return eax;
11563 }
11564 static inline unsigned int cpuid_ebx(unsigned int op)
11565 {
11566- unsigned int eax, ebx;
11567+ unsigned int eax, ebx, ecx, edx;
11568
11569- __asm__(XEN_CPUID
11570- : "=a" (eax), "=b" (ebx)
11571- : "0" (op)
11572- : "cx", "dx" );
11573+ cpuid(op, &eax, &ebx, &ecx, &edx);
11574 return ebx;
11575 }
11576 static inline unsigned int cpuid_ecx(unsigned int op)
11577 {
11578- unsigned int eax, ecx;
11579+ unsigned int eax, ebx, ecx, edx;
11580
11581- __asm__(XEN_CPUID
11582- : "=a" (eax), "=c" (ecx)
11583- : "0" (op)
11584- : "bx", "dx" );
11585+ cpuid(op, &eax, &ebx, &ecx, &edx);
11586 return ecx;
11587 }
11588 static inline unsigned int cpuid_edx(unsigned int op)
11589 {
11590- unsigned int eax, edx;
11591+ unsigned int eax, ebx, ecx, edx;
11592
11593- __asm__(XEN_CPUID
11594- : "=a" (eax), "=d" (edx)
11595- : "0" (op)
11596- : "bx", "cx");
11597+ cpuid(op, &eax, &ebx, &ecx, &edx);
11598 return edx;
11599 }
11600
11601@@ -315,6 +309,8 @@ static inline void __mwait(unsigned long
11602 : :"a" (eax), "c" (ecx));
11603 }
11604
11605+extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx);
11606+
11607 /* from system description table in BIOS. Mostly for MCA use, but
11608 others may find it useful. */
11609 extern unsigned int machine_id;
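
The cpuid rework above funnels cpuid(), cpuid_count() and the cpuid_eax()..cpuid_edx() helpers through a single __cpuid() primitive instead of four hand-written asm variants, with ecx treated explicitly as an input (the sub-leaf) as well as an output. The same funneling pattern in standalone form, using the native cpuid instruction rather than the paravirtualized XEN_CPUID (userspace C; assumes an x86 compiler):

#include <stdio.h>

static void my_cpuid(unsigned *eax, unsigned *ebx,
		     unsigned *ecx, unsigned *edx)
{
	/* ecx is an input as well as an output. */
	__asm__("cpuid"
		: "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx)
		: "0" (*eax), "2" (*ecx));
}

static unsigned my_cpuid_edx(unsigned op)	/* one of many thin wrappers */
{
	unsigned eax = op, ebx, ecx = 0, edx;

	my_cpuid(&eax, &ebx, &ecx, &edx);
	return edx;
}

int main(void)
{
	printf("CPUID(1).edx = %#x\n", my_cpuid_edx(1));	/* feature bits */
	return 0;
}
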
11610--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/segment_32.h 2009-05-14 11:02:43.000000000 +0200
11611+++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/segment_32.h 2009-03-04 11:28:34.000000000 +0100
11612@@ -61,11 +61,9 @@
11613
11614 #define GDT_ENTRY_KERNEL_CS (GDT_ENTRY_KERNEL_BASE + 0)
11615 #define __KERNEL_CS (GDT_ENTRY_KERNEL_CS * 8)
11616-#define GET_KERNEL_CS() (__KERNEL_CS | (xen_feature(XENFEAT_supervisor_mode_kernel)?0:1) )
11617
11618 #define GDT_ENTRY_KERNEL_DS (GDT_ENTRY_KERNEL_BASE + 1)
11619 #define __KERNEL_DS (GDT_ENTRY_KERNEL_DS * 8)
11620-#define GET_KERNEL_DS() (__KERNEL_DS | (xen_feature(XENFEAT_supervisor_mode_kernel)?0:1) )
11621
11622 #define GDT_ENTRY_TSS (GDT_ENTRY_KERNEL_BASE + 4)
11623 #define GDT_ENTRY_LDT (GDT_ENTRY_KERNEL_BASE + 5)
11624@@ -85,6 +83,11 @@
11625
11626 #define GDT_SIZE (GDT_ENTRIES * 8)
11627
11628+/* Matches __KERNEL_CS and __USER_CS (they must be 2 entries apart) */
11629+#define SEGMENT_IS_FLAT_CODE(x) (((x) & 0xec) == GDT_ENTRY_KERNEL_CS * 8)
11630+/* Matches PNP_CS32 and PNP_CS16 (they must be consecutive) */
11631+#define SEGMENT_IS_PNP_CODE(x) (((x) & 0xf4) == GDT_ENTRY_PNPBIOS_BASE * 8)
11632+
11633 /* Simple and small GDT entries for booting only */
11634
11635 #define GDT_ENTRY_BOOT_CS 2
11636@@ -114,4 +117,16 @@
11637 */
11638 #define IDT_ENTRIES 256
11639
11640+/* Bottom two bits of selector give the ring privilege level */
11641+#define SEGMENT_RPL_MASK 0x3
11642+/* Bit 2 is table indicator (LDT/GDT) */
11643+#define SEGMENT_TI_MASK 0x4
11644+
11645+/* User mode is privilege level 3 */
11646+#define USER_RPL 0x3
11647+/* LDT segment has TI set, GDT has it cleared */
11648+#define SEGMENT_LDT 0x4
11649+#define SEGMENT_GDT 0x0
11650+
11651+#define get_kernel_rpl() (xen_feature(XENFEAT_supervisor_mode_kernel)?0:1)
11652 #endif
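
The selector macros added above split a segment selector into its RPL (bits 0-1) and TI (bit 2) fields; SEGMENT_IS_FLAT_CODE() masks with 0xec so that the kernel and user flat code selectors, two GDT entries (one selector bit 4) apart, both match regardless of RPL. A quick standalone check (assumes the usual i386 layout with the kernel CS at GDT entry 12):

#include <stdio.h>

#define GDT_ENTRY_KERNEL_CS	12	/* assumed i386 layout */
#define SEGMENT_IS_FLAT_CODE(x)	(((x) & 0xec) == GDT_ENTRY_KERNEL_CS * 8)

int main(void)
{
	/* kernel CS (RPL 0 and Xen's RPL 1), user CS (RPL 3),
	 * user DS (RPL 3), and an unrelated selector */
	unsigned sel[] = { 0x60, 0x61, 0x73, 0x7b, 0x10 };

	for (int i = 0; i < 5; i++)
		printf("%#04x -> %s\n", sel[i],
		       SEGMENT_IS_FLAT_CODE(sel[i]) ? "flat code" : "other");
	return 0;
}
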
11653--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/smp_32.h 2009-05-14 11:02:43.000000000 +0200
11654+++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/smp_32.h 2009-03-04 11:28:34.000000000 +0100
11655@@ -79,25 +79,36 @@ static inline int hard_smp_processor_id(
11656 return GET_APIC_ID(*(unsigned long *)(APIC_BASE+APIC_ID));
11657 }
11658 #endif
11659-
11660-static __inline int logical_smp_processor_id(void)
11661-{
11662- /* we don't want to mark this access volatile - bad code generation */
11663- return GET_APIC_LOGICAL_ID(*(unsigned long *)(APIC_BASE+APIC_LDR));
11664-}
11665-
11666 #endif
11667
11668+#define safe_smp_processor_id() smp_processor_id()
11669 extern int __cpu_disable(void);
11670 extern void __cpu_die(unsigned int cpu);
11671 extern void prefill_possible_map(void);
11672+extern unsigned int num_processors;
11673+
11674 #endif /* !__ASSEMBLY__ */
11675
11676 #else /* CONFIG_SMP */
11677
11678+#define safe_smp_processor_id() 0
11679 #define cpu_physical_id(cpu) boot_cpu_physical_apicid
11680
11681 #define NO_PROC_ID 0xFF /* No processor magic marker */
11682
11683 #endif
11684+
11685+#ifndef __ASSEMBLY__
11686+
11687+extern u8 apicid_2_node[];
11688+
11689+#ifdef CONFIG_X86_LOCAL_APIC
11690+static __inline int logical_smp_processor_id(void)
11691+{
11692+ /* we don't want to mark this access volatile - bad code generation */
11693+ return GET_APIC_LOGICAL_ID(*(unsigned long *)(APIC_BASE+APIC_LDR));
11694+}
11695+#endif
11696+#endif
11697+
11698 #endif
11699--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/system_32.h 2009-05-14 11:02:43.000000000 +0200
11700+++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/system_32.h 2009-03-04 11:28:34.000000000 +0100
11701@@ -267,6 +267,9 @@ static inline unsigned long __xchg(unsig
11702 #define cmpxchg(ptr,o,n)\
11703 ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\
11704 (unsigned long)(n),sizeof(*(ptr))))
11705+#define sync_cmpxchg(ptr,o,n)\
11706+ ((__typeof__(*(ptr)))__sync_cmpxchg((ptr),(unsigned long)(o),\
11707+ (unsigned long)(n),sizeof(*(ptr))))
11708 #endif
11709
11710 static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
11711@@ -296,6 +299,39 @@ static inline unsigned long __cmpxchg(vo
11712 return old;
11713 }
11714
11715+/*
11716+ * Always use locked operations when touching memory shared with a
11717+ * hypervisor, since the system may be SMP even if the guest kernel
11718+ * isn't.
11719+ */
11720+static inline unsigned long __sync_cmpxchg(volatile void *ptr,
11721+ unsigned long old,
11722+ unsigned long new, int size)
11723+{
11724+ unsigned long prev;
11725+ switch (size) {
11726+ case 1:
11727+ __asm__ __volatile__("lock; cmpxchgb %b1,%2"
11728+ : "=a"(prev)
11729+ : "q"(new), "m"(*__xg(ptr)), "0"(old)
11730+ : "memory");
11731+ return prev;
11732+ case 2:
11733+ __asm__ __volatile__("lock; cmpxchgw %w1,%2"
11734+ : "=a"(prev)
11735+ : "r"(new), "m"(*__xg(ptr)), "0"(old)
11736+ : "memory");
11737+ return prev;
11738+ case 4:
11739+ __asm__ __volatile__("lock; cmpxchgl %1,%2"
11740+ : "=a"(prev)
11741+ : "r"(new), "m"(*__xg(ptr)), "0"(old)
11742+ : "memory");
11743+ return prev;
11744+ }
11745+ return old;
11746+}
11747+
11748 #ifndef CONFIG_X86_CMPXCHG
11749 /*
11750 * Building a kernel capable running on 80386. It may be necessary to
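
__sync_cmpxchg() exists because memory shared with the hypervisor (grant tables, ring pages) can be written concurrently even when the guest kernel is built UP, so the lock prefix must never be compiled away the way plain cmpxchg() permits. A standalone sketch of the 4-byte case with the same always-locked semantics (userspace C with the same x86 inline asm; the grant-flag scenario is invented):

#include <stdio.h>

/* Always-locked 32-bit compare-and-swap, mirroring the size-4 case above. */
static unsigned sync_cmpxchg32(volatile unsigned *ptr,
			       unsigned old, unsigned new_val)
{
	unsigned prev;

	__asm__ __volatile__("lock; cmpxchgl %1,%2"
			     : "=a" (prev)
			     : "r" (new_val), "m" (*ptr), "0" (old)
			     : "memory");
	return prev;
}

int main(void)
{
	volatile unsigned flags = 0x1;	/* pretend: a shared grant-entry word */

	/* Claim the entry only if it is still in state 0x1. */
	if (sync_cmpxchg32(&flags, 0x1, 0x3) == 0x1)
		printf("claimed, flags now %#x\n", flags);
	return 0;
}
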
11751--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/tlbflush_32.h 2009-05-14 11:02:43.000000000 +0200
11752+++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/tlbflush_32.h 2009-03-04 11:28:34.000000000 +0100
11753@@ -8,8 +8,6 @@
11754 #define __flush_tlb_global() xen_tlb_flush()
11755 #define __flush_tlb_all() xen_tlb_flush()
11756
11757-extern unsigned long pgkern_mask;
11758-
11759 #define cpu_has_invlpg (boot_cpu_data.x86 > 3)
11760
11761 #define __flush_tlb_single(addr) xen_invlpg(addr)
11762--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/dma-mapping_64.h 2009-05-14 11:02:43.000000000 +0200
11763+++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/dma-mapping_64.h 2009-03-04 11:28:34.000000000 +0100
11764@@ -55,13 +55,6 @@ extern dma_addr_t bad_dma_address;
11765 extern struct dma_mapping_ops* dma_ops;
11766 extern int iommu_merge;
11767
11768-static inline int valid_dma_direction(int dma_direction)
11769-{
11770- return ((dma_direction == DMA_BIDIRECTIONAL) ||
11771- (dma_direction == DMA_TO_DEVICE) ||
11772- (dma_direction == DMA_FROM_DEVICE));
11773-}
11774-
11775 #if 0
11776 static inline int dma_mapping_error(dma_addr_t dma_addr)
11777 {
11778--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/fixmap_64.h 2009-05-14 11:02:43.000000000 +0200
11779+++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/fixmap_64.h 2009-03-04 11:28:34.000000000 +0100
11780@@ -41,7 +41,7 @@ enum fixed_addresses {
11781 #ifdef CONFIG_X86_LOCAL_APIC
11782 FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */
11783 #endif
11784-#ifdef CONFIG_X86_IO_APIC
11785+#ifndef CONFIG_XEN
11786 FIX_IO_APIC_BASE_0,
11787 FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS-1,
11788 #endif
11789--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/pgtable_64.h 2009-04-20 11:36:10.000000000 +0200
11790+++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/pgtable_64.h 2009-03-04 11:28:34.000000000 +0100
11791@@ -44,12 +44,9 @@ extern unsigned long __supported_pte_mas
11792
11793 #define swapper_pg_dir init_level4_pgt
11794
11795-extern int nonx_setup(char *str);
11796 extern void paging_init(void);
11797 extern void clear_kernel_mapping(unsigned long addr, unsigned long size);
11798
11799-extern unsigned long pgkern_mask;
11800-
11801 /*
11802 * ZERO_PAGE is a global shared page that is always zero: used
11803 * for zero-mapped memory areas etc..
11804@@ -119,9 +116,6 @@ static inline void pgd_clear (pgd_t * pg
11805 set_pgd(__user_pgd(pgd), __pgd(0));
11806 }
11807
11808-#define pud_page(pud) \
11809- ((unsigned long) __va(pud_val(pud) & PHYSICAL_PAGE_MASK))
11810-
11811 #define pte_same(a, b) ((a).pte == (b).pte)
11812
11813 #define pte_pgprot(a) (__pgprot((a).pte & ~PHYSICAL_PAGE_MASK))
11814@@ -333,7 +327,7 @@ static inline pte_t ptep_get_and_clear_f
11815 #define __LARGE_PTE (_PAGE_PSE|_PAGE_PRESENT)
11816 static inline int pte_user(pte_t pte) { return __pte_val(pte) & _PAGE_USER; }
11817 static inline int pte_read(pte_t pte) { return __pte_val(pte) & _PAGE_USER; }
11818-static inline int pte_exec(pte_t pte) { return __pte_val(pte) & _PAGE_USER; }
11819+static inline int pte_exec(pte_t pte) { return !(__pte_val(pte) & _PAGE_NX); }
11820 static inline int pte_dirty(pte_t pte) { return __pte_val(pte) & _PAGE_DIRTY; }
11821 static inline int pte_young(pte_t pte) { return __pte_val(pte) & _PAGE_ACCESSED; }
11822 static inline int pte_write(pte_t pte) { return __pte_val(pte) & _PAGE_RW; }
11823@@ -346,29 +340,12 @@ static inline pte_t pte_mkclean(pte_t pt
11824 static inline pte_t pte_mkold(pte_t pte) { __pte_val(pte) &= ~_PAGE_ACCESSED; return pte; }
11825 static inline pte_t pte_wrprotect(pte_t pte) { __pte_val(pte) &= ~_PAGE_RW; return pte; }
11826 static inline pte_t pte_mkread(pte_t pte) { __pte_val(pte) |= _PAGE_USER; return pte; }
11827-static inline pte_t pte_mkexec(pte_t pte) { __pte_val(pte) |= _PAGE_USER; return pte; }
11828+static inline pte_t pte_mkexec(pte_t pte) { __pte_val(pte) &= ~_PAGE_NX; return pte; }
11829 static inline pte_t pte_mkdirty(pte_t pte) { __pte_val(pte) |= _PAGE_DIRTY; return pte; }
11830 static inline pte_t pte_mkyoung(pte_t pte) { __pte_val(pte) |= _PAGE_ACCESSED; return pte; }
11831 static inline pte_t pte_mkwrite(pte_t pte) { __pte_val(pte) |= _PAGE_RW; return pte; }
11832 static inline pte_t pte_mkhuge(pte_t pte) { __pte_val(pte) |= _PAGE_PSE; return pte; }
11833-
11834-#define ptep_test_and_clear_dirty(vma, addr, ptep) \
11835-({ \
11836- pte_t __pte = *(ptep); \
11837- int __ret = pte_dirty(__pte); \
11838- if (__ret) \
11839- set_pte_at((vma)->vm_mm, addr, ptep, pte_mkclean(__pte)); \
11840- __ret; \
11841-})
11842-
11843-#define ptep_test_and_clear_young(vma, addr, ptep) \
11844-({ \
11845- pte_t __pte = *(ptep); \
11846- int __ret = pte_young(__pte); \
11847- if (__ret) \
11848- set_pte_at((vma)->vm_mm, addr, ptep, pte_mkold(__pte)); \
11849- __ret; \
11850-})
11851+static inline pte_t pte_clrhuge(pte_t pte) { __pte_val(pte) &= ~_PAGE_PSE; return pte; }
11852
11853 static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
11854 {
11855@@ -395,7 +372,8 @@ static inline int pmd_large(pmd_t pte) {
11856 /*
11857 * Level 4 access.
11858 */
11859-#define pgd_page(pgd) ((unsigned long) __va(pgd_val(pgd) & PTE_MASK))
11860+#define pgd_page_vaddr(pgd) ((unsigned long) __va(pgd_val(pgd) & PTE_MASK))
11861+#define pgd_page(pgd) (pfn_to_page(pgd_val(pgd) >> PAGE_SHIFT))
11862 #define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
11863 #define pgd_offset(mm, addr) ((mm)->pgd + pgd_index(addr))
11864 #define pgd_offset_k(address) (init_level4_pgt + pgd_index(address))
11865@@ -404,16 +382,18 @@ static inline int pmd_large(pmd_t pte) {
11866
11867 /* PUD - Level3 access */
11868 /* to find an entry in a page-table-directory. */
11869+#define pud_page_vaddr(pud) ((unsigned long) __va(pud_val(pud) & PHYSICAL_PAGE_MASK))
11870+#define pud_page(pud) (pfn_to_page(pud_val(pud) >> PAGE_SHIFT))
11871 #define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
11872-#define pud_offset(pgd, address) ((pud_t *) pgd_page(*(pgd)) + pud_index(address))
11873+#define pud_offset(pgd, address) ((pud_t *) pgd_page_vaddr(*(pgd)) + pud_index(address))
11874 #define pud_present(pud) (__pud_val(pud) & _PAGE_PRESENT)
11875
11876 /* PMD - Level 2 access */
11877-#define pmd_page_kernel(pmd) ((unsigned long) __va(pmd_val(pmd) & PTE_MASK))
11878+#define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_val(pmd) & PTE_MASK))
11879 #define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT))
11880
11881 #define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
11882-#define pmd_offset(dir, address) ((pmd_t *) pud_page(*(dir)) + \
11883+#define pmd_offset(dir, address) ((pmd_t *) pud_page_vaddr(*(dir)) + \
11884 pmd_index(address))
11885 #define pmd_none(x) (!__pmd_val(x))
11886 #if CONFIG_XEN_COMPAT <= 0x030002
11887@@ -444,6 +424,7 @@ static inline pte_t mk_pte_phys(unsigned
11888 {
11889 unsigned long pteval;
11890 pteval = physpage | pgprot_val(pgprot);
11891+ pteval &= __supported_pte_mask;
11892 return __pte(pteval);
11893 }
11894
11895@@ -465,7 +446,7 @@ static inline pte_t pte_modify(pte_t pte
11896
11897 #define pte_index(address) \
11898 (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
11899-#define pte_offset_kernel(dir, address) ((pte_t *) pmd_page_kernel(*(dir)) + \
11900+#define pte_offset_kernel(dir, address) ((pte_t *) pmd_page_vaddr(*(dir)) + \
11901 pte_index(address))
11902
11903 /* x86-64 always has all page tables mapped. */
11904@@ -506,6 +487,40 @@ static inline pte_t pte_modify(pte_t pte
11905 ptep_establish(vma, address, ptep, entry); \
11906 } while (0)
11907
11908+
11909+/*
11910+ * i386 says: We don't actually have these, but we want to advertise
11911+ * them so that we can encompass the flush here.
11912+ */
11913+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
11914+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
11915+
11916+#define __HAVE_ARCH_PTEP_CLEAR_DIRTY_FLUSH
11917+#define ptep_clear_flush_dirty(vma, address, ptep) \
11918+({ \
11919+ pte_t __pte = *(ptep); \
11920+ int __dirty = pte_dirty(__pte); \
11921+ __pte = pte_mkclean(__pte); \
11922+ if ((vma)->vm_mm->context.pinned) \
11923+ ptep_set_access_flags(vma, address, ptep, __pte, __dirty); \
11924+ else if (__dirty) \
11925+ set_pte(ptep, __pte); \
11926+ __dirty; \
11927+})
11928+
11929+#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
11930+#define ptep_clear_flush_young(vma, address, ptep) \
11931+({ \
11932+ pte_t __pte = *(ptep); \
11933+ int __young = pte_young(__pte); \
11934+ __pte = pte_mkold(__pte); \
11935+ if ((vma)->vm_mm->context.pinned) \
11936+ ptep_set_access_flags(vma, address, ptep, __pte, __young); \
11937+ else if (__young) \
11938+ set_pte(ptep, __pte); \
11939+ __young; \
11940+})
11941+
11942 /* Encode and de-code a swap entry */
11943 #define __swp_type(x) (((x).val >> 1) & 0x3f)
11944 #define __swp_offset(x) ((x).val >> 8)
11945@@ -547,10 +562,11 @@ int touch_pte_range(struct mm_struct *mm
11946 unsigned long size);
11947
11948 int xen_change_pte_range(struct mm_struct *mm, pmd_t *pmd,
11949- unsigned long addr, unsigned long end, pgprot_t newprot);
11950+ unsigned long addr, unsigned long end, pgprot_t newprot,
11951+ int dirty_accountable);
11952
11953-#define arch_change_pte_range(mm, pmd, addr, end, newprot) \
11954- xen_change_pte_range(mm, pmd, addr, end, newprot)
11955+#define arch_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable) \
11956+ xen_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable)
11957
11958 #define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \
11959 direct_remap_pfn_range(vma,vaddr,pfn,size,prot,DOMID_IO)
11960@@ -572,8 +588,6 @@ int xen_change_pte_range(struct mm_struc
11961 #define kc_offset_to_vaddr(o) \
11962 (((o) & (1UL << (__VIRTUAL_MASK_SHIFT-1))) ? ((o) | (~__VIRTUAL_MASK)) : (o))
11963
11964-#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
11965-#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
11966 #define __HAVE_ARCH_PTEP_GET_AND_CLEAR
11967 #define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
11968 #define __HAVE_ARCH_PTEP_CLEAR_FLUSH
11969--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/processor_64.h 2009-05-14 11:02:43.000000000 +0200
11970+++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/processor_64.h 2009-03-04 11:28:34.000000000 +0100
11971@@ -484,6 +484,8 @@ static inline void __mwait(unsigned long
11972 : :"a" (eax), "c" (ecx));
11973 }
11974
11975+extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx);
11976+
11977 #define stack_current() \
11978 ({ \
11979 struct thread_info *ti; \
11980--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/smp_64.h 2009-05-14 11:02:43.000000000 +0200
11981+++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/smp_64.h 2009-03-04 11:28:34.000000000 +0100
11982@@ -4,15 +4,12 @@
11983 /*
11984 * We need the APIC definitions automatically as part of 'smp.h'
11985 */
11986-#ifndef __ASSEMBLY__
11987 #include <linux/threads.h>
11988 #include <linux/cpumask.h>
11989 #include <linux/bitops.h>
11990 extern int disable_apic;
11991-#endif
11992
11993 #ifdef CONFIG_X86_LOCAL_APIC
11994-#ifndef __ASSEMBLY__
11995 #include <asm/fixmap.h>
11996 #include <asm/mpspec.h>
11997 #ifdef CONFIG_X86_IO_APIC
11998@@ -21,10 +18,8 @@ extern int disable_apic;
11999 #include <asm/apic.h>
12000 #include <asm/thread_info.h>
12001 #endif
12002-#endif
12003
12004 #ifdef CONFIG_SMP
12005-#ifndef ASSEMBLY
12006
12007 #include <asm/pda.h>
12008
12009@@ -41,14 +36,11 @@ extern cpumask_t cpu_initialized;
12010
12011 extern void smp_alloc_memory(void);
12012 extern volatile unsigned long smp_invalidate_needed;
12013-extern int pic_mode;
12014 extern void lock_ipi_call_lock(void);
12015 extern void unlock_ipi_call_lock(void);
12016 extern int smp_num_siblings;
12017 extern void smp_send_reschedule(int cpu);
12018 void smp_stop_cpu(void);
12019-extern int smp_call_function_single(int cpuid, void (*func) (void *info),
12020- void *info, int retry, int wait);
12021
12022 extern cpumask_t cpu_sibling_map[NR_CPUS];
12023 extern cpumask_t cpu_core_map[NR_CPUS];
12024@@ -77,20 +69,16 @@ static inline int hard_smp_processor_id(
12025 }
12026 #endif
12027
12028-extern int safe_smp_processor_id(void);
12029 extern int __cpu_disable(void);
12030 extern void __cpu_die(unsigned int cpu);
12031 extern void prefill_possible_map(void);
12032 extern unsigned num_processors;
12033 extern unsigned disabled_cpus;
12034
12035-#endif /* !ASSEMBLY */
12036-
12037 #define NO_PROC_ID 0xFF /* No processor magic marker */
12038
12039 #endif
12040
12041-#ifndef ASSEMBLY
12042 /*
12043 * Some lowlevel functions might want to know about
12044 * the real APIC ID <-> CPU # mapping.
12045@@ -114,11 +102,8 @@ static inline int cpu_present_to_apicid(
12046 }
12047 #endif
12048
12049-#endif /* !ASSEMBLY */
12050-
12051 #ifndef CONFIG_SMP
12052 #define stack_smp_processor_id() 0
12053-#define safe_smp_processor_id() 0
12054 #define cpu_logical_map(x) (x)
12055 #else
12056 #include <asm/thread_info.h>
12057@@ -130,7 +115,6 @@ static inline int cpu_present_to_apicid(
12058 })
12059 #endif
12060
12061-#ifndef __ASSEMBLY__
12062 #ifdef CONFIG_X86_LOCAL_APIC
12063 static __inline int logical_smp_processor_id(void)
12064 {
12065@@ -138,13 +122,18 @@ static __inline int logical_smp_processo
12066 return GET_APIC_LOGICAL_ID(*(unsigned long *)(APIC_BASE+APIC_LDR));
12067 }
12068 #endif
12069-#endif
12070
12071 #ifdef CONFIG_SMP
12072 #define cpu_physical_id(cpu) x86_cpu_to_apicid[cpu]
12073 #else
12074 #define cpu_physical_id(cpu) boot_cpu_id
12075-#endif
12076-
12077+static inline int smp_call_function_single(int cpuid, void (*func) (void *info),
12078+ void *info, int retry, int wait)
12079+{
12080+ /* Disable interrupts here? */
12081+ func(info);
12082+ return 0;
12083+}
12084+#endif /* !CONFIG_SMP */
12085 #endif
12086
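Two things happen in this smp_64.h cleanup: the __ASSEMBLY__/ASSEMBLY guards disappear because the header is no longer pulled into assembly files, and the UP build gains an inline smp_call_function_single() stub so callers need no #ifdef of their own. The stub's comment leaves interrupt state as an open question; because the SMP version runs the function from IPI context with interrupts off, a stricter variant would look like this (hypothetical name, standard irqflags helpers; later mainline UP stubs do disable interrupts around the call):

/* Hypothetical stricter UP stub: run func the way the SMP path would. */
static inline int smp_call_function_single_irqsafe(int cpuid,
                void (*func)(void *info), void *info, int retry, int wait)
{
        unsigned long flags;

        local_irq_save(flags);
        func(info);
        local_irq_restore(flags);
        return 0;
}
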
12087--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/system_64.h 2009-05-14 11:02:43.000000000 +0200
12088+++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/system_64.h 2009-03-04 11:28:34.000000000 +0100
12089@@ -24,6 +24,7 @@
12090 #define __EXTRA_CLOBBER \
12091 ,"rcx","rbx","rdx","r8","r9","r10","r11","r12","r13","r14","r15"
12092
12093+/* Save and restore flags so a leaked NT bit cannot reach the next task */
12094 #define switch_to(prev,next,last) \
12095 asm volatile(SAVE_CONTEXT \
12096 "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \
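
The comment concerns the NT (nested task) bit in RFLAGS: a task that manages to set NT could otherwise see it survive the context switch and break the next task's IRET. Upstream 2.6.19 brackets the switch with pushf/popf so each task's flags stay private to it; the context macros there read roughly as follows (quoted from that era's asm-x86_64/system.h, an assumption worth verifying against the tree at hand):

/* Flags travel with each task's stack frame, so a leaked NT dies here. */
#define SAVE_CONTEXT    "pushf ; pushq %%rbp ; movq %%rsi,%%rbp\n\t"
#define RESTORE_CONTEXT "movq %%rbp,%%rsi ; popq %%rbp ; popf\t"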
12097--- sle11-2009-05-14.orig/include/asm-x86/mach-xen/asm/tlbflush_64.h 2009-05-14 11:02:43.000000000 +0200
12098+++ sle11-2009-05-14/include/asm-x86/mach-xen/asm/tlbflush_64.h 2009-03-04 11:28:34.000000000 +0100
12099@@ -12,9 +12,6 @@
12100 */
12101 #define __flush_tlb_global() xen_tlb_flush()
12102
12103-
12104-extern unsigned long pgkern_mask;
12105-
12106 #define __flush_tlb_all() __flush_tlb_global()
12107
12108 #define __flush_tlb_one(addr) xen_invlpg((unsigned long)addr)
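
Dropping the pgkern_mask extern removes a global that no longer has a definition anywhere; the remaining macros route every flush through the hypervisor. A hedged sketch of what xen_tlb_flush() amounts to, assuming the usual mmuext interface:

/* Sketch: a full local TLB flush is one mmuext hypercall, not a CR3 write. */
static inline void xen_tlb_flush_sketch(void)
{
        struct mmuext_op op = { .cmd = MMUEXT_TLB_FLUSH_LOCAL };

        BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}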
12109--- sle11-2009-05-14.orig/include/linux/skbuff.h 2009-02-16 15:58:14.000000000 +0100
12110+++ sle11-2009-05-14/include/linux/skbuff.h 2009-03-04 11:28:34.000000000 +0100
12111@@ -1771,5 +1771,12 @@ static inline void skb_forward_csum(stru
12112 }
12113
12114 bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off);
12115+
12116+#ifdef CONFIG_XEN
12117+int skb_checksum_setup(struct sk_buff *skb);
12118+#else
12119+static inline int skb_checksum_setup(struct sk_buff *skb) { return 0; }
12120+#endif
12121+
12122 #endif /* __KERNEL__ */
12123 #endif /* _LINUX_SKBUFF_H */
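
The CONFIG_XEN split is the standard stub pattern: one real declaration, one static inline returning 0, so callers stay unconditional and the non-Xen build folds the call away entirely. A hypothetical transmit-path caller:

/* Hypothetical driver fragment; compiles the same with or without Xen. */
static int example_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
        if (skb_checksum_setup(skb)) {
                dev_kfree_skb_any(skb); /* bogus partial-checksum metadata */
                return 0;               /* consumed */
        }
        /* ... hand the skb to the hardware queue ... */
        return 0;
}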
12124--- sle11-2009-05-14.orig/include/xen/evtchn.h 2009-05-14 11:02:43.000000000 +0200
12125+++ sle11-2009-05-14/include/xen/evtchn.h 2009-03-04 11:28:34.000000000 +0100
12126@@ -54,34 +54,34 @@
12127 */
12128 int bind_caller_port_to_irqhandler(
12129 unsigned int caller_port,
12130- irqreturn_t (*handler)(int, void *, struct pt_regs *),
12131+ irq_handler_t handler,
12132 unsigned long irqflags,
12133 const char *devname,
12134 void *dev_id);
12135 int bind_listening_port_to_irqhandler(
12136 unsigned int remote_domain,
12137- irqreturn_t (*handler)(int, void *, struct pt_regs *),
12138+ irq_handler_t handler,
12139 unsigned long irqflags,
12140 const char *devname,
12141 void *dev_id);
12142 int bind_interdomain_evtchn_to_irqhandler(
12143 unsigned int remote_domain,
12144 unsigned int remote_port,
12145- irqreturn_t (*handler)(int, void *, struct pt_regs *),
12146+ irq_handler_t handler,
12147 unsigned long irqflags,
12148 const char *devname,
12149 void *dev_id);
12150 int bind_virq_to_irqhandler(
12151 unsigned int virq,
12152 unsigned int cpu,
12153- irqreturn_t (*handler)(int, void *, struct pt_regs *),
12154+ irq_handler_t handler,
12155 unsigned long irqflags,
12156 const char *devname,
12157 void *dev_id);
12158 int bind_ipi_to_irqhandler(
12159 unsigned int ipi,
12160 unsigned int cpu,
12161- irqreturn_t (*handler)(int, void *, struct pt_regs *),
12162+ irq_handler_t handler,
12163 unsigned long irqflags,
12164 const char *devname,
12165 void *dev_id);
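
All five binding helpers move to the irq_handler_t typedef that 2.6.19 introduced when it dropped the pt_regs argument from interrupt handlers; handlers that still need the registers fetch them with get_irq_regs(). A hypothetical event-channel handler in the new style:

/* Hypothetical handler, post-2.6.19 signature. */
static irqreturn_t example_evtchn_interrupt(int irq, void *dev_id)
{
        struct pt_regs *regs = get_irq_regs(); /* only if actually needed */

        (void)regs;
        return IRQ_HANDLED;
}

The same API change is why xencons_rx() below loses its pt_regs parameter.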
12166--- sle11-2009-05-14.orig/include/xen/xencons.h 2009-05-14 11:02:43.000000000 +0200
12167+++ sle11-2009-05-14/include/xen/xencons.h 2009-03-04 11:28:34.000000000 +0100
12168@@ -8,7 +8,7 @@ void xencons_force_flush(void);
12169 void xencons_resume(void);
12170
12171 /* Interrupt work hooks. Receive data, or kick data out. */
12172-void xencons_rx(char *buf, unsigned len, struct pt_regs *regs);
12173+void xencons_rx(char *buf, unsigned len);
12174 void xencons_tx(void);
12175
12176 int xencons_ring_init(void);
12177--- sle11-2009-05-14.orig/mm/mprotect.c 2009-02-16 15:58:14.000000000 +0100
12178+++ sle11-2009-05-14/mm/mprotect.c 2009-03-04 11:28:34.000000000 +0100
12179@@ -92,7 +92,7 @@ static inline void change_pmd_range(stru
12180 next = pmd_addr_end(addr, end);
12181 if (pmd_none_or_clear_bad(pmd))
12182 continue;
12183- if (arch_change_pte_range(mm, pmd, addr, next, newprot))
12184+ if (arch_change_pte_range(mm, pmd, addr, next, newprot, dirty_accountable))
12185 continue;
12186 change_pte_range(mm, pmd, addr, next, newprot, dirty_accountable);
12187 } while (pmd++, addr = next, addr != end);
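
dirty_accountable comes from 2.6.19's dirty-page accounting: when it is set, a PTE already known dirty may be made writable during mprotect() instead of paying a later write fault. Threading the flag through the arch hook lets the Xen override apply the same rule as the generic code, which uses it roughly like this (paraphrased from mm/mprotect.c of the same kernel):

ptent = pte_modify(ptent, newprot);
/* Avoid taking write faults for pages we know to be dirty. */
if (dirty_accountable && pte_dirty(ptent))
        ptent = pte_mkwrite(ptent);
set_pte_at(mm, addr, pte, ptent);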
12188--- sle11-2009-05-14.orig/net/core/dev.c 2009-02-16 15:58:14.000000000 +0100
12189+++ sle11-2009-05-14/net/core/dev.c 2009-03-04 11:28:34.000000000 +0100
12190@@ -1765,15 +1765,14 @@ inline int skb_checksum_setup(struct sk_
12191 }
12192 if ((skb->h.raw + skb->csum + 2) > skb->tail)
12193 goto out;
12194- skb->ip_summed = CHECKSUM_HW;
12195+ skb->ip_summed = CHECKSUM_PARTIAL;
12196 skb->proto_csum_blank = 0;
12197 }
12198 return 0;
12199 out:
12200 return -EPROTO;
12201 }
12202-#else
12203-inline int skb_checksum_setup(struct sk_buff *skb) { return 0; }
12204+EXPORT_SYMBOL(skb_checksum_setup);
12205 #endif
12206
12207 /**
12208@@ -2327,7 +2326,7 @@ int netif_receive_skb(struct sk_buff *sk
12209 case CHECKSUM_UNNECESSARY:
12210 skb->proto_data_valid = 1;
12211 break;
12212- case CHECKSUM_HW:
12213+ case CHECKSUM_PARTIAL:
12214 /* XXX Implement me. */
12215 default:
12216 skb->proto_data_valid = 0;
12217@@ -4989,7 +4988,6 @@ EXPORT_SYMBOL(unregister_netdevice_notif
12218 EXPORT_SYMBOL(net_enable_timestamp);
12219 EXPORT_SYMBOL(net_disable_timestamp);
12220 EXPORT_SYMBOL(dev_get_flags);
12221-EXPORT_SYMBOL(skb_checksum_setup);
12222
12223 #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
12224 EXPORT_SYMBOL(br_handle_frame_hook);
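
Two independent fixes meet in the dev.c hunks. First, 2.6.19 split the old CHECKSUM_HW value by direction, so the Xen checksum-offload code is renamed to match; second, EXPORT_SYMBOL(skb_checksum_setup) moves next to the function it exports, which also keeps the !CONFIG_XEN build from exporting what is now a static inline. The split, summarized with hypothetical EX_ names (ordering illustrative; the real values live in include/linux/skbuff.h):

/* Hedged summary of the 2.6.19 CHECKSUM_HW split: */
enum ex_checksum {
        EX_CHECKSUM_NONE,        /* nothing computed or verified yet */
        EX_CHECKSUM_UNNECESSARY, /* rx: already verified by the device */
        EX_CHECKSUM_COMPLETE,    /* rx: skb->csum holds the full packet sum */
        EX_CHECKSUM_PARTIAL,     /* tx: the device must finish the checksum */
};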