From: www.kernel.org
Subject: Linux 2.6.19
Patch-mainline: 2.6.19

Automatically created from "patches.kernel.org/patch-2.6.19" by xen-port-patches.py

Acked-by: jbeulich@novell.com

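Note for readers skimming the io_apic_32-xen.c hunks further down: the largest refactor there replaces open-coded 32-bit register-pair writes with ordered accessors (ioapic_write_entry writes the high word first, so an entry is fully populated before a clear mask bit can enable it; ioapic_mask_entry writes the low word first, so the mask bit is set before the high bits change). The standalone C sketch below models only that write ordering against a mock two-word register file; the rte[] array and apic_write_reg() helper are illustrative stand-ins, not kernel API.

#include <stdint.h>
#include <stdio.h>

/* Mock of one IO-APIC redirection-table entry: two 32-bit words.
 * In the patch this is 'union entry_union' over
 * 'struct IO_APIC_route_entry'; bit 16 of the low word (w1) is
 * the mask bit, which is all the ordering argument needs. */
static uint32_t rte[2];

static void apic_write_reg(int word, uint32_t value)   /* illustrative stub */
{
	rte[word] = value;
	printf("wrote w%d = %#x (masked=%u)\n", word + 1,
	       (unsigned)value, (unsigned)((rte[0] >> 16) & 1));
}

/* Mirrors ioapic_write_entry(): high word (w2) first, so the entry
 * is complete before an unmasked low word can enable it. */
static void write_entry(uint32_t w1, uint32_t w2)
{
	apic_write_reg(1, w2);
	apic_write_reg(0, w1);
}

/* Mirrors ioapic_mask_entry(): low word (w1) first, so the mask
 * bit is set before the high bits are changed. */
static void mask_entry(void)
{
	apic_write_reg(0, 1u << 16);
	apic_write_reg(1, 0);
}

int main(void)
{
	write_entry(0x31 /* w1: unmasked, vector 0x31 */,
	            0x01000000 /* w2: destination field */);
	mask_entry();
	return 0;
}
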
--- sle11-2009-10-16.orig/arch/x86/Kconfig 2009-04-20 11:36:10.000000000 +0200
+++ sle11-2009-10-16/arch/x86/Kconfig 2009-03-04 11:28:34.000000000 +0100
@@ -415,6 +415,7 @@ config SCHED_NO_NO_OMIT_FRAME_POINTER

 menuconfig PARAVIRT_GUEST
 bool "Paravirtualized guest support"
+ depends on !X86_XEN && !X86_64_XEN
 help
 Say Y here to get to see options related to running Linux under
 various hypervisors. This option alone does not add any kernel code.
--- sle11-2009-10-16.orig/arch/x86/kernel/apic_32-xen.c 2009-10-28 14:55:12.000000000 +0100
+++ sle11-2009-10-16/arch/x86/kernel/apic_32-xen.c 2009-03-04 11:28:34.000000000 +0100
@@ -54,7 +54,6 @@ static cpumask_t timer_bcast_ipi;
 /*
 * Knob to control our willingness to enable the local APIC.
 */
-int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */

 /*
 * Debug level
@@ -102,7 +101,7 @@ int get_physical_broadcast(void)

 #ifndef CONFIG_XEN
 #ifndef CONFIG_SMP
-static void up_apic_timer_interrupt_call(struct pt_regs *regs)
+static void up_apic_timer_interrupt_call(void)
 {
 int cpu = smp_processor_id();

@@ -111,11 +110,11 @@ static void up_apic_timer_interrupt_call
 */
 per_cpu(irq_stat, cpu).apic_timer_irqs++;

- smp_local_timer_interrupt(regs);
+ smp_local_timer_interrupt();
 }
 #endif

-void smp_send_timer_broadcast_ipi(struct pt_regs *regs)
+void smp_send_timer_broadcast_ipi(void)
 {
 cpumask_t mask;

@@ -128,7 +127,7 @@ void smp_send_timer_broadcast_ipi(struct
 * We can directly call the apic timer interrupt handler
 * in UP case. Minus all irq related functions
 */
- up_apic_timer_interrupt_call(regs);
+ up_apic_timer_interrupt_call();
 #endif
 }
 }
--- sle11-2009-10-16.orig/arch/x86/kernel/cpu/common-xen.c 2009-10-28 14:55:12.000000000 +0100
+++ sle11-2009-10-16/arch/x86/kernel/cpu/common-xen.c 2009-03-04 11:28:34.000000000 +0100
@@ -43,7 +43,7 @@ struct cpu_dev * cpu_devs[X86_VENDOR_NUM

 extern int disable_pse;

-static void default_init(struct cpuinfo_x86 * c)
+static void __cpuinit default_init(struct cpuinfo_x86 * c)
 {
 /* Not much we can do here... */
 /* Check if at least it has cpuid */
@@ -56,7 +56,7 @@ static void default_init(struct cpuinfo_
 }
 }

-static struct cpu_dev default_cpu = {
+static struct cpu_dev __cpuinitdata default_cpu = {
 .c_init = default_init,
 .c_vendor = "Unknown",
 };
@@ -191,7 +191,16 @@ static void __cpuinit get_cpu_vendor(str

 static int __init x86_fxsr_setup(char * s)
 {
+ /* Tell all the other CPU's to not use it... */
 disable_x86_fxsr = 1;
+
+ /*
+ * ... and clear the bits early in the boot_cpu_data
+ * so that the bootup process doesn't try to do this
+ * either.
+ */
+ clear_bit(X86_FEATURE_FXSR, boot_cpu_data.x86_capability);
+ clear_bit(X86_FEATURE_XMM, boot_cpu_data.x86_capability);
 return 1;
 }
 __setup("nofxsr", x86_fxsr_setup);
@@ -272,7 +281,7 @@ static void __init early_cpu_detect(void
 }
 }

-void __cpuinit generic_identify(struct cpuinfo_x86 * c)
+static void __cpuinit generic_identify(struct cpuinfo_x86 * c)
 {
 u32 tfms, xlvl;
 int ebx;
@@ -698,8 +707,7 @@ old_gdt:
 */
 atomic_inc(&init_mm.mm_count);
 current->active_mm = &init_mm;
- if (current->mm)
- BUG();
+ BUG_ON(current->mm);
 enter_lazy_tlb(&init_mm, current);

 load_esp0(t, thread);
@@ -712,7 +720,7 @@ old_gdt:
 #endif

 /* Clear %fs and %gs. */
- asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs");
+ asm volatile ("movl %0, %%fs; movl %0, %%gs" : : "r" (0));

 /* Clear all 6 debug registers: */
 set_debugreg(0, 0);
--- sle11-2009-10-16.orig/arch/x86/kernel/entry_32-xen.S 2009-05-14 11:01:46.000000000 +0200
+++ sle11-2009-10-16/arch/x86/kernel/entry_32-xen.S 2009-05-14 11:07:47.000000000 +0200
@@ -80,8 +80,12 @@ VM_MASK = 0x00020000
 NMI_MASK = 0x80000000

 #ifndef CONFIG_XEN
-#define DISABLE_INTERRUPTS cli
-#define ENABLE_INTERRUPTS sti
+/* These are replaces for paravirtualization */
+#define DISABLE_INTERRUPTS cli
+#define ENABLE_INTERRUPTS sti
+#define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit
+#define INTERRUPT_RETURN iret
+#define GET_CR0_INTO_EAX movl %cr0, %eax
 #else
 /* Offsets into shared_info_t. */
 #define evtchn_upcall_pending /* 0 */
@@ -99,15 +103,29 @@ NMI_MASK = 0x80000000

 #define __DISABLE_INTERRUPTS movb $1,evtchn_upcall_mask(%esi)
 #define __ENABLE_INTERRUPTS movb $0,evtchn_upcall_mask(%esi)
+#define __TEST_PENDING testb $0xFF,evtchn_upcall_pending(%esi)
 #define DISABLE_INTERRUPTS GET_VCPU_INFO ; \
 __DISABLE_INTERRUPTS
 #define ENABLE_INTERRUPTS GET_VCPU_INFO ; \
 __ENABLE_INTERRUPTS
-#define __TEST_PENDING testb $0xFF,evtchn_upcall_pending(%esi)
+#define ENABLE_INTERRUPTS_SYSEXIT __ENABLE_INTERRUPTS ; \
+sysexit_scrit: /**** START OF SYSEXIT CRITICAL REGION ****/ ; \
+ __TEST_PENDING ; \
+ jnz 14f # process more events if necessary... ; \
+ movl ESI(%esp), %esi ; \
+ sysexit ; \
+14: __DISABLE_INTERRUPTS ; \
+ TRACE_IRQS_OFF ; \
+sysexit_ecrit: /**** END OF SYSEXIT CRITICAL REGION ****/ ; \
+ push %esp ; \
+ call evtchn_do_upcall ; \
+ add $4,%esp ; \
+ jmp ret_from_intr
+#define INTERRUPT_RETURN iret
 #endif

 #ifdef CONFIG_PREEMPT
-#define preempt_stop cli; TRACE_IRQS_OFF
+#define preempt_stop DISABLE_INTERRUPTS; TRACE_IRQS_OFF
 #else
 #define preempt_stop
 #define resume_kernel restore_nocheck
@@ -206,18 +224,21 @@ NMI_MASK = 0x80000000

 #define RING0_INT_FRAME \
 CFI_STARTPROC simple;\
+ CFI_SIGNAL_FRAME;\
 CFI_DEF_CFA esp, 3*4;\
 /*CFI_OFFSET cs, -2*4;*/\
 CFI_OFFSET eip, -3*4

 #define RING0_EC_FRAME \
 CFI_STARTPROC simple;\
+ CFI_SIGNAL_FRAME;\
 CFI_DEF_CFA esp, 4*4;\
 /*CFI_OFFSET cs, -2*4;*/\
 CFI_OFFSET eip, -3*4

 #define RING0_PTREGS_FRAME \
 CFI_STARTPROC simple;\
+ CFI_SIGNAL_FRAME;\
 CFI_DEF_CFA esp, OLDESP-EBX;\
 /*CFI_OFFSET cs, CS-OLDESP;*/\
 CFI_OFFSET eip, EIP-OLDESP;\
@@ -263,8 +284,9 @@ ret_from_intr:
 check_userspace:
 movl EFLAGS(%esp), %eax # mix EFLAGS and CS
 movb CS(%esp), %al
- testl $(VM_MASK | 2), %eax
- jz resume_kernel
+ andl $(VM_MASK | SEGMENT_RPL_MASK), %eax
+ cmpl $USER_RPL, %eax
+ jb resume_kernel # not returning to v8086 or userspace
 ENTRY(resume_userspace)
 DISABLE_INTERRUPTS # make sure we don't miss an interrupt
 # setting need_resched or sigpending
@@ -277,7 +299,7 @@ ENTRY(resume_userspace)

 #ifdef CONFIG_PREEMPT
 ENTRY(resume_kernel)
- cli
+ DISABLE_INTERRUPTS
 cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ?
 jnz restore_nocheck
 need_resched:
@@ -297,6 +319,7 @@ need_resched:
 # sysenter call handler stub
 ENTRY(sysenter_entry)
 CFI_STARTPROC simple
+ CFI_SIGNAL_FRAME
 CFI_DEF_CFA esp, 0
 CFI_REGISTER esp, ebp
 movl SYSENTER_stack_esp0(%esp),%esp
@@ -305,7 +328,7 @@ sysenter_past_esp:
 * No need to follow this irqs on/off section: the syscall
 * disabled irqs and here we enable it straight after entry:
 */
- sti
+ ENABLE_INTERRUPTS
 pushl $(__USER_DS)
 CFI_ADJUST_CFA_OFFSET 4
 /*CFI_REL_OFFSET ss, 0*/
@@ -359,26 +382,8 @@ sysenter_past_esp:
 movl EIP(%esp), %edx
 movl OLDESP(%esp), %ecx
 xorl %ebp,%ebp
-#ifdef CONFIG_XEN
 TRACE_IRQS_ON
- __ENABLE_INTERRUPTS
-sysexit_scrit: /**** START OF SYSEXIT CRITICAL REGION ****/
- __TEST_PENDING
- jnz 14f # process more events if necessary...
- movl ESI(%esp), %esi
- sysexit
-14: __DISABLE_INTERRUPTS
- TRACE_IRQS_OFF
-sysexit_ecrit: /**** END OF SYSEXIT CRITICAL REGION ****/
- push %esp
- call evtchn_do_upcall
- add $4,%esp
- jmp ret_from_intr
-#else
- TRACE_IRQS_ON
- sti
- sysexit
-#endif /* !CONFIG_XEN */
+ ENABLE_INTERRUPTS_SYSEXIT
 CFI_ENDPROC

 # pv sysenter call handler stub
@@ -444,8 +449,8 @@ restore_all:
 # See comments in process.c:copy_thread() for details.
 movb OLDSS(%esp), %ah
 movb CS(%esp), %al
- andl $(VM_MASK | (4 << 8) | 3), %eax
- cmpl $((4 << 8) | 3), %eax
+ andl $(VM_MASK | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
+ cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax
 CFI_REMEMBER_STATE
 je ldt_ss # returning to user-space with LDT SS
 restore_nocheck:
@@ -467,12 +472,11 @@ restore_nocheck_notrace:
 RESTORE_REGS
 addl $4, %esp
 CFI_ADJUST_CFA_OFFSET -4
-1: iret
+1: INTERRUPT_RETURN
 .section .fixup,"ax"
 iret_exc:
 #ifndef CONFIG_XEN
- TRACE_IRQS_ON
- sti
+ ENABLE_INTERRUPTS
 #endif
 pushl $0 # no error code
 pushl $do_iret_error
@@ -498,7 +502,7 @@ ldt_ss:
 * dosemu and wine happy. */
 subl $8, %esp # reserve space for switch16 pointer
 CFI_ADJUST_CFA_OFFSET 8
- cli
+ DISABLE_INTERRUPTS
 TRACE_IRQS_OFF
 movl %esp, %eax
 /* Set up the 16bit stack frame with switch32 pointer on top,
@@ -508,7 +512,7 @@ ldt_ss:
 TRACE_IRQS_IRET
 RESTORE_REGS
 lss 20+4(%esp), %esp # switch to 16bit stack
-1: iret
+1: INTERRUPT_RETURN
 .section __ex_table,"a"
 .align 4
 .long 1b,iret_exc
@@ -524,7 +528,7 @@ scrit: /**** START OF CRITICAL REGION **
 RESTORE_REGS
 addl $4, %esp
 CFI_ADJUST_CFA_OFFSET -4
-1: iret
+1: INTERRUPT_RETURN
 .section __ex_table,"a"
 .align 4
 .long 1b,iret_exc
@@ -713,11 +717,9 @@ ENTRY(name) \
 #define UNWIND_ESPFIX_STACK
 #endif

-ENTRY(divide_error)
- RING0_INT_FRAME
- pushl $0 # no error code
- CFI_ADJUST_CFA_OFFSET 4
- pushl $do_divide_error
+KPROBE_ENTRY(page_fault)
+ RING0_EC_FRAME
+ pushl $do_page_fault
 CFI_ADJUST_CFA_OFFSET 4
 ALIGN
 error_code:
@@ -767,6 +769,7 @@ error_code:
 call *%edi
 jmp ret_from_exception
 CFI_ENDPROC
+KPROBE_END(page_fault)

 #ifdef CONFIG_XEN
 # A note on the "critical region" in our callback handler.
@@ -790,9 +793,11 @@ ENTRY(hypervisor_callback)
 pushl %eax
 CFI_ADJUST_CFA_OFFSET 4
 SAVE_ALL
- testb $2,CS(%esp)
+ movl CS(%esp),%ecx
 movl EIP(%esp),%eax
- jnz .Ldo_upcall
+ andl $SEGMENT_RPL_MASK,%ecx
+ cmpl $USER_RPL,%ecx
+ jae .Ldo_upcall
 cmpl $scrit,%eax
 jb 0f
 cmpl $ecrit,%eax
@@ -928,7 +933,7 @@ ENTRY(device_not_available)
 CFI_ADJUST_CFA_OFFSET 4
 SAVE_ALL
 #ifndef CONFIG_XEN
- movl %cr0, %eax
+ GET_CR0_INTO_EAX
 testl $0x4, %eax # EM (math emulation bit)
 je device_available_emulate
 pushl $0 # temporary storage for ORIG_EIP
@@ -963,9 +968,15 @@ device_available_emulate:
 jne ok; \
 label: \
 movl SYSENTER_stack_esp0+offset(%esp),%esp; \
+ CFI_DEF_CFA esp, 0; \
+ CFI_UNDEFINED eip; \
 pushfl; \
+ CFI_ADJUST_CFA_OFFSET 4; \
 pushl $__KERNEL_CS; \
- pushl $sysenter_past_esp
+ CFI_ADJUST_CFA_OFFSET 4; \
+ pushl $sysenter_past_esp; \
+ CFI_ADJUST_CFA_OFFSET 4; \
+ CFI_REL_OFFSET eip, 0
 #endif /* CONFIG_XEN */

 KPROBE_ENTRY(debug)
@@ -984,7 +995,8 @@ debug_stack_correct:
 call do_debug
 jmp ret_from_exception
 CFI_ENDPROC
- .previous .text
+KPROBE_END(debug)
+
 #ifndef CONFIG_XEN
 /*
 * NMI is doubly nasty. It can happen _while_ we're handling
@@ -994,7 +1006,7 @@ debug_stack_correct:
 * check whether we got an NMI on the debug path where the debug
 * fault happened on the sysenter path.
 */
-ENTRY(nmi)
+KPROBE_ENTRY(nmi)
 RING0_INT_FRAME
 pushl %eax
 CFI_ADJUST_CFA_OFFSET 4
@@ -1019,6 +1031,7 @@ ENTRY(nmi)
 cmpl $sysenter_entry,12(%esp)
 je nmi_debug_stack_check
 nmi_stack_correct:
+ /* We have a RING0_INT_FRAME here */
 pushl %eax
 CFI_ADJUST_CFA_OFFSET 4
 SAVE_ALL
@@ -1029,9 +1042,12 @@ nmi_stack_correct:
 CFI_ENDPROC

 nmi_stack_fixup:
+ RING0_INT_FRAME
 FIX_STACK(12,nmi_stack_correct, 1)
 jmp nmi_stack_correct
+
 nmi_debug_stack_check:
+ /* We have a RING0_INT_FRAME here */
 cmpw $__KERNEL_CS,16(%esp)
 jne nmi_stack_correct
 cmpl $debug,(%esp)
@@ -1042,8 +1058,10 @@ nmi_debug_stack_check:
 jmp nmi_stack_correct

 nmi_16bit_stack:
- RING0_INT_FRAME
- /* create the pointer to lss back */
+ /* We have a RING0_INT_FRAME here.
+ *
+ * create the pointer to lss back
+ */
 pushl %ss
 CFI_ADJUST_CFA_OFFSET 4
 pushl %esp
@@ -1064,14 +1082,14 @@ nmi_16bit_stack:
 call do_nmi
 RESTORE_REGS
 lss 12+4(%esp), %esp # back to 16bit stack
-1: iret
+1: INTERRUPT_RETURN
 CFI_ENDPROC
 .section __ex_table,"a"
 .align 4
 .long 1b,iret_exc
 .previous
 #else
-ENTRY(nmi)
+KPROBE_ENTRY(nmi)
 RING0_INT_FRAME
 pushl %eax
 CFI_ADJUST_CFA_OFFSET 4
@@ -1083,6 +1101,7 @@ ENTRY(nmi)
 jmp restore_all
 CFI_ENDPROC
 #endif
+KPROBE_END(nmi)

 KPROBE_ENTRY(int3)
 RING0_INT_FRAME
@@ -1094,7 +1113,7 @@ KPROBE_ENTRY(int3)
 call do_int3
 jmp ret_from_exception
 CFI_ENDPROC
- .previous .text
+KPROBE_END(int3)

 ENTRY(overflow)
 RING0_INT_FRAME
@@ -1159,7 +1178,7 @@ KPROBE_ENTRY(general_protection)
 CFI_ADJUST_CFA_OFFSET 4
 jmp error_code
 CFI_ENDPROC
- .previous .text
+KPROBE_END(general_protection)

 ENTRY(alignment_check)
 RING0_EC_FRAME
@@ -1168,13 +1187,14 @@ ENTRY(alignment_check)
 jmp error_code
 CFI_ENDPROC

-KPROBE_ENTRY(page_fault)
- RING0_EC_FRAME
- pushl $do_page_fault
+ENTRY(divide_error)
+ RING0_INT_FRAME
+ pushl $0 # no error code
+ CFI_ADJUST_CFA_OFFSET 4
+ pushl $do_divide_error
 CFI_ADJUST_CFA_OFFSET 4
 jmp error_code
 CFI_ENDPROC
- .previous .text

 #ifdef CONFIG_X86_MCE
 ENTRY(machine_check)
@@ -1236,6 +1256,19 @@ ENTRY(fixup_4gb_segment)
 jmp error_code
 CFI_ENDPROC

+ENTRY(kernel_thread_helper)
+ pushl $0 # fake return address for unwinder
+ CFI_STARTPROC
+ movl %edx,%eax
+ push %edx
+ CFI_ADJUST_CFA_OFFSET 4
+ call *%ebx
+ push %eax
+ CFI_ADJUST_CFA_OFFSET 4
+ call do_exit
+ CFI_ENDPROC
+ENDPROC(kernel_thread_helper)
+
 .section .rodata,"a"
 #include "syscall_table.S"

--- sle11-2009-10-16.orig/arch/x86/kernel/head_32-xen.S 2009-10-28 14:55:12.000000000 +0100
+++ sle11-2009-10-16/arch/x86/kernel/head_32-xen.S 2009-03-04 11:28:34.000000000 +0100
@@ -62,7 +62,7 @@ ENTRY(startup_32)
 movl %eax,%gs
 cld # gcc2 wants the direction flag cleared at all times

- pushl %eax # fake return address
+ pushl $0 # fake return address for unwinder
 jmp start_kernel

 #define HYPERCALL_PAGE_OFFSET 0x1000
--- sle11-2009-10-16.orig/arch/x86/kernel/io_apic_32-xen.c 2009-03-16 16:13:45.000000000 +0100
+++ sle11-2009-10-16/arch/x86/kernel/io_apic_32-xen.c 2009-03-04 11:28:34.000000000 +0100
@@ -31,6 +31,9 @@
 #include <linux/acpi.h>
 #include <linux/module.h>
 #include <linux/sysdev.h>
+#include <linux/pci.h>
+#include <linux/msi.h>
+#include <linux/htirq.h>

 #include <asm/io.h>
 #include <asm/smp.h>
@@ -38,13 +41,15 @@
 #include <asm/timer.h>
 #include <asm/i8259.h>
 #include <asm/nmi.h>
+#include <asm/msidef.h>
+#include <asm/hypertransport.h>

 #include <mach_apic.h>
+#include <mach_apicdef.h>

 #include "io_ports.h"

 #ifdef CONFIG_XEN
-
 #include <xen/interface/xen.h>
 #include <xen/interface/physdev.h>
 #include <xen/evtchn.h>
@@ -56,32 +61,7 @@

 unsigned long io_apic_irqs;

-static inline unsigned int xen_io_apic_read(unsigned int apic, unsigned int reg)
-{
- struct physdev_apic apic_op;
- int ret;
-
- apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr;
- apic_op.reg = reg;
- ret = HYPERVISOR_physdev_op(PHYSDEVOP_apic_read, &apic_op);
- if (ret)
- return ret;
- return apic_op.value;
-}
-
-static inline void xen_io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
-{
- struct physdev_apic apic_op;
-
- apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr;
- apic_op.reg = reg;
- apic_op.value = value;
- WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic_op));
-}
-
-#define io_apic_read(a,r) xen_io_apic_read(a,r)
-#define io_apic_write(a,r,v) xen_io_apic_write(a,r,v)
-
+#define clear_IO_APIC() ((void)0)
 #endif /* CONFIG_XEN */

 int (*ioapic_renumber_irq)(int ioapic, int irq);
@@ -108,7 +88,7 @@ int sis_apic_bug = -1;
 */
 int nr_ioapic_registers[MAX_IO_APICS];

-int disable_timer_pin_1 __initdata;
+static int disable_timer_pin_1 __initdata;

 /*
 * Rough estimation of how many shared IRQs there are, can
@@ -128,12 +108,124 @@ static struct irq_pin_list {
 int apic, pin, next;
 } irq_2_pin[PIN_MAP_SIZE];

-int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1};
-#ifdef CONFIG_PCI_MSI
-#define vector_to_irq(vector) \
- (platform_legacy_irq(vector) ? vector : vector_irq[vector])
+#ifndef CONFIG_XEN
+struct io_apic {
+ unsigned int index;
+ unsigned int unused[3];
+ unsigned int data;
+};
+
+static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
+{
+ return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
+ + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK);
+}
+#endif
+
+static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
+{
+#ifndef CONFIG_XEN
+ struct io_apic __iomem *io_apic = io_apic_base(apic);
+ writel(reg, &io_apic->index);
+ return readl(&io_apic->data);
+#else
+ struct physdev_apic apic_op;
+ int ret;
+
+ apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr;
+ apic_op.reg = reg;
+ ret = HYPERVISOR_physdev_op(PHYSDEVOP_apic_read, &apic_op);
+ if (ret)
+ return ret;
+ return apic_op.value;
+#endif
+}
+
+static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
+{
+#ifndef CONFIG_XEN
+ struct io_apic __iomem *io_apic = io_apic_base(apic);
+ writel(reg, &io_apic->index);
+ writel(value, &io_apic->data);
+#else
+ struct physdev_apic apic_op;
+
+ apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr;
+ apic_op.reg = reg;
+ apic_op.value = value;
+ WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic_op));
+#endif
+}
+
+#ifndef CONFIG_XEN
+/*
+ * Re-write a value: to be used for read-modify-write
+ * cycles where the read already set up the index register.
+ *
+ * Older SiS APIC requires we rewrite the index register
+ */
+static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
+{
+ volatile struct io_apic *io_apic = io_apic_base(apic);
+ if (sis_apic_bug)
+ writel(reg, &io_apic->index);
+ writel(value, &io_apic->data);
+}
 #else
-#define vector_to_irq(vector) (vector)
+#define io_apic_modify io_apic_write
+#endif
+
+union entry_union {
+ struct { u32 w1, w2; };
+ struct IO_APIC_route_entry entry;
+};
+
+#ifndef CONFIG_XEN
+static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
+{
+ union entry_union eu;
+ unsigned long flags;
+ spin_lock_irqsave(&ioapic_lock, flags);
+ eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
+ eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+ return eu.entry;
+}
+#endif
+
+/*
+ * When we write a new IO APIC routing entry, we need to write the high
+ * word first! If the mask bit in the low word is clear, we will enable
+ * the interrupt, and we need to make sure the entry is fully populated
+ * before that happens.
+ */
+static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
+{
+ unsigned long flags;
+ union entry_union eu;
+ eu.entry = e;
+ spin_lock_irqsave(&ioapic_lock, flags);
+ io_apic_write(apic, 0x11 + 2*pin, eu.w2);
+ io_apic_write(apic, 0x10 + 2*pin, eu.w1);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+#ifndef CONFIG_XEN
+/*
+ * When we mask an IO APIC routing entry, we need to write the low
+ * word first, in order to set the mask bit before we change the
+ * high bits!
+ */
+static void ioapic_mask_entry(int apic, int pin)
+{
+ unsigned long flags;
+ union entry_union eu = { .entry.mask = 1 };
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ io_apic_write(apic, 0x10 + 2*pin, eu.w1);
+ io_apic_write(apic, 0x11 + 2*pin, eu.w2);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+}
 #endif

 /*
@@ -159,9 +251,7 @@ static void add_pin_to_irq(unsigned int
 entry->pin = pin;
 }

-#ifdef CONFIG_XEN
-#define clear_IO_APIC() ((void)0)
-#else
+#ifndef CONFIG_XEN
 /*
 * Reroute an IRQ to a different pin.
 */
@@ -246,25 +336,16 @@ static void unmask_IO_APIC_irq (unsigned
 static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
 {
 struct IO_APIC_route_entry entry;
- unsigned long flags;

 /* Check delivery_mode to be sure we're not clearing an SMI pin */
- spin_lock_irqsave(&ioapic_lock, flags);
- *(((int*)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
- *(((int*)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ entry = ioapic_read_entry(apic, pin);
 if (entry.delivery_mode == dest_SMI)
 return;

 /*
 * Disable it in the IO-APIC irq-routing table:
 */
- memset(&entry, 0, sizeof(entry));
- entry.mask = 1;
- spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry) + 0));
- io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry) + 1));
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ ioapic_mask_entry(apic, pin);
 }

 static void clear_IO_APIC (void)
@@ -304,7 +385,7 @@ static void set_ioapic_affinity_irq(unsi
 break;
 entry = irq_2_pin + entry->next;
 }
- set_irq_info(irq, cpumask);
+ set_native_irq_info(irq, cpumask);
 spin_unlock_irqrestore(&ioapic_lock, flags);
 }

@@ -1212,43 +1293,43 @@ static inline int IO_APIC_irq_trigger(in
 /* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
 u8 irq_vector[NR_IRQ_VECTORS] __read_mostly; /* = { FIRST_DEVICE_VECTOR , 0 }; */

-int assign_irq_vector(int irq)
+static int __assign_irq_vector(int irq)
 {
- unsigned long flags;
 int vector;
 struct physdev_irq irq_op;

- BUG_ON(irq != AUTO_ASSIGN && (unsigned)irq >= NR_IRQ_VECTORS);
+ BUG_ON((unsigned)irq >= NR_IRQ_VECTORS);

 if (irq < PIRQ_BASE || irq - PIRQ_BASE >= NR_PIRQS)
 return -EINVAL;

- spin_lock_irqsave(&vector_lock, flags);
-
- if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) {
- spin_unlock_irqrestore(&vector_lock, flags);
- return IO_APIC_VECTOR(irq);
- }
+ if (irq_vector[irq] > 0)
+ return irq_vector[irq];

 irq_op.irq = irq;
- if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) {
- spin_unlock_irqrestore(&vector_lock, flags);
+ if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op))
 return -ENOSPC;
- }

 vector = irq_op.vector;
- vector_irq[vector] = irq;
- if (irq != AUTO_ASSIGN)
- IO_APIC_VECTOR(irq) = vector;
+ irq_vector[irq] = vector;
+
+ return vector;
+}
+
+static int assign_irq_vector(int irq)
+{
+ unsigned long flags;
+ int vector;

+ spin_lock_irqsave(&vector_lock, flags);
+ vector = __assign_irq_vector(irq);
 spin_unlock_irqrestore(&vector_lock, flags);

 return vector;
 }

 #ifndef CONFIG_XEN
-static struct hw_interrupt_type ioapic_level_type;
-static struct hw_interrupt_type ioapic_edge_type;
+static struct irq_chip ioapic_chip;

 #define IOAPIC_AUTO -1
 #define IOAPIC_EDGE 0
@@ -1256,16 +1337,16 @@ static struct hw_interrupt_type ioapic_e

 static void ioapic_register_intr(int irq, int vector, unsigned long trigger)
 {
- unsigned idx;
-
- idx = use_pci_vector() && !platform_legacy_irq(irq) ? vector : irq;
-
 if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
 trigger == IOAPIC_LEVEL)
- irq_desc[idx].chip = &ioapic_level_type;
- else
- irq_desc[idx].chip = &ioapic_edge_type;
- set_intr_gate(vector, interrupt[idx]);
+ set_irq_chip_and_handler_name(irq, &ioapic_chip,
+ handle_fasteoi_irq, "fasteoi");
+ else {
+ irq_desc[irq].status |= IRQ_DELAYED_DISABLE;
+ set_irq_chip_and_handler_name(irq, &ioapic_chip,
+ handle_edge_irq, "edge");
+ }
+ set_intr_gate(vector, interrupt[irq]);
 }
 #else
 #define ioapic_register_intr(irq, vector, trigger) evtchn_register_pirq(irq)
@@ -1336,9 +1417,8 @@ static void __init setup_IO_APIC_irqs(vo
 if (!apic && (irq < 16))
 disable_8259A_irq(irq);
 }
+ ioapic_write_entry(apic, pin, entry);
 spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
- io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
 set_native_irq_info(irq, TARGET_CPUS);
 spin_unlock_irqrestore(&ioapic_lock, flags);
 }
@@ -1355,7 +1435,6 @@ static void __init setup_IO_APIC_irqs(vo
 static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, int vector)
 {
 struct IO_APIC_route_entry entry;
- unsigned long flags;

 memset(&entry,0,sizeof(entry));

@@ -1380,15 +1459,13 @@ static void __init setup_ExtINT_IRQ0_pin
 * The timer IRQ doesn't have to know that behind the
 * scene we have a 8259A-master in AEOI mode ...
 */
- irq_desc[0].chip = &ioapic_edge_type;
+ irq_desc[0].chip = &ioapic_chip;
+ set_irq_handler(0, handle_edge_irq);

 /*
 * Add it to the IO-APIC irq-routing table:
 */
- spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
- io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ ioapic_write_entry(apic, pin, entry);

 enable_8259A_irq(0);
 }
@@ -1498,10 +1575,7 @@ void __init print_IO_APIC(void)
 for (i = 0; i <= reg_01.bits.entries; i++) {
 struct IO_APIC_route_entry entry;

- spin_lock_irqsave(&ioapic_lock, flags);
- *(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2);
- *(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2);
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ entry = ioapic_read_entry(apic, i);

 printk(KERN_DEBUG " %02x %03X %02X ",
 i,
@@ -1521,17 +1595,12 @@ void __init print_IO_APIC(void)
 );
 }
 }
- if (use_pci_vector())
- printk(KERN_INFO "Using vector-based indexing\n");
 printk(KERN_DEBUG "IRQ to pin mappings:\n");
 for (i = 0; i < NR_IRQS; i++) {
 struct irq_pin_list *entry = irq_2_pin + i;
 if (entry->pin < 0)
 continue;
- if (use_pci_vector() && !platform_legacy_irq(i))
- printk(KERN_DEBUG "IRQ%d ", IO_APIC_VECTOR(i));
- else
- printk(KERN_DEBUG "IRQ%d ", i);
+ printk(KERN_DEBUG "IRQ%d ", i);
 for (;;) {
 printk("-> %d:%d", entry->apic, entry->pin);
 if (!entry->next)
@@ -1720,10 +1789,7 @@ static void __init enable_IO_APIC(void)
 /* See if any of the pins is in ExtINT mode */
 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
 struct IO_APIC_route_entry entry;
- spin_lock_irqsave(&ioapic_lock, flags);
- *(((int *)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
- *(((int *)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ entry = ioapic_read_entry(apic, pin);


 /* If the interrupt line is enabled and in ExtInt mode
@@ -1782,7 +1848,6 @@ void disable_IO_APIC(void)
 */
 if (ioapic_i8259.pin != -1) {
 struct IO_APIC_route_entry entry;
- unsigned long flags;

 memset(&entry, 0, sizeof(entry));
 entry.mask = 0; /* Enabled */
@@ -1799,12 +1864,7 @@ void disable_IO_APIC(void)
 /*
 * Add it to the IO-APIC irq-routing table:
 */
- spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(ioapic_i8259.apic, 0x11+2*ioapic_i8259.pin,
- *(((int *)&entry)+1));
- io_apic_write(ioapic_i8259.apic, 0x10+2*ioapic_i8259.pin,
- *(((int *)&entry)+0));
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
 }
 disconnect_bsp_APIC(ioapic_i8259.pin != -1);
 #endif
@@ -1971,6 +2031,8 @@ static int __init timer_irq_works(void)
 */

 /*
+ * Startup quirk:
+ *
 * Starting up a edge-triggered IO-APIC interrupt is
 * nasty - we need to make sure that we get the edge.
 * If it is already asserted for some reason, we need
@@ -1978,8 +2040,10 @@ static int __init timer_irq_works(void)
 *
 * This is not complete - we should be able to fake
 * an edge even if it isn't on the 8259A...
+ *
+ * (We do this for level-triggered IRQs too - it cannot hurt.)
 */
-static unsigned int startup_edge_ioapic_irq(unsigned int irq)
+static unsigned int startup_ioapic_irq(unsigned int irq)
 {
 int was_pending = 0;
 unsigned long flags;
@@ -1996,47 +2060,18 @@ static unsigned int startup_edge_ioapic_
 return was_pending;
 }

-/*
- * Once we have recorded IRQ_PENDING already, we can mask the
- * interrupt for real. This prevents IRQ storms from unhandled
- * devices.
- */
-static void ack_edge_ioapic_irq(unsigned int irq)
-{
- move_irq(irq);
- if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED))
- == (IRQ_PENDING | IRQ_DISABLED))
- mask_IO_APIC_irq(irq);
- ack_APIC_irq();
-}
-
-/*
- * Level triggered interrupts can just be masked,
- * and shutting down and starting up the interrupt
- * is the same as enabling and disabling them -- except
- * with a startup need to return a "was pending" value.
- *
- * Level triggered interrupts are special because we
- * do not touch any IO-APIC register while handling
- * them. We ack the APIC in the end-IRQ handler, not
- * in the start-IRQ-handler. Protection against reentrance
- * from the same interrupt is still provided, both by the
- * generic IRQ layer and by the fact that an unacked local
- * APIC does not accept IRQs.
- */
-static unsigned int startup_level_ioapic_irq (unsigned int irq)
+static void ack_ioapic_irq(unsigned int irq)
 {
- unmask_IO_APIC_irq(irq);
-
- return 0; /* don't check for pending */
+ move_native_irq(irq);
+ ack_APIC_irq();
 }

-static void end_level_ioapic_irq (unsigned int irq)
+static void ack_ioapic_quirk_irq(unsigned int irq)
 {
 unsigned long v;
 int i;

- move_irq(irq);
+ move_native_irq(irq);
 /*
 * It appears there is an erratum which affects at least version 0x11
 * of I/O APIC (that's the 82093AA and cores integrated into various
@@ -2056,7 +2091,7 @@ static void end_level_ioapic_irq (unsign
 * operation to prevent an edge-triggered interrupt escaping meanwhile.
 * The idea is from Manfred Spraul. --macro
 */
- i = IO_APIC_VECTOR(irq);
+ i = irq_vector[irq];

 v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));

@@ -2071,104 +2106,24 @@ static void end_level_ioapic_irq (unsign
 }
 }

-#ifdef CONFIG_PCI_MSI
-static unsigned int startup_edge_ioapic_vector(unsigned int vector)
-{
- int irq = vector_to_irq(vector);
-
- return startup_edge_ioapic_irq(irq);
-}
-
-static void ack_edge_ioapic_vector(unsigned int vector)
-{
- int irq = vector_to_irq(vector);
-
- move_native_irq(vector);
- ack_edge_ioapic_irq(irq);
-}
-
-static unsigned int startup_level_ioapic_vector (unsigned int vector)
-{
- int irq = vector_to_irq(vector);
-
- return startup_level_ioapic_irq (irq);
-}
-
-static void end_level_ioapic_vector (unsigned int vector)
-{
- int irq = vector_to_irq(vector);
-
- move_native_irq(vector);
- end_level_ioapic_irq(irq);
-}
-
-static void mask_IO_APIC_vector (unsigned int vector)
-{
- int irq = vector_to_irq(vector);
-
- mask_IO_APIC_irq(irq);
-}
-
-static void unmask_IO_APIC_vector (unsigned int vector)
+static int ioapic_retrigger_irq(unsigned int irq)
 {
- int irq = vector_to_irq(vector);
-
- unmask_IO_APIC_irq(irq);
-}
-
-#ifdef CONFIG_SMP
-static void set_ioapic_affinity_vector (unsigned int vector,
- cpumask_t cpu_mask)
-{
- int irq = vector_to_irq(vector);
-
- set_native_irq_info(vector, cpu_mask);
- set_ioapic_affinity_irq(irq, cpu_mask);
-}
-#endif
-#endif
-
-static int ioapic_retrigger(unsigned int irq)
-{
- send_IPI_self(IO_APIC_VECTOR(irq));
+ send_IPI_self(irq_vector[irq]);

 return 1;
 }

-/*
- * Level and edge triggered IO-APIC interrupts need different handling,
- * so we use two separate IRQ descriptors. Edge triggered IRQs can be
- * handled with the level-triggered descriptor, but that one has slightly
- * more overhead. Level-triggered interrupts cannot be handled with the
- * edge-triggered handler, without risking IRQ storms and other ugly
- * races.
- */
-static struct hw_interrupt_type ioapic_edge_type __read_mostly = {
- .typename = "IO-APIC-edge",
- .startup = startup_edge_ioapic,
- .shutdown = shutdown_edge_ioapic,
- .enable = enable_edge_ioapic,
- .disable = disable_edge_ioapic,
- .ack = ack_edge_ioapic,
- .end = end_edge_ioapic,
-#ifdef CONFIG_SMP
- .set_affinity = set_ioapic_affinity,
-#endif
- .retrigger = ioapic_retrigger,
-};
-
-static struct hw_interrupt_type ioapic_level_type __read_mostly = {
- .typename = "IO-APIC-level",
- .startup = startup_level_ioapic,
- .shutdown = shutdown_level_ioapic,
- .enable = enable_level_ioapic,
- .disable = disable_level_ioapic,
- .ack = mask_and_ack_level_ioapic,
- .end = end_level_ioapic,
+static struct irq_chip ioapic_chip __read_mostly = {
+ .name = "IO-APIC",
+ .startup = startup_ioapic_irq,
+ .mask = mask_IO_APIC_irq,
+ .unmask = unmask_IO_APIC_irq,
+ .ack = ack_ioapic_irq,
+ .eoi = ack_ioapic_quirk_irq,
 #ifdef CONFIG_SMP
- .set_affinity = set_ioapic_affinity,
+ .set_affinity = set_ioapic_affinity_irq,
 #endif
- .retrigger = ioapic_retrigger,
+ .retrigger = ioapic_retrigger_irq,
 };
 #endif /* !CONFIG_XEN */

@@ -2189,12 +2144,7 @@ static inline void init_IO_APIC_traps(vo
 */
 for (irq = 0; irq < NR_IRQS ; irq++) {
 int tmp = irq;
- if (use_pci_vector()) {
- if (!platform_legacy_irq(tmp))
- if ((tmp = vector_to_irq(tmp)) == -1)
- continue;
- }
- if (IO_APIC_IRQ(tmp) && !IO_APIC_VECTOR(tmp)) {
+ if (IO_APIC_IRQ(tmp) && !irq_vector[tmp]) {
 /*
 * Hmm.. We don't have an entry for this,
 * so default to an old-fashioned 8259
@@ -2205,22 +2155,23 @@ static inline void init_IO_APIC_traps(vo
 #ifndef CONFIG_XEN
 else
 /* Strange. Oh, well.. */
- irq_desc[irq].chip = &no_irq_type;
+ irq_desc[irq].chip = &no_irq_chip;
 #endif
 }
 }
 }

 #ifndef CONFIG_XEN
-static void enable_lapic_irq (unsigned int irq)
-{
- unsigned long v;
+/*
+ * The local APIC irq-chip implementation:
+ */

- v = apic_read(APIC_LVT0);
- apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED);
+static void ack_apic(unsigned int irq)
+{
+ ack_APIC_irq();
 }

-static void disable_lapic_irq (unsigned int irq)
+static void mask_lapic_irq (unsigned int irq)
 {
 unsigned long v;

@@ -2228,21 +2179,19 @@ static void disable_lapic_irq (unsigned
 apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED);
 }

-static void ack_lapic_irq (unsigned int irq)
+static void unmask_lapic_irq (unsigned int irq)
 {
- ack_APIC_irq();
-}
+ unsigned long v;

-static void end_lapic_irq (unsigned int i) { /* nothing */ }
+ v = apic_read(APIC_LVT0);
+ apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED);
+}

-static struct hw_interrupt_type lapic_irq_type __read_mostly = {
- .typename = "local-APIC-edge",
- .startup = NULL, /* startup_irq() not used for IRQ0 */
- .shutdown = NULL, /* shutdown_irq() not used for IRQ0 */
- .enable = enable_lapic_irq,
- .disable = disable_lapic_irq,
- .ack = ack_lapic_irq,
- .end = end_lapic_irq
+static struct irq_chip lapic_chip __read_mostly = {
+ .name = "local-APIC-edge",
+ .mask = mask_lapic_irq,
+ .unmask = unmask_lapic_irq,
+ .eoi = ack_apic,
 };

 static void setup_nmi (void)
@@ -2275,17 +2224,13 @@ static inline void unlock_ExtINT_logic(v
 int apic, pin, i;
 struct IO_APIC_route_entry entry0, entry1;
 unsigned char save_control, save_freq_select;
- unsigned long flags;

 pin = find_isa_irq_pin(8, mp_INT);
 apic = find_isa_irq_apic(8, mp_INT);
 if (pin == -1)
 return;

- spin_lock_irqsave(&ioapic_lock, flags);
- *(((int *)&entry0) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
- *(((int *)&entry0) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ entry0 = ioapic_read_entry(apic, pin);
 clear_IO_APIC_pin(apic, pin);

 memset(&entry1, 0, sizeof(entry1));
@@ -2298,10 +2243,7 @@ static inline void unlock_ExtINT_logic(v
 entry1.trigger = 0;
 entry1.vector = 0;

- spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry1) + 1));
- io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry1) + 0));
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ ioapic_write_entry(apic, pin, entry1);

 save_control = CMOS_READ(RTC_CONTROL);
 save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
@@ -2320,10 +2262,7 @@ static inline void unlock_ExtINT_logic(v
 CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
 clear_IO_APIC_pin(apic, pin);

- spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry0) + 1));
- io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry0) + 0));
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ ioapic_write_entry(apic, pin, entry0);
 }

 int timer_uses_ioapic_pin_0;
@@ -2423,7 +2362,8 @@ static inline void check_timer(void)
 printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ...");

 disable_8259A_irq(0);
- irq_desc[0].chip = &lapic_irq_type;
+ set_irq_chip_and_handler_name(0, &lapic_chip, handle_fasteoi_irq,
+ "fasteio");
 apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */
 enable_8259A_irq(0);

@@ -2537,17 +2477,12 @@ static int ioapic_suspend(struct sys_dev
 {
 struct IO_APIC_route_entry *entry;
 struct sysfs_ioapic_data *data;
- unsigned long flags;
 int i;

 data = container_of(dev, struct sysfs_ioapic_data, dev);
 entry = data->entry;
- spin_lock_irqsave(&ioapic_lock, flags);
- for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) {
- *(((int *)entry) + 1) = io_apic_read(dev->id, 0x11 + 2 * i);
- *(((int *)entry) + 0) = io_apic_read(dev->id, 0x10 + 2 * i);
- }
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ for (i = 0; i < nr_ioapic_registers[dev->id]; i ++)
+ entry[i] = ioapic_read_entry(dev->id, i);

 return 0;
 }
@@ -2569,11 +2504,9 @@ static int ioapic_resume(struct sys_devi
 reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid;
 io_apic_write(dev->id, 0, reg_00.raw);
 }
- for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) {
- io_apic_write(dev->id, 0x11+2*i, *(((int *)entry)+1));
- io_apic_write(dev->id, 0x10+2*i, *(((int *)entry)+0));
- }
 spin_unlock_irqrestore(&ioapic_lock, flags);
+ for (i = 0; i < nr_ioapic_registers[dev->id]; i ++)
+ ioapic_write_entry(dev->id, i, entry[i]);

 return 0;
 }
@@ -2619,8 +2552,240 @@ static int __init ioapic_init_sysfs(void

 device_initcall(ioapic_init_sysfs);

+/*
+ * Dynamic irq allocate and deallocation
+ */
+int create_irq(void)
+{
+ /* Allocate an unused irq */
+ int irq, new, vector;
+ unsigned long flags;
+
+ irq = -ENOSPC;
+ spin_lock_irqsave(&vector_lock, flags);
+ for (new = (NR_IRQS - 1); new >= 0; new--) {
+ if (platform_legacy_irq(new))
+ continue;
+ if (irq_vector[new] != 0)
+ continue;
+ vector = __assign_irq_vector(new);
+ if (likely(vector > 0))
+ irq = new;
+ break;
+ }
+ spin_unlock_irqrestore(&vector_lock, flags);
+
+ if (irq >= 0) {
+ set_intr_gate(vector, interrupt[irq]);
+ dynamic_irq_init(irq);
+ }
+ return irq;
+}
+
+void destroy_irq(unsigned int irq)
+{
+ unsigned long flags;
+
+ dynamic_irq_cleanup(irq);
+
+ spin_lock_irqsave(&vector_lock, flags);
+ irq_vector[irq] = 0;
+ spin_unlock_irqrestore(&vector_lock, flags);
+}
+
 #endif /* CONFIG_XEN */

+/*
+ * MSI mesage composition
+ */
+#if defined(CONFIG_PCI_MSI) && !defined(CONFIG_XEN)
+static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
+{
+ int vector;
+ unsigned dest;
+
+ vector = assign_irq_vector(irq);
+ if (vector >= 0) {
+ dest = cpu_mask_to_apicid(TARGET_CPUS);
+
+ msg->address_hi = MSI_ADDR_BASE_HI;
+ msg->address_lo =
+ MSI_ADDR_BASE_LO |
+ ((INT_DEST_MODE == 0) ?
+ MSI_ADDR_DEST_MODE_PHYSICAL:
+ MSI_ADDR_DEST_MODE_LOGICAL) |
+ ((INT_DELIVERY_MODE != dest_LowestPrio) ?
+ MSI_ADDR_REDIRECTION_CPU:
+ MSI_ADDR_REDIRECTION_LOWPRI) |
+ MSI_ADDR_DEST_ID(dest);
+
+ msg->data =
+ MSI_DATA_TRIGGER_EDGE |
+ MSI_DATA_LEVEL_ASSERT |
+ ((INT_DELIVERY_MODE != dest_LowestPrio) ?
+ MSI_DATA_DELIVERY_FIXED:
+ MSI_DATA_DELIVERY_LOWPRI) |
+ MSI_DATA_VECTOR(vector);
+ }
+ return vector;
+}
+
+#ifdef CONFIG_SMP
+static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
+{
+ struct msi_msg msg;
+ unsigned int dest;
+ cpumask_t tmp;
+ int vector;
+
+ cpus_and(tmp, mask, cpu_online_map);
+ if (cpus_empty(tmp))
+ tmp = TARGET_CPUS;
+
+ vector = assign_irq_vector(irq);
+ if (vector < 0)
+ return;
+
+ dest = cpu_mask_to_apicid(mask);
+
+ read_msi_msg(irq, &msg);
+
+ msg.data &= ~MSI_DATA_VECTOR_MASK;
+ msg.data |= MSI_DATA_VECTOR(vector);
+ msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
+ msg.address_lo |= MSI_ADDR_DEST_ID(dest);
+
+ write_msi_msg(irq, &msg);
+ set_native_irq_info(irq, mask);
+}
+#endif /* CONFIG_SMP */
+
+/*
+ * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
+ * which implement the MSI or MSI-X Capability Structure.
+ */
+static struct irq_chip msi_chip = {
+ .name = "PCI-MSI",
+ .unmask = unmask_msi_irq,
+ .mask = mask_msi_irq,
+ .ack = ack_ioapic_irq,
+#ifdef CONFIG_SMP
+ .set_affinity = set_msi_irq_affinity,
+#endif
+ .retrigger = ioapic_retrigger_irq,
+};
+
+int arch_setup_msi_irq(unsigned int irq, struct pci_dev *dev)
+{
+ struct msi_msg msg;
+ int ret;
+ ret = msi_compose_msg(dev, irq, &msg);
+ if (ret < 0)
+ return ret;
+
+ write_msi_msg(irq, &msg);
+
+ set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq,
+ "edge");
+
+ return 0;
+}
+
+void arch_teardown_msi_irq(unsigned int irq)
+{
+ return;
+}
+
+#endif /* CONFIG_PCI_MSI */
+
+/*
+ * Hypertransport interrupt support
+ */
+#ifdef CONFIG_HT_IRQ
+
+#ifdef CONFIG_SMP
+
+static void target_ht_irq(unsigned int irq, unsigned int dest)
+{
+ struct ht_irq_msg msg;
+ fetch_ht_irq_msg(irq, &msg);
+
+ msg.address_lo &= ~(HT_IRQ_LOW_DEST_ID_MASK);
+ msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK);
+
+ msg.address_lo |= HT_IRQ_LOW_DEST_ID(dest);
+ msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest);
+
+ write_ht_irq_msg(irq, &msg);
+}
+
+static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
+{
+ unsigned int dest;
+ cpumask_t tmp;
+
+ cpus_and(tmp, mask, cpu_online_map);
+ if (cpus_empty(tmp))
+ tmp = TARGET_CPUS;
+
+ cpus_and(mask, tmp, CPU_MASK_ALL);
+
+ dest = cpu_mask_to_apicid(mask);
+
+ target_ht_irq(irq, dest);
+ set_native_irq_info(irq, mask);
+}
+#endif
+
+static struct irq_chip ht_irq_chip = {
+ .name = "PCI-HT",
+ .mask = mask_ht_irq,
+ .unmask = unmask_ht_irq,
+ .ack = ack_ioapic_irq,
+#ifdef CONFIG_SMP
+ .set_affinity = set_ht_irq_affinity,
+#endif
+ .retrigger = ioapic_retrigger_irq,
+};
+
+int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
+{
+ int vector;
+
+ vector = assign_irq_vector(irq);
+ if (vector >= 0) {
+ struct ht_irq_msg msg;
+ unsigned dest;
+ cpumask_t tmp;
+
+ cpus_clear(tmp);
+ cpu_set(vector >> 8, tmp);
+ dest = cpu_mask_to_apicid(tmp);
+
+ msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
+
+ msg.address_lo =
+ HT_IRQ_LOW_BASE |
+ HT_IRQ_LOW_DEST_ID(dest) |
+ HT_IRQ_LOW_VECTOR(vector) |
+ ((INT_DEST_MODE == 0) ?
+ HT_IRQ_LOW_DM_PHYSICAL :
+ HT_IRQ_LOW_DM_LOGICAL) |
+ HT_IRQ_LOW_RQEOI_EDGE |
+ ((INT_DELIVERY_MODE != dest_LowestPrio) ?
+ HT_IRQ_LOW_MT_FIXED :
+ HT_IRQ_LOW_MT_ARBITRATED) |
+ HT_IRQ_LOW_IRQ_MASKED;
+
+ write_ht_irq_msg(irq, &msg);
+
+ set_irq_chip_and_handler_name(irq, &ht_irq_chip,
+ handle_edge_irq, "edge");
+ }
+ return vector;
+}
+#endif /* CONFIG_HT_IRQ */
+
 /* --------------------------------------------------------------------------
 ACPI-based IOAPIC Configuration
 -------------------------------------------------------------------------- */
@@ -2774,13 +2939,34 @@ int io_apic_set_pci_routing (int ioapic,
 if (!ioapic && (irq < 16))
 disable_8259A_irq(irq);

+ ioapic_write_entry(ioapic, pin, entry);
 spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1));
- io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0));
- set_native_irq_info(use_pci_vector() ? entry.vector : irq, TARGET_CPUS);
+ set_native_irq_info(irq, TARGET_CPUS);
 spin_unlock_irqrestore(&ioapic_lock, flags);

 return 0;
 }

 #endif /* CONFIG_ACPI */
+
+static int __init parse_disable_timer_pin_1(char *arg)
+{
+ disable_timer_pin_1 = 1;
+ return 0;
+}
+early_param("disable_timer_pin_1", parse_disable_timer_pin_1);
+
+static int __init parse_enable_timer_pin_1(char *arg)
+{
+ disable_timer_pin_1 = -1;
+ return 0;
+}
+early_param("enable_timer_pin_1", parse_enable_timer_pin_1);
+
+static int __init parse_noapic(char *arg)
+{
+ /* disable IO-APIC */
+ disable_ioapic_setup();
+ return 0;
+}
+early_param("noapic", parse_noapic);
--- sle11-2009-10-16.orig/arch/x86/kernel/irq_32-xen.c 2009-10-28 14:55:12.000000000 +0100
+++ sle11-2009-10-16/arch/x86/kernel/irq_32-xen.c 2009-03-04 11:28:34.000000000 +0100
@@ -53,8 +53,10 @@ static union irq_ctx *softirq_ctx[NR_CPU
 */
 fastcall unsigned int do_IRQ(struct pt_regs *regs)
 {
+ struct pt_regs *old_regs;
 /* high bit used in ret_from_ code */
 int irq = ~regs->orig_eax;
+ struct irq_desc *desc = irq_desc + irq;
 #ifdef CONFIG_4KSTACKS
 union irq_ctx *curctx, *irqctx;
 u32 *isp;
@@ -66,6 +68,7 @@ fastcall unsigned int do_IRQ(struct pt_r
 BUG();
 }

+ old_regs = set_irq_regs(regs);
 /*irq_enter();*/
 #ifdef CONFIG_DEBUG_STACKOVERFLOW
 /* Debugging check for stack overflow: is there less than 1KB free? */
@@ -110,19 +113,20 @@ fastcall unsigned int do_IRQ(struct pt_r
 (curctx->tinfo.preempt_count & SOFTIRQ_MASK);

 asm volatile(
- " xchgl %%ebx,%%esp \n"
- " call __do_IRQ \n"
+ " xchgl %%ebx,%%esp \n"
+ " call *%%edi \n"
 " movl %%ebx,%%esp \n"
 : "=a" (arg1), "=d" (arg2), "=b" (ebx)
- : "0" (irq), "1" (regs), "2" (isp)
- : "memory", "cc", "ecx"
+ : "0" (irq), "1" (desc), "2" (isp),
+ "D" (desc->handle_irq)
+ : "memory", "cc"
 );
 } else
 #endif
- __do_IRQ(irq, regs);
+ desc->handle_irq(irq, desc);

 /*irq_exit();*/
-
+ set_irq_regs(old_regs);
 return 1;
 }

@@ -253,7 +257,8 @@ int show_interrupts(struct seq_file *p,
 for_each_online_cpu(j)
 seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
 #endif
- seq_printf(p, " %14s", irq_desc[i].chip->typename);
+ seq_printf(p, " %8s", irq_desc[i].chip->name);
+ seq_printf(p, "-%-8s", irq_desc[i].name);
 seq_printf(p, " %s", action->name);

 for (action=action->next; action; action = action->next)
--- sle11-2009-10-16.orig/arch/x86/kernel/ldt_32-xen.c 2009-10-28 14:55:12.000000000 +0100
+++ sle11-2009-10-16/arch/x86/kernel/ldt_32-xen.c 2009-03-04 11:28:34.000000000 +0100
@@ -1,5 +1,5 @@
 /*
- * linux/kernel/ldt.c
+ * linux/arch/i386/kernel/ldt.c
 *
 * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds
 * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
--- sle11-2009-10-16.orig/arch/x86/kernel/microcode-xen.c 2009-10-28 14:55:12.000000000 +0100
+++ sle11-2009-10-16/arch/x86/kernel/microcode-xen.c 2009-03-04 11:28:34.000000000 +0100
@@ -2,6 +2,7 @@
 * Intel CPU Microcode Update Driver for Linux
 *
 * Copyright (C) 2000-2004 Tigran Aivazian
+ * 2006 Shaohua Li <shaohua.li@intel.com>
 *
 * This driver allows to upgrade microcode on Intel processors
 * belonging to IA-32 family - PentiumPro, Pentium II,
@@ -33,7 +34,9 @@
 #include <linux/spinlock.h>
 #include <linux/mm.h>
 #include <linux/mutex.h>
-#include <linux/syscalls.h>
+#include <linux/cpu.h>
+#include <linux/firmware.h>
+#include <linux/platform_device.h>

 #include <asm/msr.h>
 #include <asm/uaccess.h>
@@ -55,12 +58,7 @@ module_param(verbose, int, 0644);
 /* no concurrent ->write()s are allowed on /dev/cpu/microcode */
 static DEFINE_MUTEX(microcode_mutex);

-static int microcode_open (struct inode *unused1, struct file *unused2)
-{
- return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
-}
-
-
+#ifdef CONFIG_MICROCODE_OLD_INTERFACE
 static int do_microcode_update (const void __user *ubuf, size_t len)
 {
 int err;
@@ -85,6 +83,11 @@ static int do_microcode_update (const vo
 return err;
 }

+static int microcode_open (struct inode *unused1, struct file *unused2)
+{
+ return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
+}
+
 static ssize_t microcode_write (struct file *file, const char __user *buf, size_t len, loff_t *ppos)
 {
 ssize_t ret;
@@ -117,7 +120,7 @@ static struct miscdevice microcode_dev =
 .fops = &microcode_fops,
 };

-static int __init microcode_init (void)
+static int __init microcode_dev_init (void)
 {
 int error;

@@ -129,6 +132,68 @@ static int __init microcode_init (void)
 return error;
 }

+ return 0;
+}
+
+static void __exit microcode_dev_exit (void)
+{
+ misc_deregister(&microcode_dev);
+}
+
+MODULE_ALIAS_MISCDEV(MICROCODE_MINOR);
+#else
+#define microcode_dev_init() 0
+#define microcode_dev_exit() do { } while(0)
+#endif
+
+/* fake device for request_firmware */
+static struct platform_device *microcode_pdev;
+
+static int request_microcode(void)
+{
+ char name[30];
+ const struct cpuinfo_x86 *c = &boot_cpu_data;
+ const struct firmware *firmware;
+ int error;
+ struct xen_platform_op op;
+
+ sprintf(name,"intel-ucode/%02x-%02x-%02x",
+ c->x86, c->x86_model, c->x86_mask);
+ error = request_firmware(&firmware, name, &microcode_pdev->dev);
+ if (error) {
+ pr_debug("ucode data file %s load failed\n", name);
+ return error;
+ }
+
+ op.cmd = XENPF_microcode_update;
+ set_xen_guest_handle(op.u.microcode.data, (void *)firmware->data);
+ op.u.microcode.length = firmware->size;
+ error = HYPERVISOR_platform_op(&op);
+
+ release_firmware(firmware);
+
+ if (error)
+ pr_debug("ucode load failed\n");
+
+ return error;
+}
+
+static int __init microcode_init (void)
+{
+ int error;
+
+ error = microcode_dev_init();
+ if (error)
+ return error;
+ microcode_pdev = platform_device_register_simple("microcode", -1,
+ NULL, 0);
+ if (IS_ERR(microcode_pdev)) {
+ microcode_dev_exit();
+ return PTR_ERR(microcode_pdev);
+ }
+
+ request_microcode();
+
 printk(KERN_INFO
 "IA-32 Microcode Update Driver: v" MICROCODE_VERSION " <tigran@veritas.com>\n");
 return 0;
@@ -136,9 +201,9 @@ static int __init microcode_init (void)

 static void __exit microcode_exit (void)
 {
- misc_deregister(&microcode_dev);
+ microcode_dev_exit();
+ platform_device_unregister(microcode_pdev);
 }

 module_init(microcode_init)
 module_exit(microcode_exit)
-MODULE_ALIAS_MISCDEV(MICROCODE_MINOR);
1810 --- sle11-2009-10-16.orig/arch/x86/kernel/mpparse_32-xen.c 2009-10-28 14:55:12.000000000 +0100
1811 +++ sle11-2009-10-16/arch/x86/kernel/mpparse_32-xen.c 2009-03-04 11:28:34.000000000 +0100
1812 @@ -30,6 +30,7 @@
1813 #include <asm/io_apic.h>
1814
1815 #include <mach_apic.h>
1816 +#include <mach_apicdef.h>
1817 #include <mach_mpparse.h>
1818 #include <bios_ebda.h>
1819
1820 @@ -68,7 +69,7 @@ unsigned int def_to_bigsmp = 0;
1821 /* Processor that is doing the boot up */
1822 unsigned int boot_cpu_physical_apicid = -1U;
1823 /* Internal processor count */
1824 -static unsigned int __devinitdata num_processors;
1825 +unsigned int __cpuinitdata num_processors;
1826
1827 /* Bitmask of physically existing CPUs */
1828 physid_mask_t phys_cpu_present_map;
1829 @@ -235,12 +236,14 @@ static void __init MP_bus_info (struct m
1830
1831 mpc_oem_bus_info(m, str, translation_table[mpc_record]);
1832
1833 +#if MAX_MP_BUSSES < 256
1834 if (m->mpc_busid >= MAX_MP_BUSSES) {
1835 printk(KERN_WARNING "MP table busid value (%d) for bustype %s "
1836 " is too large, max. supported is %d\n",
1837 m->mpc_busid, str, MAX_MP_BUSSES - 1);
1838 return;
1839 }
1840 +#endif
1841
1842 if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA)-1) == 0) {
1843 mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
1844 @@ -300,19 +303,6 @@ static void __init MP_lintsrc_info (stru
1845 m->mpc_irqtype, m->mpc_irqflag & 3,
1846 (m->mpc_irqflag >> 2) &3, m->mpc_srcbusid,
1847 m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
1848 - /*
1849 - * Well it seems all SMP boards in existence
1850 - * use ExtINT/LVT1 == LINT0 and
1851 - * NMI/LVT2 == LINT1 - the following check
1852 - * will show us if this assumption is false.
1853 - * Until then we do not have to add baggage.
1854 - */
1855 - if ((m->mpc_irqtype == mp_ExtINT) &&
1856 - (m->mpc_destapiclint != 0))
1857 - BUG();
1858 - if ((m->mpc_irqtype == mp_NMI) &&
1859 - (m->mpc_destapiclint != 1))
1860 - BUG();
1861 }
1862
1863 #ifdef CONFIG_X86_NUMAQ
1864 @@ -838,8 +828,7 @@ int es7000_plat;
1865
1866 #ifdef CONFIG_ACPI
1867
1868 -void __init mp_register_lapic_address (
1869 - u64 address)
1870 +void __init mp_register_lapic_address(u64 address)
1871 {
1872 #ifndef CONFIG_XEN
1873 mp_lapic_addr = (unsigned long) address;
1874 @@ -853,13 +842,10 @@ void __init mp_register_lapic_address (
1875 #endif
1876 }
1877
1878 -
1879 -void __devinit mp_register_lapic (
1880 - u8 id,
1881 - u8 enabled)
1882 +void __devinit mp_register_lapic (u8 id, u8 enabled)
1883 {
1884 struct mpc_config_processor processor;
1885 - int boot_cpu = 0;
1886 + int boot_cpu = 0;
1887
1888 if (MAX_APICS - id <= 0) {
1889 printk(KERN_WARNING "Processor #%d invalid (max %d)\n",
1890 @@ -898,11 +884,9 @@ static struct mp_ioapic_routing {
1891 u32 pin_programmed[4];
1892 } mp_ioapic_routing[MAX_IO_APICS];
1893
1894 -
1895 -static int mp_find_ioapic (
1896 - int gsi)
1897 +static int mp_find_ioapic (int gsi)
1898 {
1899 - int i = 0;
1900 + int i = 0;
1901
1902 /* Find the IOAPIC that manages this GSI. */
1903 for (i = 0; i < nr_ioapics; i++) {
1904 @@ -915,15 +899,11 @@ static int mp_find_ioapic (
1905
1906 return -1;
1907 }
1908 -
1909
1910 -void __init mp_register_ioapic (
1911 - u8 id,
1912 - u32 address,
1913 - u32 gsi_base)
1914 +void __init mp_register_ioapic(u8 id, u32 address, u32 gsi_base)
1915 {
1916 - int idx = 0;
1917 - int tmpid;
1918 + int idx = 0;
1919 + int tmpid;
1920
1921 if (nr_ioapics >= MAX_IO_APICS) {
1922 printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
1923 @@ -971,16 +951,10 @@ void __init mp_register_ioapic (
1924 mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr,
1925 mp_ioapic_routing[idx].gsi_base,
1926 mp_ioapic_routing[idx].gsi_end);
1927 -
1928 - return;
1929 }
1930
1931 -
1932 -void __init mp_override_legacy_irq (
1933 - u8 bus_irq,
1934 - u8 polarity,
1935 - u8 trigger,
1936 - u32 gsi)
1937 +void __init
1938 +mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
1939 {
1940 struct mpc_config_intsrc intsrc;
1941 int ioapic = -1;
1942 @@ -1018,15 +992,13 @@ void __init mp_override_legacy_irq (
1943 mp_irqs[mp_irq_entries] = intsrc;
1944 if (++mp_irq_entries == MAX_IRQ_SOURCES)
1945 panic("Max # of irq sources exceeded!\n");
1946 -
1947 - return;
1948 }
1949
1950 void __init mp_config_acpi_legacy_irqs (void)
1951 {
1952 struct mpc_config_intsrc intsrc;
1953 - int i = 0;
1954 - int ioapic = -1;
1955 + int i = 0;
1956 + int ioapic = -1;
1957
1958 /*
1959 * Fabricate the legacy ISA bus (bus #31).
1960 @@ -1095,12 +1067,12 @@ void __init mp_config_acpi_legacy_irqs (
1961
1962 #define MAX_GSI_NUM 4096
1963
1964 -int mp_register_gsi (u32 gsi, int triggering, int polarity)
1965 +int mp_register_gsi(u32 gsi, int triggering, int polarity)
1966 {
1967 - int ioapic = -1;
1968 - int ioapic_pin = 0;
1969 - int idx, bit = 0;
1970 - static int pci_irq = 16;
1971 + int ioapic = -1;
1972 + int ioapic_pin = 0;
1973 + int idx, bit = 0;
1974 + static int pci_irq = 16;
1975 /*
1976 	 * Mapping between Global System Interrupts, which
1977 * represent all possible interrupts, and IRQs
1978 --- sle11-2009-10-16.orig/arch/x86/kernel/pci-dma-xen.c 2009-10-28 14:55:12.000000000 +0100
1979 +++ sle11-2009-10-16/arch/x86/kernel/pci-dma-xen.c 2009-03-04 11:28:34.000000000 +0100
1980 @@ -110,8 +110,7 @@ dma_map_sg(struct device *hwdev, struct
1981 {
1982 int i, rc;
1983
1984 - if (direction == DMA_NONE)
1985 - BUG();
1986 + BUG_ON(!valid_dma_direction(direction));
1987 WARN_ON(nents == 0 || sg[0].length == 0);
1988
1989 if (swiotlb) {
1990 @@ -142,7 +141,7 @@ dma_unmap_sg(struct device *hwdev, struc
1991 {
1992 int i;
1993
1994 - BUG_ON(direction == DMA_NONE);
1995 + BUG_ON(!valid_dma_direction(direction));
1996 if (swiotlb)
1997 swiotlb_unmap_sg(hwdev, sg, nents, direction);
1998 else {
1999 @@ -159,8 +158,7 @@ dma_map_page(struct device *dev, struct
2000 {
2001 dma_addr_t dma_addr;
2002
2003 - BUG_ON(direction == DMA_NONE);
2004 -
2005 + BUG_ON(!valid_dma_direction(direction));
2006 if (swiotlb) {
2007 dma_addr = swiotlb_map_page(
2008 dev, page, offset, size, direction);
2009 @@ -177,7 +175,7 @@ void
2010 dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
2011 enum dma_data_direction direction)
2012 {
2013 - BUG_ON(direction == DMA_NONE);
2014 + BUG_ON(!valid_dma_direction(direction));
2015 if (swiotlb)
2016 swiotlb_unmap_page(dev, dma_address, size, direction);
2017 else
2018 @@ -359,8 +357,7 @@ dma_map_single(struct device *dev, void
2019 {
2020 dma_addr_t dma;
2021
2022 - if (direction == DMA_NONE)
2023 - BUG();
2024 + BUG_ON(!valid_dma_direction(direction));
2025 WARN_ON(size == 0);
2026
2027 if (swiotlb) {
2028 @@ -381,8 +378,7 @@ void
2029 dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
2030 enum dma_data_direction direction)
2031 {
2032 - if (direction == DMA_NONE)
2033 - BUG();
2034 + BUG_ON(!valid_dma_direction(direction));
2035 if (swiotlb)
2036 swiotlb_unmap_single(dev, dma_addr, size, direction);
2037 else
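
The pci-dma hunks above replace open-coded "direction == DMA_NONE" tests with valid_dma_direction(), which also rejects out-of-range values. In kernels of this vintage the helper (include/linux/dma-mapping.h) reads roughly:

	static inline int valid_dma_direction(int dma_direction)
	{
		return ((dma_direction == DMA_BIDIRECTIONAL) ||
			(dma_direction == DMA_TO_DEVICE) ||
			(dma_direction == DMA_FROM_DEVICE));
	}
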
2038 --- sle11-2009-10-16.orig/arch/x86/kernel/process_32-xen.c 2009-10-28 14:55:12.000000000 +0100
2039 +++ sle11-2009-10-16/arch/x86/kernel/process_32-xen.c 2009-03-04 11:28:34.000000000 +0100
2040 @@ -37,6 +37,7 @@
2041 #include <linux/kallsyms.h>
2042 #include <linux/ptrace.h>
2043 #include <linux/random.h>
2044 +#include <linux/personality.h>
2045
2046 #include <asm/uaccess.h>
2047 #include <asm/pgtable.h>
2048 @@ -186,7 +187,7 @@ void cpu_idle(void)
2049 void cpu_idle_wait(void)
2050 {
2051 unsigned int cpu, this_cpu = get_cpu();
2052 - cpumask_t map;
2053 + cpumask_t map, tmp = current->cpus_allowed;
2054
2055 set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
2056 put_cpu();
2057 @@ -208,6 +209,8 @@ void cpu_idle_wait(void)
2058 }
2059 cpus_and(map, map, cpu_online_map);
2060 } while (!cpus_empty(map));
2061 +
2062 + set_cpus_allowed(current, tmp);
2063 }
2064 EXPORT_SYMBOL_GPL(cpu_idle_wait);
2065
2066 @@ -240,9 +243,9 @@ void show_regs(struct pt_regs * regs)
2067 if (user_mode_vm(regs))
2068 printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp);
2069 printk(" EFLAGS: %08lx %s (%s %.*s)\n",
2070 - regs->eflags, print_tainted(), system_utsname.release,
2071 - (int)strcspn(system_utsname.version, " "),
2072 - system_utsname.version);
2073 + regs->eflags, print_tainted(), init_utsname()->release,
2074 + (int)strcspn(init_utsname()->version, " "),
2075 + init_utsname()->version);
2076 printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
2077 regs->eax,regs->ebx,regs->ecx,regs->edx);
2078 printk("ESI: %08lx EDI: %08lx EBP: %08lx",
2079 @@ -264,15 +267,6 @@ void show_regs(struct pt_regs * regs)
2080 * the "args".
2081 */
2082 extern void kernel_thread_helper(void);
2083 -__asm__(".section .text\n"
2084 - ".align 4\n"
2085 - "kernel_thread_helper:\n\t"
2086 - "movl %edx,%eax\n\t"
2087 - "pushl %edx\n\t"
2088 - "call *%ebx\n\t"
2089 - "pushl %eax\n\t"
2090 - "call do_exit\n"
2091 - ".previous");
2092
2093 /*
2094 * Create a kernel thread
2095 @@ -290,7 +284,7 @@ int kernel_thread(int (*fn)(void *), voi
2096 regs.xes = __USER_DS;
2097 regs.orig_eax = -1;
2098 regs.eip = (unsigned long) kernel_thread_helper;
2099 - regs.xcs = GET_KERNEL_CS();
2100 + regs.xcs = __KERNEL_CS | get_kernel_rpl();
2101 regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2;
2102
2103 /* Ok, create the new process.. */
2104 @@ -369,13 +363,12 @@ int copy_thread(int nr, unsigned long cl
2105
2106 tsk = current;
2107 if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
2108 - p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
2109 + p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr,
2110 + IO_BITMAP_BYTES, GFP_KERNEL);
2111 if (!p->thread.io_bitmap_ptr) {
2112 p->thread.io_bitmap_max = 0;
2113 return -ENOMEM;
2114 }
2115 - memcpy(p->thread.io_bitmap_ptr, tsk->thread.io_bitmap_ptr,
2116 - IO_BITMAP_BYTES);
2117 set_tsk_thread_flag(p, TIF_IO_BITMAP);
2118 }
2119
2120 @@ -871,7 +864,7 @@ asmlinkage int sys_get_thread_area(struc
2121
2122 unsigned long arch_align_stack(unsigned long sp)
2123 {
2124 - if (randomize_va_space)
2125 + if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
2126 sp -= get_random_int() % 8192;
2127 return sp & ~0xf;
2128 }
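
The copy_thread() hunk above folds a kmalloc()/memcpy() pair into kmemdup(). Simplified, that helper (mm/util.c) amounts to:

	#include <linux/slab.h>
	#include <linux/string.h>

	/* simplified sketch; the real helper also tracks the allocation caller */
	void *kmemdup(const void *src, size_t len, gfp_t gfp)
	{
		void *p = kmalloc(len, gfp);

		if (p)
			memcpy(p, src, len);
		return p;
	}
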
2129 --- sle11-2009-10-16.orig/arch/x86/kernel/setup_32-xen.c 2009-10-28 14:55:12.000000000 +0100
2130 +++ sle11-2009-10-16/arch/x86/kernel/setup_32-xen.c 2009-03-04 11:28:34.000000000 +0100
2131 @@ -56,6 +56,7 @@
2132 #include <asm/apic.h>
2133 #include <asm/e820.h>
2134 #include <asm/mpspec.h>
2135 +#include <asm/mmzone.h>
2136 #include <asm/setup.h>
2137 #include <asm/arch_hooks.h>
2138 #include <asm/sections.h>
2139 @@ -83,9 +84,6 @@ static struct notifier_block xen_panic_b
2140 xen_panic_event, NULL, 0 /* try to go last */
2141 };
2142
2143 -extern char hypercall_page[PAGE_SIZE];
2144 -EXPORT_SYMBOL(hypercall_page);
2145 -
2146 int disable_pse __devinitdata = 0;
2147
2148 /*
2149 @@ -105,18 +103,6 @@ EXPORT_SYMBOL(boot_cpu_data);
2150
2151 unsigned long mmu_cr4_features;
2152
2153 -#ifdef CONFIG_ACPI
2154 - int acpi_disabled = 0;
2155 -#else
2156 - int acpi_disabled = 1;
2157 -#endif
2158 -EXPORT_SYMBOL(acpi_disabled);
2159 -
2160 -#ifdef CONFIG_ACPI
2161 -int __initdata acpi_force = 0;
2162 -extern acpi_interrupt_flags acpi_sci_flags;
2163 -#endif
2164 -
2165 /* for MCA, but anyone else can use it if they want */
2166 unsigned int machine_id;
2167 #ifdef CONFIG_MCA
2168 @@ -170,7 +156,6 @@ struct e820map machine_e820;
2169 #endif
2170
2171 extern void early_cpu_init(void);
2172 -extern void generic_apic_probe(char *);
2173 extern int root_mountflags;
2174
2175 unsigned long saved_videomode;
2176 @@ -243,9 +228,6 @@ static struct resource adapter_rom_resou
2177 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
2178 } };
2179
2180 -#define ADAPTER_ROM_RESOURCES \
2181 - (sizeof adapter_rom_resources / sizeof adapter_rom_resources[0])
2182 -
2183 static struct resource video_rom_resource = {
2184 .name = "Video ROM",
2185 .start = 0xc0000,
2186 @@ -307,9 +289,6 @@ static struct resource standard_io_resou
2187 .flags = IORESOURCE_BUSY | IORESOURCE_IO
2188 } };
2189
2190 -#define STANDARD_IO_RESOURCES \
2191 - (sizeof standard_io_resources / sizeof standard_io_resources[0])
2192 -
2193 #define romsignature(x) (*(unsigned short *)(x) == 0xaa55)
2194
2195 static int __init romchecksum(unsigned char *rom, unsigned long length)
2196 @@ -372,7 +351,7 @@ static void __init probe_roms(void)
2197 }
2198
2199 /* check for adapter roms on 2k boundaries */
2200 - for (i = 0; i < ADAPTER_ROM_RESOURCES && start < upper; start += 2048) {
2201 + for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; start += 2048) {
2202 rom = isa_bus_to_virt(start);
2203 if (!romsignature(rom))
2204 continue;
2205 @@ -779,246 +758,152 @@ static inline void copy_edd(void)
2206 }
2207 #endif
2208
2209 -static void __init parse_cmdline_early (char ** cmdline_p)
2210 +static int __initdata user_defined_memmap = 0;
2211 +
2212 +/*
2213 + * "mem=nopentium" disables the 4MB page tables.
2214 + * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
2215 + * to <mem>, overriding the bios size.
2216 + * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from
2217 + * <start> to <start>+<mem>, overriding the bios size.
2218 + *
2219 + * HPA tells me bootloaders need to parse mem=, so no new
2220 + * option should be mem= [also see Documentation/i386/boot.txt]
2221 + */
2222 +static int __init parse_mem(char *arg)
2223 {
2224 - char c = ' ', *to = command_line, *from = saved_command_line;
2225 - int len = 0, max_cmdline;
2226 - int userdef = 0;
2227 -
2228 - if ((max_cmdline = MAX_GUEST_CMDLINE) > COMMAND_LINE_SIZE)
2229 - max_cmdline = COMMAND_LINE_SIZE;
2230 - memcpy(saved_command_line, xen_start_info->cmd_line, max_cmdline);
2231 - /* Save unparsed command line copy for /proc/cmdline */
2232 - saved_command_line[max_cmdline-1] = '\0';
2233 -
2234 - for (;;) {
2235 - if (c != ' ')
2236 - goto next_char;
2237 - /*
2238 - * "mem=nopentium" disables the 4MB page tables.
2239 - * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
2240 - * to <mem>, overriding the bios size.
2241 - * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from
2242 - * <start> to <start>+<mem>, overriding the bios size.
2243 - *
2244 - * HPA tells me bootloaders need to parse mem=, so no new
2245 - * option should be mem= [also see Documentation/i386/boot.txt]
2246 - */
2247 - if (!memcmp(from, "mem=", 4)) {
2248 - if (to != command_line)
2249 - to--;
2250 - if (!memcmp(from+4, "nopentium", 9)) {
2251 - from += 9+4;
2252 - clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
2253 - disable_pse = 1;
2254 - } else {
2255 - /* If the user specifies memory size, we
2256 - * limit the BIOS-provided memory map to
2257 - * that size. exactmap can be used to specify
2258 - * the exact map. mem=number can be used to
2259 - * trim the existing memory map.
2260 - */
2261 - unsigned long long mem_size;
2262 -
2263 - mem_size = memparse(from+4, &from);
2264 - limit_regions(mem_size);
2265 - userdef=1;
2266 - }
2267 - }
2268 + if (!arg)
2269 + return -EINVAL;
2270
2271 - else if (!memcmp(from, "memmap=", 7)) {
2272 - if (to != command_line)
2273 - to--;
2274 - if (!memcmp(from+7, "exactmap", 8)) {
2275 -#ifdef CONFIG_CRASH_DUMP
2276 - /* If we are doing a crash dump, we
2277 - * still need to know the real mem
2278 - * size before original memory map is
2279 - * reset.
2280 - */
2281 - find_max_pfn();
2282 - saved_max_pfn = max_pfn;
2283 -#endif
2284 - from += 8+7;
2285 - e820.nr_map = 0;
2286 - userdef = 1;
2287 - } else {
2288 - /* If the user specifies memory size, we
2289 - * limit the BIOS-provided memory map to
2290 - * that size. exactmap can be used to specify
2291 - * the exact map. mem=number can be used to
2292 - * trim the existing memory map.
2293 - */
2294 - unsigned long long start_at, mem_size;
2295 + if (strcmp(arg, "nopentium") == 0) {
2296 + clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
2297 + disable_pse = 1;
2298 + } else {
2299 + /* If the user specifies memory size, we
2300 + * limit the BIOS-provided memory map to
2301 + * that size. exactmap can be used to specify
2302 + * the exact map. mem=number can be used to
2303 + * trim the existing memory map.
2304 + */
2305 + unsigned long long mem_size;
2306
2307 - mem_size = memparse(from+7, &from);
2308 - if (*from == '@') {
2309 - start_at = memparse(from+1, &from);
2310 - add_memory_region(start_at, mem_size, E820_RAM);
2311 - } else if (*from == '#') {
2312 - start_at = memparse(from+1, &from);
2313 - add_memory_region(start_at, mem_size, E820_ACPI);
2314 - } else if (*from == '$') {
2315 - start_at = memparse(from+1, &from);
2316 - add_memory_region(start_at, mem_size, E820_RESERVED);
2317 - } else {
2318 - limit_regions(mem_size);
2319 - userdef=1;
2320 - }
2321 - }
2322 - }
2323 -
2324 - else if (!memcmp(from, "noexec=", 7))
2325 - noexec_setup(from + 7);
2326 + mem_size = memparse(arg, &arg);
2327 + limit_regions(mem_size);
2328 + user_defined_memmap = 1;
2329 + }
2330 + return 0;
2331 +}
2332 +early_param("mem", parse_mem);
2333
2334 +static int __init parse_memmap(char *arg)
2335 +{
2336 + if (!arg)
2337 + return -EINVAL;
2338
2339 -#ifdef CONFIG_X86_MPPARSE
2340 - /*
2341 - * If the BIOS enumerates physical processors before logical,
2342 - * maxcpus=N at enumeration-time can be used to disable HT.
2343 + if (strcmp(arg, "exactmap") == 0) {
2344 +#ifdef CONFIG_CRASH_DUMP
2345 + /* If we are doing a crash dump, we
2346 + * still need to know the real mem
2347 + * size before original memory map is
2348 + * reset.
2349 */
2350 - else if (!memcmp(from, "maxcpus=", 8)) {
2351 - extern unsigned int maxcpus;
2352 -
2353 - maxcpus = simple_strtoul(from + 8, NULL, 0);
2354 - }
2355 + find_max_pfn();
2356 + saved_max_pfn = max_pfn;
2357 #endif
2358 + e820.nr_map = 0;
2359 + user_defined_memmap = 1;
2360 + } else {
2361 + /* If the user specifies memory size, we
2362 + * limit the BIOS-provided memory map to
2363 + * that size. exactmap can be used to specify
2364 + * the exact map. mem=number can be used to
2365 + * trim the existing memory map.
2366 + */
2367 + unsigned long long start_at, mem_size;
2368
2369 -#ifdef CONFIG_ACPI
2370 - /* "acpi=off" disables both ACPI table parsing and interpreter */
2371 - else if (!memcmp(from, "acpi=off", 8)) {
2372 - disable_acpi();
2373 - }
2374 -
2375 - /* acpi=force to over-ride black-list */
2376 - else if (!memcmp(from, "acpi=force", 10)) {
2377 - acpi_force = 1;
2378 - acpi_ht = 1;
2379 - acpi_disabled = 0;
2380 - }
2381 -
2382 - /* acpi=strict disables out-of-spec workarounds */
2383 - else if (!memcmp(from, "acpi=strict", 11)) {
2384 - acpi_strict = 1;
2385 - }
2386 -
2387 - /* Limit ACPI just to boot-time to enable HT */
2388 - else if (!memcmp(from, "acpi=ht", 7)) {
2389 - if (!acpi_force)
2390 - disable_acpi();
2391 - acpi_ht = 1;
2392 - }
2393 -
2394 - /* "pci=noacpi" disable ACPI IRQ routing and PCI scan */
2395 - else if (!memcmp(from, "pci=noacpi", 10)) {
2396 - acpi_disable_pci();
2397 - }
2398 - /* "acpi=noirq" disables ACPI interrupt routing */
2399 - else if (!memcmp(from, "acpi=noirq", 10)) {
2400 - acpi_noirq_set();
2401 + mem_size = memparse(arg, &arg);
2402 + if (*arg == '@') {
2403 + start_at = memparse(arg+1, &arg);
2404 + add_memory_region(start_at, mem_size, E820_RAM);
2405 + } else if (*arg == '#') {
2406 + start_at = memparse(arg+1, &arg);
2407 + add_memory_region(start_at, mem_size, E820_ACPI);
2408 + } else if (*arg == '$') {
2409 + start_at = memparse(arg+1, &arg);
2410 + add_memory_region(start_at, mem_size, E820_RESERVED);
2411 + } else {
2412 + limit_regions(mem_size);
2413 + user_defined_memmap = 1;
2414 }
2415 + }
2416 + return 0;
2417 +}
2418 +early_param("memmap", parse_memmap);
2419
2420 - else if (!memcmp(from, "acpi_sci=edge", 13))
2421 - acpi_sci_flags.trigger = 1;
2422 -
2423 - else if (!memcmp(from, "acpi_sci=level", 14))
2424 - acpi_sci_flags.trigger = 3;
2425 +#ifdef CONFIG_PROC_VMCORE
2426 +/* elfcorehdr= specifies the location of elf core header
2427 + * stored by the crashed kernel.
2428 + */
2429 +static int __init parse_elfcorehdr(char *arg)
2430 +{
2431 + if (!arg)
2432 + return -EINVAL;
2433
2434 - else if (!memcmp(from, "acpi_sci=high", 13))
2435 - acpi_sci_flags.polarity = 1;
2436 + elfcorehdr_addr = memparse(arg, &arg);
2437 + return 0;
2438 +}
2439 +early_param("elfcorehdr", parse_elfcorehdr);
2440 +#endif /* CONFIG_PROC_VMCORE */
2441
2442 - else if (!memcmp(from, "acpi_sci=low", 12))
2443 - acpi_sci_flags.polarity = 3;
2444 +/*
2445 + * highmem=size forces highmem to be exactly 'size' bytes.
2446 + * This works even on boxes that have no highmem otherwise.
2447 + * This also works to reduce highmem size on bigger boxes.
2448 + */
2449 +static int __init parse_highmem(char *arg)
2450 +{
2451 + if (!arg)
2452 + return -EINVAL;
2453
2454 -#ifdef CONFIG_X86_IO_APIC
2455 - else if (!memcmp(from, "acpi_skip_timer_override", 24))
2456 - acpi_skip_timer_override = 1;
2457 + highmem_pages = memparse(arg, &arg) >> PAGE_SHIFT;
2458 + return 0;
2459 +}
2460 +early_param("highmem", parse_highmem);
2461
2462 - if (!memcmp(from, "disable_timer_pin_1", 19))
2463 - disable_timer_pin_1 = 1;
2464 - if (!memcmp(from, "enable_timer_pin_1", 18))
2465 - disable_timer_pin_1 = -1;
2466 -
2467 - /* disable IO-APIC */
2468 - else if (!memcmp(from, "noapic", 6))
2469 - disable_ioapic_setup();
2470 -#endif /* CONFIG_X86_IO_APIC */
2471 -#endif /* CONFIG_ACPI */
2472 +/*
2473 + * vmalloc=size forces the vmalloc area to be exactly 'size'
2474 + * bytes. This can be used to increase (or decrease) the
2475 + * vmalloc area - the default is 128m.
2476 + */
2477 +static int __init parse_vmalloc(char *arg)
2478 +{
2479 + if (!arg)
2480 + return -EINVAL;
2481
2482 -#ifdef CONFIG_X86_LOCAL_APIC
2483 - /* enable local APIC */
2484 - else if (!memcmp(from, "lapic", 5))
2485 - lapic_enable();
2486 -
2487 - /* disable local APIC */
2488 - else if (!memcmp(from, "nolapic", 6))
2489 - lapic_disable();
2490 -#endif /* CONFIG_X86_LOCAL_APIC */
2491 + __VMALLOC_RESERVE = memparse(arg, &arg);
2492 + return 0;
2493 +}
2494 +early_param("vmalloc", parse_vmalloc);
2495
2496 -#ifdef CONFIG_KEXEC
2497 - /* crashkernel=size@addr specifies the location to reserve for
2498 - * a crash kernel. By reserving this memory we guarantee
2499 - * that linux never set's it up as a DMA target.
2500 - * Useful for holding code to do something appropriate
2501 - * after a kernel panic.
2502 - */
2503 - else if (!memcmp(from, "crashkernel=", 12)) {
2504 #ifndef CONFIG_XEN
2505 - unsigned long size, base;
2506 - size = memparse(from+12, &from);
2507 - if (*from == '@') {
2508 - base = memparse(from+1, &from);
2509 - /* FIXME: Do I want a sanity check
2510 - * to validate the memory range?
2511 - */
2512 - crashk_res.start = base;
2513 - crashk_res.end = base + size - 1;
2514 - }
2515 -#else
2516 - printk("Ignoring crashkernel command line, "
2517 - "parameter will be supplied by xen\n");
2518 -#endif
2519 - }
2520 -#endif
2521 -#ifdef CONFIG_PROC_VMCORE
2522 - /* elfcorehdr= specifies the location of elf core header
2523 - * stored by the crashed kernel.
2524 - */
2525 - else if (!memcmp(from, "elfcorehdr=", 11))
2526 - elfcorehdr_addr = memparse(from+11, &from);
2527 -#endif
2528 +/*
2529 + * reservetop=size reserves a hole at the top of the kernel address space which
2530 + * a hypervisor can load into later. Needed for dynamically loaded hypervisors,
2531 + * so relocating the fixmap can be done before paging initialization.
2532 + */
2533 +static int __init parse_reservetop(char *arg)
2534 +{
2535 + unsigned long address;
2536
2537 - /*
2538 - * highmem=size forces highmem to be exactly 'size' bytes.
2539 - * This works even on boxes that have no highmem otherwise.
2540 - * This also works to reduce highmem size on bigger boxes.
2541 - */
2542 - else if (!memcmp(from, "highmem=", 8))
2543 - highmem_pages = memparse(from+8, &from) >> PAGE_SHIFT;
2544 -
2545 - /*
2546 - * vmalloc=size forces the vmalloc area to be exactly 'size'
2547 - * bytes. This can be used to increase (or decrease) the
2548 - * vmalloc area - the default is 128m.
2549 - */
2550 - else if (!memcmp(from, "vmalloc=", 8))
2551 - __VMALLOC_RESERVE = memparse(from+8, &from);
2552 + if (!arg)
2553 + return -EINVAL;
2554
2555 - next_char:
2556 - c = *(from++);
2557 - if (!c)
2558 - break;
2559 - if (COMMAND_LINE_SIZE <= ++len)
2560 - break;
2561 - *(to++) = c;
2562 - }
2563 - *to = '\0';
2564 - *cmdline_p = command_line;
2565 - if (userdef) {
2566 - printk(KERN_INFO "user-defined physical RAM map:\n");
2567 - print_memory_map("user");
2568 - }
2569 + address = memparse(arg, &arg);
2570 + reserve_top_address(address);
2571 + return 0;
2572 }
2573 +early_param("reservetop", parse_reservetop);
2574 +#endif
2575
2576 /*
2577 * Callback for efi_memory_walk.
2578 @@ -1039,7 +924,7 @@ efi_find_max_pfn(unsigned long start, un
2579 static int __init
2580 efi_memory_present_wrapper(unsigned long start, unsigned long end, void *arg)
2581 {
2582 - memory_present(0, start, end);
2583 + memory_present(0, PFN_UP(start), PFN_DOWN(end));
2584 return 0;
2585 }
2586
2587 @@ -1306,6 +1191,14 @@ static unsigned long __init setup_memory
2588 }
2589 printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
2590 pages_to_mb(highend_pfn - highstart_pfn));
2591 + num_physpages = highend_pfn;
2592 + high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
2593 +#else
2594 + num_physpages = max_low_pfn;
2595 + high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
2596 +#endif
2597 +#ifdef CONFIG_FLATMEM
2598 + max_mapnr = num_physpages;
2599 #endif
2600 printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
2601 pages_to_mb(max_low_pfn));
2602 @@ -1317,22 +1210,19 @@ static unsigned long __init setup_memory
2603
2604 void __init zone_sizes_init(void)
2605 {
2606 - unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
2607 - unsigned int max_dma, low;
2608 -
2609 - max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
2610 - low = max_low_pfn;
2611 -
2612 - if (low < max_dma)
2613 - zones_size[ZONE_DMA] = low;
2614 - else {
2615 - zones_size[ZONE_DMA] = max_dma;
2616 - zones_size[ZONE_NORMAL] = low - max_dma;
2617 + unsigned long max_zone_pfns[MAX_NR_ZONES];
2618 + memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
2619 + max_zone_pfns[ZONE_DMA] =
2620 + virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
2621 + max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
2622 #ifdef CONFIG_HIGHMEM
2623 - zones_size[ZONE_HIGHMEM] = highend_pfn - low;
2624 + max_zone_pfns[ZONE_HIGHMEM] = highend_pfn;
2625 + add_active_range(0, 0, highend_pfn);
2626 +#else
2627 + add_active_range(0, 0, max_low_pfn);
2628 #endif
2629 - }
2630 - free_area_init(zones_size);
2631 +
2632 + free_area_init_nodes(max_zone_pfns);
2633 }
2634 #else
2635 extern unsigned long __init setup_memory(void);
2636 @@ -1389,6 +1279,7 @@ void __init setup_bootmem_allocator(void
2637 */
2638 acpi_reserve_bootmem();
2639 #endif
2640 + numa_kva_reserve();
2641 #endif /* !CONFIG_XEN */
2642
2643 #ifdef CONFIG_BLK_DEV_INITRD
2644 @@ -1574,7 +1465,7 @@ static int __init request_standard_resou
2645 request_resource(&iomem_resource, &video_ram_resource);
2646
2647 /* request I/O space for devices used on all i[345]86 PCs */
2648 - for (i = 0; i < STANDARD_IO_RESOURCES; i++)
2649 + for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++)
2650 request_resource(&ioport_resource, &standard_io_resources[i]);
2651 return 0;
2652 }
2653 @@ -1705,17 +1596,19 @@ void __init setup_arch(char **cmdline_p)
2654 data_resource.start = virt_to_phys(_etext);
2655 data_resource.end = virt_to_phys(_edata)-1;
2656
2657 - parse_cmdline_early(cmdline_p);
2658 + if ((i = MAX_GUEST_CMDLINE) > COMMAND_LINE_SIZE)
2659 + i = COMMAND_LINE_SIZE;
2660 + memcpy(saved_command_line, xen_start_info->cmd_line, i);
2661 + saved_command_line[i - 1] = '\0';
2662 + parse_early_param();
2663
2664 -#ifdef CONFIG_EARLY_PRINTK
2665 - {
2666 - char *s = strstr(*cmdline_p, "earlyprintk=");
2667 - if (s) {
2668 - setup_early_printk(strchr(s, '=') + 1);
2669 - printk("early console enabled\n");
2670 - }
2671 + if (user_defined_memmap) {
2672 + printk(KERN_INFO "user-defined physical RAM map:\n");
2673 + print_memory_map("user");
2674 }
2675 -#endif
2676 +
2677 + strlcpy(command_line, saved_command_line, COMMAND_LINE_SIZE);
2678 + *cmdline_p = command_line;
2679
2680 max_low_pfn = setup_memory();
2681
2682 @@ -1822,7 +1715,7 @@ void __init setup_arch(char **cmdline_p)
2683 dmi_scan_machine();
2684
2685 #ifdef CONFIG_X86_GENERICARCH
2686 - generic_apic_probe(*cmdline_p);
2687 + generic_apic_probe();
2688 #endif
2689 if (efi_enabled)
2690 efi_map_memmap();
2691 @@ -1843,9 +1736,11 @@ void __init setup_arch(char **cmdline_p)
2692 acpi_boot_table_init();
2693 #endif
2694
2695 +#ifdef CONFIG_PCI
2696 #ifdef CONFIG_X86_IO_APIC
2697 check_acpi_pci(); /* Checks more than just ACPI actually */
2698 #endif
2699 +#endif
2700
2701 #ifdef CONFIG_ACPI
2702 acpi_boot_init();
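
The setup_32-xen.c rework above retires the hand-rolled parse_cmdline_early() scanner in favor of early_param() handlers, which parse_early_param() runs before the rest of setup_arch(). The registration pattern, with an illustrative "foo=" option that is not part of this patch:

	#include <linux/init.h>
	#include <linux/kernel.h>

	static unsigned long foo_bytes __initdata;

	/* handles "foo=<size>" on the kernel command line */
	static int __init parse_foo(char *arg)
	{
		if (!arg)				/* bare "foo", no value */
			return -EINVAL;
		foo_bytes = memparse(arg, &arg);	/* accepts K/M suffixes */
		return 0;
	}
	early_param("foo", parse_foo);
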
2703 --- sle11-2009-10-16.orig/arch/x86/kernel/smp_32-xen.c 2009-10-28 14:55:12.000000000 +0100
2704 +++ sle11-2009-10-16/arch/x86/kernel/smp_32-xen.c 2009-03-04 11:28:34.000000000 +0100
2705 @@ -279,8 +279,7 @@ static inline void leave_mm (unsigned lo
2706 * 2) Leave the mm if we are in the lazy tlb mode.
2707 */
2708
2709 -irqreturn_t smp_invalidate_interrupt(int irq, void *dev_id,
2710 - struct pt_regs *regs)
2711 +irqreturn_t smp_invalidate_interrupt(int irq, void *dev_id)
2712 {
2713 unsigned long cpu;
2714
2715 @@ -567,16 +566,14 @@ void smp_send_stop(void)
2716 * all the work is done automatically when
2717 * we return from the interrupt.
2718 */
2719 -irqreturn_t smp_reschedule_interrupt(int irq, void *dev_id,
2720 - struct pt_regs *regs)
2721 +irqreturn_t smp_reschedule_interrupt(int irq, void *dev_id)
2722 {
2723
2724 return IRQ_HANDLED;
2725 }
2726
2727 #include <linux/kallsyms.h>
2728 -irqreturn_t smp_call_function_interrupt(int irq, void *dev_id,
2729 - struct pt_regs *regs)
2730 +irqreturn_t smp_call_function_interrupt(int irq, void *dev_id)
2731 {
2732 void (*func) (void *info) = call_data->func;
2733 void *info = call_data->info;
2734 @@ -603,3 +600,69 @@ irqreturn_t smp_call_function_interrupt(
2735 return IRQ_HANDLED;
2736 }
2737
2738 +/*
2739 + * this function sends a 'generic call function' IPI to one other CPU
2740 + * in the system.
2741 + *
2742 + * cpu is a standard Linux logical CPU number.
2743 + */
2744 +static void
2745 +__smp_call_function_single(int cpu, void (*func) (void *info), void *info,
2746 + int nonatomic, int wait)
2747 +{
2748 + struct call_data_struct data;
2749 + int cpus = 1;
2750 +
2751 + data.func = func;
2752 + data.info = info;
2753 + atomic_set(&data.started, 0);
2754 + data.wait = wait;
2755 + if (wait)
2756 + atomic_set(&data.finished, 0);
2757 +
2758 + call_data = &data;
2759 + wmb();
2760 + /* Send a message to the target CPU and wait for it to respond */
2761 + send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_VECTOR);
2762 +
2763 + /* Wait for response */
2764 + while (atomic_read(&data.started) != cpus)
2765 + cpu_relax();
2766 +
2767 + if (!wait)
2768 + return;
2769 +
2770 + while (atomic_read(&data.finished) != cpus)
2771 + cpu_relax();
2772 +}
2773 +
2774 +/*
2775 + * smp_call_function_single - Run a function on another CPU
2776 + * @func: The function to run. This must be fast and non-blocking.
2777 + * @info: An arbitrary pointer to pass to the function.
2778 + * @nonatomic: Currently unused.
2779 + * @wait: If true, wait until function has completed on other CPUs.
2780 + *
2781 + * Retrurns 0 on success, else a negative status code.
2782 + *
2783 + * Does not return until the remote CPU is nearly ready to execute <func>
2784 + * or is or has executed.
2785 + */
2786 +
2787 +int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
2788 + int nonatomic, int wait)
2789 +{
2790 + /* prevent preemption and reschedule on another processor */
2791 + int me = get_cpu();
2792 + if (cpu == me) {
2793 + WARN_ON(1);
2794 + put_cpu();
2795 + return -EBUSY;
2796 + }
2797 + spin_lock_bh(&call_lock);
2798 + __smp_call_function_single(cpu, func, info, nonatomic, wait);
2799 + spin_unlock_bh(&call_lock);
2800 + put_cpu();
2801 + return 0;
2802 +}
2803 +EXPORT_SYMBOL(smp_call_function_single);
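
A usage sketch for the smp_call_function_single() export added above (target CPU and callback names are illustrative; per the code above, the function WARNs and returns -EBUSY if asked to target the calling CPU):

	static void drain_local_state(void *info)
	{
		/* runs in interrupt context on the target CPU; must not block */
	}

	static int drain_cpu(int cpu)
	{
		/* nonatomic is currently unused; wait=1 blocks until done */
		return smp_call_function_single(cpu, drain_local_state, NULL, 0, 1);
	}
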
2804 --- sle11-2009-10-16.orig/arch/x86/kernel/time_32-xen.c 2009-04-20 11:36:10.000000000 +0200
2805 +++ sle11-2009-10-16/arch/x86/kernel/time_32-xen.c 2009-10-28 14:57:20.000000000 +0100
2806 @@ -89,7 +89,6 @@ int pit_latch_buggy; /* ext
2807 unsigned long vxtime_hz = PIT_TICK_RATE;
2808 struct vxtime_data __vxtime __section_vxtime; /* for vsyscalls */
2809 volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
2810 -unsigned long __wall_jiffies __section_wall_jiffies = INITIAL_JIFFIES;
2811 struct timespec __xtime __section_xtime;
2812 struct timezone __sys_tz __section_sys_tz;
2813 #endif
2814 @@ -97,8 +96,6 @@ struct timezone __sys_tz __section_sys_t
2815 unsigned int cpu_khz; /* Detected as we calibrate the TSC */
2816 EXPORT_SYMBOL(cpu_khz);
2817
2818 -extern unsigned long wall_jiffies;
2819 -
2820 DEFINE_SPINLOCK(rtc_lock);
2821 EXPORT_SYMBOL(rtc_lock);
2822
2823 @@ -265,11 +262,10 @@ static void __update_wallclock(time_t se
2824 time_t wtm_sec, xtime_sec;
2825 u64 tmp, wc_nsec;
2826
2827 - /* Adjust wall-clock time base based on wall_jiffies ticks. */
2828 + /* Adjust wall-clock time base. */
2829 wc_nsec = processed_system_time;
2830 wc_nsec += sec * (u64)NSEC_PER_SEC;
2831 wc_nsec += nsec;
2832 - wc_nsec -= (jiffies - wall_jiffies) * (u64)NS_PER_TICK;
2833
2834 /* Split wallclock base into seconds and nanoseconds. */
2835 tmp = wc_nsec;
2836 @@ -392,16 +388,10 @@ void do_gettimeofday(struct timeval *tv)
2837 shadow = &per_cpu(shadow_time, cpu);
2838
2839 do {
2840 - unsigned long lost;
2841 -
2842 local_time_version = shadow->version;
2843 seq = read_seqbegin(&xtime_lock);
2844
2845 usec = get_usec_offset(shadow);
2846 - lost = jiffies - wall_jiffies;
2847 -
2848 - if (unlikely(lost))
2849 - usec += lost * (USEC_PER_SEC / HZ);
2850
2851 sec = xtime.tv_sec;
2852 usec += (xtime.tv_nsec / NSEC_PER_USEC);
2853 @@ -525,7 +515,7 @@ static void sync_xen_wallclock(unsigned
2854 write_seqlock_irq(&xtime_lock);
2855
2856 sec = xtime.tv_sec;
2857 - nsec = xtime.tv_nsec + ((jiffies - wall_jiffies) * (u64)NS_PER_TICK);
2858 + nsec = xtime.tv_nsec;
2859 __normalize_time(&sec, &nsec);
2860
2861 op.cmd = XENPF_settime;
2862 @@ -599,42 +589,49 @@ unsigned long long sched_clock(void)
2863 }
2864 #endif
2865
2866 -#if defined(CONFIG_SMP) && defined(CONFIG_FRAME_POINTER)
2867 unsigned long profile_pc(struct pt_regs *regs)
2868 {
2869 unsigned long pc = instruction_pointer(regs);
2870
2871 -#ifdef __x86_64__
2872 - /* Assume the lock function has either no stack frame or only a single word.
2873 - This checks if the address on the stack looks like a kernel text address.
2874 - There is a small window for false hits, but in that case the tick
2875 - is just accounted to the spinlock function.
2876 - Better would be to write these functions in assembler again
2877 - and check exactly. */
2878 +#if defined(CONFIG_SMP) || defined(__x86_64__)
2879 if (!user_mode_vm(regs) && in_lock_functions(pc)) {
2880 - char *v = *(char **)regs->rsp;
2881 - if ((v >= _stext && v <= _etext) ||
2882 - (v >= _sinittext && v <= _einittext) ||
2883 - (v >= (char *)MODULES_VADDR && v <= (char *)MODULES_END))
2884 - return (unsigned long)v;
2885 - return ((unsigned long *)regs->rsp)[1];
2886 +# ifdef CONFIG_FRAME_POINTER
2887 +# ifdef __i386__
2888 + return ((unsigned long *)regs->ebp)[1];
2889 +# else
2890 + return ((unsigned long *)regs->rbp)[1];
2891 +# endif
2892 +# else
2893 +# ifdef __i386__
2894 + unsigned long *sp;
2895 + if ((regs->xcs & 2) == 0)
2896 + sp = (unsigned long *)&regs->esp;
2897 + else
2898 + sp = (unsigned long *)regs->esp;
2899 +# else
2900 + unsigned long *sp = (unsigned long *)regs->rsp;
2901 +# endif
2902 + /* Return address is either directly at stack pointer
2903 + or above a saved eflags. Eflags has bits 22-31 zero,
2904 + kernel addresses don't. */
2905 + if (sp[0] >> 22)
2906 + return sp[0];
2907 + if (sp[1] >> 22)
2908 + return sp[1];
2909 +# endif
2910 }
2911 -#else
2912 - if (!user_mode_vm(regs) && in_lock_functions(pc))
2913 - return *(unsigned long *)(regs->ebp + 4);
2914 #endif
2915
2916 return pc;
2917 }
2918 EXPORT_SYMBOL(profile_pc);
2919 -#endif
2920
2921 /*
2922 * This is the same as the above, except we _also_ save the current
2923 * Time Stamp Counter value at the time of the timer interrupt, so that
2924 * we later on can estimate the time of day more exactly.
2925 */
2926 -irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
2927 +irqreturn_t timer_interrupt(int irq, void *dev_id)
2928 {
2929 s64 delta, delta_cpu, stolen, blocked;
2930 u64 sched_time;
2931 @@ -692,10 +689,15 @@ irqreturn_t timer_interrupt(int irq, voi
2932 }
2933
2934 /* System-wide jiffy work. */
2935 - while (delta >= NS_PER_TICK) {
2936 - delta -= NS_PER_TICK;
2937 - processed_system_time += NS_PER_TICK;
2938 - do_timer(regs);
2939 + if (delta >= NS_PER_TICK) {
2940 + do_div(delta, NS_PER_TICK);
2941 + processed_system_time += delta * NS_PER_TICK;
2942 + while (delta > HZ) {
2943 + clobber_induction_variable(delta);
2944 + do_timer(HZ);
2945 + delta -= HZ;
2946 + }
2947 + do_timer(delta);
2948 }
2949
2950 if (shadow_tv_version != HYPERVISOR_shared_info->wc_version) {
2951 @@ -740,7 +742,7 @@ irqreturn_t timer_interrupt(int irq, voi
2952 if (delta_cpu > 0) {
2953 do_div(delta_cpu, NS_PER_TICK);
2954 per_cpu(processed_system_time, cpu) += delta_cpu * NS_PER_TICK;
2955 - if (user_mode_vm(regs))
2956 + if (user_mode_vm(get_irq_regs()))
2957 account_user_time(current, (cputime_t)delta_cpu);
2958 else
2959 account_system_time(current, HARDIRQ_OFFSET,
2960 @@ -754,10 +756,10 @@ irqreturn_t timer_interrupt(int irq, voi
2961 /* Local timer processing (see update_process_times()). */
2962 run_local_timers();
2963 if (rcu_pending(cpu))
2964 - rcu_check_callbacks(cpu, user_mode_vm(regs));
2965 + rcu_check_callbacks(cpu, user_mode_vm(get_irq_regs()));
2966 scheduler_tick();
2967 run_posix_cpu_timers(current);
2968 - profile_tick(CPU_PROFILING, regs);
2969 + profile_tick(CPU_PROFILING);
2970
2971 return IRQ_HANDLED;
2972 }
2973 @@ -967,10 +969,11 @@ extern void (*late_time_init)(void);
2974 /* Duplicate of time_init() below, with hpet_enable part added */
2975 static void __init hpet_time_init(void)
2976 {
2977 - xtime.tv_sec = get_cmos_time();
2978 - xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ);
2979 - set_normalized_timespec(&wall_to_monotonic,
2980 - -xtime.tv_sec, -xtime.tv_nsec);
2981 + struct timespec ts;
2982 + ts.tv_sec = get_cmos_time();
2983 + ts.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ);
2984 +
2985 + do_settimeofday(&ts);
2986
2987 if ((hpet_enable() >= 0) && hpet_use_timer) {
2988 printk("Using HPET for base-timer\n");
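
The jiffy-accounting hunk above leans on do_div(), which divides its 64-bit first argument in place and returns the 32-bit remainder. A sketch of those semantics (names illustrative):

	#include <linux/types.h>
	#include <asm/div64.h>

	static u64 ns_to_whole_ticks(u64 delta_ns, u32 ns_per_tick)
	{
		/* do_div() rewrites delta_ns with the quotient */
		u32 rem = do_div(delta_ns, ns_per_tick);

		(void)rem;		/* leftover ns below one tick */
		return delta_ns;	/* whole ticks elapsed */
	}
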
2989 --- sle11-2009-10-16.orig/arch/x86/kernel/traps_32-xen.c 2009-10-28 14:55:12.000000000 +0100
2990 +++ sle11-2009-10-16/arch/x86/kernel/traps_32-xen.c 2009-03-04 11:28:34.000000000 +0100
2991 @@ -28,6 +28,7 @@
2992 #include <linux/kprobes.h>
2993 #include <linux/kexec.h>
2994 #include <linux/unwind.h>
2995 +#include <linux/uaccess.h>
2996
2997 #ifdef CONFIG_EISA
2998 #include <linux/ioport.h>
2999 @@ -40,7 +41,6 @@
3000
3001 #include <asm/processor.h>
3002 #include <asm/system.h>
3003 -#include <asm/uaccess.h>
3004 #include <asm/io.h>
3005 #include <asm/atomic.h>
3006 #include <asm/debugreg.h>
3007 @@ -51,11 +51,14 @@
3008 #include <asm/smp.h>
3009 #include <asm/arch_hooks.h>
3010 #include <asm/kdebug.h>
3011 +#include <asm/stacktrace.h>
3012
3013 #include <linux/module.h>
3014
3015 #include "mach_traps.h"
3016
3017 +int panic_on_unrecovered_nmi;
3018 +
3019 asmlinkage int system_call(void);
3020
3021 struct desc_struct default_ldt[] = { { 0, 0 }, { 0, 0 }, { 0, 0 },
3022 @@ -124,62 +127,63 @@ static inline int valid_stack_ptr(struct
3023 p < (void *)tinfo + THREAD_SIZE - 3;
3024 }
3025
3026 -/*
3027 - * Print one address/symbol entries per line.
3028 - */
3029 -static inline void print_addr_and_symbol(unsigned long addr, char *log_lvl)
3030 -{
3031 - printk(" [<%08lx>] ", addr);
3032 -
3033 - print_symbol("%s\n", addr);
3034 -}
3035 -
3036 static inline unsigned long print_context_stack(struct thread_info *tinfo,
3037 unsigned long *stack, unsigned long ebp,
3038 - char *log_lvl)
3039 + struct stacktrace_ops *ops, void *data)
3040 {
3041 unsigned long addr;
3042
3043 #ifdef CONFIG_FRAME_POINTER
3044 while (valid_stack_ptr(tinfo, (void *)ebp)) {
3045 + unsigned long new_ebp;
3046 addr = *(unsigned long *)(ebp + 4);
3047 - print_addr_and_symbol(addr, log_lvl);
3048 + ops->address(data, addr);
3049 /*
3050 * break out of recursive entries (such as
3051 - * end_of_stack_stop_unwind_function):
3052 + * end_of_stack_stop_unwind_function). Also,
3053 + * we can never allow a frame pointer to
3054 + * move downwards!
3055 */
3056 - if (ebp == *(unsigned long *)ebp)
3057 + new_ebp = *(unsigned long *)ebp;
3058 + if (new_ebp <= ebp)
3059 break;
3060 - ebp = *(unsigned long *)ebp;
3061 + ebp = new_ebp;
3062 }
3063 #else
3064 while (valid_stack_ptr(tinfo, stack)) {
3065 addr = *stack++;
3066 if (__kernel_text_address(addr))
3067 - print_addr_and_symbol(addr, log_lvl);
3068 + ops->address(data, addr);
3069 }
3070 #endif
3071 return ebp;
3072 }
3073
3074 +struct ops_and_data {
3075 + struct stacktrace_ops *ops;
3076 + void *data;
3077 +};
3078 +
3079 static asmlinkage int
3080 -show_trace_unwind(struct unwind_frame_info *info, void *log_lvl)
3081 +dump_trace_unwind(struct unwind_frame_info *info, void *data)
3082 {
3083 + struct ops_and_data *oad = (struct ops_and_data *)data;
3084 int n = 0;
3085
3086 while (unwind(info) == 0 && UNW_PC(info)) {
3087 n++;
3088 - print_addr_and_symbol(UNW_PC(info), log_lvl);
3089 + oad->ops->address(oad->data, UNW_PC(info));
3090 if (arch_unw_user_mode(info))
3091 break;
3092 }
3093 return n;
3094 }
3095
3096 -static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
3097 - unsigned long *stack, char *log_lvl)
3098 +void dump_trace(struct task_struct *task, struct pt_regs *regs,
3099 + unsigned long *stack,
3100 + struct stacktrace_ops *ops, void *data)
3101 {
3102 - unsigned long ebp;
3103 + unsigned long ebp = 0;
3104
3105 if (!task)
3106 task = current;
3107 @@ -187,54 +191,116 @@ static void show_trace_log_lvl(struct ta
3108 if (call_trace >= 0) {
3109 int unw_ret = 0;
3110 struct unwind_frame_info info;
3111 + struct ops_and_data oad = { .ops = ops, .data = data };
3112
3113 if (regs) {
3114 if (unwind_init_frame_info(&info, task, regs) == 0)
3115 - unw_ret = show_trace_unwind(&info, log_lvl);
3116 + unw_ret = dump_trace_unwind(&info, &oad);
3117 } else if (task == current)
3118 - unw_ret = unwind_init_running(&info, show_trace_unwind, log_lvl);
3119 + unw_ret = unwind_init_running(&info, dump_trace_unwind, &oad);
3120 else {
3121 if (unwind_init_blocked(&info, task) == 0)
3122 - unw_ret = show_trace_unwind(&info, log_lvl);
3123 + unw_ret = dump_trace_unwind(&info, &oad);
3124 }
3125 if (unw_ret > 0) {
3126 if (call_trace == 1 && !arch_unw_user_mode(&info)) {
3127 - print_symbol("DWARF2 unwinder stuck at %s\n",
3128 + ops->warning_symbol(data, "DWARF2 unwinder stuck at %s\n",
3129 UNW_PC(&info));
3130 if (UNW_SP(&info) >= PAGE_OFFSET) {
3131 - printk("Leftover inexact backtrace:\n");
3132 + ops->warning(data, "Leftover inexact backtrace:\n");
3133 stack = (void *)UNW_SP(&info);
3134 + if (!stack)
3135 + return;
3136 + ebp = UNW_FP(&info);
3137 } else
3138 - printk("Full inexact backtrace again:\n");
3139 + ops->warning(data, "Full inexact backtrace again:\n");
3140 } else if (call_trace >= 1)
3141 return;
3142 else
3143 - printk("Full inexact backtrace again:\n");
3144 + ops->warning(data, "Full inexact backtrace again:\n");
3145 } else
3146 - printk("Inexact backtrace:\n");
3147 + ops->warning(data, "Inexact backtrace:\n");
3148 }
3149 -
3150 - if (task == current) {
3151 - /* Grab ebp right from our regs */
3152 - asm ("movl %%ebp, %0" : "=r" (ebp) : );
3153 - } else {
3154 - /* ebp is the last reg pushed by switch_to */
3155 - ebp = *(unsigned long *) task->thread.esp;
3156 + if (!stack) {
3157 + unsigned long dummy;
3158 + stack = &dummy;
3159 + if (task && task != current)
3160 + stack = (unsigned long *)task->thread.esp;
3161 + }
3162 +
3163 +#ifdef CONFIG_FRAME_POINTER
3164 + if (!ebp) {
3165 + if (task == current) {
3166 + /* Grab ebp right from our regs */
3167 + asm ("movl %%ebp, %0" : "=r" (ebp) : );
3168 + } else {
3169 + /* ebp is the last reg pushed by switch_to */
3170 + ebp = *(unsigned long *) task->thread.esp;
3171 + }
3172 }
3173 +#endif
3174
3175 while (1) {
3176 struct thread_info *context;
3177 context = (struct thread_info *)
3178 ((unsigned long)stack & (~(THREAD_SIZE - 1)));
3179 - ebp = print_context_stack(context, stack, ebp, log_lvl);
3180 + ebp = print_context_stack(context, stack, ebp, ops, data);
3181 + /* Should be after the line below, but somewhere
3182 + in early boot context comes out corrupted and we
3183 + can't reference it -AK */
3184 + if (ops->stack(data, "IRQ") < 0)
3185 + break;
3186 stack = (unsigned long*)context->previous_esp;
3187 if (!stack)
3188 break;
3189 - printk("%s =======================\n", log_lvl);
3190 }
3191 }
3192 +EXPORT_SYMBOL(dump_trace);
3193 +
3194 +static void
3195 +print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
3196 +{
3197 + printk(data);
3198 + print_symbol(msg, symbol);
3199 + printk("\n");
3200 +}
3201 +
3202 +static void print_trace_warning(void *data, char *msg)
3203 +{
3204 + printk("%s%s\n", (char *)data, msg);
3205 +}
3206
3207 -void show_trace(struct task_struct *task, struct pt_regs *regs, unsigned long * stack)
3208 +static int print_trace_stack(void *data, char *name)
3209 +{
3210 + return 0;
3211 +}
3212 +
3213 +/*
3214 + * Print one address/symbol entries per line.
3215 + */
3216 +static void print_trace_address(void *data, unsigned long addr)
3217 +{
3218 + printk("%s [<%08lx>] ", (char *)data, addr);
3219 + print_symbol("%s\n", addr);
3220 +}
3221 +
3222 +static struct stacktrace_ops print_trace_ops = {
3223 + .warning = print_trace_warning,
3224 + .warning_symbol = print_trace_warning_symbol,
3225 + .stack = print_trace_stack,
3226 + .address = print_trace_address,
3227 +};
3228 +
3229 +static void
3230 +show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
3231 + unsigned long * stack, char *log_lvl)
3232 +{
3233 + dump_trace(task, regs, stack, &print_trace_ops, log_lvl);
3234 + printk("%s =======================\n", log_lvl);
3235 +}
3236 +
3237 +void show_trace(struct task_struct *task, struct pt_regs *regs,
3238 + unsigned long * stack)
3239 {
3240 show_trace_log_lvl(task, regs, stack, "");
3241 }
3242 @@ -297,12 +363,13 @@ void show_registers(struct pt_regs *regs
3243 ss = regs->xss & 0xffff;
3244 }
3245 print_modules();
3246 - printk(KERN_EMERG "CPU: %d\nEIP: %04x:[<%08lx>] %s VLI\n"
3247 - "EFLAGS: %08lx (%s %.*s) \n",
3248 + printk(KERN_EMERG "CPU: %d\n"
3249 + KERN_EMERG "EIP: %04x:[<%08lx>] %s VLI\n"
3250 + KERN_EMERG "EFLAGS: %08lx (%s %.*s)\n",
3251 smp_processor_id(), 0xffff & regs->xcs, regs->eip,
3252 - print_tainted(), regs->eflags, system_utsname.release,
3253 - (int)strcspn(system_utsname.version, " "),
3254 - system_utsname.version);
3255 + print_tainted(), regs->eflags, init_utsname()->release,
3256 + (int)strcspn(init_utsname()->version, " "),
3257 + init_utsname()->version);
3258 print_symbol(KERN_EMERG "EIP is at %s\n", regs->eip);
3259 printk(KERN_EMERG "eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n",
3260 regs->eax, regs->ebx, regs->ecx, regs->edx);
3261 @@ -319,6 +386,8 @@ void show_registers(struct pt_regs *regs
3262 */
3263 if (in_kernel) {
3264 u8 __user *eip;
3265 + int code_bytes = 64;
3266 + unsigned char c;
3267
3268 printk("\n" KERN_EMERG "Stack: ");
3269 show_stack_log_lvl(NULL, regs, (unsigned long *)esp, KERN_EMERG);
3270 @@ -326,9 +395,12 @@ void show_registers(struct pt_regs *regs
3271 printk(KERN_EMERG "Code: ");
3272
3273 eip = (u8 __user *)regs->eip - 43;
3274 - for (i = 0; i < 64; i++, eip++) {
3275 - unsigned char c;
3276 -
3277 + if (eip < (u8 __user *)PAGE_OFFSET || __get_user(c, eip)) {
3278 + /* try starting at EIP */
3279 + eip = (u8 __user *)regs->eip;
3280 + code_bytes = 32;
3281 + }
3282 + for (i = 0; i < code_bytes; i++, eip++) {
3283 if (eip < (u8 __user *)PAGE_OFFSET || __get_user(c, eip)) {
3284 printk(" Bad EIP value.");
3285 break;
3286 @@ -349,7 +421,7 @@ static void handle_BUG(struct pt_regs *r
3287
3288 if (eip < PAGE_OFFSET)
3289 return;
3290 - if (__get_user(ud2, (unsigned short __user *)eip))
3291 + if (probe_kernel_address((unsigned short __user *)eip, ud2))
3292 return;
3293 if (ud2 != 0x0b0f)
3294 return;
3295 @@ -362,7 +434,8 @@ static void handle_BUG(struct pt_regs *r
3296 char *file;
3297 char c;
3298
3299 - if (__get_user(line, (unsigned short __user *)(eip + 2)))
3300 + if (probe_kernel_address((unsigned short __user *)(eip + 2),
3301 + line))
3302 break;
3303 if (__get_user(file, (char * __user *)(eip + 4)) ||
3304 (unsigned long)file < PAGE_OFFSET || __get_user(c, file))
3305 @@ -604,18 +677,24 @@ gp_in_kernel:
3306 }
3307 }
3308
3309 -static void mem_parity_error(unsigned char reason, struct pt_regs * regs)
3310 +static __kprobes void
3311 +mem_parity_error(unsigned char reason, struct pt_regs * regs)
3312 {
3313 - printk(KERN_EMERG "Uhhuh. NMI received. Dazed and confused, but trying "
3314 - "to continue\n");
3315 + printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x on "
3316 + "CPU %d.\n", reason, smp_processor_id());
3317 printk(KERN_EMERG "You probably have a hardware problem with your RAM "
3318 "chips\n");
3319 + if (panic_on_unrecovered_nmi)
3320 + panic("NMI: Not continuing");
3321 +
3322 + printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
3323
3324 /* Clear and disable the memory parity error line. */
3325 clear_mem_error(reason);
3326 }
3327
3328 -static void io_check_error(unsigned char reason, struct pt_regs * regs)
3329 +static __kprobes void
3330 +io_check_error(unsigned char reason, struct pt_regs * regs)
3331 {
3332 printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n");
3333 show_registers(regs);
3334 @@ -624,7 +703,8 @@ static void io_check_error(unsigned char
3335 clear_io_check_error(reason);
3336 }
3337
3338 -static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
3339 +static __kprobes void
3340 +unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
3341 {
3342 #ifdef CONFIG_MCA
3343 /* Might actually be able to figure out what the guilty party
3344 @@ -634,15 +714,18 @@ static void unknown_nmi_error(unsigned c
3345 return;
3346 }
3347 #endif
3348 - printk("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
3349 - reason, smp_processor_id());
3350 - printk("Dazed and confused, but trying to continue\n");
3351 - printk("Do you have a strange power saving mode enabled?\n");
3352 + printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x on "
3353 + "CPU %d.\n", reason, smp_processor_id());
3354 + printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n");
3355 + if (panic_on_unrecovered_nmi)
3356 + panic("NMI: Not continuing");
3357 +
3358 + printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
3359 }
3360
3361 static DEFINE_SPINLOCK(nmi_print_lock);
3362
3363 -void die_nmi (struct pt_regs *regs, const char *msg)
3364 +void __kprobes die_nmi(struct pt_regs *regs, const char *msg)
3365 {
3366 if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 2, SIGINT) ==
3367 NOTIFY_STOP)
3368 @@ -674,7 +757,7 @@ void die_nmi (struct pt_regs *regs, cons
3369 do_exit(SIGSEGV);
3370 }
3371
3372 -static void default_do_nmi(struct pt_regs * regs)
3373 +static __kprobes void default_do_nmi(struct pt_regs * regs)
3374 {
3375 unsigned char reason = 0;
3376
3377 @@ -691,12 +774,12 @@ static void default_do_nmi(struct pt_reg
3378 * Ok, so this is none of the documented NMI sources,
3379 * so it must be the NMI watchdog.
3380 */
3381 - if (nmi_watchdog) {
3382 - nmi_watchdog_tick(regs);
3383 + if (nmi_watchdog_tick(regs, reason))
3384 return;
3385 - }
3386 + if (!do_nmi_callback(regs, smp_processor_id()))
3387 #endif
3388 - unknown_nmi_error(reason, regs);
3389 + unknown_nmi_error(reason, regs);
3390 +
3391 return;
3392 }
3393 if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
3394 @@ -712,14 +795,7 @@ static void default_do_nmi(struct pt_reg
3395 reassert_nmi();
3396 }
3397
3398 -static int dummy_nmi_callback(struct pt_regs * regs, int cpu)
3399 -{
3400 - return 0;
3401 -}
3402 -
3403 -static nmi_callback_t nmi_callback = dummy_nmi_callback;
3404 -
3405 -fastcall void do_nmi(struct pt_regs * regs, long error_code)
3406 +fastcall __kprobes void do_nmi(struct pt_regs * regs, long error_code)
3407 {
3408 int cpu;
3409
3410 @@ -729,25 +805,11 @@ fastcall void do_nmi(struct pt_regs * re
3411
3412 ++nmi_count(cpu);
3413
3414 - if (!rcu_dereference(nmi_callback)(regs, cpu))
3415 - default_do_nmi(regs);
3416 + default_do_nmi(regs);
3417
3418 nmi_exit();
3419 }
3420
3421 -void set_nmi_callback(nmi_callback_t callback)
3422 -{
3423 - vmalloc_sync_all();
3424 - rcu_assign_pointer(nmi_callback, callback);
3425 -}
3426 -EXPORT_SYMBOL_GPL(set_nmi_callback);
3427 -
3428 -void unset_nmi_callback(void)
3429 -{
3430 - nmi_callback = dummy_nmi_callback;
3431 -}
3432 -EXPORT_SYMBOL_GPL(unset_nmi_callback);
3433 -
3434 #ifdef CONFIG_KPROBES
3435 fastcall void __kprobes do_int3(struct pt_regs *regs, long error_code)
3436 {
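
dump_trace(), introduced above, walks the stack through caller-supplied struct stacktrace_ops callbacks. A minimal consumer that merely counts kernel return addresses might look like this (illustrative, not part of the patch):

	static void count_warning(void *data, char *msg) { }
	static void count_warning_symbol(void *data, char *msg, unsigned long sym) { }

	static int count_stack(void *data, char *name)
	{
		return 0;			/* keep walking across stacks */
	}

	static void count_address(void *data, unsigned long addr)
	{
		++*(unsigned int *)data;
	}

	static struct stacktrace_ops count_ops = {
		.warning	= count_warning,
		.warning_symbol	= count_warning_symbol,
		.stack		= count_stack,
		.address	= count_address,
	};

	static unsigned int current_stack_depth(void)
	{
		unsigned int depth = 0;

		dump_trace(current, NULL, NULL, &count_ops, &depth);
		return depth;
	}
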
3437 --- sle11-2009-10-16.orig/arch/x86/mach-xen/setup.c 2009-10-28 14:55:12.000000000 +0100
3438 +++ sle11-2009-10-16/arch/x86/mach-xen/setup.c 2009-03-04 11:28:34.000000000 +0100
3439 @@ -103,8 +103,10 @@ void __init pre_setup_arch_hook(void)
3440
3441 setup_xen_features();
3442
3443 - if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0)
3444 - set_fixaddr_top(pp.virt_start);
3445 + if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0) {
3446 + hypervisor_virt_start = pp.virt_start;
3447 + reserve_top_address(0UL - pp.virt_start);
3448 + }
3449
3450 if (HYPERVISOR_memory_op(XENMEM_machphys_mapping, &mapping) == 0) {
3451 machine_to_phys_mapping = (unsigned long *)mapping.v_start;
3452 --- sle11-2009-10-16.orig/arch/x86/mm/fault_32-xen.c 2009-10-28 14:55:12.000000000 +0100
3453 +++ sle11-2009-10-16/arch/x86/mm/fault_32-xen.c 2009-03-04 11:28:34.000000000 +0100
3454 @@ -27,21 +27,24 @@
3455 #include <asm/uaccess.h>
3456 #include <asm/desc.h>
3457 #include <asm/kdebug.h>
3458 +#include <asm/segment.h>
3459
3460 extern void die(const char *,struct pt_regs *,long);
3461
3462 -#ifdef CONFIG_KPROBES
3463 -ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
3464 +static ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
3465 +
3466 int register_page_fault_notifier(struct notifier_block *nb)
3467 {
3468 vmalloc_sync_all();
3469 return atomic_notifier_chain_register(&notify_page_fault_chain, nb);
3470 }
3471 +EXPORT_SYMBOL_GPL(register_page_fault_notifier);
3472
3473 int unregister_page_fault_notifier(struct notifier_block *nb)
3474 {
3475 return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb);
3476 }
3477 +EXPORT_SYMBOL_GPL(unregister_page_fault_notifier);
3478
3479 static inline int notify_page_fault(enum die_val val, const char *str,
3480 struct pt_regs *regs, long err, int trap, int sig)
3481 @@ -55,14 +58,6 @@ static inline int notify_page_fault(enum
3482 };
3483 return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args);
3484 }
3485 -#else
3486 -static inline int notify_page_fault(enum die_val val, const char *str,
3487 - struct pt_regs *regs, long err, int trap, int sig)
3488 -{
3489 - return NOTIFY_DONE;
3490 -}
3491 -#endif
3492 -
3493
3494 /*
3495 * Unlock any spinlocks which will prevent us from getting the
3496 @@ -119,10 +114,10 @@ static inline unsigned long get_segment_
3497 }
3498
3499 /* The standard kernel/user address space limit. */
3500 - *eip_limit = (seg & 2) ? USER_DS.seg : KERNEL_DS.seg;
3501 + *eip_limit = user_mode(regs) ? USER_DS.seg : KERNEL_DS.seg;
3502
3503 /* By far the most common cases. */
3504 - if (likely(seg == __USER_CS || seg == GET_KERNEL_CS()))
3505 + if (likely(SEGMENT_IS_FLAT_CODE(seg)))
3506 return eip;
3507
3508 /* Check the segment exists, is within the current LDT/GDT size,
3509 @@ -559,11 +554,7 @@ good_area:
3510 write = 0;
3511 switch (error_code & 3) {
3512 default: /* 3: write, present */
3513 -#ifdef TEST_VERIFY_AREA
3514 - if (regs->cs == GET_KERNEL_CS())
3515 - printk("WP fault at %08lx\n", regs->eip);
3516 -#endif
3517 - /* fall through */
3518 + /* fall through */
3519 case 2: /* write, not present */
3520 if (!(vma->vm_flags & VM_WRITE))
3521 goto bad_area;
3522 @@ -572,7 +563,7 @@ good_area:
3523 case 1: /* read, present */
3524 goto bad_area;
3525 case 0: /* read, not present */
3526 - if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
3527 + if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
3528 goto bad_area;
3529 }
3530
3531 @@ -704,7 +695,7 @@ no_context:
3532 */
3533 out_of_memory:
3534 up_read(&mm->mmap_sem);
3535 - if (tsk->pid == 1) {
3536 + if (is_init(tsk)) {
3537 yield();
3538 down_read(&mm->mmap_sem);
3539 goto survive;
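
The fault_32-xen.c hunk above exports register/unregister_page_fault_notifier() so modules (kprobes and the like) can hook the chain. Hooking it follows the standard notifier pattern (handler name illustrative; the DIE_* codes live in asm/kdebug.h in this era):

	#include <linux/notifier.h>
	#include <asm/kdebug.h>

	static int demo_pf_event(struct notifier_block *self, unsigned long val,
				 void *data)
	{
		/* val is a DIE_* code, data points to a struct die_args */
		return NOTIFY_DONE;	/* never swallow the fault */
	}

	static struct notifier_block demo_pf_nb = {
		.notifier_call = demo_pf_event,
	};

	static int __init demo_init(void)
	{
		return register_page_fault_notifier(&demo_pf_nb);
	}

	static void __exit demo_exit(void)
	{
		unregister_page_fault_notifier(&demo_pf_nb);
	}
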
3540 --- sle11-2009-10-16.orig/arch/x86/mm/highmem_32-xen.c 2009-10-28 14:55:12.000000000 +0100
3541 +++ sle11-2009-10-16/arch/x86/mm/highmem_32-xen.c 2009-03-04 11:28:34.000000000 +0100
3542 @@ -38,11 +38,9 @@ static void *__kmap_atomic(struct page *
3543
3544 idx = type + KM_TYPE_NR*smp_processor_id();
3545 vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
3546 -#ifdef CONFIG_DEBUG_HIGHMEM
3547 if (!pte_none(*(kmap_pte-idx)))
3548 BUG();
3549 -#endif
3550 - set_pte_at_sync(&init_mm, vaddr, kmap_pte-idx, mk_pte(page, prot));
3551 + set_pte_at(&init_mm, vaddr, kmap_pte-idx, mk_pte(page, prot));
3552
3553 return (void*) vaddr;
3554 }
3555 @@ -62,36 +60,26 @@ void *kmap_atomic_pte(struct page *page,
3556
3557 void kunmap_atomic(void *kvaddr, enum km_type type)
3558 {
3559 -#if defined(CONFIG_DEBUG_HIGHMEM) || defined(CONFIG_XEN)
3560 unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
3561 enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id();
3562
3563 - if (vaddr < FIXADDR_START) { // FIXME
3564 +#ifdef CONFIG_DEBUG_HIGHMEM
3565 + if (vaddr >= PAGE_OFFSET && vaddr < (unsigned long)high_memory) {
3566 dec_preempt_count();
3567 preempt_check_resched();
3568 return;
3569 }
3570 -#endif
3571
3572 -#if defined(CONFIG_DEBUG_HIGHMEM)
3573 if (vaddr != __fix_to_virt(FIX_KMAP_BEGIN+idx))
3574 BUG();
3575 -
3576 - /*
3577 - * force other mappings to Oops if they'll try to access
3578 - * this pte without first remap it
3579 - */
3580 - pte_clear(&init_mm, vaddr, kmap_pte-idx);
3581 - __flush_tlb_one(vaddr);
3582 -#elif defined(CONFIG_XEN)
3583 +#endif
3584 /*
3585 - * We must ensure there are no dangling pagetable references when
3586 - * returning memory to Xen (decrease_reservation).
3587 - * XXX TODO: We could make this faster by only zapping when
3588 - * kmap_flush_unused is called but that is trickier and more invasive.
3589 +	 * Force other mappings to Oops if they try to access this pte
3590 +	 * without first remapping it. Keeping stale mappings around is also
3591 +	 * a bad idea, in case the page changes cacheability attributes or
3592 +	 * becomes a protected page in a hypervisor.
3593 */
3594 - pte_clear(&init_mm, vaddr, kmap_pte-idx);
3595 -#endif
3596 + kpte_clear_flush(kmap_pte-idx, vaddr);
3597
3598 dec_preempt_count();
3599 preempt_check_resched();
3600 @@ -110,7 +98,6 @@ void *kmap_atomic_pfn(unsigned long pfn,
3601 idx = type + KM_TYPE_NR*smp_processor_id();
3602 vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
3603 set_pte(kmap_pte-idx, pfn_pte(pfn, kmap_prot));
3604 - __flush_tlb_one(vaddr);
3605
3606 return (void*) vaddr;
3607 }
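
With the highmem_32-xen.c changes above, kunmap_atomic() always clears and flushes the fixmap PTE via kpte_clear_flush() instead of doing so only under CONFIG_DEBUG_HIGHMEM or CONFIG_XEN, so no stale mapping survives a page being handed back to Xen through decrease_reservation. For context, a minimal sketch of the atomic-kmap pairing this protects, using the standard 2.6.x km_type API:

	#include <linux/highmem.h>
	#include <linux/string.h>

	/* Sketch: scrub one highmem page through a temporary fixmap slot. */
	static void zero_highpage(struct page *page)
	{
		void *vaddr = kmap_atomic(page, KM_USER0); /* preemption off */

		memset(vaddr, 0, PAGE_SIZE);
		kunmap_atomic(vaddr, KM_USER0);	/* now clears + flushes the PTE */
	}
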
3608 --- sle11-2009-10-16.orig/arch/x86/mm/hypervisor.c 2008-12-15 11:13:45.000000000 +0100
3609 +++ sle11-2009-10-16/arch/x86/mm/hypervisor.c 2009-03-04 11:28:34.000000000 +0100
3610 @@ -31,6 +31,7 @@
3611 */
3612
3613 #include <linux/sched.h>
3614 +#include <linux/hardirq.h>
3615 #include <linux/mm.h>
3616 #include <linux/vmalloc.h>
3617 #include <asm/page.h>
3618 @@ -44,6 +45,302 @@
3619 #include <asm/tlbflush.h>
3620 #include <linux/highmem.h>
3621
3622 +EXPORT_SYMBOL(hypercall_page);
3623 +
3624 +#define NR_MC BITS_PER_LONG
3625 +#define NR_MMU BITS_PER_LONG
3626 +#define NR_MMUEXT (BITS_PER_LONG / 4)
3627 +
3628 +DEFINE_PER_CPU(bool, xen_lazy_mmu);
3629 +EXPORT_PER_CPU_SYMBOL(xen_lazy_mmu);
3630 +struct lazy_mmu {
3631 + unsigned int nr_mc, nr_mmu, nr_mmuext;
3632 + multicall_entry_t mc[NR_MC];
3633 + mmu_update_t mmu[NR_MMU];
3634 + struct mmuext_op mmuext[NR_MMUEXT];
3635 +};
3636 +static DEFINE_PER_CPU(struct lazy_mmu, lazy_mmu);
3637 +
3638 +static inline bool use_lazy_mmu_mode(void)
3639 +{
3640 +#ifdef CONFIG_PREEMPT
3641 + if (!preempt_count())
3642 + return false;
3643 +#endif
3644 + return !irq_count();
3645 +}
3646 +
3647 +static void multicall_failed(const multicall_entry_t *mc, int rc)
3648 +{
3649 + printk(KERN_EMERG "hypercall#%lu(%lx, %lx, %lx, %lx)"
3650 + " failed: %d (caller %lx)\n",
3651 + mc->op, mc->args[0], mc->args[1], mc->args[2], mc->args[3],
3652 + rc, mc->args[5]);
3653 + BUG();
3654 +}
3655 +
3656 +int xen_multicall_flush(bool ret_last) {
3657 + struct lazy_mmu *lazy = &__get_cpu_var(lazy_mmu);
3658 + multicall_entry_t *mc = lazy->mc;
3659 + unsigned int count = lazy->nr_mc;
3660 +
3661 + if (!count || !use_lazy_mmu_mode())
3662 + return 0;
3663 +
3664 + lazy->nr_mc = 0;
3665 + lazy->nr_mmu = 0;
3666 + lazy->nr_mmuext = 0;
3667 +
3668 + if (count == 1) {
3669 + int rc = _hypercall(int, mc->op, mc->args[0], mc->args[1],
3670 + mc->args[2], mc->args[3], mc->args[4]);
3671 +
3672 + if (unlikely(rc)) {
3673 + if (ret_last)
3674 + return rc;
3675 + multicall_failed(mc, rc);
3676 + }
3677 + } else {
3678 + if (HYPERVISOR_multicall(mc, count))
3679 + BUG();
3680 + while (count-- > ret_last)
3681 + if (unlikely(mc++->result))
3682 + multicall_failed(mc - 1, mc[-1].result);
3683 + if (ret_last)
3684 + return mc->result;
3685 + }
3686 +
3687 + return 0;
3688 +}
3689 +EXPORT_SYMBOL(xen_multicall_flush);
3690 +
3691 +int xen_multi_update_va_mapping(unsigned long va, pte_t pte,
3692 + unsigned long uvmf)
3693 +{
3694 + struct lazy_mmu *lazy = &__get_cpu_var(lazy_mmu);
3695 + multicall_entry_t *mc;
3696 +
3697 + if (unlikely(!use_lazy_mmu_mode()))
3698 +#ifdef CONFIG_X86_PAE
3699 + return _hypercall4(int, update_va_mapping, va,
3700 + pte.pte_low, pte.pte_high, uvmf);
3701 +#else
3702 + return _hypercall3(int, update_va_mapping, va,
3703 + pte.pte, uvmf);
3704 +#endif
3705 +
3706 + if (unlikely(lazy->nr_mc == NR_MC))
3707 + xen_multicall_flush(false);
3708 +
3709 + mc = lazy->mc + lazy->nr_mc++;
3710 + mc->op = __HYPERVISOR_update_va_mapping;
3711 + mc->args[0] = va;
3712 +#ifndef CONFIG_X86_PAE
3713 + mc->args[1] = pte.pte;
3714 +#else
3715 + mc->args[1] = pte.pte_low;
3716 + mc->args[2] = pte.pte_high;
3717 +#endif
3718 + mc->args[MULTI_UVMFLAGS_INDEX] = uvmf;
3719 + mc->args[5] = (long)__builtin_return_address(0);
3720 +
3721 + return 0;
3722 +}
3723 +
3724 +static inline bool mmu_may_merge(const multicall_entry_t *mc,
3725 + unsigned int op, domid_t domid)
3726 +{
3727 + return mc->op == op && !mc->args[2] && mc->args[3] == domid;
3728 +}
3729 +
3730 +int xen_multi_mmu_update(mmu_update_t *src, unsigned int count,
3731 + unsigned int *success_count, domid_t domid)
3732 +{
3733 + struct lazy_mmu *lazy = &__get_cpu_var(lazy_mmu);
3734 + multicall_entry_t *mc = lazy->mc + lazy->nr_mc;
3735 + mmu_update_t *dst;
3736 + bool commit, merge;
3737 +
3738 + if (unlikely(!use_lazy_mmu_mode()))
3739 + return _hypercall4(int, mmu_update, src, count,
3740 + success_count, domid);
3741 +
3742 + commit = (lazy->nr_mmu + count) > NR_MMU || success_count;
3743 + merge = lazy->nr_mc && !commit
3744 + && mmu_may_merge(mc - 1, __HYPERVISOR_mmu_update, domid);
3745 + if (unlikely(lazy->nr_mc == NR_MC) && !merge) {
3746 + xen_multicall_flush(false);
3747 + mc = lazy->mc;
3748 + commit = count > NR_MMU || success_count;
3749 + }
3750 +
3751 + if (!lazy->nr_mc && unlikely(commit))
3752 + return _hypercall4(int, mmu_update, src, count,
3753 + success_count, domid);
3754 +
3755 + dst = lazy->mmu + lazy->nr_mmu;
3756 + lazy->nr_mmu += count;
3757 + if (merge) {
3758 + mc[-1].args[1] += count;
3759 + memcpy(dst, src, count * sizeof(*src));
3760 + } else {
3761 + ++lazy->nr_mc;
3762 + mc->op = __HYPERVISOR_mmu_update;
3763 + if (!commit) {
3764 + mc->args[0] = (unsigned long)dst;
3765 + memcpy(dst, src, count * sizeof(*src));
3766 + } else
3767 + mc->args[0] = (unsigned long)src;
3768 + mc->args[1] = count;
3769 + mc->args[2] = (unsigned long)success_count;
3770 + mc->args[3] = domid;
3771 + mc->args[5] = (long)__builtin_return_address(0);
3772 + }
3773 +
3774 + while (!commit && count--)
3775 + switch (src++->ptr & (sizeof(pteval_t) - 1)) {
3776 + case MMU_NORMAL_PT_UPDATE:
3777 + case MMU_PT_UPDATE_PRESERVE_AD:
3778 + break;
3779 + default:
3780 + commit = true;
3781 + break;
3782 + }
3783 +
3784 + return commit ? xen_multicall_flush(true) : 0;
3785 +}
3786 +
3787 +int xen_multi_mmuext_op(struct mmuext_op *src, unsigned int count,
3788 + unsigned int *success_count, domid_t domid)
3789 +{
3790 + struct lazy_mmu *lazy = &__get_cpu_var(lazy_mmu);
3791 + multicall_entry_t *mc;
3792 + struct mmuext_op *dst;
3793 + bool commit, merge;
3794 +
3795 + if (unlikely(!use_lazy_mmu_mode()))
3796 + return _hypercall4(int, mmuext_op, src, count,
3797 + success_count, domid);
3798 +
3799 + /*
3800 +	 * While it could be useful in theory, I've never seen the body of
3801 +	 * this conditional reached, so it seems more reasonable to disable
3802 +	 * it for the time being.
3803 + */
3804 + if (0 && likely(count)
3805 + && likely(!success_count)
3806 + && likely(domid == DOMID_SELF)
3807 + && likely(lazy->nr_mc)
3808 + && lazy->mc[lazy->nr_mc - 1].op == __HYPERVISOR_update_va_mapping) {
3809 + unsigned long oldf, newf = UVMF_NONE;
3810 +
3811 + switch (src->cmd) {
3812 + case MMUEXT_TLB_FLUSH_ALL:
3813 + newf = UVMF_TLB_FLUSH | UVMF_ALL;
3814 + break;
3815 + case MMUEXT_INVLPG_ALL:
3816 + newf = UVMF_INVLPG | UVMF_ALL;
3817 + break;
3818 + case MMUEXT_TLB_FLUSH_MULTI:
3819 + newf = UVMF_TLB_FLUSH | UVMF_MULTI
3820 + | (unsigned long)src->arg2.vcpumask.p;
3821 + break;
3822 + case MMUEXT_INVLPG_MULTI:
3823 + newf = UVMF_INVLPG | UVMF_MULTI
3824 + | (unsigned long)src->arg2.vcpumask.p;
3825 + break;
3826 + case MMUEXT_TLB_FLUSH_LOCAL:
3827 + newf = UVMF_TLB_FLUSH | UVMF_LOCAL;
3828 + break;
3829 + case MMUEXT_INVLPG_LOCAL:
3830 + newf = UVMF_INVLPG | UVMF_LOCAL;
3831 + break;
3832 + }
3833 + mc = lazy->mc + lazy->nr_mc - 1;
3834 + oldf = mc->args[MULTI_UVMFLAGS_INDEX];
3835 + if (newf == UVMF_NONE || oldf == UVMF_NONE
3836 + || newf == (UVMF_TLB_FLUSH | UVMF_ALL))
3837 + ;
3838 + else if (oldf == (UVMF_TLB_FLUSH | UVMF_ALL))
3839 + newf = UVMF_TLB_FLUSH | UVMF_ALL;
3840 + else if ((newf & UVMF_FLUSHTYPE_MASK) == UVMF_INVLPG
3841 + && (oldf & UVMF_FLUSHTYPE_MASK) == UVMF_INVLPG
3842 + && ((src->arg1.linear_addr ^ mc->args[0])
3843 + >> PAGE_SHIFT))
3844 + newf = UVMF_NONE;
3845 + else if (((oldf | newf) & UVMF_ALL)
3846 + && !((oldf ^ newf) & UVMF_FLUSHTYPE_MASK))
3847 + newf |= UVMF_ALL;
3848 + else if ((oldf ^ newf) & ~UVMF_FLUSHTYPE_MASK)
3849 + newf = UVMF_NONE;
3850 + else if ((oldf & UVMF_FLUSHTYPE_MASK) == UVMF_TLB_FLUSH)
3851 + newf = (newf & ~UVMF_FLUSHTYPE_MASK) | UVMF_TLB_FLUSH;
3852 + else if ((newf & UVMF_FLUSHTYPE_MASK) != UVMF_TLB_FLUSH
3853 + && ((newf ^ oldf) & UVMF_FLUSHTYPE_MASK))
3854 + newf = UVMF_NONE;
3855 + if (newf != UVMF_NONE) {
3856 + mc->args[MULTI_UVMFLAGS_INDEX] = newf;
3857 + ++src;
3858 + if (!--count)
3859 + return 0;
3860 + }
3861 + }
3862 +
3863 + mc = lazy->mc + lazy->nr_mc;
3864 + commit = (lazy->nr_mmuext + count) > NR_MMUEXT || success_count;
3865 + merge = lazy->nr_mc && !commit
3866 + && mmu_may_merge(mc - 1, __HYPERVISOR_mmuext_op, domid);
3867 + if (unlikely(lazy->nr_mc == NR_MC) && !merge) {
3868 + xen_multicall_flush(false);
3869 + mc = lazy->mc;
3870 + commit = count > NR_MMUEXT || success_count;
3871 + }
3872 +
3873 + if (!lazy->nr_mc && unlikely(commit))
3874 + return _hypercall4(int, mmuext_op, src, count,
3875 + success_count, domid);
3876 +
3877 + dst = lazy->mmuext + lazy->nr_mmuext;
3878 + lazy->nr_mmuext += count;
3879 + if (merge) {
3880 + mc[-1].args[1] += count;
3881 + memcpy(dst, src, count * sizeof(*src));
3882 + } else {
3883 + ++lazy->nr_mc;
3884 + mc->op = __HYPERVISOR_mmuext_op;
3885 + if (!commit) {
3886 + mc->args[0] = (unsigned long)dst;
3887 + memcpy(dst, src, count * sizeof(*src));
3888 + } else
3889 + mc->args[0] = (unsigned long)src;
3890 + mc->args[1] = count;
3891 + mc->args[2] = (unsigned long)success_count;
3892 + mc->args[3] = domid;
3893 + mc->args[5] = (long)__builtin_return_address(0);
3894 + }
3895 +
3896 + while (!commit && count--)
3897 + switch (src++->cmd) {
3898 + case MMUEXT_PIN_L1_TABLE:
3899 + case MMUEXT_PIN_L2_TABLE:
3900 + case MMUEXT_PIN_L3_TABLE:
3901 + case MMUEXT_PIN_L4_TABLE:
3902 + case MMUEXT_UNPIN_TABLE:
3903 + case MMUEXT_TLB_FLUSH_LOCAL:
3904 + case MMUEXT_INVLPG_LOCAL:
3905 + case MMUEXT_TLB_FLUSH_MULTI:
3906 + case MMUEXT_INVLPG_MULTI:
3907 + case MMUEXT_TLB_FLUSH_ALL:
3908 + case MMUEXT_INVLPG_ALL:
3909 + break;
3910 + default:
3911 + commit = true;
3912 + break;
3913 + }
3914 +
3915 + return commit ? xen_multicall_flush(true) : 0;
3916 +}
3917 +
3918 void xen_l1_entry_update(pte_t *ptr, pte_t val)
3919 {
3920 mmu_update_t u;
3921 @@ -542,7 +839,8 @@ int write_ldt_entry(void *ldt, int entry
3922 #define MAX_BATCHED_FULL_PTES 32
3923
3924 int xen_change_pte_range(struct mm_struct *mm, pmd_t *pmd,
3925 - unsigned long addr, unsigned long end, pgprot_t newprot)
3926 + unsigned long addr, unsigned long end, pgprot_t newprot,
3927 + int dirty_accountable)
3928 {
3929 int rc = 0, i = 0;
3930 mmu_update_t u[MAX_BATCHED_FULL_PTES];
3931 @@ -555,10 +853,14 @@ int xen_change_pte_range(struct mm_struc
3932 pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
3933 do {
3934 if (pte_present(*pte)) {
3935 + pte_t ptent = pte_modify(*pte, newprot);
3936 +
3937 + if (dirty_accountable && pte_dirty(ptent))
3938 + ptent = pte_mkwrite(ptent);
3939 u[i].ptr = (__pmd_val(*pmd) & PHYSICAL_PAGE_MASK)
3940 | ((unsigned long)pte & ~PAGE_MASK)
3941 | MMU_PT_UPDATE_PRESERVE_AD;
3942 - u[i].val = __pte_val(pte_modify(*pte, newprot));
3943 + u[i].val = __pte_val(ptent);
3944 if (++i == MAX_BATCHED_FULL_PTES) {
3945 if ((rc = HYPERVISOR_mmu_update(
3946 &u[0], i, NULL, DOMID_SELF)) != 0)
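
The block added to hypervisor.c above introduces per-CPU hypercall batching: while a CPU is in lazy-MMU mode, xen_multi_update_va_mapping(), xen_multi_mmu_update() and xen_multi_mmuext_op() only queue work into the per-CPU lazy_mmu buffers (NR_MC/NR_MMU/NR_MMUEXT entries, with adjacent compatible requests merged), and xen_multicall_flush() drains everything with a single HYPERVISOR_multicall; outside lazy mode each call falls straight through to its direct hypercall. A rough sketch of the intended calling pattern — not code from the patch:

	/* Two PTE updates become one multicall when batching is active. */
	static void remap_two_pages(unsigned long va0, pte_t pte0,
				    unsigned long va1, pte_t pte1)
	{
		xen_multi_update_va_mapping(va0, pte0, UVMF_INVLPG | UVMF_LOCAL);
		xen_multi_update_va_mapping(va1, pte1, UVMF_INVLPG | UVMF_LOCAL);

		/* Drain the queue; a failed sub-call BUG()s inside the flush. */
		xen_multicall_flush(false);
	}
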
3947 --- sle11-2009-10-16.orig/arch/x86/mm/init_32-xen.c 2009-10-28 14:55:12.000000000 +0100
3948 +++ sle11-2009-10-16/arch/x86/mm/init_32-xen.c 2009-03-04 11:28:34.000000000 +0100
3949 @@ -462,16 +462,22 @@ EXPORT_SYMBOL(__supported_pte_mask);
3950 * on Enable
3951 * off Disable
3952 */
3953 -void __init noexec_setup(const char *str)
3954 +static int __init noexec_setup(char *str)
3955 {
3956 - if (!strncmp(str, "on",2) && cpu_has_nx) {
3957 - __supported_pte_mask |= _PAGE_NX;
3958 - disable_nx = 0;
3959 - } else if (!strncmp(str,"off",3)) {
3960 + if (!str || !strcmp(str, "on")) {
3961 + if (cpu_has_nx) {
3962 + __supported_pte_mask |= _PAGE_NX;
3963 + disable_nx = 0;
3964 + }
3965 + } else if (!strcmp(str,"off")) {
3966 disable_nx = 1;
3967 __supported_pte_mask &= ~_PAGE_NX;
3968 - }
3969 + } else
3970 + return -EINVAL;
3971 +
3972 + return 0;
3973 }
3974 +early_param("noexec", noexec_setup);
3975
3976 int nx_enabled = 0;
3977 #ifdef CONFIG_X86_PAE
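
The noexec_setup() conversion above is the recurring 2.6.19 pattern in this patch: handlers move from the old __setup() style to early_param(), run from parse_early_param(), receive just the option value (possibly NULL when the option is given bare), and report malformed input with -EINVAL. A minimal sketch of the same pattern, with made-up names:

	static int foo_enabled __initdata;

	static int __init foo_setup(char *str)
	{
		if (!str || !strcmp(str, "on"))	/* bare "foo" means on */
			foo_enabled = 1;
		else if (!strcmp(str, "off"))
			foo_enabled = 0;
		else
			return -EINVAL;		/* reject anything else */
		return 0;
	}
	early_param("foo", foo_setup);
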
3978 @@ -514,6 +520,7 @@ int __init set_kernel_exec(unsigned long
3979 pte->pte_high &= ~(1 << (_PAGE_BIT_NX - 32));
3980 else
3981 pte->pte_high |= 1 << (_PAGE_BIT_NX - 32);
3982 + pte_update_defer(&init_mm, vaddr, pte);
3983 __flush_tlb_all();
3984 out:
3985 return ret;
3986 @@ -596,18 +603,6 @@ static void __init test_wp_bit(void)
3987 }
3988 }
3989
3990 -static void __init set_max_mapnr_init(void)
3991 -{
3992 -#ifdef CONFIG_HIGHMEM
3993 - num_physpages = highend_pfn;
3994 -#else
3995 - num_physpages = max_low_pfn;
3996 -#endif
3997 -#ifdef CONFIG_FLATMEM
3998 - max_mapnr = num_physpages;
3999 -#endif
4000 -}
4001 -
4002 static struct kcore_list kcore_mem, kcore_vmalloc;
4003
4004 void __init mem_init(void)
4005 @@ -623,8 +618,7 @@ void __init mem_init(void)
4006 #endif
4007
4008 #ifdef CONFIG_FLATMEM
4009 - if (!mem_map)
4010 - BUG();
4011 + BUG_ON(!mem_map);
4012 #endif
4013
4014 bad_ppro = ppro_with_ram_bug();
4015 @@ -639,17 +633,6 @@ void __init mem_init(void)
4016 }
4017 #endif
4018
4019 - set_max_mapnr_init();
4020 -
4021 -#ifdef CONFIG_HIGHMEM
4022 - high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
4023 -#else
4024 - high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
4025 -#endif
4026 - printk("vmalloc area: %lx-%lx, maxmem %lx\n",
4027 - VMALLOC_START,VMALLOC_END,MAXMEM);
4028 - BUG_ON(VMALLOC_START > VMALLOC_END);
4029 -
4030 /* this will put all low memory onto the freelists */
4031 totalram_pages += free_all_bootmem();
4032 /* XEN: init and count low-mem pages outside initial allocation. */
4033 @@ -687,6 +670,48 @@ void __init mem_init(void)
4034 (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))
4035 );
4036
4037 +#if 1 /* double-sanity-check paranoia */
4038 + printk("virtual kernel memory layout:\n"
4039 + " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
4040 +#ifdef CONFIG_HIGHMEM
4041 + " pkmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
4042 +#endif
4043 + " vmalloc : 0x%08lx - 0x%08lx (%4ld MB)\n"
4044 + " lowmem : 0x%08lx - 0x%08lx (%4ld MB)\n"
4045 + " .init : 0x%08lx - 0x%08lx (%4ld kB)\n"
4046 + " .data : 0x%08lx - 0x%08lx (%4ld kB)\n"
4047 + " .text : 0x%08lx - 0x%08lx (%4ld kB)\n",
4048 + FIXADDR_START, FIXADDR_TOP,
4049 + (FIXADDR_TOP - FIXADDR_START) >> 10,
4050 +
4051 +#ifdef CONFIG_HIGHMEM
4052 + PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE,
4053 + (LAST_PKMAP*PAGE_SIZE) >> 10,
4054 +#endif
4055 +
4056 + VMALLOC_START, VMALLOC_END,
4057 + (VMALLOC_END - VMALLOC_START) >> 20,
4058 +
4059 + (unsigned long)__va(0), (unsigned long)high_memory,
4060 + ((unsigned long)high_memory - (unsigned long)__va(0)) >> 20,
4061 +
4062 + (unsigned long)&__init_begin, (unsigned long)&__init_end,
4063 + ((unsigned long)&__init_end - (unsigned long)&__init_begin) >> 10,
4064 +
4065 + (unsigned long)&_etext, (unsigned long)&_edata,
4066 + ((unsigned long)&_edata - (unsigned long)&_etext) >> 10,
4067 +
4068 + (unsigned long)&_text, (unsigned long)&_etext,
4069 + ((unsigned long)&_etext - (unsigned long)&_text) >> 10);
4070 +
4071 +#ifdef CONFIG_HIGHMEM
4072 + BUG_ON(PKMAP_BASE+LAST_PKMAP*PAGE_SIZE > FIXADDR_START);
4073 + BUG_ON(VMALLOC_END > PKMAP_BASE);
4074 +#endif
4075 + BUG_ON(VMALLOC_START > VMALLOC_END);
4076 + BUG_ON((unsigned long)high_memory > VMALLOC_START);
4077 +#endif /* double-sanity-check paranoia */
4078 +
4079 #ifdef CONFIG_X86_PAE
4080 if (!cpu_has_pae)
4081 panic("cannot execute a PAE-enabled kernel on a PAE-less CPU!");
4082 @@ -717,7 +742,7 @@ void __init mem_init(void)
4083 int arch_add_memory(int nid, u64 start, u64 size)
4084 {
4085 struct pglist_data *pgdata = &contig_page_data;
4086 - struct zone *zone = pgdata->node_zones + MAX_NR_ZONES-1;
4087 + struct zone *zone = pgdata->node_zones + ZONE_HIGHMEM;
4088 unsigned long start_pfn = start >> PAGE_SHIFT;
4089 unsigned long nr_pages = size >> PAGE_SHIFT;
4090
4091 --- sle11-2009-10-16.orig/arch/x86/mm/ioremap_32-xen.c 2009-10-28 14:55:12.000000000 +0100
4092 +++ sle11-2009-10-16/arch/x86/mm/ioremap_32-xen.c 2009-03-04 11:28:34.000000000 +0100
4093 @@ -12,7 +12,7 @@
4094 #include <linux/init.h>
4095 #include <linux/slab.h>
4096 #include <linux/module.h>
4097 -#include <asm/io.h>
4098 +#include <linux/io.h>
4099 #include <asm/fixmap.h>
4100 #include <asm/cacheflush.h>
4101 #include <asm/tlbflush.h>
4102 @@ -118,7 +118,7 @@ int direct_remap_pfn_range(struct vm_are
4103 if (domid == DOMID_SELF)
4104 return -EINVAL;
4105
4106 - vma->vm_flags |= VM_IO | VM_RESERVED;
4107 + vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP;
4108
4109 vma->vm_mm->context.has_foreign_mappings = 1;
4110
4111 @@ -203,6 +203,7 @@ void __iomem * __ioremap(unsigned long p
4112 void __iomem * addr;
4113 struct vm_struct * area;
4114 unsigned long offset, last_addr;
4115 + pgprot_t prot;
4116 domid_t domid = DOMID_IO;
4117
4118 /* Don't allow wraparound or zero size */
4119 @@ -234,6 +235,8 @@ void __iomem * __ioremap(unsigned long p
4120 domid = DOMID_SELF;
4121 }
4122
4123 + prot = __pgprot(_KERNPG_TABLE | flags);
4124 +
4125 /*
4126 * Mappings have to be page-aligned
4127 */
4128 @@ -249,10 +252,9 @@ void __iomem * __ioremap(unsigned long p
4129 return NULL;
4130 area->phys_addr = phys_addr;
4131 addr = (void __iomem *) area->addr;
4132 - flags |= _KERNPG_TABLE;
4133 if (__direct_remap_pfn_range(&init_mm, (unsigned long)addr,
4134 phys_addr>>PAGE_SHIFT,
4135 - size, __pgprot(flags), domid)) {
4136 + size, prot, domid)) {
4137 vunmap((void __force *) addr);
4138 return NULL;
4139 }
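
In the __ioremap() hunk the protection bits are now assembled into a pgprot_t up front (prot = __pgprot(_KERNPG_TABLE | flags)) instead of being OR-ed into flags at the remap call, and direct_remap_pfn_range() additionally marks foreign mappings VM_PFNMAP; the caller-visible contract is unchanged. For reference, a typical (illustrative) caller:

	#include <linux/io.h>	/* note the hunk's <asm/io.h> -> <linux/io.h> move */
	#include <linux/errno.h>

	static void __iomem *regs;

	static int map_device(unsigned long bar_phys)	/* e.g. a PCI BAR */
	{
		regs = ioremap(bar_phys, PAGE_SIZE);	/* uncached MMIO map */
		if (!regs)
			return -ENOMEM;
		writel(0x1, regs);			/* poke register 0 */
		return 0;
	}

	static void unmap_device(void)
	{
		iounmap(regs);
	}
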
4140 --- sle11-2009-10-16.orig/arch/x86/mm/pgtable_32-xen.c 2008-12-01 11:25:57.000000000 +0100
4141 +++ sle11-2009-10-16/arch/x86/mm/pgtable_32-xen.c 2009-03-04 11:28:34.000000000 +0100
4142 @@ -68,7 +68,9 @@ void show_mem(void)
4143 printk(KERN_INFO "%lu pages writeback\n",
4144 global_page_state(NR_WRITEBACK));
4145 printk(KERN_INFO "%lu pages mapped\n", global_page_state(NR_FILE_MAPPED));
4146 - printk(KERN_INFO "%lu pages slab\n", global_page_state(NR_SLAB));
4147 + printk(KERN_INFO "%lu pages slab\n",
4148 + global_page_state(NR_SLAB_RECLAIMABLE) +
4149 + global_page_state(NR_SLAB_UNRECLAIMABLE));
4150 printk(KERN_INFO "%lu pages pagetables\n",
4151 global_page_state(NR_PAGETABLE));
4152 }
4153 @@ -108,18 +110,11 @@ void set_pmd_pfn(unsigned long vaddr, un
4154 __flush_tlb_one(vaddr);
4155 }
4156
4157 -static int nr_fixmaps = 0;
4158 +static int fixmaps;
4159 unsigned long hypervisor_virt_start = HYPERVISOR_VIRT_START;
4160 -unsigned long __FIXADDR_TOP = (HYPERVISOR_VIRT_START - 2 * PAGE_SIZE);
4161 +unsigned long __FIXADDR_TOP = (HYPERVISOR_VIRT_START - PAGE_SIZE);
4162 EXPORT_SYMBOL(__FIXADDR_TOP);
4163
4164 -void __init set_fixaddr_top(unsigned long top)
4165 -{
4166 - BUG_ON(nr_fixmaps > 0);
4167 - hypervisor_virt_start = top;
4168 - __FIXADDR_TOP = hypervisor_virt_start - 2 * PAGE_SIZE;
4169 -}
4170 -
4171 void __set_fixmap (enum fixed_addresses idx, maddr_t phys, pgprot_t flags)
4172 {
4173 unsigned long address = __fix_to_virt(idx);
4174 @@ -141,7 +136,21 @@ void __set_fixmap (enum fixed_addresses
4175 if (HYPERVISOR_update_va_mapping(address, pte,
4176 UVMF_INVLPG|UVMF_ALL))
4177 BUG();
4178 - nr_fixmaps++;
4179 + fixmaps++;
4180 +}
4181 +
4182 +/**
4183 + * reserve_top_address - reserves a hole in the top of kernel address space
4184 + * @reserve - size of hole to reserve
4185 + *
4186 + * Can be used to relocate the fixmap area and poke a hole in the top
4187 + * of kernel address space to make room for a hypervisor.
4188 + */
4189 +void __init reserve_top_address(unsigned long reserve)
4190 +{
4191 + BUG_ON(fixmaps > 0);
4192 + __FIXADDR_TOP = -reserve - PAGE_SIZE;
4193 + __VMALLOC_RESERVE += reserve;
4194 }
4195
4196 pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
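
The generic reserve_top_address() replaces the Xen-only set_fixaddr_top(): the caller now passes the size of the hole to punch at the top of the address space, and the helper both lowers __FIXADDR_TOP (leaving one guard page) and grows __VMALLOC_RESERVE by the same amount. Tying this back to the setup hunk at the start of this segment, reserve_top_address(0UL - pp.virt_start) converts the hypervisor's start address into a hole size. A worked example with the usual 32-bit PAE Xen value (illustrative; the real value comes from XENVER_platform_parameters):

	/*
	 * pp.virt_start = 0xF5800000  (HYPERVISOR_VIRT_START, PAE layout)
	 * reserve       = 0UL - 0xF5800000      = 0x0A800000  (168 MB hole)
	 * __FIXADDR_TOP = -reserve - PAGE_SIZE  = 0xF57FF000
	 *
	 * so the fixmap ends one guard page below the hypervisor hole, and
	 * lowmem shrinks by the same 168 MB via __VMALLOC_RESERVE.
	 */
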
4197 --- sle11-2009-10-16.orig/arch/x86/pci/irq-xen.c 2009-10-28 14:55:12.000000000 +0100
4198 +++ sle11-2009-10-16/arch/x86/pci/irq-xen.c 2009-03-04 11:28:34.000000000 +0100
4199 @@ -991,10 +991,6 @@ static void __init pcibios_fixup_irqs(vo
4200 pci_name(bridge), 'A' + pin, irq);
4201 }
4202 if (irq >= 0) {
4203 - if (use_pci_vector() &&
4204 - !platform_legacy_irq(irq))
4205 - irq = IO_APIC_VECTOR(irq);
4206 -
4207 printk(KERN_INFO "PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n",
4208 pci_name(dev), 'A' + pin, irq);
4209 dev->irq = irq;
4210 @@ -1155,10 +1151,6 @@ static int pirq_enable_irq(struct pci_de
4211 }
4212 dev = temp_dev;
4213 if (irq >= 0) {
4214 -#ifdef CONFIG_PCI_MSI
4215 - if (!platform_legacy_irq(irq))
4216 - irq = IO_APIC_VECTOR(irq);
4217 -#endif
4218 printk(KERN_INFO "PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n",
4219 pci_name(dev), 'A' + pin, irq);
4220 dev->irq = irq;
4221 @@ -1179,33 +1171,3 @@ static int pirq_enable_irq(struct pci_de
4222 }
4223 return 0;
4224 }
4225 -
4226 -int pci_vector_resources(int last, int nr_released)
4227 -{
4228 - int count = nr_released;
4229 -
4230 - int next = last;
4231 - int offset = (last % 8);
4232 -
4233 - while (next < FIRST_SYSTEM_VECTOR) {
4234 - next += 8;
4235 -#ifdef CONFIG_X86_64
4236 - if (next == IA32_SYSCALL_VECTOR)
4237 - continue;
4238 -#else
4239 - if (next == SYSCALL_VECTOR)
4240 - continue;
4241 -#endif
4242 - count++;
4243 - if (next >= FIRST_SYSTEM_VECTOR) {
4244 - if (offset%8) {
4245 - next = FIRST_DEVICE_VECTOR + offset;
4246 - offset++;
4247 - continue;
4248 - }
4249 - count--;
4250 - }
4251 - }
4252 -
4253 - return count;
4254 -}
4255 --- sle11-2009-10-16.orig/arch/x86/ia32/ia32entry-xen.S 2009-10-28 14:55:12.000000000 +0100
4256 +++ sle11-2009-10-16/arch/x86/ia32/ia32entry-xen.S 2009-03-04 11:28:34.000000000 +0100
4257 @@ -83,6 +83,7 @@
4258 */
4259 ENTRY(ia32_sysenter_target)
4260 CFI_STARTPROC32 simple
4261 + CFI_SIGNAL_FRAME
4262 CFI_DEF_CFA rsp,SS+8-RIP+16
4263 /*CFI_REL_OFFSET ss,SS-RIP+16*/
4264 CFI_REL_OFFSET rsp,RSP-RIP+16
4265 @@ -164,6 +165,7 @@ ENDPROC(ia32_sysenter_target)
4266 */
4267 ENTRY(ia32_cstar_target)
4268 CFI_STARTPROC32 simple
4269 + CFI_SIGNAL_FRAME
4270 CFI_DEF_CFA rsp,SS+8-RIP+16
4271 /*CFI_REL_OFFSET ss,SS-RIP+16*/
4272 CFI_REL_OFFSET rsp,RSP-RIP+16
4273 @@ -243,6 +245,7 @@ ia32_badarg:
4274
4275 ENTRY(ia32_syscall)
4276 CFI_STARTPROC simple
4277 + CFI_SIGNAL_FRAME
4278 CFI_DEF_CFA rsp,SS+8-RIP+16
4279 /*CFI_REL_OFFSET ss,SS-RIP+16*/
4280 CFI_REL_OFFSET rsp,RSP-RIP+16
4281 @@ -320,6 +323,7 @@ ENTRY(ia32_ptregs_common)
4282 popq %r11
4283 CFI_ENDPROC
4284 CFI_STARTPROC32 simple
4285 + CFI_SIGNAL_FRAME
4286 CFI_DEF_CFA rsp,SS+8-ARGOFFSET
4287 CFI_REL_OFFSET rax,RAX-ARGOFFSET
4288 CFI_REL_OFFSET rcx,RCX-ARGOFFSET
4289 @@ -653,8 +657,8 @@ ia32_sys_call_table:
4290 .quad sys_readlinkat /* 305 */
4291 .quad sys_fchmodat
4292 .quad sys_faccessat
4293 - .quad quiet_ni_syscall /* pselect6 for now */
4294 - .quad quiet_ni_syscall /* ppoll for now */
4295 + .quad compat_sys_pselect6
4296 + .quad compat_sys_ppoll
4297 .quad sys_unshare /* 310 */
4298 .quad compat_sys_set_robust_list
4299 .quad compat_sys_get_robust_list
4300 @@ -663,4 +667,5 @@ ia32_sys_call_table:
4301 .quad sys_tee
4302 .quad compat_sys_vmsplice
4303 .quad compat_sys_move_pages
4304 + .quad sys_getcpu
4305 ia32_syscall_end:
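
Besides the CFI_SIGNAL_FRAME unwind annotations, this hunk wires up compat_sys_pselect6, compat_sys_ppoll and the new sys_getcpu for 32-bit tasks. Since glibc of this era had no getcpu() wrapper, a test program would invoke it directly — a sketch, assuming __NR_getcpu is present in the installed headers:

	#include <stdio.h>
	#include <unistd.h>
	#include <sys/syscall.h>

	int main(void)
	{
		unsigned cpu, node;

		/* The third (tcache) argument is optional and unused here. */
		if (syscall(__NR_getcpu, &cpu, &node, NULL) == 0)
			printf("cpu %u, node %u\n", cpu, node);
		return 0;
	}
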
4306 --- sle11-2009-10-16.orig/arch/x86/kernel/Makefile 2009-04-20 11:36:10.000000000 +0200
4307 +++ sle11-2009-10-16/arch/x86/kernel/Makefile 2009-03-04 11:28:34.000000000 +0100
4308 @@ -104,9 +104,9 @@ obj-$(CONFIG_X86_XEN) += fixup.o
4309 ###
4310 # 64 bit specific files
4311 ifeq ($(CONFIG_X86_64),y)
4312 - obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o
4313 - obj-$(CONFIG_X86_XEN_GENAPIC) += genapic_xen_64.o
4314 - obj-y += uv_sysfs.o
4315 + obj-$(CONFIG_X86_LOCAL_APIC) += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o
4316 + obj-$(CONFIG_X86_XEN_GENAPIC) += genapic_64.o genapic_xen_64.o
4317 + obj-y += tlb_uv.o uv_sysfs.o
4318 obj-y += genx2apic_cluster.o
4319 obj-y += genx2apic_phys.o
4320 obj-y += bios_uv.o uv_irq.o
4321 @@ -124,5 +124,7 @@ ifeq ($(CONFIG_X86_64),y)
4322 pci-dma_64-$(CONFIG_XEN) += pci-dma_32.o
4323 endif
4324
4325 -disabled-obj-$(CONFIG_XEN) := i8253.o i8259_$(BITS).o reboot.o smpboot_$(BITS).o tsc_$(BITS).o
4326 +disabled-obj-$(CONFIG_XEN) := early-quirks.o i8253.o i8259_$(BITS).o reboot.o \
4327 + smpboot_$(BITS).o tsc_$(BITS).o
4328 +disabled-obj-$(CONFIG_XEN_UNPRIVILEGED_GUEST) += mpparse_64.o
4329 %/head_$(BITS).o %/head_$(BITS).s: $(if $(CONFIG_XEN),EXTRA_AFLAGS,dummy) :=
4330 --- sle11-2009-10-16.orig/arch/x86/kernel/apic_64-xen.c 2009-10-28 14:55:12.000000000 +0100
4331 +++ sle11-2009-10-16/arch/x86/kernel/apic_64-xen.c 2009-03-04 11:28:34.000000000 +0100
4332 @@ -43,7 +43,7 @@ int apic_verbosity;
4333 */
4334 void ack_bad_irq(unsigned int irq)
4335 {
4336 - printk("unexpected IRQ trap at vector %02x\n", irq);
4337 + printk("unexpected IRQ trap at irq %02x\n", irq);
4338 /*
4339 * Currently unexpected vectors happen only on SMP and APIC.
4340 * We _must_ ack these because every local APIC has only N
4341 @@ -62,19 +62,19 @@ int setup_profiling_timer(unsigned int m
4342 return -EINVAL;
4343 }
4344
4345 -void smp_local_timer_interrupt(struct pt_regs *regs)
4346 +void smp_local_timer_interrupt(void)
4347 {
4348 - profile_tick(CPU_PROFILING, regs);
4349 + profile_tick(CPU_PROFILING);
4350 #ifndef CONFIG_XEN
4351 #ifdef CONFIG_SMP
4352 - update_process_times(user_mode(regs));
4353 + update_process_times(user_mode(get_irq_regs()));
4354 #endif
4355 #endif
4356 /*
4357 * We take the 'long' return path, and there every subsystem
4358 * grabs the appropriate locks (kernel lock/ irq lock).
4359 *
4360 - * we might want to decouple profiling from the 'long path',
4361 + * We might want to decouple profiling from the 'long path',
4362 * and do the profiling totally in assembly.
4363 *
4364 * Currently this isn't too much of an issue (performance wise),
4365 @@ -92,6 +92,8 @@ void smp_local_timer_interrupt(struct pt
4366 */
4367 void smp_apic_timer_interrupt(struct pt_regs *regs)
4368 {
4369 + struct pt_regs *old_regs = set_irq_regs(regs);
4370 +
4371 /*
4372 * the NMI deadlock-detector uses this.
4373 */
4374 @@ -109,8 +111,9 @@ void smp_apic_timer_interrupt(struct pt_
4375 */
4376 exit_idle();
4377 irq_enter();
4378 - smp_local_timer_interrupt(regs);
4379 + smp_local_timer_interrupt();
4380 irq_exit();
4381 + set_irq_regs(old_regs);
4382 }
4383
4384 /*
4385 @@ -188,9 +191,8 @@ int disable_apic;
4386 int __init APIC_init_uniprocessor (void)
4387 {
4388 #ifdef CONFIG_X86_IO_APIC
4389 - if (smp_found_config)
4390 - if (!skip_ioapic_setup && nr_ioapics)
4391 - setup_IO_APIC();
4392 + if (smp_found_config && !skip_ioapic_setup && nr_ioapics)
4393 + setup_IO_APIC();
4394 #endif
4395
4396 return 1;
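
This hunk follows the 2.6.19 removal of the pt_regs argument from the timer call chain: the entry point stashes the frame with set_irq_regs() and callees such as profile_tick() retrieve it through get_irq_regs(), as smp_apic_timer_interrupt() does above. The pattern, reduced to a generic (illustrative) handler:

	#include <linux/profile.h>
	#include <linux/hardirq.h>
	#include <asm/irq_regs.h>

	void smp_example_interrupt(struct pt_regs *regs)
	{
		struct pt_regs *old_regs = set_irq_regs(regs);

		irq_enter();
		profile_tick(CPU_PROFILING);	/* fetches regs itself now */
		irq_exit();

		set_irq_regs(old_regs);		/* restore for nesting */
	}
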
4397 --- sle11-2009-10-16.orig/arch/x86/kernel/e820_64-xen.c 2009-10-28 14:55:12.000000000 +0100
4398 +++ sle11-2009-10-16/arch/x86/kernel/e820_64-xen.c 2009-03-04 11:28:34.000000000 +0100
4399 @@ -16,6 +16,7 @@
4400 #include <linux/string.h>
4401 #include <linux/kexec.h>
4402 #include <linux/module.h>
4403 +#include <linux/mm.h>
4404
4405 #include <asm/pgtable.h>
4406 #include <asm/page.h>
4407 @@ -25,6 +26,11 @@
4408 #include <asm/sections.h>
4409 #include <xen/interface/memory.h>
4410
4411 +struct e820map e820 __initdata;
4412 +#ifdef CONFIG_XEN
4413 +struct e820map machine_e820 __initdata;
4414 +#endif
4415 +
4416 /*
4417 * PFN of last memory page.
4418 */
4419 @@ -41,14 +47,10 @@ unsigned long end_pfn_map;
4420 /*
4421 * Last pfn which the user wants to use.
4422 */
4423 -unsigned long end_user_pfn = MAXMEM>>PAGE_SHIFT;
4424 +static unsigned long __initdata end_user_pfn = MAXMEM>>PAGE_SHIFT;
4425
4426 extern struct resource code_resource, data_resource;
4427
4428 -#ifdef CONFIG_XEN
4429 -extern struct e820map machine_e820;
4430 -#endif
4431 -
4432 /* Check for some hardcoded bad areas that early boot is not allowed to touch */
4433 static inline int bad_addr(unsigned long *addrp, unsigned long size)
4434 {
4435 @@ -57,13 +59,13 @@ static inline int bad_addr(unsigned long
4436 #ifndef CONFIG_XEN
4437 /* various gunk below that needed for SMP startup */
4438 if (addr < 0x8000) {
4439 - *addrp = 0x8000;
4440 + *addrp = PAGE_ALIGN(0x8000);
4441 return 1;
4442 }
4443
4444 /* direct mapping tables of the kernel */
4445 if (last >= table_start<<PAGE_SHIFT && addr < table_end<<PAGE_SHIFT) {
4446 - *addrp = table_end << PAGE_SHIFT;
4447 + *addrp = PAGE_ALIGN(table_end << PAGE_SHIFT);
4448 return 1;
4449 }
4450
4451 @@ -71,23 +73,18 @@ static inline int bad_addr(unsigned long
4452 #ifdef CONFIG_BLK_DEV_INITRD
4453 if (LOADER_TYPE && INITRD_START && last >= INITRD_START &&
4454 addr < INITRD_START+INITRD_SIZE) {
4455 - *addrp = INITRD_START + INITRD_SIZE;
4456 + *addrp = PAGE_ALIGN(INITRD_START + INITRD_SIZE);
4457 return 1;
4458 }
4459 #endif
4460 - /* kernel code + 640k memory hole (later should not be needed, but
4461 - be paranoid for now) */
4462 - if (last >= 640*1024 && addr < 1024*1024) {
4463 - *addrp = 1024*1024;
4464 - return 1;
4465 - }
4466 - if (last >= __pa_symbol(&_text) && last < __pa_symbol(&_end)) {
4467 - *addrp = __pa_symbol(&_end);
4468 + /* kernel code */
4469 + if (last >= __pa_symbol(&_text) && addr < __pa_symbol(&_end)) {
4470 + *addrp = PAGE_ALIGN(__pa_symbol(&_end));
4471 return 1;
4472 }
4473
4474 if (last >= ebda_addr && addr < ebda_addr + ebda_size) {
4475 - *addrp = ebda_addr + ebda_size;
4476 + *addrp = PAGE_ALIGN(ebda_addr + ebda_size);
4477 return 1;
4478 }
4479
4480 @@ -184,7 +181,7 @@ unsigned long __init find_e820_area(unsi
4481 continue;
4482 while (bad_addr(&addr, size) && addr+size <= ei->addr+ei->size)
4483 ;
4484 - last = addr + size;
4485 + last = PAGE_ALIGN(addr) + size;
4486 if (last > ei->addr + ei->size)
4487 continue;
4488 if (last > end)
4489 @@ -194,59 +191,14 @@ unsigned long __init find_e820_area(unsi
4490 return -1UL;
4491 }
4492
4493 -/*
4494 - * Free bootmem based on the e820 table for a node.
4495 - */
4496 -void __init e820_bootmem_free(pg_data_t *pgdat, unsigned long start,unsigned long end)
4497 -{
4498 - int i;
4499 - for (i = 0; i < e820.nr_map; i++) {
4500 - struct e820entry *ei = &e820.map[i];
4501 - unsigned long last, addr;
4502 -
4503 - if (ei->type != E820_RAM ||
4504 - ei->addr+ei->size <= start ||
4505 - ei->addr >= end)
4506 - continue;
4507 -
4508 - addr = round_up(ei->addr, PAGE_SIZE);
4509 - if (addr < start)
4510 - addr = start;
4511 -
4512 - last = round_down(ei->addr + ei->size, PAGE_SIZE);
4513 - if (last >= end)
4514 - last = end;
4515 -
4516 - if (last > addr && last-addr >= PAGE_SIZE)
4517 - free_bootmem_node(pgdat, addr, last-addr);
4518 - }
4519 -}
4520 -
4521 /*
4522 * Find the highest page frame number we have available
4523 */
4524 unsigned long __init e820_end_of_ram(void)
4525 {
4526 - int i;
4527 unsigned long end_pfn = 0;
4528 + end_pfn = find_max_pfn_with_active_regions();
4529
4530 - for (i = 0; i < e820.nr_map; i++) {
4531 - struct e820entry *ei = &e820.map[i];
4532 - unsigned long start, end;
4533 -
4534 - start = round_up(ei->addr, PAGE_SIZE);
4535 - end = round_down(ei->addr + ei->size, PAGE_SIZE);
4536 - if (start >= end)
4537 - continue;
4538 - if (ei->type == E820_RAM) {
4539 - if (end > end_pfn<<PAGE_SHIFT)
4540 - end_pfn = end>>PAGE_SHIFT;
4541 - } else {
4542 - if (end > end_pfn_map<<PAGE_SHIFT)
4543 - end_pfn_map = end>>PAGE_SHIFT;
4544 - }
4545 - }
4546 -
4547 if (end_pfn > end_pfn_map)
4548 end_pfn_map = end_pfn;
4549 if (end_pfn_map > MAXMEM>>PAGE_SHIFT)
4550 @@ -256,43 +208,10 @@ unsigned long __init e820_end_of_ram(voi
4551 if (end_pfn > end_pfn_map)
4552 end_pfn = end_pfn_map;
4553
4554 + printk("end_pfn_map = %lu\n", end_pfn_map);
4555 return end_pfn;
4556 }
4557
4558 -/*
4559 - * Compute how much memory is missing in a range.
4560 - * Unlike the other functions in this file the arguments are in page numbers.
4561 - */
4562 -unsigned long __init
4563 -e820_hole_size(unsigned long start_pfn, unsigned long end_pfn)
4564 -{
4565 - unsigned long ram = 0;
4566 - unsigned long start = start_pfn << PAGE_SHIFT;
4567 - unsigned long end = end_pfn << PAGE_SHIFT;
4568 - int i;
4569 - for (i = 0; i < e820.nr_map; i++) {
4570 - struct e820entry *ei = &e820.map[i];
4571 - unsigned long last, addr;
4572 -
4573 - if (ei->type != E820_RAM ||
4574 - ei->addr+ei->size <= start ||
4575 - ei->addr >= end)
4576 - continue;
4577 -
4578 - addr = round_up(ei->addr, PAGE_SIZE);
4579 - if (addr < start)
4580 - addr = start;
4581 -
4582 - last = round_down(ei->addr + ei->size, PAGE_SIZE);
4583 - if (last >= end)
4584 - last = end;
4585 -
4586 - if (last > addr)
4587 - ram += last - addr;
4588 - }
4589 - return ((end - start) - ram) >> PAGE_SHIFT;
4590 -}
4591 -
4592 /*
4593 * Mark e820 reserved areas as busy for the resource manager.
4594 */
4595 @@ -333,6 +252,98 @@ void __init e820_reserve_resources(struc
4596 }
4597 }
4598
4599 +#ifndef CONFIG_XEN
4600 +/* Mark pages corresponding to given address range as nosave */
4601 +static void __init
4602 +e820_mark_nosave_range(unsigned long start, unsigned long end)
4603 +{
4604 + unsigned long pfn, max_pfn;
4605 +
4606 + if (start >= end)
4607 + return;
4608 +
4609 + printk("Nosave address range: %016lx - %016lx\n", start, end);
4610 + max_pfn = end >> PAGE_SHIFT;
4611 + for (pfn = start >> PAGE_SHIFT; pfn < max_pfn; pfn++)
4612 + if (pfn_valid(pfn))
4613 + SetPageNosave(pfn_to_page(pfn));
4614 +}
4615 +
4616 +/*
4617 + * Find the ranges of physical addresses that do not correspond to
4618 + * e820 RAM areas and mark the corresponding pages as nosave for software
4619 + * suspend and suspend to RAM.
4620 + *
4621 + * This function requires the e820 map to be sorted and without any
4622 + * overlapping entries and assumes the first e820 area to be RAM.
4623 + */
4624 +void __init e820_mark_nosave_regions(void)
4625 +{
4626 + int i;
4627 + unsigned long paddr;
4628 +
4629 + paddr = round_down(e820.map[0].addr + e820.map[0].size, PAGE_SIZE);
4630 + for (i = 1; i < e820.nr_map; i++) {
4631 + struct e820entry *ei = &e820.map[i];
4632 +
4633 + if (paddr < ei->addr)
4634 + e820_mark_nosave_range(paddr,
4635 + round_up(ei->addr, PAGE_SIZE));
4636 +
4637 + paddr = round_down(ei->addr + ei->size, PAGE_SIZE);
4638 + if (ei->type != E820_RAM)
4639 + e820_mark_nosave_range(round_up(ei->addr, PAGE_SIZE),
4640 + paddr);
4641 +
4642 + if (paddr >= (end_pfn << PAGE_SHIFT))
4643 + break;
4644 + }
4645 +}
4646 +#endif
4647 +
4648 +/* Walk the e820 map and register active regions within a node */
4649 +void __init
4650 +e820_register_active_regions(int nid, unsigned long start_pfn,
4651 + unsigned long end_pfn)
4652 +{
4653 + int i;
4654 + unsigned long ei_startpfn, ei_endpfn;
4655 + for (i = 0; i < e820.nr_map; i++) {
4656 + struct e820entry *ei = &e820.map[i];
4657 + ei_startpfn = round_up(ei->addr, PAGE_SIZE) >> PAGE_SHIFT;
4658 + ei_endpfn = round_down(ei->addr + ei->size, PAGE_SIZE)
4659 + >> PAGE_SHIFT;
4660 +
4661 + /* Skip map entries smaller than a page */
4662 + if (ei_startpfn >= ei_endpfn)
4663 + continue;
4664 +
4665 + /* Check if end_pfn_map should be updated */
4666 + if (ei->type != E820_RAM && ei_endpfn > end_pfn_map)
4667 + end_pfn_map = ei_endpfn;
4668 +
4669 + /* Skip if map is outside the node */
4670 + if (ei->type != E820_RAM ||
4671 + ei_endpfn <= start_pfn ||
4672 + ei_startpfn >= end_pfn)
4673 + continue;
4674 +
4675 + /* Check for overlaps */
4676 + if (ei_startpfn < start_pfn)
4677 + ei_startpfn = start_pfn;
4678 + if (ei_endpfn > end_pfn)
4679 + ei_endpfn = end_pfn;
4680 +
4681 + /* Obey end_user_pfn to save on memmap */
4682 + if (ei_startpfn >= end_user_pfn)
4683 + continue;
4684 + if (ei_endpfn > end_user_pfn)
4685 + ei_endpfn = end_user_pfn;
4686 +
4687 + add_active_range(nid, ei_startpfn, ei_endpfn);
4688 + }
4689 +}
4690 +
4691 /*
4692 * Add a memory region to the kernel e820 map.
4693 */
4694 @@ -553,13 +564,6 @@ static int __init sanitize_e820_map(stru
4695 * If we're lucky and live on a modern system, the setup code
4696 * will have given us a memory map that we can use to properly
4697 * set up memory. If we aren't, we'll fake a memory map.
4698 - *
4699 - * We check to see that the memory map contains at least 2 elements
4700 - * before we'll use it, because the detection code in setup.S may
4701 - * not be perfect and most every PC known to man has two memory
4702 - * regions: one from 0 to 640k, and one from 1mb up. (The IBM
4703 - * thinkpad 560x, for example, does not cooperate with the memory
4704 - * detection code.)
4705 */
4706 static int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
4707 {
4708 @@ -581,27 +585,6 @@ static int __init copy_e820_map(struct e
4709 if (start > end)
4710 return -1;
4711
4712 -#ifndef CONFIG_XEN
4713 - /*
4714 - * Some BIOSes claim RAM in the 640k - 1M region.
4715 - * Not right. Fix it up.
4716 - *
4717 - * This should be removed on Hammer which is supposed to not
4718 - * have non e820 covered ISA mappings there, but I had some strange
4719 - * problems so it stays for now. -AK
4720 - */
4721 - if (type == E820_RAM) {
4722 - if (start < 0x100000ULL && end > 0xA0000ULL) {
4723 - if (start < 0xA0000ULL)
4724 - add_memory_region(start, 0xA0000ULL-start, type);
4725 - if (end <= 0x100000ULL)
4726 - continue;
4727 - start = 0x100000ULL;
4728 - size = end - start;
4729 - }
4730 - }
4731 -#endif
4732 -
4733 add_memory_region(start, size, type);
4734 } while (biosmap++,--nr_map);
4735
4736 @@ -622,11 +605,15 @@ static int __init copy_e820_map(struct e
4737 return 0;
4738 }
4739
4740 +void early_panic(char *msg)
4741 +{
4742 + early_printk(msg);
4743 + panic(msg);
4744 +}
4745 +
4746 #ifndef CONFIG_XEN
4747 void __init setup_memory_region(void)
4748 {
4749 - char *who = "BIOS-e820";
4750 -
4751 /*
4752 * Try to copy the BIOS-supplied E820-map.
4753 *
4754 @@ -634,24 +621,10 @@ void __init setup_memory_region(void)
4755 * the next section from 1mb->appropriate_mem_k
4756 */
4757 sanitize_e820_map(E820_MAP, &E820_MAP_NR);
4758 - if (copy_e820_map(E820_MAP, E820_MAP_NR) < 0) {
4759 - unsigned long mem_size;
4760 -
4761 - /* compare results from other methods and take the greater */
4762 - if (ALT_MEM_K < EXT_MEM_K) {
4763 - mem_size = EXT_MEM_K;
4764 - who = "BIOS-88";
4765 - } else {
4766 - mem_size = ALT_MEM_K;
4767 - who = "BIOS-e801";
4768 - }
4769 -
4770 - e820.nr_map = 0;
4771 - add_memory_region(0, LOWMEMSIZE(), E820_RAM);
4772 - add_memory_region(HIGH_MEMORY, mem_size << 10, E820_RAM);
4773 - }
4774 + if (copy_e820_map(E820_MAP, E820_MAP_NR) < 0)
4775 + early_panic("Cannot find a valid memory map");
4776 printk(KERN_INFO "BIOS-provided physical RAM map:\n");
4777 - e820_print_map(who);
4778 + e820_print_map("BIOS-e820");
4779 }
4780
4781 #else /* CONFIG_XEN */
4782 @@ -683,20 +656,23 @@ void __init setup_memory_region(void)
4783
4784 sanitize_e820_map(map, (char *)&memmap.nr_entries);
4785
4786 - BUG_ON(copy_e820_map(map, (char)memmap.nr_entries) < 0);
4787 + if (copy_e820_map(map, (char)memmap.nr_entries) < 0)
4788 + early_panic("Cannot find a valid memory map");
4789
4790 printk(KERN_INFO "BIOS-provided physical RAM map:\n");
4791 e820_print_map("Xen");
4792 }
4793 #endif
4794
4795 -void __init parse_memopt(char *p, char **from)
4796 -{
4797 +static int __init parse_memopt(char *p)
4798 +{
4799 int i;
4800 unsigned long current_end;
4801 unsigned long end;
4802
4803 - end_user_pfn = memparse(p, from);
4804 + if (!p)
4805 + return -EINVAL;
4806 + end_user_pfn = memparse(p, &p);
4807 end_user_pfn >>= PAGE_SHIFT;
4808
4809 end = end_user_pfn<<PAGE_SHIFT;
4810 @@ -713,27 +689,61 @@ void __init parse_memopt(char *p, char *
4811 else
4812 add_memory_region(current_end, end - current_end, E820_RAM);
4813 }
4814 +
4815 + return 0;
4816 }
4817 +early_param("mem", parse_memopt);
4818 +
4819 +static int userdef __initdata;
4820
4821 -void __init parse_memmapopt(char *p, char **from)
4822 +static int __init parse_memmap_opt(char *p)
4823 {
4824 + char *oldp;
4825 unsigned long long start_at, mem_size;
4826
4827 - mem_size = memparse(p, from);
4828 - p = *from;
4829 + if (!strcmp(p, "exactmap")) {
4830 +#ifdef CONFIG_CRASH_DUMP
4831 +		/*
4832 +		 * If we are doing a crash dump, we still need to know
4833 +		 * the real memory size before the original memory map
4834 +		 * is reset.
4835 +		 */
4836 + e820_register_active_regions(0, 0, -1UL);
4837 + saved_max_pfn = e820_end_of_ram();
4838 + remove_all_active_ranges();
4839 +#endif
4840 + end_pfn_map = 0;
4841 + e820.nr_map = 0;
4842 + userdef = 1;
4843 + return 0;
4844 + }
4845 +
4846 + oldp = p;
4847 + mem_size = memparse(p, &p);
4848 + if (p == oldp)
4849 + return -EINVAL;
4850 if (*p == '@') {
4851 - start_at = memparse(p+1, from);
4852 + start_at = memparse(p+1, &p);
4853 add_memory_region(start_at, mem_size, E820_RAM);
4854 } else if (*p == '#') {
4855 - start_at = memparse(p+1, from);
4856 + start_at = memparse(p+1, &p);
4857 add_memory_region(start_at, mem_size, E820_ACPI);
4858 } else if (*p == '$') {
4859 - start_at = memparse(p+1, from);
4860 + start_at = memparse(p+1, &p);
4861 add_memory_region(start_at, mem_size, E820_RESERVED);
4862 } else {
4863 end_user_pfn = (mem_size >> PAGE_SHIFT);
4864 }
4865 - p = *from;
4866 + return *p == '\0' ? 0 : -EINVAL;
4867 +}
4868 +early_param("memmap", parse_memmap_opt);
4869 +
4870 +void finish_e820_parsing(void)
4871 +{
4872 + if (userdef) {
4873 + printk(KERN_INFO "user-defined physical RAM map:\n");
4874 + e820_print_map("user");
4875 + }
4876 }
4877
4878 unsigned long pci_mem_start = 0xaeedbabe;
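
mem= and memmap= likewise become early_param handlers here, and memmap=exactmap now wipes the firmware-provided map (after the CONFIG_CRASH_DUMP block above has recorded saved_max_pfn via the new e820_register_active_regions()/e820_end_of_ram() pair) so that subsequent memmap= entries define it from scratch. An illustrative capture-kernel command line of the period:

	memmap=exactmap memmap=640K@0 memmap=48M@16M earlyprintk=serial,ttyS0,115200

Per parse_memmap_opt() above, a size with @ adds an E820_RAM region, while the # and $ prefixes mark E820_ACPI and E820_RESERVED ranges; a bare size simply caps end_user_pfn, just as mem= does.
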
4879 --- sle11-2009-10-16.orig/arch/x86/kernel/early_printk-xen.c 2009-10-28 14:55:12.000000000 +0100
4880 +++ sle11-2009-10-16/arch/x86/kernel/early_printk-xen.c 2009-03-04 11:28:34.000000000 +0100
4881 @@ -244,20 +244,16 @@ void early_printk(const char *fmt, ...)
4882
4883 static int __initdata keep_early;
4884
4885 -int __init setup_early_printk(char *opt)
4886 +static int __init setup_early_printk(char *buf)
4887 {
4888 - char *space;
4889 - char buf[256];
4890 + if (!buf)
4891 + return 0;
4892
4893 if (early_console_initialized)
4894 - return 1;
4895 -
4896 - strlcpy(buf,opt,sizeof(buf));
4897 - space = strchr(buf, ' ');
4898 - if (space)
4899 - *space = 0;
4900 + return 0;
4901 + early_console_initialized = 1;
4902
4903 - if (strstr(buf,"keep"))
4904 + if (strstr(buf, "keep"))
4905 keep_early = 1;
4906
4907 if (!strncmp(buf, "serial", 6)) {
4908 @@ -281,11 +277,12 @@ int __init setup_early_printk(char *opt)
4909 early_console = &simnow_console;
4910 keep_early = 1;
4911 }
4912 - early_console_initialized = 1;
4913 register_console(early_console);
4914 return 0;
4915 }
4916
4917 +early_param("earlyprintk", setup_early_printk);
4918 +
4919 void __init disable_early_printk(void)
4920 {
4921 if (!early_console_initialized || !early_console)
4922 @@ -299,4 +296,3 @@ void __init disable_early_printk(void)
4923 }
4924 }
4925
4926 -__setup("earlyprintk=", setup_early_printk);
4927 --- sle11-2009-10-16.orig/arch/x86/kernel/entry_64-xen.S 2009-10-28 14:55:12.000000000 +0100
4928 +++ sle11-2009-10-16/arch/x86/kernel/entry_64-xen.S 2009-03-04 11:28:34.000000000 +0100
4929 @@ -4,9 +4,6 @@
4930 * Copyright (C) 1991, 1992 Linus Torvalds
4931 * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
4932 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
4933 - *
4934 - * $Id$
4935 - *
4936 * Jun Nakajima <jun.nakajima@intel.com>
4937 * Asit Mallick <asit.k.mallick@intel.com>
4938 * Modified for Xen
4939 @@ -26,15 +23,25 @@
4940 * at the top of the kernel process stack.
4941 * - partial stack frame: partially saved registers upto R11.
4942 * - full stack frame: Like partial stack frame, but all register saved.
4943 - *
4944 - * TODO:
4945 - * - schedule it carefully for the final hardware.
4946 + *
4947 + * Some macro usage:
4948 + * - CFI macros are used to generate dwarf2 unwind information for better
4949 + * backtraces. They don't change any code.
4950 + * - SAVE_ALL/RESTORE_ALL - Save/restore all registers
4951 + * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify.
4952 + * There are unfortunately lots of special cases where some registers
4953 + *   are not touched. The macro is a big mess that should be cleaned up.
4954 + * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS.
4955 + * Gives a full stack frame.
4956 + * - ENTRY/END Define functions in the symbol table.
4957 + * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
4958 + * frame that is otherwise undefined after a SYSCALL
4959 + * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging.
4960 + * - errorentry/paranoidentry/zeroentry - Define exception entry points.
4961 */
4962
4963 -#define ASSEMBLY 1
4964 #include <linux/linkage.h>
4965 #include <asm/segment.h>
4966 -#include <asm/smp.h>
4967 #include <asm/cache.h>
4968 #include <asm/errno.h>
4969 #include <asm/dwarf2.h>
4970 @@ -117,6 +124,7 @@ NMI_MASK = 0x80000000
4971 .macro CFI_DEFAULT_STACK start=1,adj=0
4972 .if \start
4973 CFI_STARTPROC simple
4974 + CFI_SIGNAL_FRAME
4975 CFI_DEF_CFA rsp,SS+8 - \adj*ARGOFFSET
4976 .else
4977 CFI_DEF_CFA_OFFSET SS+8 - \adj*ARGOFFSET
4978 @@ -207,6 +215,7 @@ END(ret_from_fork)
4979 */
4980 .macro _frame ref
4981 CFI_STARTPROC simple
4982 + CFI_SIGNAL_FRAME
4983 CFI_DEF_CFA rsp,SS+8-\ref
4984 /*CFI_REL_OFFSET ss,SS-\ref*/
4985 CFI_REL_OFFSET rsp,RSP-\ref
4986 @@ -334,6 +343,8 @@ tracesys:
4987 LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */
4988 RESTORE_REST
4989 cmpq $__NR_syscall_max,%rax
4990 + movq $-ENOSYS,%rcx
4991 + cmova %rcx,%rax
4992 ja 1f
4993 movq %r10,%rcx /* fixup for C */
4994 call *sys_call_table(,%rax,8)
4995 @@ -349,6 +360,7 @@ END(system_call)
4996 */
4997 ENTRY(int_ret_from_sys_call)
4998 CFI_STARTPROC simple
4999 + CFI_SIGNAL_FRAME
5000 CFI_DEF_CFA rsp,SS+8-ARGOFFSET
5001 /*CFI_REL_OFFSET ss,SS-ARGOFFSET*/
5002 CFI_REL_OFFSET rsp,RSP-ARGOFFSET
5003 @@ -583,8 +595,7 @@ retint_signal:
5004 #ifdef CONFIG_PREEMPT
5005 /* Returning to kernel space. Check if we need preemption */
5006 /* rcx: threadinfo. interrupts off. */
5007 - .p2align
5008 -retint_kernel:
5009 +ENTRY(retint_kernel)
5010 cmpl $0,threadinfo_preempt_count(%rcx)
5011 jnz retint_restore_args
5012 bt $TIF_NEED_RESCHED,threadinfo_flags(%rcx)
5013 @@ -644,7 +655,6 @@ ENTRY(call_function_interrupt)
5014 END(call_function_interrupt)
5015 #endif
5016
5017 -#ifdef CONFIG_X86_LOCAL_APIC
5018 ENTRY(apic_timer_interrupt)
5019 apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
5020 END(apic_timer_interrupt)
5021 @@ -656,7 +666,6 @@ END(error_interrupt)
5022 ENTRY(spurious_interrupt)
5023 apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
5024 END(spurious_interrupt)
5025 -#endif
5026 #endif /* !CONFIG_XEN */
5027
5028 /*
5029 @@ -755,7 +764,9 @@ paranoid_exit\trace:
5030 testl $3,CS(%rsp)
5031 jnz paranoid_userspace\trace
5032 paranoid_swapgs\trace:
5033 + .if \trace
5034 TRACE_IRQS_IRETQ 0
5035 + .endif
5036 swapgs
5037 paranoid_restore\trace:
5038 RESTORE_ALL 8
5039 @@ -802,7 +813,7 @@ paranoid_schedule\trace:
5040 * Exception entry point. This expects an error code/orig_rax on the stack
5041 * and the exception handler in %rax.
5042 */
5043 -ENTRY(error_entry)
5044 +KPROBE_ENTRY(error_entry)
5045 _frame RDI
5046 CFI_REL_OFFSET rax,0
5047 /* rdi slot contains rax, oldrax contains error code */
5048 @@ -896,7 +907,7 @@ error_kernelspace:
5049 jmp error_sti
5050 #endif
5051 CFI_ENDPROC
5052 -END(error_entry)
5053 +KPROBE_END(error_entry)
5054
5055 ENTRY(hypervisor_callback)
5056 zeroentry do_hypervisor_callback
5057 @@ -936,26 +947,6 @@ ENTRY(do_hypervisor_callback) # do_hyp
5058 CFI_ENDPROC
5059 END(do_hypervisor_callback)
5060
5061 -#ifdef CONFIG_X86_LOCAL_APIC
5062 -KPROBE_ENTRY(nmi)
5063 - zeroentry do_nmi_callback
5064 -ENTRY(do_nmi_callback)
5065 - CFI_STARTPROC
5066 - addq $8, %rsp
5067 - CFI_ENDPROC
5068 - CFI_DEFAULT_STACK
5069 - call do_nmi
5070 - orl $NMI_MASK,EFLAGS(%rsp)
5071 - RESTORE_REST
5072 - XEN_BLOCK_EVENTS(%rsi)
5073 - TRACE_IRQS_OFF
5074 - GET_THREAD_INFO(%rcx)
5075 - jmp retint_restore_args
5076 - CFI_ENDPROC
5077 - .previous .text
5078 -END(nmi)
5079 -#endif
5080 -
5081 ALIGN
5082 restore_all_enable_events:
5083 CFI_DEFAULT_STACK adj=1
5084 @@ -1121,7 +1112,7 @@ ENDPROC(child_rip)
5085 * do_sys_execve asm fallback arguments:
5086 * rdi: name, rsi: argv, rdx: envp, fake frame on the stack
5087 */
5088 -ENTRY(execve)
5089 +ENTRY(kernel_execve)
5090 CFI_STARTPROC
5091 FAKE_STACK_FRAME $0
5092 SAVE_ALL
5093 @@ -1135,12 +1126,11 @@ ENTRY(execve)
5094 UNFAKE_STACK_FRAME
5095 ret
5096 CFI_ENDPROC
5097 -ENDPROC(execve)
5098 +ENDPROC(kernel_execve)
5099
5100 KPROBE_ENTRY(page_fault)
5101 errorentry do_page_fault
5102 -END(page_fault)
5103 - .previous .text
5104 +KPROBE_END(page_fault)
5105
5106 ENTRY(coprocessor_error)
5107 zeroentry do_coprocessor_error
5108 @@ -1162,25 +1152,25 @@ KPROBE_ENTRY(debug)
5109 zeroentry do_debug
5110 /* paranoidexit
5111 CFI_ENDPROC */
5112 -END(debug)
5113 - .previous .text
5114 +KPROBE_END(debug)
5115
5116 -#if 0
5117 - /* runs on exception stack */
5118 KPROBE_ENTRY(nmi)
5119 - INTR_FRAME
5120 - pushq $-1
5121 - CFI_ADJUST_CFA_OFFSET 8
5122 - paranoidentry do_nmi, 0, 0
5123 -#ifdef CONFIG_TRACE_IRQFLAGS
5124 - paranoidexit 0
5125 -#else
5126 - jmp paranoid_exit1
5127 - CFI_ENDPROC
5128 -#endif
5129 -END(nmi)
5130 - .previous .text
5131 -#endif
5132 + zeroentry do_nmi_callback
5133 +KPROBE_END(nmi)
5134 +do_nmi_callback:
5135 + CFI_STARTPROC
5136 + addq $8, %rsp
5137 + CFI_ENDPROC
5138 + CFI_DEFAULT_STACK
5139 + call do_nmi
5140 + orl $NMI_MASK,EFLAGS(%rsp)
5141 + RESTORE_REST
5142 + XEN_BLOCK_EVENTS(%rsi)
5143 + TRACE_IRQS_OFF
5144 + GET_THREAD_INFO(%rcx)
5145 + jmp retint_restore_args
5146 + CFI_ENDPROC
5147 +END(do_nmi_callback)
5148
5149 KPROBE_ENTRY(int3)
5150 /* INTR_FRAME
5151 @@ -1189,8 +1179,7 @@ KPROBE_ENTRY(int3)
5152 zeroentry do_int3
5153 /* jmp paranoid_exit1
5154 CFI_ENDPROC */
5155 -END(int3)
5156 - .previous .text
5157 +KPROBE_END(int3)
5158
5159 ENTRY(overflow)
5160 zeroentry do_overflow
5161 @@ -1241,8 +1230,7 @@ END(stack_segment)
5162
5163 KPROBE_ENTRY(general_protection)
5164 errorentry do_general_protection
5165 -END(general_protection)
5166 - .previous .text
5167 +KPROBE_END(general_protection)
5168
5169 ENTRY(alignment_check)
5170 errorentry do_alignment_check
5171 --- sle11-2009-10-16.orig/arch/x86/kernel/genapic_xen_64.c 2009-10-28 14:55:12.000000000 +0100
5172 +++ sle11-2009-10-16/arch/x86/kernel/genapic_xen_64.c 2009-03-04 11:28:34.000000000 +0100
5173 @@ -71,6 +71,13 @@ static cpumask_t xen_target_cpus(void)
5174 return cpu_online_map;
5175 }
5176
5177 +static cpumask_t xen_vector_allocation_domain(int cpu)
5178 +{
5179 + cpumask_t domain = CPU_MASK_NONE;
5180 + cpu_set(cpu, domain);
5181 + return domain;
5182 +}
5183 +
5184 /*
5185 * Set up the logical destination ID.
5186 * Do nothing, not called now.
5187 @@ -147,8 +154,8 @@ struct genapic apic_xen = {
5188 .int_delivery_mode = dest_LowestPrio,
5189 #endif
5190 .int_dest_mode = (APIC_DEST_LOGICAL != 0),
5191 - .int_delivery_dest = APIC_DEST_LOGICAL | APIC_DM_LOWEST,
5192 .target_cpus = xen_target_cpus,
5193 + .vector_allocation_domain = xen_vector_allocation_domain,
5194 #ifdef CONFIG_XEN_PRIVILEGED_GUEST
5195 .apic_id_registered = xen_apic_id_registered,
5196 #endif
5197 --- sle11-2009-10-16.orig/arch/x86/kernel/head_64-xen.S 2009-10-28 14:55:12.000000000 +0100
5198 +++ sle11-2009-10-16/arch/x86/kernel/head_64-xen.S 2009-03-04 11:28:34.000000000 +0100
5199 @@ -5,9 +5,6 @@
5200 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
5201 * Copyright (C) 2000 Karsten Keil <kkeil@suse.de>
5202 * Copyright (C) 2001,2002 Andi Kleen <ak@suse.de>
5203 - *
5204 - * $Id: head.S,v 1.49 2002/03/19 17:39:25 ak Exp $
5205 - *
5206 * Jun Nakajima <jun.nakajima@intel.com>
5207 * Modified for Xen
5208 */
5209 @@ -149,7 +146,7 @@ ENTRY(cpu_gdt_table)
5210 .quad 0,0 /* TSS */
5211 .quad 0,0 /* LDT */
5212 .quad 0,0,0 /* three TLS descriptors */
5213 - .quad 0 /* unused */
5214 + .quad 0x0000f40000000000 /* node/CPU stored in limit */
5215 gdt_end:
5216 /* asm/segment.h:GDT_ENTRIES must match this */
5217 /* This should be a multiple of the cache line size */
5218 --- sle11-2009-10-16.orig/arch/x86/kernel/head64-xen.c 2009-10-28 14:55:12.000000000 +0100
5219 +++ sle11-2009-10-16/arch/x86/kernel/head64-xen.c 2009-03-04 11:28:34.000000000 +0100
5220 @@ -7,6 +7,9 @@
5221 * Modified for Xen.
5222 */
5223
5224 +/* PDA is not ready to be used until the end of x86_64_start_kernel(). */
5225 +#define arch_use_lazy_mmu_mode() false
5226 +
5227 #include <linux/init.h>
5228 #include <linux/linkage.h>
5229 #include <linux/types.h>
5230 @@ -54,11 +57,9 @@ static void __init copy_bootdata(char *r
5231 new_data = *(int *) (x86_boot_params + NEW_CL_POINTER);
5232 if (!new_data) {
5233 if (OLD_CL_MAGIC != * (u16 *) OLD_CL_MAGIC_ADDR) {
5234 - printk("so old bootloader that it does not support commandline?!\n");
5235 return;
5236 }
5237 new_data = OLD_CL_BASE_ADDR + * (u16 *) OLD_CL_OFFSET;
5238 - printk("old bootloader convention, maybe loadlin?\n");
5239 }
5240 command_line = (char *) ((u64)(new_data));
5241 memcpy(saved_command_line, command_line, COMMAND_LINE_SIZE);
5242 @@ -70,25 +71,6 @@ static void __init copy_bootdata(char *r
5243 memcpy(saved_command_line, xen_start_info->cmd_line, max_cmdline);
5244 saved_command_line[max_cmdline-1] = '\0';
5245 #endif
5246 - printk("Bootdata ok (command line is %s)\n", saved_command_line);
5247 -}
5248 -
5249 -static void __init setup_boot_cpu_data(void)
5250 -{
5251 - unsigned int dummy, eax;
5252 -
5253 - /* get vendor info */
5254 - cpuid(0, (unsigned int *)&boot_cpu_data.cpuid_level,
5255 - (unsigned int *)&boot_cpu_data.x86_vendor_id[0],
5256 - (unsigned int *)&boot_cpu_data.x86_vendor_id[8],
5257 - (unsigned int *)&boot_cpu_data.x86_vendor_id[4]);
5258 -
5259 - /* get cpu type */
5260 - cpuid(1, &eax, &dummy, &dummy,
5261 - (unsigned int *) &boot_cpu_data.x86_capability);
5262 - boot_cpu_data.x86 = (eax >> 8) & 0xf;
5263 - boot_cpu_data.x86_model = (eax >> 4) & 0xf;
5264 - boot_cpu_data.x86_mask = eax & 0xf;
5265 }
5266
5267 #include <xen/interface/memory.h>
5268 @@ -101,7 +83,6 @@ void __init x86_64_start_kernel(char * r
5269 {
5270 struct xen_machphys_mapping mapping;
5271 unsigned long machine_to_phys_nr_ents;
5272 - char *s;
5273 int i;
5274
5275 setup_xen_features();
5276 @@ -128,10 +109,7 @@ void __init x86_64_start_kernel(char * r
5277 asm volatile("lidt %0" :: "m" (idt_descr));
5278 #endif
5279
5280 - /*
5281 - * This must be called really, really early:
5282 - */
5283 - lockdep_init();
5284 + early_printk("Kernel alive\n");
5285
5286 for (i = 0; i < NR_CPUS; i++)
5287 cpu_pda(i) = &boot_cpu_pda[i];
5288 @@ -141,22 +119,5 @@ void __init x86_64_start_kernel(char * r
5289 #ifdef CONFIG_SMP
5290 cpu_set(0, cpu_online_map);
5291 #endif
5292 - s = strstr(saved_command_line, "earlyprintk=");
5293 - if (s != NULL)
5294 - setup_early_printk(strchr(s, '=') + 1);
5295 -#ifdef CONFIG_NUMA
5296 - s = strstr(saved_command_line, "numa=");
5297 - if (s != NULL)
5298 - numa_setup(s+5);
5299 -#endif
5300 -#ifdef CONFIG_X86_IO_APIC
5301 - if (strstr(saved_command_line, "disableapic"))
5302 - disable_apic = 1;
5303 -#endif
5304 - /* You need early console to see that */
5305 - if (__pa_symbol(&_end) >= KERNEL_TEXT_SIZE)
5306 - panic("Kernel too big for kernel mapping\n");
5307 -
5308 - setup_boot_cpu_data();
5309 start_kernel();
5310 }
5311 --- sle11-2009-10-16.orig/arch/x86/kernel/io_apic_64-xen.c 2009-03-16 16:13:45.000000000 +0100
5312 +++ sle11-2009-10-16/arch/x86/kernel/io_apic_64-xen.c 2009-03-04 11:28:34.000000000 +0100
5313 @@ -26,9 +26,12 @@
5314 #include <linux/delay.h>
5315 #include <linux/sched.h>
5316 #include <linux/smp_lock.h>
5317 +#include <linux/pci.h>
5318 #include <linux/mc146818rtc.h>
5319 #include <linux/acpi.h>
5320 #include <linux/sysdev.h>
5321 +#include <linux/msi.h>
5322 +#include <linux/htirq.h>
5323 #ifdef CONFIG_ACPI
5324 #include <acpi/acpi_bus.h>
5325 #endif
5326 @@ -41,6 +44,10 @@
5327 #include <asm/acpi.h>
5328 #include <asm/dma.h>
5329 #include <asm/nmi.h>
5330 +#include <asm/msidef.h>
5331 +#include <asm/hypertransport.h>
5332 +
5333 +static int assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result);
5334
5335 #define __apicdebuginit __init
5336
5337 @@ -48,17 +55,30 @@ int sis_apic_bug; /* not actually suppor
5338
5339 static int no_timer_check;
5340
5341 -int disable_timer_pin_1 __initdata;
5342 +static int disable_timer_pin_1 __initdata;
5343
5344 -#ifndef CONFIG_XEN
5345 -int timer_over_8254 __initdata = 0;
5346 +#ifdef CONFIG_XEN
5347 +#include <xen/interface/xen.h>
5348 +#include <xen/interface/physdev.h>
5349 +#include <xen/evtchn.h>
5350 +
5351 +/* Fake i8259 */
5352 +#define make_8259A_irq(_irq) (io_apic_irqs &= ~(1UL<<(_irq)))
5353 +#define disable_8259A_irq(_irq) ((void)0)
5354 +#define i8259A_irq_pending(_irq) (0)
5355 +
5356 +unsigned long io_apic_irqs;
5357 +
5358 +#define clear_IO_APIC() ((void)0)
5359 +#else
5360 +int timer_over_8254 __initdata = 1;
5361
5362 /* Where if anywhere is the i8259 connect in external int mode */
5363 static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
5364 #endif
5365
5366 static DEFINE_SPINLOCK(ioapic_lock);
5367 -static DEFINE_SPINLOCK(vector_lock);
5368 +DEFINE_SPINLOCK(vector_lock);
5369
5370 /*
5371 * # of IRQ routing registers
5372 @@ -83,29 +103,27 @@ static struct irq_pin_list {
5373 short apic, pin, next;
5374 } irq_2_pin[PIN_MAP_SIZE];
5375
5376 -int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1};
5377 -#ifdef CONFIG_PCI_MSI
5378 -#define vector_to_irq(vector) \
5379 - (platform_legacy_irq(vector) ? vector : vector_irq[vector])
5380 -#else
5381 -#define vector_to_irq(vector) (vector)
5382 -#endif
5383 -
5384 -#ifdef CONFIG_XEN
5385 -
5386 -#include <xen/interface/xen.h>
5387 -#include <xen/interface/physdev.h>
5388 -#include <xen/evtchn.h>
5389 -
5390 -/* Fake i8259 */
5391 -#define make_8259A_irq(_irq) (io_apic_irqs &= ~(1UL<<(_irq)))
5392 -#define disable_8259A_irq(_irq) ((void)0)
5393 -#define i8259A_irq_pending(_irq) (0)
5394 +#ifndef CONFIG_XEN
5395 +struct io_apic {
5396 + unsigned int index;
5397 + unsigned int unused[3];
5398 + unsigned int data;
5399 +};
5400
5401 -unsigned long io_apic_irqs;
5402 +static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
5403 +{
5404 + return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
5405 + + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK);
5406 +}
5407 +#endif
5408
5409 -static inline unsigned int xen_io_apic_read(unsigned int apic, unsigned int reg)
5410 +static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
5411 {
5412 +#ifndef CONFIG_XEN
5413 + struct io_apic __iomem *io_apic = io_apic_base(apic);
5414 + writel(reg, &io_apic->index);
5415 + return readl(&io_apic->data);
5416 +#else
5417 struct physdev_apic apic_op;
5418 int ret;
5419
5420 @@ -115,31 +133,133 @@ static inline unsigned int xen_io_apic_r
5421 if (ret)
5422 return ret;
5423 return apic_op.value;
5424 +#endif
5425 }
5426
5427 -static inline void xen_io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
5428 +static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
5429 {
5430 +#ifndef CONFIG_XEN
5431 + struct io_apic __iomem *io_apic = io_apic_base(apic);
5432 + writel(reg, &io_apic->index);
5433 + writel(value, &io_apic->data);
5434 +#else
5435 struct physdev_apic apic_op;
5436
5437 apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr;
5438 apic_op.reg = reg;
5439 apic_op.value = value;
5440 WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic_op));
5441 +#endif
5442 +}
5443 +
5444 +#ifndef CONFIG_XEN
5445 +/*
5446 + * Re-write a value: to be used for read-modify-write
5447 + * cycles where the read already set up the index register.
5448 + */
5449 +static inline void io_apic_modify(unsigned int apic, unsigned int value)
5450 +{
5451 + struct io_apic __iomem *io_apic = io_apic_base(apic);
5452 + writel(value, &io_apic->data);
5453 }
5454 +#else
5455 +#define io_apic_modify io_apic_write
5456 +#endif
5457
5458 -#define io_apic_read(a,r) xen_io_apic_read(a,r)
5459 -#define io_apic_write(a,r,v) xen_io_apic_write(a,r,v)
5460 +/*
5461 + * Synchronize the IO-APIC and the CPU by doing
5462 + * a dummy read from the IO-APIC
5463 + */
5464 +static inline void io_apic_sync(unsigned int apic)
5465 +{
5466 +#ifndef CONFIG_XEN
5467 + struct io_apic __iomem *io_apic = io_apic_base(apic);
5468 + readl(&io_apic->data);
5469 +#endif
5470 +}
5471
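
The unified io_apic_read()/io_apic_write() above keep the classic indexed-register protocol on bare metal: the register number goes into the index window, the payload moves through the data window, and io_apic_modify() reuses an index already set up by a prior read. A self-contained sketch against an in-memory model (fake_ioapic and the array-backed register file are invented for illustration; the kernel uses writel()/readl() on a fixmap mapping):

#include <stdio.h>
#include <stdint.h>

/*
 * Sketch of the index/data access pattern: one "index" register
 * selects an internal register, one "data" window reads or writes
 * the selected register. The device is modelled as a plain array.
 */
struct fake_ioapic {
        uint32_t index;
        uint32_t regs[64];      /* internal register file */
};

static uint32_t ioapic_read(struct fake_ioapic *a, uint32_t reg)
{
        a->index = reg;               /* writel(reg, &io_apic->index) */
        return a->regs[a->index];     /* readl(&io_apic->data)        */
}

static void ioapic_write(struct fake_ioapic *a, uint32_t reg, uint32_t val)
{
        a->index = reg;
        a->regs[a->index] = val;
}

/* Re-write using the index already set up by a prior read. */
static void ioapic_modify(struct fake_ioapic *a, uint32_t val)
{
        a->regs[a->index] = val;      /* data write only, no index write */
}

int main(void)
{
        struct fake_ioapic apic = { 0 };
        uint32_t v;

        ioapic_write(&apic, 0x10, 0x12345);
        v = ioapic_read(&apic, 0x10);
        ioapic_modify(&apic, v | 1);  /* read-modify-write, one index setup */
        printf("reg 0x10 = 0x%x\n", ioapic_read(&apic, 0x10));
        return 0;
}
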
5472 -#define clear_IO_APIC() ((void)0)
5473 +union entry_union {
5474 + struct { u32 w1, w2; };
5475 + struct IO_APIC_route_entry entry;
5476 +};
5477
5478 -#else
5479 +#ifndef CONFIG_XEN
5480 +static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
5481 +{
5482 + union entry_union eu;
5483 + unsigned long flags;
5484 + spin_lock_irqsave(&ioapic_lock, flags);
5485 + eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
5486 + eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
5487 + spin_unlock_irqrestore(&ioapic_lock, flags);
5488 + return eu.entry;
5489 +}
5490 +#endif
5491 +
5492 +/*
5493 + * When we write a new IO APIC routing entry, we need to write the high
5494 + * word first! If the mask bit in the low word is clear, we will enable
5495 + * the interrupt, and we need to make sure the entry is fully populated
5496 + * before that happens.
5497 + */
5498 +static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
5499 +{
5500 + unsigned long flags;
5501 + union entry_union eu;
5502 + eu.entry = e;
5503 + spin_lock_irqsave(&ioapic_lock, flags);
5504 + io_apic_write(apic, 0x11 + 2*pin, eu.w2);
5505 + io_apic_write(apic, 0x10 + 2*pin, eu.w1);
5506 + spin_unlock_irqrestore(&ioapic_lock, flags);
5507 +}
5508 +
5509 +#ifndef CONFIG_XEN
5510 +/*
5511 + * When we mask an IO APIC routing entry, we need to write the low
5512 + * word first, in order to set the mask bit before we change the
5513 + * high bits!
5514 + */
5515 +static void ioapic_mask_entry(int apic, int pin)
5516 +{
5517 + unsigned long flags;
5518 + union entry_union eu = { .entry.mask = 1 };
5519 +
5520 + spin_lock_irqsave(&ioapic_lock, flags);
5521 + io_apic_write(apic, 0x10 + 2*pin, eu.w1);
5522 + io_apic_write(apic, 0x11 + 2*pin, eu.w2);
5523 + spin_unlock_irqrestore(&ioapic_lock, flags);
5524 +}
5525
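The two helpers above encode opposite write orders for the same 64-bit routing entry: programming stores the high word first so the entry is fully populated before the mask bit can clear, while masking stores the low word first so the mask bit is set before the high bits change. A compilable sketch of the idea, with a simplified entry layout (the real struct IO_APIC_route_entry carries more fields):

#include <stdio.h>
#include <stdint.h>

/*
 * Sketch of the write-ordering rules, using a reduced route entry.
 * The union view lets the 64-bit entry move as two 32-bit register
 * writes; the order of those writes is the whole point.
 */
struct route_entry {
        uint64_t vector   :  8;
        uint64_t reserved :  8;
        uint64_t mask     :  1;   /* bit 16: lives in the low word  */
        uint64_t dest     : 47;   /* upper bits: high word          */
};

union entry_union {
        struct { uint32_t w1, w2; };    /* w1 = low word, w2 = high word */
        struct route_entry entry;
};

static uint32_t hw_reg[2];              /* stand-in for the two RTE registers */

static void program_entry(union entry_union eu)
{
        hw_reg[1] = eu.w2;      /* high word first: populate before enable */
        hw_reg[0] = eu.w1;      /* low word last: may clear the mask bit   */
}

static void mask_entry(void)
{
        union entry_union eu = { .entry.mask = 1 };

        hw_reg[0] = eu.w1;      /* low word first: set the mask bit...      */
        hw_reg[1] = eu.w2;      /* ...then the high bits may change safely  */
}

int main(void)
{
        union entry_union eu = { .entry = { .vector = 0x31, .dest = 1 } };

        program_entry(eu);
        mask_entry();
        printf("low=0x%08x high=0x%08x\n", hw_reg[0], hw_reg[1]);
        return 0;
}
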
5526 #ifdef CONFIG_SMP
5527 +static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
5528 +{
5529 + int apic, pin;
5530 + struct irq_pin_list *entry = irq_2_pin + irq;
5531 +
5532 + BUG_ON(irq >= NR_IRQS);
5533 + for (;;) {
5534 + unsigned int reg;
5535 + apic = entry->apic;
5536 + pin = entry->pin;
5537 + if (pin == -1)
5538 + break;
5539 + io_apic_write(apic, 0x11 + pin*2, dest);
5540 + reg = io_apic_read(apic, 0x10 + pin*2);
5541 + reg &= ~0x000000ff;
5542 + reg |= vector;
5543 + io_apic_modify(apic, reg);
5544 + if (!entry->next)
5545 + break;
5546 + entry = irq_2_pin + entry->next;
5547 + }
5548 +}
5549 +
5550 static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
5551 {
5552 unsigned long flags;
5553 unsigned int dest;
5554 cpumask_t tmp;
5555 + int vector;
5556
5557 cpus_and(tmp, mask, cpu_online_map);
5558 if (cpus_empty(tmp))
5559 @@ -147,7 +267,11 @@ static void set_ioapic_affinity_irq(unsi
5560
5561 cpus_and(mask, tmp, CPU_MASK_ALL);
5562
5563 - dest = cpu_mask_to_apicid(mask);
5564 + vector = assign_irq_vector(irq, mask, &tmp);
5565 + if (vector < 0)
5566 + return;
5567 +
5568 + dest = cpu_mask_to_apicid(tmp);
5569
5570 /*
5571 * Only the high 8 bits are valid.
5572 @@ -155,13 +279,12 @@ static void set_ioapic_affinity_irq(unsi
5573 dest = SET_APIC_LOGICAL_ID(dest);
5574
5575 spin_lock_irqsave(&ioapic_lock, flags);
5576 - __DO_ACTION(1, = dest, )
5577 - set_irq_info(irq, mask);
5578 + __target_IO_APIC_irq(irq, dest, vector);
5579 + set_native_irq_info(irq, mask);
5580 spin_unlock_irqrestore(&ioapic_lock, flags);
5581 }
5582 #endif
5583 -
5584 -#endif /* !CONFIG_XEN */
5585 +#endif
5586
5587 /*
5588 * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
5589 @@ -241,24 +364,15 @@ static void unmask_IO_APIC_irq (unsigned
5590 static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
5591 {
5592 struct IO_APIC_route_entry entry;
5593 - unsigned long flags;
5594
5595 /* Check delivery_mode to be sure we're not clearing an SMI pin */
5596 - spin_lock_irqsave(&ioapic_lock, flags);
5597 - *(((int*)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
5598 - *(((int*)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
5599 - spin_unlock_irqrestore(&ioapic_lock, flags);
5600 + entry = ioapic_read_entry(apic, pin);
5601 if (entry.delivery_mode == dest_SMI)
5602 return;
5603 /*
5604 * Disable it in the IO-APIC irq-routing table:
5605 */
5606 - memset(&entry, 0, sizeof(entry));
5607 - entry.mask = 1;
5608 - spin_lock_irqsave(&ioapic_lock, flags);
5609 - io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry) + 0));
5610 - io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry) + 1));
5611 - spin_unlock_irqrestore(&ioapic_lock, flags);
5612 + ioapic_mask_entry(apic, pin);
5613 }
5614
5615 static void clear_IO_APIC (void)
5616 @@ -272,16 +386,6 @@ static void clear_IO_APIC (void)
5617
5618 #endif /* !CONFIG_XEN */
5619
5620 -static u8 gsi_2_irq[NR_IRQ_VECTORS] = { [0 ... NR_IRQ_VECTORS-1] = 0xFF };
5621 -
5622 -/*
5623 - * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
5624 - * specific CPU-side IRQs.
5625 - */
5626 -
5627 -#define MAX_PIRQS 8
5628 -static int pirq_entries [MAX_PIRQS];
5629 -static int pirqs_enabled;
5630 int skip_ioapic_setup;
5631 int ioapic_force;
5632
5633 @@ -290,18 +394,17 @@ int ioapic_force;
5634 static int __init disable_ioapic_setup(char *str)
5635 {
5636 skip_ioapic_setup = 1;
5637 - return 1;
5638 + return 0;
5639 }
5640 +early_param("noapic", disable_ioapic_setup);
5641
5642 -static int __init enable_ioapic_setup(char *str)
5643 +/* Actually the next is obsolete, but keep it for paranoid reasons -AK */
5644 +static int __init disable_timer_pin_setup(char *arg)
5645 {
5646 - ioapic_force = 1;
5647 - skip_ioapic_setup = 0;
5648 + disable_timer_pin_1 = 1;
5649 return 1;
5650 }
5651 -
5652 -__setup("noapic", disable_ioapic_setup);
5653 -__setup("apic", enable_ioapic_setup);
5654 +__setup("disable_timer_pin_1", disable_timer_pin_setup);
5655
5656 #ifndef CONFIG_XEN
5657 static int __init setup_disable_8254_timer(char *s)
5658 @@ -319,137 +422,6 @@ __setup("disable_8254_timer", setup_disa
5659 __setup("enable_8254_timer", setup_enable_8254_timer);
5660 #endif /* !CONFIG_XEN */
5661
5662 -#include <asm/pci-direct.h>
5663 -#include <linux/pci_ids.h>
5664 -#include <linux/pci.h>
5665 -
5666 -
5667 -#ifdef CONFIG_ACPI
5668 -
5669 -static int nvidia_hpet_detected __initdata;
5670 -
5671 -static int __init nvidia_hpet_check(unsigned long phys, unsigned long size)
5672 -{
5673 - nvidia_hpet_detected = 1;
5674 - return 0;
5675 -}
5676 -#endif
5677 -
5678 -/* Temporary Hack. Nvidia and VIA boards currently only work with IO-APIC
5679 - off. Check for an Nvidia or VIA PCI bridge and turn it off.
5680 - Use pci direct infrastructure because this runs before the PCI subsystem.
5681 -
5682 - Can be overwritten with "apic"
5683 -
5684 - And another hack to disable the IOMMU on VIA chipsets.
5685 -
5686 - ... and others. Really should move this somewhere else.
5687 -
5688 - Kludge-O-Rama. */
5689 -void __init check_ioapic(void)
5690 -{
5691 - int num,slot,func;
5692 - /* Poor man's PCI discovery */
5693 - for (num = 0; num < 32; num++) {
5694 - for (slot = 0; slot < 32; slot++) {
5695 - for (func = 0; func < 8; func++) {
5696 - u32 class;
5697 - u32 vendor;
5698 - u8 type;
5699 - class = read_pci_config(num,slot,func,
5700 - PCI_CLASS_REVISION);
5701 - if (class == 0xffffffff)
5702 - break;
5703 -
5704 - if ((class >> 16) != PCI_CLASS_BRIDGE_PCI)
5705 - continue;
5706 -
5707 - vendor = read_pci_config(num, slot, func,
5708 - PCI_VENDOR_ID);
5709 - vendor &= 0xffff;
5710 - switch (vendor) {
5711 - case PCI_VENDOR_ID_VIA:
5712 -#ifdef CONFIG_IOMMU
5713 - if ((end_pfn > MAX_DMA32_PFN ||
5714 - force_iommu) &&
5715 - !iommu_aperture_allowed) {
5716 - printk(KERN_INFO
5717 - "Looks like a VIA chipset. Disabling IOMMU. Override with \"iommu=allowed\"\n");
5718 - iommu_aperture_disabled = 1;
5719 - }
5720 -#endif
5721 - return;
5722 - case PCI_VENDOR_ID_NVIDIA:
5723 -#ifdef CONFIG_ACPI
5724 - /*
5725 - * All timer overrides on Nvidia are
5726 - * wrong unless HPET is enabled.
5727 - */
5728 - nvidia_hpet_detected = 0;
5729 - acpi_table_parse(ACPI_HPET,
5730 - nvidia_hpet_check);
5731 - if (nvidia_hpet_detected == 0) {
5732 - acpi_skip_timer_override = 1;
5733 - printk(KERN_INFO "Nvidia board "
5734 - "detected. Ignoring ACPI "
5735 - "timer override.\n");
5736 - }
5737 -#endif
5738 - /* RED-PEN skip them on mptables too? */
5739 - return;
5740 - case PCI_VENDOR_ID_ATI:
5741 -
5742 - /* This should be actually default, but
5743 - for 2.6.16 let's do it for ATI only where
5744 - it's really needed. */
5745 -#ifndef CONFIG_XEN
5746 - if (timer_over_8254 == 1) {
5747 - timer_over_8254 = 0;
5748 - printk(KERN_INFO
5749 - "ATI board detected. Disabling timer routing over 8254.\n");
5750 - }
5751 -#endif
5752 - return;
5753 - }
5754 -
5755 -
5756 - /* No multi-function device? */
5757 - type = read_pci_config_byte(num,slot,func,
5758 - PCI_HEADER_TYPE);
5759 - if (!(type & 0x80))
5760 - break;
5761 - }
5762 - }
5763 - }
5764 -}
5765 -
5766 -static int __init ioapic_pirq_setup(char *str)
5767 -{
5768 - int i, max;
5769 - int ints[MAX_PIRQS+1];
5770 -
5771 - get_options(str, ARRAY_SIZE(ints), ints);
5772 -
5773 - for (i = 0; i < MAX_PIRQS; i++)
5774 - pirq_entries[i] = -1;
5775 -
5776 - pirqs_enabled = 1;
5777 - apic_printk(APIC_VERBOSE, "PIRQ redirection, working around broken MP-BIOS.\n");
5778 - max = MAX_PIRQS;
5779 - if (ints[0] < MAX_PIRQS)
5780 - max = ints[0];
5781 -
5782 - for (i = 0; i < max; i++) {
5783 - apic_printk(APIC_VERBOSE, "... PIRQ%d -> IRQ %d\n", i, ints[i+1]);
5784 - /*
5785 - * PIRQs are mapped upside down, usually.
5786 - */
5787 - pirq_entries[MAX_PIRQS-i-1] = ints[i+1];
5788 - }
5789 - return 1;
5790 -}
5791 -
5792 -__setup("pirq=", ioapic_pirq_setup);
5793
5794 /*
5795 * Find the IRQ entry number of a certain pin.
5796 @@ -479,9 +451,7 @@ static int __init find_isa_irq_pin(int i
5797 for (i = 0; i < mp_irq_entries; i++) {
5798 int lbus = mp_irqs[i].mpc_srcbus;
5799
5800 - if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
5801 - mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
5802 - mp_bus_id_to_type[lbus] == MP_BUS_MCA) &&
5803 + if (test_bit(lbus, mp_bus_not_pci) &&
5804 (mp_irqs[i].mpc_irqtype == type) &&
5805 (mp_irqs[i].mpc_srcbusirq == irq))
5806
5807 @@ -497,9 +467,7 @@ static int __init find_isa_irq_apic(int
5808 for (i = 0; i < mp_irq_entries; i++) {
5809 int lbus = mp_irqs[i].mpc_srcbus;
5810
5811 - if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
5812 - mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
5813 - mp_bus_id_to_type[lbus] == MP_BUS_MCA) &&
5814 + if (test_bit(lbus, mp_bus_not_pci) &&
5815 (mp_irqs[i].mpc_irqtype == type) &&
5816 (mp_irqs[i].mpc_srcbusirq == irq))
5817 break;
5818 @@ -540,7 +508,7 @@ int IO_APIC_get_PCI_irq_vector(int bus,
5819 mp_irqs[i].mpc_dstapic == MP_APIC_ALL)
5820 break;
5821
5822 - if ((mp_bus_id_to_type[lbus] == MP_BUS_PCI) &&
5823 + if (!test_bit(lbus, mp_bus_not_pci) &&
5824 !mp_irqs[i].mpc_irqtype &&
5825 (bus == lbus) &&
5826 (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) {
5827 @@ -563,27 +531,6 @@ int IO_APIC_get_PCI_irq_vector(int bus,
5828 return best_guess;
5829 }
5830
5831 -/*
5832 - * EISA Edge/Level control register, ELCR
5833 - */
5834 -static int EISA_ELCR(unsigned int irq)
5835 -{
5836 - if (irq < 16) {
5837 - unsigned int port = 0x4d0 + (irq >> 3);
5838 - return (inb(port) >> (irq & 7)) & 1;
5839 - }
5840 - apic_printk(APIC_VERBOSE, "Broken MPtable reports ISA irq %d\n", irq);
5841 - return 0;
5842 -}
5843 -
5844 -/* EISA interrupts are always polarity zero and can be edge or level
5845 - * trigger depending on the ELCR value. If an interrupt is listed as
5846 - * EISA conforming in the MP table, that means its trigger type must
5847 - * be read in from the ELCR */
5848 -
5849 -#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mpc_srcbusirq))
5850 -#define default_EISA_polarity(idx) (0)
5851 -
5852 /* ISA interrupts are always polarity zero edge triggered,
5853 * when listed as conforming in the MP table. */
5854
5855 @@ -596,12 +543,6 @@ static int EISA_ELCR(unsigned int irq)
5856 #define default_PCI_trigger(idx) (1)
5857 #define default_PCI_polarity(idx) (1)
5858
5859 -/* MCA interrupts are always polarity zero level triggered,
5860 - * when listed as conforming in the MP table. */
5861 -
5862 -#define default_MCA_trigger(idx) (1)
5863 -#define default_MCA_polarity(idx) (0)
5864 -
5865 static int __init MPBIOS_polarity(int idx)
5866 {
5867 int bus = mp_irqs[idx].mpc_srcbus;
5868 @@ -613,38 +554,11 @@ static int __init MPBIOS_polarity(int id
5869 switch (mp_irqs[idx].mpc_irqflag & 3)
5870 {
5871 case 0: /* conforms, ie. bus-type dependent polarity */
5872 - {
5873 - switch (mp_bus_id_to_type[bus])
5874 - {
5875 - case MP_BUS_ISA: /* ISA pin */
5876 - {
5877 - polarity = default_ISA_polarity(idx);
5878 - break;
5879 - }
5880 - case MP_BUS_EISA: /* EISA pin */
5881 - {
5882 - polarity = default_EISA_polarity(idx);
5883 - break;
5884 - }
5885 - case MP_BUS_PCI: /* PCI pin */
5886 - {
5887 - polarity = default_PCI_polarity(idx);
5888 - break;
5889 - }
5890 - case MP_BUS_MCA: /* MCA pin */
5891 - {
5892 - polarity = default_MCA_polarity(idx);
5893 - break;
5894 - }
5895 - default:
5896 - {
5897 - printk(KERN_WARNING "broken BIOS!!\n");
5898 - polarity = 1;
5899 - break;
5900 - }
5901 - }
5902 + if (test_bit(bus, mp_bus_not_pci))
5903 + polarity = default_ISA_polarity(idx);
5904 + else
5905 + polarity = default_PCI_polarity(idx);
5906 break;
5907 - }
5908 case 1: /* high active */
5909 {
5910 polarity = 0;
5911 @@ -682,38 +596,11 @@ static int MPBIOS_trigger(int idx)
5912 switch ((mp_irqs[idx].mpc_irqflag>>2) & 3)
5913 {
5914 case 0: /* conforms, ie. bus-type dependent */
5915 - {
5916 - switch (mp_bus_id_to_type[bus])
5917 - {
5918 - case MP_BUS_ISA: /* ISA pin */
5919 - {
5920 - trigger = default_ISA_trigger(idx);
5921 - break;
5922 - }
5923 - case MP_BUS_EISA: /* EISA pin */
5924 - {
5925 - trigger = default_EISA_trigger(idx);
5926 - break;
5927 - }
5928 - case MP_BUS_PCI: /* PCI pin */
5929 - {
5930 - trigger = default_PCI_trigger(idx);
5931 - break;
5932 - }
5933 - case MP_BUS_MCA: /* MCA pin */
5934 - {
5935 - trigger = default_MCA_trigger(idx);
5936 - break;
5937 - }
5938 - default:
5939 - {
5940 - printk(KERN_WARNING "broken BIOS!!\n");
5941 - trigger = 1;
5942 - break;
5943 - }
5944 - }
5945 + if (test_bit(bus, mp_bus_not_pci))
5946 + trigger = default_ISA_trigger(idx);
5947 + else
5948 + trigger = default_PCI_trigger(idx);
5949 break;
5950 - }
5951 case 1: /* edge */
5952 {
5953 trigger = 0;
5954 @@ -750,64 +637,6 @@ static inline int irq_trigger(int idx)
5955 return MPBIOS_trigger(idx);
5956 }
5957
5958 -static int next_irq = 16;
5959 -
5960 -/*
5961 - * gsi_irq_sharing -- Name overload! "irq" can be either a legacy IRQ
5962 - * in the range 0-15, a linux IRQ in the range 0-223, or a GSI number
5963 - * from ACPI, which can reach 800 in large boxen.
5964 - *
5965 - * Compact the sparse GSI space into a sequential IRQ series and reuse
5966 - * vectors if possible.
5967 - */
5968 -int gsi_irq_sharing(int gsi)
5969 -{
5970 - int i, tries, vector;
5971 -
5972 - BUG_ON(gsi >= NR_IRQ_VECTORS);
5973 -
5974 - if (platform_legacy_irq(gsi))
5975 - return gsi;
5976 -
5977 - if (gsi_2_irq[gsi] != 0xFF)
5978 - return (int)gsi_2_irq[gsi];
5979 -
5980 - tries = NR_IRQS;
5981 - try_again:
5982 - vector = assign_irq_vector(gsi);
5983 -
5984 - /*
5985 - * Sharing vectors means sharing IRQs, so scan irq_vectors for previous
5986 - * use of vector and if found, return that IRQ. However, we never want
5987 - * to share legacy IRQs, which usually have a different trigger mode
5988 - * than PCI.
5989 - */
5990 - for (i = 0; i < NR_IRQS; i++)
5991 - if (IO_APIC_VECTOR(i) == vector)
5992 - break;
5993 - if (platform_legacy_irq(i)) {
5994 - if (--tries >= 0) {
5995 - IO_APIC_VECTOR(i) = 0;
5996 - goto try_again;
5997 - }
5998 - panic("gsi_irq_sharing: didn't find an IRQ using vector 0x%02X for GSI %d", vector, gsi);
5999 - }
6000 - if (i < NR_IRQS) {
6001 - gsi_2_irq[gsi] = i;
6002 - printk(KERN_INFO "GSI %d sharing vector 0x%02X and IRQ %d\n",
6003 - gsi, vector, i);
6004 - return i;
6005 - }
6006 -
6007 - i = next_irq++;
6008 - BUG_ON(i >= NR_IRQS);
6009 - gsi_2_irq[gsi] = i;
6010 - IO_APIC_VECTOR(i) = vector;
6011 - printk(KERN_INFO "GSI %d assigned vector 0x%02X and IRQ %d\n",
6012 - gsi, vector, i);
6013 - return i;
6014 -}
6015 -
6016 static int pin_2_irq(int idx, int apic, int pin)
6017 {
6018 int irq, i;
6019 @@ -819,49 +648,16 @@ static int pin_2_irq(int idx, int apic,
6020 if (mp_irqs[idx].mpc_dstirq != pin)
6021 printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
6022
6023 - switch (mp_bus_id_to_type[bus])
6024 - {
6025 - case MP_BUS_ISA: /* ISA pin */
6026 - case MP_BUS_EISA:
6027 - case MP_BUS_MCA:
6028 - {
6029 - irq = mp_irqs[idx].mpc_srcbusirq;
6030 - break;
6031 - }
6032 - case MP_BUS_PCI: /* PCI pin */
6033 - {
6034 - /*
6035 - * PCI IRQs are mapped in order
6036 - */
6037 - i = irq = 0;
6038 - while (i < apic)
6039 - irq += nr_ioapic_registers[i++];
6040 - irq += pin;
6041 - irq = gsi_irq_sharing(irq);
6042 - break;
6043 - }
6044 - default:
6045 - {
6046 - printk(KERN_ERR "unknown bus type %d.\n",bus);
6047 - irq = 0;
6048 - break;
6049 - }
6050 - }
6051 - BUG_ON(irq >= NR_IRQS);
6052 -
6053 - /*
6054 - * PCI IRQ command line redirection. Yes, limits are hardcoded.
6055 - */
6056 - if ((pin >= 16) && (pin <= 23)) {
6057 - if (pirq_entries[pin-16] != -1) {
6058 - if (!pirq_entries[pin-16]) {
6059 - apic_printk(APIC_VERBOSE, "disabling PIRQ%d\n", pin-16);
6060 - } else {
6061 - irq = pirq_entries[pin-16];
6062 - apic_printk(APIC_VERBOSE, "using PIRQ%d -> IRQ %d\n",
6063 - pin-16, irq);
6064 - }
6065 - }
6066 + if (test_bit(bus, mp_bus_not_pci)) {
6067 + irq = mp_irqs[idx].mpc_srcbusirq;
6068 + } else {
6069 + /*
6070 + * PCI IRQs are mapped in order
6071 + */
6072 + i = irq = 0;
6073 + while (i < apic)
6074 + irq += nr_ioapic_registers[i++];
6075 + irq += pin;
6076 }
6077 BUG_ON(irq >= NR_IRQS);
6078 return irq;
6079 @@ -885,46 +681,71 @@ static inline int IO_APIC_irq_trigger(in
6080 }
6081
6082 /* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
6083 -u8 irq_vector[NR_IRQ_VECTORS] __read_mostly;
6084 +static u8 irq_vector[NR_IRQ_VECTORS] __read_mostly;
6085
6086 -int assign_irq_vector(int irq)
6087 +static int __assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result)
6088 {
6089 - unsigned long flags;
6090 int vector;
6091 struct physdev_irq irq_op;
6092
6093 - BUG_ON(irq != AUTO_ASSIGN && (unsigned)irq >= NR_IRQ_VECTORS);
6094 + BUG_ON((unsigned)irq >= NR_IRQ_VECTORS);
6095
6096 if (irq < PIRQ_BASE || irq - PIRQ_BASE >= NR_PIRQS)
6097 return -EINVAL;
6098
6099 - spin_lock_irqsave(&vector_lock, flags);
6100 + cpus_and(*result, mask, cpu_online_map);
6101
6102 - if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) {
6103 - spin_unlock_irqrestore(&vector_lock, flags);
6104 - return IO_APIC_VECTOR(irq);
6105 - }
6106 + if (irq_vector[irq] > 0)
6107 + return irq_vector[irq];
6108
6109 irq_op.irq = irq;
6110 - if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) {
6111 - spin_unlock_irqrestore(&vector_lock, flags);
6112 + if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op))
6113 return -ENOSPC;
6114 - }
6115
6116 vector = irq_op.vector;
6117 - vector_irq[vector] = irq;
6118 - if (irq != AUTO_ASSIGN)
6119 - IO_APIC_VECTOR(irq) = vector;
6120 + irq_vector[irq] = vector;
6121
6122 - spin_unlock_irqrestore(&vector_lock, flags);
6123 + return vector;
6124 +}
6125 +
6126 +static int assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result)
6127 +{
6128 + int vector;
6129 + unsigned long flags;
6130
6131 + spin_lock_irqsave(&vector_lock, flags);
6132 + vector = __assign_irq_vector(irq, mask, result);
6133 + spin_unlock_irqrestore(&vector_lock, flags);
6134 return vector;
6135 }
6136
6137 -extern void (*interrupt[NR_IRQS])(void);
6138 #ifndef CONFIG_XEN
6139 -static struct hw_interrupt_type ioapic_level_type;
6140 -static struct hw_interrupt_type ioapic_edge_type;
6141 +void __setup_vector_irq(int cpu)
6142 +{
6143 + /* Initialize vector_irq on a new cpu */
6144 + /* This function must be called with vector_lock held */
6145 + int irq, vector;
6146 +
6147 + /* Mark the inuse vectors */
6148 + for (irq = 0; irq < NR_IRQ_VECTORS; ++irq) {
6149 + if (!cpu_isset(cpu, irq_domain[irq]))
6150 + continue;
6151 + vector = irq_vector[irq];
6152 + per_cpu(vector_irq, cpu)[vector] = irq;
6153 + }
6154 + /* Mark the free vectors */
6155 + for (vector = 0; vector < NR_VECTORS; ++vector) {
6156 + irq = per_cpu(vector_irq, cpu)[vector];
6157 + if (irq < 0)
6158 + continue;
6159 + if (!cpu_isset(cpu, irq_domain[irq]))
6160 + per_cpu(vector_irq, cpu)[vector] = -1;
6161 + }
6162 +}
6163 +
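__setup_vector_irq() above rebuilds one CPU's vector-to-irq table in two passes: first claim the vectors of every irq whose domain includes the CPU, then release any table slot whose irq no longer targets it. A small standalone model (plain arrays and a bitmask stand in for the per-cpu data and cpumask_t; the sizes are invented):

#include <stdio.h>

/*
 * Sketch of the two-pass rebuild: irq_domain[] says which CPUs an
 * irq may be delivered to (a plain bitmask here), irq_vector[] is
 * the irq's assigned vector, vector_irq[] is this CPU's table.
 */
#define NR_IRQS  8
#define NR_VECS 16

static int irq_vector[NR_IRQS]     = { [0] = 3, [1] = 5, [2] = 7 };
static unsigned irq_domain[NR_IRQS] = { [0] = 0x3, [1] = 0x1, [2] = 0x2 };
static int vector_irq[NR_VECS];         /* this CPU's vector -> irq table */

static void setup_vector_irq(int cpu)
{
        int irq, vector;

        /* Pass 1: mark vectors owned by irqs routable to this cpu. */
        for (irq = 0; irq < NR_IRQS; irq++) {
                if (!(irq_domain[irq] & (1u << cpu)))
                        continue;
                vector_irq[irq_vector[irq]] = irq;
        }
        /* Pass 2: release vectors whose irq no longer includes this cpu. */
        for (vector = 0; vector < NR_VECS; vector++) {
                irq = vector_irq[vector];
                if (irq < 0)
                        continue;
                if (!(irq_domain[irq] & (1u << cpu)))
                        vector_irq[vector] = -1;
        }
}

int main(void)
{
        int v;

        for (v = 0; v < NR_VECS; v++)
                vector_irq[v] = -1;
        setup_vector_irq(0);            /* cpu 0 sees irq 0 and irq 1 */
        for (v = 0; v < NR_VECS; v++)
                if (vector_irq[v] >= 0)
                        printf("vector %d -> irq %d\n", v, vector_irq[v]);
        return 0;
}
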
6164 +extern void (*interrupt[NR_IRQS])(void);
6165 +
6166 +static struct irq_chip ioapic_chip;
6167
6168 #define IOAPIC_AUTO -1
6169 #define IOAPIC_EDGE 0
6170 @@ -932,16 +753,15 @@ static struct hw_interrupt_type ioapic_e
6171
6172 static void ioapic_register_intr(int irq, int vector, unsigned long trigger)
6173 {
6174 - unsigned idx;
6175 -
6176 - idx = use_pci_vector() && !platform_legacy_irq(irq) ? vector : irq;
6177 -
6178 if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
6179 trigger == IOAPIC_LEVEL)
6180 - irq_desc[idx].chip = &ioapic_level_type;
6181 - else
6182 - irq_desc[idx].chip = &ioapic_edge_type;
6183 - set_intr_gate(vector, interrupt[idx]);
6184 + set_irq_chip_and_handler_name(irq, &ioapic_chip,
6185 + handle_fasteoi_irq, "fasteoi");
6186 + else {
6187 + irq_desc[irq].status |= IRQ_DELAYED_DISABLE;
6188 + set_irq_chip_and_handler_name(irq, &ioapic_chip,
6189 + handle_edge_irq, "edge");
6190 + }
6191 }
6192 #else
6193 #define ioapic_register_intr(irq, vector, trigger) evtchn_register_pirq(irq)
6194 @@ -994,16 +814,21 @@ static void __init setup_IO_APIC_irqs(vo
6195 continue;
6196
6197 if (IO_APIC_IRQ(irq)) {
6198 - vector = assign_irq_vector(irq);
6199 + cpumask_t mask;
6200 + vector = assign_irq_vector(irq, TARGET_CPUS, &mask);
6201 + if (vector < 0)
6202 + continue;
6203 +
6204 + entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask);
6205 entry.vector = vector;
6206
6207 ioapic_register_intr(irq, vector, IOAPIC_AUTO);
6208 if (!apic && (irq < 16))
6209 disable_8259A_irq(irq);
6210 }
6211 + ioapic_write_entry(apic, pin, entry);
6212 +
6213 spin_lock_irqsave(&ioapic_lock, flags);
6214 - io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
6215 - io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
6216 set_native_irq_info(irq, TARGET_CPUS);
6217 spin_unlock_irqrestore(&ioapic_lock, flags);
6218 }
6219 @@ -1046,7 +871,7 @@ static void __init setup_ExtINT_IRQ0_pin
6220 * The timer IRQ doesn't have to know that behind the
6221 * scene we have a 8259A-master in AEOI mode ...
6222 */
6223 - irq_desc[0].chip = &ioapic_edge_type;
6224 + set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge");
6225
6226 /*
6227 * Add it to the IO-APIC irq-routing table:
6228 @@ -1142,10 +967,7 @@ void __apicdebuginit print_IO_APIC(void)
6229 for (i = 0; i <= reg_01.bits.entries; i++) {
6230 struct IO_APIC_route_entry entry;
6231
6232 - spin_lock_irqsave(&ioapic_lock, flags);
6233 - *(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2);
6234 - *(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2);
6235 - spin_unlock_irqrestore(&ioapic_lock, flags);
6236 + entry = ioapic_read_entry(apic, i);
6237
6238 printk(KERN_DEBUG " %02x %03X %02X ",
6239 i,
6240 @@ -1165,17 +987,12 @@ void __apicdebuginit print_IO_APIC(void)
6241 );
6242 }
6243 }
6244 - if (use_pci_vector())
6245 - printk(KERN_INFO "Using vector-based indexing\n");
6246 printk(KERN_DEBUG "IRQ to pin mappings:\n");
6247 for (i = 0; i < NR_IRQS; i++) {
6248 struct irq_pin_list *entry = irq_2_pin + i;
6249 if (entry->pin < 0)
6250 continue;
6251 - if (use_pci_vector() && !platform_legacy_irq(i))
6252 - printk(KERN_DEBUG "IRQ%d ", IO_APIC_VECTOR(i));
6253 - else
6254 - printk(KERN_DEBUG "IRQ%d ", i);
6255 + printk(KERN_DEBUG "IRQ%d ", i);
6256 for (;;) {
6257 printk("-> %d:%d", entry->apic, entry->pin);
6258 if (!entry->next)
6259 @@ -1339,9 +1156,6 @@ static void __init enable_IO_APIC(void)
6260 irq_2_pin[i].pin = -1;
6261 irq_2_pin[i].next = 0;
6262 }
6263 - if (!pirqs_enabled)
6264 - for (i = 0; i < MAX_PIRQS; i++)
6265 - pirq_entries[i] = -1;
6266
6267 /*
6268 * The number of IO-APIC IRQ registers (== #pins):
6269 @@ -1358,11 +1172,7 @@ static void __init enable_IO_APIC(void)
6270 /* See if any of the pins is in ExtINT mode */
6271 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
6272 struct IO_APIC_route_entry entry;
6273 - spin_lock_irqsave(&ioapic_lock, flags);
6274 - *(((int *)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
6275 - *(((int *)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
6276 - spin_unlock_irqrestore(&ioapic_lock, flags);
6277 -
6278 + entry = ioapic_read_entry(apic, pin);
6279
6280 /* If the interrupt line is enabled and in ExtInt mode
6281 * I have found the pin where the i8259 is connected.
6282 @@ -1416,7 +1226,6 @@ void disable_IO_APIC(void)
6283 */
6284 if (ioapic_i8259.pin != -1) {
6285 struct IO_APIC_route_entry entry;
6286 - unsigned long flags;
6287
6288 memset(&entry, 0, sizeof(entry));
6289 entry.mask = 0; /* Enabled */
6290 @@ -1433,12 +1242,7 @@ void disable_IO_APIC(void)
6291 /*
6292 * Add it to the IO-APIC irq-routing table:
6293 */
6294 - spin_lock_irqsave(&ioapic_lock, flags);
6295 - io_apic_write(ioapic_i8259.apic, 0x11+2*ioapic_i8259.pin,
6296 - *(((int *)&entry)+1));
6297 - io_apic_write(ioapic_i8259.apic, 0x10+2*ioapic_i8259.pin,
6298 - *(((int *)&entry)+0));
6299 - spin_unlock_irqrestore(&ioapic_lock, flags);
6300 + ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
6301 }
6302
6303 disconnect_bsp_APIC(ioapic_i8259.pin != -1);
6304 @@ -1446,76 +1250,6 @@ void disable_IO_APIC(void)
6305 }
6306
6307 /*
6308 - * function to set the IO-APIC physical IDs based on the
6309 - * values stored in the MPC table.
6310 - *
6311 - * by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999
6312 - */
6313 -
6314 -#ifndef CONFIG_XEN
6315 -static void __init setup_ioapic_ids_from_mpc (void)
6316 -{
6317 - union IO_APIC_reg_00 reg_00;
6318 - int apic;
6319 - int i;
6320 - unsigned char old_id;
6321 - unsigned long flags;
6322 -
6323 - /*
6324 - * Set the IOAPIC ID to the value stored in the MPC table.
6325 - */
6326 - for (apic = 0; apic < nr_ioapics; apic++) {
6327 -
6328 - /* Read the register 0 value */
6329 - spin_lock_irqsave(&ioapic_lock, flags);
6330 - reg_00.raw = io_apic_read(apic, 0);
6331 - spin_unlock_irqrestore(&ioapic_lock, flags);
6332 -
6333 - old_id = mp_ioapics[apic].mpc_apicid;
6334 -
6335 -
6336 - printk(KERN_INFO "Using IO-APIC %d\n", mp_ioapics[apic].mpc_apicid);
6337 -
6338 -
6339 - /*
6340 - * We need to adjust the IRQ routing table
6341 - * if the ID changed.
6342 - */
6343 - if (old_id != mp_ioapics[apic].mpc_apicid)
6344 - for (i = 0; i < mp_irq_entries; i++)
6345 - if (mp_irqs[i].mpc_dstapic == old_id)
6346 - mp_irqs[i].mpc_dstapic
6347 - = mp_ioapics[apic].mpc_apicid;
6348 -
6349 - /*
6350 - * Read the right value from the MPC table and
6351 - * write it into the ID register.
6352 - */
6353 - apic_printk(APIC_VERBOSE,KERN_INFO "...changing IO-APIC physical APIC ID to %d ...",
6354 - mp_ioapics[apic].mpc_apicid);
6355 -
6356 - reg_00.bits.ID = mp_ioapics[apic].mpc_apicid;
6357 - spin_lock_irqsave(&ioapic_lock, flags);
6358 - io_apic_write(apic, 0, reg_00.raw);
6359 - spin_unlock_irqrestore(&ioapic_lock, flags);
6360 -
6361 - /*
6362 - * Sanity check
6363 - */
6364 - spin_lock_irqsave(&ioapic_lock, flags);
6365 - reg_00.raw = io_apic_read(apic, 0);
6366 - spin_unlock_irqrestore(&ioapic_lock, flags);
6367 - if (reg_00.bits.ID != mp_ioapics[apic].mpc_apicid)
6368 - printk("could not set ID!\n");
6369 - else
6370 - apic_printk(APIC_VERBOSE," ok.\n");
6371 - }
6372 -}
6373 -#else
6374 -static void __init setup_ioapic_ids_from_mpc(void) { }
6375 -#endif
6376 -
6377 -/*
6378 * There is a nasty bug in some older SMP boards, their mptable lies
6379 * about the timer IRQ. We do the following to work around the situation:
6380 *
6381 @@ -1569,7 +1303,7 @@ static int __init timer_irq_works(void)
6382 * an edge even if it isn't on the 8259A...
6383 */
6384
6385 -static unsigned int startup_edge_ioapic_irq(unsigned int irq)
6386 +static unsigned int startup_ioapic_irq(unsigned int irq)
6387 {
6388 int was_pending = 0;
6389 unsigned long flags;
6390 @@ -1586,107 +1320,19 @@ static unsigned int startup_edge_ioapic_
6391 return was_pending;
6392 }
6393
6394 -/*
6395 - * Once we have recorded IRQ_PENDING already, we can mask the
6396 - * interrupt for real. This prevents IRQ storms from unhandled
6397 - * devices.
6398 - */
6399 -static void ack_edge_ioapic_irq(unsigned int irq)
6400 -{
6401 - move_irq(irq);
6402 - if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED))
6403 - == (IRQ_PENDING | IRQ_DISABLED))
6404 - mask_IO_APIC_irq(irq);
6405 - ack_APIC_irq();
6406 -}
6407 -
6408 -/*
6409 - * Level triggered interrupts can just be masked,
6410 - * and shutting down and starting up the interrupt
6411 - * is the same as enabling and disabling them -- except
6412 - * with a startup need to return a "was pending" value.
6413 - *
6414 - * Level triggered interrupts are special because we
6415 - * do not touch any IO-APIC register while handling
6416 - * them. We ack the APIC in the end-IRQ handler, not
6417 - * in the start-IRQ-handler. Protection against reentrance
6418 - * from the same interrupt is still provided, both by the
6419 - * generic IRQ layer and by the fact that an unacked local
6420 - * APIC does not accept IRQs.
6421 - */
6422 -static unsigned int startup_level_ioapic_irq (unsigned int irq)
6423 -{
6424 - unmask_IO_APIC_irq(irq);
6425 -
6426 - return 0; /* don't check for pending */
6427 -}
6428 -
6429 -static void end_level_ioapic_irq (unsigned int irq)
6430 -{
6431 - move_irq(irq);
6432 - ack_APIC_irq();
6433 -}
6434 -
6435 -#ifdef CONFIG_PCI_MSI
6436 -static unsigned int startup_edge_ioapic_vector(unsigned int vector)
6437 -{
6438 - int irq = vector_to_irq(vector);
6439 -
6440 - return startup_edge_ioapic_irq(irq);
6441 -}
6442 -
6443 -static void ack_edge_ioapic_vector(unsigned int vector)
6444 -{
6445 - int irq = vector_to_irq(vector);
6446 -
6447 - move_native_irq(vector);
6448 - ack_edge_ioapic_irq(irq);
6449 -}
6450 -
6451 -static unsigned int startup_level_ioapic_vector (unsigned int vector)
6452 -{
6453 - int irq = vector_to_irq(vector);
6454 -
6455 - return startup_level_ioapic_irq (irq);
6456 -}
6457 -
6458 -static void end_level_ioapic_vector (unsigned int vector)
6459 -{
6460 - int irq = vector_to_irq(vector);
6461 -
6462 - move_native_irq(vector);
6463 - end_level_ioapic_irq(irq);
6464 -}
6465 -
6466 -static void mask_IO_APIC_vector (unsigned int vector)
6467 -{
6468 - int irq = vector_to_irq(vector);
6469 -
6470 - mask_IO_APIC_irq(irq);
6471 -}
6472 -
6473 -static void unmask_IO_APIC_vector (unsigned int vector)
6474 +static int ioapic_retrigger_irq(unsigned int irq)
6475 {
6476 - int irq = vector_to_irq(vector);
6477 -
6478 - unmask_IO_APIC_irq(irq);
6479 -}
6480 -
6481 -#ifdef CONFIG_SMP
6482 -static void set_ioapic_affinity_vector (unsigned int vector,
6483 - cpumask_t cpu_mask)
6484 -{
6485 - int irq = vector_to_irq(vector);
6486 + cpumask_t mask;
6487 + unsigned vector;
6488 + unsigned long flags;
6489
6490 - set_native_irq_info(vector, cpu_mask);
6491 - set_ioapic_affinity_irq(irq, cpu_mask);
6492 -}
6493 -#endif // CONFIG_SMP
6494 -#endif // CONFIG_PCI_MSI
6495 + spin_lock_irqsave(&vector_lock, flags);
6496 + vector = irq_vector[irq];
6497 + cpus_clear(mask);
6498 + cpu_set(first_cpu(irq_domain[irq]), mask);
6499
6500 -static int ioapic_retrigger(unsigned int irq)
6501 -{
6502 - send_IPI_self(IO_APIC_VECTOR(irq));
6503 + send_IPI_mask(mask, vector);
6504 + spin_unlock_irqrestore(&vector_lock, flags);
6505
6506 return 1;
6507 }
6508 @@ -1700,32 +1346,47 @@ static int ioapic_retrigger(unsigned int
6509 * races.
6510 */
6511
6512 -static struct hw_interrupt_type ioapic_edge_type __read_mostly = {
6513 - .typename = "IO-APIC-edge",
6514 - .startup = startup_edge_ioapic,
6515 - .shutdown = shutdown_edge_ioapic,
6516 - .enable = enable_edge_ioapic,
6517 - .disable = disable_edge_ioapic,
6518 - .ack = ack_edge_ioapic,
6519 - .end = end_edge_ioapic,
6520 -#ifdef CONFIG_SMP
6521 - .set_affinity = set_ioapic_affinity,
6522 +static void ack_apic_edge(unsigned int irq)
6523 +{
6524 + move_native_irq(irq);
6525 + ack_APIC_irq();
6526 +}
6527 +
6528 +static void ack_apic_level(unsigned int irq)
6529 +{
6530 + int do_unmask_irq = 0;
6531 +
6532 +#if defined(CONFIG_GENERIC_PENDING_IRQ) || defined(CONFIG_IRQBALANCE)
6533 + /* If we are moving the irq we need to mask it */
6534 + if (unlikely(irq_desc[irq].status & IRQ_MOVE_PENDING)) {
6535 + do_unmask_irq = 1;
6536 + mask_IO_APIC_irq(irq);
6537 + }
6538 #endif
6539 - .retrigger = ioapic_retrigger,
6540 -};
6541
6542 -static struct hw_interrupt_type ioapic_level_type __read_mostly = {
6543 - .typename = "IO-APIC-level",
6544 - .startup = startup_level_ioapic,
6545 - .shutdown = shutdown_level_ioapic,
6546 - .enable = enable_level_ioapic,
6547 - .disable = disable_level_ioapic,
6548 - .ack = mask_and_ack_level_ioapic,
6549 - .end = end_level_ioapic,
6550 + /*
6551 + * We must acknowledge the irq before we move it or the acknowledge will
6552 + * not propagate properly.
6553 + */
6554 + ack_APIC_irq();
6555 +
6556 + /* Now we can move and re-enable the irq */
6557 + move_masked_irq(irq);
6558 + if (unlikely(do_unmask_irq))
6559 + unmask_IO_APIC_irq(irq);
6560 +}
6561 +
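ack_apic_level() above fixes a strict ordering for level-triggered migration: mask if a move is pending, ack the local APIC first so the ack propagates, move the irq while it is masked, then unmask. A toy model of that sequence (the struct irq and its flags are simplified stand-ins for irq_desc):

#include <stdio.h>

/*
 * Sketch of the mask -> ack -> move-while-masked -> unmask order.
 * All names here are simplified stand-ins; none of this touches
 * real hardware.
 */
#define IRQ_MOVE_PENDING 0x1

struct irq { unsigned status; int masked; int cpu, target_cpu; };

static void mask_irq(struct irq *irq)   { irq->masked = 1; }
static void unmask_irq(struct irq *irq) { irq->masked = 0; }
static void ack_apic(void)              { printf("APIC acked\n"); }

static void move_masked_irq(struct irq *irq)
{
        if (!(irq->status & IRQ_MOVE_PENDING))
                return;
        irq->cpu = irq->target_cpu;     /* safe: no delivery while masked */
        irq->status &= ~IRQ_MOVE_PENDING;
}

static void ack_apic_level(struct irq *irq)
{
        int do_unmask = 0;

        if (irq->status & IRQ_MOVE_PENDING) {
                do_unmask = 1;
                mask_irq(irq);          /* block re-delivery during the move */
        }
        ack_apic();                     /* ack first, or it won't propagate  */
        move_masked_irq(irq);
        if (do_unmask)
                unmask_irq(irq);
}

int main(void)
{
        struct irq irq = { .status = IRQ_MOVE_PENDING, .cpu = 0, .target_cpu = 2 };

        ack_apic_level(&irq);
        printf("irq now on cpu %d, masked=%d\n", irq.cpu, irq.masked);
        return 0;
}
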
6562 +static struct irq_chip ioapic_chip __read_mostly = {
6563 + .name = "IO-APIC",
6564 + .startup = startup_ioapic_irq,
6565 + .mask = mask_IO_APIC_irq,
6566 + .unmask = unmask_IO_APIC_irq,
6567 + .ack = ack_apic_edge,
6568 + .eoi = ack_apic_level,
6569 #ifdef CONFIG_SMP
6570 - .set_affinity = set_ioapic_affinity,
6571 + .set_affinity = set_ioapic_affinity_irq,
6572 #endif
6573 - .retrigger = ioapic_retrigger,
6574 + .retrigger = ioapic_retrigger_irq,
6575 };
6576 #endif /* !CONFIG_XEN */
6577
6578 @@ -1746,12 +1407,7 @@ static inline void init_IO_APIC_traps(vo
6579 */
6580 for (irq = 0; irq < NR_IRQS ; irq++) {
6581 int tmp = irq;
6582 - if (use_pci_vector()) {
6583 - if (!platform_legacy_irq(tmp))
6584 - if ((tmp = vector_to_irq(tmp)) == -1)
6585 - continue;
6586 - }
6587 - if (IO_APIC_IRQ(tmp) && !IO_APIC_VECTOR(tmp)) {
6588 + if (IO_APIC_IRQ(tmp) && !irq_vector[tmp]) {
6589 /*
6590 * Hmm.. We don't have an entry for this,
6591 * so default to an old-fashioned 8259
6592 @@ -1762,7 +1418,7 @@ static inline void init_IO_APIC_traps(vo
6593 #ifndef CONFIG_XEN
6594 else
6595 /* Strange. Oh, well.. */
6596 - irq_desc[irq].chip = &no_irq_type;
6597 + irq_desc[irq].chip = &no_irq_chip;
6598 #endif
6599 }
6600 }
6601 @@ -1883,8 +1539,6 @@ static inline void unlock_ExtINT_logic(v
6602 spin_unlock_irqrestore(&ioapic_lock, flags);
6603 }
6604
6605 -int timer_uses_ioapic_pin_0;
6606 -
6607 /*
6608 * This code may look a bit paranoid, but it's supposed to cooperate with
6609 * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ
6610 @@ -1897,13 +1551,13 @@ static inline void check_timer(void)
6611 {
6612 int apic1, pin1, apic2, pin2;
6613 int vector;
6614 + cpumask_t mask;
6615
6616 /*
6617 * get/set the timer IRQ vector:
6618 */
6619 disable_8259A_irq(0);
6620 - vector = assign_irq_vector(0);
6621 - set_intr_gate(vector, interrupt[0]);
6622 + vector = assign_irq_vector(0, TARGET_CPUS, &mask);
6623
6624 /*
6625 * Subtle, code in do_timer_interrupt() expects an AEOI
6626 @@ -1922,9 +1576,6 @@ static inline void check_timer(void)
6627 pin2 = ioapic_i8259.pin;
6628 apic2 = ioapic_i8259.apic;
6629
6630 - if (pin1 == 0)
6631 - timer_uses_ioapic_pin_0 = 1;
6632 -
6633 apic_printk(APIC_VERBOSE,KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n",
6634 vector, apic1, pin1, apic2, pin2);
6635
6636 @@ -2039,11 +1690,6 @@ void __init setup_IO_APIC(void)
6637
6638 apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");
6639
6640 - /*
6641 - * Set up the IO-APIC IRQ routing table.
6642 - */
6643 - if (!acpi_ioapic)
6644 - setup_ioapic_ids_from_mpc();
6645 #ifndef CONFIG_XEN
6646 sync_Arb_IDs();
6647 #endif /* !CONFIG_XEN */
6648 @@ -2066,17 +1712,12 @@ static int ioapic_suspend(struct sys_dev
6649 {
6650 struct IO_APIC_route_entry *entry;
6651 struct sysfs_ioapic_data *data;
6652 - unsigned long flags;
6653 int i;
6654
6655 data = container_of(dev, struct sysfs_ioapic_data, dev);
6656 entry = data->entry;
6657 - spin_lock_irqsave(&ioapic_lock, flags);
6658 - for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) {
6659 - *(((int *)entry) + 1) = io_apic_read(dev->id, 0x11 + 2 * i);
6660 - *(((int *)entry) + 0) = io_apic_read(dev->id, 0x10 + 2 * i);
6661 - }
6662 - spin_unlock_irqrestore(&ioapic_lock, flags);
6663 + for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ )
6664 + *entry = ioapic_read_entry(dev->id, i);
6665
6666 return 0;
6667 }
6668 @@ -2098,11 +1739,9 @@ static int ioapic_resume(struct sys_devi
6669 reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid;
6670 io_apic_write(dev->id, 0, reg_00.raw);
6671 }
6672 - for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) {
6673 - io_apic_write(dev->id, 0x11+2*i, *(((int *)entry)+1));
6674 - io_apic_write(dev->id, 0x10+2*i, *(((int *)entry)+0));
6675 - }
6676 spin_unlock_irqrestore(&ioapic_lock, flags);
6677 + for (i = 0; i < nr_ioapic_registers[dev->id]; i++)
6678 + ioapic_write_entry(dev->id, i, entry[i]);
6679
6680 return 0;
6681 }
6682 @@ -2148,28 +1787,254 @@ static int __init ioapic_init_sysfs(void
6683
6684 device_initcall(ioapic_init_sysfs);
6685
6686 +/*
6687 + * Dynamic irq allocation and deallocation
6688 + */
6689 +int create_irq(void)
6690 +{
6691 + /* Allocate an unused irq */
6692 + int irq;
6693 + int new;
6694 + int vector = 0;
6695 + unsigned long flags;
6696 + cpumask_t mask;
6697 +
6698 + irq = -ENOSPC;
6699 + spin_lock_irqsave(&vector_lock, flags);
6700 + for (new = (NR_IRQS - 1); new >= 0; new--) {
6701 + if (platform_legacy_irq(new))
6702 + continue;
6703 + if (irq_vector[new] != 0)
6704 + continue;
6705 + vector = __assign_irq_vector(new, TARGET_CPUS, &mask);
6706 + if (likely(vector > 0))
6707 + irq = new;
6708 + break;
6709 + }
6710 + spin_unlock_irqrestore(&vector_lock, flags);
6711 +
6712 + if (irq >= 0) {
6713 + dynamic_irq_init(irq);
6714 + }
6715 + return irq;
6716 +}
6717 +
6718 +void destroy_irq(unsigned int irq)
6719 +{
6720 + unsigned long flags;
6721 +
6722 + dynamic_irq_cleanup(irq);
6723 +
6724 + spin_lock_irqsave(&vector_lock, flags);
6725 + irq_vector[irq] = 0;
6726 + spin_unlock_irqrestore(&vector_lock, flags);
6727 +}
6728 +
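create_irq() above hands out dynamic irqs by scanning from the top of the irq space downward, skipping the legacy ISA range and claiming the first slot with no vector. A standalone model of the scan (the hypervisor vector allocation and the locking are elided; next_fake_vector is invented):

#include <stdio.h>

/*
 * Sketch of the descending free-slot scan.  In the real code the
 * vector comes from PHYSDEVOP_alloc_irq_vector under vector_lock;
 * here a counter stands in for the allocator.
 */
#define NR_IRQS 32

static int irq_vector[NR_IRQS];         /* 0 = free */
static int next_fake_vector = 0x40;     /* stand-in for the allocator */

static int create_irq(void)
{
        int new;

        for (new = NR_IRQS - 1; new >= 0; new--) {
                if (new < 16)           /* platform_legacy_irq(): skip ISA */
                        continue;
                if (irq_vector[new] != 0)
                        continue;
                irq_vector[new] = next_fake_vector++;
                return new;             /* claimed */
        }
        return -1;                      /* -ENOSPC in the real code */
}

static void destroy_irq(unsigned int irq)
{
        irq_vector[irq] = 0;            /* vector slot is reusable again */
}

int main(void)
{
        int a = create_irq(), b = create_irq();

        printf("allocated irqs %d and %d\n", a, b);   /* 31 and 30 */
        destroy_irq(a);
        printf("reallocated irq %d\n", create_irq()); /* 31 again  */
        return 0;
}
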
6729 #endif /* CONFIG_XEN */
6730
6731 -/* --------------------------------------------------------------------------
6732 - ACPI-based IOAPIC Configuration
6733 - -------------------------------------------------------------------------- */
6734 +/*
6735 + * MSI message composition
6736 + */
6737 +#if defined(CONFIG_PCI_MSI) && !defined(CONFIG_XEN)
6738 +static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
6739 +{
6740 + int vector;
6741 + unsigned dest;
6742 + cpumask_t tmp;
6743
6744 -#ifdef CONFIG_ACPI
6745 + vector = assign_irq_vector(irq, TARGET_CPUS, &tmp);
6746 + if (vector >= 0) {
6747 + dest = cpu_mask_to_apicid(tmp);
6748 +
6749 + msg->address_hi = MSI_ADDR_BASE_HI;
6750 + msg->address_lo =
6751 + MSI_ADDR_BASE_LO |
6752 + ((INT_DEST_MODE == 0) ?
6753 + MSI_ADDR_DEST_MODE_PHYSICAL:
6754 + MSI_ADDR_DEST_MODE_LOGICAL) |
6755 + ((INT_DELIVERY_MODE != dest_LowestPrio) ?
6756 + MSI_ADDR_REDIRECTION_CPU:
6757 + MSI_ADDR_REDIRECTION_LOWPRI) |
6758 + MSI_ADDR_DEST_ID(dest);
6759 +
6760 + msg->data =
6761 + MSI_DATA_TRIGGER_EDGE |
6762 + MSI_DATA_LEVEL_ASSERT |
6763 + ((INT_DELIVERY_MODE != dest_LowestPrio) ?
6764 + MSI_DATA_DELIVERY_FIXED:
6765 + MSI_DATA_DELIVERY_LOWPRI) |
6766 + MSI_DATA_VECTOR(vector);
6767 + }
6768 + return vector;
6769 +}
6770
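msi_compose_msg() above packs the routing decision into the MSI address/data pair. A reduced sketch of the field packing, keeping only the destination APIC id (address bits 19..12) and the vector (data bits 7..0); the delivery-mode and trigger bits are dropped for brevity:

#include <stdio.h>
#include <stdint.h>

/*
 * Sketch of MSI address/data packing.  Only two fields are
 * modelled: the destination APIC id in the address word and the
 * vector in the data word. Illustrative only.
 */
#define MSI_ADDR_BASE          0xfee00000u
#define MSI_ADDR_DEST_ID(d)    (((uint32_t)(d) & 0xff) << 12)
#define MSI_DATA_VECTOR(v)     ((uint32_t)(v) & 0xff)

struct msi_msg { uint32_t address_hi, address_lo, data; };

static void compose_msg(struct msi_msg *msg, unsigned dest, unsigned vector)
{
        msg->address_hi = 0;                      /* target below 4G */
        msg->address_lo = MSI_ADDR_BASE | MSI_ADDR_DEST_ID(dest);
        msg->data       = MSI_DATA_VECTOR(vector);
}

int main(void)
{
        struct msi_msg msg;

        compose_msg(&msg, 0x05, 0x41);
        printf("addr=0x%08x data=0x%04x\n", msg.address_lo, msg.data);
        /* prints addr=0xfee05000 data=0x0041 */
        return 0;
}
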
6771 -#define IO_APIC_MAX_ID 0xFE
6772 +#ifdef CONFIG_SMP
6773 +static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
6774 +{
6775 + struct msi_msg msg;
6776 + unsigned int dest;
6777 + cpumask_t tmp;
6778 + int vector;
6779 +
6780 + cpus_and(tmp, mask, cpu_online_map);
6781 + if (cpus_empty(tmp))
6782 + tmp = TARGET_CPUS;
6783 +
6784 + cpus_and(mask, tmp, CPU_MASK_ALL);
6785 +
6786 + vector = assign_irq_vector(irq, mask, &tmp);
6787 + if (vector < 0)
6788 + return;
6789 +
6790 + dest = cpu_mask_to_apicid(tmp);
6791 +
6792 + read_msi_msg(irq, &msg);
6793 +
6794 + msg.data &= ~MSI_DATA_VECTOR_MASK;
6795 + msg.data |= MSI_DATA_VECTOR(vector);
6796 + msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
6797 + msg.address_lo |= MSI_ADDR_DEST_ID(dest);
6798
6799 -int __init io_apic_get_version (int ioapic)
6800 + write_msi_msg(irq, &msg);
6801 + set_native_irq_info(irq, mask);
6802 +}
6803 +#endif /* CONFIG_SMP */
6804 +
6805 +/*
6806 + * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
6807 + * which implement the MSI or MSI-X Capability Structure.
6808 + */
6809 +static struct irq_chip msi_chip = {
6810 + .name = "PCI-MSI",
6811 + .unmask = unmask_msi_irq,
6812 + .mask = mask_msi_irq,
6813 + .ack = ack_apic_edge,
6814 +#ifdef CONFIG_SMP
6815 + .set_affinity = set_msi_irq_affinity,
6816 +#endif
6817 + .retrigger = ioapic_retrigger_irq,
6818 +};
6819 +
6820 +int arch_setup_msi_irq(unsigned int irq, struct pci_dev *dev)
6821 {
6822 - union IO_APIC_reg_01 reg_01;
6823 - unsigned long flags;
6824 + struct msi_msg msg;
6825 + int ret;
6826 + ret = msi_compose_msg(dev, irq, &msg);
6827 + if (ret < 0)
6828 + return ret;
6829
6830 - spin_lock_irqsave(&ioapic_lock, flags);
6831 - reg_01.raw = io_apic_read(ioapic, 1);
6832 - spin_unlock_irqrestore(&ioapic_lock, flags);
6833 + write_msi_msg(irq, &msg);
6834
6835 - return reg_01.bits.version;
6836 + set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
6837 +
6838 + return 0;
6839 +}
6840 +
6841 +void arch_teardown_msi_irq(unsigned int irq)
6842 +{
6843 + return;
6844 }
6845
6846 +#endif /* CONFIG_PCI_MSI */
6847 +
6848 +/*
6849 + * Hypertransport interrupt support
6850 + */
6851 +#ifdef CONFIG_HT_IRQ
6852 +
6853 +#ifdef CONFIG_SMP
6854 +
6855 +static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
6856 +{
6857 + struct ht_irq_msg msg;
6858 + fetch_ht_irq_msg(irq, &msg);
6859 +
6860 + msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK | HT_IRQ_LOW_DEST_ID_MASK);
6861 + msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK);
6862 +
6863 + msg.address_lo |= HT_IRQ_LOW_VECTOR(vector) | HT_IRQ_LOW_DEST_ID(dest);
6864 + msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest);
6865 +
6866 + write_ht_irq_msg(irq, &msg);
6867 +}
6868 +
6869 +static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
6870 +{
6871 + unsigned int dest;
6872 + cpumask_t tmp;
6873 + int vector;
6874 +
6875 + cpus_and(tmp, mask, cpu_online_map);
6876 + if (cpus_empty(tmp))
6877 + tmp = TARGET_CPUS;
6878 +
6879 + cpus_and(mask, tmp, CPU_MASK_ALL);
6880 +
6881 + vector = assign_irq_vector(irq, mask, &tmp);
6882 + if (vector < 0)
6883 + return;
6884 +
6885 + dest = cpu_mask_to_apicid(tmp);
6886 +
6887 + target_ht_irq(irq, dest, vector);
6888 + set_native_irq_info(irq, mask);
6889 +}
6890 +#endif
6891 +
6892 +static struct irq_chip ht_irq_chip = {
6893 + .name = "PCI-HT",
6894 + .mask = mask_ht_irq,
6895 + .unmask = unmask_ht_irq,
6896 + .ack = ack_apic_edge,
6897 +#ifdef CONFIG_SMP
6898 + .set_affinity = set_ht_irq_affinity,
6899 +#endif
6900 + .retrigger = ioapic_retrigger_irq,
6901 +};
6902 +
6903 +int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
6904 +{
6905 + int vector;
6906 + cpumask_t tmp;
6907 +
6908 + vector = assign_irq_vector(irq, TARGET_CPUS, &tmp);
6909 + if (vector >= 0) {
6910 + struct ht_irq_msg msg;
6911 + unsigned dest;
6912 +
6913 + dest = cpu_mask_to_apicid(tmp);
6914 +
6915 + msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
6916 +
6917 + msg.address_lo =
6918 + HT_IRQ_LOW_BASE |
6919 + HT_IRQ_LOW_DEST_ID(dest) |
6920 + HT_IRQ_LOW_VECTOR(vector) |
6921 + ((INT_DEST_MODE == 0) ?
6922 + HT_IRQ_LOW_DM_PHYSICAL :
6923 + HT_IRQ_LOW_DM_LOGICAL) |
6924 + HT_IRQ_LOW_RQEOI_EDGE |
6925 + ((INT_DELIVERY_MODE != dest_LowestPrio) ?
6926 + HT_IRQ_LOW_MT_FIXED :
6927 + HT_IRQ_LOW_MT_ARBITRATED) |
6928 + HT_IRQ_LOW_IRQ_MASKED;
6929 +
6930 + write_ht_irq_msg(irq, &msg);
6931 +
6932 + set_irq_chip_and_handler_name(irq, &ht_irq_chip,
6933 + handle_edge_irq, "edge");
6934 + }
6935 + return vector;
6936 +}
6937 +#endif /* CONFIG_HT_IRQ */
6938 +
6939 +/* --------------------------------------------------------------------------
6940 + ACPI-based IOAPIC Configuration
6941 + -------------------------------------------------------------------------- */
6942 +
6943 +#ifdef CONFIG_ACPI
6944 +
6945 +#define IO_APIC_MAX_ID 0xFE
6946
6947 int __init io_apic_get_redir_entries (int ioapic)
6948 {
6949 @@ -2188,6 +2053,8 @@ int io_apic_set_pci_routing (int ioapic,
6950 {
6951 struct IO_APIC_route_entry entry;
6952 unsigned long flags;
6953 + int vector;
6954 + cpumask_t mask;
6955
6956 if (!IO_APIC_IRQ(irq)) {
6957 apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
6958 @@ -2196,6 +2063,17 @@ int io_apic_set_pci_routing (int ioapic,
6959 }
6960
6961 /*
6962 + * IRQs < 16 are already in the irq_2_pin[] map
6963 + */
6964 + if (irq >= 16)
6965 + add_pin_to_irq(irq, ioapic, pin);
6966 +
6967 +
6968 + vector = assign_irq_vector(irq, TARGET_CPUS, &mask);
6969 + if (vector < 0)
6970 + return vector;
6971 +
6972 + /*
6973 * Generate a PCI IRQ routing entry and program the IOAPIC accordingly.
6974 * Note that we mask (disable) IRQs now -- these get enabled when the
6975 * corresponding device driver registers for this IRQ.
6976 @@ -2205,19 +2083,11 @@ int io_apic_set_pci_routing (int ioapic,
6977
6978 entry.delivery_mode = INT_DELIVERY_MODE;
6979 entry.dest_mode = INT_DEST_MODE;
6980 - entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
6981 + entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask);
6982 entry.trigger = edge_level;
6983 entry.polarity = active_high_low;
6984 entry.mask = 1; /* Disabled (masked) */
6985 -
6986 - irq = gsi_irq_sharing(irq);
6987 - /*
6988 - * IRQs < 16 are already in the irq_2_pin[] map
6989 - */
6990 - if (irq >= 16)
6991 - add_pin_to_irq(irq, ioapic, pin);
6992 -
6993 - entry.vector = assign_irq_vector(irq);
6994 + entry.vector = vector & 0xff;
6995
6996 apic_printk(APIC_VERBOSE,KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry (%d-%d -> 0x%x -> "
6997 "IRQ %d Mode:%i Active:%i)\n", ioapic,
6998 @@ -2229,10 +2099,10 @@ int io_apic_set_pci_routing (int ioapic,
6999 if (!ioapic && (irq < 16))
7000 disable_8259A_irq(irq);
7001
7002 + ioapic_write_entry(ioapic, pin, entry);
7003 +
7004 spin_lock_irqsave(&ioapic_lock, flags);
7005 - io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1));
7006 - io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0));
7007 - set_native_irq_info(use_pci_vector() ? entry.vector : irq, TARGET_CPUS);
7008 + set_native_irq_info(irq, TARGET_CPUS);
7009 spin_unlock_irqrestore(&ioapic_lock, flags);
7010
7011 return 0;
7012 --- sle11-2009-10-16.orig/arch/x86/kernel/ioport_64-xen.c 2009-10-28 14:55:12.000000000 +0100
7013 +++ sle11-2009-10-16/arch/x86/kernel/ioport_64-xen.c 2009-03-04 11:28:34.000000000 +0100
7014 @@ -58,6 +58,7 @@ asmlinkage long sys_ioperm(unsigned long
7015
7016 memset(bitmap, 0xff, IO_BITMAP_BYTES);
7017 t->io_bitmap_ptr = bitmap;
7018 + set_thread_flag(TIF_IO_BITMAP);
7019
7020 set_xen_guest_handle(set_iobitmap.bitmap, (char *)bitmap);
7021 set_iobitmap.nr_ports = IO_BITMAP_BITS;
7022 --- sle11-2009-10-16.orig/arch/x86/kernel/irq_64-xen.c 2009-10-28 14:55:12.000000000 +0100
7023 +++ sle11-2009-10-16/arch/x86/kernel/irq_64-xen.c 2009-03-04 11:28:34.000000000 +0100
7024 @@ -20,11 +20,6 @@
7025 #include <asm/idle.h>
7026
7027 atomic_t irq_err_count;
7028 -#ifdef CONFIG_X86_IO_APIC
7029 -#ifdef APIC_MISMATCH_DEBUG
7030 -atomic_t irq_mis_count;
7031 -#endif
7032 -#endif
7033
7034 #ifdef CONFIG_DEBUG_STACKOVERFLOW
7035 /*
7036 @@ -79,7 +74,8 @@ int show_interrupts(struct seq_file *p,
7037 for_each_online_cpu(j)
7038 seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
7039 #endif
7040 - seq_printf(p, " %14s", irq_desc[i].chip->typename);
7041 + seq_printf(p, " %8s", irq_desc[i].chip->name);
7042 + seq_printf(p, "-%-8s", irq_desc[i].name);
7043
7044 seq_printf(p, " %s", action->name);
7045 for (action=action->next; action; action = action->next)
7046 @@ -99,11 +95,6 @@ skip:
7047 seq_putc(p, '\n');
7048 #endif
7049 seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
7050 -#ifdef CONFIG_X86_IO_APIC
7051 -#ifdef APIC_MISMATCH_DEBUG
7052 - seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count));
7053 -#endif
7054 -#endif
7055 }
7056 return 0;
7057 }
7058 @@ -114,24 +105,28 @@ skip:
7059 * handlers).
7060 */
7061 asmlinkage unsigned int do_IRQ(struct pt_regs *regs)
7062 -{
7063 +{
7064 + struct pt_regs *old_regs = set_irq_regs(regs);
7065 +
7066 /* high bit used in ret_from_ code */
7067 unsigned irq = ~regs->orig_rax;
7068
7069 - if (unlikely(irq >= NR_IRQS)) {
7070 - printk(KERN_EMERG "%s: cannot handle IRQ %d\n",
7071 - __FUNCTION__, irq);
7072 - BUG();
7073 - }
7074 -
7075 /*exit_idle();*/
7076 /*irq_enter();*/
7077 +
7078 #ifdef CONFIG_DEBUG_STACKOVERFLOW
7079 stack_overflow_check(regs);
7080 #endif
7081 - __do_IRQ(irq, regs);
7082 +
7083 + if (likely(irq < NR_IRQS))
7084 + generic_handle_irq(irq);
7085 + else
7086 + printk(KERN_EMERG "%s: %d.%d No irq handler for irq\n",
7087 + __func__, smp_processor_id(), irq);
7088 +
7089 /*irq_exit();*/
7090
7091 + set_irq_regs(old_regs);
7092 return 1;
7093 }
7094
7095 @@ -192,6 +187,6 @@ EXPORT_SYMBOL(do_softirq);
7096 */
7097 void ack_bad_irq(unsigned int irq)
7098 {
7099 - printk("unexpected IRQ trap at vector %02x\n", irq);
7100 + printk("unexpected IRQ trap at irq %02x\n", irq);
7101 }
7102 #endif
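
The do_IRQ() rewrite above switches to the set_irq_regs() convention: the interrupted register frame is parked in a per-cpu slot for the duration of the handler instead of being threaded through every handler signature. A userspace model of the save/restore discipline (a plain static stands in for the per-cpu variable):

#include <stdio.h>

/*
 * Sketch of set_irq_regs()/get_irq_regs(): park the interrupted
 * frame, let handlers fetch it on demand, restore the previous
 * value on exit so nested interrupt entries stay balanced.
 */
struct pt_regs { unsigned long ip; };

static struct pt_regs *current_regs;    /* per-cpu in the real kernel */

static struct pt_regs *set_irq_regs(struct pt_regs *new_regs)
{
        struct pt_regs *old = current_regs;
        current_regs = new_regs;
        return old;
}

static struct pt_regs *get_irq_regs(void) { return current_regs; }

static void handler(void)
{
        /* Handlers no longer take regs; they fetch them on demand. */
        printf("handling irq, interrupted ip=0x%lx\n", get_irq_regs()->ip);
}

static void do_IRQ(struct pt_regs *regs)
{
        struct pt_regs *old = set_irq_regs(regs);

        handler();
        set_irq_regs(old);              /* restore for nested interrupts */
}

int main(void)
{
        struct pt_regs frame = { .ip = 0x80200000ul };

        do_IRQ(&frame);
        return 0;
}
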
7103 --- sle11-2009-10-16.orig/arch/x86/kernel/mpparse_64-xen.c 2009-10-28 14:55:12.000000000 +0100
7104 +++ sle11-2009-10-16/arch/x86/kernel/mpparse_64-xen.c 2009-03-04 11:28:34.000000000 +0100
7105 @@ -41,8 +41,7 @@ int acpi_found_madt;
7106 * Various Linux-internal data structures created from the
7107 * MP-table.
7108 */
7109 -unsigned char apic_version [MAX_APICS];
7110 -unsigned char mp_bus_id_to_type [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
7111 +DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
7112 int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
7113
7114 static int mp_current_pci_id = 0;
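
The mpparse and io_apic changes in this patch retire the per-bus type table in favour of the single mp_bus_not_pci bitmap declared above: the only distinction the irq code still needs is PCI versus not-PCI, so the old MP_BUS_* switch statements collapse to test_bit(). A one-word model of the bitmap helpers (the kernel version spans an array via DECLARE_BITMAP):

#include <stdio.h>

/*
 * Sketch of the mp_bus_not_pci idea: one bit per bus records the
 * only fact the interrupt code still needs -- "is this bus PCI
 * or not".  The helpers mirror set_bit/clear_bit/test_bit on a
 * single word.
 */
#define MAX_MP_BUSSES 32

static unsigned long mp_bus_not_pci;    /* bit set => ISA-like bus */

static void set_bus_not_pci(int bus)   { mp_bus_not_pci |=  (1ul << bus); }
static void set_bus_pci(int bus)       { mp_bus_not_pci &= ~(1ul << bus); }
static int  bus_not_pci(int bus)       { return (mp_bus_not_pci >> bus) & 1; }

static int default_polarity(int bus)
{
        /* The old MP_BUS_ISA/EISA/PCI switch collapses to one test. */
        return bus_not_pci(bus) ? 0 /* ISA: high active */
                                : 1 /* PCI: low active  */;
}

int main(void)
{
        set_bus_not_pci(0);             /* bus 0: ISA */
        set_bus_pci(1);                 /* bus 1: PCI */
        printf("bus 0 polarity=%d, bus 1 polarity=%d\n",
               default_polarity(0), default_polarity(1));
        return 0;
}
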
7115 @@ -56,7 +55,6 @@ struct mpc_config_intsrc mp_irqs[MAX_IRQ
7116 int mp_irq_entries;
7117
7118 int nr_ioapics;
7119 -int pic_mode;
7120 unsigned long mp_lapic_addr = 0;
7121
7122
7123 @@ -71,19 +69,6 @@ unsigned disabled_cpus __initdata;
7124 /* Bitmask of physically existing CPUs */
7125 physid_mask_t phys_cpu_present_map = PHYSID_MASK_NONE;
7126
7127 -/* ACPI MADT entry parsing functions */
7128 -#ifdef CONFIG_ACPI
7129 -extern struct acpi_boot_flags acpi_boot;
7130 -#ifdef CONFIG_X86_LOCAL_APIC
7131 -extern int acpi_parse_lapic (acpi_table_entry_header *header);
7132 -extern int acpi_parse_lapic_addr_ovr (acpi_table_entry_header *header);
7133 -extern int acpi_parse_lapic_nmi (acpi_table_entry_header *header);
7134 -#endif /*CONFIG_X86_LOCAL_APIC*/
7135 -#ifdef CONFIG_X86_IO_APIC
7136 -extern int acpi_parse_ioapic (acpi_table_entry_header *header);
7137 -#endif /*CONFIG_X86_IO_APIC*/
7138 -#endif /*CONFIG_ACPI*/
7139 -
7140 u8 bios_cpu_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
7141
7142
7143 @@ -109,24 +94,20 @@ static int __init mpf_checksum(unsigned
7144 static void __cpuinit MP_processor_info (struct mpc_config_processor *m)
7145 {
7146 int cpu;
7147 - unsigned char ver;
7148 cpumask_t tmp_map;
7149 + char *bootup_cpu = "";
7150
7151 if (!(m->mpc_cpuflag & CPU_ENABLED)) {
7152 disabled_cpus++;
7153 return;
7154 }
7155 -
7156 - printk(KERN_INFO "Processor #%d %d:%d APIC version %d\n",
7157 - m->mpc_apicid,
7158 - (m->mpc_cpufeature & CPU_FAMILY_MASK)>>8,
7159 - (m->mpc_cpufeature & CPU_MODEL_MASK)>>4,
7160 - m->mpc_apicver);
7161 -
7162 if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
7163 - Dprintk(" Bootup CPU\n");
7164 + bootup_cpu = " (Bootup-CPU)";
7165 boot_cpu_id = m->mpc_apicid;
7166 }
7167 +
7168 + printk(KERN_INFO "Processor #%d%s\n", m->mpc_apicid, bootup_cpu);
7169 +
7170 if (num_processors >= NR_CPUS) {
7171 printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
7172 " Processor ignored.\n", NR_CPUS);
7173 @@ -137,24 +118,7 @@ static void __cpuinit MP_processor_info
7174 cpus_complement(tmp_map, cpu_present_map);
7175 cpu = first_cpu(tmp_map);
7176
7177 -#if MAX_APICS < 255
7178 - if ((int)m->mpc_apicid > MAX_APICS) {
7179 - printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n",
7180 - m->mpc_apicid, MAX_APICS);
7181 - return;
7182 - }
7183 -#endif
7184 - ver = m->mpc_apicver;
7185 -
7186 physid_set(m->mpc_apicid, phys_cpu_present_map);
7187 - /*
7188 - * Validate version
7189 - */
7190 - if (ver == 0x0) {
7191 - printk(KERN_ERR "BIOS bug, APIC version is 0 for CPU#%d! fixing up to 0x10. (tell your hw vendor)\n", m->mpc_apicid);
7192 - ver = 0x10;
7193 - }
7194 - apic_version[m->mpc_apicid] = ver;
7195 if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
7196 /*
7197 * bios_cpu_apicid is required to have processors listed
7198 @@ -185,37 +149,42 @@ static void __init MP_bus_info (struct m
7199 Dprintk("Bus #%d is %s\n", m->mpc_busid, str);
7200
7201 if (strncmp(str, "ISA", 3) == 0) {
7202 - mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
7203 - } else if (strncmp(str, "EISA", 4) == 0) {
7204 - mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA;
7205 + set_bit(m->mpc_busid, mp_bus_not_pci);
7206 } else if (strncmp(str, "PCI", 3) == 0) {
7207 - mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI;
7208 + clear_bit(m->mpc_busid, mp_bus_not_pci);
7209 mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id;
7210 mp_current_pci_id++;
7211 - } else if (strncmp(str, "MCA", 3) == 0) {
7212 - mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA;
7213 } else {
7214 printk(KERN_ERR "Unknown bustype %s\n", str);
7215 }
7216 }
7217
7218 +static int bad_ioapic(unsigned long address)
7219 +{
7220 + if (nr_ioapics >= MAX_IO_APICS) {
7221 + printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
7222 + "(found %d)\n", MAX_IO_APICS, nr_ioapics);
7223 + panic("Recompile kernel with bigger MAX_IO_APICS!\n");
7224 + }
7225 + if (!address) {
7226 + printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address"
7227 + " found in table, skipping!\n");
7228 + return 1;
7229 + }
7230 + return 0;
7231 +}
7232 +
7233 static void __init MP_ioapic_info (struct mpc_config_ioapic *m)
7234 {
7235 if (!(m->mpc_flags & MPC_APIC_USABLE))
7236 return;
7237
7238 - printk("I/O APIC #%d Version %d at 0x%X.\n",
7239 - m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr);
7240 - if (nr_ioapics >= MAX_IO_APICS) {
7241 - printk(KERN_ERR "Max # of I/O APICs (%d) exceeded (found %d).\n",
7242 - MAX_IO_APICS, nr_ioapics);
7243 - panic("Recompile kernel with bigger MAX_IO_APICS!.\n");
7244 - }
7245 - if (!m->mpc_apicaddr) {
7246 - printk(KERN_ERR "WARNING: bogus zero I/O APIC address"
7247 - " found in MP table, skipping!\n");
7248 + printk("I/O APIC #%d at 0x%X.\n",
7249 + m->mpc_apicid, m->mpc_apicaddr);
7250 +
7251 + if (bad_ioapic(m->mpc_apicaddr))
7252 return;
7253 - }
7254 +
7255 mp_ioapics[nr_ioapics] = *m;
7256 nr_ioapics++;
7257 }
7258 @@ -239,19 +208,6 @@ static void __init MP_lintsrc_info (stru
7259 m->mpc_irqtype, m->mpc_irqflag & 3,
7260 (m->mpc_irqflag >> 2) &3, m->mpc_srcbusid,
7261 m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
7262 - /*
7263 - * Well it seems all SMP boards in existence
7264 - * use ExtINT/LVT1 == LINT0 and
7265 - * NMI/LVT2 == LINT1 - the following check
7266 - * will show us if this assumptions is false.
7267 - * Until then we do not have to add baggage.
7268 - */
7269 - if ((m->mpc_irqtype == mp_ExtINT) &&
7270 - (m->mpc_destapiclint != 0))
7271 - BUG();
7272 - if ((m->mpc_irqtype == mp_NMI) &&
7273 - (m->mpc_destapiclint != 1))
7274 - BUG();
7275 }
7276
7277 /*
7278 @@ -265,7 +221,7 @@ static int __init smp_read_mpc(struct mp
7279 unsigned char *mpt=((unsigned char *)mpc)+count;
7280
7281 if (memcmp(mpc->mpc_signature,MPC_SIGNATURE,4)) {
7282 - printk("SMP mptable: bad signature [%c%c%c%c]!\n",
7283 + printk("MPTABLE: bad signature [%c%c%c%c]!\n",
7284 mpc->mpc_signature[0],
7285 mpc->mpc_signature[1],
7286 mpc->mpc_signature[2],
7287 @@ -273,31 +229,31 @@ static int __init smp_read_mpc(struct mp
7288 return 0;
7289 }
7290 if (mpf_checksum((unsigned char *)mpc,mpc->mpc_length)) {
7291 - printk("SMP mptable: checksum error!\n");
7292 + printk("MPTABLE: checksum error!\n");
7293 return 0;
7294 }
7295 if (mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04) {
7296 - printk(KERN_ERR "SMP mptable: bad table version (%d)!!\n",
7297 + printk(KERN_ERR "MPTABLE: bad table version (%d)!!\n",
7298 mpc->mpc_spec);
7299 return 0;
7300 }
7301 if (!mpc->mpc_lapic) {
7302 - printk(KERN_ERR "SMP mptable: null local APIC address!\n");
7303 + printk(KERN_ERR "MPTABLE: null local APIC address!\n");
7304 return 0;
7305 }
7306 memcpy(str,mpc->mpc_oem,8);
7307 - str[8]=0;
7308 - printk(KERN_INFO "OEM ID: %s ",str);
7309 + str[8] = 0;
7310 + printk(KERN_INFO "MPTABLE: OEM ID: %s ",str);
7311
7312 memcpy(str,mpc->mpc_productid,12);
7313 - str[12]=0;
7314 - printk("Product ID: %s ",str);
7315 + str[12] = 0;
7316 + printk("MPTABLE: Product ID: %s ",str);
7317
7318 - printk("APIC at: 0x%X\n",mpc->mpc_lapic);
7319 + printk("MPTABLE: APIC at: 0x%X\n",mpc->mpc_lapic);
7320
7321 /* save the local APIC address, it might be non-default */
7322 if (!acpi_lapic)
7323 - mp_lapic_addr = mpc->mpc_lapic;
7324 + mp_lapic_addr = mpc->mpc_lapic;
7325
7326 /*
7327 * Now process the configuration blocks.
7328 @@ -309,7 +265,7 @@ static int __init smp_read_mpc(struct mp
7329 struct mpc_config_processor *m=
7330 (struct mpc_config_processor *)mpt;
7331 if (!acpi_lapic)
7332 - MP_processor_info(m);
7333 + MP_processor_info(m);
7334 mpt += sizeof(*m);
7335 count += sizeof(*m);
7336 break;
7337 @@ -328,8 +284,8 @@ static int __init smp_read_mpc(struct mp
7338 struct mpc_config_ioapic *m=
7339 (struct mpc_config_ioapic *)mpt;
7340 MP_ioapic_info(m);
7341 - mpt+=sizeof(*m);
7342 - count+=sizeof(*m);
7343 + mpt += sizeof(*m);
7344 + count += sizeof(*m);
7345 break;
7346 }
7347 case MP_INTSRC:
7348 @@ -338,8 +294,8 @@ static int __init smp_read_mpc(struct mp
7349 (struct mpc_config_intsrc *)mpt;
7350
7351 MP_intsrc_info(m);
7352 - mpt+=sizeof(*m);
7353 - count+=sizeof(*m);
7354 + mpt += sizeof(*m);
7355 + count += sizeof(*m);
7356 break;
7357 }
7358 case MP_LINTSRC:
7359 @@ -347,15 +303,15 @@ static int __init smp_read_mpc(struct mp
7360 struct mpc_config_lintsrc *m=
7361 (struct mpc_config_lintsrc *)mpt;
7362 MP_lintsrc_info(m);
7363 - mpt+=sizeof(*m);
7364 - count+=sizeof(*m);
7365 + mpt += sizeof(*m);
7366 + count += sizeof(*m);
7367 break;
7368 }
7369 }
7370 }
7371 clustered_apic_check();
7372 if (!num_processors)
7373 - printk(KERN_ERR "SMP mptable: no processors registered!\n");
7374 + printk(KERN_ERR "MPTABLE: no processors registered!\n");
7375 return num_processors;
7376 }
7377
7378 @@ -451,13 +407,10 @@ static inline void __init construct_defa
7379 * 2 CPUs, numbered 0 & 1.
7380 */
7381 processor.mpc_type = MP_PROCESSOR;
7382 - /* Either an integrated APIC or a discrete 82489DX. */
7383 - processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
7384 + processor.mpc_apicver = 0;
7385 processor.mpc_cpuflag = CPU_ENABLED;
7386 - processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
7387 - (boot_cpu_data.x86_model << 4) |
7388 - boot_cpu_data.x86_mask;
7389 - processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
7390 + processor.mpc_cpufeature = 0;
7391 + processor.mpc_featureflag = 0;
7392 processor.mpc_reserved[0] = 0;
7393 processor.mpc_reserved[1] = 0;
7394 for (i = 0; i < 2; i++) {
7395 @@ -476,14 +429,6 @@ static inline void __init construct_defa
7396 case 5:
7397 memcpy(bus.mpc_bustype, "ISA ", 6);
7398 break;
7399 - case 2:
7400 - case 6:
7401 - case 3:
7402 - memcpy(bus.mpc_bustype, "EISA ", 6);
7403 - break;
7404 - case 4:
7405 - case 7:
7406 - memcpy(bus.mpc_bustype, "MCA ", 6);
7407 }
7408 MP_bus_info(&bus);
7409 if (mpc_default_type > 4) {
7410 @@ -494,7 +439,7 @@ static inline void __init construct_defa
7411
7412 ioapic.mpc_type = MP_IOAPIC;
7413 ioapic.mpc_apicid = 2;
7414 - ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
7415 + ioapic.mpc_apicver = 0;
7416 ioapic.mpc_flags = MPC_APIC_USABLE;
7417 ioapic.mpc_apicaddr = 0xFEC00000;
7418 MP_ioapic_info(&ioapic);
7419 @@ -537,13 +482,6 @@ void __init get_smp_config (void)
7420 printk(KERN_INFO "Using ACPI for processor (LAPIC) configuration information\n");
7421
7422 printk("Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification);
7423 - if (mpf->mpf_feature2 & (1<<7)) {
7424 - printk(KERN_INFO " IMCR and PIC compatibility mode.\n");
7425 - pic_mode = 1;
7426 - } else {
7427 - printk(KERN_INFO " Virtual Wire compatibility mode.\n");
7428 - pic_mode = 0;
7429 - }
7430
7431 /*
7432 * Now see if we need to read further.
7433 @@ -620,7 +558,7 @@ static int __init smp_scan_config (unsig
7434 return 0;
7435 }
7436
7437 -void __init find_intel_smp (void)
7438 +void __init find_smp_config(void)
7439 {
7440 unsigned int address;
7441
7442 @@ -637,9 +575,7 @@ void __init find_intel_smp (void)
7443 smp_scan_config(0xF0000,0x10000))
7444 return;
7445 /*
7446 - * If it is an SMP machine we should know now, unless the
7447 - * configuration is in an EISA/MCA bus machine with an
7448 - * extended bios data area.
7449 + * If it is an SMP machine we should know now.
7450 *
7451 * there is a real-mode segmented pointer pointing to the
7452 * 4K EBDA area at 0x40E, calculate and scan it here.
7453 @@ -660,64 +596,38 @@ void __init find_intel_smp (void)
7454 printk(KERN_INFO "No mptable found.\n");
7455 }
7456
7457 -/*
7458 - * - Intel MP Configuration Table
7459 - */
7460 -void __init find_smp_config (void)
7461 -{
7462 -#ifdef CONFIG_X86_LOCAL_APIC
7463 - find_intel_smp();
7464 -#endif
7465 -}
7466 -
7467 -
7468 /* --------------------------------------------------------------------------
7469 ACPI-based MP Configuration
7470 -------------------------------------------------------------------------- */
7471
7472 #ifdef CONFIG_ACPI
7473
7474 -void __init mp_register_lapic_address (
7475 - u64 address)
7476 +void __init mp_register_lapic_address(u64 address)
7477 {
7478 #ifndef CONFIG_XEN
7479 mp_lapic_addr = (unsigned long) address;
7480 -
7481 set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr);
7482 -
7483 if (boot_cpu_id == -1U)
7484 boot_cpu_id = GET_APIC_ID(apic_read(APIC_ID));
7485 -
7486 - Dprintk("Boot CPU = %d\n", boot_cpu_physical_apicid);
7487 #endif
7488 }
7489
7490 -
7491 -void __cpuinit mp_register_lapic (
7492 - u8 id,
7493 - u8 enabled)
7494 +void __cpuinit mp_register_lapic (u8 id, u8 enabled)
7495 {
7496 struct mpc_config_processor processor;
7497 int boot_cpu = 0;
7498
7499 - if (id >= MAX_APICS) {
7500 - printk(KERN_WARNING "Processor #%d invalid (max %d)\n",
7501 - id, MAX_APICS);
7502 - return;
7503 - }
7504 -
7505 - if (id == boot_cpu_physical_apicid)
7506 + if (id == boot_cpu_id)
7507 boot_cpu = 1;
7508
7509 #ifndef CONFIG_XEN
7510 processor.mpc_type = MP_PROCESSOR;
7511 processor.mpc_apicid = id;
7512 - processor.mpc_apicver = GET_APIC_VERSION(apic_read(APIC_LVR));
7513 + processor.mpc_apicver = 0;
7514 processor.mpc_cpuflag = (enabled ? CPU_ENABLED : 0);
7515 processor.mpc_cpuflag |= (boot_cpu ? CPU_BOOTPROCESSOR : 0);
7516 - processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
7517 - (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask;
7518 - processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
7519 + processor.mpc_cpufeature = 0;
7520 + processor.mpc_featureflag = 0;
7521 processor.mpc_reserved[0] = 0;
7522 processor.mpc_reserved[1] = 0;
7523 #endif
7524 @@ -725,8 +635,6 @@ void __cpuinit mp_register_lapic (
7525 MP_processor_info(&processor);
7526 }
7527
7528 -#ifdef CONFIG_X86_IO_APIC
7529 -
7530 #define MP_ISA_BUS 0
7531 #define MP_MAX_IOAPIC_PIN 127
7532
7533 @@ -737,11 +645,9 @@ static struct mp_ioapic_routing {
7534 u32 pin_programmed[4];
7535 } mp_ioapic_routing[MAX_IO_APICS];
7536
7537 -
7538 -static int mp_find_ioapic (
7539 - int gsi)
7540 +static int mp_find_ioapic(int gsi)
7541 {
7542 - int i = 0;
7543 + int i = 0;
7544
7545 /* Find the IOAPIC that manages this GSI. */
7546 for (i = 0; i < nr_ioapics; i++) {
7547 @@ -751,28 +657,15 @@ static int mp_find_ioapic (
7548 }
7549
7550 printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi);
7551 -
7552 return -1;
7553 }
7554 -
7555
7556 -void __init mp_register_ioapic (
7557 - u8 id,
7558 - u32 address,
7559 - u32 gsi_base)
7560 +void __init mp_register_ioapic(u8 id, u32 address, u32 gsi_base)
7561 {
7562 - int idx = 0;
7563 + int idx = 0;
7564
7565 - if (nr_ioapics >= MAX_IO_APICS) {
7566 - printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
7567 - "(found %d)\n", MAX_IO_APICS, nr_ioapics);
7568 - panic("Recompile kernel with bigger MAX_IO_APICS!\n");
7569 - }
7570 - if (!address) {
7571 - printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address"
7572 - " found in MADT table, skipping!\n");
7573 + if (bad_ioapic(address))
7574 return;
7575 - }
7576
7577 idx = nr_ioapics++;
7578
7579 @@ -784,7 +677,7 @@ void __init mp_register_ioapic (
7580 set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
7581 #endif
7582 mp_ioapics[idx].mpc_apicid = id;
7583 - mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx);
7584 + mp_ioapics[idx].mpc_apicver = 0;
7585
7586 /*
7587 * Build basic IRQ lookup table to facilitate gsi->io_apic lookups
7588 @@ -795,21 +688,15 @@ void __init mp_register_ioapic (
7589 mp_ioapic_routing[idx].gsi_end = gsi_base +
7590 io_apic_get_redir_entries(idx);
7591
7592 - printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, "
7593 + printk(KERN_INFO "IOAPIC[%d]: apic_id %d, address 0x%x, "
7594 "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid,
7595 - mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr,
7596 + mp_ioapics[idx].mpc_apicaddr,
7597 mp_ioapic_routing[idx].gsi_start,
7598 mp_ioapic_routing[idx].gsi_end);
7599 -
7600 - return;
7601 }
7602
7603 -
7604 -void __init mp_override_legacy_irq (
7605 - u8 bus_irq,
7606 - u8 polarity,
7607 - u8 trigger,
7608 - u32 gsi)
7609 +void __init
7610 +mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
7611 {
7612 struct mpc_config_intsrc intsrc;
7613 int ioapic = -1;
7614 @@ -847,22 +734,18 @@ void __init mp_override_legacy_irq (
7615 mp_irqs[mp_irq_entries] = intsrc;
7616 if (++mp_irq_entries == MAX_IRQ_SOURCES)
7617 panic("Max # of irq sources exceeded!\n");
7618 -
7619 - return;
7620 }
7621
7622 -
7623 -void __init mp_config_acpi_legacy_irqs (void)
7624 +void __init mp_config_acpi_legacy_irqs(void)
7625 {
7626 struct mpc_config_intsrc intsrc;
7627 - int i = 0;
7628 - int ioapic = -1;
7629 + int i = 0;
7630 + int ioapic = -1;
7631
7632 /*
7633 * Fabricate the legacy ISA bus (bus #31).
7634 */
7635 - mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA;
7636 - Dprintk("Bus #%d is ISA\n", MP_ISA_BUS);
7637 + set_bit(MP_ISA_BUS, mp_bus_not_pci);
7638
7639 /*
7640 * Locate the IOAPIC that manages the ISA IRQs (0-15).
7641 @@ -915,24 +798,13 @@ void __init mp_config_acpi_legacy_irqs (
7642 if (++mp_irq_entries == MAX_IRQ_SOURCES)
7643 panic("Max # of irq sources exceeded!\n");
7644 }
7645 -
7646 - return;
7647 }
7648
7649 -#define MAX_GSI_NUM 4096
7650 -
7651 int mp_register_gsi(u32 gsi, int triggering, int polarity)
7652 {
7653 - int ioapic = -1;
7654 - int ioapic_pin = 0;
7655 - int idx, bit = 0;
7656 - static int pci_irq = 16;
7657 - /*
7658 - * Mapping between Global System Interrupts, which
7659 - * represent all possible interrupts, to the IRQs
7660 - * assigned to actual devices.
7661 - */
7662 - static int gsi_to_irq[MAX_GSI_NUM];
7663 + int ioapic = -1;
7664 + int ioapic_pin = 0;
7665 + int idx, bit = 0;
7666
7667 if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC)
7668 return gsi;
7669 @@ -965,47 +837,14 @@ int mp_register_gsi(u32 gsi, int trigger
7670 if ((1<<bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) {
7671 Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n",
7672 mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
7673 - return gsi_to_irq[gsi];
7674 + return gsi;
7675 }
7676
7677 mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1<<bit);
7678
7679 - if (triggering == ACPI_LEVEL_SENSITIVE) {
7680 - /*
7681 - * For PCI devices assign IRQs in order, avoiding gaps
7682 - * due to unused I/O APIC pins.
7683 - */
7684 - int irq = gsi;
7685 - if (gsi < MAX_GSI_NUM) {
7686 - /*
7687 - * Retain the VIA chipset work-around (gsi > 15), but
7688 - * avoid a problem where the 8254 timer (IRQ0) is setup
7689 - * via an override (so it's not on pin 0 of the ioapic),
7690 - * and at the same time, the pin 0 interrupt is a PCI
7691 - * type. The gsi > 15 test could cause these two pins
7692 - * to be shared as IRQ0, and they are not shareable.
7693 - * So test for this condition, and if necessary, avoid
7694 - * the pin collision.
7695 - */
7696 - if (gsi > 15 || (gsi == 0 && !timer_uses_ioapic_pin_0))
7697 - gsi = pci_irq++;
7698 - /*
7699 - * Don't assign IRQ used by ACPI SCI
7700 - */
7701 - if (gsi == acpi_fadt.sci_int)
7702 - gsi = pci_irq++;
7703 - gsi_to_irq[irq] = gsi;
7704 - } else {
7705 - printk(KERN_ERR "GSI %u is too high\n", gsi);
7706 - return gsi;
7707 - }
7708 - }
7709 -
7710 io_apic_set_pci_routing(ioapic, ioapic_pin, gsi,
7711 triggering == ACPI_EDGE_SENSITIVE ? 0 : 1,
7712 polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
7713 return gsi;
7714 }
7715 -
7716 -#endif /*CONFIG_X86_IO_APIC*/
7717 #endif /*CONFIG_ACPI*/
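Two threads run through the mpparse_64-xen.c changes: the per-bus type array shrinks to the single mp_bus_not_pci bitmap (with EISA and MCA dropped from the 64-bit MP-table code, only the ISA-versus-PCI distinction matters), and the duplicated I/O APIC sanity checks move into the shared bad_ioapic() helper. The bitmap idiom in isolation (a sketch with hypothetical names, assuming only the PCI/not-PCI question is ever asked):

        #include <linux/bitops.h>
        #include <linux/string.h>

        static DECLARE_BITMAP(bus_not_pci, MAX_MP_BUSSES);     /* one bit per bus id */

        static void record_bus(int busid, const char *type)
        {
                if (strncmp(type, "PCI", 3) == 0)
                        clear_bit(busid, bus_not_pci);
                else
                        set_bit(busid, bus_not_pci);    /* ISA and anything unknown */
        }

        static int bus_is_isa_like(int busid)
        {
                return test_bit(busid, bus_not_pci);    /* legacy routing rules apply */
        }
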
7718 --- sle11-2009-10-16.orig/arch/x86/kernel/process_64-xen.c 2009-10-28 14:55:12.000000000 +0100
7719 +++ sle11-2009-10-16/arch/x86/kernel/process_64-xen.c 2009-03-04 11:28:34.000000000 +0100
7720 @@ -89,25 +89,24 @@ void idle_notifier_unregister(struct not
7721 }
7722 EXPORT_SYMBOL(idle_notifier_unregister);
7723
7724 -enum idle_state { CPU_IDLE, CPU_NOT_IDLE };
7725 -static DEFINE_PER_CPU(enum idle_state, idle_state) = CPU_NOT_IDLE;
7726 -
7727 void enter_idle(void)
7728 {
7729 - __get_cpu_var(idle_state) = CPU_IDLE;
7730 + write_pda(isidle, 1);
7731 atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
7732 }
7733
7734 static void __exit_idle(void)
7735 {
7736 - __get_cpu_var(idle_state) = CPU_NOT_IDLE;
7737 + if (test_and_clear_bit_pda(0, isidle) == 0)
7738 + return;
7739 atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
7740 }
7741
7742 /* Called from interrupts to signify idle end */
7743 void exit_idle(void)
7744 {
7745 - if (current->pid | read_pda(irqcount))
7746 + /* idle loop has pid 0 */
7747 + if (current->pid)
7748 return;
7749 __exit_idle();
7750 }
7751 @@ -184,6 +183,9 @@ void cpu_idle (void)
7752 play_dead();
7753 enter_idle();
7754 idle();
7755 + /* In many cases the interrupt that ended idle
7756 + has already called exit_idle. But some idle
7757 + loops can be woken up without interrupt. */
7758 __exit_idle();
7759 }
7760
7761 @@ -196,7 +198,7 @@ void cpu_idle (void)
7762 void cpu_idle_wait(void)
7763 {
7764 unsigned int cpu, this_cpu = get_cpu();
7765 - cpumask_t map;
7766 + cpumask_t map, tmp = current->cpus_allowed;
7767
7768 set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
7769 put_cpu();
7770 @@ -219,6 +221,8 @@ void cpu_idle_wait(void)
7771 }
7772 cpus_and(map, map, cpu_online_map);
7773 } while (!cpus_empty(map));
7774 +
7775 + set_cpus_allowed(current, tmp);
7776 }
7777 EXPORT_SYMBOL_GPL(cpu_idle_wait);
7778
7779 @@ -250,9 +254,9 @@ void __show_regs(struct pt_regs * regs)
7780 print_modules();
7781 printk("Pid: %d, comm: %.20s %s %s %.*s\n",
7782 current->pid, current->comm, print_tainted(),
7783 - system_utsname.release,
7784 - (int)strcspn(system_utsname.version, " "),
7785 - system_utsname.version);
7786 + init_utsname()->release,
7787 + (int)strcspn(init_utsname()->version, " "),
7788 + init_utsname()->version);
7789 printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip);
7790 printk_address(regs->rip);
7791 printk("RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp,
7792 @@ -310,6 +314,7 @@ void exit_thread(void)
7793
7794 kfree(t->io_bitmap_ptr);
7795 t->io_bitmap_ptr = NULL;
7796 + clear_thread_flag(TIF_IO_BITMAP);
7797 /*
7798 * Careful, clear this in the TSS too:
7799 */
7800 @@ -340,6 +345,7 @@ void flush_thread(void)
7801 if (t->flags & _TIF_IA32)
7802 current_thread_info()->status |= TS_COMPAT;
7803 }
7804 + t->flags &= ~_TIF_DEBUG;
7805
7806 tsk->thread.debugreg0 = 0;
7807 tsk->thread.debugreg1 = 0;
7808 @@ -432,7 +438,7 @@ int copy_thread(int nr, unsigned long cl
7809 asm("mov %%es,%0" : "=m" (p->thread.es));
7810 asm("mov %%ds,%0" : "=m" (p->thread.ds));
7811
7812 - if (unlikely(me->thread.io_bitmap_ptr != NULL)) {
7813 + if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
7814 p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
7815 if (!p->thread.io_bitmap_ptr) {
7816 p->thread.io_bitmap_max = 0;
7817 @@ -440,6 +446,7 @@ int copy_thread(int nr, unsigned long cl
7818 }
7819 memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
7820 IO_BITMAP_BYTES);
7821 + set_tsk_thread_flag(p, TIF_IO_BITMAP);
7822 }
7823
7824 /*
7825 @@ -474,6 +481,30 @@ static inline void __save_init_fpu( stru
7826 }
7827
7828 /*
7829 + * This special macro can be used to load a debugging register
7830 + */
7831 +#define loaddebug(thread,r) set_debugreg(thread->debugreg ## r, r)
7832 +
7833 +static inline void __switch_to_xtra(struct task_struct *prev_p,
7834 + struct task_struct *next_p)
7835 +{
7836 + struct thread_struct *prev, *next;
7837 +
7838 + prev = &prev_p->thread,
7839 + next = &next_p->thread;
7840 +
7841 + if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
7842 + loaddebug(next, 0);
7843 + loaddebug(next, 1);
7844 + loaddebug(next, 2);
7845 + loaddebug(next, 3);
7846 + /* no 4 and 5 */
7847 + loaddebug(next, 6);
7848 + loaddebug(next, 7);
7849 + }
7850 +}
7851 +
7852 +/*
7853 * switch_to(x,y) should switch tasks from x to y.
7854 *
7855 * This could still be optimized:
7856 @@ -501,6 +532,10 @@ __switch_to(struct task_struct *prev_p,
7857 #endif
7858 multicall_entry_t _mcl[8], *mcl = _mcl;
7859
7860 + /* we're going to use this soon, after a few expensive things */
7861 + if (next_p->fpu_counter>5)
7862 + prefetch(&next->i387.fxsave);
7863 +
7864 /*
7865 * This is basically '__unlazy_fpu', except that we queue a
7866 * multicall to indicate FPU task switch, rather than
7867 @@ -513,7 +548,8 @@ __switch_to(struct task_struct *prev_p,
7868 mcl->op = __HYPERVISOR_fpu_taskswitch;
7869 mcl->args[0] = 1;
7870 mcl++;
7871 - }
7872 + } else
7873 + prev_p->fpu_counter = 0;
7874
7875 /*
7876 * Reload esp0, LDT and the page table pointer:
7877 @@ -608,21 +644,29 @@ __switch_to(struct task_struct *prev_p,
7878 write_pda(oldrsp, next->userrsp);
7879 write_pda(pcurrent, next_p);
7880 write_pda(kernelstack,
7881 - task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET);
7882 + (unsigned long)task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET);
7883 +#ifdef CONFIG_CC_STACKPROTECTOR
7884 + write_pda(stack_canary, next_p->stack_canary);
7885 +
7886 + /*
7887 + * Build time only check to make sure the stack_canary is at
7888 + * offset 40 in the pda; this is a gcc ABI requirement
7889 + */
7890 + BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
7891 +#endif
7892
7893 /*
7894 * Now maybe reload the debug registers
7895 */
7896 - if (unlikely(next->debugreg7)) {
7897 - set_debugreg(next->debugreg0, 0);
7898 - set_debugreg(next->debugreg1, 1);
7899 - set_debugreg(next->debugreg2, 2);
7900 - set_debugreg(next->debugreg3, 3);
7901 - /* no 4 and 5 */
7902 - set_debugreg(next->debugreg6, 6);
7903 - set_debugreg(next->debugreg7, 7);
7904 - }
7905 + if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW))
7906 + __switch_to_xtra(prev_p, next_p);
7907
7908 + /* If the task has used fpu the last 5 timeslices, just do a full
7909 + * restore of the math state immediately to avoid the trap; the
7910 + * chances of needing FPU soon are obviously high now
7911 + */
7912 + if (next_p->fpu_counter>5)
7913 + math_state_restore();
7914 return prev_p;
7915 }
7916
7917 @@ -842,7 +886,7 @@ int dump_task_regs(struct task_struct *t
7918
7919 unsigned long arch_align_stack(unsigned long sp)
7920 {
7921 - if (randomize_va_space)
7922 + if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
7923 sp -= get_random_int() % 8192;
7924 return sp & ~0xf;
7925 }
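The process_64-xen.c hunks fold in several mainline context-switch changes: the idle flag moves into the PDA so __exit_idle() can test-and-clear it atomically, debug-register reloads are gated on TIF flags through the new __switch_to_xtra() helper, and an fpu_counter heuristic short-circuits lazy FPU switching for heavy FPU users. Stripped of the surrounding multicall plumbing, the heuristic reduces to:

        /* early in __switch_to(): warm the cache while other state is saved */
        if (next_p->fpu_counter > 5)
                prefetch(&next->i387.fxsave);

        ...

        /* late in __switch_to(): a frequent FPU user would fault on its
         * first FP instruction anyway, so restore the math state up front
         * instead of taking the device-not-available trap */
        if (next_p->fpu_counter > 5)
                math_state_restore();

A task that trapped into math_state_restore() on more than five consecutive timeslices will almost certainly need the FPU again, so the eager restore trades one fxrstor for a guaranteed trap.
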
7926 --- sle11-2009-10-16.orig/arch/x86/kernel/setup_64-xen.c 2009-10-28 14:55:12.000000000 +0100
7927 +++ sle11-2009-10-16/arch/x86/kernel/setup_64-xen.c 2009-03-04 11:28:34.000000000 +0100
7928 @@ -88,9 +88,6 @@ extern struct edid_info edid_info;
7929 shared_info_t *HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page;
7930 EXPORT_SYMBOL(HYPERVISOR_shared_info);
7931
7932 -extern char hypercall_page[PAGE_SIZE];
7933 -EXPORT_SYMBOL(hypercall_page);
7934 -
7935 static int xen_panic_event(struct notifier_block *, unsigned long, void *);
7936 static struct notifier_block xen_panic_block = {
7937 xen_panic_event, NULL, 0 /* try to go last */
7938 @@ -118,16 +115,6 @@ EXPORT_SYMBOL(boot_cpu_data);
7939
7940 unsigned long mmu_cr4_features;
7941
7942 -int acpi_disabled;
7943 -EXPORT_SYMBOL(acpi_disabled);
7944 -#ifdef CONFIG_ACPI
7945 -extern int __initdata acpi_ht;
7946 -extern acpi_interrupt_flags acpi_sci_flags;
7947 -int __initdata acpi_force = 0;
7948 -#endif
7949 -
7950 -int acpi_numa __initdata;
7951 -
7952 /* Boot loader ID as an integer, for the benefit of proc_dointvec */
7953 int bootloader_type;
7954
7955 @@ -151,10 +138,6 @@ struct sys_desc_table_struct {
7956
7957 struct edid_info edid_info;
7958 EXPORT_SYMBOL_GPL(edid_info);
7959 -struct e820map e820;
7960 -#ifdef CONFIG_XEN
7961 -struct e820map machine_e820;
7962 -#endif
7963
7964 extern int root_mountflags;
7965
7966 @@ -181,9 +164,6 @@ struct resource standard_io_resources[]
7967 .flags = IORESOURCE_BUSY | IORESOURCE_IO }
7968 };
7969
7970 -#define STANDARD_IO_RESOURCES \
7971 - (sizeof standard_io_resources / sizeof standard_io_resources[0])
7972 -
7973 #define IORESOURCE_RAM (IORESOURCE_BUSY | IORESOURCE_MEM)
7974
7975 struct resource data_resource = {
7976 @@ -230,9 +210,6 @@ static struct resource adapter_rom_resou
7977 .flags = IORESOURCE_ROM }
7978 };
7979
7980 -#define ADAPTER_ROM_RESOURCES \
7981 - (sizeof adapter_rom_resources / sizeof adapter_rom_resources[0])
7982 -
7983 static struct resource video_rom_resource = {
7984 .name = "Video ROM",
7985 .start = 0xc0000,
7986 @@ -309,7 +286,8 @@ static void __init probe_roms(void)
7987 }
7988
7989 /* check for adapter roms on 2k boundaries */
7990 - for (i = 0; i < ADAPTER_ROM_RESOURCES && start < upper; start += 2048) {
7991 + for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper;
7992 + start += 2048) {
7993 rom = isa_bus_to_virt(start);
7994 if (!romsignature(rom))
7995 continue;
7996 @@ -329,187 +307,22 @@ static void __init probe_roms(void)
7997 }
7998 }
7999
8000 -/* Check for full argument with no trailing characters */
8001 -static int fullarg(char *p, char *arg)
8002 +#ifdef CONFIG_PROC_VMCORE
8003 +/* elfcorehdr= specifies the location of elf core header
8004 + * stored by the crashed kernel. This option will be passed
8005 + * by kexec loader to the capture kernel.
8006 + */
8007 +static int __init setup_elfcorehdr(char *arg)
8008 {
8009 - int l = strlen(arg);
8010 - return !memcmp(p, arg, l) && (p[l] == 0 || isspace(p[l]));
8011 + char *end;
8012 + if (!arg)
8013 + return -EINVAL;
8014 + elfcorehdr_addr = memparse(arg, &end);
8015 + return end > arg ? 0 : -EINVAL;
8016 }
8017 -
8018 -static __init void parse_cmdline_early (char ** cmdline_p)
8019 -{
8020 - char c = ' ', *to = command_line, *from = COMMAND_LINE;
8021 - int len = 0;
8022 - int userdef = 0;
8023 -
8024 - for (;;) {
8025 - if (c != ' ')
8026 - goto next_char;
8027 -
8028 -#ifdef CONFIG_SMP
8029 - /*
8030 - * If the BIOS enumerates physical processors before logical,
8031 - * maxcpus=N at enumeration-time can be used to disable HT.
8032 - */
8033 - else if (!memcmp(from, "maxcpus=", 8)) {
8034 - extern unsigned int maxcpus;
8035 -
8036 - maxcpus = simple_strtoul(from + 8, NULL, 0);
8037 - }
8038 -#endif
8039 -#ifdef CONFIG_ACPI
8040 - /* "acpi=off" disables both ACPI table parsing and interpreter init */
8041 - if (fullarg(from,"acpi=off"))
8042 - disable_acpi();
8043 -
8044 - if (fullarg(from, "acpi=force")) {
8045 - /* add later when we do DMI horrors: */
8046 - acpi_force = 1;
8047 - acpi_disabled = 0;
8048 - }
8049 -
8050 - /* acpi=ht just means: do ACPI MADT parsing
8051 - at bootup, but don't enable the full ACPI interpreter */
8052 - if (fullarg(from, "acpi=ht")) {
8053 - if (!acpi_force)
8054 - disable_acpi();
8055 - acpi_ht = 1;
8056 - }
8057 - else if (fullarg(from, "pci=noacpi"))
8058 - acpi_disable_pci();
8059 - else if (fullarg(from, "acpi=noirq"))
8060 - acpi_noirq_set();
8061 -
8062 - else if (fullarg(from, "acpi_sci=edge"))
8063 - acpi_sci_flags.trigger = 1;
8064 - else if (fullarg(from, "acpi_sci=level"))
8065 - acpi_sci_flags.trigger = 3;
8066 - else if (fullarg(from, "acpi_sci=high"))
8067 - acpi_sci_flags.polarity = 1;
8068 - else if (fullarg(from, "acpi_sci=low"))
8069 - acpi_sci_flags.polarity = 3;
8070 -
8071 - /* acpi=strict disables out-of-spec workarounds */
8072 - else if (fullarg(from, "acpi=strict")) {
8073 - acpi_strict = 1;
8074 - }
8075 -#ifdef CONFIG_X86_IO_APIC
8076 - else if (fullarg(from, "acpi_skip_timer_override"))
8077 - acpi_skip_timer_override = 1;
8078 -#endif
8079 -#endif
8080 -
8081 -#ifndef CONFIG_XEN
8082 - if (fullarg(from, "nolapic") || fullarg(from, "disableapic")) {
8083 - clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
8084 - disable_apic = 1;
8085 - }
8086 -
8087 - if (fullarg(from, "noapic"))
8088 - skip_ioapic_setup = 1;
8089 -
8090 - if (fullarg(from,"apic")) {
8091 - skip_ioapic_setup = 0;
8092 - ioapic_force = 1;
8093 - }
8094 -#endif
8095 -
8096 - if (!memcmp(from, "mem=", 4))
8097 - parse_memopt(from+4, &from);
8098 -
8099 - if (!memcmp(from, "memmap=", 7)) {
8100 - /* exactmap option is for used defined memory */
8101 - if (!memcmp(from+7, "exactmap", 8)) {
8102 -#ifdef CONFIG_CRASH_DUMP
8103 - /* If we are doing a crash dump, we
8104 - * still need to know the real mem
8105 - * size before original memory map is
8106 - * reset.
8107 - */
8108 - saved_max_pfn = e820_end_of_ram();
8109 -#endif
8110 - from += 8+7;
8111 - end_pfn_map = 0;
8112 - e820.nr_map = 0;
8113 - userdef = 1;
8114 - }
8115 - else {
8116 - parse_memmapopt(from+7, &from);
8117 - userdef = 1;
8118 - }
8119 - }
8120 -
8121 -#ifdef CONFIG_NUMA
8122 - if (!memcmp(from, "numa=", 5))
8123 - numa_setup(from+5);
8124 +early_param("elfcorehdr", setup_elfcorehdr);
8125 #endif
8126
8127 - if (!memcmp(from,"iommu=",6)) {
8128 - iommu_setup(from+6);
8129 - }
8130 -
8131 - if (fullarg(from,"oops=panic"))
8132 - panic_on_oops = 1;
8133 -
8134 - if (!memcmp(from, "noexec=", 7))
8135 - nonx_setup(from + 7);
8136 -
8137 -#ifdef CONFIG_KEXEC
8138 - /* crashkernel=size@addr specifies the location to reserve for
8139 - * a crash kernel. By reserving this memory we guarantee
8140 - * that linux never set's it up as a DMA target.
8141 - * Useful for holding code to do something appropriate
8142 - * after a kernel panic.
8143 - */
8144 - else if (!memcmp(from, "crashkernel=", 12)) {
8145 -#ifndef CONFIG_XEN
8146 - unsigned long size, base;
8147 - size = memparse(from+12, &from);
8148 - if (*from == '@') {
8149 - base = memparse(from+1, &from);
8150 - /* FIXME: Do I want a sanity check
8151 - * to validate the memory range?
8152 - */
8153 - crashk_res.start = base;
8154 - crashk_res.end = base + size - 1;
8155 - }
8156 -#else
8157 - printk("Ignoring crashkernel command line, "
8158 - "parameter will be supplied by xen\n");
8159 -#endif
8160 - }
8161 -#endif
8162 -
8163 -#ifdef CONFIG_PROC_VMCORE
8164 - /* elfcorehdr= specifies the location of elf core header
8165 - * stored by the crashed kernel. This option will be passed
8166 - * by kexec loader to the capture kernel.
8167 - */
8168 - else if(!memcmp(from, "elfcorehdr=", 11))
8169 - elfcorehdr_addr = memparse(from+11, &from);
8170 -#endif
8171 -
8172 -#if defined(CONFIG_HOTPLUG_CPU) && !defined(CONFIG_XEN)
8173 - else if (!memcmp(from, "additional_cpus=", 16))
8174 - setup_additional_cpus(from+16);
8175 -#endif
8176 -
8177 - next_char:
8178 - c = *(from++);
8179 - if (!c)
8180 - break;
8181 - if (COMMAND_LINE_SIZE <= ++len)
8182 - break;
8183 - *(to++) = c;
8184 - }
8185 - if (userdef) {
8186 - printk(KERN_INFO "user-defined physical RAM map:\n");
8187 - e820_print_map("user");
8188 - }
8189 - *to = '\0';
8190 - *cmdline_p = command_line;
8191 -}
8192 -
8193 #ifndef CONFIG_NUMA
8194 static void __init
8195 contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
8196 @@ -521,10 +334,11 @@ contig_initmem_init(unsigned long start_
8197 if (bootmap == -1L)
8198 panic("Cannot find bootmem map of size %ld\n",bootmap_size);
8199 bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn);
8200 + e820_register_active_regions(0, start_pfn, end_pfn);
8201 #ifdef CONFIG_XEN
8202 - e820_bootmem_free(NODE_DATA(0), 0, xen_start_info->nr_pages<<PAGE_SHIFT);
8203 + free_bootmem_with_active_regions(0, xen_start_info->nr_pages);
8204 #else
8205 - e820_bootmem_free(NODE_DATA(0), 0, end_pfn << PAGE_SHIFT);
8206 + free_bootmem_with_active_regions(0, end_pfn);
8207 #endif
8208 reserve_bootmem(bootmap, bootmap_size);
8209 }
8210 @@ -587,6 +401,10 @@ static void discover_ebda(void)
8211 void __init setup_arch(char **cmdline_p)
8212 {
8213 #ifdef CONFIG_XEN
8214 + extern struct e820map machine_e820;
8215 +
8216 + printk(KERN_INFO "Command line: %s\n", saved_command_line);
8217 +
8218 /* Register a call for panic conditions. */
8219 atomic_notifier_chain_register(&panic_notifier_list, &xen_panic_block);
8220
8221 @@ -612,6 +430,8 @@ void __init setup_arch(char **cmdline_p)
8222
8223 ARCH_SETUP
8224 #else
8225 + printk(KERN_INFO "Command line: %s\n", saved_command_line);
8226 +
8227 ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV);
8228 screen_info = SCREEN_INFO;
8229 edid_info = EDID_INFO;
8230 @@ -639,16 +459,22 @@ void __init setup_arch(char **cmdline_p)
8231 data_resource.start = virt_to_phys(&_etext);
8232 data_resource.end = virt_to_phys(&_edata)-1;
8233
8234 - parse_cmdline_early(cmdline_p);
8235 -
8236 early_identify_cpu(&boot_cpu_data);
8237
8238 + strlcpy(command_line, saved_command_line, COMMAND_LINE_SIZE);
8239 + *cmdline_p = command_line;
8240 +
8241 + parse_early_param();
8242 +
8243 + finish_e820_parsing();
8244 +
8245 + e820_register_active_regions(0, 0, -1UL);
8246 /*
8247 * partially used pages are not usable - thus
8248 * we are rounding upwards:
8249 */
8250 end_pfn = e820_end_of_ram();
8251 - num_physpages = end_pfn; /* for pfn_valid */
8252 + num_physpages = end_pfn;
8253
8254 check_efer();
8255
8256 @@ -659,6 +485,14 @@ void __init setup_arch(char **cmdline_p)
8257 if (is_initial_xendomain())
8258 dmi_scan_machine();
8259
8260 + /* How many end-of-memory variables you have, grandma! */
8261 + max_low_pfn = end_pfn;
8262 + max_pfn = end_pfn;
8263 + high_memory = (void *)__va(end_pfn * PAGE_SIZE - 1) + 1;
8264 +
8265 + /* Remove active ranges so rediscovery with NUMA-awareness happens */
8266 + remove_all_active_ranges();
8267 +
8268 #ifdef CONFIG_ACPI_NUMA
8269 /*
8270 * Parse SRAT to discover nodes.
8271 @@ -848,16 +682,16 @@ void __init setup_arch(char **cmdline_p)
8272 BUG();
8273 }
8274
8275 +#ifdef CONFIG_ACPI
8276 if (!is_initial_xendomain()) {
8277 acpi_disabled = 1;
8278 -#ifdef CONFIG_ACPI
8279 acpi_ht = 0;
8280 -#endif
8281 }
8282 #endif
8283 +#endif
8284
8285 -#ifndef CONFIG_XEN
8286 - check_ioapic();
8287 +#if defined(CONFIG_PCI) && !defined(CONFIG_XEN)
8288 + early_quirks();
8289 #endif
8290
8291 zap_low_mappings(0);
8292 @@ -907,6 +741,7 @@ void __init setup_arch(char **cmdline_p)
8293 e820_reserve_resources(machine_e820.map, machine_e820.nr_map);
8294 #else
8295 e820_reserve_resources(e820.map, e820.nr_map);
8296 + e820_mark_nosave_regions();
8297 #endif
8298
8299 request_resource(&iomem_resource, &video_ram_resource);
8300 @@ -914,7 +749,7 @@ void __init setup_arch(char **cmdline_p)
8301 {
8302 unsigned i;
8303 /* request I/O space for devices used on all i[345]86 PCs */
8304 - for (i = 0; i < STANDARD_IO_RESOURCES; i++)
8305 + for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++)
8306 request_resource(&ioport_resource, &standard_io_resources[i]);
8307 }
8308
8309 @@ -1098,7 +933,7 @@ static void __init amd_detect_cmp(struct
8310 #endif
8311 }
8312
8313 -static void __init init_amd(struct cpuinfo_x86 *c)
8314 +static void __cpuinit init_amd(struct cpuinfo_x86 *c)
8315 {
8316 unsigned level;
8317
8318 @@ -1154,6 +989,12 @@ static void __init init_amd(struct cpuin
8319
8320 /* Fix cpuid4 emulation for more */
8321 num_cache_leaves = 3;
8322 +
8323 + /* When there is only one core no need to synchronize RDTSC */
8324 + if (num_possible_cpus() == 1)
8325 + set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
8326 + else
8327 + clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
8328 }
8329
8330 static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
8331 @@ -1235,8 +1076,7 @@ static void srat_detect_node(void)
8332 node = first_node(node_online_map);
8333 numa_set_node(cpu, node);
8334
8335 - if (acpi_numa > 0)
8336 - printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
8337 + printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
8338 #endif
8339 }
8340
8341 @@ -1270,6 +1110,8 @@ static void __cpuinit init_intel(struct
8342 if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
8343 (c->x86 == 0x6 && c->x86_model >= 0x0e))
8344 set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
8345 + if (c->x86 == 6)
8346 + set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability);
8347 set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
8348 c->x86_max_cores = intel_num_cpu_cores(c);
8349
8350 @@ -1488,8 +1330,8 @@ static int show_cpuinfo(struct seq_file
8351
8352 /* Intel-defined (#2) */
8353 "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est",
8354 - "tm2", NULL, "cid", NULL, NULL, "cx16", "xtpr", NULL,
8355 - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
8356 + "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL,
8357 + NULL, NULL, "dca", NULL, NULL, NULL, NULL, NULL,
8358 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
8359
8360 /* VIA/Cyrix/Centaur-defined */
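Most of the setup_64-xen.c delta is the removal of the hand-rolled parse_cmdline_early() scanner; from 2.6.19 on, setup_arch() calls parse_early_param() and each boot option registers its own handler next to the code it controls, as the surviving elfcorehdr= conversion shows. A registration has this shape (a sketch; the option name and variable are hypothetical):

        #include <linux/init.h>
        #include <linux/kernel.h>

        static unsigned long my_opt_val;

        /* parses "my_opt=<number>"; handlers run from parse_early_param(),
         * well before the legacy __setup() pass */
        static int __init my_opt_setup(char *arg)
        {
                if (!arg)
                        return -EINVAL;         /* option given without a value */
                my_opt_val = simple_strtoul(arg, NULL, 0);
                return 0;                       /* non-zero is reported as a parse error */
        }
        early_param("my_opt", my_opt_setup);
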
8361 --- sle11-2009-10-16.orig/arch/x86/kernel/setup64-xen.c 2009-04-20 11:36:10.000000000 +0200
8362 +++ sle11-2009-10-16/arch/x86/kernel/setup64-xen.c 2009-03-04 11:28:34.000000000 +0100
8363 @@ -31,7 +31,7 @@
8364 #include <asm/hypervisor.h>
8365 #endif
8366
8367 -char x86_boot_params[BOOT_PARAM_SIZE] __initdata = {0,};
8368 +char x86_boot_params[BOOT_PARAM_SIZE] __initdata;
8369
8370 cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
8371
8372 @@ -55,8 +55,10 @@ Control non executable mappings for 64bi
8373 on Enable(default)
8374 off Disable
8375 */
8376 -int __init nonx_setup(char *str)
8377 +static int __init nonx_setup(char *str)
8378 {
8379 + if (!str)
8380 + return -EINVAL;
8381 if (!strncmp(str, "on", 2)) {
8382 __supported_pte_mask |= _PAGE_NX;
8383 do_not_nx = 0;
8384 @@ -64,9 +66,9 @@ int __init nonx_setup(char *str)
8385 do_not_nx = 1;
8386 __supported_pte_mask &= ~_PAGE_NX;
8387 }
8388 - return 1;
8389 + return 0;
8390 }
8391 -__setup("noexec=", nonx_setup); /* parsed early actually */
8392 +early_param("noexec", nonx_setup);
8393
8394 int force_personality32 = 0;
8395
8396 @@ -102,12 +104,9 @@ void __init setup_per_cpu_areas(void)
8397 #endif
8398
8399 /* Copy section for each CPU (we discard the original) */
8400 - size = ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES);
8401 -#ifdef CONFIG_MODULES
8402 - if (size < PERCPU_ENOUGH_ROOM)
8403 - size = PERCPU_ENOUGH_ROOM;
8404 -#endif
8405 + size = PERCPU_ENOUGH_ROOM;
8406
8407 + printk(KERN_INFO "PERCPU: Allocating %lu bytes of per cpu data\n", size);
8408 for_each_cpu_mask (i, cpu_possible_map) {
8409 char *ptr;
8410
8411 @@ -169,7 +168,10 @@ void pda_init(int cpu)
8412 /* Setup up data that may be needed in __get_free_pages early */
8413 asm volatile("movl %0,%%fs ; movl %0,%%gs" :: "r" (0));
8414 #ifndef CONFIG_XEN
8415 + /* Memory clobbers used to order PDA accessed */
8416 + mb();
8417 wrmsrl(MSR_GS_BASE, pda);
8418 + mb();
8419 #else
8420 if (HYPERVISOR_set_segment_base(SEGBASE_GS_KERNEL,
8421 (unsigned long)pda))
8422 @@ -302,28 +304,17 @@ void __cpuinit cpu_init (void)
8423 * set up and load the per-CPU TSS
8424 */
8425 for (v = 0; v < N_EXCEPTION_STACKS; v++) {
8426 + static const unsigned int order[N_EXCEPTION_STACKS] = {
8427 + [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
8428 + [DEBUG_STACK - 1] = DEBUG_STACK_ORDER
8429 + };
8430 if (cpu) {
8431 - static const unsigned int order[N_EXCEPTION_STACKS] = {
8432 - [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
8433 - [DEBUG_STACK - 1] = DEBUG_STACK_ORDER
8434 - };
8435 -
8436 estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]);
8437 if (!estacks)
8438 panic("Cannot allocate exception stack %ld %d\n",
8439 v, cpu);
8440 }
8441 - switch (v + 1) {
8442 -#if DEBUG_STKSZ > EXCEPTION_STKSZ
8443 - case DEBUG_STACK:
8444 - cpu_pda(cpu)->debugstack = (unsigned long)estacks;
8445 - estacks += DEBUG_STKSZ;
8446 - break;
8447 -#endif
8448 - default:
8449 - estacks += EXCEPTION_STKSZ;
8450 - break;
8451 - }
8452 + estacks += PAGE_SIZE << order[v];
8453 orig_ist->ist[v] = t->ist[v] = (unsigned long)estacks;
8454 }
8455
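In the setup64-xen.c cpu_init() hunk, the switch over exception-stack sizes disappears because the order[] table already encodes the one special case: a GNU C range designator fills every slot with the default, and the later [DEBUG_STACK - 1] entry overrides it. The same idiom as a standalone, compilable sketch:

        #include <stdio.h>

        /* later designators win for overlapping indices, so one override
         * after a [first ... last] fill is all DEBUG_STACK needs */
        static const unsigned int order[8] = {
                [0 ... 7] = 1,          /* the common stack order */
                [3]       = 2,          /* the single oversized stack */
        };

        int main(void)
        {
                printf("%u %u\n", order[0], order[3]);  /* prints "1 2" */
                return 0;
        }
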
8456 --- sle11-2009-10-16.orig/arch/x86/kernel/smp_64-xen.c 2009-10-28 14:55:12.000000000 +0100
8457 +++ sle11-2009-10-16/arch/x86/kernel/smp_64-xen.c 2009-03-04 11:28:34.000000000 +0100
8458 @@ -381,9 +381,8 @@ int smp_call_function_single (int cpu, v
8459 /* prevent preemption and reschedule on another processor */
8460 int me = get_cpu();
8461 if (cpu == me) {
8462 - WARN_ON(1);
8463 put_cpu();
8464 - return -EBUSY;
8465 + return 0;
8466 }
8467 spin_lock_bh(&call_lock);
8468 __smp_call_function_single(cpu, func, info, nonatomic, wait);
8469 @@ -501,7 +500,7 @@ void smp_send_stop(void)
8470 #ifndef CONFIG_XEN
8471 asmlinkage void smp_reschedule_interrupt(void)
8472 #else
8473 -asmlinkage irqreturn_t smp_reschedule_interrupt(void)
8474 +asmlinkage irqreturn_t smp_reschedule_interrupt(int irq, void *ctx)
8475 #endif
8476 {
8477 #ifndef CONFIG_XEN
8478 @@ -514,7 +513,7 @@ asmlinkage irqreturn_t smp_reschedule_in
8479 #ifndef CONFIG_XEN
8480 asmlinkage void smp_call_function_interrupt(void)
8481 #else
8482 -asmlinkage irqreturn_t smp_call_function_interrupt(void)
8483 +asmlinkage irqreturn_t smp_call_function_interrupt(int irq, void *ctx)
8484 #endif
8485 {
8486 void (*func) (void *info) = call_data->func;
8487 @@ -545,31 +544,3 @@ asmlinkage irqreturn_t smp_call_function
8488 return IRQ_HANDLED;
8489 #endif
8490 }
8491 -
8492 -int safe_smp_processor_id(void)
8493 -{
8494 -#ifdef CONFIG_XEN
8495 - return smp_processor_id();
8496 -#else
8497 - unsigned apicid, i;
8498 -
8499 - if (disable_apic)
8500 - return 0;
8501 -
8502 - apicid = hard_smp_processor_id();
8503 - if (apicid < NR_CPUS && x86_cpu_to_apicid[apicid] == apicid)
8504 - return apicid;
8505 -
8506 - for (i = 0; i < NR_CPUS; ++i) {
8507 - if (x86_cpu_to_apicid[i] == apicid)
8508 - return i;
8509 - }
8510 -
8511 - /* No entries in x86_cpu_to_apicid? Either no MPS|ACPI,
8512 - * or called too early. Either way, we must be CPU 0. */
8513 - if (x86_cpu_to_apicid[0] == BAD_APICID)
8514 - return 0;
8515 -
8516 - return 0; /* Should not happen */
8517 -#endif
8518 -}
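In smp_64-xen.c, safe_smp_processor_id() is retired (its remaining users in the traps code switch to plain smp_processor_id(), as the next file shows), the Xen IPI entry points take the standard (irq, dev_id) handler arguments now that the pt_regs parameter is gone, and smp_call_function_single() returns 0 instead of WARN_ON() plus -EBUSY when handed the local CPU. Note that the self-call still does not execute the function; a caller sketch:

        /* callers that may target themselves still run func() locally;
         * the code above returns 0 for the self-call without invoking it */
        if (cpu == smp_processor_id())
                func(info);
        else
                smp_call_function_single(cpu, func, info, 0, 1);
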
8519 --- sle11-2009-10-16.orig/arch/x86/kernel/traps_64-xen.c 2009-10-28 14:55:12.000000000 +0100
8520 +++ sle11-2009-10-16/arch/x86/kernel/traps_64-xen.c 2009-03-04 11:28:34.000000000 +0100
8521 @@ -23,6 +23,7 @@
8522 #include <linux/delay.h>
8523 #include <linux/spinlock.h>
8524 #include <linux/interrupt.h>
8525 +#include <linux/kallsyms.h>
8526 #include <linux/module.h>
8527 #include <linux/moduleparam.h>
8528 #include <linux/nmi.h>
8529 @@ -45,6 +46,7 @@
8530 #include <asm/pda.h>
8531 #include <asm/proto.h>
8532 #include <asm/nmi.h>
8533 +#include <asm/stacktrace.h>
8534
8535 asmlinkage void divide_error(void);
8536 asmlinkage void debug(void);
8537 @@ -114,7 +116,6 @@ static int call_trace = 1;
8538 #endif
8539
8540 #ifdef CONFIG_KALLSYMS
8541 -# include <linux/kallsyms.h>
8542 void printk_address(unsigned long address)
8543 {
8544 unsigned long offset = 0, symsize;
8545 @@ -142,7 +143,7 @@ void printk_address(unsigned long addres
8546 #endif
8547
8548 static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
8549 - unsigned *usedp, const char **idp)
8550 + unsigned *usedp, char **idp)
8551 {
8552 #ifndef CONFIG_X86_NO_TSS
8553 static char ids[][8] = {
8554 @@ -162,26 +163,7 @@ static unsigned long *in_exception_stack
8555 * 'stack' is in one of them:
8556 */
8557 for (k = 0; k < N_EXCEPTION_STACKS; k++) {
8558 - unsigned long end;
8559 -
8560 - /*
8561 - * set 'end' to the end of the exception stack.
8562 - */
8563 - switch (k + 1) {
8564 - /*
8565 - * TODO: this block is not needed i think, because
8566 - * setup64.c:cpu_init() sets up t->ist[DEBUG_STACK]
8567 - * properly too.
8568 - */
8569 -#if DEBUG_STKSZ > EXCEPTION_STKSZ
8570 - case DEBUG_STACK:
8571 - end = cpu_pda(cpu)->debugstack + DEBUG_STKSZ;
8572 - break;
8573 -#endif
8574 - default:
8575 - end = per_cpu(orig_ist, cpu).ist[k];
8576 - break;
8577 - }
8578 + unsigned long end = per_cpu(orig_ist, cpu).ist[k];
8579 /*
8580 * Is 'stack' above this exception frame's end?
8581 * If yes then skip to the next frame.
8582 @@ -236,13 +218,19 @@ static unsigned long *in_exception_stack
8583 return NULL;
8584 }
8585
8586 -static int show_trace_unwind(struct unwind_frame_info *info, void *context)
8587 +struct ops_and_data {
8588 + struct stacktrace_ops *ops;
8589 + void *data;
8590 +};
8591 +
8592 +static int dump_trace_unwind(struct unwind_frame_info *info, void *context)
8593 {
8594 + struct ops_and_data *oad = (struct ops_and_data *)context;
8595 int n = 0;
8596
8597 while (unwind(info) == 0 && UNW_PC(info)) {
8598 n++;
8599 - printk_address(UNW_PC(info));
8600 + oad->ops->address(oad->data, UNW_PC(info));
8601 if (arch_unw_user_mode(info))
8602 break;
8603 }
8604 @@ -256,13 +244,19 @@ static int show_trace_unwind(struct unwi
8605 * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
8606 */
8607
8608 -void show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * stack)
8609 +static inline int valid_stack_ptr(struct thread_info *tinfo, void *p)
8610 {
8611 - const unsigned cpu = safe_smp_processor_id();
8612 + void *t = (void *)tinfo;
8613 + return p > t && p < t + THREAD_SIZE - 3;
8614 +}
8615 +
8616 +void dump_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * stack,
8617 + struct stacktrace_ops *ops, void *data)
8618 +{
8619 + const unsigned cpu = smp_processor_id();
8620 unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr;
8621 unsigned used = 0;
8622 -
8623 - printk("\nCall Trace:\n");
8624 + struct thread_info *tinfo;
8625
8626 if (!tsk)
8627 tsk = current;
8628 @@ -270,32 +264,47 @@ void show_trace(struct task_struct *tsk,
8629 if (call_trace >= 0) {
8630 int unw_ret = 0;
8631 struct unwind_frame_info info;
8632 + struct ops_and_data oad = { .ops = ops, .data = data };
8633
8634 if (regs) {
8635 if (unwind_init_frame_info(&info, tsk, regs) == 0)
8636 - unw_ret = show_trace_unwind(&info, NULL);
8637 + unw_ret = dump_trace_unwind(&info, &oad);
8638 } else if (tsk == current)
8639 - unw_ret = unwind_init_running(&info, show_trace_unwind, NULL);
8640 + unw_ret = unwind_init_running(&info, dump_trace_unwind, &oad);
8641 else {
8642 if (unwind_init_blocked(&info, tsk) == 0)
8643 - unw_ret = show_trace_unwind(&info, NULL);
8644 + unw_ret = dump_trace_unwind(&info, &oad);
8645 }
8646 if (unw_ret > 0) {
8647 if (call_trace == 1 && !arch_unw_user_mode(&info)) {
8648 - print_symbol("DWARF2 unwinder stuck at %s\n",
8649 + ops->warning_symbol(data, "DWARF2 unwinder stuck at %s\n",
8650 UNW_PC(&info));
8651 if ((long)UNW_SP(&info) < 0) {
8652 - printk("Leftover inexact backtrace:\n");
8653 + ops->warning(data, "Leftover inexact backtrace:\n");
8654 stack = (unsigned long *)UNW_SP(&info);
8655 + if (!stack)
8656 + return;
8657 } else
8658 - printk("Full inexact backtrace again:\n");
8659 + ops->warning(data, "Full inexact backtrace again:\n");
8660 } else if (call_trace >= 1)
8661 return;
8662 else
8663 - printk("Full inexact backtrace again:\n");
8664 + ops->warning(data, "Full inexact backtrace again:\n");
8665 } else
8666 - printk("Inexact backtrace:\n");
8667 + ops->warning(data, "Inexact backtrace:\n");
8668 }
8669 + if (!stack) {
8670 + unsigned long dummy;
8671 + stack = &dummy;
8672 + if (tsk && tsk != current)
8673 + stack = (unsigned long *)tsk->thread.rsp;
8674 + }
8675 + /*
8676 + * Align the stack pointer on word boundary, later loops
8677 + * rely on that (and corruption / debug info bugs can cause
8678 + * unaligned values here):
8679 + */
8680 + stack = (unsigned long *)((unsigned long)stack & ~(sizeof(long)-1));
8681
8682 /*
8683 * Print function call entries within a stack. 'cond' is the
8684 @@ -305,7 +314,9 @@ void show_trace(struct task_struct *tsk,
8685 #define HANDLE_STACK(cond) \
8686 do while (cond) { \
8687 unsigned long addr = *stack++; \
8688 - if (kernel_text_address(addr)) { \
8689 + if (oops_in_progress ? \
8690 + __kernel_text_address(addr) : \
8691 + kernel_text_address(addr)) { \
8692 /* \
8693 * If the address is either in the text segment of the \
8694 * kernel, or in the region which contains vmalloc'ed \
8695 @@ -314,7 +325,7 @@ void show_trace(struct task_struct *tsk,
8696 * down the cause of the crash will be able to figure \
8697 * out the call path that was taken. \
8698 */ \
8699 - printk_address(addr); \
8700 + ops->address(data, addr); \
8701 } \
8702 } while (0)
8703
8704 @@ -323,16 +334,17 @@ void show_trace(struct task_struct *tsk,
8705 * current stack address. If the stacks consist of nested
8706 * exceptions
8707 */
8708 - for ( ; ; ) {
8709 - const char *id;
8710 + for (;;) {
8711 + char *id;
8712 unsigned long *estack_end;
8713 estack_end = in_exception_stack(cpu, (unsigned long)stack,
8714 &used, &id);
8715
8716 if (estack_end) {
8717 - printk(" <%s>", id);
8718 + if (ops->stack(data, id) < 0)
8719 + break;
8720 HANDLE_STACK (stack < estack_end);
8721 - printk(" <EOE>");
8722 + ops->stack(data, "<EOE>");
8723 /*
8724 * We link to the next stack via the
8725 * second-to-last pointer (index -2 to end) in the
8726 @@ -347,7 +359,8 @@ void show_trace(struct task_struct *tsk,
8727 (IRQSTACKSIZE - 64) / sizeof(*irqstack);
8728
8729 if (stack >= irqstack && stack < irqstack_end) {
8730 - printk(" <IRQ>");
8731 + if (ops->stack(data, "IRQ") < 0)
8732 + break;
8733 HANDLE_STACK (stack < irqstack_end);
8734 /*
8735 * We link to the next stack (which would be
8736 @@ -356,7 +369,7 @@ void show_trace(struct task_struct *tsk,
8737 */
8738 stack = (unsigned long *) (irqstack_end[-1]);
8739 irqstack_end = NULL;
8740 - printk(" <EOI>");
8741 + ops->stack(data, "EOI");
8742 continue;
8743 }
8744 }
8745 @@ -364,19 +377,58 @@ void show_trace(struct task_struct *tsk,
8746 }
8747
8748 /*
8749 - * This prints the process stack:
8750 + * This handles the process stack:
8751 */
8752 - HANDLE_STACK (((long) stack & (THREAD_SIZE-1)) != 0);
8753 + tinfo = current_thread_info();
8754 + HANDLE_STACK (valid_stack_ptr(tinfo, stack));
8755 #undef HANDLE_STACK
8756 +}
8757 +EXPORT_SYMBOL(dump_trace);
8758
8759 +static void
8760 +print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
8761 +{
8762 + print_symbol(msg, symbol);
8763 printk("\n");
8764 }
8765
8766 -static void _show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long * rsp)
8767 +static void print_trace_warning(void *data, char *msg)
8768 +{
8769 + printk("%s\n", msg);
8770 +}
8771 +
8772 +static int print_trace_stack(void *data, char *name)
8773 +{
8774 + printk(" <%s> ", name);
8775 + return 0;
8776 +}
8777 +
8778 +static void print_trace_address(void *data, unsigned long addr)
8779 +{
8780 + printk_address(addr);
8781 +}
8782 +
8783 +static struct stacktrace_ops print_trace_ops = {
8784 + .warning = print_trace_warning,
8785 + .warning_symbol = print_trace_warning_symbol,
8786 + .stack = print_trace_stack,
8787 + .address = print_trace_address,
8788 +};
8789 +
8790 +void
8791 +show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long *stack)
8792 +{
8793 + printk("\nCall Trace:\n");
8794 + dump_trace(tsk, regs, stack, &print_trace_ops, NULL);
8795 + printk("\n");
8796 +}
8797 +
8798 +static void
8799 +_show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long *rsp)
8800 {
8801 unsigned long *stack;
8802 int i;
8803 - const int cpu = safe_smp_processor_id();
8804 + const int cpu = smp_processor_id();
8805 unsigned long *irqstack_end = (unsigned long *) (cpu_pda(cpu)->irqstackptr);
8806 unsigned long *irqstack = (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE);
8807
8808 @@ -430,7 +482,7 @@ void show_registers(struct pt_regs *regs
8809 int i;
8810 int in_kernel = !user_mode(regs);
8811 unsigned long rsp;
8812 - const int cpu = safe_smp_processor_id();
8813 + const int cpu = smp_processor_id();
8814 struct task_struct *cur = cpu_pda(cpu)->pcurrent;
8815
8816 rsp = regs->rsp;
8817 @@ -505,9 +557,11 @@ static unsigned int die_nest_count;
8818
8819 unsigned __kprobes long oops_begin(void)
8820 {
8821 - int cpu = safe_smp_processor_id();
8822 + int cpu = smp_processor_id();
8823 unsigned long flags;
8824
8825 + oops_enter();
8826 +
8827 /* racy, but better than risking deadlock. */
8828 local_irq_save(flags);
8829 if (!spin_trylock(&die_lock)) {
8830 @@ -536,6 +590,7 @@ void __kprobes oops_end(unsigned long fl
8831 spin_unlock_irqrestore(&die_lock, flags);
8832 if (panic_on_oops)
8833 panic("Fatal exception");
8834 + oops_exit();
8835 }
8836
8837 void __kprobes __die(const char * str, struct pt_regs * regs, long err)
8838 @@ -572,8 +627,8 @@ void die(const char * str, struct pt_reg
8839 do_exit(SIGSEGV);
8840 }
8841
8842 -#ifdef CONFIG_X86_LOCAL_APIC
8843 -void __kprobes die_nmi(char *str, struct pt_regs *regs)
8844 +#if defined(CONFIG_X86_LOCAL_APIC) || defined(CONFIG_SYSCTL)
8845 +void __kprobes die_nmi(char *str, struct pt_regs *regs, int do_panic)
8846 {
8847 unsigned long flags = oops_begin();
8848
8849 @@ -581,13 +636,12 @@ void __kprobes die_nmi(char *str, struct
8850 * We are in trouble anyway, lets at least try
8851 * to get a message out.
8852 */
8853 - printk(str, safe_smp_processor_id());
8854 + printk(str, smp_processor_id());
8855 show_registers(regs);
8856 if (kexec_should_crash(current))
8857 crash_kexec(regs);
8858 - if (panic_on_timeout || panic_on_oops)
8859 - panic("nmi watchdog");
8860 - printk("console shuts up ...\n");
8861 + if (do_panic || panic_on_oops)
8862 + panic("Non maskable interrupt");
8863 oops_end(flags);
8864 nmi_exit();
8865 local_irq_enable();
8866 @@ -734,8 +788,15 @@ asmlinkage void __kprobes do_general_pro
8867 static __kprobes void
8868 mem_parity_error(unsigned char reason, struct pt_regs * regs)
8869 {
8870 - printk("Uhhuh. NMI received. Dazed and confused, but trying to continue\n");
8871 - printk("You probably have a hardware problem with your RAM chips\n");
8872 + printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n",
8873 + reason);
8874 + printk(KERN_EMERG "You probably have a hardware problem with your "
8875 + "RAM chips\n");
8876 +
8877 + if (panic_on_unrecovered_nmi)
8878 + panic("NMI: Not continuing");
8879 +
8880 + printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
8881
8882 #if 0 /* XEN */
8883 /* Clear and disable the memory parity error line. */
8884 @@ -762,9 +823,15 @@ io_check_error(unsigned char reason, str
8885
8886 static __kprobes void
8887 unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
8888 -{ printk("Uhhuh. NMI received for unknown reason %02x.\n", reason);
8889 - printk("Dazed and confused, but trying to continue\n");
8890 - printk("Do you have a strange power saving mode enabled?\n");
8891 +{
8892 + printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n",
8893 + reason);
8894 + printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n");
8895 +
8896 + if (panic_on_unrecovered_nmi)
8897 + panic("NMI: Not continuing");
8898 +
8899 + printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
8900 }
8901
8902 /* Runs on IST stack. This code must keep interrupts off all the time.
8903 @@ -789,12 +856,12 @@ asmlinkage __kprobes void default_do_nmi
8904 * Ok, so this is none of the documented NMI sources,
8905 * so it must be the NMI watchdog.
8906 */
8907 - if (nmi_watchdog > 0) {
8908 - nmi_watchdog_tick(regs,reason);
8909 + if (nmi_watchdog_tick(regs,reason))
8910 return;
8911 - }
8912 #endif
8913 - unknown_nmi_error(reason, regs);
8914 + if (!do_nmi_callback(regs,cpu))
8915 + unknown_nmi_error(reason, regs);
8916 +
8917 return;
8918 }
8919 if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
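
The watchdog path in the hunk above changes shape: nmi_watchdog_tick() now reports whether it consumed the event, and do_nmi_callback() lets a registered per-CPU handler (oprofile, kdump) claim the NMI before it is declared unknown. Extracted from the diff for readability, the resulting dispatch is (a sketch of the post-patch logic, not additional code):

/* inside default_do_nmi(), once no documented reason bit matched */
if (nmi_watchdog_tick(regs, reason))
        return;                          /* watchdog consumed the NMI */
if (!do_nmi_callback(regs, cpu))
        unknown_nmi_error(reason, regs); /* no callback claimed it */
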
8920 @@ -1081,6 +1148,7 @@ asmlinkage void math_state_restore(void)
8921 init_fpu(me);
8922 restore_fpu_checking(&me->thread.i387.fxsave);
8923 task_thread_info(me)->status |= TS_USEDFPU;
8924 + me->fpu_counter++;
8925 }
8926
8927
8928 @@ -1141,24 +1209,30 @@ void __cpuinit smp_trap_init(trap_info_t
8929 }
8930
8931
8932 -/* Actual parsing is done early in setup.c. */
8933 -static int __init oops_dummy(char *s)
8934 +static int __init oops_setup(char *s)
8935 {
8936 - panic_on_oops = 1;
8937 - return 1;
8938 + if (!s)
8939 + return -EINVAL;
8940 + if (!strcmp(s, "panic"))
8941 + panic_on_oops = 1;
8942 + return 0;
8943 }
8944 -__setup("oops=", oops_dummy);
8945 +early_param("oops", oops_setup);
8946
8947 static int __init kstack_setup(char *s)
8948 {
8949 + if (!s)
8950 + return -EINVAL;
8951 kstack_depth_to_print = simple_strtoul(s,NULL,0);
8952 - return 1;
8953 + return 0;
8954 }
8955 -__setup("kstack=", kstack_setup);
8956 +early_param("kstack", kstack_setup);
8957
8958 #ifdef CONFIG_STACK_UNWIND
8959 static int __init call_trace_setup(char *s)
8960 {
8961 + if (!s)
8962 + return -EINVAL;
8963 if (strcmp(s, "old") == 0)
8964 call_trace = -1;
8965 else if (strcmp(s, "both") == 0)
8966 @@ -1167,7 +1241,7 @@ static int __init call_trace_setup(char
8967 call_trace = 1;
8968 else if (strcmp(s, "new") == 0)
8969 call_trace = 2;
8970 - return 1;
8971 + return 0;
8972 }
8973 -__setup("call_trace=", call_trace_setup);
8974 +early_param("call_trace", call_trace_setup);
8975 #endif
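
The three handlers above move from __setup() to early_param(), which runs from parse_early_param() well before the normal __setup chain and uses a different convention: the handler gets only the value text (possibly NULL) and returns 0 on success or a negative errno, rather than the 1 that __setup() expects. A minimal sketch of the convention, with a hypothetical parameter name and flag:

static int __init example_setup(char *s)
{
        if (!s)                          /* "example" given with no value */
                return -EINVAL;
        if (!strcmp(s, "panic"))
                example_panic = 1;       /* hypothetical flag */
        return 0;                        /* 0/-errno, not __setup()'s 1 */
}
early_param("example", example_setup);
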
8976 --- sle11-2009-10-16.orig/arch/x86/kernel/vsyscall_64-xen.c 2009-10-28 14:55:12.000000000 +0100
8977 +++ sle11-2009-10-16/arch/x86/kernel/vsyscall_64-xen.c 2009-03-04 11:28:34.000000000 +0100
8978 @@ -26,6 +26,10 @@
8979 #include <linux/seqlock.h>
8980 #include <linux/jiffies.h>
8981 #include <linux/sysctl.h>
8982 +#include <linux/getcpu.h>
8983 +#include <linux/cpu.h>
8984 +#include <linux/smp.h>
8985 +#include <linux/notifier.h>
8986
8987 #include <asm/vsyscall.h>
8988 #include <asm/pgtable.h>
8989 @@ -33,11 +37,15 @@
8990 #include <asm/fixmap.h>
8991 #include <asm/errno.h>
8992 #include <asm/io.h>
8993 +#include <asm/segment.h>
8994 +#include <asm/desc.h>
8995 +#include <asm/topology.h>
8996
8997 #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
8998
8999 int __sysctl_vsyscall __section_sysctl_vsyscall = 1;
9000 seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED;
9001 +int __vgetcpu_mode __section_vgetcpu_mode;
9002
9003 #include <asm/unistd.h>
9004
9005 @@ -61,8 +69,7 @@ static __always_inline void do_vgettimeo
9006 sequence = read_seqbegin(&__xtime_lock);
9007
9008 sec = __xtime.tv_sec;
9009 - usec = (__xtime.tv_nsec / 1000) +
9010 - (__jiffies - __wall_jiffies) * (1000000 / HZ);
9011 + usec = __xtime.tv_nsec / 1000;
9012
9013 if (__vxtime.mode != VXTIME_HPET) {
9014 t = get_cycles_sync();
9015 @@ -72,7 +79,8 @@ static __always_inline void do_vgettimeo
9016 __vxtime.tsc_quot) >> 32;
9017 /* See comment in x86_64 do_gettimeofday. */
9018 } else {
9019 - usec += ((readl((void *)fix_to_virt(VSYSCALL_HPET) + 0xf0) -
9020 + usec += ((readl((void __iomem *)
9021 + fix_to_virt(VSYSCALL_HPET) + 0xf0) -
9022 __vxtime.last) * __vxtime.quot) >> 32;
9023 }
9024 } while (read_seqretry(&__xtime_lock, sequence));
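
2.6.19 removed wall_jiffies entirely (xtime is now updated on every timer tick), so the jiffies-based correction term drops out of the vsyscall and only the TSC/HPET interpolation remains. Any microsecond overflow still folds into seconds in the tail of do_vgettimeofday(), along the lines of (unchanged by this hunk, quoted from memory):

tv->tv_sec = sec + usec / 1000000;
tv->tv_usec = usec % 1000000;
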
9025 @@ -127,9 +135,46 @@ time_t __vsyscall(1) vtime(time_t *t)
9026 return __xtime.tv_sec;
9027 }
9028
9029 -long __vsyscall(2) venosys_0(void)
9030 -{
9031 - return -ENOSYS;
9032 +/* Fast way to get current CPU and node.
9033 + This helps to do per node and per CPU caches in user space.
9034 + The result is not guaranteed without CPU affinity, but usually
9035 + works out because the scheduler tries to keep a thread on the same
9036 + CPU.
9037 +
9038 + tcache must point to a two element sized long array.
9039 + All arguments can be NULL. */
9040 +long __vsyscall(2)
9041 +vgetcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache)
9042 +{
9043 + unsigned int dummy, p;
9044 + unsigned long j = 0;
9045 +
9046 + /* Fast cache - only recompute value once per jiffies and avoid
9047 + relatively costly rdtscp/cpuid otherwise.
9048 + This works because the scheduler usually keeps the process
9049 + on the same CPU and this syscall doesn't guarantee its
9050 + results anyways.
9051 + We do this here because otherwise user space would do it on
9052 + its own in a likely inferior way (no access to jiffies).
9053 + If you don't like it pass NULL. */
9054 + if (tcache && tcache->blob[0] == (j = __jiffies)) {
9055 + p = tcache->blob[1];
9056 + } else if (__vgetcpu_mode == VGETCPU_RDTSCP) {
9057 + /* Load per CPU data from RDTSCP */
9058 + rdtscp(dummy, dummy, p);
9059 + } else {
9060 + /* Load per CPU data from GDT */
9061 + asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
9062 + }
9063 + if (tcache) {
9064 + tcache->blob[0] = j;
9065 + tcache->blob[1] = p;
9066 + }
9067 + if (cpu)
9068 + *cpu = p & 0xfff;
9069 + if (node)
9070 + *node = p >> 12;
9071 + return 0;
9072 }
9073
9074 long __vsyscall(3) venosys_1(void)
9075 @@ -149,7 +194,8 @@ static int vsyscall_sysctl_change(ctl_ta
9076 void __user *buffer, size_t *lenp, loff_t *ppos)
9077 {
9078 extern u16 vsysc1, vsysc2;
9079 - u16 *map1, *map2;
9080 + u16 __iomem *map1;
9081 + u16 __iomem *map2;
9082 int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
9083 if (!write)
9084 return ret;
9085 @@ -164,11 +210,11 @@ static int vsyscall_sysctl_change(ctl_ta
9086 goto out;
9087 }
9088 if (!sysctl_vsyscall) {
9089 - *map1 = SYSCALL;
9090 - *map2 = SYSCALL;
9091 + writew(SYSCALL, map1);
9092 + writew(SYSCALL, map2);
9093 } else {
9094 - *map1 = NOP2;
9095 - *map2 = NOP2;
9096 + writew(NOP2, map1);
9097 + writew(NOP2, map2);
9098 }
9099 iounmap(map2);
9100 out:
9101 @@ -200,6 +246,48 @@ static ctl_table kernel_root_table2[] =
9102
9103 #endif
9104
9105 +/* Assume __initcall executes before all user space. Hopefully kmod
9106 + doesn't violate that. We'll find out if it does. */
9107 +static void __cpuinit vsyscall_set_cpu(int cpu)
9108 +{
9109 + unsigned long d;
9110 + unsigned long node = 0;
9111 +#ifdef CONFIG_NUMA
9112 + node = cpu_to_node[cpu];
9113 +#endif
9114 + if (cpu_has(&cpu_data[cpu], X86_FEATURE_RDTSCP))
9115 + write_rdtscp_aux((node << 12) | cpu);
9116 +
9117 + /* Store cpu number in limit so that it can be loaded quickly
9118 + in user space in vgetcpu.
9119 + 12 bits for the CPU and 8 bits for the node. */
9120 + d = 0x0f40000000000ULL;
9121 + d |= cpu;
9122 + d |= (node & 0xf) << 12;
9123 + d |= (node >> 4) << 48;
9124 + if (HYPERVISOR_update_descriptor(virt_to_machine(cpu_gdt(cpu)
9125 + + GDT_ENTRY_PER_CPU),
9126 + d))
9127 + BUG();
9128 +}
9129 +
9130 +static void __cpuinit cpu_vsyscall_init(void *arg)
9131 +{
9132 + /* preemption should be already off */
9133 + vsyscall_set_cpu(raw_smp_processor_id());
9134 +}
9135 +
9136 +#ifdef CONFIG_HOTPLUG_CPU
9137 +static int __cpuinit
9138 +cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg)
9139 +{
9140 + long cpu = (long)arg;
9141 + if (action == CPU_ONLINE)
9142 + smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 0, 1);
9143 + return NOTIFY_DONE;
9144 +}
9145 +#endif
9146 +
9147 static void __init map_vsyscall(void)
9148 {
9149 extern char __vsyscall_0;
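
vsyscall_set_cpu() above hides (node << 12) | cpu in the segment limit of the per-CPU GDT entry: cpu fills limit bits 0-11, the low node nibble bits 12-15, and the high node bits land in the descriptor's upper limit field (descriptor bits 48-51) — which is why vgetcpu's lsl readback masks with 0xfff and shifts by 12. A self-contained user-space check of that bit layout (plain C arithmetic, not kernel code):

#include <assert.h>
#include <stdint.h>

static uint32_t limit_of(uint64_t d)
{
        /* descriptor limit: bits 0-15 plus bits 48-51 */
        return (uint32_t)(d & 0xffff) | (uint32_t)(((d >> 48) & 0xf) << 16);
}

int main(void)
{
        unsigned cpu = 3, node = 2;
        uint64_t d = 0x0f40000000000ULL;     /* same base as the hunk above */

        d |= cpu;
        d |= (uint64_t)(node & 0xf) << 12;
        d |= (uint64_t)(node >> 4) << 48;
        assert(limit_of(d) == ((node << 12) | cpu));
        return 0;
}
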
9150 @@ -214,13 +302,20 @@ static int __init vsyscall_init(void)
9151 VSYSCALL_ADDR(__NR_vgettimeofday)));
9152 BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime));
9153 BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE)));
9154 + BUG_ON((unsigned long) &vgetcpu != VSYSCALL_ADDR(__NR_vgetcpu));
9155 map_vsyscall();
9156 #ifdef CONFIG_XEN
9157 sysctl_vsyscall = 0; /* disable vgettimeofday() */

9158 + if (boot_cpu_has(X86_FEATURE_RDTSCP))
9159 + vgetcpu_mode = VGETCPU_RDTSCP;
9160 + else
9161 + vgetcpu_mode = VGETCPU_LSL;
9162 #endif
9163 #ifdef CONFIG_SYSCTL
9164 register_sysctl_table(kernel_root_table2, 0);
9165 #endif
9166 + on_each_cpu(cpu_vsyscall_init, NULL, 0, 1);
9167 + hotcpu_notifier(cpu_vsyscall_notifier, 0);
9168 return 0;
9169 }
9170
9171 --- sle11-2009-10-16.orig/arch/x86/mm/fault_64-xen.c 2009-10-28 14:55:12.000000000 +0100
9172 +++ sle11-2009-10-16/arch/x86/mm/fault_64-xen.c 2009-03-04 11:28:34.000000000 +0100
9173 @@ -40,8 +40,7 @@
9174 #define PF_RSVD (1<<3)
9175 #define PF_INSTR (1<<4)
9176
9177 -#ifdef CONFIG_KPROBES
9178 -ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
9179 +static ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
9180
9181 /* Hook to register for page fault notifications */
9182 int register_page_fault_notifier(struct notifier_block *nb)
9183 @@ -49,11 +48,13 @@ int register_page_fault_notifier(struct
9184 vmalloc_sync_all();
9185 return atomic_notifier_chain_register(&notify_page_fault_chain, nb);
9186 }
9187 +EXPORT_SYMBOL_GPL(register_page_fault_notifier);
9188
9189 int unregister_page_fault_notifier(struct notifier_block *nb)
9190 {
9191 return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb);
9192 }
9193 +EXPORT_SYMBOL_GPL(unregister_page_fault_notifier);
9194
9195 static inline int notify_page_fault(enum die_val val, const char *str,
9196 struct pt_regs *regs, long err, int trap, int sig)
9197 @@ -67,13 +68,6 @@ static inline int notify_page_fault(enum
9198 };
9199 return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args);
9200 }
9201 -#else
9202 -static inline int notify_page_fault(enum die_val val, const char *str,
9203 - struct pt_regs *regs, long err, int trap, int sig)
9204 -{
9205 - return NOTIFY_DONE;
9206 -}
9207 -#endif
9208
9209 void bust_spinlocks(int yes)
9210 {
9211 @@ -102,7 +96,7 @@ void bust_spinlocks(int yes)
9212 static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr,
9213 unsigned long error_code)
9214 {
9215 - unsigned char *instr;
9216 + unsigned char __user *instr;
9217 int scan_more = 1;
9218 int prefetch = 0;
9219 unsigned char *max_instr;
9220 @@ -111,7 +105,7 @@ static noinline int is_prefetch(struct p
9221 if (error_code & PF_INSTR)
9222 return 0;
9223
9224 - instr = (unsigned char *)convert_rip_to_linear(current, regs);
9225 + instr = (unsigned char __user *)convert_rip_to_linear(current, regs);
9226 max_instr = instr + 15;
9227
9228 if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE)
9229 @@ -122,7 +116,7 @@ static noinline int is_prefetch(struct p
9230 unsigned char instr_hi;
9231 unsigned char instr_lo;
9232
9233 - if (__get_user(opcode, instr))
9234 + if (__get_user(opcode, (char __user *)instr))
9235 break;
9236
9237 instr_hi = opcode & 0xf0;
9238 @@ -160,7 +154,7 @@ static noinline int is_prefetch(struct p
9239 case 0x00:
9240 /* Prefetch instruction is 0x0F0D or 0x0F18 */
9241 scan_more = 0;
9242 - if (__get_user(opcode, instr))
9243 + if (__get_user(opcode, (char __user *)instr))
9244 break;
9245 prefetch = (instr_lo == 0xF) &&
9246 (opcode == 0x0D || opcode == 0x18);
9247 @@ -176,7 +170,7 @@ static noinline int is_prefetch(struct p
9248 static int bad_address(void *p)
9249 {
9250 unsigned long dummy;
9251 - return __get_user(dummy, (unsigned long *)p);
9252 + return __get_user(dummy, (unsigned long __user *)p);
9253 }
9254
9255 void dump_pagetable(unsigned long address)
9256 @@ -248,7 +242,7 @@ static int is_errata93(struct pt_regs *r
9257
9258 int unhandled_signal(struct task_struct *tsk, int sig)
9259 {
9260 - if (tsk->pid == 1)
9261 + if (is_init(tsk))
9262 return 1;
9263 if (tsk->ptrace & PT_PTRACED)
9264 return 0;
9265 @@ -300,7 +294,7 @@ static int vmalloc_fault(unsigned long a
9266 if (pgd_none(*pgd))
9267 set_pgd(pgd, *pgd_ref);
9268 else
9269 - BUG_ON(pgd_page(*pgd) != pgd_page(*pgd_ref));
9270 + BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
9271
9272 /* Below here mismatches are bugs because these lower tables
9273 are shared */
9274 @@ -309,7 +303,7 @@ static int vmalloc_fault(unsigned long a
9275 pud_ref = pud_offset(pgd_ref, address);
9276 if (pud_none(*pud_ref))
9277 return -1;
9278 - if (pud_none(*pud) || pud_page(*pud) != pud_page(*pud_ref))
9279 + if (pud_none(*pud) || pud_page_vaddr(*pud) != pud_page_vaddr(*pud_ref))
9280 BUG();
9281 pmd = pmd_offset(pud, address);
9282 pmd_ref = pmd_offset(pud_ref, address);
9283 @@ -531,7 +525,7 @@ good_area:
9284 case PF_PROT: /* read, present */
9285 goto bad_area;
9286 case 0: /* read, not present */
9287 - if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
9288 + if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
9289 goto bad_area;
9290 }
9291
9292 @@ -647,7 +641,7 @@ no_context:
9293 */
9294 out_of_memory:
9295 up_read(&mm->mmap_sem);
9296 - if (current->pid == 1) {
9297 + if (is_init(current)) {
9298 yield();
9299 goto again;
9300 }
9301 @@ -702,7 +696,7 @@ void vmalloc_sync_all(void)
9302 if (pgd_none(*pgd))
9303 set_pgd(pgd, *pgd_ref);
9304 else
9305 - BUG_ON(pgd_page(*pgd) != pgd_page(*pgd_ref));
9306 + BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
9307 }
9308 spin_unlock(&pgd_lock);
9309 set_bit(pgd_index(address), insync);
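
With the #ifdef CONFIG_KPROBES guard gone and the register/unregister pair exported GPL above, any module can subscribe to fault events. A hedged module-side sketch (handler and names hypothetical):

#include <linux/module.h>
#include <linux/notifier.h>
#include <asm/kdebug.h>

static int example_fault_event(struct notifier_block *nb,
                               unsigned long val, void *data)
{
        struct die_args *args = data;

        if (val == DIE_PAGE_FAULT)
                printk(KERN_DEBUG "fault: err=%ld trap=%d\n",
                       args->err, args->trapnr);
        return NOTIFY_DONE;
}

static struct notifier_block example_fault_nb = {
        .notifier_call = example_fault_event,
};

static int __init example_init(void)
{
        return register_page_fault_notifier(&example_fault_nb);
}

static void __exit example_exit(void)
{
        unregister_page_fault_notifier(&example_fault_nb);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");
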
9310 --- sle11-2009-10-16.orig/arch/x86/mm/init_64-xen.c 2009-10-28 14:55:12.000000000 +0100
9311 +++ sle11-2009-10-16/arch/x86/mm/init_64-xen.c 2009-03-04 11:28:34.000000000 +0100
9312 @@ -61,8 +61,6 @@ EXPORT_SYMBOL(__kernel_page_user);
9313
9314 int after_bootmem;
9315
9316 -static unsigned long dma_reserve __initdata;
9317 -
9318 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
9319 extern unsigned long start_pfn;
9320
9321 @@ -420,7 +418,6 @@ __init void *early_ioremap(unsigned long
9322
9323 /* actually usually some more */
9324 if (size >= LARGE_PAGE_SIZE) {
9325 - printk("SMBIOS area too long %lu\n", size);
9326 return NULL;
9327 }
9328 set_pmd(temp_mappings[0].pmd, __pmd(map | _KERNPG_TABLE | _PAGE_PSE));
9329 @@ -442,16 +439,24 @@ __init void early_iounmap(void *addr, un
9330 #endif
9331
9332 static void __meminit
9333 -phys_pmd_init(pmd_t *pmd, unsigned long address, unsigned long end)
9334 +phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
9335 {
9336 - int i, k;
9337 + int i = pmd_index(address);
9338
9339 - for (i = 0; i < PTRS_PER_PMD; pmd++, i++) {
9340 + for (; i < PTRS_PER_PMD; i++) {
9341 unsigned long pte_phys;
9342 + pmd_t *pmd = pmd_page + i;
9343 pte_t *pte, *pte_save;
9344 + int k;
9345
9346 if (address >= end)
9347 break;
9348 +
9349 + if (__pmd_val(*pmd)) {
9350 + address += PMD_SIZE;
9351 + continue;
9352 + }
9353 +
9354 pte = alloc_static_page(&pte_phys);
9355 pte_save = pte;
9356 for (k = 0; k < PTRS_PER_PTE; pte++, k++, address += PTE_SIZE) {
9357 @@ -478,40 +483,35 @@ phys_pmd_init(pmd_t *pmd, unsigned long
9358 static void __meminit
9359 phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end)
9360 {
9361 - pmd_t *pmd = pmd_offset(pud, (unsigned long)__va(address));
9362 -
9363 - if (pmd_none(*pmd)) {
9364 - spin_lock(&init_mm.page_table_lock);
9365 - phys_pmd_init(pmd, address, end);
9366 - spin_unlock(&init_mm.page_table_lock);
9367 - __flush_tlb_all();
9368 - }
9369 + pmd_t *pmd = pmd_offset(pud,0);
9370 + spin_lock(&init_mm.page_table_lock);
9371 + phys_pmd_init(pmd, address, end);
9372 + spin_unlock(&init_mm.page_table_lock);
9373 + __flush_tlb_all();
9374 }
9375
9376 -static void __meminit phys_pud_init(pud_t *pud, unsigned long address, unsigned long end)
9377 +static void __meminit phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end)
9378 {
9379 - long i = pud_index(address);
9380 -
9381 - pud = pud + i;
9382 -
9383 - if (after_bootmem && pud_val(*pud)) {
9384 - phys_pmd_update(pud, address, end);
9385 - return;
9386 - }
9387 + int i = pud_index(addr);
9388
9389 - for (; i < PTRS_PER_PUD; pud++, i++) {
9390 - unsigned long paddr, pmd_phys;
9391 + for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE ) {
9392 + unsigned long pmd_phys;
9393 + pud_t *pud = pud_page + pud_index(addr);
9394 pmd_t *pmd;
9395
9396 - paddr = (address & PGDIR_MASK) + i*PUD_SIZE;
9397 - if (paddr >= end)
9398 + if (addr >= end)
9399 break;
9400
9401 + if (__pud_val(*pud)) {
9402 + phys_pmd_update(pud, addr, end);
9403 + continue;
9404 + }
9405 +
9406 pmd = alloc_static_page(&pmd_phys);
9407
9408 spin_lock(&init_mm.page_table_lock);
9409 *pud = __pud(pmd_phys | _KERNPG_TABLE);
9410 - phys_pmd_init(pmd, paddr, end);
9411 + phys_pmd_init(pmd, addr, end);
9412 spin_unlock(&init_mm.page_table_lock);
9413
9414 early_make_page_readonly(pmd, XENFEAT_writable_page_tables);
9415 @@ -775,69 +775,18 @@ void __cpuinit zap_low_mappings(int cpu)
9416 #endif
9417 }
9418
9419 -/* Compute zone sizes for the DMA and DMA32 zones in a node. */
9420 -__init void
9421 -size_zones(unsigned long *z, unsigned long *h,
9422 - unsigned long start_pfn, unsigned long end_pfn)
9423 -{
9424 - int i;
9425 - unsigned long w;
9426 -
9427 - for (i = 0; i < MAX_NR_ZONES; i++)
9428 - z[i] = 0;
9429 -
9430 - if (start_pfn < MAX_DMA_PFN)
9431 - z[ZONE_DMA] = MAX_DMA_PFN - start_pfn;
9432 - if (start_pfn < MAX_DMA32_PFN) {
9433 - unsigned long dma32_pfn = MAX_DMA32_PFN;
9434 - if (dma32_pfn > end_pfn)
9435 - dma32_pfn = end_pfn;
9436 - z[ZONE_DMA32] = dma32_pfn - start_pfn;
9437 - }
9438 - z[ZONE_NORMAL] = end_pfn - start_pfn;
9439 -
9440 - /* Remove lower zones from higher ones. */
9441 - w = 0;
9442 - for (i = 0; i < MAX_NR_ZONES; i++) {
9443 - if (z[i])
9444 - z[i] -= w;
9445 - w += z[i];
9446 - }
9447 -
9448 - /* Compute holes */
9449 - w = start_pfn;
9450 - for (i = 0; i < MAX_NR_ZONES; i++) {
9451 - unsigned long s = w;
9452 - w += z[i];
9453 - h[i] = e820_hole_size(s, w);
9454 - }
9455 -
9456 - /* Add the space pace needed for mem_map to the holes too. */
9457 - for (i = 0; i < MAX_NR_ZONES; i++)
9458 - h[i] += (z[i] * sizeof(struct page)) / PAGE_SIZE;
9459 -
9460 - /* The 16MB DMA zone has the kernel and other misc mappings.
9461 - Account them too */
9462 - if (h[ZONE_DMA]) {
9463 - h[ZONE_DMA] += dma_reserve;
9464 - if (h[ZONE_DMA] >= z[ZONE_DMA]) {
9465 - printk(KERN_WARNING
9466 - "Kernel too large and filling up ZONE_DMA?\n");
9467 - h[ZONE_DMA] = z[ZONE_DMA];
9468 - }
9469 - }
9470 -}
9471 -
9472 #ifndef CONFIG_NUMA
9473 void __init paging_init(void)
9474 {
9475 - unsigned long zones[MAX_NR_ZONES], holes[MAX_NR_ZONES];
9476 + unsigned long max_zone_pfns[MAX_NR_ZONES];
9477 + memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
9478 + max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;
9479 + max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
9480 + max_zone_pfns[ZONE_NORMAL] = end_pfn;
9481
9482 memory_present(0, 0, end_pfn);
9483 sparse_init();
9484 - size_zones(zones, holes, 0, end_pfn);
9485 - free_area_init_node(0, NODE_DATA(0), zones,
9486 - __pa(PAGE_OFFSET) >> PAGE_SHIFT, holes);
9487 + free_area_init_nodes(max_zone_pfns);
9488
9489 init_mm.context.pinned = 1;
9490 }
9491 @@ -891,36 +840,23 @@ void online_page(struct page *page)
9492
9493 #ifdef CONFIG_MEMORY_HOTPLUG
9494 /*
9495 - * XXX: memory_add_physaddr_to_nid() is to find node id from physical address
9496 - * via probe interface of sysfs. If acpi notifies hot-add event, then it
9497 - * can tell node id by searching dsdt. But, probe interface doesn't have
9498 - * node id. So, return 0 as node id at this time.
9499 - */
9500 -#ifdef CONFIG_NUMA
9501 -int memory_add_physaddr_to_nid(u64 start)
9502 -{
9503 - return 0;
9504 -}
9505 -#endif
9506 -
9507 -/*
9508 * Memory is added always to NORMAL zone. This means you will never get
9509 * additional DMA/DMA32 memory.
9510 */
9511 int arch_add_memory(int nid, u64 start, u64 size)
9512 {
9513 struct pglist_data *pgdat = NODE_DATA(nid);
9514 - struct zone *zone = pgdat->node_zones + MAX_NR_ZONES-2;
9515 + struct zone *zone = pgdat->node_zones + ZONE_NORMAL;
9516 unsigned long start_pfn = start >> PAGE_SHIFT;
9517 unsigned long nr_pages = size >> PAGE_SHIFT;
9518 int ret;
9519
9520 + init_memory_mapping(start, (start + size -1));
9521 +
9522 ret = __add_pages(zone, start_pfn, nr_pages);
9523 if (ret)
9524 goto error;
9525
9526 - init_memory_mapping(start, (start + size -1));
9527 -
9528 return ret;
9529 error:
9530 printk("%s: Problem encountered in __add_pages!\n", __func__);
9531 @@ -934,7 +870,17 @@ int remove_memory(u64 start, u64 size)
9532 }
9533 EXPORT_SYMBOL_GPL(remove_memory);
9534
9535 -#else /* CONFIG_MEMORY_HOTPLUG */
9536 +#if !defined(CONFIG_ACPI_NUMA) && defined(CONFIG_NUMA)
9537 +int memory_add_physaddr_to_nid(u64 start)
9538 +{
9539 + return 0;
9540 +}
9541 +EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
9542 +#endif
9543 +
9544 +#endif /* CONFIG_MEMORY_HOTPLUG */
9545 +
9546 +#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
9547 /*
9548 * Memory Hotadd without sparsemem. The mem_maps have been allocated in advance,
9549 * just online the pages.
9550 @@ -960,7 +906,7 @@ int __add_pages(struct zone *z, unsigned
9551 }
9552 return err;
9553 }
9554 -#endif /* CONFIG_MEMORY_HOTPLUG */
9555 +#endif
9556
9557 static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules,
9558 kcore_vsyscall;
9559 @@ -972,12 +918,6 @@ void __init mem_init(void)
9560
9561 pci_iommu_alloc();
9562
9563 - /* How many end-of-memory variables you have, grandma! */
9564 - max_low_pfn = end_pfn;
9565 - max_pfn = end_pfn;
9566 - num_physpages = end_pfn;
9567 - high_memory = (void *) __va(end_pfn * PAGE_SIZE);
9568 -
9569 /* clear the zero-page */
9570 memset(empty_zero_page, 0, PAGE_SIZE);
9571
9572 @@ -995,7 +935,8 @@ void __init mem_init(void)
9573 init_page_count(pfn_to_page(pfn));
9574 totalram_pages++;
9575 }
9576 - reservedpages = end_pfn - totalram_pages - e820_hole_size(0, end_pfn);
9577 + reservedpages = end_pfn - totalram_pages -
9578 + absent_pages_in_range(0, end_pfn);
9579
9580 after_bootmem = 1;
9581
9582 @@ -1102,15 +1043,32 @@ void free_initrd_mem(unsigned long start
9583
9584 void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
9585 {
9586 - /* Should check here against the e820 map to avoid double free */
9587 #ifdef CONFIG_NUMA
9588 int nid = phys_to_nid(phys);
9589 +#endif
9590 + unsigned long pfn = phys >> PAGE_SHIFT;
9591 + if (pfn >= end_pfn) {
9592 + /* This can happen with kdump kernels when accessing firmware
9593 + tables. */
9594 + if (pfn < end_pfn_map)
9595 + return;
9596 + printk(KERN_ERR "reserve_bootmem: illegal reserve %lx %u\n",
9597 + phys, len);
9598 + return;
9599 + }
9600 +
9601 + /* Should check here against the e820 map to avoid double free */
9602 +#ifdef CONFIG_NUMA
9603 reserve_bootmem_node(NODE_DATA(nid), phys, len);
9604 #else
9605 reserve_bootmem(phys, len);
9606 #endif
9607 - if (phys+len <= MAX_DMA_PFN*PAGE_SIZE)
9608 +#ifndef CONFIG_XEN
9609 + if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) {
9610 dma_reserve += len / PAGE_SIZE;
9611 + set_dma_reserve(dma_reserve);
9612 + }
9613 +#endif
9614 }
9615
9616 int kern_addr_valid(unsigned long addr)
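
The deleted size_zones()/e820_hole_size() logic is subsumed by 2.6.19's arch-independent zone sizing: the architecture registers its usable PFN ranges (on x86-64 via e820_register_active_regions(), which feeds add_active_range()) plus the per-zone ceilings, and the core derives zone sizes, holes, and mem_map accounting itself. A sketch of the generic sequence, assuming a single node 0 spanning start_pfn..end_pfn:

unsigned long max_zone_pfns[MAX_NR_ZONES];

memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
max_zone_pfns[ZONE_DMA]    = MAX_DMA_PFN;
max_zone_pfns[ZONE_DMA32]  = MAX_DMA32_PFN;
max_zone_pfns[ZONE_NORMAL] = end_pfn;

add_active_range(0, start_pfn, end_pfn);  /* register usable memory on node 0 */
free_area_init_nodes(max_zone_pfns);      /* zones and holes computed here */
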
9617 --- sle11-2009-10-16.orig/arch/x86/mm/pageattr_64-xen.c 2008-12-01 11:25:57.000000000 +0100
9618 +++ sle11-2009-10-16/arch/x86/mm/pageattr_64-xen.c 2009-03-04 11:28:34.000000000 +0100
9619 @@ -377,8 +377,8 @@ static void revert_page(unsigned long ad
9620 BUG_ON(pud_none(*pud));
9621 pmd = pmd_offset(pud, address);
9622 BUG_ON(__pmd_val(*pmd) & _PAGE_PSE);
9623 - pgprot_val(ref_prot) |= _PAGE_PSE;
9624 large_pte = mk_pte_phys(__pa(address) & LARGE_PAGE_MASK, ref_prot);
9625 + large_pte = pte_mkhuge(large_pte);
9626 set_pte((pte_t *)pmd, large_pte);
9627 }
9628
9629 @@ -388,32 +388,28 @@ __change_page_attr(unsigned long address
9630 {
9631 pte_t *kpte;
9632 struct page *kpte_page;
9633 - unsigned kpte_flags;
9634 pgprot_t ref_prot2;
9635 kpte = lookup_address(address);
9636 if (!kpte) return 0;
9637 kpte_page = virt_to_page(((unsigned long)kpte) & PAGE_MASK);
9638 - kpte_flags = pte_val(*kpte);
9639 if (pgprot_val(prot) != pgprot_val(ref_prot)) {
9640 - if ((kpte_flags & _PAGE_PSE) == 0) {
9641 + if (!pte_huge(*kpte)) {
9642 set_pte(kpte, pfn_pte(pfn, prot));
9643 } else {
9644 /*
9645 * split_large_page will take the reference for this
9646 * change_page_attr on the split page.
9647 */
9648 -
9649 struct page *split;
9650 - ref_prot2 = __pgprot(pgprot_val(pte_pgprot(*lookup_address(address))) & ~(1<<_PAGE_BIT_PSE));
9651 -
9652 + ref_prot2 = pte_pgprot(pte_clrhuge(*kpte));
9653 split = split_large_page(address, prot, ref_prot2);
9654 if (!split)
9655 return -ENOMEM;
9656 - set_pte(kpte,mk_pte(split, ref_prot2));
9657 + set_pte(kpte, mk_pte(split, ref_prot2));
9658 kpte_page = split;
9659 - }
9660 + }
9661 page_private(kpte_page)++;
9662 - } else if ((kpte_flags & _PAGE_PSE) == 0) {
9663 + } else if (!pte_huge(*kpte)) {
9664 set_pte(kpte, pfn_pte(pfn, ref_prot));
9665 BUG_ON(page_private(kpte_page) == 0);
9666 page_private(kpte_page)--;
9667 @@ -470,10 +466,12 @@ int change_page_attr_addr(unsigned long
9668 * lowmem */
9669 if (__pa(address) < KERNEL_TEXT_SIZE) {
9670 unsigned long addr2;
9671 - pgprot_t prot2 = prot;
9672 + pgprot_t prot2;
9673 addr2 = __START_KERNEL_map + __pa(address);
9674 - pgprot_val(prot2) &= ~_PAGE_NX;
9675 - err = __change_page_attr(addr2, pfn, prot2, PAGE_KERNEL_EXEC);
9676 + /* Make sure the kernel mappings stay executable */
9677 + prot2 = pte_pgprot(pte_mkexec(pfn_pte(0, prot)));
9678 + err = __change_page_attr(addr2, pfn, prot2,
9679 + PAGE_KERNEL_EXEC);
9680 }
9681 }
9682 up_write(&init_mm.mmap_sem);
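
The open-coded _PAGE_PSE manipulation above gives way to the 2.6.19 huge-PTE accessors; the mapping between old and new spellings, pulled out for comparison:

if (!pte_huge(*kpte))                         /* was: !(pte_val(*kpte) & _PAGE_PSE) */
        set_pte(kpte, pfn_pte(pfn, prot));

ref_prot2 = pte_pgprot(pte_clrhuge(*kpte));   /* was: mask _PAGE_PSE out of the prot */
large_pte = pte_mkhuge(large_pte);            /* was: pgprot_val(ref_prot) |= _PAGE_PSE */
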
9683 --- sle11-2009-10-16.orig/drivers/char/tpm/tpm_xen.c 2009-08-26 11:52:33.000000000 +0200
9684 +++ sle11-2009-10-16/drivers/char/tpm/tpm_xen.c 2009-03-04 11:28:34.000000000 +0100
9685 @@ -85,8 +85,7 @@ static struct tpm_private *my_priv;
9686
9687 /* local function prototypes */
9688 static irqreturn_t tpmif_int(int irq,
9689 - void *tpm_priv,
9690 - struct pt_regs *ptregs);
9691 + void *tpm_priv);
9692 static void tpmif_rx_action(unsigned long unused);
9693 static int tpmif_connect(struct xenbus_device *dev,
9694 struct tpm_private *tp,
9695 @@ -559,7 +558,7 @@ static void tpmif_rx_action(unsigned lon
9696 }
9697
9698
9699 -static irqreturn_t tpmif_int(int irq, void *tpm_priv, struct pt_regs *ptregs)
9700 +static irqreturn_t tpmif_int(int irq, void *tpm_priv)
9701 {
9702 struct tpm_private *tp = tpm_priv;
9703 unsigned long flags;
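
This is the first of many hunks in this patch dropping the struct pt_regs * argument from interrupt handlers (the 2.6.19 genirq change); a handler that still needs the registers fetches them from the per-CPU slot instead. Minimal sketch with a hypothetical handler:

#include <linux/interrupt.h>
#include <asm/irq_regs.h>

static irqreturn_t example_int(int irq, void *dev_id)
{
        struct pt_regs *regs = get_irq_regs();  /* only valid from IRQ context */

        (void)regs;   /* rarely needed; most handlers simply drop the argument */
        return IRQ_HANDLED;
}
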
9704 --- sle11-2009-10-16.orig/drivers/pci/Kconfig 2009-10-28 14:55:12.000000000 +0100
9705 +++ sle11-2009-10-16/drivers/pci/Kconfig 2009-03-04 11:28:34.000000000 +0100
9706 @@ -48,7 +48,7 @@ config PCI_DEBUG
9707 config HT_IRQ
9708 bool "Interrupts on hypertransport devices"
9709 default y
9710 - depends on PCI && X86_LOCAL_APIC && X86_IO_APIC
9711 + depends on PCI && X86_LOCAL_APIC && X86_IO_APIC && !XEN
9712 help
9713 This allows native hypertransport devices to use interrupts.
9714
9715 --- sle11-2009-10-16.orig/drivers/pci/msi-xen.c 2009-10-28 14:55:12.000000000 +0100
9716 +++ sle11-2009-10-16/drivers/pci/msi-xen.c 2009-04-24 13:31:56.000000000 +0200
9717 @@ -6,6 +6,7 @@
9718 * Copyright (C) Tom Long Nguyen (tom.l.nguyen@intel.com)
9719 */
9720
9721 +#include <linux/err.h>
9722 #include <linux/mm.h>
9723 #include <linux/irq.h>
9724 #include <linux/interrupt.h>
9725 @@ -14,6 +15,7 @@
9726 #include <linux/smp_lock.h>
9727 #include <linux/pci.h>
9728 #include <linux/proc_fs.h>
9729 +#include <linux/msi.h>
9730
9731 #include <xen/evtchn.h>
9732
9733 @@ -26,14 +28,6 @@
9734
9735 static int pci_msi_enable = 1;
9736
9737 -static struct msi_ops *msi_ops;
9738 -
9739 -int msi_register(struct msi_ops *ops)
9740 -{
9741 - msi_ops = ops;
9742 - return 0;
9743 -}
9744 -
9745 static LIST_HEAD(msi_dev_head);
9746 DEFINE_SPINLOCK(msi_dev_lock);
9747
9748 @@ -406,9 +400,9 @@ void pci_restore_msix_state(struct pci_d
9749 * @dev: pointer to the pci_dev data structure of MSI device function
9750 *
9751 * Setup the MSI capability structure of device function with a single
9752 - * MSI vector, regardless of device function is capable of handling
9753 + * MSI irq, regardless of device function is capable of handling
9754 * multiple messages. A return of zero indicates the successful setup
9755 - * of an entry zero with the new MSI vector or non-zero for otherwise.
9756 + * of an entry zero with the new MSI irq or non-zero for otherwise.
9757 **/
9758 static int msi_capability_init(struct pci_dev *dev)
9759 {
9760 @@ -422,11 +416,11 @@ static int msi_capability_init(struct pc
9761 if (pirq < 0)
9762 return -EBUSY;
9763
9764 - dev->irq = pirq;
9765 /* Set MSI enabled bits */
9766 enable_msi_mode(dev, pos, PCI_CAP_ID_MSI);
9767 dev->msi_enabled = 1;
9768
9769 + dev->irq = pirq;
9770 return 0;
9771 }
9772
9773 @@ -437,8 +431,8 @@ static int msi_capability_init(struct pc
9774 * @nvec: number of @entries
9775 *
9776 * Setup the MSI-X capability structure of device function with a
9777 - * single MSI-X vector. A return of zero indicates the successful setup of
9778 - * requested MSI-X entries with allocated vectors or non-zero for otherwise.
9779 + * single MSI-X irq. A return of zero indicates the successful setup of
9780 + * requested MSI-X entries with allocated irqs or non-zero for otherwise.
9781 **/
9782 static int msix_capability_init(struct pci_dev *dev,
9783 struct msix_entry *entries, int nvec)
9784 @@ -480,12 +474,18 @@ static int msix_capability_init(struct p
9785 }
9786
9787 if (i != nvec) {
9788 + int avail = i - 1;
9789 for (j = --i; j >= 0; j--) {
9790 msi_unmap_pirq(dev, entries[j].vector);
9791 detach_pirq_entry(entries[j].entry, msi_dev_entry);
9792 entries[j].vector = 0;
9793 }
9794 - return -EBUSY;
9795 + /* If we had some success, report the number of irqs
9796 + * we succeeded in setting up.
9797 + */
9798 + if (avail <= 0)
9799 + avail = -EBUSY;
9800 + return avail;
9801 }
9802
9803 enable_msi_mode(dev, pos, PCI_CAP_ID_MSIX);
9804 @@ -495,11 +495,40 @@ static int msix_capability_init(struct p
9805 }
9806
9807 /**
9808 + * pci_msi_supported - check whether MSI may be enabled on device
9809 + * @dev: pointer to the pci_dev data structure of MSI device function
9810 + *
9811 + * Look at global flags, the device itself, and its parent busses
9812 + * to return 0 if MSI are supported for the device.
9813 + **/
9814 +static
9815 +int pci_msi_supported(struct pci_dev * dev)
9816 +{
9817 + struct pci_bus *bus;
9818 +
9819 + /* MSI must be globally enabled and supported by the device */
9820 + if (!pci_msi_enable || !dev || dev->no_msi)
9821 + return -EINVAL;
9822 +
9823 + /* Any bridge which does NOT route MSI transactions from its
9824 + * secondary bus to its primary bus must set NO_MSI flag on
9825 + * the secondary pci_bus.
9826 + * We expect only arch-specific PCI host bus controller drivers
9827 + * or quirks for specific PCI bridges to be setting NO_MSI.
9828 + */
9829 + for (bus = dev->bus; bus; bus = bus->parent)
9830 + if (bus->bus_flags & PCI_BUS_FLAGS_NO_MSI)
9831 + return -EINVAL;
9832 +
9833 + return 0;
9834 +}
9835 +
9836 +/**
9837 * pci_enable_msi - configure device's MSI capability structure
9838 * @dev: pointer to the pci_dev data structure of MSI device function
9839 *
9840 * Setup the MSI capability structure of device function with
9841 - * a single MSI vector upon its software driver call to request for
9842 + * a single MSI irq upon its software driver call to request for
9843 * MSI mode enabled on its hardware device function. A return of zero
9844 * indicates the successful setup of an entry zero with the new MSI
9845 * vector or non-zero for otherwise.
9846 @@ -507,18 +536,10 @@ static int msix_capability_init(struct p
9847 extern int pci_frontend_enable_msi(struct pci_dev *dev);
9848 int pci_enable_msi(struct pci_dev* dev)
9849 {
9850 - struct pci_bus *bus;
9851 - int pos, temp, status = -EINVAL;
9852 + int pos, temp, status;
9853
9854 - if (!pci_msi_enable || !dev)
9855 - return status;
9856 -
9857 - if (dev->no_msi)
9858 - return status;
9859 -
9860 - for (bus = dev->bus; bus; bus = bus->parent)
9861 - if (bus->bus_flags & PCI_BUS_FLAGS_NO_MSI)
9862 - return -EINVAL;
9863 + if (pci_msi_supported(dev) < 0)
9864 + return -EINVAL;
9865
9866 status = msi_init();
9867 if (status < 0)
9868 @@ -547,10 +568,10 @@ int pci_enable_msi(struct pci_dev* dev)
9869 if (!pos)
9870 return -EINVAL;
9871
9872 - /* Check whether driver already requested for MSI-X vectors */
9873 + /* Check whether driver already requested for MSI-X irqs */
9874 if (dev->msix_enabled) {
9875 printk(KERN_INFO "PCI: %s: Can't enable MSI. "
9876 - "Device already has MSI-X vectors assigned\n",
9877 + "Device already has MSI-X irq assigned\n",
9878 pci_name(dev));
9879 dev->irq = temp;
9880 return -EINVAL;
9881 @@ -602,36 +623,28 @@ void pci_disable_msi(struct pci_dev* dev
9882 * pci_enable_msix - configure device's MSI-X capability structure
9883 * @dev: pointer to the pci_dev data structure of MSI-X device function
9884 * @entries: pointer to an array of MSI-X entries
9885 - * @nvec: number of MSI-X vectors requested for allocation by device driver
9886 + * @nvec: number of MSI-X irqs requested for allocation by device driver
9887 *
9888 * Setup the MSI-X capability structure of device function with the number
9889 - * of requested vectors upon its software driver call to request for
9890 + * of requested irqs upon its software driver call to request for
9891 * MSI-X mode enabled on its hardware device function. A return of zero
9892 * indicates the successful configuration of MSI-X capability structure
9893 - * with new allocated MSI-X vectors. A return of < 0 indicates a failure.
9894 + * with new allocated MSI-X irqs. A return of < 0 indicates a failure.
9895 * Or a return of > 0 indicates that driver request is exceeding the number
9896 - * of vectors available. Driver should use the returned value to re-send
9897 + * of irqs available. Driver should use the returned value to re-send
9898 * its request.
9899 **/
9900 extern int pci_frontend_enable_msix(struct pci_dev *dev,
9901 struct msix_entry *entries, int nvec);
9902 int pci_enable_msix(struct pci_dev* dev, struct msix_entry *entries, int nvec)
9903 {
9904 - struct pci_bus *bus;
9905 int status, pos, nr_entries;
9906 int i, j, temp;
9907 u16 control;
9908
9909 - if (!pci_msi_enable || !dev || !entries)
9910 + if (!entries || pci_msi_supported(dev) < 0)
9911 return -EINVAL;
9912
9913 - if (dev->no_msi)
9914 - return -EINVAL;
9915 -
9916 - for (bus = dev->bus; bus; bus = bus->parent)
9917 - if (bus->bus_flags & PCI_BUS_FLAGS_NO_MSI)
9918 - return -EINVAL;
9919 -
9920 #ifdef CONFIG_XEN_PCIDEV_FRONTEND
9921 if (!is_initial_xendomain()) {
9922 struct msi_dev_list *msi_dev_entry;
9923 @@ -694,7 +707,7 @@ int pci_enable_msix(struct pci_dev* dev,
9924 /* Check whether driver already requested for MSI vector */
9925 if (dev->msi_enabled) {
9926 printk(KERN_INFO "PCI: %s: Can't enable MSI-X. "
9927 - "Device already has an MSI vector assigned\n",
9928 + "Device already has an MSI irq assigned\n",
9929 pci_name(dev));
9930 dev->irq = temp;
9931 return -EINVAL;
9932 @@ -757,11 +770,11 @@ void pci_disable_msix(struct pci_dev* de
9933 }
9934
9935 /**
9936 - * msi_remove_pci_irq_vectors - reclaim MSI(X) vectors to unused state
9937 + * msi_remove_pci_irq_vectors - reclaim MSI(X) irqs to unused state
9938 * @dev: pointer to the pci_dev data structure of MSI(X) device function
9939 *
9940 * Being called during hotplug remove, from which the device function
9941 - * is hot-removed. All previous assigned MSI/MSI-X vectors, if
9942 + * is hot-removed. All previous assigned MSI/MSI-X irqs, if
9943 * allocated for this device function, are reclaimed to unused state,
9944 * which may be used later on.
9945 **/
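
Returning the available count instead of a flat -EBUSY lets drivers negotiate downward; the canonical caller pattern for pci_enable_msix() after this change looks like the following hedged driver-side sketch (the size 8 is a placeholder):

static int example_setup_msix(struct pci_dev *pdev)
{
        struct msix_entry entries[8];
        int i, nvec = 8, rc;

        for (i = 0; i < nvec; i++)
                entries[i].entry = i;

        for (;;) {
                rc = pci_enable_msix(pdev, entries, nvec);
                if (rc == 0)
                        return nvec;    /* got this many MSI-X irqs */
                if (rc < 0)
                        return rc;      /* MSI-X unusable: fall back to MSI/INTx */
                nvec = rc;              /* fewer available: retry with that many */
        }
}
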
9946 --- sle11-2009-10-16.orig/drivers/xen/Kconfig 2008-12-05 08:43:56.000000000 +0100
9947 +++ sle11-2009-10-16/drivers/xen/Kconfig 2009-03-04 11:28:34.000000000 +0100
9948 @@ -287,6 +287,9 @@ endmenu
9949 config HAVE_IRQ_IGNORE_UNHANDLED
9950 def_bool y
9951
9952 +config GENERIC_HARDIRQS_NO__DO_IRQ
9953 + def_bool y
9954 +
9955 config NO_IDLE_HZ
9956 def_bool y
9957
9958 --- sle11-2009-10-16.orig/drivers/xen/balloon/balloon.c 2009-06-29 15:24:00.000000000 +0200
9959 +++ sle11-2009-10-16/drivers/xen/balloon/balloon.c 2009-03-04 11:28:34.000000000 +0100
9960 @@ -84,7 +84,7 @@ static unsigned long frame_list[PAGE_SIZ
9961 /* VM /proc information for memory */
9962 extern unsigned long totalram_pages;
9963
9964 -#ifndef MODULE
9965 +#if !defined(MODULE) && defined(CONFIG_HIGHMEM)
9966 extern unsigned long totalhigh_pages;
9967 #define inc_totalhigh_pages() (totalhigh_pages++)
9968 #define dec_totalhigh_pages() (totalhigh_pages--)
9969 --- sle11-2009-10-16.orig/drivers/xen/blkback/blkback.c 2008-12-01 11:21:10.000000000 +0100
9970 +++ sle11-2009-10-16/drivers/xen/blkback/blkback.c 2009-03-04 11:28:34.000000000 +0100
9971 @@ -288,7 +288,7 @@ static void blkif_notify_work(blkif_t *b
9972 wake_up(&blkif->wq);
9973 }
9974
9975 -irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs)
9976 +irqreturn_t blkif_be_int(int irq, void *dev_id)
9977 {
9978 blkif_notify_work(dev_id);
9979 return IRQ_HANDLED;
9980 --- sle11-2009-10-16.orig/drivers/xen/blkback/common.h 2009-10-28 14:55:12.000000000 +0100
9981 +++ sle11-2009-10-16/drivers/xen/blkback/common.h 2009-03-04 11:28:34.000000000 +0100
9982 @@ -130,7 +130,7 @@ void blkif_interface_init(void);
9983
9984 void blkif_xenbus_init(void);
9985
9986 -irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs);
9987 +irqreturn_t blkif_be_int(int irq, void *dev_id);
9988 int blkif_schedule(void *arg);
9989
9990 int blkback_barrier(struct xenbus_transaction xbt,
9991 --- sle11-2009-10-16.orig/drivers/xen/blkfront/blkfront.c 2009-03-05 15:42:00.000000000 +0100
9992 +++ sle11-2009-10-16/drivers/xen/blkfront/blkfront.c 2009-03-24 10:08:16.000000000 +0100
9993 @@ -70,7 +70,7 @@ static int setup_blkring(struct xenbus_d
9994
9995 static void kick_pending_request_queues(struct blkfront_info *);
9996
9997 -static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs);
9998 +static irqreturn_t blkif_int(int irq, void *dev_id);
9999 static void blkif_restart_queue(void *arg);
10000 static void blkif_recover(struct blkfront_info *);
10001 static void blkif_completion(struct blk_shadow *);
10002 @@ -707,7 +707,7 @@ void do_blkif_request(request_queue_t *r
10003 }
10004
10005
10006 -static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs)
10007 +static irqreturn_t blkif_int(int irq, void *dev_id)
10008 {
10009 struct request *req;
10010 blkif_response_t *bret;
10011 --- sle11-2009-10-16.orig/drivers/xen/blktap/blktap.c 2009-04-20 11:36:10.000000000 +0200
10012 +++ sle11-2009-10-16/drivers/xen/blktap/blktap.c 2009-04-20 11:37:34.000000000 +0200
10013 @@ -1222,7 +1222,7 @@ static void blkif_notify_work(blkif_t *b
10014 wake_up(&blkif->wq);
10015 }
10016
10017 -irqreturn_t tap_blkif_be_int(int irq, void *dev_id, struct pt_regs *regs)
10018 +irqreturn_t tap_blkif_be_int(int irq, void *dev_id)
10019 {
10020 blkif_notify_work(dev_id);
10021 return IRQ_HANDLED;
10022 --- sle11-2009-10-16.orig/drivers/xen/blktap/common.h 2009-10-28 14:55:12.000000000 +0100
10023 +++ sle11-2009-10-16/drivers/xen/blktap/common.h 2009-03-04 11:28:34.000000000 +0100
10024 @@ -113,7 +113,7 @@ void tap_blkif_interface_init(void);
10025
10026 void tap_blkif_xenbus_init(void);
10027
10028 -irqreturn_t tap_blkif_be_int(int irq, void *dev_id, struct pt_regs *regs);
10029 +irqreturn_t tap_blkif_be_int(int irq, void *dev_id);
10030 int tap_blkif_schedule(void *arg);
10031
10032 int dom_to_devid(domid_t domid, int xenbus_id, blkif_t *blkif);
10033 --- sle11-2009-10-16.orig/drivers/xen/console/console.c 2008-12-15 11:13:47.000000000 +0100
10034 +++ sle11-2009-10-16/drivers/xen/console/console.c 2009-03-04 11:28:34.000000000 +0100
10035 @@ -361,7 +361,7 @@ static struct tty_struct *xencons_tty;
10036 static int xencons_priv_irq;
10037 static char x_char;
10038
10039 -void xencons_rx(char *buf, unsigned len, struct pt_regs *regs)
10040 +void xencons_rx(char *buf, unsigned len)
10041 {
10042 int i;
10043 unsigned long flags;
10044 @@ -386,8 +386,7 @@ void xencons_rx(char *buf, unsigned len,
10045 if (time_before(jiffies, sysrq_timeout)) {
10046 spin_unlock_irqrestore(
10047 &xencons_lock, flags);
10048 - handle_sysrq(
10049 - buf[i], regs, xencons_tty);
10050 + handle_sysrq(buf[i], xencons_tty);
10051 spin_lock_irqsave(
10052 &xencons_lock, flags);
10053 continue;
10054 @@ -452,14 +451,13 @@ void xencons_tx(void)
10055 }
10056
10057 /* Privileged receive callback and transmit kicker. */
10058 -static irqreturn_t xencons_priv_interrupt(int irq, void *dev_id,
10059 - struct pt_regs *regs)
10060 +static irqreturn_t xencons_priv_interrupt(int irq, void *dev_id)
10061 {
10062 static char rbuf[16];
10063 int l;
10064
10065 while ((l = HYPERVISOR_console_io(CONSOLEIO_read, 16, rbuf)) > 0)
10066 - xencons_rx(rbuf, l, regs);
10067 + xencons_rx(rbuf, l);
10068
10069 xencons_tx();
10070
10071 @@ -647,7 +645,7 @@ static void xencons_close(struct tty_str
10072 spin_unlock_irqrestore(&xencons_lock, flags);
10073 }
10074
10075 -static struct tty_operations xencons_ops = {
10076 +static const struct tty_operations xencons_ops = {
10077 .open = xencons_open,
10078 .close = xencons_close,
10079 .write = xencons_write,
10080 --- sle11-2009-10-16.orig/drivers/xen/console/xencons_ring.c 2009-10-28 14:55:12.000000000 +0100
10081 +++ sle11-2009-10-16/drivers/xen/console/xencons_ring.c 2009-03-04 11:28:34.000000000 +0100
10082 @@ -83,7 +83,7 @@ int xencons_ring_send(const char *data,
10083 return sent;
10084 }
10085
10086 -static irqreturn_t handle_input(int irq, void *unused, struct pt_regs *regs)
10087 +static irqreturn_t handle_input(int irq, void *unused)
10088 {
10089 struct xencons_interface *intf = xencons_interface();
10090 XENCONS_RING_IDX cons, prod;
10091 @@ -94,7 +94,7 @@ static irqreturn_t handle_input(int irq,
10092 BUG_ON((prod - cons) > sizeof(intf->in));
10093
10094 while (cons != prod) {
10095 - xencons_rx(intf->in+MASK_XENCONS_IDX(cons,intf->in), 1, regs);
10096 + xencons_rx(intf->in+MASK_XENCONS_IDX(cons,intf->in), 1);
10097 cons++;
10098 }
10099
10100 --- sle11-2009-10-16.orig/drivers/xen/core/evtchn.c 2009-03-16 16:14:12.000000000 +0100
10101 +++ sle11-2009-10-16/drivers/xen/core/evtchn.c 2009-03-04 11:28:34.000000000 +0100
10102 @@ -516,7 +516,7 @@ static void unbind_from_irq(unsigned int
10103
10104 int bind_caller_port_to_irqhandler(
10105 unsigned int caller_port,
10106 - irqreturn_t (*handler)(int, void *, struct pt_regs *),
10107 + irq_handler_t handler,
10108 unsigned long irqflags,
10109 const char *devname,
10110 void *dev_id)
10111 @@ -539,7 +539,7 @@ EXPORT_SYMBOL_GPL(bind_caller_port_to_ir
10112
10113 int bind_listening_port_to_irqhandler(
10114 unsigned int remote_domain,
10115 - irqreturn_t (*handler)(int, void *, struct pt_regs *),
10116 + irq_handler_t handler,
10117 unsigned long irqflags,
10118 const char *devname,
10119 void *dev_id)
10120 @@ -563,7 +563,7 @@ EXPORT_SYMBOL_GPL(bind_listening_port_to
10121 int bind_interdomain_evtchn_to_irqhandler(
10122 unsigned int remote_domain,
10123 unsigned int remote_port,
10124 - irqreturn_t (*handler)(int, void *, struct pt_regs *),
10125 + irq_handler_t handler,
10126 unsigned long irqflags,
10127 const char *devname,
10128 void *dev_id)
10129 @@ -587,7 +587,7 @@ EXPORT_SYMBOL_GPL(bind_interdomain_evtch
10130 int bind_virq_to_irqhandler(
10131 unsigned int virq,
10132 unsigned int cpu,
10133 - irqreturn_t (*handler)(int, void *, struct pt_regs *),
10134 + irq_handler_t handler,
10135 unsigned long irqflags,
10136 const char *devname,
10137 void *dev_id)
10138 @@ -611,7 +611,7 @@ EXPORT_SYMBOL_GPL(bind_virq_to_irqhandle
10139 int bind_ipi_to_irqhandler(
10140 unsigned int ipi,
10141 unsigned int cpu,
10142 - irqreturn_t (*handler)(int, void *, struct pt_regs *),
10143 + irq_handler_t handler,
10144 unsigned long irqflags,
10145 const char *devname,
10146 void *dev_id)
10147 @@ -696,15 +696,7 @@ static unsigned int startup_dynirq(unsig
10148 return 0;
10149 }
10150
10151 -static void shutdown_dynirq(unsigned int irq)
10152 -{
10153 - int evtchn = evtchn_from_irq(irq);
10154 -
10155 - if (VALID_EVTCHN(evtchn))
10156 - mask_evtchn(evtchn);
10157 -}
10158 -
10159 -static void enable_dynirq(unsigned int irq)
10160 +static void unmask_dynirq(unsigned int irq)
10161 {
10162 int evtchn = evtchn_from_irq(irq);
10163
10164 @@ -712,7 +704,7 @@ static void enable_dynirq(unsigned int i
10165 unmask_evtchn(evtchn);
10166 }
10167
10168 -static void disable_dynirq(unsigned int irq)
10169 +static void mask_dynirq(unsigned int irq)
10170 {
10171 int evtchn = evtchn_from_irq(irq);
10172
10173 @@ -740,12 +732,13 @@ static void end_dynirq(unsigned int irq)
10174 unmask_evtchn(evtchn);
10175 }
10176
10177 -static struct hw_interrupt_type dynirq_type = {
10178 - .typename = "Dynamic-irq",
10179 +static struct irq_chip dynirq_chip = {
10180 + .name = "Dynamic",
10181 .startup = startup_dynirq,
10182 - .shutdown = shutdown_dynirq,
10183 - .enable = enable_dynirq,
10184 - .disable = disable_dynirq,
10185 + .shutdown = mask_dynirq,
10186 + .mask = mask_dynirq,
10187 + .unmask = unmask_dynirq,
10188 + .mask_ack = ack_dynirq,
10189 .ack = ack_dynirq,
10190 .end = end_dynirq,
10191 #ifdef CONFIG_SMP
10192 @@ -859,12 +852,12 @@ static void shutdown_pirq(unsigned int i
10193 irq_info[irq] = mk_irq_info(IRQT_PIRQ, index_from_irq(irq), 0);
10194 }
10195
10196 -static void enable_pirq(unsigned int irq)
10197 +static void unmask_pirq(unsigned int irq)
10198 {
10199 startup_pirq(irq);
10200 }
10201
10202 -static void disable_pirq(unsigned int irq)
10203 +static void mask_pirq(unsigned int irq)
10204 {
10205 }
10206
10207 @@ -891,12 +884,13 @@ static void end_pirq(unsigned int irq)
10208 pirq_unmask_and_notify(evtchn, irq);
10209 }
10210
10211 -static struct hw_interrupt_type pirq_type = {
10212 - .typename = "Phys-irq",
10213 +static struct irq_chip pirq_chip = {
10214 + .name = "Phys",
10215 .startup = startup_pirq,
10216 .shutdown = shutdown_pirq,
10217 - .enable = enable_pirq,
10218 - .disable = disable_pirq,
10219 + .mask = mask_pirq,
10220 + .unmask = unmask_pirq,
10221 + .mask_ack = ack_pirq,
10222 .ack = ack_pirq,
10223 .end = end_pirq,
10224 #ifdef CONFIG_SMP
10225 @@ -1081,7 +1075,8 @@ void evtchn_register_pirq(int irq)
10226 if (identity_mapped_irq(irq) || type_from_irq(irq) != IRQT_UNBOUND)
10227 return;
10228 irq_info[irq] = mk_irq_info(IRQT_PIRQ, irq, 0);
10229 - irq_desc[irq].chip = &pirq_type;
10230 + set_irq_chip_and_handler_name(irq, &pirq_chip, handle_level_irq,
10231 + "level");
10232 }
10233
10234 int evtchn_map_pirq(int irq, int xen_pirq)
10235 @@ -1104,11 +1099,18 @@ int evtchn_map_pirq(int irq, int xen_pir
10236 spin_unlock(&irq_alloc_lock);
10237 if (irq < PIRQ_BASE)
10238 return -ENOSPC;
10239 - irq_desc[irq].chip = &pirq_type;
10240 + set_irq_chip_and_handler_name(irq, &pirq_chip,
10241 + handle_level_irq, "level");
10242 } else if (!xen_pirq) {
10243 if (unlikely(type_from_irq(irq) != IRQT_PIRQ))
10244 return -EINVAL;
10245 - irq_desc[irq].chip = &no_irq_type;
10246 + /*
10247 + * dynamic_irq_cleanup(irq) would seem to be the correct thing
10248 + * here, but cannot be used as we get here also during shutdown
10249 + * when a driver didn't free_irq() its MSI(-X) IRQ(s), which
10250 + * then causes a warning in dynamic_irq_cleanup().
10251 + */
10252 + set_irq_chip_and_handler(irq, NULL, NULL);
10253 irq_info[irq] = IRQ_UNBOUND;
10254 return 0;
10255 } else if (type_from_irq(irq) != IRQT_PIRQ
10256 @@ -1154,10 +1156,9 @@ void __init xen_init_IRQ(void)
10257 for (i = DYNIRQ_BASE; i < (DYNIRQ_BASE + NR_DYNIRQS); i++) {
10258 irq_bindcount[i] = 0;
10259
10260 - irq_desc[i].status = IRQ_DISABLED|IRQ_NOPROBE;
10261 - irq_desc[i].action = NULL;
10262 - irq_desc[i].depth = 1;
10263 - irq_desc[i].chip = &dynirq_type;
10264 + irq_desc[i].status |= IRQ_NOPROBE;
10265 + set_irq_chip_and_handler_name(i, &dynirq_chip,
10266 + handle_level_irq, "level");
10267 }
10268
10269 /* Phys IRQ space is statically bound (1:1 mapping). Nail refcnts. */
10270 @@ -1173,9 +1174,7 @@ void __init xen_init_IRQ(void)
10271 continue;
10272 #endif
10273
10274 - irq_desc[i].status = IRQ_DISABLED;
10275 - irq_desc[i].action = NULL;
10276 - irq_desc[i].depth = 1;
10277 - irq_desc[i].chip = &pirq_type;
10278 + set_irq_chip_and_handler_name(i, &pirq_chip,
10279 + handle_level_irq, "level");
10280 }
10281 }
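
The conversions above replace struct hw_interrupt_type (enable/disable, typename) with the genirq struct irq_chip (mask/unmask, name) and route every IRQ through a flow handler. The registration pattern now used for both chips, with hypothetical callbacks:

static void example_mask(unsigned int irq)   { /* mask the event source */ }
static void example_unmask(unsigned int irq) { /* unmask it again */ }
static void example_ack(unsigned int irq)    { /* ack (and mask, for level flow) */ }

static struct irq_chip example_chip = {
        .name     = "Example",
        .mask     = example_mask,
        .unmask   = example_unmask,
        .mask_ack = example_ack,
        .ack      = example_ack,
};

static void __init example_irq_init(unsigned int irq)
{
        set_irq_chip_and_handler_name(irq, &example_chip,
                                      handle_level_irq, "level");
}
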
10282 --- sle11-2009-10-16.orig/drivers/xen/core/reboot.c 2009-10-28 14:55:12.000000000 +0100
10283 +++ sle11-2009-10-16/drivers/xen/core/reboot.c 2009-03-04 11:28:34.000000000 +0100
10284 @@ -14,6 +14,7 @@
10285
10286 #ifdef HAVE_XEN_PLATFORM_COMPAT_H
10287 #include <xen/platform-compat.h>
10288 +#undef handle_sysrq
10289 #endif
10290
10291 MODULE_LICENSE("Dual BSD/GPL");
10292 @@ -231,7 +232,7 @@ static void sysrq_handler(struct xenbus_
10293
10294 #ifdef CONFIG_MAGIC_SYSRQ
10295 if (sysrq_key != '\0')
10296 - handle_sysrq(sysrq_key, NULL, NULL);
10297 + handle_sysrq(sysrq_key, NULL);
10298 #endif
10299 }
10300
10301 @@ -245,7 +246,7 @@ static struct xenbus_watch sysrq_watch =
10302 .callback = sysrq_handler
10303 };
10304
10305 -static irqreturn_t suspend_int(int irq, void* dev_id, struct pt_regs *ptregs)
10306 +static irqreturn_t suspend_int(int irq, void* dev_id)
10307 {
10308 switch_shutdown_state(SHUTDOWN_SUSPEND);
10309 return IRQ_HANDLED;
10310 --- sle11-2009-10-16.orig/drivers/xen/core/smpboot.c 2009-04-28 16:02:07.000000000 +0200
10311 +++ sle11-2009-10-16/drivers/xen/core/smpboot.c 2009-03-04 11:28:34.000000000 +0100
10312 @@ -25,8 +25,8 @@
10313 #include <xen/cpu_hotplug.h>
10314 #include <xen/xenbus.h>
10315
10316 -extern irqreturn_t smp_reschedule_interrupt(int, void *, struct pt_regs *);
10317 -extern irqreturn_t smp_call_function_interrupt(int, void *, struct pt_regs *);
10318 +extern irqreturn_t smp_reschedule_interrupt(int, void *);
10319 +extern irqreturn_t smp_call_function_interrupt(int, void *);
10320
10321 extern int local_setup_timer(unsigned int cpu);
10322 extern void local_teardown_timer(unsigned int cpu);
10323 @@ -62,8 +62,6 @@ EXPORT_SYMBOL(cpu_core_map);
10324 #if defined(__i386__)
10325 u8 x86_cpu_to_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = 0xff };
10326 EXPORT_SYMBOL(x86_cpu_to_apicid);
10327 -#elif !defined(CONFIG_X86_IO_APIC)
10328 -unsigned int maxcpus = NR_CPUS;
10329 #endif
10330
10331 void __init prefill_possible_map(void)
10332 --- sle11-2009-10-16.orig/drivers/xen/fbfront/xenfb.c 2009-02-16 15:59:55.000000000 +0100
10333 +++ sle11-2009-10-16/drivers/xen/fbfront/xenfb.c 2009-03-04 11:28:34.000000000 +0100
10334 @@ -524,8 +524,7 @@ static struct fb_ops xenfb_fb_ops = {
10335 .fb_set_par = xenfb_set_par,
10336 };
10337
10338 -static irqreturn_t xenfb_event_handler(int rq, void *dev_id,
10339 - struct pt_regs *regs)
10340 +static irqreturn_t xenfb_event_handler(int rq, void *dev_id)
10341 {
10342 /*
10343 * No in events recognized, simply ignore them all.
10344 --- sle11-2009-10-16.orig/drivers/xen/fbfront/xenkbd.c 2009-10-28 14:55:12.000000000 +0100
10345 +++ sle11-2009-10-16/drivers/xen/fbfront/xenkbd.c 2009-03-04 11:28:34.000000000 +0100
10346 @@ -46,7 +46,7 @@ static void xenkbd_disconnect_backend(st
10347 * to do that.
10348 */
10349
10350 -static irqreturn_t input_handler(int rq, void *dev_id, struct pt_regs *regs)
10351 +static irqreturn_t input_handler(int rq, void *dev_id)
10352 {
10353 struct xenkbd_info *info = dev_id;
10354 struct xenkbd_page *page = info->page;
10355 --- sle11-2009-10-16.orig/drivers/xen/gntdev/gntdev.c 2008-12-15 11:13:45.000000000 +0100
10356 +++ sle11-2009-10-16/drivers/xen/gntdev/gntdev.c 2009-03-04 11:28:34.000000000 +0100
10357 @@ -752,9 +752,6 @@ static pte_t gntdev_clear_pte(struct vm_
10358 BUG();
10359 }
10360
10361 - /* Copy the existing value of the PTE for returning. */
10362 - copy = *ptep;
10363 -
10364 /* Calculate the grant relating to this PTE. */
10365 slot_index = vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT);
10366
10367 @@ -769,6 +766,10 @@ static pte_t gntdev_clear_pte(struct vm_
10368 GNTDEV_INVALID_HANDLE &&
10369 !xen_feature(XENFEAT_auto_translated_physmap)) {
10370 /* NOT USING SHADOW PAGE TABLES. */
10371 +
10372 + /* Copy the existing value of the PTE for returning. */
10373 + copy = *ptep;
10374 +
10375 gnttab_set_unmap_op(&op, ptep_to_machine(ptep),
10376 GNTMAP_contains_pte,
10377 private_data->grants[slot_index]
10378 @@ -781,7 +782,7 @@ static pte_t gntdev_clear_pte(struct vm_
10379 op.status);
10380 } else {
10381 /* USING SHADOW PAGE TABLES. */
10382 - pte_clear_full(vma->vm_mm, addr, ptep, is_fullmm);
10383 + copy = ptep_get_and_clear_full(vma->vm_mm, addr, ptep, is_fullmm);
10384 }
10385
10386 /* Finally, we unmap the grant from kernel space. */
10387 @@ -809,7 +810,7 @@ static pte_t gntdev_clear_pte(struct vm_
10388 >> PAGE_SHIFT, INVALID_P2M_ENTRY);
10389
10390 } else {
10391 - pte_clear_full(vma->vm_mm, addr, ptep, is_fullmm);
10392 + copy = ptep_get_and_clear_full(vma->vm_mm, addr, ptep, is_fullmm);
10393 }
10394
10395 return copy;
10396 --- sle11-2009-10-16.orig/drivers/xen/netback/accel.c 2009-10-28 14:55:12.000000000 +0100
10397 +++ sle11-2009-10-16/drivers/xen/netback/accel.c 2009-03-04 11:28:34.000000000 +0100
10398 @@ -65,7 +65,7 @@ static int match_accelerator(struct xenb
10399
10400 if (IS_ERR(eth_name)) {
10401 /* Probably means not present */
10402 - DPRINTK("%s: no match due to xenbus_read accel error %d\n",
10403 + DPRINTK("%s: no match due to xenbus_read accel error %ld\n",
10404 __FUNCTION__, PTR_ERR(eth_name));
10405 return 0;
10406 } else {
10407 --- sle11-2009-10-16.orig/drivers/xen/netback/common.h 2009-10-28 14:55:12.000000000 +0100
10408 +++ sle11-2009-10-16/drivers/xen/netback/common.h 2009-03-04 11:28:34.000000000 +0100
10409 @@ -200,7 +200,7 @@ void netif_deschedule_work(netif_t *neti
10410
10411 int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev);
10412 struct net_device_stats *netif_be_get_stats(struct net_device *dev);
10413 -irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs);
10414 +irqreturn_t netif_be_int(int irq, void *dev_id);
10415
10416 static inline int netbk_can_queue(struct net_device *dev)
10417 {
10418 --- sle11-2009-10-16.orig/drivers/xen/netback/loopback.c 2009-10-28 14:55:12.000000000 +0100
10419 +++ sle11-2009-10-16/drivers/xen/netback/loopback.c 2009-03-04 11:28:34.000000000 +0100
10420 @@ -151,7 +151,7 @@ static int loopback_start_xmit(struct sk
10421 np->stats.rx_bytes += skb->len;
10422 np->stats.rx_packets++;
10423
10424 - if (skb->ip_summed == CHECKSUM_HW) {
10425 + if (skb->ip_summed == CHECKSUM_PARTIAL) {
10426 /* Defer checksum calculation. */
10427 skb->proto_csum_blank = 1;
10428 /* Must be a local packet: assert its integrity. */
10429 --- sle11-2009-10-16.orig/drivers/xen/netback/netback.c 2008-12-23 09:31:07.000000000 +0100
10430 +++ sle11-2009-10-16/drivers/xen/netback/netback.c 2009-03-04 11:28:34.000000000 +0100
10431 @@ -692,7 +692,7 @@ static void net_rx_action(unsigned long
10432 id = meta[npo.meta_cons].id;
10433 flags = nr_frags ? NETRXF_more_data : 0;
10434
10435 - if (skb->ip_summed == CHECKSUM_HW) /* local packet? */
10436 + if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
10437 flags |= NETRXF_csum_blank | NETRXF_data_validated;
10438 else if (skb->proto_data_valid) /* remote but checksummed? */
10439 flags |= NETRXF_data_validated;
10440 @@ -1459,7 +1459,7 @@ static void netif_page_release(struct pa
10441 netif_idx_release(idx);
10442 }
10443
10444 -irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs)
10445 +irqreturn_t netif_be_int(int irq, void *dev_id)
10446 {
10447 netif_t *netif = dev_id;
10448
10449 @@ -1526,7 +1526,7 @@ static netif_rx_response_t *make_rx_resp
10450 }
10451
10452 #ifdef NETBE_DEBUG_INTERRUPT
10453 -static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs)
10454 +static irqreturn_t netif_be_dbg(int irq, void *dev_id)
10455 {
10456 struct list_head *ent;
10457 netif_t *netif;
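The signature churn in the surrounding hunks is the 2.6.19 interrupt-handler API change: struct pt_regs * was dropped from the handler prototype, and a handler that still needs the register frame fetches it itself. A sketch of the new convention, assuming kernel context:

	#include <asm/irq_regs.h>

	static irqreturn_t example_intr(int irq, void *dev_id)
	{
		/* only handlers that really need the frame do this */
		struct pt_regs *regs = get_irq_regs();

		(void)regs;
		return IRQ_HANDLED;
	}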
10458 --- sle11-2009-10-16.orig/drivers/xen/netfront/netfront.c 2009-04-09 14:41:33.000000000 +0200
10459 +++ sle11-2009-10-16/drivers/xen/netfront/netfront.c 2009-03-30 16:34:59.000000000 +0200
10460 @@ -136,7 +136,7 @@ static inline int netif_needs_gso(struct
10461 {
10462 return skb_is_gso(skb) &&
10463 (!skb_gso_ok(skb, dev->features) ||
10464 - unlikely(skb->ip_summed != CHECKSUM_HW));
10465 + unlikely(skb->ip_summed != CHECKSUM_PARTIAL));
10466 }
10467 #else
10468 #define HAVE_GSO 0
10469 @@ -222,7 +222,7 @@ static void network_tx_buf_gc(struct net
10470 static void network_alloc_rx_buffers(struct net_device *);
10471 static void send_fake_arp(struct net_device *);
10472
10473 -static irqreturn_t netif_int(int irq, void *dev_id, struct pt_regs *ptregs);
10474 +static irqreturn_t netif_int(int irq, void *dev_id);
10475
10476 #ifdef CONFIG_SYSFS
10477 static int xennet_sysfs_addif(struct net_device *netdev);
10478 @@ -992,7 +992,7 @@ static int network_start_xmit(struct sk_
10479 tx->flags = 0;
10480 extra = NULL;
10481
10482 - if (skb->ip_summed == CHECKSUM_HW) /* local packet? */
10483 + if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
10484 tx->flags |= NETTXF_csum_blank | NETTXF_data_validated;
10485 #ifdef CONFIG_XEN
10486 if (skb->proto_data_valid) /* remote but checksummed? */
10487 @@ -1049,7 +1049,7 @@ static int network_start_xmit(struct sk_
10488 return 0;
10489 }
10490
10491 -static irqreturn_t netif_int(int irq, void *dev_id, struct pt_regs *ptregs)
10492 +static irqreturn_t netif_int(int irq, void *dev_id)
10493 {
10494 struct net_device *dev = dev_id;
10495 struct netfront_info *np = netdev_priv(dev);
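The CHECKSUM_HW replacements across these network hunks track the 2.6.19 rename: the old value was split into CHECKSUM_PARTIAL (transmit side, checksum still to be computed at skb->h.raw + skb->csum) and CHECKSUM_COMPLETE (receive side, device supplied a full checksum). All paths touched here are transmit-side, hence CHECKSUM_PARTIAL. A minimal sketch, kernel context assumed and the helper name hypothetical:

	static int tx_needs_csum_offload(const struct sk_buff *skb)
	{
		/* on 2.6.19+ this is the tx-side "checksum not yet
		 * computed" state formerly called CHECKSUM_HW */
		return skb->ip_summed == CHECKSUM_PARTIAL;
	}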
10496 --- sle11-2009-10-16.orig/drivers/xen/pciback/pciback.h 2009-10-28 14:55:12.000000000 +0100
10497 +++ sle11-2009-10-16/drivers/xen/pciback/pciback.h 2009-03-04 11:28:34.000000000 +0100
10498 @@ -87,7 +87,7 @@ int pciback_publish_pci_roots(struct pci
10499 void pciback_release_devices(struct pciback_device *pdev);
10500
10501 /* Handles events from front-end */
10502 -irqreturn_t pciback_handle_event(int irq, void *dev_id, struct pt_regs *regs);
10503 +irqreturn_t pciback_handle_event(int irq, void *dev_id);
10504 void pciback_do_op(void *data);
10505
10506 int pciback_xenbus_register(void);
10507 --- sle11-2009-10-16.orig/drivers/xen/pciback/pciback_ops.c 2009-10-28 14:55:12.000000000 +0100
10508 +++ sle11-2009-10-16/drivers/xen/pciback/pciback_ops.c 2009-03-04 11:28:34.000000000 +0100
10509 @@ -107,7 +107,7 @@ void pciback_do_op(void *data)
10510 test_and_schedule_op(pdev);
10511 }
10512
10513 -irqreturn_t pciback_handle_event(int irq, void *dev_id, struct pt_regs *regs)
10514 +irqreturn_t pciback_handle_event(int irq, void *dev_id)
10515 {
10516 struct pciback_device *pdev = dev_id;
10517
10518 --- sle11-2009-10-16.orig/drivers/xen/pcifront/pci_op.c 2009-10-28 14:55:12.000000000 +0100
10519 +++ sle11-2009-10-16/drivers/xen/pcifront/pci_op.c 2009-03-04 11:28:34.000000000 +0100
10520 @@ -508,10 +508,16 @@ int __devinit pcifront_rescan_root(struc
10521
10522 d = pci_scan_single_device(b, devfn);
10523 if (d) {
10524 + int err;
10525 +
10526 dev_info(&pdev->xdev->dev, "New device on "
10527 "%04x:%02x:%02x.%02x found.\n", domain, bus,
10528 PCI_SLOT(devfn), PCI_FUNC(devfn));
10529 - pci_bus_add_device(d);
10530 + err = pci_bus_add_device(d);
10531 + if (err)
10532 + dev_err(&pdev->xdev->dev,
10533 + "error %d adding device, continuing.\n",
10534 + err);
10535 }
10536 }
10537
10538 --- sle11-2009-10-16.orig/drivers/xen/privcmd/compat_privcmd.c 2009-10-28 14:55:12.000000000 +0100
10539 +++ sle11-2009-10-16/drivers/xen/privcmd/compat_privcmd.c 2009-03-04 11:28:34.000000000 +0100
10540 @@ -18,7 +18,6 @@
10541 * Authors: Jimi Xenidis <jimix@watson.ibm.com>
10542 */
10543
10544 -#include <linux/config.h>
10545 #include <linux/compat.h>
10546 #include <linux/ioctl.h>
10547 #include <linux/syscalls.h>
10548 --- sle11-2009-10-16.orig/drivers/xen/privcmd/privcmd.c 2009-10-28 14:55:12.000000000 +0100
10549 +++ sle11-2009-10-16/drivers/xen/privcmd/privcmd.c 2009-03-04 11:28:34.000000000 +0100
10550 @@ -40,7 +40,7 @@ static int privcmd_enforce_singleshot_ma
10551 static long privcmd_ioctl(struct file *file,
10552 unsigned int cmd, unsigned long data)
10553 {
10554 - int ret = -ENOSYS;
10555 + long ret = -ENOSYS;
10556 void __user *udata = (void __user *) data;
10557
10558 switch (cmd) {
10559 @@ -50,42 +50,15 @@ static long privcmd_ioctl(struct file *f
10560 if (copy_from_user(&hypercall, udata, sizeof(hypercall)))
10561 return -EFAULT;
10562
10563 -#if defined(__i386__)
10564 +#ifdef CONFIG_X86
10565 if (hypercall.op >= (PAGE_SIZE >> 5))
10566 break;
10567 - __asm__ __volatile__ (
10568 - "pushl %%ebx; pushl %%ecx; pushl %%edx; "
10569 - "pushl %%esi; pushl %%edi; "
10570 - "movl 8(%%eax),%%ebx ;"
10571 - "movl 16(%%eax),%%ecx ;"
10572 - "movl 24(%%eax),%%edx ;"
10573 - "movl 32(%%eax),%%esi ;"
10574 - "movl 40(%%eax),%%edi ;"
10575 - "movl (%%eax),%%eax ;"
10576 - "shll $5,%%eax ;"
10577 - "addl $hypercall_page,%%eax ;"
10578 - "call *%%eax ;"
10579 - "popl %%edi; popl %%esi; popl %%edx; "
10580 - "popl %%ecx; popl %%ebx"
10581 - : "=a" (ret) : "0" (&hypercall) : "memory" );
10582 -#elif defined (__x86_64__)
10583 - if (hypercall.op < (PAGE_SIZE >> 5)) {
10584 - long ign1, ign2, ign3;
10585 - __asm__ __volatile__ (
10586 - "movq %8,%%r10; movq %9,%%r8;"
10587 - "shll $5,%%eax ;"
10588 - "addq $hypercall_page,%%rax ;"
10589 - "call *%%rax"
10590 - : "=a" (ret), "=D" (ign1),
10591 - "=S" (ign2), "=d" (ign3)
10592 - : "0" ((unsigned int)hypercall.op),
10593 - "1" (hypercall.arg[0]),
10594 - "2" (hypercall.arg[1]),
10595 - "3" (hypercall.arg[2]),
10596 - "g" (hypercall.arg[3]),
10597 - "g" (hypercall.arg[4])
10598 - : "r8", "r10", "memory" );
10599 - }
10600 + ret = _hypercall(long, (unsigned int)hypercall.op,
10601 + (unsigned long)hypercall.arg[0],
10602 + (unsigned long)hypercall.arg[1],
10603 + (unsigned long)hypercall.arg[2],
10604 + (unsigned long)hypercall.arg[3],
10605 + (unsigned long)hypercall.arg[4]);
10606 #else
10607 ret = privcmd_hypercall(&hypercall);
10608 #endif
10609 @@ -306,7 +279,7 @@ static int privcmd_mmap(struct file * fi
10610 return -ENOSYS;
10611
10612 /* DONTCOPY is essential for Xen as copy_page_range is broken. */
10613 - vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY;
10614 + vma->vm_flags |= VM_RESERVED | VM_IO | VM_PFNMAP | VM_DONTCOPY;
10615 vma->vm_ops = &privcmd_vm_ops;
10616 vma->vm_private_data = NULL;
10617
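The open-coded i386 and x86-64 assembly removed above is folded into the generic _hypercall() macro that this patch adds to the hypercall headers (see the hypercall_32.h and hypercall_64.h hunks below): each hypercall occupies a 32-byte slot of hypercall_page, and the five arguments travel in ebx/ecx/edx/esi/edi on i386 or rdi/rsi/rdx/r10/r8 on x86-64. A hypothetical direct use of the macro, assuming kernel context and the names introduced by this patch:

	/* illustrative only: yield the CPU via the generic entry point */
	long rc = _hypercall(long, __HYPERVISOR_sched_op,
			     SCHEDOP_yield, 0, 0, 0, 0);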
10618 --- sle11-2009-10-16.orig/drivers/xen/scsiback/common.h 2009-10-28 14:55:12.000000000 +0100
10619 +++ sle11-2009-10-16/drivers/xen/scsiback/common.h 2009-03-04 11:28:34.000000000 +0100
10620 @@ -142,7 +142,7 @@ typedef struct {
10621 #define VSCSIIF_TIMEOUT (900*HZ)
10622
10623
10624 -irqreturn_t scsiback_intr(int, void *, struct pt_regs *);
10625 +irqreturn_t scsiback_intr(int, void *);
10626 int scsiback_init_sring(struct vscsibk_info *info,
10627 unsigned long ring_ref, unsigned int evtchn);
10628 int scsiback_schedule(void *data);
10629 --- sle11-2009-10-16.orig/drivers/xen/scsiback/scsiback.c 2009-10-28 14:55:12.000000000 +0100
10630 +++ sle11-2009-10-16/drivers/xen/scsiback/scsiback.c 2009-03-04 11:28:34.000000000 +0100
10631 @@ -440,7 +440,7 @@ void scsiback_cmd_exec(pending_req_t *pe
10632 write = (data_dir == DMA_TO_DEVICE);
10633 rq = blk_get_request(pending_req->sdev->request_queue, write, GFP_KERNEL);
10634
10635 - rq->flags |= REQ_BLOCK_PC;
10636 + rq->cmd_type = REQ_TYPE_BLOCK_PC;
10637 rq->cmd_len = cmd_len;
10638 memcpy(rq->cmd, pending_req->cmnd, cmd_len);
10639
10640 @@ -484,7 +484,7 @@ static void scsiback_device_reset_exec(p
10641 }
10642
10643
10644 -irqreturn_t scsiback_intr(int irq, void *dev_id, struct pt_regs *regs)
10645 +irqreturn_t scsiback_intr(int irq, void *dev_id)
10646 {
10647 scsiback_notify_work((struct vscsibk_info *)dev_id);
10648 return IRQ_HANDLED;
10649 --- sle11-2009-10-16.orig/drivers/xen/scsifront/common.h 2009-10-28 14:55:12.000000000 +0100
10650 +++ sle11-2009-10-16/drivers/xen/scsifront/common.h 2009-03-04 11:28:34.000000000 +0100
10651 @@ -122,7 +122,7 @@ struct vscsifrnt_info {
10652 int scsifront_xenbus_init(void);
10653 void scsifront_xenbus_unregister(void);
10654 int scsifront_schedule(void *data);
10655 -irqreturn_t scsifront_intr(int irq, void *dev_id, struct pt_regs *ptregs);
10656 +irqreturn_t scsifront_intr(int irq, void *dev_id);
10657 int scsifront_cmd_done(struct vscsifrnt_info *info);
10658
10659
10660 --- sle11-2009-10-16.orig/drivers/xen/scsifront/scsifront.c 2009-10-28 14:55:12.000000000 +0100
10661 +++ sle11-2009-10-16/drivers/xen/scsifront/scsifront.c 2009-03-04 11:28:34.000000000 +0100
10662 @@ -100,7 +100,7 @@ static void scsifront_do_request(struct
10663 notify_remote_via_irq(irq);
10664 }
10665
10666 -irqreturn_t scsifront_intr(int irq, void *dev_id, struct pt_regs *ptregs)
10667 +irqreturn_t scsifront_intr(int irq, void *dev_id)
10668 {
10669 scsifront_notify_work((struct vscsifrnt_info *)dev_id);
10670 return IRQ_HANDLED;
10671 --- sle11-2009-10-16.orig/drivers/xen/sfc_netback/accel_xenbus.c 2009-10-28 14:55:12.000000000 +0100
10672 +++ sle11-2009-10-16/drivers/xen/sfc_netback/accel_xenbus.c 2009-03-04 11:28:34.000000000 +0100
10673 @@ -68,8 +68,7 @@ static void unlink_bend(struct netback_a
10674
10675
10676 /* Demultiplex a message IRQ from the frontend driver. */
10677 -static irqreturn_t msgirq_from_frontend(int irq, void *context,
10678 - struct pt_regs *unused)
10679 +static irqreturn_t msgirq_from_frontend(int irq, void *context)
10680 {
10681 struct xenbus_device *dev = context;
10682 struct netback_accel *bend = NETBACK_ACCEL_FROM_XENBUS_DEVICE(dev);
10683 @@ -84,8 +83,7 @@ static irqreturn_t msgirq_from_frontend(
10684 * functionally, but we need it to pass to the bind function, and may
10685 * get called spuriously
10686 */
10687 -static irqreturn_t netirq_from_frontend(int irq, void *context,
10688 - struct pt_regs *unused)
10689 +static irqreturn_t netirq_from_frontend(int irq, void *context)
10690 {
10691 VPRINTK("netirq %d from device %s\n", irq,
10692 ((struct xenbus_device *)context)->nodename);
10693 --- sle11-2009-10-16.orig/drivers/xen/sfc_netfront/accel.h 2009-04-09 14:41:38.000000000 +0200
10694 +++ sle11-2009-10-16/drivers/xen/sfc_netfront/accel.h 2009-03-30 16:34:56.000000000 +0200
10695 @@ -451,10 +451,8 @@ void netfront_accel_msg_tx_fastpath(netf
10696 u32 ip, u16 port, u8 protocol);
10697
10698 /* Process an IRQ received from back end driver */
10699 -irqreturn_t netfront_accel_msg_channel_irq_from_bend(int irq, void *context,
10700 - struct pt_regs *unused);
10701 -irqreturn_t netfront_accel_net_channel_irq_from_bend(int irq, void *context,
10702 - struct pt_regs *unused);
10703 +irqreturn_t netfront_accel_msg_channel_irq_from_bend(int irq, void *context);
10704 +irqreturn_t netfront_accel_net_channel_irq_from_bend(int irq, void *context);
10705
10706 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)
10707 extern void netfront_accel_msg_from_bend(struct work_struct *context);
10708 --- sle11-2009-10-16.orig/drivers/xen/sfc_netfront/accel_msg.c 2009-10-28 14:55:12.000000000 +0100
10709 +++ sle11-2009-10-16/drivers/xen/sfc_netfront/accel_msg.c 2009-03-04 11:28:34.000000000 +0100
10710 @@ -490,8 +490,7 @@ void netfront_accel_msg_from_bend(void *
10711 }
10712
10713
10714 -irqreturn_t netfront_accel_msg_channel_irq_from_bend(int irq, void *context,
10715 - struct pt_regs *unused)
10716 +irqreturn_t netfront_accel_msg_channel_irq_from_bend(int irq, void *context)
10717 {
10718 netfront_accel_vnic *vnic = (netfront_accel_vnic *)context;
10719 VPRINTK("irq %d from device %s\n", irq, vnic->dev->nodename);
10720 @@ -502,8 +501,7 @@ irqreturn_t netfront_accel_msg_channel_i
10721 }
10722
10723 /* Process an interrupt received from the NIC via backend */
10724 -irqreturn_t netfront_accel_net_channel_irq_from_bend(int irq, void *context,
10725 - struct pt_regs *unused)
10726 +irqreturn_t netfront_accel_net_channel_irq_from_bend(int irq, void *context)
10727 {
10728 netfront_accel_vnic *vnic = (netfront_accel_vnic *)context;
10729 struct net_device *net_dev = vnic->net_dev;
10730 --- sle11-2009-10-16.orig/drivers/xen/sfc_netfront/accel_tso.c 2009-10-28 14:55:12.000000000 +0100
10731 +++ sle11-2009-10-16/drivers/xen/sfc_netfront/accel_tso.c 2009-03-04 11:28:34.000000000 +0100
10732 @@ -363,7 +363,7 @@ int netfront_accel_enqueue_skb_tso(netfr
10733
10734 tso_check_safe(skb);
10735
10736 - if (skb->ip_summed != CHECKSUM_HW)
10737 + if (skb->ip_summed != CHECKSUM_PARTIAL)
10738 EPRINTK("Trying to TSO send a packet without HW checksum\n");
10739
10740 tso_start(&state, skb);
10741 --- sle11-2009-10-16.orig/drivers/xen/sfc_netfront/accel_vi.c 2009-04-09 14:41:38.000000000 +0200
10742 +++ sle11-2009-10-16/drivers/xen/sfc_netfront/accel_vi.c 2009-03-30 16:35:11.000000000 +0200
10743 @@ -461,7 +461,7 @@ netfront_accel_enqueue_skb_multi(netfron
10744
10745 frag_i = -1;
10746
10747 - if (skb->ip_summed == CHECKSUM_HW) {
10748 + if (skb->ip_summed == CHECKSUM_PARTIAL) {
10749 /* Set to zero to encourage falcon to work it out for us */
10750 *(u16*)(skb->h.raw + skb->csum) = 0;
10751 }
10752 @@ -580,7 +580,7 @@ netfront_accel_enqueue_skb_single(netfro
10753
10754 kva = buf->pkt_kva;
10755
10756 - if (skb->ip_summed == CHECKSUM_HW) {
10757 + if (skb->ip_summed == CHECKSUM_PARTIAL) {
10758 /* Set to zero to encourage falcon to work it out for us */
10759 *(u16*)(skb->h.raw + skb->csum) = 0;
10760 }
10761 --- sle11-2009-10-16.orig/drivers/xen/tpmback/common.h 2009-10-28 14:55:12.000000000 +0100
10762 +++ sle11-2009-10-16/drivers/xen/tpmback/common.h 2009-03-04 11:28:34.000000000 +0100
10763 @@ -61,7 +61,7 @@ void tpmif_deschedule_work(tpmif_t * tpm
10764 void tpmif_xenbus_init(void);
10765 void tpmif_xenbus_exit(void);
10766 int tpmif_map(tpmif_t *tpmif, unsigned long shared_page, unsigned int evtchn);
10767 -irqreturn_t tpmif_be_int(int irq, void *dev_id, struct pt_regs *regs);
10768 +irqreturn_t tpmif_be_int(int irq, void *dev_id);
10769
10770 long int tpmback_get_instance(struct backend_info *bi);
10771
10772 --- sle11-2009-10-16.orig/drivers/xen/tpmback/tpmback.c 2009-10-28 14:55:12.000000000 +0100
10773 +++ sle11-2009-10-16/drivers/xen/tpmback/tpmback.c 2009-03-04 11:28:34.000000000 +0100
10774 @@ -502,7 +502,7 @@ static ssize_t vtpm_op_read(struct file
10775 list_del(&pak->next);
10776 write_unlock_irqrestore(&dataex.pak_lock, flags);
10777
10778 - DPRINTK("size given by app: %d, available: %d\n", size, left);
10779 + DPRINTK("size given by app: %zu, available: %u\n", size, left);
10780
10781 ret_size = min_t(size_t, size, left);
10782
10783 @@ -899,7 +899,7 @@ static void tpm_tx_action(unsigned long
10784 }
10785 }
10786
10787 -irqreturn_t tpmif_be_int(int irq, void *dev_id, struct pt_regs *regs)
10788 +irqreturn_t tpmif_be_int(int irq, void *dev_id)
10789 {
10790 tpmif_t *tpmif = (tpmif_t *) dev_id;
10791
10792 --- sle11-2009-10-16.orig/drivers/xen/xenbus/xenbus_comms.c 2008-11-25 12:35:56.000000000 +0100
10793 +++ sle11-2009-10-16/drivers/xen/xenbus/xenbus_comms.c 2009-03-04 11:28:34.000000000 +0100
10794 @@ -55,7 +55,7 @@ static DECLARE_WORK(probe_work, xenbus_p
10795
10796 static DECLARE_WAIT_QUEUE_HEAD(xb_waitq);
10797
10798 -static irqreturn_t wake_waiting(int irq, void *unused, struct pt_regs *regs)
10799 +static irqreturn_t wake_waiting(int irq, void *unused)
10800 {
10801 if (unlikely(xenstored_ready == 0)) {
10802 xenstored_ready = 1;
10803 --- sle11-2009-10-16.orig/drivers/xen/xenoprof/xenoprofile.c 2009-10-28 14:55:12.000000000 +0100
10804 +++ sle11-2009-10-16/drivers/xen/xenoprof/xenoprofile.c 2009-03-04 11:28:34.000000000 +0100
10805 @@ -194,8 +194,7 @@ done:
10806 oprofile_add_domain_switch(COORDINATOR_DOMAIN);
10807 }
10808
10809 -static irqreturn_t
10810 -xenoprof_ovf_interrupt(int irq, void * dev_id, struct pt_regs * regs)
10811 +static irqreturn_t xenoprof_ovf_interrupt(int irq, void *dev_id)
10812 {
10813 struct xenoprof_buf * buf;
10814 static unsigned long flag;
10815 --- sle11-2009-10-16.orig/include/asm-generic/pgtable.h 2009-08-26 11:52:33.000000000 +0200
10816 +++ sle11-2009-10-16/include/asm-generic/pgtable.h 2009-03-04 11:28:34.000000000 +0100
10817 @@ -100,7 +100,7 @@ static inline void ptep_set_wrprotect(st
10818 #endif
10819
10820 #ifndef arch_change_pte_range
10821 -#define arch_change_pte_range(mm, pmd, addr, end, newprot) 0
10822 +#define arch_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable) 0
10823 #endif
10824
10825 #ifndef __HAVE_ARCH_PTE_SAME
10826 --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/desc_32.h 2009-10-28 14:55:12.000000000 +0100
10827 +++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/desc_32.h 2009-03-04 11:28:34.000000000 +0100
10828 @@ -32,52 +32,110 @@ static inline struct desc_struct *get_cp
10829 return (struct desc_struct *)per_cpu(cpu_gdt_descr, cpu).address;
10830 }
10831
10832 +/*
10833 + * This is the ldt that every process will get unless we need
10834 + * something other than this.
10835 + */
10836 +extern struct desc_struct default_ldt[];
10837 +extern struct desc_struct idt_table[];
10838 +extern void set_intr_gate(unsigned int irq, void * addr);
10839 +
10840 +static inline void pack_descriptor(__u32 *a, __u32 *b,
10841 + unsigned long base, unsigned long limit, unsigned char type, unsigned char flags)
10842 +{
10843 + *a = ((base & 0xffff) << 16) | (limit & 0xffff);
10844 + *b = (base & 0xff000000) | ((base & 0xff0000) >> 16) |
10845 + (limit & 0x000f0000) | ((type & 0xff) << 8) | ((flags & 0xf) << 20);
10846 +}
10847 +
10848 +static inline void pack_gate(__u32 *a, __u32 *b,
10849 + unsigned long base, unsigned short seg, unsigned char type, unsigned char flags)
10850 +{
10851 + *a = (seg << 16) | (base & 0xffff);
10852 + *b = (base & 0xffff0000) | ((type & 0xff) << 8) | (flags & 0xff);
10853 +}
10854 +
10855 +#define DESCTYPE_LDT 0x82 /* present, system, DPL-0, LDT */
10856 +#define DESCTYPE_TSS 0x89 /* present, system, DPL-0, 32-bit TSS */
10857 +#define DESCTYPE_TASK 0x85 /* present, system, DPL-0, task gate */
10858 +#define DESCTYPE_INT 0x8e /* present, system, DPL-0, interrupt gate */
10859 +#define DESCTYPE_TRAP 0x8f /* present, system, DPL-0, trap gate */
10860 +#define DESCTYPE_DPL3 0x60 /* DPL-3 */
10861 +#define DESCTYPE_S 0x10 /* !system */
10862 +
10863 #define load_TR_desc() __asm__ __volatile__("ltr %w0"::"q" (GDT_ENTRY_TSS*8))
10864 #define load_LDT_desc() __asm__ __volatile__("lldt %w0"::"q" (GDT_ENTRY_LDT*8))
10865
10866 #define load_gdt(dtr) __asm__ __volatile("lgdt %0"::"m" (*dtr))
10867 #define load_idt(dtr) __asm__ __volatile("lidt %0"::"m" (*dtr))
10868 -#define load_tr(tr) __asm__ __volatile("ltr %0"::"mr" (tr))
10869 -#define load_ldt(ldt) __asm__ __volatile("lldt %0"::"mr" (ldt))
10870 +#define load_tr(tr) __asm__ __volatile("ltr %0"::"m" (tr))
10871 +#define load_ldt(ldt) __asm__ __volatile("lldt %0"::"m" (ldt))
10872
10873 #define store_gdt(dtr) __asm__ ("sgdt %0":"=m" (*dtr))
10874 #define store_idt(dtr) __asm__ ("sidt %0":"=m" (*dtr))
10875 -#define store_tr(tr) __asm__ ("str %0":"=mr" (tr))
10876 -#define store_ldt(ldt) __asm__ ("sldt %0":"=mr" (ldt))
10877 +#define store_tr(tr) __asm__ ("str %0":"=m" (tr))
10878 +#define store_ldt(ldt) __asm__ ("sldt %0":"=m" (ldt))
10879
10880 -/*
10881 - * This is the ldt that every process will get unless we need
10882 - * something other than this.
10883 - */
10884 -extern struct desc_struct default_ldt[];
10885 -extern void set_intr_gate(unsigned int irq, void * addr);
10886 +#if TLS_SIZE != 24
10887 +# error update this code.
10888 +#endif
10889 +
10890 +static inline void load_TLS(struct thread_struct *t, unsigned int cpu)
10891 +{
10892 +#define C(i) if (HYPERVISOR_update_descriptor(virt_to_machine(&get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i]), \
10893 + *(u64 *)&t->tls_array[i])) \
10894 + BUG();
10895 + C(0); C(1); C(2);
10896 +#undef C
10897 +}
10898
10899 -#define _set_tssldt_desc(n,addr,limit,type) \
10900 -__asm__ __volatile__ ("movw %w3,0(%2)\n\t" \
10901 - "movw %w1,2(%2)\n\t" \
10902 - "rorl $16,%1\n\t" \
10903 - "movb %b1,4(%2)\n\t" \
10904 - "movb %4,5(%2)\n\t" \
10905 - "movb $0,6(%2)\n\t" \
10906 - "movb %h1,7(%2)\n\t" \
10907 - "rorl $16,%1" \
10908 - : "=m"(*(n)) : "q" (addr), "r"(n), "ir"(limit), "i"(type))
10909 +#ifndef CONFIG_XEN
10910 +static inline void write_dt_entry(void *dt, int entry, __u32 entry_a, __u32 entry_b)
10911 +{
10912 + __u32 *lp = (__u32 *)((char *)dt + entry*8);
10913 + *lp = entry_a;
10914 + *(lp+1) = entry_b;
10915 +}
10916
10917 -#ifndef CONFIG_X86_NO_TSS
10918 -static inline void __set_tss_desc(unsigned int cpu, unsigned int entry, void *addr)
10919 +#define write_ldt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
10920 +#define write_gdt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
10921 +#else
10922 +extern int write_ldt_entry(void *ldt, int entry, __u32 entry_a, __u32 entry_b);
10923 +extern int write_gdt_entry(void *gdt, int entry, __u32 entry_a, __u32 entry_b);
10924 +#endif
10925 +#ifndef CONFIG_X86_NO_IDT
10926 +#define write_idt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
10927 +
10928 +static inline void _set_gate(int gate, unsigned int type, void *addr, unsigned short seg)
10929 {
10930 - _set_tssldt_desc(&get_cpu_gdt_table(cpu)[entry], (int)addr,
10931 - offsetof(struct tss_struct, __cacheline_filler) - 1, 0x89);
10932 + __u32 a, b;
10933 + pack_gate(&a, &b, (unsigned long)addr, seg, type, 0);
10934 + write_idt_entry(idt_table, gate, a, b);
10935 }
10936 +#endif
10937
10938 -#define set_tss_desc(cpu,addr) __set_tss_desc(cpu, GDT_ENTRY_TSS, addr)
10939 +#ifndef CONFIG_X86_NO_TSS
10940 +static inline void __set_tss_desc(unsigned int cpu, unsigned int entry, const void *addr)
10941 +{
10942 + __u32 a, b;
10943 + pack_descriptor(&a, &b, (unsigned long)addr,
10944 + offsetof(struct tss_struct, __cacheline_filler) - 1,
10945 + DESCTYPE_TSS, 0);
10946 + write_gdt_entry(get_cpu_gdt_table(cpu), entry, a, b);
10947 +}
10948 #endif
10949
10950 -static inline void set_ldt_desc(unsigned int cpu, void *addr, unsigned int size)
10951 +static inline void set_ldt_desc(unsigned int cpu, void *addr, unsigned int entries)
10952 {
10953 - _set_tssldt_desc(&get_cpu_gdt_table(cpu)[GDT_ENTRY_LDT], (int)addr, ((size << 3)-1), 0x82);
10954 + __u32 a, b;
10955 + pack_descriptor(&a, &b, (unsigned long)addr,
10956 + entries * sizeof(struct desc_struct) - 1,
10957 + DESCTYPE_LDT, 0);
10958 + write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT, a, b);
10959 }
10960
10961 +#define set_tss_desc(cpu,addr) __set_tss_desc(cpu, GDT_ENTRY_TSS, addr)
10962 +
10963 #define LDT_entry_a(info) \
10964 ((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff))
10965
10966 @@ -103,21 +161,6 @@ static inline void set_ldt_desc(unsigned
10967 (info)->seg_not_present == 1 && \
10968 (info)->useable == 0 )
10969
10970 -extern int write_ldt_entry(void *ldt, int entry, __u32 entry_a, __u32 entry_b);
10971 -
10972 -#if TLS_SIZE != 24
10973 -# error update this code.
10974 -#endif
10975 -
10976 -static inline void load_TLS(struct thread_struct *t, unsigned int cpu)
10977 -{
10978 -#define C(i) if (HYPERVISOR_update_descriptor(virt_to_machine(&get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i]), \
10979 - *(u64 *)&t->tls_array[i])) \
10980 - BUG();
10981 - C(0); C(1); C(2);
10982 -#undef C
10983 -}
10984 -
10985 static inline void clear_LDT(void)
10986 {
10987 int cpu = get_cpu();
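The pack_descriptor()/pack_gate() helpers introduced above encode the classic x86 descriptor layout: the low word pairs base[15:0] with limit[15:0], and the high word carries base[31:24], base[23:16], limit[19:16], the type byte (bits 15:8) and the flags nibble (bits 23:20). A self-contained sketch that mirrors the helper (with __u32 replaced by unsigned int) and shows the packing for a 32-bit TSS (DESCTYPE_TSS, 0x89) at base 0x1000 with limit 0x67:

	#include <stdio.h>

	static void pack_descriptor(unsigned *a, unsigned *b,
				    unsigned long base, unsigned long limit,
				    unsigned char type, unsigned char flags)
	{
		*a = ((base & 0xffff) << 16) | (limit & 0xffff);
		*b = (base & 0xff000000) | ((base & 0xff0000) >> 16) |
		     (limit & 0x000f0000) | ((type & 0xff) << 8) |
		     ((flags & 0xf) << 20);
	}

	int main(void)
	{
		unsigned a, b;

		pack_descriptor(&a, &b, 0x1000, 0x67, 0x89, 0);
		printf("%08x %08x\n", b, a);	/* 00008900 10000067 */
		return 0;
	}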
10988 --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/fixmap_32.h 2009-10-28 14:55:12.000000000 +0100
10989 +++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/fixmap_32.h 2009-03-04 11:28:34.000000000 +0100
10990 @@ -55,7 +55,7 @@ enum fixed_addresses {
10991 #ifdef CONFIG_X86_LOCAL_APIC
10992 FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */
10993 #endif
10994 -#ifdef CONFIG_X86_IO_APIC
10995 +#if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_XEN)
10996 FIX_IO_APIC_BASE_0,
10997 FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS-1,
10998 #endif
10999 @@ -95,10 +95,9 @@ enum fixed_addresses {
11000 __end_of_fixed_addresses
11001 };
11002
11003 -extern void set_fixaddr_top(unsigned long top);
11004 -
11005 extern void __set_fixmap(enum fixed_addresses idx,
11006 maddr_t phys, pgprot_t flags);
11007 +extern void reserve_top_address(unsigned long reserve);
11008
11009 #define set_fixmap(idx, phys) \
11010 __set_fixmap(idx, phys, PAGE_KERNEL)
11011 --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/hypercall_32.h 2009-10-28 14:55:12.000000000 +0100
11012 +++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/hypercall_32.h 2009-03-04 11:28:34.000000000 +0100
11013 @@ -128,6 +128,23 @@
11014 __res; \
11015 })
11016
11017 +#define _hypercall(type, op, a1, a2, a3, a4, a5) \
11018 +({ \
11019 + type __res; \
11020 + register typeof((a1)+0) __arg1 asm("ebx") = (a1); \
11021 + register typeof((a2)+0) __arg2 asm("ecx") = (a2); \
11022 + register typeof((a3)+0) __arg3 asm("edx") = (a3); \
11023 + register typeof((a4)+0) __arg4 asm("esi") = (a4); \
11024 + register typeof((a5)+0) __arg5 asm("edi") = (a5); \
11025 + asm volatile ( \
11026 + "call *%6" \
11027 + : "=a" (__res), "+r" (__arg1), "+r" (__arg2), \
11028 + "+r" (__arg3), "+r" (__arg4), "+r" (__arg5) \
11029 + : "0" (hypercall_page + (op) * 32) \
11030 + : "memory" ); \
11031 + __res; \
11032 +})
11033 +
11034 static inline int __must_check
11035 HYPERVISOR_set_trap_table(
11036 const trap_info_t *table)
11037 @@ -140,6 +157,8 @@ HYPERVISOR_mmu_update(
11038 mmu_update_t *req, unsigned int count, unsigned int *success_count,
11039 domid_t domid)
11040 {
11041 + if (arch_use_lazy_mmu_mode())
11042 + return xen_multi_mmu_update(req, count, success_count, domid);
11043 return _hypercall4(int, mmu_update, req, count, success_count, domid);
11044 }
11045
11046 @@ -148,6 +167,8 @@ HYPERVISOR_mmuext_op(
11047 struct mmuext_op *op, unsigned int count, unsigned int *success_count,
11048 domid_t domid)
11049 {
11050 + if (arch_use_lazy_mmu_mode())
11051 + return xen_multi_mmuext_op(op, count, success_count, domid);
11052 return _hypercall4(int, mmuext_op, op, count, success_count, domid);
11053 }
11054
11055 @@ -238,6 +259,8 @@ static inline int __must_check
11056 HYPERVISOR_memory_op(
11057 unsigned int cmd, void *arg)
11058 {
11059 + if (arch_use_lazy_mmu_mode())
11060 + xen_multicall_flush(false);
11061 return _hypercall2(int, memory_op, cmd, arg);
11062 }
11063
11064 @@ -253,6 +276,9 @@ HYPERVISOR_update_va_mapping(
11065 unsigned long va, pte_t new_val, unsigned long flags)
11066 {
11067 unsigned long pte_hi = 0;
11068 +
11069 + if (arch_use_lazy_mmu_mode())
11070 + return xen_multi_update_va_mapping(va, new_val, flags);
11071 #ifdef CONFIG_X86_PAE
11072 pte_hi = new_val.pte_high;
11073 #endif
11074 @@ -316,6 +342,8 @@ static inline int __must_check
11075 HYPERVISOR_grant_table_op(
11076 unsigned int cmd, void *uop, unsigned int count)
11077 {
11078 + if (arch_use_lazy_mmu_mode())
11079 + xen_multicall_flush(false);
11080 return _hypercall3(int, grant_table_op, cmd, uop, count);
11081 }
11082
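The arch_use_lazy_mmu_mode() hooks added to the wrappers above (and mirrored in the 64-bit header below) implement multicall batching: while a CPU is in lazy MMU mode, mmu_update/mmuext_op/update_va_mapping requests are queued through the xen_multi_*() helpers, and hypercalls whose results depend on up-to-date page tables, such as memory_op and grant_table_op, flush the queue first. A sketch of the contract, kernel context and this patch's names assumed (not standalone code):

	arch_enter_lazy_mmu_mode();
	/* PTE updates made here are queued, not issued one by one; a
	 * memory_op/grant_table_op in between would flush the queue */
	arch_leave_lazy_mmu_mode();	/* flushes via xen_multicall_flush() */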
11083 --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/hypercall_64.h 2009-10-28 14:55:12.000000000 +0100
11084 +++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/hypercall_64.h 2009-03-04 11:28:34.000000000 +0100
11085 @@ -134,6 +134,23 @@
11086 __res; \
11087 })
11088
11089 +#define _hypercall(type, op, a1, a2, a3, a4, a5) \
11090 +({ \
11091 + type __res; \
11092 + register typeof((a1)+0) __arg1 asm("rdi") = (a1); \
11093 + register typeof((a2)+0) __arg2 asm("rsi") = (a2); \
11094 + register typeof((a3)+0) __arg3 asm("rdx") = (a3); \
11095 + register typeof((a4)+0) __arg4 asm("r10") = (a4); \
11096 + register typeof((a5)+0) __arg5 asm("r8") = (a5); \
11097 + asm volatile ( \
11098 + "call *%6" \
11099 + : "=a" (__res), "+r" (__arg1), "+r" (__arg2), \
11100 + "+r" (__arg3), "+r" (__arg4), "+r" (__arg5) \
11101 + : "0" (hypercall_page + (op) * 32) \
11102 + : "memory" ); \
11103 + __res; \
11104 +})
11105 +
11106 static inline int __must_check
11107 HYPERVISOR_set_trap_table(
11108 const trap_info_t *table)
11109 @@ -146,6 +163,8 @@ HYPERVISOR_mmu_update(
11110 mmu_update_t *req, unsigned int count, unsigned int *success_count,
11111 domid_t domid)
11112 {
11113 + if (arch_use_lazy_mmu_mode())
11114 + return xen_multi_mmu_update(req, count, success_count, domid);
11115 return _hypercall4(int, mmu_update, req, count, success_count, domid);
11116 }
11117
11118 @@ -154,6 +173,8 @@ HYPERVISOR_mmuext_op(
11119 struct mmuext_op *op, unsigned int count, unsigned int *success_count,
11120 domid_t domid)
11121 {
11122 + if (arch_use_lazy_mmu_mode())
11123 + return xen_multi_mmuext_op(op, count, success_count, domid);
11124 return _hypercall4(int, mmuext_op, op, count, success_count, domid);
11125 }
11126
11127 @@ -241,6 +262,8 @@ static inline int __must_check
11128 HYPERVISOR_memory_op(
11129 unsigned int cmd, void *arg)
11130 {
11131 + if (arch_use_lazy_mmu_mode())
11132 + xen_multicall_flush(false);
11133 return _hypercall2(int, memory_op, cmd, arg);
11134 }
11135
11136 @@ -255,6 +278,8 @@ static inline int __must_check
11137 HYPERVISOR_update_va_mapping(
11138 unsigned long va, pte_t new_val, unsigned long flags)
11139 {
11140 + if (arch_use_lazy_mmu_mode())
11141 + return xen_multi_update_va_mapping(va, new_val, flags);
11142 return _hypercall3(int, update_va_mapping, va, new_val.pte, flags);
11143 }
11144
11145 @@ -314,6 +339,8 @@ static inline int __must_check
11146 HYPERVISOR_grant_table_op(
11147 unsigned int cmd, void *uop, unsigned int count)
11148 {
11149 + if (arch_use_lazy_mmu_mode())
11150 + xen_multicall_flush(false);
11151 return _hypercall3(int, grant_table_op, cmd, uop, count);
11152 }
11153
11154 --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/hypervisor.h 2009-10-28 14:55:12.000000000 +0100
11155 +++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/hypervisor.h 2009-03-04 11:28:34.000000000 +0100
11156 @@ -43,6 +43,7 @@
11157 #include <xen/interface/physdev.h>
11158 #include <xen/interface/sched.h>
11159 #include <xen/interface/nmi.h>
11160 +#include <asm/percpu.h>
11161 #include <asm/ptrace.h>
11162 #include <asm/page.h>
11163 #if defined(__i386__)
11164 @@ -135,7 +136,46 @@ void scrub_pages(void *, unsigned int);
11165 #define scrub_pages(_p,_n) ((void)0)
11166 #endif
11167
11168 -#include <xen/hypercall.h>
11169 +#if defined(CONFIG_XEN) && !defined(MODULE)
11170 +
11171 +DECLARE_PER_CPU(bool, xen_lazy_mmu);
11172 +
11173 +int xen_multicall_flush(bool);
11174 +
11175 +int __must_check xen_multi_update_va_mapping(unsigned long va, pte_t,
11176 + unsigned long flags);
11177 +int __must_check xen_multi_mmu_update(mmu_update_t *, unsigned int count,
11178 + unsigned int *success_count, domid_t);
11179 +int __must_check xen_multi_mmuext_op(struct mmuext_op *, unsigned int count,
11180 + unsigned int *success_count, domid_t);
11181 +
11182 +#define __HAVE_ARCH_ENTER_LAZY_MMU_MODE
11183 +static inline void arch_enter_lazy_mmu_mode(void)
11184 +{
11185 + __get_cpu_var(xen_lazy_mmu) = true;
11186 +}
11187 +
11188 +static inline void arch_leave_lazy_mmu_mode(void)
11189 +{
11190 + __get_cpu_var(xen_lazy_mmu) = false;
11191 + xen_multicall_flush(false);
11192 +}
11193 +
11194 +#if defined(CONFIG_X86_32)
11195 +#define arch_use_lazy_mmu_mode() unlikely(x86_read_percpu(xen_lazy_mmu))
11196 +#elif !defined(arch_use_lazy_mmu_mode)
11197 +#define arch_use_lazy_mmu_mode() unlikely(__get_cpu_var(xen_lazy_mmu))
11198 +#endif
11199 +
11200 +#else /* !CONFIG_XEN || MODULE */
11201 +
11202 +static inline void xen_multicall_flush(bool ignore) {}
11203 +#define arch_use_lazy_mmu_mode() false
11204 +#define xen_multi_update_va_mapping(...) ({ BUG(); -ENOSYS; })
11205 +#define xen_multi_mmu_update(...) ({ BUG(); -ENOSYS; })
11206 +#define xen_multi_mmuext_op(...) ({ BUG(); -ENOSYS; })
11207 +
11208 +#endif /* CONFIG_XEN && !MODULE */
11209
11210 #if defined(CONFIG_X86_64)
11211 #define MULTI_UVMFLAGS_INDEX 2
11212 @@ -147,11 +187,15 @@ void scrub_pages(void *, unsigned int);
11213
11214 #ifdef CONFIG_XEN
11215 #define is_running_on_xen() 1
11216 +extern char hypercall_page[PAGE_SIZE];
11217 #else
11218 extern char *hypercall_stubs;
11219 +#define hypercall_page hypercall_stubs
11220 #define is_running_on_xen() (!!hypercall_stubs)
11221 #endif
11222
11223 +#include <xen/hypercall.h>
11224 +
11225 static inline int
11226 HYPERVISOR_yield(
11227 void)
11228 --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/pgtable-3level.h 2009-10-28 14:55:12.000000000 +0100
11229 +++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/pgtable-3level.h 2009-03-04 11:28:34.000000000 +0100
11230 @@ -53,7 +53,6 @@ static inline int pte_exec_kernel(pte_t
11231 * not possible, use pte_get_and_clear to obtain the old pte
11232 * value and then use set_pte to update it. -ben
11233 */
11234 -#define __HAVE_ARCH_SET_PTE_ATOMIC
11235
11236 static inline void set_pte(pte_t *ptep, pte_t pte)
11237 {
11238 @@ -70,14 +69,6 @@ static inline void set_pte(pte_t *ptep,
11239 set_pte((ptep), (pteval)); \
11240 } while (0)
11241
11242 -#define set_pte_at_sync(_mm,addr,ptep,pteval) do { \
11243 - if (((_mm) != current->mm && (_mm) != &init_mm) || \
11244 - HYPERVISOR_update_va_mapping((addr), (pteval), UVMF_INVLPG)) { \
11245 - set_pte((ptep), (pteval)); \
11246 - xen_invlpg((addr)); \
11247 - } \
11248 -} while (0)
11249 -
11250 #define set_pmd(pmdptr,pmdval) \
11251 xen_l2_entry_update((pmdptr), (pmdval))
11252 #define set_pud(pudptr,pudval) \
11253 @@ -94,7 +85,7 @@ static inline void pud_clear (pud_t * pu
11254 #define pud_page(pud) \
11255 ((struct page *) __va(pud_val(pud) & PAGE_MASK))
11256
11257 -#define pud_page_kernel(pud) \
11258 +#define pud_page_vaddr(pud) \
11259 ((unsigned long) __va(pud_val(pud) & PAGE_MASK))
11260
11261
11262 @@ -124,6 +115,7 @@ static inline void pte_clear(struct mm_s
11263
11264 #define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0)
11265
11266 +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
11267 static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
11268 {
11269 pte_t pte = *ptep;
11270 @@ -142,6 +134,7 @@ static inline pte_t ptep_get_and_clear(s
11271 return pte;
11272 }
11273
11274 +#define __HAVE_ARCH_PTEP_CLEAR_FLUSH
11275 #define ptep_clear_flush(vma, addr, ptep) \
11276 ({ \
11277 pte_t *__ptep = (ptep); \
11278 @@ -159,6 +152,7 @@ static inline pte_t ptep_get_and_clear(s
11279 __res; \
11280 })
11281
11282 +#define __HAVE_ARCH_PTE_SAME
11283 static inline int pte_same(pte_t a, pte_t b)
11284 {
11285 return a.pte_low == b.pte_low && a.pte_high == b.pte_high;
11286 --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/pgtable_32.h 2008-12-15 11:13:45.000000000 +0100
11287 +++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/pgtable_32.h 2009-03-04 11:28:34.000000000 +0100
11288 @@ -260,31 +260,89 @@ static inline pte_t pte_mkhuge(pte_t pte
11289 # include <asm/pgtable-2level.h>
11290 #endif
11291
11292 -#define ptep_test_and_clear_dirty(vma, addr, ptep) \
11293 +/*
11294 + * Rules for using pte_update - it must be called after any PTE update which
11295 + * has not been done using the set_pte / clear_pte interfaces. It is used by
11296 + * shadow mode hypervisors to resynchronize the shadow page tables. Kernel PTE
11297 + * updates should either be sets, clears, or set_pte_atomic for P->P
11298 + * transitions, which means this hook should only be called for user PTEs.
11299 + * This hook implies a P->P protection or access change has taken place, which
11300 + * requires a subsequent TLB flush. The notification can optionally be delayed
11301 + * until the TLB flush event by using the pte_update_defer form of the
11302 + * interface, but care must be taken to assure that the flush happens while
11303 + * still holding the same page table lock so that the shadow and primary pages
11304 + * do not become out of sync on SMP.
11305 + */
11306 +#define pte_update(mm, addr, ptep) do { } while (0)
11307 +#define pte_update_defer(mm, addr, ptep) do { } while (0)
11308 +
11309 +
11310 +/*
11311 + * We only update the dirty/accessed state if we set
11312 + * the dirty bit by hand in the kernel, since the hardware
11313 + * will do the accessed bit for us, and we don't want to
11314 + * race with other CPU's that might be updating the dirty
11315 + * bit at the same time.
11316 + */
11317 +#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
11318 +#define ptep_set_access_flags(vma, address, ptep, entry, dirty) \
11319 +do { \
11320 + if (dirty) \
11321 + ptep_establish(vma, address, ptep, entry); \
11322 +} while (0)
11323 +
11324 +/*
11325 + * We don't actually have these, but we want to advertise them so that
11326 + * we can encompass the flush here.
11327 + */
11328 +#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
11329 +#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
11330 +
11331 +/*
11332 + * Rules for using ptep_establish: the pte MUST be a user pte, and
11333 + * must be a present->present transition.
11334 + */
11335 +#define __HAVE_ARCH_PTEP_ESTABLISH
11336 +#define ptep_establish(vma, address, ptep, pteval) \
11337 +do { \
11338 + if ( likely((vma)->vm_mm == current->mm) ) { \
11339 + BUG_ON(HYPERVISOR_update_va_mapping(address, \
11340 + pteval, \
11341 + (unsigned long)(vma)->vm_mm->cpu_vm_mask.bits| \
11342 + UVMF_INVLPG|UVMF_MULTI)); \
11343 + } else { \
11344 + xen_l1_entry_update(ptep, pteval); \
11345 + flush_tlb_page(vma, address); \
11346 + } \
11347 +} while (0)
11348 +
11349 +#define __HAVE_ARCH_PTEP_CLEAR_DIRTY_FLUSH
11350 +#define ptep_clear_flush_dirty(vma, address, ptep) \
11351 ({ \
11352 pte_t __pte = *(ptep); \
11353 - int __ret = pte_dirty(__pte); \
11354 - if (__ret) { \
11355 - __pte = pte_mkclean(__pte); \
11356 - if ((vma)->vm_mm != current->mm || \
11357 - HYPERVISOR_update_va_mapping(addr, __pte, 0)) \
11358 - (ptep)->pte_low = __pte.pte_low; \
11359 - } \
11360 - __ret; \
11361 + int __dirty = pte_dirty(__pte); \
11362 + __pte = pte_mkclean(__pte); \
11363 + if (test_bit(PG_pinned, &virt_to_page((vma)->vm_mm->pgd)->flags)) \
11364 + ptep_set_access_flags(vma, address, ptep, __pte, __dirty); \
11365 + else if (__dirty) \
11366 + (ptep)->pte_low = __pte.pte_low; \
11367 + __dirty; \
11368 })
11369
11370 -#define ptep_test_and_clear_young(vma, addr, ptep) \
11371 +#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
11372 +#define ptep_clear_flush_young(vma, address, ptep) \
11373 ({ \
11374 pte_t __pte = *(ptep); \
11375 - int __ret = pte_young(__pte); \
11376 - if (__ret) \
11377 - __pte = pte_mkold(__pte); \
11378 - if ((vma)->vm_mm != current->mm || \
11379 - HYPERVISOR_update_va_mapping(addr, __pte, 0)) \
11380 - (ptep)->pte_low = __pte.pte_low; \
11381 - __ret; \
11382 + int __young = pte_young(__pte); \
11383 + __pte = pte_mkold(__pte); \
11384 + if (test_bit(PG_pinned, &virt_to_page((vma)->vm_mm->pgd)->flags)) \
11385 + ptep_set_access_flags(vma, address, ptep, __pte, __young); \
11386 + else if (__young) \
11387 + (ptep)->pte_low = __pte.pte_low; \
11388 + __young; \
11389 })
11390
11391 +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
11392 #define ptep_get_and_clear_full(mm, addr, ptep, full) \
11393 ((full) ? ({ \
11394 pte_t __res = *(ptep); \
11395 @@ -296,6 +354,7 @@ static inline pte_t pte_mkhuge(pte_t pte
11396 }) : \
11397 ptep_get_and_clear(mm, addr, ptep))
11398
11399 +#define __HAVE_ARCH_PTEP_SET_WRPROTECT
11400 static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
11401 {
11402 pte_t pte = *ptep;
11403 @@ -391,11 +450,11 @@ static inline pte_t pte_modify(pte_t pte
11404 #define pte_index(address) \
11405 (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
11406 #define pte_offset_kernel(dir, address) \
11407 - ((pte_t *) pmd_page_kernel(*(dir)) + pte_index(address))
11408 + ((pte_t *) pmd_page_vaddr(*(dir)) + pte_index(address))
11409
11410 #define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT))
11411
11412 -#define pmd_page_kernel(pmd) \
11413 +#define pmd_page_vaddr(pmd) \
11414 ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
11415
11416 /*
11417 @@ -418,8 +477,6 @@ extern pte_t *lookup_address(unsigned lo
11418 static inline int set_kernel_exec(unsigned long vaddr, int enable) { return 0;}
11419 #endif
11420
11421 -extern void noexec_setup(const char *str);
11422 -
11423 #if defined(CONFIG_HIGHPTE)
11424 #define pte_offset_map(dir, address) \
11425 ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE0) + \
11426 @@ -437,37 +494,17 @@ extern void noexec_setup(const char *str
11427 #define pte_unmap_nested(pte) do { } while (0)
11428 #endif
11429
11430 -#define __HAVE_ARCH_PTEP_ESTABLISH
11431 -#define ptep_establish(vma, address, ptep, pteval) \
11432 - do { \
11433 - if ( likely((vma)->vm_mm == current->mm) ) { \
11434 - BUG_ON(HYPERVISOR_update_va_mapping(address, \
11435 - pteval, \
11436 - (unsigned long)(vma)->vm_mm->cpu_vm_mask.bits| \
11437 - UVMF_INVLPG|UVMF_MULTI)); \
11438 - } else { \
11439 - xen_l1_entry_update(ptep, pteval); \
11440 - flush_tlb_page(vma, address); \
11441 - } \
11442 - } while (0)
11443 +/* Clear a kernel PTE and flush it from the TLB */
11444 +#define kpte_clear_flush(ptep, vaddr) do { \
11445 + if (HYPERVISOR_update_va_mapping(vaddr, __pte(0), UVMF_INVLPG)) \
11446 + BUG(); \
11447 +} while (0)
11448
11449 /*
11450 * The i386 doesn't have any external MMU info: the kernel page
11451 * tables contain all the necessary information.
11452 - *
11453 - * Also, we only update the dirty/accessed state if we set
11454 - * the dirty bit by hand in the kernel, since the hardware
11455 - * will do the accessed bit for us, and we don't want to
11456 - * race with other CPU's that might be updating the dirty
11457 - * bit at the same time.
11458 */
11459 #define update_mmu_cache(vma,address,pte) do { } while (0)
11460 -#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
11461 -#define ptep_set_access_flags(vma, address, ptep, entry, dirty) \
11462 - do { \
11463 - if (dirty) \
11464 - ptep_establish(vma, address, ptep, entry); \
11465 - } while (0)
11466
11467 #include <xen/features.h>
11468 void make_lowmem_page_readonly(void *va, unsigned int feature);
11469 @@ -526,10 +563,11 @@ int touch_pte_range(struct mm_struct *mm
11470 unsigned long size);
11471
11472 int xen_change_pte_range(struct mm_struct *mm, pmd_t *pmd,
11473 - unsigned long addr, unsigned long end, pgprot_t newprot);
11474 + unsigned long addr, unsigned long end, pgprot_t newprot,
11475 + int dirty_accountable);
11476
11477 -#define arch_change_pte_range(mm, pmd, addr, end, newprot) \
11478 - xen_change_pte_range(mm, pmd, addr, end, newprot)
11479 +#define arch_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable) \
11480 + xen_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable)
11481
11482 #define io_remap_pfn_range(vma,from,pfn,size,prot) \
11483 direct_remap_pfn_range(vma,from,pfn,size,prot,DOMID_IO)
11484 @@ -538,13 +576,6 @@ direct_remap_pfn_range(vma,from,pfn,size
11485 #define GET_IOSPACE(pfn) 0
11486 #define GET_PFN(pfn) (pfn)
11487
11488 -#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
11489 -#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
11490 -#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
11491 -#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
11492 -#define __HAVE_ARCH_PTEP_CLEAR_FLUSH
11493 -#define __HAVE_ARCH_PTEP_SET_WRPROTECT
11494 -#define __HAVE_ARCH_PTE_SAME
11495 #include <asm-generic/pgtable.h>
11496
11497 #endif /* _I386_PGTABLE_H */
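The PG_pinned test in the rebuilt ptep_clear_flush_dirty()/ptep_clear_flush_young() above reflects Xen's write-protection of pinned page tables: once a pgd is pinned its page-table pages are read-only to the guest, so dirty/accessed bits must be changed through the hypervisor (ptep_establish via ptep_set_access_flags) rather than by writing pte_low directly. A condensed sketch of the dispatch, for readability only, using the macro's local names:

	/* __pte and __dirty are the macro locals from the hunk above */
	if (test_bit(PG_pinned, &virt_to_page(vma->vm_mm->pgd)->flags))
		ptep_set_access_flags(vma, address, ptep, __pte, __dirty);
	else if (__dirty)
		ptep->pte_low = __pte.pte_low;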
11498 --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/processor_32.h 2009-04-20 11:36:10.000000000 +0200
11499 +++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/processor_32.h 2009-03-04 11:28:34.000000000 +0100
11500 @@ -146,6 +146,18 @@ static inline void detect_ht(struct cpui
11501 #define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */
11502 #define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */
11503
11504 +static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
11505 + unsigned int *ecx, unsigned int *edx)
11506 +{
11507 + /* ecx is often an input as well as an output. */
11508 + __asm__(XEN_CPUID
11509 + : "=a" (*eax),
11510 + "=b" (*ebx),
11511 + "=c" (*ecx),
11512 + "=d" (*edx)
11513 + : "0" (*eax), "2" (*ecx));
11514 +}
11515 +
11516 /*
11517 * Generic CPUID function
11518 * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx
11519 @@ -153,24 +165,18 @@ static inline void detect_ht(struct cpui
11520 */
11521 static inline void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx)
11522 {
11523 - __asm__(XEN_CPUID
11524 - : "=a" (*eax),
11525 - "=b" (*ebx),
11526 - "=c" (*ecx),
11527 - "=d" (*edx)
11528 - : "0" (op), "c"(0));
11529 + *eax = op;
11530 + *ecx = 0;
11531 + __cpuid(eax, ebx, ecx, edx);
11532 }
11533
11534 /* Some CPUID calls want 'count' to be placed in ecx */
11535 static inline void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx,
11536 - int *edx)
11537 + int *edx)
11538 {
11539 - __asm__(XEN_CPUID
11540 - : "=a" (*eax),
11541 - "=b" (*ebx),
11542 - "=c" (*ecx),
11543 - "=d" (*edx)
11544 - : "0" (op), "c" (count));
11545 + *eax = op;
11546 + *ecx = count;
11547 + __cpuid(eax, ebx, ecx, edx);
11548 }
11549
11550 /*
11551 @@ -178,42 +184,30 @@ static inline void cpuid_count(int op, i
11552 */
11553 static inline unsigned int cpuid_eax(unsigned int op)
11554 {
11555 - unsigned int eax;
11556 + unsigned int eax, ebx, ecx, edx;
11557
11558 - __asm__(XEN_CPUID
11559 - : "=a" (eax)
11560 - : "0" (op)
11561 - : "bx", "cx", "dx");
11562 + cpuid(op, &eax, &ebx, &ecx, &edx);
11563 return eax;
11564 }
11565 static inline unsigned int cpuid_ebx(unsigned int op)
11566 {
11567 - unsigned int eax, ebx;
11568 + unsigned int eax, ebx, ecx, edx;
11569
11570 - __asm__(XEN_CPUID
11571 - : "=a" (eax), "=b" (ebx)
11572 - : "0" (op)
11573 - : "cx", "dx" );
11574 + cpuid(op, &eax, &ebx, &ecx, &edx);
11575 return ebx;
11576 }
11577 static inline unsigned int cpuid_ecx(unsigned int op)
11578 {
11579 - unsigned int eax, ecx;
11580 + unsigned int eax, ebx, ecx, edx;
11581
11582 - __asm__(XEN_CPUID
11583 - : "=a" (eax), "=c" (ecx)
11584 - : "0" (op)
11585 - : "bx", "dx" );
11586 + cpuid(op, &eax, &ebx, &ecx, &edx);
11587 return ecx;
11588 }
11589 static inline unsigned int cpuid_edx(unsigned int op)
11590 {
11591 - unsigned int eax, edx;
11592 + unsigned int eax, ebx, ecx, edx;
11593
11594 - __asm__(XEN_CPUID
11595 - : "=a" (eax), "=d" (edx)
11596 - : "0" (op)
11597 - : "bx", "cx");
11598 + cpuid(op, &eax, &ebx, &ecx, &edx);
11599 return edx;
11600 }
11601
11602 @@ -315,6 +309,8 @@ static inline void __mwait(unsigned long
11603 : :"a" (eax), "c" (ecx));
11604 }
11605
11606 +extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx);
11607 +
11608 /* from system description table in BIOS. Mostly for MCA use, but
11609 others may find it useful. */
11610 extern unsigned int machine_id;
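The cpuid rework above funnels every helper through a single __cpuid() that takes all four registers in and out (ecx is both an input and an output, which matters for indexed leaves), matching the upstream 2.6.19 shape while keeping the XEN_CPUID paravirtual opcode. A usage sketch, kernel context assumed:

	unsigned int eax, ebx, ecx, edx;
	int has_sse;

	cpuid(1, &eax, &ebx, &ecx, &edx);	/* feature leaf */
	has_sse = !!(edx & (1 << 25));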
11611 --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/segment_32.h 2009-10-28 14:55:12.000000000 +0100
11612 +++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/segment_32.h 2009-03-04 11:28:34.000000000 +0100
11613 @@ -61,11 +61,9 @@
11614
11615 #define GDT_ENTRY_KERNEL_CS (GDT_ENTRY_KERNEL_BASE + 0)
11616 #define __KERNEL_CS (GDT_ENTRY_KERNEL_CS * 8)
11617 -#define GET_KERNEL_CS() (__KERNEL_CS | (xen_feature(XENFEAT_supervisor_mode_kernel)?0:1) )
11618
11619 #define GDT_ENTRY_KERNEL_DS (GDT_ENTRY_KERNEL_BASE + 1)
11620 #define __KERNEL_DS (GDT_ENTRY_KERNEL_DS * 8)
11621 -#define GET_KERNEL_DS() (__KERNEL_DS | (xen_feature(XENFEAT_supervisor_mode_kernel)?0:1) )
11622
11623 #define GDT_ENTRY_TSS (GDT_ENTRY_KERNEL_BASE + 4)
11624 #define GDT_ENTRY_LDT (GDT_ENTRY_KERNEL_BASE + 5)
11625 @@ -85,6 +83,11 @@
11626
11627 #define GDT_SIZE (GDT_ENTRIES * 8)
11628
11629 +/* Matches __KERNEL_CS and __USER_CS (they must be 2 entries apart) */
11630 +#define SEGMENT_IS_FLAT_CODE(x) (((x) & 0xec) == GDT_ENTRY_KERNEL_CS * 8)
11631 +/* Matches PNP_CS32 and PNP_CS16 (they must be consecutive) */
11632 +#define SEGMENT_IS_PNP_CODE(x) (((x) & 0xf4) == GDT_ENTRY_PNPBIOS_BASE * 8)
11633 +
11634 /* Simple and small GDT entries for booting only */
11635
11636 #define GDT_ENTRY_BOOT_CS 2
11637 @@ -114,4 +117,16 @@
11638 */
11639 #define IDT_ENTRIES 256
11640
11641 +/* Bottom two bits of selector give the ring privilege level */
11642 +#define SEGMENT_RPL_MASK 0x3
11643 +/* Bit 2 is table indicator (LDT/GDT) */
11644 +#define SEGMENT_TI_MASK 0x4
11645 +
11646 +/* User mode is privilege level 3 */
11647 +#define USER_RPL 0x3
11648 +/* LDT segment has TI set, GDT has it cleared */
11649 +#define SEGMENT_LDT 0x4
11650 +#define SEGMENT_GDT 0x0
11651 +
11652 +#define get_kernel_rpl() (xen_feature(XENFEAT_supervisor_mode_kernel)?0:1)
11653 #endif
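The selector macros appended above decode the standard x86 selector format: bits 1:0 carry the requested privilege level, bit 2 selects LDT versus GDT, and the remaining bits are the table index; this is also why get_kernel_rpl() is 1 on ordinary Xen guests (ring 1 kernel) and 0 only when XENFEAT_supervisor_mode_kernel is set. A worked sketch with a hypothetical selector value:

	/* hypothetical selector: GDT index 13, LDT bit set, RPL 3 */
	unsigned short sel = (13 << 3) | SEGMENT_LDT | USER_RPL;

	/* (sel & SEGMENT_RPL_MASK) == USER_RPL    -> user-mode RPL
	 * (sel & SEGMENT_TI_MASK)  == SEGMENT_LDT -> LDT selector  */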
11654 --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/smp_32.h 2009-10-28 14:55:12.000000000 +0100
11655 +++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/smp_32.h 2009-03-04 11:28:34.000000000 +0100
11656 @@ -79,25 +79,36 @@ static inline int hard_smp_processor_id(
11657 return GET_APIC_ID(*(unsigned long *)(APIC_BASE+APIC_ID));
11658 }
11659 #endif
11660 -
11661 -static __inline int logical_smp_processor_id(void)
11662 -{
11663 - /* we don't want to mark this access volatile - bad code generation */
11664 - return GET_APIC_LOGICAL_ID(*(unsigned long *)(APIC_BASE+APIC_LDR));
11665 -}
11666 -
11667 #endif
11668
11669 +#define safe_smp_processor_id() smp_processor_id()
11670 extern int __cpu_disable(void);
11671 extern void __cpu_die(unsigned int cpu);
11672 extern void prefill_possible_map(void);
11673 +extern unsigned int num_processors;
11674 +
11675 #endif /* !__ASSEMBLY__ */
11676
11677 #else /* CONFIG_SMP */
11678
11679 +#define safe_smp_processor_id() 0
11680 #define cpu_physical_id(cpu) boot_cpu_physical_apicid
11681
11682 #define NO_PROC_ID 0xFF /* No processor magic marker */
11683
11684 #endif
11685 +
11686 +#ifndef __ASSEMBLY__
11687 +
11688 +extern u8 apicid_2_node[];
11689 +
11690 +#ifdef CONFIG_X86_LOCAL_APIC
11691 +static __inline int logical_smp_processor_id(void)
11692 +{
11693 + /* we don't want to mark this access volatile - bad code generation */
11694 + return GET_APIC_LOGICAL_ID(*(unsigned long *)(APIC_BASE+APIC_LDR));
11695 +}
11696 +#endif
11697 +#endif
11698 +
11699 #endif
11700 --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/system_32.h 2009-10-28 14:55:12.000000000 +0100
11701 +++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/system_32.h 2009-03-04 11:28:34.000000000 +0100
11702 @@ -267,6 +267,9 @@ static inline unsigned long __xchg(unsig
11703 #define cmpxchg(ptr,o,n)\
11704 ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\
11705 (unsigned long)(n),sizeof(*(ptr))))
11706 +#define sync_cmpxchg(ptr,o,n)\
11707 + ((__typeof__(*(ptr)))__sync_cmpxchg((ptr),(unsigned long)(o),\
11708 + (unsigned long)(n),sizeof(*(ptr))))
11709 #endif
11710
11711 static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
11712 @@ -296,6 +299,39 @@ static inline unsigned long __cmpxchg(vo
11713 return old;
11714 }
11715
11716 +/*
11717 + * Always use locked operations when touching memory shared with a
11718 + * hypervisor, since the system may be SMP even if the guest kernel
11719 + * isn't.
11720 + */
11721 +static inline unsigned long __sync_cmpxchg(volatile void *ptr,
11722 + unsigned long old,
11723 + unsigned long new, int size)
11724 +{
11725 + unsigned long prev;
11726 + switch (size) {
11727 + case 1:
11728 + __asm__ __volatile__("lock; cmpxchgb %b1,%2"
11729 + : "=a"(prev)
11730 + : "q"(new), "m"(*__xg(ptr)), "0"(old)
11731 + : "memory");
11732 + return prev;
11733 + case 2:
11734 + __asm__ __volatile__("lock; cmpxchgw %w1,%2"
11735 + : "=a"(prev)
11736 + : "r"(new), "m"(*__xg(ptr)), "0"(old)
11737 + : "memory");
11738 + return prev;
11739 + case 4:
11740 + __asm__ __volatile__("lock; cmpxchgl %1,%2"
11741 + : "=a"(prev)
11742 + : "r"(new), "m"(*__xg(ptr)), "0"(old)
11743 + : "memory");
11744 + return prev;
11745 + }
11746 + return old;
11747 +}
11748 +
11749 #ifndef CONFIG_X86_CMPXCHG
11750 /*
11751 * Building a kernel capable running on 80386. It may be necessary to
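The __sync_cmpxchg() variant added above always emits the lock prefix, unlike plain cmpxchg(), which may omit it on CONFIG_SMP=n builds; that matters for memory shared with the hypervisor or another domain, where the "other CPU" exists even if the guest itself is uniprocessor. A usage sketch, kernel context assumed and the helper hypothetical:

	/* hypothetical flag word in a ring shared with another domain */
	static int try_claim(volatile u16 *flag)
	{
		/* locked even on UP kernels */
		return sync_cmpxchg(flag, (u16)0, (u16)1) == 0;
	}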
11752 --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/tlbflush_32.h 2009-10-28 14:55:12.000000000 +0100
11753 +++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/tlbflush_32.h 2009-03-04 11:28:34.000000000 +0100
11754 @@ -8,8 +8,6 @@
11755 #define __flush_tlb_global() xen_tlb_flush()
11756 #define __flush_tlb_all() xen_tlb_flush()
11757
11758 -extern unsigned long pgkern_mask;
11759 -
11760 #define cpu_has_invlpg (boot_cpu_data.x86 > 3)
11761
11762 #define __flush_tlb_single(addr) xen_invlpg(addr)
11763 --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/dma-mapping_64.h 2009-10-28 14:55:12.000000000 +0100
11764 +++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/dma-mapping_64.h 2009-03-04 11:28:34.000000000 +0100
11765 @@ -55,13 +55,6 @@ extern dma_addr_t bad_dma_address;
11766 extern struct dma_mapping_ops* dma_ops;
11767 extern int iommu_merge;
11768
11769 -static inline int valid_dma_direction(int dma_direction)
11770 -{
11771 - return ((dma_direction == DMA_BIDIRECTIONAL) ||
11772 - (dma_direction == DMA_TO_DEVICE) ||
11773 - (dma_direction == DMA_FROM_DEVICE));
11774 -}
11775 -
11776 #if 0
11777 static inline int dma_mapping_error(dma_addr_t dma_addr)
11778 {
11779 --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/fixmap_64.h 2009-10-28 14:55:12.000000000 +0100
11780 +++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/fixmap_64.h 2009-03-04 11:28:34.000000000 +0100
11781 @@ -41,7 +41,7 @@ enum fixed_addresses {
11782 #ifdef CONFIG_X86_LOCAL_APIC
11783 FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */
11784 #endif
11785 -#ifdef CONFIG_X86_IO_APIC
11786 +#ifndef CONFIG_XEN
11787 FIX_IO_APIC_BASE_0,
11788 FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS-1,
11789 #endif
11790 --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/pgtable_64.h 2009-04-20 11:36:10.000000000 +0200
11791 +++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/pgtable_64.h 2009-03-04 11:28:34.000000000 +0100
11792 @@ -44,12 +44,9 @@ extern unsigned long __supported_pte_mas
11793
11794 #define swapper_pg_dir init_level4_pgt
11795
11796 -extern int nonx_setup(char *str);
11797 extern void paging_init(void);
11798 extern void clear_kernel_mapping(unsigned long addr, unsigned long size);
11799
11800 -extern unsigned long pgkern_mask;
11801 -
11802 /*
11803 * ZERO_PAGE is a global shared page that is always zero: used
11804 * for zero-mapped memory areas etc..
11805 @@ -119,9 +116,6 @@ static inline void pgd_clear (pgd_t * pg
11806 set_pgd(__user_pgd(pgd), __pgd(0));
11807 }
11808
11809 -#define pud_page(pud) \
11810 - ((unsigned long) __va(pud_val(pud) & PHYSICAL_PAGE_MASK))
11811 -
11812 #define pte_same(a, b) ((a).pte == (b).pte)
11813
11814 #define pte_pgprot(a) (__pgprot((a).pte & ~PHYSICAL_PAGE_MASK))
11815 @@ -333,7 +327,7 @@ static inline pte_t ptep_get_and_clear_f
11816 #define __LARGE_PTE (_PAGE_PSE|_PAGE_PRESENT)
11817 static inline int pte_user(pte_t pte) { return __pte_val(pte) & _PAGE_USER; }
11818 static inline int pte_read(pte_t pte) { return __pte_val(pte) & _PAGE_USER; }
11819 -static inline int pte_exec(pte_t pte) { return __pte_val(pte) & _PAGE_USER; }
11820 +static inline int pte_exec(pte_t pte) { return !(__pte_val(pte) & _PAGE_NX); }
11821 static inline int pte_dirty(pte_t pte) { return __pte_val(pte) & _PAGE_DIRTY; }
11822 static inline int pte_young(pte_t pte) { return __pte_val(pte) & _PAGE_ACCESSED; }
11823 static inline int pte_write(pte_t pte) { return __pte_val(pte) & _PAGE_RW; }
11824 @@ -346,29 +340,12 @@ static inline pte_t pte_mkclean(pte_t pt
11825 static inline pte_t pte_mkold(pte_t pte) { __pte_val(pte) &= ~_PAGE_ACCESSED; return pte; }
11826 static inline pte_t pte_wrprotect(pte_t pte) { __pte_val(pte) &= ~_PAGE_RW; return pte; }
11827 static inline pte_t pte_mkread(pte_t pte) { __pte_val(pte) |= _PAGE_USER; return pte; }
11828 -static inline pte_t pte_mkexec(pte_t pte) { __pte_val(pte) |= _PAGE_USER; return pte; }
11829 +static inline pte_t pte_mkexec(pte_t pte) { __pte_val(pte) &= ~_PAGE_NX; return pte; }
11830 static inline pte_t pte_mkdirty(pte_t pte) { __pte_val(pte) |= _PAGE_DIRTY; return pte; }
11831 static inline pte_t pte_mkyoung(pte_t pte) { __pte_val(pte) |= _PAGE_ACCESSED; return pte; }
11832 static inline pte_t pte_mkwrite(pte_t pte) { __pte_val(pte) |= _PAGE_RW; return pte; }
11833 static inline pte_t pte_mkhuge(pte_t pte) { __pte_val(pte) |= _PAGE_PSE; return pte; }
11834 -
11835 -#define ptep_test_and_clear_dirty(vma, addr, ptep) \
11836 -({ \
11837 - pte_t __pte = *(ptep); \
11838 - int __ret = pte_dirty(__pte); \
11839 - if (__ret) \
11840 - set_pte_at((vma)->vm_mm, addr, ptep, pte_mkclean(__pte)); \
11841 - __ret; \
11842 -})
11843 -
11844 -#define ptep_test_and_clear_young(vma, addr, ptep) \
11845 -({ \
11846 - pte_t __pte = *(ptep); \
11847 - int __ret = pte_young(__pte); \
11848 - if (__ret) \
11849 - set_pte_at((vma)->vm_mm, addr, ptep, pte_mkold(__pte)); \
11850 - __ret; \
11851 -})
11852 +static inline pte_t pte_clrhuge(pte_t pte) { __pte_val(pte) &= ~_PAGE_PSE; return pte; }
11853
11854 static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
11855 {
11856 @@ -395,7 +372,8 @@ static inline int pmd_large(pmd_t pte) {
11857 /*
11858 * Level 4 access.
11859 */
11860 -#define pgd_page(pgd) ((unsigned long) __va(pgd_val(pgd) & PTE_MASK))
11861 +#define pgd_page_vaddr(pgd) ((unsigned long) __va(pgd_val(pgd) & PTE_MASK))
11862 +#define pgd_page(pgd) (pfn_to_page(pgd_val(pgd) >> PAGE_SHIFT))
11863 #define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
11864 #define pgd_offset(mm, addr) ((mm)->pgd + pgd_index(addr))
11865 #define pgd_offset_k(address) (init_level4_pgt + pgd_index(address))
11866 @@ -404,16 +382,18 @@ static inline int pmd_large(pmd_t pte) {
11867
11868 /* PUD - Level3 access */
11869 /* to find an entry in a page-table-directory. */
11870 +#define pud_page_vaddr(pud) ((unsigned long) __va(pud_val(pud) & PHYSICAL_PAGE_MASK))
11871 +#define pud_page(pud) (pfn_to_page(pud_val(pud) >> PAGE_SHIFT))
11872 #define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
11873 -#define pud_offset(pgd, address) ((pud_t *) pgd_page(*(pgd)) + pud_index(address))
11874 +#define pud_offset(pgd, address) ((pud_t *) pgd_page_vaddr(*(pgd)) + pud_index(address))
11875 #define pud_present(pud) (__pud_val(pud) & _PAGE_PRESENT)
11876
11877 /* PMD - Level 2 access */
11878 -#define pmd_page_kernel(pmd) ((unsigned long) __va(pmd_val(pmd) & PTE_MASK))
11879 +#define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_val(pmd) & PTE_MASK))
11880 #define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT))
11881
11882 #define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
11883 -#define pmd_offset(dir, address) ((pmd_t *) pud_page(*(dir)) + \
11884 +#define pmd_offset(dir, address) ((pmd_t *) pud_page_vaddr(*(dir)) + \
11885 pmd_index(address))
11886 #define pmd_none(x) (!__pmd_val(x))
11887 #if CONFIG_XEN_COMPAT <= 0x030002
11888 @@ -444,6 +424,7 @@ static inline pte_t mk_pte_phys(unsigned
11889 {
11890 unsigned long pteval;
11891 pteval = physpage | pgprot_val(pgprot);
11892 + pteval &= __supported_pte_mask;
11893 return __pte(pteval);
11894 }
11895
11896 @@ -465,7 +446,7 @@ static inline pte_t pte_modify(pte_t pte
11897
11898 #define pte_index(address) \
11899 (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
11900 -#define pte_offset_kernel(dir, address) ((pte_t *) pmd_page_kernel(*(dir)) + \
11901 +#define pte_offset_kernel(dir, address) ((pte_t *) pmd_page_vaddr(*(dir)) + \
11902 pte_index(address))
11903
11904 /* x86-64 always has all page tables mapped. */
11905 @@ -506,6 +487,40 @@ static inline pte_t pte_modify(pte_t pte
11906 ptep_establish(vma, address, ptep, entry); \
11907 } while (0)
11908
11909 +
11910 +/*
11911 + * i386 says: We don't actually have these, but we want to advertise
11912 + * them so that we can encompass the flush here.
11913 + */
11914 +#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
11915 +#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
11916 +
11917 +#define __HAVE_ARCH_PTEP_CLEAR_DIRTY_FLUSH
11918 +#define ptep_clear_flush_dirty(vma, address, ptep) \
11919 +({ \
11920 + pte_t __pte = *(ptep); \
11921 + int __dirty = pte_dirty(__pte); \
11922 + __pte = pte_mkclean(__pte); \
11923 + if ((vma)->vm_mm->context.pinned) \
11924 + ptep_set_access_flags(vma, address, ptep, __pte, __dirty); \
11925 + else if (__dirty) \
11926 + set_pte(ptep, __pte); \
11927 + __dirty; \
11928 +})
11929 +
11930 +#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
11931 +#define ptep_clear_flush_young(vma, address, ptep) \
11932 +({ \
11933 + pte_t __pte = *(ptep); \
11934 + int __young = pte_young(__pte); \
11935 + __pte = pte_mkold(__pte); \
11936 + if ((vma)->vm_mm->context.pinned) \
11937 + ptep_set_access_flags(vma, address, ptep, __pte, __young); \
11938 + else if (__young) \
11939 + set_pte(ptep, __pte); \
11940 + __young; \
11941 +})
11942 +
11943 /* Encode and de-code a swap entry */
11944 #define __swp_type(x) (((x).val >> 1) & 0x3f)
11945 #define __swp_offset(x) ((x).val >> 8)
11946 @@ -547,10 +562,11 @@ int touch_pte_range(struct mm_struct *mm
11947 unsigned long size);
11948
11949 int xen_change_pte_range(struct mm_struct *mm, pmd_t *pmd,
11950 - unsigned long addr, unsigned long end, pgprot_t newprot);
11951 + unsigned long addr, unsigned long end, pgprot_t newprot,
11952 + int dirty_accountable);
11953
11954 -#define arch_change_pte_range(mm, pmd, addr, end, newprot) \
11955 - xen_change_pte_range(mm, pmd, addr, end, newprot)
11956 +#define arch_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable) \
11957 + xen_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable)
11958
11959 #define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \
11960 direct_remap_pfn_range(vma,vaddr,pfn,size,prot,DOMID_IO)
11961 @@ -572,8 +588,6 @@ int xen_change_pte_range(struct mm_struc
11962 #define kc_offset_to_vaddr(o) \
11963 (((o) & (1UL << (__VIRTUAL_MASK_SHIFT-1))) ? ((o) | (~__VIRTUAL_MASK)) : (o))
11964
11965 -#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
11966 -#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
11967 #define __HAVE_ARCH_PTEP_GET_AND_CLEAR
11968 #define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
11969 #define __HAVE_ARCH_PTEP_CLEAR_FLUSH
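Two themes run through the pgtable_64.h section above. First, pte_exec() and pte_mkexec() stop abusing _PAGE_USER (an i386 inheritance) and key off the x86-64 NX bit instead — a page is executable exactly when bit 63 is clear — and mk_pte_phys() now masks new PTEs with __supported_pte_mask so NX is stripped on CPUs that lack it. Second, the ptep_test_and_clear_* macros give way to ptep_clear_flush_dirty()/ptep_clear_flush_young(), which check mm->context.pinned: a pinned (hypervisor-validated, read-only) page table must be updated through ptep_set_access_flags() rather than a plain set_pte(). A self-contained sketch of the NX semantics only; the sk_* names are invented, the bit positions match x86-64:

#include <stdint.h>
#include <stdio.h>

#define SK_PAGE_USER (1ULL << 2)        /* user/supervisor bit */
#define SK_PAGE_NX   (1ULL << 63)       /* no-execute bit */

typedef uint64_t sk_pte_t;

/* old behaviour: "executable" really tested "user-accessible" */
static int sk_pte_exec_old(sk_pte_t pte) { return !!(pte & SK_PAGE_USER); }

/* new behaviour: executable unless the no-execute bit is set */
static int sk_pte_exec_new(sk_pte_t pte) { return !(pte & SK_PAGE_NX); }

int main(void)
{
        sk_pte_t kernel_text = 0;       /* neither USER nor NX set */

        /* old: 0 (wrongly "not executable"), new: 1 (correct) */
        printf("old=%d new=%d\n",
               sk_pte_exec_old(kernel_text), sk_pte_exec_new(kernel_text));
        return 0;
}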
11970 --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/processor_64.h 2009-10-28 14:55:12.000000000 +0100
11971 +++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/processor_64.h 2009-03-04 11:28:34.000000000 +0100
11972 @@ -484,6 +484,8 @@ static inline void __mwait(unsigned long
11973 : :"a" (eax), "c" (ecx));
11974 }
11975
11976 +extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx);
11977 +
11978 #define stack_current() \
11979 ({ \
11980 struct thread_info *ti; \
11981 --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/smp_64.h 2009-10-28 14:55:12.000000000 +0100
11982 +++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/smp_64.h 2009-03-04 11:28:34.000000000 +0100
11983 @@ -4,15 +4,12 @@
11984 /*
11985 * We need the APIC definitions automatically as part of 'smp.h'
11986 */
11987 -#ifndef __ASSEMBLY__
11988 #include <linux/threads.h>
11989 #include <linux/cpumask.h>
11990 #include <linux/bitops.h>
11991 extern int disable_apic;
11992 -#endif
11993
11994 #ifdef CONFIG_X86_LOCAL_APIC
11995 -#ifndef __ASSEMBLY__
11996 #include <asm/fixmap.h>
11997 #include <asm/mpspec.h>
11998 #ifdef CONFIG_X86_IO_APIC
11999 @@ -21,10 +18,8 @@ extern int disable_apic;
12000 #include <asm/apic.h>
12001 #include <asm/thread_info.h>
12002 #endif
12003 -#endif
12004
12005 #ifdef CONFIG_SMP
12006 -#ifndef ASSEMBLY
12007
12008 #include <asm/pda.h>
12009
12010 @@ -41,14 +36,11 @@ extern cpumask_t cpu_initialized;
12011
12012 extern void smp_alloc_memory(void);
12013 extern volatile unsigned long smp_invalidate_needed;
12014 -extern int pic_mode;
12015 extern void lock_ipi_call_lock(void);
12016 extern void unlock_ipi_call_lock(void);
12017 extern int smp_num_siblings;
12018 extern void smp_send_reschedule(int cpu);
12019 void smp_stop_cpu(void);
12020 -extern int smp_call_function_single(int cpuid, void (*func) (void *info),
12021 - void *info, int retry, int wait);
12022
12023 extern cpumask_t cpu_sibling_map[NR_CPUS];
12024 extern cpumask_t cpu_core_map[NR_CPUS];
12025 @@ -77,20 +69,16 @@ static inline int hard_smp_processor_id(
12026 }
12027 #endif
12028
12029 -extern int safe_smp_processor_id(void);
12030 extern int __cpu_disable(void);
12031 extern void __cpu_die(unsigned int cpu);
12032 extern void prefill_possible_map(void);
12033 extern unsigned num_processors;
12034 extern unsigned disabled_cpus;
12035
12036 -#endif /* !ASSEMBLY */
12037 -
12038 #define NO_PROC_ID 0xFF /* No processor magic marker */
12039
12040 #endif
12041
12042 -#ifndef ASSEMBLY
12043 /*
12044 * Some lowlevel functions might want to know about
12045 * the real APIC ID <-> CPU # mapping.
12046 @@ -114,11 +102,8 @@ static inline int cpu_present_to_apicid(
12047 }
12048 #endif
12049
12050 -#endif /* !ASSEMBLY */
12051 -
12052 #ifndef CONFIG_SMP
12053 #define stack_smp_processor_id() 0
12054 -#define safe_smp_processor_id() 0
12055 #define cpu_logical_map(x) (x)
12056 #else
12057 #include <asm/thread_info.h>
12058 @@ -130,7 +115,6 @@ static inline int cpu_present_to_apicid(
12059 })
12060 #endif
12061
12062 -#ifndef __ASSEMBLY__
12063 #ifdef CONFIG_X86_LOCAL_APIC
12064 static __inline int logical_smp_processor_id(void)
12065 {
12066 @@ -138,13 +122,18 @@ static __inline int logical_smp_processo
12067 return GET_APIC_LOGICAL_ID(*(unsigned long *)(APIC_BASE+APIC_LDR));
12068 }
12069 #endif
12070 -#endif
12071
12072 #ifdef CONFIG_SMP
12073 #define cpu_physical_id(cpu) x86_cpu_to_apicid[cpu]
12074 #else
12075 #define cpu_physical_id(cpu) boot_cpu_id
12076 -#endif
12077 -
12078 +static inline int smp_call_function_single(int cpuid, void (*func) (void *info),
12079 + void *info, int retry, int wait)
12080 +{
12081 + /* Disable interrupts here? */
12082 + func(info);
12083 + return 0;
12084 +}
12085 +#endif /* !CONFIG_SMP */
12086 #endif
12087
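With the extern declaration above removed, the !CONFIG_SMP branch of smp_64.h now supplies smp_call_function_single() inline: on a uniprocessor build, "run func on CPU cpuid" degenerates to a direct synchronous call (the added comment still questions whether interrupts should be disabled around it). A compilable sketch of that degenerate case, sk_* names invented:

#include <stdio.h>

static int sk_smp_call_function_single(int cpuid,
                                       void (*func)(void *info),
                                       void *info, int retry, int wait)
{
        (void)cpuid; (void)retry; (void)wait;   /* meaningless with one CPU */
        func(info);                             /* runs right here, synchronously */
        return 0;
}

static void sk_bump(void *info) { ++*(int *)info; }

int main(void)
{
        int counter = 0;

        sk_smp_call_function_single(0, sk_bump, &counter, 0, 1);
        printf("counter = %d\n", counter);      /* prints 1 */
        return 0;
}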
12088 --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/system_64.h 2009-10-28 14:55:12.000000000 +0100
12089 +++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/system_64.h 2009-03-04 11:28:34.000000000 +0100
12090 @@ -24,6 +24,7 @@
12091 #define __EXTRA_CLOBBER \
12092 ,"rcx","rbx","rdx","r8","r9","r10","r11","r12","r13","r14","r15"
12093
12094 +/* Save restore flags to clear handle leaking NT */
12095 #define switch_to(prev,next,last) \
12096 asm volatile(SAVE_CONTEXT \
12097 "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \
12098 --- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/tlbflush_64.h 2009-10-28 14:55:12.000000000 +0100
12099 +++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/tlbflush_64.h 2009-03-04 11:28:34.000000000 +0100
12100 @@ -12,9 +12,6 @@
12101 */
12102 #define __flush_tlb_global() xen_tlb_flush()
12103
12104 -
12105 -extern unsigned long pgkern_mask;
12106 -
12107 #define __flush_tlb_all() __flush_tlb_global()
12108
12109 #define __flush_tlb_one(addr) xen_invlpg((unsigned long)addr)
12110 --- sle11-2009-10-16.orig/include/linux/skbuff.h 2009-08-26 11:52:33.000000000 +0200
12111 +++ sle11-2009-10-16/include/linux/skbuff.h 2009-03-04 11:28:34.000000000 +0100
12112 @@ -1771,5 +1771,12 @@ static inline void skb_forward_csum(stru
12113 }
12114
12115 bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off);
12116 +
12117 +#ifdef CONFIG_XEN
12118 +int skb_checksum_setup(struct sk_buff *skb);
12119 +#else
12120 +static inline int skb_checksum_setup(struct sk_buff *skb) { return 0; }
12121 +#endif
12122 +
12123 #endif /* __KERNEL__ */
12124 #endif /* _LINUX_SKBUFF_H */
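The skbuff.h hunk gives every caller a single skb_checksum_setup() API: a real prototype when CONFIG_XEN provides the implementation, a zero-cost inline no-op otherwise — which is what lets the net/core/dev.c hunks further down delete the out-of-line stub and export the real function next to its definition. A minimal sketch of the idiom, with invented sk_* names and CONFIG_XEN standing in for the real option:

#include <stdio.h>

struct sk_buff_sketch { int needs_csum; };

#ifdef CONFIG_XEN
int sk_checksum_setup(struct sk_buff_sketch *skb); /* real work lives elsewhere */
#else
static inline int sk_checksum_setup(struct sk_buff_sketch *skb)
{
        (void)skb;
        return 0;       /* nothing to fix up on a native build */
}
#endif

int main(void)
{
        struct sk_buff_sketch skb = { 0 };

        /* call sites stay unconditional either way */
        printf("setup -> %d\n", sk_checksum_setup(&skb));
        return 0;
}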
12125 --- sle11-2009-10-16.orig/include/xen/evtchn.h 2009-10-28 14:55:12.000000000 +0100
12126 +++ sle11-2009-10-16/include/xen/evtchn.h 2009-03-04 11:28:34.000000000 +0100
12127 @@ -54,34 +54,34 @@
12128 */
12129 int bind_caller_port_to_irqhandler(
12130 unsigned int caller_port,
12131 - irqreturn_t (*handler)(int, void *, struct pt_regs *),
12132 + irq_handler_t handler,
12133 unsigned long irqflags,
12134 const char *devname,
12135 void *dev_id);
12136 int bind_listening_port_to_irqhandler(
12137 unsigned int remote_domain,
12138 - irqreturn_t (*handler)(int, void *, struct pt_regs *),
12139 + irq_handler_t handler,
12140 unsigned long irqflags,
12141 const char *devname,
12142 void *dev_id);
12143 int bind_interdomain_evtchn_to_irqhandler(
12144 unsigned int remote_domain,
12145 unsigned int remote_port,
12146 - irqreturn_t (*handler)(int, void *, struct pt_regs *),
12147 + irq_handler_t handler,
12148 unsigned long irqflags,
12149 const char *devname,
12150 void *dev_id);
12151 int bind_virq_to_irqhandler(
12152 unsigned int virq,
12153 unsigned int cpu,
12154 - irqreturn_t (*handler)(int, void *, struct pt_regs *),
12155 + irq_handler_t handler,
12156 unsigned long irqflags,
12157 const char *devname,
12158 void *dev_id);
12159 int bind_ipi_to_irqhandler(
12160 unsigned int ipi,
12161 unsigned int cpu,
12162 - irqreturn_t (*handler)(int, void *, struct pt_regs *),
12163 + irq_handler_t handler,
12164 unsigned long irqflags,
12165 const char *devname,
12166 void *dev_id);
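All five bind_*_to_irqhandler() prototypes above change for one upstream reason: 2.6.19 dropped struct pt_regs from interrupt handlers (a handler that needs the register state calls get_irq_regs() instead) and introduced the irq_handler_t typedef; the xencons.h hunk just below removes the same parameter from xencons_rx(). A compilable sketch of the before/after handler types, everything sk_* invented:

#include <stdio.h>

struct sk_pt_regs;                      /* opaque here, as in the old prototype */

typedef enum { SK_IRQ_NONE, SK_IRQ_HANDLED } sk_irqreturn_t;

/* pre-2.6.19: the interrupted register state rode along as an argument */
typedef sk_irqreturn_t (*sk_old_handler_t)(int irq, void *dev_id,
                                           struct sk_pt_regs *regs);

/* 2.6.19 on: two arguments; regs travel out of band when needed */
typedef sk_irqreturn_t (*sk_irq_handler_t)(int irq, void *dev_id);

static sk_irqreturn_t sk_console_irq(int irq, void *dev_id)
{
        printf("irq %d for %s\n", irq, (const char *)dev_id);
        return SK_IRQ_HANDLED;
}

int main(void)
{
        sk_irq_handler_t handler = sk_console_irq;

        return handler(3, (void *)"xencons") == SK_IRQ_HANDLED ? 0 : 1;
}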
12167 --- sle11-2009-10-16.orig/include/xen/xencons.h 2009-10-28 14:55:12.000000000 +0100
12168 +++ sle11-2009-10-16/include/xen/xencons.h 2009-03-04 11:28:34.000000000 +0100
12169 @@ -8,7 +8,7 @@ void xencons_force_flush(void);
12170 void xencons_resume(void);
12171
12172 /* Interrupt work hooks. Receive data, or kick data out. */
12173 -void xencons_rx(char *buf, unsigned len, struct pt_regs *regs);
12174 +void xencons_rx(char *buf, unsigned len);
12175 void xencons_tx(void);
12176
12177 int xencons_ring_init(void);
12178 --- sle11-2009-10-16.orig/mm/mprotect.c 2009-08-26 11:52:33.000000000 +0200
12179 +++ sle11-2009-10-16/mm/mprotect.c 2009-03-04 11:28:34.000000000 +0100
12180 @@ -92,7 +92,7 @@ static inline void change_pmd_range(stru
12181 next = pmd_addr_end(addr, end);
12182 if (pmd_none_or_clear_bad(pmd))
12183 continue;
12184 - if (arch_change_pte_range(mm, pmd, addr, next, newprot))
12185 + if (arch_change_pte_range(mm, pmd, addr, next, newprot, dirty_accountable))
12186 continue;
12187 change_pte_range(mm, pmd, addr, next, newprot, dirty_accountable);
12188 } while (pmd++, addr = next, addr != end);
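The mprotect.c hunk keeps the Xen hook in step with 2.6.19's dirty-page accounting: change_pte_range() grew a dirty_accountable flag, so the arch_change_pte_range() escape hatch must forward it, or the Xen path and the generic path would apply different write-protect policy. A sketch of the hook shape only, all names invented:

#include <stdio.h>

/* returns nonzero if the arch handled the range itself */
static int sk_arch_change_pte_range(unsigned long addr, unsigned long end,
                                    int dirty_accountable)
{
        (void)addr; (void)end; (void)dirty_accountable;
        return 0;               /* 0 = defer to the generic loop */
}

static void sk_change_pte_range(unsigned long addr, unsigned long end)
{
        printf("generic path for [%#lx, %#lx)\n", addr, end);
}

int main(void)
{
        unsigned long addr = 0x1000, end = 0x2000;

        if (!sk_arch_change_pte_range(addr, end, 1))
                sk_change_pte_range(addr, end);
        return 0;
}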
12189 --- sle11-2009-10-16.orig/net/core/dev.c 2009-08-26 11:52:33.000000000 +0200
12190 +++ sle11-2009-10-16/net/core/dev.c 2009-03-04 11:28:34.000000000 +0100
12191 @@ -1765,15 +1765,14 @@ inline int skb_checksum_setup(struct sk_
12192 }
12193 if ((skb->h.raw + skb->csum + 2) > skb->tail)
12194 goto out;
12195 - skb->ip_summed = CHECKSUM_HW;
12196 + skb->ip_summed = CHECKSUM_PARTIAL;
12197 skb->proto_csum_blank = 0;
12198 }
12199 return 0;
12200 out:
12201 return -EPROTO;
12202 }
12203 -#else
12204 -inline int skb_checksum_setup(struct sk_buff *skb) { return 0; }
12205 +EXPORT_SYMBOL(skb_checksum_setup);
12206 #endif
12207
12208 /**
12209 @@ -2327,7 +2326,7 @@ int netif_receive_skb(struct sk_buff *sk
12210 case CHECKSUM_UNNECESSARY:
12211 skb->proto_data_valid = 1;
12212 break;
12213 - case CHECKSUM_HW:
12214 + case CHECKSUM_PARTIAL:
12215 /* XXX Implement me. */
12216 default:
12217 skb->proto_data_valid = 0;
12218 @@ -4989,7 +4988,6 @@ EXPORT_SYMBOL(unregister_netdevice_notif
12219 EXPORT_SYMBOL(net_enable_timestamp);
12220 EXPORT_SYMBOL(net_disable_timestamp);
12221 EXPORT_SYMBOL(dev_get_flags);
12222 -EXPORT_SYMBOL(skb_checksum_setup);
12223
12224 #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
12225 EXPORT_SYMBOL(br_handle_frame_hook);
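The dev.c hunks track 2.6.19's split of the ambiguous CHECKSUM_HW value: on receive it became CHECKSUM_COMPLETE (hardware supplied the full checksum), on transmit CHECKSUM_PARTIAL (the checksum still has to be filled in). skb_checksum_setup() repairs packets arriving from a Xen frontend whose checksum was never computed, so it lands in the transmit-style PARTIAL state; the duplicate EXPORT_SYMBOL at the bottom goes away because the export now sits next to the definition. A sketch of the state split — enum values here are illustrative, not the kernel's:

#include <stdio.h>

enum sk_csum_state {
        SK_CHECKSUM_NONE,               /* no help from anyone */
        SK_CHECKSUM_UNNECESSARY,        /* rx: hardware already verified it */
        SK_CHECKSUM_COMPLETE,           /* rx: full sum supplied (was CHECKSUM_HW) */
        SK_CHECKSUM_PARTIAL,            /* tx: sum still to be filled in (was CHECKSUM_HW) */
};

int main(void)
{
        enum sk_csum_state s = SK_CHECKSUM_PARTIAL;

        printf("after skb_checksum_setup(): ip_summed = %d\n", (int)s);
        return 0;
}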