1 From: www.kernel.org
2 Subject: Linux 2.6.19
3 Patch-mainline: 2.6.19
4
5 Automatically created from "patches.kernel.org/patch-2.6.19" by xen-port-patches.py
6
7 Acked-by: jbeulich@novell.com
8
9 Index: head-2008-12-01/arch/x86/Kconfig
10 ===================================================================
11 --- head-2008-12-01.orig/arch/x86/Kconfig 2008-12-03 15:49:14.000000000 +0100
12 +++ head-2008-12-01/arch/x86/Kconfig 2008-12-01 11:29:05.000000000 +0100
13 @@ -411,6 +411,7 @@ config SCHED_NO_NO_OMIT_FRAME_POINTER
14
15 menuconfig PARAVIRT_GUEST
16 bool "Paravirtualized guest support"
17 + depends on !X86_XEN && !X86_64_XEN
18 help
19 Say Y here to get to see options related to running Linux under
20 various hypervisors. This option alone does not add any kernel code.
21 Index: head-2008-12-01/arch/x86/kernel/apic_32-xen.c
22 ===================================================================
23 --- head-2008-12-01.orig/arch/x86/kernel/apic_32-xen.c 2008-12-03 15:49:14.000000000 +0100
24 +++ head-2008-12-01/arch/x86/kernel/apic_32-xen.c 2008-12-01 11:29:05.000000000 +0100
25 @@ -54,7 +54,6 @@ static cpumask_t timer_bcast_ipi;
26 /*
27 * Knob to control our willingness to enable the local APIC.
28 */
29 -int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */
30
31 /*
32 * Debug level
33 @@ -102,7 +101,7 @@ int get_physical_broadcast(void)
34
35 #ifndef CONFIG_XEN
36 #ifndef CONFIG_SMP
37 -static void up_apic_timer_interrupt_call(struct pt_regs *regs)
38 +static void up_apic_timer_interrupt_call(void)
39 {
40 int cpu = smp_processor_id();
41
42 @@ -111,11 +110,11 @@ static void up_apic_timer_interrupt_call
43 */
44 per_cpu(irq_stat, cpu).apic_timer_irqs++;
45
46 - smp_local_timer_interrupt(regs);
47 + smp_local_timer_interrupt();
48 }
49 #endif
50
51 -void smp_send_timer_broadcast_ipi(struct pt_regs *regs)
52 +void smp_send_timer_broadcast_ipi(void)
53 {
54 cpumask_t mask;
55
56 @@ -128,7 +127,7 @@ void smp_send_timer_broadcast_ipi(struct
57 * We can directly call the apic timer interrupt handler
58 * in UP case. Minus all irq related functions
59 */
60 - up_apic_timer_interrupt_call(regs);
61 + up_apic_timer_interrupt_call();
62 #endif
63 }
64 }
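
The apic_32-xen.c hunks above track the 2.6.19 genirq rework, in which
interrupt handlers lost their struct pt_regs * argument; the Xen timer
broadcast path drops it accordingly. A minimal sketch of the replacement
pattern, assuming the 2.6.19 <asm/irq_regs.h> interface (the handler name
below is hypothetical):

    #include <linux/profile.h>
    #include <asm/irq_regs.h>
    #include <asm/ptrace.h>

    /* do_IRQ() now publishes the interrupted frame per-CPU; handlers
     * that still need it fetch it on demand instead of taking a
     * regs parameter. */
    static void example_timer_tick(void)
    {
            struct pt_regs *regs = get_irq_regs();

            if (regs && user_mode(regs))
                    profile_tick(CPU_PROFILING);    /* example consumer */
    }
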
65 Index: head-2008-12-01/arch/x86/kernel/cpu/common-xen.c
66 ===================================================================
67 --- head-2008-12-01.orig/arch/x86/kernel/cpu/common-xen.c 2008-12-03 15:49:14.000000000 +0100
68 +++ head-2008-12-01/arch/x86/kernel/cpu/common-xen.c 2008-12-01 11:29:05.000000000 +0100
69 @@ -43,7 +43,7 @@ struct cpu_dev * cpu_devs[X86_VENDOR_NUM
70
71 extern int disable_pse;
72
73 -static void default_init(struct cpuinfo_x86 * c)
74 +static void __cpuinit default_init(struct cpuinfo_x86 * c)
75 {
76 /* Not much we can do here... */
77 /* Check if at least it has cpuid */
78 @@ -56,7 +56,7 @@ static void default_init(struct cpuinfo_
79 }
80 }
81
82 -static struct cpu_dev default_cpu = {
83 +static struct cpu_dev __cpuinitdata default_cpu = {
84 .c_init = default_init,
85 .c_vendor = "Unknown",
86 };
87 @@ -191,7 +191,16 @@ static void __cpuinit get_cpu_vendor(str
88
89 static int __init x86_fxsr_setup(char * s)
90 {
91 + /* Tell all the other CPUs to not use it... */
92 disable_x86_fxsr = 1;
93 +
94 + /*
95 + * ... and clear the bits early in the boot_cpu_data
96 + * so that the bootup process doesn't try to do this
97 + * either.
98 + */
99 + clear_bit(X86_FEATURE_FXSR, boot_cpu_data.x86_capability);
100 + clear_bit(X86_FEATURE_XMM, boot_cpu_data.x86_capability);
101 return 1;
102 }
103 __setup("nofxsr", x86_fxsr_setup);
104 @@ -272,7 +281,7 @@ static void __init early_cpu_detect(void
105 }
106 }
107
108 -void __cpuinit generic_identify(struct cpuinfo_x86 * c)
109 +static void __cpuinit generic_identify(struct cpuinfo_x86 * c)
110 {
111 u32 tfms, xlvl;
112 int ebx;
113 @@ -698,8 +707,7 @@ old_gdt:
114 */
115 atomic_inc(&init_mm.mm_count);
116 current->active_mm = &init_mm;
117 - if (current->mm)
118 - BUG();
119 + BUG_ON(current->mm);
120 enter_lazy_tlb(&init_mm, current);
121
122 load_esp0(t, thread);
123 @@ -712,7 +720,7 @@ old_gdt:
124 #endif
125
126 /* Clear %fs and %gs. */
127 - asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs");
128 + asm volatile ("movl %0, %%fs; movl %0, %%gs" : : "r" (0));
129
130 /* Clear all 6 debug registers: */
131 set_debugreg(0, 0);
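
Two details in the common-xen.c hunks deserve a note: "nofxsr" now clears
the FXSR/XMM feature bits in boot_cpu_data before secondary CPUs come up,
and the %fs/%gs clearing asm gains real operand constraints. The old form
zeroed %eax behind the compiler's back; the new form makes the compiler
supply and track the zero register. A sketch of that fix (the wrapper name
is hypothetical, the asm is the hunk's own):

    static inline void clear_fs_gs(void)
    {
            /* "r" (0): the compiler picks the register and knows it
             * is consumed, so no hidden clobber of %eax */
            asm volatile("movl %0, %%fs; movl %0, %%gs" : : "r" (0));
    }
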
132 Index: head-2008-12-01/arch/x86/kernel/entry_32-xen.S
133 ===================================================================
134 --- head-2008-12-01.orig/arch/x86/kernel/entry_32-xen.S 2008-12-03 15:49:14.000000000 +0100
135 +++ head-2008-12-01/arch/x86/kernel/entry_32-xen.S 2008-12-01 11:29:05.000000000 +0100
136 @@ -80,8 +80,12 @@ VM_MASK = 0x00020000
137 NMI_MASK = 0x80000000
138
139 #ifndef CONFIG_XEN
140 -#define DISABLE_INTERRUPTS cli
141 -#define ENABLE_INTERRUPTS sti
142 +/* These are replaced for paravirtualization */
143 +#define DISABLE_INTERRUPTS cli
144 +#define ENABLE_INTERRUPTS sti
145 +#define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit
146 +#define INTERRUPT_RETURN iret
147 +#define GET_CR0_INTO_EAX movl %cr0, %eax
148 #else
149 /* Offsets into shared_info_t. */
150 #define evtchn_upcall_pending /* 0 */
151 @@ -99,15 +103,29 @@ NMI_MASK = 0x80000000
152
153 #define __DISABLE_INTERRUPTS movb $1,evtchn_upcall_mask(%esi)
154 #define __ENABLE_INTERRUPTS movb $0,evtchn_upcall_mask(%esi)
155 +#define __TEST_PENDING testb $0xFF,evtchn_upcall_pending(%esi)
156 #define DISABLE_INTERRUPTS GET_VCPU_INFO ; \
157 __DISABLE_INTERRUPTS
158 #define ENABLE_INTERRUPTS GET_VCPU_INFO ; \
159 __ENABLE_INTERRUPTS
160 -#define __TEST_PENDING testb $0xFF,evtchn_upcall_pending(%esi)
161 +#define ENABLE_INTERRUPTS_SYSEXIT __ENABLE_INTERRUPTS ; \
162 +sysexit_scrit: /**** START OF SYSEXIT CRITICAL REGION ****/ ; \
163 + __TEST_PENDING ; \
164 + jnz 14f # process more events if necessary... ; \
165 + movl ESI(%esp), %esi ; \
166 + sysexit ; \
167 +14: __DISABLE_INTERRUPTS ; \
168 + TRACE_IRQS_OFF ; \
169 +sysexit_ecrit: /**** END OF SYSEXIT CRITICAL REGION ****/ ; \
170 + push %esp ; \
171 + call evtchn_do_upcall ; \
172 + add $4,%esp ; \
173 + jmp ret_from_intr
174 +#define INTERRUPT_RETURN iret
175 #endif
176
177 #ifdef CONFIG_PREEMPT
178 -#define preempt_stop cli; TRACE_IRQS_OFF
179 +#define preempt_stop DISABLE_INTERRUPTS; TRACE_IRQS_OFF
180 #else
181 #define preempt_stop
182 #define resume_kernel restore_nocheck
183 @@ -206,18 +224,21 @@ NMI_MASK = 0x80000000
184
185 #define RING0_INT_FRAME \
186 CFI_STARTPROC simple;\
187 + CFI_SIGNAL_FRAME;\
188 CFI_DEF_CFA esp, 3*4;\
189 /*CFI_OFFSET cs, -2*4;*/\
190 CFI_OFFSET eip, -3*4
191
192 #define RING0_EC_FRAME \
193 CFI_STARTPROC simple;\
194 + CFI_SIGNAL_FRAME;\
195 CFI_DEF_CFA esp, 4*4;\
196 /*CFI_OFFSET cs, -2*4;*/\
197 CFI_OFFSET eip, -3*4
198
199 #define RING0_PTREGS_FRAME \
200 CFI_STARTPROC simple;\
201 + CFI_SIGNAL_FRAME;\
202 CFI_DEF_CFA esp, OLDESP-EBX;\
203 /*CFI_OFFSET cs, CS-OLDESP;*/\
204 CFI_OFFSET eip, EIP-OLDESP;\
205 @@ -263,8 +284,9 @@ ret_from_intr:
206 check_userspace:
207 movl EFLAGS(%esp), %eax # mix EFLAGS and CS
208 movb CS(%esp), %al
209 - testl $(VM_MASK | 2), %eax
210 - jz resume_kernel
211 + andl $(VM_MASK | SEGMENT_RPL_MASK), %eax
212 + cmpl $USER_RPL, %eax
213 + jb resume_kernel # not returning to v8086 or userspace
214 ENTRY(resume_userspace)
215 DISABLE_INTERRUPTS # make sure we don't miss an interrupt
216 # setting need_resched or sigpending
217 @@ -277,7 +299,7 @@ ENTRY(resume_userspace)
218
219 #ifdef CONFIG_PREEMPT
220 ENTRY(resume_kernel)
221 - cli
222 + DISABLE_INTERRUPTS
223 cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ?
224 jnz restore_nocheck
225 need_resched:
226 @@ -297,6 +319,7 @@ need_resched:
227 # sysenter call handler stub
228 ENTRY(sysenter_entry)
229 CFI_STARTPROC simple
230 + CFI_SIGNAL_FRAME
231 CFI_DEF_CFA esp, 0
232 CFI_REGISTER esp, ebp
233 movl SYSENTER_stack_esp0(%esp),%esp
234 @@ -305,7 +328,7 @@ sysenter_past_esp:
235 * No need to follow this irqs on/off section: the syscall
236 * disabled irqs and here we enable it straight after entry:
237 */
238 - sti
239 + ENABLE_INTERRUPTS
240 pushl $(__USER_DS)
241 CFI_ADJUST_CFA_OFFSET 4
242 /*CFI_REL_OFFSET ss, 0*/
243 @@ -359,26 +382,8 @@ sysenter_past_esp:
244 movl EIP(%esp), %edx
245 movl OLDESP(%esp), %ecx
246 xorl %ebp,%ebp
247 -#ifdef CONFIG_XEN
248 TRACE_IRQS_ON
249 - __ENABLE_INTERRUPTS
250 -sysexit_scrit: /**** START OF SYSEXIT CRITICAL REGION ****/
251 - __TEST_PENDING
252 - jnz 14f # process more events if necessary...
253 - movl ESI(%esp), %esi
254 - sysexit
255 -14: __DISABLE_INTERRUPTS
256 - TRACE_IRQS_OFF
257 -sysexit_ecrit: /**** END OF SYSEXIT CRITICAL REGION ****/
258 - push %esp
259 - call evtchn_do_upcall
260 - add $4,%esp
261 - jmp ret_from_intr
262 -#else
263 - TRACE_IRQS_ON
264 - sti
265 - sysexit
266 -#endif /* !CONFIG_XEN */
267 + ENABLE_INTERRUPTS_SYSEXIT
268 CFI_ENDPROC
269
270 # pv sysenter call handler stub
271 @@ -444,8 +449,8 @@ restore_all:
272 # See comments in process.c:copy_thread() for details.
273 movb OLDSS(%esp), %ah
274 movb CS(%esp), %al
275 - andl $(VM_MASK | (4 << 8) | 3), %eax
276 - cmpl $((4 << 8) | 3), %eax
277 + andl $(VM_MASK | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
278 + cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax
279 CFI_REMEMBER_STATE
280 je ldt_ss # returning to user-space with LDT SS
281 restore_nocheck:
282 @@ -467,12 +472,11 @@ restore_nocheck_notrace:
283 RESTORE_REGS
284 addl $4, %esp
285 CFI_ADJUST_CFA_OFFSET -4
286 -1: iret
287 +1: INTERRUPT_RETURN
288 .section .fixup,"ax"
289 iret_exc:
290 #ifndef CONFIG_XEN
291 - TRACE_IRQS_ON
292 - sti
293 + ENABLE_INTERRUPTS
294 #endif
295 pushl $0 # no error code
296 pushl $do_iret_error
297 @@ -498,7 +502,7 @@ ldt_ss:
298 * dosemu and wine happy. */
299 subl $8, %esp # reserve space for switch16 pointer
300 CFI_ADJUST_CFA_OFFSET 8
301 - cli
302 + DISABLE_INTERRUPTS
303 TRACE_IRQS_OFF
304 movl %esp, %eax
305 /* Set up the 16bit stack frame with switch32 pointer on top,
306 @@ -508,7 +512,7 @@ ldt_ss:
307 TRACE_IRQS_IRET
308 RESTORE_REGS
309 lss 20+4(%esp), %esp # switch to 16bit stack
310 -1: iret
311 +1: INTERRUPT_RETURN
312 .section __ex_table,"a"
313 .align 4
314 .long 1b,iret_exc
315 @@ -524,7 +528,7 @@ scrit: /**** START OF CRITICAL REGION **
316 RESTORE_REGS
317 addl $4, %esp
318 CFI_ADJUST_CFA_OFFSET -4
319 -1: iret
320 +1: INTERRUPT_RETURN
321 .section __ex_table,"a"
322 .align 4
323 .long 1b,iret_exc
324 @@ -713,11 +717,9 @@ ENTRY(name) \
325 #define UNWIND_ESPFIX_STACK
326 #endif
327
328 -ENTRY(divide_error)
329 - RING0_INT_FRAME
330 - pushl $0 # no error code
331 - CFI_ADJUST_CFA_OFFSET 4
332 - pushl $do_divide_error
333 +KPROBE_ENTRY(page_fault)
334 + RING0_EC_FRAME
335 + pushl $do_page_fault
336 CFI_ADJUST_CFA_OFFSET 4
337 ALIGN
338 error_code:
339 @@ -767,6 +769,7 @@ error_code:
340 call *%edi
341 jmp ret_from_exception
342 CFI_ENDPROC
343 +KPROBE_END(page_fault)
344
345 #ifdef CONFIG_XEN
346 # A note on the "critical region" in our callback handler.
347 @@ -926,7 +929,7 @@ ENTRY(device_not_available)
348 CFI_ADJUST_CFA_OFFSET 4
349 SAVE_ALL
350 #ifndef CONFIG_XEN
351 - movl %cr0, %eax
352 + GET_CR0_INTO_EAX
353 testl $0x4, %eax # EM (math emulation bit)
354 je device_available_emulate
355 pushl $0 # temporary storage for ORIG_EIP
356 @@ -961,9 +964,15 @@ device_available_emulate:
357 jne ok; \
358 label: \
359 movl SYSENTER_stack_esp0+offset(%esp),%esp; \
360 + CFI_DEF_CFA esp, 0; \
361 + CFI_UNDEFINED eip; \
362 pushfl; \
363 + CFI_ADJUST_CFA_OFFSET 4; \
364 pushl $__KERNEL_CS; \
365 - pushl $sysenter_past_esp
366 + CFI_ADJUST_CFA_OFFSET 4; \
367 + pushl $sysenter_past_esp; \
368 + CFI_ADJUST_CFA_OFFSET 4; \
369 + CFI_REL_OFFSET eip, 0
370 #endif /* CONFIG_XEN */
371
372 KPROBE_ENTRY(debug)
373 @@ -982,7 +991,8 @@ debug_stack_correct:
374 call do_debug
375 jmp ret_from_exception
376 CFI_ENDPROC
377 - .previous .text
378 +KPROBE_END(debug)
379 +
380 #ifndef CONFIG_XEN
381 /*
382 * NMI is doubly nasty. It can happen _while_ we're handling
383 @@ -992,7 +1002,7 @@ debug_stack_correct:
384 * check whether we got an NMI on the debug path where the debug
385 * fault happened on the sysenter path.
386 */
387 -ENTRY(nmi)
388 +KPROBE_ENTRY(nmi)
389 RING0_INT_FRAME
390 pushl %eax
391 CFI_ADJUST_CFA_OFFSET 4
392 @@ -1017,6 +1027,7 @@ ENTRY(nmi)
393 cmpl $sysenter_entry,12(%esp)
394 je nmi_debug_stack_check
395 nmi_stack_correct:
396 + /* We have a RING0_INT_FRAME here */
397 pushl %eax
398 CFI_ADJUST_CFA_OFFSET 4
399 SAVE_ALL
400 @@ -1027,9 +1038,12 @@ nmi_stack_correct:
401 CFI_ENDPROC
402
403 nmi_stack_fixup:
404 + RING0_INT_FRAME
405 FIX_STACK(12,nmi_stack_correct, 1)
406 jmp nmi_stack_correct
407 +
408 nmi_debug_stack_check:
409 + /* We have a RING0_INT_FRAME here */
410 cmpw $__KERNEL_CS,16(%esp)
411 jne nmi_stack_correct
412 cmpl $debug,(%esp)
413 @@ -1040,8 +1054,10 @@ nmi_debug_stack_check:
414 jmp nmi_stack_correct
415
416 nmi_16bit_stack:
417 - RING0_INT_FRAME
418 - /* create the pointer to lss back */
419 + /* We have a RING0_INT_FRAME here.
420 + *
421 + * create the pointer to lss back
422 + */
423 pushl %ss
424 CFI_ADJUST_CFA_OFFSET 4
425 pushl %esp
426 @@ -1062,14 +1078,14 @@ nmi_16bit_stack:
427 call do_nmi
428 RESTORE_REGS
429 lss 12+4(%esp), %esp # back to 16bit stack
430 -1: iret
431 +1: INTERRUPT_RETURN
432 CFI_ENDPROC
433 .section __ex_table,"a"
434 .align 4
435 .long 1b,iret_exc
436 .previous
437 #else
438 -ENTRY(nmi)
439 +KPROBE_ENTRY(nmi)
440 RING0_INT_FRAME
441 pushl %eax
442 CFI_ADJUST_CFA_OFFSET 4
443 @@ -1081,6 +1097,7 @@ ENTRY(nmi)
444 jmp restore_all
445 CFI_ENDPROC
446 #endif
447 +KPROBE_END(nmi)
448
449 KPROBE_ENTRY(int3)
450 RING0_INT_FRAME
451 @@ -1092,7 +1109,7 @@ KPROBE_ENTRY(int3)
452 call do_int3
453 jmp ret_from_exception
454 CFI_ENDPROC
455 - .previous .text
456 +KPROBE_END(int3)
457
458 ENTRY(overflow)
459 RING0_INT_FRAME
460 @@ -1157,7 +1174,7 @@ KPROBE_ENTRY(general_protection)
461 CFI_ADJUST_CFA_OFFSET 4
462 jmp error_code
463 CFI_ENDPROC
464 - .previous .text
465 +KPROBE_END(general_protection)
466
467 ENTRY(alignment_check)
468 RING0_EC_FRAME
469 @@ -1166,13 +1183,14 @@ ENTRY(alignment_check)
470 jmp error_code
471 CFI_ENDPROC
472
473 -KPROBE_ENTRY(page_fault)
474 - RING0_EC_FRAME
475 - pushl $do_page_fault
476 +ENTRY(divide_error)
477 + RING0_INT_FRAME
478 + pushl $0 # no error code
479 + CFI_ADJUST_CFA_OFFSET 4
480 + pushl $do_divide_error
481 CFI_ADJUST_CFA_OFFSET 4
482 jmp error_code
483 CFI_ENDPROC
484 - .previous .text
485
486 #ifdef CONFIG_X86_MCE
487 ENTRY(machine_check)
488 @@ -1234,6 +1252,19 @@ ENTRY(fixup_4gb_segment)
489 jmp error_code
490 CFI_ENDPROC
491
492 +ENTRY(kernel_thread_helper)
493 + pushl $0 # fake return address for unwinder
494 + CFI_STARTPROC
495 + movl %edx,%eax
496 + push %edx
497 + CFI_ADJUST_CFA_OFFSET 4
498 + call *%ebx
499 + push %eax
500 + CFI_ADJUST_CFA_OFFSET 4
501 + call do_exit
502 + CFI_ENDPROC
503 +ENDPROC(kernel_thread_helper)
504 +
505 .section .rodata,"a"
506 #include "syscall_table.S"
507
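
The entry_32-xen.S rework above folds the formerly open-coded Xen sysexit
path into the new ENABLE_INTERRUPTS_SYSEXIT macro. The "critical region"
it marks exists because sysexit cannot be restarted: event delivery must
be re-enabled first and then re-checked for events that raced in. A rough
C rendering of that sequence, assuming the 2.6.19 Xen shared-info layout
(the function name is hypothetical):

    #include <linux/compiler.h>
    #include <xen/interface/xen.h>      /* struct vcpu_info */

    /* what ENABLE_INTERRUPTS_SYSEXIT checks before the real sysexit */
    static inline int xen_may_sysexit(struct vcpu_info *vcpu)
    {
            vcpu->evtchn_upcall_mask = 0;           /* __ENABLE_INTERRUPTS */
            barrier();
            if (vcpu->evtchn_upcall_pending) {      /* __TEST_PENDING */
                    vcpu->evtchn_upcall_mask = 1;   /* __DISABLE_INTERRUPTS */
                    return 0;   /* deliver via evtchn_do_upcall() instead */
            }
            return 1;           /* safe to sysexit */
    }
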
508 Index: head-2008-12-01/arch/x86/kernel/head_32-xen.S
509 ===================================================================
510 --- head-2008-12-01.orig/arch/x86/kernel/head_32-xen.S 2008-12-03 15:49:14.000000000 +0100
511 +++ head-2008-12-01/arch/x86/kernel/head_32-xen.S 2008-12-01 11:29:05.000000000 +0100
512 @@ -62,7 +62,7 @@ ENTRY(startup_32)
513 movl %eax,%gs
514 cld # gcc2 wants the direction flag cleared at all times
515
516 - pushl %eax # fake return address
517 + pushl $0 # fake return address for unwinder
518 jmp start_kernel
519
520 #define HYPERCALL_PAGE_OFFSET 0x1000
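
This hunk, like the kernel_thread_helper addition above, pushes a literal
0 where a return address would sit; pushing %eax left whatever the register
happened to hold. A zero return address is the conventional terminator for
stack walkers, as in this illustrative (non-kernel) frame-pointer walk:

    struct stack_frame {
            struct stack_frame *next;       /* saved %ebp */
            unsigned long ret_addr;         /* saved %eip */
    };

    static void walk(struct stack_frame *frame)
    {
            while (frame && frame->ret_addr)        /* 0 ends the walk */
                    frame = frame->next;
    }
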
521 Index: head-2008-12-01/arch/x86/kernel/io_apic_32-xen.c
522 ===================================================================
523 --- head-2008-12-01.orig/arch/x86/kernel/io_apic_32-xen.c 2008-12-03 15:49:14.000000000 +0100
524 +++ head-2008-12-01/arch/x86/kernel/io_apic_32-xen.c 2008-12-01 11:29:05.000000000 +0100
525 @@ -31,6 +31,9 @@
526 #include <linux/acpi.h>
527 #include <linux/module.h>
528 #include <linux/sysdev.h>
529 +#include <linux/pci.h>
530 +#include <linux/msi.h>
531 +#include <linux/htirq.h>
532
533 #include <asm/io.h>
534 #include <asm/smp.h>
535 @@ -38,13 +41,15 @@
536 #include <asm/timer.h>
537 #include <asm/i8259.h>
538 #include <asm/nmi.h>
539 +#include <asm/msidef.h>
540 +#include <asm/hypertransport.h>
541
542 #include <mach_apic.h>
543 +#include <mach_apicdef.h>
544
545 #include "io_ports.h"
546
547 #ifdef CONFIG_XEN
548 -
549 #include <xen/interface/xen.h>
550 #include <xen/interface/physdev.h>
551 #include <xen/evtchn.h>
552 @@ -56,32 +61,7 @@
553
554 unsigned long io_apic_irqs;
555
556 -static inline unsigned int xen_io_apic_read(unsigned int apic, unsigned int reg)
557 -{
558 - struct physdev_apic apic_op;
559 - int ret;
560 -
561 - apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr;
562 - apic_op.reg = reg;
563 - ret = HYPERVISOR_physdev_op(PHYSDEVOP_apic_read, &apic_op);
564 - if (ret)
565 - return ret;
566 - return apic_op.value;
567 -}
568 -
569 -static inline void xen_io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
570 -{
571 - struct physdev_apic apic_op;
572 -
573 - apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr;
574 - apic_op.reg = reg;
575 - apic_op.value = value;
576 - WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic_op));
577 -}
578 -
579 -#define io_apic_read(a,r) xen_io_apic_read(a,r)
580 -#define io_apic_write(a,r,v) xen_io_apic_write(a,r,v)
581 -
582 +#define clear_IO_APIC() ((void)0)
583 #endif /* CONFIG_XEN */
584
585 int (*ioapic_renumber_irq)(int ioapic, int irq);
586 @@ -108,7 +88,7 @@ int sis_apic_bug = -1;
587 */
588 int nr_ioapic_registers[MAX_IO_APICS];
589
590 -int disable_timer_pin_1 __initdata;
591 +static int disable_timer_pin_1 __initdata;
592
593 /*
594 * Rough estimation of how many shared IRQs there are, can
595 @@ -128,12 +108,124 @@ static struct irq_pin_list {
596 int apic, pin, next;
597 } irq_2_pin[PIN_MAP_SIZE];
598
599 -int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1};
600 -#ifdef CONFIG_PCI_MSI
601 -#define vector_to_irq(vector) \
602 - (platform_legacy_irq(vector) ? vector : vector_irq[vector])
603 +#ifndef CONFIG_XEN
604 +struct io_apic {
605 + unsigned int index;
606 + unsigned int unused[3];
607 + unsigned int data;
608 +};
609 +
610 +static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
611 +{
612 + return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
613 + + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK);
614 +}
615 +#endif
616 +
617 +static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
618 +{
619 +#ifndef CONFIG_XEN
620 + struct io_apic __iomem *io_apic = io_apic_base(apic);
621 + writel(reg, &io_apic->index);
622 + return readl(&io_apic->data);
623 +#else
624 + struct physdev_apic apic_op;
625 + int ret;
626 +
627 + apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr;
628 + apic_op.reg = reg;
629 + ret = HYPERVISOR_physdev_op(PHYSDEVOP_apic_read, &apic_op);
630 + if (ret)
631 + return ret;
632 + return apic_op.value;
633 +#endif
634 +}
635 +
636 +static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
637 +{
638 +#ifndef CONFIG_XEN
639 + struct io_apic __iomem *io_apic = io_apic_base(apic);
640 + writel(reg, &io_apic->index);
641 + writel(value, &io_apic->data);
642 +#else
643 + struct physdev_apic apic_op;
644 +
645 + apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr;
646 + apic_op.reg = reg;
647 + apic_op.value = value;
648 + WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic_op));
649 +#endif
650 +}
651 +
652 +#ifndef CONFIG_XEN
653 +/*
654 + * Re-write a value: to be used for read-modify-write
655 + * cycles where the read already set up the index register.
656 + *
657 + * Older SiS APIC requires we rewrite the index register
658 + */
659 +static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
660 +{
661 + volatile struct io_apic *io_apic = io_apic_base(apic);
662 + if (sis_apic_bug)
663 + writel(reg, &io_apic->index);
664 + writel(value, &io_apic->data);
665 +}
666 #else
667 -#define vector_to_irq(vector) (vector)
668 +#define io_apic_modify io_apic_write
669 +#endif
670 +
671 +union entry_union {
672 + struct { u32 w1, w2; };
673 + struct IO_APIC_route_entry entry;
674 +};
675 +
676 +#ifndef CONFIG_XEN
677 +static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
678 +{
679 + union entry_union eu;
680 + unsigned long flags;
681 + spin_lock_irqsave(&ioapic_lock, flags);
682 + eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
683 + eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
684 + spin_unlock_irqrestore(&ioapic_lock, flags);
685 + return eu.entry;
686 +}
687 +#endif
688 +
689 +/*
690 + * When we write a new IO APIC routing entry, we need to write the high
691 + * word first! If the mask bit in the low word is clear, we will enable
692 + * the interrupt, and we need to make sure the entry is fully populated
693 + * before that happens.
694 + */
695 +static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
696 +{
697 + unsigned long flags;
698 + union entry_union eu;
699 + eu.entry = e;
700 + spin_lock_irqsave(&ioapic_lock, flags);
701 + io_apic_write(apic, 0x11 + 2*pin, eu.w2);
702 + io_apic_write(apic, 0x10 + 2*pin, eu.w1);
703 + spin_unlock_irqrestore(&ioapic_lock, flags);
704 +}
705 +
706 +#ifndef CONFIG_XEN
707 +/*
708 + * When we mask an IO APIC routing entry, we need to write the low
709 + * word first, in order to set the mask bit before we change the
710 + * high bits!
711 + */
712 +static void ioapic_mask_entry(int apic, int pin)
713 +{
714 + unsigned long flags;
715 + union entry_union eu = { .entry.mask = 1 };
716 +
717 + spin_lock_irqsave(&ioapic_lock, flags);
718 + io_apic_write(apic, 0x10 + 2*pin, eu.w1);
719 + io_apic_write(apic, 0x11 + 2*pin, eu.w2);
720 + spin_unlock_irqrestore(&ioapic_lock, flags);
721 +}
722 #endif
723
724 /*
725 @@ -159,9 +251,7 @@ static void add_pin_to_irq(unsigned int
726 entry->pin = pin;
727 }
728
729 -#ifdef CONFIG_XEN
730 -#define clear_IO_APIC() ((void)0)
731 -#else
732 +#ifndef CONFIG_XEN
733 /*
734 * Reroute an IRQ to a different pin.
735 */
736 @@ -246,25 +336,16 @@ static void unmask_IO_APIC_irq (unsigned
737 static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
738 {
739 struct IO_APIC_route_entry entry;
740 - unsigned long flags;
741
742 /* Check delivery_mode to be sure we're not clearing an SMI pin */
743 - spin_lock_irqsave(&ioapic_lock, flags);
744 - *(((int*)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
745 - *(((int*)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
746 - spin_unlock_irqrestore(&ioapic_lock, flags);
747 + entry = ioapic_read_entry(apic, pin);
748 if (entry.delivery_mode == dest_SMI)
749 return;
750
751 /*
752 * Disable it in the IO-APIC irq-routing table:
753 */
754 - memset(&entry, 0, sizeof(entry));
755 - entry.mask = 1;
756 - spin_lock_irqsave(&ioapic_lock, flags);
757 - io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry) + 0));
758 - io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry) + 1));
759 - spin_unlock_irqrestore(&ioapic_lock, flags);
760 + ioapic_mask_entry(apic, pin);
761 }
762
763 static void clear_IO_APIC (void)
764 @@ -304,7 +385,7 @@ static void set_ioapic_affinity_irq(unsi
765 break;
766 entry = irq_2_pin + entry->next;
767 }
768 - set_irq_info(irq, cpumask);
769 + set_native_irq_info(irq, cpumask);
770 spin_unlock_irqrestore(&ioapic_lock, flags);
771 }
772
773 @@ -1212,43 +1293,43 @@ static inline int IO_APIC_irq_trigger(in
774 /* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
775 u8 irq_vector[NR_IRQ_VECTORS] __read_mostly; /* = { FIRST_DEVICE_VECTOR , 0 }; */
776
777 -int assign_irq_vector(int irq)
778 +static int __assign_irq_vector(int irq)
779 {
780 - unsigned long flags;
781 int vector;
782 struct physdev_irq irq_op;
783
784 - BUG_ON(irq != AUTO_ASSIGN && (unsigned)irq >= NR_IRQ_VECTORS);
785 + BUG_ON((unsigned)irq >= NR_IRQ_VECTORS);
786
787 if (irq < PIRQ_BASE || irq - PIRQ_BASE > NR_PIRQS)
788 return -EINVAL;
789
790 - spin_lock_irqsave(&vector_lock, flags);
791 -
792 - if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) {
793 - spin_unlock_irqrestore(&vector_lock, flags);
794 - return IO_APIC_VECTOR(irq);
795 - }
796 + if (irq_vector[irq] > 0)
797 + return irq_vector[irq];
798
799 irq_op.irq = irq;
800 - if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) {
801 - spin_unlock_irqrestore(&vector_lock, flags);
802 + if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op))
803 return -ENOSPC;
804 - }
805
806 vector = irq_op.vector;
807 - vector_irq[vector] = irq;
808 - if (irq != AUTO_ASSIGN)
809 - IO_APIC_VECTOR(irq) = vector;
810 + irq_vector[irq] = vector;
811 +
812 + return vector;
813 +}
814 +
815 +static int assign_irq_vector(int irq)
816 +{
817 + unsigned long flags;
818 + int vector;
819
820 + spin_lock_irqsave(&vector_lock, flags);
821 + vector = __assign_irq_vector(irq);
822 spin_unlock_irqrestore(&vector_lock, flags);
823
824 return vector;
825 }
826
827 #ifndef CONFIG_XEN
828 -static struct hw_interrupt_type ioapic_level_type;
829 -static struct hw_interrupt_type ioapic_edge_type;
830 +static struct irq_chip ioapic_chip;
831
832 #define IOAPIC_AUTO -1
833 #define IOAPIC_EDGE 0
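
Splitting assign_irq_vector() into a locking wrapper around
__assign_irq_vector() is the usual unlocked-helper pattern: it lets
create_irq(), added further down, combine its scan with the allocation
under a single acquisition of vector_lock. The generic shape, with all
names hypothetical:

    #include <linux/errno.h>
    #include <linux/spinlock.h>

    static DEFINE_SPINLOCK(example_lock);
    static int example_table[16];           /* toy resource table */

    /* caller must hold example_lock */
    static int __example_alloc(void)
    {
            int i;

            for (i = 0; i < 16; i++)
                    if (!example_table[i]) {
                            example_table[i] = 1;
                            return i;
                    }
            return -ENOSPC;
    }

    static int example_alloc(void)
    {
            unsigned long flags;
            int ret;

            spin_lock_irqsave(&example_lock, flags);
            ret = __example_alloc();    /* reusable by other lock holders */
            spin_unlock_irqrestore(&example_lock, flags);
            return ret;
    }
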
834 @@ -1256,16 +1337,16 @@ static struct hw_interrupt_type ioapic_e
835
836 static void ioapic_register_intr(int irq, int vector, unsigned long trigger)
837 {
838 - unsigned idx;
839 -
840 - idx = use_pci_vector() && !platform_legacy_irq(irq) ? vector : irq;
841 -
842 if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
843 trigger == IOAPIC_LEVEL)
844 - irq_desc[idx].chip = &ioapic_level_type;
845 - else
846 - irq_desc[idx].chip = &ioapic_edge_type;
847 - set_intr_gate(vector, interrupt[idx]);
848 + set_irq_chip_and_handler_name(irq, &ioapic_chip,
849 + handle_fasteoi_irq, "fasteoi");
850 + else {
851 + irq_desc[irq].status |= IRQ_DELAYED_DISABLE;
852 + set_irq_chip_and_handler_name(irq, &ioapic_chip,
853 + handle_edge_irq, "edge");
854 + }
855 + set_intr_gate(vector, interrupt[irq]);
856 }
857 #else
858 #define ioapic_register_intr(irq, vector, trigger) evtchn_register_pirq(irq)
859 @@ -1336,9 +1417,8 @@ static void __init setup_IO_APIC_irqs(vo
860 if (!apic && (irq < 16))
861 disable_8259A_irq(irq);
862 }
863 + ioapic_write_entry(apic, pin, entry);
864 spin_lock_irqsave(&ioapic_lock, flags);
865 - io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
866 - io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
867 set_native_irq_info(irq, TARGET_CPUS);
868 spin_unlock_irqrestore(&ioapic_lock, flags);
869 }
870 @@ -1355,7 +1435,6 @@ static void __init setup_IO_APIC_irqs(vo
871 static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, int vector)
872 {
873 struct IO_APIC_route_entry entry;
874 - unsigned long flags;
875
876 memset(&entry,0,sizeof(entry));
877
878 @@ -1380,15 +1459,13 @@ static void __init setup_ExtINT_IRQ0_pin
879 * The timer IRQ doesn't have to know that behind the
880 * scenes we have an 8259A-master in AEOI mode ...
881 */
882 - irq_desc[0].chip = &ioapic_edge_type;
883 + irq_desc[0].chip = &ioapic_chip;
884 + set_irq_handler(0, handle_edge_irq);
885
886 /*
887 * Add it to the IO-APIC irq-routing table:
888 */
889 - spin_lock_irqsave(&ioapic_lock, flags);
890 - io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
891 - io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
892 - spin_unlock_irqrestore(&ioapic_lock, flags);
893 + ioapic_write_entry(apic, pin, entry);
894
895 enable_8259A_irq(0);
896 }
897 @@ -1498,10 +1575,7 @@ void __init print_IO_APIC(void)
898 for (i = 0; i <= reg_01.bits.entries; i++) {
899 struct IO_APIC_route_entry entry;
900
901 - spin_lock_irqsave(&ioapic_lock, flags);
902 - *(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2);
903 - *(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2);
904 - spin_unlock_irqrestore(&ioapic_lock, flags);
905 + entry = ioapic_read_entry(apic, i);
906
907 printk(KERN_DEBUG " %02x %03X %02X ",
908 i,
909 @@ -1521,17 +1595,12 @@ void __init print_IO_APIC(void)
910 );
911 }
912 }
913 - if (use_pci_vector())
914 - printk(KERN_INFO "Using vector-based indexing\n");
915 printk(KERN_DEBUG "IRQ to pin mappings:\n");
916 for (i = 0; i < NR_IRQS; i++) {
917 struct irq_pin_list *entry = irq_2_pin + i;
918 if (entry->pin < 0)
919 continue;
920 - if (use_pci_vector() && !platform_legacy_irq(i))
921 - printk(KERN_DEBUG "IRQ%d ", IO_APIC_VECTOR(i));
922 - else
923 - printk(KERN_DEBUG "IRQ%d ", i);
924 + printk(KERN_DEBUG "IRQ%d ", i);
925 for (;;) {
926 printk("-> %d:%d", entry->apic, entry->pin);
927 if (!entry->next)
928 @@ -1720,10 +1789,7 @@ static void __init enable_IO_APIC(void)
929 /* See if any of the pins is in ExtINT mode */
930 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
931 struct IO_APIC_route_entry entry;
932 - spin_lock_irqsave(&ioapic_lock, flags);
933 - *(((int *)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
934 - *(((int *)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
935 - spin_unlock_irqrestore(&ioapic_lock, flags);
936 + entry = ioapic_read_entry(apic, pin);
937
938
939 /* If the interrupt line is enabled and in ExtInt mode
940 @@ -1782,7 +1848,6 @@ void disable_IO_APIC(void)
941 */
942 if (ioapic_i8259.pin != -1) {
943 struct IO_APIC_route_entry entry;
944 - unsigned long flags;
945
946 memset(&entry, 0, sizeof(entry));
947 entry.mask = 0; /* Enabled */
948 @@ -1799,12 +1864,7 @@ void disable_IO_APIC(void)
949 /*
950 * Add it to the IO-APIC irq-routing table:
951 */
952 - spin_lock_irqsave(&ioapic_lock, flags);
953 - io_apic_write(ioapic_i8259.apic, 0x11+2*ioapic_i8259.pin,
954 - *(((int *)&entry)+1));
955 - io_apic_write(ioapic_i8259.apic, 0x10+2*ioapic_i8259.pin,
956 - *(((int *)&entry)+0));
957 - spin_unlock_irqrestore(&ioapic_lock, flags);
958 + ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
959 }
960 disconnect_bsp_APIC(ioapic_i8259.pin != -1);
961 #endif
962 @@ -1971,6 +2031,8 @@ static int __init timer_irq_works(void)
963 */
964
965 /*
966 + * Startup quirk:
967 + *
968 * Starting up an edge-triggered IO-APIC interrupt is
969 * nasty - we need to make sure that we get the edge.
970 * If it is already asserted for some reason, we need
971 @@ -1978,8 +2040,10 @@ static int __init timer_irq_works(void)
972 *
973 * This is not complete - we should be able to fake
974 * an edge even if it isn't on the 8259A...
975 + *
976 + * (We do this for level-triggered IRQs too - it cannot hurt.)
977 */
978 -static unsigned int startup_edge_ioapic_irq(unsigned int irq)
979 +static unsigned int startup_ioapic_irq(unsigned int irq)
980 {
981 int was_pending = 0;
982 unsigned long flags;
983 @@ -1996,47 +2060,18 @@ static unsigned int startup_edge_ioapic_
984 return was_pending;
985 }
986
987 -/*
988 - * Once we have recorded IRQ_PENDING already, we can mask the
989 - * interrupt for real. This prevents IRQ storms from unhandled
990 - * devices.
991 - */
992 -static void ack_edge_ioapic_irq(unsigned int irq)
993 -{
994 - move_irq(irq);
995 - if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED))
996 - == (IRQ_PENDING | IRQ_DISABLED))
997 - mask_IO_APIC_irq(irq);
998 - ack_APIC_irq();
999 -}
1000 -
1001 -/*
1002 - * Level triggered interrupts can just be masked,
1003 - * and shutting down and starting up the interrupt
1004 - * is the same as enabling and disabling them -- except
1005 - * with a startup need to return a "was pending" value.
1006 - *
1007 - * Level triggered interrupts are special because we
1008 - * do not touch any IO-APIC register while handling
1009 - * them. We ack the APIC in the end-IRQ handler, not
1010 - * in the start-IRQ-handler. Protection against reentrance
1011 - * from the same interrupt is still provided, both by the
1012 - * generic IRQ layer and by the fact that an unacked local
1013 - * APIC does not accept IRQs.
1014 - */
1015 -static unsigned int startup_level_ioapic_irq (unsigned int irq)
1016 +static void ack_ioapic_irq(unsigned int irq)
1017 {
1018 - unmask_IO_APIC_irq(irq);
1019 -
1020 - return 0; /* don't check for pending */
1021 + move_native_irq(irq);
1022 + ack_APIC_irq();
1023 }
1024
1025 -static void end_level_ioapic_irq (unsigned int irq)
1026 +static void ack_ioapic_quirk_irq(unsigned int irq)
1027 {
1028 unsigned long v;
1029 int i;
1030
1031 - move_irq(irq);
1032 + move_native_irq(irq);
1033 /*
1034 * It appears there is an erratum which affects at least version 0x11
1035 * of I/O APIC (that's the 82093AA and cores integrated into various
1036 @@ -2056,7 +2091,7 @@ static void end_level_ioapic_irq (unsign
1037 * operation to prevent an edge-triggered interrupt escaping meanwhile.
1038 * The idea is from Manfred Spraul. --macro
1039 */
1040 - i = IO_APIC_VECTOR(irq);
1041 + i = irq_vector[irq];
1042
1043 v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
1044
1045 @@ -2071,104 +2106,24 @@ static void end_level_ioapic_irq (unsign
1046 }
1047 }
1048
1049 -#ifdef CONFIG_PCI_MSI
1050 -static unsigned int startup_edge_ioapic_vector(unsigned int vector)
1051 -{
1052 - int irq = vector_to_irq(vector);
1053 -
1054 - return startup_edge_ioapic_irq(irq);
1055 -}
1056 -
1057 -static void ack_edge_ioapic_vector(unsigned int vector)
1058 -{
1059 - int irq = vector_to_irq(vector);
1060 -
1061 - move_native_irq(vector);
1062 - ack_edge_ioapic_irq(irq);
1063 -}
1064 -
1065 -static unsigned int startup_level_ioapic_vector (unsigned int vector)
1066 -{
1067 - int irq = vector_to_irq(vector);
1068 -
1069 - return startup_level_ioapic_irq (irq);
1070 -}
1071 -
1072 -static void end_level_ioapic_vector (unsigned int vector)
1073 -{
1074 - int irq = vector_to_irq(vector);
1075 -
1076 - move_native_irq(vector);
1077 - end_level_ioapic_irq(irq);
1078 -}
1079 -
1080 -static void mask_IO_APIC_vector (unsigned int vector)
1081 -{
1082 - int irq = vector_to_irq(vector);
1083 -
1084 - mask_IO_APIC_irq(irq);
1085 -}
1086 -
1087 -static void unmask_IO_APIC_vector (unsigned int vector)
1088 +static int ioapic_retrigger_irq(unsigned int irq)
1089 {
1090 - int irq = vector_to_irq(vector);
1091 -
1092 - unmask_IO_APIC_irq(irq);
1093 -}
1094 -
1095 -#ifdef CONFIG_SMP
1096 -static void set_ioapic_affinity_vector (unsigned int vector,
1097 - cpumask_t cpu_mask)
1098 -{
1099 - int irq = vector_to_irq(vector);
1100 -
1101 - set_native_irq_info(vector, cpu_mask);
1102 - set_ioapic_affinity_irq(irq, cpu_mask);
1103 -}
1104 -#endif
1105 -#endif
1106 -
1107 -static int ioapic_retrigger(unsigned int irq)
1108 -{
1109 - send_IPI_self(IO_APIC_VECTOR(irq));
1110 + send_IPI_self(irq_vector[irq]);
1111
1112 return 1;
1113 }
1114
1115 -/*
1116 - * Level and edge triggered IO-APIC interrupts need different handling,
1117 - * so we use two separate IRQ descriptors. Edge triggered IRQs can be
1118 - * handled with the level-triggered descriptor, but that one has slightly
1119 - * more overhead. Level-triggered interrupts cannot be handled with the
1120 - * edge-triggered handler, without risking IRQ storms and other ugly
1121 - * races.
1122 - */
1123 -static struct hw_interrupt_type ioapic_edge_type __read_mostly = {
1124 - .typename = "IO-APIC-edge",
1125 - .startup = startup_edge_ioapic,
1126 - .shutdown = shutdown_edge_ioapic,
1127 - .enable = enable_edge_ioapic,
1128 - .disable = disable_edge_ioapic,
1129 - .ack = ack_edge_ioapic,
1130 - .end = end_edge_ioapic,
1131 -#ifdef CONFIG_SMP
1132 - .set_affinity = set_ioapic_affinity,
1133 -#endif
1134 - .retrigger = ioapic_retrigger,
1135 -};
1136 -
1137 -static struct hw_interrupt_type ioapic_level_type __read_mostly = {
1138 - .typename = "IO-APIC-level",
1139 - .startup = startup_level_ioapic,
1140 - .shutdown = shutdown_level_ioapic,
1141 - .enable = enable_level_ioapic,
1142 - .disable = disable_level_ioapic,
1143 - .ack = mask_and_ack_level_ioapic,
1144 - .end = end_level_ioapic,
1145 +static struct irq_chip ioapic_chip __read_mostly = {
1146 + .name = "IO-APIC",
1147 + .startup = startup_ioapic_irq,
1148 + .mask = mask_IO_APIC_irq,
1149 + .unmask = unmask_IO_APIC_irq,
1150 + .ack = ack_ioapic_irq,
1151 + .eoi = ack_ioapic_quirk_irq,
1152 #ifdef CONFIG_SMP
1153 - .set_affinity = set_ioapic_affinity,
1154 + .set_affinity = set_ioapic_affinity_irq,
1155 #endif
1156 - .retrigger = ioapic_retrigger,
1157 + .retrigger = ioapic_retrigger_irq,
1158 };
1159 #endif /* !CONFIG_XEN */
1160
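
This hunk is the heart of the io_apic_32-xen.c conversion: the two
hw_interrupt_type descriptors, whose ->startup/->ack/->end methods encoded
the edge-versus-level policy, collapse into one irq_chip of primitive
operations, and the policy moves into genirq flow handlers chosen at
registration time. Driver-side shape of the new API, with hypothetical
chip methods:

    #include <linux/irq.h>

    static void example_mask(unsigned int irq)   { /* mask at device */ }
    static void example_unmask(unsigned int irq) { /* unmask */ }
    static void example_ack(unsigned int irq)    { /* ack or eoi */ }

    static struct irq_chip example_chip = {
            .name   = "example",
            .mask   = example_mask,
            .unmask = example_unmask,
            .ack    = example_ack,      /* used by handle_edge_irq */
            .eoi    = example_ack,      /* used by handle_fasteoi_irq */
    };

    static void example_register(unsigned int irq, int level)
    {
            if (level)
                    set_irq_chip_and_handler_name(irq, &example_chip,
                                                  handle_fasteoi_irq,
                                                  "fasteoi");
            else
                    set_irq_chip_and_handler_name(irq, &example_chip,
                                                  handle_edge_irq,
                                                  "edge");
    }
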
1161 @@ -2189,12 +2144,7 @@ static inline void init_IO_APIC_traps(vo
1162 */
1163 for (irq = 0; irq < NR_IRQS ; irq++) {
1164 int tmp = irq;
1165 - if (use_pci_vector()) {
1166 - if (!platform_legacy_irq(tmp))
1167 - if ((tmp = vector_to_irq(tmp)) == -1)
1168 - continue;
1169 - }
1170 - if (IO_APIC_IRQ(tmp) && !IO_APIC_VECTOR(tmp)) {
1171 + if (IO_APIC_IRQ(tmp) && !irq_vector[tmp]) {
1172 /*
1173 * Hmm.. We don't have an entry for this,
1174 * so default to an old-fashioned 8259
1175 @@ -2205,22 +2155,23 @@ static inline void init_IO_APIC_traps(vo
1176 #ifndef CONFIG_XEN
1177 else
1178 /* Strange. Oh, well.. */
1179 - irq_desc[irq].chip = &no_irq_type;
1180 + irq_desc[irq].chip = &no_irq_chip;
1181 #endif
1182 }
1183 }
1184 }
1185
1186 #ifndef CONFIG_XEN
1187 -static void enable_lapic_irq (unsigned int irq)
1188 -{
1189 - unsigned long v;
1190 +/*
1191 + * The local APIC irq-chip implementation:
1192 + */
1193
1194 - v = apic_read(APIC_LVT0);
1195 - apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED);
1196 +static void ack_apic(unsigned int irq)
1197 +{
1198 + ack_APIC_irq();
1199 }
1200
1201 -static void disable_lapic_irq (unsigned int irq)
1202 +static void mask_lapic_irq (unsigned int irq)
1203 {
1204 unsigned long v;
1205
1206 @@ -2228,21 +2179,19 @@ static void disable_lapic_irq (unsigned
1207 apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED);
1208 }
1209
1210 -static void ack_lapic_irq (unsigned int irq)
1211 +static void unmask_lapic_irq (unsigned int irq)
1212 {
1213 - ack_APIC_irq();
1214 -}
1215 + unsigned long v;
1216
1217 -static void end_lapic_irq (unsigned int i) { /* nothing */ }
1218 + v = apic_read(APIC_LVT0);
1219 + apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED);
1220 +}
1221
1222 -static struct hw_interrupt_type lapic_irq_type __read_mostly = {
1223 - .typename = "local-APIC-edge",
1224 - .startup = NULL, /* startup_irq() not used for IRQ0 */
1225 - .shutdown = NULL, /* shutdown_irq() not used for IRQ0 */
1226 - .enable = enable_lapic_irq,
1227 - .disable = disable_lapic_irq,
1228 - .ack = ack_lapic_irq,
1229 - .end = end_lapic_irq
1230 +static struct irq_chip lapic_chip __read_mostly = {
1231 + .name = "local-APIC-edge",
1232 + .mask = mask_lapic_irq,
1233 + .unmask = unmask_lapic_irq,
1234 + .eoi = ack_apic,
1235 };
1236
1237 static void setup_nmi (void)
1238 @@ -2275,17 +2224,13 @@ static inline void unlock_ExtINT_logic(v
1239 int apic, pin, i;
1240 struct IO_APIC_route_entry entry0, entry1;
1241 unsigned char save_control, save_freq_select;
1242 - unsigned long flags;
1243
1244 pin = find_isa_irq_pin(8, mp_INT);
1245 apic = find_isa_irq_apic(8, mp_INT);
1246 if (pin == -1)
1247 return;
1248
1249 - spin_lock_irqsave(&ioapic_lock, flags);
1250 - *(((int *)&entry0) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
1251 - *(((int *)&entry0) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
1252 - spin_unlock_irqrestore(&ioapic_lock, flags);
1253 + entry0 = ioapic_read_entry(apic, pin);
1254 clear_IO_APIC_pin(apic, pin);
1255
1256 memset(&entry1, 0, sizeof(entry1));
1257 @@ -2298,10 +2243,7 @@ static inline void unlock_ExtINT_logic(v
1258 entry1.trigger = 0;
1259 entry1.vector = 0;
1260
1261 - spin_lock_irqsave(&ioapic_lock, flags);
1262 - io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry1) + 1));
1263 - io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry1) + 0));
1264 - spin_unlock_irqrestore(&ioapic_lock, flags);
1265 + ioapic_write_entry(apic, pin, entry1);
1266
1267 save_control = CMOS_READ(RTC_CONTROL);
1268 save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
1269 @@ -2320,10 +2262,7 @@ static inline void unlock_ExtINT_logic(v
1270 CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
1271 clear_IO_APIC_pin(apic, pin);
1272
1273 - spin_lock_irqsave(&ioapic_lock, flags);
1274 - io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry0) + 1));
1275 - io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry0) + 0));
1276 - spin_unlock_irqrestore(&ioapic_lock, flags);
1277 + ioapic_write_entry(apic, pin, entry0);
1278 }
1279
1280 int timer_uses_ioapic_pin_0;
1281 @@ -2423,7 +2362,8 @@ static inline void check_timer(void)
1282 printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ...");
1283
1284 disable_8259A_irq(0);
1285 - irq_desc[0].chip = &lapic_irq_type;
1286 + set_irq_chip_and_handler_name(0, &lapic_chip, handle_fasteoi_irq,
1287 + "fasteio");
1288 apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */
1289 enable_8259A_irq(0);
1290
1291 @@ -2537,17 +2477,12 @@ static int ioapic_suspend(struct sys_dev
1292 {
1293 struct IO_APIC_route_entry *entry;
1294 struct sysfs_ioapic_data *data;
1295 - unsigned long flags;
1296 int i;
1297
1298 data = container_of(dev, struct sysfs_ioapic_data, dev);
1299 entry = data->entry;
1300 - spin_lock_irqsave(&ioapic_lock, flags);
1301 - for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) {
1302 - *(((int *)entry) + 1) = io_apic_read(dev->id, 0x11 + 2 * i);
1303 - *(((int *)entry) + 0) = io_apic_read(dev->id, 0x10 + 2 * i);
1304 - }
1305 - spin_unlock_irqrestore(&ioapic_lock, flags);
1306 + for (i = 0; i < nr_ioapic_registers[dev->id]; i ++)
1307 + entry[i] = ioapic_read_entry(dev->id, i);
1308
1309 return 0;
1310 }
1311 @@ -2569,11 +2504,9 @@ static int ioapic_resume(struct sys_devi
1312 reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid;
1313 io_apic_write(dev->id, 0, reg_00.raw);
1314 }
1315 - for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) {
1316 - io_apic_write(dev->id, 0x11+2*i, *(((int *)entry)+1));
1317 - io_apic_write(dev->id, 0x10+2*i, *(((int *)entry)+0));
1318 - }
1319 spin_unlock_irqrestore(&ioapic_lock, flags);
1320 + for (i = 0; i < nr_ioapic_registers[dev->id]; i ++)
1321 + ioapic_write_entry(dev->id, i, entry[i]);
1322
1323 return 0;
1324 }
1325 @@ -2619,8 +2552,240 @@ static int __init ioapic_init_sysfs(void
1326
1327 device_initcall(ioapic_init_sysfs);
1328
1329 +/*
1330 + * Dynamic irq allocation and deallocation
1331 + */
1332 +int create_irq(void)
1333 +{
1334 + /* Allocate an unused irq */
1335 + int irq, new, vector;
1336 + unsigned long flags;
1337 +
1338 + irq = -ENOSPC;
1339 + spin_lock_irqsave(&vector_lock, flags);
1340 + for (new = (NR_IRQS - 1); new >= 0; new--) {
1341 + if (platform_legacy_irq(new))
1342 + continue;
1343 + if (irq_vector[new] != 0)
1344 + continue;
1345 + vector = __assign_irq_vector(new);
1346 + if (likely(vector > 0))
1347 + irq = new;
1348 + break;
1349 + }
1350 + spin_unlock_irqrestore(&vector_lock, flags);
1351 +
1352 + if (irq >= 0) {
1353 + set_intr_gate(vector, interrupt[irq]);
1354 + dynamic_irq_init(irq);
1355 + }
1356 + return irq;
1357 +}
1358 +
1359 +void destroy_irq(unsigned int irq)
1360 +{
1361 + unsigned long flags;
1362 +
1363 + dynamic_irq_cleanup(irq);
1364 +
1365 + spin_lock_irqsave(&vector_lock, flags);
1366 + irq_vector[irq] = 0;
1367 + spin_unlock_irqrestore(&vector_lock, flags);
1368 +}
1369 +
1370 #endif /* CONFIG_XEN */
1371
1372 +/*
1373 + * MSI message composition
1374 + */
1375 +#if defined(CONFIG_PCI_MSI) && !defined(CONFIG_XEN)
1376 +static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
1377 +{
1378 + int vector;
1379 + unsigned dest;
1380 +
1381 + vector = assign_irq_vector(irq);
1382 + if (vector >= 0) {
1383 + dest = cpu_mask_to_apicid(TARGET_CPUS);
1384 +
1385 + msg->address_hi = MSI_ADDR_BASE_HI;
1386 + msg->address_lo =
1387 + MSI_ADDR_BASE_LO |
1388 + ((INT_DEST_MODE == 0) ?
1389 + MSI_ADDR_DEST_MODE_PHYSICAL:
1390 + MSI_ADDR_DEST_MODE_LOGICAL) |
1391 + ((INT_DELIVERY_MODE != dest_LowestPrio) ?
1392 + MSI_ADDR_REDIRECTION_CPU:
1393 + MSI_ADDR_REDIRECTION_LOWPRI) |
1394 + MSI_ADDR_DEST_ID(dest);
1395 +
1396 + msg->data =
1397 + MSI_DATA_TRIGGER_EDGE |
1398 + MSI_DATA_LEVEL_ASSERT |
1399 + ((INT_DELIVERY_MODE != dest_LowestPrio) ?
1400 + MSI_DATA_DELIVERY_FIXED:
1401 + MSI_DATA_DELIVERY_LOWPRI) |
1402 + MSI_DATA_VECTOR(vector);
1403 + }
1404 + return vector;
1405 +}
1406 +
1407 +#ifdef CONFIG_SMP
1408 +static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
1409 +{
1410 + struct msi_msg msg;
1411 + unsigned int dest;
1412 + cpumask_t tmp;
1413 + int vector;
1414 +
1415 + cpus_and(tmp, mask, cpu_online_map);
1416 + if (cpus_empty(tmp))
1417 + tmp = TARGET_CPUS;
1418 +
1419 + vector = assign_irq_vector(irq);
1420 + if (vector < 0)
1421 + return;
1422 +
1423 + dest = cpu_mask_to_apicid(mask);
1424 +
1425 + read_msi_msg(irq, &msg);
1426 +
1427 + msg.data &= ~MSI_DATA_VECTOR_MASK;
1428 + msg.data |= MSI_DATA_VECTOR(vector);
1429 + msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
1430 + msg.address_lo |= MSI_ADDR_DEST_ID(dest);
1431 +
1432 + write_msi_msg(irq, &msg);
1433 + set_native_irq_info(irq, mask);
1434 +}
1435 +#endif /* CONFIG_SMP */
1436 +
1437 +/*
1438 + * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
1439 + * which implement the MSI or MSI-X Capability Structure.
1440 + */
1441 +static struct irq_chip msi_chip = {
1442 + .name = "PCI-MSI",
1443 + .unmask = unmask_msi_irq,
1444 + .mask = mask_msi_irq,
1445 + .ack = ack_ioapic_irq,
1446 +#ifdef CONFIG_SMP
1447 + .set_affinity = set_msi_irq_affinity,
1448 +#endif
1449 + .retrigger = ioapic_retrigger_irq,
1450 +};
1451 +
1452 +int arch_setup_msi_irq(unsigned int irq, struct pci_dev *dev)
1453 +{
1454 + struct msi_msg msg;
1455 + int ret;
1456 + ret = msi_compose_msg(dev, irq, &msg);
1457 + if (ret < 0)
1458 + return ret;
1459 +
1460 + write_msi_msg(irq, &msg);
1461 +
1462 + set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq,
1463 + "edge");
1464 +
1465 + return 0;
1466 +}
1467 +
1468 +void arch_teardown_msi_irq(unsigned int irq)
1469 +{
1470 + return;
1471 +}
1472 +
1473 +#endif /* CONFIG_PCI_MSI */
1474 +
1475 +/*
1476 + * Hypertransport interrupt support
1477 + */
1478 +#ifdef CONFIG_HT_IRQ
1479 +
1480 +#ifdef CONFIG_SMP
1481 +
1482 +static void target_ht_irq(unsigned int irq, unsigned int dest)
1483 +{
1484 + struct ht_irq_msg msg;
1485 + fetch_ht_irq_msg(irq, &msg);
1486 +
1487 + msg.address_lo &= ~(HT_IRQ_LOW_DEST_ID_MASK);
1488 + msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK);
1489 +
1490 + msg.address_lo |= HT_IRQ_LOW_DEST_ID(dest);
1491 + msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest);
1492 +
1493 + write_ht_irq_msg(irq, &msg);
1494 +}
1495 +
1496 +static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
1497 +{
1498 + unsigned int dest;
1499 + cpumask_t tmp;
1500 +
1501 + cpus_and(tmp, mask, cpu_online_map);
1502 + if (cpus_empty(tmp))
1503 + tmp = TARGET_CPUS;
1504 +
1505 + cpus_and(mask, tmp, CPU_MASK_ALL);
1506 +
1507 + dest = cpu_mask_to_apicid(mask);
1508 +
1509 + target_ht_irq(irq, dest);
1510 + set_native_irq_info(irq, mask);
1511 +}
1512 +#endif
1513 +
1514 +static struct irq_chip ht_irq_chip = {
1515 + .name = "PCI-HT",
1516 + .mask = mask_ht_irq,
1517 + .unmask = unmask_ht_irq,
1518 + .ack = ack_ioapic_irq,
1519 +#ifdef CONFIG_SMP
1520 + .set_affinity = set_ht_irq_affinity,
1521 +#endif
1522 + .retrigger = ioapic_retrigger_irq,
1523 +};
1524 +
1525 +int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
1526 +{
1527 + int vector;
1528 +
1529 + vector = assign_irq_vector(irq);
1530 + if (vector >= 0) {
1531 + struct ht_irq_msg msg;
1532 + unsigned dest;
1533 + cpumask_t tmp;
1534 +
1535 + cpus_clear(tmp);
1536 + cpu_set(vector >> 8, tmp);
1537 + dest = cpu_mask_to_apicid(tmp);
1538 +
1539 + msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
1540 +
1541 + msg.address_lo =
1542 + HT_IRQ_LOW_BASE |
1543 + HT_IRQ_LOW_DEST_ID(dest) |
1544 + HT_IRQ_LOW_VECTOR(vector) |
1545 + ((INT_DEST_MODE == 0) ?
1546 + HT_IRQ_LOW_DM_PHYSICAL :
1547 + HT_IRQ_LOW_DM_LOGICAL) |
1548 + HT_IRQ_LOW_RQEOI_EDGE |
1549 + ((INT_DELIVERY_MODE != dest_LowestPrio) ?
1550 + HT_IRQ_LOW_MT_FIXED :
1551 + HT_IRQ_LOW_MT_ARBITRATED) |
1552 + HT_IRQ_LOW_IRQ_MASKED;
1553 +
1554 + write_ht_irq_msg(irq, &msg);
1555 +
1556 + set_irq_chip_and_handler_name(irq, &ht_irq_chip,
1557 + handle_edge_irq, "edge");
1558 + }
1559 + return vector;
1560 +}
1561 +#endif /* CONFIG_HT_IRQ */
1562 +
1563 /* --------------------------------------------------------------------------
1564 ACPI-based IOAPIC Configuration
1565 -------------------------------------------------------------------------- */
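
The MSI support added above treats an interrupt as nothing more than an
address/data pair the device posts into the local APIC's window, which is
why retargeting an IRQ reduces to a read-modify-write of that pair.
Condensed from set_msi_irq_affinity() in this hunk (the function name is
hypothetical, the helpers are the ones the hunk uses):

    #include <linux/msi.h>
    #include <asm/msidef.h>

    static void example_retarget(unsigned int irq, unsigned int dest,
                                 int vector)
    {
            struct msi_msg msg;

            read_msi_msg(irq, &msg);
            msg.data &= ~MSI_DATA_VECTOR_MASK;
            msg.data |= MSI_DATA_VECTOR(vector);        /* new vector */
            msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
            msg.address_lo |= MSI_ADDR_DEST_ID(dest);   /* new target */
            write_msi_msg(irq, &msg);
    }
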
1566 @@ -2774,13 +2939,34 @@ int io_apic_set_pci_routing (int ioapic,
1567 if (!ioapic && (irq < 16))
1568 disable_8259A_irq(irq);
1569
1570 + ioapic_write_entry(ioapic, pin, entry);
1571 spin_lock_irqsave(&ioapic_lock, flags);
1572 - io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1));
1573 - io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0));
1574 - set_native_irq_info(use_pci_vector() ? entry.vector : irq, TARGET_CPUS);
1575 + set_native_irq_info(irq, TARGET_CPUS);
1576 spin_unlock_irqrestore(&ioapic_lock, flags);
1577
1578 return 0;
1579 }
1580
1581 #endif /* CONFIG_ACPI */
1582 +
1583 +static int __init parse_disable_timer_pin_1(char *arg)
1584 +{
1585 + disable_timer_pin_1 = 1;
1586 + return 0;
1587 +}
1588 +early_param("disable_timer_pin_1", parse_disable_timer_pin_1);
1589 +
1590 +static int __init parse_enable_timer_pin_1(char *arg)
1591 +{
1592 + disable_timer_pin_1 = -1;
1593 + return 0;
1594 +}
1595 +early_param("enable_timer_pin_1", parse_enable_timer_pin_1);
1596 +
1597 +static int __init parse_noapic(char *arg)
1598 +{
1599 + /* disable IO-APIC */
1600 + disable_ioapic_setup();
1601 + return 0;
1602 +}
1603 +early_param("noapic", parse_noapic);
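
The switch from __setup() to early_param() at the end of this file is
about ordering: early_param() hooks run from parse_early_param() during
setup_arch(), early enough to influence IO-APIC probing, whereas __setup()
handlers run much later in start_kernel(). A hypothetical flag wired up
the same way:

    #include <linux/init.h>

    static int example_flag __initdata;

    static int __init parse_example(char *arg)
    {
            example_flag = 1;
            return 0;               /* 0 = handled, no error */
    }
    early_param("example", parse_example);
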
1604 Index: head-2008-12-01/arch/x86/kernel/irq_32-xen.c
1605 ===================================================================
1606 --- head-2008-12-01.orig/arch/x86/kernel/irq_32-xen.c 2008-12-03 15:49:14.000000000 +0100
1607 +++ head-2008-12-01/arch/x86/kernel/irq_32-xen.c 2008-12-01 11:29:05.000000000 +0100
1608 @@ -53,8 +53,10 @@ static union irq_ctx *softirq_ctx[NR_CPU
1609 */
1610 fastcall unsigned int do_IRQ(struct pt_regs *regs)
1611 {
1612 + struct pt_regs *old_regs;
1613 /* high bit used in ret_from_ code */
1614 int irq = ~regs->orig_eax;
1615 + struct irq_desc *desc = irq_desc + irq;
1616 #ifdef CONFIG_4KSTACKS
1617 union irq_ctx *curctx, *irqctx;
1618 u32 *isp;
1619 @@ -66,6 +68,7 @@ fastcall unsigned int do_IRQ(struct pt_r
1620 BUG();
1621 }
1622
1623 + old_regs = set_irq_regs(regs);
1624 /*irq_enter();*/
1625 #ifdef CONFIG_DEBUG_STACKOVERFLOW
1626 /* Debugging check for stack overflow: is there less than 1KB free? */
1627 @@ -110,19 +113,20 @@ fastcall unsigned int do_IRQ(struct pt_r
1628 (curctx->tinfo.preempt_count & SOFTIRQ_MASK);
1629
1630 asm volatile(
1631 - " xchgl %%ebx,%%esp \n"
1632 - " call __do_IRQ \n"
1633 + " xchgl %%ebx,%%esp \n"
1634 + " call *%%edi \n"
1635 " movl %%ebx,%%esp \n"
1636 : "=a" (arg1), "=d" (arg2), "=b" (ebx)
1637 - : "0" (irq), "1" (regs), "2" (isp)
1638 - : "memory", "cc", "ecx"
1639 + : "0" (irq), "1" (desc), "2" (isp),
1640 + "D" (desc->handle_irq)
1641 + : "memory", "cc"
1642 );
1643 } else
1644 #endif
1645 - __do_IRQ(irq, regs);
1646 + desc->handle_irq(irq, desc);
1647
1648 /*irq_exit();*/
1649 -
1650 + set_irq_regs(old_regs);
1651 return 1;
1652 }
1653
1654 @@ -253,7 +257,8 @@ int show_interrupts(struct seq_file *p,
1655 for_each_online_cpu(j)
1656 seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
1657 #endif
1658 - seq_printf(p, " %14s", irq_desc[i].chip->typename);
1659 + seq_printf(p, " %8s", irq_desc[i].chip->name);
1660 + seq_printf(p, "-%-8s", irq_desc[i].name);
1661 seq_printf(p, " %s", action->name);
1662
1663 for (action=action->next; action; action = action->next)
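
The do_IRQ() changes above are the producer side of the regs-parameter
removal seen earlier: the interrupted frame is published per-CPU, the
previous value is restored on exit so nested interrupts unwind correctly,
and dispatch goes through the descriptor's flow handler rather than
__do_IRQ(). The skeleton of that pattern, stripped of the 4K-stack
switching (the function name is hypothetical):

    #include <linux/irq.h>
    #include <asm/irq_regs.h>

    unsigned int example_do_IRQ(struct pt_regs *regs, unsigned int irq)
    {
            struct pt_regs *old_regs = set_irq_regs(regs);
            struct irq_desc *desc = irq_desc + irq;

            desc->handle_irq(irq, desc);    /* flow-handler dispatch */

            set_irq_regs(old_regs);         /* restore for nesting */
            return 1;
    }
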
1664 Index: head-2008-12-01/arch/x86/kernel/ldt_32-xen.c
1665 ===================================================================
1666 --- head-2008-12-01.orig/arch/x86/kernel/ldt_32-xen.c 2008-12-03 15:49:14.000000000 +0100
1667 +++ head-2008-12-01/arch/x86/kernel/ldt_32-xen.c 2008-12-01 11:29:05.000000000 +0100
1668 @@ -1,5 +1,5 @@
1669 /*
1670 - * linux/kernel/ldt.c
1671 + * linux/arch/i386/kernel/ldt.c
1672 *
1673 * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds
1674 * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
1675 Index: head-2008-12-01/arch/x86/kernel/microcode-xen.c
1676 ===================================================================
1677 --- head-2008-12-01.orig/arch/x86/kernel/microcode-xen.c 2008-12-03 15:49:14.000000000 +0100
1678 +++ head-2008-12-01/arch/x86/kernel/microcode-xen.c 2008-12-01 11:29:05.000000000 +0100
1679 @@ -2,6 +2,7 @@
1680 * Intel CPU Microcode Update Driver for Linux
1681 *
1682 * Copyright (C) 2000-2004 Tigran Aivazian
1683 + * 2006 Shaohua Li <shaohua.li@intel.com>
1684 *
1685 * This driver allows upgrading microcode on Intel processors
1686 * belonging to IA-32 family - PentiumPro, Pentium II,
1687 @@ -33,7 +34,9 @@
1688 #include <linux/spinlock.h>
1689 #include <linux/mm.h>
1690 #include <linux/mutex.h>
1691 -#include <linux/syscalls.h>
1692 +#include <linux/cpu.h>
1693 +#include <linux/firmware.h>
1694 +#include <linux/platform_device.h>
1695
1696 #include <asm/msr.h>
1697 #include <asm/uaccess.h>
1698 @@ -55,12 +58,7 @@ module_param(verbose, int, 0644);
1699 /* no concurrent ->write()s are allowed on /dev/cpu/microcode */
1700 static DEFINE_MUTEX(microcode_mutex);
1701
1702 -static int microcode_open (struct inode *unused1, struct file *unused2)
1703 -{
1704 - return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
1705 -}
1706 -
1707 -
1708 +#ifdef CONFIG_MICROCODE_OLD_INTERFACE
1709 static int do_microcode_update (const void __user *ubuf, size_t len)
1710 {
1711 int err;
1712 @@ -85,6 +83,11 @@ static int do_microcode_update (const vo
1713 return err;
1714 }
1715
1716 +static int microcode_open (struct inode *unused1, struct file *unused2)
1717 +{
1718 + return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
1719 +}
1720 +
1721 static ssize_t microcode_write (struct file *file, const char __user *buf, size_t len, loff_t *ppos)
1722 {
1723 ssize_t ret;
1724 @@ -117,7 +120,7 @@ static struct miscdevice microcode_dev =
1725 .fops = &microcode_fops,
1726 };
1727
1728 -static int __init microcode_init (void)
1729 +static int __init microcode_dev_init (void)
1730 {
1731 int error;
1732
1733 @@ -129,6 +132,68 @@ static int __init microcode_init (void)
1734 return error;
1735 }
1736
1737 + return 0;
1738 +}
1739 +
1740 +static void __exit microcode_dev_exit (void)
1741 +{
1742 + misc_deregister(&microcode_dev);
1743 +}
1744 +
1745 +MODULE_ALIAS_MISCDEV(MICROCODE_MINOR);
1746 +#else
1747 +#define microcode_dev_init() 0
1748 +#define microcode_dev_exit() do { } while(0)
1749 +#endif
1750 +
1751 +/* fake device for request_firmware */
1752 +static struct platform_device *microcode_pdev;
1753 +
1754 +static int request_microcode(void)
1755 +{
1756 + char name[30];
1757 + const struct cpuinfo_x86 *c = &boot_cpu_data;
1758 + const struct firmware *firmware;
1759 + int error;
1760 + struct xen_platform_op op;
1761 +
1762 + sprintf(name,"intel-ucode/%02x-%02x-%02x",
1763 + c->x86, c->x86_model, c->x86_mask);
1764 + error = request_firmware(&firmware, name, &microcode_pdev->dev);
1765 + if (error) {
1766 + pr_debug("ucode data file %s load failed\n", name);
1767 + return error;
1768 + }
1769 +
1770 + op.cmd = XENPF_microcode_update;
1771 + set_xen_guest_handle(op.u.microcode.data, (void *)firmware->data);
1772 + op.u.microcode.length = firmware->size;
1773 + error = HYPERVISOR_platform_op(&op);
1774 +
1775 + release_firmware(firmware);
1776 +
1777 + if (error)
1778 + pr_debug("ucode load failed\n");
1779 +
1780 + return error;
1781 +}
1782 +
1783 +static int __init microcode_init (void)
1784 +{
1785 + int error;
1786 +
1787 + error = microcode_dev_init();
1788 + if (error)
1789 + return error;
1790 + microcode_pdev = platform_device_register_simple("microcode", -1,
1791 + NULL, 0);
1792 + if (IS_ERR(microcode_pdev)) {
1793 + microcode_dev_exit();
1794 + return PTR_ERR(microcode_pdev);
1795 + }
1796 +
1797 + request_microcode();
1798 +
1799 printk(KERN_INFO
1800 "IA-32 Microcode Update Driver: v" MICROCODE_VERSION " <tigran@veritas.com>\n");
1801 return 0;
1802 @@ -136,9 +201,9 @@ static int __init microcode_init (void)
1803
1804 static void __exit microcode_exit (void)
1805 {
1806 - misc_deregister(&microcode_dev);
1807 + microcode_dev_exit();
1808 + platform_device_unregister(microcode_pdev);
1809 }
1810
1811 module_init(microcode_init)
1812 module_exit(microcode_exit)
1813 -MODULE_ALIAS_MISCDEV(MICROCODE_MINOR);
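
The microcode rework above replaces the write()-only /dev interface with
the firmware loader, and registering a dummy platform device is what gives
request_firmware() a struct device to hang its sysfs loading machinery on.
Generic shape of that pattern ("example" and the blob name are
placeholders):

    #include <linux/err.h>
    #include <linux/firmware.h>
    #include <linux/platform_device.h>

    static struct platform_device *example_pdev;

    static int __init example_load(void)
    {
            const struct firmware *fw;
            int err;

            /* parent device for the sysfs firmware-loading interface */
            example_pdev = platform_device_register_simple("example", -1,
                                                           NULL, 0);
            if (IS_ERR(example_pdev))
                    return PTR_ERR(example_pdev);

            err = request_firmware(&fw, "example/blob.bin",
                                   &example_pdev->dev);
            if (!err) {
                    /* ...hand fw->data / fw->size to the consumer... */
                    release_firmware(fw);
            }
            return err;
    }
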
1814 Index: head-2008-12-01/arch/x86/kernel/mpparse_32-xen.c
1815 ===================================================================
1816 --- head-2008-12-01.orig/arch/x86/kernel/mpparse_32-xen.c 2008-12-03 15:49:14.000000000 +0100
1817 +++ head-2008-12-01/arch/x86/kernel/mpparse_32-xen.c 2008-12-01 11:29:05.000000000 +0100
1818 @@ -30,6 +30,7 @@
1819 #include <asm/io_apic.h>
1820
1821 #include <mach_apic.h>
1822 +#include <mach_apicdef.h>
1823 #include <mach_mpparse.h>
1824 #include <bios_ebda.h>
1825
1826 @@ -68,7 +69,7 @@ unsigned int def_to_bigsmp = 0;
1827 /* Processor that is doing the boot up */
1828 unsigned int boot_cpu_physical_apicid = -1U;
1829 /* Internal processor count */
1830 -static unsigned int __devinitdata num_processors;
1831 +unsigned int __cpuinitdata num_processors;
1832
1833 /* Bitmask of physically existing CPUs */
1834 physid_mask_t phys_cpu_present_map;
1835 @@ -235,12 +236,14 @@ static void __init MP_bus_info (struct m
1836
1837 mpc_oem_bus_info(m, str, translation_table[mpc_record]);
1838
1839 +#if MAX_MP_BUSSES < 256
1840 if (m->mpc_busid >= MAX_MP_BUSSES) {
1841 printk(KERN_WARNING "MP table busid value (%d) for bustype %s "
1842 " is too large, max. supported is %d\n",
1843 m->mpc_busid, str, MAX_MP_BUSSES - 1);
1844 return;
1845 }
1846 +#endif
1847
1848 if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA)-1) == 0) {
1849 mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
1850 @@ -300,19 +303,6 @@ static void __init MP_lintsrc_info (stru
1851 m->mpc_irqtype, m->mpc_irqflag & 3,
1852 (m->mpc_irqflag >> 2) &3, m->mpc_srcbusid,
1853 m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
1854 - /*
1855 - * Well it seems all SMP boards in existence
1856 - * use ExtINT/LVT1 == LINT0 and
1857 - * NMI/LVT2 == LINT1 - the following check
1858 - * will show us if this assumptions is false.
1859 - * Until then we do not have to add baggage.
1860 - */
1861 - if ((m->mpc_irqtype == mp_ExtINT) &&
1862 - (m->mpc_destapiclint != 0))
1863 - BUG();
1864 - if ((m->mpc_irqtype == mp_NMI) &&
1865 - (m->mpc_destapiclint != 1))
1866 - BUG();
1867 }
1868
1869 #ifdef CONFIG_X86_NUMAQ
1870 @@ -838,8 +828,7 @@ int es7000_plat;
1871
1872 #ifdef CONFIG_ACPI
1873
1874 -void __init mp_register_lapic_address (
1875 - u64 address)
1876 +void __init mp_register_lapic_address(u64 address)
1877 {
1878 #ifndef CONFIG_XEN
1879 mp_lapic_addr = (unsigned long) address;
1880 @@ -853,13 +842,10 @@ void __init mp_register_lapic_address (
1881 #endif
1882 }
1883
1884 -
1885 -void __devinit mp_register_lapic (
1886 - u8 id,
1887 - u8 enabled)
1888 +void __devinit mp_register_lapic (u8 id, u8 enabled)
1889 {
1890 struct mpc_config_processor processor;
1891 - int boot_cpu = 0;
1892 + int boot_cpu = 0;
1893
1894 if (MAX_APICS - id <= 0) {
1895 printk(KERN_WARNING "Processor #%d invalid (max %d)\n",
1896 @@ -898,11 +884,9 @@ static struct mp_ioapic_routing {
1897 u32 pin_programmed[4];
1898 } mp_ioapic_routing[MAX_IO_APICS];
1899
1900 -
1901 -static int mp_find_ioapic (
1902 - int gsi)
1903 +static int mp_find_ioapic (int gsi)
1904 {
1905 - int i = 0;
1906 + int i = 0;
1907
1908 /* Find the IOAPIC that manages this GSI. */
1909 for (i = 0; i < nr_ioapics; i++) {
1910 @@ -915,15 +899,11 @@ static int mp_find_ioapic (
1911
1912 return -1;
1913 }
1914 -
1915
1916 -void __init mp_register_ioapic (
1917 - u8 id,
1918 - u32 address,
1919 - u32 gsi_base)
1920 +void __init mp_register_ioapic(u8 id, u32 address, u32 gsi_base)
1921 {
1922 - int idx = 0;
1923 - int tmpid;
1924 + int idx = 0;
1925 + int tmpid;
1926
1927 if (nr_ioapics >= MAX_IO_APICS) {
1928 printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
1929 @@ -971,16 +951,10 @@ void __init mp_register_ioapic (
1930 mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr,
1931 mp_ioapic_routing[idx].gsi_base,
1932 mp_ioapic_routing[idx].gsi_end);
1933 -
1934 - return;
1935 }
1936
1937 -
1938 -void __init mp_override_legacy_irq (
1939 - u8 bus_irq,
1940 - u8 polarity,
1941 - u8 trigger,
1942 - u32 gsi)
1943 +void __init
1944 +mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
1945 {
1946 struct mpc_config_intsrc intsrc;
1947 int ioapic = -1;
1948 @@ -1018,15 +992,13 @@ void __init mp_override_legacy_irq (
1949 mp_irqs[mp_irq_entries] = intsrc;
1950 if (++mp_irq_entries == MAX_IRQ_SOURCES)
1951 panic("Max # of irq sources exceeded!\n");
1952 -
1953 - return;
1954 }
1955
1956 void __init mp_config_acpi_legacy_irqs (void)
1957 {
1958 struct mpc_config_intsrc intsrc;
1959 - int i = 0;
1960 - int ioapic = -1;
1961 + int i = 0;
1962 + int ioapic = -1;
1963
1964 /*
1965 * Fabricate the legacy ISA bus (bus #31).
1966 @@ -1095,12 +1067,12 @@ void __init mp_config_acpi_legacy_irqs (
1967
1968 #define MAX_GSI_NUM 4096
1969
1970 -int mp_register_gsi (u32 gsi, int triggering, int polarity)
1971 +int mp_register_gsi(u32 gsi, int triggering, int polarity)
1972 {
1973 - int ioapic = -1;
1974 - int ioapic_pin = 0;
1975 - int idx, bit = 0;
1976 - static int pci_irq = 16;
1977 + int ioapic = -1;
1978 + int ioapic_pin = 0;
1979 + int idx, bit = 0;
1980 + static int pci_irq = 16;
1981 /*
1982 * Mapping between Global System Interrups, which
1983 	 * Mapping between Global System Interrupts, which
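
For reference, the #if MAX_MP_BUSSES < 256 guard added above avoids an always-false comparison: mpc_busid is an unsigned char, so when MAX_MP_BUSSES is 256 the range check can never fire (and newer compilers warn about exactly that). A sketch of the relevant field, assuming the usual 2.6.x mpspec_def.h layout:

	struct mpc_config_bus {
		unsigned char mpc_type;
		unsigned char mpc_busid;	/* u8: a busid >= 256 cannot exist */
		unsigned char mpc_bustype[6];
	};
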
1984 Index: head-2008-12-01/arch/x86/kernel/pci-dma-xen.c
1985 ===================================================================
1986 --- head-2008-12-01.orig/arch/x86/kernel/pci-dma-xen.c 2008-12-03 15:49:14.000000000 +0100
1987 +++ head-2008-12-01/arch/x86/kernel/pci-dma-xen.c 2008-12-01 11:29:05.000000000 +0100
1988 @@ -110,8 +110,7 @@ dma_map_sg(struct device *hwdev, struct
1989 {
1990 int i, rc;
1991
1992 - if (direction == DMA_NONE)
1993 - BUG();
1994 + BUG_ON(!valid_dma_direction(direction));
1995 WARN_ON(nents == 0 || sg[0].length == 0);
1996
1997 if (swiotlb) {
1998 @@ -142,7 +141,7 @@ dma_unmap_sg(struct device *hwdev, struc
1999 {
2000 int i;
2001
2002 - BUG_ON(direction == DMA_NONE);
2003 + BUG_ON(!valid_dma_direction(direction));
2004 if (swiotlb)
2005 swiotlb_unmap_sg(hwdev, sg, nents, direction);
2006 else {
2007 @@ -159,8 +158,7 @@ dma_map_page(struct device *dev, struct
2008 {
2009 dma_addr_t dma_addr;
2010
2011 - BUG_ON(direction == DMA_NONE);
2012 -
2013 + BUG_ON(!valid_dma_direction(direction));
2014 if (swiotlb) {
2015 dma_addr = swiotlb_map_page(
2016 dev, page, offset, size, direction);
2017 @@ -177,7 +175,7 @@ void
2018 dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
2019 enum dma_data_direction direction)
2020 {
2021 - BUG_ON(direction == DMA_NONE);
2022 + BUG_ON(!valid_dma_direction(direction));
2023 if (swiotlb)
2024 swiotlb_unmap_page(dev, dma_address, size, direction);
2025 else
2026 @@ -359,8 +357,7 @@ dma_map_single(struct device *dev, void
2027 {
2028 dma_addr_t dma;
2029
2030 - if (direction == DMA_NONE)
2031 - BUG();
2032 + BUG_ON(!valid_dma_direction(direction));
2033 WARN_ON(size == 0);
2034
2035 if (swiotlb) {
2036 @@ -381,8 +378,7 @@ void
2037 dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
2038 enum dma_data_direction direction)
2039 {
2040 - if (direction == DMA_NONE)
2041 - BUG();
2042 + BUG_ON(!valid_dma_direction(direction));
2043 if (swiotlb)
2044 swiotlb_unmap_single(dev, dma_addr, size, direction);
2045 else
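
The BUG_ON() conversions in this file lean on valid_dma_direction(), a helper that appeared alongside 2.6.19. It tightens the old "direction == DMA_NONE" test by accepting only the three usable directions; its mainline form is roughly:

	static inline int valid_dma_direction(int dma_direction)
	{
		return ((dma_direction == DMA_BIDIRECTIONAL) ||
			(dma_direction == DMA_TO_DEVICE) ||
			(dma_direction == DMA_FROM_DEVICE));
	}
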
2046 Index: head-2008-12-01/arch/x86/kernel/process_32-xen.c
2047 ===================================================================
2048 --- head-2008-12-01.orig/arch/x86/kernel/process_32-xen.c 2008-12-03 15:49:14.000000000 +0100
2049 +++ head-2008-12-01/arch/x86/kernel/process_32-xen.c 2008-12-01 11:29:05.000000000 +0100
2050 @@ -37,6 +37,7 @@
2051 #include <linux/kallsyms.h>
2052 #include <linux/ptrace.h>
2053 #include <linux/random.h>
2054 +#include <linux/personality.h>
2055
2056 #include <asm/uaccess.h>
2057 #include <asm/pgtable.h>
2058 @@ -186,7 +187,7 @@ void cpu_idle(void)
2059 void cpu_idle_wait(void)
2060 {
2061 unsigned int cpu, this_cpu = get_cpu();
2062 - cpumask_t map;
2063 + cpumask_t map, tmp = current->cpus_allowed;
2064
2065 set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
2066 put_cpu();
2067 @@ -208,6 +209,8 @@ void cpu_idle_wait(void)
2068 }
2069 cpus_and(map, map, cpu_online_map);
2070 } while (!cpus_empty(map));
2071 +
2072 + set_cpus_allowed(current, tmp);
2073 }
2074 EXPORT_SYMBOL_GPL(cpu_idle_wait);
2075
2076 @@ -240,9 +243,9 @@ void show_regs(struct pt_regs * regs)
2077 if (user_mode_vm(regs))
2078 printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp);
2079 printk(" EFLAGS: %08lx %s (%s %.*s)\n",
2080 - regs->eflags, print_tainted(), system_utsname.release,
2081 - (int)strcspn(system_utsname.version, " "),
2082 - system_utsname.version);
2083 + regs->eflags, print_tainted(), init_utsname()->release,
2084 + (int)strcspn(init_utsname()->version, " "),
2085 + init_utsname()->version);
2086 printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
2087 regs->eax,regs->ebx,regs->ecx,regs->edx);
2088 printk("ESI: %08lx EDI: %08lx EBP: %08lx",
2089 @@ -264,15 +267,6 @@ void show_regs(struct pt_regs * regs)
2090 * the "args".
2091 */
2092 extern void kernel_thread_helper(void);
2093 -__asm__(".section .text\n"
2094 - ".align 4\n"
2095 - "kernel_thread_helper:\n\t"
2096 - "movl %edx,%eax\n\t"
2097 - "pushl %edx\n\t"
2098 - "call *%ebx\n\t"
2099 - "pushl %eax\n\t"
2100 - "call do_exit\n"
2101 - ".previous");
2102
2103 /*
2104 * Create a kernel thread
2105 @@ -290,7 +284,7 @@ int kernel_thread(int (*fn)(void *), voi
2106 regs.xes = __USER_DS;
2107 regs.orig_eax = -1;
2108 regs.eip = (unsigned long) kernel_thread_helper;
2109 - regs.xcs = GET_KERNEL_CS();
2110 + regs.xcs = __KERNEL_CS | get_kernel_rpl();
2111 regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2;
2112
2113 /* Ok, create the new process.. */
2114 @@ -369,13 +363,12 @@ int copy_thread(int nr, unsigned long cl
2115
2116 tsk = current;
2117 if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
2118 - p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
2119 + p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr,
2120 + IO_BITMAP_BYTES, GFP_KERNEL);
2121 if (!p->thread.io_bitmap_ptr) {
2122 p->thread.io_bitmap_max = 0;
2123 return -ENOMEM;
2124 }
2125 - memcpy(p->thread.io_bitmap_ptr, tsk->thread.io_bitmap_ptr,
2126 - IO_BITMAP_BYTES);
2127 set_tsk_thread_flag(p, TIF_IO_BITMAP);
2128 }
2129
2130 @@ -871,7 +864,7 @@ asmlinkage int sys_get_thread_area(struc
2131
2132 unsigned long arch_align_stack(unsigned long sp)
2133 {
2134 - if (randomize_va_space)
2135 + if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
2136 sp -= get_random_int() % 8192;
2137 return sp & ~0xf;
2138 }
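
The copy_thread() hunk above folds an open-coded kmalloc()+memcpy() pair into kmemdup(), also new around 2.6.19. Its core behaviour is essentially:

	void *kmemdup(const void *src, size_t len, gfp_t gfp)
	{
		void *p;

		p = kmalloc(len, gfp);
		if (p)
			memcpy(p, src, len);
		return p;
	}
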
2139 Index: head-2008-12-01/arch/x86/kernel/setup_32-xen.c
2140 ===================================================================
2141 --- head-2008-12-01.orig/arch/x86/kernel/setup_32-xen.c 2008-12-03 15:49:14.000000000 +0100
2142 +++ head-2008-12-01/arch/x86/kernel/setup_32-xen.c 2008-12-01 11:29:05.000000000 +0100
2143 @@ -56,6 +56,7 @@
2144 #include <asm/apic.h>
2145 #include <asm/e820.h>
2146 #include <asm/mpspec.h>
2147 +#include <asm/mmzone.h>
2148 #include <asm/setup.h>
2149 #include <asm/arch_hooks.h>
2150 #include <asm/sections.h>
2151 @@ -83,9 +84,6 @@ static struct notifier_block xen_panic_b
2152 xen_panic_event, NULL, 0 /* try to go last */
2153 };
2154
2155 -extern char hypercall_page[PAGE_SIZE];
2156 -EXPORT_SYMBOL(hypercall_page);
2157 -
2158 int disable_pse __devinitdata = 0;
2159
2160 /*
2161 @@ -105,18 +103,6 @@ EXPORT_SYMBOL(boot_cpu_data);
2162
2163 unsigned long mmu_cr4_features;
2164
2165 -#ifdef CONFIG_ACPI
2166 - int acpi_disabled = 0;
2167 -#else
2168 - int acpi_disabled = 1;
2169 -#endif
2170 -EXPORT_SYMBOL(acpi_disabled);
2171 -
2172 -#ifdef CONFIG_ACPI
2173 -int __initdata acpi_force = 0;
2174 -extern acpi_interrupt_flags acpi_sci_flags;
2175 -#endif
2176 -
2177 /* for MCA, but anyone else can use it if they want */
2178 unsigned int machine_id;
2179 #ifdef CONFIG_MCA
2180 @@ -170,7 +156,6 @@ struct e820map machine_e820;
2181 #endif
2182
2183 extern void early_cpu_init(void);
2184 -extern void generic_apic_probe(char *);
2185 extern int root_mountflags;
2186
2187 unsigned long saved_videomode;
2188 @@ -243,9 +228,6 @@ static struct resource adapter_rom_resou
2189 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
2190 } };
2191
2192 -#define ADAPTER_ROM_RESOURCES \
2193 - (sizeof adapter_rom_resources / sizeof adapter_rom_resources[0])
2194 -
2195 static struct resource video_rom_resource = {
2196 .name = "Video ROM",
2197 .start = 0xc0000,
2198 @@ -307,9 +289,6 @@ static struct resource standard_io_resou
2199 .flags = IORESOURCE_BUSY | IORESOURCE_IO
2200 } };
2201
2202 -#define STANDARD_IO_RESOURCES \
2203 - (sizeof standard_io_resources / sizeof standard_io_resources[0])
2204 -
2205 #define romsignature(x) (*(unsigned short *)(x) == 0xaa55)
2206
2207 static int __init romchecksum(unsigned char *rom, unsigned long length)
2208 @@ -372,7 +351,7 @@ static void __init probe_roms(void)
2209 }
2210
2211 /* check for adapter roms on 2k boundaries */
2212 - for (i = 0; i < ADAPTER_ROM_RESOURCES && start < upper; start += 2048) {
2213 + for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; start += 2048) {
2214 rom = isa_bus_to_virt(start);
2215 if (!romsignature(rom))
2216 continue;
2217 @@ -779,246 +758,152 @@ static inline void copy_edd(void)
2218 }
2219 #endif
2220
2221 -static void __init parse_cmdline_early (char ** cmdline_p)
2222 +static int __initdata user_defined_memmap = 0;
2223 +
2224 +/*
2225 + * "mem=nopentium" disables the 4MB page tables.
2226 + * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
2227 + * to <mem>, overriding the bios size.
2228 + * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from
2229 + * <start> to <start>+<mem>, overriding the bios size.
2230 + *
2231 + * HPA tells me bootloaders need to parse mem=, so no new
2232 + * option should be mem= [also see Documentation/i386/boot.txt]
2233 + */
2234 +static int __init parse_mem(char *arg)
2235 {
2236 - char c = ' ', *to = command_line, *from = saved_command_line;
2237 - int len = 0, max_cmdline;
2238 - int userdef = 0;
2239 -
2240 - if ((max_cmdline = MAX_GUEST_CMDLINE) > COMMAND_LINE_SIZE)
2241 - max_cmdline = COMMAND_LINE_SIZE;
2242 - memcpy(saved_command_line, xen_start_info->cmd_line, max_cmdline);
2243 - /* Save unparsed command line copy for /proc/cmdline */
2244 - saved_command_line[max_cmdline-1] = '\0';
2245 -
2246 - for (;;) {
2247 - if (c != ' ')
2248 - goto next_char;
2249 - /*
2250 - * "mem=nopentium" disables the 4MB page tables.
2251 - * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
2252 - * to <mem>, overriding the bios size.
2253 - * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from
2254 - * <start> to <start>+<mem>, overriding the bios size.
2255 - *
2256 - * HPA tells me bootloaders need to parse mem=, so no new
2257 - * option should be mem= [also see Documentation/i386/boot.txt]
2258 - */
2259 - if (!memcmp(from, "mem=", 4)) {
2260 - if (to != command_line)
2261 - to--;
2262 - if (!memcmp(from+4, "nopentium", 9)) {
2263 - from += 9+4;
2264 - clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
2265 - disable_pse = 1;
2266 - } else {
2267 - /* If the user specifies memory size, we
2268 - * limit the BIOS-provided memory map to
2269 - * that size. exactmap can be used to specify
2270 - * the exact map. mem=number can be used to
2271 - * trim the existing memory map.
2272 - */
2273 - unsigned long long mem_size;
2274 -
2275 - mem_size = memparse(from+4, &from);
2276 - limit_regions(mem_size);
2277 - userdef=1;
2278 - }
2279 - }
2280 + if (!arg)
2281 + return -EINVAL;
2282
2283 - else if (!memcmp(from, "memmap=", 7)) {
2284 - if (to != command_line)
2285 - to--;
2286 - if (!memcmp(from+7, "exactmap", 8)) {
2287 -#ifdef CONFIG_CRASH_DUMP
2288 - /* If we are doing a crash dump, we
2289 - * still need to know the real mem
2290 - * size before original memory map is
2291 - * reset.
2292 - */
2293 - find_max_pfn();
2294 - saved_max_pfn = max_pfn;
2295 -#endif
2296 - from += 8+7;
2297 - e820.nr_map = 0;
2298 - userdef = 1;
2299 - } else {
2300 - /* If the user specifies memory size, we
2301 - * limit the BIOS-provided memory map to
2302 - * that size. exactmap can be used to specify
2303 - * the exact map. mem=number can be used to
2304 - * trim the existing memory map.
2305 - */
2306 - unsigned long long start_at, mem_size;
2307 + if (strcmp(arg, "nopentium") == 0) {
2308 + clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
2309 + disable_pse = 1;
2310 + } else {
2311 + /* If the user specifies memory size, we
2312 + * limit the BIOS-provided memory map to
2313 + * that size. exactmap can be used to specify
2314 + * the exact map. mem=number can be used to
2315 + * trim the existing memory map.
2316 + */
2317 + unsigned long long mem_size;
2318
2319 - mem_size = memparse(from+7, &from);
2320 - if (*from == '@') {
2321 - start_at = memparse(from+1, &from);
2322 - add_memory_region(start_at, mem_size, E820_RAM);
2323 - } else if (*from == '#') {
2324 - start_at = memparse(from+1, &from);
2325 - add_memory_region(start_at, mem_size, E820_ACPI);
2326 - } else if (*from == '$') {
2327 - start_at = memparse(from+1, &from);
2328 - add_memory_region(start_at, mem_size, E820_RESERVED);
2329 - } else {
2330 - limit_regions(mem_size);
2331 - userdef=1;
2332 - }
2333 - }
2334 - }
2335 -
2336 - else if (!memcmp(from, "noexec=", 7))
2337 - noexec_setup(from + 7);
2338 + mem_size = memparse(arg, &arg);
2339 + limit_regions(mem_size);
2340 + user_defined_memmap = 1;
2341 + }
2342 + return 0;
2343 +}
2344 +early_param("mem", parse_mem);
2345
2346 +static int __init parse_memmap(char *arg)
2347 +{
2348 + if (!arg)
2349 + return -EINVAL;
2350
2351 -#ifdef CONFIG_X86_MPPARSE
2352 - /*
2353 - * If the BIOS enumerates physical processors before logical,
2354 - * maxcpus=N at enumeration-time can be used to disable HT.
2355 + if (strcmp(arg, "exactmap") == 0) {
2356 +#ifdef CONFIG_CRASH_DUMP
2357 + /* If we are doing a crash dump, we
2358 + * still need to know the real mem
2359 + * size before original memory map is
2360 + * reset.
2361 */
2362 - else if (!memcmp(from, "maxcpus=", 8)) {
2363 - extern unsigned int maxcpus;
2364 -
2365 - maxcpus = simple_strtoul(from + 8, NULL, 0);
2366 - }
2367 + find_max_pfn();
2368 + saved_max_pfn = max_pfn;
2369 #endif
2370 + e820.nr_map = 0;
2371 + user_defined_memmap = 1;
2372 + } else {
2373 + /* If the user specifies memory size, we
2374 + * limit the BIOS-provided memory map to
2375 + * that size. exactmap can be used to specify
2376 + * the exact map. mem=number can be used to
2377 + * trim the existing memory map.
2378 + */
2379 + unsigned long long start_at, mem_size;
2380
2381 -#ifdef CONFIG_ACPI
2382 - /* "acpi=off" disables both ACPI table parsing and interpreter */
2383 - else if (!memcmp(from, "acpi=off", 8)) {
2384 - disable_acpi();
2385 - }
2386 -
2387 - /* acpi=force to over-ride black-list */
2388 - else if (!memcmp(from, "acpi=force", 10)) {
2389 - acpi_force = 1;
2390 - acpi_ht = 1;
2391 - acpi_disabled = 0;
2392 - }
2393 -
2394 - /* acpi=strict disables out-of-spec workarounds */
2395 - else if (!memcmp(from, "acpi=strict", 11)) {
2396 - acpi_strict = 1;
2397 - }
2398 -
2399 - /* Limit ACPI just to boot-time to enable HT */
2400 - else if (!memcmp(from, "acpi=ht", 7)) {
2401 - if (!acpi_force)
2402 - disable_acpi();
2403 - acpi_ht = 1;
2404 - }
2405 -
2406 - /* "pci=noacpi" disable ACPI IRQ routing and PCI scan */
2407 - else if (!memcmp(from, "pci=noacpi", 10)) {
2408 - acpi_disable_pci();
2409 - }
2410 - /* "acpi=noirq" disables ACPI interrupt routing */
2411 - else if (!memcmp(from, "acpi=noirq", 10)) {
2412 - acpi_noirq_set();
2413 + mem_size = memparse(arg, &arg);
2414 + if (*arg == '@') {
2415 + start_at = memparse(arg+1, &arg);
2416 + add_memory_region(start_at, mem_size, E820_RAM);
2417 + } else if (*arg == '#') {
2418 + start_at = memparse(arg+1, &arg);
2419 + add_memory_region(start_at, mem_size, E820_ACPI);
2420 + } else if (*arg == '$') {
2421 + start_at = memparse(arg+1, &arg);
2422 + add_memory_region(start_at, mem_size, E820_RESERVED);
2423 + } else {
2424 + limit_regions(mem_size);
2425 + user_defined_memmap = 1;
2426 }
2427 + }
2428 + return 0;
2429 +}
2430 +early_param("memmap", parse_memmap);
2431
2432 - else if (!memcmp(from, "acpi_sci=edge", 13))
2433 - acpi_sci_flags.trigger = 1;
2434 -
2435 - else if (!memcmp(from, "acpi_sci=level", 14))
2436 - acpi_sci_flags.trigger = 3;
2437 +#ifdef CONFIG_PROC_VMCORE
2438 +/* elfcorehdr= specifies the location of elf core header
2439 + * stored by the crashed kernel.
2440 + */
2441 +static int __init parse_elfcorehdr(char *arg)
2442 +{
2443 + if (!arg)
2444 + return -EINVAL;
2445
2446 - else if (!memcmp(from, "acpi_sci=high", 13))
2447 - acpi_sci_flags.polarity = 1;
2448 + elfcorehdr_addr = memparse(arg, &arg);
2449 + return 0;
2450 +}
2451 +early_param("elfcorehdr", parse_elfcorehdr);
2452 +#endif /* CONFIG_PROC_VMCORE */
2453
2454 - else if (!memcmp(from, "acpi_sci=low", 12))
2455 - acpi_sci_flags.polarity = 3;
2456 +/*
2457 + * highmem=size forces highmem to be exactly 'size' bytes.
2458 + * This works even on boxes that have no highmem otherwise.
2459 + * This also works to reduce highmem size on bigger boxes.
2460 + */
2461 +static int __init parse_highmem(char *arg)
2462 +{
2463 + if (!arg)
2464 + return -EINVAL;
2465
2466 -#ifdef CONFIG_X86_IO_APIC
2467 - else if (!memcmp(from, "acpi_skip_timer_override", 24))
2468 - acpi_skip_timer_override = 1;
2469 + highmem_pages = memparse(arg, &arg) >> PAGE_SHIFT;
2470 + return 0;
2471 +}
2472 +early_param("highmem", parse_highmem);
2473
2474 - if (!memcmp(from, "disable_timer_pin_1", 19))
2475 - disable_timer_pin_1 = 1;
2476 - if (!memcmp(from, "enable_timer_pin_1", 18))
2477 - disable_timer_pin_1 = -1;
2478 -
2479 - /* disable IO-APIC */
2480 - else if (!memcmp(from, "noapic", 6))
2481 - disable_ioapic_setup();
2482 -#endif /* CONFIG_X86_IO_APIC */
2483 -#endif /* CONFIG_ACPI */
2484 +/*
2485 + * vmalloc=size forces the vmalloc area to be exactly 'size'
2486 + * bytes. This can be used to increase (or decrease) the
2487 + * vmalloc area - the default is 128m.
2488 + */
2489 +static int __init parse_vmalloc(char *arg)
2490 +{
2491 + if (!arg)
2492 + return -EINVAL;
2493
2494 -#ifdef CONFIG_X86_LOCAL_APIC
2495 - /* enable local APIC */
2496 - else if (!memcmp(from, "lapic", 5))
2497 - lapic_enable();
2498 -
2499 - /* disable local APIC */
2500 - else if (!memcmp(from, "nolapic", 6))
2501 - lapic_disable();
2502 -#endif /* CONFIG_X86_LOCAL_APIC */
2503 + __VMALLOC_RESERVE = memparse(arg, &arg);
2504 + return 0;
2505 +}
2506 +early_param("vmalloc", parse_vmalloc);
2507
2508 -#ifdef CONFIG_KEXEC
2509 - /* crashkernel=size@addr specifies the location to reserve for
2510 - * a crash kernel. By reserving this memory we guarantee
2511 - * that linux never set's it up as a DMA target.
2512 - * Useful for holding code to do something appropriate
2513 - * after a kernel panic.
2514 - */
2515 - else if (!memcmp(from, "crashkernel=", 12)) {
2516 #ifndef CONFIG_XEN
2517 - unsigned long size, base;
2518 - size = memparse(from+12, &from);
2519 - if (*from == '@') {
2520 - base = memparse(from+1, &from);
2521 - /* FIXME: Do I want a sanity check
2522 - * to validate the memory range?
2523 - */
2524 - crashk_res.start = base;
2525 - crashk_res.end = base + size - 1;
2526 - }
2527 -#else
2528 - printk("Ignoring crashkernel command line, "
2529 - "parameter will be supplied by xen\n");
2530 -#endif
2531 - }
2532 -#endif
2533 -#ifdef CONFIG_PROC_VMCORE
2534 - /* elfcorehdr= specifies the location of elf core header
2535 - * stored by the crashed kernel.
2536 - */
2537 - else if (!memcmp(from, "elfcorehdr=", 11))
2538 - elfcorehdr_addr = memparse(from+11, &from);
2539 -#endif
2540 +/*
2541 + * reservetop=size reserves a hole at the top of the kernel address space which
2542 + * a hypervisor can load into later. Needed for dynamically loaded hypervisors,
2543 + * so relocating the fixmap can be done before paging initialization.
2544 + */
2545 +static int __init parse_reservetop(char *arg)
2546 +{
2547 + unsigned long address;
2548
2549 - /*
2550 - * highmem=size forces highmem to be exactly 'size' bytes.
2551 - * This works even on boxes that have no highmem otherwise.
2552 - * This also works to reduce highmem size on bigger boxes.
2553 - */
2554 - else if (!memcmp(from, "highmem=", 8))
2555 - highmem_pages = memparse(from+8, &from) >> PAGE_SHIFT;
2556 -
2557 - /*
2558 - * vmalloc=size forces the vmalloc area to be exactly 'size'
2559 - * bytes. This can be used to increase (or decrease) the
2560 - * vmalloc area - the default is 128m.
2561 - */
2562 - else if (!memcmp(from, "vmalloc=", 8))
2563 - __VMALLOC_RESERVE = memparse(from+8, &from);
2564 + if (!arg)
2565 + return -EINVAL;
2566
2567 - next_char:
2568 - c = *(from++);
2569 - if (!c)
2570 - break;
2571 - if (COMMAND_LINE_SIZE <= ++len)
2572 - break;
2573 - *(to++) = c;
2574 - }
2575 - *to = '\0';
2576 - *cmdline_p = command_line;
2577 - if (userdef) {
2578 - printk(KERN_INFO "user-defined physical RAM map:\n");
2579 - print_memory_map("user");
2580 - }
2581 + address = memparse(arg, &arg);
2582 + reserve_top_address(address);
2583 + return 0;
2584 }
2585 +early_param("reservetop", parse_reservetop);
2586 +#endif
2587
2588 /*
2589 * Callback for efi_memory_walk.
2590 @@ -1039,7 +924,7 @@ efi_find_max_pfn(unsigned long start, un
2591 static int __init
2592 efi_memory_present_wrapper(unsigned long start, unsigned long end, void *arg)
2593 {
2594 - memory_present(0, start, end);
2595 + memory_present(0, PFN_UP(start), PFN_DOWN(end));
2596 return 0;
2597 }
2598
2599 @@ -1306,6 +1191,14 @@ static unsigned long __init setup_memory
2600 }
2601 printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
2602 pages_to_mb(highend_pfn - highstart_pfn));
2603 + num_physpages = highend_pfn;
2604 + high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
2605 +#else
2606 + num_physpages = max_low_pfn;
2607 + high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
2608 +#endif
2609 +#ifdef CONFIG_FLATMEM
2610 + max_mapnr = num_physpages;
2611 #endif
2612 printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
2613 pages_to_mb(max_low_pfn));
2614 @@ -1317,22 +1210,19 @@ static unsigned long __init setup_memory
2615
2616 void __init zone_sizes_init(void)
2617 {
2618 - unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
2619 - unsigned int max_dma, low;
2620 -
2621 - max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
2622 - low = max_low_pfn;
2623 -
2624 - if (low < max_dma)
2625 - zones_size[ZONE_DMA] = low;
2626 - else {
2627 - zones_size[ZONE_DMA] = max_dma;
2628 - zones_size[ZONE_NORMAL] = low - max_dma;
2629 + unsigned long max_zone_pfns[MAX_NR_ZONES];
2630 + memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
2631 + max_zone_pfns[ZONE_DMA] =
2632 + virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
2633 + max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
2634 #ifdef CONFIG_HIGHMEM
2635 - zones_size[ZONE_HIGHMEM] = highend_pfn - low;
2636 + max_zone_pfns[ZONE_HIGHMEM] = highend_pfn;
2637 + add_active_range(0, 0, highend_pfn);
2638 +#else
2639 + add_active_range(0, 0, max_low_pfn);
2640 #endif
2641 - }
2642 - free_area_init(zones_size);
2643 +
2644 + free_area_init_nodes(max_zone_pfns);
2645 }
2646 #else
2647 extern unsigned long __init setup_memory(void);
2648 @@ -1389,6 +1279,7 @@ void __init setup_bootmem_allocator(void
2649 */
2650 acpi_reserve_bootmem();
2651 #endif
2652 + numa_kva_reserve();
2653 #endif /* !CONFIG_XEN */
2654
2655 #ifdef CONFIG_BLK_DEV_INITRD
2656 @@ -1574,7 +1465,7 @@ static int __init request_standard_resou
2657 request_resource(&iomem_resource, &video_ram_resource);
2658
2659 /* request I/O space for devices used on all i[345]86 PCs */
2660 - for (i = 0; i < STANDARD_IO_RESOURCES; i++)
2661 + for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++)
2662 request_resource(&ioport_resource, &standard_io_resources[i]);
2663 return 0;
2664 }
2665 @@ -1705,17 +1596,19 @@ void __init setup_arch(char **cmdline_p)
2666 data_resource.start = virt_to_phys(_etext);
2667 data_resource.end = virt_to_phys(_edata)-1;
2668
2669 - parse_cmdline_early(cmdline_p);
2670 + if ((i = MAX_GUEST_CMDLINE) > COMMAND_LINE_SIZE)
2671 + i = COMMAND_LINE_SIZE;
2672 + memcpy(saved_command_line, xen_start_info->cmd_line, i);
2673 + saved_command_line[i - 1] = '\0';
2674 + parse_early_param();
2675
2676 -#ifdef CONFIG_EARLY_PRINTK
2677 - {
2678 - char *s = strstr(*cmdline_p, "earlyprintk=");
2679 - if (s) {
2680 - setup_early_printk(strchr(s, '=') + 1);
2681 - printk("early console enabled\n");
2682 - }
2683 + if (user_defined_memmap) {
2684 + printk(KERN_INFO "user-defined physical RAM map:\n");
2685 + print_memory_map("user");
2686 }
2687 -#endif
2688 +
2689 + strlcpy(command_line, saved_command_line, COMMAND_LINE_SIZE);
2690 + *cmdline_p = command_line;
2691
2692 max_low_pfn = setup_memory();
2693
2694 @@ -1822,7 +1715,7 @@ void __init setup_arch(char **cmdline_p)
2695 dmi_scan_machine();
2696
2697 #ifdef CONFIG_X86_GENERICARCH
2698 - generic_apic_probe(*cmdline_p);
2699 + generic_apic_probe();
2700 #endif
2701 if (efi_enabled)
2702 efi_map_memmap();
2703 @@ -1843,9 +1736,11 @@ void __init setup_arch(char **cmdline_p)
2704 acpi_boot_table_init();
2705 #endif
2706
2707 +#ifdef CONFIG_PCI
2708 #ifdef CONFIG_X86_IO_APIC
2709 check_acpi_pci(); /* Checks more than just ACPI actually */
2710 #endif
2711 +#endif
2712
2713 #ifdef CONFIG_ACPI
2714 acpi_boot_init();
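
The command-line rework above replaces the monolithic parse_cmdline_early() scanner with per-option early_param() handlers, which parse_early_param() dispatches by keyword; each handler sees only the text after the '='. A minimal sketch of a handler in that style -- the "foo=" option and foo_bytes variable are hypothetical:

	static unsigned long foo_bytes;

	static int __init parse_foo(char *arg)
	{
		if (!arg)		/* "foo" given with no value */
			return -EINVAL;
		foo_bytes = memparse(arg, &arg);  /* accepts k/M/G suffixes */
		return 0;
	}
	early_param("foo", parse_foo);
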
2715 Index: head-2008-12-01/arch/x86/kernel/smp_32-xen.c
2716 ===================================================================
2717 --- head-2008-12-01.orig/arch/x86/kernel/smp_32-xen.c 2008-12-03 15:49:14.000000000 +0100
2718 +++ head-2008-12-01/arch/x86/kernel/smp_32-xen.c 2008-12-01 11:29:05.000000000 +0100
2719 @@ -279,8 +279,7 @@ static inline void leave_mm (unsigned lo
2720 * 2) Leave the mm if we are in the lazy tlb mode.
2721 */
2722
2723 -irqreturn_t smp_invalidate_interrupt(int irq, void *dev_id,
2724 - struct pt_regs *regs)
2725 +irqreturn_t smp_invalidate_interrupt(int irq, void *dev_id)
2726 {
2727 unsigned long cpu;
2728
2729 @@ -567,16 +566,14 @@ void smp_send_stop(void)
2730 * all the work is done automatically when
2731 * we return from the interrupt.
2732 */
2733 -irqreturn_t smp_reschedule_interrupt(int irq, void *dev_id,
2734 - struct pt_regs *regs)
2735 +irqreturn_t smp_reschedule_interrupt(int irq, void *dev_id)
2736 {
2737
2738 return IRQ_HANDLED;
2739 }
2740
2741 #include <linux/kallsyms.h>
2742 -irqreturn_t smp_call_function_interrupt(int irq, void *dev_id,
2743 - struct pt_regs *regs)
2744 +irqreturn_t smp_call_function_interrupt(int irq, void *dev_id)
2745 {
2746 void (*func) (void *info) = call_data->func;
2747 void *info = call_data->info;
2748 @@ -603,3 +600,69 @@ irqreturn_t smp_call_function_interrupt(
2749 return IRQ_HANDLED;
2750 }
2751
2752 +/*
2753 + * this function sends a 'generic call function' IPI to one other CPU
2754 + * in the system.
2755 + *
2756 + * cpu is a standard Linux logical CPU number.
2757 + */
2758 +static void
2759 +__smp_call_function_single(int cpu, void (*func) (void *info), void *info,
2760 + int nonatomic, int wait)
2761 +{
2762 + struct call_data_struct data;
2763 + int cpus = 1;
2764 +
2765 + data.func = func;
2766 + data.info = info;
2767 + atomic_set(&data.started, 0);
2768 + data.wait = wait;
2769 + if (wait)
2770 + atomic_set(&data.finished, 0);
2771 +
2772 + call_data = &data;
2773 + wmb();
2774 +	/* Send a message to the target CPU and wait for it to respond */
2775 + send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_VECTOR);
2776 +
2777 + /* Wait for response */
2778 + while (atomic_read(&data.started) != cpus)
2779 + cpu_relax();
2780 +
2781 + if (!wait)
2782 + return;
2783 +
2784 + while (atomic_read(&data.finished) != cpus)
2785 + cpu_relax();
2786 +}
2787 +
2788 +/*
2789 + * smp_call_function_single - Run a function on another CPU
2790 + * @func: The function to run. This must be fast and non-blocking.
2791 + * @info: An arbitrary pointer to pass to the function.
2792 + * @nonatomic: Currently unused.
2793 + * @wait: If true, wait until function has completed on other CPUs.
2794 + *
2795 + * Retrurns 0 on success, else a negative status code.
2796 + *
2797 + * Does not return until the remote CPU is nearly ready to execute <func>
2798 + * or is or has executed.
2799 + */
2800 +
2801 +int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
2802 + int nonatomic, int wait)
2803 +{
2804 + /* prevent preemption and reschedule on another processor */
2805 + int me = get_cpu();
2806 + if (cpu == me) {
2807 + WARN_ON(1);
2808 + put_cpu();
2809 + return -EBUSY;
2810 + }
2811 + spin_lock_bh(&call_lock);
2812 + __smp_call_function_single(cpu, func, info, nonatomic, wait);
2813 + spin_unlock_bh(&call_lock);
2814 + put_cpu();
2815 + return 0;
2816 +}
2817 +EXPORT_SYMBOL(smp_call_function_single);
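
A typical caller of the smp_call_function_single() added above looks like the following sketch; say_hello() and the choice of CPU 1 are illustrative only:

	static void say_hello(void *info)
	{
		/* runs on the target CPU, in interrupt context */
		printk(KERN_INFO "hello from CPU %d\n", smp_processor_id());
	}

	static int greet_cpu1(void)
	{
		/* nonatomic is currently unused; wait=1 blocks until done */
		return smp_call_function_single(1, say_hello, NULL, 0, 1);
	}
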
2818 Index: head-2008-12-01/arch/x86/kernel/time_32-xen.c
2819 ===================================================================
2820 --- head-2008-12-01.orig/arch/x86/kernel/time_32-xen.c 2008-12-03 15:49:14.000000000 +0100
2821 +++ head-2008-12-01/arch/x86/kernel/time_32-xen.c 2008-12-01 11:29:05.000000000 +0100
2822 @@ -89,7 +89,6 @@ int pit_latch_buggy; /* ext
2823 unsigned long vxtime_hz = PIT_TICK_RATE;
2824 struct vxtime_data __vxtime __section_vxtime; /* for vsyscalls */
2825 volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
2826 -unsigned long __wall_jiffies __section_wall_jiffies = INITIAL_JIFFIES;
2827 struct timespec __xtime __section_xtime;
2828 struct timezone __sys_tz __section_sys_tz;
2829 #endif
2830 @@ -97,8 +96,6 @@ struct timezone __sys_tz __section_sys_t
2831 unsigned int cpu_khz; /* Detected as we calibrate the TSC */
2832 EXPORT_SYMBOL(cpu_khz);
2833
2834 -extern unsigned long wall_jiffies;
2835 -
2836 DEFINE_SPINLOCK(rtc_lock);
2837 EXPORT_SYMBOL(rtc_lock);
2838
2839 @@ -265,11 +262,10 @@ static void __update_wallclock(time_t se
2840 time_t wtm_sec, xtime_sec;
2841 u64 tmp, wc_nsec;
2842
2843 - /* Adjust wall-clock time base based on wall_jiffies ticks. */
2844 + /* Adjust wall-clock time base. */
2845 wc_nsec = processed_system_time;
2846 wc_nsec += sec * (u64)NSEC_PER_SEC;
2847 wc_nsec += nsec;
2848 - wc_nsec -= (jiffies - wall_jiffies) * (u64)NS_PER_TICK;
2849
2850 /* Split wallclock base into seconds and nanoseconds. */
2851 tmp = wc_nsec;
2852 @@ -394,16 +390,10 @@ void do_gettimeofday(struct timeval *tv)
2853 shadow = &per_cpu(shadow_time, cpu);
2854
2855 do {
2856 - unsigned long lost;
2857 -
2858 local_time_version = shadow->version;
2859 seq = read_seqbegin(&xtime_lock);
2860
2861 usec = get_usec_offset(shadow);
2862 - lost = jiffies - wall_jiffies;
2863 -
2864 - if (unlikely(lost))
2865 - usec += lost * (USEC_PER_SEC / HZ);
2866
2867 sec = xtime.tv_sec;
2868 usec += (xtime.tv_nsec / NSEC_PER_USEC);
2869 @@ -526,7 +516,7 @@ static void sync_xen_wallclock(unsigned
2870 write_seqlock_irq(&xtime_lock);
2871
2872 sec = xtime.tv_sec;
2873 - nsec = xtime.tv_nsec + ((jiffies - wall_jiffies) * (u64)NS_PER_TICK);
2874 + nsec = xtime.tv_nsec;
2875 __normalize_time(&sec, &nsec);
2876
2877 op.cmd = XENPF_settime;
2878 @@ -600,42 +590,49 @@ unsigned long long sched_clock(void)
2879 }
2880 #endif
2881
2882 -#if defined(CONFIG_SMP) && defined(CONFIG_FRAME_POINTER)
2883 unsigned long profile_pc(struct pt_regs *regs)
2884 {
2885 unsigned long pc = instruction_pointer(regs);
2886
2887 -#ifdef __x86_64__
2888 - /* Assume the lock function has either no stack frame or only a single word.
2889 - This checks if the address on the stack looks like a kernel text address.
2890 - There is a small window for false hits, but in that case the tick
2891 - is just accounted to the spinlock function.
2892 - Better would be to write these functions in assembler again
2893 - and check exactly. */
2894 +#if defined(CONFIG_SMP) || defined(__x86_64__)
2895 if (!user_mode_vm(regs) && in_lock_functions(pc)) {
2896 - char *v = *(char **)regs->rsp;
2897 - if ((v >= _stext && v <= _etext) ||
2898 - (v >= _sinittext && v <= _einittext) ||
2899 - (v >= (char *)MODULES_VADDR && v <= (char *)MODULES_END))
2900 - return (unsigned long)v;
2901 - return ((unsigned long *)regs->rsp)[1];
2902 +# ifdef CONFIG_FRAME_POINTER
2903 +# ifdef __i386__
2904 + return ((unsigned long *)regs->ebp)[1];
2905 +# else
2906 + return ((unsigned long *)regs->rbp)[1];
2907 +# endif
2908 +# else
2909 +# ifdef __i386__
2910 + unsigned long *sp;
2911 + if ((regs->xcs & 2) == 0)
2912 + sp = (unsigned long *)&regs->esp;
2913 + else
2914 + sp = (unsigned long *)regs->esp;
2915 +# else
2916 + unsigned long *sp = (unsigned long *)regs->rsp;
2917 +# endif
2918 + /* Return address is either directly at stack pointer
2919 + or above a saved eflags. Eflags has bits 22-31 zero,
2920 + kernel addresses don't. */
2921 + if (sp[0] >> 22)
2922 + return sp[0];
2923 + if (sp[1] >> 22)
2924 + return sp[1];
2925 +# endif
2926 }
2927 -#else
2928 - if (!user_mode_vm(regs) && in_lock_functions(pc))
2929 - return *(unsigned long *)(regs->ebp + 4);
2930 #endif
2931
2932 return pc;
2933 }
2934 EXPORT_SYMBOL(profile_pc);
2935 -#endif
2936
2937 /*
2938 * This is the same as the above, except we _also_ save the current
2939 * Time Stamp Counter value at the time of the timer interrupt, so that
2940 * we later on can estimate the time of day more exactly.
2941 */
2942 -irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
2943 +irqreturn_t timer_interrupt(int irq, void *dev_id)
2944 {
2945 s64 delta, delta_cpu, stolen, blocked;
2946 u64 sched_time;
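
The non-frame-pointer branch of profile_pc() above keys off address ranges: i386 kernel text lives at or above PAGE_OFFSET (0xC0000000), so a return address always has bits above 21 set, while a saved EFLAGS word cannot -- its highest architecturally defined bit is ID, bit 21. Worked values:

	/* sp[0] = 0xc0123456 (return address): 0xc0123456 >> 22 == 0x300
	 * sp[0] = 0x00200246 (saved EFLAGS):   0x00200246 >> 22 == 0 */
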
2947 @@ -693,10 +690,14 @@ irqreturn_t timer_interrupt(int irq, voi
2948 }
2949
2950 /* System-wide jiffy work. */
2951 - while (delta >= NS_PER_TICK) {
2952 - delta -= NS_PER_TICK;
2953 - processed_system_time += NS_PER_TICK;
2954 - do_timer(regs);
2955 + if (delta >= NS_PER_TICK) {
2956 + do_div(delta, NS_PER_TICK);
2957 + processed_system_time += delta * NS_PER_TICK;
2958 + while (delta > HZ) {
2959 + do_timer(HZ);
2960 + delta -= HZ;
2961 + }
2962 + do_timer(delta);
2963 }
2964
2965 if (shadow_tv_version != HYPERVISOR_shared_info->wc_version) {
2966 @@ -741,7 +742,7 @@ irqreturn_t timer_interrupt(int irq, voi
2967 if (delta_cpu > 0) {
2968 do_div(delta_cpu, NS_PER_TICK);
2969 per_cpu(processed_system_time, cpu) += delta_cpu * NS_PER_TICK;
2970 - if (user_mode_vm(regs))
2971 + if (user_mode_vm(get_irq_regs()))
2972 account_user_time(current, (cputime_t)delta_cpu);
2973 else
2974 account_system_time(current, HARDIRQ_OFFSET,
2975 @@ -755,10 +756,10 @@ irqreturn_t timer_interrupt(int irq, voi
2976 /* Local timer processing (see update_process_times()). */
2977 run_local_timers();
2978 if (rcu_pending(cpu))
2979 - rcu_check_callbacks(cpu, user_mode_vm(regs));
2980 + rcu_check_callbacks(cpu, user_mode_vm(get_irq_regs()));
2981 scheduler_tick();
2982 run_posix_cpu_timers(current);
2983 - profile_tick(CPU_PROFILING, regs);
2984 + profile_tick(CPU_PROFILING);
2985
2986 return IRQ_HANDLED;
2987 }
2988 @@ -968,10 +969,11 @@ extern void (*late_time_init)(void);
2989 /* Duplicate of time_init() below, with hpet_enable part added */
2990 static void __init hpet_time_init(void)
2991 {
2992 - xtime.tv_sec = get_cmos_time();
2993 - xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ);
2994 - set_normalized_timespec(&wall_to_monotonic,
2995 - -xtime.tv_sec, -xtime.tv_nsec);
2996 + struct timespec ts;
2997 + ts.tv_sec = get_cmos_time();
2998 + ts.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ);
2999 +
3000 + do_settimeofday(&ts);
3001
3002 if ((hpet_enable() >= 0) && hpet_use_timer) {
3003 printk("Using HPET for base-timer\n");
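
The timer_interrupt() batching above replaces the old one-do_timer()-per-tick loop with a single division. Worked example, assuming HZ=100 so NS_PER_TICK is 10ms: entering with delta = 53ms, do_div(delta, NS_PER_TICK) leaves delta = 5 and drops the 3ms remainder back into the pending offset (processed_system_time only advances by 5 * NS_PER_TICK), after which do_timer(5) accounts all five jiffies in one call, chunked to at most HZ ticks per call.
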
3004 Index: head-2008-12-01/arch/x86/kernel/traps_32-xen.c
3005 ===================================================================
3006 --- head-2008-12-01.orig/arch/x86/kernel/traps_32-xen.c 2008-12-03 15:49:14.000000000 +0100
3007 +++ head-2008-12-01/arch/x86/kernel/traps_32-xen.c 2008-12-01 11:29:05.000000000 +0100
3008 @@ -28,6 +28,7 @@
3009 #include <linux/kprobes.h>
3010 #include <linux/kexec.h>
3011 #include <linux/unwind.h>
3012 +#include <linux/uaccess.h>
3013
3014 #ifdef CONFIG_EISA
3015 #include <linux/ioport.h>
3016 @@ -40,7 +41,6 @@
3017
3018 #include <asm/processor.h>
3019 #include <asm/system.h>
3020 -#include <asm/uaccess.h>
3021 #include <asm/io.h>
3022 #include <asm/atomic.h>
3023 #include <asm/debugreg.h>
3024 @@ -51,11 +51,14 @@
3025 #include <asm/smp.h>
3026 #include <asm/arch_hooks.h>
3027 #include <asm/kdebug.h>
3028 +#include <asm/stacktrace.h>
3029
3030 #include <linux/module.h>
3031
3032 #include "mach_traps.h"
3033
3034 +int panic_on_unrecovered_nmi;
3035 +
3036 asmlinkage int system_call(void);
3037
3038 struct desc_struct default_ldt[] = { { 0, 0 }, { 0, 0 }, { 0, 0 },
3039 @@ -124,62 +127,63 @@ static inline int valid_stack_ptr(struct
3040 p < (void *)tinfo + THREAD_SIZE - 3;
3041 }
3042
3043 -/*
3044 - * Print one address/symbol entries per line.
3045 - */
3046 -static inline void print_addr_and_symbol(unsigned long addr, char *log_lvl)
3047 -{
3048 - printk(" [<%08lx>] ", addr);
3049 -
3050 - print_symbol("%s\n", addr);
3051 -}
3052 -
3053 static inline unsigned long print_context_stack(struct thread_info *tinfo,
3054 unsigned long *stack, unsigned long ebp,
3055 - char *log_lvl)
3056 + struct stacktrace_ops *ops, void *data)
3057 {
3058 unsigned long addr;
3059
3060 #ifdef CONFIG_FRAME_POINTER
3061 while (valid_stack_ptr(tinfo, (void *)ebp)) {
3062 + unsigned long new_ebp;
3063 addr = *(unsigned long *)(ebp + 4);
3064 - print_addr_and_symbol(addr, log_lvl);
3065 + ops->address(data, addr);
3066 /*
3067 * break out of recursive entries (such as
3068 - * end_of_stack_stop_unwind_function):
3069 + * end_of_stack_stop_unwind_function). Also,
3070 + * we can never allow a frame pointer to
3071 + * move downwards!
3072 */
3073 - if (ebp == *(unsigned long *)ebp)
3074 + new_ebp = *(unsigned long *)ebp;
3075 + if (new_ebp <= ebp)
3076 break;
3077 - ebp = *(unsigned long *)ebp;
3078 + ebp = new_ebp;
3079 }
3080 #else
3081 while (valid_stack_ptr(tinfo, stack)) {
3082 addr = *stack++;
3083 if (__kernel_text_address(addr))
3084 - print_addr_and_symbol(addr, log_lvl);
3085 + ops->address(data, addr);
3086 }
3087 #endif
3088 return ebp;
3089 }
3090
3091 +struct ops_and_data {
3092 + struct stacktrace_ops *ops;
3093 + void *data;
3094 +};
3095 +
3096 static asmlinkage int
3097 -show_trace_unwind(struct unwind_frame_info *info, void *log_lvl)
3098 +dump_trace_unwind(struct unwind_frame_info *info, void *data)
3099 {
3100 + struct ops_and_data *oad = (struct ops_and_data *)data;
3101 int n = 0;
3102
3103 while (unwind(info) == 0 && UNW_PC(info)) {
3104 n++;
3105 - print_addr_and_symbol(UNW_PC(info), log_lvl);
3106 + oad->ops->address(oad->data, UNW_PC(info));
3107 if (arch_unw_user_mode(info))
3108 break;
3109 }
3110 return n;
3111 }
3112
3113 -static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
3114 - unsigned long *stack, char *log_lvl)
3115 +void dump_trace(struct task_struct *task, struct pt_regs *regs,
3116 + unsigned long *stack,
3117 + struct stacktrace_ops *ops, void *data)
3118 {
3119 - unsigned long ebp;
3120 + unsigned long ebp = 0;
3121
3122 if (!task)
3123 task = current;
3124 @@ -187,54 +191,116 @@ static void show_trace_log_lvl(struct ta
3125 if (call_trace >= 0) {
3126 int unw_ret = 0;
3127 struct unwind_frame_info info;
3128 + struct ops_and_data oad = { .ops = ops, .data = data };
3129
3130 if (regs) {
3131 if (unwind_init_frame_info(&info, task, regs) == 0)
3132 - unw_ret = show_trace_unwind(&info, log_lvl);
3133 + unw_ret = dump_trace_unwind(&info, &oad);
3134 } else if (task == current)
3135 - unw_ret = unwind_init_running(&info, show_trace_unwind, log_lvl);
3136 + unw_ret = unwind_init_running(&info, dump_trace_unwind, &oad);
3137 else {
3138 if (unwind_init_blocked(&info, task) == 0)
3139 - unw_ret = show_trace_unwind(&info, log_lvl);
3140 + unw_ret = dump_trace_unwind(&info, &oad);
3141 }
3142 if (unw_ret > 0) {
3143 if (call_trace == 1 && !arch_unw_user_mode(&info)) {
3144 - print_symbol("DWARF2 unwinder stuck at %s\n",
3145 + ops->warning_symbol(data, "DWARF2 unwinder stuck at %s\n",
3146 UNW_PC(&info));
3147 if (UNW_SP(&info) >= PAGE_OFFSET) {
3148 - printk("Leftover inexact backtrace:\n");
3149 + ops->warning(data, "Leftover inexact backtrace:\n");
3150 stack = (void *)UNW_SP(&info);
3151 + if (!stack)
3152 + return;
3153 + ebp = UNW_FP(&info);
3154 } else
3155 - printk("Full inexact backtrace again:\n");
3156 + ops->warning(data, "Full inexact backtrace again:\n");
3157 } else if (call_trace >= 1)
3158 return;
3159 else
3160 - printk("Full inexact backtrace again:\n");
3161 + ops->warning(data, "Full inexact backtrace again:\n");
3162 } else
3163 - printk("Inexact backtrace:\n");
3164 + ops->warning(data, "Inexact backtrace:\n");
3165 }
3166 -
3167 - if (task == current) {
3168 - /* Grab ebp right from our regs */
3169 - asm ("movl %%ebp, %0" : "=r" (ebp) : );
3170 - } else {
3171 - /* ebp is the last reg pushed by switch_to */
3172 - ebp = *(unsigned long *) task->thread.esp;
3173 + if (!stack) {
3174 + unsigned long dummy;
3175 + stack = &dummy;
3176 + if (task && task != current)
3177 + stack = (unsigned long *)task->thread.esp;
3178 + }
3179 +
3180 +#ifdef CONFIG_FRAME_POINTER
3181 + if (!ebp) {
3182 + if (task == current) {
3183 + /* Grab ebp right from our regs */
3184 + asm ("movl %%ebp, %0" : "=r" (ebp) : );
3185 + } else {
3186 + /* ebp is the last reg pushed by switch_to */
3187 + ebp = *(unsigned long *) task->thread.esp;
3188 + }
3189 }
3190 +#endif
3191
3192 while (1) {
3193 struct thread_info *context;
3194 context = (struct thread_info *)
3195 ((unsigned long)stack & (~(THREAD_SIZE - 1)));
3196 - ebp = print_context_stack(context, stack, ebp, log_lvl);
3197 + ebp = print_context_stack(context, stack, ebp, ops, data);
3198 + /* Should be after the line below, but somewhere
3199 + in early boot context comes out corrupted and we
3200 + can't reference it -AK */
3201 + if (ops->stack(data, "IRQ") < 0)
3202 + break;
3203 stack = (unsigned long*)context->previous_esp;
3204 if (!stack)
3205 break;
3206 - printk("%s =======================\n", log_lvl);
3207 }
3208 }
3209 +EXPORT_SYMBOL(dump_trace);
3210 +
3211 +static void
3212 +print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
3213 +{
3214 + printk(data);
3215 + print_symbol(msg, symbol);
3216 + printk("\n");
3217 +}
3218 +
3219 +static void print_trace_warning(void *data, char *msg)
3220 +{
3221 + printk("%s%s\n", (char *)data, msg);
3222 +}
3223
3224 -void show_trace(struct task_struct *task, struct pt_regs *regs, unsigned long * stack)
3225 +static int print_trace_stack(void *data, char *name)
3226 +{
3227 + return 0;
3228 +}
3229 +
3230 +/*
3231 + * Print one address/symbol entries per line.
3232 + */
3233 +static void print_trace_address(void *data, unsigned long addr)
3234 +{
3235 + printk("%s [<%08lx>] ", (char *)data, addr);
3236 + print_symbol("%s\n", addr);
3237 +}
3238 +
3239 +static struct stacktrace_ops print_trace_ops = {
3240 + .warning = print_trace_warning,
3241 + .warning_symbol = print_trace_warning_symbol,
3242 + .stack = print_trace_stack,
3243 + .address = print_trace_address,
3244 +};
3245 +
3246 +static void
3247 +show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
3248 + unsigned long * stack, char *log_lvl)
3249 +{
3250 + dump_trace(task, regs, stack, &print_trace_ops, log_lvl);
3251 + printk("%s =======================\n", log_lvl);
3252 +}
3253 +
3254 +void show_trace(struct task_struct *task, struct pt_regs *regs,
3255 + unsigned long * stack)
3256 {
3257 show_trace_log_lvl(task, regs, stack, "");
3258 }
3259 @@ -297,12 +363,13 @@ void show_registers(struct pt_regs *regs
3260 ss = regs->xss & 0xffff;
3261 }
3262 print_modules();
3263 - printk(KERN_EMERG "CPU: %d\nEIP: %04x:[<%08lx>] %s VLI\n"
3264 - "EFLAGS: %08lx (%s %.*s) \n",
3265 + printk(KERN_EMERG "CPU: %d\n"
3266 + KERN_EMERG "EIP: %04x:[<%08lx>] %s VLI\n"
3267 + KERN_EMERG "EFLAGS: %08lx (%s %.*s)\n",
3268 smp_processor_id(), 0xffff & regs->xcs, regs->eip,
3269 - print_tainted(), regs->eflags, system_utsname.release,
3270 - (int)strcspn(system_utsname.version, " "),
3271 - system_utsname.version);
3272 + print_tainted(), regs->eflags, init_utsname()->release,
3273 + (int)strcspn(init_utsname()->version, " "),
3274 + init_utsname()->version);
3275 print_symbol(KERN_EMERG "EIP is at %s\n", regs->eip);
3276 printk(KERN_EMERG "eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n",
3277 regs->eax, regs->ebx, regs->ecx, regs->edx);
3278 @@ -319,6 +386,8 @@ void show_registers(struct pt_regs *regs
3279 */
3280 if (in_kernel) {
3281 u8 __user *eip;
3282 + int code_bytes = 64;
3283 + unsigned char c;
3284
3285 printk("\n" KERN_EMERG "Stack: ");
3286 show_stack_log_lvl(NULL, regs, (unsigned long *)esp, KERN_EMERG);
3287 @@ -326,9 +395,12 @@ void show_registers(struct pt_regs *regs
3288 printk(KERN_EMERG "Code: ");
3289
3290 eip = (u8 __user *)regs->eip - 43;
3291 - for (i = 0; i < 64; i++, eip++) {
3292 - unsigned char c;
3293 -
3294 + if (eip < (u8 __user *)PAGE_OFFSET || __get_user(c, eip)) {
3295 + /* try starting at EIP */
3296 + eip = (u8 __user *)regs->eip;
3297 + code_bytes = 32;
3298 + }
3299 + for (i = 0; i < code_bytes; i++, eip++) {
3300 if (eip < (u8 __user *)PAGE_OFFSET || __get_user(c, eip)) {
3301 printk(" Bad EIP value.");
3302 break;
3303 @@ -349,7 +421,7 @@ static void handle_BUG(struct pt_regs *r
3304
3305 if (eip < PAGE_OFFSET)
3306 return;
3307 - if (__get_user(ud2, (unsigned short __user *)eip))
3308 + if (probe_kernel_address((unsigned short __user *)eip, ud2))
3309 return;
3310 if (ud2 != 0x0b0f)
3311 return;
3312 @@ -362,7 +434,8 @@ static void handle_BUG(struct pt_regs *r
3313 char *file;
3314 char c;
3315
3316 - if (__get_user(line, (unsigned short __user *)(eip + 2)))
3317 + if (probe_kernel_address((unsigned short __user *)(eip + 2),
3318 + line))
3319 break;
3320 if (__get_user(file, (char * __user *)(eip + 4)) ||
3321 (unsigned long)file < PAGE_OFFSET || __get_user(c, file))
3322 @@ -604,18 +677,24 @@ gp_in_kernel:
3323 }
3324 }
3325
3326 -static void mem_parity_error(unsigned char reason, struct pt_regs * regs)
3327 +static __kprobes void
3328 +mem_parity_error(unsigned char reason, struct pt_regs * regs)
3329 {
3330 - printk(KERN_EMERG "Uhhuh. NMI received. Dazed and confused, but trying "
3331 - "to continue\n");
3332 + printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x on "
3333 + "CPU %d.\n", reason, smp_processor_id());
3334 printk(KERN_EMERG "You probably have a hardware problem with your RAM "
3335 "chips\n");
3336 + if (panic_on_unrecovered_nmi)
3337 + panic("NMI: Not continuing");
3338 +
3339 + printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
3340
3341 /* Clear and disable the memory parity error line. */
3342 clear_mem_error(reason);
3343 }
3344
3345 -static void io_check_error(unsigned char reason, struct pt_regs * regs)
3346 +static __kprobes void
3347 +io_check_error(unsigned char reason, struct pt_regs * regs)
3348 {
3349 printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n");
3350 show_registers(regs);
3351 @@ -624,7 +703,8 @@ static void io_check_error(unsigned char
3352 clear_io_check_error(reason);
3353 }
3354
3355 -static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
3356 +static __kprobes void
3357 +unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
3358 {
3359 #ifdef CONFIG_MCA
3360 /* Might actually be able to figure out what the guilty party
3361 @@ -634,15 +714,18 @@ static void unknown_nmi_error(unsigned c
3362 return;
3363 }
3364 #endif
3365 - printk("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
3366 - reason, smp_processor_id());
3367 - printk("Dazed and confused, but trying to continue\n");
3368 - printk("Do you have a strange power saving mode enabled?\n");
3369 + printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x on "
3370 + "CPU %d.\n", reason, smp_processor_id());
3371 + printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n");
3372 + if (panic_on_unrecovered_nmi)
3373 + panic("NMI: Not continuing");
3374 +
3375 + printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
3376 }
3377
3378 static DEFINE_SPINLOCK(nmi_print_lock);
3379
3380 -void die_nmi (struct pt_regs *regs, const char *msg)
3381 +void __kprobes die_nmi(struct pt_regs *regs, const char *msg)
3382 {
3383 if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 2, SIGINT) ==
3384 NOTIFY_STOP)
3385 @@ -674,7 +757,7 @@ void die_nmi (struct pt_regs *regs, cons
3386 do_exit(SIGSEGV);
3387 }
3388
3389 -static void default_do_nmi(struct pt_regs * regs)
3390 +static __kprobes void default_do_nmi(struct pt_regs * regs)
3391 {
3392 unsigned char reason = 0;
3393
3394 @@ -691,12 +774,12 @@ static void default_do_nmi(struct pt_reg
3395 * Ok, so this is none of the documented NMI sources,
3396 * so it must be the NMI watchdog.
3397 */
3398 - if (nmi_watchdog) {
3399 - nmi_watchdog_tick(regs);
3400 + if (nmi_watchdog_tick(regs, reason))
3401 return;
3402 - }
3403 + if (!do_nmi_callback(regs, smp_processor_id()))
3404 #endif
3405 - unknown_nmi_error(reason, regs);
3406 + unknown_nmi_error(reason, regs);
3407 +
3408 return;
3409 }
3410 if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
3411 @@ -712,14 +795,7 @@ static void default_do_nmi(struct pt_reg
3412 reassert_nmi();
3413 }
3414
3415 -static int dummy_nmi_callback(struct pt_regs * regs, int cpu)
3416 -{
3417 - return 0;
3418 -}
3419 -
3420 -static nmi_callback_t nmi_callback = dummy_nmi_callback;
3421 -
3422 -fastcall void do_nmi(struct pt_regs * regs, long error_code)
3423 +fastcall __kprobes void do_nmi(struct pt_regs * regs, long error_code)
3424 {
3425 int cpu;
3426
3427 @@ -729,25 +805,11 @@ fastcall void do_nmi(struct pt_regs * re
3428
3429 ++nmi_count(cpu);
3430
3431 - if (!rcu_dereference(nmi_callback)(regs, cpu))
3432 - default_do_nmi(regs);
3433 + default_do_nmi(regs);
3434
3435 nmi_exit();
3436 }
3437
3438 -void set_nmi_callback(nmi_callback_t callback)
3439 -{
3440 - vmalloc_sync_all();
3441 - rcu_assign_pointer(nmi_callback, callback);
3442 -}
3443 -EXPORT_SYMBOL_GPL(set_nmi_callback);
3444 -
3445 -void unset_nmi_callback(void)
3446 -{
3447 - nmi_callback = dummy_nmi_callback;
3448 -}
3449 -EXPORT_SYMBOL_GPL(unset_nmi_callback);
3450 -
3451 #ifdef CONFIG_KPROBES
3452 fastcall void __kprobes do_int3(struct pt_regs *regs, long error_code)
3453 {
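
dump_trace() above separates stack walking from output via struct stacktrace_ops, with print_trace_ops as the printk-based consumer. A minimal alternative consumer that merely counts kernel addresses might look like this sketch (count_ops and all of its callbacks are hypothetical):

	static void count_warning(void *data, char *msg) { }
	static void count_warning_symbol(void *data, char *msg,
					 unsigned long symbol) { }

	static int count_stack(void *data, char *name)
	{
		return 0;		/* keep walking across stacks */
	}

	static void count_address(void *data, unsigned long addr)
	{
		++*(unsigned int *)data;
	}

	static struct stacktrace_ops count_ops = {
		.warning	= count_warning,
		.warning_symbol	= count_warning_symbol,
		.stack		= count_stack,
		.address	= count_address,
	};

	/* usage: unsigned int n = 0;
	 *        dump_trace(current, NULL, NULL, &count_ops, &n); */
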
3454 Index: head-2008-12-01/arch/x86/mach-xen/setup.c
3455 ===================================================================
3456 --- head-2008-12-01.orig/arch/x86/mach-xen/setup.c 2008-12-03 15:49:14.000000000 +0100
3457 +++ head-2008-12-01/arch/x86/mach-xen/setup.c 2008-12-01 11:29:05.000000000 +0100
3458 @@ -103,8 +103,10 @@ void __init pre_setup_arch_hook(void)
3459
3460 setup_xen_features();
3461
3462 - if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0)
3463 - set_fixaddr_top(pp.virt_start);
3464 + if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0) {
3465 + hypervisor_virt_start = pp.virt_start;
3466 + reserve_top_address(0UL - pp.virt_start);
3467 + }
3468
3469 if (HYPERVISOR_memory_op(XENMEM_machphys_mapping, &mapping) == 0) {
3470 machine_to_phys_mapping = (unsigned long *)mapping.v_start;
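
The mach-xen change above derives the reservation size from the hypervisor start address: 0UL - pp.virt_start is the length of the range from virt_start up to the 4GB wrap-around. Worked values:

	/* With pp.virt_start == 0xFC000000 (a 64MB hypervisor hole),
	 * 0UL - 0xFC000000 == 0x04000000, so reserve_top_address()
	 * reserves the top 64MB and relocates the fixmap below it. */
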
3471 Index: head-2008-12-01/arch/x86/mm/fault_32-xen.c
3472 ===================================================================
3473 --- head-2008-12-01.orig/arch/x86/mm/fault_32-xen.c 2008-12-03 15:49:14.000000000 +0100
3474 +++ head-2008-12-01/arch/x86/mm/fault_32-xen.c 2008-12-01 11:29:05.000000000 +0100
3475 @@ -27,21 +27,24 @@
3476 #include <asm/uaccess.h>
3477 #include <asm/desc.h>
3478 #include <asm/kdebug.h>
3479 +#include <asm/segment.h>
3480
3481 extern void die(const char *,struct pt_regs *,long);
3482
3483 -#ifdef CONFIG_KPROBES
3484 -ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
3485 +static ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
3486 +
3487 int register_page_fault_notifier(struct notifier_block *nb)
3488 {
3489 vmalloc_sync_all();
3490 return atomic_notifier_chain_register(&notify_page_fault_chain, nb);
3491 }
3492 +EXPORT_SYMBOL_GPL(register_page_fault_notifier);
3493
3494 int unregister_page_fault_notifier(struct notifier_block *nb)
3495 {
3496 return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb);
3497 }
3498 +EXPORT_SYMBOL_GPL(unregister_page_fault_notifier);
3499
3500 static inline int notify_page_fault(enum die_val val, const char *str,
3501 struct pt_regs *regs, long err, int trap, int sig)
3502 @@ -55,14 +58,6 @@ static inline int notify_page_fault(enum
3503 };
3504 return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args);
3505 }
3506 -#else
3507 -static inline int notify_page_fault(enum die_val val, const char *str,
3508 - struct pt_regs *regs, long err, int trap, int sig)
3509 -{
3510 - return NOTIFY_DONE;
3511 -}
3512 -#endif
3513 -
3514
3515 /*
3516 * Unlock any spinlocks which will prevent us from getting the
3517 @@ -119,10 +114,10 @@ static inline unsigned long get_segment_
3518 }
3519
3520 /* The standard kernel/user address space limit. */
3521 - *eip_limit = (seg & 2) ? USER_DS.seg : KERNEL_DS.seg;
3522 + *eip_limit = user_mode(regs) ? USER_DS.seg : KERNEL_DS.seg;
3523
3524 /* By far the most common cases. */
3525 - if (likely(seg == __USER_CS || seg == GET_KERNEL_CS()))
3526 + if (likely(SEGMENT_IS_FLAT_CODE(seg)))
3527 return eip;
3528
3529 /* Check the segment exists, is within the current LDT/GDT size,
3530 @@ -559,11 +554,7 @@ good_area:
3531 write = 0;
3532 switch (error_code & 3) {
3533 default: /* 3: write, present */
3534 -#ifdef TEST_VERIFY_AREA
3535 - if (regs->cs == GET_KERNEL_CS())
3536 - printk("WP fault at %08lx\n", regs->eip);
3537 -#endif
3538 - /* fall through */
3539 + /* fall through */
3540 case 2: /* write, not present */
3541 if (!(vma->vm_flags & VM_WRITE))
3542 goto bad_area;
3543 @@ -572,7 +563,7 @@ good_area:
3544 case 1: /* read, present */
3545 goto bad_area;
3546 case 0: /* read, not present */
3547 - if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
3548 + if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
3549 goto bad_area;
3550 }
3551
3552 @@ -704,7 +695,7 @@ no_context:
3553 */
3554 out_of_memory:
3555 up_read(&mm->mmap_sem);
3556 - if (tsk->pid == 1) {
3557 + if (is_init(tsk)) {
3558 yield();
3559 down_read(&mm->mmap_sem);
3560 goto survive;
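
[Note: the fault notifier chain above is now built unconditionally and exported, so modules other than kprobes can subscribe. A rough sketch of a subscriber with hypothetical names; the vmalloc_sync_all() inside register_page_fault_notifier() exists so that a vmalloc-mapped module's own code cannot recursively fault inside the chain.]

    #include <linux/notifier.h>
    #include <asm/kdebug.h>

    /* Hypothetical page-fault observer. */
    static int my_fault_handler(struct notifier_block *nb,
                                unsigned long val, void *data)
    {
            struct die_args *args = data;

            if (val == DIE_PAGE_FAULT && args->trapnr == 14) {
                    /* args->regs and args->err describe the fault */
            }
            return NOTIFY_DONE;     /* never claim the fault here */
    }

    static struct notifier_block my_fault_nb = {
            .notifier_call = my_fault_handler,
    };

    /* call register_page_fault_notifier(&my_fault_nb); to subscribe */
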
3561 Index: head-2008-12-01/arch/x86/mm/highmem_32-xen.c
3562 ===================================================================
3563 --- head-2008-12-01.orig/arch/x86/mm/highmem_32-xen.c 2008-12-03 15:49:14.000000000 +0100
3564 +++ head-2008-12-01/arch/x86/mm/highmem_32-xen.c 2008-12-01 11:29:05.000000000 +0100
3565 @@ -38,11 +38,9 @@ static void *__kmap_atomic(struct page *
3566
3567 idx = type + KM_TYPE_NR*smp_processor_id();
3568 vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
3569 -#ifdef CONFIG_DEBUG_HIGHMEM
3570 if (!pte_none(*(kmap_pte-idx)))
3571 BUG();
3572 -#endif
3573 - set_pte_at_sync(&init_mm, vaddr, kmap_pte-idx, mk_pte(page, prot));
3574 + set_pte_at(&init_mm, vaddr, kmap_pte-idx, mk_pte(page, prot));
3575
3576 return (void*) vaddr;
3577 }
3578 @@ -62,36 +60,26 @@ void *kmap_atomic_pte(struct page *page,
3579
3580 void kunmap_atomic(void *kvaddr, enum km_type type)
3581 {
3582 -#if defined(CONFIG_DEBUG_HIGHMEM) || defined(CONFIG_XEN)
3583 unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
3584 enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id();
3585
3586 - if (vaddr < FIXADDR_START) { // FIXME
3587 +#ifdef CONFIG_DEBUG_HIGHMEM
3588 + if (vaddr >= PAGE_OFFSET && vaddr < (unsigned long)high_memory) {
3589 dec_preempt_count();
3590 preempt_check_resched();
3591 return;
3592 }
3593 -#endif
3594
3595 -#if defined(CONFIG_DEBUG_HIGHMEM)
3596 if (vaddr != __fix_to_virt(FIX_KMAP_BEGIN+idx))
3597 BUG();
3598 -
3599 - /*
3600 - * force other mappings to Oops if they'll try to access
3601 - * this pte without first remap it
3602 - */
3603 - pte_clear(&init_mm, vaddr, kmap_pte-idx);
3604 - __flush_tlb_one(vaddr);
3605 -#elif defined(CONFIG_XEN)
3606 +#endif
3607 /*
3608 - * We must ensure there are no dangling pagetable references when
3609 - * returning memory to Xen (decrease_reservation).
3610 - * XXX TODO: We could make this faster by only zapping when
3611 - * kmap_flush_unused is called but that is trickier and more invasive.
3612 + * Force other mappings to Oops if they try to access this pte
3613 + * without first remapping it. Keeping stale mappings around is also
3614 + * a bad idea, in case the page changes cacheability attributes or
3615 + * becomes a protected page in a hypervisor.

3616 */
3617 - pte_clear(&init_mm, vaddr, kmap_pte-idx);
3618 -#endif
3619 + kpte_clear_flush(kmap_pte-idx, vaddr);
3620
3621 dec_preempt_count();
3622 preempt_check_resched();
3623 @@ -110,7 +98,6 @@ void *kmap_atomic_pfn(unsigned long pfn,
3624 idx = type + KM_TYPE_NR*smp_processor_id();
3625 vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
3626 set_pte(kmap_pte-idx, pfn_pte(pfn, kmap_prot));
3627 - __flush_tlb_one(vaddr);
3628
3629 return (void*) vaddr;
3630 }
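
[Note: after this change kunmap_atomic() always zaps and flushes the slot via kpte_clear_flush(), whether or not CONFIG_DEBUG_HIGHMEM is set — which is what the Xen decrease_reservation path needs. Caller usage is unchanged; a small sketch with the 2.6.19 API, hypothetical helper, KM_USER0 being one of the fixed per-CPU slots:]

    #include <linux/highmem.h>
    #include <linux/string.h>

    /* Hypothetical: copy a (possibly high) page through an atomic map. */
    static void copy_from_page(void *dst, struct page *page)
    {
            char *src = kmap_atomic(page, KM_USER0);

            memcpy(dst, src, PAGE_SIZE);
            kunmap_atomic(src, KM_USER0);   /* clears the pte and flushes */
    }
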
3631 Index: head-2008-12-01/arch/x86/mm/hypervisor.c
3632 ===================================================================
3633 --- head-2008-12-01.orig/arch/x86/mm/hypervisor.c 2008-12-03 15:49:14.000000000 +0100
3634 +++ head-2008-12-01/arch/x86/mm/hypervisor.c 2008-12-01 11:29:05.000000000 +0100
3635 @@ -31,6 +31,7 @@
3636 */
3637
3638 #include <linux/sched.h>
3639 +#include <linux/hardirq.h>
3640 #include <linux/mm.h>
3641 #include <linux/vmalloc.h>
3642 #include <asm/page.h>
3643 @@ -44,6 +45,302 @@
3644 #include <asm/tlbflush.h>
3645 #include <linux/highmem.h>
3646
3647 +EXPORT_SYMBOL(hypercall_page);
3648 +
3649 +#define NR_MC BITS_PER_LONG
3650 +#define NR_MMU BITS_PER_LONG
3651 +#define NR_MMUEXT (BITS_PER_LONG / 4)
3652 +
3653 +DEFINE_PER_CPU(bool, xen_lazy_mmu);
3654 +EXPORT_PER_CPU_SYMBOL(xen_lazy_mmu);
3655 +struct lazy_mmu {
3656 + unsigned int nr_mc, nr_mmu, nr_mmuext;
3657 + multicall_entry_t mc[NR_MC];
3658 + mmu_update_t mmu[NR_MMU];
3659 + struct mmuext_op mmuext[NR_MMUEXT];
3660 +};
3661 +static DEFINE_PER_CPU(struct lazy_mmu, lazy_mmu);
3662 +
3663 +static inline bool use_lazy_mmu_mode(void)
3664 +{
3665 +#ifdef CONFIG_PREEMPT
3666 + if (!preempt_count())
3667 + return false;
3668 +#endif
3669 + return !irq_count();
3670 +}
3671 +
3672 +static void multicall_failed(const multicall_entry_t *mc, int rc)
3673 +{
3674 + printk(KERN_EMERG "hypercall#%lu(%lx, %lx, %lx, %lx)"
3675 + " failed: %d (caller %lx)\n",
3676 + mc->op, mc->args[0], mc->args[1], mc->args[2], mc->args[3],
3677 + rc, mc->args[5]);
3678 + BUG();
3679 +}
3680 +
3681 +int xen_multicall_flush(bool ret_last) {
3682 + struct lazy_mmu *lazy = &__get_cpu_var(lazy_mmu);
3683 + multicall_entry_t *mc = lazy->mc;
3684 + unsigned int count = lazy->nr_mc;
3685 +
3686 + if (!count || !use_lazy_mmu_mode())
3687 + return 0;
3688 +
3689 + lazy->nr_mc = 0;
3690 + lazy->nr_mmu = 0;
3691 + lazy->nr_mmuext = 0;
3692 +
3693 + if (count == 1) {
3694 + int rc = _hypercall(int, mc->op, mc->args[0], mc->args[1],
3695 + mc->args[2], mc->args[3], mc->args[4]);
3696 +
3697 + if (unlikely(rc)) {
3698 + if (ret_last)
3699 + return rc;
3700 + multicall_failed(mc, rc);
3701 + }
3702 + } else {
3703 + if (HYPERVISOR_multicall(mc, count))
3704 + BUG();
3705 + while (count-- > ret_last)
3706 + if (unlikely(mc++->result))
3707 + multicall_failed(mc - 1, mc[-1].result);
3708 + if (ret_last)
3709 + return mc->result;
3710 + }
3711 +
3712 + return 0;
3713 +}
3714 +EXPORT_SYMBOL(xen_multicall_flush);
3715 +
3716 +int xen_multi_update_va_mapping(unsigned long va, pte_t pte,
3717 + unsigned long uvmf)
3718 +{
3719 + struct lazy_mmu *lazy = &__get_cpu_var(lazy_mmu);
3720 + multicall_entry_t *mc;
3721 +
3722 + if (unlikely(!use_lazy_mmu_mode()))
3723 +#ifdef CONFIG_X86_PAE
3724 + return _hypercall4(int, update_va_mapping, va,
3725 + pte.pte_low, pte.pte_high, uvmf);
3726 +#else
3727 + return _hypercall3(int, update_va_mapping, va,
3728 + pte.pte, uvmf);
3729 +#endif
3730 +
3731 + if (unlikely(lazy->nr_mc == NR_MC))
3732 + xen_multicall_flush(false);
3733 +
3734 + mc = lazy->mc + lazy->nr_mc++;
3735 + mc->op = __HYPERVISOR_update_va_mapping;
3736 + mc->args[0] = va;
3737 +#ifndef CONFIG_X86_PAE
3738 + mc->args[1] = pte.pte;
3739 +#else
3740 + mc->args[1] = pte.pte_low;
3741 + mc->args[2] = pte.pte_high;
3742 +#endif
3743 + mc->args[MULTI_UVMFLAGS_INDEX] = uvmf;
3744 + mc->args[5] = (long)__builtin_return_address(0);
3745 +
3746 + return 0;
3747 +}
3748 +
3749 +static inline bool mmu_may_merge(const multicall_entry_t *mc,
3750 + unsigned int op, domid_t domid)
3751 +{
3752 + return mc->op == op && !mc->args[2] && mc->args[3] == domid;
3753 +}
3754 +
3755 +int xen_multi_mmu_update(mmu_update_t *src, unsigned int count,
3756 + unsigned int *success_count, domid_t domid)
3757 +{
3758 + struct lazy_mmu *lazy = &__get_cpu_var(lazy_mmu);
3759 + multicall_entry_t *mc = lazy->mc + lazy->nr_mc;
3760 + mmu_update_t *dst;
3761 + bool commit, merge;
3762 +
3763 + if (unlikely(!use_lazy_mmu_mode()))
3764 + return _hypercall4(int, mmu_update, src, count,
3765 + success_count, domid);
3766 +
3767 + commit = (lazy->nr_mmu + count) > NR_MMU || success_count;
3768 + merge = lazy->nr_mc && !commit
3769 + && mmu_may_merge(mc - 1, __HYPERVISOR_mmu_update, domid);
3770 + if (unlikely(lazy->nr_mc == NR_MC) && !merge) {
3771 + xen_multicall_flush(false);
3772 + mc = lazy->mc;
3773 + commit = count > NR_MMU || success_count;
3774 + }
3775 +
3776 + if (!lazy->nr_mc && unlikely(commit))
3777 + return _hypercall4(int, mmu_update, src, count,
3778 + success_count, domid);
3779 +
3780 + dst = lazy->mmu + lazy->nr_mmu;
3781 + lazy->nr_mmu += count;
3782 + if (merge) {
3783 + mc[-1].args[1] += count;
3784 + memcpy(dst, src, count * sizeof(*src));
3785 + } else {
3786 + ++lazy->nr_mc;
3787 + mc->op = __HYPERVISOR_mmu_update;
3788 + if (!commit) {
3789 + mc->args[0] = (unsigned long)dst;
3790 + memcpy(dst, src, count * sizeof(*src));
3791 + } else
3792 + mc->args[0] = (unsigned long)src;
3793 + mc->args[1] = count;
3794 + mc->args[2] = (unsigned long)success_count;
3795 + mc->args[3] = domid;
3796 + mc->args[5] = (long)__builtin_return_address(0);
3797 + }
3798 +
3799 + while (!commit && count--)
3800 + switch (src++->ptr & (sizeof(pteval_t) - 1)) {
3801 + case MMU_NORMAL_PT_UPDATE:
3802 + case MMU_PT_UPDATE_PRESERVE_AD:
3803 + break;
3804 + default:
3805 + commit = true;
3806 + break;
3807 + }
3808 +
3809 + return commit ? xen_multicall_flush(true) : 0;
3810 +}
3811 +
3812 +int xen_multi_mmuext_op(struct mmuext_op *src, unsigned int count,
3813 + unsigned int *success_count, domid_t domid)
3814 +{
3815 + struct lazy_mmu *lazy = &__get_cpu_var(lazy_mmu);
3816 + multicall_entry_t *mc;
3817 + struct mmuext_op *dst;
3818 + bool commit, merge;
3819 +
3820 + if (unlikely(!use_lazy_mmu_mode()))
3821 + return _hypercall4(int, mmuext_op, src, count,
3822 + success_count, domid);
3823 +
3824 + /*
3825 + * While it could be useful in theory, I've never seen the body of
3826 + * this conditional be reached, hence it seems more reasonable to
3827 + * disable it for the time being.
3828 + */
3829 + if (0 && likely(count)
3830 + && likely(!success_count)
3831 + && likely(domid == DOMID_SELF)
3832 + && likely(lazy->nr_mc)
3833 + && lazy->mc[lazy->nr_mc - 1].op == __HYPERVISOR_update_va_mapping) {
3834 + unsigned long oldf, newf = UVMF_NONE;
3835 +
3836 + switch (src->cmd) {
3837 + case MMUEXT_TLB_FLUSH_ALL:
3838 + newf = UVMF_TLB_FLUSH | UVMF_ALL;
3839 + break;
3840 + case MMUEXT_INVLPG_ALL:
3841 + newf = UVMF_INVLPG | UVMF_ALL;
3842 + break;
3843 + case MMUEXT_TLB_FLUSH_MULTI:
3844 + newf = UVMF_TLB_FLUSH | UVMF_MULTI
3845 + | (unsigned long)src->arg2.vcpumask.p;
3846 + break;
3847 + case MMUEXT_INVLPG_MULTI:
3848 + newf = UVMF_INVLPG | UVMF_MULTI
3849 + | (unsigned long)src->arg2.vcpumask.p;
3850 + break;
3851 + case MMUEXT_TLB_FLUSH_LOCAL:
3852 + newf = UVMF_TLB_FLUSH | UVMF_LOCAL;
3853 + break;
3854 + case MMUEXT_INVLPG_LOCAL:
3855 + newf = UVMF_INVLPG | UVMF_LOCAL;
3856 + break;
3857 + }
3858 + mc = lazy->mc + lazy->nr_mc - 1;
3859 + oldf = mc->args[MULTI_UVMFLAGS_INDEX];
3860 + if (newf == UVMF_NONE || oldf == UVMF_NONE
3861 + || newf == (UVMF_TLB_FLUSH | UVMF_ALL))
3862 + ;
3863 + else if (oldf == (UVMF_TLB_FLUSH | UVMF_ALL))
3864 + newf = UVMF_TLB_FLUSH | UVMF_ALL;
3865 + else if ((newf & UVMF_FLUSHTYPE_MASK) == UVMF_INVLPG
3866 + && (oldf & UVMF_FLUSHTYPE_MASK) == UVMF_INVLPG
3867 + && ((src->arg1.linear_addr ^ mc->args[0])
3868 + >> PAGE_SHIFT))
3869 + newf = UVMF_NONE;
3870 + else if (((oldf | newf) & UVMF_ALL)
3871 + && !((oldf ^ newf) & UVMF_FLUSHTYPE_MASK))
3872 + newf |= UVMF_ALL;
3873 + else if ((oldf ^ newf) & ~UVMF_FLUSHTYPE_MASK)
3874 + newf = UVMF_NONE;
3875 + else if ((oldf & UVMF_FLUSHTYPE_MASK) == UVMF_TLB_FLUSH)
3876 + newf = (newf & ~UVMF_FLUSHTYPE_MASK) | UVMF_TLB_FLUSH;
3877 + else if ((newf & UVMF_FLUSHTYPE_MASK) != UVMF_TLB_FLUSH
3878 + && ((newf ^ oldf) & UVMF_FLUSHTYPE_MASK))
3879 + newf = UVMF_NONE;
3880 + if (newf != UVMF_NONE) {
3881 + mc->args[MULTI_UVMFLAGS_INDEX] = newf;
3882 + ++src;
3883 + if (!--count)
3884 + return 0;
3885 + }
3886 + }
3887 +
3888 + mc = lazy->mc + lazy->nr_mc;
3889 + commit = (lazy->nr_mmuext + count) > NR_MMUEXT || success_count;
3890 + merge = lazy->nr_mc && !commit
3891 + && mmu_may_merge(mc - 1, __HYPERVISOR_mmuext_op, domid);
3892 + if (unlikely(lazy->nr_mc == NR_MC) && !merge) {
3893 + xen_multicall_flush(false);
3894 + mc = lazy->mc;
3895 + commit = count > NR_MMUEXT || success_count;
3896 + }
3897 +
3898 + if (!lazy->nr_mc && unlikely(commit))
3899 + return _hypercall4(int, mmuext_op, src, count,
3900 + success_count, domid);
3901 +
3902 + dst = lazy->mmuext + lazy->nr_mmuext;
3903 + lazy->nr_mmuext += count;
3904 + if (merge) {
3905 + mc[-1].args[1] += count;
3906 + memcpy(dst, src, count * sizeof(*src));
3907 + } else {
3908 + ++lazy->nr_mc;
3909 + mc->op = __HYPERVISOR_mmuext_op;
3910 + if (!commit) {
3911 + mc->args[0] = (unsigned long)dst;
3912 + memcpy(dst, src, count * sizeof(*src));
3913 + } else
3914 + mc->args[0] = (unsigned long)src;
3915 + mc->args[1] = count;
3916 + mc->args[2] = (unsigned long)success_count;
3917 + mc->args[3] = domid;
3918 + mc->args[5] = (long)__builtin_return_address(0);
3919 + }
3920 +
3921 + while (!commit && count--)
3922 + switch (src++->cmd) {
3923 + case MMUEXT_PIN_L1_TABLE:
3924 + case MMUEXT_PIN_L2_TABLE:
3925 + case MMUEXT_PIN_L3_TABLE:
3926 + case MMUEXT_PIN_L4_TABLE:
3927 + case MMUEXT_UNPIN_TABLE:
3928 + case MMUEXT_TLB_FLUSH_LOCAL:
3929 + case MMUEXT_INVLPG_LOCAL:
3930 + case MMUEXT_TLB_FLUSH_MULTI:
3931 + case MMUEXT_INVLPG_MULTI:
3932 + case MMUEXT_TLB_FLUSH_ALL:
3933 + case MMUEXT_INVLPG_ALL:
3934 + break;
3935 + default:
3936 + commit = true;
3937 + break;
3938 + }
3939 +
3940 + return commit ? xen_multicall_flush(true) : 0;
3941 +}
3942 +
3943 void xen_l1_entry_update(pte_t *ptr, pte_t val)
3944 {
3945 mmu_update_t u;
3946 @@ -547,7 +844,8 @@ int write_ldt_entry(void *ldt, int entry
3947 #define MAX_BATCHED_FULL_PTES 32
3948
3949 int xen_change_pte_range(struct mm_struct *mm, pmd_t *pmd,
3950 - unsigned long addr, unsigned long end, pgprot_t newprot)
3951 + unsigned long addr, unsigned long end, pgprot_t newprot,
3952 + int dirty_accountable)
3953 {
3954 int rc = 0, i = 0;
3955 mmu_update_t u[MAX_BATCHED_FULL_PTES];
3956 @@ -560,10 +858,14 @@ int xen_change_pte_range(struct mm_struc
3957 pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
3958 do {
3959 if (pte_present(*pte)) {
3960 + pte_t ptent = pte_modify(*pte, newprot);
3961 +
3962 + if (dirty_accountable && pte_dirty(ptent))
3963 + ptent = pte_mkwrite(ptent);
3964 u[i].ptr = (__pmd_val(*pmd) & PHYSICAL_PAGE_MASK)
3965 | ((unsigned long)pte & ~PAGE_MASK)
3966 | MMU_PT_UPDATE_PRESERVE_AD;
3967 - u[i].val = __pte_val(pte_modify(*pte, newprot));
3968 + u[i].val = __pte_val(ptent);
3969 if (++i == MAX_BATCHED_FULL_PTES) {
3970 if ((rc = HYPERVISOR_mmu_update(
3971 &u[0], i, NULL, DOMID_SELF)) != 0)
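
[Note: to see how the batching added above is meant to be used — while use_lazy_mmu_mode() holds (preemption disabled, not in interrupt), the xen_multi_*() helpers only queue multicall entries, and one flush submits them together. A hypothetical caller, assuming the helper declarations from this tree's asm/hypervisor.h:]

    /* Hypothetical: two PTE updates submitted as one batch. Assumes
     * preemption is disabled so use_lazy_mmu_mode() returns true and
     * both calls merely queue entries. */
    static void remap_pair(unsigned long va1, pte_t pte1,
                           unsigned long va2, pte_t pte2)
    {
            xen_multi_update_va_mapping(va1, pte1, UVMF_INVLPG | UVMF_LOCAL);
            xen_multi_update_va_mapping(va2, pte2, UVMF_INVLPG | UVMF_LOCAL);
            xen_multicall_flush(false);  /* one HYPERVISOR_multicall here */
    }
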
3972 Index: head-2008-12-01/arch/x86/mm/init_32-xen.c
3973 ===================================================================
3974 --- head-2008-12-01.orig/arch/x86/mm/init_32-xen.c 2008-12-03 15:49:14.000000000 +0100
3975 +++ head-2008-12-01/arch/x86/mm/init_32-xen.c 2008-12-01 11:29:05.000000000 +0100
3976 @@ -462,16 +462,22 @@ EXPORT_SYMBOL(__supported_pte_mask);
3977 * on Enable
3978 * off Disable
3979 */
3980 -void __init noexec_setup(const char *str)
3981 +static int __init noexec_setup(char *str)
3982 {
3983 - if (!strncmp(str, "on",2) && cpu_has_nx) {
3984 - __supported_pte_mask |= _PAGE_NX;
3985 - disable_nx = 0;
3986 - } else if (!strncmp(str,"off",3)) {
3987 + if (!str || !strcmp(str, "on")) {
3988 + if (cpu_has_nx) {
3989 + __supported_pte_mask |= _PAGE_NX;
3990 + disable_nx = 0;
3991 + }
3992 + } else if (!strcmp(str,"off")) {
3993 disable_nx = 1;
3994 __supported_pte_mask &= ~_PAGE_NX;
3995 - }
3996 + } else
3997 + return -EINVAL;
3998 +
3999 + return 0;
4000 }
4001 +early_param("noexec", noexec_setup);
4002
4003 int nx_enabled = 0;
4004 #ifdef CONFIG_X86_PAE
4005 @@ -514,6 +520,7 @@ int __init set_kernel_exec(unsigned long
4006 pte->pte_high &= ~(1 << (_PAGE_BIT_NX - 32));
4007 else
4008 pte->pte_high |= 1 << (_PAGE_BIT_NX - 32);
4009 + pte_update_defer(&init_mm, vaddr, pte);
4010 __flush_tlb_all();
4011 out:
4012 return ret;
4013 @@ -596,18 +603,6 @@ static void __init test_wp_bit(void)
4014 }
4015 }
4016
4017 -static void __init set_max_mapnr_init(void)
4018 -{
4019 -#ifdef CONFIG_HIGHMEM
4020 - num_physpages = highend_pfn;
4021 -#else
4022 - num_physpages = max_low_pfn;
4023 -#endif
4024 -#ifdef CONFIG_FLATMEM
4025 - max_mapnr = num_physpages;
4026 -#endif
4027 -}
4028 -
4029 static struct kcore_list kcore_mem, kcore_vmalloc;
4030
4031 void __init mem_init(void)
4032 @@ -623,8 +618,7 @@ void __init mem_init(void)
4033 #endif
4034
4035 #ifdef CONFIG_FLATMEM
4036 - if (!mem_map)
4037 - BUG();
4038 + BUG_ON(!mem_map);
4039 #endif
4040
4041 bad_ppro = ppro_with_ram_bug();
4042 @@ -639,17 +633,6 @@ void __init mem_init(void)
4043 }
4044 #endif
4045
4046 - set_max_mapnr_init();
4047 -
4048 -#ifdef CONFIG_HIGHMEM
4049 - high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
4050 -#else
4051 - high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
4052 -#endif
4053 - printk("vmalloc area: %lx-%lx, maxmem %lx\n",
4054 - VMALLOC_START,VMALLOC_END,MAXMEM);
4055 - BUG_ON(VMALLOC_START > VMALLOC_END);
4056 -
4057 /* this will put all low memory onto the freelists */
4058 totalram_pages += free_all_bootmem();
4059 /* XEN: init and count low-mem pages outside initial allocation. */
4060 @@ -687,6 +670,48 @@ void __init mem_init(void)
4061 (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))
4062 );
4063
4064 +#if 1 /* double-sanity-check paranoia */
4065 + printk("virtual kernel memory layout:\n"
4066 + " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
4067 +#ifdef CONFIG_HIGHMEM
4068 + " pkmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
4069 +#endif
4070 + " vmalloc : 0x%08lx - 0x%08lx (%4ld MB)\n"
4071 + " lowmem : 0x%08lx - 0x%08lx (%4ld MB)\n"
4072 + " .init : 0x%08lx - 0x%08lx (%4ld kB)\n"
4073 + " .data : 0x%08lx - 0x%08lx (%4ld kB)\n"
4074 + " .text : 0x%08lx - 0x%08lx (%4ld kB)\n",
4075 + FIXADDR_START, FIXADDR_TOP,
4076 + (FIXADDR_TOP - FIXADDR_START) >> 10,
4077 +
4078 +#ifdef CONFIG_HIGHMEM
4079 + PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE,
4080 + (LAST_PKMAP*PAGE_SIZE) >> 10,
4081 +#endif
4082 +
4083 + VMALLOC_START, VMALLOC_END,
4084 + (VMALLOC_END - VMALLOC_START) >> 20,
4085 +
4086 + (unsigned long)__va(0), (unsigned long)high_memory,
4087 + ((unsigned long)high_memory - (unsigned long)__va(0)) >> 20,
4088 +
4089 + (unsigned long)&__init_begin, (unsigned long)&__init_end,
4090 + ((unsigned long)&__init_end - (unsigned long)&__init_begin) >> 10,
4091 +
4092 + (unsigned long)&_etext, (unsigned long)&_edata,
4093 + ((unsigned long)&_edata - (unsigned long)&_etext) >> 10,
4094 +
4095 + (unsigned long)&_text, (unsigned long)&_etext,
4096 + ((unsigned long)&_etext - (unsigned long)&_text) >> 10);
4097 +
4098 +#ifdef CONFIG_HIGHMEM
4099 + BUG_ON(PKMAP_BASE+LAST_PKMAP*PAGE_SIZE > FIXADDR_START);
4100 + BUG_ON(VMALLOC_END > PKMAP_BASE);
4101 +#endif
4102 + BUG_ON(VMALLOC_START > VMALLOC_END);
4103 + BUG_ON((unsigned long)high_memory > VMALLOC_START);
4104 +#endif /* double-sanity-check paranoia */
4105 +
4106 #ifdef CONFIG_X86_PAE
4107 if (!cpu_has_pae)
4108 panic("cannot execute a PAE-enabled kernel on a PAE-less CPU!");
4109 @@ -717,7 +742,7 @@ void __init mem_init(void)
4110 int arch_add_memory(int nid, u64 start, u64 size)
4111 {
4112 struct pglist_data *pgdata = &contig_page_data;
4113 - struct zone *zone = pgdata->node_zones + MAX_NR_ZONES-1;
4114 + struct zone *zone = pgdata->node_zones + ZONE_HIGHMEM;
4115 unsigned long start_pfn = start >> PAGE_SHIFT;
4116 unsigned long nr_pages = size >> PAGE_SHIFT;
4117
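
[Note: noexec is one of several options this patch converts from the old __setup()/parse-by-hand style to the 2.6.19 early_param() mechanism, which runs before the console and most subsystems come up. The general shape of such a handler, with a hypothetical option name:]

    #include <linux/init.h>
    #include <linux/string.h>

    /* Hypothetical early option: "myopt" or "myopt=off". */
    static int __init myopt_setup(char *str)
    {
            if (!str)               /* bare "myopt" with no '=value' */
                    return 0;
            if (!strcmp(str, "off")) {
                    /* ... disable the hypothetical feature ... */
            }
            return 0;               /* non-zero flags a malformed value */
    }
    early_param("myopt", myopt_setup);
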
4118 Index: head-2008-12-01/arch/x86/mm/ioremap_32-xen.c
4119 ===================================================================
4120 --- head-2008-12-01.orig/arch/x86/mm/ioremap_32-xen.c 2008-12-03 15:49:14.000000000 +0100
4121 +++ head-2008-12-01/arch/x86/mm/ioremap_32-xen.c 2008-12-01 11:29:05.000000000 +0100
4122 @@ -12,7 +12,7 @@
4123 #include <linux/init.h>
4124 #include <linux/slab.h>
4125 #include <linux/module.h>
4126 -#include <asm/io.h>
4127 +#include <linux/io.h>
4128 #include <asm/fixmap.h>
4129 #include <asm/cacheflush.h>
4130 #include <asm/tlbflush.h>
4131 @@ -118,7 +118,7 @@ int direct_remap_pfn_range(struct vm_are
4132 if (domid == DOMID_SELF)
4133 return -EINVAL;
4134
4135 - vma->vm_flags |= VM_IO | VM_RESERVED;
4136 + vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP;
4137
4138 vma->vm_mm->context.has_foreign_mappings = 1;
4139
4140 @@ -203,6 +203,7 @@ void __iomem * __ioremap(unsigned long p
4141 void __iomem * addr;
4142 struct vm_struct * area;
4143 unsigned long offset, last_addr;
4144 + pgprot_t prot;
4145 domid_t domid = DOMID_IO;
4146
4147 /* Don't allow wraparound or zero size */
4148 @@ -234,6 +235,8 @@ void __iomem * __ioremap(unsigned long p
4149 domid = DOMID_SELF;
4150 }
4151
4152 + prot = __pgprot(_KERNPG_TABLE | flags);
4153 +
4154 /*
4155 * Mappings have to be page-aligned
4156 */
4157 @@ -249,10 +252,9 @@ void __iomem * __ioremap(unsigned long p
4158 return NULL;
4159 area->phys_addr = phys_addr;
4160 addr = (void __iomem *) area->addr;
4161 - flags |= _KERNPG_TABLE;
4162 if (__direct_remap_pfn_range(&init_mm, (unsigned long)addr,
4163 phys_addr>>PAGE_SHIFT,
4164 - size, __pgprot(flags), domid)) {
4165 + size, prot, domid)) {
4166 vunmap((void __force *) addr);
4167 return NULL;
4168 }
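
[Note: the hunk above computes the page protections once, up front, instead of mutating the caller's flags argument. For orientation, a typical consumer of __ioremap() through the plain ioremap() wrapper looks roughly like this — the device address is hypothetical:]

    #include <linux/io.h>
    #include <linux/errno.h>

    static void __iomem *mmio;

    static int my_map_regs(void)
    {
            mmio = ioremap(0xFEB00000, 0x1000); /* hypothetical MMIO BAR */
            if (!mmio)
                    return -ENOMEM;
            /* set bit 0 of a hypothetical control register */
            writel(readl(mmio + 0x10) | 1, mmio + 0x10);
            return 0;
    }
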
4169 Index: head-2008-12-01/arch/x86/mm/pgtable_32-xen.c
4170 ===================================================================
4171 --- head-2008-12-01.orig/arch/x86/mm/pgtable_32-xen.c 2008-12-03 15:49:14.000000000 +0100
4172 +++ head-2008-12-01/arch/x86/mm/pgtable_32-xen.c 2008-12-01 11:29:05.000000000 +0100
4173 @@ -68,7 +68,9 @@ void show_mem(void)
4174 printk(KERN_INFO "%lu pages writeback\n",
4175 global_page_state(NR_WRITEBACK));
4176 printk(KERN_INFO "%lu pages mapped\n", global_page_state(NR_FILE_MAPPED));
4177 - printk(KERN_INFO "%lu pages slab\n", global_page_state(NR_SLAB));
4178 + printk(KERN_INFO "%lu pages slab\n",
4179 + global_page_state(NR_SLAB_RECLAIMABLE) +
4180 + global_page_state(NR_SLAB_UNRECLAIMABLE));
4181 printk(KERN_INFO "%lu pages pagetables\n",
4182 global_page_state(NR_PAGETABLE));
4183 }
4184 @@ -108,18 +110,11 @@ void set_pmd_pfn(unsigned long vaddr, un
4185 __flush_tlb_one(vaddr);
4186 }
4187
4188 -static int nr_fixmaps = 0;
4189 +static int fixmaps;
4190 unsigned long hypervisor_virt_start = HYPERVISOR_VIRT_START;
4191 -unsigned long __FIXADDR_TOP = (HYPERVISOR_VIRT_START - 2 * PAGE_SIZE);
4192 +unsigned long __FIXADDR_TOP = (HYPERVISOR_VIRT_START - PAGE_SIZE);
4193 EXPORT_SYMBOL(__FIXADDR_TOP);
4194
4195 -void __init set_fixaddr_top(unsigned long top)
4196 -{
4197 - BUG_ON(nr_fixmaps > 0);
4198 - hypervisor_virt_start = top;
4199 - __FIXADDR_TOP = hypervisor_virt_start - 2 * PAGE_SIZE;
4200 -}
4201 -
4202 void __set_fixmap (enum fixed_addresses idx, maddr_t phys, pgprot_t flags)
4203 {
4204 unsigned long address = __fix_to_virt(idx);
4205 @@ -141,7 +136,21 @@ void __set_fixmap (enum fixed_addresses
4206 if (HYPERVISOR_update_va_mapping(address, pte,
4207 UVMF_INVLPG|UVMF_ALL))
4208 BUG();
4209 - nr_fixmaps++;
4210 + fixmaps++;
4211 +}
4212 +
4213 +/**
4214 + * reserve_top_address - reserves a hole in the top of kernel address space
4215 + * @reserve - size of hole to reserve
4216 + *
4217 + * Can be used to relocate the fixmap area and poke a hole in the top
4218 + * of kernel address space to make room for a hypervisor.
4219 + */
4220 +void __init reserve_top_address(unsigned long reserve)
4221 +{
4222 + BUG_ON(fixmaps > 0);
4223 + __FIXADDR_TOP = -reserve - PAGE_SIZE;
4224 + __VMALLOC_RESERVE += reserve;
4225 }
4226
4227 pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
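
[Note: reserve_top_address() replaces the Xen-specific set_fixaddr_top(); __FIXADDR_TOP now sits one guard page (rather than two) below the reserved area, and the vmalloc reservation grows by the same amount. A worked example with the value assumed in the mach-xen/setup.c note earlier (0xF5800000 as the hole start):]

    /*
     * reserve_top_address(0x0A800000) gives
     *     __FIXADDR_TOP     = -0x0A800000 - 0x1000 = 0xF57FF000
     *     __VMALLOC_RESERVE += 0x0A800000
     * It must run before the first __set_fixmap(), hence the
     * BUG_ON(fixmaps > 0) above.
     */
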
4228 Index: head-2008-12-01/arch/x86/pci/irq-xen.c
4229 ===================================================================
4230 --- head-2008-12-01.orig/arch/x86/pci/irq-xen.c 2008-12-03 15:49:14.000000000 +0100
4231 +++ head-2008-12-01/arch/x86/pci/irq-xen.c 2008-12-01 11:29:05.000000000 +0100
4232 @@ -991,10 +991,6 @@ static void __init pcibios_fixup_irqs(vo
4233 pci_name(bridge), 'A' + pin, irq);
4234 }
4235 if (irq >= 0) {
4236 - if (use_pci_vector() &&
4237 - !platform_legacy_irq(irq))
4238 - irq = IO_APIC_VECTOR(irq);
4239 -
4240 printk(KERN_INFO "PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n",
4241 pci_name(dev), 'A' + pin, irq);
4242 dev->irq = irq;
4243 @@ -1155,10 +1151,6 @@ static int pirq_enable_irq(struct pci_de
4244 }
4245 dev = temp_dev;
4246 if (irq >= 0) {
4247 -#ifdef CONFIG_PCI_MSI
4248 - if (!platform_legacy_irq(irq))
4249 - irq = IO_APIC_VECTOR(irq);
4250 -#endif
4251 printk(KERN_INFO "PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n",
4252 pci_name(dev), 'A' + pin, irq);
4253 dev->irq = irq;
4254 @@ -1179,33 +1171,3 @@ static int pirq_enable_irq(struct pci_de
4255 }
4256 return 0;
4257 }
4258 -
4259 -int pci_vector_resources(int last, int nr_released)
4260 -{
4261 - int count = nr_released;
4262 -
4263 - int next = last;
4264 - int offset = (last % 8);
4265 -
4266 - while (next < FIRST_SYSTEM_VECTOR) {
4267 - next += 8;
4268 -#ifdef CONFIG_X86_64
4269 - if (next == IA32_SYSCALL_VECTOR)
4270 - continue;
4271 -#else
4272 - if (next == SYSCALL_VECTOR)
4273 - continue;
4274 -#endif
4275 - count++;
4276 - if (next >= FIRST_SYSTEM_VECTOR) {
4277 - if (offset%8) {
4278 - next = FIRST_DEVICE_VECTOR + offset;
4279 - offset++;
4280 - continue;
4281 - }
4282 - count--;
4283 - }
4284 - }
4285 -
4286 - return count;
4287 -}
4288 Index: head-2008-12-01/arch/x86/ia32/ia32entry-xen.S
4289 ===================================================================
4290 --- head-2008-12-01.orig/arch/x86/ia32/ia32entry-xen.S 2008-12-03 15:49:14.000000000 +0100
4291 +++ head-2008-12-01/arch/x86/ia32/ia32entry-xen.S 2008-12-01 11:29:05.000000000 +0100
4292 @@ -83,6 +83,7 @@
4293 */
4294 ENTRY(ia32_sysenter_target)
4295 CFI_STARTPROC32 simple
4296 + CFI_SIGNAL_FRAME
4297 CFI_DEF_CFA rsp,SS+8-RIP+16
4298 /*CFI_REL_OFFSET ss,SS-RIP+16*/
4299 CFI_REL_OFFSET rsp,RSP-RIP+16
4300 @@ -164,6 +165,7 @@ ENDPROC(ia32_sysenter_target)
4301 */
4302 ENTRY(ia32_cstar_target)
4303 CFI_STARTPROC32 simple
4304 + CFI_SIGNAL_FRAME
4305 CFI_DEF_CFA rsp,SS+8-RIP+16
4306 /*CFI_REL_OFFSET ss,SS-RIP+16*/
4307 CFI_REL_OFFSET rsp,RSP-RIP+16
4308 @@ -243,6 +245,7 @@ ia32_badarg:
4309
4310 ENTRY(ia32_syscall)
4311 CFI_STARTPROC simple
4312 + CFI_SIGNAL_FRAME
4313 CFI_DEF_CFA rsp,SS+8-RIP+16
4314 /*CFI_REL_OFFSET ss,SS-RIP+16*/
4315 CFI_REL_OFFSET rsp,RSP-RIP+16
4316 @@ -320,6 +323,7 @@ ENTRY(ia32_ptregs_common)
4317 popq %r11
4318 CFI_ENDPROC
4319 CFI_STARTPROC32 simple
4320 + CFI_SIGNAL_FRAME
4321 CFI_DEF_CFA rsp,SS+8-ARGOFFSET
4322 CFI_REL_OFFSET rax,RAX-ARGOFFSET
4323 CFI_REL_OFFSET rcx,RCX-ARGOFFSET
4324 @@ -653,8 +657,8 @@ ia32_sys_call_table:
4325 .quad sys_readlinkat /* 305 */
4326 .quad sys_fchmodat
4327 .quad sys_faccessat
4328 - .quad quiet_ni_syscall /* pselect6 for now */
4329 - .quad quiet_ni_syscall /* ppoll for now */
4330 + .quad compat_sys_pselect6
4331 + .quad compat_sys_ppoll
4332 .quad sys_unshare /* 310 */
4333 .quad compat_sys_set_robust_list
4334 .quad compat_sys_get_robust_list
4335 @@ -663,4 +667,5 @@ ia32_sys_call_table:
4336 .quad sys_tee
4337 .quad compat_sys_vmsplice
4338 .quad compat_sys_move_pages
4339 + .quad sys_getcpu
4340 ia32_syscall_end:
4341 Index: head-2008-12-01/arch/x86/kernel/Makefile
4342 ===================================================================
4343 --- head-2008-12-01.orig/arch/x86/kernel/Makefile 2008-12-03 15:49:14.000000000 +0100
4344 +++ head-2008-12-01/arch/x86/kernel/Makefile 2008-12-01 11:32:33.000000000 +0100
4345 @@ -104,9 +104,9 @@ obj-$(CONFIG_X86_XEN) += fixup.o
4346 ###
4347 # 64 bit specific files
4348 ifeq ($(CONFIG_X86_64),y)
4349 - obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o
4350 - obj-$(CONFIG_X86_XEN_GENAPIC) += genapic_xen_64.o
4351 - obj-y += bios_uv.o
4352 + obj-$(CONFIG_X86_LOCAL_APIC) += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o
4353 + obj-$(CONFIG_X86_XEN_GENAPIC) += genapic_64.o genapic_xen_64.o
4354 + obj-y += tlb_uv.o bios_uv.o
4355 obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o
4356 obj-$(CONFIG_AUDIT) += audit_64.o
4357
4358 @@ -124,5 +124,7 @@ ifeq ($(CONFIG_X86_64),y)
4359 pci-dma_64-$(CONFIG_XEN) += pci-dma_32.o
4360 endif
4361
4362 -disabled-obj-$(CONFIG_XEN) := i8253.o i8259_$(BITS).o reboot.o smpboot_$(BITS).o tsc_$(BITS).o
4363 +disabled-obj-$(CONFIG_XEN) := early-quirks.o i8253.o i8259_$(BITS).o reboot.o \
4364 + smpboot_$(BITS).o tsc_$(BITS).o
4365 +disabled-obj-$(CONFIG_XEN_UNPRIVILEGED_GUEST) += mpparse_64.o
4366 %/head_$(BITS).o %/head_$(BITS).s: $(if $(CONFIG_XEN),EXTRA_AFLAGS,dummy) :=
4367 Index: head-2008-12-01/arch/x86/kernel/apic_64-xen.c
4368 ===================================================================
4369 --- head-2008-12-01.orig/arch/x86/kernel/apic_64-xen.c 2008-12-03 15:49:14.000000000 +0100
4370 +++ head-2008-12-01/arch/x86/kernel/apic_64-xen.c 2008-12-01 11:29:05.000000000 +0100
4371 @@ -43,7 +43,7 @@ int apic_verbosity;
4372 */
4373 void ack_bad_irq(unsigned int irq)
4374 {
4375 - printk("unexpected IRQ trap at vector %02x\n", irq);
4376 + printk("unexpected IRQ trap at irq %02x\n", irq);
4377 /*
4378 * Currently unexpected vectors happen only on SMP and APIC.
4379 * We _must_ ack these because every local APIC has only N
4380 @@ -62,19 +62,19 @@ int setup_profiling_timer(unsigned int m
4381 return -EINVAL;
4382 }
4383
4384 -void smp_local_timer_interrupt(struct pt_regs *regs)
4385 +void smp_local_timer_interrupt(void)
4386 {
4387 - profile_tick(CPU_PROFILING, regs);
4388 + profile_tick(CPU_PROFILING);
4389 #ifndef CONFIG_XEN
4390 #ifdef CONFIG_SMP
4391 - update_process_times(user_mode(regs));
4392 + update_process_times(user_mode(get_irq_regs()));
4393 #endif
4394 #endif
4395 /*
4396 * We take the 'long' return path, and there every subsystem
4397 * grabs the appropriate locks (kernel lock/ irq lock).
4398 *
4399 - * we might want to decouple profiling from the 'long path',
4400 + * We might want to decouple profiling from the 'long path',
4401 * and do the profiling totally in assembly.
4402 *
4403 * Currently this isn't too much of an issue (performance wise),
4404 @@ -92,6 +92,8 @@ void smp_local_timer_interrupt(struct pt
4405 */
4406 void smp_apic_timer_interrupt(struct pt_regs *regs)
4407 {
4408 + struct pt_regs *old_regs = set_irq_regs(regs);
4409 +
4410 /*
4411 * the NMI deadlock-detector uses this.
4412 */
4413 @@ -109,8 +111,9 @@ void smp_apic_timer_interrupt(struct pt_
4414 */
4415 exit_idle();
4416 irq_enter();
4417 - smp_local_timer_interrupt(regs);
4418 + smp_local_timer_interrupt();
4419 irq_exit();
4420 + set_irq_regs(old_regs);
4421 }
4422
4423 /*
4424 @@ -188,9 +191,8 @@ int disable_apic;
4425 int __init APIC_init_uniprocessor (void)
4426 {
4427 #ifdef CONFIG_X86_IO_APIC
4428 - if (smp_found_config)
4429 - if (!skip_ioapic_setup && nr_ioapics)
4430 - setup_IO_APIC();
4431 + if (smp_found_config && !skip_ioapic_setup && nr_ioapics)
4432 + setup_IO_APIC();
4433 #endif
4434
4435 return 1;
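
[Note: smp_local_timer_interrupt() losing its pt_regs argument follows the 2.6.19 tree-wide change — handlers now fetch the interrupted context from a per-CPU slot filled by set_irq_regs(), as the smp_apic_timer_interrupt() hunk above shows. A sketch of the new handler style, with a hypothetical ISR:]

    #include <linux/interrupt.h>
    #include <asm/irq_regs.h>

    /* Hypothetical 2.6.19-style handler: no pt_regs parameter. */
    static irqreturn_t my_isr(int irq, void *dev_id)
    {
            struct pt_regs *regs = get_irq_regs();

            /* e.g. user_mode(regs) decides user vs. system accounting */
            (void)regs;
            return IRQ_HANDLED;
    }
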
4436 Index: head-2008-12-01/arch/x86/kernel/e820_64-xen.c
4437 ===================================================================
4438 --- head-2008-12-01.orig/arch/x86/kernel/e820_64-xen.c 2008-12-03 15:49:14.000000000 +0100
4439 +++ head-2008-12-01/arch/x86/kernel/e820_64-xen.c 2008-12-01 11:29:05.000000000 +0100
4440 @@ -16,6 +16,7 @@
4441 #include <linux/string.h>
4442 #include <linux/kexec.h>
4443 #include <linux/module.h>
4444 +#include <linux/mm.h>
4445
4446 #include <asm/pgtable.h>
4447 #include <asm/page.h>
4448 @@ -25,6 +26,11 @@
4449 #include <asm/sections.h>
4450 #include <xen/interface/memory.h>
4451
4452 +struct e820map e820 __initdata;
4453 +#ifdef CONFIG_XEN
4454 +struct e820map machine_e820 __initdata;
4455 +#endif
4456 +
4457 /*
4458 * PFN of last memory page.
4459 */
4460 @@ -41,14 +47,10 @@ unsigned long end_pfn_map;
4461 /*
4462 * Last pfn which the user wants to use.
4463 */
4464 -unsigned long end_user_pfn = MAXMEM>>PAGE_SHIFT;
4465 +static unsigned long __initdata end_user_pfn = MAXMEM>>PAGE_SHIFT;
4466
4467 extern struct resource code_resource, data_resource;
4468
4469 -#ifdef CONFIG_XEN
4470 -extern struct e820map machine_e820;
4471 -#endif
4472 -
4473 /* Check for some hardcoded bad areas that early boot is not allowed to touch */
4474 static inline int bad_addr(unsigned long *addrp, unsigned long size)
4475 {
4476 @@ -57,13 +59,13 @@ static inline int bad_addr(unsigned long
4477 #ifndef CONFIG_XEN
4478 /* various gunk below that needed for SMP startup */
4479 if (addr < 0x8000) {
4480 - *addrp = 0x8000;
4481 + *addrp = PAGE_ALIGN(0x8000);
4482 return 1;
4483 }
4484
4485 /* direct mapping tables of the kernel */
4486 if (last >= table_start<<PAGE_SHIFT && addr < table_end<<PAGE_SHIFT) {
4487 - *addrp = table_end << PAGE_SHIFT;
4488 + *addrp = PAGE_ALIGN(table_end << PAGE_SHIFT);
4489 return 1;
4490 }
4491
4492 @@ -71,23 +73,18 @@ static inline int bad_addr(unsigned long
4493 #ifdef CONFIG_BLK_DEV_INITRD
4494 if (LOADER_TYPE && INITRD_START && last >= INITRD_START &&
4495 addr < INITRD_START+INITRD_SIZE) {
4496 - *addrp = INITRD_START + INITRD_SIZE;
4497 + *addrp = PAGE_ALIGN(INITRD_START + INITRD_SIZE);
4498 return 1;
4499 }
4500 #endif
4501 - /* kernel code + 640k memory hole (later should not be needed, but
4502 - be paranoid for now) */
4503 - if (last >= 640*1024 && addr < 1024*1024) {
4504 - *addrp = 1024*1024;
4505 - return 1;
4506 - }
4507 - if (last >= __pa_symbol(&_text) && last < __pa_symbol(&_end)) {
4508 - *addrp = __pa_symbol(&_end);
4509 + /* kernel code */
4510 + if (last >= __pa_symbol(&_text) && addr < __pa_symbol(&_end)) {
4511 + *addrp = PAGE_ALIGN(__pa_symbol(&_end));
4512 return 1;
4513 }
4514
4515 if (last >= ebda_addr && addr < ebda_addr + ebda_size) {
4516 - *addrp = ebda_addr + ebda_size;
4517 + *addrp = PAGE_ALIGN(ebda_addr + ebda_size);
4518 return 1;
4519 }
4520
4521 @@ -184,7 +181,7 @@ unsigned long __init find_e820_area(unsi
4522 continue;
4523 while (bad_addr(&addr, size) && addr+size <= ei->addr+ei->size)
4524 ;
4525 - last = addr + size;
4526 + last = PAGE_ALIGN(addr) + size;
4527 if (last > ei->addr + ei->size)
4528 continue;
4529 if (last > end)
4530 @@ -194,59 +191,14 @@ unsigned long __init find_e820_area(unsi
4531 return -1UL;
4532 }
4533
4534 -/*
4535 - * Free bootmem based on the e820 table for a node.
4536 - */
4537 -void __init e820_bootmem_free(pg_data_t *pgdat, unsigned long start,unsigned long end)
4538 -{
4539 - int i;
4540 - for (i = 0; i < e820.nr_map; i++) {
4541 - struct e820entry *ei = &e820.map[i];
4542 - unsigned long last, addr;
4543 -
4544 - if (ei->type != E820_RAM ||
4545 - ei->addr+ei->size <= start ||
4546 - ei->addr >= end)
4547 - continue;
4548 -
4549 - addr = round_up(ei->addr, PAGE_SIZE);
4550 - if (addr < start)
4551 - addr = start;
4552 -
4553 - last = round_down(ei->addr + ei->size, PAGE_SIZE);
4554 - if (last >= end)
4555 - last = end;
4556 -
4557 - if (last > addr && last-addr >= PAGE_SIZE)
4558 - free_bootmem_node(pgdat, addr, last-addr);
4559 - }
4560 -}
4561 -
4562 /*
4563 * Find the highest page frame number we have available
4564 */
4565 unsigned long __init e820_end_of_ram(void)
4566 {
4567 - int i;
4568 unsigned long end_pfn = 0;
4569 + end_pfn = find_max_pfn_with_active_regions();
4570
4571 - for (i = 0; i < e820.nr_map; i++) {
4572 - struct e820entry *ei = &e820.map[i];
4573 - unsigned long start, end;
4574 -
4575 - start = round_up(ei->addr, PAGE_SIZE);
4576 - end = round_down(ei->addr + ei->size, PAGE_SIZE);
4577 - if (start >= end)
4578 - continue;
4579 - if (ei->type == E820_RAM) {
4580 - if (end > end_pfn<<PAGE_SHIFT)
4581 - end_pfn = end>>PAGE_SHIFT;
4582 - } else {
4583 - if (end > end_pfn_map<<PAGE_SHIFT)
4584 - end_pfn_map = end>>PAGE_SHIFT;
4585 - }
4586 - }
4587 -
4588 if (end_pfn > end_pfn_map)
4589 end_pfn_map = end_pfn;
4590 if (end_pfn_map > MAXMEM>>PAGE_SHIFT)
4591 @@ -256,43 +208,10 @@ unsigned long __init e820_end_of_ram(voi
4592 if (end_pfn > end_pfn_map)
4593 end_pfn = end_pfn_map;
4594
4595 + printk("end_pfn_map = %lu\n", end_pfn_map);
4596 return end_pfn;
4597 }
4598
4599 -/*
4600 - * Compute how much memory is missing in a range.
4601 - * Unlike the other functions in this file the arguments are in page numbers.
4602 - */
4603 -unsigned long __init
4604 -e820_hole_size(unsigned long start_pfn, unsigned long end_pfn)
4605 -{
4606 - unsigned long ram = 0;
4607 - unsigned long start = start_pfn << PAGE_SHIFT;
4608 - unsigned long end = end_pfn << PAGE_SHIFT;
4609 - int i;
4610 - for (i = 0; i < e820.nr_map; i++) {
4611 - struct e820entry *ei = &e820.map[i];
4612 - unsigned long last, addr;
4613 -
4614 - if (ei->type != E820_RAM ||
4615 - ei->addr+ei->size <= start ||
4616 - ei->addr >= end)
4617 - continue;
4618 -
4619 - addr = round_up(ei->addr, PAGE_SIZE);
4620 - if (addr < start)
4621 - addr = start;
4622 -
4623 - last = round_down(ei->addr + ei->size, PAGE_SIZE);
4624 - if (last >= end)
4625 - last = end;
4626 -
4627 - if (last > addr)
4628 - ram += last - addr;
4629 - }
4630 - return ((end - start) - ram) >> PAGE_SHIFT;
4631 -}
4632 -
4633 /*
4634 * Mark e820 reserved areas as busy for the resource manager.
4635 */
4636 @@ -333,6 +252,98 @@ void __init e820_reserve_resources(struc
4637 }
4638 }
4639
4640 +#ifndef CONFIG_XEN
4641 +/* Mark pages corresponding to given address range as nosave */
4642 +static void __init
4643 +e820_mark_nosave_range(unsigned long start, unsigned long end)
4644 +{
4645 + unsigned long pfn, max_pfn;
4646 +
4647 + if (start >= end)
4648 + return;
4649 +
4650 + printk("Nosave address range: %016lx - %016lx\n", start, end);
4651 + max_pfn = end >> PAGE_SHIFT;
4652 + for (pfn = start >> PAGE_SHIFT; pfn < max_pfn; pfn++)
4653 + if (pfn_valid(pfn))
4654 + SetPageNosave(pfn_to_page(pfn));
4655 +}
4656 +
4657 +/*
4658 + * Find the ranges of physical addresses that do not correspond to
4659 + * e820 RAM areas and mark the corresponding pages as nosave for software
4660 + * suspend and suspend to RAM.
4661 + *
4662 + * This function requires the e820 map to be sorted and without any
4663 + * overlapping entries and assumes the first e820 area to be RAM.
4664 + */
4665 +void __init e820_mark_nosave_regions(void)
4666 +{
4667 + int i;
4668 + unsigned long paddr;
4669 +
4670 + paddr = round_down(e820.map[0].addr + e820.map[0].size, PAGE_SIZE);
4671 + for (i = 1; i < e820.nr_map; i++) {
4672 + struct e820entry *ei = &e820.map[i];
4673 +
4674 + if (paddr < ei->addr)
4675 + e820_mark_nosave_range(paddr,
4676 + round_up(ei->addr, PAGE_SIZE));
4677 +
4678 + paddr = round_down(ei->addr + ei->size, PAGE_SIZE);
4679 + if (ei->type != E820_RAM)
4680 + e820_mark_nosave_range(round_up(ei->addr, PAGE_SIZE),
4681 + paddr);
4682 +
4683 + if (paddr >= (end_pfn << PAGE_SHIFT))
4684 + break;
4685 + }
4686 +}
4687 +#endif
4688 +
4689 +/* Walk the e820 map and register active regions within a node */
4690 +void __init
4691 +e820_register_active_regions(int nid, unsigned long start_pfn,
4692 + unsigned long end_pfn)
4693 +{
4694 + int i;
4695 + unsigned long ei_startpfn, ei_endpfn;
4696 + for (i = 0; i < e820.nr_map; i++) {
4697 + struct e820entry *ei = &e820.map[i];
4698 + ei_startpfn = round_up(ei->addr, PAGE_SIZE) >> PAGE_SHIFT;
4699 + ei_endpfn = round_down(ei->addr + ei->size, PAGE_SIZE)
4700 + >> PAGE_SHIFT;
4701 +
4702 + /* Skip map entries smaller than a page */
4703 + if (ei_startpfn >= ei_endpfn)
4704 + continue;
4705 +
4706 + /* Check if end_pfn_map should be updated */
4707 + if (ei->type != E820_RAM && ei_endpfn > end_pfn_map)
4708 + end_pfn_map = ei_endpfn;
4709 +
4710 + /* Skip if map is outside the node */
4711 + if (ei->type != E820_RAM ||
4712 + ei_endpfn <= start_pfn ||
4713 + ei_startpfn >= end_pfn)
4714 + continue;
4715 +
4716 + /* Check for overlaps */
4717 + if (ei_startpfn < start_pfn)
4718 + ei_startpfn = start_pfn;
4719 + if (ei_endpfn > end_pfn)
4720 + ei_endpfn = end_pfn;
4721 +
4722 + /* Obey end_user_pfn to save on memmap */
4723 + if (ei_startpfn >= end_user_pfn)
4724 + continue;
4725 + if (ei_endpfn > end_user_pfn)
4726 + ei_endpfn = end_user_pfn;
4727 +
4728 + add_active_range(nid, ei_startpfn, ei_endpfn);
4729 + }
4730 +}
4731 +
4732 /*
4733 * Add a memory region to the kernel e820 map.
4734 */
4735 @@ -553,13 +564,6 @@ static int __init sanitize_e820_map(stru
4736 * If we're lucky and live on a modern system, the setup code
4737 * will have given us a memory map that we can use to properly
4738 * set up memory. If we aren't, we'll fake a memory map.
4739 - *
4740 - * We check to see that the memory map contains at least 2 elements
4741 - * before we'll use it, because the detection code in setup.S may
4742 - * not be perfect and most every PC known to man has two memory
4743 - * regions: one from 0 to 640k, and one from 1mb up. (The IBM
4744 - * thinkpad 560x, for example, does not cooperate with the memory
4745 - * detection code.)
4746 */
4747 static int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
4748 {
4749 @@ -581,27 +585,6 @@ static int __init copy_e820_map(struct e
4750 if (start > end)
4751 return -1;
4752
4753 -#ifndef CONFIG_XEN
4754 - /*
4755 - * Some BIOSes claim RAM in the 640k - 1M region.
4756 - * Not right. Fix it up.
4757 - *
4758 - * This should be removed on Hammer which is supposed to not
4759 - * have non e820 covered ISA mappings there, but I had some strange
4760 - * problems so it stays for now. -AK
4761 - */
4762 - if (type == E820_RAM) {
4763 - if (start < 0x100000ULL && end > 0xA0000ULL) {
4764 - if (start < 0xA0000ULL)
4765 - add_memory_region(start, 0xA0000ULL-start, type);
4766 - if (end <= 0x100000ULL)
4767 - continue;
4768 - start = 0x100000ULL;
4769 - size = end - start;
4770 - }
4771 - }
4772 -#endif
4773 -
4774 add_memory_region(start, size, type);
4775 } while (biosmap++,--nr_map);
4776
4777 @@ -622,11 +605,15 @@ static int __init copy_e820_map(struct e
4778 return 0;
4779 }
4780
4781 +void early_panic(char *msg)
4782 +{
4783 + early_printk(msg);
4784 + panic(msg);
4785 +}
4786 +
4787 #ifndef CONFIG_XEN
4788 void __init setup_memory_region(void)
4789 {
4790 - char *who = "BIOS-e820";
4791 -
4792 /*
4793 * Try to copy the BIOS-supplied E820-map.
4794 *
4795 @@ -634,24 +621,10 @@ void __init setup_memory_region(void)
4796 * the next section from 1mb->appropriate_mem_k
4797 */
4798 sanitize_e820_map(E820_MAP, &E820_MAP_NR);
4799 - if (copy_e820_map(E820_MAP, E820_MAP_NR) < 0) {
4800 - unsigned long mem_size;
4801 -
4802 - /* compare results from other methods and take the greater */
4803 - if (ALT_MEM_K < EXT_MEM_K) {
4804 - mem_size = EXT_MEM_K;
4805 - who = "BIOS-88";
4806 - } else {
4807 - mem_size = ALT_MEM_K;
4808 - who = "BIOS-e801";
4809 - }
4810 -
4811 - e820.nr_map = 0;
4812 - add_memory_region(0, LOWMEMSIZE(), E820_RAM);
4813 - add_memory_region(HIGH_MEMORY, mem_size << 10, E820_RAM);
4814 - }
4815 + if (copy_e820_map(E820_MAP, E820_MAP_NR) < 0)
4816 + early_panic("Cannot find a valid memory map");
4817 printk(KERN_INFO "BIOS-provided physical RAM map:\n");
4818 - e820_print_map(who);
4819 + e820_print_map("BIOS-e820");
4820 }
4821
4822 #else /* CONFIG_XEN */
4823 @@ -683,20 +656,23 @@ void __init setup_memory_region(void)
4824
4825 sanitize_e820_map(map, (char *)&memmap.nr_entries);
4826
4827 - BUG_ON(copy_e820_map(map, (char)memmap.nr_entries) < 0);
4828 + if (copy_e820_map(map, (char)memmap.nr_entries) < 0)
4829 + early_panic("Cannot find a valid memory map");
4830
4831 printk(KERN_INFO "BIOS-provided physical RAM map:\n");
4832 e820_print_map("Xen");
4833 }
4834 #endif
4835
4836 -void __init parse_memopt(char *p, char **from)
4837 -{
4838 +static int __init parse_memopt(char *p)
4839 +{
4840 int i;
4841 unsigned long current_end;
4842 unsigned long end;
4843
4844 - end_user_pfn = memparse(p, from);
4845 + if (!p)
4846 + return -EINVAL;
4847 + end_user_pfn = memparse(p, &p);
4848 end_user_pfn >>= PAGE_SHIFT;
4849
4850 end = end_user_pfn<<PAGE_SHIFT;
4851 @@ -713,27 +689,61 @@ void __init parse_memopt(char *p, char *
4852 else
4853 add_memory_region(current_end, end - current_end, E820_RAM);
4854 }
4855 +
4856 + return 0;
4857 }
4858 +early_param("mem", parse_memopt);
4859 +
4860 +static int userdef __initdata;
4861
4862 -void __init parse_memmapopt(char *p, char **from)
4863 +static int __init parse_memmap_opt(char *p)
4864 {
4865 + char *oldp;
4866 unsigned long long start_at, mem_size;
4867
4868 - mem_size = memparse(p, from);
4869 - p = *from;
4870 + if (!strcmp(p, "exactmap")) {
4871 +#ifdef CONFIG_CRASH_DUMP
4872 + /* If we are doing a crash dump, we
4873 + * still need to know the real mem
4874 + * size before original memory map is
4875 + * reset.
4876 + */
4877 + e820_register_active_regions(0, 0, -1UL);
4878 + saved_max_pfn = e820_end_of_ram();
4879 + remove_all_active_ranges();
4880 +#endif
4881 + end_pfn_map = 0;
4882 + e820.nr_map = 0;
4883 + userdef = 1;
4884 + return 0;
4885 + }
4886 +
4887 + oldp = p;
4888 + mem_size = memparse(p, &p);
4889 + if (p == oldp)
4890 + return -EINVAL;
4891 if (*p == '@') {
4892 - start_at = memparse(p+1, from);
4893 + start_at = memparse(p+1, &p);
4894 add_memory_region(start_at, mem_size, E820_RAM);
4895 } else if (*p == '#') {
4896 - start_at = memparse(p+1, from);
4897 + start_at = memparse(p+1, &p);
4898 add_memory_region(start_at, mem_size, E820_ACPI);
4899 } else if (*p == '$') {
4900 - start_at = memparse(p+1, from);
4901 + start_at = memparse(p+1, &p);
4902 add_memory_region(start_at, mem_size, E820_RESERVED);
4903 } else {
4904 end_user_pfn = (mem_size >> PAGE_SHIFT);
4905 }
4906 - p = *from;
4907 + return *p == '\0' ? 0 : -EINVAL;
4908 +}
4909 +early_param("memmap", parse_memmap_opt);
4910 +
4911 +void finish_e820_parsing(void)
4912 +{
4913 + if (userdef) {
4914 + printk(KERN_INFO "user-defined physical RAM map:\n");
4915 + e820_print_map("user");
4916 + }
4917 }
4918
4919 unsigned long pci_mem_start = 0xaeedbabe;
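
[Note: with mem= and memmap= now parsed through early_param(), the accepted command-line forms follow directly from the code above: '@' adds E820_RAM, '#' E820_ACPI, '$' E820_RESERVED, and exactmap discards the firmware map first. Illustrative, made-up boot parameters:]

    mem=512M
    memmap=exactmap memmap=640K@0 memmap=511M@1M
    memmap=64M$0x18000000   (a 64 MB reserved hole at 384 MB)
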
4920 Index: head-2008-12-01/arch/x86/kernel/early_printk-xen.c
4921 ===================================================================
4922 --- head-2008-12-01.orig/arch/x86/kernel/early_printk-xen.c 2008-12-03 15:49:14.000000000 +0100
4923 +++ head-2008-12-01/arch/x86/kernel/early_printk-xen.c 2008-12-01 11:29:05.000000000 +0100
4924 @@ -244,20 +244,16 @@ void early_printk(const char *fmt, ...)
4925
4926 static int __initdata keep_early;
4927
4928 -int __init setup_early_printk(char *opt)
4929 +static int __init setup_early_printk(char *buf)
4930 {
4931 - char *space;
4932 - char buf[256];
4933 + if (!buf)
4934 + return 0;
4935
4936 if (early_console_initialized)
4937 - return 1;
4938 -
4939 - strlcpy(buf,opt,sizeof(buf));
4940 - space = strchr(buf, ' ');
4941 - if (space)
4942 - *space = 0;
4943 + return 0;
4944 + early_console_initialized = 1;
4945
4946 - if (strstr(buf,"keep"))
4947 + if (strstr(buf, "keep"))
4948 keep_early = 1;
4949
4950 if (!strncmp(buf, "serial", 6)) {
4951 @@ -281,11 +277,12 @@ int __init setup_early_printk(char *opt)
4952 early_console = &simnow_console;
4953 keep_early = 1;
4954 }
4955 - early_console_initialized = 1;
4956 register_console(early_console);
4957 return 0;
4958 }
4959
4960 +early_param("earlyprintk", setup_early_printk);
4961 +
4962 void __init disable_early_printk(void)
4963 {
4964 if (!early_console_initialized || !early_console)
4965 @@ -299,4 +296,3 @@ void __init disable_early_printk(void)
4966 }
4967 }
4968
4969 -__setup("earlyprintk=", setup_early_printk);
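
[Note: since an early_param() value is a single command-line token, the keep flag is now comma-separated as well. A typical invocation — serial path shown; the handler only checks strncmp("serial") and strstr("keep"):]

    earlyprintk=serial,ttyS0,115200,keep
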
4970 Index: head-2008-12-01/arch/x86/kernel/entry_64-xen.S
4971 ===================================================================
4972 --- head-2008-12-01.orig/arch/x86/kernel/entry_64-xen.S 2008-12-03 15:49:14.000000000 +0100
4973 +++ head-2008-12-01/arch/x86/kernel/entry_64-xen.S 2008-12-01 11:29:05.000000000 +0100
4974 @@ -4,9 +4,6 @@
4975 * Copyright (C) 1991, 1992 Linus Torvalds
4976 * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
4977 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
4978 - *
4979 - * $Id$
4980 - *
4981 * Jun Nakajima <jun.nakajima@intel.com>
4982 * Asit Mallick <asit.k.mallick@intel.com>
4983 * Modified for Xen
4984 @@ -26,15 +23,25 @@
4985 * at the top of the kernel process stack.
4986 * - partial stack frame: partially saved registers upto R11.
4987 * - full stack frame: Like partial stack frame, but all register saved.
4988 - *
4989 - * TODO:
4990 - * - schedule it carefully for the final hardware.
4991 + *
4992 + * Some macro usage:
4993 + * - CFI macros are used to generate dwarf2 unwind information for better
4994 + * backtraces. They don't change any code.
4995 + * - SAVE_ALL/RESTORE_ALL - Save/restore all registers
4996 + * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify.
4997 + *   There are unfortunately lots of special cases where some registers
4998 + *   are not touched. The macro is a big mess that should be cleaned up.
4999 + * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS.
5000 + * Gives a full stack frame.
5001 + * - ENTRY/END - Define functions in the symbol table.
5002 + * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
5003 + * frame that is otherwise undefined after a SYSCALL
5004 + * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging.
5005 + * - errorentry/paranoidentry/zeroentry - Define exception entry points.
5006 */
5007
5008 -#define ASSEMBLY 1
5009 #include <linux/linkage.h>
5010 #include <asm/segment.h>
5011 -#include <asm/smp.h>
5012 #include <asm/cache.h>
5013 #include <asm/errno.h>
5014 #include <asm/dwarf2.h>
5015 @@ -117,6 +124,7 @@ NMI_MASK = 0x80000000
5016 .macro CFI_DEFAULT_STACK start=1,adj=0
5017 .if \start
5018 CFI_STARTPROC simple
5019 + CFI_SIGNAL_FRAME
5020 CFI_DEF_CFA rsp,SS+8 - \adj*ARGOFFSET
5021 .else
5022 CFI_DEF_CFA_OFFSET SS+8 - \adj*ARGOFFSET
5023 @@ -207,6 +215,7 @@ END(ret_from_fork)
5024 */
5025 .macro _frame ref
5026 CFI_STARTPROC simple
5027 + CFI_SIGNAL_FRAME
5028 CFI_DEF_CFA rsp,SS+8-\ref
5029 /*CFI_REL_OFFSET ss,SS-\ref*/
5030 CFI_REL_OFFSET rsp,RSP-\ref
5031 @@ -334,6 +343,8 @@ tracesys:
5032 LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */
5033 RESTORE_REST
5034 cmpq $__NR_syscall_max,%rax
5035 + movq $-ENOSYS,%rcx
5036 + cmova %rcx,%rax
5037 ja 1f
5038 movq %r10,%rcx /* fixup for C */
5039 call *sys_call_table(,%rax,8)
5040 @@ -349,6 +360,7 @@ END(system_call)
5041 */
5042 ENTRY(int_ret_from_sys_call)
5043 CFI_STARTPROC simple
5044 + CFI_SIGNAL_FRAME
5045 CFI_DEF_CFA rsp,SS+8-ARGOFFSET
5046 /*CFI_REL_OFFSET ss,SS-ARGOFFSET*/
5047 CFI_REL_OFFSET rsp,RSP-ARGOFFSET
5048 @@ -583,8 +595,7 @@ retint_signal:
5049 #ifdef CONFIG_PREEMPT
5050 /* Returning to kernel space. Check if we need preemption */
5051 /* rcx: threadinfo. interrupts off. */
5052 - .p2align
5053 -retint_kernel:
5054 +ENTRY(retint_kernel)
5055 cmpl $0,threadinfo_preempt_count(%rcx)
5056 jnz retint_restore_args
5057 bt $TIF_NEED_RESCHED,threadinfo_flags(%rcx)
5058 @@ -644,7 +655,6 @@ ENTRY(call_function_interrupt)
5059 END(call_function_interrupt)
5060 #endif
5061
5062 -#ifdef CONFIG_X86_LOCAL_APIC
5063 ENTRY(apic_timer_interrupt)
5064 apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
5065 END(apic_timer_interrupt)
5066 @@ -656,7 +666,6 @@ END(error_interrupt)
5067 ENTRY(spurious_interrupt)
5068 apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
5069 END(spurious_interrupt)
5070 -#endif
5071 #endif /* !CONFIG_XEN */
5072
5073 /*
5074 @@ -755,7 +764,9 @@ paranoid_exit\trace:
5075 testl $3,CS(%rsp)
5076 jnz paranoid_userspace\trace
5077 paranoid_swapgs\trace:
5078 + .if \trace
5079 TRACE_IRQS_IRETQ 0
5080 + .endif
5081 swapgs
5082 paranoid_restore\trace:
5083 RESTORE_ALL 8
5084 @@ -802,7 +813,7 @@ paranoid_schedule\trace:
5085 * Exception entry point. This expects an error code/orig_rax on the stack
5086 * and the exception handler in %rax.
5087 */
5088 -ENTRY(error_entry)
5089 +KPROBE_ENTRY(error_entry)
5090 _frame RDI
5091 CFI_REL_OFFSET rax,0
5092 /* rdi slot contains rax, oldrax contains error code */
5093 @@ -896,7 +907,7 @@ error_kernelspace:
5094 jmp error_sti
5095 #endif
5096 CFI_ENDPROC
5097 -END(error_entry)
5098 +KPROBE_END(error_entry)
5099
5100 ENTRY(hypervisor_callback)
5101 zeroentry do_hypervisor_callback
5102 @@ -936,26 +947,6 @@ ENTRY(do_hypervisor_callback) # do_hyp
5103 CFI_ENDPROC
5104 END(do_hypervisor_callback)
5105
5106 -#ifdef CONFIG_X86_LOCAL_APIC
5107 -KPROBE_ENTRY(nmi)
5108 - zeroentry do_nmi_callback
5109 -ENTRY(do_nmi_callback)
5110 - CFI_STARTPROC
5111 - addq $8, %rsp
5112 - CFI_ENDPROC
5113 - CFI_DEFAULT_STACK
5114 - call do_nmi
5115 - orl $NMI_MASK,EFLAGS(%rsp)
5116 - RESTORE_REST
5117 - XEN_BLOCK_EVENTS(%rsi)
5118 - TRACE_IRQS_OFF
5119 - GET_THREAD_INFO(%rcx)
5120 - jmp retint_restore_args
5121 - CFI_ENDPROC
5122 - .previous .text
5123 -END(nmi)
5124 -#endif
5125 -
5126 ALIGN
5127 restore_all_enable_events:
5128 CFI_DEFAULT_STACK adj=1
5129 @@ -1121,7 +1112,7 @@ ENDPROC(child_rip)
5130 * do_sys_execve asm fallback arguments:
5131 * rdi: name, rsi: argv, rdx: envp, fake frame on the stack
5132 */
5133 -ENTRY(execve)
5134 +ENTRY(kernel_execve)
5135 CFI_STARTPROC
5136 FAKE_STACK_FRAME $0
5137 SAVE_ALL
5138 @@ -1135,12 +1126,11 @@ ENTRY(execve)
5139 UNFAKE_STACK_FRAME
5140 ret
5141 CFI_ENDPROC
5142 -ENDPROC(execve)
5143 +ENDPROC(kernel_execve)
5144
5145 KPROBE_ENTRY(page_fault)
5146 errorentry do_page_fault
5147 -END(page_fault)
5148 - .previous .text
5149 +KPROBE_END(page_fault)
5150
5151 ENTRY(coprocessor_error)
5152 zeroentry do_coprocessor_error
5153 @@ -1162,25 +1152,25 @@ KPROBE_ENTRY(debug)
5154 zeroentry do_debug
5155 /* paranoidexit
5156 CFI_ENDPROC */
5157 -END(debug)
5158 - .previous .text
5159 +KPROBE_END(debug)
5160
5161 -#if 0
5162 - /* runs on exception stack */
5163 KPROBE_ENTRY(nmi)
5164 - INTR_FRAME
5165 - pushq $-1
5166 - CFI_ADJUST_CFA_OFFSET 8
5167 - paranoidentry do_nmi, 0, 0
5168 -#ifdef CONFIG_TRACE_IRQFLAGS
5169 - paranoidexit 0
5170 -#else
5171 - jmp paranoid_exit1
5172 - CFI_ENDPROC
5173 -#endif
5174 -END(nmi)
5175 - .previous .text
5176 -#endif
5177 + zeroentry do_nmi_callback
5178 +KPROBE_END(nmi)
5179 +do_nmi_callback:
5180 + CFI_STARTPROC
5181 + addq $8, %rsp
5182 + CFI_ENDPROC
5183 + CFI_DEFAULT_STACK
5184 + call do_nmi
5185 + orl $NMI_MASK,EFLAGS(%rsp)
5186 + RESTORE_REST
5187 + XEN_BLOCK_EVENTS(%rsi)
5188 + TRACE_IRQS_OFF
5189 + GET_THREAD_INFO(%rcx)
5190 + jmp retint_restore_args
5191 + CFI_ENDPROC
5192 +END(do_nmi_callback)
5193
5194 KPROBE_ENTRY(int3)
5195 /* INTR_FRAME
5196 @@ -1189,8 +1179,7 @@ KPROBE_ENTRY(int3)
5197 zeroentry do_int3
5198 /* jmp paranoid_exit1
5199 CFI_ENDPROC */
5200 -END(int3)
5201 - .previous .text
5202 +KPROBE_END(int3)
5203
5204 ENTRY(overflow)
5205 zeroentry do_overflow
5206 @@ -1241,8 +1230,7 @@ END(stack_segment)
5207
5208 KPROBE_ENTRY(general_protection)
5209 errorentry do_general_protection
5210 -END(general_protection)
5211 - .previous .text
5212 +KPROBE_END(general_protection)
5213
5214 ENTRY(alignment_check)
5215 errorentry do_alignment_check
5216 Index: head-2008-12-01/arch/x86/kernel/genapic_xen_64.c
5217 ===================================================================
5218 --- head-2008-12-01.orig/arch/x86/kernel/genapic_xen_64.c 2008-12-03 15:49:14.000000000 +0100
5219 +++ head-2008-12-01/arch/x86/kernel/genapic_xen_64.c 2008-12-01 11:29:05.000000000 +0100
5220 @@ -71,6 +71,13 @@ static cpumask_t xen_target_cpus(void)
5221 return cpu_online_map;
5222 }
5223
5224 +static cpumask_t xen_vector_allocation_domain(int cpu)
5225 +{
5226 + cpumask_t domain = CPU_MASK_NONE;
5227 + cpu_set(cpu, domain);
5228 + return domain;
5229 +}
5230 +
5231 /*
5232 * Set up the logical destination ID.
5233 * Do nothing, not called now.
5234 @@ -147,8 +154,8 @@ struct genapic apic_xen = {
5235 .int_delivery_mode = dest_LowestPrio,
5236 #endif
5237 .int_dest_mode = (APIC_DEST_LOGICAL != 0),
5238 - .int_delivery_dest = APIC_DEST_LOGICAL | APIC_DM_LOWEST,
5239 .target_cpus = xen_target_cpus,
5240 + .vector_allocation_domain = xen_vector_allocation_domain,
5241 #ifdef CONFIG_XEN_PRIVILEGED_GUEST
5242 .apic_id_registered = xen_apic_id_registered,
5243 #endif
5244 Index: head-2008-12-01/arch/x86/kernel/head_64-xen.S
5245 ===================================================================
5246 --- head-2008-12-01.orig/arch/x86/kernel/head_64-xen.S 2008-12-03 15:49:14.000000000 +0100
5247 +++ head-2008-12-01/arch/x86/kernel/head_64-xen.S 2008-12-01 11:29:05.000000000 +0100
5248 @@ -5,9 +5,6 @@
5249 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
5250 * Copyright (C) 2000 Karsten Keil <kkeil@suse.de>
5251 * Copyright (C) 2001,2002 Andi Kleen <ak@suse.de>
5252 - *
5253 - * $Id: head.S,v 1.49 2002/03/19 17:39:25 ak Exp $
5254 - *
5255 * Jun Nakajima <jun.nakajima@intel.com>
5256 * Modified for Xen
5257 */
5258 @@ -149,7 +146,7 @@ ENTRY(cpu_gdt_table)
5259 .quad 0,0 /* TSS */
5260 .quad 0,0 /* LDT */
5261 .quad 0,0,0 /* three TLS descriptors */
5262 - .quad 0 /* unused */
5263 + .quad 0x0000f40000000000 /* node/CPU stored in limit */
5264 gdt_end:
5265 /* asm/segment.h:GDT_ENTRIES must match this */
5266 /* This should be a multiple of the cache line size */
5267 Index: head-2008-12-01/arch/x86/kernel/head64-xen.c
5268 ===================================================================
5269 --- head-2008-12-01.orig/arch/x86/kernel/head64-xen.c 2008-12-03 15:49:14.000000000 +0100
5270 +++ head-2008-12-01/arch/x86/kernel/head64-xen.c 2008-12-01 11:29:05.000000000 +0100
5271 @@ -7,6 +7,9 @@
5272 * Modified for Xen.
5273 */
5274
5275 +/* PDA is not ready to be used until the end of x86_64_start_kernel(). */
5276 +#define arch_use_lazy_mmu_mode() false
5277 +
5278 #include <linux/init.h>
5279 #include <linux/linkage.h>
5280 #include <linux/types.h>
5281 @@ -54,11 +57,9 @@ static void __init copy_bootdata(char *r
5282 new_data = *(int *) (x86_boot_params + NEW_CL_POINTER);
5283 if (!new_data) {
5284 if (OLD_CL_MAGIC != * (u16 *) OLD_CL_MAGIC_ADDR) {
5285 - printk("so old bootloader that it does not support commandline?!\n");
5286 return;
5287 }
5288 new_data = OLD_CL_BASE_ADDR + * (u16 *) OLD_CL_OFFSET;
5289 - printk("old bootloader convention, maybe loadlin?\n");
5290 }
5291 command_line = (char *) ((u64)(new_data));
5292 memcpy(saved_command_line, command_line, COMMAND_LINE_SIZE);
5293 @@ -70,25 +71,6 @@ static void __init copy_bootdata(char *r
5294 memcpy(saved_command_line, xen_start_info->cmd_line, max_cmdline);
5295 saved_command_line[max_cmdline-1] = '\0';
5296 #endif
5297 - printk("Bootdata ok (command line is %s)\n", saved_command_line);
5298 -}
5299 -
5300 -static void __init setup_boot_cpu_data(void)
5301 -{
5302 - unsigned int dummy, eax;
5303 -
5304 - /* get vendor info */
5305 - cpuid(0, (unsigned int *)&boot_cpu_data.cpuid_level,
5306 - (unsigned int *)&boot_cpu_data.x86_vendor_id[0],
5307 - (unsigned int *)&boot_cpu_data.x86_vendor_id[8],
5308 - (unsigned int *)&boot_cpu_data.x86_vendor_id[4]);
5309 -
5310 - /* get cpu type */
5311 - cpuid(1, &eax, &dummy, &dummy,
5312 - (unsigned int *) &boot_cpu_data.x86_capability);
5313 - boot_cpu_data.x86 = (eax >> 8) & 0xf;
5314 - boot_cpu_data.x86_model = (eax >> 4) & 0xf;
5315 - boot_cpu_data.x86_mask = eax & 0xf;
5316 }
5317
5318 #include <xen/interface/memory.h>
5319 @@ -101,7 +83,6 @@ void __init x86_64_start_kernel(char * r
5320 {
5321 struct xen_machphys_mapping mapping;
5322 unsigned long machine_to_phys_nr_ents;
5323 - char *s;
5324 int i;
5325
5326 setup_xen_features();
5327 @@ -128,10 +109,7 @@ void __init x86_64_start_kernel(char * r
5328 asm volatile("lidt %0" :: "m" (idt_descr));
5329 #endif
5330
5331 - /*
5332 - * This must be called really, really early:
5333 - */
5334 - lockdep_init();
5335 + early_printk("Kernel alive\n");
5336
5337 for (i = 0; i < NR_CPUS; i++)
5338 cpu_pda(i) = &boot_cpu_pda[i];
5339 @@ -141,22 +119,5 @@ void __init x86_64_start_kernel(char * r
5340 #ifdef CONFIG_SMP
5341 cpu_set(0, cpu_online_map);
5342 #endif
5343 - s = strstr(saved_command_line, "earlyprintk=");
5344 - if (s != NULL)
5345 - setup_early_printk(strchr(s, '=') + 1);
5346 -#ifdef CONFIG_NUMA
5347 - s = strstr(saved_command_line, "numa=");
5348 - if (s != NULL)
5349 - numa_setup(s+5);
5350 -#endif
5351 -#ifdef CONFIG_X86_IO_APIC
5352 - if (strstr(saved_command_line, "disableapic"))
5353 - disable_apic = 1;
5354 -#endif
5355 - /* You need early console to see that */
5356 - if (__pa_symbol(&_end) >= KERNEL_TEXT_SIZE)
5357 - panic("Kernel too big for kernel mapping\n");
5358 -
5359 - setup_boot_cpu_data();
5360 start_kernel();
5361 }
5362 Index: head-2008-12-01/arch/x86/kernel/io_apic_64-xen.c
5363 ===================================================================
5364 --- head-2008-12-01.orig/arch/x86/kernel/io_apic_64-xen.c 2008-12-03 15:49:14.000000000 +0100
5365 +++ head-2008-12-01/arch/x86/kernel/io_apic_64-xen.c 2008-12-01 11:29:05.000000000 +0100
5366 @@ -26,9 +26,12 @@
5367 #include <linux/delay.h>
5368 #include <linux/sched.h>
5369 #include <linux/smp_lock.h>
5370 +#include <linux/pci.h>
5371 #include <linux/mc146818rtc.h>
5372 #include <linux/acpi.h>
5373 #include <linux/sysdev.h>
5374 +#include <linux/msi.h>
5375 +#include <linux/htirq.h>
5376 #ifdef CONFIG_ACPI
5377 #include <acpi/acpi_bus.h>
5378 #endif
5379 @@ -41,6 +44,10 @@
5380 #include <asm/acpi.h>
5381 #include <asm/dma.h>
5382 #include <asm/nmi.h>
5383 +#include <asm/msidef.h>
5384 +#include <asm/hypertransport.h>
5385 +
5386 +static int assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result);
5387
5388 #define __apicdebuginit __init
5389
5390 @@ -48,17 +55,30 @@ int sis_apic_bug; /* not actually suppor
5391
5392 static int no_timer_check;
5393
5394 -int disable_timer_pin_1 __initdata;
5395 +static int disable_timer_pin_1 __initdata;
5396
5397 -#ifndef CONFIG_XEN
5398 -int timer_over_8254 __initdata = 0;
5399 +#ifdef CONFIG_XEN
5400 +#include <xen/interface/xen.h>
5401 +#include <xen/interface/physdev.h>
5402 +#include <xen/evtchn.h>
5403 +
5404 +/* Fake i8259 */
5405 +#define make_8259A_irq(_irq) (io_apic_irqs &= ~(1UL<<(_irq)))
5406 +#define disable_8259A_irq(_irq) ((void)0)
5407 +#define i8259A_irq_pending(_irq) (0)
5408 +
5409 +unsigned long io_apic_irqs;
5410 +
5411 +#define clear_IO_APIC() ((void)0)
5412 +#else
5413 +int timer_over_8254 __initdata = 1;
5414
5415 /* Where if anywhere is the i8259 connect in external int mode */
5416 static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
5417 #endif
5418
5419 static DEFINE_SPINLOCK(ioapic_lock);
5420 -static DEFINE_SPINLOCK(vector_lock);
5421 +DEFINE_SPINLOCK(vector_lock);
5422
5423 /*
5424 * # of IRQ routing registers
5425 @@ -83,29 +103,27 @@ static struct irq_pin_list {
5426 short apic, pin, next;
5427 } irq_2_pin[PIN_MAP_SIZE];
5428
5429 -int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1};
5430 -#ifdef CONFIG_PCI_MSI
5431 -#define vector_to_irq(vector) \
5432 - (platform_legacy_irq(vector) ? vector : vector_irq[vector])
5433 -#else
5434 -#define vector_to_irq(vector) (vector)
5435 -#endif
5436 -
5437 -#ifdef CONFIG_XEN
5438 -
5439 -#include <xen/interface/xen.h>
5440 -#include <xen/interface/physdev.h>
5441 -#include <xen/evtchn.h>
5442 -
5443 -/* Fake i8259 */
5444 -#define make_8259A_irq(_irq) (io_apic_irqs &= ~(1UL<<(_irq)))
5445 -#define disable_8259A_irq(_irq) ((void)0)
5446 -#define i8259A_irq_pending(_irq) (0)
5447 +#ifndef CONFIG_XEN
5448 +struct io_apic {
5449 + unsigned int index;
5450 + unsigned int unused[3];
5451 + unsigned int data;
5452 +};
5453
5454 -unsigned long io_apic_irqs;
5455 +static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
5456 +{
5457 + return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
5458 + + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK);
5459 +}
5460 +#endif
5461
5462 -static inline unsigned int xen_io_apic_read(unsigned int apic, unsigned int reg)
5463 +static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
5464 {
5465 +#ifndef CONFIG_XEN
5466 + struct io_apic __iomem *io_apic = io_apic_base(apic);
5467 + writel(reg, &io_apic->index);
5468 + return readl(&io_apic->data);
5469 +#else
5470 struct physdev_apic apic_op;
5471 int ret;
5472
5473 @@ -115,31 +133,133 @@ static inline unsigned int xen_io_apic_r
5474 if (ret)
5475 return ret;
5476 return apic_op.value;
5477 +#endif
5478 }
5479
5480 -static inline void xen_io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
5481 +static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
5482 {
5483 +#ifndef CONFIG_XEN
5484 + struct io_apic __iomem *io_apic = io_apic_base(apic);
5485 + writel(reg, &io_apic->index);
5486 + writel(value, &io_apic->data);
5487 +#else
5488 struct physdev_apic apic_op;
5489
5490 apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr;
5491 apic_op.reg = reg;
5492 apic_op.value = value;
5493 WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic_op));
5494 +#endif
5495 +}
5496 +
5497 +#ifndef CONFIG_XEN
5498 +/*
5499 + * Re-write a value: to be used for read-modify-write
5500 + * cycles where the read already set up the index register.
5501 + */
5502 +static inline void io_apic_modify(unsigned int apic, unsigned int value)
5503 +{
5504 + struct io_apic __iomem *io_apic = io_apic_base(apic);
5505 + writel(value, &io_apic->data);
5506 }
5507 +#else
5508 +#define io_apic_modify io_apic_write
5509 +#endif
5510
5511 -#define io_apic_read(a,r) xen_io_apic_read(a,r)
5512 -#define io_apic_write(a,r,v) xen_io_apic_write(a,r,v)
5513 +/*
5514 + * Synchronize the IO-APIC and the CPU by doing
5515 + * a dummy read from the IO-APIC
5516 + */
5517 +static inline void io_apic_sync(unsigned int apic)
5518 +{
5519 +#ifndef CONFIG_XEN
5520 + struct io_apic __iomem *io_apic = io_apic_base(apic);
5521 + readl(&io_apic->data);
5522 +#endif
5523 +}
5524
5525 -#define clear_IO_APIC() ((void)0)
5526 +union entry_union {
5527 + struct { u32 w1, w2; };
5528 + struct IO_APIC_route_entry entry;
5529 +};
5530
5531 -#else
5532 +#ifndef CONFIG_XEN
5533 +static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
5534 +{
5535 + union entry_union eu;
5536 + unsigned long flags;
5537 + spin_lock_irqsave(&ioapic_lock, flags);
5538 + eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
5539 + eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
5540 + spin_unlock_irqrestore(&ioapic_lock, flags);
5541 + return eu.entry;
5542 +}
5543 +#endif
5544 +
5545 +/*
5546 + * When we write a new IO APIC routing entry, we need to write the high
5547 + * word first! If the mask bit in the low word is clear, we will enable
5548 + * the interrupt, and we need to make sure the entry is fully populated
5549 + * before that happens.
5550 + */
5551 +static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
5552 +{
5553 + unsigned long flags;
5554 + union entry_union eu;
5555 + eu.entry = e;
5556 + spin_lock_irqsave(&ioapic_lock, flags);
5557 + io_apic_write(apic, 0x11 + 2*pin, eu.w2);
5558 + io_apic_write(apic, 0x10 + 2*pin, eu.w1);
5559 + spin_unlock_irqrestore(&ioapic_lock, flags);
5560 +}
5561 +
5562 +#ifndef CONFIG_XEN
5563 +/*
5564 + * When we mask an IO APIC routing entry, we need to write the low
5565 + * word first, in order to set the mask bit before we change the
5566 + * high bits!
5567 + */
5568 +static void ioapic_mask_entry(int apic, int pin)
5569 +{
5570 + unsigned long flags;
5571 + union entry_union eu = { .entry.mask = 1 };
5572 +
5573 + spin_lock_irqsave(&ioapic_lock, flags);
5574 + io_apic_write(apic, 0x10 + 2*pin, eu.w1);
5575 + io_apic_write(apic, 0x11 + 2*pin, eu.w2);
5576 + spin_unlock_irqrestore(&ioapic_lock, flags);
5577 +}
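The two helpers above encode an ordering rule: a routing entry is two 32-bit registers, and the low word (w1) carries the mask bit, so it is written last when enabling an entry and first when masking one. A standalone sketch of the same union punning, with the mask assumed at bit 16 of the low word as in the real RTE layout (illustrative only, not part of the patch):

#include <stdint.h>
#include <stdio.h>

/* Cut-down 64-bit route entry; only the mask bit is modelled. */
union eu {
	struct { uint32_t w1, w2; };	/* w1 = low word, holds the mask */
	uint64_t entry;
};

static void write_entry(uint32_t regs[2], union eu e)
{
	regs[1] = e.w2;		/* high word first: populate fully... */
	regs[0] = e.w1;		/* ...before the unmasked low word goes live */
}

static void mask_entry(uint32_t regs[2])
{
	union eu e = { .entry = 0 };

	e.w1 |= 1u << 16;	/* set the mask bit... */
	regs[0] = e.w1;		/* ...and write it before touching w2 */
	regs[1] = e.w2;
}

int main(void)
{
	uint32_t regs[2] = { 0, 0 };
	union eu e = { .entry = 0x0000000100000031ULL };

	write_entry(regs, e);
	printf("enabled: w1=%#010x w2=%#010x\n", regs[0], regs[1]);
	mask_entry(regs);
	printf("masked:  w1=%#010x w2=%#010x\n", regs[0], regs[1]);
	return 0;
}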
5578
5579 #ifdef CONFIG_SMP
5580 +static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
5581 +{
5582 + int apic, pin;
5583 + struct irq_pin_list *entry = irq_2_pin + irq;
5584 +
5585 + BUG_ON(irq >= NR_IRQS);
5586 + for (;;) {
5587 + unsigned int reg;
5588 + apic = entry->apic;
5589 + pin = entry->pin;
5590 + if (pin == -1)
5591 + break;
5592 + io_apic_write(apic, 0x11 + pin*2, dest);
5593 + reg = io_apic_read(apic, 0x10 + pin*2);
5594 + reg &= ~0x000000ff;
5595 + reg |= vector;
5596 + io_apic_modify(apic, reg);
5597 + if (!entry->next)
5598 + break;
5599 + entry = irq_2_pin + entry->next;
5600 + }
5601 +}
5602 +
5603 static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
5604 {
5605 unsigned long flags;
5606 unsigned int dest;
5607 cpumask_t tmp;
5608 + int vector;
5609
5610 cpus_and(tmp, mask, cpu_online_map);
5611 if (cpus_empty(tmp))
5612 @@ -147,7 +267,11 @@ static void set_ioapic_affinity_irq(unsi
5613
5614 cpus_and(mask, tmp, CPU_MASK_ALL);
5615
5616 - dest = cpu_mask_to_apicid(mask);
5617 + vector = assign_irq_vector(irq, mask, &tmp);
5618 + if (vector < 0)
5619 + return;
5620 +
5621 + dest = cpu_mask_to_apicid(tmp);
5622
5623 /*
5624 * Only the high 8 bits are valid.
5625 @@ -155,13 +279,12 @@ static void set_ioapic_affinity_irq(unsi
5626 dest = SET_APIC_LOGICAL_ID(dest);
5627
5628 spin_lock_irqsave(&ioapic_lock, flags);
5629 - __DO_ACTION(1, = dest, )
5630 - set_irq_info(irq, mask);
5631 + __target_IO_APIC_irq(irq, dest, vector);
5632 + set_native_irq_info(irq, mask);
5633 spin_unlock_irqrestore(&ioapic_lock, flags);
5634 }
5635 #endif
5636 -
5637 -#endif /* !CONFIG_XEN */
5638 +#endif
5639
5640 /*
5641 * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
5642 @@ -241,24 +364,15 @@ static void unmask_IO_APIC_irq (unsigned
5643 static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
5644 {
5645 struct IO_APIC_route_entry entry;
5646 - unsigned long flags;
5647
5648 /* Check delivery_mode to be sure we're not clearing an SMI pin */
5649 - spin_lock_irqsave(&ioapic_lock, flags);
5650 - *(((int*)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
5651 - *(((int*)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
5652 - spin_unlock_irqrestore(&ioapic_lock, flags);
5653 + entry = ioapic_read_entry(apic, pin);
5654 if (entry.delivery_mode == dest_SMI)
5655 return;
5656 /*
5657 * Disable it in the IO-APIC irq-routing table:
5658 */
5659 - memset(&entry, 0, sizeof(entry));
5660 - entry.mask = 1;
5661 - spin_lock_irqsave(&ioapic_lock, flags);
5662 - io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry) + 0));
5663 - io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry) + 1));
5664 - spin_unlock_irqrestore(&ioapic_lock, flags);
5665 + ioapic_mask_entry(apic, pin);
5666 }
5667
5668 static void clear_IO_APIC (void)
5669 @@ -272,16 +386,6 @@ static void clear_IO_APIC (void)
5670
5671 #endif /* !CONFIG_XEN */
5672
5673 -static u8 gsi_2_irq[NR_IRQ_VECTORS] = { [0 ... NR_IRQ_VECTORS-1] = 0xFF };
5674 -
5675 -/*
5676 - * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
5677 - * specific CPU-side IRQs.
5678 - */
5679 -
5680 -#define MAX_PIRQS 8
5681 -static int pirq_entries [MAX_PIRQS];
5682 -static int pirqs_enabled;
5683 int skip_ioapic_setup;
5684 int ioapic_force;
5685
5686 @@ -290,18 +394,17 @@ int ioapic_force;
5687 static int __init disable_ioapic_setup(char *str)
5688 {
5689 skip_ioapic_setup = 1;
5690 - return 1;
5691 + return 0;
5692 }
5693 +early_param("noapic", disable_ioapic_setup);
5694
5695 -static int __init enable_ioapic_setup(char *str)
5696 +/* Actually the next is obsolete, but keep it for paranoid reasons -AK */
5697 +static int __init disable_timer_pin_setup(char *arg)
5698 {
5699 - ioapic_force = 1;
5700 - skip_ioapic_setup = 0;
5701 + disable_timer_pin_1 = 1;
5702 return 1;
5703 }
5704 -
5705 -__setup("noapic", disable_ioapic_setup);
5706 -__setup("apic", enable_ioapic_setup);
5707 +__setup("disable_timer_pin_1", disable_timer_pin_setup);
5708
5709 #ifndef CONFIG_XEN
5710 static int __init setup_disable_8254_timer(char *s)
5711 @@ -319,137 +422,6 @@ __setup("disable_8254_timer", setup_disa
5712 __setup("enable_8254_timer", setup_enable_8254_timer);
5713 #endif /* !CONFIG_XEN */
5714
5715 -#include <asm/pci-direct.h>
5716 -#include <linux/pci_ids.h>
5717 -#include <linux/pci.h>
5718 -
5719 -
5720 -#ifdef CONFIG_ACPI
5721 -
5722 -static int nvidia_hpet_detected __initdata;
5723 -
5724 -static int __init nvidia_hpet_check(unsigned long phys, unsigned long size)
5725 -{
5726 - nvidia_hpet_detected = 1;
5727 - return 0;
5728 -}
5729 -#endif
5730 -
5731 -/* Temporary Hack. Nvidia and VIA boards currently only work with IO-APIC
5732 - off. Check for an Nvidia or VIA PCI bridge and turn it off.
5733 - Use pci direct infrastructure because this runs before the PCI subsystem.
5734 -
5735 - Can be overwritten with "apic"
5736 -
5737 - And another hack to disable the IOMMU on VIA chipsets.
5738 -
5739 - ... and others. Really should move this somewhere else.
5740 -
5741 - Kludge-O-Rama. */
5742 -void __init check_ioapic(void)
5743 -{
5744 - int num,slot,func;
5745 - /* Poor man's PCI discovery */
5746 - for (num = 0; num < 32; num++) {
5747 - for (slot = 0; slot < 32; slot++) {
5748 - for (func = 0; func < 8; func++) {
5749 - u32 class;
5750 - u32 vendor;
5751 - u8 type;
5752 - class = read_pci_config(num,slot,func,
5753 - PCI_CLASS_REVISION);
5754 - if (class == 0xffffffff)
5755 - break;
5756 -
5757 - if ((class >> 16) != PCI_CLASS_BRIDGE_PCI)
5758 - continue;
5759 -
5760 - vendor = read_pci_config(num, slot, func,
5761 - PCI_VENDOR_ID);
5762 - vendor &= 0xffff;
5763 - switch (vendor) {
5764 - case PCI_VENDOR_ID_VIA:
5765 -#ifdef CONFIG_IOMMU
5766 - if ((end_pfn > MAX_DMA32_PFN ||
5767 - force_iommu) &&
5768 - !iommu_aperture_allowed) {
5769 - printk(KERN_INFO
5770 - "Looks like a VIA chipset. Disabling IOMMU. Override with \"iommu=allowed\"\n");
5771 - iommu_aperture_disabled = 1;
5772 - }
5773 -#endif
5774 - return;
5775 - case PCI_VENDOR_ID_NVIDIA:
5776 -#ifdef CONFIG_ACPI
5777 - /*
5778 - * All timer overrides on Nvidia are
5779 - * wrong unless HPET is enabled.
5780 - */
5781 - nvidia_hpet_detected = 0;
5782 - acpi_table_parse(ACPI_HPET,
5783 - nvidia_hpet_check);
5784 - if (nvidia_hpet_detected == 0) {
5785 - acpi_skip_timer_override = 1;
5786 - printk(KERN_INFO "Nvidia board "
5787 - "detected. Ignoring ACPI "
5788 - "timer override.\n");
5789 - }
5790 -#endif
5791 - /* RED-PEN skip them on mptables too? */
5792 - return;
5793 - case PCI_VENDOR_ID_ATI:
5794 -
5795 - /* This should be actually default, but
5796 - for 2.6.16 let's do it for ATI only where
5797 - it's really needed. */
5798 -#ifndef CONFIG_XEN
5799 - if (timer_over_8254 == 1) {
5800 - timer_over_8254 = 0;
5801 - printk(KERN_INFO
5802 - "ATI board detected. Disabling timer routing over 8254.\n");
5803 - }
5804 -#endif
5805 - return;
5806 - }
5807 -
5808 -
5809 - /* No multi-function device? */
5810 - type = read_pci_config_byte(num,slot,func,
5811 - PCI_HEADER_TYPE);
5812 - if (!(type & 0x80))
5813 - break;
5814 - }
5815 - }
5816 - }
5817 -}
5818 -
5819 -static int __init ioapic_pirq_setup(char *str)
5820 -{
5821 - int i, max;
5822 - int ints[MAX_PIRQS+1];
5823 -
5824 - get_options(str, ARRAY_SIZE(ints), ints);
5825 -
5826 - for (i = 0; i < MAX_PIRQS; i++)
5827 - pirq_entries[i] = -1;
5828 -
5829 - pirqs_enabled = 1;
5830 - apic_printk(APIC_VERBOSE, "PIRQ redirection, working around broken MP-BIOS.\n");
5831 - max = MAX_PIRQS;
5832 - if (ints[0] < MAX_PIRQS)
5833 - max = ints[0];
5834 -
5835 - for (i = 0; i < max; i++) {
5836 - apic_printk(APIC_VERBOSE, "... PIRQ%d -> IRQ %d\n", i, ints[i+1]);
5837 - /*
5838 - * PIRQs are mapped upside down, usually.
5839 - */
5840 - pirq_entries[MAX_PIRQS-i-1] = ints[i+1];
5841 - }
5842 - return 1;
5843 -}
5844 -
5845 -__setup("pirq=", ioapic_pirq_setup);
5846
5847 /*
5848 * Find the IRQ entry number of a certain pin.
5849 @@ -479,9 +451,7 @@ static int __init find_isa_irq_pin(int i
5850 for (i = 0; i < mp_irq_entries; i++) {
5851 int lbus = mp_irqs[i].mpc_srcbus;
5852
5853 - if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
5854 - mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
5855 - mp_bus_id_to_type[lbus] == MP_BUS_MCA) &&
5856 + if (test_bit(lbus, mp_bus_not_pci) &&
5857 (mp_irqs[i].mpc_irqtype == type) &&
5858 (mp_irqs[i].mpc_srcbusirq == irq))
5859
5860 @@ -497,9 +467,7 @@ static int __init find_isa_irq_apic(int
5861 for (i = 0; i < mp_irq_entries; i++) {
5862 int lbus = mp_irqs[i].mpc_srcbus;
5863
5864 - if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
5865 - mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
5866 - mp_bus_id_to_type[lbus] == MP_BUS_MCA) &&
5867 + if (test_bit(lbus, mp_bus_not_pci) &&
5868 (mp_irqs[i].mpc_irqtype == type) &&
5869 (mp_irqs[i].mpc_srcbusirq == irq))
5870 break;
5871 @@ -540,7 +508,7 @@ int IO_APIC_get_PCI_irq_vector(int bus,
5872 mp_irqs[i].mpc_dstapic == MP_APIC_ALL)
5873 break;
5874
5875 - if ((mp_bus_id_to_type[lbus] == MP_BUS_PCI) &&
5876 + if (!test_bit(lbus, mp_bus_not_pci) &&
5877 !mp_irqs[i].mpc_irqtype &&
5878 (bus == lbus) &&
5879 (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) {
5880 @@ -563,27 +531,6 @@ int IO_APIC_get_PCI_irq_vector(int bus,
5881 return best_guess;
5882 }
5883
5884 -/*
5885 - * EISA Edge/Level control register, ELCR
5886 - */
5887 -static int EISA_ELCR(unsigned int irq)
5888 -{
5889 - if (irq < 16) {
5890 - unsigned int port = 0x4d0 + (irq >> 3);
5891 - return (inb(port) >> (irq & 7)) & 1;
5892 - }
5893 - apic_printk(APIC_VERBOSE, "Broken MPtable reports ISA irq %d\n", irq);
5894 - return 0;
5895 -}
5896 -
5897 -/* EISA interrupts are always polarity zero and can be edge or level
5898 - * trigger depending on the ELCR value. If an interrupt is listed as
5899 - * EISA conforming in the MP table, that means its trigger type must
5900 - * be read in from the ELCR */
5901 -
5902 -#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mpc_srcbusirq))
5903 -#define default_EISA_polarity(idx) (0)
5904 -
5905 /* ISA interrupts are always polarity zero edge triggered,
5906 * when listed as conforming in the MP table. */
5907
5908 @@ -596,12 +543,6 @@ static int EISA_ELCR(unsigned int irq)
5909 #define default_PCI_trigger(idx) (1)
5910 #define default_PCI_polarity(idx) (1)
5911
5912 -/* MCA interrupts are always polarity zero level triggered,
5913 - * when listed as conforming in the MP table. */
5914 -
5915 -#define default_MCA_trigger(idx) (1)
5916 -#define default_MCA_polarity(idx) (0)
5917 -
5918 static int __init MPBIOS_polarity(int idx)
5919 {
5920 int bus = mp_irqs[idx].mpc_srcbus;
5921 @@ -613,38 +554,11 @@ static int __init MPBIOS_polarity(int id
5922 switch (mp_irqs[idx].mpc_irqflag & 3)
5923 {
5924 case 0: /* conforms, ie. bus-type dependent polarity */
5925 - {
5926 - switch (mp_bus_id_to_type[bus])
5927 - {
5928 - case MP_BUS_ISA: /* ISA pin */
5929 - {
5930 - polarity = default_ISA_polarity(idx);
5931 - break;
5932 - }
5933 - case MP_BUS_EISA: /* EISA pin */
5934 - {
5935 - polarity = default_EISA_polarity(idx);
5936 - break;
5937 - }
5938 - case MP_BUS_PCI: /* PCI pin */
5939 - {
5940 - polarity = default_PCI_polarity(idx);
5941 - break;
5942 - }
5943 - case MP_BUS_MCA: /* MCA pin */
5944 - {
5945 - polarity = default_MCA_polarity(idx);
5946 - break;
5947 - }
5948 - default:
5949 - {
5950 - printk(KERN_WARNING "broken BIOS!!\n");
5951 - polarity = 1;
5952 - break;
5953 - }
5954 - }
5955 + if (test_bit(bus, mp_bus_not_pci))
5956 + polarity = default_ISA_polarity(idx);
5957 + else
5958 + polarity = default_PCI_polarity(idx);
5959 break;
5960 - }
5961 case 1: /* high active */
5962 {
5963 polarity = 0;
5964 @@ -682,38 +596,11 @@ static int MPBIOS_trigger(int idx)
5965 switch ((mp_irqs[idx].mpc_irqflag>>2) & 3)
5966 {
5967 case 0: /* conforms, ie. bus-type dependent */
5968 - {
5969 - switch (mp_bus_id_to_type[bus])
5970 - {
5971 - case MP_BUS_ISA: /* ISA pin */
5972 - {
5973 - trigger = default_ISA_trigger(idx);
5974 - break;
5975 - }
5976 - case MP_BUS_EISA: /* EISA pin */
5977 - {
5978 - trigger = default_EISA_trigger(idx);
5979 - break;
5980 - }
5981 - case MP_BUS_PCI: /* PCI pin */
5982 - {
5983 - trigger = default_PCI_trigger(idx);
5984 - break;
5985 - }
5986 - case MP_BUS_MCA: /* MCA pin */
5987 - {
5988 - trigger = default_MCA_trigger(idx);
5989 - break;
5990 - }
5991 - default:
5992 - {
5993 - printk(KERN_WARNING "broken BIOS!!\n");
5994 - trigger = 1;
5995 - break;
5996 - }
5997 - }
5998 + if (test_bit(bus, mp_bus_not_pci))
5999 + trigger = default_ISA_trigger(idx);
6000 + else
6001 + trigger = default_PCI_trigger(idx);
6002 break;
6003 - }
6004 case 1: /* edge */
6005 {
6006 trigger = 0;
6007 @@ -750,64 +637,6 @@ static inline int irq_trigger(int idx)
6008 return MPBIOS_trigger(idx);
6009 }
6010
6011 -static int next_irq = 16;
6012 -
6013 -/*
6014 - * gsi_irq_sharing -- Name overload! "irq" can be either a legacy IRQ
6015 - * in the range 0-15, a linux IRQ in the range 0-223, or a GSI number
6016 - * from ACPI, which can reach 800 in large boxen.
6017 - *
6018 - * Compact the sparse GSI space into a sequential IRQ series and reuse
6019 - * vectors if possible.
6020 - */
6021 -int gsi_irq_sharing(int gsi)
6022 -{
6023 - int i, tries, vector;
6024 -
6025 - BUG_ON(gsi >= NR_IRQ_VECTORS);
6026 -
6027 - if (platform_legacy_irq(gsi))
6028 - return gsi;
6029 -
6030 - if (gsi_2_irq[gsi] != 0xFF)
6031 - return (int)gsi_2_irq[gsi];
6032 -
6033 - tries = NR_IRQS;
6034 - try_again:
6035 - vector = assign_irq_vector(gsi);
6036 -
6037 - /*
6038 - * Sharing vectors means sharing IRQs, so scan irq_vectors for previous
6039 - * use of vector and if found, return that IRQ. However, we never want
6040 - * to share legacy IRQs, which usually have a different trigger mode
6041 - * than PCI.
6042 - */
6043 - for (i = 0; i < NR_IRQS; i++)
6044 - if (IO_APIC_VECTOR(i) == vector)
6045 - break;
6046 - if (platform_legacy_irq(i)) {
6047 - if (--tries >= 0) {
6048 - IO_APIC_VECTOR(i) = 0;
6049 - goto try_again;
6050 - }
6051 - panic("gsi_irq_sharing: didn't find an IRQ using vector 0x%02X for GSI %d", vector, gsi);
6052 - }
6053 - if (i < NR_IRQS) {
6054 - gsi_2_irq[gsi] = i;
6055 - printk(KERN_INFO "GSI %d sharing vector 0x%02X and IRQ %d\n",
6056 - gsi, vector, i);
6057 - return i;
6058 - }
6059 -
6060 - i = next_irq++;
6061 - BUG_ON(i >= NR_IRQS);
6062 - gsi_2_irq[gsi] = i;
6063 - IO_APIC_VECTOR(i) = vector;
6064 - printk(KERN_INFO "GSI %d assigned vector 0x%02X and IRQ %d\n",
6065 - gsi, vector, i);
6066 - return i;
6067 -}
6068 -
6069 static int pin_2_irq(int idx, int apic, int pin)
6070 {
6071 int irq, i;
6072 @@ -819,49 +648,16 @@ static int pin_2_irq(int idx, int apic,
6073 if (mp_irqs[idx].mpc_dstirq != pin)
6074 printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
6075
6076 - switch (mp_bus_id_to_type[bus])
6077 - {
6078 - case MP_BUS_ISA: /* ISA pin */
6079 - case MP_BUS_EISA:
6080 - case MP_BUS_MCA:
6081 - {
6082 - irq = mp_irqs[idx].mpc_srcbusirq;
6083 - break;
6084 - }
6085 - case MP_BUS_PCI: /* PCI pin */
6086 - {
6087 - /*
6088 - * PCI IRQs are mapped in order
6089 - */
6090 - i = irq = 0;
6091 - while (i < apic)
6092 - irq += nr_ioapic_registers[i++];
6093 - irq += pin;
6094 - irq = gsi_irq_sharing(irq);
6095 - break;
6096 - }
6097 - default:
6098 - {
6099 - printk(KERN_ERR "unknown bus type %d.\n",bus);
6100 - irq = 0;
6101 - break;
6102 - }
6103 - }
6104 - BUG_ON(irq >= NR_IRQS);
6105 -
6106 - /*
6107 - * PCI IRQ command line redirection. Yes, limits are hardcoded.
6108 - */
6109 - if ((pin >= 16) && (pin <= 23)) {
6110 - if (pirq_entries[pin-16] != -1) {
6111 - if (!pirq_entries[pin-16]) {
6112 - apic_printk(APIC_VERBOSE, "disabling PIRQ%d\n", pin-16);
6113 - } else {
6114 - irq = pirq_entries[pin-16];
6115 - apic_printk(APIC_VERBOSE, "using PIRQ%d -> IRQ %d\n",
6116 - pin-16, irq);
6117 - }
6118 - }
6119 + if (test_bit(bus, mp_bus_not_pci)) {
6120 + irq = mp_irqs[idx].mpc_srcbusirq;
6121 + } else {
6122 + /*
6123 + * PCI IRQs are mapped in order
6124 + */
6125 + i = irq = 0;
6126 + while (i < apic)
6127 + irq += nr_ioapic_registers[i++];
6128 + irq += pin;
6129 }
6130 BUG_ON(irq >= NR_IRQS);
6131 return irq;
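The PCI branch above replaces the old per-bus-type switch with plain arithmetic: the IRQ of a PCI pin is the pin index plus the pin counts of all lower-numbered IO-APICs. A standalone sketch of that arithmetic with hypothetical pin counts:

#include <stdio.h>

/* Hypothetical topology: IO-APIC 0 has 24 pins, IO-APIC 1 has 16. */
static const int nr_ioapic_registers[] = { 24, 16 };

static int pci_pin_to_irq(int apic, int pin)
{
	int i = 0, irq = 0;

	while (i < apic)			/* pins of the lower IO-APICs... */
		irq += nr_ioapic_registers[i++];
	return irq + pin;			/* ...plus the offset within ours */
}

int main(void)
{
	/* Pin 5 on the second IO-APIC: 24 + 5 = IRQ 29. */
	printf("irq = %d\n", pci_pin_to_irq(1, 5));
	return 0;
}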
6132 @@ -885,46 +681,71 @@ static inline int IO_APIC_irq_trigger(in
6133 }
6134
6135 /* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
6136 -u8 irq_vector[NR_IRQ_VECTORS] __read_mostly;
6137 +static u8 irq_vector[NR_IRQ_VECTORS] __read_mostly;
6138
6139 -int assign_irq_vector(int irq)
6140 +static int __assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result)
6141 {
6142 - unsigned long flags;
6143 int vector;
6144 struct physdev_irq irq_op;
6145
6146 - BUG_ON(irq != AUTO_ASSIGN && (unsigned)irq >= NR_IRQ_VECTORS);
6147 + BUG_ON((unsigned)irq >= NR_IRQ_VECTORS);
6148
6149 if (irq < PIRQ_BASE || irq - PIRQ_BASE > NR_PIRQS)
6150 return -EINVAL;
6151
6152 - spin_lock_irqsave(&vector_lock, flags);
6153 + cpus_and(*result, mask, cpu_online_map);
6154
6155 - if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) {
6156 - spin_unlock_irqrestore(&vector_lock, flags);
6157 - return IO_APIC_VECTOR(irq);
6158 - }
6159 + if (irq_vector[irq] > 0)
6160 + return irq_vector[irq];
6161
6162 irq_op.irq = irq;
6163 - if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) {
6164 - spin_unlock_irqrestore(&vector_lock, flags);
6165 + if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op))
6166 return -ENOSPC;
6167 - }
6168
6169 vector = irq_op.vector;
6170 - vector_irq[vector] = irq;
6171 - if (irq != AUTO_ASSIGN)
6172 - IO_APIC_VECTOR(irq) = vector;
6173 + irq_vector[irq] = vector;
6174
6175 - spin_unlock_irqrestore(&vector_lock, flags);
6176 + return vector;
6177 +}
6178 +
6179 +static int assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result)
6180 +{
6181 + int vector;
6182 + unsigned long flags;
6183
6184 + spin_lock_irqsave(&vector_lock, flags);
6185 + vector = __assign_irq_vector(irq, mask, result);
6186 + spin_unlock_irqrestore(&vector_lock, flags);
6187 return vector;
6188 }
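assign_irq_vector() now returns the vector and, through *result, the online subset of the requested mask; callers are expected to program the destination from that returned mask rather than from what they asked for. A kernel-context sketch of the calling convention (it mirrors the setup_IO_APIC_irqs hunk below and is not compilable on its own):

	cpumask_t mask;
	int vector = assign_irq_vector(irq, TARGET_CPUS, &mask);

	if (vector < 0)
		return;		/* e.g. -ENOSPC: the hypervisor had no vector */

	/* Use the mask the allocator settled on, not the one requested. */
	entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask);
	entry.vector = vector;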
6189
6190 -extern void (*interrupt[NR_IRQS])(void);
6191 #ifndef CONFIG_XEN
6192 -static struct hw_interrupt_type ioapic_level_type;
6193 -static struct hw_interrupt_type ioapic_edge_type;
6194 +void __setup_vector_irq(int cpu)
6195 +{
6196 + /* Initialize vector_irq on a new cpu */
6197 + /* This function must be called with vector_lock held */
6198 + int irq, vector;
6199 +
6200 + /* Mark the inuse vectors */
6201 + for (irq = 0; irq < NR_IRQ_VECTORS; ++irq) {
6202 + if (!cpu_isset(cpu, irq_domain[irq]))
6203 + continue;
6204 + vector = irq_vector[irq];
6205 + per_cpu(vector_irq, cpu)[vector] = irq;
6206 + }
6207 + /* Mark the free vectors */
6208 + for (vector = 0; vector < NR_VECTORS; ++vector) {
6209 + irq = per_cpu(vector_irq, cpu)[vector];
6210 + if (irq < 0)
6211 + continue;
6212 + if (!cpu_isset(cpu, irq_domain[irq]))
6213 + per_cpu(vector_irq, cpu)[vector] = -1;
6214 + }
6215 +}
6216 +
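__setup_vector_irq() states a locking protocol rather than enforcing it: vector_lock must already be held, since irq_vector[] and irq_domain[] can change underneath it. A sketch of the assumed call site on CPU bring-up (the real caller lives in the SMP boot path, not in this file):

	/* On a CPU coming online, before it is allowed to take interrupts: */
	spin_lock(&vector_lock);
	__setup_vector_irq(smp_processor_id());
	spin_unlock(&vector_lock);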
6217 +extern void (*interrupt[NR_IRQS])(void);
6218 +
6219 +static struct irq_chip ioapic_chip;
6220
6221 #define IOAPIC_AUTO -1
6222 #define IOAPIC_EDGE 0
6223 @@ -932,16 +753,15 @@ static struct hw_interrupt_type ioapic_e
6224
6225 static void ioapic_register_intr(int irq, int vector, unsigned long trigger)
6226 {
6227 - unsigned idx;
6228 -
6229 - idx = use_pci_vector() && !platform_legacy_irq(irq) ? vector : irq;
6230 -
6231 if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
6232 trigger == IOAPIC_LEVEL)
6233 - irq_desc[idx].chip = &ioapic_level_type;
6234 - else
6235 - irq_desc[idx].chip = &ioapic_edge_type;
6236 - set_intr_gate(vector, interrupt[idx]);
6237 + set_irq_chip_and_handler_name(irq, &ioapic_chip,
6238 + handle_fasteoi_irq, "fasteoi");
6239 + else {
6240 + irq_desc[irq].status |= IRQ_DELAYED_DISABLE;
6241 + set_irq_chip_and_handler_name(irq, &ioapic_chip,
6242 + handle_edge_irq, "edge");
6243 + }
6244 }
6245 #else
6246 #define ioapic_register_intr(irq, vector, trigger) evtchn_register_pirq(irq)
6247 @@ -994,16 +814,21 @@ static void __init setup_IO_APIC_irqs(vo
6248 continue;
6249
6250 if (IO_APIC_IRQ(irq)) {
6251 - vector = assign_irq_vector(irq);
6252 + cpumask_t mask;
6253 + vector = assign_irq_vector(irq, TARGET_CPUS, &mask);
6254 + if (vector < 0)
6255 + continue;
6256 +
6257 + entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask);
6258 entry.vector = vector;
6259
6260 ioapic_register_intr(irq, vector, IOAPIC_AUTO);
6261 if (!apic && (irq < 16))
6262 disable_8259A_irq(irq);
6263 }
6264 + ioapic_write_entry(apic, pin, entry);
6265 +
6266 spin_lock_irqsave(&ioapic_lock, flags);
6267 - io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
6268 - io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
6269 set_native_irq_info(irq, TARGET_CPUS);
6270 spin_unlock_irqrestore(&ioapic_lock, flags);
6271 }
6272 @@ -1046,7 +871,7 @@ static void __init setup_ExtINT_IRQ0_pin
6273 * The timer IRQ doesn't have to know that behind the
6274 * scene we have a 8259A-master in AEOI mode ...
6275 */
6276 - irq_desc[0].chip = &ioapic_edge_type;
6277 + set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge");
6278
6279 /*
6280 * Add it to the IO-APIC irq-routing table:
6281 @@ -1142,10 +967,7 @@ void __apicdebuginit print_IO_APIC(void)
6282 for (i = 0; i <= reg_01.bits.entries; i++) {
6283 struct IO_APIC_route_entry entry;
6284
6285 - spin_lock_irqsave(&ioapic_lock, flags);
6286 - *(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2);
6287 - *(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2);
6288 - spin_unlock_irqrestore(&ioapic_lock, flags);
6289 + entry = ioapic_read_entry(apic, i);
6290
6291 printk(KERN_DEBUG " %02x %03X %02X ",
6292 i,
6293 @@ -1165,17 +987,12 @@ void __apicdebuginit print_IO_APIC(void)
6294 );
6295 }
6296 }
6297 - if (use_pci_vector())
6298 - printk(KERN_INFO "Using vector-based indexing\n");
6299 printk(KERN_DEBUG "IRQ to pin mappings:\n");
6300 for (i = 0; i < NR_IRQS; i++) {
6301 struct irq_pin_list *entry = irq_2_pin + i;
6302 if (entry->pin < 0)
6303 continue;
6304 - if (use_pci_vector() && !platform_legacy_irq(i))
6305 - printk(KERN_DEBUG "IRQ%d ", IO_APIC_VECTOR(i));
6306 - else
6307 - printk(KERN_DEBUG "IRQ%d ", i);
6308 + printk(KERN_DEBUG "IRQ%d ", i);
6309 for (;;) {
6310 printk("-> %d:%d", entry->apic, entry->pin);
6311 if (!entry->next)
6312 @@ -1339,9 +1156,6 @@ static void __init enable_IO_APIC(void)
6313 irq_2_pin[i].pin = -1;
6314 irq_2_pin[i].next = 0;
6315 }
6316 - if (!pirqs_enabled)
6317 - for (i = 0; i < MAX_PIRQS; i++)
6318 - pirq_entries[i] = -1;
6319
6320 /*
6321 * The number of IO-APIC IRQ registers (== #pins):
6322 @@ -1358,11 +1172,7 @@ static void __init enable_IO_APIC(void)
6323 /* See if any of the pins is in ExtINT mode */
6324 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
6325 struct IO_APIC_route_entry entry;
6326 - spin_lock_irqsave(&ioapic_lock, flags);
6327 - *(((int *)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
6328 - *(((int *)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
6329 - spin_unlock_irqrestore(&ioapic_lock, flags);
6330 -
6331 + entry = ioapic_read_entry(apic, pin);
6332
6333 /* If the interrupt line is enabled and in ExtInt mode
6334 * I have found the pin where the i8259 is connected.
6335 @@ -1416,7 +1226,6 @@ void disable_IO_APIC(void)
6336 */
6337 if (ioapic_i8259.pin != -1) {
6338 struct IO_APIC_route_entry entry;
6339 - unsigned long flags;
6340
6341 memset(&entry, 0, sizeof(entry));
6342 entry.mask = 0; /* Enabled */
6343 @@ -1433,12 +1242,7 @@ void disable_IO_APIC(void)
6344 /*
6345 * Add it to the IO-APIC irq-routing table:
6346 */
6347 - spin_lock_irqsave(&ioapic_lock, flags);
6348 - io_apic_write(ioapic_i8259.apic, 0x11+2*ioapic_i8259.pin,
6349 - *(((int *)&entry)+1));
6350 - io_apic_write(ioapic_i8259.apic, 0x10+2*ioapic_i8259.pin,
6351 - *(((int *)&entry)+0));
6352 - spin_unlock_irqrestore(&ioapic_lock, flags);
6353 + ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
6354 }
6355
6356 disconnect_bsp_APIC(ioapic_i8259.pin != -1);
6357 @@ -1446,76 +1250,6 @@ void disable_IO_APIC(void)
6358 }
6359
6360 /*
6361 - * function to set the IO-APIC physical IDs based on the
6362 - * values stored in the MPC table.
6363 - *
6364 - * by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999
6365 - */
6366 -
6367 -#ifndef CONFIG_XEN
6368 -static void __init setup_ioapic_ids_from_mpc (void)
6369 -{
6370 - union IO_APIC_reg_00 reg_00;
6371 - int apic;
6372 - int i;
6373 - unsigned char old_id;
6374 - unsigned long flags;
6375 -
6376 - /*
6377 - * Set the IOAPIC ID to the value stored in the MPC table.
6378 - */
6379 - for (apic = 0; apic < nr_ioapics; apic++) {
6380 -
6381 - /* Read the register 0 value */
6382 - spin_lock_irqsave(&ioapic_lock, flags);
6383 - reg_00.raw = io_apic_read(apic, 0);
6384 - spin_unlock_irqrestore(&ioapic_lock, flags);
6385 -
6386 - old_id = mp_ioapics[apic].mpc_apicid;
6387 -
6388 -
6389 - printk(KERN_INFO "Using IO-APIC %d\n", mp_ioapics[apic].mpc_apicid);
6390 -
6391 -
6392 - /*
6393 - * We need to adjust the IRQ routing table
6394 - * if the ID changed.
6395 - */
6396 - if (old_id != mp_ioapics[apic].mpc_apicid)
6397 - for (i = 0; i < mp_irq_entries; i++)
6398 - if (mp_irqs[i].mpc_dstapic == old_id)
6399 - mp_irqs[i].mpc_dstapic
6400 - = mp_ioapics[apic].mpc_apicid;
6401 -
6402 - /*
6403 - * Read the right value from the MPC table and
6404 - * write it into the ID register.
6405 - */
6406 - apic_printk(APIC_VERBOSE,KERN_INFO "...changing IO-APIC physical APIC ID to %d ...",
6407 - mp_ioapics[apic].mpc_apicid);
6408 -
6409 - reg_00.bits.ID = mp_ioapics[apic].mpc_apicid;
6410 - spin_lock_irqsave(&ioapic_lock, flags);
6411 - io_apic_write(apic, 0, reg_00.raw);
6412 - spin_unlock_irqrestore(&ioapic_lock, flags);
6413 -
6414 - /*
6415 - * Sanity check
6416 - */
6417 - spin_lock_irqsave(&ioapic_lock, flags);
6418 - reg_00.raw = io_apic_read(apic, 0);
6419 - spin_unlock_irqrestore(&ioapic_lock, flags);
6420 - if (reg_00.bits.ID != mp_ioapics[apic].mpc_apicid)
6421 - printk("could not set ID!\n");
6422 - else
6423 - apic_printk(APIC_VERBOSE," ok.\n");
6424 - }
6425 -}
6426 -#else
6427 -static void __init setup_ioapic_ids_from_mpc(void) { }
6428 -#endif
6429 -
6430 -/*
6431 * There is a nasty bug in some older SMP boards, their mptable lies
6432 * about the timer IRQ. We do the following to work around the situation:
6433 *
6434 @@ -1569,7 +1303,7 @@ static int __init timer_irq_works(void)
6435 * an edge even if it isn't on the 8259A...
6436 */
6437
6438 -static unsigned int startup_edge_ioapic_irq(unsigned int irq)
6439 +static unsigned int startup_ioapic_irq(unsigned int irq)
6440 {
6441 int was_pending = 0;
6442 unsigned long flags;
6443 @@ -1586,107 +1320,19 @@ static unsigned int startup_edge_ioapic_
6444 return was_pending;
6445 }
6446
6447 -/*
6448 - * Once we have recorded IRQ_PENDING already, we can mask the
6449 - * interrupt for real. This prevents IRQ storms from unhandled
6450 - * devices.
6451 - */
6452 -static void ack_edge_ioapic_irq(unsigned int irq)
6453 -{
6454 - move_irq(irq);
6455 - if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED))
6456 - == (IRQ_PENDING | IRQ_DISABLED))
6457 - mask_IO_APIC_irq(irq);
6458 - ack_APIC_irq();
6459 -}
6460 -
6461 -/*
6462 - * Level triggered interrupts can just be masked,
6463 - * and shutting down and starting up the interrupt
6464 - * is the same as enabling and disabling them -- except
6465 - * with a startup need to return a "was pending" value.
6466 - *
6467 - * Level triggered interrupts are special because we
6468 - * do not touch any IO-APIC register while handling
6469 - * them. We ack the APIC in the end-IRQ handler, not
6470 - * in the start-IRQ-handler. Protection against reentrance
6471 - * from the same interrupt is still provided, both by the
6472 - * generic IRQ layer and by the fact that an unacked local
6473 - * APIC does not accept IRQs.
6474 - */
6475 -static unsigned int startup_level_ioapic_irq (unsigned int irq)
6476 -{
6477 - unmask_IO_APIC_irq(irq);
6478 -
6479 - return 0; /* don't check for pending */
6480 -}
6481 -
6482 -static void end_level_ioapic_irq (unsigned int irq)
6483 -{
6484 - move_irq(irq);
6485 - ack_APIC_irq();
6486 -}
6487 -
6488 -#ifdef CONFIG_PCI_MSI
6489 -static unsigned int startup_edge_ioapic_vector(unsigned int vector)
6490 -{
6491 - int irq = vector_to_irq(vector);
6492 -
6493 - return startup_edge_ioapic_irq(irq);
6494 -}
6495 -
6496 -static void ack_edge_ioapic_vector(unsigned int vector)
6497 -{
6498 - int irq = vector_to_irq(vector);
6499 -
6500 - move_native_irq(vector);
6501 - ack_edge_ioapic_irq(irq);
6502 -}
6503 -
6504 -static unsigned int startup_level_ioapic_vector (unsigned int vector)
6505 -{
6506 - int irq = vector_to_irq(vector);
6507 -
6508 - return startup_level_ioapic_irq (irq);
6509 -}
6510 -
6511 -static void end_level_ioapic_vector (unsigned int vector)
6512 -{
6513 - int irq = vector_to_irq(vector);
6514 -
6515 - move_native_irq(vector);
6516 - end_level_ioapic_irq(irq);
6517 -}
6518 -
6519 -static void mask_IO_APIC_vector (unsigned int vector)
6520 -{
6521 - int irq = vector_to_irq(vector);
6522 -
6523 - mask_IO_APIC_irq(irq);
6524 -}
6525 -
6526 -static void unmask_IO_APIC_vector (unsigned int vector)
6527 +static int ioapic_retrigger_irq(unsigned int irq)
6528 {
6529 - int irq = vector_to_irq(vector);
6530 -
6531 - unmask_IO_APIC_irq(irq);
6532 -}
6533 -
6534 -#ifdef CONFIG_SMP
6535 -static void set_ioapic_affinity_vector (unsigned int vector,
6536 - cpumask_t cpu_mask)
6537 -{
6538 - int irq = vector_to_irq(vector);
6539 + cpumask_t mask;
6540 + unsigned vector;
6541 + unsigned long flags;
6542
6543 - set_native_irq_info(vector, cpu_mask);
6544 - set_ioapic_affinity_irq(irq, cpu_mask);
6545 -}
6546 -#endif // CONFIG_SMP
6547 -#endif // CONFIG_PCI_MSI
6548 + spin_lock_irqsave(&vector_lock, flags);
6549 + vector = irq_vector[irq];
6550 + cpus_clear(mask);
6551 + cpu_set(first_cpu(irq_domain[irq]), mask);
6552
6553 -static int ioapic_retrigger(unsigned int irq)
6554 -{
6555 - send_IPI_self(IO_APIC_VECTOR(irq));
6556 + send_IPI_mask(mask, vector);
6557 + spin_unlock_irqrestore(&vector_lock, flags);
6558
6559 return 1;
6560 }
6561 @@ -1700,32 +1346,47 @@ static int ioapic_retrigger(unsigned int
6562 * races.
6563 */
6564
6565 -static struct hw_interrupt_type ioapic_edge_type __read_mostly = {
6566 - .typename = "IO-APIC-edge",
6567 - .startup = startup_edge_ioapic,
6568 - .shutdown = shutdown_edge_ioapic,
6569 - .enable = enable_edge_ioapic,
6570 - .disable = disable_edge_ioapic,
6571 - .ack = ack_edge_ioapic,
6572 - .end = end_edge_ioapic,
6573 -#ifdef CONFIG_SMP
6574 - .set_affinity = set_ioapic_affinity,
6575 +static void ack_apic_edge(unsigned int irq)
6576 +{
6577 + move_native_irq(irq);
6578 + ack_APIC_irq();
6579 +}
6580 +
6581 +static void ack_apic_level(unsigned int irq)
6582 +{
6583 + int do_unmask_irq = 0;
6584 +
6585 +#if defined(CONFIG_GENERIC_PENDING_IRQ) || defined(CONFIG_IRQBALANCE)
6586 + /* If we are moving the irq we need to mask it */
6587 + if (unlikely(irq_desc[irq].status & IRQ_MOVE_PENDING)) {
6588 + do_unmask_irq = 1;
6589 + mask_IO_APIC_irq(irq);
6590 + }
6591 #endif
6592 - .retrigger = ioapic_retrigger,
6593 -};
6594
6595 -static struct hw_interrupt_type ioapic_level_type __read_mostly = {
6596 - .typename = "IO-APIC-level",
6597 - .startup = startup_level_ioapic,
6598 - .shutdown = shutdown_level_ioapic,
6599 - .enable = enable_level_ioapic,
6600 - .disable = disable_level_ioapic,
6601 - .ack = mask_and_ack_level_ioapic,
6602 - .end = end_level_ioapic,
6603 + /*
6604 + * We must acknowledge the irq before we move it, or the acknowledgement
6605 + * will not propagate properly.
6606 + */
6607 + ack_APIC_irq();
6608 +
6609 + /* Now we can move and re-enable the irq */
6610 + move_masked_irq(irq);
6611 + if (unlikely(do_unmask_irq))
6612 + unmask_IO_APIC_irq(irq);
6613 +}
6614 +
6615 +static struct irq_chip ioapic_chip __read_mostly = {
6616 + .name = "IO-APIC",
6617 + .startup = startup_ioapic_irq,
6618 + .mask = mask_IO_APIC_irq,
6619 + .unmask = unmask_IO_APIC_irq,
6620 + .ack = ack_apic_edge,
6621 + .eoi = ack_apic_level,
6622 #ifdef CONFIG_SMP
6623 - .set_affinity = set_ioapic_affinity,
6624 + .set_affinity = set_ioapic_affinity_irq,
6625 #endif
6626 - .retrigger = ioapic_retrigger,
6627 + .retrigger = ioapic_retrigger_irq,
6628 };
6629 #endif /* !CONFIG_XEN */
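With the two hw_interrupt_type tables collapsed into this single irq_chip, edge/level behaviour is now selected by the flow handler installed in ioapic_register_intr(): handle_edge_irq calls .ack (ack_apic_edge) before running handlers, while handle_fasteoi_irq calls .eoi (ack_apic_level) after them. A conceptual sketch of that split (not the real genirq code):

	static void flow_sketch(unsigned int irq, struct irq_chip *chip,
				int level)
	{
		if (!level)
			chip->ack(irq);		/* edge: ack up front */

		/* ... run the device handlers for this irq ... */

		if (level)
			chip->eoi(irq);		/* level: EOI once done */
	}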
6630
6631 @@ -1746,12 +1407,7 @@ static inline void init_IO_APIC_traps(vo
6632 */
6633 for (irq = 0; irq < NR_IRQS ; irq++) {
6634 int tmp = irq;
6635 - if (use_pci_vector()) {
6636 - if (!platform_legacy_irq(tmp))
6637 - if ((tmp = vector_to_irq(tmp)) == -1)
6638 - continue;
6639 - }
6640 - if (IO_APIC_IRQ(tmp) && !IO_APIC_VECTOR(tmp)) {
6641 + if (IO_APIC_IRQ(tmp) && !irq_vector[tmp]) {
6642 /*
6643 * Hmm.. We don't have an entry for this,
6644 * so default to an old-fashioned 8259
6645 @@ -1762,7 +1418,7 @@ static inline void init_IO_APIC_traps(vo
6646 #ifndef CONFIG_XEN
6647 else
6648 /* Strange. Oh, well.. */
6649 - irq_desc[irq].chip = &no_irq_type;
6650 + irq_desc[irq].chip = &no_irq_chip;
6651 #endif
6652 }
6653 }
6654 @@ -1883,8 +1539,6 @@ static inline void unlock_ExtINT_logic(v
6655 spin_unlock_irqrestore(&ioapic_lock, flags);
6656 }
6657
6658 -int timer_uses_ioapic_pin_0;
6659 -
6660 /*
6661 * This code may look a bit paranoid, but it's supposed to cooperate with
6662 * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ
6663 @@ -1897,13 +1551,13 @@ static inline void check_timer(void)
6664 {
6665 int apic1, pin1, apic2, pin2;
6666 int vector;
6667 + cpumask_t mask;
6668
6669 /*
6670 * get/set the timer IRQ vector:
6671 */
6672 disable_8259A_irq(0);
6673 - vector = assign_irq_vector(0);
6674 - set_intr_gate(vector, interrupt[0]);
6675 + vector = assign_irq_vector(0, TARGET_CPUS, &mask);
6676
6677 /*
6678 * Subtle, code in do_timer_interrupt() expects an AEOI
6679 @@ -1922,9 +1576,6 @@ static inline void check_timer(void)
6680 pin2 = ioapic_i8259.pin;
6681 apic2 = ioapic_i8259.apic;
6682
6683 - if (pin1 == 0)
6684 - timer_uses_ioapic_pin_0 = 1;
6685 -
6686 apic_printk(APIC_VERBOSE,KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n",
6687 vector, apic1, pin1, apic2, pin2);
6688
6689 @@ -2039,11 +1690,6 @@ void __init setup_IO_APIC(void)
6690
6691 apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");
6692
6693 - /*
6694 - * Set up the IO-APIC IRQ routing table.
6695 - */
6696 - if (!acpi_ioapic)
6697 - setup_ioapic_ids_from_mpc();
6698 #ifndef CONFIG_XEN
6699 sync_Arb_IDs();
6700 #endif /* !CONFIG_XEN */
6701 @@ -2066,17 +1712,12 @@ static int ioapic_suspend(struct sys_dev
6702 {
6703 struct IO_APIC_route_entry *entry;
6704 struct sysfs_ioapic_data *data;
6705 - unsigned long flags;
6706 int i;
6707
6708 data = container_of(dev, struct sysfs_ioapic_data, dev);
6709 entry = data->entry;
6710 - spin_lock_irqsave(&ioapic_lock, flags);
6711 - for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) {
6712 - *(((int *)entry) + 1) = io_apic_read(dev->id, 0x11 + 2 * i);
6713 - *(((int *)entry) + 0) = io_apic_read(dev->id, 0x10 + 2 * i);
6714 - }
6715 - spin_unlock_irqrestore(&ioapic_lock, flags);
6716 + for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ )
6717 + *entry = ioapic_read_entry(dev->id, i);
6718
6719 return 0;
6720 }
6721 @@ -2098,11 +1739,9 @@ static int ioapic_resume(struct sys_devi
6722 reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid;
6723 io_apic_write(dev->id, 0, reg_00.raw);
6724 }
6725 - for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) {
6726 - io_apic_write(dev->id, 0x11+2*i, *(((int *)entry)+1));
6727 - io_apic_write(dev->id, 0x10+2*i, *(((int *)entry)+0));
6728 - }
6729 spin_unlock_irqrestore(&ioapic_lock, flags);
6730 + for (i = 0; i < nr_ioapic_registers[dev->id]; i++)
6731 + ioapic_write_entry(dev->id, i, entry[i]);
6732
6733 return 0;
6734 }
6735 @@ -2148,28 +1787,254 @@ static int __init ioapic_init_sysfs(void
6736
6737 device_initcall(ioapic_init_sysfs);
6738
6739 +/*
6740 + * Dynamic irq allocation and deallocation
6741 + */
6742 +int create_irq(void)
6743 +{
6744 + /* Allocate an unused irq */
6745 + int irq;
6746 + int new;
6747 + int vector = 0;
6748 + unsigned long flags;
6749 + cpumask_t mask;
6750 +
6751 + irq = -ENOSPC;
6752 + spin_lock_irqsave(&vector_lock, flags);
6753 + for (new = (NR_IRQS - 1); new >= 0; new--) {
6754 + if (platform_legacy_irq(new))
6755 + continue;
6756 + if (irq_vector[new] != 0)
6757 + continue;
6758 + vector = __assign_irq_vector(new, TARGET_CPUS, &mask);
6759 + if (likely(vector > 0))
6760 + irq = new;
6761 + break;
6762 + }
6763 + spin_unlock_irqrestore(&vector_lock, flags);
6764 +
6765 + if (irq >= 0) {
6766 + dynamic_irq_init(irq);
6767 + }
6768 + return irq;
6769 +}
6770 +
6771 +void destroy_irq(unsigned int irq)
6772 +{
6773 + unsigned long flags;
6774 +
6775 + dynamic_irq_cleanup(irq);
6776 +
6777 + spin_lock_irqsave(&vector_lock, flags);
6778 + irq_vector[irq] = 0;
6779 + spin_unlock_irqrestore(&vector_lock, flags);
6780 +}
6781 +
6782 #endif /* CONFIG_XEN */
6783
6784 -/* --------------------------------------------------------------------------
6785 - ACPI-based IOAPIC Configuration
6786 - -------------------------------------------------------------------------- */
6787 +/*
6788 + * MSI message composition
6789 + */
6790 +#if defined(CONFIG_PCI_MSI) && !defined(CONFIG_XEN)
6791 +static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
6792 +{
6793 + int vector;
6794 + unsigned dest;
6795 + cpumask_t tmp;
6796
6797 -#ifdef CONFIG_ACPI
6798 + vector = assign_irq_vector(irq, TARGET_CPUS, &tmp);
6799 + if (vector >= 0) {
6800 + dest = cpu_mask_to_apicid(tmp);
6801 +
6802 + msg->address_hi = MSI_ADDR_BASE_HI;
6803 + msg->address_lo =
6804 + MSI_ADDR_BASE_LO |
6805 + ((INT_DEST_MODE == 0) ?
6806 + MSI_ADDR_DEST_MODE_PHYSICAL:
6807 + MSI_ADDR_DEST_MODE_LOGICAL) |
6808 + ((INT_DELIVERY_MODE != dest_LowestPrio) ?
6809 + MSI_ADDR_REDIRECTION_CPU:
6810 + MSI_ADDR_REDIRECTION_LOWPRI) |
6811 + MSI_ADDR_DEST_ID(dest);
6812 +
6813 + msg->data =
6814 + MSI_DATA_TRIGGER_EDGE |
6815 + MSI_DATA_LEVEL_ASSERT |
6816 + ((INT_DELIVERY_MODE != dest_LowestPrio) ?
6817 + MSI_DATA_DELIVERY_FIXED:
6818 + MSI_DATA_DELIVERY_LOWPRI) |
6819 + MSI_DATA_VECTOR(vector);
6820 + }
6821 + return vector;
6822 +}
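msi_compose_msg() packs the destination APIC ID into the address word and the vector into the data word. A standalone sketch of the packing with the conventional x86 MSI layout — 0xFEE00000 base, destination ID in bits 12-19, vector in the low byte of the data word; the constants here are illustrative stand-ins for the real asm/msidef.h definitions:

#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-ins for the asm/msidef.h macros. */
#define MSI_ADDR_BASE		0xfee00000u
#define MSI_ADDR_DEST_ID(d)	(((uint32_t)(d) & 0xff) << 12)
#define MSI_DATA_VECTOR(v)	((uint32_t)(v) & 0xff)

int main(void)
{
	uint32_t address_lo = MSI_ADDR_BASE | MSI_ADDR_DEST_ID(1);
	uint32_t data = MSI_DATA_VECTOR(0x31);

	/* dest 1, vector 0x31 -> address 0xfee01000, data 0x00000031 */
	printf("address_lo=%#010x data=%#010x\n", address_lo, data);
	return 0;
}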
6823
6824 -#define IO_APIC_MAX_ID 0xFE
6825 +#ifdef CONFIG_SMP
6826 +static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
6827 +{
6828 + struct msi_msg msg;
6829 + unsigned int dest;
6830 + cpumask_t tmp;
6831 + int vector;
6832 +
6833 + cpus_and(tmp, mask, cpu_online_map);
6834 + if (cpus_empty(tmp))
6835 + tmp = TARGET_CPUS;
6836 +
6837 + cpus_and(mask, tmp, CPU_MASK_ALL);
6838 +
6839 + vector = assign_irq_vector(irq, mask, &tmp);
6840 + if (vector < 0)
6841 + return;
6842 +
6843 + dest = cpu_mask_to_apicid(tmp);
6844 +
6845 + read_msi_msg(irq, &msg);
6846 +
6847 + msg.data &= ~MSI_DATA_VECTOR_MASK;
6848 + msg.data |= MSI_DATA_VECTOR(vector);
6849 + msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
6850 + msg.address_lo |= MSI_ADDR_DEST_ID(dest);
6851
6852 -int __init io_apic_get_version (int ioapic)
6853 + write_msi_msg(irq, &msg);
6854 + set_native_irq_info(irq, mask);
6855 +}
6856 +#endif /* CONFIG_SMP */
6857 +
6858 +/*
6859 + * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
6860 + * which implement the MSI or MSI-X Capability Structure.
6861 + */
6862 +static struct irq_chip msi_chip = {
6863 + .name = "PCI-MSI",
6864 + .unmask = unmask_msi_irq,
6865 + .mask = mask_msi_irq,
6866 + .ack = ack_apic_edge,
6867 +#ifdef CONFIG_SMP
6868 + .set_affinity = set_msi_irq_affinity,
6869 +#endif
6870 + .retrigger = ioapic_retrigger_irq,
6871 +};
6872 +
6873 +int arch_setup_msi_irq(unsigned int irq, struct pci_dev *dev)
6874 {
6875 - union IO_APIC_reg_01 reg_01;
6876 - unsigned long flags;
6877 + struct msi_msg msg;
6878 + int ret;
6879 + ret = msi_compose_msg(dev, irq, &msg);
6880 + if (ret < 0)
6881 + return ret;
6882
6883 - spin_lock_irqsave(&ioapic_lock, flags);
6884 - reg_01.raw = io_apic_read(ioapic, 1);
6885 - spin_unlock_irqrestore(&ioapic_lock, flags);
6886 + write_msi_msg(irq, &msg);
6887
6888 - return reg_01.bits.version;
6889 + set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
6890 +
6891 + return 0;
6892 +}
6893 +
6894 +void arch_teardown_msi_irq(unsigned int irq)
6895 +{
6896 + return;
6897 }
6898
6899 +#endif /* CONFIG_PCI_MSI */
6900 +
6901 +/*
6902 + * HyperTransport interrupt support
6903 + */
6904 +#ifdef CONFIG_HT_IRQ
6905 +
6906 +#ifdef CONFIG_SMP
6907 +
6908 +static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
6909 +{
6910 + struct ht_irq_msg msg;
6911 + fetch_ht_irq_msg(irq, &msg);
6912 +
6913 + msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK | HT_IRQ_LOW_DEST_ID_MASK);
6914 + msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK);
6915 +
6916 + msg.address_lo |= HT_IRQ_LOW_VECTOR(vector) | HT_IRQ_LOW_DEST_ID(dest);
6917 + msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest);
6918 +
6919 + write_ht_irq_msg(irq, &msg);
6920 +}
6921 +
6922 +static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
6923 +{
6924 + unsigned int dest;
6925 + cpumask_t tmp;
6926 + int vector;
6927 +
6928 + cpus_and(tmp, mask, cpu_online_map);
6929 + if (cpus_empty(tmp))
6930 + tmp = TARGET_CPUS;
6931 +
6932 + cpus_and(mask, tmp, CPU_MASK_ALL);
6933 +
6934 + vector = assign_irq_vector(irq, mask, &tmp);
6935 + if (vector < 0)
6936 + return;
6937 +
6938 + dest = cpu_mask_to_apicid(tmp);
6939 +
6940 + target_ht_irq(irq, dest, vector);
6941 + set_native_irq_info(irq, mask);
6942 +}
6943 +#endif
6944 +
6945 +static struct irq_chip ht_irq_chip = {
6946 + .name = "PCI-HT",
6947 + .mask = mask_ht_irq,
6948 + .unmask = unmask_ht_irq,
6949 + .ack = ack_apic_edge,
6950 +#ifdef CONFIG_SMP
6951 + .set_affinity = set_ht_irq_affinity,
6952 +#endif
6953 + .retrigger = ioapic_retrigger_irq,
6954 +};
6955 +
6956 +int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
6957 +{
6958 + int vector;
6959 + cpumask_t tmp;
6960 +
6961 + vector = assign_irq_vector(irq, TARGET_CPUS, &tmp);
6962 + if (vector >= 0) {
6963 + struct ht_irq_msg msg;
6964 + unsigned dest;
6965 +
6966 + dest = cpu_mask_to_apicid(tmp);
6967 +
6968 + msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
6969 +
6970 + msg.address_lo =
6971 + HT_IRQ_LOW_BASE |
6972 + HT_IRQ_LOW_DEST_ID(dest) |
6973 + HT_IRQ_LOW_VECTOR(vector) |
6974 + ((INT_DEST_MODE == 0) ?
6975 + HT_IRQ_LOW_DM_PHYSICAL :
6976 + HT_IRQ_LOW_DM_LOGICAL) |
6977 + HT_IRQ_LOW_RQEOI_EDGE |
6978 + ((INT_DELIVERY_MODE != dest_LowestPrio) ?
6979 + HT_IRQ_LOW_MT_FIXED :
6980 + HT_IRQ_LOW_MT_ARBITRATED) |
6981 + HT_IRQ_LOW_IRQ_MASKED;
6982 +
6983 + write_ht_irq_msg(irq, &msg);
6984 +
6985 + set_irq_chip_and_handler_name(irq, &ht_irq_chip,
6986 + handle_edge_irq, "edge");
6987 + }
6988 + return vector;
6989 +}
6990 +#endif /* CONFIG_HT_IRQ */
6991 +
6992 +/* --------------------------------------------------------------------------
6993 + ACPI-based IOAPIC Configuration
6994 + -------------------------------------------------------------------------- */
6995 +
6996 +#ifdef CONFIG_ACPI
6997 +
6998 +#define IO_APIC_MAX_ID 0xFE
6999
7000 int __init io_apic_get_redir_entries (int ioapic)
7001 {
7002 @@ -2188,6 +2053,8 @@ int io_apic_set_pci_routing (int ioapic,
7003 {
7004 struct IO_APIC_route_entry entry;
7005 unsigned long flags;
7006 + int vector;
7007 + cpumask_t mask;
7008
7009 if (!IO_APIC_IRQ(irq)) {
7010 apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
7011 @@ -2196,6 +2063,17 @@ int io_apic_set_pci_routing (int ioapic,
7012 }
7013
7014 /*
7015 + * IRQs < 16 are already in the irq_2_pin[] map
7016 + */
7017 + if (irq >= 16)
7018 + add_pin_to_irq(irq, ioapic, pin);
7019 +
7020 +
7021 + vector = assign_irq_vector(irq, TARGET_CPUS, &mask);
7022 + if (vector < 0)
7023 + return vector;
7024 +
7025 + /*
7026 * Generate a PCI IRQ routing entry and program the IOAPIC accordingly.
7027 * Note that we mask (disable) IRQs now -- these get enabled when the
7028 * corresponding device driver registers for this IRQ.
7029 @@ -2205,19 +2083,11 @@ int io_apic_set_pci_routing (int ioapic,
7030
7031 entry.delivery_mode = INT_DELIVERY_MODE;
7032 entry.dest_mode = INT_DEST_MODE;
7033 - entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
7034 + entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask);
7035 entry.trigger = edge_level;
7036 entry.polarity = active_high_low;
7037 entry.mask = 1; /* Disabled (masked) */
7038 -
7039 - irq = gsi_irq_sharing(irq);
7040 - /*
7041 - * IRQs < 16 are already in the irq_2_pin[] map
7042 - */
7043 - if (irq >= 16)
7044 - add_pin_to_irq(irq, ioapic, pin);
7045 -
7046 - entry.vector = assign_irq_vector(irq);
7047 + entry.vector = vector & 0xff;
7048
7049 apic_printk(APIC_VERBOSE,KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry (%d-%d -> 0x%x -> "
7050 "IRQ %d Mode:%i Active:%i)\n", ioapic,
7051 @@ -2229,10 +2099,10 @@ int io_apic_set_pci_routing (int ioapic,
7052 if (!ioapic && (irq < 16))
7053 disable_8259A_irq(irq);
7054
7055 + ioapic_write_entry(ioapic, pin, entry);
7056 +
7057 spin_lock_irqsave(&ioapic_lock, flags);
7058 - io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1));
7059 - io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0));
7060 - set_native_irq_info(use_pci_vector() ? entry.vector : irq, TARGET_CPUS);
7061 + set_native_irq_info(irq, TARGET_CPUS);
7062 spin_unlock_irqrestore(&ioapic_lock, flags);
7063
7064 return 0;
7065 Index: head-2008-12-01/arch/x86/kernel/ioport_64-xen.c
7066 ===================================================================
7067 --- head-2008-12-01.orig/arch/x86/kernel/ioport_64-xen.c 2008-12-03 15:49:14.000000000 +0100
7068 +++ head-2008-12-01/arch/x86/kernel/ioport_64-xen.c 2008-12-01 11:29:05.000000000 +0100
7069 @@ -58,6 +58,7 @@ asmlinkage long sys_ioperm(unsigned long
7070
7071 memset(bitmap, 0xff, IO_BITMAP_BYTES);
7072 t->io_bitmap_ptr = bitmap;
7073 + set_thread_flag(TIF_IO_BITMAP);
7074
7075 set_xen_guest_handle(set_iobitmap.bitmap, (char *)bitmap);
7076 set_iobitmap.nr_ports = IO_BITMAP_BITS;
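
The single added line above publishes the freshly allocated I/O bitmap
through a task flag, so the context-switch path (see the process_64-xen.c
hunks later in this patch) can test one cheap bit instead of chasing
thread-state pointers. A small userspace sketch of that flag-as-fast-path
pattern; all names are illustrative:

    #include <stdio.h>
    #include <stdlib.h>

    #define TIF_IO_BITMAP 0

    struct task {
        unsigned long flags;             /* TIF_IO_BITMAP lives here   */
        unsigned char *io_bitmap;        /* valid only if flag is set  */
    };

    static void grant_ioperm(struct task *t, size_t bytes)
    {
        t->io_bitmap = calloc(1, bytes);
        if (t->io_bitmap)
            t->flags |= 1UL << TIF_IO_BITMAP;  /* publish via the flag */
    }

    static void context_switch(struct task *next)
    {
        /* hot path: one bit test instead of pointer load + compare */
        if (next->flags & (1UL << TIF_IO_BITMAP))
            printf("would copy bitmap %p into the TSS\n",
                   (void *)next->io_bitmap);
    }

    int main(void)
    {
        struct task t = { 0, 0 };
        grant_ioperm(&t, 128);
        context_switch(&t);
        free(t.io_bitmap);
        return 0;
    }
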
7077 Index: head-2008-12-01/arch/x86/kernel/irq_64-xen.c
7078 ===================================================================
7079 --- head-2008-12-01.orig/arch/x86/kernel/irq_64-xen.c 2008-12-03 15:49:14.000000000 +0100
7080 +++ head-2008-12-01/arch/x86/kernel/irq_64-xen.c 2008-12-01 11:29:05.000000000 +0100
7081 @@ -20,11 +20,6 @@
7082 #include <asm/idle.h>
7083
7084 atomic_t irq_err_count;
7085 -#ifdef CONFIG_X86_IO_APIC
7086 -#ifdef APIC_MISMATCH_DEBUG
7087 -atomic_t irq_mis_count;
7088 -#endif
7089 -#endif
7090
7091 #ifdef CONFIG_DEBUG_STACKOVERFLOW
7092 /*
7093 @@ -79,7 +74,8 @@ int show_interrupts(struct seq_file *p,
7094 for_each_online_cpu(j)
7095 seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
7096 #endif
7097 - seq_printf(p, " %14s", irq_desc[i].chip->typename);
7098 + seq_printf(p, " %8s", irq_desc[i].chip->name);
7099 + seq_printf(p, "-%-8s", irq_desc[i].name);
7100
7101 seq_printf(p, " %s", action->name);
7102 for (action=action->next; action; action = action->next)
7103 @@ -99,11 +95,6 @@ skip:
7104 seq_putc(p, '\n');
7105 #endif
7106 seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
7107 -#ifdef CONFIG_X86_IO_APIC
7108 -#ifdef APIC_MISMATCH_DEBUG
7109 - seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count));
7110 -#endif
7111 -#endif
7112 }
7113 return 0;
7114 }
7115 @@ -114,24 +105,28 @@ skip:
7116 * handlers).
7117 */
7118 asmlinkage unsigned int do_IRQ(struct pt_regs *regs)
7119 -{
7120 +{
7121 + struct pt_regs *old_regs = set_irq_regs(regs);
7122 +
7123 /* high bit used in ret_from_ code */
7124 unsigned irq = ~regs->orig_rax;
7125
7126 - if (unlikely(irq >= NR_IRQS)) {
7127 - printk(KERN_EMERG "%s: cannot handle IRQ %d\n",
7128 - __FUNCTION__, irq);
7129 - BUG();
7130 - }
7131 -
7132 /*exit_idle();*/
7133 /*irq_enter();*/
7134 +
7135 #ifdef CONFIG_DEBUG_STACKOVERFLOW
7136 stack_overflow_check(regs);
7137 #endif
7138 - __do_IRQ(irq, regs);
7139 +
7140 + if (likely(irq < NR_IRQS))
7141 + generic_handle_irq(irq);
7142 + else
7143 + printk(KERN_EMERG "%s: %d.%d No irq handler for irq\n",
7144 + __func__, smp_processor_id(), irq);
7145 +
7146 /*irq_exit();*/
7147
7148 + set_irq_regs(old_regs);
7149 return 1;
7150 }
7151
7152 @@ -192,6 +187,6 @@ EXPORT_SYMBOL(do_softirq);
7153 */
7154 void ack_bad_irq(unsigned int irq)
7155 {
7156 - printk("unexpected IRQ trap at vector %02x\n", irq);
7157 + printk("unexpected IRQ trap at irq %02x\n", irq);
7158 }
7159 #endif
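
The do_IRQ() hunk above is the core 2.6.19 interrupt-plumbing change:
struct pt_regs stops being threaded through every handler and is instead
parked in a per-CPU slot via set_irq_regs(), with the previous value
restored on the way out so nested interrupts unwind correctly, and
dispatch goes through generic_handle_irq() rather than __do_IRQ(). A
minimal userspace sketch of the save/restore idiom, with __thread standing
in for the kernel's per-CPU variable and all names illustrative:

    #include <stdio.h>

    struct pt_regs { unsigned long ip; };

    static __thread struct pt_regs *irq_regs;   /* per-"CPU" slot */

    static struct pt_regs *set_irq_regs(struct pt_regs *new_regs)
    {
        struct pt_regs *old = irq_regs;
        irq_regs = new_regs;
        return old;
    }

    static void handler(void)
    {
        /* handlers needing the snapshot read the slot instead of
         * taking a struct pt_regs * parameter */
        printf("interrupted at %#lx\n", irq_regs->ip);
    }

    static void do_irq(struct pt_regs *regs)
    {
        struct pt_regs *old = set_irq_regs(regs);
        handler();
        set_irq_regs(old);               /* unwind for nesting */
    }

    int main(void)
    {
        struct pt_regs r = { 0x400123 };
        do_irq(&r);
        return 0;
    }
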
7160 Index: head-2008-12-01/arch/x86/kernel/mpparse_64-xen.c
7161 ===================================================================
7162 --- head-2008-12-01.orig/arch/x86/kernel/mpparse_64-xen.c 2008-12-03 15:49:14.000000000 +0100
7163 +++ head-2008-12-01/arch/x86/kernel/mpparse_64-xen.c 2008-12-01 11:29:05.000000000 +0100
7164 @@ -41,8 +41,7 @@ int acpi_found_madt;
7165 * Various Linux-internal data structures created from the
7166 * MP-table.
7167 */
7168 -unsigned char apic_version [MAX_APICS];
7169 -unsigned char mp_bus_id_to_type [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
7170 +DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
7171 int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
7172
7173 static int mp_current_pci_id = 0;
7174 @@ -56,7 +55,6 @@ struct mpc_config_intsrc mp_irqs[MAX_IRQ
7175 int mp_irq_entries;
7176
7177 int nr_ioapics;
7178 -int pic_mode;
7179 unsigned long mp_lapic_addr = 0;
7180
7181
7182 @@ -71,19 +69,6 @@ unsigned disabled_cpus __initdata;
7183 /* Bitmask of physically existing CPUs */
7184 physid_mask_t phys_cpu_present_map = PHYSID_MASK_NONE;
7185
7186 -/* ACPI MADT entry parsing functions */
7187 -#ifdef CONFIG_ACPI
7188 -extern struct acpi_boot_flags acpi_boot;
7189 -#ifdef CONFIG_X86_LOCAL_APIC
7190 -extern int acpi_parse_lapic (acpi_table_entry_header *header);
7191 -extern int acpi_parse_lapic_addr_ovr (acpi_table_entry_header *header);
7192 -extern int acpi_parse_lapic_nmi (acpi_table_entry_header *header);
7193 -#endif /*CONFIG_X86_LOCAL_APIC*/
7194 -#ifdef CONFIG_X86_IO_APIC
7195 -extern int acpi_parse_ioapic (acpi_table_entry_header *header);
7196 -#endif /*CONFIG_X86_IO_APIC*/
7197 -#endif /*CONFIG_ACPI*/
7198 -
7199 u8 bios_cpu_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
7200
7201
7202 @@ -109,24 +94,20 @@ static int __init mpf_checksum(unsigned
7203 static void __cpuinit MP_processor_info (struct mpc_config_processor *m)
7204 {
7205 int cpu;
7206 - unsigned char ver;
7207 cpumask_t tmp_map;
7208 + char *bootup_cpu = "";
7209
7210 if (!(m->mpc_cpuflag & CPU_ENABLED)) {
7211 disabled_cpus++;
7212 return;
7213 }
7214 -
7215 - printk(KERN_INFO "Processor #%d %d:%d APIC version %d\n",
7216 - m->mpc_apicid,
7217 - (m->mpc_cpufeature & CPU_FAMILY_MASK)>>8,
7218 - (m->mpc_cpufeature & CPU_MODEL_MASK)>>4,
7219 - m->mpc_apicver);
7220 -
7221 if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
7222 - Dprintk(" Bootup CPU\n");
7223 + bootup_cpu = " (Bootup-CPU)";
7224 boot_cpu_id = m->mpc_apicid;
7225 }
7226 +
7227 + printk(KERN_INFO "Processor #%d%s\n", m->mpc_apicid, bootup_cpu);
7228 +
7229 if (num_processors >= NR_CPUS) {
7230 printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
7231 " Processor ignored.\n", NR_CPUS);
7232 @@ -137,24 +118,7 @@ static void __cpuinit MP_processor_info
7233 cpus_complement(tmp_map, cpu_present_map);
7234 cpu = first_cpu(tmp_map);
7235
7236 -#if MAX_APICS < 255
7237 - if ((int)m->mpc_apicid > MAX_APICS) {
7238 - printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n",
7239 - m->mpc_apicid, MAX_APICS);
7240 - return;
7241 - }
7242 -#endif
7243 - ver = m->mpc_apicver;
7244 -
7245 physid_set(m->mpc_apicid, phys_cpu_present_map);
7246 - /*
7247 - * Validate version
7248 - */
7249 - if (ver == 0x0) {
7250 - printk(KERN_ERR "BIOS bug, APIC version is 0 for CPU#%d! fixing up to 0x10. (tell your hw vendor)\n", m->mpc_apicid);
7251 - ver = 0x10;
7252 - }
7253 - apic_version[m->mpc_apicid] = ver;
7254 if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
7255 /*
7256 * bios_cpu_apicid is required to have processors listed
7257 @@ -185,37 +149,42 @@ static void __init MP_bus_info (struct m
7258 Dprintk("Bus #%d is %s\n", m->mpc_busid, str);
7259
7260 if (strncmp(str, "ISA", 3) == 0) {
7261 - mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
7262 - } else if (strncmp(str, "EISA", 4) == 0) {
7263 - mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA;
7264 + set_bit(m->mpc_busid, mp_bus_not_pci);
7265 } else if (strncmp(str, "PCI", 3) == 0) {
7266 - mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI;
7267 + clear_bit(m->mpc_busid, mp_bus_not_pci);
7268 mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id;
7269 mp_current_pci_id++;
7270 - } else if (strncmp(str, "MCA", 3) == 0) {
7271 - mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA;
7272 } else {
7273 printk(KERN_ERR "Unknown bustype %s\n", str);
7274 }
7275 }
7276
7277 +static int bad_ioapic(unsigned long address)
7278 +{
7279 + if (nr_ioapics >= MAX_IO_APICS) {
7280 + printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
7281 + "(found %d)\n", MAX_IO_APICS, nr_ioapics);
7282 + panic("Recompile kernel with bigger MAX_IO_APICS!\n");
7283 + }
7284 + if (!address) {
7285 + printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address"
7286 + " found in table, skipping!\n");
7287 + return 1;
7288 + }
7289 + return 0;
7290 +}
7291 +
7292 static void __init MP_ioapic_info (struct mpc_config_ioapic *m)
7293 {
7294 if (!(m->mpc_flags & MPC_APIC_USABLE))
7295 return;
7296
7297 - printk("I/O APIC #%d Version %d at 0x%X.\n",
7298 - m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr);
7299 - if (nr_ioapics >= MAX_IO_APICS) {
7300 - printk(KERN_ERR "Max # of I/O APICs (%d) exceeded (found %d).\n",
7301 - MAX_IO_APICS, nr_ioapics);
7302 - panic("Recompile kernel with bigger MAX_IO_APICS!.\n");
7303 - }
7304 - if (!m->mpc_apicaddr) {
7305 - printk(KERN_ERR "WARNING: bogus zero I/O APIC address"
7306 - " found in MP table, skipping!\n");
7307 + printk("I/O APIC #%d at 0x%X.\n",
7308 + m->mpc_apicid, m->mpc_apicaddr);
7309 +
7310 + if (bad_ioapic(m->mpc_apicaddr))
7311 return;
7312 - }
7313 +
7314 mp_ioapics[nr_ioapics] = *m;
7315 nr_ioapics++;
7316 }
7317 @@ -239,19 +208,6 @@ static void __init MP_lintsrc_info (stru
7318 m->mpc_irqtype, m->mpc_irqflag & 3,
7319 (m->mpc_irqflag >> 2) &3, m->mpc_srcbusid,
7320 m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
7321 - /*
7322 - * Well it seems all SMP boards in existence
7323 - * use ExtINT/LVT1 == LINT0 and
7324 - * NMI/LVT2 == LINT1 - the following check
7325 - * will show us if this assumptions is false.
7326 - * Until then we do not have to add baggage.
7327 - */
7328 - if ((m->mpc_irqtype == mp_ExtINT) &&
7329 - (m->mpc_destapiclint != 0))
7330 - BUG();
7331 - if ((m->mpc_irqtype == mp_NMI) &&
7332 - (m->mpc_destapiclint != 1))
7333 - BUG();
7334 }
7335
7336 /*
7337 @@ -265,7 +221,7 @@ static int __init smp_read_mpc(struct mp
7338 unsigned char *mpt=((unsigned char *)mpc)+count;
7339
7340 if (memcmp(mpc->mpc_signature,MPC_SIGNATURE,4)) {
7341 - printk("SMP mptable: bad signature [%c%c%c%c]!\n",
7342 + printk("MPTABLE: bad signature [%c%c%c%c]!\n",
7343 mpc->mpc_signature[0],
7344 mpc->mpc_signature[1],
7345 mpc->mpc_signature[2],
7346 @@ -273,31 +229,31 @@ static int __init smp_read_mpc(struct mp
7347 return 0;
7348 }
7349 if (mpf_checksum((unsigned char *)mpc,mpc->mpc_length)) {
7350 - printk("SMP mptable: checksum error!\n");
7351 + printk("MPTABLE: checksum error!\n");
7352 return 0;
7353 }
7354 if (mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04) {
7355 - printk(KERN_ERR "SMP mptable: bad table version (%d)!!\n",
7356 + printk(KERN_ERR "MPTABLE: bad table version (%d)!!\n",
7357 mpc->mpc_spec);
7358 return 0;
7359 }
7360 if (!mpc->mpc_lapic) {
7361 - printk(KERN_ERR "SMP mptable: null local APIC address!\n");
7362 + printk(KERN_ERR "MPTABLE: null local APIC address!\n");
7363 return 0;
7364 }
7365 memcpy(str,mpc->mpc_oem,8);
7366 - str[8]=0;
7367 - printk(KERN_INFO "OEM ID: %s ",str);
7368 + str[8] = 0;
7369 + printk(KERN_INFO "MPTABLE: OEM ID: %s ",str);
7370
7371 memcpy(str,mpc->mpc_productid,12);
7372 - str[12]=0;
7373 - printk("Product ID: %s ",str);
7374 + str[12] = 0;
7375 + printk("MPTABLE: Product ID: %s ",str);
7376
7377 - printk("APIC at: 0x%X\n",mpc->mpc_lapic);
7378 + printk("MPTABLE: APIC at: 0x%X\n",mpc->mpc_lapic);
7379
7380 /* save the local APIC address, it might be non-default */
7381 if (!acpi_lapic)
7382 - mp_lapic_addr = mpc->mpc_lapic;
7383 + mp_lapic_addr = mpc->mpc_lapic;
7384
7385 /*
7386 * Now process the configuration blocks.
7387 @@ -309,7 +265,7 @@ static int __init smp_read_mpc(struct mp
7388 struct mpc_config_processor *m=
7389 (struct mpc_config_processor *)mpt;
7390 if (!acpi_lapic)
7391 - MP_processor_info(m);
7392 + MP_processor_info(m);
7393 mpt += sizeof(*m);
7394 count += sizeof(*m);
7395 break;
7396 @@ -328,8 +284,8 @@ static int __init smp_read_mpc(struct mp
7397 struct mpc_config_ioapic *m=
7398 (struct mpc_config_ioapic *)mpt;
7399 MP_ioapic_info(m);
7400 - mpt+=sizeof(*m);
7401 - count+=sizeof(*m);
7402 + mpt += sizeof(*m);
7403 + count += sizeof(*m);
7404 break;
7405 }
7406 case MP_INTSRC:
7407 @@ -338,8 +294,8 @@ static int __init smp_read_mpc(struct mp
7408 (struct mpc_config_intsrc *)mpt;
7409
7410 MP_intsrc_info(m);
7411 - mpt+=sizeof(*m);
7412 - count+=sizeof(*m);
7413 + mpt += sizeof(*m);
7414 + count += sizeof(*m);
7415 break;
7416 }
7417 case MP_LINTSRC:
7418 @@ -347,15 +303,15 @@ static int __init smp_read_mpc(struct mp
7419 struct mpc_config_lintsrc *m=
7420 (struct mpc_config_lintsrc *)mpt;
7421 MP_lintsrc_info(m);
7422 - mpt+=sizeof(*m);
7423 - count+=sizeof(*m);
7424 + mpt += sizeof(*m);
7425 + count += sizeof(*m);
7426 break;
7427 }
7428 }
7429 }
7430 clustered_apic_check();
7431 if (!num_processors)
7432 - printk(KERN_ERR "SMP mptable: no processors registered!\n");
7433 + printk(KERN_ERR "MPTABLE: no processors registered!\n");
7434 return num_processors;
7435 }
7436
7437 @@ -451,13 +407,10 @@ static inline void __init construct_defa
7438 * 2 CPUs, numbered 0 & 1.
7439 */
7440 processor.mpc_type = MP_PROCESSOR;
7441 - /* Either an integrated APIC or a discrete 82489DX. */
7442 - processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
7443 + processor.mpc_apicver = 0;
7444 processor.mpc_cpuflag = CPU_ENABLED;
7445 - processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
7446 - (boot_cpu_data.x86_model << 4) |
7447 - boot_cpu_data.x86_mask;
7448 - processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
7449 + processor.mpc_cpufeature = 0;
7450 + processor.mpc_featureflag = 0;
7451 processor.mpc_reserved[0] = 0;
7452 processor.mpc_reserved[1] = 0;
7453 for (i = 0; i < 2; i++) {
7454 @@ -476,14 +429,6 @@ static inline void __init construct_defa
7455 case 5:
7456 memcpy(bus.mpc_bustype, "ISA ", 6);
7457 break;
7458 - case 2:
7459 - case 6:
7460 - case 3:
7461 - memcpy(bus.mpc_bustype, "EISA ", 6);
7462 - break;
7463 - case 4:
7464 - case 7:
7465 - memcpy(bus.mpc_bustype, "MCA ", 6);
7466 }
7467 MP_bus_info(&bus);
7468 if (mpc_default_type > 4) {
7469 @@ -494,7 +439,7 @@ static inline void __init construct_defa
7470
7471 ioapic.mpc_type = MP_IOAPIC;
7472 ioapic.mpc_apicid = 2;
7473 - ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
7474 + ioapic.mpc_apicver = 0;
7475 ioapic.mpc_flags = MPC_APIC_USABLE;
7476 ioapic.mpc_apicaddr = 0xFEC00000;
7477 MP_ioapic_info(&ioapic);
7478 @@ -537,13 +482,6 @@ void __init get_smp_config (void)
7479 printk(KERN_INFO "Using ACPI for processor (LAPIC) configuration information\n");
7480
7481 printk("Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification);
7482 - if (mpf->mpf_feature2 & (1<<7)) {
7483 - printk(KERN_INFO " IMCR and PIC compatibility mode.\n");
7484 - pic_mode = 1;
7485 - } else {
7486 - printk(KERN_INFO " Virtual Wire compatibility mode.\n");
7487 - pic_mode = 0;
7488 - }
7489
7490 /*
7491 * Now see if we need to read further.
7492 @@ -620,7 +558,7 @@ static int __init smp_scan_config (unsig
7493 return 0;
7494 }
7495
7496 -void __init find_intel_smp (void)
7497 +void __init find_smp_config(void)
7498 {
7499 unsigned int address;
7500
7501 @@ -637,9 +575,7 @@ void __init find_intel_smp (void)
7502 smp_scan_config(0xF0000,0x10000))
7503 return;
7504 /*
7505 - * If it is an SMP machine we should know now, unless the
7506 - * configuration is in an EISA/MCA bus machine with an
7507 - * extended bios data area.
7508 + * If it is an SMP machine we should know now.
7509 *
7510 * there is a real-mode segmented pointer pointing to the
7511 * 4K EBDA area at 0x40E, calculate and scan it here.
7512 @@ -660,64 +596,38 @@ void __init find_intel_smp (void)
7513 printk(KERN_INFO "No mptable found.\n");
7514 }
7515
7516 -/*
7517 - * - Intel MP Configuration Table
7518 - */
7519 -void __init find_smp_config (void)
7520 -{
7521 -#ifdef CONFIG_X86_LOCAL_APIC
7522 - find_intel_smp();
7523 -#endif
7524 -}
7525 -
7526 -
7527 /* --------------------------------------------------------------------------
7528 ACPI-based MP Configuration
7529 -------------------------------------------------------------------------- */
7530
7531 #ifdef CONFIG_ACPI
7532
7533 -void __init mp_register_lapic_address (
7534 - u64 address)
7535 +void __init mp_register_lapic_address(u64 address)
7536 {
7537 #ifndef CONFIG_XEN
7538 mp_lapic_addr = (unsigned long) address;
7539 -
7540 set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr);
7541 -
7542 if (boot_cpu_id == -1U)
7543 boot_cpu_id = GET_APIC_ID(apic_read(APIC_ID));
7544 -
7545 - Dprintk("Boot CPU = %d\n", boot_cpu_physical_apicid);
7546 #endif
7547 }
7548
7549 -
7550 -void __cpuinit mp_register_lapic (
7551 - u8 id,
7552 - u8 enabled)
7553 +void __cpuinit mp_register_lapic (u8 id, u8 enabled)
7554 {
7555 struct mpc_config_processor processor;
7556 int boot_cpu = 0;
7557
7558 - if (id >= MAX_APICS) {
7559 - printk(KERN_WARNING "Processor #%d invalid (max %d)\n",
7560 - id, MAX_APICS);
7561 - return;
7562 - }
7563 -
7564 - if (id == boot_cpu_physical_apicid)
7565 + if (id == boot_cpu_id)
7566 boot_cpu = 1;
7567
7568 #ifndef CONFIG_XEN
7569 processor.mpc_type = MP_PROCESSOR;
7570 processor.mpc_apicid = id;
7571 - processor.mpc_apicver = GET_APIC_VERSION(apic_read(APIC_LVR));
7572 + processor.mpc_apicver = 0;
7573 processor.mpc_cpuflag = (enabled ? CPU_ENABLED : 0);
7574 processor.mpc_cpuflag |= (boot_cpu ? CPU_BOOTPROCESSOR : 0);
7575 - processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
7576 - (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask;
7577 - processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
7578 + processor.mpc_cpufeature = 0;
7579 + processor.mpc_featureflag = 0;
7580 processor.mpc_reserved[0] = 0;
7581 processor.mpc_reserved[1] = 0;
7582 #endif
7583 @@ -725,8 +635,6 @@ void __cpuinit mp_register_lapic (
7584 MP_processor_info(&processor);
7585 }
7586
7587 -#ifdef CONFIG_X86_IO_APIC
7588 -
7589 #define MP_ISA_BUS 0
7590 #define MP_MAX_IOAPIC_PIN 127
7591
7592 @@ -737,11 +645,9 @@ static struct mp_ioapic_routing {
7593 u32 pin_programmed[4];
7594 } mp_ioapic_routing[MAX_IO_APICS];
7595
7596 -
7597 -static int mp_find_ioapic (
7598 - int gsi)
7599 +static int mp_find_ioapic(int gsi)
7600 {
7601 - int i = 0;
7602 + int i = 0;
7603
7604 /* Find the IOAPIC that manages this GSI. */
7605 for (i = 0; i < nr_ioapics; i++) {
7606 @@ -751,28 +657,15 @@ static int mp_find_ioapic (
7607 }
7608
7609 printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi);
7610 -
7611 return -1;
7612 }
7613 -
7614
7615 -void __init mp_register_ioapic (
7616 - u8 id,
7617 - u32 address,
7618 - u32 gsi_base)
7619 +void __init mp_register_ioapic(u8 id, u32 address, u32 gsi_base)
7620 {
7621 - int idx = 0;
7622 + int idx = 0;
7623
7624 - if (nr_ioapics >= MAX_IO_APICS) {
7625 - printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
7626 - "(found %d)\n", MAX_IO_APICS, nr_ioapics);
7627 - panic("Recompile kernel with bigger MAX_IO_APICS!\n");
7628 - }
7629 - if (!address) {
7630 - printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address"
7631 - " found in MADT table, skipping!\n");
7632 + if (bad_ioapic(address))
7633 return;
7634 - }
7635
7636 idx = nr_ioapics++;
7637
7638 @@ -784,7 +677,7 @@ void __init mp_register_ioapic (
7639 set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
7640 #endif
7641 mp_ioapics[idx].mpc_apicid = id;
7642 - mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx);
7643 + mp_ioapics[idx].mpc_apicver = 0;
7644
7645 /*
7646 * Build basic IRQ lookup table to facilitate gsi->io_apic lookups
7647 @@ -795,21 +688,15 @@ void __init mp_register_ioapic (
7648 mp_ioapic_routing[idx].gsi_end = gsi_base +
7649 io_apic_get_redir_entries(idx);
7650
7651 - printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, "
7652 + printk(KERN_INFO "IOAPIC[%d]: apic_id %d, address 0x%x, "
7653 "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid,
7654 - mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr,
7655 + mp_ioapics[idx].mpc_apicaddr,
7656 mp_ioapic_routing[idx].gsi_start,
7657 mp_ioapic_routing[idx].gsi_end);
7658 -
7659 - return;
7660 }
7661
7662 -
7663 -void __init mp_override_legacy_irq (
7664 - u8 bus_irq,
7665 - u8 polarity,
7666 - u8 trigger,
7667 - u32 gsi)
7668 +void __init
7669 +mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
7670 {
7671 struct mpc_config_intsrc intsrc;
7672 int ioapic = -1;
7673 @@ -847,22 +734,18 @@ void __init mp_override_legacy_irq (
7674 mp_irqs[mp_irq_entries] = intsrc;
7675 if (++mp_irq_entries == MAX_IRQ_SOURCES)
7676 panic("Max # of irq sources exceeded!\n");
7677 -
7678 - return;
7679 }
7680
7681 -
7682 -void __init mp_config_acpi_legacy_irqs (void)
7683 +void __init mp_config_acpi_legacy_irqs(void)
7684 {
7685 struct mpc_config_intsrc intsrc;
7686 - int i = 0;
7687 - int ioapic = -1;
7688 + int i = 0;
7689 + int ioapic = -1;
7690
7691 /*
7692 * Fabricate the legacy ISA bus (bus #31).
7693 */
7694 - mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA;
7695 - Dprintk("Bus #%d is ISA\n", MP_ISA_BUS);
7696 + set_bit(MP_ISA_BUS, mp_bus_not_pci);
7697
7698 /*
7699 * Locate the IOAPIC that manages the ISA IRQs (0-15).
7700 @@ -915,24 +798,13 @@ void __init mp_config_acpi_legacy_irqs (
7701 if (++mp_irq_entries == MAX_IRQ_SOURCES)
7702 panic("Max # of irq sources exceeded!\n");
7703 }
7704 -
7705 - return;
7706 }
7707
7708 -#define MAX_GSI_NUM 4096
7709 -
7710 int mp_register_gsi(u32 gsi, int triggering, int polarity)
7711 {
7712 - int ioapic = -1;
7713 - int ioapic_pin = 0;
7714 - int idx, bit = 0;
7715 - static int pci_irq = 16;
7716 - /*
7717 - * Mapping between Global System Interrupts, which
7718 - * represent all possible interrupts, to the IRQs
7719 - * assigned to actual devices.
7720 - */
7721 - static int gsi_to_irq[MAX_GSI_NUM];
7722 + int ioapic = -1;
7723 + int ioapic_pin = 0;
7724 + int idx, bit = 0;
7725
7726 if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC)
7727 return gsi;
7728 @@ -965,47 +837,14 @@ int mp_register_gsi(u32 gsi, int trigger
7729 if ((1<<bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) {
7730 Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n",
7731 mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
7732 - return gsi_to_irq[gsi];
7733 + return gsi;
7734 }
7735
7736 mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1<<bit);
7737
7738 - if (triggering == ACPI_LEVEL_SENSITIVE) {
7739 - /*
7740 - * For PCI devices assign IRQs in order, avoiding gaps
7741 - * due to unused I/O APIC pins.
7742 - */
7743 - int irq = gsi;
7744 - if (gsi < MAX_GSI_NUM) {
7745 - /*
7746 - * Retain the VIA chipset work-around (gsi > 15), but
7747 - * avoid a problem where the 8254 timer (IRQ0) is setup
7748 - * via an override (so it's not on pin 0 of the ioapic),
7749 - * and at the same time, the pin 0 interrupt is a PCI
7750 - * type. The gsi > 15 test could cause these two pins
7751 - * to be shared as IRQ0, and they are not shareable.
7752 - * So test for this condition, and if necessary, avoid
7753 - * the pin collision.
7754 - */
7755 - if (gsi > 15 || (gsi == 0 && !timer_uses_ioapic_pin_0))
7756 - gsi = pci_irq++;
7757 - /*
7758 - * Don't assign IRQ used by ACPI SCI
7759 - */
7760 - if (gsi == acpi_fadt.sci_int)
7761 - gsi = pci_irq++;
7762 - gsi_to_irq[irq] = gsi;
7763 - } else {
7764 - printk(KERN_ERR "GSI %u is too high\n", gsi);
7765 - return gsi;
7766 - }
7767 - }
7768 -
7769 io_apic_set_pci_routing(ioapic, ioapic_pin, gsi,
7770 triggering == ACPI_EDGE_SENSITIVE ? 0 : 1,
7771 polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
7772 return gsi;
7773 }
7774 -
7775 -#endif /*CONFIG_X86_IO_APIC*/
7776 #endif /*CONFIG_ACPI*/
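
With EISA and MCA handling dropped, mp_bus_id_to_type[] only ever answered
"is this bus PCI or not", so the hunks above shrink the per-bus byte array
to the mp_bus_not_pci bitmap and fold the duplicated I/O APIC sanity
checks into bad_ioapic(). A compact sketch of the array-to-bitmap
transformation; the macros mimic the kernel's DECLARE_BITMAP/set_bit/
test_bit and the bus count is illustrative:

    #include <limits.h>
    #include <stdio.h>

    #define MAX_BUSSES    256
    #define BITS_PER_LONG (sizeof(long) * CHAR_BIT)
    #define DECLARE_BITMAP(name, bits) \
        unsigned long name[((bits) + BITS_PER_LONG - 1) / BITS_PER_LONG]

    static DECLARE_BITMAP(bus_not_pci, MAX_BUSSES);  /* 4 longs, not 256 bytes */

    static void set_bit(int nr, unsigned long *map)
    {
        map[nr / BITS_PER_LONG] |= 1UL << (nr % BITS_PER_LONG);
    }

    static int test_bit(int nr, const unsigned long *map)
    {
        return (map[nr / BITS_PER_LONG] >> (nr % BITS_PER_LONG)) & 1;
    }

    int main(void)
    {
        set_bit(7, bus_not_pci);         /* bus 7 is ISA */
        printf("bus 7 %s PCI\n", test_bit(7, bus_not_pci) ? "is not" : "is");
        printf("bus 8 %s PCI\n", test_bit(8, bus_not_pci) ? "is not" : "is");
        return 0;
    }
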
7777 Index: head-2008-12-01/arch/x86/kernel/process_64-xen.c
7778 ===================================================================
7779 --- head-2008-12-01.orig/arch/x86/kernel/process_64-xen.c 2008-12-03 15:49:14.000000000 +0100
7780 +++ head-2008-12-01/arch/x86/kernel/process_64-xen.c 2008-12-01 11:29:05.000000000 +0100
7781 @@ -89,25 +89,24 @@ void idle_notifier_unregister(struct not
7782 }
7783 EXPORT_SYMBOL(idle_notifier_unregister);
7784
7785 -enum idle_state { CPU_IDLE, CPU_NOT_IDLE };
7786 -static DEFINE_PER_CPU(enum idle_state, idle_state) = CPU_NOT_IDLE;
7787 -
7788 void enter_idle(void)
7789 {
7790 - __get_cpu_var(idle_state) = CPU_IDLE;
7791 + write_pda(isidle, 1);
7792 atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
7793 }
7794
7795 static void __exit_idle(void)
7796 {
7797 - __get_cpu_var(idle_state) = CPU_NOT_IDLE;
7798 + if (test_and_clear_bit_pda(0, isidle) == 0)
7799 + return;
7800 atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
7801 }
7802
7803 /* Called from interrupts to signify idle end */
7804 void exit_idle(void)
7805 {
7806 - if (current->pid | read_pda(irqcount))
7807 + /* idle loop has pid 0 */
7808 + if (current->pid)
7809 return;
7810 __exit_idle();
7811 }
7812 @@ -184,6 +183,9 @@ void cpu_idle (void)
7813 play_dead();
7814 enter_idle();
7815 idle();
7816 + /* In many cases the interrupt that ended idle
7817 + has already called exit_idle. But some idle
7818 + loops can be woken up without interrupt. */
7819 __exit_idle();
7820 }
7821
7822 @@ -196,7 +198,7 @@ void cpu_idle (void)
7823 void cpu_idle_wait(void)
7824 {
7825 unsigned int cpu, this_cpu = get_cpu();
7826 - cpumask_t map;
7827 + cpumask_t map, tmp = current->cpus_allowed;
7828
7829 set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
7830 put_cpu();
7831 @@ -219,6 +221,8 @@ void cpu_idle_wait(void)
7832 }
7833 cpus_and(map, map, cpu_online_map);
7834 } while (!cpus_empty(map));
7835 +
7836 + set_cpus_allowed(current, tmp);
7837 }
7838 EXPORT_SYMBOL_GPL(cpu_idle_wait);
7839
7840 @@ -250,9 +254,9 @@ void __show_regs(struct pt_regs * regs)
7841 print_modules();
7842 printk("Pid: %d, comm: %.20s %s %s %.*s\n",
7843 current->pid, current->comm, print_tainted(),
7844 - system_utsname.release,
7845 - (int)strcspn(system_utsname.version, " "),
7846 - system_utsname.version);
7847 + init_utsname()->release,
7848 + (int)strcspn(init_utsname()->version, " "),
7849 + init_utsname()->version);
7850 printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip);
7851 printk_address(regs->rip);
7852 printk("RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp,
7853 @@ -310,6 +314,7 @@ void exit_thread(void)
7854
7855 kfree(t->io_bitmap_ptr);
7856 t->io_bitmap_ptr = NULL;
7857 + clear_thread_flag(TIF_IO_BITMAP);
7858 /*
7859 * Careful, clear this in the TSS too:
7860 */
7861 @@ -340,6 +345,7 @@ void flush_thread(void)
7862 if (t->flags & _TIF_IA32)
7863 current_thread_info()->status |= TS_COMPAT;
7864 }
7865 + t->flags &= ~_TIF_DEBUG;
7866
7867 tsk->thread.debugreg0 = 0;
7868 tsk->thread.debugreg1 = 0;
7869 @@ -432,7 +438,7 @@ int copy_thread(int nr, unsigned long cl
7870 asm("mov %%es,%0" : "=m" (p->thread.es));
7871 asm("mov %%ds,%0" : "=m" (p->thread.ds));
7872
7873 - if (unlikely(me->thread.io_bitmap_ptr != NULL)) {
7874 + if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
7875 p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
7876 if (!p->thread.io_bitmap_ptr) {
7877 p->thread.io_bitmap_max = 0;
7878 @@ -440,6 +446,7 @@ int copy_thread(int nr, unsigned long cl
7879 }
7880 memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
7881 IO_BITMAP_BYTES);
7882 + set_tsk_thread_flag(p, TIF_IO_BITMAP);
7883 }
7884
7885 /*
7886 @@ -474,6 +481,30 @@ static inline void __save_init_fpu( stru
7887 }
7888
7889 /*
7890 + * This special macro can be used to load a debugging register
7891 + */
7892 +#define loaddebug(thread,r) set_debugreg(thread->debugreg ## r, r)
7893 +
7894 +static inline void __switch_to_xtra(struct task_struct *prev_p,
7895 + struct task_struct *next_p)
7896 +{
7897 + struct thread_struct *prev, *next;
7898 +
7899 + prev = &prev_p->thread,
7900 + next = &next_p->thread;
7901 +
7902 + if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
7903 + loaddebug(next, 0);
7904 + loaddebug(next, 1);
7905 + loaddebug(next, 2);
7906 + loaddebug(next, 3);
7907 + /* no 4 and 5 */
7908 + loaddebug(next, 6);
7909 + loaddebug(next, 7);
7910 + }
7911 +}
7912 +
7913 +/*
7914 * switch_to(x,y) should switch tasks from x to y.
7915 *
7916 * This could still be optimized:
7917 @@ -501,6 +532,10 @@ __switch_to(struct task_struct *prev_p,
7918 #endif
7919 multicall_entry_t _mcl[8], *mcl = _mcl;
7920
7921 + /* we're going to use this soon, after a few expensive things */
7922 + if (next_p->fpu_counter>5)
7923 + prefetch(&next->i387.fxsave);
7924 +
7925 /*
7926 * This is basically '__unlazy_fpu', except that we queue a
7927 * multicall to indicate FPU task switch, rather than
7928 @@ -513,7 +548,8 @@ __switch_to(struct task_struct *prev_p,
7929 mcl->op = __HYPERVISOR_fpu_taskswitch;
7930 mcl->args[0] = 1;
7931 mcl++;
7932 - }
7933 + } else
7934 + prev_p->fpu_counter = 0;
7935
7936 /*
7937 * Reload esp0, LDT and the page table pointer:
7938 @@ -608,21 +644,29 @@ __switch_to(struct task_struct *prev_p,
7939 write_pda(oldrsp, next->userrsp);
7940 write_pda(pcurrent, next_p);
7941 write_pda(kernelstack,
7942 - task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET);
7943 + (unsigned long)task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET);
7944 +#ifdef CONFIG_CC_STACKPROTECTOR
7945 + write_pda(stack_canary, next_p->stack_canary);
7946 +
7947 + /*
7948 + * Build time only check to make sure the stack_canary is at
7949 + * offset 40 in the pda; this is a gcc ABI requirement
7950 + */
7951 + BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
7952 +#endif
7953
7954 /*
7955 * Now maybe reload the debug registers
7956 */
7957 - if (unlikely(next->debugreg7)) {
7958 - set_debugreg(next->debugreg0, 0);
7959 - set_debugreg(next->debugreg1, 1);
7960 - set_debugreg(next->debugreg2, 2);
7961 - set_debugreg(next->debugreg3, 3);
7962 - /* no 4 and 5 */
7963 - set_debugreg(next->debugreg6, 6);
7964 - set_debugreg(next->debugreg7, 7);
7965 - }
7966 + if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW))
7967 + __switch_to_xtra(prev_p, next_p);
7968
7969 + /* If the task has used fpu the last 5 timeslices, just do a full
7970 + * restore of the math state immediately to avoid the trap; the
7971 + * chances of needing FPU soon are obviously high now
7972 + */
7973 + if (next_p->fpu_counter>5)
7974 + math_state_restore();
7975 return prev_p;
7976 }
7977
7978 @@ -842,7 +886,7 @@ int dump_task_regs(struct task_struct *t
7979
7980 unsigned long arch_align_stack(unsigned long sp)
7981 {
7982 - if (randomize_va_space)
7983 + if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
7984 sp -= get_random_int() % 8192;
7985 return sp & ~0xf;
7986 }
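
Besides moving the idle flag into the PDA and routing debug-register loads
through __switch_to_xtra(), the hunks above add the fpu_counter heuristic:
a task that used the FPU in each of its recent timeslices has its math
state prefetched and restored eagerly at switch time, trading one restore
against a likely device-not-available trap; tasks that stop using the FPU
have the counter reset and stay lazy. A reduced sketch of the heuristic;
the threshold of 5 matches the patch, everything else is illustrative:

    #include <stdio.h>

    struct task {
        const char *comm;
        unsigned char fpu_counter;       /* consecutive FPU-using slices */
    };

    static void math_state_restore(struct task *t)
    {
        printf("%s: eager FPU restore\n", t->comm);
    }

    static void switch_to(struct task *next)
    {
        if (next->fpu_counter > 5)       /* hot FPU user: skip the trap */
            math_state_restore(next);
        else
            printf("%s: stay lazy, wait for the #NM trap\n", next->comm);
    }

    int main(void)
    {
        struct task burner = { "burner", 0 }, shell = { "shell", 0 };
        for (int i = 0; i < 8; i++)      /* burner touched the FPU a lot */
            if (burner.fpu_counter < 255)
                burner.fpu_counter++;
        switch_to(&burner);
        switch_to(&shell);
        return 0;
    }
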
7987 Index: head-2008-12-01/arch/x86/kernel/setup_64-xen.c
7988 ===================================================================
7989 --- head-2008-12-01.orig/arch/x86/kernel/setup_64-xen.c 2008-12-03 15:49:14.000000000 +0100
7990 +++ head-2008-12-01/arch/x86/kernel/setup_64-xen.c 2008-12-01 11:29:05.000000000 +0100
7991 @@ -88,9 +88,6 @@ extern struct edid_info edid_info;
7992 shared_info_t *HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page;
7993 EXPORT_SYMBOL(HYPERVISOR_shared_info);
7994
7995 -extern char hypercall_page[PAGE_SIZE];
7996 -EXPORT_SYMBOL(hypercall_page);
7997 -
7998 static int xen_panic_event(struct notifier_block *, unsigned long, void *);
7999 static struct notifier_block xen_panic_block = {
8000 xen_panic_event, NULL, 0 /* try to go last */
8001 @@ -118,16 +115,6 @@ EXPORT_SYMBOL(boot_cpu_data);
8002
8003 unsigned long mmu_cr4_features;
8004
8005 -int acpi_disabled;
8006 -EXPORT_SYMBOL(acpi_disabled);
8007 -#ifdef CONFIG_ACPI
8008 -extern int __initdata acpi_ht;
8009 -extern acpi_interrupt_flags acpi_sci_flags;
8010 -int __initdata acpi_force = 0;
8011 -#endif
8012 -
8013 -int acpi_numa __initdata;
8014 -
8015 /* Boot loader ID as an integer, for the benefit of proc_dointvec */
8016 int bootloader_type;
8017
8018 @@ -151,10 +138,6 @@ struct sys_desc_table_struct {
8019
8020 struct edid_info edid_info;
8021 EXPORT_SYMBOL_GPL(edid_info);
8022 -struct e820map e820;
8023 -#ifdef CONFIG_XEN
8024 -struct e820map machine_e820;
8025 -#endif
8026
8027 extern int root_mountflags;
8028
8029 @@ -181,9 +164,6 @@ struct resource standard_io_resources[]
8030 .flags = IORESOURCE_BUSY | IORESOURCE_IO }
8031 };
8032
8033 -#define STANDARD_IO_RESOURCES \
8034 - (sizeof standard_io_resources / sizeof standard_io_resources[0])
8035 -
8036 #define IORESOURCE_RAM (IORESOURCE_BUSY | IORESOURCE_MEM)
8037
8038 struct resource data_resource = {
8039 @@ -230,9 +210,6 @@ static struct resource adapter_rom_resou
8040 .flags = IORESOURCE_ROM }
8041 };
8042
8043 -#define ADAPTER_ROM_RESOURCES \
8044 - (sizeof adapter_rom_resources / sizeof adapter_rom_resources[0])
8045 -
8046 static struct resource video_rom_resource = {
8047 .name = "Video ROM",
8048 .start = 0xc0000,
8049 @@ -309,7 +286,8 @@ static void __init probe_roms(void)
8050 }
8051
8052 /* check for adapter roms on 2k boundaries */
8053 - for (i = 0; i < ADAPTER_ROM_RESOURCES && start < upper; start += 2048) {
8054 + for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper;
8055 + start += 2048) {
8056 rom = isa_bus_to_virt(start);
8057 if (!romsignature(rom))
8058 continue;
8059 @@ -329,187 +307,22 @@ static void __init probe_roms(void)
8060 }
8061 }
8062
8063 -/* Check for full argument with no trailing characters */
8064 -static int fullarg(char *p, char *arg)
8065 +#ifdef CONFIG_PROC_VMCORE
8066 +/* elfcorehdr= specifies the location of elf core header
8067 + * stored by the crashed kernel. This option will be passed
8068 + * by kexec loader to the capture kernel.
8069 + */
8070 +static int __init setup_elfcorehdr(char *arg)
8071 {
8072 - int l = strlen(arg);
8073 - return !memcmp(p, arg, l) && (p[l] == 0 || isspace(p[l]));
8074 + char *end;
8075 + if (!arg)
8076 + return -EINVAL;
8077 + elfcorehdr_addr = memparse(arg, &end);
8078 + return end > arg ? 0 : -EINVAL;
8079 }
8080 -
8081 -static __init void parse_cmdline_early (char ** cmdline_p)
8082 -{
8083 - char c = ' ', *to = command_line, *from = COMMAND_LINE;
8084 - int len = 0;
8085 - int userdef = 0;
8086 -
8087 - for (;;) {
8088 - if (c != ' ')
8089 - goto next_char;
8090 -
8091 -#ifdef CONFIG_SMP
8092 - /*
8093 - * If the BIOS enumerates physical processors before logical,
8094 - * maxcpus=N at enumeration-time can be used to disable HT.
8095 - */
8096 - else if (!memcmp(from, "maxcpus=", 8)) {
8097 - extern unsigned int maxcpus;
8098 -
8099 - maxcpus = simple_strtoul(from + 8, NULL, 0);
8100 - }
8101 -#endif
8102 -#ifdef CONFIG_ACPI
8103 - /* "acpi=off" disables both ACPI table parsing and interpreter init */
8104 - if (fullarg(from,"acpi=off"))
8105 - disable_acpi();
8106 -
8107 - if (fullarg(from, "acpi=force")) {
8108 - /* add later when we do DMI horrors: */
8109 - acpi_force = 1;
8110 - acpi_disabled = 0;
8111 - }
8112 -
8113 - /* acpi=ht just means: do ACPI MADT parsing
8114 - at bootup, but don't enable the full ACPI interpreter */
8115 - if (fullarg(from, "acpi=ht")) {
8116 - if (!acpi_force)
8117 - disable_acpi();
8118 - acpi_ht = 1;
8119 - }
8120 - else if (fullarg(from, "pci=noacpi"))
8121 - acpi_disable_pci();
8122 - else if (fullarg(from, "acpi=noirq"))
8123 - acpi_noirq_set();
8124 -
8125 - else if (fullarg(from, "acpi_sci=edge"))
8126 - acpi_sci_flags.trigger = 1;
8127 - else if (fullarg(from, "acpi_sci=level"))
8128 - acpi_sci_flags.trigger = 3;
8129 - else if (fullarg(from, "acpi_sci=high"))
8130 - acpi_sci_flags.polarity = 1;
8131 - else if (fullarg(from, "acpi_sci=low"))
8132 - acpi_sci_flags.polarity = 3;
8133 -
8134 - /* acpi=strict disables out-of-spec workarounds */
8135 - else if (fullarg(from, "acpi=strict")) {
8136 - acpi_strict = 1;
8137 - }
8138 -#ifdef CONFIG_X86_IO_APIC
8139 - else if (fullarg(from, "acpi_skip_timer_override"))
8140 - acpi_skip_timer_override = 1;
8141 -#endif
8142 -#endif
8143 -
8144 -#ifndef CONFIG_XEN
8145 - if (fullarg(from, "nolapic") || fullarg(from, "disableapic")) {
8146 - clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
8147 - disable_apic = 1;
8148 - }
8149 -
8150 - if (fullarg(from, "noapic"))
8151 - skip_ioapic_setup = 1;
8152 -
8153 - if (fullarg(from,"apic")) {
8154 - skip_ioapic_setup = 0;
8155 - ioapic_force = 1;
8156 - }
8157 -#endif
8158 -
8159 - if (!memcmp(from, "mem=", 4))
8160 - parse_memopt(from+4, &from);
8161 -
8162 - if (!memcmp(from, "memmap=", 7)) {
8163 - /* exactmap option is for used defined memory */
8164 - if (!memcmp(from+7, "exactmap", 8)) {
8165 -#ifdef CONFIG_CRASH_DUMP
8166 - /* If we are doing a crash dump, we
8167 - * still need to know the real mem
8168 - * size before original memory map is
8169 - * reset.
8170 - */
8171 - saved_max_pfn = e820_end_of_ram();
8172 -#endif
8173 - from += 8+7;
8174 - end_pfn_map = 0;
8175 - e820.nr_map = 0;
8176 - userdef = 1;
8177 - }
8178 - else {
8179 - parse_memmapopt(from+7, &from);
8180 - userdef = 1;
8181 - }
8182 - }
8183 -
8184 -#ifdef CONFIG_NUMA
8185 - if (!memcmp(from, "numa=", 5))
8186 - numa_setup(from+5);
8187 +early_param("elfcorehdr", setup_elfcorehdr);
8188 #endif
8189
8190 - if (!memcmp(from,"iommu=",6)) {
8191 - iommu_setup(from+6);
8192 - }
8193 -
8194 - if (fullarg(from,"oops=panic"))
8195 - panic_on_oops = 1;
8196 -
8197 - if (!memcmp(from, "noexec=", 7))
8198 - nonx_setup(from + 7);
8199 -
8200 -#ifdef CONFIG_KEXEC
8201 - /* crashkernel=size@addr specifies the location to reserve for
8202 - * a crash kernel. By reserving this memory we guarantee
8203 - * that linux never set's it up as a DMA target.
8204 - * Useful for holding code to do something appropriate
8205 - * after a kernel panic.
8206 - */
8207 - else if (!memcmp(from, "crashkernel=", 12)) {
8208 -#ifndef CONFIG_XEN
8209 - unsigned long size, base;
8210 - size = memparse(from+12, &from);
8211 - if (*from == '@') {
8212 - base = memparse(from+1, &from);
8213 - /* FIXME: Do I want a sanity check
8214 - * to validate the memory range?
8215 - */
8216 - crashk_res.start = base;
8217 - crashk_res.end = base + size - 1;
8218 - }
8219 -#else
8220 - printk("Ignoring crashkernel command line, "
8221 - "parameter will be supplied by xen\n");
8222 -#endif
8223 - }
8224 -#endif
8225 -
8226 -#ifdef CONFIG_PROC_VMCORE
8227 - /* elfcorehdr= specifies the location of elf core header
8228 - * stored by the crashed kernel. This option will be passed
8229 - * by kexec loader to the capture kernel.
8230 - */
8231 - else if(!memcmp(from, "elfcorehdr=", 11))
8232 - elfcorehdr_addr = memparse(from+11, &from);
8233 -#endif
8234 -
8235 -#if defined(CONFIG_HOTPLUG_CPU) && !defined(CONFIG_XEN)
8236 - else if (!memcmp(from, "additional_cpus=", 16))
8237 - setup_additional_cpus(from+16);
8238 -#endif
8239 -
8240 - next_char:
8241 - c = *(from++);
8242 - if (!c)
8243 - break;
8244 - if (COMMAND_LINE_SIZE <= ++len)
8245 - break;
8246 - *(to++) = c;
8247 - }
8248 - if (userdef) {
8249 - printk(KERN_INFO "user-defined physical RAM map:\n");
8250 - e820_print_map("user");
8251 - }
8252 - *to = '\0';
8253 - *cmdline_p = command_line;
8254 -}
8255 -
8256 #ifndef CONFIG_NUMA
8257 static void __init
8258 contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
8259 @@ -521,10 +334,11 @@ contig_initmem_init(unsigned long start_
8260 if (bootmap == -1L)
8261 panic("Cannot find bootmem map of size %ld\n",bootmap_size);
8262 bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn);
8263 + e820_register_active_regions(0, start_pfn, end_pfn);
8264 #ifdef CONFIG_XEN
8265 - e820_bootmem_free(NODE_DATA(0), 0, xen_start_info->nr_pages<<PAGE_SHIFT);
8266 + free_bootmem_with_active_regions(0, xen_start_info->nr_pages);
8267 #else
8268 - e820_bootmem_free(NODE_DATA(0), 0, end_pfn << PAGE_SHIFT);
8269 + free_bootmem_with_active_regions(0, end_pfn);
8270 #endif
8271 reserve_bootmem(bootmap, bootmap_size);
8272 }
8273 @@ -587,6 +401,10 @@ static void discover_ebda(void)
8274 void __init setup_arch(char **cmdline_p)
8275 {
8276 #ifdef CONFIG_XEN
8277 + extern struct e820map machine_e820;
8278 +
8279 + printk(KERN_INFO "Command line: %s\n", saved_command_line);
8280 +
8281 /* Register a call for panic conditions. */
8282 atomic_notifier_chain_register(&panic_notifier_list, &xen_panic_block);
8283
8284 @@ -612,6 +430,8 @@ void __init setup_arch(char **cmdline_p)
8285
8286 ARCH_SETUP
8287 #else
8288 + printk(KERN_INFO "Command line: %s\n", saved_command_line);
8289 +
8290 ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV);
8291 screen_info = SCREEN_INFO;
8292 edid_info = EDID_INFO;
8293 @@ -639,16 +459,22 @@ void __init setup_arch(char **cmdline_p)
8294 data_resource.start = virt_to_phys(&_etext);
8295 data_resource.end = virt_to_phys(&_edata)-1;
8296
8297 - parse_cmdline_early(cmdline_p);
8298 -
8299 early_identify_cpu(&boot_cpu_data);
8300
8301 + strlcpy(command_line, saved_command_line, COMMAND_LINE_SIZE);
8302 + *cmdline_p = command_line;
8303 +
8304 + parse_early_param();
8305 +
8306 + finish_e820_parsing();
8307 +
8308 + e820_register_active_regions(0, 0, -1UL);
8309 /*
8310 * partially used pages are not usable - thus
8311 * we are rounding upwards:
8312 */
8313 end_pfn = e820_end_of_ram();
8314 - num_physpages = end_pfn; /* for pfn_valid */
8315 + num_physpages = end_pfn;
8316
8317 check_efer();
8318
8319 @@ -659,6 +485,14 @@ void __init setup_arch(char **cmdline_p)
8320 if (is_initial_xendomain())
8321 dmi_scan_machine();
8322
8323 + /* How many end-of-memory variables you have, grandma! */
8324 + max_low_pfn = end_pfn;
8325 + max_pfn = end_pfn;
8326 + high_memory = (void *)__va(end_pfn * PAGE_SIZE - 1) + 1;
8327 +
8328 + /* Remove active ranges so rediscovery with NUMA-awareness happens */
8329 + remove_all_active_ranges();
8330 +
8331 #ifdef CONFIG_ACPI_NUMA
8332 /*
8333 * Parse SRAT to discover nodes.
8334 @@ -848,16 +682,16 @@ void __init setup_arch(char **cmdline_p)
8335 BUG();
8336 }
8337
8338 +#ifdef CONFIG_ACPI
8339 if (!is_initial_xendomain()) {
8340 acpi_disabled = 1;
8341 -#ifdef CONFIG_ACPI
8342 acpi_ht = 0;
8343 -#endif
8344 }
8345 #endif
8346 +#endif
8347
8348 -#ifndef CONFIG_XEN
8349 - check_ioapic();
8350 +#if defined(CONFIG_PCI) && !defined(CONFIG_XEN)
8351 + early_quirks();
8352 #endif
8353
8354 zap_low_mappings(0);
8355 @@ -907,6 +741,7 @@ void __init setup_arch(char **cmdline_p)
8356 e820_reserve_resources(machine_e820.map, machine_e820.nr_map);
8357 #else
8358 e820_reserve_resources(e820.map, e820.nr_map);
8359 + e820_mark_nosave_regions();
8360 #endif
8361
8362 request_resource(&iomem_resource, &video_ram_resource);
8363 @@ -914,7 +749,7 @@ void __init setup_arch(char **cmdline_p)
8364 {
8365 unsigned i;
8366 /* request I/O space for devices used on all i[345]86 PCs */
8367 - for (i = 0; i < STANDARD_IO_RESOURCES; i++)
8368 + for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++)
8369 request_resource(&ioport_resource, &standard_io_resources[i]);
8370 }
8371
8372 @@ -1098,7 +933,7 @@ static void __init amd_detect_cmp(struct
8373 #endif
8374 }
8375
8376 -static void __init init_amd(struct cpuinfo_x86 *c)
8377 +static void __cpuinit init_amd(struct cpuinfo_x86 *c)
8378 {
8379 unsigned level;
8380
8381 @@ -1154,6 +989,12 @@ static void __init init_amd(struct cpuin
8382
8383 /* Fix cpuid4 emulation for more */
8384 num_cache_leaves = 3;
8385 +
8386 + /* When there is only one core no need to synchronize RDTSC */
8387 + if (num_possible_cpus() == 1)
8388 + set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
8389 + else
8390 + clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
8391 }
8392
8393 static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
8394 @@ -1235,8 +1076,7 @@ static void srat_detect_node(void)
8395 node = first_node(node_online_map);
8396 numa_set_node(cpu, node);
8397
8398 - if (acpi_numa > 0)
8399 - printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
8400 + printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
8401 #endif
8402 }
8403
8404 @@ -1270,6 +1110,8 @@ static void __cpuinit init_intel(struct
8405 if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
8406 (c->x86 == 0x6 && c->x86_model >= 0x0e))
8407 set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
8408 + if (c->x86 == 6)
8409 + set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability);
8410 set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
8411 c->x86_max_cores = intel_num_cpu_cores(c);
8412
8413 @@ -1488,8 +1330,8 @@ static int show_cpuinfo(struct seq_file
8414
8415 /* Intel-defined (#2) */
8416 "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est",
8417 - "tm2", NULL, "cid", NULL, NULL, "cx16", "xtpr", NULL,
8418 - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
8419 + "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL,
8420 + NULL, NULL, "dca", NULL, NULL, NULL, NULL, NULL,
8421 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
8422
8423 /* VIA/Cyrix/Centaur-defined */
8424 Index: head-2008-12-01/arch/x86/kernel/setup64-xen.c
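
The large deletion above retires the hand-rolled parse_cmdline_early()
scanner; each option becomes a self-contained early_param() handler
(setup_elfcorehdr() is the one visible in this hunk), and a generic core
parser dispatches by keyword. A table-driven userspace sketch of that
shape; the kernel gathers handlers into a linker section, an array stands
in here, and all names are illustrative:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    static unsigned long elfcorehdr_addr;

    static int setup_elfcorehdr(const char *arg)
    {
        char *end;
        if (!arg)
            return -1;
        elfcorehdr_addr = strtoul(arg, &end, 0);
        return end > arg ? 0 : -1;       /* reject empty or garbage values */
    }

    struct early_param { const char *key; int (*fn)(const char *); };

    static const struct early_param params[] = {
        { "elfcorehdr", setup_elfcorehdr },
    };

    static void parse_early_param(char *cmdline)
    {
        for (char *tok = strtok(cmdline, " "); tok; tok = strtok(NULL, " ")) {
            char *val = strchr(tok, '=');
            if (!val)
                continue;
            *val++ = '\0';
            for (size_t i = 0; i < sizeof(params) / sizeof(params[0]); i++)
                if (!strcmp(tok, params[i].key))
                    params[i].fn(val);
        }
    }

    int main(void)
    {
        char cmdline[] = "ro root=/dev/sda1 elfcorehdr=0x2000000";
        parse_early_param(cmdline);
        printf("elfcorehdr_addr = %#lx\n", elfcorehdr_addr);
        return 0;
    }
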
8425 ===================================================================
8426 --- head-2008-12-01.orig/arch/x86/kernel/setup64-xen.c 2008-12-03 15:49:14.000000000 +0100
8427 +++ head-2008-12-01/arch/x86/kernel/setup64-xen.c 2008-12-01 11:29:05.000000000 +0100
8428 @@ -31,7 +31,7 @@
8429 #include <asm/hypervisor.h>
8430 #endif
8431
8432 -char x86_boot_params[BOOT_PARAM_SIZE] __initdata = {0,};
8433 +char x86_boot_params[BOOT_PARAM_SIZE] __initdata;
8434
8435 cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
8436
8437 @@ -55,8 +55,10 @@ Control non executable mappings for 64bi
8438 on Enable(default)
8439 off Disable
8440 */
8441 -int __init nonx_setup(char *str)
8442 +static int __init nonx_setup(char *str)
8443 {
8444 + if (!str)
8445 + return -EINVAL;
8446 if (!strncmp(str, "on", 2)) {
8447 __supported_pte_mask |= _PAGE_NX;
8448 do_not_nx = 0;
8449 @@ -64,9 +66,9 @@ int __init nonx_setup(char *str)
8450 do_not_nx = 1;
8451 __supported_pte_mask &= ~_PAGE_NX;
8452 }
8453 - return 1;
8454 + return 0;
8455 }
8456 -__setup("noexec=", nonx_setup); /* parsed early actually */
8457 +early_param("noexec", nonx_setup);
8458
8459 int force_personality32 = 0;
8460
8461 @@ -102,12 +104,9 @@ void __init setup_per_cpu_areas(void)
8462 #endif
8463
8464 /* Copy section for each CPU (we discard the original) */
8465 - size = ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES);
8466 -#ifdef CONFIG_MODULES
8467 - if (size < PERCPU_ENOUGH_ROOM)
8468 - size = PERCPU_ENOUGH_ROOM;
8469 -#endif
8470 + size = PERCPU_ENOUGH_ROOM;
8471
8472 + printk(KERN_INFO "PERCPU: Allocating %lu bytes of per cpu data\n", size);
8473 for_each_cpu_mask (i, cpu_possible_map) {
8474 char *ptr;
8475
8476 @@ -169,7 +168,10 @@ void pda_init(int cpu)
8477 /* Setup up data that may be needed in __get_free_pages early */
8478 asm volatile("movl %0,%%fs ; movl %0,%%gs" :: "r" (0));
8479 #ifndef CONFIG_XEN
8480 + /* Memory clobbers used to order PDA accessed */
8481 + mb();
8482 wrmsrl(MSR_GS_BASE, pda);
8483 + mb();
8484 #else
8485 if (HYPERVISOR_set_segment_base(SEGBASE_GS_KERNEL,
8486 (unsigned long)pda))
8487 @@ -302,28 +304,17 @@ void __cpuinit cpu_init (void)
8488 * set up and load the per-CPU TSS
8489 */
8490 for (v = 0; v < N_EXCEPTION_STACKS; v++) {
8491 + static const unsigned int order[N_EXCEPTION_STACKS] = {
8492 + [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
8493 + [DEBUG_STACK - 1] = DEBUG_STACK_ORDER
8494 + };
8495 if (cpu) {
8496 - static const unsigned int order[N_EXCEPTION_STACKS] = {
8497 - [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
8498 - [DEBUG_STACK - 1] = DEBUG_STACK_ORDER
8499 - };
8500 -
8501 estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]);
8502 if (!estacks)
8503 panic("Cannot allocate exception stack %ld %d\n",
8504 v, cpu);
8505 }
8506 - switch (v + 1) {
8507 -#if DEBUG_STKSZ > EXCEPTION_STKSZ
8508 - case DEBUG_STACK:
8509 - cpu_pda(cpu)->debugstack = (unsigned long)estacks;
8510 - estacks += DEBUG_STKSZ;
8511 - break;
8512 -#endif
8513 - default:
8514 - estacks += EXCEPTION_STKSZ;
8515 - break;
8516 - }
8517 + estacks += PAGE_SIZE << order[v];
8518 orig_ist->ist[v] = t->ist[v] = (unsigned long)estacks;
8519 }
8520
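
The cpu_init() hunk above replaces a per-stack switch statement with a
size table: the allocation cursor simply advances by PAGE_SIZE <<
order[v], so the debug stack's larger order is handled by data instead of
control flow. A reduced sketch with illustrative counts; the range
designator is the same GCC extension the patch itself uses:

    #include <stdio.h>

    #define PAGE_SIZE   4096UL
    #define N_STACKS    5
    #define DEBUG_STACK 4                /* 1-based id of the big stack */

    static const unsigned int order[N_STACKS] = {
        [0 ... N_STACKS - 1] = 0,        /* one page each...            */
        [DEBUG_STACK - 1]    = 1,        /* ...except debug: two pages  */
    };

    int main(void)
    {
        unsigned long offset = 0;        /* cursor into a fake allocation */
        for (int v = 0; v < N_STACKS; v++) {
            offset += PAGE_SIZE << order[v];
            printf("stack %d ends at offset %#lx\n", v + 1, offset);
        }
        return 0;
    }
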
8521 Index: head-2008-12-01/arch/x86/kernel/smp_64-xen.c
8522 ===================================================================
8523 --- head-2008-12-01.orig/arch/x86/kernel/smp_64-xen.c 2008-12-03 15:49:14.000000000 +0100
8524 +++ head-2008-12-01/arch/x86/kernel/smp_64-xen.c 2008-12-01 11:29:05.000000000 +0100
8525 @@ -381,9 +381,8 @@ int smp_call_function_single (int cpu, v
8526 /* prevent preemption and reschedule on another processor */
8527 int me = get_cpu();
8528 if (cpu == me) {
8529 - WARN_ON(1);
8530 put_cpu();
8531 - return -EBUSY;
8532 + return 0;
8533 }
8534 spin_lock_bh(&call_lock);
8535 __smp_call_function_single(cpu, func, info, nonatomic, wait);
8536 @@ -501,7 +500,7 @@ void smp_send_stop(void)
8537 #ifndef CONFIG_XEN
8538 asmlinkage void smp_reschedule_interrupt(void)
8539 #else
8540 -asmlinkage irqreturn_t smp_reschedule_interrupt(void)
8541 +asmlinkage irqreturn_t smp_reschedule_interrupt(int irq, void *ctx)
8542 #endif
8543 {
8544 #ifndef CONFIG_XEN
8545 @@ -514,7 +513,7 @@ asmlinkage irqreturn_t smp_reschedule_in
8546 #ifndef CONFIG_XEN
8547 asmlinkage void smp_call_function_interrupt(void)
8548 #else
8549 -asmlinkage irqreturn_t smp_call_function_interrupt(void)
8550 +asmlinkage irqreturn_t smp_call_function_interrupt(int irq, void *ctx)
8551 #endif
8552 {
8553 void (*func) (void *info) = call_data->func;
8554 @@ -545,31 +544,3 @@ asmlinkage irqreturn_t smp_call_function
8555 return IRQ_HANDLED;
8556 #endif
8557 }
8558 -
8559 -int safe_smp_processor_id(void)
8560 -{
8561 -#ifdef CONFIG_XEN
8562 - return smp_processor_id();
8563 -#else
8564 - unsigned apicid, i;
8565 -
8566 - if (disable_apic)
8567 - return 0;
8568 -
8569 - apicid = hard_smp_processor_id();
8570 - if (apicid < NR_CPUS && x86_cpu_to_apicid[apicid] == apicid)
8571 - return apicid;
8572 -
8573 - for (i = 0; i < NR_CPUS; ++i) {
8574 - if (x86_cpu_to_apicid[i] == apicid)
8575 - return i;
8576 - }
8577 -
8578 - /* No entries in x86_cpu_to_apicid? Either no MPS|ACPI,
8579 - * or called too early. Either way, we must be CPU 0. */
8580 - if (x86_cpu_to_apicid[0] == BAD_APICID)
8581 - return 0;
8582 -
8583 - return 0; /* Should not happen */
8584 -#endif
8585 -}
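
Two behavioural changes land above: smp_call_function_single() now treats
"target is the current CPU" as an ordinary return 0 instead of WARN_ON
plus -EBUSY (note it returns without running func; later kernels execute
it locally instead), and safe_smp_processor_id() is deleted because plain
smp_processor_id() is safe in the oops path by 2.6.19. A reduced sketch of
the first change; names are illustrative:

    #include <stdio.h>

    static int current_cpu = 1;

    static int call_on_cpu(int cpu, void (*fn)(void *), void *arg)
    {
        if (cpu == current_cpu)
            return 0;                    /* was: WARN_ON(1); return -EBUSY */
        printf("IPI to cpu %d\n", cpu);  /* stand-in for the cross call */
        fn(arg);
        return 0;
    }

    static void hello(void *arg)
    {
        printf("remote handler ran (%s)\n", (const char *)arg);
    }

    int main(void)
    {
        call_on_cpu(1, hello, "self");   /* quiet no-op success now */
        call_on_cpu(2, hello, "remote");
        return 0;
    }
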
8586 Index: head-2008-12-01/arch/x86/kernel/traps_64-xen.c
8587 ===================================================================
8588 --- head-2008-12-01.orig/arch/x86/kernel/traps_64-xen.c 2008-12-03 15:49:14.000000000 +0100
8589 +++ head-2008-12-01/arch/x86/kernel/traps_64-xen.c 2008-12-01 11:29:05.000000000 +0100
8590 @@ -23,6 +23,7 @@
8591 #include <linux/delay.h>
8592 #include <linux/spinlock.h>
8593 #include <linux/interrupt.h>
8594 +#include <linux/kallsyms.h>
8595 #include <linux/module.h>
8596 #include <linux/moduleparam.h>
8597 #include <linux/nmi.h>
8598 @@ -45,6 +46,7 @@
8599 #include <asm/pda.h>
8600 #include <asm/proto.h>
8601 #include <asm/nmi.h>
8602 +#include <asm/stacktrace.h>
8603
8604 asmlinkage void divide_error(void);
8605 asmlinkage void debug(void);
8606 @@ -114,7 +116,6 @@ static int call_trace = 1;
8607 #endif
8608
8609 #ifdef CONFIG_KALLSYMS
8610 -# include <linux/kallsyms.h>
8611 void printk_address(unsigned long address)
8612 {
8613 unsigned long offset = 0, symsize;
8614 @@ -142,7 +143,7 @@ void printk_address(unsigned long addres
8615 #endif
8616
8617 static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
8618 - unsigned *usedp, const char **idp)
8619 + unsigned *usedp, char **idp)
8620 {
8621 #ifndef CONFIG_X86_NO_TSS
8622 static char ids[][8] = {
8623 @@ -162,26 +163,7 @@ static unsigned long *in_exception_stack
8624 * 'stack' is in one of them:
8625 */
8626 for (k = 0; k < N_EXCEPTION_STACKS; k++) {
8627 - unsigned long end;
8628 -
8629 - /*
8630 - * set 'end' to the end of the exception stack.
8631 - */
8632 - switch (k + 1) {
8633 - /*
8634 - * TODO: this block is not needed i think, because
8635 - * setup64.c:cpu_init() sets up t->ist[DEBUG_STACK]
8636 - * properly too.
8637 - */
8638 -#if DEBUG_STKSZ > EXCEPTION_STKSZ
8639 - case DEBUG_STACK:
8640 - end = cpu_pda(cpu)->debugstack + DEBUG_STKSZ;
8641 - break;
8642 -#endif
8643 - default:
8644 - end = per_cpu(orig_ist, cpu).ist[k];
8645 - break;
8646 - }
8647 + unsigned long end = per_cpu(orig_ist, cpu).ist[k];
8648 /*
8649 * Is 'stack' above this exception frame's end?
8650 * If yes then skip to the next frame.
8651 @@ -236,13 +218,19 @@ static unsigned long *in_exception_stack
8652 return NULL;
8653 }
8654
8655 -static int show_trace_unwind(struct unwind_frame_info *info, void *context)
8656 +struct ops_and_data {
8657 + struct stacktrace_ops *ops;
8658 + void *data;
8659 +};
8660 +
8661 +static int dump_trace_unwind(struct unwind_frame_info *info, void *context)
8662 {
8663 + struct ops_and_data *oad = (struct ops_and_data *)context;
8664 int n = 0;
8665
8666 while (unwind(info) == 0 && UNW_PC(info)) {
8667 n++;
8668 - printk_address(UNW_PC(info));
8669 + oad->ops->address(oad->data, UNW_PC(info));
8670 if (arch_unw_user_mode(info))
8671 break;
8672 }
8673 @@ -256,13 +244,19 @@ static int show_trace_unwind(struct unwi
8674 * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
8675 */
8676
8677 -void show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * stack)
8678 +static inline int valid_stack_ptr(struct thread_info *tinfo, void *p)
8679 {
8680 - const unsigned cpu = safe_smp_processor_id();
8681 + void *t = (void *)tinfo;
8682 + return p > t && p < t + THREAD_SIZE - 3;
8683 +}
8684 +
8685 +void dump_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * stack,
8686 + struct stacktrace_ops *ops, void *data)
8687 +{
8688 + const unsigned cpu = smp_processor_id();
8689 unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr;
8690 unsigned used = 0;
8691 -
8692 - printk("\nCall Trace:\n");
8693 + struct thread_info *tinfo;
8694
8695 if (!tsk)
8696 tsk = current;
8697 @@ -270,32 +264,47 @@ void show_trace(struct task_struct *tsk,
8698 if (call_trace >= 0) {
8699 int unw_ret = 0;
8700 struct unwind_frame_info info;
8701 + struct ops_and_data oad = { .ops = ops, .data = data };
8702
8703 if (regs) {
8704 if (unwind_init_frame_info(&info, tsk, regs) == 0)
8705 - unw_ret = show_trace_unwind(&info, NULL);
8706 + unw_ret = dump_trace_unwind(&info, &oad);
8707 } else if (tsk == current)
8708 - unw_ret = unwind_init_running(&info, show_trace_unwind, NULL);
8709 + unw_ret = unwind_init_running(&info, dump_trace_unwind, &oad);
8710 else {
8711 if (unwind_init_blocked(&info, tsk) == 0)
8712 - unw_ret = show_trace_unwind(&info, NULL);
8713 + unw_ret = dump_trace_unwind(&info, &oad);
8714 }
8715 if (unw_ret > 0) {
8716 if (call_trace == 1 && !arch_unw_user_mode(&info)) {
8717 - print_symbol("DWARF2 unwinder stuck at %s\n",
8718 + ops->warning_symbol(data, "DWARF2 unwinder stuck at %s\n",
8719 UNW_PC(&info));
8720 if ((long)UNW_SP(&info) < 0) {
8721 - printk("Leftover inexact backtrace:\n");
8722 + ops->warning(data, "Leftover inexact backtrace:\n");
8723 stack = (unsigned long *)UNW_SP(&info);
8724 + if (!stack)
8725 + return;
8726 } else
8727 - printk("Full inexact backtrace again:\n");
8728 + ops->warning(data, "Full inexact backtrace again:\n");
8729 } else if (call_trace >= 1)
8730 return;
8731 else
8732 - printk("Full inexact backtrace again:\n");
8733 + ops->warning(data, "Full inexact backtrace again:\n");
8734 } else
8735 - printk("Inexact backtrace:\n");
8736 + ops->warning(data, "Inexact backtrace:\n");
8737 }
8738 + if (!stack) {
8739 + unsigned long dummy;
8740 + stack = &dummy;
8741 + if (tsk && tsk != current)
8742 + stack = (unsigned long *)tsk->thread.rsp;
8743 + }
8744 + /*
8745 + * Align the stack pointer on word boundary, later loops
8746 + * rely on that (and corruption / debug info bugs can cause
8747 + * unaligned values here):
8748 + */
8749 + stack = (unsigned long *)((unsigned long)stack & ~(sizeof(long)-1));
8750
8751 /*
8752 * Print function call entries within a stack. 'cond' is the
8753 @@ -305,7 +314,9 @@ void show_trace(struct task_struct *tsk,
8754 #define HANDLE_STACK(cond) \
8755 do while (cond) { \
8756 unsigned long addr = *stack++; \
8757 - if (kernel_text_address(addr)) { \
8758 + if (oops_in_progress ? \
8759 + __kernel_text_address(addr) : \
8760 + kernel_text_address(addr)) { \
8761 /* \
8762 * If the address is either in the text segment of the \
8763 * kernel, or in the region which contains vmalloc'ed \
8764 @@ -314,7 +325,7 @@ void show_trace(struct task_struct *tsk,
8765 * down the cause of the crash will be able to figure \
8766 * out the call path that was taken. \
8767 */ \
8768 - printk_address(addr); \
8769 + ops->address(data, addr); \
8770 } \
8771 } while (0)
8772
8773 @@ -323,16 +334,17 @@ void show_trace(struct task_struct *tsk,
8774 * current stack address. If the stacks consist of nested
8775 * exceptions
8776 */
8777 - for ( ; ; ) {
8778 - const char *id;
8779 + for (;;) {
8780 + char *id;
8781 unsigned long *estack_end;
8782 estack_end = in_exception_stack(cpu, (unsigned long)stack,
8783 &used, &id);
8784
8785 if (estack_end) {
8786 - printk(" <%s>", id);
8787 + if (ops->stack(data, id) < 0)
8788 + break;
8789 HANDLE_STACK (stack < estack_end);
8790 - printk(" <EOE>");
8791 + ops->stack(data, "<EOE>");
8792 /*
8793 * We link to the next stack via the
8794 * second-to-last pointer (index -2 to end) in the
8795 @@ -347,7 +359,8 @@ void show_trace(struct task_struct *tsk,
8796 (IRQSTACKSIZE - 64) / sizeof(*irqstack);
8797
8798 if (stack >= irqstack && stack < irqstack_end) {
8799 - printk(" <IRQ>");
8800 + if (ops->stack(data, "IRQ") < 0)
8801 + break;
8802 HANDLE_STACK (stack < irqstack_end);
8803 /*
8804 * We link to the next stack (which would be
8805 @@ -356,7 +369,7 @@ void show_trace(struct task_struct *tsk,
8806 */
8807 stack = (unsigned long *) (irqstack_end[-1]);
8808 irqstack_end = NULL;
8809 - printk(" <EOI>");
8810 + ops->stack(data, "EOI");
8811 continue;
8812 }
8813 }
8814 @@ -364,19 +377,58 @@ void show_trace(struct task_struct *tsk,
8815 }
8816
8817 /*
8818 - * This prints the process stack:
8819 + * This handles the process stack:
8820 */
8821 - HANDLE_STACK (((long) stack & (THREAD_SIZE-1)) != 0);
8822 + tinfo = current_thread_info();
8823 + HANDLE_STACK (valid_stack_ptr(tinfo, stack));
8824 #undef HANDLE_STACK
8825 +}
8826 +EXPORT_SYMBOL(dump_trace);
8827
8828 +static void
8829 +print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
8830 +{
8831 + print_symbol(msg, symbol);
8832 printk("\n");
8833 }
8834
8835 -static void _show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long * rsp)
8836 +static void print_trace_warning(void *data, char *msg)
8837 +{
8838 + printk("%s\n", msg);
8839 +}
8840 +
8841 +static int print_trace_stack(void *data, char *name)
8842 +{
8843 + printk(" <%s> ", name);
8844 + return 0;
8845 +}
8846 +
8847 +static void print_trace_address(void *data, unsigned long addr)
8848 +{
8849 + printk_address(addr);
8850 +}
8851 +
8852 +static struct stacktrace_ops print_trace_ops = {
8853 + .warning = print_trace_warning,
8854 + .warning_symbol = print_trace_warning_symbol,
8855 + .stack = print_trace_stack,
8856 + .address = print_trace_address,
8857 +};
8858 +
8859 +void
8860 +show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long *stack)
8861 +{
8862 + printk("\nCall Trace:\n");
8863 + dump_trace(tsk, regs, stack, &print_trace_ops, NULL);
8864 + printk("\n");
8865 +}
8866 +
8867 +static void
8868 +_show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long *rsp)
8869 {
8870 unsigned long *stack;
8871 int i;
8872 - const int cpu = safe_smp_processor_id();
8873 + const int cpu = smp_processor_id();
8874 unsigned long *irqstack_end = (unsigned long *) (cpu_pda(cpu)->irqstackptr);
8875 unsigned long *irqstack = (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE);
8876
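/*
 * Illustrative sketch (editor's addition, not part of the patch): the new
 * dump_trace()/stacktrace_ops split above separates stack walking from
 * printing, so other consumers can collect raw return addresses instead of
 * printk()ing them. Assuming the 2.6.19 <asm/stacktrace.h> definitions the
 * hunks above code against:
 */
#include <linux/sched.h>
#include <asm/stacktrace.h>

struct addr_collector {
	unsigned long *entries;
	unsigned int max, nr;
};

static void collect_warning(void *data, char *msg) { /* ignore warnings */ }
static void collect_warning_symbol(void *data, char *msg, unsigned long sym) { }

static int collect_stack(void *data, char *name)
{
	return 0;		/* 0 = keep descending into nested stacks */
}

static void collect_address(void *data, unsigned long addr)
{
	struct addr_collector *c = data;

	if (c->nr < c->max)
		c->entries[c->nr++] = addr;
}

static struct stacktrace_ops collect_ops = {
	.warning	= collect_warning,
	.warning_symbol	= collect_warning_symbol,
	.stack		= collect_stack,
	.address	= collect_address,
};

/* usage: dump_trace(current, NULL, NULL, &collect_ops, &c) fills c.entries;
 * the NULL stack pointer is handled by the "if (!stack)" fallback above. */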
8877 @@ -430,7 +482,7 @@ void show_registers(struct pt_regs *regs
8878 int i;
8879 int in_kernel = !user_mode(regs);
8880 unsigned long rsp;
8881 - const int cpu = safe_smp_processor_id();
8882 + const int cpu = smp_processor_id();
8883 struct task_struct *cur = cpu_pda(cpu)->pcurrent;
8884
8885 rsp = regs->rsp;
8886 @@ -505,9 +557,11 @@ static unsigned int die_nest_count;
8887
8888 unsigned __kprobes long oops_begin(void)
8889 {
8890 - int cpu = safe_smp_processor_id();
8891 + int cpu = smp_processor_id();
8892 unsigned long flags;
8893
8894 + oops_enter();
8895 +
8896 /* racy, but better than risking deadlock. */
8897 local_irq_save(flags);
8898 if (!spin_trylock(&die_lock)) {
8899 @@ -536,6 +590,7 @@ void __kprobes oops_end(unsigned long fl
8900 spin_unlock_irqrestore(&die_lock, flags);
8901 if (panic_on_oops)
8902 panic("Fatal exception");
8903 + oops_exit();
8904 }
8905
8906 void __kprobes __die(const char * str, struct pt_regs * regs, long err)
8907 @@ -572,8 +627,8 @@ void die(const char * str, struct pt_reg
8908 do_exit(SIGSEGV);
8909 }
8910
8911 -#ifdef CONFIG_X86_LOCAL_APIC
8912 -void __kprobes die_nmi(char *str, struct pt_regs *regs)
8913 +#if defined(CONFIG_X86_LOCAL_APIC) || defined(CONFIG_SYSCTL)
8914 +void __kprobes die_nmi(char *str, struct pt_regs *regs, int do_panic)
8915 {
8916 unsigned long flags = oops_begin();
8917
8918 @@ -581,13 +636,12 @@ void __kprobes die_nmi(char *str, struct
8919 * We are in trouble anyway, lets at least try
8920 * to get a message out.
8921 */
8922 - printk(str, safe_smp_processor_id());
8923 + printk(str, smp_processor_id());
8924 show_registers(regs);
8925 if (kexec_should_crash(current))
8926 crash_kexec(regs);
8927 - if (panic_on_timeout || panic_on_oops)
8928 - panic("nmi watchdog");
8929 - printk("console shuts up ...\n");
8930 + if (do_panic || panic_on_oops)
8931 + panic("Non maskable interrupt");
8932 oops_end(flags);
8933 nmi_exit();
8934 local_irq_enable();
8935 @@ -734,8 +788,15 @@ asmlinkage void __kprobes do_general_pro
8936 static __kprobes void
8937 mem_parity_error(unsigned char reason, struct pt_regs * regs)
8938 {
8939 - printk("Uhhuh. NMI received. Dazed and confused, but trying to continue\n");
8940 - printk("You probably have a hardware problem with your RAM chips\n");
8941 + printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n",
8942 + reason);
8943 + printk(KERN_EMERG "You probably have a hardware problem with your "
8944 + "RAM chips\n");
8945 +
8946 + if (panic_on_unrecovered_nmi)
8947 + panic("NMI: Not continuing");
8948 +
8949 + printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
8950
8951 #if 0 /* XEN */
8952 /* Clear and disable the memory parity error line. */
8953 @@ -762,9 +823,15 @@ io_check_error(unsigned char reason, str
8954
8955 static __kprobes void
8956 unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
8957 -{ printk("Uhhuh. NMI received for unknown reason %02x.\n", reason);
8958 - printk("Dazed and confused, but trying to continue\n");
8959 - printk("Do you have a strange power saving mode enabled?\n");
8960 +{
8961 + printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n",
8962 + reason);
8963 + printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n");
8964 +
8965 + if (panic_on_unrecovered_nmi)
8966 + panic("NMI: Not continuing");
8967 +
8968 + printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
8969 }
8970
8971 /* Runs on IST stack. This code must keep interrupts off all the time.
8972 @@ -789,12 +856,12 @@ asmlinkage __kprobes void default_do_nmi
8973 * Ok, so this is none of the documented NMI sources,
8974 * so it must be the NMI watchdog.
8975 */
8976 - if (nmi_watchdog > 0) {
8977 - nmi_watchdog_tick(regs,reason);
8978 + if (nmi_watchdog_tick(regs,reason))
8979 return;
8980 - }
8981 #endif
8982 - unknown_nmi_error(reason, regs);
8983 + if (!do_nmi_callback(regs,cpu))
8984 + unknown_nmi_error(reason, regs);
8985 +
8986 return;
8987 }
8988 if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
8989 @@ -1081,6 +1148,7 @@ asmlinkage void math_state_restore(void)
8990 init_fpu(me);
8991 restore_fpu_checking(&me->thread.i387.fxsave);
8992 task_thread_info(me)->status |= TS_USEDFPU;
8993 + me->fpu_counter++;
8994 }
8995
8996
8997 @@ -1141,24 +1209,30 @@ void __cpuinit smp_trap_init(trap_info_t
8998 }
8999
9000
9001 -/* Actual parsing is done early in setup.c. */
9002 -static int __init oops_dummy(char *s)
9003 +static int __init oops_setup(char *s)
9004 {
9005 - panic_on_oops = 1;
9006 - return 1;
9007 + if (!s)
9008 + return -EINVAL;
9009 + if (!strcmp(s, "panic"))
9010 + panic_on_oops = 1;
9011 + return 0;
9012 }
9013 -__setup("oops=", oops_dummy);
9014 +early_param("oops", oops_setup);
9015
9016 static int __init kstack_setup(char *s)
9017 {
9018 + if (!s)
9019 + return -EINVAL;
9020 kstack_depth_to_print = simple_strtoul(s,NULL,0);
9021 - return 1;
9022 + return 0;
9023 }
9024 -__setup("kstack=", kstack_setup);
9025 +early_param("kstack", kstack_setup);
9026
9027 #ifdef CONFIG_STACK_UNWIND
9028 static int __init call_trace_setup(char *s)
9029 {
9030 + if (!s)
9031 + return -EINVAL;
9032 if (strcmp(s, "old") == 0)
9033 call_trace = -1;
9034 else if (strcmp(s, "both") == 0)
9035 @@ -1167,7 +1241,7 @@ static int __init call_trace_setup(char
9036 call_trace = 1;
9037 else if (strcmp(s, "new") == 0)
9038 call_trace = 2;
9039 - return 1;
9040 + return 0;
9041 }
9042 -__setup("call_trace=", call_trace_setup);
9043 +early_param("call_trace", call_trace_setup);
9044 #endif
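/*
 * Editor's sketch (not part of the patch): the __setup() -> early_param()
 * conversions above also flip the return convention. A __setup() handler
 * returns 1 for "option consumed", while an early_param() handler runs much
 * earlier (from parse_early_param()) and returns 0 on success or a negative
 * errno. The pattern the hunks above follow, with a hypothetical option name:
 */
static int __init myopt_setup(char *s)
{
	if (!s)
		return -EINVAL;		/* "myopt" given without a value */
	/* parse s here */
	return 0;			/* 0 = success for early_param() */
}
early_param("myopt", myopt_setup);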
9045 Index: head-2008-12-01/arch/x86/kernel/vsyscall_64-xen.c
9046 ===================================================================
9047 --- head-2008-12-01.orig/arch/x86/kernel/vsyscall_64-xen.c 2008-12-03 15:49:14.000000000 +0100
9048 +++ head-2008-12-01/arch/x86/kernel/vsyscall_64-xen.c 2008-12-01 11:29:05.000000000 +0100
9049 @@ -26,6 +26,10 @@
9050 #include <linux/seqlock.h>
9051 #include <linux/jiffies.h>
9052 #include <linux/sysctl.h>
9053 +#include <linux/getcpu.h>
9054 +#include <linux/cpu.h>
9055 +#include <linux/smp.h>
9056 +#include <linux/notifier.h>
9057
9058 #include <asm/vsyscall.h>
9059 #include <asm/pgtable.h>
9060 @@ -33,11 +37,15 @@
9061 #include <asm/fixmap.h>
9062 #include <asm/errno.h>
9063 #include <asm/io.h>
9064 +#include <asm/segment.h>
9065 +#include <asm/desc.h>
9066 +#include <asm/topology.h>
9067
9068 #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
9069
9070 int __sysctl_vsyscall __section_sysctl_vsyscall = 1;
9071 seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED;
9072 +int __vgetcpu_mode __section_vgetcpu_mode;
9073
9074 #include <asm/unistd.h>
9075
9076 @@ -61,8 +69,7 @@ static __always_inline void do_vgettimeo
9077 sequence = read_seqbegin(&__xtime_lock);
9078
9079 sec = __xtime.tv_sec;
9080 - usec = (__xtime.tv_nsec / 1000) +
9081 - (__jiffies - __wall_jiffies) * (1000000 / HZ);
9082 + usec = __xtime.tv_nsec / 1000;
9083
9084 if (__vxtime.mode != VXTIME_HPET) {
9085 t = get_cycles_sync();
9086 @@ -72,7 +79,8 @@ static __always_inline void do_vgettimeo
9087 __vxtime.tsc_quot) >> 32;
9088 /* See comment in x86_64 do_gettimeofday. */
9089 } else {
9090 - usec += ((readl((void *)fix_to_virt(VSYSCALL_HPET) + 0xf0) -
9091 + usec += ((readl((void __iomem *)
9092 + fix_to_virt(VSYSCALL_HPET) + 0xf0) -
9093 __vxtime.last) * __vxtime.quot) >> 32;
9094 }
9095 } while (read_seqretry(&__xtime_lock, sequence));
9096 @@ -127,9 +135,46 @@ time_t __vsyscall(1) vtime(time_t *t)
9097 return __xtime.tv_sec;
9098 }
9099
9100 -long __vsyscall(2) venosys_0(void)
9101 -{
9102 - return -ENOSYS;
9103 +/* Fast way to get current CPU and node.
9104 + This helps to do per node and per CPU caches in user space.
9105 + The result is not guaranteed without CPU affinity, but usually
9106 + works out because the scheduler tries to keep a thread on the same
9107 + CPU.
9108 +
9109 + tcache must point to an array of two longs.
9110 + All arguments can be NULL. */
9111 +long __vsyscall(2)
9112 +vgetcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache)
9113 +{
9114 + unsigned int dummy, p;
9115 + unsigned long j = 0;
9116 +
9117 + /* Fast cache - only recompute value once per jiffies and avoid
9118 + relatively costly rdtscp/cpuid otherwise.
9119 + This works because the scheduler usually keeps the process
9120 + on the same CPU and this syscall doesn't guarantee its
9121 + results anyway.
9122 + We do this here because otherwise user space would do it on
9123 + its own in a likely inferior way (no access to jiffies).
9124 + If you don't like it pass NULL. */
9125 + if (tcache && tcache->blob[0] == (j = __jiffies)) {
9126 + p = tcache->blob[1];
9127 + } else if (__vgetcpu_mode == VGETCPU_RDTSCP) {
9128 + /* Load per CPU data from RDTSCP */
9129 + rdtscp(dummy, dummy, p);
9130 + } else {
9131 + /* Load per CPU data from GDT */
9132 + asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
9133 + }
9134 + if (tcache) {
9135 + tcache->blob[0] = j;
9136 + tcache->blob[1] = p;
9137 + }
9138 + if (cpu)
9139 + *cpu = p & 0xfff;
9140 + if (node)
9141 + *node = p >> 12;
9142 + return 0;
9143 }
9144
9145 long __vsyscall(3) venosys_1(void)
9146 @@ -149,7 +194,8 @@ static int vsyscall_sysctl_change(ctl_ta
9147 void __user *buffer, size_t *lenp, loff_t *ppos)
9148 {
9149 extern u16 vsysc1, vsysc2;
9150 - u16 *map1, *map2;
9151 + u16 __iomem *map1;
9152 + u16 __iomem *map2;
9153 int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
9154 if (!write)
9155 return ret;
9156 @@ -164,11 +210,11 @@ static int vsyscall_sysctl_change(ctl_ta
9157 goto out;
9158 }
9159 if (!sysctl_vsyscall) {
9160 - *map1 = SYSCALL;
9161 - *map2 = SYSCALL;
9162 + writew(SYSCALL, map1);
9163 + writew(SYSCALL, map2);
9164 } else {
9165 - *map1 = NOP2;
9166 - *map2 = NOP2;
9167 + writew(NOP2, map1);
9168 + writew(NOP2, map2);
9169 }
9170 iounmap(map2);
9171 out:
9172 @@ -200,6 +246,48 @@ static ctl_table kernel_root_table2[] =
9173
9174 #endif
9175
9176 +/* Assume __initcall executes before all user space. Hopefully kmod
9177 + doesn't violate that. We'll find out if it does. */
9178 +static void __cpuinit vsyscall_set_cpu(int cpu)
9179 +{
9180 + unsigned long d;
9181 + unsigned long node = 0;
9182 +#ifdef CONFIG_NUMA
9183 + node = cpu_to_node[cpu];
9184 +#endif
9185 + if (cpu_has(&cpu_data[cpu], X86_FEATURE_RDTSCP))
9186 + write_rdtscp_aux((node << 12) | cpu);
9187 +
9188 + /* Store cpu number in limit so that it can be loaded quickly
9189 + in user space in vgetcpu.
9190 + 12 bits for the CPU and 8 bits for the node. */
9191 + d = 0x0f40000000000ULL;
9192 + d |= cpu;
9193 + d |= (node & 0xf) << 12;
9194 + d |= (node >> 4) << 48;
9195 + if (HYPERVISOR_update_descriptor(virt_to_machine(cpu_gdt(cpu)
9196 + + GDT_ENTRY_PER_CPU),
9197 + d))
9198 + BUG();
9199 +}
9200 +
9201 +static void __cpuinit cpu_vsyscall_init(void *arg)
9202 +{
9203 + /* preemption should be already off */
9204 + vsyscall_set_cpu(raw_smp_processor_id());
9205 +}
9206 +
9207 +#ifdef CONFIG_HOTPLUG_CPU
9208 +static int __cpuinit
9209 +cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg)
9210 +{
9211 + long cpu = (long)arg;
9212 + if (action == CPU_ONLINE)
9213 + smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 0, 1);
9214 + return NOTIFY_DONE;
9215 +}
9216 +#endif
9217 +
9218 static void __init map_vsyscall(void)
9219 {
9220 extern char __vsyscall_0;
9221 @@ -214,13 +302,20 @@ static int __init vsyscall_init(void)
9222 VSYSCALL_ADDR(__NR_vgettimeofday)));
9223 BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime));
9224 BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE)));
9225 + BUG_ON((unsigned long) &vgetcpu != VSYSCALL_ADDR(__NR_vgetcpu));
9226 map_vsyscall();
9227 #ifdef CONFIG_XEN
9228 sysctl_vsyscall = 0; /* disable vgettimeofay() */
9229 + if (boot_cpu_has(X86_FEATURE_RDTSCP))
9230 + vgetcpu_mode = VGETCPU_RDTSCP;
9231 + else
9232 + vgetcpu_mode = VGETCPU_LSL;
9233 #endif
9234 #ifdef CONFIG_SYSCTL
9235 register_sysctl_table(kernel_root_table2, 0);
9236 #endif
9237 + on_each_cpu(cpu_vsyscall_init, NULL, 0, 1);
9238 + hotcpu_notifier(cpu_vsyscall_notifier, 0);
9239 return 0;
9240 }
9241
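/*
 * Illustrative user-space sketch (editor's addition, not part of the patch):
 * vgetcpu() is reached through the fixed x86-64 vsyscall area; slot 2 lies
 * 2*1024 bytes above the base, i.e. 0xffffffffff600800 -- an address derived
 * from the VSYSCALL_ADDR(__NR_vgetcpu) layout above, stated here as an
 * assumption. The cpu/node unpacking mirrors the code above
 * (cpu = p & 0xfff, node = p >> 12).
 */
#include <stdio.h>

typedef long (*vgetcpu_t)(unsigned *cpu, unsigned *node, void *tcache);

int main(void)
{
	vgetcpu_t vgetcpu = (vgetcpu_t)0xffffffffff600800UL;
	unsigned cpu = 0, node = 0;

	if (vgetcpu(&cpu, &node, NULL) == 0)	/* NULL tcache: skip jiffies cache */
		printf("cpu %u node %u\n", cpu, node);
	return 0;
}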
9242 Index: head-2008-12-01/arch/x86/mm/fault_64-xen.c
9243 ===================================================================
9244 --- head-2008-12-01.orig/arch/x86/mm/fault_64-xen.c 2008-12-03 15:49:14.000000000 +0100
9245 +++ head-2008-12-01/arch/x86/mm/fault_64-xen.c 2008-12-01 11:29:05.000000000 +0100
9246 @@ -40,8 +40,7 @@
9247 #define PF_RSVD (1<<3)
9248 #define PF_INSTR (1<<4)
9249
9250 -#ifdef CONFIG_KPROBES
9251 -ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
9252 +static ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
9253
9254 /* Hook to register for page fault notifications */
9255 int register_page_fault_notifier(struct notifier_block *nb)
9256 @@ -49,11 +48,13 @@ int register_page_fault_notifier(struct
9257 vmalloc_sync_all();
9258 return atomic_notifier_chain_register(&notify_page_fault_chain, nb);
9259 }
9260 +EXPORT_SYMBOL_GPL(register_page_fault_notifier);
9261
9262 int unregister_page_fault_notifier(struct notifier_block *nb)
9263 {
9264 return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb);
9265 }
9266 +EXPORT_SYMBOL_GPL(unregister_page_fault_notifier);
9267
9268 static inline int notify_page_fault(enum die_val val, const char *str,
9269 struct pt_regs *regs, long err, int trap, int sig)
9270 @@ -67,13 +68,6 @@ static inline int notify_page_fault(enum
9271 };
9272 return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args);
9273 }
9274 -#else
9275 -static inline int notify_page_fault(enum die_val val, const char *str,
9276 - struct pt_regs *regs, long err, int trap, int sig)
9277 -{
9278 - return NOTIFY_DONE;
9279 -}
9280 -#endif
9281
9282 void bust_spinlocks(int yes)
9283 {
9284 @@ -102,7 +96,7 @@ void bust_spinlocks(int yes)
9285 static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr,
9286 unsigned long error_code)
9287 {
9288 - unsigned char *instr;
9289 + unsigned char __user *instr;
9290 int scan_more = 1;
9291 int prefetch = 0;
9292 unsigned char *max_instr;
9293 @@ -111,7 +105,7 @@ static noinline int is_prefetch(struct p
9294 if (error_code & PF_INSTR)
9295 return 0;
9296
9297 - instr = (unsigned char *)convert_rip_to_linear(current, regs);
9298 + instr = (unsigned char __user *)convert_rip_to_linear(current, regs);
9299 max_instr = instr + 15;
9300
9301 if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE)
9302 @@ -122,7 +116,7 @@ static noinline int is_prefetch(struct p
9303 unsigned char instr_hi;
9304 unsigned char instr_lo;
9305
9306 - if (__get_user(opcode, instr))
9307 + if (__get_user(opcode, (char __user *)instr))
9308 break;
9309
9310 instr_hi = opcode & 0xf0;
9311 @@ -160,7 +154,7 @@ static noinline int is_prefetch(struct p
9312 case 0x00:
9313 /* Prefetch instruction is 0x0F0D or 0x0F18 */
9314 scan_more = 0;
9315 - if (__get_user(opcode, instr))
9316 + if (__get_user(opcode, (char __user *)instr))
9317 break;
9318 prefetch = (instr_lo == 0xF) &&
9319 (opcode == 0x0D || opcode == 0x18);
9320 @@ -176,7 +170,7 @@ static noinline int is_prefetch(struct p
9321 static int bad_address(void *p)
9322 {
9323 unsigned long dummy;
9324 - return __get_user(dummy, (unsigned long *)p);
9325 + return __get_user(dummy, (unsigned long __user *)p);
9326 }
9327
9328 void dump_pagetable(unsigned long address)
9329 @@ -248,7 +242,7 @@ static int is_errata93(struct pt_regs *r
9330
9331 int unhandled_signal(struct task_struct *tsk, int sig)
9332 {
9333 - if (tsk->pid == 1)
9334 + if (is_init(tsk))
9335 return 1;
9336 if (tsk->ptrace & PT_PTRACED)
9337 return 0;
9338 @@ -300,7 +294,7 @@ static int vmalloc_fault(unsigned long a
9339 if (pgd_none(*pgd))
9340 set_pgd(pgd, *pgd_ref);
9341 else
9342 - BUG_ON(pgd_page(*pgd) != pgd_page(*pgd_ref));
9343 + BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
9344
9345 /* Below here mismatches are bugs because these lower tables
9346 are shared */
9347 @@ -309,7 +303,7 @@ static int vmalloc_fault(unsigned long a
9348 pud_ref = pud_offset(pgd_ref, address);
9349 if (pud_none(*pud_ref))
9350 return -1;
9351 - if (pud_none(*pud) || pud_page(*pud) != pud_page(*pud_ref))
9352 + if (pud_none(*pud) || pud_page_vaddr(*pud) != pud_page_vaddr(*pud_ref))
9353 BUG();
9354 pmd = pmd_offset(pud, address);
9355 pmd_ref = pmd_offset(pud_ref, address);
9356 @@ -531,7 +525,7 @@ good_area:
9357 case PF_PROT: /* read, present */
9358 goto bad_area;
9359 case 0: /* read, not present */
9360 - if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
9361 + if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
9362 goto bad_area;
9363 }
9364
9365 @@ -647,7 +641,7 @@ no_context:
9366 */
9367 out_of_memory:
9368 up_read(&mm->mmap_sem);
9369 - if (current->pid == 1) {
9370 + if (is_init(current)) {
9371 yield();
9372 goto again;
9373 }
9374 @@ -702,7 +696,7 @@ void vmalloc_sync_all(void)
9375 if (pgd_none(*pgd))
9376 set_pgd(pgd, *pgd_ref);
9377 else
9378 - BUG_ON(pgd_page(*pgd) != pgd_page(*pgd_ref));
9379 + BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
9380 }
9381 spin_unlock(&pgd_lock);
9382 set_bit(pgd_index(address), insync);
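/*
 * Editor's sketch (not part of the patch): the EXPORT_SYMBOL_GPL additions
 * above exist so that modules -- kprobes users in particular -- can hook page
 * faults. A minimal subscriber, assuming the 2.6.19-era declarations in
 * <asm/kdebug.h>:
 */
#include <linux/module.h>
#include <linux/notifier.h>
#include <asm/kdebug.h>

static int my_pf_event(struct notifier_block *nb, unsigned long val, void *data)
{
	struct die_args *args = data;

	if (val == DIE_PAGE_FAULT) {
		/* args->regs and args->err are valid here; must not sleep */
	}
	return NOTIFY_DONE;
}

static struct notifier_block my_pf_nb = {
	.notifier_call = my_pf_event,
};

static int __init my_init(void)
{
	/* register_page_fault_notifier() also runs vmalloc_sync_all(), see above */
	return register_page_fault_notifier(&my_pf_nb);
}

static void __exit my_exit(void)
{
	unregister_page_fault_notifier(&my_pf_nb);
}

module_init(my_init);
module_exit(my_exit);
MODULE_LICENSE("GPL");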
9383 Index: head-2008-12-01/arch/x86/mm/init_64-xen.c
9384 ===================================================================
9385 --- head-2008-12-01.orig/arch/x86/mm/init_64-xen.c 2008-12-03 15:49:14.000000000 +0100
9386 +++ head-2008-12-01/arch/x86/mm/init_64-xen.c 2008-12-01 11:29:05.000000000 +0100
9387 @@ -61,8 +61,6 @@ EXPORT_SYMBOL(__kernel_page_user);
9388
9389 int after_bootmem;
9390
9391 -static unsigned long dma_reserve __initdata;
9392 -
9393 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
9394 extern unsigned long start_pfn;
9395
9396 @@ -420,7 +418,6 @@ __init void *early_ioremap(unsigned long
9397
9398 /* actually usually some more */
9399 if (size >= LARGE_PAGE_SIZE) {
9400 - printk("SMBIOS area too long %lu\n", size);
9401 return NULL;
9402 }
9403 set_pmd(temp_mappings[0].pmd, __pmd(map | _KERNPG_TABLE | _PAGE_PSE));
9404 @@ -442,16 +439,24 @@ __init void early_iounmap(void *addr, un
9405 #endif
9406
9407 static void __meminit
9408 -phys_pmd_init(pmd_t *pmd, unsigned long address, unsigned long end)
9409 +phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
9410 {
9411 - int i, k;
9412 + int i = pmd_index(address);
9413
9414 - for (i = 0; i < PTRS_PER_PMD; pmd++, i++) {
9415 + for (; i < PTRS_PER_PMD; i++) {
9416 unsigned long pte_phys;
9417 + pmd_t *pmd = pmd_page + i;
9418 pte_t *pte, *pte_save;
9419 + int k;
9420
9421 if (address >= end)
9422 break;
9423 +
9424 + if (__pmd_val(*pmd)) {
9425 + address += PMD_SIZE;
9426 + continue;
9427 + }
9428 +
9429 pte = alloc_static_page(&pte_phys);
9430 pte_save = pte;
9431 for (k = 0; k < PTRS_PER_PTE; pte++, k++, address += PTE_SIZE) {
9432 @@ -478,40 +483,35 @@ phys_pmd_init(pmd_t *pmd, unsigned long
9433 static void __meminit
9434 phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end)
9435 {
9436 - pmd_t *pmd = pmd_offset(pud, (unsigned long)__va(address));
9437 -
9438 - if (pmd_none(*pmd)) {
9439 - spin_lock(&init_mm.page_table_lock);
9440 - phys_pmd_init(pmd, address, end);
9441 - spin_unlock(&init_mm.page_table_lock);
9442 - __flush_tlb_all();
9443 - }
9444 + pmd_t *pmd = pmd_offset(pud,0);
9445 + spin_lock(&init_mm.page_table_lock);
9446 + phys_pmd_init(pmd, address, end);
9447 + spin_unlock(&init_mm.page_table_lock);
9448 + __flush_tlb_all();
9449 }
9450
9451 -static void __meminit phys_pud_init(pud_t *pud, unsigned long address, unsigned long end)
9452 +static void __meminit phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end)
9453 {
9454 - long i = pud_index(address);
9455 -
9456 - pud = pud + i;
9457 -
9458 - if (after_bootmem && pud_val(*pud)) {
9459 - phys_pmd_update(pud, address, end);
9460 - return;
9461 - }
9462 + int i = pud_index(addr);
9463
9464 - for (; i < PTRS_PER_PUD; pud++, i++) {
9465 - unsigned long paddr, pmd_phys;
9466 + for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE ) {
9467 + unsigned long pmd_phys;
9468 + pud_t *pud = pud_page + pud_index(addr);
9469 pmd_t *pmd;
9470
9471 - paddr = (address & PGDIR_MASK) + i*PUD_SIZE;
9472 - if (paddr >= end)
9473 + if (addr >= end)
9474 break;
9475
9476 + if (__pud_val(*pud)) {
9477 + phys_pmd_update(pud, addr, end);
9478 + continue;
9479 + }
9480 +
9481 pmd = alloc_static_page(&pmd_phys);
9482
9483 spin_lock(&init_mm.page_table_lock);
9484 *pud = __pud(pmd_phys | _KERNPG_TABLE);
9485 - phys_pmd_init(pmd, paddr, end);
9486 + phys_pmd_init(pmd, addr, end);
9487 spin_unlock(&init_mm.page_table_lock);
9488
9489 early_make_page_readonly(pmd, XENFEAT_writable_page_tables);
9490 @@ -775,69 +775,18 @@ void __cpuinit zap_low_mappings(int cpu)
9491 #endif
9492 }
9493
9494 -/* Compute zone sizes for the DMA and DMA32 zones in a node. */
9495 -__init void
9496 -size_zones(unsigned long *z, unsigned long *h,
9497 - unsigned long start_pfn, unsigned long end_pfn)
9498 -{
9499 - int i;
9500 - unsigned long w;
9501 -
9502 - for (i = 0; i < MAX_NR_ZONES; i++)
9503 - z[i] = 0;
9504 -
9505 - if (start_pfn < MAX_DMA_PFN)
9506 - z[ZONE_DMA] = MAX_DMA_PFN - start_pfn;
9507 - if (start_pfn < MAX_DMA32_PFN) {
9508 - unsigned long dma32_pfn = MAX_DMA32_PFN;
9509 - if (dma32_pfn > end_pfn)
9510 - dma32_pfn = end_pfn;
9511 - z[ZONE_DMA32] = dma32_pfn - start_pfn;
9512 - }
9513 - z[ZONE_NORMAL] = end_pfn - start_pfn;
9514 -
9515 - /* Remove lower zones from higher ones. */
9516 - w = 0;
9517 - for (i = 0; i < MAX_NR_ZONES; i++) {
9518 - if (z[i])
9519 - z[i] -= w;
9520 - w += z[i];
9521 - }
9522 -
9523 - /* Compute holes */
9524 - w = start_pfn;
9525 - for (i = 0; i < MAX_NR_ZONES; i++) {
9526 - unsigned long s = w;
9527 - w += z[i];
9528 - h[i] = e820_hole_size(s, w);
9529 - }
9530 -
9531 - /* Add the space pace needed for mem_map to the holes too. */
9532 - for (i = 0; i < MAX_NR_ZONES; i++)
9533 - h[i] += (z[i] * sizeof(struct page)) / PAGE_SIZE;
9534 -
9535 - /* The 16MB DMA zone has the kernel and other misc mappings.
9536 - Account them too */
9537 - if (h[ZONE_DMA]) {
9538 - h[ZONE_DMA] += dma_reserve;
9539 - if (h[ZONE_DMA] >= z[ZONE_DMA]) {
9540 - printk(KERN_WARNING
9541 - "Kernel too large and filling up ZONE_DMA?\n");
9542 - h[ZONE_DMA] = z[ZONE_DMA];
9543 - }
9544 - }
9545 -}
9546 -
9547 #ifndef CONFIG_NUMA
9548 void __init paging_init(void)
9549 {
9550 - unsigned long zones[MAX_NR_ZONES], holes[MAX_NR_ZONES];
9551 + unsigned long max_zone_pfns[MAX_NR_ZONES];
9552 + memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
9553 + max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;
9554 + max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
9555 + max_zone_pfns[ZONE_NORMAL] = end_pfn;
9556
9557 memory_present(0, 0, end_pfn);
9558 sparse_init();
9559 - size_zones(zones, holes, 0, end_pfn);
9560 - free_area_init_node(0, NODE_DATA(0), zones,
9561 - __pa(PAGE_OFFSET) >> PAGE_SHIFT, holes);
9562 + free_area_init_nodes(max_zone_pfns);
9563
9564 init_mm.context.pinned = 1;
9565 }
9566 @@ -891,36 +840,23 @@ void online_page(struct page *page)
9567
9568 #ifdef CONFIG_MEMORY_HOTPLUG
9569 /*
9570 - * XXX: memory_add_physaddr_to_nid() is to find node id from physical address
9571 - * via probe interface of sysfs. If acpi notifies hot-add event, then it
9572 - * can tell node id by searching dsdt. But, probe interface doesn't have
9573 - * node id. So, return 0 as node id at this time.
9574 - */
9575 -#ifdef CONFIG_NUMA
9576 -int memory_add_physaddr_to_nid(u64 start)
9577 -{
9578 - return 0;
9579 -}
9580 -#endif
9581 -
9582 -/*
9583 * Memory is added always to NORMAL zone. This means you will never get
9584 * additional DMA/DMA32 memory.
9585 */
9586 int arch_add_memory(int nid, u64 start, u64 size)
9587 {
9588 struct pglist_data *pgdat = NODE_DATA(nid);
9589 - struct zone *zone = pgdat->node_zones + MAX_NR_ZONES-2;
9590 + struct zone *zone = pgdat->node_zones + ZONE_NORMAL;
9591 unsigned long start_pfn = start >> PAGE_SHIFT;
9592 unsigned long nr_pages = size >> PAGE_SHIFT;
9593 int ret;
9594
9595 + init_memory_mapping(start, (start + size -1));
9596 +
9597 ret = __add_pages(zone, start_pfn, nr_pages);
9598 if (ret)
9599 goto error;
9600
9601 - init_memory_mapping(start, (start + size -1));
9602 -
9603 return ret;
9604 error:
9605 printk("%s: Problem encountered in __add_pages!\n", __func__);
9606 @@ -934,7 +870,17 @@ int remove_memory(u64 start, u64 size)
9607 }
9608 EXPORT_SYMBOL_GPL(remove_memory);
9609
9610 -#else /* CONFIG_MEMORY_HOTPLUG */
9611 +#if !defined(CONFIG_ACPI_NUMA) && defined(CONFIG_NUMA)
9612 +int memory_add_physaddr_to_nid(u64 start)
9613 +{
9614 + return 0;
9615 +}
9616 +EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
9617 +#endif
9618 +
9619 +#endif /* CONFIG_MEMORY_HOTPLUG */
9620 +
9621 +#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
9622 /*
9623 * Memory Hotadd without sparsemem. The mem_maps have been allocated in advance,
9624 * just online the pages.
9625 @@ -960,7 +906,7 @@ int __add_pages(struct zone *z, unsigned
9626 }
9627 return err;
9628 }
9629 -#endif /* CONFIG_MEMORY_HOTPLUG */
9630 +#endif
9631
9632 static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules,
9633 kcore_vsyscall;
9634 @@ -972,12 +918,6 @@ void __init mem_init(void)
9635
9636 pci_iommu_alloc();
9637
9638 - /* How many end-of-memory variables you have, grandma! */
9639 - max_low_pfn = end_pfn;
9640 - max_pfn = end_pfn;
9641 - num_physpages = end_pfn;
9642 - high_memory = (void *) __va(end_pfn * PAGE_SIZE);
9643 -
9644 /* clear the zero-page */
9645 memset(empty_zero_page, 0, PAGE_SIZE);
9646
9647 @@ -995,7 +935,8 @@ void __init mem_init(void)
9648 init_page_count(pfn_to_page(pfn));
9649 totalram_pages++;
9650 }
9651 - reservedpages = end_pfn - totalram_pages - e820_hole_size(0, end_pfn);
9652 + reservedpages = end_pfn - totalram_pages -
9653 + absent_pages_in_range(0, end_pfn);
9654
9655 after_bootmem = 1;
9656
9657 @@ -1102,15 +1043,34 @@ void free_initrd_mem(unsigned long start
9658
9659 void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
9660 {
9661 - /* Should check here against the e820 map to avoid double free */
9662 #ifdef CONFIG_NUMA
9663 int nid = phys_to_nid(phys);
9664 +#endif
9665 + unsigned long pfn = phys >> PAGE_SHIFT;
9666 + if (pfn >= end_pfn) {
9667 + /* This can happen with kdump kernels when accessing firmware
9668 + tables. */
9669 + if (pfn < end_pfn_map)
9670 + return;
9671 + printk(KERN_ERR "reserve_bootmem: illegal reserve %lx %u\n",
9672 + phys, len);
9673 + return;
9674 + }
9675 +
9676 + /* Should check here against the e820 map to avoid double free */
9677 +#ifdef CONFIG_NUMA
9678 reserve_bootmem_node(NODE_DATA(nid), phys, len);
9679 #else
9680 reserve_bootmem(phys, len);
9681 #endif
9682 - if (phys+len <= MAX_DMA_PFN*PAGE_SIZE)
9683 +#ifndef CONFIG_XEN
9684 + if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) {
9685 + static unsigned long dma_reserve __initdata;
9686 +
9687 dma_reserve += len / PAGE_SIZE;
9688 + set_dma_reserve(dma_reserve);
9689 + }
9690 +#endif
9691 }
9692
9693 int kern_addr_valid(unsigned long addr)
9694 Index: head-2008-12-01/arch/x86/mm/pageattr_64-xen.c
9695 ===================================================================
9696 --- head-2008-12-01.orig/arch/x86/mm/pageattr_64-xen.c 2008-12-03 15:49:14.000000000 +0100
9697 +++ head-2008-12-01/arch/x86/mm/pageattr_64-xen.c 2008-12-01 11:29:05.000000000 +0100
9698 @@ -377,8 +377,8 @@ static void revert_page(unsigned long ad
9699 BUG_ON(pud_none(*pud));
9700 pmd = pmd_offset(pud, address);
9701 BUG_ON(__pmd_val(*pmd) & _PAGE_PSE);
9702 - pgprot_val(ref_prot) |= _PAGE_PSE;
9703 large_pte = mk_pte_phys(__pa(address) & LARGE_PAGE_MASK, ref_prot);
9704 + large_pte = pte_mkhuge(large_pte);
9705 set_pte((pte_t *)pmd, large_pte);
9706 }
9707
9708 @@ -388,32 +388,28 @@ __change_page_attr(unsigned long address
9709 {
9710 pte_t *kpte;
9711 struct page *kpte_page;
9712 - unsigned kpte_flags;
9713 pgprot_t ref_prot2;
9714 kpte = lookup_address(address);
9715 if (!kpte) return 0;
9716 kpte_page = virt_to_page(((unsigned long)kpte) & PAGE_MASK);
9717 - kpte_flags = pte_val(*kpte);
9718 if (pgprot_val(prot) != pgprot_val(ref_prot)) {
9719 - if ((kpte_flags & _PAGE_PSE) == 0) {
9720 + if (!pte_huge(*kpte)) {
9721 set_pte(kpte, pfn_pte(pfn, prot));
9722 } else {
9723 /*
9724 * split_large_page will take the reference for this
9725 * change_page_attr on the split page.
9726 */
9727 -
9728 struct page *split;
9729 - ref_prot2 = __pgprot(pgprot_val(pte_pgprot(*lookup_address(address))) & ~(1<<_PAGE_BIT_PSE));
9730 -
9731 + ref_prot2 = pte_pgprot(pte_clrhuge(*kpte));
9732 split = split_large_page(address, prot, ref_prot2);
9733 if (!split)
9734 return -ENOMEM;
9735 - set_pte(kpte,mk_pte(split, ref_prot2));
9736 + set_pte(kpte, mk_pte(split, ref_prot2));
9737 kpte_page = split;
9738 - }
9739 + }
9740 page_private(kpte_page)++;
9741 - } else if ((kpte_flags & _PAGE_PSE) == 0) {
9742 + } else if (!pte_huge(*kpte)) {
9743 set_pte(kpte, pfn_pte(pfn, ref_prot));
9744 BUG_ON(page_private(kpte_page) == 0);
9745 page_private(kpte_page)--;
9746 @@ -470,10 +466,12 @@ int change_page_attr_addr(unsigned long
9747 * lowmem */
9748 if (__pa(address) < KERNEL_TEXT_SIZE) {
9749 unsigned long addr2;
9750 - pgprot_t prot2 = prot;
9751 + pgprot_t prot2;
9752 addr2 = __START_KERNEL_map + __pa(address);
9753 - pgprot_val(prot2) &= ~_PAGE_NX;
9754 - err = __change_page_attr(addr2, pfn, prot2, PAGE_KERNEL_EXEC);
9755 + /* Make sure the kernel mappings stay executable */
9756 + prot2 = pte_pgprot(pte_mkexec(pfn_pte(0, prot)));
9757 + err = __change_page_attr(addr2, pfn, prot2,
9758 + PAGE_KERNEL_EXEC);
9759 }
9760 }
9761 up_write(&init_mm.mmap_sem);
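/*
 * Editor's sketch (not part of the patch): the interface these pageattr hunks
 * clean up is driven through the classic change_page_attr() API from
 * <asm/cacheflush.h>. Typical use -- write-protecting one kernel page and
 * restoring it later:
 */
#include <linux/mm.h>
#include <asm/cacheflush.h>

static int protect_page(void *addr)
{
	int err = change_page_attr(virt_to_page(addr), 1, PAGE_KERNEL_RO);

	global_flush_tlb();		/* change_page_attr() defers the flush */
	return err;
}

static void unprotect_page(void *addr)
{
	change_page_attr(virt_to_page(addr), 1, PAGE_KERNEL);
	global_flush_tlb();
}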
9762 Index: head-2008-12-01/drivers/char/tpm/tpm_xen.c
9763 ===================================================================
9764 --- head-2008-12-01.orig/drivers/char/tpm/tpm_xen.c 2008-12-03 15:49:14.000000000 +0100
9765 +++ head-2008-12-01/drivers/char/tpm/tpm_xen.c 2008-12-01 11:29:05.000000000 +0100
9766 @@ -85,8 +85,7 @@ static struct tpm_private *my_priv;
9767
9768 /* local function prototypes */
9769 static irqreturn_t tpmif_int(int irq,
9770 - void *tpm_priv,
9771 - struct pt_regs *ptregs);
9772 + void *tpm_priv);
9773 static void tpmif_rx_action(unsigned long unused);
9774 static int tpmif_connect(struct xenbus_device *dev,
9775 struct tpm_private *tp,
9776 @@ -559,7 +558,7 @@ static void tpmif_rx_action(unsigned lon
9777 }
9778
9779
9780 -static irqreturn_t tpmif_int(int irq, void *tpm_priv, struct pt_regs *ptregs)
9781 +static irqreturn_t tpmif_int(int irq, void *tpm_priv)
9782 {
9783 struct tpm_private *tp = tpm_priv;
9784 unsigned long flags;
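/*
 * Editor's note (sketch, not part of the patch): 2.6.19 removed the pt_regs
 * argument from interrupt handlers, which is what every handler-prototype
 * hunk in this patch tracks. A converted handler that still needs the
 * registers fetches them with get_irq_regs():
 */
#include <linux/interrupt.h>
#include <asm/irq_regs.h>

static irqreturn_t mydev_int(int irq, void *dev_id)	/* no pt_regs any more */
{
	struct pt_regs *regs = get_irq_regs();	/* only when actually needed */

	(void)regs;
	return IRQ_HANDLED;
}

/* registration is unchanged: request_irq(irq, mydev_int, 0, "mydev", dev); */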
9785 Index: head-2008-12-01/drivers/pci/Kconfig
9786 ===================================================================
9787 --- head-2008-12-01.orig/drivers/pci/Kconfig 2008-12-03 15:49:14.000000000 +0100
9788 +++ head-2008-12-01/drivers/pci/Kconfig 2008-12-01 11:29:05.000000000 +0100
9789 @@ -48,7 +48,7 @@ config PCI_DEBUG
9790 config HT_IRQ
9791 bool "Interrupts on hypertransport devices"
9792 default y
9793 - depends on PCI && X86_LOCAL_APIC && X86_IO_APIC
9794 + depends on PCI && X86_LOCAL_APIC && X86_IO_APIC && !XEN
9795 help
9796 This allows native hypertransport devices to use interrupts.
9797
9798 Index: head-2008-12-01/drivers/pci/msi-xen.c
9799 ===================================================================
9800 --- head-2008-12-01.orig/drivers/pci/msi-xen.c 2008-12-03 15:49:14.000000000 +0100
9801 +++ head-2008-12-01/drivers/pci/msi-xen.c 2008-12-01 11:29:05.000000000 +0100
9802 @@ -6,6 +6,7 @@
9803 * Copyright (C) Tom Long Nguyen (tom.l.nguyen@intel.com)
9804 */
9805
9806 +#include <linux/err.h>
9807 #include <linux/mm.h>
9808 #include <linux/irq.h>
9809 #include <linux/interrupt.h>
9810 @@ -14,6 +15,7 @@
9811 #include <linux/smp_lock.h>
9812 #include <linux/pci.h>
9813 #include <linux/proc_fs.h>
9814 +#include <linux/msi.h>
9815
9816 #include <xen/evtchn.h>
9817
9818 @@ -26,14 +28,6 @@
9819
9820 static int pci_msi_enable = 1;
9821
9822 -static struct msi_ops *msi_ops;
9823 -
9824 -int msi_register(struct msi_ops *ops)
9825 -{
9826 - msi_ops = ops;
9827 - return 0;
9828 -}
9829 -
9830 static LIST_HEAD(msi_dev_head);
9831 DEFINE_SPINLOCK(msi_dev_lock);
9832
9833 @@ -406,9 +400,9 @@ void pci_restore_msix_state(struct pci_d
9834 * @dev: pointer to the pci_dev data structure of MSI device function
9835 *
9836 * Setup the MSI capability structure of device function with a single
9837 - * MSI vector, regardless of device function is capable of handling
9838 + * MSI irq, regardless of whether the device function is capable of handling
9839 * multiple messages. A return of zero indicates the successful setup
9840 - * of an entry zero with the new MSI vector or non-zero for otherwise.
9841 + * of an entry zero with the new MSI irq or non-zero for otherwise.
9842 **/
9843 static int msi_capability_init(struct pci_dev *dev)
9844 {
9845 @@ -422,11 +416,11 @@ static int msi_capability_init(struct pc
9846 if (pirq < 0)
9847 return -EBUSY;
9848
9849 - dev->irq = pirq;
9850 /* Set MSI enabled bits */
9851 enable_msi_mode(dev, pos, PCI_CAP_ID_MSI);
9852 dev->msi_enabled = 1;
9853
9854 + dev->irq = pirq;
9855 return 0;
9856 }
9857
9858 @@ -437,8 +431,8 @@ static int msi_capability_init(struct pc
9859 * @nvec: number of @entries
9860 *
9861 * Setup the MSI-X capability structure of device function with a
9862 - * single MSI-X vector. A return of zero indicates the successful setup of
9863 - * requested MSI-X entries with allocated vectors or non-zero for otherwise.
9864 + * single MSI-X irq. A return of zero indicates the successful setup of
9865 + * requested MSI-X entries with allocated irqs or non-zero for otherwise.
9866 **/
9867 static int msix_capability_init(struct pci_dev *dev,
9868 struct msix_entry *entries, int nvec)
9869 @@ -480,12 +474,18 @@ static int msix_capability_init(struct p
9870 }
9871
9872 if (i != nvec) {
9873 + int avail = i - 1;
9874 for (j = --i; j >= 0; j--) {
9875 msi_unmap_pirq(dev, entries[j].vector);
9876 detach_pirq_entry(entries[j].entry, msi_dev_entry);
9877 entries[j].vector = 0;
9878 }
9879 - return -EBUSY;
9880 + /* If we had some success, report the number of irqs
9881 + * we succeeded in setting up.
9882 + */
9883 + if (avail <= 0)
9884 + avail = -EBUSY;
9885 + return avail;
9886 }
9887
9888 enable_msi_mode(dev, pos, PCI_CAP_ID_MSIX);
9889 @@ -495,11 +495,40 @@ static int msix_capability_init(struct p
9890 }
9891
9892 /**
9893 + * pci_msi_supported - check whether MSI may be enabled on device
9894 + * @dev: pointer to the pci_dev data structure of MSI device function
9895 + *
9896 + * Look at global flags, the device itself, and its parent busses
9897 + * to return 0 if MSI are supported for the device.
9898 + **/
9899 +static
9900 +int pci_msi_supported(struct pci_dev * dev)
9901 +{
9902 + struct pci_bus *bus;
9903 +
9904 + /* MSI must be globally enabled and supported by the device */
9905 + if (!pci_msi_enable || !dev || dev->no_msi)
9906 + return -EINVAL;
9907 +
9908 + /* Any bridge which does NOT route MSI transactions from its
9909 + * secondary bus to its primary bus must set NO_MSI flag on
9910 + * the secondary pci_bus.
9911 + * We expect only arch-specific PCI host bus controller driver
9912 + * or quirks for specific PCI bridges to be setting NO_MSI.
9913 + */
9914 + for (bus = dev->bus; bus; bus = bus->parent)
9915 + if (bus->bus_flags & PCI_BUS_FLAGS_NO_MSI)
9916 + return -EINVAL;
9917 +
9918 + return 0;
9919 +}
9920 +
9921 +/**
9922 * pci_enable_msi - configure device's MSI capability structure
9923 * @dev: pointer to the pci_dev data structure of MSI device function
9924 *
9925 * Setup the MSI capability structure of device function with
9926 - * a single MSI vector upon its software driver call to request for
9927 + * a single MSI irq upon its software driver call to request for
9928 * MSI mode enabled on its hardware device function. A return of zero
9929 * indicates the successful setup of an entry zero with the new MSI
9930 * vector or non-zero for otherwise.
9931 @@ -508,13 +537,10 @@ extern int pci_frontend_enable_msi(struc
9932 int pci_enable_msi(struct pci_dev* dev)
9933 {
9934 struct pci_bus *bus;
9935 - int pos, temp, status = -EINVAL;
9936 -
9937 - if (!pci_msi_enable || !dev)
9938 - return status;
9939 + int pos, temp, status;
9940
9941 - if (dev->no_msi)
9942 - return status;
9943 + if (pci_msi_supported(dev) < 0)
9944 + return -EINVAL;
9945
9946 for (bus = dev->bus; bus; bus = bus->parent)
9947 if (bus->bus_flags & PCI_BUS_FLAGS_NO_MSI)
9948 @@ -547,10 +573,10 @@ int pci_enable_msi(struct pci_dev* dev)
9949 if (!pos)
9950 return -EINVAL;
9951
9952 - /* Check whether driver already requested for MSI-X vectors */
9953 + /* Check whether driver already requested for MSI-X irqs */
9954 if (dev->msix_enabled) {
9955 printk(KERN_INFO "PCI: %s: Can't enable MSI. "
9956 - "Device already has MSI-X vectors assigned\n",
9957 + "Device already has MSI-X irq assigned\n",
9958 pci_name(dev));
9959 dev->irq = temp;
9960 return -EINVAL;
9961 @@ -602,36 +628,28 @@ void pci_disable_msi(struct pci_dev* dev
9962 * pci_enable_msix - configure device's MSI-X capability structure
9963 * @dev: pointer to the pci_dev data structure of MSI-X device function
9964 * @entries: pointer to an array of MSI-X entries
9965 - * @nvec: number of MSI-X vectors requested for allocation by device driver
9966 + * @nvec: number of MSI-X irqs requested for allocation by device driver
9967 *
9968 * Setup the MSI-X capability structure of device function with the number
9969 - * of requested vectors upon its software driver call to request for
9970 + * of requested irqs upon its software driver call to request for
9971 * MSI-X mode enabled on its hardware device function. A return of zero
9972 * indicates the successful configuration of MSI-X capability structure
9973 - * with new allocated MSI-X vectors. A return of < 0 indicates a failure.
9974 + * with new allocated MSI-X irqs. A return of < 0 indicates a failure.
9975 * Or a return of > 0 indicates that driver request is exceeding the number
9976 - * of vectors available. Driver should use the returned value to re-send
9977 + * of irqs available. Driver should use the returned value to re-send
9978 * its request.
9979 **/
9980 extern int pci_frontend_enable_msix(struct pci_dev *dev,
9981 struct msix_entry *entries, int nvec);
9982 int pci_enable_msix(struct pci_dev* dev, struct msix_entry *entries, int nvec)
9983 {
9984 - struct pci_bus *bus;
9985 int status, pos, nr_entries;
9986 int i, j, temp;
9987 u16 control;
9988
9989 - if (!pci_msi_enable || !dev || !entries)
9990 + if (!entries || pci_msi_supported(dev) < 0)
9991 return -EINVAL;
9992
9993 - if (dev->no_msi)
9994 - return -EINVAL;
9995 -
9996 - for (bus = dev->bus; bus; bus = bus->parent)
9997 - if (bus->bus_flags & PCI_BUS_FLAGS_NO_MSI)
9998 - return -EINVAL;
9999 -
10000 #ifdef CONFIG_XEN_PCIDEV_FRONTEND
10001 if (!is_initial_xendomain()) {
10002 struct msi_dev_list *msi_dev_entry;
10003 @@ -694,7 +712,7 @@ int pci_enable_msix(struct pci_dev* dev,
10004 /* Check whether driver already requested for MSI vector */
10005 if (dev->msi_enabled) {
10006 printk(KERN_INFO "PCI: %s: Can't enable MSI-X. "
10007 - "Device already has an MSI vector assigned\n",
10008 + "Device already has an MSI irq assigned\n",
10009 pci_name(dev));
10010 dev->irq = temp;
10011 return -EINVAL;
10012 @@ -757,11 +775,11 @@ void pci_disable_msix(struct pci_dev* de
10013 }
10014
10015 /**
10016 - * msi_remove_pci_irq_vectors - reclaim MSI(X) vectors to unused state
10017 + * msi_remove_pci_irq_vectors - reclaim MSI(X) irqs to unused state
10018 * @dev: pointer to the pci_dev data structure of MSI(X) device function
10019 *
10020 * Being called during hotplug remove, from which the device function
10021 - * is hot-removed. All previous assigned MSI/MSI-X vectors, if
10022 + * is hot-removed. All previous assigned MSI/MSI-X irqs, if
10023 * allocated for this device function, are reclaimed to unused state,
10024 * which may be used later on.
10025 **/
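/*
 * Editor's sketch (not part of the patch): the "return avail" change above
 * implements the convention documented in the kerneldoc -- a positive return
 * from pci_enable_msix() is the number of irqs actually available, and the
 * driver is expected to retry with that many. A typical caller loop:
 */
#include <linux/pci.h>

static int mydev_enable_msix(struct pci_dev *pdev,
			     struct msix_entry *entries, int nvec)
{
	while (nvec > 0) {
		int rc = pci_enable_msix(pdev, entries, nvec);

		if (rc == 0)
			return nvec;	/* got everything we asked for */
		if (rc < 0)
			return rc;	/* hard failure */
		nvec = rc;		/* rc > 0: retry with what is available */
	}
	return -ENOSPC;
}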
10026 Index: head-2008-12-01/drivers/xen/Kconfig
10027 ===================================================================
10028 --- head-2008-12-01.orig/drivers/xen/Kconfig 2008-12-03 15:49:14.000000000 +0100
10029 +++ head-2008-12-01/drivers/xen/Kconfig 2008-12-01 11:29:05.000000000 +0100
10030 @@ -287,6 +287,9 @@ endmenu
10031 config HAVE_IRQ_IGNORE_UNHANDLED
10032 def_bool y
10033
10034 +config GENERIC_HARDIRQS_NO__DO_IRQ
10035 + def_bool y
10036 +
10037 config NO_IDLE_HZ
10038 def_bool y
10039
10040 Index: head-2008-12-01/drivers/xen/balloon/balloon.c
10041 ===================================================================
10042 --- head-2008-12-01.orig/drivers/xen/balloon/balloon.c 2008-12-03 15:49:14.000000000 +0100
10043 +++ head-2008-12-01/drivers/xen/balloon/balloon.c 2008-12-01 11:29:05.000000000 +0100
10044 @@ -84,7 +84,7 @@ static unsigned long frame_list[PAGE_SIZ
10045 /* VM /proc information for memory */
10046 extern unsigned long totalram_pages;
10047
10048 -#ifndef MODULE
10049 +#if !defined(MODULE) && defined(CONFIG_HIGHMEM)
10050 extern unsigned long totalhigh_pages;
10051 #define inc_totalhigh_pages() (totalhigh_pages++)
10052 #define dec_totalhigh_pages() (totalhigh_pages--)
10053 Index: head-2008-12-01/drivers/xen/blkback/blkback.c
10054 ===================================================================
10055 --- head-2008-12-01.orig/drivers/xen/blkback/blkback.c 2008-12-03 15:49:14.000000000 +0100
10056 +++ head-2008-12-01/drivers/xen/blkback/blkback.c 2008-12-01 11:29:05.000000000 +0100
10057 @@ -288,7 +288,7 @@ static void blkif_notify_work(blkif_t *b
10058 wake_up(&blkif->wq);
10059 }
10060
10061 -irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs)
10062 +irqreturn_t blkif_be_int(int irq, void *dev_id)
10063 {
10064 blkif_notify_work(dev_id);
10065 return IRQ_HANDLED;
10066 Index: head-2008-12-01/drivers/xen/blkback/common.h
10067 ===================================================================
10068 --- head-2008-12-01.orig/drivers/xen/blkback/common.h 2008-12-03 15:49:14.000000000 +0100
10069 +++ head-2008-12-01/drivers/xen/blkback/common.h 2008-12-01 11:29:05.000000000 +0100
10070 @@ -130,7 +130,7 @@ void blkif_interface_init(void);
10071
10072 void blkif_xenbus_init(void);
10073
10074 -irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs);
10075 +irqreturn_t blkif_be_int(int irq, void *dev_id);
10076 int blkif_schedule(void *arg);
10077
10078 int blkback_barrier(struct xenbus_transaction xbt,
10079 Index: head-2008-12-01/drivers/xen/blkfront/blkfront.c
10080 ===================================================================
10081 --- head-2008-12-01.orig/drivers/xen/blkfront/blkfront.c 2008-12-03 15:49:14.000000000 +0100
10082 +++ head-2008-12-01/drivers/xen/blkfront/blkfront.c 2008-12-01 11:29:05.000000000 +0100
10083 @@ -69,7 +69,7 @@ static int setup_blkring(struct xenbus_d
10084
10085 static void kick_pending_request_queues(struct blkfront_info *);
10086
10087 -static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs);
10088 +static irqreturn_t blkif_int(int irq, void *dev_id);
10089 static void blkif_restart_queue(void *arg);
10090 static void blkif_recover(struct blkfront_info *);
10091 static void blkif_completion(struct blk_shadow *);
10092 @@ -712,7 +712,7 @@ void do_blkif_request(request_queue_t *r
10093 }
10094
10095
10096 -static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs)
10097 +static irqreturn_t blkif_int(int irq, void *dev_id)
10098 {
10099 struct request *req;
10100 blkif_response_t *bret;
10101 Index: head-2008-12-01/drivers/xen/blktap/blktap.c
10102 ===================================================================
10103 --- head-2008-12-01.orig/drivers/xen/blktap/blktap.c 2008-12-03 15:49:14.000000000 +0100
10104 +++ head-2008-12-01/drivers/xen/blktap/blktap.c 2008-12-01 11:29:05.000000000 +0100
10105 @@ -1225,7 +1225,7 @@ static void blkif_notify_work(blkif_t *b
10106 wake_up(&blkif->wq);
10107 }
10108
10109 -irqreturn_t tap_blkif_be_int(int irq, void *dev_id, struct pt_regs *regs)
10110 +irqreturn_t tap_blkif_be_int(int irq, void *dev_id)
10111 {
10112 blkif_notify_work(dev_id);
10113 return IRQ_HANDLED;
10114 Index: head-2008-12-01/drivers/xen/blktap/common.h
10115 ===================================================================
10116 --- head-2008-12-01.orig/drivers/xen/blktap/common.h 2008-12-03 15:49:14.000000000 +0100
10117 +++ head-2008-12-01/drivers/xen/blktap/common.h 2008-12-01 11:29:05.000000000 +0100
10118 @@ -113,7 +113,7 @@ void tap_blkif_interface_init(void);
10119
10120 void tap_blkif_xenbus_init(void);
10121
10122 -irqreturn_t tap_blkif_be_int(int irq, void *dev_id, struct pt_regs *regs);
10123 +irqreturn_t tap_blkif_be_int(int irq, void *dev_id);
10124 int tap_blkif_schedule(void *arg);
10125
10126 int dom_to_devid(domid_t domid, int xenbus_id, blkif_t *blkif);
10127 Index: head-2008-12-01/drivers/xen/console/console.c
10128 ===================================================================
10129 --- head-2008-12-01.orig/drivers/xen/console/console.c 2008-12-03 15:49:14.000000000 +0100
10130 +++ head-2008-12-01/drivers/xen/console/console.c 2008-12-01 11:29:05.000000000 +0100
10131 @@ -345,7 +345,7 @@ static struct tty_struct *xencons_tty;
10132 static int xencons_priv_irq;
10133 static char x_char;
10134
10135 -void xencons_rx(char *buf, unsigned len, struct pt_regs *regs)
10136 +void xencons_rx(char *buf, unsigned len)
10137 {
10138 int i;
10139 unsigned long flags;
10140 @@ -370,8 +370,7 @@ void xencons_rx(char *buf, unsigned len,
10141 if (time_before(jiffies, sysrq_timeout)) {
10142 spin_unlock_irqrestore(
10143 &xencons_lock, flags);
10144 - handle_sysrq(
10145 - buf[i], regs, xencons_tty);
10146 + handle_sysrq(buf[i], xencons_tty);
10147 spin_lock_irqsave(
10148 &xencons_lock, flags);
10149 continue;
10150 @@ -436,14 +435,13 @@ void xencons_tx(void)
10151 }
10152
10153 /* Privileged receive callback and transmit kicker. */
10154 -static irqreturn_t xencons_priv_interrupt(int irq, void *dev_id,
10155 - struct pt_regs *regs)
10156 +static irqreturn_t xencons_priv_interrupt(int irq, void *dev_id)
10157 {
10158 static char rbuf[16];
10159 int l;
10160
10161 while ((l = HYPERVISOR_console_io(CONSOLEIO_read, 16, rbuf)) > 0)
10162 - xencons_rx(rbuf, l, regs);
10163 + xencons_rx(rbuf, l);
10164
10165 xencons_tx();
10166
10167 @@ -631,7 +629,7 @@ static void xencons_close(struct tty_str
10168 spin_unlock_irqrestore(&xencons_lock, flags);
10169 }
10170
10171 -static struct tty_operations xencons_ops = {
10172 +static const struct tty_operations xencons_ops = {
10173 .open = xencons_open,
10174 .close = xencons_close,
10175 .write = xencons_write,
10176 Index: head-2008-12-01/drivers/xen/console/xencons_ring.c
10177 ===================================================================
10178 --- head-2008-12-01.orig/drivers/xen/console/xencons_ring.c 2008-12-03 15:49:14.000000000 +0100
10179 +++ head-2008-12-01/drivers/xen/console/xencons_ring.c 2008-12-01 11:29:05.000000000 +0100
10180 @@ -83,7 +83,7 @@ int xencons_ring_send(const char *data,
10181 return sent;
10182 }
10183
10184 -static irqreturn_t handle_input(int irq, void *unused, struct pt_regs *regs)
10185 +static irqreturn_t handle_input(int irq, void *unused)
10186 {
10187 struct xencons_interface *intf = xencons_interface();
10188 XENCONS_RING_IDX cons, prod;
10189 @@ -94,7 +94,7 @@ static irqreturn_t handle_input(int irq,
10190 BUG_ON((prod - cons) > sizeof(intf->in));
10191
10192 while (cons != prod) {
10193 - xencons_rx(intf->in+MASK_XENCONS_IDX(cons,intf->in), 1, regs);
10194 + xencons_rx(intf->in+MASK_XENCONS_IDX(cons,intf->in), 1);
10195 cons++;
10196 }
10197
10198 Index: head-2008-12-01/drivers/xen/core/evtchn.c
10199 ===================================================================
10200 --- head-2008-12-01.orig/drivers/xen/core/evtchn.c 2008-12-03 15:49:14.000000000 +0100
10201 +++ head-2008-12-01/drivers/xen/core/evtchn.c 2008-12-03 15:52:42.000000000 +0100
10202 @@ -515,7 +515,7 @@ static void unbind_from_irq(unsigned int
10203
10204 int bind_caller_port_to_irqhandler(
10205 unsigned int caller_port,
10206 - irqreturn_t (*handler)(int, void *, struct pt_regs *),
10207 + irq_handler_t handler,
10208 unsigned long irqflags,
10209 const char *devname,
10210 void *dev_id)
10211 @@ -538,7 +538,7 @@ EXPORT_SYMBOL_GPL(bind_caller_port_to_ir
10212
10213 int bind_listening_port_to_irqhandler(
10214 unsigned int remote_domain,
10215 - irqreturn_t (*handler)(int, void *, struct pt_regs *),
10216 + irq_handler_t handler,
10217 unsigned long irqflags,
10218 const char *devname,
10219 void *dev_id)
10220 @@ -562,7 +562,7 @@ EXPORT_SYMBOL_GPL(bind_listening_port_to
10221 int bind_interdomain_evtchn_to_irqhandler(
10222 unsigned int remote_domain,
10223 unsigned int remote_port,
10224 - irqreturn_t (*handler)(int, void *, struct pt_regs *),
10225 + irq_handler_t handler,
10226 unsigned long irqflags,
10227 const char *devname,
10228 void *dev_id)
10229 @@ -586,7 +586,7 @@ EXPORT_SYMBOL_GPL(bind_interdomain_evtch
10230 int bind_virq_to_irqhandler(
10231 unsigned int virq,
10232 unsigned int cpu,
10233 - irqreturn_t (*handler)(int, void *, struct pt_regs *),
10234 + irq_handler_t handler,
10235 unsigned long irqflags,
10236 const char *devname,
10237 void *dev_id)
10238 @@ -610,7 +610,7 @@ EXPORT_SYMBOL_GPL(bind_virq_to_irqhandle
10239 int bind_ipi_to_irqhandler(
10240 unsigned int ipi,
10241 unsigned int cpu,
10242 - irqreturn_t (*handler)(int, void *, struct pt_regs *),
10243 + irq_handler_t handler,
10244 unsigned long irqflags,
10245 const char *devname,
10246 void *dev_id)
10247 @@ -695,15 +695,7 @@ static unsigned int startup_dynirq(unsig
10248 return 0;
10249 }
10250
10251 -static void shutdown_dynirq(unsigned int irq)
10252 -{
10253 - int evtchn = evtchn_from_irq(irq);
10254 -
10255 - if (VALID_EVTCHN(evtchn))
10256 - mask_evtchn(evtchn);
10257 -}
10258 -
10259 -static void enable_dynirq(unsigned int irq)
10260 +static void unmask_dynirq(unsigned int irq)
10261 {
10262 int evtchn = evtchn_from_irq(irq);
10263
10264 @@ -711,7 +703,7 @@ static void enable_dynirq(unsigned int i
10265 unmask_evtchn(evtchn);
10266 }
10267
10268 -static void disable_dynirq(unsigned int irq)
10269 +static void mask_dynirq(unsigned int irq)
10270 {
10271 int evtchn = evtchn_from_irq(irq);
10272
10273 @@ -739,12 +731,13 @@ static void end_dynirq(unsigned int irq)
10274 unmask_evtchn(evtchn);
10275 }
10276
10277 -static struct hw_interrupt_type dynirq_type = {
10278 - .typename = "Dynamic-irq",
10279 +static struct irq_chip dynirq_chip = {
10280 + .name = "Dynamic",
10281 .startup = startup_dynirq,
10282 - .shutdown = shutdown_dynirq,
10283 - .enable = enable_dynirq,
10284 - .disable = disable_dynirq,
10285 + .shutdown = mask_dynirq,
10286 + .mask = mask_dynirq,
10287 + .unmask = unmask_dynirq,
10288 + .mask_ack = ack_dynirq,
10289 .ack = ack_dynirq,
10290 .end = end_dynirq,
10291 #ifdef CONFIG_SMP
10292 @@ -859,12 +852,12 @@ static void shutdown_pirq(unsigned int i
10293 irq_info[irq] = mk_irq_info(IRQT_PIRQ, index_from_irq(irq), 0);
10294 }
10295
10296 -static void enable_pirq(unsigned int irq)
10297 +static void unmask_pirq(unsigned int irq)
10298 {
10299 startup_pirq(irq);
10300 }
10301
10302 -static void disable_pirq(unsigned int irq)
10303 +static void mask_pirq(unsigned int irq)
10304 {
10305 }
10306
10307 @@ -891,12 +884,13 @@ static void end_pirq(unsigned int irq)
10308 pirq_unmask_and_notify(evtchn, irq);
10309 }
10310
10311 -static struct hw_interrupt_type pirq_type = {
10312 - .typename = "Phys-irq",
10313 +static struct irq_chip pirq_chip = {
10314 + .name = "Phys",
10315 .startup = startup_pirq,
10316 .shutdown = shutdown_pirq,
10317 - .enable = enable_pirq,
10318 - .disable = disable_pirq,
10319 + .mask = mask_pirq,
10320 + .unmask = unmask_pirq,
10321 + .mask_ack = ack_pirq,
10322 .ack = ack_pirq,
10323 .end = end_pirq,
10324 #ifdef CONFIG_SMP
10325 @@ -1081,7 +1075,8 @@ void evtchn_register_pirq(int irq)
10326 if (identity_mapped_irq(irq))
10327 return;
10328 irq_info[irq] = mk_irq_info(IRQT_PIRQ, irq, 0);
10329 - irq_desc[irq].chip = &pirq_type;
10330 + set_irq_chip_and_handler_name(irq, &pirq_chip, handle_level_irq,
10331 + "level");
10332 }
10333
10334 int evtchn_map_pirq(int irq, int xen_pirq)
10335 @@ -1104,11 +1099,18 @@ int evtchn_map_pirq(int irq, int xen_pir
10336 spin_unlock(&irq_alloc_lock);
10337 if (irq < PIRQ_BASE)
10338 return -ENOSPC;
10339 - irq_desc[irq].chip = &pirq_type;
10340 + set_irq_chip_and_handler_name(irq, &pirq_chip,
10341 + handle_level_irq, "level");
10342 } else if (!xen_pirq) {
10343 if (unlikely(type_from_irq(irq) != IRQT_PIRQ))
10344 return -EINVAL;
10345 - irq_desc[irq].chip = &no_irq_type;
10346 + /*
10347 + * dynamic_irq_cleanup(irq) would seem to be the correct thing
10348 + * here, but cannot be used as we get here also during shutdown
10349 + * when a driver didn't free_irq() its MSI(-X) IRQ(s), which
10350 + * then causes a warning in dynamic_irq_cleanup().
10351 + */
10352 + set_irq_chip_and_handler(irq, NULL, NULL);
10353 irq_info[irq] = IRQ_UNBOUND;
10354 return 0;
10355 } else if (type_from_irq(irq) != IRQT_PIRQ
10356 @@ -1153,10 +1155,9 @@ void __init xen_init_IRQ(void)
10357 for (i = DYNIRQ_BASE; i < (DYNIRQ_BASE + NR_DYNIRQS); i++) {
10358 irq_bindcount[i] = 0;
10359
10360 - irq_desc[i].status = IRQ_DISABLED|IRQ_NOPROBE;
10361 - irq_desc[i].action = NULL;
10362 - irq_desc[i].depth = 1;
10363 - irq_desc[i].chip = &dynirq_type;
10364 + irq_desc[i].status |= IRQ_NOPROBE;
10365 + set_irq_chip_and_handler_name(i, &dynirq_chip,
10366 + handle_level_irq, "level");
10367 }
10368
10369 /* Phys IRQ space is statically bound (1:1 mapping). Nail refcnts. */
10370 @@ -1172,9 +1173,7 @@ void __init xen_init_IRQ(void)
10371 continue;
10372 #endif
10373
10374 - irq_desc[i].status = IRQ_DISABLED;
10375 - irq_desc[i].action = NULL;
10376 - irq_desc[i].depth = 1;
10377 - irq_desc[i].chip = &pirq_type;
10378 + set_irq_chip_and_handler_name(i, &pirq_chip,
10379 + handle_level_irq, "level");
10380 }
10381 }
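/*
 * Editor's note (sketch, not part of the patch): the hw_interrupt_type ->
 * irq_chip conversion above follows the 2.6.19 genirq pattern -- supply
 * mask/unmask/ack (plus a combined mask_ack) and let handle_level_irq()
 * drive the flow. The shape, with hypothetical callbacks:
 */
#include <linux/irq.h>

static void mychip_mask(unsigned int irq)	{ /* silence the line */ }
static void mychip_unmask(unsigned int irq)	{ /* re-enable the line */ }
static void mychip_ack(unsigned int irq)	{ /* acknowledge at the chip */ }

static void mychip_mask_ack(unsigned int irq)
{
	mychip_mask(irq);
	mychip_ack(irq);
}

static struct irq_chip my_chip = {
	.name		= "mychip",
	.mask		= mychip_mask,
	.unmask		= mychip_unmask,
	.mask_ack	= mychip_mask_ack,
	.ack		= mychip_ack,
};

/* at init, for each irq the chip owns: */
/* set_irq_chip_and_handler_name(irq, &my_chip, handle_level_irq, "level"); */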
10382 Index: head-2008-12-01/drivers/xen/core/reboot.c
10383 ===================================================================
10384 --- head-2008-12-01.orig/drivers/xen/core/reboot.c 2008-12-03 15:49:14.000000000 +0100
10385 +++ head-2008-12-01/drivers/xen/core/reboot.c 2008-12-01 11:29:05.000000000 +0100
10386 @@ -14,6 +14,7 @@
10387
10388 #ifdef HAVE_XEN_PLATFORM_COMPAT_H
10389 #include <xen/platform-compat.h>
10390 +#undef handle_sysrq
10391 #endif
10392
10393 MODULE_LICENSE("Dual BSD/GPL");
10394 @@ -231,7 +232,7 @@ static void sysrq_handler(struct xenbus_
10395
10396 #ifdef CONFIG_MAGIC_SYSRQ
10397 if (sysrq_key != '\0')
10398 - handle_sysrq(sysrq_key, NULL, NULL);
10399 + handle_sysrq(sysrq_key, NULL);
10400 #endif
10401 }
10402
10403 @@ -245,7 +246,7 @@ static struct xenbus_watch sysrq_watch =
10404 .callback = sysrq_handler
10405 };
10406
10407 -static irqreturn_t suspend_int(int irq, void* dev_id, struct pt_regs *ptregs)
10408 +static irqreturn_t suspend_int(int irq, void* dev_id)
10409 {
10410 switch_shutdown_state(SHUTDOWN_SUSPEND);
10411 return IRQ_HANDLED;
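
The suspend_int() change above is the first of many identical conversions in this patch: 2.6.19 dropped the struct pt_regs * argument from interrupt handlers (and, as the hunk above also shows, from handle_sysrq()); a handler that genuinely needs register state now fetches it with get_irq_regs(). A minimal sketch, with a hypothetical handler name:

	#include <linux/interrupt.h>
	#include <asm/irq_regs.h>

	/* 2.6.19+ prototype: (int irq, void *dev_id) only. */
	static irqreturn_t example_intr(int irq, void *dev_id)
	{
		struct pt_regs *regs = get_irq_regs(); /* only if needed */

		(void)regs; /* almost no handler in this patch uses them */
		return IRQ_HANDLED;
	}
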
10412 Index: head-2008-12-01/drivers/xen/core/smpboot.c
10413 ===================================================================
10414 --- head-2008-12-01.orig/drivers/xen/core/smpboot.c 2008-12-03 15:49:14.000000000 +0100
10415 +++ head-2008-12-01/drivers/xen/core/smpboot.c 2008-12-01 11:29:05.000000000 +0100
10416 @@ -25,8 +25,8 @@
10417 #include <xen/cpu_hotplug.h>
10418 #include <xen/xenbus.h>
10419
10420 -extern irqreturn_t smp_reschedule_interrupt(int, void *, struct pt_regs *);
10421 -extern irqreturn_t smp_call_function_interrupt(int, void *, struct pt_regs *);
10422 +extern irqreturn_t smp_reschedule_interrupt(int, void *);
10423 +extern irqreturn_t smp_call_function_interrupt(int, void *);
10424
10425 extern int local_setup_timer(unsigned int cpu);
10426 extern void local_teardown_timer(unsigned int cpu);
10427 @@ -66,8 +66,6 @@ EXPORT_SYMBOL(cpu_core_map);
10428 #if defined(__i386__)
10429 u8 x86_cpu_to_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = 0xff };
10430 EXPORT_SYMBOL(x86_cpu_to_apicid);
10431 -#elif !defined(CONFIG_X86_IO_APIC)
10432 -unsigned int maxcpus = NR_CPUS;
10433 #endif
10434
10435 void __init prefill_possible_map(void)
10436 Index: head-2008-12-01/drivers/xen/fbfront/xenfb.c
10437 ===================================================================
10438 --- head-2008-12-01.orig/drivers/xen/fbfront/xenfb.c 2008-12-03 15:49:14.000000000 +0100
10439 +++ head-2008-12-01/drivers/xen/fbfront/xenfb.c 2008-12-01 11:29:05.000000000 +0100
10440 @@ -523,8 +523,7 @@ static struct fb_ops xenfb_fb_ops = {
10441 .fb_set_par = xenfb_set_par,
10442 };
10443
10444 -static irqreturn_t xenfb_event_handler(int rq, void *dev_id,
10445 - struct pt_regs *regs)
10446 +static irqreturn_t xenfb_event_handler(int rq, void *dev_id)
10447 {
10448 /*
10449 * No in events recognized, simply ignore them all.
10450 Index: head-2008-12-01/drivers/xen/fbfront/xenkbd.c
10451 ===================================================================
10452 --- head-2008-12-01.orig/drivers/xen/fbfront/xenkbd.c 2008-12-03 15:49:14.000000000 +0100
10453 +++ head-2008-12-01/drivers/xen/fbfront/xenkbd.c 2008-12-01 11:29:05.000000000 +0100
10454 @@ -46,7 +46,7 @@ static void xenkbd_disconnect_backend(st
10455 * to do that.
10456 */
10457
10458 -static irqreturn_t input_handler(int rq, void *dev_id, struct pt_regs *regs)
10459 +static irqreturn_t input_handler(int rq, void *dev_id)
10460 {
10461 struct xenkbd_info *info = dev_id;
10462 struct xenkbd_page *page = info->page;
10463 Index: head-2008-12-01/drivers/xen/gntdev/gntdev.c
10464 ===================================================================
10465 --- head-2008-12-01.orig/drivers/xen/gntdev/gntdev.c 2008-12-03 15:49:14.000000000 +0100
10466 +++ head-2008-12-01/drivers/xen/gntdev/gntdev.c 2008-12-01 11:29:05.000000000 +0100
10467 @@ -752,9 +752,6 @@ static pte_t gntdev_clear_pte(struct vm_
10468 BUG();
10469 }
10470
10471 - /* Copy the existing value of the PTE for returning. */
10472 - copy = *ptep;
10473 -
10474 /* Calculate the grant relating to this PTE. */
10475 slot_index = vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT);
10476
10477 @@ -769,6 +766,10 @@ static pte_t gntdev_clear_pte(struct vm_
10478 GNTDEV_INVALID_HANDLE &&
10479 !xen_feature(XENFEAT_auto_translated_physmap)) {
10480 /* NOT USING SHADOW PAGE TABLES. */
10481 +
10482 + /* Copy the existing value of the PTE for returning. */
10483 + copy = *ptep;
10484 +
10485 gnttab_set_unmap_op(&op, virt_to_machine(ptep),
10486 GNTMAP_contains_pte,
10487 private_data->grants[slot_index]
10488 @@ -781,7 +782,7 @@ static pte_t gntdev_clear_pte(struct vm_
10489 op.status);
10490 } else {
10491 /* USING SHADOW PAGE TABLES. */
10492 - pte_clear_full(vma->vm_mm, addr, ptep, is_fullmm);
10493 + copy = ptep_get_and_clear_full(vma->vm_mm, addr, ptep, is_fullmm);
10494 }
10495
10496 /* Finally, we unmap the grant from kernel space. */
10497 @@ -809,7 +810,7 @@ static pte_t gntdev_clear_pte(struct vm_
10498 >> PAGE_SHIFT, INVALID_P2M_ENTRY);
10499
10500 } else {
10501 - pte_clear_full(vma->vm_mm, addr, ptep, is_fullmm);
10502 + copy = ptep_get_and_clear_full(vma->vm_mm, addr, ptep, is_fullmm);
10503 }
10504
10505 return copy;
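
The gntdev change above is a correctness fix, not just a rename: gntdev_clear_pte() must return the PTE's previous value, but pte_clear_full() discards it, so both clearing paths now use ptep_get_and_clear_full(), which clears the entry and hands back the old contents in one step (and the early copy of *ptep is deferred into the branch that still needs it). Sketch of the idiom, names hypothetical:

	/* Clear a PTE while keeping its old value for the caller. */
	pte_t old = ptep_get_and_clear_full(vma->vm_mm, addr, ptep, is_fullmm);
	/* 'old' can now be inspected or returned; pte_clear_full() would
	 * simply have thrown it away. */
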
10506 Index: head-2008-12-01/drivers/xen/netback/accel.c
10507 ===================================================================
10508 --- head-2008-12-01.orig/drivers/xen/netback/accel.c 2008-12-03 15:49:14.000000000 +0100
10509 +++ head-2008-12-01/drivers/xen/netback/accel.c 2008-12-01 11:29:05.000000000 +0100
10510 @@ -65,7 +65,7 @@ static int match_accelerator(struct xenb
10511
10512 if (IS_ERR(eth_name)) {
10513 /* Probably means not present */
10514 - DPRINTK("%s: no match due to xenbus_read accel error %d\n",
10515 + DPRINTK("%s: no match due to xenbus_read accel error %ld\n",
10516 __FUNCTION__, PTR_ERR(eth_name));
10517 return 0;
10518 } else {
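
The DPRINTK fix above corrects a printk format mismatch: PTR_ERR() returns a long, so it needs %ld rather than %d (the tpmback hunk further down makes the matching %zu-for-size_t fix). A minimal sketch:

	#include <linux/err.h>
	#include <linux/kernel.h>

	void fmt_examples(void *p, size_t n)
	{
		if (IS_ERR(p))
			printk(KERN_DEBUG "err %ld\n", PTR_ERR(p)); /* long   */
		printk(KERN_DEBUG "len %zu\n", n);                  /* size_t */
	}
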
10519 Index: head-2008-12-01/drivers/xen/netback/common.h
10520 ===================================================================
10521 --- head-2008-12-01.orig/drivers/xen/netback/common.h 2008-12-03 15:49:14.000000000 +0100
10522 +++ head-2008-12-01/drivers/xen/netback/common.h 2008-12-01 11:29:05.000000000 +0100
10523 @@ -200,7 +200,7 @@ void netif_deschedule_work(netif_t *neti
10524
10525 int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev);
10526 struct net_device_stats *netif_be_get_stats(struct net_device *dev);
10527 -irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs);
10528 +irqreturn_t netif_be_int(int irq, void *dev_id);
10529
10530 static inline int netbk_can_queue(struct net_device *dev)
10531 {
10532 Index: head-2008-12-01/drivers/xen/netback/loopback.c
10533 ===================================================================
10534 --- head-2008-12-01.orig/drivers/xen/netback/loopback.c 2008-12-03 15:49:14.000000000 +0100
10535 +++ head-2008-12-01/drivers/xen/netback/loopback.c 2008-12-01 11:29:05.000000000 +0100
10536 @@ -151,7 +151,7 @@ static int loopback_start_xmit(struct sk
10537 np->stats.rx_bytes += skb->len;
10538 np->stats.rx_packets++;
10539
10540 - if (skb->ip_summed == CHECKSUM_HW) {
10541 + if (skb->ip_summed == CHECKSUM_PARTIAL) {
10542 /* Defer checksum calculation. */
10543 skb->proto_csum_blank = 1;
10544 /* Must be a local packet: assert its integrity. */
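
2.6.19 split the old CHECKSUM_HW value in two: CHECKSUM_PARTIAL on transmit (the checksum at skb->h.raw + skb->csum still has to be filled in) and CHECKSUM_COMPLETE on receive (the hardware already summed the packet). Every test this patch touches is on the transmit side, hence the mechanical CHECKSUM_HW to CHECKSUM_PARTIAL substitutions here and in the netback, netfront and sfc_netfront hunks below. Sketch, with a hypothetical helper:

	#include <linux/skbuff.h>

	/* Transmit path: does this skb still need its checksum computed? */
	static inline int tx_needs_csum(const struct sk_buff *skb)
	{
		return skb->ip_summed == CHECKSUM_PARTIAL; /* was CHECKSUM_HW */
	}
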
10545 Index: head-2008-12-01/drivers/xen/netback/netback.c
10546 ===================================================================
10547 --- head-2008-12-01.orig/drivers/xen/netback/netback.c 2008-12-03 15:49:14.000000000 +0100
10548 +++ head-2008-12-01/drivers/xen/netback/netback.c 2008-12-01 11:29:05.000000000 +0100
10549 @@ -676,7 +676,7 @@ static void net_rx_action(unsigned long
10550 id = meta[npo.meta_cons].id;
10551 flags = nr_frags ? NETRXF_more_data : 0;
10552
10553 - if (skb->ip_summed == CHECKSUM_HW) /* local packet? */
10554 + if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
10555 flags |= NETRXF_csum_blank | NETRXF_data_validated;
10556 else if (skb->proto_data_valid) /* remote but checksummed? */
10557 flags |= NETRXF_data_validated;
10558 @@ -1441,7 +1441,7 @@ static void netif_page_release(struct pa
10559 netif_idx_release(netif_page_index(page));
10560 }
10561
10562 -irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs)
10563 +irqreturn_t netif_be_int(int irq, void *dev_id)
10564 {
10565 netif_t *netif = dev_id;
10566
10567 @@ -1508,7 +1508,7 @@ static netif_rx_response_t *make_rx_resp
10568 }
10569
10570 #ifdef NETBE_DEBUG_INTERRUPT
10571 -static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs)
10572 +static irqreturn_t netif_be_dbg(int irq, void *dev_id)
10573 {
10574 struct list_head *ent;
10575 netif_t *netif;
10576 Index: head-2008-12-01/drivers/xen/netfront/netfront.c
10577 ===================================================================
10578 --- head-2008-12-01.orig/drivers/xen/netfront/netfront.c 2008-12-03 15:49:14.000000000 +0100
10579 +++ head-2008-12-01/drivers/xen/netfront/netfront.c 2008-12-01 11:29:05.000000000 +0100
10580 @@ -136,7 +136,7 @@ static inline int netif_needs_gso(struct
10581 {
10582 return skb_is_gso(skb) &&
10583 (!skb_gso_ok(skb, dev->features) ||
10584 - unlikely(skb->ip_summed != CHECKSUM_HW));
10585 + unlikely(skb->ip_summed != CHECKSUM_PARTIAL));
10586 }
10587 #else
10588 #define HAVE_GSO 0
10589 @@ -222,7 +222,7 @@ static void network_tx_buf_gc(struct net
10590 static void network_alloc_rx_buffers(struct net_device *);
10591 static void send_fake_arp(struct net_device *);
10592
10593 -static irqreturn_t netif_int(int irq, void *dev_id, struct pt_regs *ptregs);
10594 +static irqreturn_t netif_int(int irq, void *dev_id);
10595
10596 #ifdef CONFIG_SYSFS
10597 static int xennet_sysfs_addif(struct net_device *netdev);
10598 @@ -992,7 +992,7 @@ static int network_start_xmit(struct sk_
10599 tx->flags = 0;
10600 extra = NULL;
10601
10602 - if (skb->ip_summed == CHECKSUM_HW) /* local packet? */
10603 + if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
10604 tx->flags |= NETTXF_csum_blank | NETTXF_data_validated;
10605 #ifdef CONFIG_XEN
10606 if (skb->proto_data_valid) /* remote but checksummed? */
10607 @@ -1049,7 +1049,7 @@ static int network_start_xmit(struct sk_
10608 return 0;
10609 }
10610
10611 -static irqreturn_t netif_int(int irq, void *dev_id, struct pt_regs *ptregs)
10612 +static irqreturn_t netif_int(int irq, void *dev_id)
10613 {
10614 struct net_device *dev = dev_id;
10615 struct netfront_info *np = netdev_priv(dev);
10616 Index: head-2008-12-01/drivers/xen/pciback/pciback.h
10617 ===================================================================
10618 --- head-2008-12-01.orig/drivers/xen/pciback/pciback.h 2008-12-03 15:49:14.000000000 +0100
10619 +++ head-2008-12-01/drivers/xen/pciback/pciback.h 2008-12-01 11:29:05.000000000 +0100
10620 @@ -87,7 +87,7 @@ int pciback_publish_pci_roots(struct pci
10621 void pciback_release_devices(struct pciback_device *pdev);
10622
10623 /* Handles events from front-end */
10624 -irqreturn_t pciback_handle_event(int irq, void *dev_id, struct pt_regs *regs);
10625 +irqreturn_t pciback_handle_event(int irq, void *dev_id);
10626 void pciback_do_op(void *data);
10627
10628 int pciback_xenbus_register(void);
10629 Index: head-2008-12-01/drivers/xen/pciback/pciback_ops.c
10630 ===================================================================
10631 --- head-2008-12-01.orig/drivers/xen/pciback/pciback_ops.c 2008-12-03 15:49:14.000000000 +0100
10632 +++ head-2008-12-01/drivers/xen/pciback/pciback_ops.c 2008-12-01 11:29:05.000000000 +0100
10633 @@ -107,7 +107,7 @@ void pciback_do_op(void *data)
10634 test_and_schedule_op(pdev);
10635 }
10636
10637 -irqreturn_t pciback_handle_event(int irq, void *dev_id, struct pt_regs *regs)
10638 +irqreturn_t pciback_handle_event(int irq, void *dev_id)
10639 {
10640 struct pciback_device *pdev = dev_id;
10641
10642 Index: head-2008-12-01/drivers/xen/pcifront/pci_op.c
10643 ===================================================================
10644 --- head-2008-12-01.orig/drivers/xen/pcifront/pci_op.c 2008-12-03 15:49:14.000000000 +0100
10645 +++ head-2008-12-01/drivers/xen/pcifront/pci_op.c 2008-12-01 11:29:05.000000000 +0100
10646 @@ -508,10 +508,16 @@ int __devinit pcifront_rescan_root(struc
10647
10648 d = pci_scan_single_device(b, devfn);
10649 if (d) {
10650 + int err;
10651 +
10652 dev_info(&pdev->xdev->dev, "New device on "
10653 "%04x:%02x:%02x.%02x found.\n", domain, bus,
10654 PCI_SLOT(devfn), PCI_FUNC(devfn));
10655 - pci_bus_add_device(d);
10656 + err = pci_bus_add_device(d);
10657 + if (err)
10658 + dev_err(&pdev->xdev->dev,
10659 + "error %d adding device, continuing.\n",
10660 + err);
10661 }
10662 }
10663
10664 Index: head-2008-12-01/drivers/xen/privcmd/compat_privcmd.c
10665 ===================================================================
10666 --- head-2008-12-01.orig/drivers/xen/privcmd/compat_privcmd.c 2008-12-03 15:49:14.000000000 +0100
10667 +++ head-2008-12-01/drivers/xen/privcmd/compat_privcmd.c 2008-12-01 11:29:05.000000000 +0100
10668 @@ -18,7 +18,6 @@
10669 * Authors: Jimi Xenidis <jimix@watson.ibm.com>
10670 */
10671
10672 -#include <linux/config.h>
10673 #include <linux/compat.h>
10674 #include <linux/ioctl.h>
10675 #include <linux/syscalls.h>
10676 Index: head-2008-12-01/drivers/xen/privcmd/privcmd.c
10677 ===================================================================
10678 --- head-2008-12-01.orig/drivers/xen/privcmd/privcmd.c 2008-12-03 15:49:14.000000000 +0100
10679 +++ head-2008-12-01/drivers/xen/privcmd/privcmd.c 2008-12-01 11:29:05.000000000 +0100
10680 @@ -40,7 +40,7 @@ static int privcmd_enforce_singleshot_ma
10681 static long privcmd_ioctl(struct file *file,
10682 unsigned int cmd, unsigned long data)
10683 {
10684 - int ret = -ENOSYS;
10685 + long ret = -ENOSYS;
10686 void __user *udata = (void __user *) data;
10687
10688 switch (cmd) {
10689 @@ -50,42 +50,15 @@ static long privcmd_ioctl(struct file *f
10690 if (copy_from_user(&hypercall, udata, sizeof(hypercall)))
10691 return -EFAULT;
10692
10693 -#if defined(__i386__)
10694 +#ifdef CONFIG_X86
10695 if (hypercall.op >= (PAGE_SIZE >> 5))
10696 break;
10697 - __asm__ __volatile__ (
10698 - "pushl %%ebx; pushl %%ecx; pushl %%edx; "
10699 - "pushl %%esi; pushl %%edi; "
10700 - "movl 8(%%eax),%%ebx ;"
10701 - "movl 16(%%eax),%%ecx ;"
10702 - "movl 24(%%eax),%%edx ;"
10703 - "movl 32(%%eax),%%esi ;"
10704 - "movl 40(%%eax),%%edi ;"
10705 - "movl (%%eax),%%eax ;"
10706 - "shll $5,%%eax ;"
10707 - "addl $hypercall_page,%%eax ;"
10708 - "call *%%eax ;"
10709 - "popl %%edi; popl %%esi; popl %%edx; "
10710 - "popl %%ecx; popl %%ebx"
10711 - : "=a" (ret) : "0" (&hypercall) : "memory" );
10712 -#elif defined (__x86_64__)
10713 - if (hypercall.op < (PAGE_SIZE >> 5)) {
10714 - long ign1, ign2, ign3;
10715 - __asm__ __volatile__ (
10716 - "movq %8,%%r10; movq %9,%%r8;"
10717 - "shll $5,%%eax ;"
10718 - "addq $hypercall_page,%%rax ;"
10719 - "call *%%rax"
10720 - : "=a" (ret), "=D" (ign1),
10721 - "=S" (ign2), "=d" (ign3)
10722 - : "0" ((unsigned int)hypercall.op),
10723 - "1" (hypercall.arg[0]),
10724 - "2" (hypercall.arg[1]),
10725 - "3" (hypercall.arg[2]),
10726 - "g" (hypercall.arg[3]),
10727 - "g" (hypercall.arg[4])
10728 - : "r8", "r10", "memory" );
10729 - }
10730 + ret = _hypercall(long, (unsigned int)hypercall.op,
10731 + (unsigned long)hypercall.arg[0],
10732 + (unsigned long)hypercall.arg[1],
10733 + (unsigned long)hypercall.arg[2],
10734 + (unsigned long)hypercall.arg[3],
10735 + (unsigned long)hypercall.arg[4]);
10736 #else
10737 ret = privcmd_hypercall(&hypercall);
10738 #endif
10739 @@ -306,7 +279,7 @@ static int privcmd_mmap(struct file * fi
10740 return -ENOSYS;
10741
10742 /* DONTCOPY is essential for Xen as copy_page_range is broken. */
10743 - vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY;
10744 + vma->vm_flags |= VM_RESERVED | VM_IO | VM_PFNMAP | VM_DONTCOPY;
10745 vma->vm_ops = &privcmd_vm_ops;
10746 vma->vm_private_data = NULL;
10747
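
Two things happen in the privcmd hunk above. First, the per-arch inline assembly is folded into the _hypercall() macro this patch adds in hypercall_32.h/hypercall_64.h below, which binds the five arguments to the fixed per-ABI registers and calls indirectly through the hypercall page. Second, the bounds check it keeps is worth spelling out:

	/* The hypercall page is a single page of 32-byte stubs, so there are
	 * PAGE_SIZE >> 5 == 4096 / 32 == 128 valid hypercall numbers; 'op'
	 * must index within them before the indirect call through
	 * hypercall_page + op * 32. */
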
10748 Index: head-2008-12-01/drivers/xen/scsiback/common.h
10749 ===================================================================
10750 --- head-2008-12-01.orig/drivers/xen/scsiback/common.h 2008-12-03 15:49:14.000000000 +0100
10751 +++ head-2008-12-01/drivers/xen/scsiback/common.h 2008-12-01 11:29:05.000000000 +0100
10752 @@ -142,7 +142,7 @@ typedef struct {
10753 #define VSCSIIF_TIMEOUT (900*HZ)
10754
10755
10756 -irqreturn_t scsiback_intr(int, void *, struct pt_regs *);
10757 +irqreturn_t scsiback_intr(int, void *);
10758 int scsiback_init_sring(struct vscsibk_info *info,
10759 unsigned long ring_ref, unsigned int evtchn);
10760 int scsiback_schedule(void *data);
10761 Index: head-2008-12-01/drivers/xen/scsiback/scsiback.c
10762 ===================================================================
10763 --- head-2008-12-01.orig/drivers/xen/scsiback/scsiback.c 2008-12-03 15:49:14.000000000 +0100
10764 +++ head-2008-12-01/drivers/xen/scsiback/scsiback.c 2008-12-01 11:29:05.000000000 +0100
10765 @@ -440,7 +440,7 @@ void scsiback_cmd_exec(pending_req_t *pe
10766 write = (data_dir == DMA_TO_DEVICE);
10767 rq = blk_get_request(pending_req->sdev->request_queue, write, GFP_KERNEL);
10768
10769 - rq->flags |= REQ_BLOCK_PC;
10770 + rq->cmd_type = REQ_TYPE_BLOCK_PC;
10771 rq->cmd_len = cmd_len;
10772 memcpy(rq->cmd, pending_req->cmnd, cmd_len);
10773
10774 @@ -484,7 +484,7 @@ static void scsiback_device_reset_exec(p
10775 }
10776
10777
10778 -irqreturn_t scsiback_intr(int irq, void *dev_id, struct pt_regs *regs)
10779 +irqreturn_t scsiback_intr(int irq, void *dev_id)
10780 {
10781 scsiback_notify_work((struct vscsibk_info *)dev_id);
10782 return IRQ_HANDLED;
10783 Index: head-2008-12-01/drivers/xen/scsifront/common.h
10784 ===================================================================
10785 --- head-2008-12-01.orig/drivers/xen/scsifront/common.h 2008-12-03 15:49:14.000000000 +0100
10786 +++ head-2008-12-01/drivers/xen/scsifront/common.h 2008-12-01 11:29:05.000000000 +0100
10787 @@ -122,7 +122,7 @@ struct vscsifrnt_info {
10788 int scsifront_xenbus_init(void);
10789 void scsifront_xenbus_unregister(void);
10790 int scsifront_schedule(void *data);
10791 -irqreturn_t scsifront_intr(int irq, void *dev_id, struct pt_regs *ptregs);
10792 +irqreturn_t scsifront_intr(int irq, void *dev_id);
10793 int scsifront_cmd_done(struct vscsifrnt_info *info);
10794
10795
10796 Index: head-2008-12-01/drivers/xen/scsifront/scsifront.c
10797 ===================================================================
10798 --- head-2008-12-01.orig/drivers/xen/scsifront/scsifront.c 2008-12-03 15:49:14.000000000 +0100
10799 +++ head-2008-12-01/drivers/xen/scsifront/scsifront.c 2008-12-01 11:29:05.000000000 +0100
10800 @@ -100,7 +100,7 @@ static void scsifront_do_request(struct
10801 notify_remote_via_irq(irq);
10802 }
10803
10804 -irqreturn_t scsifront_intr(int irq, void *dev_id, struct pt_regs *ptregs)
10805 +irqreturn_t scsifront_intr(int irq, void *dev_id)
10806 {
10807 scsifront_notify_work((struct vscsifrnt_info *)dev_id);
10808 return IRQ_HANDLED;
10809 Index: head-2008-12-01/drivers/xen/sfc_netback/accel_xenbus.c
10810 ===================================================================
10811 --- head-2008-12-01.orig/drivers/xen/sfc_netback/accel_xenbus.c 2008-12-03 15:49:14.000000000 +0100
10812 +++ head-2008-12-01/drivers/xen/sfc_netback/accel_xenbus.c 2008-12-01 11:29:05.000000000 +0100
10813 @@ -68,8 +68,7 @@ static void unlink_bend(struct netback_a
10814
10815
10816 /* Demultiplex a message IRQ from the frontend driver. */
10817 -static irqreturn_t msgirq_from_frontend(int irq, void *context,
10818 - struct pt_regs *unused)
10819 +static irqreturn_t msgirq_from_frontend(int irq, void *context)
10820 {
10821 struct xenbus_device *dev = context;
10822 struct netback_accel *bend = NETBACK_ACCEL_FROM_XENBUS_DEVICE(dev);
10823 @@ -84,8 +83,7 @@ static irqreturn_t msgirq_from_frontend(
10824 * functionally, but we need it to pass to the bind function, and may
10825 * get called spuriously
10826 */
10827 -static irqreturn_t netirq_from_frontend(int irq, void *context,
10828 - struct pt_regs *unused)
10829 +static irqreturn_t netirq_from_frontend(int irq, void *context)
10830 {
10831 VPRINTK("netirq %d from device %s\n", irq,
10832 ((struct xenbus_device *)context)->nodename);
10833 Index: head-2008-12-01/drivers/xen/sfc_netfront/accel.h
10834 ===================================================================
10835 --- head-2008-12-01.orig/drivers/xen/sfc_netfront/accel.h 2008-12-03 15:49:14.000000000 +0100
10836 +++ head-2008-12-01/drivers/xen/sfc_netfront/accel.h 2008-12-01 11:29:05.000000000 +0100
10837 @@ -449,10 +449,8 @@ void netfront_accel_msg_tx_fastpath(netf
10838 u32 ip, u16 port, u8 protocol);
10839
10840 /* Process an IRQ received from back end driver */
10841 -irqreturn_t netfront_accel_msg_channel_irq_from_bend(int irq, void *context,
10842 - struct pt_regs *unused);
10843 -irqreturn_t netfront_accel_net_channel_irq_from_bend(int irq, void *context,
10844 - struct pt_regs *unused);
10845 +irqreturn_t netfront_accel_msg_channel_irq_from_bend(int irq, void *context);
10846 +irqreturn_t netfront_accel_net_channel_irq_from_bend(int irq, void *context);
10847
10848 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)
10849 extern void netfront_accel_msg_from_bend(struct work_struct *context);
10850 Index: head-2008-12-01/drivers/xen/sfc_netfront/accel_msg.c
10851 ===================================================================
10852 --- head-2008-12-01.orig/drivers/xen/sfc_netfront/accel_msg.c 2008-12-03 15:49:14.000000000 +0100
10853 +++ head-2008-12-01/drivers/xen/sfc_netfront/accel_msg.c 2008-12-01 11:29:05.000000000 +0100
10854 @@ -490,8 +490,7 @@ void netfront_accel_msg_from_bend(void *
10855 }
10856
10857
10858 -irqreturn_t netfront_accel_msg_channel_irq_from_bend(int irq, void *context,
10859 - struct pt_regs *unused)
10860 +irqreturn_t netfront_accel_msg_channel_irq_from_bend(int irq, void *context)
10861 {
10862 netfront_accel_vnic *vnic = (netfront_accel_vnic *)context;
10863 VPRINTK("irq %d from device %s\n", irq, vnic->dev->nodename);
10864 @@ -502,8 +501,7 @@ irqreturn_t netfront_accel_msg_channel_i
10865 }
10866
10867 /* Process an interrupt received from the NIC via backend */
10868 -irqreturn_t netfront_accel_net_channel_irq_from_bend(int irq, void *context,
10869 - struct pt_regs *unused)
10870 +irqreturn_t netfront_accel_net_channel_irq_from_bend(int irq, void *context)
10871 {
10872 netfront_accel_vnic *vnic = (netfront_accel_vnic *)context;
10873 struct net_device *net_dev = vnic->net_dev;
10874 Index: head-2008-12-01/drivers/xen/sfc_netfront/accel_tso.c
10875 ===================================================================
10876 --- head-2008-12-01.orig/drivers/xen/sfc_netfront/accel_tso.c 2008-12-03 15:49:14.000000000 +0100
10877 +++ head-2008-12-01/drivers/xen/sfc_netfront/accel_tso.c 2008-12-01 11:29:05.000000000 +0100
10878 @@ -363,7 +363,7 @@ int netfront_accel_enqueue_skb_tso(netfr
10879
10880 tso_check_safe(skb);
10881
10882 - if (skb->ip_summed != CHECKSUM_HW)
10883 + if (skb->ip_summed != CHECKSUM_PARTIAL)
10884 EPRINTK("Trying to TSO send a packet without HW checksum\n");
10885
10886 tso_start(&state, skb);
10887 Index: head-2008-12-01/drivers/xen/sfc_netfront/accel_vi.c
10888 ===================================================================
10889 --- head-2008-12-01.orig/drivers/xen/sfc_netfront/accel_vi.c 2008-12-03 15:49:14.000000000 +0100
10890 +++ head-2008-12-01/drivers/xen/sfc_netfront/accel_vi.c 2008-12-01 11:29:05.000000000 +0100
10891 @@ -461,7 +461,7 @@ netfront_accel_enqueue_skb_multi(netfron
10892
10893 frag_i = -1;
10894
10895 - if (skb->ip_summed == CHECKSUM_HW) {
10896 + if (skb->ip_summed == CHECKSUM_PARTIAL) {
10897 /* Set to zero to encourage falcon to work it out for us */
10898 *(u16*)(skb->h.raw + skb->csum) = 0;
10899 }
10900 @@ -580,7 +580,7 @@ netfront_accel_enqueue_skb_single(netfro
10901
10902 kva = buf->pkt_kva;
10903
10904 - if (skb->ip_summed == CHECKSUM_HW) {
10905 + if (skb->ip_summed == CHECKSUM_PARTIAL) {
10906 /* Set to zero to encourage falcon to work it out for us */
10907 *(u16*)(skb->h.raw + skb->csum) = 0;
10908 }
10909 Index: head-2008-12-01/drivers/xen/tpmback/common.h
10910 ===================================================================
10911 --- head-2008-12-01.orig/drivers/xen/tpmback/common.h 2008-12-03 15:49:14.000000000 +0100
10912 +++ head-2008-12-01/drivers/xen/tpmback/common.h 2008-12-01 11:29:05.000000000 +0100
10913 @@ -61,7 +61,7 @@ void tpmif_deschedule_work(tpmif_t * tpm
10914 void tpmif_xenbus_init(void);
10915 void tpmif_xenbus_exit(void);
10916 int tpmif_map(tpmif_t *tpmif, unsigned long shared_page, unsigned int evtchn);
10917 -irqreturn_t tpmif_be_int(int irq, void *dev_id, struct pt_regs *regs);
10918 +irqreturn_t tpmif_be_int(int irq, void *dev_id);
10919
10920 long int tpmback_get_instance(struct backend_info *bi);
10921
10922 Index: head-2008-12-01/drivers/xen/tpmback/tpmback.c
10923 ===================================================================
10924 --- head-2008-12-01.orig/drivers/xen/tpmback/tpmback.c 2008-12-03 15:49:14.000000000 +0100
10925 +++ head-2008-12-01/drivers/xen/tpmback/tpmback.c 2008-12-01 11:29:05.000000000 +0100
10926 @@ -502,7 +502,7 @@ static ssize_t vtpm_op_read(struct file
10927 list_del(&pak->next);
10928 write_unlock_irqrestore(&dataex.pak_lock, flags);
10929
10930 - DPRINTK("size given by app: %d, available: %d\n", size, left);
10931 + DPRINTK("size given by app: %zu, available: %u\n", size, left);
10932
10933 ret_size = min_t(size_t, size, left);
10934
10935 @@ -899,7 +899,7 @@ static void tpm_tx_action(unsigned long
10936 }
10937 }
10938
10939 -irqreturn_t tpmif_be_int(int irq, void *dev_id, struct pt_regs *regs)
10940 +irqreturn_t tpmif_be_int(int irq, void *dev_id)
10941 {
10942 tpmif_t *tpmif = (tpmif_t *) dev_id;
10943
10944 Index: head-2008-12-01/drivers/xen/xenbus/xenbus_comms.c
10945 ===================================================================
10946 --- head-2008-12-01.orig/drivers/xen/xenbus/xenbus_comms.c 2008-12-03 15:49:14.000000000 +0100
10947 +++ head-2008-12-01/drivers/xen/xenbus/xenbus_comms.c 2008-12-01 11:29:05.000000000 +0100
10948 @@ -55,7 +55,7 @@ static DECLARE_WORK(probe_work, xenbus_p
10949
10950 static DECLARE_WAIT_QUEUE_HEAD(xb_waitq);
10951
10952 -static irqreturn_t wake_waiting(int irq, void *unused, struct pt_regs *regs)
10953 +static irqreturn_t wake_waiting(int irq, void *unused)
10954 {
10955 if (unlikely(xenstored_ready == 0)) {
10956 xenstored_ready = 1;
10957 Index: head-2008-12-01/drivers/xen/xenoprof/xenoprofile.c
10958 ===================================================================
10959 --- head-2008-12-01.orig/drivers/xen/xenoprof/xenoprofile.c 2008-12-03 15:49:14.000000000 +0100
10960 +++ head-2008-12-01/drivers/xen/xenoprof/xenoprofile.c 2008-12-01 11:29:05.000000000 +0100
10961 @@ -194,8 +194,7 @@ done:
10962 oprofile_add_domain_switch(COORDINATOR_DOMAIN);
10963 }
10964
10965 -static irqreturn_t
10966 -xenoprof_ovf_interrupt(int irq, void * dev_id, struct pt_regs * regs)
10967 +static irqreturn_t xenoprof_ovf_interrupt(int irq, void *dev_id)
10968 {
10969 struct xenoprof_buf * buf;
10970 static unsigned long flag;
10971 Index: head-2008-12-01/include/asm-generic/pgtable.h
10972 ===================================================================
10973 --- head-2008-12-01.orig/include/asm-generic/pgtable.h 2008-12-03 15:49:14.000000000 +0100
10974 +++ head-2008-12-01/include/asm-generic/pgtable.h 2008-12-01 11:29:05.000000000 +0100
10975 @@ -100,7 +100,7 @@ static inline void ptep_set_wrprotect(st
10976 #endif
10977
10978 #ifndef arch_change_pte_range
10979 -#define arch_change_pte_range(mm, pmd, addr, end, newprot) 0
10980 +#define arch_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable) 0
10981 #endif
10982
10983 #ifndef __HAVE_ARCH_PTE_SAME
10984 Index: head-2008-12-01/include/asm-x86/mach-xen/asm/desc_32.h
10985 ===================================================================
10986 --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/desc_32.h 2008-12-03 15:49:14.000000000 +0100
10987 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/desc_32.h 2008-12-01 11:29:05.000000000 +0100
10988 @@ -32,52 +32,110 @@ static inline struct desc_struct *get_cp
10989 return (struct desc_struct *)per_cpu(cpu_gdt_descr, cpu).address;
10990 }
10991
10992 +/*
10993 + * This is the ldt that every process will get unless we need
10994 + * something other than this.
10995 + */
10996 +extern struct desc_struct default_ldt[];
10997 +extern struct desc_struct idt_table[];
10998 +extern void set_intr_gate(unsigned int irq, void * addr);
10999 +
11000 +static inline void pack_descriptor(__u32 *a, __u32 *b,
11001 + unsigned long base, unsigned long limit, unsigned char type, unsigned char flags)
11002 +{
11003 + *a = ((base & 0xffff) << 16) | (limit & 0xffff);
11004 + *b = (base & 0xff000000) | ((base & 0xff0000) >> 16) |
11005 + (limit & 0x000f0000) | ((type & 0xff) << 8) | ((flags & 0xf) << 20);
11006 +}
11007 +
11008 +static inline void pack_gate(__u32 *a, __u32 *b,
11009 + unsigned long base, unsigned short seg, unsigned char type, unsigned char flags)
11010 +{
11011 + *a = (seg << 16) | (base & 0xffff);
11012 + *b = (base & 0xffff0000) | ((type & 0xff) << 8) | (flags & 0xff);
11013 +}
11014 +
11015 +#define DESCTYPE_LDT 0x82 /* present, system, DPL-0, LDT */
11016 +#define DESCTYPE_TSS 0x89 /* present, system, DPL-0, 32-bit TSS */
11017 +#define DESCTYPE_TASK 0x85 /* present, system, DPL-0, task gate */
11018 +#define DESCTYPE_INT 0x8e /* present, system, DPL-0, interrupt gate */
11019 +#define DESCTYPE_TRAP 0x8f /* present, system, DPL-0, trap gate */
11020 +#define DESCTYPE_DPL3 0x60 /* DPL-3 */
11021 +#define DESCTYPE_S 0x10 /* !system */
11022 +
11023 #define load_TR_desc() __asm__ __volatile__("ltr %w0"::"q" (GDT_ENTRY_TSS*8))
11024 #define load_LDT_desc() __asm__ __volatile__("lldt %w0"::"q" (GDT_ENTRY_LDT*8))
11025
11026 #define load_gdt(dtr) __asm__ __volatile("lgdt %0"::"m" (*dtr))
11027 #define load_idt(dtr) __asm__ __volatile("lidt %0"::"m" (*dtr))
11028 -#define load_tr(tr) __asm__ __volatile("ltr %0"::"mr" (tr))
11029 -#define load_ldt(ldt) __asm__ __volatile("lldt %0"::"mr" (ldt))
11030 +#define load_tr(tr) __asm__ __volatile("ltr %0"::"m" (tr))
11031 +#define load_ldt(ldt) __asm__ __volatile("lldt %0"::"m" (ldt))
11032
11033 #define store_gdt(dtr) __asm__ ("sgdt %0":"=m" (*dtr))
11034 #define store_idt(dtr) __asm__ ("sidt %0":"=m" (*dtr))
11035 -#define store_tr(tr) __asm__ ("str %0":"=mr" (tr))
11036 -#define store_ldt(ldt) __asm__ ("sldt %0":"=mr" (ldt))
11037 +#define store_tr(tr) __asm__ ("str %0":"=m" (tr))
11038 +#define store_ldt(ldt) __asm__ ("sldt %0":"=m" (ldt))
11039
11040 -/*
11041 - * This is the ldt that every process will get unless we need
11042 - * something other than this.
11043 - */
11044 -extern struct desc_struct default_ldt[];
11045 -extern void set_intr_gate(unsigned int irq, void * addr);
11046 +#if TLS_SIZE != 24
11047 +# error update this code.
11048 +#endif
11049 +
11050 +static inline void load_TLS(struct thread_struct *t, unsigned int cpu)
11051 +{
11052 +#define C(i) if (HYPERVISOR_update_descriptor(virt_to_machine(&get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i]), \
11053 + *(u64 *)&t->tls_array[i])) \
11054 + BUG();
11055 + C(0); C(1); C(2);
11056 +#undef C
11057 +}
11058
11059 -#define _set_tssldt_desc(n,addr,limit,type) \
11060 -__asm__ __volatile__ ("movw %w3,0(%2)\n\t" \
11061 - "movw %w1,2(%2)\n\t" \
11062 - "rorl $16,%1\n\t" \
11063 - "movb %b1,4(%2)\n\t" \
11064 - "movb %4,5(%2)\n\t" \
11065 - "movb $0,6(%2)\n\t" \
11066 - "movb %h1,7(%2)\n\t" \
11067 - "rorl $16,%1" \
11068 - : "=m"(*(n)) : "q" (addr), "r"(n), "ir"(limit), "i"(type))
11069 +#ifndef CONFIG_XEN
11070 +static inline void write_dt_entry(void *dt, int entry, __u32 entry_a, __u32 entry_b)
11071 +{
11072 + __u32 *lp = (__u32 *)((char *)dt + entry*8);
11073 + *lp = entry_a;
11074 + *(lp+1) = entry_b;
11075 +}
11076
11077 -#ifndef CONFIG_X86_NO_TSS
11078 -static inline void __set_tss_desc(unsigned int cpu, unsigned int entry, void *addr)
11079 +#define write_ldt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
11080 +#define write_gdt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
11081 +#else
11082 +extern int write_ldt_entry(void *ldt, int entry, __u32 entry_a, __u32 entry_b);
11083 +extern int write_gdt_entry(void *gdt, int entry, __u32 entry_a, __u32 entry_b);
11084 +#endif
11085 +#ifndef CONFIG_X86_NO_IDT
11086 +#define write_idt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
11087 +
11088 +static inline void _set_gate(int gate, unsigned int type, void *addr, unsigned short seg)
11089 {
11090 - _set_tssldt_desc(&get_cpu_gdt_table(cpu)[entry], (int)addr,
11091 - offsetof(struct tss_struct, __cacheline_filler) - 1, 0x89);
11092 + __u32 a, b;
11093 + pack_gate(&a, &b, (unsigned long)addr, seg, type, 0);
11094 + write_idt_entry(idt_table, gate, a, b);
11095 }
11096 +#endif
11097
11098 -#define set_tss_desc(cpu,addr) __set_tss_desc(cpu, GDT_ENTRY_TSS, addr)
11099 +#ifndef CONFIG_X86_NO_TSS
11100 +static inline void __set_tss_desc(unsigned int cpu, unsigned int entry, const void *addr)
11101 +{
11102 + __u32 a, b;
11103 + pack_descriptor(&a, &b, (unsigned long)addr,
11104 + offsetof(struct tss_struct, __cacheline_filler) - 1,
11105 + DESCTYPE_TSS, 0);
11106 + write_gdt_entry(get_cpu_gdt_table(cpu), entry, a, b);
11107 +}
11108 #endif
11109
11110 -static inline void set_ldt_desc(unsigned int cpu, void *addr, unsigned int size)
11111 +static inline void set_ldt_desc(unsigned int cpu, void *addr, unsigned int entries)
11112 {
11113 - _set_tssldt_desc(&get_cpu_gdt_table(cpu)[GDT_ENTRY_LDT], (int)addr, ((size << 3)-1), 0x82);
11114 + __u32 a, b;
11115 + pack_descriptor(&a, &b, (unsigned long)addr,
11116 + entries * sizeof(struct desc_struct) - 1,
11117 + DESCTYPE_LDT, 0);
11118 + write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT, a, b);
11119 }
11120
11121 +#define set_tss_desc(cpu,addr) __set_tss_desc(cpu, GDT_ENTRY_TSS, addr)
11122 +
11123 #define LDT_entry_a(info) \
11124 ((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff))
11125
11126 @@ -103,21 +161,6 @@ static inline void set_ldt_desc(unsigned
11127 (info)->seg_not_present == 1 && \
11128 (info)->useable == 0 )
11129
11130 -extern int write_ldt_entry(void *ldt, int entry, __u32 entry_a, __u32 entry_b);
11131 -
11132 -#if TLS_SIZE != 24
11133 -# error update this code.
11134 -#endif
11135 -
11136 -static inline void load_TLS(struct thread_struct *t, unsigned int cpu)
11137 -{
11138 -#define C(i) if (HYPERVISOR_update_descriptor(virt_to_machine(&get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i]), \
11139 - *(u64 *)&t->tls_array[i])) \
11140 - BUG();
11141 - C(0); C(1); C(2);
11142 -#undef C
11143 -}
11144 -
11145 static inline void clear_LDT(void)
11146 {
11147 int cpu = get_cpu();
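
The desc_32.h rework above replaces the _set_tssldt_desc() inline assembly with plain C helpers that assemble the two 32-bit halves of a GDT entry. A worked example of pack_descriptor() using the definitions above (the input values are arbitrary, chosen only to show the bit layout):

	/* base = 0x12345678, limit = 0xABCDE, type = DESCTYPE_LDT (0x82),
	 * flags = 0:
	 *   a = (0x5678 << 16) | 0xBCDE                   = 0x5678BCDE
	 *   b = 0x12000000 | 0x34 | 0xA0000 | (0x82 << 8) = 0x120A8234
	 * So 'a' carries base[15:0] and limit[15:0], while 'b' carries
	 * base[31:24], base[23:16], limit[19:16], the type byte and the
	 * four flag bits, i.e. the standard i386 descriptor format. */
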
11148 Index: head-2008-12-01/include/asm-x86/mach-xen/asm/fixmap_32.h
11149 ===================================================================
11150 --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/fixmap_32.h 2008-12-03 15:49:14.000000000 +0100
11151 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/fixmap_32.h 2008-12-01 11:29:05.000000000 +0100
11152 @@ -55,7 +55,7 @@ enum fixed_addresses {
11153 #ifdef CONFIG_X86_LOCAL_APIC
11154 FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */
11155 #endif
11156 -#ifdef CONFIG_X86_IO_APIC
11157 +#if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_XEN)
11158 FIX_IO_APIC_BASE_0,
11159 FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS-1,
11160 #endif
11161 @@ -95,10 +95,9 @@ enum fixed_addresses {
11162 __end_of_fixed_addresses
11163 };
11164
11165 -extern void set_fixaddr_top(unsigned long top);
11166 -
11167 extern void __set_fixmap(enum fixed_addresses idx,
11168 maddr_t phys, pgprot_t flags);
11169 +extern void reserve_top_address(unsigned long reserve);
11170
11171 #define set_fixmap(idx, phys) \
11172 __set_fixmap(idx, phys, PAGE_KERNEL)
11173 Index: head-2008-12-01/include/asm-x86/mach-xen/asm/hypercall_32.h
11174 ===================================================================
11175 --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/hypercall_32.h 2008-12-03 15:49:14.000000000 +0100
11176 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/hypercall_32.h 2008-12-01 11:29:05.000000000 +0100
11177 @@ -128,6 +128,23 @@
11178 __res; \
11179 })
11180
11181 +#define _hypercall(type, op, a1, a2, a3, a4, a5) \
11182 +({ \
11183 + type __res; \
11184 + register typeof((a1)+0) __arg1 asm("ebx") = (a1); \
11185 + register typeof((a2)+0) __arg2 asm("ecx") = (a2); \
11186 + register typeof((a3)+0) __arg3 asm("edx") = (a3); \
11187 + register typeof((a4)+0) __arg4 asm("esi") = (a4); \
11188 + register typeof((a5)+0) __arg5 asm("edi") = (a5); \
11189 + asm volatile ( \
11190 + "call *%6" \
11191 + : "=a" (__res), "+r" (__arg1), "+r" (__arg2), \
11192 + "+r" (__arg3), "+r" (__arg4), "+r" (__arg5) \
11193 + : "0" (hypercall_page + (op) * 32) \
11194 + : "memory" ); \
11195 + __res; \
11196 +})
11197 +
11198 static inline int __must_check
11199 HYPERVISOR_set_trap_table(
11200 const trap_info_t *table)
11201 @@ -140,6 +157,8 @@ HYPERVISOR_mmu_update(
11202 mmu_update_t *req, unsigned int count, unsigned int *success_count,
11203 domid_t domid)
11204 {
11205 + if (arch_use_lazy_mmu_mode())
11206 + return xen_multi_mmu_update(req, count, success_count, domid);
11207 return _hypercall4(int, mmu_update, req, count, success_count, domid);
11208 }
11209
11210 @@ -148,6 +167,8 @@ HYPERVISOR_mmuext_op(
11211 struct mmuext_op *op, unsigned int count, unsigned int *success_count,
11212 domid_t domid)
11213 {
11214 + if (arch_use_lazy_mmu_mode())
11215 + return xen_multi_mmuext_op(op, count, success_count, domid);
11216 return _hypercall4(int, mmuext_op, op, count, success_count, domid);
11217 }
11218
11219 @@ -238,6 +259,8 @@ static inline int __must_check
11220 HYPERVISOR_memory_op(
11221 unsigned int cmd, void *arg)
11222 {
11223 + if (arch_use_lazy_mmu_mode())
11224 + xen_multicall_flush(false);
11225 return _hypercall2(int, memory_op, cmd, arg);
11226 }
11227
11228 @@ -253,6 +276,9 @@ HYPERVISOR_update_va_mapping(
11229 unsigned long va, pte_t new_val, unsigned long flags)
11230 {
11231 unsigned long pte_hi = 0;
11232 +
11233 + if (arch_use_lazy_mmu_mode())
11234 + return xen_multi_update_va_mapping(va, new_val, flags);
11235 #ifdef CONFIG_X86_PAE
11236 pte_hi = new_val.pte_high;
11237 #endif
11238 @@ -316,6 +342,8 @@ static inline int __must_check
11239 HYPERVISOR_grant_table_op(
11240 unsigned int cmd, void *uop, unsigned int count)
11241 {
11242 + if (arch_use_lazy_mmu_mode())
11243 + xen_multicall_flush(false);
11244 return _hypercall3(int, grant_table_op, cmd, uop, count);
11245 }
11246
11247 Index: head-2008-12-01/include/asm-x86/mach-xen/asm/hypercall_64.h
11248 ===================================================================
11249 --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/hypercall_64.h 2008-12-03 15:49:14.000000000 +0100
11250 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/hypercall_64.h 2008-12-01 11:29:05.000000000 +0100
11251 @@ -134,6 +134,23 @@
11252 __res; \
11253 })
11254
11255 +#define _hypercall(type, op, a1, a2, a3, a4, a5) \
11256 +({ \
11257 + type __res; \
11258 + register typeof((a1)+0) __arg1 asm("rdi") = (a1); \
11259 + register typeof((a2)+0) __arg2 asm("rsi") = (a2); \
11260 + register typeof((a3)+0) __arg3 asm("rdx") = (a3); \
11261 + register typeof((a4)+0) __arg4 asm("r10") = (a4); \
11262 + register typeof((a5)+0) __arg5 asm("r8") = (a5); \
11263 + asm volatile ( \
11264 + "call *%6" \
11265 + : "=a" (__res), "+r" (__arg1), "+r" (__arg2), \
11266 + "+r" (__arg3), "+r" (__arg4), "+r" (__arg5) \
11267 + : "0" (hypercall_page + (op) * 32) \
11268 + : "memory" ); \
11269 + __res; \
11270 +})
11271 +
11272 static inline int __must_check
11273 HYPERVISOR_set_trap_table(
11274 const trap_info_t *table)
11275 @@ -146,6 +163,8 @@ HYPERVISOR_mmu_update(
11276 mmu_update_t *req, unsigned int count, unsigned int *success_count,
11277 domid_t domid)
11278 {
11279 + if (arch_use_lazy_mmu_mode())
11280 + return xen_multi_mmu_update(req, count, success_count, domid);
11281 return _hypercall4(int, mmu_update, req, count, success_count, domid);
11282 }
11283
11284 @@ -154,6 +173,8 @@ HYPERVISOR_mmuext_op(
11285 struct mmuext_op *op, unsigned int count, unsigned int *success_count,
11286 domid_t domid)
11287 {
11288 + if (arch_use_lazy_mmu_mode())
11289 + return xen_multi_mmuext_op(op, count, success_count, domid);
11290 return _hypercall4(int, mmuext_op, op, count, success_count, domid);
11291 }
11292
11293 @@ -241,6 +262,8 @@ static inline int __must_check
11294 HYPERVISOR_memory_op(
11295 unsigned int cmd, void *arg)
11296 {
11297 + if (arch_use_lazy_mmu_mode())
11298 + xen_multicall_flush(false);
11299 return _hypercall2(int, memory_op, cmd, arg);
11300 }
11301
11302 @@ -255,6 +278,8 @@ static inline int __must_check
11303 HYPERVISOR_update_va_mapping(
11304 unsigned long va, pte_t new_val, unsigned long flags)
11305 {
11306 + if (arch_use_lazy_mmu_mode())
11307 + return xen_multi_update_va_mapping(va, new_val, flags);
11308 return _hypercall3(int, update_va_mapping, va, new_val.pte, flags);
11309 }
11310
11311 @@ -314,6 +339,8 @@ static inline int __must_check
11312 HYPERVISOR_grant_table_op(
11313 unsigned int cmd, void *uop, unsigned int count)
11314 {
11315 + if (arch_use_lazy_mmu_mode())
11316 + xen_multicall_flush(false);
11317 return _hypercall3(int, grant_table_op, cmd, uop, count);
11318 }
11319
11320 Index: head-2008-12-01/include/asm-x86/mach-xen/asm/hypervisor.h
11321 ===================================================================
11322 --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/hypervisor.h 2008-12-03 15:49:14.000000000 +0100
11323 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/hypervisor.h 2008-12-01 11:29:05.000000000 +0100
11324 @@ -43,6 +43,7 @@
11325 #include <xen/interface/physdev.h>
11326 #include <xen/interface/sched.h>
11327 #include <xen/interface/nmi.h>
11328 +#include <asm/percpu.h>
11329 #include <asm/ptrace.h>
11330 #include <asm/page.h>
11331 #if defined(__i386__)
11332 @@ -135,7 +136,46 @@ void scrub_pages(void *, unsigned int);
11333 #define scrub_pages(_p,_n) ((void)0)
11334 #endif
11335
11336 -#include <xen/hypercall.h>
11337 +#ifdef CONFIG_XEN
11338 +
11339 +DECLARE_PER_CPU(bool, xen_lazy_mmu);
11340 +
11341 +int xen_multicall_flush(bool);
11342 +
11343 +int __must_check xen_multi_update_va_mapping(unsigned long va, pte_t,
11344 + unsigned long flags);
11345 +int __must_check xen_multi_mmu_update(mmu_update_t *, unsigned int count,
11346 + unsigned int *success_count, domid_t);
11347 +int __must_check xen_multi_mmuext_op(struct mmuext_op *, unsigned int count,
11348 + unsigned int *success_count, domid_t);
11349 +
11350 +#define __HAVE_ARCH_ENTER_LAZY_MMU_MODE
11351 +static inline void arch_enter_lazy_mmu_mode(void)
11352 +{
11353 + __get_cpu_var(xen_lazy_mmu) = true;
11354 +}
11355 +
11356 +static inline void arch_leave_lazy_mmu_mode(void)
11357 +{
11358 + __get_cpu_var(xen_lazy_mmu) = false;
11359 + xen_multicall_flush(false);
11360 +}
11361 +
11362 +#if defined(CONFIG_X86_32)
11363 +#define arch_use_lazy_mmu_mode() unlikely(x86_read_percpu(xen_lazy_mmu))
11364 +#elif !defined(arch_use_lazy_mmu_mode)
11365 +#define arch_use_lazy_mmu_mode() unlikely(__get_cpu_var(xen_lazy_mmu))
11366 +#endif
11367 +
11368 +#else /* CONFIG_XEN */
11369 +
11370 +static inline void xen_multicall_flush(bool ignore) {}
11371 +#define arch_use_lazy_mmu_mode() false
11372 +#define xen_multi_update_va_mapping(...) ({ BUG(); -ENOSYS; })
11373 +#define xen_multi_mmu_update(...) ({ BUG(); -ENOSYS; })
11374 +#define xen_multi_mmuext_op(...) ({ BUG(); -ENOSYS; })
11375 +
11376 +#endif /* CONFIG_XEN */
11377
11378 #if defined(CONFIG_X86_64)
11379 #define MULTI_UVMFLAGS_INDEX 2
11380 @@ -147,11 +187,15 @@ void scrub_pages(void *, unsigned int);
11381
11382 #ifdef CONFIG_XEN
11383 #define is_running_on_xen() 1
11384 +extern char hypercall_page[PAGE_SIZE];
11385 #else
11386 extern char *hypercall_stubs;
11387 +#define hypercall_page hypercall_stubs
11388 #define is_running_on_xen() (!!hypercall_stubs)
11389 #endif
11390
11391 +#include <xen/hypercall.h>
11392 +
11393 static inline int
11394 HYPERVISOR_yield(
11395 void)
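
The hypervisor.h block above is what the arch_use_lazy_mmu_mode() checks inserted into the hypercall wrappers earlier key off: while the per-CPU xen_lazy_mmu flag is set, MMU hypercalls are queued as multicalls instead of trapping into Xen once per update, and leaving lazy mode flushes the queue. A usage sketch; update_one_pte() is a hypothetical stand-in for any of the wrapped operations:

	static void batch_pte_updates(struct mm_struct *mm,
				      unsigned long start, unsigned long end)
	{
		unsigned long addr;

		arch_enter_lazy_mmu_mode();        /* set per-CPU xen_lazy_mmu */
		for (addr = start; addr < end; addr += PAGE_SIZE)
			update_one_pte(mm, addr);  /* queued while lazy        */
		arch_leave_lazy_mmu_mode();        /* clear flag, flush batch  */
	}
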
11396 Index: head-2008-12-01/include/asm-x86/mach-xen/asm/io_32.h
11397 ===================================================================
11398 --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/io_32.h 2008-12-03 15:49:14.000000000 +0100
11399 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/io_32.h 2008-12-01 11:29:05.000000000 +0100
11400 @@ -237,33 +237,6 @@ static inline void memcpy_toio(volatile
11401
11402 #define eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),(void __force *)(b),(c),(d))
11403
11404 -/**
11405 - * check_signature - find BIOS signatures
11406 - * @io_addr: mmio address to check
11407 - * @signature: signature block
11408 - * @length: length of signature
11409 - *
11410 - * Perform a signature comparison with the mmio address io_addr. This
11411 - * address should have been obtained by ioremap.
11412 - * Returns 1 on a match.
11413 - */
11414 -
11415 -static inline int check_signature(volatile void __iomem * io_addr,
11416 - const unsigned char *signature, int length)
11417 -{
11418 - int retval = 0;
11419 - do {
11420 - if (readb(io_addr) != *signature)
11421 - goto out;
11422 - io_addr++;
11423 - signature++;
11424 - length--;
11425 - } while (length);
11426 - retval = 1;
11427 -out:
11428 - return retval;
11429 -}
11430 -
11431 /*
11432 * Cache management
11433 *
11434 Index: head-2008-12-01/include/asm-x86/mach-xen/asm/pgtable-3level.h
11435 ===================================================================
11436 --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/pgtable-3level.h 2008-12-03 15:49:14.000000000 +0100
11437 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/pgtable-3level.h 2008-12-01 11:29:05.000000000 +0100
11438 @@ -53,7 +53,6 @@ static inline int pte_exec_kernel(pte_t
11439 * not possible, use pte_get_and_clear to obtain the old pte
11440 * value and then use set_pte to update it. -ben
11441 */
11442 -#define __HAVE_ARCH_SET_PTE_ATOMIC
11443
11444 static inline void set_pte(pte_t *ptep, pte_t pte)
11445 {
11446 @@ -70,14 +69,6 @@ static inline void set_pte(pte_t *ptep,
11447 set_pte((ptep), (pteval)); \
11448 } while (0)
11449
11450 -#define set_pte_at_sync(_mm,addr,ptep,pteval) do { \
11451 - if (((_mm) != current->mm && (_mm) != &init_mm) || \
11452 - HYPERVISOR_update_va_mapping((addr), (pteval), UVMF_INVLPG)) { \
11453 - set_pte((ptep), (pteval)); \
11454 - xen_invlpg((addr)); \
11455 - } \
11456 -} while (0)
11457 -
11458 #define set_pmd(pmdptr,pmdval) \
11459 xen_l2_entry_update((pmdptr), (pmdval))
11460 #define set_pud(pudptr,pudval) \
11461 @@ -94,7 +85,7 @@ static inline void pud_clear (pud_t * pu
11462 #define pud_page(pud) \
11463 ((struct page *) __va(pud_val(pud) & PAGE_MASK))
11464
11465 -#define pud_page_kernel(pud) \
11466 +#define pud_page_vaddr(pud) \
11467 ((unsigned long) __va(pud_val(pud) & PAGE_MASK))
11468
11469
11470 @@ -124,6 +115,7 @@ static inline void pte_clear(struct mm_s
11471
11472 #define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0)
11473
11474 +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
11475 static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
11476 {
11477 pte_t pte = *ptep;
11478 @@ -142,6 +134,7 @@ static inline pte_t ptep_get_and_clear(s
11479 return pte;
11480 }
11481
11482 +#define __HAVE_ARCH_PTEP_CLEAR_FLUSH
11483 #define ptep_clear_flush(vma, addr, ptep) \
11484 ({ \
11485 pte_t *__ptep = (ptep); \
11486 @@ -159,6 +152,7 @@ static inline pte_t ptep_get_and_clear(s
11487 __res; \
11488 })
11489
11490 +#define __HAVE_ARCH_PTE_SAME
11491 static inline int pte_same(pte_t a, pte_t b)
11492 {
11493 return a.pte_low == b.pte_low && a.pte_high == b.pte_high;
11494 Index: head-2008-12-01/include/asm-x86/mach-xen/asm/pgtable_32.h
11495 ===================================================================
11496 --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/pgtable_32.h 2008-12-03 15:49:14.000000000 +0100
11497 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/pgtable_32.h 2008-12-01 11:29:05.000000000 +0100
11498 @@ -260,31 +260,89 @@ static inline pte_t pte_mkhuge(pte_t pte
11499 # include <asm/pgtable-2level.h>
11500 #endif
11501
11502 -#define ptep_test_and_clear_dirty(vma, addr, ptep) \
11503 +/*
11504 + * Rules for using pte_update - it must be called after any PTE update which
11505 + * has not been done using the set_pte / clear_pte interfaces. It is used by
11506 + * shadow mode hypervisors to resynchronize the shadow page tables. Kernel PTE
11507 + * updates should either be sets, clears, or set_pte_atomic for P->P
11508 + * transitions, which means this hook should only be called for user PTEs.
11509 + * This hook implies a P->P protection or access change has taken place, which
11510 + * requires a subsequent TLB flush. The notification can optionally be delayed
11511 + * until the TLB flush event by using the pte_update_defer form of the
11512 + * interface, but care must be taken to assure that the flush happens while
11513 + * still holding the same page table lock so that the shadow and primary pages
11514 + * do not become out of sync on SMP.
11515 + */
11516 +#define pte_update(mm, addr, ptep) do { } while (0)
11517 +#define pte_update_defer(mm, addr, ptep) do { } while (0)
11518 +
11519 +
11520 +/*
11521 + * We only update the dirty/accessed state if we set
11522 + * the dirty bit by hand in the kernel, since the hardware
11523 + * will do the accessed bit for us, and we don't want to
11524 + * race with other CPU's that might be updating the dirty
11525 + * bit at the same time.
11526 + */
11527 +#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
11528 +#define ptep_set_access_flags(vma, address, ptep, entry, dirty) \
11529 +do { \
11530 + if (dirty) \
11531 + ptep_establish(vma, address, ptep, entry); \
11532 +} while (0)
11533 +
11534 +/*
11535 + * We don't actually have these, but we want to advertise them so that
11536 + * we can encompass the flush here.
11537 + */
11538 +#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
11539 +#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
11540 +
11541 +/*
11542 + * Rules for using ptep_establish: the pte MUST be a user pte, and
11543 + * must be a present->present transition.
11544 + */
11545 +#define __HAVE_ARCH_PTEP_ESTABLISH
11546 +#define ptep_establish(vma, address, ptep, pteval) \
11547 +do { \
11548 + if ( likely((vma)->vm_mm == current->mm) ) { \
11549 + BUG_ON(HYPERVISOR_update_va_mapping(address, \
11550 + pteval, \
11551 + (unsigned long)(vma)->vm_mm->cpu_vm_mask.bits| \
11552 + UVMF_INVLPG|UVMF_MULTI)); \
11553 + } else { \
11554 + xen_l1_entry_update(ptep, pteval); \
11555 + flush_tlb_page(vma, address); \
11556 + } \
11557 +} while (0)
11558 +
11559 +#define __HAVE_ARCH_PTEP_CLEAR_DIRTY_FLUSH
11560 +#define ptep_clear_flush_dirty(vma, address, ptep) \
11561 ({ \
11562 pte_t __pte = *(ptep); \
11563 - int __ret = pte_dirty(__pte); \
11564 - if (__ret) { \
11565 - __pte = pte_mkclean(__pte); \
11566 - if ((vma)->vm_mm != current->mm || \
11567 - HYPERVISOR_update_va_mapping(addr, __pte, 0)) \
11568 - (ptep)->pte_low = __pte.pte_low; \
11569 - } \
11570 - __ret; \
11571 + int __dirty = pte_dirty(__pte); \
11572 + __pte = pte_mkclean(__pte); \
11573 + if (test_bit(PG_pinned, &virt_to_page((vma)->vm_mm->pgd)->flags)) \
11574 + ptep_set_access_flags(vma, address, ptep, __pte, __dirty); \
11575 + else if (__dirty) \
11576 + (ptep)->pte_low = __pte.pte_low; \
11577 + __dirty; \
11578 })
11579
11580 -#define ptep_test_and_clear_young(vma, addr, ptep) \
11581 +#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
11582 +#define ptep_clear_flush_young(vma, address, ptep) \
11583 ({ \
11584 pte_t __pte = *(ptep); \
11585 - int __ret = pte_young(__pte); \
11586 - if (__ret) \
11587 - __pte = pte_mkold(__pte); \
11588 - if ((vma)->vm_mm != current->mm || \
11589 - HYPERVISOR_update_va_mapping(addr, __pte, 0)) \
11590 - (ptep)->pte_low = __pte.pte_low; \
11591 - __ret; \
11592 + int __young = pte_young(__pte); \
11593 + __pte = pte_mkold(__pte); \
11594 + if (test_bit(PG_pinned, &virt_to_page((vma)->vm_mm->pgd)->flags)) \
11595 + ptep_set_access_flags(vma, address, ptep, __pte, __young); \
11596 + else if (__young) \
11597 + (ptep)->pte_low = __pte.pte_low; \
11598 + __young; \
11599 })
11600
11601 +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
11602 #define ptep_get_and_clear_full(mm, addr, ptep, full) \
11603 ((full) ? ({ \
11604 pte_t __res = *(ptep); \
11605 @@ -296,6 +354,7 @@ static inline pte_t pte_mkhuge(pte_t pte
11606 }) : \
11607 ptep_get_and_clear(mm, addr, ptep))
11608
11609 +#define __HAVE_ARCH_PTEP_SET_WRPROTECT
11610 static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
11611 {
11612 pte_t pte = *ptep;
11613 @@ -391,11 +450,11 @@ static inline pte_t pte_modify(pte_t pte
11614 #define pte_index(address) \
11615 (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
11616 #define pte_offset_kernel(dir, address) \
11617 - ((pte_t *) pmd_page_kernel(*(dir)) + pte_index(address))
11618 + ((pte_t *) pmd_page_vaddr(*(dir)) + pte_index(address))
11619
11620 #define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT))
11621
11622 -#define pmd_page_kernel(pmd) \
11623 +#define pmd_page_vaddr(pmd) \
11624 ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
11625
11626 /*
11627 @@ -418,8 +477,6 @@ extern pte_t *lookup_address(unsigned lo
11628 static inline int set_kernel_exec(unsigned long vaddr, int enable) { return 0;}
11629 #endif
11630
11631 -extern void noexec_setup(const char *str);
11632 -
11633 #if defined(CONFIG_HIGHPTE)
11634 #define pte_offset_map(dir, address) \
11635 ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE0) + \
11636 @@ -437,37 +494,17 @@ extern void noexec_setup(const char *str
11637 #define pte_unmap_nested(pte) do { } while (0)
11638 #endif
11639
11640 -#define __HAVE_ARCH_PTEP_ESTABLISH
11641 -#define ptep_establish(vma, address, ptep, pteval) \
11642 - do { \
11643 - if ( likely((vma)->vm_mm == current->mm) ) { \
11644 - BUG_ON(HYPERVISOR_update_va_mapping(address, \
11645 - pteval, \
11646 - (unsigned long)(vma)->vm_mm->cpu_vm_mask.bits| \
11647 - UVMF_INVLPG|UVMF_MULTI)); \
11648 - } else { \
11649 - xen_l1_entry_update(ptep, pteval); \
11650 - flush_tlb_page(vma, address); \
11651 - } \
11652 - } while (0)
11653 +/* Clear a kernel PTE and flush it from the TLB */
11654 +#define kpte_clear_flush(ptep, vaddr) do { \
11655 + if (HYPERVISOR_update_va_mapping(vaddr, __pte(0), UVMF_INVLPG)) \
11656 + BUG(); \
11657 +} while (0)
11658
11659 /*
11660 * The i386 doesn't have any external MMU info: the kernel page
11661 * tables contain all the necessary information.
11662 - *
11663 - * Also, we only update the dirty/accessed state if we set
11664 - * the dirty bit by hand in the kernel, since the hardware
11665 - * will do the accessed bit for us, and we don't want to
11666 - * race with other CPU's that might be updating the dirty
11667 - * bit at the same time.
11668 */
11669 #define update_mmu_cache(vma,address,pte) do { } while (0)
11670 -#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
11671 -#define ptep_set_access_flags(vma, address, ptep, entry, dirty) \
11672 - do { \
11673 - if (dirty) \
11674 - ptep_establish(vma, address, ptep, entry); \
11675 - } while (0)
11676
11677 #include <xen/features.h>
11678 void make_lowmem_page_readonly(void *va, unsigned int feature);
11679 @@ -513,10 +550,11 @@ int touch_pte_range(struct mm_struct *mm
11680 unsigned long size);
11681
11682 int xen_change_pte_range(struct mm_struct *mm, pmd_t *pmd,
11683 - unsigned long addr, unsigned long end, pgprot_t newprot);
11684 + unsigned long addr, unsigned long end, pgprot_t newprot,
11685 + int dirty_accountable);
11686
11687 -#define arch_change_pte_range(mm, pmd, addr, end, newprot) \
11688 - xen_change_pte_range(mm, pmd, addr, end, newprot)
11689 +#define arch_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable) \
11690 + xen_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable)
11691
11692 #define io_remap_pfn_range(vma,from,pfn,size,prot) \
11693 direct_remap_pfn_range(vma,from,pfn,size,prot,DOMID_IO)
11694 @@ -525,13 +563,6 @@ direct_remap_pfn_range(vma,from,pfn,size
11695 #define GET_IOSPACE(pfn) 0
11696 #define GET_PFN(pfn) (pfn)
11697
11698 -#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
11699 -#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
11700 -#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
11701 -#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
11702 -#define __HAVE_ARCH_PTEP_CLEAR_FLUSH
11703 -#define __HAVE_ARCH_PTEP_SET_WRPROTECT
11704 -#define __HAVE_ARCH_PTE_SAME
11705 #include <asm-generic/pgtable.h>
11706
11707 #endif /* _I386_PGTABLE_H */
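
A note on the reworked ptep_clear_flush_young()/ptep_clear_flush_dirty() above: the PG_pinned test on the mm's pgd page decides which update path is safe. Sketch of the logic as a comment:

	/* If the pgd's page is pinned, Xen is validating these page tables,
	 * so accessed/dirty updates must go through ptep_set_access_flags()
	 * (a checked update plus TLB flush via ptep_establish()). Unpinned
	 * tables are ordinary memory, and writing pte_low directly is
	 * enough. */
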
11708 Index: head-2008-12-01/include/asm-x86/mach-xen/asm/processor_32.h
11709 ===================================================================
11710 --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/processor_32.h 2008-12-03 15:49:14.000000000 +0100
11711 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/processor_32.h 2008-12-01 11:29:05.000000000 +0100
11712 @@ -146,6 +146,18 @@ static inline void detect_ht(struct cpui
11713 #define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */
11714 #define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */
11715
11716 +static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
11717 + unsigned int *ecx, unsigned int *edx)
11718 +{
11719 + /* ecx is often an input as well as an output. */
11720 + __asm__(XEN_CPUID
11721 + : "=a" (*eax),
11722 + "=b" (*ebx),
11723 + "=c" (*ecx),
11724 + "=d" (*edx)
11725 + : "0" (*eax), "2" (*ecx));
11726 +}
11727 +
11728 /*
11729 * Generic CPUID function
11730 * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx
11731 @@ -153,24 +165,18 @@ static inline void detect_ht(struct cpui
11732 */
11733 static inline void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx)
11734 {
11735 - __asm__(XEN_CPUID
11736 - : "=a" (*eax),
11737 - "=b" (*ebx),
11738 - "=c" (*ecx),
11739 - "=d" (*edx)
11740 - : "0" (op), "c"(0));
11741 + *eax = op;
11742 + *ecx = 0;
11743 + __cpuid(eax, ebx, ecx, edx);
11744 }
11745
11746 /* Some CPUID calls want 'count' to be placed in ecx */
11747 static inline void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx,
11748 - int *edx)
11749 + int *edx)
11750 {
11751 - __asm__(XEN_CPUID
11752 - : "=a" (*eax),
11753 - "=b" (*ebx),
11754 - "=c" (*ecx),
11755 - "=d" (*edx)
11756 - : "0" (op), "c" (count));
11757 + *eax = op;
11758 + *ecx = count;
11759 + __cpuid(eax, ebx, ecx, edx);
11760 }
11761
11762 /*
11763 @@ -178,42 +184,30 @@ static inline void cpuid_count(int op, i
11764 */
11765 static inline unsigned int cpuid_eax(unsigned int op)
11766 {
11767 - unsigned int eax;
11768 + unsigned int eax, ebx, ecx, edx;
11769
11770 - __asm__(XEN_CPUID
11771 - : "=a" (eax)
11772 - : "0" (op)
11773 - : "bx", "cx", "dx");
11774 + cpuid(op, &eax, &ebx, &ecx, &edx);
11775 return eax;
11776 }
11777 static inline unsigned int cpuid_ebx(unsigned int op)
11778 {
11779 - unsigned int eax, ebx;
11780 + unsigned int eax, ebx, ecx, edx;
11781
11782 - __asm__(XEN_CPUID
11783 - : "=a" (eax), "=b" (ebx)
11784 - : "0" (op)
11785 - : "cx", "dx" );
11786 + cpuid(op, &eax, &ebx, &ecx, &edx);
11787 return ebx;
11788 }
11789 static inline unsigned int cpuid_ecx(unsigned int op)
11790 {
11791 - unsigned int eax, ecx;
11792 + unsigned int eax, ebx, ecx, edx;
11793
11794 - __asm__(XEN_CPUID
11795 - : "=a" (eax), "=c" (ecx)
11796 - : "0" (op)
11797 - : "bx", "dx" );
11798 + cpuid(op, &eax, &ebx, &ecx, &edx);
11799 return ecx;
11800 }
11801 static inline unsigned int cpuid_edx(unsigned int op)
11802 {
11803 - unsigned int eax, edx;
11804 + unsigned int eax, ebx, ecx, edx;
11805
11806 - __asm__(XEN_CPUID
11807 - : "=a" (eax), "=d" (edx)
11808 - : "0" (op)
11809 - : "bx", "cx");
11810 + cpuid(op, &eax, &ebx, &ecx, &edx);
11811 return edx;
11812 }
11813
11814 @@ -315,6 +309,8 @@ static inline void __mwait(unsigned long
11815 : :"a" (eax), "c" (ecx));
11816 }
11817
11818 +extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx);
11819 +
11820 /* from system description table in BIOS. Mostly for MCA use, but
11821 others may find it useful. */
11822 extern unsigned int machine_id;
11823 Index: head-2008-12-01/include/asm-x86/mach-xen/asm/segment_32.h
11824 ===================================================================
11825 --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/segment_32.h 2008-12-03 15:49:14.000000000 +0100
11826 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/segment_32.h 2008-12-01 11:29:05.000000000 +0100
11827 @@ -61,11 +61,9 @@
11828
11829 #define GDT_ENTRY_KERNEL_CS (GDT_ENTRY_KERNEL_BASE + 0)
11830 #define __KERNEL_CS (GDT_ENTRY_KERNEL_CS * 8)
11831 -#define GET_KERNEL_CS() (__KERNEL_CS | (xen_feature(XENFEAT_supervisor_mode_kernel)?0:1) )
11832
11833 #define GDT_ENTRY_KERNEL_DS (GDT_ENTRY_KERNEL_BASE + 1)
11834 #define __KERNEL_DS (GDT_ENTRY_KERNEL_DS * 8)
11835 -#define GET_KERNEL_DS() (__KERNEL_DS | (xen_feature(XENFEAT_supervisor_mode_kernel)?0:1) )
11836
11837 #define GDT_ENTRY_TSS (GDT_ENTRY_KERNEL_BASE + 4)
11838 #define GDT_ENTRY_LDT (GDT_ENTRY_KERNEL_BASE + 5)
11839 @@ -85,6 +83,11 @@
11840
11841 #define GDT_SIZE (GDT_ENTRIES * 8)
11842
11843 +/* Matches __KERNEL_CS and __USER_CS (they must be 2 entries apart) */
11844 +#define SEGMENT_IS_FLAT_CODE(x) (((x) & 0xec) == GDT_ENTRY_KERNEL_CS * 8)
11845 +/* Matches PNP_CS32 and PNP_CS16 (they must be consecutive) */
11846 +#define SEGMENT_IS_PNP_CODE(x) (((x) & 0xf4) == GDT_ENTRY_PNPBIOS_BASE * 8)
11847 +
11848 /* Simple and small GDT entries for booting only */
11849
11850 #define GDT_ENTRY_BOOT_CS 2
11851 @@ -114,4 +117,16 @@
11852 */
11853 #define IDT_ENTRIES 256
11854
11855 +/* Bottom two bits of selector give the ring privilege level */
11856 +#define SEGMENT_RPL_MASK 0x3
11857 +/* Bit 2 is table indicator (LDT/GDT) */
11858 +#define SEGMENT_TI_MASK 0x4
11859 +
11860 +/* User mode is privilege level 3 */
11861 +#define USER_RPL 0x3
11862 +/* LDT segment has TI set, GDT has it cleared */
11863 +#define SEGMENT_LDT 0x4
11864 +#define SEGMENT_GDT 0x0
11865 +
11866 +#define get_kernel_rpl() (xen_feature(XENFEAT_supervisor_mode_kernel)?0:1)
11867 #endif
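
The selector-decoding constants added above make privilege checks explicit instead of open-coded. A hedged sketch of how a trap handler might use them; both function names are hypothetical:

	/* Bottom two selector bits are the requested privilege level. */
	static int example_selector_is_user(unsigned short cs)
	{
		return (cs & SEGMENT_RPL_MASK) == USER_RPL;
	}

	static int example_selector_is_kernel(unsigned short cs)
	{
		/* Ring 1 under Xen unless supervisor_mode_kernel is set,
		 * hence get_kernel_rpl() rather than a hard-coded 0. */
		return (cs & SEGMENT_RPL_MASK) == get_kernel_rpl();
	}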
11868 Index: head-2008-12-01/include/asm-x86/mach-xen/asm/smp_32.h
11869 ===================================================================
11870 --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/smp_32.h 2008-12-03 15:49:14.000000000 +0100
11871 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/smp_32.h 2008-12-01 11:29:05.000000000 +0100
11872 @@ -79,25 +79,36 @@ static inline int hard_smp_processor_id(
11873 return GET_APIC_ID(*(unsigned long *)(APIC_BASE+APIC_ID));
11874 }
11875 #endif
11876 -
11877 -static __inline int logical_smp_processor_id(void)
11878 -{
11879 - /* we don't want to mark this access volatile - bad code generation */
11880 - return GET_APIC_LOGICAL_ID(*(unsigned long *)(APIC_BASE+APIC_LDR));
11881 -}
11882 -
11883 #endif
11884
11885 +#define safe_smp_processor_id() smp_processor_id()
11886 extern int __cpu_disable(void);
11887 extern void __cpu_die(unsigned int cpu);
11888 extern void prefill_possible_map(void);
11889 +extern unsigned int num_processors;
11890 +
11891 #endif /* !__ASSEMBLY__ */
11892
11893 #else /* CONFIG_SMP */
11894
11895 +#define safe_smp_processor_id() 0
11896 #define cpu_physical_id(cpu) boot_cpu_physical_apicid
11897
11898 #define NO_PROC_ID 0xFF /* No processor magic marker */
11899
11900 #endif
11901 +
11902 +#ifndef __ASSEMBLY__
11903 +
11904 +extern u8 apicid_2_node[];
11905 +
11906 +#ifdef CONFIG_X86_LOCAL_APIC
11907 +static __inline int logical_smp_processor_id(void)
11908 +{
11909 + /* we don't want to mark this access volatile - bad code generation */
11910 + return GET_APIC_LOGICAL_ID(*(unsigned long *)(APIC_BASE+APIC_LDR));
11911 +}
11912 +#endif
11913 +#endif
11914 +
11915 #endif
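
Two additions above are easy to misread in diff form: safe_smp_processor_id() becomes a plain alias of smp_processor_id() (constant 0 on UP), and logical_smp_processor_id() merely moves so that UP builds with a local APIC still see it. A minimal usage sketch, assuming the usual printk declarations; the function name is hypothetical:

	/* Safe to call from late shutdown/NMI-ish paths. */
	static void example_report_halting_cpu(void)
	{
		printk(KERN_EMERG "halting on CPU %d\n",
		       safe_smp_processor_id());
	}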
11916 Index: head-2008-12-01/include/asm-x86/mach-xen/asm/system_32.h
11917 ===================================================================
11918 --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/system_32.h 2008-12-03 15:49:14.000000000 +0100
11919 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/system_32.h 2008-12-01 11:29:05.000000000 +0100
11920 @@ -267,6 +267,9 @@ static inline unsigned long __xchg(unsig
11921 #define cmpxchg(ptr,o,n)\
11922 ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\
11923 (unsigned long)(n),sizeof(*(ptr))))
11924 +#define sync_cmpxchg(ptr,o,n)\
11925 + ((__typeof__(*(ptr)))__sync_cmpxchg((ptr),(unsigned long)(o),\
11926 + (unsigned long)(n),sizeof(*(ptr))))
11927 #endif
11928
11929 static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
11930 @@ -296,6 +299,39 @@ static inline unsigned long __cmpxchg(vo
11931 return old;
11932 }
11933
11934 +/*
11935 + * Always use locked operations when touching memory shared with a
11936 + * hypervisor, since the system may be SMP even if the guest kernel
11937 + * isn't.
11938 + */
11939 +static inline unsigned long __sync_cmpxchg(volatile void *ptr,
11940 + unsigned long old,
11941 + unsigned long new, int size)
11942 +{
11943 + unsigned long prev;
11944 + switch (size) {
11945 + case 1:
11946 + __asm__ __volatile__("lock; cmpxchgb %b1,%2"
11947 + : "=a"(prev)
11948 + : "q"(new), "m"(*__xg(ptr)), "0"(old)
11949 + : "memory");
11950 + return prev;
11951 + case 2:
11952 + __asm__ __volatile__("lock; cmpxchgw %w1,%2"
11953 + : "=a"(prev)
11954 + : "r"(new), "m"(*__xg(ptr)), "0"(old)
11955 + : "memory");
11956 + return prev;
11957 + case 4:
11958 + __asm__ __volatile__("lock; cmpxchgl %1,%2"
11959 + : "=a"(prev)
11960 + : "r"(new), "m"(*__xg(ptr)), "0"(old)
11961 + : "memory");
11962 + return prev;
11963 + }
11964 + return old;
11965 +}
11966 +
11967 #ifndef CONFIG_X86_CMPXCHG
11968 /*
11969 * Building a kernel capable of running on 80386. It may be necessary to
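
The comment on __sync_cmpxchg() above carries the key invariant: memory shared with the hypervisor or another domain is written by other physical CPUs even when this guest is uniprocessor, so the LOCK prefix must be unconditional. A hedged usage sketch in the style of the grant-table code; the shared flags word and the function name are illustrative:

	/* Atomically claim a flags word living in a page shared with a
	 * remote domain.  Plain cmpxchg() on a UP kernel may have its
	 * LOCK prefix patched out and lose the race against the remote
	 * writer; sync_cmpxchg() keeps it unconditionally. */
	static int example_claim_shared(volatile unsigned short *flags,
					unsigned short old, unsigned short new)
	{
		return sync_cmpxchg(flags, old, new) == old;
	}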
11970 Index: head-2008-12-01/include/asm-x86/mach-xen/asm/tlbflush_32.h
11971 ===================================================================
11972 --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/tlbflush_32.h 2008-12-03 15:49:14.000000000 +0100
11973 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/tlbflush_32.h 2008-12-01 11:29:05.000000000 +0100
11974 @@ -8,8 +8,6 @@
11975 #define __flush_tlb_global() xen_tlb_flush()
11976 #define __flush_tlb_all() xen_tlb_flush()
11977
11978 -extern unsigned long pgkern_mask;
11979 -
11980 #define cpu_has_invlpg (boot_cpu_data.x86 > 3)
11981
11982 #define __flush_tlb_single(addr) xen_invlpg(addr)
11983 Index: head-2008-12-01/include/asm-x86/mach-xen/asm/dma-mapping_64.h
11984 ===================================================================
11985 --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/dma-mapping_64.h 2008-12-03 15:49:14.000000000 +0100
11986 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/dma-mapping_64.h 2008-12-01 11:29:05.000000000 +0100
11987 @@ -55,13 +55,6 @@ extern dma_addr_t bad_dma_address;
11988 extern struct dma_mapping_ops* dma_ops;
11989 extern int iommu_merge;
11990
11991 -static inline int valid_dma_direction(int dma_direction)
11992 -{
11993 - return ((dma_direction == DMA_BIDIRECTIONAL) ||
11994 - (dma_direction == DMA_TO_DEVICE) ||
11995 - (dma_direction == DMA_FROM_DEVICE));
11996 -}
11997 -
11998 #if 0
11999 static inline int dma_mapping_error(dma_addr_t dma_addr)
12000 {
12001 Index: head-2008-12-01/include/asm-x86/mach-xen/asm/fixmap_64.h
12002 ===================================================================
12003 --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/fixmap_64.h 2008-12-03 15:49:14.000000000 +0100
12004 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/fixmap_64.h 2008-12-01 11:29:05.000000000 +0100
12005 @@ -41,7 +41,7 @@ enum fixed_addresses {
12006 #ifdef CONFIG_X86_LOCAL_APIC
12007 FIX_APIC_BASE, /* local (CPU) APIC -- required for SMP or not */
12008 #endif
12009 -#ifdef CONFIG_X86_IO_APIC
12010 +#ifndef CONFIG_XEN
12011 FIX_IO_APIC_BASE_0,
12012 FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS-1,
12013 #endif
12014 Index: head-2008-12-01/include/asm-x86/mach-xen/asm/io_64.h
12015 ===================================================================
12016 --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/io_64.h 2008-12-03 15:49:14.000000000 +0100
12017 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/io_64.h 2008-12-01 11:29:05.000000000 +0100
12018 @@ -273,33 +273,6 @@ void memset_io(volatile void __iomem *a,
12019
12020 #define eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),(void *)(b),(c),(d))
12021
12022 -/**
12023 - * check_signature - find BIOS signatures
12024 - * @io_addr: mmio address to check
12025 - * @signature: signature block
12026 - * @length: length of signature
12027 - *
12028 - * Perform a signature comparison with the mmio address io_addr. This
12029 - * address should have been obtained by ioremap.
12030 - * Returns 1 on a match.
12031 - */
12032 -
12033 -static inline int check_signature(void __iomem *io_addr,
12034 - const unsigned char *signature, int length)
12035 -{
12036 - int retval = 0;
12037 - do {
12038 - if (readb(io_addr) != *signature)
12039 - goto out;
12040 - io_addr++;
12041 - signature++;
12042 - length--;
12043 - } while (length);
12044 - retval = 1;
12045 -out:
12046 - return retval;
12047 -}
12048 -
12049 /* Nothing to do */
12050
12051 #define dma_cache_inv(_start,_size) do { } while (0)
12052 Index: head-2008-12-01/include/asm-x86/mach-xen/asm/pgtable_64.h
12053 ===================================================================
12054 --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/pgtable_64.h 2008-12-03 15:49:14.000000000 +0100
12055 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/pgtable_64.h 2008-12-01 11:29:05.000000000 +0100
12056 @@ -42,12 +42,9 @@ extern unsigned long __supported_pte_mas
12057
12058 #define swapper_pg_dir init_level4_pgt
12059
12060 -extern int nonx_setup(char *str);
12061 extern void paging_init(void);
12062 extern void clear_kernel_mapping(unsigned long addr, unsigned long size);
12063
12064 -extern unsigned long pgkern_mask;
12065 -
12066 /*
12067 * ZERO_PAGE is a global shared page that is always zero: used
12068 * for zero-mapped memory areas etc..
12069 @@ -117,9 +114,6 @@ static inline void pgd_clear (pgd_t * pg
12070 set_pgd(__user_pgd(pgd), __pgd(0));
12071 }
12072
12073 -#define pud_page(pud) \
12074 - ((unsigned long) __va(pud_val(pud) & PHYSICAL_PAGE_MASK))
12075 -
12076 #define pte_same(a, b) ((a).pte == (b).pte)
12077
12078 #define pte_pgprot(a) (__pgprot((a).pte & ~PHYSICAL_PAGE_MASK))
12079 @@ -331,7 +325,7 @@ static inline pte_t ptep_get_and_clear_f
12080 #define __LARGE_PTE (_PAGE_PSE|_PAGE_PRESENT)
12081 static inline int pte_user(pte_t pte) { return __pte_val(pte) & _PAGE_USER; }
12082 static inline int pte_read(pte_t pte) { return __pte_val(pte) & _PAGE_USER; }
12083 -static inline int pte_exec(pte_t pte) { return __pte_val(pte) & _PAGE_USER; }
12084 +static inline int pte_exec(pte_t pte) { return !(__pte_val(pte) & _PAGE_NX); }
12085 static inline int pte_dirty(pte_t pte) { return __pte_val(pte) & _PAGE_DIRTY; }
12086 static inline int pte_young(pte_t pte) { return __pte_val(pte) & _PAGE_ACCESSED; }
12087 static inline int pte_write(pte_t pte) { return __pte_val(pte) & _PAGE_RW; }
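
The pte_exec() change just above is a real fix rather than churn: on x86-64 executability is governed by the NX bit, so testing _PAGE_USER misclassified pages. pte_mkexec() receives the matching fix in the next hunk. Restated as a sketch:

	/* With the fix: a PTE is executable iff _PAGE_NX is clear. */
	static inline int example_pte_exec(pte_t pte)
	{
		return !(__pte_val(pte) & _PAGE_NX);
	}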
12088 @@ -344,29 +338,12 @@ static inline pte_t pte_mkclean(pte_t pt
12089 static inline pte_t pte_mkold(pte_t pte) { __pte_val(pte) &= ~_PAGE_ACCESSED; return pte; }
12090 static inline pte_t pte_wrprotect(pte_t pte) { __pte_val(pte) &= ~_PAGE_RW; return pte; }
12091 static inline pte_t pte_mkread(pte_t pte) { __pte_val(pte) |= _PAGE_USER; return pte; }
12092 -static inline pte_t pte_mkexec(pte_t pte) { __pte_val(pte) |= _PAGE_USER; return pte; }
12093 +static inline pte_t pte_mkexec(pte_t pte) { __pte_val(pte) &= ~_PAGE_NX; return pte; }
12094 static inline pte_t pte_mkdirty(pte_t pte) { __pte_val(pte) |= _PAGE_DIRTY; return pte; }
12095 static inline pte_t pte_mkyoung(pte_t pte) { __pte_val(pte) |= _PAGE_ACCESSED; return pte; }
12096 static inline pte_t pte_mkwrite(pte_t pte) { __pte_val(pte) |= _PAGE_RW; return pte; }
12097 static inline pte_t pte_mkhuge(pte_t pte) { __pte_val(pte) |= _PAGE_PSE; return pte; }
12098 -
12099 -#define ptep_test_and_clear_dirty(vma, addr, ptep) \
12100 -({ \
12101 - pte_t __pte = *(ptep); \
12102 - int __ret = pte_dirty(__pte); \
12103 - if (__ret) \
12104 - set_pte_at((vma)->vm_mm, addr, ptep, pte_mkclean(__pte)); \
12105 - __ret; \
12106 -})
12107 -
12108 -#define ptep_test_and_clear_young(vma, addr, ptep) \
12109 -({ \
12110 - pte_t __pte = *(ptep); \
12111 - int __ret = pte_young(__pte); \
12112 - if (__ret) \
12113 - set_pte_at((vma)->vm_mm, addr, ptep, pte_mkold(__pte)); \
12114 - __ret; \
12115 -})
12116 +static inline pte_t pte_clrhuge(pte_t pte) { __pte_val(pte) &= ~_PAGE_PSE; return pte; }
12117
12118 static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
12119 {
12120 @@ -393,7 +370,8 @@ static inline int pmd_large(pmd_t pte) {
12121 /*
12122 * Level 4 access.
12123 */
12124 -#define pgd_page(pgd) ((unsigned long) __va(pgd_val(pgd) & PTE_MASK))
12125 +#define pgd_page_vaddr(pgd) ((unsigned long) __va(pgd_val(pgd) & PTE_MASK))
12126 +#define pgd_page(pgd) (pfn_to_page(pgd_val(pgd) >> PAGE_SHIFT))
12127 #define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
12128 #define pgd_offset(mm, addr) ((mm)->pgd + pgd_index(addr))
12129 #define pgd_offset_k(address) (init_level4_pgt + pgd_index(address))
12130 @@ -402,16 +380,18 @@ static inline int pmd_large(pmd_t pte) {
12131
12132 /* PUD - Level3 access */
12133 /* to find an entry in a page-table-directory. */
12134 +#define pud_page_vaddr(pud) ((unsigned long) __va(pud_val(pud) & PHYSICAL_PAGE_MASK))
12135 +#define pud_page(pud) (pfn_to_page(pud_val(pud) >> PAGE_SHIFT))
12136 #define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
12137 -#define pud_offset(pgd, address) ((pud_t *) pgd_page(*(pgd)) + pud_index(address))
12138 +#define pud_offset(pgd, address) ((pud_t *) pgd_page_vaddr(*(pgd)) + pud_index(address))
12139 #define pud_present(pud) (__pud_val(pud) & _PAGE_PRESENT)
12140
12141 /* PMD - Level 2 access */
12142 -#define pmd_page_kernel(pmd) ((unsigned long) __va(pmd_val(pmd) & PTE_MASK))
12143 +#define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_val(pmd) & PTE_MASK))
12144 #define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT))
12145
12146 #define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
12147 -#define pmd_offset(dir, address) ((pmd_t *) pud_page(*(dir)) + \
12148 +#define pmd_offset(dir, address) ((pmd_t *) pud_page_vaddr(*(dir)) + \
12149 pmd_index(address))
12150 #define pmd_none(x) (!__pmd_val(x))
12151 #if CONFIG_XEN_COMPAT <= 0x030002
12152 @@ -442,6 +422,7 @@ static inline pte_t mk_pte_phys(unsigned
12153 {
12154 unsigned long pteval;
12155 pteval = physpage | pgprot_val(pgprot);
12156 + pteval &= __supported_pte_mask;
12157 return __pte(pteval);
12158 }
12159
12160 @@ -463,7 +444,7 @@ static inline pte_t pte_modify(pte_t pte
12161
12162 #define pte_index(address) \
12163 (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
12164 -#define pte_offset_kernel(dir, address) ((pte_t *) pmd_page_kernel(*(dir)) + \
12165 +#define pte_offset_kernel(dir, address) ((pte_t *) pmd_page_vaddr(*(dir)) + \
12166 pte_index(address))
12167
12168 /* x86-64 always has all page tables mapped. */
12169 @@ -504,6 +485,40 @@ static inline pte_t pte_modify(pte_t pte
12170 ptep_establish(vma, address, ptep, entry); \
12171 } while (0)
12172
12173 +
12174 +/*
12175 + * i386 says: We don't actually have these, but we want to advertise
12176 + * them so that we can encompass the flush here.
12177 + */
12178 +#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
12179 +#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
12180 +
12181 +#define __HAVE_ARCH_PTEP_CLEAR_DIRTY_FLUSH
12182 +#define ptep_clear_flush_dirty(vma, address, ptep) \
12183 +({ \
12184 + pte_t __pte = *(ptep); \
12185 + int __dirty = pte_dirty(__pte); \
12186 + __pte = pte_mkclean(__pte); \
12187 + if ((vma)->vm_mm->context.pinned) \
12188 + ptep_set_access_flags(vma, address, ptep, __pte, __dirty); \
12189 + else if (__dirty) \
12190 + set_pte(ptep, __pte); \
12191 + __dirty; \
12192 +})
12193 +
12194 +#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
12195 +#define ptep_clear_flush_young(vma, address, ptep) \
12196 +({ \
12197 + pte_t __pte = *(ptep); \
12198 + int __young = pte_young(__pte); \
12199 + __pte = pte_mkold(__pte); \
12200 + if ((vma)->vm_mm->context.pinned) \
12201 + ptep_set_access_flags(vma, address, ptep, __pte, __young); \
12202 + else if (__young) \
12203 + set_pte(ptep, __pte); \
12204 + __young; \
12205 +})
12206 +
12207 /* Encode and de-code a swap entry */
12208 #define __swp_type(x) (((x).val >> 1) & 0x3f)
12209 #define __swp_offset(x) ((x).val >> 8)
12210 @@ -545,10 +560,11 @@ int touch_pte_range(struct mm_struct *mm
12211 unsigned long size);
12212
12213 int xen_change_pte_range(struct mm_struct *mm, pmd_t *pmd,
12214 - unsigned long addr, unsigned long end, pgprot_t newprot);
12215 + unsigned long addr, unsigned long end, pgprot_t newprot,
12216 + int dirty_accountable);
12217
12218 -#define arch_change_pte_range(mm, pmd, addr, end, newprot) \
12219 - xen_change_pte_range(mm, pmd, addr, end, newprot)
12220 +#define arch_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable) \
12221 + xen_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable)
12222
12223 #define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \
12224 direct_remap_pfn_range(vma,vaddr,pfn,size,prot,DOMID_IO)
12225 @@ -570,8 +586,6 @@ int xen_change_pte_range(struct mm_struc
12226 #define kc_offset_to_vaddr(o) \
12227 (((o) & (1UL << (__VIRTUAL_MASK_SHIFT-1))) ? ((o) | (~__VIRTUAL_MASK)) : (o))
12228
12229 -#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
12230 -#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
12231 #define __HAVE_ARCH_PTEP_GET_AND_CLEAR
12232 #define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
12233 #define __HAVE_ARCH_PTEP_CLEAR_FLUSH
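
This file section also tracks 2.6.19's renaming of the table-walk accessors: pgd_page() and pud_page() now return a struct page, while the kernel-virtual-address form gains the *_page_vaddr() suffix that pud_offset()/pmd_offset() are built on. A hedged walk sketch using the renamed macros; the function name is hypothetical and error handling is reduced to the one check shown:

	static pmd_t *example_pmd_lookup(struct mm_struct *mm,
					 unsigned long addr)
	{
		pgd_t *pgd = pgd_offset(mm, addr);	/* level 4 */
		pud_t *pud = pud_offset(pgd, addr);	/* via pgd_page_vaddr() */

		if (!pud_present(*pud))
			return NULL;
		return pmd_offset(pud, addr);		/* via pud_page_vaddr() */
	}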
12234 Index: head-2008-12-01/include/asm-x86/mach-xen/asm/processor_64.h
12235 ===================================================================
12236 --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/processor_64.h 2008-12-03 15:49:14.000000000 +0100
12237 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/processor_64.h 2008-12-01 11:29:05.000000000 +0100
12238 @@ -484,6 +484,8 @@ static inline void __mwait(unsigned long
12239 : :"a" (eax), "c" (ecx));
12240 }
12241
12242 +extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx);
12243 +
12244 #define stack_current() \
12245 ({ \
12246 struct thread_info *ti; \
12247 Index: head-2008-12-01/include/asm-x86/mach-xen/asm/smp_64.h
12248 ===================================================================
12249 --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/smp_64.h 2008-12-03 15:49:14.000000000 +0100
12250 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/smp_64.h 2008-12-01 11:29:05.000000000 +0100
12251 @@ -4,15 +4,12 @@
12252 /*
12253 * We need the APIC definitions automatically as part of 'smp.h'
12254 */
12255 -#ifndef __ASSEMBLY__
12256 #include <linux/threads.h>
12257 #include <linux/cpumask.h>
12258 #include <linux/bitops.h>
12259 extern int disable_apic;
12260 -#endif
12261
12262 #ifdef CONFIG_X86_LOCAL_APIC
12263 -#ifndef __ASSEMBLY__
12264 #include <asm/fixmap.h>
12265 #include <asm/mpspec.h>
12266 #ifdef CONFIG_X86_IO_APIC
12267 @@ -21,10 +18,8 @@ extern int disable_apic;
12268 #include <asm/apic.h>
12269 #include <asm/thread_info.h>
12270 #endif
12271 -#endif
12272
12273 #ifdef CONFIG_SMP
12274 -#ifndef ASSEMBLY
12275
12276 #include <asm/pda.h>
12277
12278 @@ -41,14 +36,11 @@ extern cpumask_t cpu_initialized;
12279
12280 extern void smp_alloc_memory(void);
12281 extern volatile unsigned long smp_invalidate_needed;
12282 -extern int pic_mode;
12283 extern void lock_ipi_call_lock(void);
12284 extern void unlock_ipi_call_lock(void);
12285 extern int smp_num_siblings;
12286 extern void smp_send_reschedule(int cpu);
12287 void smp_stop_cpu(void);
12288 -extern int smp_call_function_single(int cpuid, void (*func) (void *info),
12289 - void *info, int retry, int wait);
12290
12291 extern cpumask_t cpu_sibling_map[NR_CPUS];
12292 extern cpumask_t cpu_core_map[NR_CPUS];
12293 @@ -77,20 +69,16 @@ static inline int hard_smp_processor_id(
12294 }
12295 #endif
12296
12297 -extern int safe_smp_processor_id(void);
12298 extern int __cpu_disable(void);
12299 extern void __cpu_die(unsigned int cpu);
12300 extern void prefill_possible_map(void);
12301 extern unsigned num_processors;
12302 extern unsigned disabled_cpus;
12303
12304 -#endif /* !ASSEMBLY */
12305 -
12306 #define NO_PROC_ID 0xFF /* No processor magic marker */
12307
12308 #endif
12309
12310 -#ifndef ASSEMBLY
12311 /*
12312 * Some lowlevel functions might want to know about
12313 * the real APIC ID <-> CPU # mapping.
12314 @@ -114,11 +102,8 @@ static inline int cpu_present_to_apicid(
12315 }
12316 #endif
12317
12318 -#endif /* !ASSEMBLY */
12319 -
12320 #ifndef CONFIG_SMP
12321 #define stack_smp_processor_id() 0
12322 -#define safe_smp_processor_id() 0
12323 #define cpu_logical_map(x) (x)
12324 #else
12325 #include <asm/thread_info.h>
12326 @@ -130,7 +115,6 @@ static inline int cpu_present_to_apicid(
12327 })
12328 #endif
12329
12330 -#ifndef __ASSEMBLY__
12331 #ifdef CONFIG_X86_LOCAL_APIC
12332 static __inline int logical_smp_processor_id(void)
12333 {
12334 @@ -138,13 +122,18 @@ static __inline int logical_smp_processo
12335 return GET_APIC_LOGICAL_ID(*(unsigned long *)(APIC_BASE+APIC_LDR));
12336 }
12337 #endif
12338 -#endif
12339
12340 #ifdef CONFIG_SMP
12341 #define cpu_physical_id(cpu) x86_cpu_to_apicid[cpu]
12342 #else
12343 #define cpu_physical_id(cpu) boot_cpu_id
12344 -#endif
12345 -
12346 +static inline int smp_call_function_single(int cpuid, void (*func) (void *info),
12347 + void *info, int retry, int wait)
12348 +{
12349 + /* Disable interrupts here? */
12350 + func(info);
12351 + return 0;
12352 +}
12353 +#endif /* !CONFIG_SMP */
12354 #endif
12355
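
The UP stub added above lets callers drop their #ifdef CONFIG_SMP guards: with one CPU, "run func on this CPU" degenerates into a direct call. A sketch with hypothetical names:

	static void example_drain_counters(void *info)
	{
		/* per-CPU bookkeeping ... */
	}

	static void example_caller(void)
	{
		/* retry=0, wait=1; on !CONFIG_SMP this is simply
		 * example_drain_counters(NULL) returning 0. */
		smp_call_function_single(0, example_drain_counters, NULL, 0, 1);
	}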
12356 Index: head-2008-12-01/include/asm-x86/mach-xen/asm/system_64.h
12357 ===================================================================
12358 --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/system_64.h 2008-12-03 15:49:14.000000000 +0100
12359 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/system_64.h 2008-12-01 11:29:05.000000000 +0100
12360 @@ -24,6 +24,7 @@
12361 #define __EXTRA_CLOBBER \
12362 ,"rcx","rbx","rdx","r8","r9","r10","r11","r12","r13","r14","r15"
12363
12364 +/* Save and restore flags so a set NT flag cannot leak across the switch */
12365 #define switch_to(prev,next,last) \
12366 asm volatile(SAVE_CONTEXT \
12367 "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \
12368 Index: head-2008-12-01/include/asm-x86/mach-xen/asm/tlbflush_64.h
12369 ===================================================================
12370 --- head-2008-12-01.orig/include/asm-x86/mach-xen/asm/tlbflush_64.h 2008-12-03 15:49:14.000000000 +0100
12371 +++ head-2008-12-01/include/asm-x86/mach-xen/asm/tlbflush_64.h 2008-12-01 11:29:05.000000000 +0100
12372 @@ -12,9 +12,6 @@
12373 */
12374 #define __flush_tlb_global() xen_tlb_flush()
12375
12376 -
12377 -extern unsigned long pgkern_mask;
12378 -
12379 #define __flush_tlb_all() __flush_tlb_global()
12380
12381 #define __flush_tlb_one(addr) xen_invlpg((unsigned long)addr)
12382 Index: head-2008-12-01/include/linux/skbuff.h
12383 ===================================================================
12384 --- head-2008-12-01.orig/include/linux/skbuff.h 2008-12-03 15:49:14.000000000 +0100
12385 +++ head-2008-12-01/include/linux/skbuff.h 2008-12-01 11:29:05.000000000 +0100
12386 @@ -1771,5 +1771,12 @@ static inline void skb_forward_csum(stru
12387 }
12388
12389 bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off);
12390 +
12391 +#ifdef CONFIG_XEN
12392 +int skb_checksum_setup(struct sk_buff *skb);
12393 +#else
12394 +static inline int skb_checksum_setup(struct sk_buff *skb) { return 0; }
12395 +#endif
12396 +
12397 #endif /* __KERNEL__ */
12398 #endif /* _LINUX_SKBUFF_H */
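
Declaring skb_checksum_setup() in <linux/skbuff.h> with a no-op stub for !CONFIG_XEN means callers compile unconditionally. A hedged sketch of a netfront-style receive path; the function name is hypothetical:

	#include <linux/skbuff.h>

	/* Repair a packet whose checksum the sending domain left blank. */
	static int example_rx_checksum(struct sk_buff *skb)
	{
		if (skb_checksum_setup(skb))
			return -EPROTO;	/* malformed checksum metadata */
		return 0;		/* safe to hand to the stack */
	}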
12399 Index: head-2008-12-01/include/xen/evtchn.h
12400 ===================================================================
12401 --- head-2008-12-01.orig/include/xen/evtchn.h 2008-12-03 15:49:14.000000000 +0100
12402 +++ head-2008-12-01/include/xen/evtchn.h 2008-12-01 11:29:05.000000000 +0100
12403 @@ -54,34 +54,34 @@
12404 */
12405 int bind_caller_port_to_irqhandler(
12406 unsigned int caller_port,
12407 - irqreturn_t (*handler)(int, void *, struct pt_regs *),
12408 + irq_handler_t handler,
12409 unsigned long irqflags,
12410 const char *devname,
12411 void *dev_id);
12412 int bind_listening_port_to_irqhandler(
12413 unsigned int remote_domain,
12414 - irqreturn_t (*handler)(int, void *, struct pt_regs *),
12415 + irq_handler_t handler,
12416 unsigned long irqflags,
12417 const char *devname,
12418 void *dev_id);
12419 int bind_interdomain_evtchn_to_irqhandler(
12420 unsigned int remote_domain,
12421 unsigned int remote_port,
12422 - irqreturn_t (*handler)(int, void *, struct pt_regs *),
12423 + irq_handler_t handler,
12424 unsigned long irqflags,
12425 const char *devname,
12426 void *dev_id);
12427 int bind_virq_to_irqhandler(
12428 unsigned int virq,
12429 unsigned int cpu,
12430 - irqreturn_t (*handler)(int, void *, struct pt_regs *),
12431 + irq_handler_t handler,
12432 unsigned long irqflags,
12433 const char *devname,
12434 void *dev_id);
12435 int bind_ipi_to_irqhandler(
12436 unsigned int ipi,
12437 unsigned int cpu,
12438 - irqreturn_t (*handler)(int, void *, struct pt_regs *),
12439 + irq_handler_t handler,
12440 unsigned long irqflags,
12441 const char *devname,
12442 void *dev_id);
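
These prototypes follow the 2.6.19 IRQ API: handlers lose their struct pt_regs argument, and irq_handler_t replaces the spelled-out function-pointer type. A hedged binding sketch — the chosen VIRQ, flags, and names are illustrative only:

	/* New-style handler signature: (irq, dev_id), no pt_regs. */
	static irqreturn_t example_virq_handler(int irq, void *dev_id)
	{
		/* acknowledge and process the event ... */
		return IRQ_HANDLED;
	}

	static int example_bind(unsigned int cpu)
	{
		return bind_virq_to_irqhandler(VIRQ_DEBUG, cpu,
					       example_virq_handler, 0,
					       "example", NULL);
	}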
12443 Index: head-2008-12-01/include/xen/xencons.h
12444 ===================================================================
12445 --- head-2008-12-01.orig/include/xen/xencons.h 2008-12-03 15:49:14.000000000 +0100
12446 +++ head-2008-12-01/include/xen/xencons.h 2008-12-01 11:29:05.000000000 +0100
12447 @@ -8,7 +8,7 @@ void xencons_force_flush(void);
12448 void xencons_resume(void);
12449
12450 /* Interrupt work hooks. Receive data, or kick data out. */
12451 -void xencons_rx(char *buf, unsigned len, struct pt_regs *regs);
12452 +void xencons_rx(char *buf, unsigned len);
12453 void xencons_tx(void);
12454
12455 int xencons_ring_init(void);
12456 Index: head-2008-12-01/mm/mprotect.c
12457 ===================================================================
12458 --- head-2008-12-01.orig/mm/mprotect.c 2008-12-03 15:49:14.000000000 +0100
12459 +++ head-2008-12-01/mm/mprotect.c 2008-12-01 11:29:05.000000000 +0100
12460 @@ -92,7 +92,7 @@ static inline void change_pmd_range(stru
12461 next = pmd_addr_end(addr, end);
12462 if (pmd_none_or_clear_bad(pmd))
12463 continue;
12464 - if (arch_change_pte_range(mm, pmd, addr, next, newprot))
12465 + if (arch_change_pte_range(mm, pmd, addr, next, newprot, dirty_accountable))
12466 continue;
12467 change_pte_range(mm, pmd, addr, next, newprot, dirty_accountable);
12468 } while (pmd++, addr = next, addr != end);
12469 Index: head-2008-12-01/net/core/dev.c
12470 ===================================================================
12471 --- head-2008-12-01.orig/net/core/dev.c 2008-12-03 15:49:14.000000000 +0100
12472 +++ head-2008-12-01/net/core/dev.c 2008-12-01 11:29:05.000000000 +0100
12473 @@ -1765,15 +1765,14 @@ inline int skb_checksum_setup(struct sk_
12474 }
12475 if ((skb->h.raw + skb->csum + 2) > skb->tail)
12476 goto out;
12477 - skb->ip_summed = CHECKSUM_HW;
12478 + skb->ip_summed = CHECKSUM_PARTIAL;
12479 skb->proto_csum_blank = 0;
12480 }
12481 return 0;
12482 out:
12483 return -EPROTO;
12484 }
12485 -#else
12486 -inline int skb_checksum_setup(struct sk_buff *skb) { return 0; }
12487 +EXPORT_SYMBOL(skb_checksum_setup);
12488 #endif
12489
12490 /**
12491 @@ -2324,7 +2323,7 @@ int netif_receive_skb(struct sk_buff *sk
12492 case CHECKSUM_UNNECESSARY:
12493 skb->proto_data_valid = 1;
12494 break;
12495 - case CHECKSUM_HW:
12496 + case CHECKSUM_PARTIAL:
12497 /* XXX Implement me. */
12498 default:
12499 skb->proto_data_valid = 0;
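
Both hunks in this file track 2.6.19's split of CHECKSUM_HW into direction-specific values; on the paths touched here the transmit-side meaning applies, so CHECKSUM_HW becomes CHECKSUM_PARTIAL ("checksum not yet computed, to be placed at the offset in skb->csum"). A restated sketch of the receive-side classification above; the function name is hypothetical:

	/* A partial checksum on receive has not been verified, so it
	 * must not be advertised as validated protocol data. */
	static int example_csum_validated(const struct sk_buff *skb)
	{
		switch (skb->ip_summed) {
		case CHECKSUM_UNNECESSARY:
			return 1;
		case CHECKSUM_PARTIAL:	/* left for us to compute */
		default:
			return 0;
		}
	}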
12500 @@ -4986,7 +4985,6 @@ EXPORT_SYMBOL(unregister_netdevice_notif
12501 EXPORT_SYMBOL(net_enable_timestamp);
12502 EXPORT_SYMBOL(net_disable_timestamp);
12503 EXPORT_SYMBOL(dev_get_flags);
12504 -EXPORT_SYMBOL(skb_checksum_setup);
12505
12506 #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
12507 EXPORT_SYMBOL(br_handle_frame_hook);