1 From: www.kernel.org
2 Subject: Linux 2.6.21
3 Patch-mainline: 2.6.21
4
5 Automatically created from "patches.kernel.org/patch-2.6.21" by xen-port-patches.py
6
7 Acked-by: jbeulich@novell.com
8
9 ---
10 arch/x86/Kconfig | 4
11 arch/x86/ia32/ia32entry-xen.S | 5
12 arch/x86/kernel/Makefile | 4
13 arch/x86/kernel/acpi/sleep_64-xen.c | 6
14 arch/x86/kernel/apic_32-xen.c | 65 ----
15 arch/x86/kernel/cpu/common-xen.c | 14
16 arch/x86/kernel/e820_32-xen.c | 18 -
17 arch/x86/kernel/e820_64-xen.c | 40 ++
18 arch/x86/kernel/entry_32-xen.S | 80 +++--
19 arch/x86/kernel/entry_64-xen.S | 3
20 arch/x86/kernel/genapic_64-xen.c | 4
21 arch/x86/kernel/head64-xen.c | 8
22 arch/x86/kernel/head_32-xen.S | 9
23 arch/x86/kernel/io_apic_32-xen.c | 43 +-
24 arch/x86/kernel/io_apic_64-xen.c | 414 +++++++++++++-------------
25 arch/x86/kernel/irq_32-xen.c | 22 +
26 arch/x86/kernel/irq_64-xen.c | 13
27 arch/x86/kernel/microcode-xen.c | 2
28 arch/x86/kernel/mpparse_32-xen.c | 4
29 arch/x86/kernel/mpparse_64-xen.c | 6
30 arch/x86/kernel/pci-dma-xen.c | 2
31 arch/x86/kernel/pcspeaker.c | 5
32 arch/x86/kernel/process_32-xen.c | 42 +-
33 arch/x86/kernel/process_64-xen.c | 13
34 arch/x86/kernel/setup_32-xen.c | 46 --
35 arch/x86/kernel/setup_64-xen.c | 184 +----------
36 arch/x86/kernel/smp_32-xen.c | 5
37 arch/x86/kernel/time_32-xen.c | 279 +----------------
38 arch/x86/kernel/traps_32-xen.c | 27 +
39 arch/x86/kernel/vsyscall_64-xen.c | 127 ++++---
40 arch/x86/mm/fault_32-xen.c | 44 --
41 arch/x86/mm/fault_64-xen.c | 39 --
42 arch/x86/mm/highmem_32-xen.c | 9
43 arch/x86/mm/init_32-xen.c | 2
44 arch/x86/mm/init_64-xen.c | 24 +
45 arch/x86/mm/pageattr_64-xen.c | 6
46 arch/x86/mm/pgtable_32-xen.c | 28 +
47 drivers/acpi/processor_extcntl.c | 18 -
48 drivers/char/tpm/tpm_xen.c | 5
49 drivers/pci/msi-xen.c | 196 +++---------
50 drivers/xen/balloon/sysfs.c | 1
51 drivers/xen/blkback/xenbus.c | 4
52 drivers/xen/blkfront/blkfront.c | 1
53 drivers/xen/blktap/xenbus.c | 4
54 drivers/xen/core/evtchn.c | 4
55 drivers/xen/core/smpboot.c | 22 -
56 drivers/xen/fbfront/xenfb.c | 1
57 drivers/xen/fbfront/xenkbd.c | 1
58 drivers/xen/netback/xenbus.c | 4
59 drivers/xen/netfront/netfront.c | 49 +--
60 drivers/xen/pciback/xenbus.c | 1
61 drivers/xen/pcifront/xenbus.c | 1
62 drivers/xen/scsiback/xenbus.c | 1
63 drivers/xen/scsifront/xenbus.c | 1
64 drivers/xen/tpmback/common.h | 4
65 drivers/xen/tpmback/interface.c | 5
66 drivers/xen/tpmback/tpmback.c | 16 -
67 drivers/xen/tpmback/xenbus.c | 5
68 drivers/xen/xenbus/xenbus_probe.c | 17 -
69 drivers/xen/xenbus/xenbus_probe.h | 4
70 drivers/xen/xenbus/xenbus_probe_backend.c | 8
71 drivers/xen/xenoprof/xenoprofile.c | 2
72 include/asm-x86/i8253.h | 4
73 include/asm-x86/mach-xen/asm/desc_32.h | 2
74 include/asm-x86/mach-xen/asm/dma-mapping_64.h | 4
75 include/asm-x86/mach-xen/asm/hypervisor.h | 15
76 include/asm-x86/mach-xen/asm/io_32.h | 6
77 include/asm-x86/mach-xen/asm/io_64.h | 8
78 include/asm-x86/mach-xen/asm/mmu_context_32.h | 10
79 include/asm-x86/mach-xen/asm/pgalloc_32.h | 21 +
80 include/asm-x86/mach-xen/asm/pgtable_32.h | 25 +
81 include/asm-x86/mach-xen/asm/pgtable_64.h | 9
82 include/asm-x86/mach-xen/asm/processor_32.h | 6
83 include/asm-x86/mach-xen/asm/segment_32.h | 23 +
84 include/asm-x86/mach-xen/asm/smp_32.h | 5
85 include/asm-x86/mach-xen/asm/smp_64.h | 3
86 include/xen/xenbus.h | 24 +
87 lib/swiotlb-xen.c | 19 -
88 78 files changed, 946 insertions(+), 1259 deletions(-)
89
90 --- a/arch/x86/ia32/ia32entry-xen.S
91 +++ b/arch/x86/ia32/ia32entry-xen.S
92 @@ -465,7 +465,7 @@ ia32_sys_call_table:
93 .quad sys32_vm86_warning /* vm86old */
94 .quad compat_sys_wait4
95 .quad sys_swapoff /* 115 */
96 - .quad sys32_sysinfo
97 + .quad compat_sys_sysinfo
98 .quad sys32_ipc
99 .quad sys_fsync
100 .quad stub32_sigreturn
101 @@ -510,7 +510,7 @@ ia32_sys_call_table:
102 .quad sys_sched_yield
103 .quad sys_sched_get_priority_max
104 .quad sys_sched_get_priority_min /* 160 */
105 - .quad sys_sched_rr_get_interval
106 + .quad sys32_sched_rr_get_interval
107 .quad compat_sys_nanosleep
108 .quad sys_mremap
109 .quad sys_setresuid16
110 @@ -668,4 +668,5 @@ ia32_sys_call_table:
111 .quad compat_sys_vmsplice
112 .quad compat_sys_move_pages
113 .quad sys_getcpu
114 + .quad sys_epoll_pwait
115 ia32_syscall_end:
116 --- a/arch/x86/Kconfig
117 +++ b/arch/x86/Kconfig
118 @@ -50,13 +50,15 @@ config GENERIC_CMOS_UPDATE
119
120 config CLOCKSOURCE_WATCHDOG
121 def_bool y
122 + depends on !X86_XEN
123
124 config GENERIC_CLOCKEVENTS
125 def_bool y
126 + depends on !X86_XEN
127
128 config GENERIC_CLOCKEVENTS_BROADCAST
129 def_bool y
130 - depends on X86_64 || (X86_32 && X86_LOCAL_APIC)
131 + depends on X86_64 || (X86_32 && X86_LOCAL_APIC && !X86_XEN)
132
133 config LOCKDEP_SUPPORT
134 def_bool y
135 --- a/arch/x86/kernel/acpi/sleep_64-xen.c
136 +++ b/arch/x86/kernel/acpi/sleep_64-xen.c
137 @@ -59,7 +59,7 @@ unsigned long acpi_wakeup_address = 0;
138 unsigned long acpi_video_flags;
139 extern char wakeup_start, wakeup_end;
140
141 -extern unsigned long FASTCALL(acpi_copy_wakeup_routine(unsigned long));
142 +extern unsigned long acpi_copy_wakeup_routine(unsigned long);
143
144 static pgd_t low_ptr;
145
146 @@ -67,8 +67,10 @@ static void init_low_mapping(void)
147 {
148 pgd_t *slot0 = pgd_offset(current->mm, 0UL);
149 low_ptr = *slot0;
150 + /* FIXME: We're playing with the current task's page tables here, which
151 + * is potentially dangerous on SMP systems.
152 + */
153 set_pgd(slot0, *pgd_offset(current->mm, PAGE_OFFSET));
154 - WARN_ON(num_online_cpus() != 1);
155 local_flush_tlb();
156 }
157 #endif
158 --- a/arch/x86/kernel/apic_32-xen.c
159 +++ b/arch/x86/kernel/apic_32-xen.c
160 @@ -25,6 +25,8 @@
161 #include <linux/kernel_stat.h>
162 #include <linux/sysdev.h>
163 #include <linux/cpu.h>
164 +#include <linux/clockchips.h>
165 +#include <linux/acpi_pmtmr.h>
166 #include <linux/module.h>
167
168 #include <asm/atomic.h>
169 @@ -56,83 +58,26 @@ static cpumask_t timer_bcast_ipi;
170 */
171
172 /*
173 - * Debug level
174 + * Debug level, exported for io_apic.c
175 */
176 int apic_verbosity;
177
178 #ifndef CONFIG_XEN
179 static int modern_apic(void)
180 {
181 - unsigned int lvr, version;
182 /* AMD systems use old APIC versions, so check the CPU */
183 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
184 - boot_cpu_data.x86 >= 0xf)
185 + boot_cpu_data.x86 >= 0xf)
186 return 1;
187 - lvr = apic_read(APIC_LVR);
188 - version = GET_APIC_VERSION(lvr);
189 - return version >= 0x14;
190 + return lapic_get_version() >= 0x14;
191 }
192 #endif /* !CONFIG_XEN */
193
194 -/*
195 - * 'what should we do if we get a hw irq event on an illegal vector'.
196 - * each architecture has to answer this themselves.
197 - */
198 -void ack_bad_irq(unsigned int irq)
199 -{
200 - printk("unexpected IRQ trap at vector %02x\n", irq);
201 - /*
202 - * Currently unexpected vectors happen only on SMP and APIC.
203 - * We _must_ ack these because every local APIC has only N
204 - * irq slots per priority level, and a 'hanging, unacked' IRQ
205 - * holds up an irq slot - in excessive cases (when multiple
206 - * unexpected vectors occur) that might lock up the APIC
207 - * completely.
208 - * But only ack when the APIC is enabled -AK
209 - */
210 - if (cpu_has_apic)
211 - ack_APIC_irq();
212 -}
213 -
214 int get_physical_broadcast(void)
215 {
216 return 0xff;
217 }
218
219 -#ifndef CONFIG_XEN
220 -#ifndef CONFIG_SMP
221 -static void up_apic_timer_interrupt_call(void)
222 -{
223 - int cpu = smp_processor_id();
224 -
225 - /*
226 - * the NMI deadlock-detector uses this.
227 - */
228 - per_cpu(irq_stat, cpu).apic_timer_irqs++;
229 -
230 - smp_local_timer_interrupt();
231 -}
232 -#endif
233 -
234 -void smp_send_timer_broadcast_ipi(void)
235 -{
236 - cpumask_t mask;
237 -
238 - cpus_and(mask, cpu_online_map, timer_bcast_ipi);
239 - if (!cpus_empty(mask)) {
240 -#ifdef CONFIG_SMP
241 - send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
242 -#else
243 - /*
244 - * We can directly call the apic timer interrupt handler
245 - * in UP case. Minus all irq related functions
246 - */
247 - up_apic_timer_interrupt_call();
248 -#endif
249 - }
250 -}
251 -#endif
252 -
253 int setup_profiling_timer(unsigned int multiplier)
254 {
255 return -EINVAL;
256 --- a/arch/x86/kernel/cpu/common-xen.c
257 +++ b/arch/x86/kernel/cpu/common-xen.c
258 @@ -610,7 +610,7 @@ void __init early_cpu_init(void)
259 struct pt_regs * __devinit idle_regs(struct pt_regs *regs)
260 {
261 memset(regs, 0, sizeof(struct pt_regs));
262 - regs->xgs = __KERNEL_PDA;
263 + regs->xfs = __KERNEL_PDA;
264 return regs;
265 }
266
267 @@ -667,12 +667,12 @@ struct i386_pda boot_pda = {
268 .pcurrent = &init_task,
269 };
270
271 -static inline void set_kernel_gs(void)
272 +static inline void set_kernel_fs(void)
273 {
274 - /* Set %gs for this CPU's PDA. Memory clobber is to create a
275 + /* Set %fs for this CPU's PDA. Memory clobber is to create a
276 barrier with respect to any PDA operations, so the compiler
277 doesn't move any before here. */
278 - asm volatile ("mov %0, %%gs" : : "r" (__KERNEL_PDA) : "memory");
279 + asm volatile ("mov %0, %%fs" : : "r" (__KERNEL_PDA) : "memory");
280 }
281
282 /* Initialize the CPU's GDT and PDA. The boot CPU does this for
283 @@ -730,7 +730,7 @@ void __cpuinit cpu_set_gdt(int cpu)
284 }
285 BUG_ON(HYPERVISOR_set_gdt(frames, (cpu_gdt_descr->size + 1) / 8));
286
287 - set_kernel_gs();
288 + set_kernel_fs();
289 }
290
291 /* Common CPU init for both boot and secondary CPUs */
292 @@ -775,8 +775,8 @@ static void __cpuinit _cpu_init(int cpu,
293 __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss);
294 #endif
295
296 - /* Clear %fs. */
297 - asm volatile ("mov %0, %%fs" : : "r" (0));
298 + /* Clear %gs. */
299 + asm volatile ("mov %0, %%gs" : : "r" (0));
300
301 /* Clear all 6 debug registers: */
302 set_debugreg(0, 0);
303 --- a/arch/x86/kernel/e820_32-xen.c
304 +++ b/arch/x86/kernel/e820_32-xen.c
305 @@ -14,6 +14,7 @@
306 #include <asm/pgtable.h>
307 #include <asm/page.h>
308 #include <asm/e820.h>
309 +#include <asm/setup.h>
310 #include <xen/interface/memory.h>
311
312 #ifdef CONFIG_EFI
313 @@ -157,21 +158,22 @@ static struct resource standard_io_resou
314 .flags = IORESOURCE_BUSY | IORESOURCE_IO
315 } };
316
317 -static int romsignature(const unsigned char *x)
318 +#define ROMSIGNATURE 0xaa55
319 +
320 +static int __init romsignature(const unsigned char *rom)
321 {
322 unsigned short sig;
323 - int ret = 0;
324 - if (probe_kernel_address((const unsigned short *)x, sig) == 0)
325 - ret = (sig == 0xaa55);
326 - return ret;
327 +
328 + return probe_kernel_address((const unsigned short *)rom, sig) == 0 &&
329 + sig == ROMSIGNATURE;
330 }
331
332 static int __init romchecksum(unsigned char *rom, unsigned long length)
333 {
334 - unsigned char *p, sum = 0;
335 + unsigned char sum;
336
337 - for (p = rom; p < rom + length; p++)
338 - sum += *p;
339 + for (sum = 0; length; length--)
340 + sum += *rom++;
341 return sum == 0;
342 }
343
344 --- a/arch/x86/kernel/e820_64-xen.c
345 +++ b/arch/x86/kernel/e820_64-xen.c
346 @@ -88,6 +88,13 @@ static inline int bad_addr(unsigned long
347 return 1;
348 }
349
350 +#ifdef CONFIG_NUMA
351 + /* NUMA memory to node map */
352 + if (last >= nodemap_addr && addr < nodemap_addr + nodemap_size) {
353 + *addrp = nodemap_addr + nodemap_size;
354 + return 1;
355 + }
356 +#endif
357 /* XXX ramdisk image here? */
358 #else
359 if (last < (table_end<<PAGE_SHIFT)) {
360 @@ -213,6 +220,37 @@ unsigned long __init e820_end_of_ram(voi
361 }
362
363 /*
364 + * Find the hole size in the range.
365 + */
366 +unsigned long __init e820_hole_size(unsigned long start, unsigned long end)
367 +{
368 + unsigned long ram = 0;
369 + int i;
370 +
371 + for (i = 0; i < e820.nr_map; i++) {
372 + struct e820entry *ei = &e820.map[i];
373 + unsigned long last, addr;
374 +
375 + if (ei->type != E820_RAM ||
376 + ei->addr+ei->size <= start ||
377 + ei->addr >= end)
378 + continue;
379 +
380 + addr = round_up(ei->addr, PAGE_SIZE);
381 + if (addr < start)
382 + addr = start;
383 +
384 + last = round_down(ei->addr + ei->size, PAGE_SIZE);
385 + if (last >= end)
386 + last = end;
387 +
388 + if (last > addr)
389 + ram += last - addr;
390 + }
391 + return ((end - start) - ram);
392 +}
393 +
394 +/*
395 * Mark e820 reserved areas as busy for the resource manager.
396 */
397 void __init e820_reserve_resources(struct e820entry *e820, int nr_map)
398 @@ -738,7 +776,7 @@ static int __init parse_memmap_opt(char
399 }
400 early_param("memmap", parse_memmap_opt);
401
402 -void finish_e820_parsing(void)
403 +void __init finish_e820_parsing(void)
404 {
405 if (userdef) {
406 printk(KERN_INFO "user-defined physical RAM map:\n");
407 --- a/arch/x86/kernel/entry_32-xen.S
408 +++ b/arch/x86/kernel/entry_32-xen.S
409 @@ -30,7 +30,7 @@
410 * 18(%esp) - %eax
411 * 1C(%esp) - %ds
412 * 20(%esp) - %es
413 - * 24(%esp) - %gs
414 + * 24(%esp) - %fs
415 * 28(%esp) - orig_eax
416 * 2C(%esp) - %eip
417 * 30(%esp) - %cs
418 @@ -102,9 +102,9 @@ NMI_MASK = 0x80000000
419
420 #define SAVE_ALL \
421 cld; \
422 - pushl %gs; \
423 + pushl %fs; \
424 CFI_ADJUST_CFA_OFFSET 4;\
425 - /*CFI_REL_OFFSET gs, 0;*/\
426 + /*CFI_REL_OFFSET fs, 0;*/\
427 pushl %es; \
428 CFI_ADJUST_CFA_OFFSET 4;\
429 /*CFI_REL_OFFSET es, 0;*/\
430 @@ -136,7 +136,7 @@ NMI_MASK = 0x80000000
431 movl %edx, %ds; \
432 movl %edx, %es; \
433 movl $(__KERNEL_PDA), %edx; \
434 - movl %edx, %gs
435 + movl %edx, %fs
436
437 #define RESTORE_INT_REGS \
438 popl %ebx; \
439 @@ -169,9 +169,9 @@ NMI_MASK = 0x80000000
440 2: popl %es; \
441 CFI_ADJUST_CFA_OFFSET -4;\
442 /*CFI_RESTORE es;*/\
443 -3: popl %gs; \
444 +3: popl %fs; \
445 CFI_ADJUST_CFA_OFFSET -4;\
446 - /*CFI_RESTORE gs;*/\
447 + /*CFI_RESTORE fs;*/\
448 .pushsection .fixup,"ax"; \
449 4: movl $0,(%esp); \
450 jmp 1b; \
451 @@ -230,6 +230,7 @@ ENTRY(ret_from_fork)
452 CFI_ADJUST_CFA_OFFSET -4
453 jmp syscall_exit
454 CFI_ENDPROC
455 +END(ret_from_fork)
456
457 /*
458 * Return to user mode is not as complex as all this looks,
459 @@ -261,6 +262,7 @@ ENTRY(resume_userspace)
460 # int/exception return?
461 jne work_pending
462 jmp restore_all
463 +END(ret_from_exception)
464
465 #ifdef CONFIG_PREEMPT
466 ENTRY(resume_kernel)
467 @@ -275,6 +277,7 @@ need_resched:
468 jz restore_all
469 call preempt_schedule_irq
470 jmp need_resched
471 +END(resume_kernel)
472 #endif
473 CFI_ENDPROC
474
475 @@ -352,16 +355,17 @@ sysenter_past_esp:
476 movl PT_OLDESP(%esp), %ecx
477 xorl %ebp,%ebp
478 TRACE_IRQS_ON
479 -1: mov PT_GS(%esp), %gs
480 +1: mov PT_FS(%esp), %fs
481 ENABLE_INTERRUPTS_SYSEXIT
482 CFI_ENDPROC
483 .pushsection .fixup,"ax"
484 -2: movl $0,PT_GS(%esp)
485 +2: movl $0,PT_FS(%esp)
486 jmp 1b
487 .section __ex_table,"a"
488 .align 4
489 .long 1b,2b
490 .popsection
491 +ENDPROC(sysenter_entry)
492
493 # pv sysenter call handler stub
494 ENTRY(sysenter_entry_pv)
495 @@ -533,6 +537,7 @@ hypervisor_iret:
496 jmp hypercall_page + (__HYPERVISOR_iret * 32)
497 #endif
498 CFI_ENDPROC
499 +ENDPROC(system_call)
500
501 # perform work that needs to be done immediately before resumption
502 ALIGN
503 @@ -578,6 +583,7 @@ work_notifysig_v86:
504 xorl %edx, %edx
505 call do_notify_resume
506 jmp resume_userspace_sig
507 +END(work_pending)
508
509 # perform syscall exit tracing
510 ALIGN
511 @@ -593,6 +599,7 @@ syscall_trace_entry:
512 cmpl $(nr_syscalls), %eax
513 jnae syscall_call
514 jmp syscall_exit
515 +END(syscall_trace_entry)
516
517 # perform syscall exit tracing
518 ALIGN
519 @@ -606,6 +613,7 @@ syscall_exit_work:
520 movl $1, %edx
521 call do_syscall_trace
522 jmp resume_userspace
523 +END(syscall_exit_work)
524 CFI_ENDPROC
525
526 RING0_INT_FRAME # can't unwind into user space anyway
527 @@ -616,16 +624,18 @@ syscall_fault:
528 GET_THREAD_INFO(%ebp)
529 movl $-EFAULT,PT_EAX(%esp)
530 jmp resume_userspace
531 +END(syscall_fault)
532
533 syscall_badsys:
534 movl $-ENOSYS,PT_EAX(%esp)
535 jmp resume_userspace
536 +END(syscall_badsys)
537 CFI_ENDPROC
538
539 #ifndef CONFIG_XEN
540 #define FIXUP_ESPFIX_STACK \
541 /* since we are on a wrong stack, we cant make it a C code :( */ \
542 - movl %gs:PDA_cpu, %ebx; \
543 + movl %fs:PDA_cpu, %ebx; \
544 PER_CPU(cpu_gdt_descr, %ebx); \
545 movl GDS_address(%ebx), %ebx; \
546 GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \
547 @@ -656,9 +666,9 @@ syscall_badsys:
548 ENTRY(interrupt)
549 .text
550
551 -vector=0
552 ENTRY(irq_entries_start)
553 RING0_INT_FRAME
554 +vector=0
555 .rept NR_IRQS
556 ALIGN
557 .if vector
558 @@ -667,11 +677,16 @@ ENTRY(irq_entries_start)
559 1: pushl $~(vector)
560 CFI_ADJUST_CFA_OFFSET 4
561 jmp common_interrupt
562 -.data
563 + .previous
564 .long 1b
565 -.text
566 + .text
567 vector=vector+1
568 .endr
569 +END(irq_entries_start)
570 +
571 +.previous
572 +END(interrupt)
573 +.previous
574
575 /*
576 * the CPU automatically disables interrupts when executing an IRQ vector,
577 @@ -684,6 +699,7 @@ common_interrupt:
578 movl %esp,%eax
579 call do_IRQ
580 jmp ret_from_intr
581 +ENDPROC(common_interrupt)
582 CFI_ENDPROC
583
584 #define BUILD_INTERRUPT(name, nr) \
585 @@ -696,10 +712,16 @@ ENTRY(name) \
586 movl %esp,%eax; \
587 call smp_/**/name; \
588 jmp ret_from_intr; \
589 - CFI_ENDPROC
590 + CFI_ENDPROC; \
591 +ENDPROC(name)
592
593 /* The include is where all of the SMP etc. interrupts come from */
594 #include "entry_arch.h"
595 +
596 +/* This alternate entry is needed because we hijack the apic LVTT */
597 +#if defined(CONFIG_VMI) && defined(CONFIG_X86_LOCAL_APIC)
598 +BUILD_INTERRUPT(apic_vmi_timer_interrupt,LOCAL_TIMER_VECTOR)
599 +#endif
600 #else
601 #define UNWIND_ESPFIX_STACK
602 #endif
603 @@ -710,7 +732,7 @@ KPROBE_ENTRY(page_fault)
604 CFI_ADJUST_CFA_OFFSET 4
605 ALIGN
606 error_code:
607 - /* the function address is in %gs's slot on the stack */
608 + /* the function address is in %fs's slot on the stack */
609 pushl %es
610 CFI_ADJUST_CFA_OFFSET 4
611 /*CFI_REL_OFFSET es, 0*/
612 @@ -739,20 +761,20 @@ error_code:
613 CFI_ADJUST_CFA_OFFSET 4
614 CFI_REL_OFFSET ebx, 0
615 cld
616 - pushl %gs
617 + pushl %fs
618 CFI_ADJUST_CFA_OFFSET 4
619 - /*CFI_REL_OFFSET gs, 0*/
620 + /*CFI_REL_OFFSET fs, 0*/
621 movl $(__KERNEL_PDA), %ecx
622 - movl %ecx, %gs
623 + movl %ecx, %fs
624 UNWIND_ESPFIX_STACK
625 popl %ecx
626 CFI_ADJUST_CFA_OFFSET -4
627 /*CFI_REGISTER es, ecx*/
628 - movl PT_GS(%esp), %edi # get the function address
629 + movl PT_FS(%esp), %edi # get the function address
630 movl PT_ORIG_EAX(%esp), %edx # get the error code
631 movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart
632 - mov %ecx, PT_GS(%esp)
633 - /*CFI_REL_OFFSET gs, ES*/
634 + mov %ecx, PT_FS(%esp)
635 + /*CFI_REL_OFFSET fs, ES*/
636 movl $(__USER_DS), %ecx
637 movl %ecx, %ds
638 movl %ecx, %es
639 @@ -839,7 +861,7 @@ critical_fixup_table:
640 .byte 0x18 # pop %eax
641 .byte 0x1c # pop %ds
642 .byte 0x20 # pop %es
643 - .byte 0x24,0x24 # pop %gs
644 + .byte 0x24,0x24 # pop %fs
645 .byte 0x28,0x28,0x28 # add $4,%esp
646 .byte 0x2c # iret
647 .byte 0xff,0xff,0xff,0xff # movb $1,1(%esi)
648 @@ -905,6 +927,7 @@ ENTRY(coprocessor_error)
649 CFI_ADJUST_CFA_OFFSET 4
650 jmp error_code
651 CFI_ENDPROC
652 +END(coprocessor_error)
653
654 ENTRY(simd_coprocessor_error)
655 RING0_INT_FRAME
656 @@ -914,6 +937,7 @@ ENTRY(simd_coprocessor_error)
657 CFI_ADJUST_CFA_OFFSET 4
658 jmp error_code
659 CFI_ENDPROC
660 +END(simd_coprocessor_error)
661
662 ENTRY(device_not_available)
663 RING0_INT_FRAME
664 @@ -936,6 +960,7 @@ device_available_emulate:
665 call math_state_restore
666 jmp ret_from_exception
667 CFI_ENDPROC
668 +END(device_not_available)
669
670 #ifndef CONFIG_XEN
671 /*
672 @@ -1097,10 +1122,12 @@ ENTRY(native_iret)
673 .align 4
674 .long 1b,iret_exc
675 .previous
676 +END(native_iret)
677
678 ENTRY(native_irq_enable_sysexit)
679 sti
680 sysexit
681 +END(native_irq_enable_sysexit)
682 #endif
683
684 KPROBE_ENTRY(int3)
685 @@ -1123,6 +1150,7 @@ ENTRY(overflow)
686 CFI_ADJUST_CFA_OFFSET 4
687 jmp error_code
688 CFI_ENDPROC
689 +END(overflow)
690
691 ENTRY(bounds)
692 RING0_INT_FRAME
693 @@ -1132,6 +1160,7 @@ ENTRY(bounds)
694 CFI_ADJUST_CFA_OFFSET 4
695 jmp error_code
696 CFI_ENDPROC
697 +END(bounds)
698
699 ENTRY(invalid_op)
700 RING0_INT_FRAME
701 @@ -1141,6 +1170,7 @@ ENTRY(invalid_op)
702 CFI_ADJUST_CFA_OFFSET 4
703 jmp error_code
704 CFI_ENDPROC
705 +END(invalid_op)
706
707 ENTRY(coprocessor_segment_overrun)
708 RING0_INT_FRAME
709 @@ -1150,6 +1180,7 @@ ENTRY(coprocessor_segment_overrun)
710 CFI_ADJUST_CFA_OFFSET 4
711 jmp error_code
712 CFI_ENDPROC
713 +END(coprocessor_segment_overrun)
714
715 ENTRY(invalid_TSS)
716 RING0_EC_FRAME
717 @@ -1157,6 +1188,7 @@ ENTRY(invalid_TSS)
718 CFI_ADJUST_CFA_OFFSET 4
719 jmp error_code
720 CFI_ENDPROC
721 +END(invalid_TSS)
722
723 ENTRY(segment_not_present)
724 RING0_EC_FRAME
725 @@ -1164,6 +1196,7 @@ ENTRY(segment_not_present)
726 CFI_ADJUST_CFA_OFFSET 4
727 jmp error_code
728 CFI_ENDPROC
729 +END(segment_not_present)
730
731 ENTRY(stack_segment)
732 RING0_EC_FRAME
733 @@ -1171,6 +1204,7 @@ ENTRY(stack_segment)
734 CFI_ADJUST_CFA_OFFSET 4
735 jmp error_code
736 CFI_ENDPROC
737 +END(stack_segment)
738
739 KPROBE_ENTRY(general_protection)
740 RING0_EC_FRAME
741 @@ -1186,6 +1220,7 @@ ENTRY(alignment_check)
742 CFI_ADJUST_CFA_OFFSET 4
743 jmp error_code
744 CFI_ENDPROC
745 +END(alignment_check)
746
747 ENTRY(divide_error)
748 RING0_INT_FRAME
749 @@ -1195,6 +1230,7 @@ ENTRY(divide_error)
750 CFI_ADJUST_CFA_OFFSET 4
751 jmp error_code
752 CFI_ENDPROC
753 +END(divide_error)
754
755 #ifdef CONFIG_X86_MCE
756 ENTRY(machine_check)
757 @@ -1205,6 +1241,7 @@ ENTRY(machine_check)
758 CFI_ADJUST_CFA_OFFSET 4
759 jmp error_code
760 CFI_ENDPROC
761 +END(machine_check)
762 #endif
763
764 #ifndef CONFIG_XEN
765 @@ -1224,6 +1261,7 @@ ENTRY(fixup_4gb_segment)
766 CFI_ADJUST_CFA_OFFSET 4
767 jmp error_code
768 CFI_ENDPROC
769 +END(spurious_interrupt_bug)
770
771 ENTRY(kernel_thread_helper)
772 pushl $0 # fake return address for unwinder
773 --- a/arch/x86/kernel/entry_64-xen.S
774 +++ b/arch/x86/kernel/entry_64-xen.S
775 @@ -629,6 +629,9 @@ END(invalidate_interrupt\num)
776 ENTRY(call_function_interrupt)
777 apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
778 END(call_function_interrupt)
779 +ENTRY(irq_move_cleanup_interrupt)
780 + apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt
781 +END(irq_move_cleanup_interrupt)
782 #endif
783
784 ENTRY(apic_timer_interrupt)
785 --- a/arch/x86/kernel/genapic_64-xen.c
786 +++ b/arch/x86/kernel/genapic_64-xen.c
787 @@ -65,8 +65,8 @@ void __init clustered_apic_check(void)
788 * Some x86_64 machines use physical APIC mode regardless of how many
789 * procs/clusters are present (x86_64 ES7000 is an example).
790 */
791 - if (acpi_fadt.revision > FADT2_REVISION_ID)
792 - if (acpi_fadt.force_apic_physical_destination_mode) {
793 + if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID)
794 + if (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL) {
795 genapic = &apic_cluster;
796 goto print;
797 }
798 --- a/arch/x86/kernel/head_32-xen.S
799 +++ b/arch/x86/kernel/head_32-xen.S
800 @@ -27,6 +27,7 @@
801 #define X86_CAPABILITY new_cpu_data+CPUINFO_x86_capability
802 #define X86_VENDOR_ID new_cpu_data+CPUINFO_x86_vendor_id
803
804 +.section .text.head,"ax",@progbits
805 #define VIRT_ENTRY_OFFSET 0x0
806 .org VIRT_ENTRY_OFFSET
807 ENTRY(startup_32)
808 @@ -60,11 +61,11 @@ ENTRY(startup_32)
809
810 movb $1,X86_HARD_MATH
811
812 - xorl %eax,%eax # Clear FS
813 - movl %eax,%fs
814 + xorl %eax,%eax # Clear GS
815 + movl %eax,%gs
816
817 movl $(__KERNEL_PDA),%eax
818 - mov %eax,%gs
819 + mov %eax,%fs
820
821 cld # gcc2 wants the direction flag cleared at all times
822
823 @@ -75,7 +76,7 @@ ENTRY(startup_32)
824 * Point the GDT at this CPU's PDA. This will be
825 * cpu_gdt_table and boot_pda.
826 */
827 -setup_pda:
828 +ENTRY(setup_pda)
829 /* get the PDA pointer */
830 movl $boot_pda, %eax
831
832 --- a/arch/x86/kernel/head64-xen.c
833 +++ b/arch/x86/kernel/head64-xen.c
834 @@ -45,8 +45,6 @@ static void __init clear_bss(void)
835 #define OLD_CL_BASE_ADDR 0x90000
836 #define OLD_CL_OFFSET 0x90022
837
838 -extern char saved_command_line[];
839 -
840 static void __init copy_bootdata(char *real_mode_data)
841 {
842 #ifndef CONFIG_XEN
843 @@ -62,14 +60,14 @@ static void __init copy_bootdata(char *r
844 new_data = OLD_CL_BASE_ADDR + * (u16 *) OLD_CL_OFFSET;
845 }
846 command_line = (char *) ((u64)(new_data));
847 - memcpy(saved_command_line, command_line, COMMAND_LINE_SIZE);
848 + memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE);
849 #else
850 int max_cmdline;
851
852 if ((max_cmdline = MAX_GUEST_CMDLINE) > COMMAND_LINE_SIZE)
853 max_cmdline = COMMAND_LINE_SIZE;
854 - memcpy(saved_command_line, xen_start_info->cmd_line, max_cmdline);
855 - saved_command_line[max_cmdline-1] = '\0';
856 + memcpy(boot_command_line, xen_start_info->cmd_line, max_cmdline);
857 + boot_command_line[max_cmdline-1] = '\0';
858 #endif
859 }
860
861 --- a/arch/x86/kernel/io_apic_32-xen.c
862 +++ b/arch/x86/kernel/io_apic_32-xen.c
863 @@ -167,7 +167,7 @@ static inline void io_apic_write(unsigne
864 */
865 static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
866 {
867 - volatile struct io_apic *io_apic = io_apic_base(apic);
868 + volatile struct io_apic __iomem *io_apic = io_apic_base(apic);
869 if (sis_apic_bug)
870 writel(reg, &io_apic->index);
871 writel(value, &io_apic->data);
872 @@ -392,7 +392,7 @@ static void set_ioapic_affinity_irq(unsi
873 break;
874 entry = irq_2_pin + entry->next;
875 }
876 - set_native_irq_info(irq, cpumask);
877 + irq_desc[irq].affinity = cpumask;
878 spin_unlock_irqrestore(&ioapic_lock, flags);
879 }
880
881 @@ -531,8 +531,8 @@ static void do_irq_balance(void)
882 package_index = CPU_TO_PACKAGEINDEX(i);
883 for (j = 0; j < NR_IRQS; j++) {
884 unsigned long value_now, delta;
885 - /* Is this an active IRQ? */
886 - if (!irq_desc[j].action)
887 + /* Is this an active IRQ or balancing disabled ? */
888 + if (!irq_desc[j].action || irq_balancing_disabled(j))
889 continue;
890 if ( package_index == i )
891 IRQ_DELTA(package_index,j) = 0;
892 @@ -785,7 +785,7 @@ failed:
893 return 0;
894 }
895
896 -int __init irqbalance_disable(char *str)
897 +int __devinit irqbalance_disable(char *str)
898 {
899 irqbalance_disabled = 1;
900 return 1;
901 @@ -1329,11 +1329,9 @@ static void ioapic_register_intr(int irq
902 trigger == IOAPIC_LEVEL)
903 set_irq_chip_and_handler_name(irq, &ioapic_chip,
904 handle_fasteoi_irq, "fasteoi");
905 - else {
906 - irq_desc[irq].status |= IRQ_DELAYED_DISABLE;
907 + else
908 set_irq_chip_and_handler_name(irq, &ioapic_chip,
909 handle_edge_irq, "edge");
910 - }
911 set_intr_gate(vector, interrupt[irq]);
912 }
913 #else
914 @@ -1407,7 +1405,6 @@ static void __init setup_IO_APIC_irqs(vo
915 }
916 spin_lock_irqsave(&ioapic_lock, flags);
917 __ioapic_write_entry(apic, pin, entry);
918 - set_native_irq_info(irq, TARGET_CPUS);
919 spin_unlock_irqrestore(&ioapic_lock, flags);
920 }
921 }
922 @@ -1638,7 +1635,7 @@ void /*__init*/ print_local_APIC(void *
923 v = apic_read(APIC_LVR);
924 printk(KERN_INFO "... APIC VERSION: %08x\n", v);
925 ver = GET_APIC_VERSION(v);
926 - maxlvt = get_maxlvt();
927 + maxlvt = lapic_get_maxlvt();
928
929 v = apic_read(APIC_TASKPRI);
930 printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
931 @@ -1976,7 +1973,7 @@ static void __init setup_ioapic_ids_from
932 #endif
933
934 #ifndef CONFIG_XEN
935 -static int no_timer_check __initdata;
936 +int no_timer_check __initdata;
937
938 static int __init notimercheck(char *s)
939 {
940 @@ -2369,7 +2366,7 @@ static inline void __init check_timer(vo
941
942 disable_8259A_irq(0);
943 set_irq_chip_and_handler_name(0, &lapic_chip, handle_fasteoi_irq,
944 - "fasteio");
945 + "fasteoi");
946 apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */
947 enable_8259A_irq(0);
948
949 @@ -2662,7 +2659,7 @@ static void set_msi_irq_affinity(unsigne
950 msg.address_lo |= MSI_ADDR_DEST_ID(dest);
951
952 write_msi_msg(irq, &msg);
953 - set_native_irq_info(irq, mask);
954 + irq_desc[irq].affinity = mask;
955 }
956 #endif /* CONFIG_SMP */
957
958 @@ -2681,25 +2678,32 @@ static struct irq_chip msi_chip = {
959 .retrigger = ioapic_retrigger_irq,
960 };
961
962 -int arch_setup_msi_irq(unsigned int irq, struct pci_dev *dev)
963 +int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
964 {
965 struct msi_msg msg;
966 - int ret;
967 + int irq, ret;
968 + irq = create_irq();
969 + if (irq < 0)
970 + return irq;
971 +
972 + set_irq_msi(irq, desc);
973 ret = msi_compose_msg(dev, irq, &msg);
974 - if (ret < 0)
975 + if (ret < 0) {
976 + destroy_irq(irq);
977 return ret;
978 + }
979
980 write_msi_msg(irq, &msg);
981
982 set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq,
983 "edge");
984
985 - return 0;
986 + return irq;
987 }
988
989 void arch_teardown_msi_irq(unsigned int irq)
990 {
991 - return;
992 + destroy_irq(irq);
993 }
994
995 #endif /* CONFIG_PCI_MSI */
996 @@ -2739,7 +2743,7 @@ static void set_ht_irq_affinity(unsigned
997 dest = cpu_mask_to_apicid(mask);
998
999 target_ht_irq(irq, dest);
1000 - set_native_irq_info(irq, mask);
1001 + irq_desc[irq].affinity = mask;
1002 }
1003 #endif
1004
1005 @@ -2947,7 +2951,6 @@ int io_apic_set_pci_routing (int ioapic,
1006
1007 spin_lock_irqsave(&ioapic_lock, flags);
1008 __ioapic_write_entry(ioapic, pin, entry);
1009 - set_native_irq_info(irq, TARGET_CPUS);
1010 spin_unlock_irqrestore(&ioapic_lock, flags);
1011
1012 return 0;
1013 --- a/arch/x86/kernel/io_apic_64-xen.c
1014 +++ b/arch/x86/kernel/io_apic_64-xen.c
1015 @@ -36,6 +36,7 @@
1016 #include <acpi/acpi_bus.h>
1017 #endif
1018
1019 +#include <asm/idle.h>
1020 #include <asm/io.h>
1021 #include <asm/smp.h>
1022 #include <asm/desc.h>
1023 @@ -47,7 +48,20 @@
1024 #include <asm/msidef.h>
1025 #include <asm/hypertransport.h>
1026
1027 -static int assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result);
1028 +struct irq_cfg {
1029 +#ifndef CONFIG_XEN
1030 + cpumask_t domain;
1031 + cpumask_t old_domain;
1032 +#endif
1033 + unsigned move_cleanup_count;
1034 + u8 vector;
1035 + u8 move_in_progress : 1;
1036 +};
1037 +
1038 +/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
1039 +struct irq_cfg irq_cfg[NR_IRQS] __read_mostly;
1040 +
1041 +static int assign_irq_vector(int irq, cpumask_t mask);
1042
1043 #define __apicdebuginit __init
1044
1045 @@ -89,7 +103,7 @@ int nr_ioapic_registers[MAX_IO_APICS];
1046 * Rough estimation of how many shared IRQs there are, can
1047 * be changed anytime.
1048 */
1049 -#define MAX_PLUS_SHARED_IRQS NR_IRQ_VECTORS
1050 +#define MAX_PLUS_SHARED_IRQS NR_IRQS
1051 #define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
1052
1053 /*
1054 @@ -262,21 +276,19 @@ static void __target_IO_APIC_irq(unsigne
1055
1056 static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
1057 {
1058 + struct irq_cfg *cfg = irq_cfg + irq;
1059 unsigned long flags;
1060 unsigned int dest;
1061 cpumask_t tmp;
1062 - int vector;
1063
1064 cpus_and(tmp, mask, cpu_online_map);
1065 if (cpus_empty(tmp))
1066 - tmp = TARGET_CPUS;
1067 -
1068 - cpus_and(mask, tmp, CPU_MASK_ALL);
1069 + return;
1070
1071 - vector = assign_irq_vector(irq, mask, &tmp);
1072 - if (vector < 0)
1073 + if (assign_irq_vector(irq, mask))
1074 return;
1075
1076 + cpus_and(tmp, cfg->domain, mask);
1077 dest = cpu_mask_to_apicid(tmp);
1078
1079 /*
1080 @@ -285,8 +297,8 @@ static void set_ioapic_affinity_irq(unsi
1081 dest = SET_APIC_LOGICAL_ID(dest);
1082
1083 spin_lock_irqsave(&ioapic_lock, flags);
1084 - __target_IO_APIC_irq(irq, dest, vector);
1085 - set_native_irq_info(irq, mask);
1086 + __target_IO_APIC_irq(irq, dest, cfg->vector);
1087 + irq_desc[irq].affinity = mask;
1088 spin_unlock_irqrestore(&ioapic_lock, flags);
1089 }
1090 #endif
1091 @@ -332,11 +344,11 @@ static void add_pin_to_irq(unsigned int
1092 reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \
1093 reg ACTION; \
1094 io_apic_modify(entry->apic, reg); \
1095 + FINAL; \
1096 if (!entry->next) \
1097 break; \
1098 entry = irq_2_pin + entry->next; \
1099 } \
1100 - FINAL; \
1101 }
1102
1103 #define DO_ACTION(name,R,ACTION, FINAL) \
1104 @@ -669,77 +681,62 @@ static int pin_2_irq(int idx, int apic,
1105 return irq;
1106 }
1107
1108 -static inline int IO_APIC_irq_trigger(int irq)
1109 -{
1110 - int apic, idx, pin;
1111 -
1112 - for (apic = 0; apic < nr_ioapics; apic++) {
1113 - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
1114 - idx = find_irq_entry(apic,pin,mp_INT);
1115 - if ((idx != -1) && (irq == pin_2_irq(idx,apic,pin)))
1116 - return irq_trigger(idx);
1117 - }
1118 - }
1119 - /*
1120 - * nonexistent IRQs are edge default
1121 - */
1122 - return 0;
1123 -}
1124 -
1125 -/* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
1126 -static u8 irq_vector[NR_IRQ_VECTORS] __read_mostly;
1127 -
1128 -static int __assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result)
1129 +static int __assign_irq_vector(int irq, cpumask_t mask)
1130 {
1131 - int vector;
1132 struct physdev_irq irq_op;
1133 + struct irq_cfg *cfg;
1134
1135 - BUG_ON((unsigned)irq >= NR_IRQ_VECTORS);
1136 + BUG_ON((unsigned)irq >= NR_IRQS);
1137
1138 if (irq < PIRQ_BASE || irq - PIRQ_BASE > NR_PIRQS)
1139 return -EINVAL;
1140
1141 - cpus_and(*result, mask, cpu_online_map);
1142 + cfg = &irq_cfg[irq];
1143 +
1144 + if ((cfg->move_in_progress) || cfg->move_cleanup_count)
1145 + return -EBUSY;
1146
1147 - if (irq_vector[irq] > 0)
1148 - return irq_vector[irq];
1149 + if (cfg->vector)
1150 + return 0;
1151
1152 irq_op.irq = irq;
1153 if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op))
1154 return -ENOSPC;
1155
1156 - vector = irq_op.vector;
1157 - irq_vector[irq] = vector;
1158 + cfg->vector = irq_op.vector;
1159
1160 - return vector;
1161 + return 0;
1162 }
1163
1164 -static int assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result)
1165 +static int assign_irq_vector(int irq, cpumask_t mask)
1166 {
1167 - int vector;
1168 + int err;
1169 unsigned long flags;
1170
1171 spin_lock_irqsave(&vector_lock, flags);
1172 - vector = __assign_irq_vector(irq, mask, result);
1173 + err = __assign_irq_vector(irq, mask);
1174 spin_unlock_irqrestore(&vector_lock, flags);
1175 - return vector;
1176 + return err;
1177 }
1178
1179 #ifndef CONFIG_XEN
1180 static void __clear_irq_vector(int irq)
1181 {
1182 + struct irq_cfg *cfg;
1183 cpumask_t mask;
1184 int cpu, vector;
1185
1186 - BUG_ON(!irq_vector[irq]);
1187 + BUG_ON((unsigned)irq >= NR_IRQS);
1188 + cfg = &irq_cfg[irq];
1189 + BUG_ON(!cfg->vector);
1190
1191 - vector = irq_vector[irq];
1192 - cpus_and(mask, irq_domain[irq], cpu_online_map);
1193 + vector = cfg->vector;
1194 + cpus_and(mask, cfg->domain, cpu_online_map);
1195 for_each_cpu_mask(cpu, mask)
1196 per_cpu(vector_irq, cpu)[vector] = -1;
1197
1198 - irq_vector[irq] = 0;
1199 - irq_domain[irq] = CPU_MASK_NONE;
1200 + cfg->vector = 0;
1201 + cfg->domain = CPU_MASK_NONE;
1202 }
1203
1204 void __setup_vector_irq(int cpu)
1205 @@ -749,10 +746,10 @@ void __setup_vector_irq(int cpu)
1206 int irq, vector;
1207
1208 /* Mark the inuse vectors */
1209 - for (irq = 0; irq < NR_IRQ_VECTORS; ++irq) {
1210 - if (!cpu_isset(cpu, irq_domain[irq]))
1211 + for (irq = 0; irq < NR_IRQS; ++irq) {
1212 + if (!cpu_isset(cpu, irq_cfg[irq].domain))
1213 continue;
1214 - vector = irq_vector[irq];
1215 + vector = irq_cfg[irq].vector;
1216 per_cpu(vector_irq, cpu)[vector] = irq;
1217 }
1218 /* Mark the free vectors */
1219 @@ -760,41 +757,49 @@ void __setup_vector_irq(int cpu)
1220 irq = per_cpu(vector_irq, cpu)[vector];
1221 if (irq < 0)
1222 continue;
1223 - if (!cpu_isset(cpu, irq_domain[irq]))
1224 + if (!cpu_isset(cpu, irq_cfg[irq].domain))
1225 per_cpu(vector_irq, cpu)[vector] = -1;
1226 }
1227 }
1228
1229 -extern void (*interrupt[NR_IRQS])(void);
1230 -
1231 static struct irq_chip ioapic_chip;
1232
1233 -#define IOAPIC_AUTO -1
1234 -#define IOAPIC_EDGE 0
1235 -#define IOAPIC_LEVEL 1
1236 -
1237 -static void ioapic_register_intr(int irq, int vector, unsigned long trigger)
1238 +static void ioapic_register_intr(int irq, unsigned long trigger)
1239 {
1240 - if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
1241 - trigger == IOAPIC_LEVEL)
1242 + if (trigger)
1243 set_irq_chip_and_handler_name(irq, &ioapic_chip,
1244 handle_fasteoi_irq, "fasteoi");
1245 - else {
1246 - irq_desc[irq].status |= IRQ_DELAYED_DISABLE;
1247 + else
1248 set_irq_chip_and_handler_name(irq, &ioapic_chip,
1249 handle_edge_irq, "edge");
1250 - }
1251 }
1252 #else
1253 -#define ioapic_register_intr(irq, vector, trigger) evtchn_register_pirq(irq)
1254 +#define ioapic_register_intr(irq, trigger) evtchn_register_pirq(irq)
1255 #endif /* !CONFIG_XEN */
1256
1257 -static void __init setup_IO_APIC_irq(int apic, int pin, int idx, int irq)
1258 +static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
1259 + int trigger, int polarity)
1260 {
1261 + struct irq_cfg *cfg = irq_cfg + irq;
1262 struct IO_APIC_route_entry entry;
1263 - int vector;
1264 - unsigned long flags;
1265 + cpumask_t mask;
1266 +
1267 + if (!IO_APIC_IRQ(irq))
1268 + return;
1269
1270 + mask = TARGET_CPUS;
1271 + if (assign_irq_vector(irq, mask))
1272 + return;
1273 +
1274 +#ifndef CONFIG_XEN
1275 + cpus_and(mask, cfg->domain, mask);
1276 +#endif
1277 +
1278 + apic_printk(APIC_VERBOSE,KERN_DEBUG
1279 + "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
1280 + "IRQ %d Mode:%i Active:%i)\n",
1281 + apic, mp_ioapics[apic].mpc_apicid, pin, cfg->vector,
1282 + irq, trigger, polarity);
1283
1284 /*
1285 * add it to the IO-APIC irq-routing table:
1286 @@ -803,41 +808,23 @@ static void __init setup_IO_APIC_irq(int
1287
1288 entry.delivery_mode = INT_DELIVERY_MODE;
1289 entry.dest_mode = INT_DEST_MODE;
1290 + entry.dest = cpu_mask_to_apicid(mask);
1291 entry.mask = 0; /* enable IRQ */
1292 - entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
1293 -
1294 - entry.trigger = irq_trigger(idx);
1295 - entry.polarity = irq_polarity(idx);
1296 + entry.trigger = trigger;
1297 + entry.polarity = polarity;
1298 + entry.vector = cfg->vector;
1299
1300 - if (irq_trigger(idx)) {
1301 - entry.trigger = 1;
1302 + /* Mask level triggered irqs.
1303 + * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
1304 + */
1305 + if (trigger)
1306 entry.mask = 1;
1307 - entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
1308 - }
1309 -
1310 - if (/* !apic && */ !IO_APIC_IRQ(irq))
1311 - return;
1312
1313 - if (IO_APIC_IRQ(irq)) {
1314 - cpumask_t mask;
1315 - vector = assign_irq_vector(irq, TARGET_CPUS, &mask);
1316 - if (vector < 0)
1317 - return;
1318 -
1319 - entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask);
1320 - entry.vector = vector;
1321 -
1322 - ioapic_register_intr(irq, vector, IOAPIC_AUTO);
1323 - if (!apic && (irq < 16))
1324 - disable_8259A_irq(irq);
1325 - }
1326 + ioapic_register_intr(irq, trigger);
1327 + if (irq < 16)
1328 + disable_8259A_irq(irq);
1329
1330 ioapic_write_entry(apic, pin, entry);
1331 -
1332 - spin_lock_irqsave(&ioapic_lock, flags);
1333 - set_native_irq_info(irq, TARGET_CPUS);
1334 - spin_unlock_irqrestore(&ioapic_lock, flags);
1335 -
1336 }
1337
1338 static void __init setup_IO_APIC_irqs(void)
1339 @@ -862,8 +849,8 @@ static void __init setup_IO_APIC_irqs(vo
1340 irq = pin_2_irq(idx, apic, pin);
1341 add_pin_to_irq(irq, apic, pin);
1342
1343 - setup_IO_APIC_irq(apic, pin, idx, irq);
1344 -
1345 + setup_IO_APIC_irq(apic, pin, irq,
1346 + irq_trigger(idx), irq_polarity(idx));
1347 }
1348 }
1349
1350 @@ -894,7 +881,7 @@ static void __init setup_ExtINT_IRQ0_pin
1351 */
1352 entry.dest_mode = INT_DEST_MODE;
1353 entry.mask = 0; /* unmask IRQ now */
1354 - entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
1355 + entry.dest = cpu_mask_to_apicid(TARGET_CPUS);
1356 entry.delivery_mode = INT_DELIVERY_MODE;
1357 entry.polarity = 0;
1358 entry.trigger = 0;
1359 @@ -994,18 +981,17 @@ void __apicdebuginit print_IO_APIC(void)
1360
1361 printk(KERN_DEBUG ".... IRQ redirection table:\n");
1362
1363 - printk(KERN_DEBUG " NR Log Phy Mask Trig IRR Pol"
1364 - " Stat Dest Deli Vect: \n");
1365 + printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol"
1366 + " Stat Dmod Deli Vect: \n");
1367
1368 for (i = 0; i <= reg_01.bits.entries; i++) {
1369 struct IO_APIC_route_entry entry;
1370
1371 entry = ioapic_read_entry(apic, i);
1372
1373 - printk(KERN_DEBUG " %02x %03X %02X ",
1374 + printk(KERN_DEBUG " %02x %03X ",
1375 i,
1376 - entry.dest.logical.logical_dest,
1377 - entry.dest.physical.physical_dest
1378 + entry.dest
1379 );
1380
1381 printk("%1d %1d %1d %1d %1d %1d %1d %02X\n",
1382 @@ -1269,8 +1255,7 @@ void disable_IO_APIC(void)
1383 entry.dest_mode = 0; /* Physical */
1384 entry.delivery_mode = dest_ExtINT; /* ExtInt */
1385 entry.vector = 0;
1386 - entry.dest.physical.physical_dest =
1387 - GET_APIC_ID(apic_read(APIC_ID));
1388 + entry.dest = GET_APIC_ID(apic_read(APIC_ID));
1389
1390 /*
1391 * Add it to the IO-APIC irq-routing table:
1392 @@ -1355,16 +1340,15 @@ static unsigned int startup_ioapic_irq(u
1393
1394 static int ioapic_retrigger_irq(unsigned int irq)
1395 {
1396 + struct irq_cfg *cfg = &irq_cfg[irq];
1397 cpumask_t mask;
1398 - unsigned vector;
1399 unsigned long flags;
1400
1401 spin_lock_irqsave(&vector_lock, flags);
1402 - vector = irq_vector[irq];
1403 cpus_clear(mask);
1404 - cpu_set(first_cpu(irq_domain[irq]), mask);
1405 + cpu_set(first_cpu(cfg->domain), mask);
1406
1407 - send_IPI_mask(mask, vector);
1408 + send_IPI_mask(mask, cfg->vector);
1409 spin_unlock_irqrestore(&vector_lock, flags);
1410
1411 return 1;
1412 @@ -1379,8 +1363,68 @@ static int ioapic_retrigger_irq(unsigned
1413 * races.
1414 */
1415
1416 +#ifdef CONFIG_SMP
1417 +asmlinkage void smp_irq_move_cleanup_interrupt(void)
1418 +{
1419 + unsigned vector, me;
1420 + ack_APIC_irq();
1421 + exit_idle();
1422 + irq_enter();
1423 +
1424 + me = smp_processor_id();
1425 + for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
1426 + unsigned int irq;
1427 + struct irq_desc *desc;
1428 + struct irq_cfg *cfg;
1429 + irq = __get_cpu_var(vector_irq)[vector];
1430 + if (irq >= NR_IRQS)
1431 + continue;
1432 +
1433 + desc = irq_desc + irq;
1434 + cfg = irq_cfg + irq;
1435 + spin_lock(&desc->lock);
1436 + if (!cfg->move_cleanup_count)
1437 + goto unlock;
1438 +
1439 + if ((vector == cfg->vector) && cpu_isset(me, cfg->domain))
1440 + goto unlock;
1441 +
1442 + __get_cpu_var(vector_irq)[vector] = -1;
1443 + cfg->move_cleanup_count--;
1444 +unlock:
1445 + spin_unlock(&desc->lock);
1446 + }
1447 +
1448 + irq_exit();
1449 +}
1450 +
1451 +static void irq_complete_move(unsigned int irq)
1452 +{
1453 + struct irq_cfg *cfg = irq_cfg + irq;
1454 + unsigned vector, me;
1455 +
1456 + if (likely(!cfg->move_in_progress))
1457 + return;
1458 +
1459 + vector = ~get_irq_regs()->orig_rax;
1460 + me = smp_processor_id();
1461 + if ((vector == cfg->vector) &&
1462 + cpu_isset(smp_processor_id(), cfg->domain)) {
1463 + cpumask_t cleanup_mask;
1464 +
1465 + cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
1466 + cfg->move_cleanup_count = cpus_weight(cleanup_mask);
1467 + send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
1468 + cfg->move_in_progress = 0;
1469 + }
1470 +}
1471 +#else
1472 +static inline void irq_complete_move(unsigned int irq) {}
1473 +#endif
1474 +
1475 static void ack_apic_edge(unsigned int irq)
1476 {
1477 + irq_complete_move(irq);
1478 move_native_irq(irq);
1479 ack_APIC_irq();
1480 }
1481 @@ -1389,6 +1433,7 @@ static void ack_apic_level(unsigned int
1482 {
1483 int do_unmask_irq = 0;
1484
1485 + irq_complete_move(irq);
1486 #if defined(CONFIG_GENERIC_PENDING_IRQ) || defined(CONFIG_IRQBALANCE)
1487 /* If we are moving the irq we need to mask it */
1488 if (unlikely(irq_desc[irq].status & IRQ_MOVE_PENDING)) {
1489 @@ -1440,7 +1485,7 @@ static inline void init_IO_APIC_traps(vo
1490 */
1491 for (irq = 0; irq < NR_IRQS ; irq++) {
1492 int tmp = irq;
1493 - if (IO_APIC_IRQ(tmp) && !irq_vector[tmp]) {
1494 + if (IO_APIC_IRQ(tmp) && !irq_cfg[tmp].vector) {
1495 /*
1496 * Hmm.. We don't have an entry for this,
1497 * so default to an old-fashioned 8259
1498 @@ -1538,7 +1583,7 @@ static inline void unlock_ExtINT_logic(v
1499
1500 entry1.dest_mode = 0; /* physical delivery */
1501 entry1.mask = 0; /* unmask IRQ now */
1502 - entry1.dest.physical.physical_dest = hard_smp_processor_id();
1503 + entry1.dest = hard_smp_processor_id();
1504 entry1.delivery_mode = dest_ExtINT;
1505 entry1.polarity = entry0.polarity;
1506 entry1.trigger = 0;
1507 @@ -1582,15 +1627,14 @@ static inline void unlock_ExtINT_logic(v
1508 */
1509 static inline void check_timer(void)
1510 {
1511 + struct irq_cfg *cfg = irq_cfg + 0;
1512 int apic1, pin1, apic2, pin2;
1513 - int vector;
1514 - cpumask_t mask;
1515
1516 /*
1517 * get/set the timer IRQ vector:
1518 */
1519 disable_8259A_irq(0);
1520 - vector = assign_irq_vector(0, TARGET_CPUS, &mask);
1521 + assign_irq_vector(0, TARGET_CPUS);
1522
1523 /*
1524 * Subtle, code in do_timer_interrupt() expects an AEOI
1525 @@ -1610,7 +1654,7 @@ static inline void check_timer(void)
1526 apic2 = ioapic_i8259.apic;
1527
1528 apic_printk(APIC_VERBOSE,KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n",
1529 - vector, apic1, pin1, apic2, pin2);
1530 + cfg->vector, apic1, pin1, apic2, pin2);
1531
1532 if (pin1 != -1) {
1533 /*
1534 @@ -1641,7 +1685,7 @@ static inline void check_timer(void)
1535 /*
1536 * legacy devices should be connected to IO APIC #0
1537 */
1538 - setup_ExtINT_IRQ0_pin(apic2, pin2, vector);
1539 + setup_ExtINT_IRQ0_pin(apic2, pin2, cfg->vector);
1540 if (timer_irq_works()) {
1541 apic_printk(APIC_VERBOSE," works.\n");
1542 nmi_watchdog_default();
1543 @@ -1666,14 +1710,14 @@ static inline void check_timer(void)
1544
1545 disable_8259A_irq(0);
1546 irq_desc[0].chip = &lapic_irq_type;
1547 - apic_write(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */
1548 + apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */
1549 enable_8259A_irq(0);
1550
1551 if (timer_irq_works()) {
1552 apic_printk(APIC_VERBOSE," works.\n");
1553 return;
1554 }
1555 - apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector);
1556 + apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector);
1557 apic_printk(APIC_VERBOSE," failed.\n");
1558
1559 apic_printk(APIC_VERBOSE, KERN_INFO "...trying to set up timer as ExtINT IRQ...");
1560 @@ -1828,19 +1872,16 @@ int create_irq(void)
1561 /* Allocate an unused irq */
1562 int irq;
1563 int new;
1564 - int vector = 0;
1565 unsigned long flags;
1566 - cpumask_t mask;
1567
1568 irq = -ENOSPC;
1569 spin_lock_irqsave(&vector_lock, flags);
1570 for (new = (NR_IRQS - 1); new >= 0; new--) {
1571 if (platform_legacy_irq(new))
1572 continue;
1573 - if (irq_vector[new] != 0)
1574 + if (irq_cfg[new].vector != 0)
1575 continue;
1576 - vector = __assign_irq_vector(new, TARGET_CPUS, &mask);
1577 - if (likely(vector > 0))
1578 + if (__assign_irq_vector(new, TARGET_CPUS) == 0)
1579 irq = new;
1580 break;
1581 }
1582 @@ -1871,12 +1912,15 @@ void destroy_irq(unsigned int irq)
1583 #if defined(CONFIG_PCI_MSI) && !defined(CONFIG_XEN)
1584 static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
1585 {
1586 - int vector;
1587 + struct irq_cfg *cfg = irq_cfg + irq;
1588 + int err;
1589 unsigned dest;
1590 cpumask_t tmp;
1591
1592 - vector = assign_irq_vector(irq, TARGET_CPUS, &tmp);
1593 - if (vector >= 0) {
1594 + tmp = TARGET_CPUS;
1595 + err = assign_irq_vector(irq, tmp);
1596 + if (!err) {
1597 + cpus_and(tmp, cfg->domain, tmp);
1598 dest = cpu_mask_to_apicid(tmp);
1599
1600 msg->address_hi = MSI_ADDR_BASE_HI;
1601 @@ -1896,40 +1940,38 @@ static int msi_compose_msg(struct pci_de
1602 ((INT_DELIVERY_MODE != dest_LowestPrio) ?
1603 MSI_DATA_DELIVERY_FIXED:
1604 MSI_DATA_DELIVERY_LOWPRI) |
1605 - MSI_DATA_VECTOR(vector);
1606 + MSI_DATA_VECTOR(cfg->vector);
1607 }
1608 - return vector;
1609 + return err;
1610 }
1611
1612 #ifdef CONFIG_SMP
1613 static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
1614 {
1615 + struct irq_cfg *cfg = irq_cfg + irq;
1616 struct msi_msg msg;
1617 unsigned int dest;
1618 cpumask_t tmp;
1619 - int vector;
1620
1621 cpus_and(tmp, mask, cpu_online_map);
1622 if (cpus_empty(tmp))
1623 - tmp = TARGET_CPUS;
1624 -
1625 - cpus_and(mask, tmp, CPU_MASK_ALL);
1626 + return;
1627
1628 - vector = assign_irq_vector(irq, mask, &tmp);
1629 - if (vector < 0)
1630 + if (assign_irq_vector(irq, mask))
1631 return;
1632
1633 + cpus_and(tmp, cfg->domain, mask);
1634 dest = cpu_mask_to_apicid(tmp);
1635
1636 read_msi_msg(irq, &msg);
1637
1638 msg.data &= ~MSI_DATA_VECTOR_MASK;
1639 - msg.data |= MSI_DATA_VECTOR(vector);
1640 + msg.data |= MSI_DATA_VECTOR(cfg->vector);
1641 msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
1642 msg.address_lo |= MSI_ADDR_DEST_ID(dest);
1643
1644 write_msi_msg(irq, &msg);
1645 - set_native_irq_info(irq, mask);
1646 + irq_desc[irq].affinity = mask;
1647 }
1648 #endif /* CONFIG_SMP */
1649
1650 @@ -1948,24 +1990,31 @@ static struct irq_chip msi_chip = {
1651 .retrigger = ioapic_retrigger_irq,
1652 };
1653
1654 -int arch_setup_msi_irq(unsigned int irq, struct pci_dev *dev)
1655 +int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
1656 {
1657 struct msi_msg msg;
1658 - int ret;
1659 + int irq, ret;
1660 + irq = create_irq();
1661 + if (irq < 0)
1662 + return irq;
1663 +
1664 + set_irq_msi(irq, desc);
1665 ret = msi_compose_msg(dev, irq, &msg);
1666 - if (ret < 0)
1667 + if (ret < 0) {
1668 + destroy_irq(irq);
1669 return ret;
1670 + }
1671
1672 write_msi_msg(irq, &msg);
1673
1674 set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
1675
1676 - return 0;
1677 + return irq;
1678 }
1679
1680 void arch_teardown_msi_irq(unsigned int irq)
1681 {
1682 - return;
1683 + destroy_irq(irq);
1684 }
1685
1686 #endif /* CONFIG_PCI_MSI */
1687 @@ -1993,24 +2042,22 @@ static void target_ht_irq(unsigned int i
1688
1689 static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
1690 {
1691 + struct irq_cfg *cfg = irq_cfg + irq;
1692 unsigned int dest;
1693 cpumask_t tmp;
1694 - int vector;
1695
1696 cpus_and(tmp, mask, cpu_online_map);
1697 if (cpus_empty(tmp))
1698 - tmp = TARGET_CPUS;
1699 -
1700 - cpus_and(mask, tmp, CPU_MASK_ALL);
1701 + return;
1702
1703 - vector = assign_irq_vector(irq, mask, &tmp);
1704 - if (vector < 0)
1705 + if (assign_irq_vector(irq, mask))
1706 return;
1707
1708 + cpus_and(tmp, cfg->domain, mask);
1709 dest = cpu_mask_to_apicid(tmp);
1710
1711 - target_ht_irq(irq, dest, vector);
1712 - set_native_irq_info(irq, mask);
1713 + target_ht_irq(irq, dest, cfg->vector);
1714 + irq_desc[irq].affinity = mask;
1715 }
1716 #endif
1717
1718 @@ -2027,14 +2074,17 @@ static struct irq_chip ht_irq_chip = {
1719
1720 int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
1721 {
1722 - int vector;
1723 + struct irq_cfg *cfg = irq_cfg + irq;
1724 + int err;
1725 cpumask_t tmp;
1726
1727 - vector = assign_irq_vector(irq, TARGET_CPUS, &tmp);
1728 - if (vector >= 0) {
1729 + tmp = TARGET_CPUS;
1730 + err = assign_irq_vector(irq, tmp);
1731 + if (!err) {
1732 struct ht_irq_msg msg;
1733 unsigned dest;
1734
1735 + cpus_and(tmp, cfg->domain, tmp);
1736 dest = cpu_mask_to_apicid(tmp);
1737
1738 msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
1739 @@ -2042,7 +2092,7 @@ int arch_setup_ht_irq(unsigned int irq,
1740 msg.address_lo =
1741 HT_IRQ_LOW_BASE |
1742 HT_IRQ_LOW_DEST_ID(dest) |
1743 - HT_IRQ_LOW_VECTOR(vector) |
1744 + HT_IRQ_LOW_VECTOR(cfg->vector) |
1745 ((INT_DEST_MODE == 0) ?
1746 HT_IRQ_LOW_DM_PHYSICAL :
1747 HT_IRQ_LOW_DM_LOGICAL) |
1748 @@ -2057,7 +2107,7 @@ int arch_setup_ht_irq(unsigned int irq,
1749 set_irq_chip_and_handler_name(irq, &ht_irq_chip,
1750 handle_edge_irq, "edge");
1751 }
1752 - return vector;
1753 + return err;
1754 }
1755 #endif /* CONFIG_HT_IRQ */
1756
1757 @@ -2082,13 +2132,8 @@ int __init io_apic_get_redir_entries (in
1758 }
1759
1760
1761 -int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int active_high_low)
1762 +int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity)
1763 {
1764 - struct IO_APIC_route_entry entry;
1765 - unsigned long flags;
1766 - int vector;
1767 - cpumask_t mask;
1768 -
1769 if (!IO_APIC_IRQ(irq)) {
1770 apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
1771 ioapic);
1772 @@ -2101,42 +2146,7 @@ int io_apic_set_pci_routing (int ioapic,
1773 if (irq >= 16)
1774 add_pin_to_irq(irq, ioapic, pin);
1775
1776 -
1777 - vector = assign_irq_vector(irq, TARGET_CPUS, &mask);
1778 - if (vector < 0)
1779 - return vector;
1780 -
1781 - /*
1782 - * Generate a PCI IRQ routing entry and program the IOAPIC accordingly.
1783 - * Note that we mask (disable) IRQs now -- these get enabled when the
1784 - * corresponding device driver registers for this IRQ.
1785 - */
1786 -
1787 - memset(&entry,0,sizeof(entry));
1788 -
1789 - entry.delivery_mode = INT_DELIVERY_MODE;
1790 - entry.dest_mode = INT_DEST_MODE;
1791 - entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask);
1792 - entry.trigger = edge_level;
1793 - entry.polarity = active_high_low;
1794 - entry.mask = 1; /* Disabled (masked) */
1795 - entry.vector = vector & 0xff;
1796 -
1797 - apic_printk(APIC_VERBOSE,KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry (%d-%d -> 0x%x -> "
1798 - "IRQ %d Mode:%i Active:%i)\n", ioapic,
1799 - mp_ioapics[ioapic].mpc_apicid, pin, entry.vector, irq,
1800 - edge_level, active_high_low);
1801 -
1802 - ioapic_register_intr(irq, entry.vector, edge_level);
1803 -
1804 - if (!ioapic && (irq < 16))
1805 - disable_8259A_irq(irq);
1806 -
1807 - ioapic_write_entry(ioapic, pin, entry);
1808 -
1809 - spin_lock_irqsave(&ioapic_lock, flags);
1810 - set_native_irq_info(irq, TARGET_CPUS);
1811 - spin_unlock_irqrestore(&ioapic_lock, flags);
1812 + setup_IO_APIC_irq(ioapic, pin, irq, triggering, polarity);
1813
1814 return 0;
1815 }
1816 @@ -2169,8 +2179,10 @@ void __init setup_ioapic_dest(void)
1817 * when you have too many devices, because at that time only boot
1818 * cpu is online.
1819 */
1820 - if(!irq_vector[irq])
1821 - setup_IO_APIC_irq(ioapic, pin, irq_entry, irq);
1822 + if (!irq_cfg[irq].vector)
1823 + setup_IO_APIC_irq(ioapic, pin, irq,
1824 + irq_trigger(irq_entry),
1825 + irq_polarity(irq_entry));
1826 else
1827 set_ioapic_affinity_irq(irq, TARGET_CPUS);
1828 }
1829 --- a/arch/x86/kernel/irq_32-xen.c
1830 +++ b/arch/x86/kernel/irq_32-xen.c
1831 @@ -10,7 +10,6 @@
1832 * io_apic.c.)
1833 */
1834
1835 -#include <asm/uaccess.h>
1836 #include <linux/module.h>
1837 #include <linux/seq_file.h>
1838 #include <linux/interrupt.h>
1839 @@ -19,19 +18,34 @@
1840 #include <linux/cpu.h>
1841 #include <linux/delay.h>
1842
1843 +#include <asm/apic.h>
1844 +#include <asm/uaccess.h>
1845 +
1846 DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_internodealigned_in_smp;
1847 EXPORT_PER_CPU_SYMBOL(irq_stat);
1848
1849 -#ifndef CONFIG_X86_LOCAL_APIC
1850 /*
1851 * 'what should we do if we get a hw irq event on an illegal vector'.
1852 * each architecture has to answer this themselves.
1853 */
1854 void ack_bad_irq(unsigned int irq)
1855 {
1856 - printk("unexpected IRQ trap at vector %02x\n", irq);
1857 -}
1858 + printk(KERN_ERR "unexpected IRQ trap at vector %02x\n", irq);
1859 +
1860 +#if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_XEN)
1861 + /*
1862 + * Currently unexpected vectors happen only on SMP and APIC.
1863 + * We _must_ ack these because every local APIC has only N
1864 + * irq slots per priority level, and a 'hanging, unacked' IRQ
1865 + * holds up an irq slot - in excessive cases (when multiple
1866 + * unexpected vectors occur) that might lock up the APIC
1867 + * completely.
1868 + * But only ack when the APIC is enabled -AK
1869 + */
1870 + if (cpu_has_apic)
1871 + ack_APIC_irq();
1872 #endif
1873 +}
1874
1875 #ifdef CONFIG_4KSTACKS
1876 /*
1877 --- a/arch/x86/kernel/irq_64-xen.c
1878 +++ b/arch/x86/kernel/irq_64-xen.c
1879 @@ -18,6 +18,7 @@
1880 #include <asm/uaccess.h>
1881 #include <asm/io_apic.h>
1882 #include <asm/idle.h>
1883 +#include <asm/smp.h>
1884
1885 atomic_t irq_err_count;
1886
1887 @@ -120,9 +121,15 @@ asmlinkage unsigned int do_IRQ(struct pt
1888
1889 if (likely(irq < NR_IRQS))
1890 generic_handle_irq(irq);
1891 - else if (printk_ratelimit())
1892 - printk(KERN_EMERG "%s: %d.%d No irq handler for irq\n",
1893 - __func__, smp_processor_id(), irq);
1894 + else {
1895 +#ifndef CONFIG_XEN
1896 + if (!disable_apic)
1897 + ack_APIC_irq();
1898 +#endif
1899 + if (printk_ratelimit())
1900 + printk(KERN_EMERG "%s: %d.%d No irq handler for irq\n",
1901 + __func__, smp_processor_id(), irq);
1902 + }
1903
1904 /*irq_exit();*/
1905
1906 --- a/arch/x86/kernel/Makefile
1907 +++ b/arch/x86/kernel/Makefile
1908 @@ -124,7 +124,7 @@ ifeq ($(CONFIG_X86_64),y)
1909 pci-dma_64-$(CONFIG_XEN) += pci-dma_32.o
1910 endif
1911
1912 -disabled-obj-$(CONFIG_XEN) := early-quirks.o i8253.o i8259_$(BITS).o reboot.o \
1913 - smpboot_$(BITS).o tsc_$(BITS).o
1914 +disabled-obj-$(CONFIG_XEN) := early-quirks.o hpet.o i8253.o i8259_$(BITS).o reboot.o \
1915 + smpboot_$(BITS).o tsc_$(BITS).o tsc_sync.o
1916 disabled-obj-$(CONFIG_XEN_UNPRIVILEGED_GUEST) += mpparse_64.o
1917 %/head_$(BITS).o %/head_$(BITS).s: $(if $(CONFIG_XEN),EXTRA_AFLAGS,dummy) :=
1918 --- a/arch/x86/kernel/microcode-xen.c
1919 +++ b/arch/x86/kernel/microcode-xen.c
1920 @@ -108,7 +108,7 @@ static ssize_t microcode_write (struct f
1921 return ret;
1922 }
1923
1924 -static struct file_operations microcode_fops = {
1925 +static const struct file_operations microcode_fops = {
1926 .owner = THIS_MODULE,
1927 .write = microcode_write,
1928 .open = microcode_open,
1929 --- a/arch/x86/kernel/mpparse_32-xen.c
1930 +++ b/arch/x86/kernel/mpparse_32-xen.c
1931 @@ -1079,7 +1079,7 @@ int mp_register_gsi(u32 gsi, int trigger
1932 static int gsi_to_irq[MAX_GSI_NUM];
1933
1934 /* Don't set up the ACPI SCI because it's already set up */
1935 - if (acpi_fadt.sci_int == gsi)
1936 + if (acpi_gbl_FADT.sci_interrupt == gsi)
1937 return gsi;
1938
1939 ioapic = mp_find_ioapic(gsi);
1940 @@ -1136,7 +1136,7 @@ int mp_register_gsi(u32 gsi, int trigger
1941 /*
1942 * Don't assign IRQ used by ACPI SCI
1943 */
1944 - if (gsi == acpi_fadt.sci_int)
1945 + if (gsi == acpi_gbl_FADT.sci_interrupt)
1946 gsi = pci_irq++;
1947 gsi_to_irq[irq] = gsi;
1948 } else {
1949 --- a/arch/x86/kernel/mpparse_64-xen.c
1950 +++ b/arch/x86/kernel/mpparse_64-xen.c
1951 @@ -60,9 +60,9 @@ unsigned long mp_lapic_addr = 0;
1952 /* Processor that is doing the boot up */
1953 unsigned int boot_cpu_id = -1U;
1954 /* Internal processor count */
1955 -unsigned int num_processors __initdata = 0;
1956 +unsigned int num_processors __cpuinitdata = 0;
1957
1958 -unsigned disabled_cpus __initdata;
1959 +unsigned disabled_cpus __cpuinitdata;
1960
1961 /* Bitmask of physically existing CPUs */
1962 physid_mask_t phys_cpu_present_map = PHYSID_MASK_NONE;
1963 @@ -808,7 +808,7 @@ int mp_register_gsi(u32 gsi, int trigger
1964 return gsi;
1965
1966 /* Don't set up the ACPI SCI because it's already set up */
1967 - if (acpi_fadt.sci_int == gsi)
1968 + if (acpi_gbl_FADT.sci_interrupt == gsi)
1969 return gsi;
1970
1971 ioapic = mp_find_ioapic(gsi);
1972 --- a/arch/x86/kernel/pci-dma-xen.c
1973 +++ b/arch/x86/kernel/pci-dma-xen.c
1974 @@ -311,7 +311,7 @@ int dma_declare_coherent_memory(struct d
1975 return DMA_MEMORY_IO;
1976
1977 free1_out:
1978 - kfree(dev->dma_mem->bitmap);
1979 + kfree(dev->dma_mem);
1980 out:
1981 if (mem_base)
1982 iounmap(mem_base);
1983 --- a/arch/x86/kernel/pcspeaker.c
1984 +++ b/arch/x86/kernel/pcspeaker.c
1985 @@ -7,6 +7,11 @@ static __init int add_pcspkr(void)
1986 struct platform_device *pd;
1987 int ret;
1988
1989 +#ifdef CONFIG_XEN
1990 + if (!is_initial_xendomain())
1991 + return 0;
1992 +#endif
1993 +
1994 pd = platform_device_alloc("pcspkr", -1);
1995 if (!pd)
1996 return -ENOMEM;
1997 --- a/arch/x86/kernel/process_32-xen.c
1998 +++ b/arch/x86/kernel/process_32-xen.c
1999 @@ -38,6 +38,7 @@
2000 #include <linux/ptrace.h>
2001 #include <linux/random.h>
2002 #include <linux/personality.h>
2003 +#include <linux/tick.h>
2004
2005 #include <asm/uaccess.h>
2006 #include <asm/pgtable.h>
2007 @@ -160,6 +161,7 @@ void cpu_idle(void)
2008
2009 /* endless idle loop with no priority at all */
2010 while (1) {
2011 + tick_nohz_stop_sched_tick();
2012 while (!need_resched()) {
2013 void (*idle)(void);
2014
2015 @@ -175,6 +177,7 @@ void cpu_idle(void)
2016 __get_cpu_var(irq_stat).idle_timestamp = jiffies;
2017 idle();
2018 }
2019 + tick_nohz_restart_sched_tick();
2020 preempt_enable_no_resched();
2021 schedule();
2022 preempt_disable();
2023 @@ -247,8 +250,8 @@ void show_regs(struct pt_regs * regs)
2024 regs->eax,regs->ebx,regs->ecx,regs->edx);
2025 printk("ESI: %08lx EDI: %08lx EBP: %08lx",
2026 regs->esi, regs->edi, regs->ebp);
2027 - printk(" DS: %04x ES: %04x GS: %04x\n",
2028 - 0xffff & regs->xds,0xffff & regs->xes, 0xffff & regs->xgs);
2029 + printk(" DS: %04x ES: %04x FS: %04x\n",
2030 + 0xffff & regs->xds,0xffff & regs->xes, 0xffff & regs->xfs);
2031
2032 cr0 = read_cr0();
2033 cr2 = read_cr2();
2034 @@ -279,7 +282,7 @@ int kernel_thread(int (*fn)(void *), voi
2035
2036 regs.xds = __USER_DS;
2037 regs.xes = __USER_DS;
2038 - regs.xgs = __KERNEL_PDA;
2039 + regs.xfs = __KERNEL_PDA;
2040 regs.orig_eax = -1;
2041 regs.eip = (unsigned long) kernel_thread_helper;
2042 regs.xcs = __KERNEL_CS | get_kernel_rpl();
2043 @@ -356,7 +359,7 @@ int copy_thread(int nr, unsigned long cl
2044
2045 p->thread.eip = (unsigned long) ret_from_fork;
2046
2047 - savesegment(fs,p->thread.fs);
2048 + savesegment(gs,p->thread.gs);
2049
2050 tsk = current;
2051 if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
2052 @@ -434,8 +437,8 @@ void dump_thread(struct pt_regs * regs,
2053 dump->regs.eax = regs->eax;
2054 dump->regs.ds = regs->xds;
2055 dump->regs.es = regs->xes;
2056 - savesegment(fs,dump->regs.fs);
2057 - dump->regs.gs = regs->xgs;
2058 + dump->regs.fs = regs->xfs;
2059 + savesegment(gs,dump->regs.gs);
2060 dump->regs.orig_eax = regs->orig_eax;
2061 dump->regs.eip = regs->eip;
2062 dump->regs.cs = regs->xcs;
2063 @@ -637,16 +640,6 @@ struct task_struct fastcall * __switch_t
2064 prefetch(&next->i387.fxsave);
2065
2066 /*
2067 - * Restore %fs if needed.
2068 - *
2069 - * Glibc normally makes %fs be zero.
2070 - */
2071 - if (unlikely(next->fs))
2072 - loadsegment(fs, next->fs);
2073 -
2074 - write_pda(pcurrent, next_p);
2075 -
2076 - /*
2077 * Now maybe handle debug registers
2078 */
2079 if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW))
2080 @@ -654,6 +647,15 @@ struct task_struct fastcall * __switch_t
2081
2082 disable_tsc(prev_p, next_p);
2083
2084 + /*
2085 + * Leave lazy mode, flushing any hypercalls made here.
2086 + * This must be done before restoring TLS segments so
2087 + * the GDT and LDT are properly updated, and must be
2088 + * done before math_state_restore, so the TS bit is up
2089 + * to date.
2090 + */
2091 + arch_leave_lazy_cpu_mode();
2092 +
2093 /* If the task has used fpu the last 5 timeslices, just do a full
2094 * restore of the math state immediately to avoid the trap; the
2095 * chances of needing FPU soon are obviously high now
2096 @@ -661,6 +663,14 @@ struct task_struct fastcall * __switch_t
2097 if (next_p->fpu_counter > 5)
2098 math_state_restore();
2099
2100 + /*
2101 + * Restore %gs if needed (which is common)
2102 + */
2103 + if (prev->gs | next->gs)
2104 + loadsegment(gs, next->gs);
2105 +
2106 + write_pda(pcurrent, next_p);
2107 +
2108 return prev_p;
2109 }
2110
2111 --- a/arch/x86/kernel/process_64-xen.c
2112 +++ b/arch/x86/kernel/process_64-xen.c
2113 @@ -338,14 +338,17 @@ void load_gs_index(unsigned gs)
2114 void flush_thread(void)
2115 {
2116 struct task_struct *tsk = current;
2117 - struct thread_info *t = current_thread_info();
2118
2119 - if (t->flags & _TIF_ABI_PENDING) {
2120 - t->flags ^= (_TIF_ABI_PENDING | _TIF_IA32);
2121 - if (t->flags & _TIF_IA32)
2122 + if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) {
2123 + clear_tsk_thread_flag(tsk, TIF_ABI_PENDING);
2124 + if (test_tsk_thread_flag(tsk, TIF_IA32)) {
2125 + clear_tsk_thread_flag(tsk, TIF_IA32);
2126 + } else {
2127 + set_tsk_thread_flag(tsk, TIF_IA32);
2128 current_thread_info()->status |= TS_COMPAT;
2129 + }
2130 }
2131 - t->flags &= ~_TIF_DEBUG;
2132 + clear_tsk_thread_flag(tsk, TIF_DEBUG);
2133
2134 tsk->thread.debugreg0 = 0;
2135 tsk->thread.debugreg1 = 0;
2136 --- a/arch/x86/kernel/setup_32-xen.c
2137 +++ b/arch/x86/kernel/setup_32-xen.c
2138 @@ -33,7 +33,6 @@
2139 #include <linux/initrd.h>
2140 #include <linux/bootmem.h>
2141 #include <linux/seq_file.h>
2142 -#include <linux/platform_device.h>
2143 #include <linux/console.h>
2144 #include <linux/mca.h>
2145 #include <linux/root_dev.h>
2146 @@ -148,7 +147,7 @@ unsigned long saved_videomode;
2147 #define RAMDISK_PROMPT_FLAG 0x8000
2148 #define RAMDISK_LOAD_FLAG 0x4000
2149
2150 -static char command_line[COMMAND_LINE_SIZE];
2151 +static char __initdata command_line[COMMAND_LINE_SIZE];
2152
2153 unsigned char __initdata boot_params[PARAM_SIZE];
2154
2155 @@ -647,8 +646,8 @@ void __init setup_arch(char **cmdline_p)
2156
2157 if ((i = MAX_GUEST_CMDLINE) > COMMAND_LINE_SIZE)
2158 i = COMMAND_LINE_SIZE;
2159 - memcpy(saved_command_line, xen_start_info->cmd_line, i);
2160 - saved_command_line[i - 1] = '\0';
2161 + memcpy(boot_command_line, xen_start_info->cmd_line, i);
2162 + boot_command_line[i - 1] = '\0';
2163 parse_early_param();
2164
2165 if (user_defined_memmap) {
2166 @@ -656,11 +655,19 @@ void __init setup_arch(char **cmdline_p)
2167 print_memory_map("user");
2168 }
2169
2170 - strlcpy(command_line, saved_command_line, COMMAND_LINE_SIZE);
2171 + strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
2172 *cmdline_p = command_line;
2173
2174 max_low_pfn = setup_memory();
2175
2176 +#ifdef CONFIG_VMI
2177 + /*
2178 + * Must be after max_low_pfn is determined, and before kernel
2179 + * pagetables are setup.
2180 + */
2181 + vmi_init();
2182 +#endif
2183 +
2184 /*
2185 * NOTE: before this point _nobody_ is allowed to allocate
2186 * any memory using the bootmem allocator. Although the
2187 @@ -823,7 +830,6 @@ void __init setup_arch(char **cmdline_p)
2188 conswitchp = &dummy_con;
2189 #endif
2190 }
2191 - tsc_init();
2192 }
2193
2194 static int
2195 @@ -833,31 +839,3 @@ xen_panic_event(struct notifier_block *t
2196 /* we're never actually going to get here... */
2197 return NOTIFY_DONE;
2198 }
2199 -
2200 -static __init int add_pcspkr(void)
2201 -{
2202 - struct platform_device *pd;
2203 - int ret;
2204 -
2205 - if (!is_initial_xendomain())
2206 - return 0;
2207 -
2208 - pd = platform_device_alloc("pcspkr", -1);
2209 - if (!pd)
2210 - return -ENOMEM;
2211 -
2212 - ret = platform_device_add(pd);
2213 - if (ret)
2214 - platform_device_put(pd);
2215 -
2216 - return ret;
2217 -}
2218 -device_initcall(add_pcspkr);
2219 -
2220 -/*
2221 - * Local Variables:
2222 - * mode:c
2223 - * c-file-style:"k&r"
2224 - * c-basic-offset:8
2225 - * End:
2226 - */
2227 --- a/arch/x86/kernel/setup_64-xen.c
2228 +++ b/arch/x86/kernel/setup_64-xen.c
2229 @@ -141,7 +141,7 @@ EXPORT_SYMBOL_GPL(edid_info);
2230
2231 extern int root_mountflags;
2232
2233 -char command_line[COMMAND_LINE_SIZE];
2234 +char __initdata command_line[COMMAND_LINE_SIZE];
2235
2236 struct resource standard_io_resources[] = {
2237 { .name = "dma1", .start = 0x00, .end = 0x1f,
2238 @@ -179,134 +179,6 @@ struct resource code_resource = {
2239 .flags = IORESOURCE_RAM,
2240 };
2241
2242 -#define IORESOURCE_ROM (IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM)
2243 -
2244 -static struct resource system_rom_resource = {
2245 - .name = "System ROM",
2246 - .start = 0xf0000,
2247 - .end = 0xfffff,
2248 - .flags = IORESOURCE_ROM,
2249 -};
2250 -
2251 -static struct resource extension_rom_resource = {
2252 - .name = "Extension ROM",
2253 - .start = 0xe0000,
2254 - .end = 0xeffff,
2255 - .flags = IORESOURCE_ROM,
2256 -};
2257 -
2258 -static struct resource adapter_rom_resources[] = {
2259 - { .name = "Adapter ROM", .start = 0xc8000, .end = 0,
2260 - .flags = IORESOURCE_ROM },
2261 - { .name = "Adapter ROM", .start = 0, .end = 0,
2262 - .flags = IORESOURCE_ROM },
2263 - { .name = "Adapter ROM", .start = 0, .end = 0,
2264 - .flags = IORESOURCE_ROM },
2265 - { .name = "Adapter ROM", .start = 0, .end = 0,
2266 - .flags = IORESOURCE_ROM },
2267 - { .name = "Adapter ROM", .start = 0, .end = 0,
2268 - .flags = IORESOURCE_ROM },
2269 - { .name = "Adapter ROM", .start = 0, .end = 0,
2270 - .flags = IORESOURCE_ROM }
2271 -};
2272 -
2273 -static struct resource video_rom_resource = {
2274 - .name = "Video ROM",
2275 - .start = 0xc0000,
2276 - .end = 0xc7fff,
2277 - .flags = IORESOURCE_ROM,
2278 -};
2279 -
2280 -static struct resource video_ram_resource = {
2281 - .name = "Video RAM area",
2282 - .start = 0xa0000,
2283 - .end = 0xbffff,
2284 - .flags = IORESOURCE_RAM,
2285 -};
2286 -
2287 -#define romsignature(x) (*(unsigned short *)(x) == 0xaa55)
2288 -
2289 -static int __init romchecksum(unsigned char *rom, unsigned long length)
2290 -{
2291 - unsigned char *p, sum = 0;
2292 -
2293 - for (p = rom; p < rom + length; p++)
2294 - sum += *p;
2295 - return sum == 0;
2296 -}
2297 -
2298 -static void __init probe_roms(void)
2299 -{
2300 - unsigned long start, length, upper;
2301 - unsigned char *rom;
2302 - int i;
2303 -
2304 -#ifdef CONFIG_XEN
2305 - /* Nothing to do if not running in dom0. */
2306 - if (!is_initial_xendomain())
2307 - return;
2308 -#endif
2309 -
2310 - /* video rom */
2311 - upper = adapter_rom_resources[0].start;
2312 - for (start = video_rom_resource.start; start < upper; start += 2048) {
2313 - rom = isa_bus_to_virt(start);
2314 - if (!romsignature(rom))
2315 - continue;
2316 -
2317 - video_rom_resource.start = start;
2318 -
2319 - /* 0 < length <= 0x7f * 512, historically */
2320 - length = rom[2] * 512;
2321 -
2322 - /* if checksum okay, trust length byte */
2323 - if (length && romchecksum(rom, length))
2324 - video_rom_resource.end = start + length - 1;
2325 -
2326 - request_resource(&iomem_resource, &video_rom_resource);
2327 - break;
2328 - }
2329 -
2330 - start = (video_rom_resource.end + 1 + 2047) & ~2047UL;
2331 - if (start < upper)
2332 - start = upper;
2333 -
2334 - /* system rom */
2335 - request_resource(&iomem_resource, &system_rom_resource);
2336 - upper = system_rom_resource.start;
2337 -
2338 - /* check for extension rom (ignore length byte!) */
2339 - rom = isa_bus_to_virt(extension_rom_resource.start);
2340 - if (romsignature(rom)) {
2341 - length = extension_rom_resource.end - extension_rom_resource.start + 1;
2342 - if (romchecksum(rom, length)) {
2343 - request_resource(&iomem_resource, &extension_rom_resource);
2344 - upper = extension_rom_resource.start;
2345 - }
2346 - }
2347 -
2348 - /* check for adapter roms on 2k boundaries */
2349 - for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper;
2350 - start += 2048) {
2351 - rom = isa_bus_to_virt(start);
2352 - if (!romsignature(rom))
2353 - continue;
2354 -
2355 - /* 0 < length <= 0x7f * 512, historically */
2356 - length = rom[2] * 512;
2357 -
2358 - /* but accept any length that fits if checksum okay */
2359 - if (!length || start + length > upper || !romchecksum(rom, length))
2360 - continue;
2361 -
2362 - adapter_rom_resources[i].start = start;
2363 - adapter_rom_resources[i].end = start + length - 1;
2364 - request_resource(&iomem_resource, &adapter_rom_resources[i]);
2365 -
2366 - start = adapter_rom_resources[i++].end & ~2047UL;
2367 - }
2368 -}
2369 -
2370 #ifdef CONFIG_PROC_VMCORE
2371 /* elfcorehdr= specifies the location of elf core header
2372 * stored by the crashed kernel. This option will be passed
2373 @@ -403,7 +275,7 @@ void __init setup_arch(char **cmdline_p)
2374 #ifdef CONFIG_XEN
2375 extern struct e820map machine_e820;
2376
2377 - printk(KERN_INFO "Command line: %s\n", saved_command_line);
2378 + printk(KERN_INFO "Command line: %s\n", boot_command_line);
2379
2380 /* Register a call for panic conditions. */
2381 atomic_notifier_chain_register(&panic_notifier_list, &xen_panic_block);
2382 @@ -430,7 +302,7 @@ void __init setup_arch(char **cmdline_p)
2383
2384 ARCH_SETUP
2385 #else
2386 - printk(KERN_INFO "Command line: %s\n", saved_command_line);
2387 + printk(KERN_INFO "Command line: %s\n", boot_command_line);
2388
2389 ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV);
2390 screen_info = SCREEN_INFO;
2391 @@ -461,7 +333,7 @@ void __init setup_arch(char **cmdline_p)
2392
2393 early_identify_cpu(&boot_cpu_data);
2394
2395 - strlcpy(command_line, saved_command_line, COMMAND_LINE_SIZE);
2396 + strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
2397 *cmdline_p = command_line;
2398
2399 parse_early_param();
2400 @@ -531,6 +403,11 @@ void __init setup_arch(char **cmdline_p)
2401 /* reserve ebda region */
2402 if (ebda_addr)
2403 reserve_bootmem_generic(ebda_addr, ebda_size);
2404 +#ifdef CONFIG_NUMA
2405 + /* reserve nodemap region */
2406 + if (nodemap_addr)
2407 + reserve_bootmem_generic(nodemap_addr, nodemap_size);
2408 +#endif
2409
2410 #ifdef CONFIG_SMP
2411 /*
2412 @@ -731,10 +608,8 @@ void __init setup_arch(char **cmdline_p)
2413 #endif
2414
2415 /*
2416 - * Request address space for all standard RAM and ROM resources
2417 - * and also for regions reported as reserved by the e820.
2418 + * We trust e820 completely. No explicit ROM probing in memory.
2419 */
2420 - probe_roms();
2421 #ifdef CONFIG_XEN
2422 if (is_initial_xendomain())
2423 e820_reserve_resources(machine_e820.map, machine_e820.nr_map);
2424 @@ -743,8 +618,6 @@ void __init setup_arch(char **cmdline_p)
2425 e820_mark_nosave_regions();
2426 #endif
2427
2428 - request_resource(&iomem_resource, &video_ram_resource);
2429 -
2430 {
2431 unsigned i;
2432 /* request I/O space for devices used on all i[345]86 PCs */
2433 @@ -1321,7 +1194,8 @@ static int show_cpuinfo(struct seq_file
2434 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
2435 NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL,
2436 NULL, NULL, NULL, NULL, "nx", NULL, "mmxext", NULL,
2437 - NULL, "fxsr_opt", NULL, "rdtscp", NULL, "lm", "3dnowext", "3dnow",
2438 + NULL, "fxsr_opt", "pdpe1gb", "rdtscp", NULL, "lm",
2439 + "3dnowext", "3dnow",
2440
2441 /* Transmeta-defined */
2442 "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL,
2443 @@ -1339,7 +1213,7 @@ static int show_cpuinfo(struct seq_file
2444 /* Intel-defined (#2) */
2445 "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est",
2446 "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL,
2447 - NULL, NULL, "dca", NULL, NULL, NULL, NULL, NULL,
2448 + NULL, NULL, "dca", NULL, NULL, NULL, NULL, "popcnt",
2449 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
2450
2451 /* VIA/Cyrix/Centaur-defined */
2452 @@ -1349,8 +1223,10 @@ static int show_cpuinfo(struct seq_file
2453 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
2454
2455 /* AMD-defined (#2) */
2456 - "lahf_lm", "cmp_legacy", "svm", NULL, "cr8_legacy", NULL, NULL, NULL,
2457 - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
2458 + "lahf_lm", "cmp_legacy", "svm", "extapic", "cr8_legacy",
2459 + "altmovcr8", "abm", "sse4a",
2460 + "misalignsse", "3dnowprefetch",
2461 + "osvw", "ibs", NULL, NULL, NULL, NULL,
2462 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
2463 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
2464 };
2465 @@ -1361,6 +1237,9 @@ static int show_cpuinfo(struct seq_file
2466 "ttp", /* thermal trip */
2467 "tm",
2468 "stc",
2469 + "100mhzsteps",
2470 + "hwpstate",
2471 + NULL, /* tsc invariant mapped to constant_tsc */
2472 NULL,
2473 /* nothing */ /* constant_tsc - moved to flags */
2474 };
2475 @@ -1477,26 +1356,3 @@ struct seq_operations cpuinfo_op = {
2476 .stop = c_stop,
2477 .show = show_cpuinfo,
2478 };
2479 -
2480 -#if defined(CONFIG_INPUT_PCSPKR) || defined(CONFIG_INPUT_PCSPKR_MODULE)
2481 -#include <linux/platform_device.h>
2482 -static __init int add_pcspkr(void)
2483 -{
2484 - struct platform_device *pd;
2485 - int ret;
2486 -
2487 - if (!is_initial_xendomain())
2488 - return 0;
2489 -
2490 - pd = platform_device_alloc("pcspkr", -1);
2491 - if (!pd)
2492 - return -ENOMEM;
2493 -
2494 - ret = platform_device_add(pd);
2495 - if (ret)
2496 - platform_device_put(pd);
2497 -
2498 - return ret;
2499 -}
2500 -device_initcall(add_pcspkr);
2501 -#endif
2502 --- a/arch/x86/kernel/smp_32-xen.c
2503 +++ b/arch/x86/kernel/smp_32-xen.c
2504 @@ -335,8 +335,7 @@ static void flush_tlb_others(cpumask_t c
2505 /*
2506 * i'm not happy about this global shared spinlock in the
2507 * MM hot path, but we'll see how contended it is.
2508 - * Temporarily this turns IRQs off, so that lockups are
2509 - * detected by the NMI watchdog.
2510 + * AK: x86-64 has a faster method that could be ported.
2511 */
2512 spin_lock(&tlbstate_lock);
2513
2514 @@ -361,7 +360,7 @@ static void flush_tlb_others(cpumask_t c
2515
2516 while (!cpus_empty(flush_cpumask))
2517 /* nothing. lockup detection does not belong here */
2518 - mb();
2519 + cpu_relax();
2520
2521 flush_mm = NULL;
2522 flush_va = 0;
2523 --- a/arch/x86/kernel/time_32-xen.c
2524 +++ b/arch/x86/kernel/time_32-xen.c
2525 @@ -51,6 +51,7 @@
2526 #include <linux/kernel_stat.h>
2527 #include <linux/posix-timers.h>
2528 #include <linux/cpufreq.h>
2529 +#include <linux/clocksource.h>
2530
2531 #include <asm/io.h>
2532 #include <asm/smp.h>
2533 @@ -75,25 +76,17 @@
2534 #include <xen/evtchn.h>
2535 #include <xen/interface/vcpu.h>
2536
2537 -#if defined (__i386__)
2538 -#include <asm/i8259.h>
2539 +#ifdef CONFIG_X86_32
2540 #include <asm/i8253.h>
2541 DEFINE_SPINLOCK(i8253_lock);
2542 EXPORT_SYMBOL(i8253_lock);
2543 -#endif
2544 -
2545 -#define XEN_SHIFT 22
2546 -
2547 int pit_latch_buggy; /* extern */
2548 -
2549 -#if defined(__x86_64__)
2550 -unsigned long vxtime_hz = PIT_TICK_RATE;
2551 -struct vxtime_data __vxtime __section_vxtime; /* for vsyscalls */
2552 +#else
2553 volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
2554 -struct timespec __xtime __section_xtime;
2555 -struct timezone __sys_tz __section_sys_tz;
2556 #endif
2557
2558 +#define XEN_SHIFT 22
2559 +
2560 unsigned int cpu_khz; /* Detected as we calibrate the TSC */
2561 EXPORT_SYMBOL(cpu_khz);
2562
2563 @@ -113,9 +106,6 @@ static DEFINE_PER_CPU(struct shadow_time
2564 static struct timespec shadow_tv;
2565 static u32 shadow_tv_version;
2566
2567 -static struct timeval monotonic_tv;
2568 -static spinlock_t monotonic_lock = SPIN_LOCK_UNLOCKED;
2569 -
2570 /* Keep track of last time we did processing/updating of jiffies and xtime. */
2571 static u64 processed_system_time; /* System time (ns) at last processing. */
2572 static DEFINE_PER_CPU(u64, processed_system_time);
2573 @@ -228,7 +218,7 @@ static inline u64 scale_delta(u64 delta,
2574 return product;
2575 }
2576
2577 -void init_cpu_khz(void)
2578 +static void init_cpu_khz(void)
2579 {
2580 u64 __cpu_khz = 1000000ULL << 32;
2581 struct vcpu_time_info *info = &vcpu_info(0)->time;
2582 @@ -247,16 +237,6 @@ static u64 get_nsec_offset(struct shadow
2583 return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift);
2584 }
2585
2586 -#ifdef CONFIG_X86_64
2587 -static unsigned long get_usec_offset(struct shadow_time_info *shadow)
2588 -{
2589 - u64 now, delta;
2590 - rdtscll(now);
2591 - delta = now - shadow->tsc_timestamp;
2592 - return scale_delta(delta, shadow->tsc_to_usec_mul, shadow->tsc_shift);
2593 -}
2594 -#endif
2595 -
2596 static void __update_wallclock(time_t sec, long nsec)
2597 {
2598 long wtm_nsec, xtime_nsec;
2599 @@ -371,138 +351,6 @@ void rtc_cmos_write(unsigned char val, u
2600 }
2601 EXPORT_SYMBOL(rtc_cmos_write);
2602
2603 -#ifdef CONFIG_X86_64
2604 -
2605 -/*
2606 - * This version of gettimeofday has microsecond resolution
2607 - * and better than microsecond precision on fast x86 machines with TSC.
2608 - */
2609 -void do_gettimeofday(struct timeval *tv)
2610 -{
2611 - unsigned long seq;
2612 - unsigned long usec, sec;
2613 - unsigned long flags;
2614 - s64 nsec;
2615 - unsigned int cpu;
2616 - struct shadow_time_info *shadow;
2617 - u32 local_time_version;
2618 -
2619 - cpu = get_cpu();
2620 - shadow = &per_cpu(shadow_time, cpu);
2621 -
2622 - do {
2623 - local_time_version = shadow->version;
2624 - seq = read_seqbegin(&xtime_lock);
2625 -
2626 - usec = get_usec_offset(shadow);
2627 -
2628 - sec = xtime.tv_sec;
2629 - usec += (xtime.tv_nsec / NSEC_PER_USEC);
2630 -
2631 - nsec = shadow->system_timestamp - processed_system_time;
2632 - __normalize_time(&sec, &nsec);
2633 - usec += (long)nsec / NSEC_PER_USEC;
2634 -
2635 - if (unlikely(!time_values_up_to_date(cpu))) {
2636 - /*
2637 - * We may have blocked for a long time,
2638 - * rendering our calculations invalid
2639 - * (e.g. the time delta may have
2640 - * overflowed). Detect that and recalculate
2641 - * with fresh values.
2642 - */
2643 - get_time_values_from_xen(cpu);
2644 - continue;
2645 - }
2646 - } while (read_seqretry(&xtime_lock, seq) ||
2647 - (local_time_version != shadow->version));
2648 -
2649 - put_cpu();
2650 -
2651 - while (usec >= USEC_PER_SEC) {
2652 - usec -= USEC_PER_SEC;
2653 - sec++;
2654 - }
2655 -
2656 - spin_lock_irqsave(&monotonic_lock, flags);
2657 - if ((sec > monotonic_tv.tv_sec) ||
2658 - ((sec == monotonic_tv.tv_sec) && (usec > monotonic_tv.tv_usec)))
2659 - {
2660 - monotonic_tv.tv_sec = sec;
2661 - monotonic_tv.tv_usec = usec;
2662 - } else {
2663 - sec = monotonic_tv.tv_sec;
2664 - usec = monotonic_tv.tv_usec;
2665 - }
2666 - spin_unlock_irqrestore(&monotonic_lock, flags);
2667 -
2668 - tv->tv_sec = sec;
2669 - tv->tv_usec = usec;
2670 -}
2671 -
2672 -EXPORT_SYMBOL(do_gettimeofday);
2673 -
2674 -int do_settimeofday(struct timespec *tv)
2675 -{
2676 - time_t sec;
2677 - s64 nsec;
2678 - unsigned int cpu;
2679 - struct shadow_time_info *shadow;
2680 - struct xen_platform_op op;
2681 -
2682 - if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
2683 - return -EINVAL;
2684 -
2685 - cpu = get_cpu();
2686 - shadow = &per_cpu(shadow_time, cpu);
2687 -
2688 - write_seqlock_irq(&xtime_lock);
2689 -
2690 - /*
2691 - * Ensure we don't get blocked for a long time so that our time delta
2692 - * overflows. If that were to happen then our shadow time values would
2693 - * be stale, so we can retry with fresh ones.
2694 - */
2695 - for (;;) {
2696 - nsec = tv->tv_nsec - get_nsec_offset(shadow);
2697 - if (time_values_up_to_date(cpu))
2698 - break;
2699 - get_time_values_from_xen(cpu);
2700 - }
2701 - sec = tv->tv_sec;
2702 - __normalize_time(&sec, &nsec);
2703 -
2704 - if (is_initial_xendomain() && !independent_wallclock) {
2705 - op.cmd = XENPF_settime;
2706 - op.u.settime.secs = sec;
2707 - op.u.settime.nsecs = nsec;
2708 - op.u.settime.system_time = shadow->system_timestamp;
2709 - WARN_ON(HYPERVISOR_platform_op(&op));
2710 - update_wallclock();
2711 - } else if (independent_wallclock) {
2712 - nsec -= shadow->system_timestamp;
2713 - __normalize_time(&sec, &nsec);
2714 - __update_wallclock(sec, nsec);
2715 - }
2716 -
2717 - /* Reset monotonic gettimeofday() timeval. */
2718 - spin_lock(&monotonic_lock);
2719 - monotonic_tv.tv_sec = 0;
2720 - monotonic_tv.tv_usec = 0;
2721 - spin_unlock(&monotonic_lock);
2722 -
2723 - write_sequnlock_irq(&xtime_lock);
2724 -
2725 - put_cpu();
2726 -
2727 - clock_was_set();
2728 - return 0;
2729 -}
2730 -
2731 -EXPORT_SYMBOL(do_settimeofday);
2732 -
2733 -#endif
2734 -
2735 static void sync_xen_wallclock(unsigned long dummy);
2736 static DEFINE_TIMER(sync_xen_wallclock_timer, sync_xen_wallclock, 0, 0);
2737 static void sync_xen_wallclock(unsigned long dummy)
2738 @@ -551,15 +399,7 @@ static int set_rtc_mmss(unsigned long no
2739 return retval;
2740 }
2741
2742 -#ifdef CONFIG_X86_64
2743 -/* monotonic_clock(): returns # of nanoseconds passed since time_init()
2744 - * Note: This function is required to return accurate
2745 - * time even in the absence of multiple timer ticks.
2746 - */
2747 -unsigned long long monotonic_clock(void)
2748 -#else
2749 unsigned long long sched_clock(void)
2750 -#endif
2751 {
2752 unsigned int cpu = get_cpu();
2753 struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu);
2754 @@ -579,21 +419,18 @@ unsigned long long sched_clock(void)
2755
2756 return time;
2757 }
2758 -#ifdef CONFIG_X86_64
2759 -EXPORT_SYMBOL(monotonic_clock);
2760 -
2761 -unsigned long long sched_clock(void)
2762 -{
2763 - return monotonic_clock();
2764 -}
2765 -#endif
2766
2767 unsigned long profile_pc(struct pt_regs *regs)
2768 {
2769 unsigned long pc = instruction_pointer(regs);
2770
2771 #if defined(CONFIG_SMP) || defined(__x86_64__)
2772 - if (!user_mode_vm(regs) && in_lock_functions(pc)) {
2773 +# ifdef __i386__
2774 + if (!v8086_mode(regs) && SEGMENT_IS_KERNEL_CODE(regs->xcs)
2775 +# else
2776 + if (!user_mode(regs)
2777 +# endif
2778 + && in_lock_functions(pc)) {
2779 # ifdef CONFIG_FRAME_POINTER
2780 # ifdef __i386__
2781 return ((unsigned long *)regs->ebp)[1];
2782 @@ -602,14 +439,11 @@ unsigned long profile_pc(struct pt_regs
2783 # endif
2784 # else
2785 # ifdef __i386__
2786 - unsigned long *sp;
2787 - if ((regs->xcs & 2) == 0)
2788 - sp = (unsigned long *)&regs->esp;
2789 - else
2790 - sp = (unsigned long *)regs->esp;
2791 + unsigned long *sp = (unsigned long *)&regs->esp;
2792 # else
2793 unsigned long *sp = (unsigned long *)regs->rsp;
2794 # endif
2795 +
2796 /* Return address is either directly at stack pointer
2797 or above a saved eflags. Eflags has bits 22-31 zero,
2798 kernel addresses don't. */
2799 @@ -762,19 +596,6 @@ irqreturn_t timer_interrupt(int irq, voi
2800 return IRQ_HANDLED;
2801 }
2802
2803 -#ifndef CONFIG_X86_64
2804 -
2805 -void tsc_init(void)
2806 -{
2807 - init_cpu_khz();
2808 - printk(KERN_INFO "Xen reported: %u.%03u MHz processor.\n",
2809 - cpu_khz / 1000, cpu_khz % 1000);
2810 -
2811 - use_tsc_delay();
2812 -}
2813 -
2814 -#include <linux/clocksource.h>
2815 -
2816 void mark_tsc_unstable(void)
2817 {
2818 #ifndef CONFIG_XEN /* XXX Should tell the hypervisor about this fact. */
2819 @@ -830,21 +651,9 @@ static struct clocksource clocksource_xe
2820 .mask = CLOCKSOURCE_MASK(64),
2821 .mult = 1 << XEN_SHIFT, /* time directly in nanoseconds */
2822 .shift = XEN_SHIFT,
2823 - .is_continuous = 1,
2824 + .flags = CLOCK_SOURCE_IS_CONTINUOUS,
2825 };
2826
2827 -static int __init init_xen_clocksource(void)
2828 -{
2829 - clocksource_xen.mult = clocksource_khz2mult(cpu_khz,
2830 - clocksource_xen.shift);
2831 -
2832 - return clocksource_register(&clocksource_xen);
2833 -}
2834 -
2835 -module_init(init_xen_clocksource);
2836 -
2837 -#endif
2838 -
2839 static void init_missing_ticks_accounting(unsigned int cpu)
2840 {
2841 struct vcpu_register_runstate_memory_area area;
2842 @@ -865,7 +674,7 @@ static void init_missing_ticks_accountin
2843 }
2844
2845 /* not static: needed by APM */
2846 -unsigned long get_cmos_time(void)
2847 +unsigned long read_persistent_clock(void)
2848 {
2849 unsigned long retval;
2850 unsigned long flags;
2851 @@ -878,11 +687,11 @@ unsigned long get_cmos_time(void)
2852
2853 return retval;
2854 }
2855 -EXPORT_SYMBOL(get_cmos_time);
2856
2857 static void sync_cmos_clock(unsigned long dummy);
2858
2859 static DEFINE_TIMER(sync_cmos_timer, sync_cmos_clock, 0, 0);
2860 +int no_sync_cmos_clock;
2861
2862 static void sync_cmos_clock(unsigned long dummy)
2863 {
2864 @@ -926,7 +735,8 @@ static void sync_cmos_clock(unsigned lon
2865
2866 void notify_arch_cmos_timer(void)
2867 {
2868 - mod_timer(&sync_cmos_timer, jiffies + 1);
2869 + if (!no_sync_cmos_clock)
2870 + mod_timer(&sync_cmos_timer, jiffies + 1);
2871 mod_timer(&sync_xen_wallclock_timer, jiffies + 1);
2872 }
2873
2874 @@ -959,29 +769,11 @@ static int time_init_device(void)
2875
2876 device_initcall(time_init_device);
2877
2878 -#ifdef CONFIG_HPET_TIMER
2879 extern void (*late_time_init)(void);
2880 -/* Duplicate of time_init() below, with hpet_enable part added */
2881 -static void __init hpet_time_init(void)
2882 -{
2883 - struct timespec ts;
2884 - ts.tv_sec = get_cmos_time();
2885 - ts.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ);
2886 -
2887 - do_settimeofday(&ts);
2888 -
2889 - if ((hpet_enable() >= 0) && hpet_use_timer) {
2890 - printk("Using HPET for base-timer\n");
2891 - }
2892 -
2893 - do_time_init();
2894 -}
2895 -#endif
2896
2897 /* Dynamically-mapped IRQ. */
2898 DEFINE_PER_CPU(int, timer_irq);
2899
2900 -extern void (*late_time_init)(void);
2901 static void setup_cpu0_timer_irq(void)
2902 {
2903 per_cpu(timer_irq, 0) =
2904 @@ -989,7 +781,7 @@ static void setup_cpu0_timer_irq(void)
2905 VIRQ_TIMER,
2906 0,
2907 timer_interrupt,
2908 - SA_INTERRUPT,
2909 + IRQF_DISABLED|IRQF_NOBALANCING,
2910 "timer0",
2911 NULL);
2912 BUG_ON(per_cpu(timer_irq, 0) < 0);
2913 @@ -1001,16 +793,9 @@ static struct vcpu_set_periodic_timer xe
2914
2915 void __init time_init(void)
2916 {
2917 -#ifdef CONFIG_HPET_TIMER
2918 - if (is_hpet_capable()) {
2919 - /*
2920 - * HPET initialization needs to do memory-mapped io. So, let
2921 - * us do a late initialization after mem_init().
2922 - */
2923 - late_time_init = hpet_time_init;
2924 - return;
2925 - }
2926 -#endif
2927 + init_cpu_khz();
2928 + printk(KERN_INFO "Xen reported: %u.%03u MHz processor.\n",
2929 + cpu_khz / 1000, cpu_khz % 1000);
2930
2931 switch (HYPERVISOR_vcpu_op(VCPUOP_set_periodic_timer, 0,
2932 &xen_set_periodic_tick)) {
2933 @@ -1029,18 +814,12 @@ void __init time_init(void)
2934 per_cpu(processed_system_time, 0) = processed_system_time;
2935 init_missing_ticks_accounting(0);
2936
2937 - update_wallclock();
2938 + clocksource_register(&clocksource_xen);
2939
2940 -#ifdef CONFIG_X86_64
2941 - init_cpu_khz();
2942 - printk(KERN_INFO "Xen reported: %u.%03u MHz processor.\n",
2943 - cpu_khz / 1000, cpu_khz % 1000);
2944 + update_wallclock();
2945
2946 - vxtime.mode = VXTIME_TSC;
2947 - vxtime.quot = (1000000L << 32) / vxtime_hz;
2948 - vxtime.tsc_quot = (1000L << 32) / cpu_khz;
2949 - sync_core();
2950 - rdtscll(vxtime.last_tsc);
2951 +#ifndef CONFIG_X86_64
2952 + use_tsc_delay();
2953 #endif
2954
2955 /* Cannot request_irq() until kmem is initialised. */
2956 @@ -1197,7 +976,7 @@ int __cpuinit local_setup_timer(unsigned
2957 irq = bind_virq_to_irqhandler(VIRQ_TIMER,
2958 cpu,
2959 timer_interrupt,
2960 - SA_INTERRUPT,
2961 + IRQF_DISABLED|IRQF_NOBALANCING,
2962 timer_name[cpu],
2963 NULL);
2964 if (irq < 0)
2965 @@ -1286,7 +1065,7 @@ static ctl_table xen_table[] = {
2966 };
2967 static int __init xen_sysctl_init(void)
2968 {
2969 - (void)register_sysctl_table(xen_table, 0);
2970 + (void)register_sysctl_table(xen_table);
2971 return 0;
2972 }
2973 __initcall(xen_sysctl_init);
2974 --- a/arch/x86/kernel/traps_32-xen.c
2975 +++ b/arch/x86/kernel/traps_32-xen.c
2976 @@ -100,6 +100,7 @@ asmlinkage void fixup_4gb_segment(void);
2977 asmlinkage void machine_check(void);
2978
2979 int kstack_depth_to_print = 24;
2980 +static unsigned int code_bytes = 64;
2981 ATOMIC_NOTIFIER_HEAD(i386die_chain);
2982
2983 int register_die_notifier(struct notifier_block *nb)
2984 @@ -297,10 +298,11 @@ void show_registers(struct pt_regs *regs
2985 int i;
2986 int in_kernel = 1;
2987 unsigned long esp;
2988 - unsigned short ss;
2989 + unsigned short ss, gs;
2990
2991 esp = (unsigned long) (&regs->esp);
2992 savesegment(ss, ss);
2993 + savesegment(gs, gs);
2994 if (user_mode_vm(regs)) {
2995 in_kernel = 0;
2996 esp = regs->esp;
2997 @@ -319,8 +321,8 @@ void show_registers(struct pt_regs *regs
2998 regs->eax, regs->ebx, regs->ecx, regs->edx);
2999 printk(KERN_EMERG "esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n",
3000 regs->esi, regs->edi, regs->ebp, esp);
3001 - printk(KERN_EMERG "ds: %04x es: %04x ss: %04x\n",
3002 - regs->xds & 0xffff, regs->xes & 0xffff, ss);
3003 + printk(KERN_EMERG "ds: %04x es: %04x fs: %04x gs: %04x ss: %04x\n",
3004 + regs->xds & 0xffff, regs->xes & 0xffff, regs->xfs & 0xffff, gs, ss);
3005 printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)",
3006 TASK_COMM_LEN, current->comm, current->pid,
3007 current_thread_info(), current, current->thread_info);
3008 @@ -330,7 +332,8 @@ void show_registers(struct pt_regs *regs
3009 */
3010 if (in_kernel) {
3011 u8 *eip;
3012 - int code_bytes = 64;
3013 + unsigned int code_prologue = code_bytes * 43 / 64;
3014 + unsigned int code_len = code_bytes;
3015 unsigned char c;
3016
3017 printk("\n" KERN_EMERG "Stack: ");
3018 @@ -338,14 +341,14 @@ void show_registers(struct pt_regs *regs
3019
3020 printk(KERN_EMERG "Code: ");
3021
3022 - eip = (u8 *)regs->eip - 43;
3023 + eip = (u8 *)regs->eip - code_prologue;
3024 if (eip < (u8 *)PAGE_OFFSET ||
3025 probe_kernel_address(eip, c)) {
3026 /* try starting at EIP */
3027 eip = (u8 *)regs->eip;
3028 - code_bytes = 32;
3029 + code_len = code_len - code_prologue + 1;
3030 }
3031 - for (i = 0; i < code_bytes; i++, eip++) {
3032 + for (i = 0; i < code_len; i++, eip++) {
3033 if (eip < (u8 *)PAGE_OFFSET ||
3034 probe_kernel_address(eip, c)) {
3035 printk(" Bad EIP value.");
3036 @@ -1134,3 +1137,13 @@ static int __init kstack_setup(char *s)
3037 return 1;
3038 }
3039 __setup("kstack=", kstack_setup);
3040 +
3041 +static int __init code_bytes_setup(char *s)
3042 +{
3043 + code_bytes = simple_strtoul(s, NULL, 0);
3044 + if (code_bytes > 8192)
3045 + code_bytes = 8192;
3046 +
3047 + return 1;
3048 +}
3049 +__setup("code_bytes=", code_bytes_setup);
3050 --- a/arch/x86/kernel/vsyscall_64-xen.c
3051 +++ b/arch/x86/kernel/vsyscall_64-xen.c
3052 @@ -26,6 +26,7 @@
3053 #include <linux/seqlock.h>
3054 #include <linux/jiffies.h>
3055 #include <linux/sysctl.h>
3056 +#include <linux/clocksource.h>
3057 #include <linux/getcpu.h>
3058 #include <linux/cpu.h>
3059 #include <linux/smp.h>
3060 @@ -34,6 +35,7 @@
3061 #include <asm/vsyscall.h>
3062 #include <asm/pgtable.h>
3063 #include <asm/page.h>
3064 +#include <asm/unistd.h>
3065 #include <asm/fixmap.h>
3066 #include <asm/errno.h>
3067 #include <asm/io.h>
3068 @@ -44,56 +46,41 @@
3069 #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
3070 #define __syscall_clobber "r11","rcx","memory"
3071
3072 -int __sysctl_vsyscall __section_sysctl_vsyscall = 1;
3073 -seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED;
3074 +struct vsyscall_gtod_data_t {
3075 + seqlock_t lock;
3076 + int sysctl_enabled;
3077 + struct timeval wall_time_tv;
3078 + struct timezone sys_tz;
3079 + cycle_t offset_base;
3080 + struct clocksource clock;
3081 +};
3082 int __vgetcpu_mode __section_vgetcpu_mode;
3083
3084 -#include <asm/unistd.h>
3085 -
3086 -static __always_inline void timeval_normalize(struct timeval * tv)
3087 +struct vsyscall_gtod_data_t __vsyscall_gtod_data __section_vsyscall_gtod_data =
3088 {
3089 - time_t __sec;
3090 -
3091 - __sec = tv->tv_usec / 1000000;
3092 - if (__sec) {
3093 - tv->tv_usec %= 1000000;
3094 - tv->tv_sec += __sec;
3095 - }
3096 -}
3097 + .lock = SEQLOCK_UNLOCKED,
3098 + .sysctl_enabled = 1,
3099 +};
3100
3101 -static __always_inline void do_vgettimeofday(struct timeval * tv)
3102 +void update_vsyscall(struct timespec *wall_time, struct clocksource *clock)
3103 {
3104 - long sequence, t;
3105 - unsigned long sec, usec;
3106 + unsigned long flags;
3107
3108 - do {
3109 - sequence = read_seqbegin(&__xtime_lock);
3110 -
3111 - sec = __xtime.tv_sec;
3112 - usec = __xtime.tv_nsec / 1000;
3113 -
3114 - if (__vxtime.mode != VXTIME_HPET) {
3115 - t = get_cycles_sync();
3116 - if (t < __vxtime.last_tsc)
3117 - t = __vxtime.last_tsc;
3118 - usec += ((t - __vxtime.last_tsc) *
3119 - __vxtime.tsc_quot) >> 32;
3120 - /* See comment in x86_64 do_gettimeofday. */
3121 - } else {
3122 - usec += ((readl((void __iomem *)
3123 - fix_to_virt(VSYSCALL_HPET) + 0xf0) -
3124 - __vxtime.last) * __vxtime.quot) >> 32;
3125 - }
3126 - } while (read_seqretry(&__xtime_lock, sequence));
3127 -
3128 - tv->tv_sec = sec + usec / 1000000;
3129 - tv->tv_usec = usec % 1000000;
3130 + write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags);
3131 + /* copy vsyscall data */
3132 + vsyscall_gtod_data.clock = *clock;
3133 + vsyscall_gtod_data.wall_time_tv.tv_sec = wall_time->tv_sec;
3134 + vsyscall_gtod_data.wall_time_tv.tv_usec = wall_time->tv_nsec/1000;
3135 + vsyscall_gtod_data.sys_tz = sys_tz;
3136 + write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
3137 }
3138
3139 -/* RED-PEN may want to readd seq locking, but then the variable should be write-once. */
3140 +/* RED-PEN may want to readd seq locking, but then the variable should be
3141 + * write-once.
3142 + */
3143 static __always_inline void do_get_tz(struct timezone * tz)
3144 {
3145 - *tz = __sys_tz;
3146 + *tz = __vsyscall_gtod_data.sys_tz;
3147 }
3148
3149 static __always_inline int gettimeofday(struct timeval *tv, struct timezone *tz)
3150 @@ -101,7 +88,8 @@ static __always_inline int gettimeofday(
3151 int ret;
3152 asm volatile("vsysc2: syscall"
3153 : "=a" (ret)
3154 - : "0" (__NR_gettimeofday),"D" (tv),"S" (tz) : __syscall_clobber );
3155 + : "0" (__NR_gettimeofday),"D" (tv),"S" (tz)
3156 + : __syscall_clobber );
3157 return ret;
3158 }
3159
3160 @@ -114,10 +102,44 @@ static __always_inline long time_syscall
3161 return secs;
3162 }
3163
3164 +static __always_inline void do_vgettimeofday(struct timeval * tv)
3165 +{
3166 + cycle_t now, base, mask, cycle_delta;
3167 + unsigned long seq, mult, shift, nsec_delta;
3168 + cycle_t (*vread)(void);
3169 + do {
3170 + seq = read_seqbegin(&__vsyscall_gtod_data.lock);
3171 +
3172 + vread = __vsyscall_gtod_data.clock.vread;
3173 + if (unlikely(!__vsyscall_gtod_data.sysctl_enabled || !vread)) {
3174 + gettimeofday(tv,NULL);
3175 + return;
3176 + }
3177 + now = vread();
3178 + base = __vsyscall_gtod_data.clock.cycle_last;
3179 + mask = __vsyscall_gtod_data.clock.mask;
3180 + mult = __vsyscall_gtod_data.clock.mult;
3181 + shift = __vsyscall_gtod_data.clock.shift;
3182 +
3183 + *tv = __vsyscall_gtod_data.wall_time_tv;
3184 +
3185 + } while (read_seqretry(&__vsyscall_gtod_data.lock, seq));
3186 +
3187 + /* calculate interval: */
3188 + cycle_delta = (now - base) & mask;
3189 + /* convert to nsecs: */
3190 + nsec_delta = (cycle_delta * mult) >> shift;
3191 +
3192 + /* convert to usecs and add to timespec: */
3193 + tv->tv_usec += nsec_delta / NSEC_PER_USEC;
3194 + while (tv->tv_usec > USEC_PER_SEC) {
3195 + tv->tv_sec += 1;
3196 + tv->tv_usec -= USEC_PER_SEC;
3197 + }
3198 +}
3199 +
3200 int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz)
3201 {
3202 - if (!__sysctl_vsyscall)
3203 - return gettimeofday(tv,tz);
3204 if (tv)
3205 do_vgettimeofday(tv);
3206 if (tz)
3207 @@ -129,11 +151,11 @@ int __vsyscall(0) vgettimeofday(struct t
3208 * unlikely */
3209 time_t __vsyscall(1) vtime(time_t *t)
3210 {
3211 - if (!__sysctl_vsyscall)
3212 + if (unlikely(!__vsyscall_gtod_data.sysctl_enabled))
3213 return time_syscall(t);
3214 else if (t)
3215 - *t = __xtime.tv_sec;
3216 - return __xtime.tv_sec;
3217 + *t = __vsyscall_gtod_data.wall_time_tv.tv_sec;
3218 + return __vsyscall_gtod_data.wall_time_tv.tv_sec;
3219 }
3220
3221 /* Fast way to get current CPU and node.
3222 @@ -210,7 +232,7 @@ static int vsyscall_sysctl_change(ctl_ta
3223 ret = -ENOMEM;
3224 goto out;
3225 }
3226 - if (!sysctl_vsyscall) {
3227 + if (!vsyscall_gtod_data.sysctl_enabled) {
3228 writew(SYSCALL, map1);
3229 writew(SYSCALL, map2);
3230 } else {
3231 @@ -232,16 +254,17 @@ static int vsyscall_sysctl_nostrat(ctl_t
3232
3233 static ctl_table kernel_table2[] = {
3234 { .ctl_name = 99, .procname = "vsyscall64",
3235 - .data = &sysctl_vsyscall, .maxlen = sizeof(int), .mode = 0644,
3236 + .data = &vsyscall_gtod_data.sysctl_enabled, .maxlen = sizeof(int),
3237 + .mode = 0644,
3238 .strategy = vsyscall_sysctl_nostrat,
3239 .proc_handler = vsyscall_sysctl_change },
3240 - { 0, }
3241 + {}
3242 };
3243
3244 static ctl_table kernel_root_table2[] = {
3245 { .ctl_name = CTL_KERN, .procname = "kernel", .mode = 0555,
3246 .child = kernel_table2 },
3247 - { 0 },
3248 + {}
3249 };
3250
3251 #endif
3252 @@ -304,14 +327,14 @@ static int __init vsyscall_init(void)
3253 BUG_ON((unsigned long) &vgetcpu != VSYSCALL_ADDR(__NR_vgetcpu));
3254 map_vsyscall();
3255 #ifdef CONFIG_XEN
3256 - sysctl_vsyscall = 0; /* disable vgettimeofay() */
3257 + vsyscall_gtod_data.sysctl_enabled = 0; /* disable vgettimeofay() */
3258 if (boot_cpu_has(X86_FEATURE_RDTSCP))
3259 vgetcpu_mode = VGETCPU_RDTSCP;
3260 else
3261 vgetcpu_mode = VGETCPU_LSL;
3262 #endif
3263 #ifdef CONFIG_SYSCTL
3264 - register_sysctl_table(kernel_root_table2, 0);
3265 + register_sysctl_table(kernel_root_table2);
3266 #endif
3267 on_each_cpu(cpu_vsyscall_init, NULL, 0, 1);
3268 hotcpu_notifier(cpu_vsyscall_notifier, 0);
3269 --- a/arch/x86/mm/fault_32-xen.c
3270 +++ b/arch/x86/mm/fault_32-xen.c
3271 @@ -46,43 +46,17 @@ int unregister_page_fault_notifier(struc
3272 }
3273 EXPORT_SYMBOL_GPL(unregister_page_fault_notifier);
3274
3275 -static inline int notify_page_fault(enum die_val val, const char *str,
3276 - struct pt_regs *regs, long err, int trap, int sig)
3277 +static inline int notify_page_fault(struct pt_regs *regs, long err)
3278 {
3279 struct die_args args = {
3280 .regs = regs,
3281 - .str = str,
3282 + .str = "page fault",
3283 .err = err,
3284 - .trapnr = trap,
3285 - .signr = sig
3286 + .trapnr = 14,
3287 + .signr = SIGSEGV
3288 };
3289 - return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args);
3290 -}
3291 -
3292 -/*
3293 - * Unlock any spinlocks which will prevent us from getting the
3294 - * message out
3295 - */
3296 -void bust_spinlocks(int yes)
3297 -{
3298 - int loglevel_save = console_loglevel;
3299 -
3300 - if (yes) {
3301 - oops_in_progress = 1;
3302 - return;
3303 - }
3304 -#ifdef CONFIG_VT
3305 - unblank_screen();
3306 -#endif
3307 - oops_in_progress = 0;
3308 - /*
3309 - * OK, the message is on the console. Now we call printk()
3310 - * without oops_in_progress set so that printk will give klogd
3311 - * a poke. Hold onto your hats...
3312 - */
3313 - console_loglevel = 15; /* NMI oopser may have shut the console up */
3314 - printk(" ");
3315 - console_loglevel = loglevel_save;
3316 + return atomic_notifier_call_chain(&notify_page_fault_chain,
3317 + DIE_PAGE_FAULT, &args);
3318 }
3319
3320 /*
3321 @@ -476,8 +450,7 @@ fastcall void __kprobes do_page_fault(st
3322 /* Can take a spurious fault if mapping changes R/O -> R/W. */
3323 if (spurious_fault(regs, address, error_code))
3324 return;
3325 - if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
3326 - SIGSEGV) == NOTIFY_STOP)
3327 + if (notify_page_fault(regs, error_code) == NOTIFY_STOP)
3328 return;
3329 /*
3330 * Don't take the mm semaphore here. If we fixup a prefetch
3331 @@ -486,8 +459,7 @@ fastcall void __kprobes do_page_fault(st
3332 goto bad_area_nosemaphore;
3333 }
3334
3335 - if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
3336 - SIGSEGV) == NOTIFY_STOP)
3337 + if (notify_page_fault(regs, error_code) == NOTIFY_STOP)
3338 return;
3339
3340 /* It's safe to allow irq's after cr2 has been saved and the vmalloc
3341 --- a/arch/x86/mm/fault_64-xen.c
3342 +++ b/arch/x86/mm/fault_64-xen.c
3343 @@ -56,38 +56,17 @@ int unregister_page_fault_notifier(struc
3344 }
3345 EXPORT_SYMBOL_GPL(unregister_page_fault_notifier);
3346
3347 -static inline int notify_page_fault(enum die_val val, const char *str,
3348 - struct pt_regs *regs, long err, int trap, int sig)
3349 +static inline int notify_page_fault(struct pt_regs *regs, long err)
3350 {
3351 struct die_args args = {
3352 .regs = regs,
3353 - .str = str,
3354 + .str = "page fault",
3355 .err = err,
3356 - .trapnr = trap,
3357 - .signr = sig
3358 + .trapnr = 14,
3359 + .signr = SIGSEGV
3360 };
3361 - return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args);
3362 -}
3363 -
3364 -void bust_spinlocks(int yes)
3365 -{
3366 - int loglevel_save = console_loglevel;
3367 - if (yes) {
3368 - oops_in_progress = 1;
3369 - } else {
3370 -#ifdef CONFIG_VT
3371 - unblank_screen();
3372 -#endif
3373 - oops_in_progress = 0;
3374 - /*
3375 - * OK, the message is on the console. Now we call printk()
3376 - * without oops_in_progress set so that printk will give klogd
3377 - * a poke. Hold onto your hats...
3378 - */
3379 - console_loglevel = 15; /* NMI oopser may have shut the console up */
3380 - printk(" ");
3381 - console_loglevel = loglevel_save;
3382 - }
3383 + return atomic_notifier_call_chain(&notify_page_fault_chain,
3384 + DIE_PAGE_FAULT, &args);
3385 }
3386
3387 /* Sometimes the CPU reports invalid exceptions on prefetch.
3388 @@ -437,8 +416,7 @@ asmlinkage void __kprobes do_page_fault(
3389 /* Can take a spurious fault if mapping changes R/O -> R/W. */
3390 if (spurious_fault(regs, address, error_code))
3391 return;
3392 - if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
3393 - SIGSEGV) == NOTIFY_STOP)
3394 + if (notify_page_fault(regs, error_code) == NOTIFY_STOP)
3395 return;
3396 /*
3397 * Don't take the mm semaphore here. If we fixup a prefetch
3398 @@ -447,8 +425,7 @@ asmlinkage void __kprobes do_page_fault(
3399 goto bad_area_nosemaphore;
3400 }
3401
3402 - if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
3403 - SIGSEGV) == NOTIFY_STOP)
3404 + if (notify_page_fault(regs, error_code) == NOTIFY_STOP)
3405 return;
3406
3407 if (likely(regs->eflags & X86_EFLAGS_IF))
3408 --- a/arch/x86/mm/highmem_32-xen.c
3409 +++ b/arch/x86/mm/highmem_32-xen.c
3410 @@ -33,14 +33,16 @@ static void *__kmap_atomic(struct page *
3411
3412 /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
3413 pagefault_disable();
3414 +
3415 + idx = type + KM_TYPE_NR*smp_processor_id();
3416 + BUG_ON(!pte_none(*(kmap_pte-idx)));
3417 +
3418 if (!PageHighMem(page))
3419 return page_address(page);
3420
3421 - idx = type + KM_TYPE_NR*smp_processor_id();
3422 vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
3423 - if (!pte_none(*(kmap_pte-idx)))
3424 - BUG();
3425 set_pte_at(&init_mm, vaddr, kmap_pte-idx, mk_pte(page, prot));
3426 + /*arch_flush_lazy_mmu_mode();*/
3427
3428 return (void*) vaddr;
3429 }
3430 @@ -94,6 +96,7 @@ void *kmap_atomic_pfn(unsigned long pfn,
3431 idx = type + KM_TYPE_NR*smp_processor_id();
3432 vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
3433 set_pte(kmap_pte-idx, pfn_pte(pfn, kmap_prot));
3434 + /*arch_flush_lazy_mmu_mode();*/
3435
3436 return (void*) vaddr;
3437 }
3438 --- a/arch/x86/mm/init_32-xen.c
3439 +++ b/arch/x86/mm/init_32-xen.c
3440 @@ -66,6 +66,7 @@ static pmd_t * __init one_md_table_init(
3441
3442 #ifdef CONFIG_X86_PAE
3443 pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
3444 + paravirt_alloc_pd(__pa(pmd_table) >> PAGE_SHIFT);
3445 make_lowmem_page_readonly(pmd_table, XENFEAT_writable_page_tables);
3446 set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
3447 pud = pud_offset(pgd, 0);
3448 @@ -87,6 +88,7 @@ static pte_t * __init one_page_table_ini
3449 {
3450 if (pmd_none(*pmd)) {
3451 pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
3452 + paravirt_alloc_pt(__pa(page_table) >> PAGE_SHIFT);
3453 make_lowmem_page_readonly(page_table,
3454 XENFEAT_writable_page_tables);
3455 set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
3456 --- a/arch/x86/mm/init_64-xen.c
3457 +++ b/arch/x86/mm/init_64-xen.c
3458 @@ -1110,20 +1110,30 @@ int kern_addr_valid(unsigned long addr)
3459 extern int exception_trace, page_fault_trace;
3460
3461 static ctl_table debug_table2[] = {
3462 - { 99, "exception-trace", &exception_trace, sizeof(int), 0644, NULL,
3463 - proc_dointvec },
3464 - { 0, }
3465 + {
3466 + .ctl_name = 99,
3467 + .procname = "exception-trace",
3468 + .data = &exception_trace,
3469 + .maxlen = sizeof(int),
3470 + .mode = 0644,
3471 + .proc_handler = proc_dointvec
3472 + },
3473 + {}
3474 };
3475
3476 static ctl_table debug_root_table2[] = {
3477 - { .ctl_name = CTL_DEBUG, .procname = "debug", .mode = 0555,
3478 - .child = debug_table2 },
3479 - { 0 },
3480 + {
3481 + .ctl_name = CTL_DEBUG,
3482 + .procname = "debug",
3483 + .mode = 0555,
3484 + .child = debug_table2
3485 + },
3486 + {}
3487 };
3488
3489 static __init int x8664_sysctl_init(void)
3490 {
3491 - register_sysctl_table(debug_root_table2, 1);
3492 + register_sysctl_table(debug_root_table2);
3493 return 0;
3494 }
3495 __initcall(x8664_sysctl_init);
3496 --- a/arch/x86/mm/pageattr_64-xen.c
3497 +++ b/arch/x86/mm/pageattr_64-xen.c
3498 @@ -350,8 +350,8 @@ static void flush_kernel_map(void *arg)
3499 void *adr = page_address(pg);
3500 if (cpu_has_clflush)
3501 cache_flush_page(adr);
3502 - __flush_tlb_one(adr);
3503 }
3504 + __flush_tlb_all();
3505 }
3506
3507 static inline void flush_map(struct list_head *l)
3508 @@ -376,6 +376,7 @@ static void revert_page(unsigned long ad
3509 pud_t *pud;
3510 pmd_t *pmd;
3511 pte_t large_pte;
3512 + unsigned long pfn;
3513
3514 pgd = pgd_offset_k(address);
3515 BUG_ON(pgd_none(*pgd));
3516 @@ -383,7 +384,8 @@ static void revert_page(unsigned long ad
3517 BUG_ON(pud_none(*pud));
3518 pmd = pmd_offset(pud, address);
3519 BUG_ON(__pmd_val(*pmd) & _PAGE_PSE);
3520 - large_pte = mk_pte_phys(__pa(address) & LARGE_PAGE_MASK, ref_prot);
3521 + pfn = (__pa(address) & LARGE_PAGE_MASK) >> PAGE_SHIFT;
3522 + large_pte = pfn_pte(pfn, ref_prot);
3523 large_pte = pte_mkhuge(large_pte);
3524 set_pte((pte_t *)pmd, large_pte);
3525 }
3526 --- a/arch/x86/mm/pgtable_32-xen.c
3527 +++ b/arch/x86/mm/pgtable_32-xen.c
3528 @@ -149,6 +149,8 @@ void __set_fixmap (enum fixed_addresses
3529 void __init reserve_top_address(unsigned long reserve)
3530 {
3531 BUG_ON(fixmaps > 0);
3532 + printk(KERN_INFO "Reserving virtual address space above 0x%08x\n",
3533 + (int)-reserve);
3534 __FIXADDR_TOP = -reserve - PAGE_SIZE;
3535 __VMALLOC_RESERVE += reserve;
3536 }
3537 @@ -258,6 +260,12 @@ void pgd_ctor(void *pgd, struct kmem_cac
3538 swapper_pg_dir + USER_PTRS_PER_PGD,
3539 KERNEL_PGD_PTRS);
3540 memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
3541 +
3542 + /* must happen under lock */
3543 + paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT,
3544 + __pa(swapper_pg_dir) >> PAGE_SHIFT,
3545 + USER_PTRS_PER_PGD, PTRS_PER_PGD - USER_PTRS_PER_PGD);
3546 +
3547 pgd_list_add(pgd);
3548 spin_unlock_irqrestore(&pgd_lock, flags);
3549 }
3550 @@ -268,6 +276,7 @@ void pgd_dtor(void *pgd, struct kmem_cac
3551 {
3552 unsigned long flags; /* can be called from interrupt context */
3553
3554 + paravirt_release_pd(__pa(pgd) >> PAGE_SHIFT);
3555 spin_lock_irqsave(&pgd_lock, flags);
3556 pgd_list_del(pgd);
3557 spin_unlock_irqrestore(&pgd_lock, flags);
3558 @@ -292,6 +301,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
3559 pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
3560 if (!pmd)
3561 goto out_oom;
3562 + paravirt_alloc_pd(__pa(pmd) >> PAGE_SHIFT);
3563 set_pgd(&pgd[i], __pgd(1 + __pa(pmd)));
3564 }
3565 return pgd;
3566 @@ -314,6 +324,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
3567 pmd[i] = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
3568 if (!pmd[i])
3569 goto out_oom;
3570 + paravirt_alloc_pd(__pa(pmd) >> PAGE_SHIFT);
3571 }
3572
3573 spin_lock_irqsave(&pgd_lock, flags);
3574 @@ -354,12 +365,17 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
3575
3576 out_oom:
3577 if (HAVE_SHARED_KERNEL_PMD) {
3578 - for (i--; i >= 0; i--)
3579 - kmem_cache_free(pmd_cache,
3580 - (void *)__va(pgd_val(pgd[i])-1));
3581 + for (i--; i >= 0; i--) {
3582 + pgd_t pgdent = pgd[i];
3583 + void* pmd = (void *)__va(pgd_val(pgdent)-1);
3584 + paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
3585 + kmem_cache_free(pmd_cache, pmd);
3586 + }
3587 } else {
3588 - for (i--; i >= 0; i--)
3589 + for (i--; i >= 0; i--) {
3590 + paravirt_release_pd(__pa(pmd[i]) >> PAGE_SHIFT);
3591 kmem_cache_free(pmd_cache, pmd[i]);
3592 + }
3593 kfree(pmd);
3594 }
3595 kmem_cache_free(pgd_cache, pgd);
3596 @@ -383,7 +399,9 @@ void pgd_free(pgd_t *pgd)
3597 /* in the PAE case user pgd entries are overwritten before usage */
3598 if (PTRS_PER_PMD > 1) {
3599 for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
3600 - pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1);
3601 + pgd_t pgdent = pgd[i];
3602 + void* pmd = (void *)__va(pgd_val(pgdent)-1);
3603 + paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
3604 kmem_cache_free(pmd_cache, pmd);
3605 }
3606
3607 --- a/drivers/acpi/processor_extcntl.c
3608 +++ b/drivers/acpi/processor_extcntl.c
3609 @@ -32,9 +32,8 @@
3610
3611 #define ACPI_PROCESSOR_COMPONENT 0x01000000
3612 #define ACPI_PROCESSOR_CLASS "processor"
3613 -#define ACPI_PROCESSOR_DRIVER_NAME "ACPI Processor Driver"
3614 #define _COMPONENT ACPI_PROCESSOR_COMPONENT
3615 -ACPI_MODULE_NAME("acpi_processor")
3616 +ACPI_MODULE_NAME("processor_extcntl")
3617
3618 static int processor_extcntl_parse_csd(struct acpi_processor *pr);
3619 static int processor_extcntl_get_performance(struct acpi_processor *pr);
3620 @@ -56,24 +55,17 @@ static int processor_notify_smm(void)
3621 return 0;
3622
3623 /* Can't write pstate_cnt to smi_cmd if either value is zero */
3624 - if ((!acpi_fadt.smi_cmd) || (!acpi_fadt.pstate_cnt)) {
3625 + if (!acpi_gbl_FADT.smi_command || !acpi_gbl_FADT.pstate_control) {
3626 ACPI_DEBUG_PRINT((ACPI_DB_INFO,"No SMI port or pstate_cnt\n"));
3627 return 0;
3628 }
3629
3630 ACPI_DEBUG_PRINT((ACPI_DB_INFO,
3631 "Writing pstate_cnt [0x%x] to smi_cmd [0x%x]\n",
3632 - acpi_fadt.pstate_cnt, acpi_fadt.smi_cmd));
3633 + acpi_gbl_FADT.pstate_control, acpi_gbl_FADT.smi_command));
3634
3635 - /* FADT v1 doesn't support pstate_cnt, many BIOS vendors use
3636 - * it anyway, so we need to support it... */
3637 - if (acpi_fadt_is_v1) {
3638 - ACPI_DEBUG_PRINT((ACPI_DB_INFO,
3639 - "Using v1.0 FADT reserved value for pstate_cnt\n"));
3640 - }
3641 -
3642 - status = acpi_os_write_port(acpi_fadt.smi_cmd,
3643 - (u32) acpi_fadt.pstate_cnt, 8);
3644 + status = acpi_os_write_port(acpi_gbl_FADT.smi_command,
3645 + acpi_gbl_FADT.pstate_control, 8);
3646 if (ACPI_FAILURE(status))
3647 return status;
3648
3649 --- a/drivers/char/tpm/tpm_xen.c
3650 +++ b/drivers/char/tpm/tpm_xen.c
3651 @@ -481,7 +481,6 @@ static struct xenbus_device_id tpmfront_
3652
3653 static struct xenbus_driver tpmfront = {
3654 .name = "vtpm",
3655 - .owner = THIS_MODULE,
3656 .ids = tpmfront_ids,
3657 .probe = tpmfront_probe,
3658 .remove = tpmfront_remove,
3659 @@ -491,9 +490,9 @@ static struct xenbus_driver tpmfront = {
3660 .suspend_cancel = tpmfront_suspend_cancel,
3661 };
3662
3663 -static void __init init_tpm_xenbus(void)
3664 +static int __init init_tpm_xenbus(void)
3665 {
3666 - xenbus_register_frontend(&tpmfront);
3667 + return xenbus_register_frontend(&tpmfront);
3668 }
3669
3670 static int tpmif_allocate_tx_buffers(struct tpm_private *tp)
3671 --- a/drivers/pci/msi-xen.c
3672 +++ b/drivers/pci/msi-xen.c
3673 @@ -44,6 +44,36 @@ struct msi_pirq_entry {
3674 int entry_nr;
3675 };
3676
3677 +static void msi_set_enable(struct pci_dev *dev, int enable)
3678 +{
3679 + int pos;
3680 + u16 control;
3681 +
3682 + pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
3683 + if (pos) {
3684 + pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &control);
3685 + control &= ~PCI_MSI_FLAGS_ENABLE;
3686 + if (enable)
3687 + control |= PCI_MSI_FLAGS_ENABLE;
3688 + pci_write_config_word(dev, pos + PCI_MSI_FLAGS, control);
3689 + }
3690 +}
3691 +
3692 +static void msix_set_enable(struct pci_dev *dev, int enable)
3693 +{
3694 + int pos;
3695 + u16 control;
3696 +
3697 + pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
3698 + if (pos) {
3699 + pci_read_config_word(dev, pos + PCI_MSIX_FLAGS, &control);
3700 + control &= ~PCI_MSIX_FLAGS_ENABLE;
3701 + if (enable)
3702 + control |= PCI_MSIX_FLAGS_ENABLE;
3703 + pci_write_config_word(dev, pos + PCI_MSIX_FLAGS, control);
3704 + }
3705 +}
3706 +
3707 static struct msi_dev_list *get_msi_dev_pirq_list(struct pci_dev *dev)
3708 {
3709 struct msi_dev_list *msi_dev_list, *ret = NULL;
3710 @@ -235,85 +265,13 @@ static int msi_map_vector(struct pci_dev
3711
3712 static int msi_init(void)
3713 {
3714 - static int status = 0;
3715 -
3716 - if (pci_msi_quirk) {
3717 - pci_msi_enable = 0;
3718 - printk(KERN_WARNING "PCI: MSI quirk detected. MSI disabled.\n");
3719 - status = -EINVAL;
3720 - }
3721 -
3722 - return status;
3723 -}
3724 -
3725 -void pci_scan_msi_device(struct pci_dev *dev) { }
3726 -
3727 -void disable_msi_mode(struct pci_dev *dev, int pos, int type)
3728 -{
3729 - u16 control;
3730 -
3731 - pci_read_config_word(dev, msi_control_reg(pos), &control);
3732 - if (type == PCI_CAP_ID_MSI) {
3733 - /* Set enabled bits to single MSI & enable MSI_enable bit */
3734 - msi_disable(control);
3735 - pci_write_config_word(dev, msi_control_reg(pos), control);
3736 - dev->msi_enabled = 0;
3737 - } else {
3738 - msix_disable(control);
3739 - pci_write_config_word(dev, msi_control_reg(pos), control);
3740 - dev->msix_enabled = 0;
3741 - }
3742 -
3743 - pci_intx(dev, 1); /* enable intx */
3744 -}
3745 -
3746 -static void enable_msi_mode(struct pci_dev *dev, int pos, int type)
3747 -{
3748 - u16 control;
3749 -
3750 - pci_read_config_word(dev, msi_control_reg(pos), &control);
3751 - if (type == PCI_CAP_ID_MSI) {
3752 - /* Set enabled bits to single MSI & enable MSI_enable bit */
3753 - msi_enable(control, 1);
3754 - pci_write_config_word(dev, msi_control_reg(pos), control);
3755 - dev->msi_enabled = 1;
3756 - } else {
3757 - msix_enable(control);
3758 - pci_write_config_word(dev, msi_control_reg(pos), control);
3759 - dev->msix_enabled = 1;
3760 - }
3761 -
3762 - pci_intx(dev, 0); /* disable intx */
3763 -}
3764 -
3765 -#ifdef CONFIG_PM
3766 -int pci_save_msi_state(struct pci_dev *dev)
3767 -{
3768 - int pos;
3769 -
3770 - pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
3771 - if (pos <= 0 || dev->no_msi)
3772 - return 0;
3773 -
3774 - if (!dev->msi_enabled)
3775 - return 0;
3776 -
3777 - /* Restore dev->irq to its default pin-assertion vector */
3778 - msi_unmap_pirq(dev, dev->irq);
3779 - /* Disable MSI mode */
3780 - disable_msi_mode(dev, pos, PCI_CAP_ID_MSI);
3781 - /* Set the flags for use of restore */
3782 - dev->msi_enabled = 1;
3783 return 0;
3784 }
3785
3786 -void pci_restore_msi_state(struct pci_dev *dev)
3787 +#ifdef CONFIG_PM
3788 +static void __pci_restore_msi_state(struct pci_dev *dev)
3789 {
3790 - int pos, pirq;
3791 -
3792 - pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
3793 - if (pos <= 0)
3794 - return;
3795 + int pirq;
3796
3797 if (!dev->msi_enabled)
3798 return;
3799 @@ -321,40 +279,12 @@ void pci_restore_msi_state(struct pci_de
3800 pirq = msi_map_pirq_to_vector(dev, dev->irq, 0, 0);
3801 if (pirq < 0)
3802 return;
3803 - enable_msi_mode(dev, pos, PCI_CAP_ID_MSI);
3804 -}
3805 -
3806 -int pci_save_msix_state(struct pci_dev *dev)
3807 -{
3808 - int pos;
3809 - unsigned long flags;
3810 - struct msi_dev_list *msi_dev_entry;
3811 - struct msi_pirq_entry *pirq_entry, *tmp;
3812
3813 - pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
3814 - if (pos <= 0 || dev->no_msi)
3815 - return 0;
3816 -
3817 - /* save the capability */
3818 - if (!dev->msix_enabled)
3819 - return 0;
3820 -
3821 - msi_dev_entry = get_msi_dev_pirq_list(dev);
3822 -
3823 - spin_lock_irqsave(&msi_dev_entry->pirq_list_lock, flags);
3824 - list_for_each_entry_safe(pirq_entry, tmp,
3825 - &msi_dev_entry->pirq_list_head, list)
3826 - msi_unmap_pirq(dev, pirq_entry->pirq);
3827 - spin_unlock_irqrestore(&msi_dev_entry->pirq_list_lock, flags);
3828 -
3829 - disable_msi_mode(dev, pos, PCI_CAP_ID_MSIX);
3830 - /* Set the flags for use of restore */
3831 - dev->msix_enabled = 1;
3832 -
3833 - return 0;
3834 + pci_intx(dev, 0); /* disable intx */
3835 + msi_set_enable(dev, 0);
3836 }
3837
3838 -void pci_restore_msix_state(struct pci_dev *dev)
3839 +static void __pci_restore_msix_state(struct pci_dev *dev)
3840 {
3841 int pos;
3842 unsigned long flags;
3843 @@ -387,9 +317,16 @@ void pci_restore_msix_state(struct pci_d
3844 }
3845 spin_unlock_irqrestore(&msi_dev_entry->pirq_list_lock, flags);
3846
3847 - enable_msi_mode(dev, pos, PCI_CAP_ID_MSIX);
3848 + pci_intx(dev, 0); /* disable intx */
3849 + msix_set_enable(dev, 0);
3850 }
3851 -#endif
3852 +
3853 +void pci_restore_msi_state(struct pci_dev *dev)
3854 +{
3855 + __pci_restore_msi_state(dev);
3856 + __pci_restore_msix_state(dev);
3857 +}
3858 +#endif /* CONFIG_PM */
3859
3860 /**
3861 * msi_capability_init - configure device's MSI capability structure
3862 @@ -405,6 +342,8 @@ static int msi_capability_init(struct pc
3863 int pos, pirq;
3864 u16 control;
3865
3866 + msi_set_enable(dev, 0); /* Ensure msi is disabled as I set it up */
3867 +
3868 pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
3869 pci_read_config_word(dev, msi_control_reg(pos), &control);
3870
3871 @@ -413,7 +352,8 @@ static int msi_capability_init(struct pc
3872 return -EBUSY;
3873
3874 /* Set MSI enabled bits */
3875 - enable_msi_mode(dev, pos, PCI_CAP_ID_MSI);
3876 + pci_intx(dev, 0); /* disable intx */
3877 + msi_set_enable(dev, 1);
3878 dev->msi_enabled = 1;
3879
3880 dev->irq = pirq;
3881 @@ -441,6 +381,8 @@ static int msix_capability_init(struct p
3882 if (!msi_dev_entry)
3883 return -ENOMEM;
3884
3885 + msix_set_enable(dev, 0);/* Ensure msix is disabled as I set it up */
3886 +
3887 pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
3888 table_base = find_table_base(dev, pos);
3889 if (!table_base)
3890 @@ -484,7 +426,8 @@ static int msix_capability_init(struct p
3891 return avail;
3892 }
3893
3894 - enable_msi_mode(dev, pos, PCI_CAP_ID_MSIX);
3895 + pci_intx(dev, 0); /* disable intx */
3896 + msix_set_enable(dev, 1);
3897 dev->msix_enabled = 1;
3898
3899 return 0;
3900 @@ -572,17 +515,14 @@ int pci_enable_msi(struct pci_dev* dev)
3901 /* Check whether driver already requested for MSI-X irqs */
3902 if (dev->msix_enabled) {
3903 printk(KERN_INFO "PCI: %s: Can't enable MSI. "
3904 - "Device already has MSI-X irq assigned\n",
3905 - pci_name(dev));
3906 - dev->irq = temp;
3907 + "Device already has MSI-X enabled\n",
3908 + pci_name(dev));
3909 return -EINVAL;
3910 }
3911
3912 status = msi_capability_init(dev);
3913 if ( !status )
3914 dev->irq_old = temp;
3915 - else
3916 - dev->irq = temp;
3917
3918 return status;
3919 }
3920 @@ -590,7 +530,6 @@ int pci_enable_msi(struct pci_dev* dev)
3921 extern void pci_frontend_disable_msi(struct pci_dev* dev);
3922 void pci_disable_msi(struct pci_dev* dev)
3923 {
3924 - int pos;
3925 int pirq;
3926
3927 if (!pci_msi_enable)
3928 @@ -607,8 +546,7 @@ void pci_disable_msi(struct pci_dev* dev
3929 }
3930 #endif
3931
3932 - pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
3933 - if (!pos)
3934 + if (!dev->msi_enabled)
3935 return;
3936
3937 pirq = dev->irq;
3938 @@ -617,7 +555,9 @@ void pci_disable_msi(struct pci_dev* dev
3939 msi_unmap_pirq(dev, pirq);
3940
3941 /* Disable MSI mode */
3942 - disable_msi_mode(dev, pos, PCI_CAP_ID_MSI);
3943 + msi_set_enable(dev, 0);
3944 + pci_intx(dev, 1); /* enable intx */
3945 + dev->msi_enabled = 0;
3946 }
3947
3948 /**
3949 @@ -710,7 +650,6 @@ int pci_enable_msix(struct pci_dev* dev,
3950 printk(KERN_INFO "PCI: %s: Can't enable MSI-X. "
3951 "Device already has an MSI irq assigned\n",
3952 pci_name(dev));
3953 - dev->irq = temp;
3954 return -EINVAL;
3955 }
3956
3957 @@ -718,8 +657,6 @@ int pci_enable_msix(struct pci_dev* dev,
3958
3959 if ( !status )
3960 dev->irq_old = temp;
3961 - else
3962 - dev->irq = temp;
3963
3964 return status;
3965 }
3966 @@ -727,10 +664,6 @@ int pci_enable_msix(struct pci_dev* dev,
3967 extern void pci_frontend_disable_msix(struct pci_dev* dev);
3968 void pci_disable_msix(struct pci_dev* dev)
3969 {
3970 - int pos;
3971 - u16 control;
3972 -
3973 -
3974 if (!pci_msi_enable)
3975 return;
3976 if (!dev)
3977 @@ -756,18 +689,15 @@ void pci_disable_msix(struct pci_dev* de
3978 }
3979 #endif
3980
3981 - pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
3982 - if (!pos)
3983 - return;
3984 -
3985 - pci_read_config_word(dev, msi_control_reg(pos), &control);
3986 - if (!(control & PCI_MSIX_FLAGS_ENABLE))
3987 + if (!dev->msix_enabled)
3988 return;
3989
3990 msi_remove_pci_irq_vectors(dev);
3991
3992 /* Disable MSI mode */
3993 - disable_msi_mode(dev, pos, PCI_CAP_ID_MSIX);
3994 + msix_set_enable(dev, 0);
3995 + pci_intx(dev, 1); /* enable intx */
3996 + dev->msix_enabled = 0;
3997 }
3998
3999 /**
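The reworked enable/disable paths above keep the usual driver-facing sequence around pci_enable_msi()/pci_disable_msi(). A hedged usage sketch from a driver's side (the demo_* names and the handler are invented; only request_irq()/free_irq() and the MSI calls already used in this file are assumed):

	#include <linux/pci.h>
	#include <linux/interrupt.h>

	static irqreturn_t demo_interrupt(int irq, void *ctx)
	{
		return IRQ_HANDLED;
	}

	static int demo_setup_irq(struct pci_dev *pdev, void *ctx)
	{
		if (pci_enable_msi(pdev))
			dev_warn(&pdev->dev, "MSI unavailable, falling back to INTx\n");

		/* pdev->irq now holds either the MSI vector or the legacy pin IRQ */
		return request_irq(pdev->irq, demo_interrupt, IRQF_SHARED, "demo", ctx);
	}

	static void demo_teardown_irq(struct pci_dev *pdev, void *ctx)
	{
		free_irq(pdev->irq, ctx);
		pci_disable_msi(pdev);	/* returns early if MSI was never enabled */
	}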
4000 --- a/drivers/xen/balloon/sysfs.c
4001 +++ b/drivers/xen/balloon/sysfs.c
4002 @@ -34,6 +34,7 @@
4003 #include <linux/stat.h>
4004 #include <linux/string.h>
4005 #include <linux/sysdev.h>
4006 +#include <linux/module.h>
4007 #include "common.h"
4008
4009 #ifdef HAVE_XEN_PLATFORM_COMPAT_H
4010 --- a/drivers/xen/blkback/xenbus.c
4011 +++ b/drivers/xen/blkback/xenbus.c
4012 @@ -527,7 +527,6 @@ static const struct xenbus_device_id blk
4013
4014 static struct xenbus_driver blkback = {
4015 .name = "vbd",
4016 - .owner = THIS_MODULE,
4017 .ids = blkback_ids,
4018 .probe = blkback_probe,
4019 .remove = blkback_remove,
4020 @@ -537,5 +536,6 @@ static struct xenbus_driver blkback = {
4021
4022 void blkif_xenbus_init(void)
4023 {
4024 - xenbus_register_backend(&blkback);
4025 + if (xenbus_register_backend(&blkback))
4026 + BUG();
4027 }
4028 --- a/drivers/xen/blkfront/blkfront.c
4029 +++ b/drivers/xen/blkfront/blkfront.c
4030 @@ -907,7 +907,6 @@ MODULE_ALIAS("xen:vbd");
4031
4032 static struct xenbus_driver blkfront = {
4033 .name = "vbd",
4034 - .owner = THIS_MODULE,
4035 .ids = blkfront_ids,
4036 .probe = blkfront_probe,
4037 .remove = blkfront_remove,
4038 --- a/drivers/xen/blktap/xenbus.c
4039 +++ b/drivers/xen/blktap/xenbus.c
4040 @@ -465,7 +465,6 @@ static const struct xenbus_device_id blk
4041
4042 static struct xenbus_driver blktap = {
4043 .name = "tap",
4044 - .owner = THIS_MODULE,
4045 .ids = blktap_ids,
4046 .probe = blktap_probe,
4047 .remove = blktap_remove,
4048 @@ -475,5 +474,6 @@ static struct xenbus_driver blktap = {
4049
4050 void tap_blkif_xenbus_init(void)
4051 {
4052 - xenbus_register_backend(&blktap);
4053 + if (xenbus_register_backend(&blktap))
4054 + BUG();
4055 }
4056 --- a/drivers/xen/core/evtchn.c
4057 +++ b/drivers/xen/core/evtchn.c
4058 @@ -144,7 +144,7 @@ static void bind_evtchn_to_cpu(unsigned
4059 BUG_ON(!test_bit(chn, s->evtchn_mask));
4060
4061 if (irq != -1)
4062 - set_native_irq_info(irq, cpumask_of_cpu(cpu));
4063 + irq_desc[irq].affinity = cpumask_of_cpu(cpu);
4064
4065 clear_bit(chn, (unsigned long *)cpu_evtchn_mask[cpu_evtchn[chn]]);
4066 set_bit(chn, (unsigned long *)cpu_evtchn_mask[cpu]);
4067 @@ -157,7 +157,7 @@ static void init_evtchn_cpu_bindings(voi
4068
4069 /* By default all event channels notify CPU#0. */
4070 for (i = 0; i < NR_IRQS; i++)
4071 - set_native_irq_info(i, cpumask_of_cpu(0));
4072 + irq_desc[i].affinity = cpumask_of_cpu(0);
4073
4074 memset(cpu_evtchn, 0, sizeof(cpu_evtchn));
4075 memset(cpu_evtchn_mask[0], ~0, sizeof(cpu_evtchn_mask[0]));
4076 --- a/drivers/xen/core/smpboot.c
4077 +++ b/drivers/xen/core/smpboot.c
4078 @@ -121,7 +121,7 @@ static int __cpuinit xen_smp_intr_init(u
4079 rc = bind_ipi_to_irqhandler(RESCHEDULE_VECTOR,
4080 cpu,
4081 smp_reschedule_interrupt,
4082 - SA_INTERRUPT,
4083 + IRQF_DISABLED|IRQF_NOBALANCING,
4084 resched_name[cpu],
4085 NULL);
4086 if (rc < 0)
4087 @@ -132,7 +132,7 @@ static int __cpuinit xen_smp_intr_init(u
4088 rc = bind_ipi_to_irqhandler(CALL_FUNCTION_VECTOR,
4089 cpu,
4090 smp_call_function_interrupt,
4091 - SA_INTERRUPT,
4092 + IRQF_DISABLED|IRQF_NOBALANCING,
4093 callfunc_name[cpu],
4094 NULL);
4095 if (rc < 0)
4096 @@ -261,7 +261,7 @@ void __init smp_prepare_cpus(unsigned in
4097 {
4098 unsigned int cpu;
4099 struct task_struct *idle;
4100 - int apicid, acpiid;
4101 + int apicid;
4102 struct vcpu_get_physid cpu_id;
4103 #ifdef __x86_64__
4104 struct desc_ptr *gdt_descr;
4105 @@ -270,14 +270,8 @@ void __init smp_prepare_cpus(unsigned in
4106 #endif
4107
4108 apicid = 0;
4109 - if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, 0, &cpu_id) == 0) {
4110 + if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, 0, &cpu_id) == 0)
4111 apicid = xen_vcpu_physid_to_x86_apicid(cpu_id.phys_id);
4112 - acpiid = xen_vcpu_physid_to_x86_acpiid(cpu_id.phys_id);
4113 -#ifdef CONFIG_ACPI
4114 - if (acpiid != 0xff)
4115 - x86_acpiid_to_apicid[acpiid] = apicid;
4116 -#endif
4117 - }
4118 boot_cpu_data.apicid = apicid;
4119 cpu_data[0] = boot_cpu_data;
4120
4121 @@ -333,14 +327,8 @@ void __init smp_prepare_cpus(unsigned in
4122 XENFEAT_writable_descriptor_tables);
4123
4124 apicid = cpu;
4125 - if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, cpu, &cpu_id) == 0) {
4126 + if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, cpu, &cpu_id) == 0)
4127 apicid = xen_vcpu_physid_to_x86_apicid(cpu_id.phys_id);
4128 - acpiid = xen_vcpu_physid_to_x86_acpiid(cpu_id.phys_id);
4129 -#ifdef CONFIG_ACPI
4130 - if (acpiid != 0xff)
4131 - x86_acpiid_to_apicid[acpiid] = apicid;
4132 -#endif
4133 - }
4134 cpu_data[cpu] = boot_cpu_data;
4135 cpu_data[cpu].apicid = apicid;
4136
4137 --- a/drivers/xen/fbfront/xenfb.c
4138 +++ b/drivers/xen/fbfront/xenfb.c
4139 @@ -856,7 +856,6 @@ MODULE_ALIAS("xen:vfb");
4140
4141 static struct xenbus_driver xenfb_driver = {
4142 .name = "vfb",
4143 - .owner = THIS_MODULE,
4144 .ids = xenfb_ids,
4145 .probe = xenfb_probe,
4146 .remove = xenfb_remove,
4147 --- a/drivers/xen/fbfront/xenkbd.c
4148 +++ b/drivers/xen/fbfront/xenkbd.c
4149 @@ -323,7 +323,6 @@ MODULE_ALIAS("xen:vkbd");
4150
4151 static struct xenbus_driver xenkbd_driver = {
4152 .name = "vkbd",
4153 - .owner = THIS_MODULE,
4154 .ids = xenkbd_ids,
4155 .probe = xenkbd_probe,
4156 .remove = xenkbd_remove,
4157 --- a/drivers/xen/netback/xenbus.c
4158 +++ b/drivers/xen/netback/xenbus.c
4159 @@ -439,7 +439,6 @@ static const struct xenbus_device_id net
4160
4161 static struct xenbus_driver netback = {
4162 .name = "vif",
4163 - .owner = THIS_MODULE,
4164 .ids = netback_ids,
4165 .probe = netback_probe,
4166 .remove = netback_remove,
4167 @@ -450,5 +449,6 @@ static struct xenbus_driver netback = {
4168
4169 void netif_xenbus_init(void)
4170 {
4171 - xenbus_register_backend(&netback);
4172 + if (xenbus_register_backend(&netback))
4173 + BUG();
4174 }
4175 --- a/drivers/xen/netfront/netfront.c
4176 +++ b/drivers/xen/netfront/netfront.c
4177 @@ -1892,20 +1892,19 @@ static struct ethtool_ops network_ethtoo
4178 };
4179
4180 #ifdef CONFIG_SYSFS
4181 -static ssize_t show_rxbuf_min(struct class_device *cd, char *buf)
4182 +static ssize_t show_rxbuf_min(struct device *dev,
4183 + struct device_attribute *attr, char *buf)
4184 {
4185 - struct net_device *netdev = container_of(cd, struct net_device,
4186 - class_dev);
4187 - struct netfront_info *info = netdev_priv(netdev);
4188 + struct netfront_info *info = netdev_priv(to_net_dev(dev));
4189
4190 return sprintf(buf, "%u\n", info->rx_min_target);
4191 }
4192
4193 -static ssize_t store_rxbuf_min(struct class_device *cd,
4194 +static ssize_t store_rxbuf_min(struct device *dev,
4195 + struct device_attribute *attr,
4196 const char *buf, size_t len)
4197 {
4198 - struct net_device *netdev = container_of(cd, struct net_device,
4199 - class_dev);
4200 + struct net_device *netdev = to_net_dev(dev);
4201 struct netfront_info *np = netdev_priv(netdev);
4202 char *endp;
4203 unsigned long target;
4204 @@ -1935,20 +1934,19 @@ static ssize_t store_rxbuf_min(struct cl
4205 return len;
4206 }
4207
4208 -static ssize_t show_rxbuf_max(struct class_device *cd, char *buf)
4209 +static ssize_t show_rxbuf_max(struct device *dev,
4210 + struct device_attribute *attr, char *buf)
4211 {
4212 - struct net_device *netdev = container_of(cd, struct net_device,
4213 - class_dev);
4214 - struct netfront_info *info = netdev_priv(netdev);
4215 + struct netfront_info *info = netdev_priv(to_net_dev(dev));
4216
4217 return sprintf(buf, "%u\n", info->rx_max_target);
4218 }
4219
4220 -static ssize_t store_rxbuf_max(struct class_device *cd,
4221 +static ssize_t store_rxbuf_max(struct device *dev,
4222 + struct device_attribute *attr,
4223 const char *buf, size_t len)
4224 {
4225 - struct net_device *netdev = container_of(cd, struct net_device,
4226 - class_dev);
4227 + struct net_device *netdev = to_net_dev(dev);
4228 struct netfront_info *np = netdev_priv(netdev);
4229 char *endp;
4230 unsigned long target;
4231 @@ -1978,16 +1976,15 @@ static ssize_t store_rxbuf_max(struct cl
4232 return len;
4233 }
4234
4235 -static ssize_t show_rxbuf_cur(struct class_device *cd, char *buf)
4236 +static ssize_t show_rxbuf_cur(struct device *dev,
4237 + struct device_attribute *attr, char *buf)
4238 {
4239 - struct net_device *netdev = container_of(cd, struct net_device,
4240 - class_dev);
4241 - struct netfront_info *info = netdev_priv(netdev);
4242 + struct netfront_info *info = netdev_priv(to_net_dev(dev));
4243
4244 return sprintf(buf, "%u\n", info->rx_target);
4245 }
4246
4247 -static const struct class_device_attribute xennet_attrs[] = {
4248 +static struct device_attribute xennet_attrs[] = {
4249 __ATTR(rxbuf_min, S_IRUGO|S_IWUSR, show_rxbuf_min, store_rxbuf_min),
4250 __ATTR(rxbuf_max, S_IRUGO|S_IWUSR, show_rxbuf_max, store_rxbuf_max),
4251 __ATTR(rxbuf_cur, S_IRUGO, show_rxbuf_cur, NULL),
4252 @@ -1999,8 +1996,8 @@ static int xennet_sysfs_addif(struct net
4253 int error = 0;
4254
4255 for (i = 0; i < ARRAY_SIZE(xennet_attrs); i++) {
4256 - error = class_device_create_file(&netdev->class_dev,
4257 - &xennet_attrs[i]);
4258 + error = device_create_file(&netdev->dev,
4259 + &xennet_attrs[i]);
4260 if (error)
4261 goto fail;
4262 }
4263 @@ -2008,8 +2005,7 @@ static int xennet_sysfs_addif(struct net
4264
4265 fail:
4266 while (--i >= 0)
4267 - class_device_remove_file(&netdev->class_dev,
4268 - &xennet_attrs[i]);
4269 + device_remove_file(&netdev->dev, &xennet_attrs[i]);
4270 return error;
4271 }
4272
4273 @@ -2017,10 +2013,8 @@ static void xennet_sysfs_delif(struct ne
4274 {
4275 int i;
4276
4277 - for (i = 0; i < ARRAY_SIZE(xennet_attrs); i++) {
4278 - class_device_remove_file(&netdev->class_dev,
4279 - &xennet_attrs[i]);
4280 - }
4281 + for (i = 0; i < ARRAY_SIZE(xennet_attrs); i++)
4282 + device_remove_file(&netdev->dev, &xennet_attrs[i]);
4283 }
4284
4285 #endif /* CONFIG_SYSFS */
4286 @@ -2186,7 +2180,6 @@ MODULE_ALIAS("xen:vif");
4287
4288 static struct xenbus_driver netfront_driver = {
4289 .name = "vif",
4290 - .owner = THIS_MODULE,
4291 .ids = netfront_ids,
4292 .probe = netfront_probe,
4293 .remove = __devexit_p(netfront_remove),
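The class_device -> struct device conversion above relies on to_net_dev() to get back to the net_device from its embedded device. For reference (quoted from memory of include/linux/netdevice.h, not from this patch), it is the usual container_of() helper:

	/* recover the net_device wrapping the struct device handed to the
	 * show/store callbacks above */
	#define to_net_dev(d) container_of(d, struct net_device, dev)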
4294 --- a/drivers/xen/pciback/xenbus.c
4295 +++ b/drivers/xen/pciback/xenbus.c
4296 @@ -682,7 +682,6 @@ static const struct xenbus_device_id xen
4297
4298 static struct xenbus_driver xenbus_pciback_driver = {
4299 .name = "pciback",
4300 - .owner = THIS_MODULE,
4301 .ids = xenpci_ids,
4302 .probe = pciback_xenbus_probe,
4303 .remove = pciback_xenbus_remove,
4304 --- a/drivers/xen/pcifront/xenbus.c
4305 +++ b/drivers/xen/pcifront/xenbus.c
4306 @@ -436,7 +436,6 @@ MODULE_ALIAS("xen:pci");
4307
4308 static struct xenbus_driver xenbus_pcifront_driver = {
4309 .name = "pcifront",
4310 - .owner = THIS_MODULE,
4311 .ids = xenpci_ids,
4312 .probe = pcifront_xenbus_probe,
4313 .remove = pcifront_xenbus_remove,
4314 --- a/drivers/xen/scsiback/xenbus.c
4315 +++ b/drivers/xen/scsiback/xenbus.c
4316 @@ -350,7 +350,6 @@ static struct xenbus_device_id scsiback_
4317
4318 static struct xenbus_driver scsiback = {
4319 .name = "vscsi",
4320 - .owner = THIS_MODULE,
4321 .ids = scsiback_ids,
4322 .probe = scsiback_probe,
4323 .remove = scsiback_remove,
4324 --- a/drivers/xen/scsifront/xenbus.c
4325 +++ b/drivers/xen/scsifront/xenbus.c
4326 @@ -401,7 +401,6 @@ static struct xenbus_device_id scsifront
4327
4328 static struct xenbus_driver scsifront_driver = {
4329 .name = "vscsi",
4330 - .owner = THIS_MODULE,
4331 .ids = scsifront_ids,
4332 .probe = scsifront_probe,
4333 .remove = scsifront_remove,
4334 --- a/drivers/xen/tpmback/common.h
4335 +++ b/drivers/xen/tpmback/common.h
4336 @@ -54,11 +54,11 @@ typedef struct tpmif_st {
4337
4338 void tpmif_disconnect_complete(tpmif_t * tpmif);
4339 tpmif_t *tpmif_find(domid_t domid, struct backend_info *bi);
4340 -void tpmif_interface_init(void);
4341 +int tpmif_interface_init(void);
4342 void tpmif_interface_exit(void);
4343 void tpmif_schedule_work(tpmif_t * tpmif);
4344 void tpmif_deschedule_work(tpmif_t * tpmif);
4345 -void tpmif_xenbus_init(void);
4346 +int tpmif_xenbus_init(void);
4347 void tpmif_xenbus_exit(void);
4348 int tpmif_map(tpmif_t *tpmif, unsigned long shared_page, unsigned int evtchn);
4349 irqreturn_t tpmif_be_int(int irq, void *dev_id);
4350 --- a/drivers/xen/tpmback/interface.c
4351 +++ b/drivers/xen/tpmback/interface.c
4352 @@ -156,13 +156,14 @@ void tpmif_disconnect_complete(tpmif_t *
4353 free_tpmif(tpmif);
4354 }
4355
4356 -void __init tpmif_interface_init(void)
4357 +int __init tpmif_interface_init(void)
4358 {
4359 tpmif_cachep = kmem_cache_create("tpmif_cache", sizeof (tpmif_t),
4360 0, 0, NULL, NULL);
4361 + return tpmif_cachep ? 0 : -ENOMEM;
4362 }
4363
4364 -void __exit tpmif_interface_exit(void)
4365 +void tpmif_interface_exit(void)
4366 {
4367 kmem_cache_destroy(tpmif_cachep);
4368 }
4369 --- a/drivers/xen/tpmback/tpmback.c
4370 +++ b/drivers/xen/tpmback/tpmback.c
4371 @@ -923,22 +923,30 @@ static int __init tpmback_init(void)
4372 spin_lock_init(&tpm_schedule_list_lock);
4373 INIT_LIST_HEAD(&tpm_schedule_list);
4374
4375 - tpmif_interface_init();
4376 - tpmif_xenbus_init();
4377 + rc = tpmif_interface_init();
4378 + if (!rc) {
4379 + rc = tpmif_xenbus_init();
4380 + if (rc)
4381 + tpmif_interface_exit();
4382 + }
4383 + if (rc) {
4384 + misc_deregister(&vtpms_miscdevice);
4385 + return rc;
4386 + }
4387
4388 printk(KERN_ALERT "Successfully initialized TPM backend driver.\n");
4389
4390 return 0;
4391 }
4392 -
4393 module_init(tpmback_init);
4394
4395 -void __exit tpmback_exit(void)
4396 +static void __exit tpmback_exit(void)
4397 {
4398 vtpm_release_packets(NULL, 0);
4399 tpmif_xenbus_exit();
4400 tpmif_interface_exit();
4401 misc_deregister(&vtpms_miscdevice);
4402 }
4403 +module_exit(tpmback_exit)
4404
4405 MODULE_LICENSE("Dual BSD/GPL");
4406 --- a/drivers/xen/tpmback/xenbus.c
4407 +++ b/drivers/xen/tpmback/xenbus.c
4408 @@ -270,7 +270,6 @@ static const struct xenbus_device_id tpm
4409
4410 static struct xenbus_driver tpmback = {
4411 .name = "vtpm",
4412 - .owner = THIS_MODULE,
4413 .ids = tpmback_ids,
4414 .probe = tpmback_probe,
4415 .remove = tpmback_remove,
4416 @@ -278,9 +277,9 @@ static struct xenbus_driver tpmback = {
4417 };
4418
4419
4420 -void tpmif_xenbus_init(void)
4421 +int tpmif_xenbus_init(void)
4422 {
4423 - xenbus_register_backend(&tpmback);
4424 + return xenbus_register_backend(&tpmback);
4425 }
4426
4427 void tpmif_xenbus_exit(void)
4428 --- a/drivers/xen/xenbus/xenbus_probe_backend.c
4429 +++ b/drivers/xen/xenbus/xenbus_probe_backend.c
4430 @@ -172,13 +172,15 @@ static int xenbus_uevent_backend(struct
4431 return 0;
4432 }
4433
4434 -int xenbus_register_backend(struct xenbus_driver *drv)
4435 +int __xenbus_register_backend(struct xenbus_driver *drv,
4436 + struct module *owner, const char *mod_name)
4437 {
4438 drv->read_otherend_details = read_frontend_details;
4439
4440 - return xenbus_register_driver_common(drv, &xenbus_backend);
4441 + return xenbus_register_driver_common(drv, &xenbus_backend,
4442 + owner, mod_name);
4443 }
4444 -EXPORT_SYMBOL_GPL(xenbus_register_backend);
4445 +EXPORT_SYMBOL_GPL(__xenbus_register_backend);
4446
4447 /* backend/<typename>/<frontend-uuid>/<name> */
4448 static int xenbus_probe_backend_unit(const char *dir,
4449 --- a/drivers/xen/xenbus/xenbus_probe.c
4450 +++ b/drivers/xen/xenbus/xenbus_probe.c
4451 @@ -365,7 +365,9 @@ static void xenbus_dev_shutdown(struct d
4452 }
4453
4454 int xenbus_register_driver_common(struct xenbus_driver *drv,
4455 - struct xen_bus_type *bus)
4456 + struct xen_bus_type *bus,
4457 + struct module *owner,
4458 + const char *mod_name)
4459 {
4460 int ret;
4461
4462 @@ -375,7 +377,10 @@ int xenbus_register_driver_common(struct
4463 drv->driver.name = drv->name;
4464 drv->driver.bus = &bus->bus;
4465 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10)
4466 - drv->driver.owner = drv->owner;
4467 + drv->driver.owner = owner;
4468 +#endif
4469 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,21)
4470 + drv->driver.mod_name = mod_name;
4471 #endif
4472 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16)
4473 drv->driver.probe = xenbus_dev_probe;
4474 @@ -389,13 +394,15 @@ int xenbus_register_driver_common(struct
4475 return ret;
4476 }
4477
4478 -int xenbus_register_frontend(struct xenbus_driver *drv)
4479 +int __xenbus_register_frontend(struct xenbus_driver *drv,
4480 + struct module *owner, const char *mod_name)
4481 {
4482 int ret;
4483
4484 drv->read_otherend_details = read_backend_details;
4485
4486 - ret = xenbus_register_driver_common(drv, &xenbus_frontend);
4487 + ret = xenbus_register_driver_common(drv, &xenbus_frontend,
4488 + owner, mod_name);
4489 if (ret)
4490 return ret;
4491
4492 @@ -404,7 +411,7 @@ int xenbus_register_frontend(struct xenb
4493
4494 return 0;
4495 }
4496 -EXPORT_SYMBOL_GPL(xenbus_register_frontend);
4497 +EXPORT_SYMBOL_GPL(__xenbus_register_frontend);
4498
4499 void xenbus_unregister_driver(struct xenbus_driver *drv)
4500 {
4501 --- a/drivers/xen/xenbus/xenbus_probe.h
4502 +++ b/drivers/xen/xenbus/xenbus_probe.h
4503 @@ -63,7 +63,9 @@ extern int xenbus_match(struct device *_
4504 extern int xenbus_dev_probe(struct device *_dev);
4505 extern int xenbus_dev_remove(struct device *_dev);
4506 extern int xenbus_register_driver_common(struct xenbus_driver *drv,
4507 - struct xen_bus_type *bus);
4508 + struct xen_bus_type *bus,
4509 + struct module *owner,
4510 + const char *mod_name);
4511 extern int xenbus_probe_node(struct xen_bus_type *bus,
4512 const char *type,
4513 const char *nodename);
4514 --- a/drivers/xen/xenoprof/xenoprofile.c
4515 +++ b/drivers/xen/xenoprof/xenoprofile.c
4516 @@ -235,7 +235,7 @@ static int bind_virq(void)
4517 result = bind_virq_to_irqhandler(VIRQ_XENOPROF,
4518 i,
4519 xenoprof_ovf_interrupt,
4520 - SA_INTERRUPT,
4521 + IRQF_DISABLED|IRQF_NOBALANCING,
4522 "xenoprof",
4523 NULL);
4524
4525 --- a/include/asm-x86/i8253.h
4526 +++ b/include/asm-x86/i8253.h
4527 @@ -8,10 +8,14 @@
4528
4529 extern spinlock_t i8253_lock;
4530
4531 +#ifdef CONFIG_GENERIC_CLOCKEVENTS
4532 +
4533 extern struct clock_event_device *global_clock_event;
4534
4535 extern void setup_pit_timer(void);
4536
4537 +#endif
4538 +
4539 #define inb_pit inb_p
4540 #define outb_pit outb_p
4541
4542 --- a/include/asm-x86/mach-xen/asm/desc_32.h
4543 +++ b/include/asm-x86/mach-xen/asm/desc_32.h
4544 @@ -21,7 +21,7 @@ struct Xgt_desc_struct {
4545
4546 extern struct Xgt_desc_struct idt_descr;
4547 DECLARE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr);
4548 -
4549 +extern struct Xgt_desc_struct early_gdt_descr;
4550
4551 static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu)
4552 {
4553 --- a/include/asm-x86/mach-xen/asm/dma-mapping_64.h
4554 +++ b/include/asm-x86/mach-xen/asm/dma-mapping_64.h
4555 @@ -9,7 +9,6 @@
4556
4557 #include <asm/scatterlist.h>
4558 #include <asm/io.h>
4559 -#include <asm/swiotlb.h>
4560
4561 struct dma_mapping_ops {
4562 int (*mapping_error)(dma_addr_t dma_addr);
4563 @@ -67,6 +66,9 @@ static inline int dma_mapping_error(dma_
4564 #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
4565 #define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
4566
4567 +#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
4568 +#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
4569 +
4570 extern void *dma_alloc_coherent(struct device *dev, size_t size,
4571 dma_addr_t *dma_handle, gfp_t gfp);
4572 extern void dma_free_coherent(struct device *dev, size_t size, void *vaddr,
4573 --- a/include/asm-x86/mach-xen/asm/hypervisor.h
4574 +++ b/include/asm-x86/mach-xen/asm/hypervisor.h
4575 @@ -158,6 +158,19 @@ static inline void arch_leave_lazy_mmu_m
4576 #define arch_use_lazy_mmu_mode() unlikely(__get_cpu_var(xen_lazy_mmu))
4577 #endif
4578
4579 +#if 0 /* All uses are in places potentially called asynchronously, but
4580 + * asynchronous code should rather not make use of lazy mode at all.
4581 + * Therefore, all uses of this function get commented out, proper
4582 + * detection of asynchronous invocations is added wherever needed,
4583 + * and this function is disabled to catch any new (improper) uses.
4584 + */
4585 +static inline void arch_flush_lazy_mmu_mode(void)
4586 +{
4587 + if (arch_use_lazy_mmu_mode())
4588 + xen_multicall_flush(false);
4589 +}
4590 +#endif
4591 +
4592 #else /* CONFIG_XEN */
4593
4594 static inline void xen_multicall_flush(bool ignore) {}
4595 @@ -215,7 +228,7 @@ HYPERVISOR_block(
4596 return rc;
4597 }
4598
4599 -static inline void /*__noreturn*/
4600 +static inline void __noreturn
4601 HYPERVISOR_shutdown(
4602 unsigned int reason)
4603 {
4604 --- a/include/asm-x86/mach-xen/asm/io_32.h
4605 +++ b/include/asm-x86/mach-xen/asm/io_32.h
4606 @@ -232,12 +232,6 @@ static inline void memcpy_toio(volatile
4607 #define __ISA_IO_base ((char __iomem *)(fix_to_virt(FIX_ISAMAP_BEGIN)))
4608
4609 /*
4610 - * Again, i386 does not require mem IO specific function.
4611 - */
4612 -
4613 -#define eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),(void __force *)(b),(c),(d))
4614 -
4615 -/*
4616 * Cache management
4617 *
4618 * This needed for two cases
4619 --- a/include/asm-x86/mach-xen/asm/io_64.h
4620 +++ b/include/asm-x86/mach-xen/asm/io_64.h
4621 @@ -101,7 +101,7 @@ __OUTS(l)
4622
4623 #define IO_SPACE_LIMIT 0xffff
4624
4625 -#if defined(__KERNEL__) && __x86_64__
4626 +#if defined(__KERNEL__) && defined(__x86_64__)
4627
4628 #include <linux/vmalloc.h>
4629
4630 @@ -267,12 +267,6 @@ void memset_io(volatile void __iomem *a,
4631 */
4632 #define __ISA_IO_base ((char __iomem *)(fix_to_virt(FIX_ISAMAP_BEGIN)))
4633
4634 -/*
4635 - * Again, x86-64 does not require mem IO specific function.
4636 - */
4637 -
4638 -#define eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),(void *)(b),(c),(d))
4639 -
4640 /* Nothing to do */
4641
4642 #define dma_cache_inv(_start,_size) do { } while (0)
4643 --- a/include/asm-x86/mach-xen/asm/mmu_context_32.h
4644 +++ b/include/asm-x86/mach-xen/asm/mmu_context_32.h
4645 @@ -27,13 +27,13 @@ static inline void enter_lazy_tlb(struct
4646 static inline void __prepare_arch_switch(void)
4647 {
4648 /*
4649 - * Save away %fs. No need to save %gs, as it was saved on the
4650 + * Save away %gs. No need to save %fs, as it was saved on the
4651 * stack on entry. No need to save %es and %ds, as those are
4652 * always kernel segments while inside the kernel.
4653 */
4654 - asm volatile ( "mov %%fs,%0"
4655 - : "=m" (current->thread.fs));
4656 - asm volatile ( "movl %0,%%fs"
4657 + asm volatile ( "mov %%gs,%0"
4658 + : "=m" (current->thread.gs));
4659 + asm volatile ( "movl %0,%%gs"
4660 : : "r" (0) );
4661 }
4662
4663 @@ -95,7 +95,7 @@ static inline void switch_mm(struct mm_s
4664 }
4665
4666 #define deactivate_mm(tsk, mm) \
4667 - asm("movl %0,%%fs": :"r" (0));
4668 + asm("movl %0,%%gs": :"r" (0));
4669
4670 static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next)
4671 {
4672 --- a/include/asm-x86/mach-xen/asm/pgalloc_32.h
4673 +++ b/include/asm-x86/mach-xen/asm/pgalloc_32.h
4674 @@ -6,12 +6,23 @@
4675 #include <linux/mm.h> /* for struct page */
4676 #include <asm/io.h> /* for phys_to_virt and page_to_pseudophys */
4677
4678 -#define pmd_populate_kernel(mm, pmd, pte) \
4679 - set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte)))
4680 +#define paravirt_alloc_pt(pfn) do { } while (0)
4681 +#define paravirt_alloc_pd(pfn) do { } while (0)
4682 +#define paravirt_alloc_pd(pfn) do { } while (0)
4683 +#define paravirt_alloc_pd_clone(pfn, clonepfn, start, count) do { } while (0)
4684 +#define paravirt_release_pt(pfn) do { } while (0)
4685 +#define paravirt_release_pd(pfn) do { } while (0)
4686 +
4687 +#define pmd_populate_kernel(mm, pmd, pte) \
4688 +do { \
4689 + paravirt_alloc_pt(__pa(pte) >> PAGE_SHIFT); \
4690 + set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte))); \
4691 +} while (0)
4692
4693 #define pmd_populate(mm, pmd, pte) \
4694 do { \
4695 unsigned long pfn = page_to_pfn(pte); \
4696 + paravirt_alloc_pt(pfn); \
4697 if (test_bit(PG_pinned, &virt_to_page((mm)->pgd)->flags)) { \
4698 if (!PageHighMem(pte)) \
4699 BUG_ON(HYPERVISOR_update_va_mapping( \
4700 @@ -42,7 +53,11 @@ static inline void pte_free_kernel(pte_t
4701
4702 extern void pte_free(struct page *pte);
4703
4704 -#define __pte_free_tlb(tlb,pte) tlb_remove_page((tlb),(pte))
4705 +#define __pte_free_tlb(tlb,pte) \
4706 +do { \
4707 + paravirt_release_pt(page_to_pfn(pte)); \
4708 + tlb_remove_page((tlb),(pte)); \
4709 +} while (0)
4710
4711 #ifdef CONFIG_X86_PAE
4712 /*
4713 --- a/include/asm-x86/mach-xen/asm/pgtable_32.h
4714 +++ b/include/asm-x86/mach-xen/asm/pgtable_32.h
4715 @@ -275,6 +275,7 @@ static inline pte_t pte_mkhuge(pte_t pte
4716 */
4717 #define pte_update(mm, addr, ptep) do { } while (0)
4718 #define pte_update_defer(mm, addr, ptep) do { } while (0)
4719 +#define paravirt_map_pt_hook(slot, va, pfn) do { } while (0)
4720
4721 /*
4722 * We only update the dirty/accessed state if we set
4723 @@ -490,12 +491,24 @@ extern pte_t *lookup_address(unsigned lo
4724 #endif
4725
4726 #if defined(CONFIG_HIGHPTE)
4727 -#define pte_offset_map(dir, address) \
4728 - ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE0) + \
4729 - pte_index(address))
4730 -#define pte_offset_map_nested(dir, address) \
4731 - ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE1) + \
4732 - pte_index(address))
4733 +#define pte_offset_map(dir, address) \
4734 +({ \
4735 + pte_t *__ptep; \
4736 + unsigned pfn = pmd_val(*(dir)) >> PAGE_SHIFT; \
4737 + __ptep = (pte_t *)kmap_atomic_pte(pfn_to_page(pfn),KM_PTE0); \
4738 + paravirt_map_pt_hook(KM_PTE0,__ptep, pfn); \
4739 + __ptep = __ptep + pte_index(address); \
4740 + __ptep; \
4741 +})
4742 +#define pte_offset_map_nested(dir, address) \
4743 +({ \
4744 + pte_t *__ptep; \
4745 + unsigned pfn = pmd_val(*(dir)) >> PAGE_SHIFT; \
4746 + __ptep = (pte_t *)kmap_atomic_pte(pfn_to_page(pfn),KM_PTE1); \
4747 + paravirt_map_pt_hook(KM_PTE1,__ptep, pfn); \
4748 + __ptep = __ptep + pte_index(address); \
4749 + __ptep; \
4750 +})
4751 #define pte_unmap(pte) kunmap_atomic(pte, KM_PTE0)
4752 #define pte_unmap_nested(pte) kunmap_atomic(pte, KM_PTE1)
4753 #else
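Since the CONFIG_HIGHPTE variants above take a KM_PTE0/KM_PTE1 atomic kmap (and now also call paravirt_map_pt_hook()), every map must be paired with the matching unmap and nothing may sleep in between. A minimal, hedged usage sketch (not from the patch):

	/* illustrative only: map a pte, copy it, drop the KM_PTE0 slot again */
	static pte_t example_read_pte(pmd_t *pmd, unsigned long address)
	{
		pte_t *ptep = pte_offset_map(pmd, address);
		pte_t entry = *ptep;

		pte_unmap(ptep);
		return entry;
	}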
4754 --- a/include/asm-x86/mach-xen/asm/pgtable_64.h
4755 +++ b/include/asm-x86/mach-xen/asm/pgtable_64.h
4756 @@ -414,15 +414,6 @@ static inline int pmd_large(pmd_t pte) {
4757 #define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot))
4758 #define mk_pte_huge(entry) (__pte_val(entry) |= _PAGE_PRESENT | _PAGE_PSE)
4759
4760 -/* physical address -> PTE */
4761 -static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot)
4762 -{
4763 - unsigned long pteval;
4764 - pteval = physpage | pgprot_val(pgprot);
4765 - pteval &= __supported_pte_mask;
4766 - return __pte(pteval);
4767 -}
4768 -
4769 /* Change flags of a PTE */
4770 static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
4771 {
4772 --- a/include/asm-x86/mach-xen/asm/processor_32.h
4773 +++ b/include/asm-x86/mach-xen/asm/processor_32.h
4774 @@ -431,7 +431,7 @@ struct thread_struct {
4775 .vm86_info = NULL, \
4776 .sysenter_cs = __KERNEL_CS, \
4777 .io_bitmap_ptr = NULL, \
4778 - .gs = __KERNEL_PDA, \
4779 + .fs = __KERNEL_PDA, \
4780 }
4781
4782 /*
4783 @@ -449,8 +449,8 @@ struct thread_struct {
4784 }
4785
4786 #define start_thread(regs, new_eip, new_esp) do { \
4787 - __asm__("movl %0,%%fs": :"r" (0)); \
4788 - regs->xgs = 0; \
4789 + __asm__("movl %0,%%gs": :"r" (0)); \
4790 + regs->xfs = 0; \
4791 set_fs(USER_DS); \
4792 regs->xds = __USER_DS; \
4793 regs->xes = __USER_DS; \
4794 --- a/include/asm-x86/mach-xen/asm/segment_32.h
4795 +++ b/include/asm-x86/mach-xen/asm/segment_32.h
4796 @@ -83,14 +83,8 @@
4797 * The GDT has 32 entries
4798 */
4799 #define GDT_ENTRIES 32
4800 -
4801 #define GDT_SIZE (GDT_ENTRIES * 8)
4802
4803 -/* Matches __KERNEL_CS and __USER_CS (they must be 2 entries apart) */
4804 -#define SEGMENT_IS_FLAT_CODE(x) (((x) & 0xec) == GDT_ENTRY_KERNEL_CS * 8)
4805 -/* Matches PNP_CS32 and PNP_CS16 (they must be consecutive) */
4806 -#define SEGMENT_IS_PNP_CODE(x) (((x) & 0xf4) == GDT_ENTRY_PNPBIOS_BASE * 8)
4807 -
4808 /* Simple and small GDT entries for booting only */
4809
4810 #define GDT_ENTRY_BOOT_CS 2
4811 @@ -132,4 +126,21 @@
4812 #define SEGMENT_GDT 0x0
4813
4814 #define get_kernel_rpl() (xen_feature(XENFEAT_supervisor_mode_kernel)?0:1)
4815 +
4816 +/*
4817 + * Matching rules for certain types of segments.
4818 + */
4819 +
4820 +/* Matches only __KERNEL_CS, ignoring PnP / USER / APM segments */
4821 +#define SEGMENT_IS_KERNEL_CODE(x) (((x) & ~3) == GDT_ENTRY_KERNEL_CS * 8 \
4822 + || ((x) & ~3) == (FLAT_KERNEL_CS & ~3))
4823 +
4824 +/* Matches __KERNEL_CS and __USER_CS (they must be 2 entries apart) */
4825 +#define SEGMENT_IS_FLAT_CODE(x) (((x) & ~0x13) == GDT_ENTRY_KERNEL_CS * 8 \
4826 + || ((x) & ~3) == (FLAT_KERNEL_CS & ~3) \
4827 + || ((x) & ~3) == (FLAT_USER_CS & ~3))
4828 +
4829 +/* Matches PNP_CS32 and PNP_CS16 (they must be consecutive) */
4830 +#define SEGMENT_IS_PNP_CODE(x) (((x) & ~0x0b) == GDT_ENTRY_PNPBIOS_BASE * 8)
4831 +
4832 #endif
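The matching macros re-added above mask off a selector's low bits before comparing: bits 0-1 carry the RPL (ring 1 for a Xen paravirtualized kernel, see get_kernel_rpl() above), so (x) & ~3 discards the privilege level and leaves the descriptor index. A hedged caller sketch (invented, not in the patch):

	/* illustrative only: did the trap happen in flat kernel code,
	 * regardless of the RPL Xen runs the kernel at? */
	static inline int trapped_in_kernel_code(struct pt_regs *regs)
	{
		return SEGMENT_IS_KERNEL_CODE(regs->xcs);
	}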
4833 --- a/include/asm-x86/mach-xen/asm/smp_32.h
4834 +++ b/include/asm-x86/mach-xen/asm/smp_32.h
4835 @@ -52,6 +52,11 @@ extern void cpu_exit_clear(void);
4836 extern void cpu_uninit(void);
4837 #endif
4838
4839 +#ifndef CONFIG_PARAVIRT
4840 +#define startup_ipi_hook(phys_apicid, start_eip, start_esp) \
4841 +do { } while (0)
4842 +#endif
4843 +
4844 /*
4845 * This function is needed by all SMP systems. It must _always_ be valid
4846 * from the initial startup. We map APIC_BASE very early in page_setup(),
4847 --- a/include/asm-x86/mach-xen/asm/smp_64.h
4848 +++ b/include/asm-x86/mach-xen/asm/smp_64.h
4849 @@ -7,6 +7,7 @@
4850 #include <linux/threads.h>
4851 #include <linux/cpumask.h>
4852 #include <linux/bitops.h>
4853 +#include <linux/init.h>
4854 extern int disable_apic;
4855
4856 #ifdef CONFIG_X86_LOCAL_APIC
4857 @@ -73,7 +74,7 @@ extern int __cpu_disable(void);
4858 extern void __cpu_die(unsigned int cpu);
4859 extern void prefill_possible_map(void);
4860 extern unsigned num_processors;
4861 -extern unsigned disabled_cpus;
4862 +extern unsigned __cpuinitdata disabled_cpus;
4863
4864 #define NO_PROC_ID 0xFF /* No processor magic marker */
4865
4866 --- a/include/xen/xenbus.h
4867 +++ b/include/xen/xenbus.h
4868 @@ -93,8 +93,7 @@ struct xenbus_device_id
4869
4870 /* A xenbus driver. */
4871 struct xenbus_driver {
4872 - char *name;
4873 - struct module *owner;
4874 + const char *name;
4875 const struct xenbus_device_id *ids;
4876 int (*probe)(struct xenbus_device *dev,
4877 const struct xenbus_device_id *id);
4878 @@ -115,8 +114,25 @@ static inline struct xenbus_driver *to_x
4879 return container_of(drv, struct xenbus_driver, driver);
4880 }
4881
4882 -int xenbus_register_frontend(struct xenbus_driver *drv);
4883 -int xenbus_register_backend(struct xenbus_driver *drv);
4884 +int __must_check __xenbus_register_frontend(struct xenbus_driver *drv,
4885 + struct module *owner,
4886 + const char *mod_name);
4887 +
4888 +static inline int __must_check
4889 +xenbus_register_frontend(struct xenbus_driver *drv)
4890 +{
4891 + return __xenbus_register_frontend(drv, THIS_MODULE, KBUILD_MODNAME);
4892 +}
4893 +
4894 +int __must_check __xenbus_register_backend(struct xenbus_driver *drv,
4895 + struct module *owner,
4896 + const char *mod_name);
4897 +static inline int __must_check
4898 +xenbus_register_backend(struct xenbus_driver *drv)
4899 +{
4900 + return __xenbus_register_backend(drv, THIS_MODULE, KBUILD_MODNAME);
4901 +}
4902 +
4903 void xenbus_unregister_driver(struct xenbus_driver *drv);
4904
4905 struct xenbus_transaction
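With the .owner member gone from struct xenbus_driver, the inline wrappers above pass THIS_MODULE and KBUILD_MODNAME on the caller's behalf, and registration became __must_check. A hedged skeleton of a frontend using the new interface (all demofront_* names are invented for illustration):

	static int demofront_probe(struct xenbus_device *dev,
				   const struct xenbus_device_id *id)
	{
		return 0;	/* set up the device here */
	}

	static int demofront_remove(struct xenbus_device *dev)
	{
		return 0;
	}

	static const struct xenbus_device_id demofront_ids[] = {
		{ "demo" },
		{ "" }
	};

	static struct xenbus_driver demofront_driver = {
		.name   = "demo",	/* note: no .owner initializer any more */
		.ids    = demofront_ids,
		.probe  = demofront_probe,
		.remove = demofront_remove,
	};

	static int __init demofront_init(void)
	{
		/* the return value must be checked now */
		return xenbus_register_frontend(&demofront_driver);
	}
	module_init(demofront_init);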
4906 --- a/lib/swiotlb-xen.c
4907 +++ b/lib/swiotlb-xen.c
4908 @@ -135,8 +135,8 @@ __setup("swiotlb=", setup_io_tlb_npages)
4909 * Statically reserve bounce buffer space and initialize bounce buffer data
4910 * structures for the software IO TLB used to implement the PCI DMA API.
4911 */
4912 -void
4913 -swiotlb_init_with_default_size (size_t default_size)
4914 +void __init
4915 +swiotlb_init_with_default_size(size_t default_size)
4916 {
4917 unsigned long i, bytes;
4918 int rc;
4919 @@ -221,7 +221,7 @@ swiotlb_init_with_default_size (size_t d
4920 dma_bits);
4921 }
4922
4923 -void
4924 +void __init
4925 swiotlb_init(void)
4926 {
4927 long ram_end;
4928 @@ -457,7 +457,7 @@ swiotlb_full(struct device *dev, size_t
4929 * When the mapping is small enough return a static buffer to limit
4930 * the damage, or panic when the transfer is too big.
4931 */
4932 - printk(KERN_ERR "PCI-DMA: Out of SW-IOMMU space for %lu bytes at "
4933 + printk(KERN_ERR "PCI-DMA: Out of SW-IOMMU space for %zu bytes at "
4934 "device %s\n", (unsigned long)size, dev ? dev->bus_id : "?");
4935
4936 if (size > io_tlb_overflow && do_panic) {
4937 @@ -602,7 +602,7 @@ swiotlb_map_sg(struct device *hwdev, str
4938 sg[0].dma_length = 0;
4939 return 0;
4940 }
4941 - sg->dma_address = (dma_addr_t)virt_to_bus(map);
4942 + sg->dma_address = virt_to_bus(map);
4943 } else
4944 sg->dma_address = dev_addr;
4945 sg->dma_length = sg->length;
4946 @@ -624,8 +624,7 @@ swiotlb_unmap_sg(struct device *hwdev, s
4947
4948 for (i = 0; i < nelems; i++, sg++)
4949 if (in_swiotlb_aperture(sg->dma_address))
4950 - unmap_single(hwdev,
4951 - (void *)bus_to_virt(sg->dma_address),
4952 + unmap_single(hwdev, bus_to_virt(sg->dma_address),
4953 sg->dma_length, dir);
4954 else
4955 gnttab_dma_unmap_page(sg->dma_address);
4956 @@ -648,8 +647,7 @@ swiotlb_sync_sg_for_cpu(struct device *h
4957
4958 for (i = 0; i < nelems; i++, sg++)
4959 if (in_swiotlb_aperture(sg->dma_address))
4960 - sync_single(hwdev,
4961 - (void *)bus_to_virt(sg->dma_address),
4962 + sync_single(hwdev, bus_to_virt(sg->dma_address),
4963 sg->dma_length, dir);
4964 }
4965
4966 @@ -663,8 +661,7 @@ swiotlb_sync_sg_for_device(struct device
4967
4968 for (i = 0; i < nelems; i++, sg++)
4969 if (in_swiotlb_aperture(sg->dma_address))
4970 - sync_single(hwdev,
4971 - (void *)bus_to_virt(sg->dma_address),
4972 + sync_single(hwdev, bus_to_virt(sg->dma_address),
4973 sg->dma_length, dir);
4974 }
4975