Imported xen patches.
1 From: kernel.org
2 Subject: 2.6.26
3 Patch-mainline: 2.6.26
4
5 Acked-by: Jeff Mahoney <jeffm@suse.com>
6 Automatically created from "patches.kernel.org/patch-2.6.26" by xen-port-patches.py
7
8 ---
9 arch/x86/Kconfig | 10
10 arch/x86/ia32/ia32entry-xen.S | 14
11 arch/x86/kernel/Makefile | 5
12 arch/x86/kernel/acpi/Makefile | 2
13 arch/x86/kernel/acpi/boot.c | 8
14 arch/x86/kernel/acpi/sleep-xen.c | 87 +
15 arch/x86/kernel/cpu/common-xen.c | 158 +--
16 arch/x86/kernel/cpu/mtrr/main-xen.c | 138 +++
17 arch/x86/kernel/e820_32-xen.c | 32
18 arch/x86/kernel/e820_64-xen.c | 197 +++-
19 arch/x86/kernel/early_printk-xen.c | 24
20 arch/x86/kernel/entry_32-xen.S | 44
21 arch/x86/kernel/entry_64-xen.S | 8
22 arch/x86/kernel/genapic_64-xen.c | 55 +
23 arch/x86/kernel/genapic_xen_64.c | 4
24 arch/x86/kernel/head64-xen.c | 101 +-
25 arch/x86/kernel/head_32-xen.S | 2
26 arch/x86/kernel/init_task-xen.c | 1
27 arch/x86/kernel/io_apic_32-xen.c | 155 +--
28 arch/x86/kernel/io_apic_64-xen.c | 67 -
29 arch/x86/kernel/ipi-xen.c | 232 +++++
30 arch/x86/kernel/irq_32-xen.c | 6
31 arch/x86/kernel/machine_kexec_64.c | 2
32 arch/x86/kernel/microcode-xen.c | 2
33 arch/x86/kernel/mmconf-fam10h_64.c | 10
34 arch/x86/kernel/mpparse-xen.c | 1104 ++++++++++++++++++++++++
35 arch/x86/kernel/mpparse_32-xen.c | 1161 --------------------------
36 arch/x86/kernel/mpparse_64-xen.c | 879 -------------------
37 arch/x86/kernel/pci-dma-xen.c | 735 +++++++++-------
38 arch/x86/kernel/pci-nommu-xen.c | 103 ++
39 arch/x86/kernel/process-xen.c | 188 ++++
40 arch/x86/kernel/process_32-xen.c | 146 +--
41 arch/x86/kernel/process_64-xen.c | 165 ++-
42 arch/x86/kernel/setup-xen.c | 141 +++
43 arch/x86/kernel/setup64-xen.c | 103 --
44 arch/x86/kernel/setup_32-xen.c | 127 ++
45 arch/x86/kernel/setup_64-xen.c | 303 +++---
46 arch/x86/kernel/smp-xen.c | 329 +++++++
47 arch/x86/kernel/smp_32-xen.c | 647 --------------
48 arch/x86/kernel/smp_64-xen.c | 554 ------------
49 arch/x86/kernel/time_32-xen.c | 2
50 arch/x86/kernel/traps_32-xen.c | 592 +++++++------
51 arch/x86/kernel/traps_64-xen.c | 46 -
52 arch/x86/kernel/vsyscall_64-xen.c | 2
53 arch/x86/mm/fault-xen.c | 11
54 arch/x86/mm/highmem_32-xen.c | 1
55 arch/x86/mm/init_32-xen.c | 122 +-
56 arch/x86/mm/init_64-xen.c | 292 +++++-
57 arch/x86/mm/ioremap-xen.c | 269 ++++--
58 arch/x86/mm/pageattr-xen.c | 481 ++--------
59 arch/x86/mm/pat-xen.c | 602 +++++++++++++
60 arch/x86/mm/pgtable-xen.c | 709 +++++++++++++++
61 arch/x86/mm/pgtable_32-xen.c | 242 -----
62 arch/x86/pci/i386.c | 4
63 arch/x86/pci/irq-xen.c | 23
64 arch/x86/vdso/vdso32-setup-xen.c | 15
65 drivers/acpi/processor_core.c | 2
66 drivers/input/xen-kbdfront.c | 1
67 drivers/oprofile/cpu_buffer.c | 2
68 drivers/pci/msi-xen.c | 12
69 drivers/video/Kconfig | 2
70 drivers/video/xen-fbfront.c | 1
71 drivers/xen/Kconfig | 2
72 drivers/xen/Makefile | 8
73 drivers/xen/blkfront/blkfront.c | 4
74 drivers/xen/blkfront/block.h | 1
75 drivers/xen/blkfront/vbd.c | 58 -
76 drivers/xen/blktap/blktap.c | 27
77 drivers/xen/char/mem.c | 53 +
78 drivers/xen/console/console.c | 13
79 drivers/xen/core/machine_kexec.c | 8
80 drivers/xen/core/machine_reboot.c | 8
81 drivers/xen/core/smpboot.c | 23
82 drivers/xen/core/xen_proc.c | 2
83 drivers/xen/fbfront/xenfb.c | 24
84 drivers/xen/gntdev/gntdev.c | 8
85 drivers/xen/netfront/netfront.c | 6
86 drivers/xen/privcmd/privcmd.c | 8
87 drivers/xen/xenbus/xenbus_client.c | 6
88 drivers/xen/xenbus/xenbus_probe.c | 25
89 fs/aio.c | 15
90 include/asm-x86/dma-mapping.h | 5
91 include/asm-x86/genapic_64.h | 5
92 include/asm-x86/mach-xen/asm/desc.h | 65 -
93 include/asm-x86/mach-xen/asm/dma-mapping.h | 22
94 include/asm-x86/mach-xen/asm/dma-mapping_32.h | 141 ---
95 include/asm-x86/mach-xen/asm/dma-mapping_64.h | 205 ----
96 include/asm-x86/mach-xen/asm/fixmap.h | 8
97 include/asm-x86/mach-xen/asm/fixmap_32.h | 22
98 include/asm-x86/mach-xen/asm/fixmap_64.h | 27
99 include/asm-x86/mach-xen/asm/highmem.h | 2
100 include/asm-x86/mach-xen/asm/io.h | 17
101 include/asm-x86/mach-xen/asm/io_32.h | 156 +--
102 include/asm-x86/mach-xen/asm/io_64.h | 124 +-
103 include/asm-x86/mach-xen/asm/irqflags.h | 8
104 include/asm-x86/mach-xen/asm/mmu_context_32.h | 12
105 include/asm-x86/mach-xen/asm/mmu_context_64.h | 15
106 include/asm-x86/mach-xen/asm/page.h | 20
107 include/asm-x86/mach-xen/asm/page_64.h | 10
108 include/asm-x86/mach-xen/asm/pci.h | 11
109 include/asm-x86/mach-xen/asm/pci_64.h | 16
110 include/asm-x86/mach-xen/asm/pgalloc.h | 152 +++
111 include/asm-x86/mach-xen/asm/pgalloc_32.h | 111 --
112 include/asm-x86/mach-xen/asm/pgalloc_64.h | 179 ----
113 include/asm-x86/mach-xen/asm/pgtable-3level.h | 43
114 include/asm-x86/mach-xen/asm/pgtable.h | 292 ++++--
115 include/asm-x86/mach-xen/asm/pgtable_32.h | 107 +-
116 include/asm-x86/mach-xen/asm/pgtable_64.h | 156 +--
117 include/asm-x86/mach-xen/asm/processor.h | 688 ++++++++-------
118 include/asm-x86/mach-xen/asm/segment.h | 3
119 include/asm-x86/mach-xen/asm/smp.h | 228 +++++
120 include/asm-x86/mach-xen/asm/smp_32.h | 178 ---
121 include/asm-x86/mach-xen/asm/smp_64.h | 103 --
122 include/asm-x86/mach-xen/asm/spinlock.h | 18
123 include/asm-x86/mach-xen/asm/swiotlb.h | 13
124 include/asm-x86/mach-xen/asm/swiotlb_32.h | 43
125 include/asm-x86/mach-xen/asm/system.h | 107 +-
126 include/asm-x86/mach-xen/asm/tlbflush.h | 3
127 include/asm-x86/mach-xen/asm/vga.h | 4
128 include/asm-x86/mach-xen/asm/xor_64.h | 294 +++---
129 include/asm-x86/scatterlist.h | 2
130 include/linux/page-flags.h | 31
131 include/xen/balloon.h | 10
132 include/xen/interface/grant_table.h | 7
133 include/xen/interface/io/fbif.h | 5
134 include/xen/interface/memory.h | 17
135 include/xen/interface/vcpu.h | 4
136 lib/swiotlb-xen.c | 236 ++---
137 128 files changed, 8046 insertions(+), 7660 deletions(-)
138
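
Illustration (not part of the patch): the syscall-entry hunks below pre-load RAX(%rsp) with -ENOSYS before syscall_trace_enter() runs, then add a cmpl/ja pair so that an out-of-range syscall number (which ptrace may have written) falls through to int_ret_from_sys_call instead of being dispatched. A minimal stand-alone C sketch of that control flow; the struct, table, and names are stand-ins for the per-ABI assembly (e.g. IA32_NR_syscalls, ia32_sys_call_table):

/*
 * Illustrative sketch, not kernel code: models the bounds check the
 * ia32entry/entry_64 hunks perform after syscall tracing.
 */
#include <errno.h>
#include <stdio.h>

struct fake_regs { unsigned long ax; long di; };     /* stand-in for pt_regs */

typedef long (*syscall_fn)(long);

static long sys_demo(long arg) { return arg + 1; }

static const syscall_fn call_table[] = { sys_demo }; /* stand-in table */
#define NR_SYSCALLS (sizeof(call_table) / sizeof(call_table[0]))

static long traced_dispatch(struct fake_regs *regs)
{
	long ret = -ENOSYS;	/* movq $-ENOSYS,RAX(%rsp): ptrace can change
				   regs->ax to a bad syscall number later */

	/* ... syscall_trace_enter() would run here and may rewrite regs->ax ... */

	if (regs->ax <= NR_SYSCALLS - 1)	/* cmpl $(NR-1),%eax; ja int_ret_from_sys_call */
		ret = call_table[regs->ax](regs->di);

	return ret;	/* out of range: the return path still sees -ENOSYS */
}

int main(void)
{
	struct fake_regs ok  = { .ax = 0, .di = 41 };
	struct fake_regs bad = { .ax = 1000, .di = 0 };

	printf("in range:     %ld\n", traced_dispatch(&ok));   /* 42 */
	printf("out of range: %ld\n", traced_dispatch(&bad));  /* -ENOSYS */
	return 0;
}

The unsigned comparison mirrors the ja (jump-if-above) instruction, so a negative syscall number written by the tracer is also rejected.
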
139 --- a/arch/x86/ia32/ia32entry-xen.S
140 +++ b/arch/x86/ia32/ia32entry-xen.S
141 @@ -129,12 +129,14 @@ sysenter_tracesys:
142 SAVE_REST
143 CLEAR_RREGS
144 movq %r9,R9(%rsp)
145 - movq $-ENOSYS,RAX(%rsp) /* really needed? */
146 + movq $-ENOSYS,RAX(%rsp)/* ptrace can change this for a bad syscall */
147 movq %rsp,%rdi /* &pt_regs -> arg1 */
148 call syscall_trace_enter
149 LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */
150 RESTORE_REST
151 xchgl %ebp,%r9d
152 + cmpl $(IA32_NR_syscalls-1),%eax
153 + ja int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */
154 jmp sysenter_do_call
155 CFI_ENDPROC
156 ENDPROC(ia32_sysenter_target)
157 @@ -200,13 +202,15 @@ cstar_tracesys:
158 SAVE_REST
159 CLEAR_RREGS
160 movq %r9,R9(%rsp)
161 - movq $-ENOSYS,RAX(%rsp) /* really needed? */
162 + movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
163 movq %rsp,%rdi /* &pt_regs -> arg1 */
164 call syscall_trace_enter
165 LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */
166 RESTORE_REST
167 xchgl %ebp,%r9d
168 movl RSP-ARGOFFSET(%rsp), %r8d
169 + cmpl $(IA32_NR_syscalls-1),%eax
170 + ja int_ret_from_sys_call /* cstar_tracesys has set RAX(%rsp) */
171 jmp cstar_do_call
172 END(ia32_cstar_target)
173
174 @@ -264,7 +268,7 @@ ENTRY(ia32_syscall)
175 jnz ia32_tracesys
176 ia32_do_syscall:
177 cmpl $(IA32_NR_syscalls-1),%eax
178 - ja ia32_badsys
179 + ja int_ret_from_sys_call /* ia32_tracesys has set RAX(%rsp) */
180 IA32_ARG_FIXUP
181 call *ia32_sys_call_table(,%rax,8) # xxx: rip relative
182 ia32_sysret:
183 @@ -274,7 +278,7 @@ ia32_sysret:
184 ia32_tracesys:
185 SAVE_REST
186 CLEAR_RREGS
187 - movq $-ENOSYS,RAX(%rsp) /* really needed? */
188 + movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
189 movq %rsp,%rdi /* &pt_regs -> arg1 */
190 call syscall_trace_enter
191 LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */
192 @@ -365,7 +369,7 @@ ia32_sys_call_table:
193 .quad sys_setuid16
194 .quad sys_getuid16
195 .quad compat_sys_stime /* stime */ /* 25 */
196 - .quad sys32_ptrace /* ptrace */
197 + .quad compat_sys_ptrace /* ptrace */
198 .quad sys_alarm
199 .quad sys_fstat /* (old)fstat */
200 .quad sys_pause
201 --- a/arch/x86/Kconfig
202 +++ b/arch/x86/Kconfig
203 @@ -28,6 +28,6 @@ config X86
204 select HAVE_DYNAMIC_FTRACE
205 select HAVE_FTRACE
206 select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64) && !XEN
207 - select HAVE_ARCH_KGDB if !X86_VOYAGER
208 + select HAVE_ARCH_KGDB if !X86_VOYAGER && !XEN
209 select HAVE_GENERIC_DMA_COHERENT if X86_32
210 select HAVE_EFFICIENT_UNALIGNED_ACCESS
211 @@ -482,6 +482,7 @@ config PARAVIRT_DEBUG
212
213 config MEMTEST
214 bool "Memtest"
215 + depends on !XEN
216 help
217 This option adds a kernel parameter 'memtest', which allows memtest
218 to be set.
219 @@ -1345,8 +1346,7 @@ source kernel/Kconfig.hz
220
221 config KEXEC
222 bool "kexec system call"
223 - depends on X86_BIOS_REBOOT
224 - depends on !XEN_UNPRIVILEGED_GUEST
225 + depends on X86_BIOS_REBOOT || (XEN && !XEN_UNPRIVILEGED_GUEST)
226 help
227 kexec is a system call that implements the ability to shutdown your
228 current kernel, and to start another kernel. It is like a reboot
229 @@ -1944,6 +1944,4 @@ source "crypto/Kconfig"
230
231 source "arch/x86/kvm/Kconfig"
232
233 -source "drivers/xen/Kconfig"
234 -
235 source "lib/Kconfig"
236 --- a/arch/x86/kernel/acpi/boot.c
237 +++ b/arch/x86/kernel/acpi/boot.c
238 @@ -251,19 +251,23 @@ static int __init acpi_parse_madt(struct
239
240 static void __cpuinit acpi_register_lapic(int id, u8 enabled)
241 {
242 +#ifndef CONFIG_XEN
243 unsigned int ver = 0;
244 +#endif
245
246 if (!enabled) {
247 ++disabled_cpus;
248 return;
249 }
250
251 +#ifndef CONFIG_XEN
252 #ifdef CONFIG_X86_32
253 if (boot_cpu_physical_apicid != -1U)
254 ver = apic_version[boot_cpu_physical_apicid];
255 #endif
256
257 generic_processor_info(id, ver);
258 +#endif
259 }
260
261 static int __init
262 @@ -774,6 +778,7 @@ static int __init acpi_parse_fadt(struct
263 * returns 0 on success, < 0 on error
264 */
265
266 +#ifndef CONFIG_XEN
267 static void __init acpi_register_lapic_address(unsigned long address)
268 {
269 mp_lapic_addr = address;
270 @@ -787,6 +792,9 @@ static void __init acpi_register_lapic_a
271 #endif
272 }
273 }
274 +#else
275 +#define acpi_register_lapic_address(address)
276 +#endif
277
278 static int __init early_acpi_parse_madt_lapic_addr_ovr(void)
279 {
280 --- a/arch/x86/kernel/acpi/Makefile
281 +++ b/arch/x86/kernel/acpi/Makefile
282 @@ -15,4 +15,4 @@ $(obj)/wakeup_rm.o: $(obj)/realmode/w
283 $(obj)/realmode/wakeup.bin: FORCE
284 $(Q)$(MAKE) $(build)=$(obj)/realmode
285
286 -disabled-obj-$(CONFIG_XEN) := cstate.o wakeup_$(BITS).o
287 +disabled-obj-$(CONFIG_XEN) := cstate.o wakeup_%.o
288 --- a/arch/x86/kernel/acpi/sleep-xen.c
289 +++ b/arch/x86/kernel/acpi/sleep-xen.c
290 @@ -10,15 +10,19 @@
291 #include <linux/dmi.h>
292 #include <linux/cpumask.h>
293
294 -#include <asm/smp.h>
295 +#include "realmode/wakeup.h"
296 +#include "sleep.h"
297
298 #ifndef CONFIG_ACPI_PV_SLEEP
299 -/* address in low memory of the wakeup routine. */
300 -unsigned long acpi_wakeup_address = 0;
301 +unsigned long acpi_wakeup_address;
302 unsigned long acpi_realmode_flags;
303 -extern char wakeup_start, wakeup_end;
304
305 -extern unsigned long acpi_copy_wakeup_routine(unsigned long);
306 +/* address in low memory of the wakeup routine. */
307 +static unsigned long acpi_realmode;
308 +
309 +#ifdef CONFIG_64BIT
310 +static char temp_stack[10240];
311 +#endif
312 #endif
313
314 /**
315 @@ -26,17 +30,69 @@ extern unsigned long acpi_copy_wakeup_ro
316 *
317 * Create an identity mapped page table and copy the wakeup routine to
318 * low memory.
319 + *
320 + * Note that this is too late to change acpi_wakeup_address.
321 */
322 int acpi_save_state_mem(void)
323 {
324 #ifndef CONFIG_ACPI_PV_SLEEP
325 - if (!acpi_wakeup_address) {
326 - printk(KERN_ERR "Could not allocate memory during boot, S3 disabled\n");
327 + struct wakeup_header *header;
328 +
329 + if (!acpi_realmode) {
330 + printk(KERN_ERR "Could not allocate memory during boot, "
331 + "S3 disabled\n");
332 return -ENOMEM;
333 }
334 - memcpy((void *)acpi_wakeup_address, &wakeup_start,
335 - &wakeup_end - &wakeup_start);
336 - acpi_copy_wakeup_routine(acpi_wakeup_address);
337 + memcpy((void *)acpi_realmode, &wakeup_code_start, WAKEUP_SIZE);
338 +
339 + header = (struct wakeup_header *)(acpi_realmode + HEADER_OFFSET);
340 + if (header->signature != 0x51ee1111) {
341 + printk(KERN_ERR "wakeup header does not match\n");
342 + return -EINVAL;
343 + }
344 +
345 + header->video_mode = saved_video_mode;
346 +
347 + header->wakeup_jmp_seg = acpi_wakeup_address >> 4;
348 + /* GDT[0]: GDT self-pointer */
349 + header->wakeup_gdt[0] =
350 + (u64)(sizeof(header->wakeup_gdt) - 1) +
351 + ((u64)(acpi_wakeup_address +
352 + ((char *)&header->wakeup_gdt - (char *)acpi_realmode))
353 + << 16);
354 + /* GDT[1]: real-mode-like code segment */
355 + header->wakeup_gdt[1] = (0x009bULL << 40) +
356 + ((u64)acpi_wakeup_address << 16) + 0xffff;
357 + /* GDT[2]: real-mode-like data segment */
358 + header->wakeup_gdt[2] = (0x0093ULL << 40) +
359 + ((u64)acpi_wakeup_address << 16) + 0xffff;
360 +
361 +#ifndef CONFIG_64BIT
362 + store_gdt((struct desc_ptr *)&header->pmode_gdt);
363 +
364 + header->pmode_efer_low = nx_enabled;
365 + if (header->pmode_efer_low & 1) {
366 + /* This is strange, why not save efer, always? */
367 + rdmsr(MSR_EFER, header->pmode_efer_low,
368 + header->pmode_efer_high);
369 + }
370 +#endif /* !CONFIG_64BIT */
371 +
372 + header->pmode_cr0 = read_cr0();
373 + header->pmode_cr4 = read_cr4();
374 + header->realmode_flags = acpi_realmode_flags;
375 + header->real_magic = 0x12345678;
376 +
377 +#ifndef CONFIG_64BIT
378 + header->pmode_entry = (u32)&wakeup_pmode_return;
379 + header->pmode_cr3 = (u32)(swsusp_pg_dir - __PAGE_OFFSET);
380 + saved_magic = 0x12345678;
381 +#else /* CONFIG_64BIT */
382 + header->trampoline_segment = setup_trampoline() >> 4;
383 + init_rsp = (unsigned long)temp_stack + 4096;
384 + initial_code = (unsigned long)wakeup_long64;
385 + saved_magic = 0x123456789abcdef0;
386 +#endif /* CONFIG_64BIT */
387 #endif
388
389 return 0;
390 @@ -61,15 +117,20 @@ void acpi_restore_state_mem(void)
391 void __init acpi_reserve_bootmem(void)
392 {
393 #ifndef CONFIG_ACPI_PV_SLEEP
394 - if ((&wakeup_end - &wakeup_start) > PAGE_SIZE*2) {
395 + if ((&wakeup_code_end - &wakeup_code_start) > WAKEUP_SIZE) {
396 printk(KERN_ERR
397 "ACPI: Wakeup code way too big, S3 disabled.\n");
398 return;
399 }
400
401 - acpi_wakeup_address = (unsigned long)alloc_bootmem_low(PAGE_SIZE*2);
402 - if (!acpi_wakeup_address)
403 + acpi_realmode = (unsigned long)alloc_bootmem_low(WAKEUP_SIZE);
404 +
405 + if (!acpi_realmode) {
406 printk(KERN_ERR "ACPI: Cannot allocate lowmem, S3 disabled.\n");
407 + return;
408 + }
409 +
410 + acpi_wakeup_address = virt_to_phys((void *)acpi_realmode);
411 #endif
412 }
413
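
Illustration (not part of the patch): the wakeup_gdt[] quads built in acpi_save_state_mem() above pack complete segment descriptors by hand; 0x009bULL<<40 (or 0x0093 for data) supplies the access byte, acpi_wakeup_address<<16 a 24-bit base, and 0xffff a 64 KiB limit. A small stand-alone decoder makes the layout explicit (illustrative only, with an example address):

/* Illustrative decoder for the hand-packed descriptors above; not kernel code. */
#include <stdio.h>
#include <stdint.h>

static void decode(uint64_t d)
{
	unsigned long limit  = (d & 0xffff) | ((d >> 32) & 0xf0000);  /* bits 0-15, 48-51 */
	unsigned long base   = ((d >> 16) & 0xffffff)                 /* bits 16-39 */
			       | (((d >> 56) & 0xff) << 24);          /* bits 56-63 */
	unsigned long access = (d >> 40) & 0xff;

	printf("base=%#lx limit=%#lx access=%#lx (%s)\n",
	       base, limit, access, access & 0x08 ? "code" : "data");
}

int main(void)
{
	uint64_t wakeup_address = 0x9f000;	/* example low-memory address */

	/* real-mode-like code and data segments, as in acpi_save_state_mem() */
	decode((0x009bULL << 40) + (wakeup_address << 16) + 0xffff);
	decode((0x0093ULL << 40) + (wakeup_address << 16) + 0xffff);
	return 0;
}

Both entries decode to a 64 KiB segment based at the wakeup address, differing only in the access byte (0x9b executable/readable code vs. 0x93 read/write data).
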
414 --- a/arch/x86/kernel/cpu/common-xen.c
415 +++ b/arch/x86/kernel/cpu/common-xen.c
416 @@ -5,7 +5,6 @@
417 #include <linux/module.h>
418 #include <linux/percpu.h>
419 #include <linux/bootmem.h>
420 -#include <asm/semaphore.h>
421 #include <asm/processor.h>
422 #include <asm/i387.h>
423 #include <asm/msr.h>
424 @@ -13,6 +12,7 @@
425 #include <asm/mmu_context.h>
426 #include <asm/mtrr.h>
427 #include <asm/mce.h>
428 +#include <asm/pat.h>
429 #ifdef CONFIG_X86_LOCAL_APIC
430 #include <asm/mpspec.h>
431 #include <asm/apic.h>
432 @@ -69,9 +69,9 @@ __u32 cleared_cpu_caps[NCAPINTS] __cpuin
433 static int cachesize_override __cpuinitdata = -1;
434 static int disable_x86_serial_nr __cpuinitdata = 1;
435
436 -struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {};
437 +struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {};
438
439 -static void __cpuinit default_init(struct cpuinfo_x86 * c)
440 +static void __cpuinit default_init(struct cpuinfo_x86 *c)
441 {
442 /* Not much we can do here... */
443 /* Check if at least it has cpuid */
444 @@ -88,11 +88,11 @@ static struct cpu_dev __cpuinitdata defa
445 .c_init = default_init,
446 .c_vendor = "Unknown",
447 };
448 -static struct cpu_dev * this_cpu __cpuinitdata = &default_cpu;
449 +static struct cpu_dev *this_cpu __cpuinitdata = &default_cpu;
450
451 static int __init cachesize_setup(char *str)
452 {
453 - get_option (&str, &cachesize_override);
454 + get_option(&str, &cachesize_override);
455 return 1;
456 }
457 __setup("cachesize=", cachesize_setup);
458 @@ -114,12 +114,12 @@ int __cpuinit get_model_name(struct cpui
459 /* Intel chips right-justify this string for some dumb reason;
460 undo that brain damage */
461 p = q = &c->x86_model_id[0];
462 - while ( *p == ' ' )
463 + while (*p == ' ')
464 p++;
465 - if ( p != q ) {
466 - while ( *p )
467 + if (p != q) {
468 + while (*p)
469 *q++ = *p++;
470 - while ( q <= &c->x86_model_id[48] )
471 + while (q <= &c->x86_model_id[48])
472 *q++ = '\0'; /* Zero-pad the rest */
473 }
474
475 @@ -137,7 +137,7 @@ void __cpuinit display_cacheinfo(struct
476 cpuid(0x80000005, &dummy, &dummy, &ecx, &edx);
477 printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n",
478 edx>>24, edx&0xFF, ecx>>24, ecx&0xFF);
479 - c->x86_cache_size=(ecx>>24)+(edx>>24);
480 + c->x86_cache_size = (ecx>>24)+(edx>>24);
481 }
482
483 if (n < 0x80000006) /* Some chips just has a large L1. */
484 @@ -145,16 +145,16 @@ void __cpuinit display_cacheinfo(struct
485
486 ecx = cpuid_ecx(0x80000006);
487 l2size = ecx >> 16;
488 -
489 +
490 /* do processor-specific cache resizing */
491 if (this_cpu->c_size_cache)
492 - l2size = this_cpu->c_size_cache(c,l2size);
493 + l2size = this_cpu->c_size_cache(c, l2size);
494
495 /* Allow user to override all this if necessary. */
496 if (cachesize_override != -1)
497 l2size = cachesize_override;
498
499 - if ( l2size == 0 )
500 + if (l2size == 0)
501 return; /* Again, no L2 cache is possible */
502
503 c->x86_cache_size = l2size;
504 @@ -163,16 +163,19 @@ void __cpuinit display_cacheinfo(struct
505 l2size, ecx & 0xFF);
506 }
507
508 -/* Naming convention should be: <Name> [(<Codename>)] */
509 -/* This table only is used unless init_<vendor>() below doesn't set it; */
510 -/* in particular, if CPUID levels 0x80000002..4 are supported, this isn't used */
511 +/*
512 + * Naming convention should be: <Name> [(<Codename>)]
513 + * This table only is used unless init_<vendor>() below doesn't set it;
514 + * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used
515 + *
516 + */
517
518 /* Look up CPU names by table lookup. */
519 static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c)
520 {
521 struct cpu_model_info *info;
522
523 - if ( c->x86_model >= 16 )
524 + if (c->x86_model >= 16)
525 return NULL; /* Range check */
526
527 if (!this_cpu)
528 @@ -197,9 +200,9 @@ static void __cpuinit get_cpu_vendor(str
529
530 for (i = 0; i < X86_VENDOR_NUM; i++) {
531 if (cpu_devs[i]) {
532 - if (!strcmp(v,cpu_devs[i]->c_ident[0]) ||
533 - (cpu_devs[i]->c_ident[1] &&
534 - !strcmp(v,cpu_devs[i]->c_ident[1]))) {
535 + if (!strcmp(v, cpu_devs[i]->c_ident[0]) ||
536 + (cpu_devs[i]->c_ident[1] &&
537 + !strcmp(v, cpu_devs[i]->c_ident[1]))) {
538 c->x86_vendor = i;
539 if (!early)
540 this_cpu = cpu_devs[i];
541 @@ -217,7 +220,7 @@ static void __cpuinit get_cpu_vendor(str
542 }
543
544
545 -static int __init x86_fxsr_setup(char * s)
546 +static int __init x86_fxsr_setup(char *s)
547 {
548 setup_clear_cpu_cap(X86_FEATURE_FXSR);
549 setup_clear_cpu_cap(X86_FEATURE_XMM);
550 @@ -226,7 +229,7 @@ static int __init x86_fxsr_setup(char *
551 __setup("nofxsr", x86_fxsr_setup);
552
553
554 -static int __init x86_sep_setup(char * s)
555 +static int __init x86_sep_setup(char *s)
556 {
557 setup_clear_cpu_cap(X86_FEATURE_SEP);
558 return 1;
559 @@ -315,12 +318,15 @@ static void __cpuinit early_get_cap(stru
560
561 }
562
563 -/* Do minimum CPU detection early.
564 - Fields really needed: vendor, cpuid_level, family, model, mask, cache alignment.
565 - The others are not touched to avoid unwanted side effects.
566 -
567 - WARNING: this function is only called on the BP. Don't add code here
568 - that is supposed to run on all CPUs. */
569 +/*
570 + * Do minimum CPU detection early.
571 + * Fields really needed: vendor, cpuid_level, family, model, mask,
572 + * cache alignment.
573 + * The others are not touched to avoid unwanted side effects.
574 + *
575 + * WARNING: this function is only called on the BP. Don't add code here
576 + * that is supposed to run on all CPUs.
577 + */
578 static void __init early_cpu_detect(void)
579 {
580 struct cpuinfo_x86 *c = &boot_cpu_data;
581 @@ -335,19 +341,14 @@ static void __init early_cpu_detect(void
582
583 get_cpu_vendor(c, 1);
584
585 - switch (c->x86_vendor) {
586 - case X86_VENDOR_AMD:
587 - early_init_amd(c);
588 - break;
589 - case X86_VENDOR_INTEL:
590 - early_init_intel(c);
591 - break;
592 - }
593 + if (c->x86_vendor != X86_VENDOR_UNKNOWN &&
594 + cpu_devs[c->x86_vendor]->c_early_init)
595 + cpu_devs[c->x86_vendor]->c_early_init(c);
596
597 early_get_cap(c);
598 }
599
600 -static void __cpuinit generic_identify(struct cpuinfo_x86 * c)
601 +static void __cpuinit generic_identify(struct cpuinfo_x86 *c)
602 {
603 u32 tfms, xlvl;
604 unsigned int ebx;
605 @@ -358,13 +359,12 @@ static void __cpuinit generic_identify(s
606 (unsigned int *)&c->x86_vendor_id[0],
607 (unsigned int *)&c->x86_vendor_id[8],
608 (unsigned int *)&c->x86_vendor_id[4]);
609 -
610 +
611 get_cpu_vendor(c, 0);
612 /* Initialize the standard set of capabilities */
613 /* Note that the vendor-specific code below might override */
614 -
615 /* Intel-defined flags: level 0x00000001 */
616 - if ( c->cpuid_level >= 0x00000001 ) {
617 + if (c->cpuid_level >= 0x00000001) {
618 u32 capability, excap;
619 cpuid(0x00000001, &tfms, &ebx, &excap, &capability);
620 c->x86_capability[0] = capability;
621 @@ -376,12 +376,14 @@ static void __cpuinit generic_identify(s
622 if (c->x86 >= 0x6)
623 c->x86_model += ((tfms >> 16) & 0xF) << 4;
624 c->x86_mask = tfms & 15;
625 + c->initial_apicid = (ebx >> 24) & 0xFF;
626 #ifdef CONFIG_X86_HT
627 - c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0);
628 + c->apicid = phys_pkg_id(c->initial_apicid, 0);
629 + c->phys_proc_id = c->initial_apicid;
630 #else
631 - c->apicid = (ebx >> 24) & 0xFF;
632 + c->apicid = c->initial_apicid;
633 #endif
634 - if (c->x86_capability[0] & (1<<19))
635 + if (test_cpu_cap(c, X86_FEATURE_CLFLSH))
636 c->x86_clflush_size = ((ebx >> 8) & 0xff) * 8;
637 } else {
638 /* Have CPUID level 0 only - unheard of */
639 @@ -390,33 +392,30 @@ static void __cpuinit generic_identify(s
640
641 /* AMD-defined flags: level 0x80000001 */
642 xlvl = cpuid_eax(0x80000000);
643 - if ( (xlvl & 0xffff0000) == 0x80000000 ) {
644 - if ( xlvl >= 0x80000001 ) {
645 + if ((xlvl & 0xffff0000) == 0x80000000) {
646 + if (xlvl >= 0x80000001) {
647 c->x86_capability[1] = cpuid_edx(0x80000001);
648 c->x86_capability[6] = cpuid_ecx(0x80000001);
649 }
650 - if ( xlvl >= 0x80000004 )
651 + if (xlvl >= 0x80000004)
652 get_model_name(c); /* Default name */
653 }
654
655 init_scattered_cpuid_features(c);
656 }
657
658 -#ifdef CONFIG_X86_HT
659 - c->phys_proc_id = (cpuid_ebx(1) >> 24) & 0xff;
660 -#endif
661 }
662
663 static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
664 {
665 - if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr ) {
666 + if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr) {
667 /* Disable processor serial number */
668 - unsigned long lo,hi;
669 - rdmsr(MSR_IA32_BBL_CR_CTL,lo,hi);
670 + unsigned long lo, hi;
671 + rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi);
672 lo |= 0x200000;
673 - wrmsr(MSR_IA32_BBL_CR_CTL,lo,hi);
674 + wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi);
675 printk(KERN_NOTICE "CPU serial number disabled.\n");
676 - clear_bit(X86_FEATURE_PN, c->x86_capability);
677 + clear_cpu_cap(c, X86_FEATURE_PN);
678
679 /* Disabling the serial number may affect the cpuid level */
680 c->cpuid_level = cpuid_eax(0);
681 @@ -451,9 +450,11 @@ void __cpuinit identify_cpu(struct cpuin
682 memset(&c->x86_capability, 0, sizeof c->x86_capability);
683
684 if (!have_cpuid_p()) {
685 - /* First of all, decide if this is a 486 or higher */
686 - /* It's a 486 if we can modify the AC flag */
687 - if ( flag_is_changeable_p(X86_EFLAGS_AC) )
688 + /*
689 + * First of all, decide if this is a 486 or higher
690 + * It's a 486 if we can modify the AC flag
691 + */
692 + if (flag_is_changeable_p(X86_EFLAGS_AC))
693 c->x86 = 4;
694 else
695 c->x86 = 3;
696 @@ -486,10 +487,10 @@ void __cpuinit identify_cpu(struct cpuin
697 */
698
699 /* If the model name is still unset, do table lookup. */
700 - if ( !c->x86_model_id[0] ) {
701 + if (!c->x86_model_id[0]) {
702 char *p;
703 p = table_lookup_model(c);
704 - if ( p )
705 + if (p)
706 strcpy(c->x86_model_id, p);
707 else
708 /* Last resort... */
709 @@ -503,9 +504,9 @@ void __cpuinit identify_cpu(struct cpuin
710 * common between the CPUs. The first time this routine gets
711 * executed, c == &boot_cpu_data.
712 */
713 - if ( c != &boot_cpu_data ) {
714 + if (c != &boot_cpu_data) {
715 /* AND the already accumulated flags with these */
716 - for ( i = 0 ; i < NCAPINTS ; i++ )
717 + for (i = 0 ; i < NCAPINTS ; i++)
718 boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
719 }
720
721 @@ -549,7 +550,7 @@ void __cpuinit detect_ht(struct cpuinfo_
722
723 if (smp_num_siblings == 1) {
724 printk(KERN_INFO "CPU: Hyper-Threading is disabled\n");
725 - } else if (smp_num_siblings > 1 ) {
726 + } else if (smp_num_siblings > 1) {
727
728 if (smp_num_siblings > NR_CPUS) {
729 printk(KERN_WARNING "CPU: Unsupported number of the "
730 @@ -559,7 +560,7 @@ void __cpuinit detect_ht(struct cpuinfo_
731 }
732
733 index_msb = get_count_order(smp_num_siblings);
734 - c->phys_proc_id = phys_pkg_id((ebx >> 24) & 0xFF, index_msb);
735 + c->phys_proc_id = phys_pkg_id(c->initial_apicid, index_msb);
736
737 printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
738 c->phys_proc_id);
739 @@ -570,7 +571,7 @@ void __cpuinit detect_ht(struct cpuinfo_
740
741 core_bits = get_count_order(c->x86_max_cores);
742
743 - c->cpu_core_id = phys_pkg_id((ebx >> 24) & 0xFF, index_msb) &
744 + c->cpu_core_id = phys_pkg_id(c->initial_apicid, index_msb) &
745 ((1 << core_bits) - 1);
746
747 if (c->x86_max_cores > 1)
748 @@ -604,7 +605,7 @@ void __cpuinit print_cpu_info(struct cpu
749 else
750 printk("%s", c->x86_model_id);
751
752 - if (c->x86_mask || c->cpuid_level >= 0)
753 + if (c->x86_mask || c->cpuid_level >= 0)
754 printk(" stepping %02x\n", c->x86_mask);
755 else
756 printk("\n");
757 @@ -623,24 +624,17 @@ __setup("clearcpuid=", setup_disablecpui
758
759 cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
760
761 -/* This is hacky. :)
762 - * We're emulating future behavior.
763 - * In the future, the cpu-specific init functions will be called implicitly
764 - * via the magic of initcalls.
765 - * They will insert themselves into the cpu_devs structure.
766 - * Then, when cpu_init() is called, we can just iterate over that array.
767 - */
768 void __init early_cpu_init(void)
769 {
770 - intel_cpu_init();
771 - cyrix_init_cpu();
772 - nsc_init_cpu();
773 - amd_init_cpu();
774 - centaur_init_cpu();
775 - transmeta_init_cpu();
776 - nexgen_init_cpu();
777 - umc_init_cpu();
778 + struct cpu_vendor_dev *cvdev;
779 +
780 + for (cvdev = __x86cpuvendor_start ;
781 + cvdev < __x86cpuvendor_end ;
782 + cvdev++)
783 + cpu_devs[cvdev->vendor] = cvdev->cpu_dev;
784 +
785 early_cpu_detect();
786 + validate_pat_support(&boot_cpu_data);
787 }
788
789 /* Make sure %fs is initialized properly in idle threads */
790 @@ -685,7 +679,7 @@ void __cpuinit cpu_init(void)
791 int cpu = smp_processor_id();
792 struct task_struct *curr = current;
793 #ifndef CONFIG_X86_NO_TSS
794 - struct tss_struct * t = &per_cpu(init_tss, cpu);
795 + struct tss_struct *t = &per_cpu(init_tss, cpu);
796 #endif
797 struct thread_struct *thread = &curr->thread;
798
799 @@ -738,7 +732,7 @@ void __cpuinit cpu_init(void)
800 mxcsr_feature_mask_init();
801 }
802
803 -#ifdef CONFIG_HOTPLUG_CPU
804 +#if defined(CONFIG_HOTPLUG_CPU) && !defined(CONFIG_XEN)
805 void __cpuinit cpu_uninit(void)
806 {
807 int cpu = raw_smp_processor_id();
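
Illustration (not part of the patch): the early_cpu_init() rewrite above drops the hard-coded list of vendor init calls in favour of a walk over a linker-assembled table between __x86cpuvendor_start and __x86cpuvendor_end. A stand-alone approximation of that registration pattern, with a static array standing in for the linker section:

/*
 * Stand-alone approximation of the table-driven vendor registration in
 * early_cpu_init() above; not kernel code. In the kernel the table is
 * built by the linker from per-vendor object files.
 */
#include <stdio.h>

enum { VENDOR_INTEL, VENDOR_AMD, VENDOR_NUM };

struct cpu_dev { const char *ident; void (*c_early_init)(void); };

static void intel_early(void) { puts("intel early init"); }
static void amd_early(void)   { puts("amd early init"); }

static struct cpu_dev intel_dev = { "GenuineIntel", intel_early };
static struct cpu_dev amd_dev   = { "AuthenticAMD", amd_early };

struct cpu_vendor_dev { int vendor; struct cpu_dev *cpu_dev; };

/* stand-in for the __x86cpuvendor_start..__x86cpuvendor_end section */
static struct cpu_vendor_dev vendor_table[] = {
	{ VENDOR_INTEL, &intel_dev },
	{ VENDOR_AMD,   &amd_dev },
};

static struct cpu_dev *cpu_devs[VENDOR_NUM];

int main(void)
{
	unsigned i;
	int boot_vendor = VENDOR_AMD;	/* pretend detection picked AMD */

	for (i = 0; i < sizeof(vendor_table) / sizeof(vendor_table[0]); i++)
		cpu_devs[vendor_table[i].vendor] = vendor_table[i].cpu_dev;

	if (cpu_devs[boot_vendor] && cpu_devs[boot_vendor]->c_early_init)
		cpu_devs[boot_vendor]->c_early_init();
	return 0;
}

This is why early_cpu_detect() above can replace the vendor switch with a single indirect call through cpu_devs[c->x86_vendor]->c_early_init.
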
808 --- a/arch/x86/kernel/cpu/mtrr/main-xen.c
809 +++ b/arch/x86/kernel/cpu/mtrr/main-xen.c
810 @@ -35,6 +35,8 @@ struct mtrr_ops *mtrr_if = &generic_mtrr
811 unsigned int num_var_ranges;
812 unsigned int mtrr_usage_table[MAX_VAR_RANGES];
813
814 +static u64 tom2;
815 +
816 static void __init set_num_var_ranges(void)
817 {
818 struct xen_platform_op op;
819 @@ -162,8 +164,144 @@ mtrr_del(int reg, unsigned long base, un
820 EXPORT_SYMBOL(mtrr_add);
821 EXPORT_SYMBOL(mtrr_del);
822
823 +/*
824 + * Returns the effective MTRR type for the region
825 + * Error returns:
826 + * - 0xFE - when the range is "not entirely covered" by _any_ var range MTRR
827 + * - 0xFF - when MTRR is not enabled
828 + */
829 +u8 mtrr_type_lookup(u64 start, u64 end)
830 +{
831 + int i, error;
832 + u64 start_mfn, end_mfn, base_mfn, top_mfn;
833 + u8 prev_match, curr_match;
834 + struct xen_platform_op op;
835 +
836 + if (!is_initial_xendomain())
837 + return MTRR_TYPE_WRBACK;
838 +
839 + if (!num_var_ranges)
840 + return 0xFF;
841 +
842 + start_mfn = start >> PAGE_SHIFT;
843 + /* Make end inclusive end, instead of exclusive */
844 + end_mfn = --end >> PAGE_SHIFT;
845 +
846 + /* Look in fixed ranges. Just return the type as per start */
847 + if (start_mfn < 0x100) {
848 +#if 0//todo
849 + op.cmd = XENPF_read_memtype;
850 + op.u.read_memtype.reg = ???;
851 + error = HYPERVISOR_platform_op(&op);
852 + if (!error)
853 + return op.u.read_memtype.type;
854 +#endif
855 + return MTRR_TYPE_UNCACHABLE;
856 + }
857 +
858 + /*
859 + * Look in variable ranges
860 + * Look of multiple ranges matching this address and pick type
861 + * as per MTRR precedence
862 + */
863 + prev_match = 0xFF;
864 + for (i = 0; i < num_var_ranges; ++i) {
865 + op.cmd = XENPF_read_memtype;
866 + op.u.read_memtype.reg = i;
867 + error = HYPERVISOR_platform_op(&op);
868 +
869 + if (error || !op.u.read_memtype.nr_mfns)
870 + continue;
871 +
872 + base_mfn = op.u.read_memtype.mfn;
873 + top_mfn = base_mfn + op.u.read_memtype.nr_mfns - 1;
874 +
875 + if (base_mfn > end_mfn || start_mfn > top_mfn) {
876 + continue;
877 + }
878 +
879 + if (base_mfn > start_mfn || end_mfn > top_mfn) {
880 + return 0xFE;
881 + }
882 +
883 + curr_match = op.u.read_memtype.type;
884 + if (prev_match == 0xFF) {
885 + prev_match = curr_match;
886 + continue;
887 + }
888 +
889 + if (prev_match == MTRR_TYPE_UNCACHABLE ||
890 + curr_match == MTRR_TYPE_UNCACHABLE) {
891 + return MTRR_TYPE_UNCACHABLE;
892 + }
893 +
894 + if ((prev_match == MTRR_TYPE_WRBACK &&
895 + curr_match == MTRR_TYPE_WRTHROUGH) ||
896 + (prev_match == MTRR_TYPE_WRTHROUGH &&
897 + curr_match == MTRR_TYPE_WRBACK)) {
898 + prev_match = MTRR_TYPE_WRTHROUGH;
899 + curr_match = MTRR_TYPE_WRTHROUGH;
900 + }
901 +
902 + if (prev_match != curr_match) {
903 + return MTRR_TYPE_UNCACHABLE;
904 + }
905 + }
906 +
907 + if (tom2) {
908 + if (start >= (1ULL<<32) && (end < tom2))
909 + return MTRR_TYPE_WRBACK;
910 + }
911 +
912 + if (prev_match != 0xFF)
913 + return prev_match;
914 +
915 +#if 0//todo
916 + op.cmd = XENPF_read_def_memtype;
917 + error = HYPERVISOR_platform_op(&op);
918 + if (!error)
919 + return op.u.read_def_memtype.type;
920 +#endif
921 + return MTRR_TYPE_UNCACHABLE;
922 +}
923 +
924 +/*
925 + * Newer AMD K8s and later CPUs have a special magic MSR way to force WB
926 + * for memory >4GB. Check for that here.
927 + * Note this won't check if the MTRRs < 4GB where the magic bit doesn't
928 + * apply to are wrong, but so far we don't know of any such case in the wild.
929 + */
930 +#define Tom2Enabled (1U << 21)
931 +#define Tom2ForceMemTypeWB (1U << 22)
932 +
933 +int __init amd_special_default_mtrr(void)
934 +{
935 + u32 l, h;
936 +
937 + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
938 + return 0;
939 + if (boot_cpu_data.x86 < 0xf || boot_cpu_data.x86 > 0x11)
940 + return 0;
941 + /* In case some hypervisor doesn't pass SYSCFG through */
942 + if (rdmsr_safe(MSR_K8_SYSCFG, &l, &h) < 0)
943 + return 0;
944 + /*
945 + * Memory between 4GB and top of mem is forced WB by this magic bit.
946 + * Reserved before K8RevF, but should be zero there.
947 + */
948 + if ((l & (Tom2Enabled | Tom2ForceMemTypeWB)) ==
949 + (Tom2Enabled | Tom2ForceMemTypeWB))
950 + return 1;
951 + return 0;
952 +}
953 +
954 void __init mtrr_bp_init(void)
955 {
956 + if (amd_special_default_mtrr()) {
957 + /* TOP_MEM2 */
958 + rdmsrl(MSR_K8_TOP_MEM2, tom2);
959 + tom2 &= 0xffffff8000000ULL;
960 + }
961 }
962
963 void mtrr_ap_init(void)
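
Illustration (not part of the patch): the overlap-resolution rules in mtrr_type_lookup() above reduce to a small pure function over a pair of range types: UC dominates everything, WB combined with WT yields WT, and any other disagreement is treated as UC. An isolated sketch; the MTRR_TYPE_* values follow the standard x86 encodings, everything else is a stand-in:

/*
 * Illustrative reduction of the MTRR overlap-precedence rules used by
 * mtrr_type_lookup() above; stand-alone, not kernel code.
 */
#include <stdio.h>

#define MTRR_TYPE_UNCACHABLE 0
#define MTRR_TYPE_WRTHROUGH  4
#define MTRR_TYPE_WRBACK     6

static unsigned char combine(unsigned char prev, unsigned char curr)
{
	if (prev == 0xFF)			/* first matching range */
		return curr;
	if (prev == MTRR_TYPE_UNCACHABLE || curr == MTRR_TYPE_UNCACHABLE)
		return MTRR_TYPE_UNCACHABLE;	/* UC dominates everything */
	if ((prev == MTRR_TYPE_WRBACK && curr == MTRR_TYPE_WRTHROUGH) ||
	    (prev == MTRR_TYPE_WRTHROUGH && curr == MTRR_TYPE_WRBACK))
		return MTRR_TYPE_WRTHROUGH;	/* WB + WT -> WT */
	if (prev != curr)
		return MTRR_TYPE_UNCACHABLE;	/* undefined overlap -> UC */
	return prev;
}

int main(void)
{
	printf("WB+WT -> %u\n", combine(MTRR_TYPE_WRBACK, MTRR_TYPE_WRTHROUGH));  /* 4 */
	printf("WB+UC -> %u\n", combine(MTRR_TYPE_WRBACK, MTRR_TYPE_UNCACHABLE)); /* 0 */
	printf("WB+WB -> %u\n", combine(MTRR_TYPE_WRBACK, MTRR_TYPE_WRBACK));     /* 6 */
	return 0;
}
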
964 --- a/arch/x86/kernel/e820_32-xen.c
965 +++ b/arch/x86/kernel/e820_32-xen.c
966 @@ -469,7 +469,7 @@ int __init sanitize_e820_map(struct e820
967 * thinkpad 560x, for example, does not cooperate with the memory
968 * detection code.)
969 */
970 -int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
971 +int __init copy_e820_map(struct e820entry *biosmap, int nr_map)
972 {
973 #ifndef CONFIG_XEN
974 /* Only one memory region (or negative)? Ignore it */
975 @@ -480,33 +480,17 @@ int __init copy_e820_map(struct e820entr
976 #endif
977
978 do {
979 - unsigned long long start = biosmap->addr;
980 - unsigned long long size = biosmap->size;
981 - unsigned long long end = start + size;
982 - unsigned long type = biosmap->type;
983 + u64 start = biosmap->addr;
984 + u64 size = biosmap->size;
985 + u64 end = start + size;
986 + u32 type = biosmap->type;
987
988 /* Overflow in 64 bits? Ignore the memory map. */
989 if (start > end)
990 return -1;
991
992 -#ifndef CONFIG_XEN
993 - /*
994 - * Some BIOSes claim RAM in the 640k - 1M region.
995 - * Not right. Fix it up.
996 - */
997 - if (type == E820_RAM) {
998 - if (start < 0x100000ULL && end > 0xA0000ULL) {
999 - if (start < 0xA0000ULL)
1000 - add_memory_region(start, 0xA0000ULL-start, type);
1001 - if (end <= 0x100000ULL)
1002 - continue;
1003 - start = 0x100000ULL;
1004 - size = end - start;
1005 - }
1006 - }
1007 -#endif
1008 add_memory_region(start, size, type);
1009 - } while (biosmap++,--nr_map);
1010 + } while (biosmap++, --nr_map);
1011
1012 #ifdef CONFIG_XEN
1013 if (is_initial_xendomain()) {
1014 @@ -528,7 +512,7 @@ int __init copy_e820_map(struct e820entr
1015 /*
1016 * Find the highest page frame number we have available
1017 */
1018 -void __init find_max_pfn(void)
1019 +void __init propagate_e820_map(void)
1020 {
1021 int i;
1022
1023 @@ -814,7 +798,7 @@ static int __init parse_memmap(char *arg
1024 * size before original memory map is
1025 * reset.
1026 */
1027 - find_max_pfn();
1028 + propagate_e820_map();
1029 saved_max_pfn = max_pfn;
1030 #endif
1031 e820.nr_map = 0;
1032 --- a/arch/x86/kernel/e820_64-xen.c
1033 +++ b/arch/x86/kernel/e820_64-xen.c
1034 @@ -40,11 +40,11 @@ struct e820map machine_e820;
1035 unsigned long end_pfn;
1036
1037 /*
1038 - * end_pfn only includes RAM, while end_pfn_map includes all e820 entries.
1039 - * The direct mapping extends to end_pfn_map, so that we can directly access
1040 + * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
1041 + * The direct mapping extends to max_pfn_mapped, so that we can directly access
1042 * apertures, ACPI and other tables without having to play with fixmaps.
1043 */
1044 -unsigned long end_pfn_map;
1045 +unsigned long max_pfn_mapped;
1046
1047 /*
1048 * Last pfn which the user wants to use.
1049 @@ -63,8 +63,8 @@ struct early_res {
1050 static struct early_res early_res[MAX_EARLY_RES] __initdata = {
1051 #ifndef CONFIG_XEN
1052 { 0, PAGE_SIZE, "BIOS data page" }, /* BIOS data page */
1053 -#ifdef CONFIG_SMP
1054 - { SMP_TRAMPOLINE_BASE, SMP_TRAMPOLINE_BASE + 2*PAGE_SIZE, "SMP_TRAMPOLINE" },
1055 +#ifdef CONFIG_X86_TRAMPOLINE
1056 + { TRAMPOLINE_BASE, TRAMPOLINE_BASE + 2 * PAGE_SIZE, "TRAMPOLINE" },
1057 #endif
1058 #endif
1059 {}
1060 @@ -89,19 +89,47 @@ void __init reserve_early(unsigned long
1061 strncpy(r->name, name, sizeof(r->name) - 1);
1062 }
1063
1064 -void __init early_res_to_bootmem(void)
1065 +void __init free_early(unsigned long start, unsigned long end)
1066 +{
1067 + struct early_res *r;
1068 + int i, j;
1069 +
1070 + for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
1071 + r = &early_res[i];
1072 + if (start == r->start && end == r->end)
1073 + break;
1074 + }
1075 + if (i >= MAX_EARLY_RES || !early_res[i].end)
1076 + panic("free_early on not reserved area: %lx-%lx!", start, end);
1077 +
1078 + for (j = i + 1; j < MAX_EARLY_RES && early_res[j].end; j++)
1079 + ;
1080 +
1081 + memmove(&early_res[i], &early_res[i + 1],
1082 + (j - 1 - i) * sizeof(struct early_res));
1083 +
1084 + early_res[j - 1].end = 0;
1085 +}
1086 +
1087 +void __init early_res_to_bootmem(unsigned long start, unsigned long end)
1088 {
1089 int i;
1090 + unsigned long final_start, final_end;
1091 for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
1092 struct early_res *r = &early_res[i];
1093 - printk(KERN_INFO "early res: %d [%lx-%lx] %s\n", i,
1094 - r->start, r->end - 1, r->name);
1095 - reserve_bootmem_generic(r->start, r->end - r->start);
1096 + final_start = max(start, r->start);
1097 + final_end = min(end, r->end);
1098 + if (final_start >= final_end)
1099 + continue;
1100 + printk(KERN_INFO " early res: %d [%lx-%lx] %s\n", i,
1101 + final_start, final_end - 1, r->name);
1102 + reserve_bootmem_generic(final_start, final_end - final_start);
1103 }
1104 }
1105
1106 /* Check for already reserved areas */
1107 -static inline int bad_addr(unsigned long *addrp, unsigned long size)
1108 +static inline int __init
1109 +bad_addr(unsigned long *addrp, unsigned long size, unsigned long align)
1110 {
1111 int i;
1112 unsigned long addr = *addrp, last;
1113 @@ -111,7 +139,7 @@ again:
1114 for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
1115 struct early_res *r = &early_res[i];
1116 if (last >= r->start && addr < r->end) {
1117 - *addrp = addr = r->end;
1118 + *addrp = addr = round_up(r->end, align);
1119 changed = 1;
1120 goto again;
1121 }
1122 @@ -119,6 +147,40 @@ again:
1123 return changed;
1124 }
1125
1126 +/* Check for already reserved areas */
1127 +static inline int __init
1128 +bad_addr_size(unsigned long *addrp, unsigned long *sizep, unsigned long align)
1129 +{
1130 + int i;
1131 + unsigned long addr = *addrp, last;
1132 + unsigned long size = *sizep;
1133 + int changed = 0;
1134 +again:
1135 + last = addr + size;
1136 + for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
1137 + struct early_res *r = &early_res[i];
1138 + if (last > r->start && addr < r->start) {
1139 + size = r->start - addr;
1140 + changed = 1;
1141 + goto again;
1142 + }
1143 + if (last > r->end && addr < r->end) {
1144 + addr = round_up(r->end, align);
1145 + size = last - addr;
1146 + changed = 1;
1147 + goto again;
1148 + }
1149 + if (last <= r->end && addr >= r->start) {
1150 + (*sizep)++;
1151 + return 0;
1152 + }
1153 + }
1154 + if (changed) {
1155 + *addrp = addr;
1156 + *sizep = size;
1157 + }
1158 + return changed;
1159 +}
1160 /*
1161 * This function checks if any part of the range <start,end> is mapped
1162 * with type.
1163 @@ -194,26 +256,27 @@ int __init e820_all_mapped(unsigned long
1164 * Find a free area with specified alignment in a specific range.
1165 */
1166 unsigned long __init find_e820_area(unsigned long start, unsigned long end,
1167 - unsigned size, unsigned long align)
1168 + unsigned long size, unsigned long align)
1169 {
1170 int i;
1171 - unsigned long mask = ~(align - 1);
1172
1173 for (i = 0; i < e820.nr_map; i++) {
1174 struct e820entry *ei = &e820.map[i];
1175 - unsigned long addr = ei->addr, last;
1176 + unsigned long addr, last;
1177 + unsigned long ei_last;
1178
1179 if (ei->type != E820_RAM)
1180 continue;
1181 + addr = round_up(ei->addr, align);
1182 + ei_last = ei->addr + ei->size;
1183 if (addr < start)
1184 - addr = start;
1185 - if (addr > ei->addr + ei->size)
1186 + addr = round_up(start, align);
1187 + if (addr >= ei_last)
1188 continue;
1189 - while (bad_addr(&addr, size) && addr+size <= ei->addr+ei->size)
1190 + while (bad_addr(&addr, size, align) && addr+size <= ei_last)
1191 ;
1192 - addr = (addr + align - 1) & mask;
1193 last = addr + size;
1194 - if (last > ei->addr + ei->size)
1195 + if (last > ei_last)
1196 continue;
1197 if (last > end)
1198 continue;
1199 @@ -223,6 +286,40 @@ unsigned long __init find_e820_area(unsi
1200 }
1201
1202 /*
1203 + * Find next free range after *start
1204 + */
1205 +unsigned long __init find_e820_area_size(unsigned long start,
1206 + unsigned long *sizep,
1207 + unsigned long align)
1208 +{
1209 + int i;
1210 +
1211 + for (i = 0; i < e820.nr_map; i++) {
1212 + struct e820entry *ei = &e820.map[i];
1213 + unsigned long addr, last;
1214 + unsigned long ei_last;
1215 +
1216 + if (ei->type != E820_RAM)
1217 + continue;
1218 + addr = round_up(ei->addr, align);
1219 + ei_last = ei->addr + ei->size;
1220 + if (addr < start)
1221 + addr = round_up(start, align);
1222 + if (addr >= ei_last)
1223 + continue;
1224 + *sizep = ei_last - addr;
1225 + while (bad_addr_size(&addr, sizep, align) &&
1226 + addr + *sizep <= ei_last)
1227 + ;
1228 + last = addr + *sizep;
1229 + if (last > ei_last)
1230 + continue;
1231 + return addr;
1232 + }
1233 + return -1UL;
1234 +
1235 +}
1236 +/*
1237 * Find the highest page frame number we have available
1238 */
1239 unsigned long __init e820_end_of_ram(void)
1240 @@ -231,31 +328,29 @@ unsigned long __init e820_end_of_ram(voi
1241
1242 end_pfn = find_max_pfn_with_active_regions();
1243
1244 - if (end_pfn > end_pfn_map)
1245 - end_pfn_map = end_pfn;
1246 - if (end_pfn_map > MAXMEM>>PAGE_SHIFT)
1247 - end_pfn_map = MAXMEM>>PAGE_SHIFT;
1248 + if (end_pfn > max_pfn_mapped)
1249 + max_pfn_mapped = end_pfn;
1250 + if (max_pfn_mapped > MAXMEM>>PAGE_SHIFT)
1251 + max_pfn_mapped = MAXMEM>>PAGE_SHIFT;
1252 if (end_pfn > end_user_pfn)
1253 end_pfn = end_user_pfn;
1254 - if (end_pfn > end_pfn_map)
1255 - end_pfn = end_pfn_map;
1256 + if (end_pfn > max_pfn_mapped)
1257 + end_pfn = max_pfn_mapped;
1258
1259 - printk(KERN_INFO "end_pfn_map = %lu\n", end_pfn_map);
1260 + printk(KERN_INFO "max_pfn_mapped = %lu\n", max_pfn_mapped);
1261 return end_pfn;
1262 }
1263
1264 /*
1265 * Mark e820 reserved areas as busy for the resource manager.
1266 */
1267 -void __init e820_reserve_resources(struct e820entry *e820, int nr_map,
1268 - struct resource *code_resource,
1269 - struct resource *data_resource,
1270 - struct resource *bss_resource)
1271 +void __init e820_reserve_resources(struct e820entry *e820, int nr_map)
1272 {
1273 int i;
1274 + struct resource *res;
1275 +
1276 + res = alloc_bootmem_low(sizeof(struct resource) * nr_map);
1277 for (i = 0; i < nr_map; i++) {
1278 - struct resource *res;
1279 - res = alloc_bootmem_low(sizeof(struct resource));
1280 switch (e820[i].type) {
1281 case E820_RAM: res->name = "System RAM"; break;
1282 case E820_ACPI: res->name = "ACPI Tables"; break;
1283 @@ -265,26 +360,8 @@ void __init e820_reserve_resources(struc
1284 res->start = e820[i].addr;
1285 res->end = res->start + e820[i].size - 1;
1286 res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
1287 - request_resource(&iomem_resource, res);
1288 - if (e820[i].type == E820_RAM) {
1289 - /*
1290 - * We don't know which RAM region contains kernel data,
1291 - * so we try it repeatedly and let the resource manager
1292 - * test it.
1293 - */
1294 -#ifndef CONFIG_XEN
1295 - request_resource(res, code_resource);
1296 - request_resource(res, data_resource);
1297 - request_resource(res, bss_resource);
1298 -#endif
1299 -#ifdef CONFIG_KEXEC
1300 - if (crashk_res.start != crashk_res.end)
1301 - request_resource(res, &crashk_res);
1302 -#ifdef CONFIG_XEN
1303 - xen_machine_kexec_register_resources(res);
1304 -#endif
1305 -#endif
1306 - }
1307 + insert_resource(&iomem_resource, res);
1308 + res++;
1309 }
1310 }
1311
1312 @@ -338,9 +415,9 @@ static int __init e820_find_active_regio
1313 if (*ei_startpfn >= *ei_endpfn)
1314 return 0;
1315
1316 - /* Check if end_pfn_map should be updated */
1317 - if (ei->type != E820_RAM && *ei_endpfn > end_pfn_map)
1318 - end_pfn_map = *ei_endpfn;
1319 + /* Check if max_pfn_mapped should be updated */
1320 + if (ei->type != E820_RAM && *ei_endpfn > max_pfn_mapped)
1321 + max_pfn_mapped = *ei_endpfn;
1322
1323 /* Skip if map is outside the node */
1324 if (ei->type != E820_RAM || *ei_endpfn <= start_pfn ||
1325 @@ -667,10 +744,10 @@ static int __init copy_e820_map(struct e
1326 #endif
1327
1328 do {
1329 - unsigned long start = biosmap->addr;
1330 - unsigned long size = biosmap->size;
1331 - unsigned long end = start + size;
1332 - unsigned long type = biosmap->type;
1333 + u64 start = biosmap->addr;
1334 + u64 size = biosmap->size;
1335 + u64 end = start + size;
1336 + u32 type = biosmap->type;
1337
1338 /* Overflow in 64 bits? Ignore the memory map. */
1339 if (start > end)
1340 @@ -801,7 +878,7 @@ static int __init parse_memmap_opt(char
1341 saved_max_pfn = e820_end_of_ram();
1342 remove_all_active_ranges();
1343 #endif
1344 - end_pfn_map = 0;
1345 + max_pfn_mapped = 0;
1346 e820.nr_map = 0;
1347 userdef = 1;
1348 return 0;
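
Illustration (not part of the patch): one behavioural point in the find_e820_area()/bad_addr() rework above is that candidate addresses are now re-aligned with round_up() every time a reserved early_res range pushes them forward, rather than being masked once after the scan. A toy stand-alone version of that scan; the hard-coded ranges stand in for e820/early_res data and only the reservation-skip aspect is modelled:

/* Toy model of the aligned scan in find_e820_area(); not kernel code. */
#include <stdio.h>

#define ROUND_UP(x, a)	(((x) + (a) - 1) & ~((a) - 1))	/* a: power of 2 */

struct range { unsigned long start, end; };

/* stand-in for the early_res[] reservations */
static const struct range reserved[] = { { 0x0, 0x3000 }, { 0x5000, 0x6000 } };

static unsigned long find_area(unsigned long start, unsigned long end,
			       unsigned long size, unsigned long align)
{
	unsigned long addr = ROUND_UP(start, align);
	unsigned i;

again:
	for (i = 0; i < sizeof(reserved) / sizeof(reserved[0]); i++) {
		/* overlap with a reservation: skip past it, keeping alignment */
		if (addr + size > reserved[i].start && addr < reserved[i].end) {
			addr = ROUND_UP(reserved[i].end, align);
			goto again;
		}
	}
	return addr + size <= end ? addr : (unsigned long)-1;
}

int main(void)
{
	/* lands at 0x3000: first aligned slot after the 0x0-0x3000 reservation */
	printf("found at %#lx\n", find_area(0x0, 0x10000, 0x1000, 0x1000));
	return 0;
}

Re-aligning at each skip is what lets the new bad_addr() take an align argument and drop the old trailing mask in find_e820_area().
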
1349 --- a/arch/x86/kernel/early_printk-xen.c
1350 +++ b/arch/x86/kernel/early_printk-xen.c
1351 @@ -13,7 +13,7 @@
1352
1353 #ifndef CONFIG_XEN
1354 static int max_ypos = 25, max_xpos = 80;
1355 -static int current_ypos = 25, current_xpos = 0;
1356 +static int current_ypos = 25, current_xpos;
1357
1358 static void early_vga_write(struct console *con, const char *str, unsigned n)
1359 {
1360 @@ -108,12 +108,12 @@ static __init void early_serial_init(cha
1361
1362 if (*s) {
1363 unsigned port;
1364 - if (!strncmp(s,"0x",2)) {
1365 + if (!strncmp(s, "0x", 2)) {
1366 early_serial_base = simple_strtoul(s, &e, 16);
1367 } else {
1368 static int bases[] = { 0x3f8, 0x2f8 };
1369
1370 - if (!strncmp(s,"ttyS",4))
1371 + if (!strncmp(s, "ttyS", 4))
1372 s += 4;
1373 port = simple_strtoul(s, &e, 10);
1374 if (port > 1 || s == e)
1375 @@ -223,7 +223,7 @@ static struct console simnow_console = {
1376
1377 /* Direct interface for emergencies */
1378 static struct console *early_console = &early_vga_console;
1379 -static int early_console_initialized = 0;
1380 +static int early_console_initialized;
1381
1382 void early_printk(const char *fmt, ...)
1383 {
1384 @@ -231,9 +231,9 @@ void early_printk(const char *fmt, ...)
1385 int n;
1386 va_list ap;
1387
1388 - va_start(ap,fmt);
1389 - n = vscnprintf(buf,512,fmt,ap);
1390 - early_console->write(early_console,buf,n);
1391 + va_start(ap, fmt);
1392 + n = vscnprintf(buf, 512, fmt, ap);
1393 + early_console->write(early_console, buf, n);
1394 va_end(ap);
1395 }
1396
1397 @@ -259,16 +259,16 @@ static int __init setup_early_printk(cha
1398 early_console = &early_serial_console;
1399 } else if (!strncmp(buf, "vga", 3)) {
1400 #ifndef CONFIG_XEN
1401 - && boot_params.screen_info.orig_video_isVGA == 1) {
1402 + && boot_params.screen_info.orig_video_isVGA == 1) {
1403 max_xpos = boot_params.screen_info.orig_video_cols;
1404 max_ypos = boot_params.screen_info.orig_video_lines;
1405 current_ypos = boot_params.screen_info.orig_y;
1406 #endif
1407 early_console = &early_vga_console;
1408 - } else if (!strncmp(buf, "simnow", 6)) {
1409 - simnow_init(buf + 6);
1410 - early_console = &simnow_console;
1411 - keep_early = 1;
1412 + } else if (!strncmp(buf, "simnow", 6)) {
1413 + simnow_init(buf + 6);
1414 + early_console = &simnow_console;
1415 + keep_early = 1;
1416 #ifdef CONFIG_XEN
1417 } else if (!strncmp(buf, "xen", 3)) {
1418 early_console = &xenboot_console;
1419 --- a/arch/x86/kernel/entry_32-xen.S
1420 +++ b/arch/x86/kernel/entry_32-xen.S
1421 @@ -1,5 +1,4 @@
1422 /*
1423 - * linux/arch/i386/entry.S
1424 *
1425 * Copyright (C) 1991, 1992 Linus Torvalds
1426 */
1427 @@ -51,6 +50,7 @@
1428 #include <asm/desc.h>
1429 #include <asm/percpu.h>
1430 #include <asm/dwarf2.h>
1431 +#include <asm/processor-flags.h>
1432 #include "irq_vectors.h"
1433 #include <xen/interface/xen.h>
1434
1435 @@ -69,12 +69,6 @@
1436
1437 #define nr_syscalls ((syscall_table_size)/4)
1438
1439 -CF_MASK = 0x00000001
1440 -TF_MASK = 0x00000100
1441 -IF_MASK = 0x00000200
1442 -DF_MASK = 0x00000400
1443 -NT_MASK = 0x00004000
1444 -VM_MASK = 0x00020000
1445 /* Pseudo-eflags. */
1446 NMI_MASK = 0x80000000
1447
1448 @@ -87,7 +81,7 @@ NMI_MASK = 0x80000000
1449
1450 .macro TRACE_IRQS_IRET
1451 #ifdef CONFIG_TRACE_IRQFLAGS
1452 - testl $IF_MASK,PT_EFLAGS(%esp) # interrupts off?
1453 + testl $X86_EFLAGS_IF,PT_EFLAGS(%esp) # interrupts off?
1454 jz 1f
1455 TRACE_IRQS_ON
1456 1:
1457 @@ -249,7 +243,7 @@ ret_from_intr:
1458 check_userspace:
1459 movl PT_EFLAGS(%esp), %eax # mix EFLAGS and CS
1460 movb PT_CS(%esp), %al
1461 - andl $(VM_MASK | SEGMENT_RPL_MASK), %eax
1462 + andl $(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax
1463 cmpl $USER_RPL, %eax
1464 jb resume_kernel # not returning to v8086 or userspace
1465
1466 @@ -258,6 +252,7 @@ ENTRY(resume_userspace)
1467 DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt
1468 # setting need_resched or sigpending
1469 # between sampling and the iret
1470 + TRACE_IRQS_OFF
1471 movl TI_flags(%ebp), %ecx
1472 andl $_TIF_WORK_MASK, %ecx # is there any work to be done on
1473 # int/exception return?
1474 @@ -274,7 +269,7 @@ need_resched:
1475 movl TI_flags(%ebp), %ecx # need_resched set ?
1476 testb $_TIF_NEED_RESCHED, %cl
1477 jz restore_all
1478 - testl $IF_MASK,PT_EFLAGS(%esp) # interrupts off (exception path) ?
1479 + testl $X86_EFLAGS_IF,PT_EFLAGS(%esp) # interrupts off (exception path) ?
1480 jz restore_all
1481 call preempt_schedule_irq
1482 jmp need_resched
1483 @@ -299,10 +294,10 @@ ENTRY(ia32_sysenter_target)
1484 movl SYSENTER_stack_sp0(%esp),%esp
1485 sysenter_past_esp:
1486 /*
1487 - * No need to follow this irqs on/off section: the syscall
1488 - * disabled irqs and here we enable it straight after entry:
1489 + * Interrupts are disabled here, but we can't trace it until
1490 + * enough kernel state to call TRACE_IRQS_OFF can be called - but
1491 + * we immediately enable interrupts at that point anyway.
1492 */
1493 - ENABLE_INTERRUPTS(CLBR_NONE)
1494 pushl $(__USER_DS)
1495 CFI_ADJUST_CFA_OFFSET 4
1496 /*CFI_REL_OFFSET ss, 0*/
1497 @@ -310,6 +305,7 @@ sysenter_past_esp:
1498 CFI_ADJUST_CFA_OFFSET 4
1499 CFI_REL_OFFSET esp, 0
1500 pushfl
1501 + orl $X86_EFLAGS_IF, (%esp)
1502 CFI_ADJUST_CFA_OFFSET 4
1503 pushl $(__USER_CS)
1504 CFI_ADJUST_CFA_OFFSET 4
1505 @@ -323,6 +319,11 @@ sysenter_past_esp:
1506 CFI_ADJUST_CFA_OFFSET 4
1507 CFI_REL_OFFSET eip, 0
1508
1509 + pushl %eax
1510 + CFI_ADJUST_CFA_OFFSET 4
1511 + SAVE_ALL
1512 + ENABLE_INTERRUPTS(CLBR_NONE)
1513 +
1514 /*
1515 * Load the potential sixth argument from user stack.
1516 * Careful about security.
1517 @@ -330,14 +331,12 @@ sysenter_past_esp:
1518 cmpl $__PAGE_OFFSET-3,%ebp
1519 jae syscall_fault
1520 1: movl (%ebp),%ebp
1521 + movl %ebp,PT_EBP(%esp)
1522 .section __ex_table,"a"
1523 .align 4
1524 .long 1b,syscall_fault
1525 .previous
1526
1527 - pushl %eax
1528 - CFI_ADJUST_CFA_OFFSET 4
1529 - SAVE_ALL
1530 GET_THREAD_INFO(%ebp)
1531 test_tif %ebp
1532 jnz syscall_trace_entry
1533 @@ -414,7 +413,7 @@ syscall_exit:
1534 # setting need_resched or sigpending
1535 # between sampling and the iret
1536 TRACE_IRQS_OFF
1537 - testl $TF_MASK,PT_EFLAGS(%esp) # If tracing set singlestep flag on exit
1538 + testl $X86_EFLAGS_TF,PT_EFLAGS(%esp) # If tracing set singlestep flag on exit
1539 jz no_singlestep
1540 orl $_TIF_SINGLESTEP,TI_flags(%ebp)
1541 no_singlestep:
1542 @@ -430,7 +429,7 @@ restore_all:
1543 # See comments in process.c:copy_thread() for details.
1544 movb PT_OLDSS(%esp), %ah
1545 movb PT_CS(%esp), %al
1546 - andl $(VM_MASK | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
1547 + andl $(X86_EFLAGS_VM | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
1548 cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax
1549 CFI_REMEMBER_STATE
1550 je ldt_ss # returning to user-space with LDT SS
1551 @@ -438,7 +437,7 @@ restore_nocheck:
1552 #else
1553 restore_nocheck:
1554 movl PT_EFLAGS(%esp), %eax
1555 - testl $(VM_MASK|NMI_MASK), %eax
1556 + testl $(X86_EFLAGS_VM|NMI_MASK), %eax
1557 CFI_REMEMBER_STATE
1558 jnz hypervisor_iret
1559 shr $9, %eax # EAX[0] == IRET_EFLAGS.IF
1560 @@ -456,7 +455,7 @@ restore_nocheck_notrace:
1561 irq_return:
1562 INTERRUPT_RETURN
1563 .section .fixup,"ax"
1564 -iret_exc:
1565 +ENTRY(iret_exc)
1566 pushl $0 # no error code
1567 pushl $do_iret_error
1568 jmp error_code
1569 @@ -560,7 +559,7 @@ work_resched:
1570 work_notifysig: # deal with pending signals and
1571 # notify-resume requests
1572 #ifdef CONFIG_VM86
1573 - testl $VM_MASK, PT_EFLAGS(%esp)
1574 + testl $X86_EFLAGS_VM, PT_EFLAGS(%esp)
1575 movl %esp, %eax
1576 jne work_notifysig_v86 # returning to kernel-space or
1577 # vm86-space
1578 @@ -617,9 +616,6 @@ END(syscall_exit_work)
1579
1580 RING0_INT_FRAME # can't unwind into user space anyway
1581 syscall_fault:
1582 - pushl %eax # save orig_eax
1583 - CFI_ADJUST_CFA_OFFSET 4
1584 - SAVE_ALL
1585 GET_THREAD_INFO(%ebp)
1586 movl $-EFAULT,PT_EAX(%esp)
1587 jmp resume_userspace
1588 --- a/arch/x86/kernel/entry_64-xen.S
1589 +++ b/arch/x86/kernel/entry_64-xen.S
1590 @@ -338,19 +338,17 @@ badsys:
1591 /* Do syscall tracing */
1592 tracesys:
1593 SAVE_REST
1594 - movq $-ENOSYS,RAX(%rsp)
1595 + movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
1596 FIXUP_TOP_OF_STACK %rdi
1597 movq %rsp,%rdi
1598 call syscall_trace_enter
1599 LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */
1600 RESTORE_REST
1601 cmpq $__NR_syscall_max,%rax
1602 - movq $-ENOSYS,%rcx
1603 - cmova %rcx,%rax
1604 - ja 1f
1605 + ja int_ret_from_sys_call /* RAX(%rsp) set to -ENOSYS above */
1606 movq %r10,%rcx /* fixup for C */
1607 call *sys_call_table(,%rax,8)
1608 -1: movq %rax,RAX-ARGOFFSET(%rsp)
1609 + movq %rax,RAX-ARGOFFSET(%rsp)
1610 /* Use IRET because user could have changed frame */
1611
1612 /*
1613 --- a/arch/x86/kernel/genapic_64-xen.c
1614 +++ b/arch/x86/kernel/genapic_64-xen.c
1615 @@ -15,6 +15,7 @@
1616 #include <linux/kernel.h>
1617 #include <linux/ctype.h>
1618 #include <linux/init.h>
1619 +#include <linux/hardirq.h>
1620
1621 #include <asm/smp.h>
1622 #include <asm/ipi.h>
1623 @@ -24,17 +25,12 @@
1624 #include <acpi/acpi_bus.h>
1625 #endif
1626
1627 -/* which logical CPU number maps to which CPU (physical APIC ID) */
1628 #ifndef CONFIG_XEN
1629 -u16 x86_cpu_to_apicid_init[NR_CPUS] __initdata
1630 - = { [0 ... NR_CPUS-1] = BAD_APICID };
1631 -void *x86_cpu_to_apicid_early_ptr;
1632 -#endif
1633 -DEFINE_PER_CPU(u16, x86_cpu_to_apicid) = BAD_APICID;
1634 -EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid);
1635 +DEFINE_PER_CPU(int, x2apic_extra_bits);
1636
1637 -#ifndef CONFIG_XEN
1638 struct genapic __read_mostly *genapic = &apic_flat;
1639 +
1640 +static enum uv_system_type uv_system_type;
1641 #else
1642 extern struct genapic apic_xen;
1643 struct genapic __read_mostly *genapic = &apic_xen;
1644 @@ -47,6 +43,9 @@ struct genapic __read_mostly *genapic =
1645 void __init setup_apic_routing(void)
1646 {
1647 #ifndef CONFIG_XEN
1648 + if (uv_system_type == UV_NON_UNIQUE_APIC)
1649 + genapic = &apic_x2apic_uv_x;
1650 + else
1651 #ifdef CONFIG_ACPI
1652 /*
1653 * Quirk: some x86_64 machines can only use physical APIC mode
1654 @@ -59,7 +58,7 @@ void __init setup_apic_routing(void)
1655 else
1656 #endif
1657
1658 - if (cpus_weight(cpu_possible_map) <= 8)
1659 + if (num_possible_cpus() <= 8)
1660 genapic = &apic_flat;
1661 else
1662 genapic = &apic_physflat;
1663 @@ -85,3 +84,41 @@ void send_IPI_self(int vector)
1664 xen_send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL);
1665 #endif
1666 }
1667 +
1668 +int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
1669 +{
1670 +#ifndef CONFIG_XEN
1671 + if (!strcmp(oem_id, "SGI")) {
1672 + if (!strcmp(oem_table_id, "UVL"))
1673 + uv_system_type = UV_LEGACY_APIC;
1674 + else if (!strcmp(oem_table_id, "UVX"))
1675 + uv_system_type = UV_X2APIC;
1676 + else if (!strcmp(oem_table_id, "UVH"))
1677 + uv_system_type = UV_NON_UNIQUE_APIC;
1678 + }
1679 +#endif
1680 + return 0;
1681 +}
1682 +
1683 +#ifndef CONFIG_XEN
1684 +unsigned int read_apic_id(void)
1685 +{
1686 + unsigned int id;
1687 +
1688 + WARN_ON(preemptible() && num_online_cpus() > 1);
1689 + id = apic_read(APIC_ID);
1690 + if (uv_system_type >= UV_X2APIC)
1691 + id |= __get_cpu_var(x2apic_extra_bits);
1692 + return id;
1693 +}
1694 +
1695 +enum uv_system_type get_uv_system_type(void)
1696 +{
1697 + return uv_system_type;
1698 +}
1699 +
1700 +int is_uv_system(void)
1701 +{
1702 + return uv_system_type != UV_NONE;
1703 +}
1704 +#endif
1705 --- a/arch/x86/kernel/genapic_xen_64.c
1706 +++ b/arch/x86/kernel/genapic_xen_64.c
1707 @@ -72,9 +72,7 @@ static cpumask_t xen_target_cpus(void)
1708
1709 static cpumask_t xen_vector_allocation_domain(int cpu)
1710 {
1711 - cpumask_t domain = CPU_MASK_NONE;
1712 - cpu_set(cpu, domain);
1713 - return domain;
1714 + return cpumask_of_cpu(cpu);
1715 }
1716
1717 /*
1718 --- a/arch/x86/kernel/head_32-xen.S
1719 +++ b/arch/x86/kernel/head_32-xen.S
1720 @@ -69,7 +69,7 @@ ENTRY(startup_32)
1721 cld # gcc2 wants the direction flag cleared at all times
1722
1723 pushl $0 # fake return address for unwinder
1724 - jmp start_kernel
1725 + jmp i386_start_kernel
1726
1727 #define HYPERCALL_PAGE_OFFSET 0x1000
1728 .org HYPERCALL_PAGE_OFFSET
1729 --- a/arch/x86/kernel/head64-xen.c
1730 +++ b/arch/x86/kernel/head64-xen.c
1731 @@ -17,6 +17,7 @@
1732 #include <linux/string.h>
1733 #include <linux/percpu.h>
1734 #include <linux/start_kernel.h>
1735 +#include <linux/io.h>
1736 #include <linux/module.h>
1737
1738 #include <asm/processor.h>
1739 @@ -29,6 +30,7 @@
1740 #include <asm/sections.h>
1741 #include <asm/kdebug.h>
1742 #include <asm/e820.h>
1743 +#include <asm/bios_ebda.h>
1744
1745 unsigned long start_pfn;
1746
1747 @@ -75,34 +77,75 @@ EXPORT_SYMBOL(machine_to_phys_mapping);
1748 unsigned int machine_to_phys_order;
1749 EXPORT_SYMBOL(machine_to_phys_order);
1750
1751 -#define EBDA_ADDR_POINTER 0x40E
1752 +#define BIOS_LOWMEM_KILOBYTES 0x413
1753
1754 -static __init void reserve_ebda(void)
1755 +/*
1756 + * The BIOS places the EBDA/XBDA at the top of conventional
1757 + * memory, and usually decreases the reported amount of
1758 + * conventional memory (int 0x12) too. This also contains a
1759 + * workaround for Dell systems that neglect to reserve EBDA.
1760 + * The same workaround also avoids a problem with the AMD768MPX
1761 + * chipset: reserve a page before VGA to prevent PCI prefetch
1762 + * into it (errata #56). Usually the page is reserved anyways,
1763 + * unless you have no PS/2 mouse plugged in.
1764 + */
1765 +static void __init reserve_ebda_region(void)
1766 {
1767 #ifndef CONFIG_XEN
1768 - unsigned ebda_addr, ebda_size;
1769 + unsigned int lowmem, ebda_addr;
1770
1771 - /*
1772 - * there is a real-mode segmented pointer pointing to the
1773 - * 4K EBDA area at 0x40E
1774 - */
1775 - ebda_addr = *(unsigned short *)__va(EBDA_ADDR_POINTER);
1776 - ebda_addr <<= 4;
1777 -
1778 - if (!ebda_addr)
1779 + /* To determine the position of the EBDA and the */
1780 + /* end of conventional memory, we need to look at */
1781 + /* the BIOS data area. In a paravirtual environment */
1782 + /* that area is absent. We'll just have to assume */
1783 + /* that the paravirt case can handle memory setup */
1784 + /* correctly, without our help. */
1785 + if (paravirt_enabled())
1786 return;
1787
1788 - ebda_size = *(unsigned short *)__va(ebda_addr);
1789 + /* end of low (conventional) memory */
1790 + lowmem = *(unsigned short *)__va(BIOS_LOWMEM_KILOBYTES);
1791 + lowmem <<= 10;
1792 +
1793 + /* start of EBDA area */
1794 + ebda_addr = get_bios_ebda();
1795 +
1796 + /* Fixup: bios puts an EBDA in the top 64K segment */
1797 + /* of conventional memory, but does not adjust lowmem. */
1798 + if ((lowmem - ebda_addr) <= 0x10000)
1799 + lowmem = ebda_addr;
1800 +
1801 + /* Fixup: bios does not report an EBDA at all. */
1802 + /* Some old Dells seem to need 4k anyhow (bugzilla 2990) */
1803 + if ((ebda_addr == 0) && (lowmem >= 0x9f000))
1804 + lowmem = 0x9f000;
1805 +
1806 + /* Paranoia: should never happen, but... */
1807 + if ((lowmem == 0) || (lowmem >= 0x100000))
1808 + lowmem = 0x9f000;
1809
1810 - /* Round EBDA up to pages */
1811 - if (ebda_size == 0)
1812 - ebda_size = 1;
1813 - ebda_size <<= 10;
1814 - ebda_size = round_up(ebda_size + (ebda_addr & ~PAGE_MASK), PAGE_SIZE);
1815 - if (ebda_size > 64*1024)
1816 - ebda_size = 64*1024;
1817 + /* reserve all memory between lowmem and the 1MB mark */
1818 + reserve_early(lowmem, 0x100000, "BIOS reserved");
1819 +#endif
1820 +}
1821
1822 - reserve_early(ebda_addr, ebda_addr + ebda_size, "EBDA");
1823 +static void __init reserve_setup_data(void)
1824 +{
1825 +#ifndef CONFIG_XEN
1826 + struct setup_data *data;
1827 + unsigned long pa_data;
1828 + char buf[32];
1829 +
1830 + if (boot_params.hdr.version < 0x0209)
1831 + return;
1832 + pa_data = boot_params.hdr.setup_data;
1833 + while (pa_data) {
1834 + data = early_ioremap(pa_data, sizeof(*data));
1835 + sprintf(buf, "setup data %x", data->type);
1836 + reserve_early(pa_data, pa_data+sizeof(*data)+data->len, buf);
1837 + pa_data = data->next;
1838 + early_iounmap(data, sizeof(*data));
1839 + }
1840 #endif
1841 }
1842
1843 @@ -112,6 +155,19 @@ void __init x86_64_start_kernel(char * r
1844 unsigned long machine_to_phys_nr_ents;
1845 int i;
1846
1847 + /*
1848 + * Build-time sanity checks on the kernel image and module
1849 + * area mappings. (these are purely build-time and produce no code)
1850 + */
1851 + BUILD_BUG_ON(MODULES_VADDR < KERNEL_IMAGE_START);
1852 + BUILD_BUG_ON(MODULES_VADDR-KERNEL_IMAGE_START < KERNEL_IMAGE_SIZE);
1853 + BUILD_BUG_ON(MODULES_LEN + KERNEL_IMAGE_SIZE > 2*PUD_SIZE);
1854 + BUILD_BUG_ON((KERNEL_IMAGE_START & ~PMD_MASK) != 0);
1855 + BUILD_BUG_ON((MODULES_VADDR & ~PMD_MASK) != 0);
1856 + BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL));
1857 + BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
1858 + (__START_KERNEL & PGDIR_MASK)));
1859 +
1860 xen_setup_features();
1861
1862 xen_start_info = (struct start_info *)real_mode_data;
1863 @@ -140,7 +196,7 @@ void __init x86_64_start_kernel(char * r
1864 /* Cleanup the over mapped high alias */
1865 cleanup_highmap();
1866
1867 - for (i = 0; i < IDT_ENTRIES; i++) {
1868 + for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) {
1869 #ifdef CONFIG_EARLY_PRINTK
1870 set_intr_gate(i, &early_idt_handlers[i]);
1871 #else
1872 @@ -163,7 +219,8 @@ void __init x86_64_start_kernel(char * r
1873 reserve_early(round_up(__pa_symbol(&_end), PAGE_SIZE),
1874 start_pfn << PAGE_SHIFT, "Xen provided");
1875
1876 - reserve_ebda();
1877 + reserve_ebda_region();
1878 + reserve_setup_data();
1879
1880 /*
1881 * At this point everything still needed from the boot loader
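
reserve_ebda_region() above replaces the old fixed-size EBDA heuristic: read the conventional-memory size in KiB from BIOS_LOWMEM_KILOBYTES (0x413), find the EBDA start, apply the two BIOS-bug fixups plus the sanity clamp, and reserve everything from there up to the 1MB mark. A self-contained sketch of just that arithmetic, fed sample values instead of a real BIOS data area:

#include <stdio.h>

/* Apply the reserve_ebda_region() fixups and return the start of the
 * [start, 1MB) range that would be handed to reserve_early(). */
static unsigned int ebda_reserve_start(unsigned int lowmem_kb,
				       unsigned int ebda_addr)
{
	unsigned int lowmem = lowmem_kb << 10;	/* KiB -> bytes */

	/* EBDA in the top 64K, but the BIOS did not adjust lowmem */
	if (lowmem - ebda_addr <= 0x10000)
		lowmem = ebda_addr;

	/* no EBDA reported: old Dells still need 4K below 640K */
	if (ebda_addr == 0 && lowmem >= 0x9f000)
		lowmem = 0x9f000;

	/* paranoia: nonsense values fall back to 636K */
	if (lowmem == 0 || lowmem >= 0x100000)
		lowmem = 0x9f000;

	return lowmem;
}

int main(void)
{
	printf("%#x\n", ebda_reserve_start(639, 0x9fc00));	/* typical */
	printf("%#x\n", ebda_reserve_start(640, 0));		/* no EBDA */
	return 0;
}
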
1882 --- a/arch/x86/kernel/init_task-xen.c
1883 +++ b/arch/x86/kernel/init_task-xen.c
1884 @@ -11,7 +11,6 @@
1885 #include <asm/desc.h>
1886
1887 static struct fs_struct init_fs = INIT_FS;
1888 -static struct files_struct init_files = INIT_FILES;
1889 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
1890 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
1891 #ifdef CONFIG_X86_XEN
1892 --- a/arch/x86/kernel/io_apic_32-xen.c
1893 +++ b/arch/x86/kernel/io_apic_32-xen.c
1894 @@ -88,6 +88,16 @@ int sis_apic_bug = -1;
1895 */
1896 int nr_ioapic_registers[MAX_IO_APICS];
1897
1898 +/* I/O APIC entries */
1899 +struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
1900 +int nr_ioapics;
1901 +
1902 +/* MP IRQ source entries */
1903 +struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
1904 +
1905 +/* # of MP IRQ source entries */
1906 +int mp_irq_entries;
1907 +
1908 static int disable_timer_pin_1 __initdata;
1909
1910 /*
1911 @@ -863,10 +873,7 @@ static int __init find_isa_irq_pin(int i
1912 for (i = 0; i < mp_irq_entries; i++) {
1913 int lbus = mp_irqs[i].mpc_srcbus;
1914
1915 - if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
1916 - mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
1917 - mp_bus_id_to_type[lbus] == MP_BUS_MCA
1918 - ) &&
1919 + if (test_bit(lbus, mp_bus_not_pci) &&
1920 (mp_irqs[i].mpc_irqtype == type) &&
1921 (mp_irqs[i].mpc_srcbusirq == irq))
1922
1923 @@ -882,10 +889,7 @@ static int __init find_isa_irq_apic(int
1924 for (i = 0; i < mp_irq_entries; i++) {
1925 int lbus = mp_irqs[i].mpc_srcbus;
1926
1927 - if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
1928 - mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
1929 - mp_bus_id_to_type[lbus] == MP_BUS_MCA
1930 - ) &&
1931 + if (test_bit(lbus, mp_bus_not_pci) &&
1932 (mp_irqs[i].mpc_irqtype == type) &&
1933 (mp_irqs[i].mpc_srcbusirq == irq))
1934 break;
1935 @@ -926,7 +930,7 @@ int IO_APIC_get_PCI_irq_vector(int bus,
1936 mp_irqs[i].mpc_dstapic == MP_APIC_ALL)
1937 break;
1938
1939 - if ((mp_bus_id_to_type[lbus] == MP_BUS_PCI) &&
1940 + if (!test_bit(lbus, mp_bus_not_pci) &&
1941 !mp_irqs[i].mpc_irqtype &&
1942 (bus == lbus) &&
1943 (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) {
1944 @@ -977,6 +981,7 @@ void __init setup_ioapic_dest(void)
1945 #endif /* !CONFIG_XEN */
1946 #endif
1947
1948 +#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
1949 /*
1950 * EISA Edge/Level control register, ELCR
1951 */
1952 @@ -990,6 +995,13 @@ static int EISA_ELCR(unsigned int irq)
1953 "Broken MPtable reports ISA irq %d\n", irq);
1954 return 0;
1955 }
1956 +#endif
1957 +
1958 +/* ISA interrupts are always polarity zero edge triggered,
1959 + * when listed as conforming in the MP table. */
1960 +
1961 +#define default_ISA_trigger(idx) (0)
1962 +#define default_ISA_polarity(idx) (0)
1963
1964 /* EISA interrupts are always polarity zero and can be edge or level
1965 * trigger depending on the ELCR value. If an interrupt is listed as
1966 @@ -997,13 +1009,7 @@ static int EISA_ELCR(unsigned int irq)
1967 * be read in from the ELCR */
1968
1969 #define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mpc_srcbusirq))
1970 -#define default_EISA_polarity(idx) (0)
1971 -
1972 -/* ISA interrupts are always polarity zero edge triggered,
1973 - * when listed as conforming in the MP table. */
1974 -
1975 -#define default_ISA_trigger(idx) (0)
1976 -#define default_ISA_polarity(idx) (0)
1977 +#define default_EISA_polarity(idx) default_ISA_polarity(idx)
1978
1979 /* PCI interrupts are always polarity one level triggered,
1980 * when listed as conforming in the MP table. */
1981 @@ -1015,7 +1021,7 @@ static int EISA_ELCR(unsigned int irq)
1982 * when listed as conforming in the MP table. */
1983
1984 #define default_MCA_trigger(idx) (1)
1985 -#define default_MCA_polarity(idx) (0)
1986 +#define default_MCA_polarity(idx) default_ISA_polarity(idx)
1987
1988 static int MPBIOS_polarity(int idx)
1989 {
1990 @@ -1029,35 +1035,9 @@ static int MPBIOS_polarity(int idx)
1991 {
1992 case 0: /* conforms, ie. bus-type dependent polarity */
1993 {
1994 - switch (mp_bus_id_to_type[bus])
1995 - {
1996 - case MP_BUS_ISA: /* ISA pin */
1997 - {
1998 - polarity = default_ISA_polarity(idx);
1999 - break;
2000 - }
2001 - case MP_BUS_EISA: /* EISA pin */
2002 - {
2003 - polarity = default_EISA_polarity(idx);
2004 - break;
2005 - }
2006 - case MP_BUS_PCI: /* PCI pin */
2007 - {
2008 - polarity = default_PCI_polarity(idx);
2009 - break;
2010 - }
2011 - case MP_BUS_MCA: /* MCA pin */
2012 - {
2013 - polarity = default_MCA_polarity(idx);
2014 - break;
2015 - }
2016 - default:
2017 - {
2018 - printk(KERN_WARNING "broken BIOS!!\n");
2019 - polarity = 1;
2020 - break;
2021 - }
2022 - }
2023 + polarity = test_bit(bus, mp_bus_not_pci)?
2024 + default_ISA_polarity(idx):
2025 + default_PCI_polarity(idx);
2026 break;
2027 }
2028 case 1: /* high active */
2029 @@ -1098,11 +1078,15 @@ static int MPBIOS_trigger(int idx)
2030 {
2031 case 0: /* conforms, ie. bus-type dependent */
2032 {
2033 + trigger = test_bit(bus, mp_bus_not_pci)?
2034 + default_ISA_trigger(idx):
2035 + default_PCI_trigger(idx);
2036 +#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
2037 switch (mp_bus_id_to_type[bus])
2038 {
2039 case MP_BUS_ISA: /* ISA pin */
2040 {
2041 - trigger = default_ISA_trigger(idx);
2042 + /* set before the switch */
2043 break;
2044 }
2045 case MP_BUS_EISA: /* EISA pin */
2046 @@ -1112,7 +1096,7 @@ static int MPBIOS_trigger(int idx)
2047 }
2048 case MP_BUS_PCI: /* PCI pin */
2049 {
2050 - trigger = default_PCI_trigger(idx);
2051 + /* set before the switch */
2052 break;
2053 }
2054 case MP_BUS_MCA: /* MCA pin */
2055 @@ -1127,6 +1111,7 @@ static int MPBIOS_trigger(int idx)
2056 break;
2057 }
2058 }
2059 +#endif
2060 break;
2061 }
2062 case 1: /* edge */
2063 @@ -1176,39 +1161,22 @@ static int pin_2_irq(int idx, int apic,
2064 if (mp_irqs[idx].mpc_dstirq != pin)
2065 printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
2066
2067 - switch (mp_bus_id_to_type[bus])
2068 - {
2069 - case MP_BUS_ISA: /* ISA pin */
2070 - case MP_BUS_EISA:
2071 - case MP_BUS_MCA:
2072 - {
2073 - irq = mp_irqs[idx].mpc_srcbusirq;
2074 - break;
2075 - }
2076 - case MP_BUS_PCI: /* PCI pin */
2077 - {
2078 - /*
2079 - * PCI IRQs are mapped in order
2080 - */
2081 - i = irq = 0;
2082 - while (i < apic)
2083 - irq += nr_ioapic_registers[i++];
2084 - irq += pin;
2085 -
2086 - /*
2087 - * For MPS mode, so far only needed by ES7000 platform
2088 - */
2089 - if (ioapic_renumber_irq)
2090 - irq = ioapic_renumber_irq(apic, irq);
2091 + if (test_bit(bus, mp_bus_not_pci))
2092 + irq = mp_irqs[idx].mpc_srcbusirq;
2093 + else {
2094 + /*
2095 + * PCI IRQs are mapped in order
2096 + */
2097 + i = irq = 0;
2098 + while (i < apic)
2099 + irq += nr_ioapic_registers[i++];
2100 + irq += pin;
2101
2102 - break;
2103 - }
2104 - default:
2105 - {
2106 - printk(KERN_ERR "unknown bus type %d.\n",bus);
2107 - irq = 0;
2108 - break;
2109 - }
2110 + /*
2111 + * For MPS mode, so far only needed by ES7000 platform
2112 + */
2113 + if (ioapic_renumber_irq)
2114 + irq = ioapic_renumber_irq(apic, irq);
2115 }
2116
2117 /*
2118 @@ -1314,7 +1282,6 @@ static void __init setup_IO_APIC_irqs(vo
2119 {
2120 struct IO_APIC_route_entry entry;
2121 int apic, pin, idx, irq, first_notcon = 1, vector;
2122 - unsigned long flags;
2123
2124 apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
2125
2126 @@ -1380,9 +1347,7 @@ static void __init setup_IO_APIC_irqs(vo
2127 if (!apic && (irq < 16))
2128 disable_8259A_irq(irq);
2129 }
2130 - spin_lock_irqsave(&ioapic_lock, flags);
2131 - __ioapic_write_entry(apic, pin, entry);
2132 - spin_unlock_irqrestore(&ioapic_lock, flags);
2133 + ioapic_write_entry(apic, pin, entry);
2134 }
2135 }
2136
2137 @@ -1577,8 +1542,8 @@ void /*__init*/ print_local_APIC(void *
2138
2139 printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
2140 smp_processor_id(), hard_smp_processor_id());
2141 - v = apic_read(APIC_ID);
2142 - printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, GET_APIC_ID(v));
2143 + printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v,
2144 + GET_APIC_ID(read_apic_id()));
2145 v = apic_read(APIC_LVR);
2146 printk(KERN_INFO "... APIC VERSION: %08x\n", v);
2147 ver = GET_APIC_VERSION(v);
2148 @@ -1791,7 +1756,7 @@ void disable_IO_APIC(void)
2149 entry.delivery_mode = dest_ExtINT; /* ExtInt */
2150 entry.vector = 0;
2151 entry.dest.physical.physical_dest =
2152 - GET_APIC_ID(apic_read(APIC_ID));
2153 + GET_APIC_ID(read_apic_id());
2154
2155 /*
2156 * Add it to the IO-APIC irq-routing table:
2157 @@ -2090,8 +2055,7 @@ static inline void init_IO_APIC_traps(vo
2158 * 0x80, because int 0x80 is hm, kind of importantish. ;)
2159 */
2160 for (irq = 0; irq < NR_IRQS ; irq++) {
2161 - int tmp = irq;
2162 - if (IO_APIC_IRQ(tmp) && !irq_vector[tmp]) {
2163 + if (IO_APIC_IRQ(irq) && !irq_vector[irq]) {
2164 /*
2165 * Hmm.. We don't have an entry for this,
2166 * so default to an old-fashioned 8259
2167 @@ -2166,7 +2130,7 @@ static void __init setup_nmi(void)
2168 * cycles as some i82489DX-based boards have glue logic that keeps the
2169 * 8259A interrupt line asserted until INTA. --macro
2170 */
2171 -static inline void unlock_ExtINT_logic(void)
2172 +static inline void __init unlock_ExtINT_logic(void)
2173 {
2174 int apic, pin, i;
2175 struct IO_APIC_route_entry entry0, entry1;
2176 @@ -2218,8 +2182,6 @@ static inline void unlock_ExtINT_logic(v
2177 ioapic_write_entry(apic, pin, entry0);
2178 }
2179
2180 -int timer_uses_ioapic_pin_0;
2181 -
2182 /*
2183 * This code may look a bit paranoid, but it's supposed to cooperate with
2184 * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ
2185 @@ -2259,9 +2221,6 @@ static inline void __init check_timer(vo
2186 pin2 = ioapic_i8259.pin;
2187 apic2 = ioapic_i8259.apic;
2188
2189 - if (pin1 == 0)
2190 - timer_uses_ioapic_pin_0 = 1;
2191 -
2192 printk(KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n",
2193 vector, apic1, pin1, apic2, pin2);
2194
2195 @@ -2555,6 +2514,7 @@ void destroy_irq(unsigned int irq)
2196 dynamic_irq_cleanup(irq);
2197
2198 spin_lock_irqsave(&vector_lock, flags);
2199 + clear_bit(irq_vector[irq], used_vectors);
2200 irq_vector[irq] = 0;
2201 spin_unlock_irqrestore(&vector_lock, flags);
2202 }
2203 @@ -2871,7 +2831,6 @@ int __init io_apic_get_redir_entries (in
2204 int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int active_high_low)
2205 {
2206 struct IO_APIC_route_entry entry;
2207 - unsigned long flags;
2208
2209 if (!IO_APIC_IRQ(irq)) {
2210 printk(KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
2211 @@ -2912,9 +2871,7 @@ int io_apic_set_pci_routing (int ioapic,
2212 if (!ioapic && (irq < 16))
2213 disable_8259A_irq(irq);
2214
2215 - spin_lock_irqsave(&ioapic_lock, flags);
2216 - __ioapic_write_entry(ioapic, pin, entry);
2217 - spin_unlock_irqrestore(&ioapic_lock, flags);
2218 + ioapic_write_entry(ioapic, pin, entry);
2219
2220 return 0;
2221 }
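
The io_apic_32-xen.c hunks above collapse the per-bus-type switch statements into the mp_bus_not_pci bitmap: one bit per bus distinguishes PCI from the ISA-like buses, and that bit alone selects the default polarity/trigger, with only the EISA/MCA trigger special cases kept behind CONFIG_EISA/CONFIG_MCA. A sketch of the bitmap idiom with open-coded test_bit/set_bit; MAX_MP_BUSSES here is an arbitrary stand-in:

#include <stdio.h>

#define MAX_MP_BUSSES	64
#define BITS_PER_LONG	(8 * sizeof(unsigned long))

static unsigned long mp_bus_not_pci[(MAX_MP_BUSSES + BITS_PER_LONG - 1) /
				    BITS_PER_LONG];

static void set_bit_(int nr, unsigned long *addr)
{
	addr[nr / BITS_PER_LONG] |= 1UL << (nr % BITS_PER_LONG);
}

static int test_bit_(int nr, const unsigned long *addr)
{
	return (addr[nr / BITS_PER_LONG] >> (nr % BITS_PER_LONG)) & 1;
}

/* The "conforms" case of MPBIOS_polarity(): ISA-like buses default to
 * active-high (0), PCI to active-low (1). */
static int default_polarity(int bus)
{
	return test_bit_(bus, mp_bus_not_pci) ? 0 : 1;
}

int main(void)
{
	set_bit_(0, mp_bus_not_pci);		/* bus 0: ISA */
	printf("bus0 pol=%d bus1 pol=%d\n",	/* bus 1: PCI */
	       default_polarity(0), default_polarity(1));
	return 0;
}
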
2222 --- a/arch/x86/kernel/io_apic_64-xen.c
2223 +++ b/arch/x86/kernel/io_apic_64-xen.c
2224 @@ -43,13 +43,15 @@
2225 #include <asm/smp.h>
2226 #include <asm/desc.h>
2227 #include <asm/proto.h>
2228 -#include <asm/mach_apic.h>
2229 #include <asm/acpi.h>
2230 #include <asm/dma.h>
2231 #include <asm/nmi.h>
2232 #include <asm/msidef.h>
2233 #include <asm/hypertransport.h>
2234
2235 +#include <mach_ipi.h>
2236 +#include <mach_apic.h>
2237 +
2238 struct irq_cfg {
2239 #ifndef CONFIG_XEN
2240 cpumask_t domain;
2241 @@ -101,6 +103,16 @@ DEFINE_SPINLOCK(vector_lock);
2242 */
2243 int nr_ioapic_registers[MAX_IO_APICS];
2244
2245 +/* I/O APIC entries */
2246 +struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
2247 +int nr_ioapics;
2248 +
2249 +/* MP IRQ source entries */
2250 +struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
2251 +
2252 +/* # of MP IRQ source entries */
2253 +int mp_irq_entries;
2254 +
2255 /*
2256 * Rough estimation of how many shared IRQs there are, can
2257 * be changed anytime.
2258 @@ -181,11 +193,10 @@ static inline void io_apic_modify(unsign
2259 writel(value, &io_apic->data);
2260 }
2261
2262 -static int io_apic_level_ack_pending(unsigned int irq)
2263 +static bool io_apic_level_ack_pending(unsigned int irq)
2264 {
2265 struct irq_pin_list *entry;
2266 unsigned long flags;
2267 - int pending = 0;
2268
2269 spin_lock_irqsave(&ioapic_lock, flags);
2270 entry = irq_2_pin + irq;
2271 @@ -198,13 +209,17 @@ static int io_apic_level_ack_pending(uns
2272 break;
2273 reg = io_apic_read(entry->apic, 0x10 + pin*2);
2274 /* Is the remote IRR bit set? */
2275 - pending |= (reg >> 14) & 1;
2276 + if ((reg >> 14) & 1) {
2277 + spin_unlock_irqrestore(&ioapic_lock, flags);
2278 + return true;
2279 + }
2280 if (!entry->next)
2281 break;
2282 entry = irq_2_pin + entry->next;
2283 }
2284 spin_unlock_irqrestore(&ioapic_lock, flags);
2285 - return pending;
2286 +
2287 + return false;
2288 }
2289 #endif
2290
2291 @@ -762,7 +777,7 @@ static void __clear_irq_vector(int irq)
2292 per_cpu(vector_irq, cpu)[vector] = -1;
2293
2294 cfg->vector = 0;
2295 - cfg->domain = CPU_MASK_NONE;
2296 + cpus_clear(cfg->domain);
2297 }
2298
2299 void __setup_vector_irq(int cpu)
2300 @@ -899,9 +914,8 @@ static void __init setup_IO_APIC_irqs(vo
2301 static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, int vector)
2302 {
2303 struct IO_APIC_route_entry entry;
2304 - unsigned long flags;
2305
2306 - memset(&entry,0,sizeof(entry));
2307 + memset(&entry, 0, sizeof(entry));
2308
2309 disable_8259A_irq(0);
2310
2311 @@ -929,10 +943,7 @@ static void __init setup_ExtINT_IRQ0_pin
2312 /*
2313 * Add it to the IO-APIC irq-routing table:
2314 */
2315 - spin_lock_irqsave(&ioapic_lock, flags);
2316 - io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
2317 - io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
2318 - spin_unlock_irqrestore(&ioapic_lock, flags);
2319 + ioapic_write_entry(apic, pin, entry);
2320
2321 enable_8259A_irq(0);
2322 }
2323 @@ -1061,8 +1072,7 @@ void __apicdebuginit print_local_APIC(vo
2324
2325 printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
2326 smp_processor_id(), hard_smp_processor_id());
2327 - v = apic_read(APIC_ID);
2328 - printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, GET_APIC_ID(v));
2329 + printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, GET_APIC_ID(read_apic_id()));
2330 v = apic_read(APIC_LVR);
2331 printk(KERN_INFO "... APIC VERSION: %08x\n", v);
2332 ver = GET_APIC_VERSION(v);
2333 @@ -1260,7 +1270,7 @@ void disable_IO_APIC(void)
2334 entry.dest_mode = 0; /* Physical */
2335 entry.delivery_mode = dest_ExtINT; /* ExtInt */
2336 entry.vector = 0;
2337 - entry.dest = GET_APIC_ID(apic_read(APIC_ID));
2338 + entry.dest = GET_APIC_ID(read_apic_id());
2339
2340 /*
2341 * Add it to the IO-APIC irq-routing table:
2342 @@ -1353,9 +1363,7 @@ static int ioapic_retrigger_irq(unsigned
2343 unsigned long flags;
2344
2345 spin_lock_irqsave(&vector_lock, flags);
2346 - cpus_clear(mask);
2347 - cpu_set(first_cpu(cfg->domain), mask);
2348 -
2349 + mask = cpumask_of_cpu(first_cpu(cfg->domain));
2350 send_IPI_mask(mask, cfg->vector);
2351 spin_unlock_irqrestore(&vector_lock, flags);
2352
2353 @@ -1519,8 +1527,7 @@ static inline void init_IO_APIC_traps(vo
2354 * 0x80, because int 0x80 is hm, kind of importantish. ;)
2355 */
2356 for (irq = 0; irq < NR_IRQS ; irq++) {
2357 - int tmp = irq;
2358 - if (IO_APIC_IRQ(tmp) && !irq_cfg[tmp].vector) {
2359 + if (IO_APIC_IRQ(irq) && !irq_cfg[irq].vector) {
2360 /*
2361 * Hmm.. We don't have an entry for this,
2362 * so default to an old-fashioned 8259
2363 @@ -1597,22 +1604,19 @@ static void __init setup_nmi(void)
2364 * cycles as some i82489DX-based boards have glue logic that keeps the
2365 * 8259A interrupt line asserted until INTA. --macro
2366 */
2367 -static inline void unlock_ExtINT_logic(void)
2368 +static inline void __init unlock_ExtINT_logic(void)
2369 {
2370 int apic, pin, i;
2371 struct IO_APIC_route_entry entry0, entry1;
2372 unsigned char save_control, save_freq_select;
2373 - unsigned long flags;
2374
2375 pin = find_isa_irq_pin(8, mp_INT);
2376 apic = find_isa_irq_apic(8, mp_INT);
2377 if (pin == -1)
2378 return;
2379
2380 - spin_lock_irqsave(&ioapic_lock, flags);
2381 - *(((int *)&entry0) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
2382 - *(((int *)&entry0) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
2383 - spin_unlock_irqrestore(&ioapic_lock, flags);
2384 + entry0 = ioapic_read_entry(apic, pin);
2385 +
2386 clear_IO_APIC_pin(apic, pin);
2387
2388 memset(&entry1, 0, sizeof(entry1));
2389 @@ -1625,10 +1629,7 @@ static inline void unlock_ExtINT_logic(v
2390 entry1.trigger = 0;
2391 entry1.vector = 0;
2392
2393 - spin_lock_irqsave(&ioapic_lock, flags);
2394 - io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry1) + 1));
2395 - io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry1) + 0));
2396 - spin_unlock_irqrestore(&ioapic_lock, flags);
2397 + ioapic_write_entry(apic, pin, entry1);
2398
2399 save_control = CMOS_READ(RTC_CONTROL);
2400 save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
2401 @@ -1647,10 +1648,7 @@ static inline void unlock_ExtINT_logic(v
2402 CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
2403 clear_IO_APIC_pin(apic, pin);
2404
2405 - spin_lock_irqsave(&ioapic_lock, flags);
2406 - io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry0) + 1));
2407 - io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry0) + 0));
2408 - spin_unlock_irqrestore(&ioapic_lock, flags);
2409 + ioapic_write_entry(apic, pin, entry0);
2410 }
2411
2412 /*
2413 @@ -2327,7 +2325,6 @@ static struct resource * __init ioapic_s
2414 res = (void *)mem;
2415
2416 if (mem != NULL) {
2417 - memset(mem, 0, n);
2418 mem += sizeof(struct resource) * nr_ioapics;
2419
2420 for (i = 0; i < nr_ioapics; i++) {
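
Several hunks above fold the open-coded io_apic_write(apic, 0x11 + 2*pin, ...) / 0x10 + 2*pin pairs into ioapic_write_entry()/ioapic_read_entry(), hiding the fact that one 64-bit redirection entry spans two consecutive 32-bit IO-APIC registers. A toy model of that split — the register file is a plain array rather than a memory-mapped window, and the helpers keep the high-word-first write order of the sequences they replace:

#include <stdio.h>
#include <stdint.h>

static uint32_t regs[256];		/* toy IO-APIC register file */

static void io_apic_write(int reg, uint32_t v)	{ regs[reg] = v; }
static uint32_t io_apic_read(int reg)		{ return regs[reg]; }

struct route_entry { uint64_t raw; };	/* 64-bit redirection entry */

/* Entry for 'pin' lives in registers 0x10+2*pin (low word) and
 * 0x11+2*pin (high word). */
static void ioapic_write_entry(int pin, struct route_entry e)
{
	io_apic_write(0x11 + 2 * pin, (uint32_t)(e.raw >> 32));
	io_apic_write(0x10 + 2 * pin, (uint32_t)e.raw);
}

static struct route_entry ioapic_read_entry(int pin)
{
	struct route_entry e;

	e.raw = ((uint64_t)io_apic_read(0x11 + 2 * pin) << 32) |
		io_apic_read(0x10 + 2 * pin);
	return e;
}

int main(void)
{
	struct route_entry e = { .raw = 0xff00000000000031ull };

	ioapic_write_entry(3, e);
	printf("%#llx\n", (unsigned long long)ioapic_read_entry(3).raw);
	return 0;
}
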
2421 --- /dev/null
2422 +++ b/arch/x86/kernel/ipi-xen.c
2423 @@ -0,0 +1,232 @@
2424 +#include <linux/cpumask.h>
2425 +#include <linux/interrupt.h>
2426 +#include <linux/init.h>
2427 +
2428 +#include <linux/mm.h>
2429 +#include <linux/delay.h>
2430 +#include <linux/spinlock.h>
2431 +#include <linux/kernel_stat.h>
2432 +#include <linux/mc146818rtc.h>
2433 +#include <linux/cache.h>
2434 +#include <linux/interrupt.h>
2435 +#include <linux/cpu.h>
2436 +#include <linux/module.h>
2437 +
2438 +#include <asm/smp.h>
2439 +#include <asm/mtrr.h>
2440 +#include <asm/tlbflush.h>
2441 +#include <asm/mmu_context.h>
2442 +#include <asm/apic.h>
2443 +#include <asm/proto.h>
2444 +
2445 +#ifdef CONFIG_X86_32
2446 +#ifndef CONFIG_XEN
2447 +#include <mach_apic.h>
2448 +/*
2449 + * the following functions deal with sending IPIs between CPUs.
2450 + *
2451 + * We use 'broadcast', CPU->CPU IPIs and self-IPIs too.
2452 + */
2453 +
2454 +static inline int __prepare_ICR(unsigned int shortcut, int vector)
2455 +{
2456 + unsigned int icr = shortcut | APIC_DEST_LOGICAL;
2457 +
2458 + switch (vector) {
2459 + default:
2460 + icr |= APIC_DM_FIXED | vector;
2461 + break;
2462 + case NMI_VECTOR:
2463 + icr |= APIC_DM_NMI;
2464 + break;
2465 + }
2466 + return icr;
2467 +}
2468 +
2469 +static inline int __prepare_ICR2(unsigned int mask)
2470 +{
2471 + return SET_APIC_DEST_FIELD(mask);
2472 +}
2473 +#else
2474 +#include <xen/evtchn.h>
2475 +
2476 +DECLARE_PER_CPU(int, ipi_to_irq[NR_IPIS]);
2477 +
2478 +static inline void __send_IPI_one(unsigned int cpu, int vector)
2479 +{
2480 + int irq = per_cpu(ipi_to_irq, cpu)[vector];
2481 + BUG_ON(irq < 0);
2482 + notify_remote_via_irq(irq);
2483 +}
2484 +#endif
2485 +
2486 +void __send_IPI_shortcut(unsigned int shortcut, int vector)
2487 +{
2488 +#ifndef CONFIG_XEN
2489 + /*
2490 + * Subtle. In the case of the 'never do double writes' workaround
2491 + * we have to lock out interrupts to be safe. As we don't care
2492 +	 * about the value read, we use an atomic rmw access to avoid costly
2493 + * cli/sti. Otherwise we use an even cheaper single atomic write
2494 + * to the APIC.
2495 + */
2496 + unsigned int cfg;
2497 +
2498 + /*
2499 + * Wait for idle.
2500 + */
2501 + apic_wait_icr_idle();
2502 +
2503 + /*
2504 + * No need to touch the target chip field
2505 + */
2506 + cfg = __prepare_ICR(shortcut, vector);
2507 +
2508 + /*
2509 + * Send the IPI. The write to APIC_ICR fires this off.
2510 + */
2511 + apic_write_around(APIC_ICR, cfg);
2512 +#else
2513 + int cpu;
2514 +
2515 + switch (shortcut) {
2516 + case APIC_DEST_SELF:
2517 + __send_IPI_one(smp_processor_id(), vector);
2518 + break;
2519 + case APIC_DEST_ALLBUT:
2520 + for_each_online_cpu(cpu)
2521 + if (cpu != smp_processor_id())
2522 + __send_IPI_one(cpu, vector);
2523 + break;
2524 + default:
2525 +		printk(KERN_WARNING "__send_IPI_shortcut %08x vector %d\n", shortcut,
2526 + vector);
2527 + break;
2528 + }
2529 +#endif
2530 +}
2531 +
2532 +void send_IPI_self(int vector)
2533 +{
2534 + __send_IPI_shortcut(APIC_DEST_SELF, vector);
2535 +}
2536 +
2537 +#ifndef CONFIG_XEN
2538 +/*
2539 + * This is used to send an IPI with no shorthand notation (the destination is
2540 + * specified in bits 56 to 63 of the ICR).
2541 + */
2542 +static inline void __send_IPI_dest_field(unsigned long mask, int vector)
2543 +{
2544 + unsigned long cfg;
2545 +
2546 + /*
2547 + * Wait for idle.
2548 + */
2549 + if (unlikely(vector == NMI_VECTOR))
2550 + safe_apic_wait_icr_idle();
2551 + else
2552 + apic_wait_icr_idle();
2553 +
2554 + /*
2555 + * prepare target chip field
2556 + */
2557 + cfg = __prepare_ICR2(mask);
2558 + apic_write_around(APIC_ICR2, cfg);
2559 +
2560 + /*
2561 + * program the ICR
2562 + */
2563 + cfg = __prepare_ICR(0, vector);
2564 +
2565 + /*
2566 + * Send the IPI. The write to APIC_ICR fires this off.
2567 + */
2568 + apic_write_around(APIC_ICR, cfg);
2569 +}
2570 +#endif
2571 +
2572 +/*
2573 + * This is only used on smaller machines.
2574 + */
2575 +void send_IPI_mask_bitmask(cpumask_t cpumask, int vector)
2576 +{
2577 +#ifndef CONFIG_XEN
2578 + unsigned long mask = cpus_addr(cpumask)[0];
2579 +#else
2580 + cpumask_t mask;
2581 + unsigned int cpu;
2582 +#endif
2583 + unsigned long flags;
2584 +
2585 + local_irq_save(flags);
2586 +#ifndef CONFIG_XEN
2587 + WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]);
2588 + __send_IPI_dest_field(mask, vector);
2589 +#else
2590 + cpus_andnot(mask, cpumask, cpu_online_map);
2591 + WARN_ON(!cpus_empty(mask));
2592 + for_each_online_cpu(cpu)
2593 + if (cpu_isset(cpu, cpumask))
2594 + __send_IPI_one(cpu, vector);
2595 +#endif
2596 + local_irq_restore(flags);
2597 +}
2598 +
2599 +void send_IPI_mask_sequence(cpumask_t mask, int vector)
2600 +{
2601 +#ifndef CONFIG_XEN
2602 + unsigned long flags;
2603 + unsigned int query_cpu;
2604 +
2605 + /*
2606 + * Hack. The clustered APIC addressing mode doesn't allow us to send
2607 + * to an arbitrary mask, so I do a unicast to each CPU instead. This
2608 + * should be modified to do 1 message per cluster ID - mbligh
2609 + */
2610 +
2611 + local_irq_save(flags);
2612 + for_each_possible_cpu(query_cpu) {
2613 + if (cpu_isset(query_cpu, mask)) {
2614 + __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu),
2615 + vector);
2616 + }
2617 + }
2618 + local_irq_restore(flags);
2619 +#else
2620 + send_IPI_mask_bitmask(mask, vector);
2621 +#endif
2622 +}
2623 +
2624 +/* must come after the send_IPI functions above for inlining */
2625 +#include <mach_ipi.h>
2626 +
2627 +#ifndef CONFIG_XEN
2628 +static int convert_apicid_to_cpu(int apic_id)
2629 +{
2630 + int i;
2631 +
2632 + for_each_possible_cpu(i) {
2633 + if (per_cpu(x86_cpu_to_apicid, i) == apic_id)
2634 + return i;
2635 + }
2636 + return -1;
2637 +}
2638 +
2639 +int safe_smp_processor_id(void)
2640 +{
2641 + int apicid, cpuid;
2642 +
2643 + if (!boot_cpu_has(X86_FEATURE_APIC))
2644 + return 0;
2645 +
2646 + apicid = hard_smp_processor_id();
2647 + if (apicid == BAD_APICID)
2648 + return 0;
2649 +
2650 + cpuid = convert_apicid_to_cpu(apicid);
2651 +
2652 + return cpuid >= 0 ? cpuid : 0;
2653 +}
2654 +#endif
2655 +#endif
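
The new ipi-xen.c keeps both transports side by side: natively an IPI is composed by __prepare_ICR() and fired with a write to APIC_ICR, while under Xen it collapses to notify_remote_via_irq() on a per-CPU event-channel IRQ. The ICR packing is plain bit-ORing; a sketch with the APIC_* encodings written out (values as in asm/apicdef.h):

#include <stdio.h>

#define APIC_DEST_LOGICAL	0x00800
#define APIC_DM_FIXED		0x00000
#define APIC_DM_NMI		0x00400
#define APIC_DEST_SELF		0x40000
#define NMI_VECTOR		0x02

/* Mirror of __prepare_ICR(): shortcut | destination mode | delivery
 * mode | vector, all packed into the low ICR word. */
static unsigned int prepare_icr(unsigned int shortcut, int vector)
{
	unsigned int icr = shortcut | APIC_DEST_LOGICAL;

	if (vector == NMI_VECTOR)
		icr |= APIC_DM_NMI;	/* NMI delivery ignores the vector */
	else
		icr |= APIC_DM_FIXED | vector;
	return icr;
}

int main(void)
{
	printf("self, vector 0xfd: %#x\n", prepare_icr(APIC_DEST_SELF, 0xfd));
	printf("self, NMI: %#x\n", prepare_icr(APIC_DEST_SELF, NMI_VECTOR));
	return 0;
}
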
2656 --- a/arch/x86/kernel/irq_32-xen.c
2657 +++ b/arch/x86/kernel/irq_32-xen.c
2658 @@ -79,7 +79,7 @@ unsigned int do_IRQ(struct pt_regs *regs
2659
2660 if (unlikely((unsigned)irq >= NR_IRQS)) {
2661 printk(KERN_EMERG "%s: cannot handle IRQ %d\n",
2662 - __FUNCTION__, irq);
2663 + __func__, irq);
2664 BUG();
2665 }
2666
2667 @@ -134,7 +134,7 @@ unsigned int do_IRQ(struct pt_regs *regs
2668 : "=a" (arg1), "=d" (arg2), "=b" (bx)
2669 : "0" (irq), "1" (desc), "2" (isp),
2670 "D" (desc->handle_irq)
2671 - : "memory", "cc"
2672 + : "memory", "cc", "ecx"
2673 );
2674 } else
2675 #endif
2676 @@ -190,8 +190,6 @@ void irq_ctx_exit(int cpu)
2677 hardirq_ctx[cpu] = NULL;
2678 }
2679
2680 -extern asmlinkage void __do_softirq(void);
2681 -
2682 asmlinkage void do_softirq(void)
2683 {
2684 unsigned long flags;
2685 --- a/arch/x86/kernel/machine_kexec_64.c
2686 +++ b/arch/x86/kernel/machine_kexec_64.c
2687 @@ -120,8 +120,6 @@ int __init machine_kexec_setup_resources
2688 return 0;
2689 }
2690
2691 -void machine_kexec_register_resources(struct resource *res) { ; }
2692 -
2693 #else /* CONFIG_XEN */
2694
2695 #define x__pmd(x) __pmd(x)
2696 --- a/arch/x86/kernel/Makefile
2697 +++ b/arch/x86/kernel/Makefile
2698 @@ -122,8 +122,7 @@ ifeq ($(CONFIG_X86_64),y)
2699
2700 obj-$(CONFIG_XEN) += nmi_64.o
2701 time_64-$(CONFIG_XEN) += time_32.o
2702 - pci-dma_64-$(CONFIG_XEN) += pci-dma_32.o
2703 endif
2704
2705 -disabled-obj-$(CONFIG_XEN) := early-quirks.o hpet.o i8253.o i8259_$(BITS).o reboot.o \
2706 - smpboot_$(BITS).o tsc_$(BITS).o tsc_sync.o
2707 +disabled-obj-$(CONFIG_XEN) := early-quirks.o hpet.o i8253.o i8259_$(BITS).o \
2708 + pci-swiotlb_64.o reboot.o smpboot.o tlb_$(BITS).o tsc_$(BITS).o tsc_sync.o vsmp_64.o
2709 --- a/arch/x86/kernel/microcode-xen.c
2710 +++ b/arch/x86/kernel/microcode-xen.c
2711 @@ -162,7 +162,7 @@ static int request_microcode(void)
2712 c->x86, c->x86_model, c->x86_mask);
2713 error = request_firmware(&firmware, name, &microcode_pdev->dev);
2714 if (error) {
2715 - pr_debug("ucode data file %s load failed\n", name);
2716 + pr_debug("microcode: ucode data file %s load failed\n", name);
2717 return error;
2718 }
2719
2720 --- a/arch/x86/kernel/mmconf-fam10h_64.c
2721 +++ b/arch/x86/kernel/mmconf-fam10h_64.c
2722 @@ -219,6 +219,16 @@ void __cpuinit fam10h_check_enable_mmcfg
2723 val |= fam10h_pci_mmconf_base | (8 << FAM10H_MMIO_CONF_BUSRANGE_SHIFT) |
2724 FAM10H_MMIO_CONF_ENABLE;
2725 wrmsrl(address, val);
2726 +
2727 +#ifdef CONFIG_XEN
2728 + {
2729 + u64 val2;
2730 +
2731 + rdmsrl(address, val2);
2732 + if (val2 != val)
2733 + pci_probe &= ~PCI_CHECK_ENABLE_AMD_MMCONF;
2734 + }
2735 +#endif
2736 }
2737
2738 static int __devinit set_check_enable_amd_mmconf(const struct dmi_system_id *d)
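
The Xen-only hunk above adds a read-back after enabling the family-10h MMCONF MSR: a hypervisor may silently discard the wrmsrl, so if the value did not stick the code clears PCI_CHECK_ENABLE_AMD_MMCONF and PCI probing will not rely on MMCONF. A sketch of the write-then-verify pattern; the rdmsrl/wrmsrl stand-ins and the flag value are assumptions here, since real MSR access needs ring 0:

#include <stdio.h>
#include <stdint.h>

#define PCI_CHECK_ENABLE_AMD_MMCONF	0x20000	/* assumed flag value */
static unsigned int pci_probe = PCI_CHECK_ENABLE_AMD_MMCONF;

/* Stand-ins modelling a hypervisor that swallows writes to this MSR. */
static uint64_t msr_shadow;
static void wrmsrl(uint32_t addr, uint64_t val)  { (void)addr; (void)val; }
static void rdmsrl(uint32_t addr, uint64_t *val) { (void)addr; *val = msr_shadow; }

int main(void)
{
	uint32_t address = 0xc0010058;	/* MSR_FAM10H_MMIO_CONF_BASE */
	uint64_t val = 0xe0000021, val2;

	wrmsrl(address, val);
	rdmsrl(address, &val2);
	if (val2 != val)		/* the write was swallowed */
		pci_probe &= ~PCI_CHECK_ENABLE_AMD_MMCONF;

	printf("MMCONF usable: %s\n",
	       pci_probe & PCI_CHECK_ENABLE_AMD_MMCONF ? "yes" : "no");
	return 0;
}
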
2739 --- a/arch/x86/kernel/mpparse_32-xen.c
2740 +++ /dev/null
2741 @@ -1,1161 +0,0 @@
2742 -/*
2743 - * Intel Multiprocessor Specification 1.1 and 1.4
2744 - * compliant MP-table parsing routines.
2745 - *
2746 - * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
2747 - * (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
2748 - *
2749 - * Fixes
2750 - * Erich Boleyn : MP v1.4 and additional changes.
2751 - * Alan Cox : Added EBDA scanning
2752 - * Ingo Molnar : various cleanups and rewrites
2753 - * Maciej W. Rozycki: Bits for default MP configurations
2754 - * Paul Diefenbaugh: Added full ACPI support
2755 - */
2756 -
2757 -#include <linux/mm.h>
2758 -#include <linux/init.h>
2759 -#include <linux/acpi.h>
2760 -#include <linux/delay.h>
2761 -#include <linux/bootmem.h>
2762 -#include <linux/kernel_stat.h>
2763 -#include <linux/mc146818rtc.h>
2764 -#include <linux/bitops.h>
2765 -
2766 -#include <asm/smp.h>
2767 -#include <asm/acpi.h>
2768 -#include <asm/mtrr.h>
2769 -#include <asm/mpspec.h>
2770 -#include <asm/io_apic.h>
2771 -
2772 -#include <mach_apic.h>
2773 -#include <mach_apicdef.h>
2774 -#include <mach_mpparse.h>
2775 -#include <bios_ebda.h>
2776 -
2777 -/* Have we found an MP table */
2778 -int smp_found_config;
2779 -unsigned int __cpuinitdata maxcpus = NR_CPUS;
2780 -
2781 -/*
2782 - * Various Linux-internal data structures created from the
2783 - * MP-table.
2784 - */
2785 -int apic_version [MAX_APICS];
2786 -int mp_bus_id_to_type [MAX_MP_BUSSES];
2787 -int mp_bus_id_to_node [MAX_MP_BUSSES];
2788 -int mp_bus_id_to_local [MAX_MP_BUSSES];
2789 -int quad_local_to_mp_bus_id [NR_CPUS/4][4];
2790 -int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
2791 -static int mp_current_pci_id;
2792 -
2793 -/* I/O APIC entries */
2794 -struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
2795 -
2796 -/* # of MP IRQ source entries */
2797 -struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
2798 -
2799 -/* MP IRQ source entries */
2800 -int mp_irq_entries;
2801 -
2802 -int nr_ioapics;
2803 -
2804 -int pic_mode;
2805 -unsigned long mp_lapic_addr;
2806 -
2807 -unsigned int def_to_bigsmp = 0;
2808 -
2809 -/* Processor that is doing the boot up */
2810 -unsigned int boot_cpu_physical_apicid = -1U;
2811 -/* Internal processor count */
2812 -unsigned int num_processors;
2813 -
2814 -/* Bitmask of physically existing CPUs */
2815 -physid_mask_t phys_cpu_present_map;
2816 -
2817 -u8 bios_cpu_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
2818 -
2819 -/*
2820 - * Intel MP BIOS table parsing routines:
2821 - */
2822 -
2823 -
2824 -/*
2825 - * Checksum an MP configuration block.
2826 - */
2827 -
2828 -static int __init mpf_checksum(unsigned char *mp, int len)
2829 -{
2830 - int sum = 0;
2831 -
2832 - while (len--)
2833 - sum += *mp++;
2834 -
2835 - return sum & 0xFF;
2836 -}
2837 -
2838 -/*
2839 - * Have to match translation table entries to main table entries by counter
2840 - * hence the mpc_record variable .... can't see a less disgusting way of
2841 - * doing this ....
2842 - */
2843 -
2844 -static int mpc_record;
2845 -static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] __cpuinitdata;
2846 -
2847 -#ifndef CONFIG_XEN
2848 -static void __cpuinit MP_processor_info (struct mpc_config_processor *m)
2849 -{
2850 - int ver, apicid;
2851 - physid_mask_t phys_cpu;
2852 -
2853 - if (!(m->mpc_cpuflag & CPU_ENABLED))
2854 - return;
2855 -
2856 - apicid = mpc_apic_id(m, translation_table[mpc_record]);
2857 -
2858 - if (m->mpc_featureflag&(1<<0))
2859 - Dprintk(" Floating point unit present.\n");
2860 - if (m->mpc_featureflag&(1<<7))
2861 - Dprintk(" Machine Exception supported.\n");
2862 - if (m->mpc_featureflag&(1<<8))
2863 - Dprintk(" 64 bit compare & exchange supported.\n");
2864 - if (m->mpc_featureflag&(1<<9))
2865 - Dprintk(" Internal APIC present.\n");
2866 - if (m->mpc_featureflag&(1<<11))
2867 - Dprintk(" SEP present.\n");
2868 - if (m->mpc_featureflag&(1<<12))
2869 - Dprintk(" MTRR present.\n");
2870 - if (m->mpc_featureflag&(1<<13))
2871 - Dprintk(" PGE present.\n");
2872 - if (m->mpc_featureflag&(1<<14))
2873 - Dprintk(" MCA present.\n");
2874 - if (m->mpc_featureflag&(1<<15))
2875 - Dprintk(" CMOV present.\n");
2876 - if (m->mpc_featureflag&(1<<16))
2877 - Dprintk(" PAT present.\n");
2878 - if (m->mpc_featureflag&(1<<17))
2879 - Dprintk(" PSE present.\n");
2880 - if (m->mpc_featureflag&(1<<18))
2881 - Dprintk(" PSN present.\n");
2882 - if (m->mpc_featureflag&(1<<19))
2883 - Dprintk(" Cache Line Flush Instruction present.\n");
2884 - /* 20 Reserved */
2885 - if (m->mpc_featureflag&(1<<21))
2886 - Dprintk(" Debug Trace and EMON Store present.\n");
2887 - if (m->mpc_featureflag&(1<<22))
2888 - Dprintk(" ACPI Thermal Throttle Registers present.\n");
2889 - if (m->mpc_featureflag&(1<<23))
2890 - Dprintk(" MMX present.\n");
2891 - if (m->mpc_featureflag&(1<<24))
2892 - Dprintk(" FXSR present.\n");
2893 - if (m->mpc_featureflag&(1<<25))
2894 - Dprintk(" XMM present.\n");
2895 - if (m->mpc_featureflag&(1<<26))
2896 - Dprintk(" Willamette New Instructions present.\n");
2897 - if (m->mpc_featureflag&(1<<27))
2898 - Dprintk(" Self Snoop present.\n");
2899 - if (m->mpc_featureflag&(1<<28))
2900 - Dprintk(" HT present.\n");
2901 - if (m->mpc_featureflag&(1<<29))
2902 - Dprintk(" Thermal Monitor present.\n");
2903 - /* 30, 31 Reserved */
2904 -
2905 -
2906 - if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
2907 - Dprintk(" Bootup CPU\n");
2908 - boot_cpu_physical_apicid = m->mpc_apicid;
2909 - }
2910 -
2911 - ver = m->mpc_apicver;
2912 -
2913 - /*
2914 - * Validate version
2915 - */
2916 - if (ver == 0x0) {
2917 - printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! "
2918 - "fixing up to 0x10. (tell your hw vendor)\n",
2919 - m->mpc_apicid);
2920 - ver = 0x10;
2921 - }
2922 - apic_version[m->mpc_apicid] = ver;
2923 -
2924 - phys_cpu = apicid_to_cpu_present(apicid);
2925 - physids_or(phys_cpu_present_map, phys_cpu_present_map, phys_cpu);
2926 -
2927 - if (num_processors >= NR_CPUS) {
2928 - printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
2929 - " Processor ignored.\n", NR_CPUS);
2930 - return;
2931 - }
2932 -
2933 - if (num_processors >= maxcpus) {
2934 - printk(KERN_WARNING "WARNING: maxcpus limit of %i reached."
2935 - " Processor ignored.\n", maxcpus);
2936 - return;
2937 - }
2938 -
2939 - cpu_set(num_processors, cpu_possible_map);
2940 - num_processors++;
2941 -
2942 - /*
2943 - * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y
2944 - * but we need to work other dependencies like SMP_SUSPEND etc
2945 - * before this can be done without some confusion.
2946 - * if (CPU_HOTPLUG_ENABLED || num_processors > 8)
2947 - * - Ashok Raj <ashok.raj@intel.com>
2948 - */
2949 - if (num_processors > 8) {
2950 - switch (boot_cpu_data.x86_vendor) {
2951 - case X86_VENDOR_INTEL:
2952 - if (!APIC_XAPIC(ver)) {
2953 - def_to_bigsmp = 0;
2954 - break;
2955 - }
2956 - /* If P4 and above fall through */
2957 - case X86_VENDOR_AMD:
2958 - def_to_bigsmp = 1;
2959 - }
2960 - }
2961 - bios_cpu_apicid[num_processors - 1] = m->mpc_apicid;
2962 -}
2963 -#else
2964 -static void __cpuinit MP_processor_info (struct mpc_config_processor *m)
2965 -{
2966 - num_processors++;
2967 -}
2968 -#endif /* CONFIG_XEN */
2969 -
2970 -static void __init MP_bus_info (struct mpc_config_bus *m)
2971 -{
2972 - char str[7];
2973 -
2974 - memcpy(str, m->mpc_bustype, 6);
2975 - str[6] = 0;
2976 -
2977 - mpc_oem_bus_info(m, str, translation_table[mpc_record]);
2978 -
2979 -#if MAX_MP_BUSSES < 256
2980 - if (m->mpc_busid >= MAX_MP_BUSSES) {
2981 - printk(KERN_WARNING "MP table busid value (%d) for bustype %s "
2982 - " is too large, max. supported is %d\n",
2983 - m->mpc_busid, str, MAX_MP_BUSSES - 1);
2984 - return;
2985 - }
2986 -#endif
2987 -
2988 - if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA)-1) == 0) {
2989 - mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
2990 - } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA)-1) == 0) {
2991 - mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA;
2992 - } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI)-1) == 0) {
2993 - mpc_oem_pci_bus(m, translation_table[mpc_record]);
2994 - mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI;
2995 - mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id;
2996 - mp_current_pci_id++;
2997 - } else if (strncmp(str, BUSTYPE_MCA, sizeof(BUSTYPE_MCA)-1) == 0) {
2998 - mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA;
2999 - } else {
3000 - printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str);
3001 - }
3002 -}
3003 -
3004 -static void __init MP_ioapic_info (struct mpc_config_ioapic *m)
3005 -{
3006 - if (!(m->mpc_flags & MPC_APIC_USABLE))
3007 - return;
3008 -
3009 - printk(KERN_INFO "I/O APIC #%d Version %d at 0x%X.\n",
3010 - m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr);
3011 - if (nr_ioapics >= MAX_IO_APICS) {
3012 - printk(KERN_CRIT "Max # of I/O APICs (%d) exceeded (found %d).\n",
3013 - MAX_IO_APICS, nr_ioapics);
3014 - panic("Recompile kernel with bigger MAX_IO_APICS!.\n");
3015 - }
3016 - if (!m->mpc_apicaddr) {
3017 - printk(KERN_ERR "WARNING: bogus zero I/O APIC address"
3018 - " found in MP table, skipping!\n");
3019 - return;
3020 - }
3021 - mp_ioapics[nr_ioapics] = *m;
3022 - nr_ioapics++;
3023 -}
3024 -
3025 -static void __init MP_intsrc_info (struct mpc_config_intsrc *m)
3026 -{
3027 - mp_irqs [mp_irq_entries] = *m;
3028 - Dprintk("Int: type %d, pol %d, trig %d, bus %d,"
3029 - " IRQ %02x, APIC ID %x, APIC INT %02x\n",
3030 - m->mpc_irqtype, m->mpc_irqflag & 3,
3031 - (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus,
3032 - m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq);
3033 - if (++mp_irq_entries == MAX_IRQ_SOURCES)
3034 - panic("Max # of irq sources exceeded!!\n");
3035 -}
3036 -
3037 -static void __init MP_lintsrc_info (struct mpc_config_lintsrc *m)
3038 -{
3039 - Dprintk("Lint: type %d, pol %d, trig %d, bus %d,"
3040 - " IRQ %02x, APIC ID %x, APIC LINT %02x\n",
3041 - m->mpc_irqtype, m->mpc_irqflag & 3,
3042 - (m->mpc_irqflag >> 2) &3, m->mpc_srcbusid,
3043 - m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
3044 -}
3045 -
3046 -#ifdef CONFIG_X86_NUMAQ
3047 -static void __init MP_translation_info (struct mpc_config_translation *m)
3048 -{
3049 - printk(KERN_INFO "Translation: record %d, type %d, quad %d, global %d, local %d\n", mpc_record, m->trans_type, m->trans_quad, m->trans_global, m->trans_local);
3050 -
3051 - if (mpc_record >= MAX_MPC_ENTRY)
3052 - printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n");
3053 - else
3054 - translation_table[mpc_record] = m; /* stash this for later */
3055 - if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad))
3056 - node_set_online(m->trans_quad);
3057 -}
3058 -
3059 -/*
3060 - * Read/parse the MPC oem tables
3061 - */
3062 -
3063 -static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable, \
3064 - unsigned short oemsize)
3065 -{
3066 - int count = sizeof (*oemtable); /* the header size */
3067 - unsigned char *oemptr = ((unsigned char *)oemtable)+count;
3068 -
3069 - mpc_record = 0;
3070 - printk(KERN_INFO "Found an OEM MPC table at %8p - parsing it ... \n", oemtable);
3071 - if (memcmp(oemtable->oem_signature,MPC_OEM_SIGNATURE,4))
3072 - {
3073 - printk(KERN_WARNING "SMP mpc oemtable: bad signature [%c%c%c%c]!\n",
3074 - oemtable->oem_signature[0],
3075 - oemtable->oem_signature[1],
3076 - oemtable->oem_signature[2],
3077 - oemtable->oem_signature[3]);
3078 - return;
3079 - }
3080 - if (mpf_checksum((unsigned char *)oemtable,oemtable->oem_length))
3081 - {
3082 - printk(KERN_WARNING "SMP oem mptable: checksum error!\n");
3083 - return;
3084 - }
3085 - while (count < oemtable->oem_length) {
3086 - switch (*oemptr) {
3087 - case MP_TRANSLATION:
3088 - {
3089 - struct mpc_config_translation *m=
3090 - (struct mpc_config_translation *)oemptr;
3091 - MP_translation_info(m);
3092 - oemptr += sizeof(*m);
3093 - count += sizeof(*m);
3094 - ++mpc_record;
3095 - break;
3096 - }
3097 - default:
3098 - {
3099 - printk(KERN_WARNING "Unrecognised OEM table entry type! - %d\n", (int) *oemptr);
3100 - return;
3101 - }
3102 - }
3103 - }
3104 -}
3105 -
3106 -static inline void mps_oem_check(struct mp_config_table *mpc, char *oem,
3107 - char *productid)
3108 -{
3109 - if (strncmp(oem, "IBM NUMA", 8))
3110 - printk("Warning! May not be a NUMA-Q system!\n");
3111 - if (mpc->mpc_oemptr)
3112 - smp_read_mpc_oem((struct mp_config_oemtable *) mpc->mpc_oemptr,
3113 - mpc->mpc_oemsize);
3114 -}
3115 -#endif /* CONFIG_X86_NUMAQ */
3116 -
3117 -/*
3118 - * Read/parse the MPC
3119 - */
3120 -
3121 -static int __init smp_read_mpc(struct mp_config_table *mpc)
3122 -{
3123 - char str[16];
3124 - char oem[10];
3125 - int count=sizeof(*mpc);
3126 - unsigned char *mpt=((unsigned char *)mpc)+count;
3127 -
3128 - if (memcmp(mpc->mpc_signature,MPC_SIGNATURE,4)) {
3129 - printk(KERN_ERR "SMP mptable: bad signature [0x%x]!\n",
3130 - *(u32 *)mpc->mpc_signature);
3131 - return 0;
3132 - }
3133 - if (mpf_checksum((unsigned char *)mpc,mpc->mpc_length)) {
3134 - printk(KERN_ERR "SMP mptable: checksum error!\n");
3135 - return 0;
3136 - }
3137 - if (mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04) {
3138 - printk(KERN_ERR "SMP mptable: bad table version (%d)!!\n",
3139 - mpc->mpc_spec);
3140 - return 0;
3141 - }
3142 - if (!mpc->mpc_lapic) {
3143 - printk(KERN_ERR "SMP mptable: null local APIC address!\n");
3144 - return 0;
3145 - }
3146 - memcpy(oem,mpc->mpc_oem,8);
3147 - oem[8]=0;
3148 - printk(KERN_INFO "OEM ID: %s ",oem);
3149 -
3150 - memcpy(str,mpc->mpc_productid,12);
3151 - str[12]=0;
3152 - printk("Product ID: %s ",str);
3153 -
3154 - mps_oem_check(mpc, oem, str);
3155 -
3156 - printk("APIC at: 0x%X\n", mpc->mpc_lapic);
3157 -
3158 - /*
3159 - * Save the local APIC address (it might be non-default) -- but only
3160 - * if we're not using ACPI.
3161 - */
3162 - if (!acpi_lapic)
3163 - mp_lapic_addr = mpc->mpc_lapic;
3164 -
3165 - /*
3166 - * Now process the configuration blocks.
3167 - */
3168 - mpc_record = 0;
3169 - while (count < mpc->mpc_length) {
3170 - switch(*mpt) {
3171 - case MP_PROCESSOR:
3172 - {
3173 - struct mpc_config_processor *m=
3174 - (struct mpc_config_processor *)mpt;
3175 - /* ACPI may have already provided this data */
3176 - if (!acpi_lapic)
3177 - MP_processor_info(m);
3178 - mpt += sizeof(*m);
3179 - count += sizeof(*m);
3180 - break;
3181 - }
3182 - case MP_BUS:
3183 - {
3184 - struct mpc_config_bus *m=
3185 - (struct mpc_config_bus *)mpt;
3186 - MP_bus_info(m);
3187 - mpt += sizeof(*m);
3188 - count += sizeof(*m);
3189 - break;
3190 - }
3191 - case MP_IOAPIC:
3192 - {
3193 - struct mpc_config_ioapic *m=
3194 - (struct mpc_config_ioapic *)mpt;
3195 - MP_ioapic_info(m);
3196 - mpt+=sizeof(*m);
3197 - count+=sizeof(*m);
3198 - break;
3199 - }
3200 - case MP_INTSRC:
3201 - {
3202 - struct mpc_config_intsrc *m=
3203 - (struct mpc_config_intsrc *)mpt;
3204 -
3205 - MP_intsrc_info(m);
3206 - mpt+=sizeof(*m);
3207 - count+=sizeof(*m);
3208 - break;
3209 - }
3210 - case MP_LINTSRC:
3211 - {
3212 - struct mpc_config_lintsrc *m=
3213 - (struct mpc_config_lintsrc *)mpt;
3214 - MP_lintsrc_info(m);
3215 - mpt+=sizeof(*m);
3216 - count+=sizeof(*m);
3217 - break;
3218 - }
3219 - default:
3220 - {
3221 - count = mpc->mpc_length;
3222 - break;
3223 - }
3224 - }
3225 - ++mpc_record;
3226 - }
3227 - setup_apic_routing();
3228 - if (!num_processors)
3229 - printk(KERN_ERR "SMP mptable: no processors registered!\n");
3230 - return num_processors;
3231 -}
3232 -
3233 -static int __init ELCR_trigger(unsigned int irq)
3234 -{
3235 - unsigned int port;
3236 -
3237 - port = 0x4d0 + (irq >> 3);
3238 - return (inb(port) >> (irq & 7)) & 1;
3239 -}
3240 -
3241 -static void __init construct_default_ioirq_mptable(int mpc_default_type)
3242 -{
3243 - struct mpc_config_intsrc intsrc;
3244 - int i;
3245 - int ELCR_fallback = 0;
3246 -
3247 - intsrc.mpc_type = MP_INTSRC;
3248 - intsrc.mpc_irqflag = 0; /* conforming */
3249 - intsrc.mpc_srcbus = 0;
3250 - intsrc.mpc_dstapic = mp_ioapics[0].mpc_apicid;
3251 -
3252 - intsrc.mpc_irqtype = mp_INT;
3253 -
3254 - /*
3255 - * If true, we have an ISA/PCI system with no IRQ entries
3256 - * in the MP table. To prevent the PCI interrupts from being set up
3257 - * incorrectly, we try to use the ELCR. The sanity check to see if
3258 - * there is good ELCR data is very simple - IRQ0, 1, 2 and 13 can
3259 - * never be level sensitive, so we simply see if the ELCR agrees.
3260 - * If it does, we assume it's valid.
3261 - */
3262 - if (mpc_default_type == 5) {
3263 - printk(KERN_INFO "ISA/PCI bus type with no IRQ information... falling back to ELCR\n");
3264 -
3265 - if (ELCR_trigger(0) || ELCR_trigger(1) || ELCR_trigger(2) || ELCR_trigger(13))
3266 - printk(KERN_WARNING "ELCR contains invalid data... not using ELCR\n");
3267 - else {
3268 - printk(KERN_INFO "Using ELCR to identify PCI interrupts\n");
3269 - ELCR_fallback = 1;
3270 - }
3271 - }
3272 -
3273 - for (i = 0; i < 16; i++) {
3274 - switch (mpc_default_type) {
3275 - case 2:
3276 - if (i == 0 || i == 13)
3277 - continue; /* IRQ0 & IRQ13 not connected */
3278 - /* fall through */
3279 - default:
3280 - if (i == 2)
3281 - continue; /* IRQ2 is never connected */
3282 - }
3283 -
3284 - if (ELCR_fallback) {
3285 - /*
3286 - * If the ELCR indicates a level-sensitive interrupt, we
3287 - * copy that information over to the MP table in the
3288 - * irqflag field (level sensitive, active high polarity).
3289 - */
3290 - if (ELCR_trigger(i))
3291 - intsrc.mpc_irqflag = 13;
3292 - else
3293 - intsrc.mpc_irqflag = 0;
3294 - }
3295 -
3296 - intsrc.mpc_srcbusirq = i;
3297 - intsrc.mpc_dstirq = i ? i : 2; /* IRQ0 to INTIN2 */
3298 - MP_intsrc_info(&intsrc);
3299 - }
3300 -
3301 - intsrc.mpc_irqtype = mp_ExtINT;
3302 - intsrc.mpc_srcbusirq = 0;
3303 - intsrc.mpc_dstirq = 0; /* 8259A to INTIN0 */
3304 - MP_intsrc_info(&intsrc);
3305 -}
3306 -
3307 -static inline void __init construct_default_ISA_mptable(int mpc_default_type)
3308 -{
3309 - struct mpc_config_processor processor;
3310 - struct mpc_config_bus bus;
3311 - struct mpc_config_ioapic ioapic;
3312 - struct mpc_config_lintsrc lintsrc;
3313 - int linttypes[2] = { mp_ExtINT, mp_NMI };
3314 - int i;
3315 -
3316 - /*
3317 - * local APIC has default address
3318 - */
3319 - mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
3320 -
3321 - /*
3322 - * 2 CPUs, numbered 0 & 1.
3323 - */
3324 - processor.mpc_type = MP_PROCESSOR;
3325 - /* Either an integrated APIC or a discrete 82489DX. */
3326 - processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
3327 - processor.mpc_cpuflag = CPU_ENABLED;
3328 - processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
3329 - (boot_cpu_data.x86_model << 4) |
3330 - boot_cpu_data.x86_mask;
3331 - processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
3332 - processor.mpc_reserved[0] = 0;
3333 - processor.mpc_reserved[1] = 0;
3334 - for (i = 0; i < 2; i++) {
3335 - processor.mpc_apicid = i;
3336 - MP_processor_info(&processor);
3337 - }
3338 -
3339 - bus.mpc_type = MP_BUS;
3340 - bus.mpc_busid = 0;
3341 - switch (mpc_default_type) {
3342 - default:
3343 - printk("???\n");
3344 - printk(KERN_ERR "Unknown standard configuration %d\n",
3345 - mpc_default_type);
3346 - /* fall through */
3347 - case 1:
3348 - case 5:
3349 - memcpy(bus.mpc_bustype, "ISA ", 6);
3350 - break;
3351 - case 2:
3352 - case 6:
3353 - case 3:
3354 - memcpy(bus.mpc_bustype, "EISA ", 6);
3355 - break;
3356 - case 4:
3357 - case 7:
3358 - memcpy(bus.mpc_bustype, "MCA ", 6);
3359 - }
3360 - MP_bus_info(&bus);
3361 - if (mpc_default_type > 4) {
3362 - bus.mpc_busid = 1;
3363 - memcpy(bus.mpc_bustype, "PCI ", 6);
3364 - MP_bus_info(&bus);
3365 - }
3366 -
3367 - ioapic.mpc_type = MP_IOAPIC;
3368 - ioapic.mpc_apicid = 2;
3369 - ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
3370 - ioapic.mpc_flags = MPC_APIC_USABLE;
3371 - ioapic.mpc_apicaddr = 0xFEC00000;
3372 - MP_ioapic_info(&ioapic);
3373 -
3374 - /*
3375 - * We set up most of the low 16 IO-APIC pins according to MPS rules.
3376 - */
3377 - construct_default_ioirq_mptable(mpc_default_type);
3378 -
3379 - lintsrc.mpc_type = MP_LINTSRC;
3380 - lintsrc.mpc_irqflag = 0; /* conforming */
3381 - lintsrc.mpc_srcbusid = 0;
3382 - lintsrc.mpc_srcbusirq = 0;
3383 - lintsrc.mpc_destapic = MP_APIC_ALL;
3384 - for (i = 0; i < 2; i++) {
3385 - lintsrc.mpc_irqtype = linttypes[i];
3386 - lintsrc.mpc_destapiclint = i;
3387 - MP_lintsrc_info(&lintsrc);
3388 - }
3389 -}
3390 -
3391 -static struct intel_mp_floating *mpf_found;
3392 -
3393 -/*
3394 - * Scan the memory blocks for an SMP configuration block.
3395 - */
3396 -void __init get_smp_config (void)
3397 -{
3398 - struct intel_mp_floating *mpf = mpf_found;
3399 -
3400 - /*
3401 - * ACPI supports both logical (e.g. Hyper-Threading) and physical
3402 - * processors, where MPS only supports physical.
3403 - */
3404 - if (acpi_lapic && acpi_ioapic) {
3405 - printk(KERN_INFO "Using ACPI (MADT) for SMP configuration information\n");
3406 - return;
3407 - }
3408 - else if (acpi_lapic)
3409 - printk(KERN_INFO "Using ACPI for processor (LAPIC) configuration information\n");
3410 -
3411 - printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification);
3412 - if (mpf->mpf_feature2 & (1<<7)) {
3413 - printk(KERN_INFO " IMCR and PIC compatibility mode.\n");
3414 - pic_mode = 1;
3415 - } else {
3416 - printk(KERN_INFO " Virtual Wire compatibility mode.\n");
3417 - pic_mode = 0;
3418 - }
3419 -
3420 - /*
3421 - * Now see if we need to read further.
3422 - */
3423 - if (mpf->mpf_feature1 != 0) {
3424 -
3425 - printk(KERN_INFO "Default MP configuration #%d\n", mpf->mpf_feature1);
3426 - construct_default_ISA_mptable(mpf->mpf_feature1);
3427 -
3428 - } else if (mpf->mpf_physptr) {
3429 -
3430 - /*
3431 - * Read the physical hardware table. Anything here will
3432 - * override the defaults.
3433 - */
3434 - if (!smp_read_mpc(isa_bus_to_virt(mpf->mpf_physptr))) {
3435 - smp_found_config = 0;
3436 - printk(KERN_ERR "BIOS bug, MP table errors detected!...\n");
3437 - printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n");
3438 - return;
3439 - }
3440 - /*
3441 - * If there are no explicit MP IRQ entries, then we are
3442 - * broken. We set up most of the low 16 IO-APIC pins to
3443 - * ISA defaults and hope it will work.
3444 - */
3445 - if (!mp_irq_entries) {
3446 - struct mpc_config_bus bus;
3447 -
3448 - printk(KERN_ERR "BIOS bug, no explicit IRQ entries, using default mptable. (tell your hw vendor)\n");
3449 -
3450 - bus.mpc_type = MP_BUS;
3451 - bus.mpc_busid = 0;
3452 - memcpy(bus.mpc_bustype, "ISA ", 6);
3453 - MP_bus_info(&bus);
3454 -
3455 - construct_default_ioirq_mptable(0);
3456 - }
3457 -
3458 - } else
3459 - BUG();
3460 -
3461 - printk(KERN_INFO "Processors: %d\n", num_processors);
3462 - /*
3463 - * Only use the first configuration found.
3464 - */
3465 -}
3466 -
3467 -static int __init smp_scan_config (unsigned long base, unsigned long length)
3468 -{
3469 - unsigned long *bp = isa_bus_to_virt(base);
3470 - struct intel_mp_floating *mpf;
3471 -
3472 - printk(KERN_INFO "Scan SMP from %p for %ld bytes.\n", bp,length);
3473 - if (sizeof(*mpf) != 16)
3474 - printk("Error: MPF size\n");
3475 -
3476 - while (length > 0) {
3477 - mpf = (struct intel_mp_floating *)bp;
3478 - if ((*bp == SMP_MAGIC_IDENT) &&
3479 - (mpf->mpf_length == 1) &&
3480 - !mpf_checksum((unsigned char *)bp, 16) &&
3481 - ((mpf->mpf_specification == 1)
3482 - || (mpf->mpf_specification == 4)) ) {
3483 -
3484 - smp_found_config = 1;
3485 -#ifndef CONFIG_XEN
3486 - printk(KERN_INFO "found SMP MP-table at [%p] %08lx\n",
3487 - mpf, virt_to_phys(mpf));
3488 - reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE,
3489 - BOOTMEM_DEFAULT);
3490 - if (mpf->mpf_physptr) {
3491 - /*
3492 - * We cannot access to MPC table to compute
3493 - * table size yet, as only few megabytes from
3494 - * the bottom is mapped now.
3495 - * PC-9800's MPC table places on the very last
3496 - * of physical memory; so that simply reserving
3497 - * PAGE_SIZE from mpg->mpf_physptr yields BUG()
3498 - * in reserve_bootmem.
3499 - */
3500 - unsigned long size = PAGE_SIZE;
3501 - unsigned long end = max_low_pfn * PAGE_SIZE;
3502 - if (mpf->mpf_physptr + size > end)
3503 - size = end - mpf->mpf_physptr;
3504 - reserve_bootmem(mpf->mpf_physptr, size,
3505 - BOOTMEM_DEFAULT);
3506 - }
3507 -#else
3508 - printk(KERN_INFO "found SMP MP-table at [%p] %08lx\n",
3509 - mpf, ((void *)bp - isa_bus_to_virt(base)) + base);
3510 -#endif
3511 -
3512 - mpf_found = mpf;
3513 - return 1;
3514 - }
3515 - bp += 4;
3516 - length -= 16;
3517 - }
3518 - return 0;
3519 -}
3520 -
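
The loop above boils down to a four-part validity test on each 16-byte candidate block. A minimal sketch, assuming the MP 1.4 floating pointer layout (signature at offset 0, length at offset 8, spec revision at offset 9); the helper name is illustrative:

    /* A candidate is a valid MP floating pointer if it starts with
     * "_MP_", its length field is 1 (units of 16 bytes), all 16 bytes
     * sum to zero modulo 256, and the spec revision is 1 or 4. */
    #include <string.h>

    static int mpf_candidate_ok(const unsigned char *p)
    {
        unsigned char sum = 0;
        int i;

        if (memcmp(p, "_MP_", 4) != 0)   /* signature at offset 0 */
            return 0;
        if (p[8] != 1)                   /* length in 16-byte units */
            return 0;
        for (i = 0; i < 16; i++)         /* checksum over the block */
            sum += p[i];
        if (sum != 0)
            return 0;
        return p[9] == 1 || p[9] == 4;   /* spec revision 1.1 or 1.4 */
    }
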
3521 -void __init find_smp_config (void)
3522 -{
3523 -#ifndef CONFIG_XEN
3524 - unsigned int address;
3525 -#endif
3526 -
3527 - /*
3528 - * FIXME: Linux assumes you have 640K of base RAM...
3529 - * this continues the error...
3530 - *
3531 - * 1) Scan the bottom 1K for a signature
3532 - * 2) Scan the top 1K of base RAM
3533 - * 3) Scan the 64K of BIOS
3534 - */
3535 - if (smp_scan_config(0x0,0x400) ||
3536 - smp_scan_config(639*0x400,0x400) ||
3537 - smp_scan_config(0xF0000,0x10000))
3538 - return;
3539 - /*
3540 - * If it is an SMP machine we should know now, unless the
3541 - * configuration is in an EISA/MCA bus machine with an
3542 - * extended BIOS data area.
3543 - *
3544 - * There is a real-mode segmented pointer to the 4K EBDA
3545 - * area at 0x40E; calculate its address and scan it here.
3546 - *
3547 - * NOTE! There are Linux loaders that will corrupt the EBDA
3548 - * area, and as such this kind of SMP config may be less
3549 - * trustworthy, simply because the SMP table may have been
3550 - * stomped on during early boot. These loaders are buggy and
3551 - * should be fixed.
3552 - *
3553 - * The MP 1.4 spec says to scan only the first 1K of the 4K EBDA.
3554 - */
3555 -
3556 -#ifndef CONFIG_XEN
3557 - address = get_bios_ebda();
3558 - if (address)
3559 - smp_scan_config(address, 0x400);
3560 -#endif
3561 -}
3562 -
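
A minimal sketch of the EBDA lookup that get_bios_ebda() performs in the non-Xen path, assuming low memory is already mapped; 'low_mem' is a hypothetical pointer to a view of the first page of physical memory:

    /* The BIOS data area stores a real-mode segment for the EBDA in
     * the 16-bit word at physical address 0x40E; shifting it left by
     * 4 yields the EBDA's physical address. */
    static unsigned long ebda_phys_addr(const unsigned char *low_mem)
    {
        unsigned int segment = low_mem[0x40E] | (low_mem[0x40F] << 8);

        return (unsigned long)segment << 4;   /* segment * 16 */
    }
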
3563 -int es7000_plat;
3564 -
3565 -/* --------------------------------------------------------------------------
3566 - ACPI-based MP Configuration
3567 - -------------------------------------------------------------------------- */
3568 -
3569 -#ifdef CONFIG_ACPI
3570 -
3571 -void __init mp_register_lapic_address(u64 address)
3572 -{
3573 -#ifndef CONFIG_XEN
3574 - mp_lapic_addr = (unsigned long) address;
3575 -
3576 - set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr);
3577 -
3578 - if (boot_cpu_physical_apicid == -1U)
3579 - boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
3580 -
3581 - Dprintk("Boot CPU = %d\n", boot_cpu_physical_apicid);
3582 -#endif
3583 -}
3584 -
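
On xAPIC hardware the physical APIC ID occupies bits 31:24 of the local APIC ID register, which is what GET_APIC_ID extracts from the apic_read(APIC_ID) value above. A sketch under that assumption; the helper name is illustrative and the mask width can differ on older parts:

    /* Extract the physical APIC ID from a raw APIC_ID register value. */
    static unsigned int apic_id_from_reg(unsigned int apic_id_reg)
    {
        return (apic_id_reg >> 24) & 0xff;
    }
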
3585 -void __cpuinit mp_register_lapic (u8 id, u8 enabled)
3586 -{
3587 - struct mpc_config_processor processor;
3588 - int boot_cpu = 0;
3589 -
3590 - if (MAX_APICS - id <= 0) {
3591 - printk(KERN_WARNING "Processor #%d invalid (max %d)\n",
3592 - id, MAX_APICS);
3593 - return;
3594 - }
3595 -
3596 - if (id == boot_cpu_physical_apicid)
3597 - boot_cpu = 1;
3598 -
3599 -#ifndef CONFIG_XEN
3600 - processor.mpc_type = MP_PROCESSOR;
3601 - processor.mpc_apicid = id;
3602 - processor.mpc_apicver = GET_APIC_VERSION(apic_read(APIC_LVR));
3603 - processor.mpc_cpuflag = (enabled ? CPU_ENABLED : 0);
3604 - processor.mpc_cpuflag |= (boot_cpu ? CPU_BOOTPROCESSOR : 0);
3605 - processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
3606 - (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask;
3607 - processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
3608 - processor.mpc_reserved[0] = 0;
3609 - processor.mpc_reserved[1] = 0;
3610 -#endif
3611 -
3612 - MP_processor_info(&processor);
3613 -}
3614 -
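
The mpc_cpufeature packing above mirrors the low bits of the CPUID signature: family in bits 11:8, model in bits 7:4, stepping (x86_mask) in bits 3:0. A minimal sketch of that packing:

    /* Pack family/model/stepping the way mpc_cpufeature is built above. */
    static unsigned int pack_cpu_signature(unsigned int family,
                                           unsigned int model,
                                           unsigned int stepping)
    {
        return (family << 8) | (model << 4) | (stepping & 0xf);
    }
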
3615 -#ifdef CONFIG_X86_IO_APIC
3616 -
3617 -#define MP_ISA_BUS 0
3618 -#define MP_MAX_IOAPIC_PIN 127
3619 -
3620 -static struct mp_ioapic_routing {
3621 - int apic_id;
3622 - int gsi_base;
3623 - int gsi_end;
3624 - u32 pin_programmed[4];
3625 -} mp_ioapic_routing[MAX_IO_APICS];
3626 -
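
The pin_programmed[4] array above is sized to cover pins 0 through MP_MAX_IOAPIC_PIN (127), one bit per pin: pin N maps to bit N % 32 of word N / 32. A sketch of that bookkeeping, with stand-in helper names and a local u32 typedef so it compiles on its own:

    typedef unsigned int u32;   /* stand-in for the kernel type */

    static int pin_is_programmed(const u32 map[4], int pin)
    {
        return (map[pin / 32] >> (pin % 32)) & 1;
    }

    static void mark_pin_programmed(u32 map[4], int pin)
    {
        map[pin / 32] |= (u32)1 << (pin % 32);
    }
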
3627 -static int mp_find_ioapic (int gsi)
3628 -{
3629 - int i = 0;
3630 -
3631 - /* Find the IOAPIC that manages this GSI. */
3632 - for (i = 0; i < nr_ioapics; i++) {
3633 - if ((gsi >= mp_ioapic_routing[i].gsi_base)
3634 - && (gsi <= mp_ioapic_routing[i].gsi_end))
3635 - return i;
3636 - }
3637 -
3638 - printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi);
3639 -
3640 - return -1;
3641 -}
3642 -
3643 -void __init mp_register_ioapic(u8 id, u32 address, u32 gsi_base)
3644 -{
3645 - int idx = 0;
3646 - int tmpid;
3647 -
3648 - if (nr_ioapics >= MAX_IO_APICS) {
3649 - printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
3650 - "(found %d)\n", MAX_IO_APICS, nr_ioapics);
3651 - panic("Recompile kernel with bigger MAX_IO_APICS!\n");
3652 - }