1From: Linux Kernel Mailing List <linux-kernel@vger.kernel.org>
2Subject: [PATCH] Linux: Update to 2.6.27
3Patch-mainline: 2.6.27
4
5 This patch contains the differences between Linux 2.6.26 and 2.6.27.
6
7Acked-by: Jeff Mahoney <jeffm@suse.com>
8Automatically created from "patches.kernel.org/patch-2.6.27" by xen-port-patches.py
9
10--- sle11-2009-10-16.orig/arch/x86/Kconfig 2009-03-16 16:38:05.000000000 +0100
11+++ sle11-2009-10-16/arch/x86/Kconfig 2009-06-04 10:21:39.000000000 +0200
12@@ -594,7 +594,7 @@ config CALGARY_IOMMU_ENABLED_BY_DEFAULT
13 config AMD_IOMMU
14 bool "AMD IOMMU support"
15 select SWIOTLB
16- depends on X86_64 && PCI && ACPI
17+ depends on X86_64 && PCI && ACPI && !X86_64_XEN
18 help
19 With this option you can enable support for AMD IOMMU hardware in
20 your system. An IOMMU is a hardware component which provides
21@@ -629,8 +629,10 @@ config MAXSMP
22
23 config NR_CPUS
24 int "Maximum number of CPUs (2-4096)"
25+ range 2 32 if XEN
26 range 2 4096
27 depends on SMP
28+ default "32" if MAXSMP && XEN
29 default "4096" if MAXSMP
30 default "32" if X86_NUMAQ || X86_SUMMIT || X86_BIGSMP || X86_ES7000
31 default "16" if X86_64_XEN
32@@ -1227,7 +1229,7 @@ config MTRR
33 config MTRR_SANITIZER
34 bool
35 prompt "MTRR cleanup support"
36- depends on MTRR
37+ depends on MTRR && !XEN
38 help
39 Convert MTRR layout from continuous to discrete, so X drivers can
40 add writeback entries.
41--- sle11-2009-10-16.orig/arch/x86/Kconfig.debug 2009-03-16 16:33:40.000000000 +0100
42+++ sle11-2009-10-16/arch/x86/Kconfig.debug 2009-06-04 10:21:39.000000000 +0200
43@@ -25,6 +25,7 @@ config STRICT_DEVMEM
44 config X86_VERBOSE_BOOTUP
45 bool "Enable verbose x86 bootup info messages"
46 default y
47+ depends on !XEN
48 help
49 Enables the informational output from the decompression stage
50 (e.g. bzImage) of the boot. If you disable this you will still
51@@ -179,7 +180,7 @@ config MMIOTRACE_HOOKS
52
53 config MMIOTRACE
54 bool "Memory mapped IO tracing"
55- depends on DEBUG_KERNEL && PCI
56+ depends on DEBUG_KERNEL && PCI && !XEN
57 select TRACING
58 select MMIOTRACE_HOOKS
59 help
60--- sle11-2009-10-16.orig/arch/x86/Makefile 2009-02-16 16:18:36.000000000 +0100
61+++ sle11-2009-10-16/arch/x86/Makefile 2009-06-04 10:21:39.000000000 +0200
62@@ -116,8 +116,8 @@ mflags-$(CONFIG_X86_VOYAGER) := -Iinclud
63 mcore-$(CONFIG_X86_VOYAGER) := arch/x86/mach-voyager/
64
65 # Xen subarch support
66-mflags-$(CONFIG_X86_XEN) := -Iinclude/asm-x86/mach-xen
67-mcore-$(CONFIG_X86_XEN) := arch/x86/mach-xen/
68+mflags-$(CONFIG_XEN) := -Iinclude/asm-x86/mach-xen
69+mcore-$(CONFIG_XEN) := arch/x86/mach-xen/
70
71 # generic subarchitecture
72 mflags-$(CONFIG_X86_GENERICARCH):= -Iinclude/asm-x86/mach-generic
73@@ -128,8 +128,10 @@ mcore-$(CONFIG_X86_GENERICARCH) := arch/
74 mflags-y += -Iinclude/asm-x86/mach-default
75
76 # 64 bit does not support subarch support - clear sub arch variables
77+ifneq ($(CONFIG_XEN),y)
78 fcore-$(CONFIG_X86_64) :=
79 mcore-$(CONFIG_X86_64) :=
80+endif
81
82 KBUILD_CFLAGS += $(mflags-y)
83 KBUILD_AFLAGS += $(mflags-y)
84--- sle11-2009-10-16.orig/arch/x86/ia32/ia32entry-xen.S 2009-03-16 16:38:05.000000000 +0100
85+++ sle11-2009-10-16/arch/x86/ia32/ia32entry-xen.S 2009-10-16 14:51:56.000000000 +0200
86@@ -15,6 +15,16 @@
87 #include <asm/irqflags.h>
88 #include <linux/linkage.h>
89
90+/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
91+#include <linux/elf-em.h>
92+#define AUDIT_ARCH_I386 (EM_386|__AUDIT_ARCH_LE)
93+#define __AUDIT_ARCH_LE 0x40000000
94+
95+#ifndef CONFIG_AUDITSYSCALL
96+#define sysexit_audit int_ret_from_sys_call
97+#define sysretl_audit int_ret_from_sys_call
98+#endif
99+
100 #define IA32_NR_syscalls ((ia32_syscall_end - ia32_sys_call_table)/8)
101
102 .macro IA32_ARG_FIXUP noebp=0
103@@ -37,6 +47,11 @@
104 movq %rax,R8(%rsp)
105 .endm
106
107+ /*
108+ * Reload arg registers from stack in case ptrace changed them.
109+ * We don't reload %eax because syscall_trace_enter() returned
110+ * the value it wants us to use in the table lookup.
111+ */
112 .macro LOAD_ARGS32 offset
113 movl \offset(%rsp),%r11d
114 movl \offset+8(%rsp),%r10d
115@@ -46,7 +61,6 @@
116 movl \offset+48(%rsp),%edx
117 movl \offset+56(%rsp),%esi
118 movl \offset+64(%rsp),%edi
119- movl \offset+72(%rsp),%eax
120 .endm
121
122 .macro CFI_STARTPROC32 simple
123@@ -61,6 +75,19 @@
124 CFI_UNDEFINED r15
125 .endm
126
127+#ifdef CONFIG_PARAVIRT
128+ENTRY(native_usergs_sysret32)
129+ swapgs
130+ sysretl
131+ENDPROC(native_usergs_sysret32)
132+
133+ENTRY(native_irq_enable_sysexit)
134+ swapgs
135+ sti
136+ sysexit
137+ENDPROC(native_irq_enable_sysexit)
138+#endif
139+
140 /*
141 * 32bit SYSENTER instruction entry.
142 *
143@@ -98,7 +125,7 @@ ENTRY(ia32_sysenter_target)
144 CFI_RESTORE rcx
145 movl %ebp,%ebp /* zero extension */
146 movl %eax,%eax
147- movl 48-THREAD_SIZE+threadinfo_sysenter_return(%rsp),%r10d
148+ movl 48-THREAD_SIZE+TI_sysenter_return(%rsp),%r10d
149 movl $__USER32_DS,40(%rsp)
150 movq %rbp,32(%rsp)
151 movl $__USER32_CS,16(%rsp)
152@@ -113,19 +140,75 @@ ENTRY(ia32_sysenter_target)
153 .quad 1b,ia32_badarg
154 .previous
155 GET_THREAD_INFO(%r10)
156- orl $TS_COMPAT,threadinfo_status(%r10)
157- testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10)
158+ orl $TS_COMPAT,TI_status(%r10)
159+ testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
160 jnz sysenter_tracesys
161-sysenter_do_call:
162 cmpl $(IA32_NR_syscalls-1),%eax
163 ja ia32_badsys
164+sysenter_do_call:
165 IA32_ARG_FIXUP 1
166+sysenter_dispatch:
167 call *ia32_sys_call_table(,%rax,8)
168 movq %rax,RAX-ARGOFFSET(%rsp)
169+ GET_THREAD_INFO(%r10)
170+ DISABLE_INTERRUPTS(CLBR_NONE)
171+ TRACE_IRQS_OFF
172+ testl $_TIF_ALLWORK_MASK,TI_flags(%r10)
173+ jnz sysexit_audit
174 jmp int_ret_from_sys_call
175
176+#ifdef CONFIG_AUDITSYSCALL
177+ .macro auditsys_entry_common
178+ movl %esi,%r9d /* 6th arg: 4th syscall arg */
179+ movl %edx,%r8d /* 5th arg: 3rd syscall arg */
180+ /* (already in %ecx) 4th arg: 2nd syscall arg */
181+ movl %ebx,%edx /* 3rd arg: 1st syscall arg */
182+ movl %eax,%esi /* 2nd arg: syscall number */
183+ movl $AUDIT_ARCH_I386,%edi /* 1st arg: audit arch */
184+ call audit_syscall_entry
185+ movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall number */
186+ cmpl $(IA32_NR_syscalls-1),%eax
187+ ja ia32_badsys
188+ movl %ebx,%edi /* reload 1st syscall arg */
189+ movl RCX-ARGOFFSET(%rsp),%esi /* reload 2nd syscall arg */
190+ movl RDX-ARGOFFSET(%rsp),%edx /* reload 3rd syscall arg */
191+ movl RSI-ARGOFFSET(%rsp),%ecx /* reload 4th syscall arg */
192+ movl RDI-ARGOFFSET(%rsp),%r8d /* reload 5th syscall arg */
193+ .endm
194+
195+ .macro auditsys_exit exit,ebpsave=RBP
196+ testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10)
197+ jnz int_ret_from_sys_call
198+ TRACE_IRQS_ON
199+ ENABLE_INTERRUPTS(CLBR_NONE)
200+ movl %eax,%esi /* second arg, syscall return value */
201+ cmpl $0,%eax /* is it < 0? */
202+ setl %al /* 1 if so, 0 if not */
203+ movzbl %al,%edi /* zero-extend that into %edi */
204+ inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
205+ call audit_syscall_exit
206+ movl \ebpsave-ARGOFFSET(%rsp),%ebp /* reload user register value */
207+ movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
208+ DISABLE_INTERRUPTS(CLBR_NONE)
209+ TRACE_IRQS_OFF
210+	jmp int_with_check
211+ .endm
212+
213+sysenter_auditsys:
214+ auditsys_entry_common
215+ movl %ebp,%r9d /* reload 6th syscall arg */
216+ jmp sysenter_dispatch
217+
218+sysexit_audit:
219+ auditsys_exit sysexit_from_sys_call
220+#endif
221+
222 sysenter_tracesys:
223 xchgl %r9d,%ebp
224+#ifdef CONFIG_AUDITSYSCALL
225+ testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10)
226+ jz sysenter_auditsys
227+#endif
228 SAVE_REST
229 CLEAR_RREGS
230 movq %r9,R9(%rsp)
231@@ -186,18 +269,38 @@ ENTRY(ia32_cstar_target)
232 .quad 1b,ia32_badarg
233 .previous
234 GET_THREAD_INFO(%r10)
235- orl $TS_COMPAT,threadinfo_status(%r10)
236- testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10)
237+ orl $TS_COMPAT,TI_status(%r10)
238+ testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
239 jnz cstar_tracesys
240 cstar_do_call:
241 cmpl $IA32_NR_syscalls-1,%eax
242 ja ia32_badsys
243 IA32_ARG_FIXUP 1
244+cstar_dispatch:
245 call *ia32_sys_call_table(,%rax,8)
246 movq %rax,RAX-ARGOFFSET(%rsp)
247+ GET_THREAD_INFO(%r10)
248+ DISABLE_INTERRUPTS(CLBR_NONE)
249+ testl $_TIF_ALLWORK_MASK,TI_flags(%r10)
250+ jnz sysretl_audit
251 jmp int_ret_from_sys_call
252
253-cstar_tracesys:
254+#ifdef CONFIG_AUDITSYSCALL
255+cstar_auditsys:
256+ movl %r9d,R9-ARGOFFSET(%rsp) /* register to be clobbered by call */
257+ auditsys_entry_common
258+ movl R9-ARGOFFSET(%rsp),%r9d /* reload 6th syscall arg */
259+ jmp cstar_dispatch
260+
261+sysretl_audit:
262+ auditsys_exit sysretl_from_sys_call, RCX /* user %ebp in RCX slot */
263+#endif
264+
265+cstar_tracesys:
266+#ifdef CONFIG_AUDITSYSCALL
267+ testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10)
268+ jz cstar_auditsys
269+#endif
270 xchgl %r9d,%ebp
271 SAVE_REST
272 CLEAR_RREGS
273@@ -263,8 +366,8 @@ ENTRY(ia32_syscall)
274 this could be a problem. */
275 SAVE_ARGS 0,0,1
276 GET_THREAD_INFO(%r10)
277- orl $TS_COMPAT,threadinfo_status(%r10)
278- testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10)
279+ orl $TS_COMPAT,TI_status(%r10)
280+ testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
281 jnz ia32_tracesys
282 ia32_do_syscall:
283 cmpl $(IA32_NR_syscalls-1),%eax
284@@ -309,13 +412,11 @@ quiet_ni_syscall:
285 PTREGSCALL stub32_rt_sigreturn, sys32_rt_sigreturn, %rdi
286 PTREGSCALL stub32_sigreturn, sys32_sigreturn, %rdi
287 PTREGSCALL stub32_sigaltstack, sys32_sigaltstack, %rdx
288- PTREGSCALL stub32_sigsuspend, sys32_sigsuspend, %rcx
289 PTREGSCALL stub32_execve, sys32_execve, %rcx
290 PTREGSCALL stub32_fork, sys_fork, %rdi
291 PTREGSCALL stub32_clone, sys32_clone, %rdx
292 PTREGSCALL stub32_vfork, sys_vfork, %rdi
293 PTREGSCALL stub32_iopl, sys_iopl, %rsi
294- PTREGSCALL stub32_rt_sigsuspend, sys_rt_sigsuspend, %rdx
295
296 ENTRY(ia32_ptregs_common)
297 popq %r11
298@@ -415,7 +516,7 @@ ia32_sys_call_table:
299 .quad sys_ssetmask
300 .quad sys_setreuid16 /* 70 */
301 .quad sys_setregid16
302- .quad stub32_sigsuspend
303+ .quad sys32_sigsuspend
304 .quad compat_sys_sigpending
305 .quad sys_sethostname
306 .quad compat_sys_setrlimit /* 75 */
307@@ -522,7 +623,7 @@ ia32_sys_call_table:
308 .quad sys32_rt_sigpending
309 .quad compat_sys_rt_sigtimedwait
310 .quad sys32_rt_sigqueueinfo
311- .quad stub32_rt_sigsuspend
312+ .quad sys_rt_sigsuspend
313 .quad sys32_pread /* 180 */
314 .quad sys32_pwrite
315 .quad sys_chown16
316@@ -670,4 +771,10 @@ ia32_sys_call_table:
317 .quad sys32_fallocate
318 .quad compat_sys_timerfd_settime /* 325 */
319 .quad compat_sys_timerfd_gettime
320+ .quad compat_sys_signalfd4
321+ .quad sys_eventfd2
322+ .quad sys_epoll_create1
323+ .quad sys_dup3 /* 330 */
324+ .quad sys_pipe2
325+ .quad sys_inotify_init1
326 ia32_syscall_end:
327--- sle11-2009-10-16.orig/arch/x86/kernel/Makefile 2009-03-16 16:38:05.000000000 +0100
328+++ sle11-2009-10-16/arch/x86/kernel/Makefile 2009-06-04 10:21:39.000000000 +0200
329@@ -120,9 +120,10 @@ ifeq ($(CONFIG_X86_64),y)
330
331 obj-$(CONFIG_PCI_MMCONFIG) += mmconf-fam10h_64.o
332
333- obj-$(CONFIG_XEN) += nmi_64.o
334+ obj-$(CONFIG_XEN) += nmi.o
335 time_64-$(CONFIG_XEN) += time_32.o
336 endif
337
338-disabled-obj-$(CONFIG_XEN) := crash.o early-quirks.o hpet.o i8253.o i8259_$(BITS).o \
339- pci-swiotlb_64.o reboot.o smpboot.o tlb_$(BITS).o tsc_$(BITS).o tsc_sync.o vsmp_64.o
340+disabled-obj-$(CONFIG_XEN) := %_uv.o crash.o early-quirks.o hpet.o i8253.o \
341+ i8259.o irqinit_$(BITS).o pci-swiotlb_64.o reboot.o smpboot.o \
342+ tlb_$(BITS).o tsc.o tsc_sync.o vsmp_64.o
343--- sle11-2009-10-16.orig/arch/x86/kernel/acpi/boot.c 2009-08-26 11:55:26.000000000 +0200
344+++ sle11-2009-10-16/arch/x86/kernel/acpi/boot.c 2009-08-26 12:03:49.000000000 +0200
345@@ -949,7 +949,9 @@ void __init mp_register_ioapic(int id, u
346 mp_ioapics[idx].mp_flags = MPC_APIC_USABLE;
347 mp_ioapics[idx].mp_apicaddr = address;
348
349+#ifndef CONFIG_XEN
350 set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
351+#endif
352 mp_ioapics[idx].mp_apicid = uniq_ioapic_id(id);
353 #ifdef CONFIG_X86_32
354 mp_ioapics[idx].mp_apicver = io_apic_get_version(idx);
355@@ -1106,7 +1108,7 @@ int mp_register_gsi(u32 gsi, int trigger
356 {
357 int ioapic;
358 int ioapic_pin;
359-#ifdef CONFIG_X86_32
360+#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN)
361 #define MAX_GSI_NUM 4096
362 #define IRQ_COMPRESSION_START 64
363
364@@ -1154,7 +1156,7 @@ int mp_register_gsi(u32 gsi, int trigger
365 if (test_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed)) {
366 pr_debug(KERN_DEBUG "Pin %d-%d already programmed\n",
367 mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
368-#ifdef CONFIG_X86_32
369+#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN)
370 return (gsi < IRQ_COMPRESSION_START ? gsi : gsi_to_irq[gsi]);
371 #else
372 return gsi;
373@@ -1162,7 +1164,7 @@ int mp_register_gsi(u32 gsi, int trigger
374 }
375
376 set_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed);
377-#ifdef CONFIG_X86_32
378+#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN)
379 /*
380 * For GSI >= 64, use IRQ compression
381 */
382--- sle11-2009-10-16.orig/arch/x86/kernel/acpi/sleep-xen.c 2009-03-16 16:38:05.000000000 +0100
383+++ sle11-2009-10-16/arch/x86/kernel/acpi/sleep-xen.c 2009-06-04 10:21:39.000000000 +0200
384@@ -9,6 +9,7 @@
385 #include <linux/bootmem.h>
386 #include <linux/dmi.h>
387 #include <linux/cpumask.h>
388+#include <asm/segment.h>
389
390 #include "realmode/wakeup.h"
391 #include "sleep.h"
392@@ -20,7 +21,7 @@ unsigned long acpi_realmode_flags;
393 /* address in low memory of the wakeup routine. */
394 static unsigned long acpi_realmode;
395
396-#ifdef CONFIG_64BIT
397+#if defined(CONFIG_SMP) && defined(CONFIG_64BIT)
398 static char temp_stack[10240];
399 #endif
400 #endif
401@@ -54,18 +55,27 @@ int acpi_save_state_mem(void)
402 header->video_mode = saved_video_mode;
403
404 header->wakeup_jmp_seg = acpi_wakeup_address >> 4;
405+
406+ /*
407+ * Set up the wakeup GDT. We set these up as Big Real Mode,
408+ * that is, with limits set to 4 GB. At least the Lenovo
409+ * Thinkpad X61 is known to need this for the video BIOS
410+ * initialization quirk to work; this is likely to also
411+ * be the case for other laptops or integrated video devices.
412+ */
413+
414 /* GDT[0]: GDT self-pointer */
415 header->wakeup_gdt[0] =
416 (u64)(sizeof(header->wakeup_gdt) - 1) +
417 ((u64)(acpi_wakeup_address +
418 ((char *)&header->wakeup_gdt - (char *)acpi_realmode))
419 << 16);
420- /* GDT[1]: real-mode-like code segment */
421- header->wakeup_gdt[1] = (0x009bULL << 40) +
422- ((u64)acpi_wakeup_address << 16) + 0xffff;
423- /* GDT[2]: real-mode-like data segment */
424- header->wakeup_gdt[2] = (0x0093ULL << 40) +
425- ((u64)acpi_wakeup_address << 16) + 0xffff;
426+ /* GDT[1]: big real mode-like code segment */
427+ header->wakeup_gdt[1] =
428+ GDT_ENTRY(0x809b, acpi_wakeup_address, 0xfffff);
429+ /* GDT[2]: big real mode-like data segment */
430+ header->wakeup_gdt[2] =
431+ GDT_ENTRY(0x8093, acpi_wakeup_address, 0xfffff);
432
433 #ifndef CONFIG_64BIT
434 store_gdt((struct desc_ptr *)&header->pmode_gdt);
435@@ -79,7 +89,7 @@ int acpi_save_state_mem(void)
436 #endif /* !CONFIG_64BIT */
437
438 header->pmode_cr0 = read_cr0();
439- header->pmode_cr4 = read_cr4();
440+ header->pmode_cr4 = read_cr4_safe();
441 header->realmode_flags = acpi_realmode_flags;
442 header->real_magic = 0x12345678;
443
444@@ -89,7 +99,9 @@ int acpi_save_state_mem(void)
445 saved_magic = 0x12345678;
446 #else /* CONFIG_64BIT */
447 header->trampoline_segment = setup_trampoline() >> 4;
448- init_rsp = (unsigned long)temp_stack + 4096;
449+#ifdef CONFIG_SMP
450+ stack_start.sp = temp_stack + 4096;
451+#endif
452 initial_code = (unsigned long)wakeup_long64;
453 saved_magic = 0x123456789abcdef0;
454 #endif /* CONFIG_64BIT */
455@@ -145,6 +157,12 @@ static int __init acpi_sleep_setup(char
456 acpi_realmode_flags |= 2;
457 if (strncmp(str, "s3_beep", 7) == 0)
458 acpi_realmode_flags |= 4;
459+#ifdef CONFIG_HIBERNATION
460+ if (strncmp(str, "s4_nohwsig", 10) == 0)
461+ acpi_no_s4_hw_signature();
462+#endif
463+ if (strncmp(str, "old_ordering", 12) == 0)
464+ acpi_old_suspend_ordering();
465 str = strchr(str, ',');
466 if (str != NULL)
467 str += strspn(str, ", \t");
468--- sle11-2009-10-16.orig/arch/x86/kernel/apic_32-xen.c 2009-03-16 16:33:40.000000000 +0100
469+++ sle11-2009-10-16/arch/x86/kernel/apic_32-xen.c 2009-06-04 10:21:39.000000000 +0200
470@@ -59,7 +59,10 @@ static cpumask_t timer_bcast_ipi;
471 /*
472 * Debug level, exported for io_apic.c
473 */
474-int apic_verbosity;
475+unsigned int apic_verbosity;
476+
477+/* Have we found an MP table */
478+int smp_found_config;
479
480 #ifndef CONFIG_XEN
481 static int modern_apic(void)
482--- sle11-2009-10-16.orig/arch/x86/kernel/apic_64-xen.c 2009-03-16 16:33:40.000000000 +0100
483+++ sle11-2009-10-16/arch/x86/kernel/apic_64-xen.c 2009-06-04 10:21:39.000000000 +0200
484@@ -39,7 +39,10 @@ int disable_apic;
485 /*
486 * Debug level, exported for io_apic.c
487 */
488-int apic_verbosity;
489+unsigned int apic_verbosity;
490+
491+/* Have we found an MP table */
492+int smp_found_config;
493
494 /*
495 * The guts of the apic timer interrupt
496--- sle11-2009-10-16.orig/arch/x86/kernel/asm-offsets_64.c 2008-11-25 12:35:54.000000000 +0100
497+++ sle11-2009-10-16/arch/x86/kernel/asm-offsets_64.c 2009-06-04 10:21:39.000000000 +0200
498@@ -138,7 +138,7 @@ int main(void)
499
500 BLANK();
501 DEFINE(PAGE_SIZE_asm, PAGE_SIZE);
502-#ifdef CONFIG_XEN
503+#ifdef CONFIG_PARAVIRT_XEN
504 BLANK();
505 OFFSET(XEN_vcpu_info_mask, vcpu_info, evtchn_upcall_mask);
506 OFFSET(XEN_vcpu_info_pending, vcpu_info, evtchn_upcall_pending);
507--- sle11-2009-10-16.orig/arch/x86/kernel/cpu/amd_64.c 2009-10-28 14:55:02.000000000 +0100
508+++ sle11-2009-10-16/arch/x86/kernel/cpu/amd_64.c 2009-06-04 10:21:39.000000000 +0200
509@@ -193,6 +193,7 @@ static void __cpuinit init_amd(struct cp
510 fam10h_check_enable_mmcfg();
511 }
512
513+#ifndef CONFIG_XEN
514 if (c == &boot_cpu_data && c->x86 >= 0xf && c->x86 <= 0x11) {
515 unsigned long long tseg;
516
517@@ -211,6 +212,7 @@ static void __cpuinit init_amd(struct cp
518 set_memory_4k((unsigned long)__va(tseg), 1);
519 }
520 }
521+#endif
522 }
523
524 static struct cpu_dev amd_cpu_dev __cpuinitdata = {
525--- sle11-2009-10-16.orig/arch/x86/kernel/cpu/bugs_64.c 2009-10-28 14:55:02.000000000 +0100
526+++ sle11-2009-10-16/arch/x86/kernel/cpu/bugs_64.c 2009-06-04 10:21:39.000000000 +0200
527@@ -20,6 +20,7 @@ void __init check_bugs(void)
528 #endif
529 alternative_instructions();
530
531+#ifndef CONFIG_XEN
532 /*
533 * Make sure the first 2MB area is not mapped by huge pages
534 * There are typically fixed size MTRRs in there and overlapping
535@@ -30,4 +31,5 @@ void __init check_bugs(void)
536 */
537 if (!direct_gbpages)
538 set_memory_4k((unsigned long)__va(0), 1);
539+#endif
540 }
541--- sle11-2009-10-16.orig/arch/x86/kernel/cpu/common-xen.c 2009-03-16 16:38:05.000000000 +0100
542+++ sle11-2009-10-16/arch/x86/kernel/cpu/common-xen.c 2009-06-04 10:21:39.000000000 +0200
543@@ -13,6 +13,7 @@
544 #include <asm/mtrr.h>
545 #include <asm/mce.h>
546 #include <asm/pat.h>
547+#include <asm/asm.h>
548 #ifdef CONFIG_X86_LOCAL_APIC
549 #include <asm/mpspec.h>
550 #include <asm/apic.h>
551@@ -341,11 +342,24 @@ static void __init early_cpu_detect(void
552
553 get_cpu_vendor(c, 1);
554
555+ early_get_cap(c);
556+
557 if (c->x86_vendor != X86_VENDOR_UNKNOWN &&
558 cpu_devs[c->x86_vendor]->c_early_init)
559 cpu_devs[c->x86_vendor]->c_early_init(c);
560+}
561
562- early_get_cap(c);
563+/*
564+ * The NOPL instruction is supposed to exist on all CPUs with
565+ * family >= 6; unfortunately, that's not true in practice because
566+ * of early VIA chips and (more importantly) broken virtualizers that
567+ * are not easy to detect. In the latter case it doesn't even *fail*
568+ * reliably, so probing for it doesn't even work. Disable it completely
569+ * unless we can find a reliable way to detect all the broken cases.
570+ */
571+static void __cpuinit detect_nopl(struct cpuinfo_x86 *c)
572+{
573+ clear_cpu_cap(c, X86_FEATURE_NOPL);
574 }
575
576 static void __cpuinit generic_identify(struct cpuinfo_x86 *c)
577@@ -402,8 +416,8 @@ static void __cpuinit generic_identify(s
578 }
579
580 init_scattered_cpuid_features(c);
581+ detect_nopl(c);
582 }
583-
584 }
585
586 static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
587@@ -434,7 +448,7 @@ __setup("serialnumber", x86_serial_nr_se
588 /*
589 * This does the hard work of actually picking apart the CPU stuff...
590 */
591-void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
592+static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
593 {
594 int i;
595
596@@ -448,6 +462,8 @@ void __cpuinit identify_cpu(struct cpuin
597 c->x86_max_cores = 1;
598 c->x86_clflush_size = 32;
599 memset(&c->x86_capability, 0, sizeof c->x86_capability);
600+ if (boot_cpu_has(X86_FEATURE_SYSCALL32))
601+ set_cpu_cap(c, X86_FEATURE_SYSCALL32);
602
603 if (!have_cpuid_p()) {
604 /*
605--- /dev/null 1970-01-01 00:00:00.000000000 +0000
606+++ sle11-2009-10-16/arch/x86/kernel/cpu/common_64-xen.c	2009-06-04 10:21:39.000000000 +0200
607@@ -0,0 +1,771 @@
608+#include <linux/init.h>
609+#include <linux/kernel.h>
610+#include <linux/sched.h>
611+#include <linux/string.h>
612+#include <linux/bootmem.h>
613+#include <linux/bitops.h>
614+#include <linux/module.h>
615+#include <linux/kgdb.h>
616+#include <linux/topology.h>
617+#include <linux/delay.h>
618+#include <linux/smp.h>
619+#include <linux/percpu.h>
620+#include <asm/i387.h>
621+#include <asm/msr.h>
622+#include <asm/io.h>
623+#include <asm/linkage.h>
624+#include <asm/mmu_context.h>
625+#include <asm/mtrr.h>
626+#include <asm/mce.h>
627+#include <asm/pat.h>
628+#include <asm/asm.h>
629+#include <asm/numa.h>
630+#ifdef CONFIG_X86_LOCAL_APIC
631+#include <asm/mpspec.h>
632+#include <asm/apic.h>
633+#include <mach_apic.h>
634+#elif defined(CONFIG_XEN)
635+#include <mach_apic.h>
636+#endif
637+#include <asm/pda.h>
638+#include <asm/pgtable.h>
639+#include <asm/processor.h>
640+#include <asm/desc.h>
641+#include <asm/atomic.h>
642+#include <asm/proto.h>
643+#include <asm/sections.h>
644+#include <asm/setup.h>
645+#include <asm/genapic.h>
646+
647+#include "cpu.h"
648+
649+/* We need valid kernel segments for data and code in long mode too
650+ * IRET will check the segment types kkeil 2000/10/28
651+ * Also sysret mandates a special GDT layout
652+ */
653+/* The TLS descriptors are currently at a different place compared to i386.
654+ Hopefully nobody expects them at a fixed place (Wine?) */
655+DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = {
656+ [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } },
657+ [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } },
658+ [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } },
659+ [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } },
660+ [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } },
661+ [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } },
662+} };
663+EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
664+
665+__u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata;
666+
667+/* Current gdt points %fs at the "master" per-cpu area: after this,
668+ * it's on the real one. */
669+void switch_to_new_gdt(void)
670+{
671+#ifndef CONFIG_XEN
672+ struct desc_ptr gdt_descr;
673+
674+ gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id());
675+ gdt_descr.size = GDT_SIZE - 1;
676+ load_gdt(&gdt_descr);
677+#else
678+ void *va, *gdt_addr = get_cpu_gdt_table(smp_processor_id());
679+ unsigned long frames[16];
680+ unsigned int f = 0;
681+
682+ for (va = gdt_addr; va < gdt_addr + GDT_SIZE; va += PAGE_SIZE) {
683+ frames[f++] = virt_to_mfn(va);
684+ make_page_readonly(va, XENFEAT_writable_descriptor_tables);
685+ }
686+ if (HYPERVISOR_set_gdt(frames, GDT_SIZE / sizeof(struct desc_struct)))
687+ BUG();
688+#endif
689+}
690+
691+struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {};
692+
693+static void __cpuinit default_init(struct cpuinfo_x86 *c)
694+{
695+ display_cacheinfo(c);
696+}
697+
698+static struct cpu_dev __cpuinitdata default_cpu = {
699+ .c_init = default_init,
700+ .c_vendor = "Unknown",
701+};
702+static struct cpu_dev *this_cpu __cpuinitdata = &default_cpu;
703+
704+int __cpuinit get_model_name(struct cpuinfo_x86 *c)
705+{
706+ unsigned int *v;
707+
708+ if (c->extended_cpuid_level < 0x80000004)
709+ return 0;
710+
711+ v = (unsigned int *) c->x86_model_id;
712+ cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]);
713+ cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]);
714+ cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]);
715+ c->x86_model_id[48] = 0;
716+ return 1;
717+}
718+
719+
720+void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
721+{
722+ unsigned int n, dummy, ebx, ecx, edx;
723+
724+ n = c->extended_cpuid_level;
725+
726+ if (n >= 0x80000005) {
727+ cpuid(0x80000005, &dummy, &ebx, &ecx, &edx);
728+ printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), "
729+ "D cache %dK (%d bytes/line)\n",
730+ edx>>24, edx&0xFF, ecx>>24, ecx&0xFF);
731+ c->x86_cache_size = (ecx>>24) + (edx>>24);
732+ /* On K8 L1 TLB is inclusive, so don't count it */
733+ c->x86_tlbsize = 0;
734+ }
735+
736+ if (n >= 0x80000006) {
737+ cpuid(0x80000006, &dummy, &ebx, &ecx, &edx);
738+ ecx = cpuid_ecx(0x80000006);
739+ c->x86_cache_size = ecx >> 16;
740+ c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff);
741+
742+ printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n",
743+ c->x86_cache_size, ecx & 0xFF);
744+ }
745+}
746+
747+void __cpuinit detect_ht(struct cpuinfo_x86 *c)
748+{
749+#ifdef CONFIG_SMP
750+ u32 eax, ebx, ecx, edx;
751+ int index_msb, core_bits;
752+
753+ cpuid(1, &eax, &ebx, &ecx, &edx);
754+
755+
756+ if (!cpu_has(c, X86_FEATURE_HT))
757+ return;
758+ if (cpu_has(c, X86_FEATURE_CMP_LEGACY))
759+ goto out;
760+
761+ smp_num_siblings = (ebx & 0xff0000) >> 16;
762+
763+ if (smp_num_siblings == 1) {
764+ printk(KERN_INFO "CPU: Hyper-Threading is disabled\n");
765+ } else if (smp_num_siblings > 1) {
766+
767+ if (smp_num_siblings > NR_CPUS) {
768+ printk(KERN_WARNING "CPU: Unsupported number of "
769+ "siblings %d", smp_num_siblings);
770+ smp_num_siblings = 1;
771+ return;
772+ }
773+
774+ index_msb = get_count_order(smp_num_siblings);
775+ c->phys_proc_id = phys_pkg_id(index_msb);
776+
777+ smp_num_siblings = smp_num_siblings / c->x86_max_cores;
778+
779+ index_msb = get_count_order(smp_num_siblings);
780+
781+ core_bits = get_count_order(c->x86_max_cores);
782+
783+ c->cpu_core_id = phys_pkg_id(index_msb) &
784+ ((1 << core_bits) - 1);
785+ }
786+out:
787+ if ((c->x86_max_cores * smp_num_siblings) > 1) {
788+ printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
789+ c->phys_proc_id);
790+ printk(KERN_INFO "CPU: Processor Core ID: %d\n",
791+ c->cpu_core_id);
792+ }
793+
794+#endif
795+}
796+
797+static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
798+{
799+ char *v = c->x86_vendor_id;
800+ int i;
801+ static int printed;
802+
803+ for (i = 0; i < X86_VENDOR_NUM; i++) {
804+ if (cpu_devs[i]) {
805+ if (!strcmp(v, cpu_devs[i]->c_ident[0]) ||
806+ (cpu_devs[i]->c_ident[1] &&
807+ !strcmp(v, cpu_devs[i]->c_ident[1]))) {
808+ c->x86_vendor = i;
809+ this_cpu = cpu_devs[i];
810+ return;
811+ }
812+ }
813+ }
814+ if (!printed) {
815+ printed++;
816+ printk(KERN_ERR "CPU: Vendor unknown, using generic init.\n");
817+ printk(KERN_ERR "CPU: Your system may be unstable.\n");
818+ }
819+ c->x86_vendor = X86_VENDOR_UNKNOWN;
820+}
821+
822+static void __init early_cpu_support_print(void)
823+{
824+ int i,j;
825+ struct cpu_dev *cpu_devx;
826+
827+ printk("KERNEL supported cpus:\n");
828+ for (i = 0; i < X86_VENDOR_NUM; i++) {
829+ cpu_devx = cpu_devs[i];
830+ if (!cpu_devx)
831+ continue;
832+ for (j = 0; j < 2; j++) {
833+ if (!cpu_devx->c_ident[j])
834+ continue;
835+ printk(" %s %s\n", cpu_devx->c_vendor,
836+ cpu_devx->c_ident[j]);
837+ }
838+ }
839+}
840+
841+/*
842+ * The NOPL instruction is supposed to exist on all CPUs with
843+ * family >= 6, unfortunately, that's not true in practice because
844+ * of early VIA chips and (more importantly) broken virtualizers that
845+ * are not easy to detect. Hence, probe for it based on first
846+ * principles.
847+ *
848+ * Note: no 64-bit chip is known to lack these, but put the code here
849+ * for consistency with 32 bits, and to make it utterly trivial to
850+ * diagnose the problem should it ever surface.
851+ */
852+static void __cpuinit detect_nopl(struct cpuinfo_x86 *c)
853+{
854+ const u32 nopl_signature = 0x888c53b1; /* Random number */
855+ u32 has_nopl = nopl_signature;
856+
857+ clear_cpu_cap(c, X86_FEATURE_NOPL);
858+ if (c->x86 >= 6) {
859+ asm volatile("\n"
860+ "1: .byte 0x0f,0x1f,0xc0\n" /* nopl %eax */
861+ "2:\n"
862+ " .section .fixup,\"ax\"\n"
863+ "3: xor %0,%0\n"
864+ " jmp 2b\n"
865+ " .previous\n"
866+ _ASM_EXTABLE(1b,3b)
867+ : "+a" (has_nopl));
868+
869+ if (has_nopl == nopl_signature)
870+ set_cpu_cap(c, X86_FEATURE_NOPL);
871+ }
872+}
873+
874+static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c);
875+
876+void __init early_cpu_init(void)
877+{
878+ struct cpu_vendor_dev *cvdev;
879+
880+ for (cvdev = __x86cpuvendor_start ;
881+ cvdev < __x86cpuvendor_end ;
882+ cvdev++)
883+ cpu_devs[cvdev->vendor] = cvdev->cpu_dev;
884+ early_cpu_support_print();
885+ early_identify_cpu(&boot_cpu_data);
886+}
887+
888+/* Do some early cpuid on the boot CPU to get some parameter that are
889+ needed before check_bugs. Everything advanced is in identify_cpu
890+ below. */
891+static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
892+{
893+ u32 tfms, xlvl;
894+
895+ c->loops_per_jiffy = loops_per_jiffy;
896+ c->x86_cache_size = -1;
897+ c->x86_vendor = X86_VENDOR_UNKNOWN;
898+ c->x86_model = c->x86_mask = 0; /* So far unknown... */
899+ c->x86_vendor_id[0] = '\0'; /* Unset */
900+ c->x86_model_id[0] = '\0'; /* Unset */
901+ c->x86_clflush_size = 64;
902+ c->x86_cache_alignment = c->x86_clflush_size;
903+ c->x86_max_cores = 1;
904+ c->x86_coreid_bits = 0;
905+ c->extended_cpuid_level = 0;
906+ memset(&c->x86_capability, 0, sizeof c->x86_capability);
907+
908+ /* Get vendor name */
909+ cpuid(0x00000000, (unsigned int *)&c->cpuid_level,
910+ (unsigned int *)&c->x86_vendor_id[0],
911+ (unsigned int *)&c->x86_vendor_id[8],
912+ (unsigned int *)&c->x86_vendor_id[4]);
913+
914+ get_cpu_vendor(c);
915+
916+ /* Initialize the standard set of capabilities */
917+ /* Note that the vendor-specific code below might override */
918+
919+ /* Intel-defined flags: level 0x00000001 */
920+ if (c->cpuid_level >= 0x00000001) {
921+ __u32 misc;
922+ cpuid(0x00000001, &tfms, &misc, &c->x86_capability[4],
923+ &c->x86_capability[0]);
924+ c->x86 = (tfms >> 8) & 0xf;
925+ c->x86_model = (tfms >> 4) & 0xf;
926+ c->x86_mask = tfms & 0xf;
927+ if (c->x86 == 0xf)
928+ c->x86 += (tfms >> 20) & 0xff;
929+ if (c->x86 >= 0x6)
930+ c->x86_model += ((tfms >> 16) & 0xF) << 4;
931+ if (test_cpu_cap(c, X86_FEATURE_CLFLSH))
932+ c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
933+ } else {
934+ /* Have CPUID level 0 only - unheard of */
935+ c->x86 = 4;
936+ }
937+
938+ c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xff;
939+#ifdef CONFIG_SMP
940+ c->phys_proc_id = c->initial_apicid;
941+#endif
942+ /* AMD-defined flags: level 0x80000001 */
943+ xlvl = cpuid_eax(0x80000000);
944+ c->extended_cpuid_level = xlvl;
945+ if ((xlvl & 0xffff0000) == 0x80000000) {
946+ if (xlvl >= 0x80000001) {
947+ c->x86_capability[1] = cpuid_edx(0x80000001);
948+ c->x86_capability[6] = cpuid_ecx(0x80000001);
949+ }
950+ if (xlvl >= 0x80000004)
951+ get_model_name(c); /* Default name */
952+ }
953+
954+ /* Transmeta-defined flags: level 0x80860001 */
955+ xlvl = cpuid_eax(0x80860000);
956+ if ((xlvl & 0xffff0000) == 0x80860000) {
957+ /* Don't set x86_cpuid_level here for now to not confuse. */
958+ if (xlvl >= 0x80860001)
959+ c->x86_capability[2] = cpuid_edx(0x80860001);
960+ }
961+
962+ if (c->extended_cpuid_level >= 0x80000007)
963+ c->x86_power = cpuid_edx(0x80000007);
964+
965+ if (c->extended_cpuid_level >= 0x80000008) {
966+ u32 eax = cpuid_eax(0x80000008);
967+
968+ c->x86_virt_bits = (eax >> 8) & 0xff;
969+ c->x86_phys_bits = eax & 0xff;
970+ }
971+
972+ detect_nopl(c);
973+
974+ if (c->x86_vendor != X86_VENDOR_UNKNOWN &&
975+ cpu_devs[c->x86_vendor]->c_early_init)
976+ cpu_devs[c->x86_vendor]->c_early_init(c);
977+
978+ validate_pat_support(c);
979+}
980+
981+/*
982+ * This does the hard work of actually picking apart the CPU stuff...
983+ */
984+static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
985+{
986+ int i;
987+
988+ early_identify_cpu(c);
989+
990+ init_scattered_cpuid_features(c);
991+
992+ c->apicid = phys_pkg_id(0);
993+
994+ /*
995+ * Vendor-specific initialization. In this section we
996+ * canonicalize the feature flags, meaning if there are
997+ * features a certain CPU supports which CPUID doesn't
998+ * tell us, CPUID claiming incorrect flags, or other bugs,
999+ * we handle them here.
1000+ *
1001+ * At the end of this section, c->x86_capability better
1002+ * indicate the features this CPU genuinely supports!
1003+ */
1004+ if (this_cpu->c_init)
1005+ this_cpu->c_init(c);
1006+
1007+ detect_ht(c);
1008+
1009+ /*
1010+ * On SMP, boot_cpu_data holds the common feature set between
1011+ * all CPUs; so make sure that we indicate which features are
1012+ * common between the CPUs. The first time this routine gets
1013+ * executed, c == &boot_cpu_data.
1014+ */
1015+ if (c != &boot_cpu_data) {
1016+ /* AND the already accumulated flags with these */
1017+ for (i = 0; i < NCAPINTS; i++)
1018+ boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
1019+ }
1020+
1021+ /* Clear all flags overriden by options */
1022+ for (i = 0; i < NCAPINTS; i++)
1023+ c->x86_capability[i] &= ~cleared_cpu_caps[i];
1024+
1025+#ifdef CONFIG_X86_MCE
1026+ mcheck_init(c);
1027+#endif
1028+ select_idle_routine(c);
1029+
1030+#ifdef CONFIG_NUMA
1031+ numa_add_cpu(smp_processor_id());
1032+#endif
1033+
1034+}
1035+
1036+void __cpuinit identify_boot_cpu(void)
1037+{
1038+ identify_cpu(&boot_cpu_data);
1039+}
1040+
1041+void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
1042+{
1043+ BUG_ON(c == &boot_cpu_data);
1044+ identify_cpu(c);
1045+ mtrr_ap_init();
1046+}
1047+
1048+static __init int setup_noclflush(char *arg)
1049+{
1050+ setup_clear_cpu_cap(X86_FEATURE_CLFLSH);
1051+ return 1;
1052+}
1053+__setup("noclflush", setup_noclflush);
1054+
1055+void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
1056+{
1057+ if (c->x86_model_id[0])
1058+ printk(KERN_CONT "%s", c->x86_model_id);
1059+
1060+ if (c->x86_mask || c->cpuid_level >= 0)
1061+ printk(KERN_CONT " stepping %02x\n", c->x86_mask);
1062+ else
1063+ printk(KERN_CONT "\n");
1064+}
1065+
1066+static __init int setup_disablecpuid(char *arg)
1067+{
1068+ int bit;
1069+ if (get_option(&arg, &bit) && bit < NCAPINTS*32)
1070+ setup_clear_cpu_cap(bit);
1071+ else
1072+ return 0;
1073+ return 1;
1074+}
1075+__setup("clearcpuid=", setup_disablecpuid);
1076+
1077+cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
1078+
1079+struct x8664_pda **_cpu_pda __read_mostly;
1080+EXPORT_SYMBOL(_cpu_pda);
1081+
1082+#ifndef CONFIG_X86_NO_IDT
1083+struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
1084+#endif
1085+
1086+char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss;
1087+
1088+unsigned long __supported_pte_mask __read_mostly = ~0UL;
1089+EXPORT_SYMBOL_GPL(__supported_pte_mask);
1090+
1091+static int do_not_nx __cpuinitdata;
1092+
1093+/* noexec=on|off
1094+Control non executable mappings for 64bit processes.
1095+
1096+on Enable(default)
1097+off Disable
1098+*/
1099+static int __init nonx_setup(char *str)
1100+{
1101+ if (!str)
1102+ return -EINVAL;
1103+ if (!strncmp(str, "on", 2)) {
1104+ __supported_pte_mask |= _PAGE_NX;
1105+ do_not_nx = 0;
1106+ } else if (!strncmp(str, "off", 3)) {
1107+ do_not_nx = 1;
1108+ __supported_pte_mask &= ~_PAGE_NX;
1109+ }
1110+ return 0;
1111+}
1112+early_param("noexec", nonx_setup);
1113+
1114+int force_personality32;
1115+
1116+/* noexec32=on|off
1117+Control non executable heap for 32bit processes.
1118+To control the stack too use noexec=off
1119+
1120+on PROT_READ does not imply PROT_EXEC for 32bit processes (default)
1121+off PROT_READ implies PROT_EXEC
1122+*/
1123+static int __init nonx32_setup(char *str)
1124+{
1125+ if (!strcmp(str, "on"))
1126+ force_personality32 &= ~READ_IMPLIES_EXEC;
1127+ else if (!strcmp(str, "off"))
1128+ force_personality32 |= READ_IMPLIES_EXEC;
1129+ return 1;
1130+}
1131+__setup("noexec32=", nonx32_setup);
1132+
1133+static void __init_refok switch_pt(int cpu)
1134+{
1135+#ifdef CONFIG_XEN
1136+ if (cpu == 0)
1137+ xen_init_pt();
1138+ xen_pt_switch(__pa_symbol(init_level4_pgt));
1139+ xen_new_user_pt(__pa_symbol(__user_pgd(init_level4_pgt)));
1140+#endif
1141+}
1142+
1143+void pda_init(int cpu)
1144+{
1145+ struct x8664_pda *pda = cpu_pda(cpu);
1146+
1147+ /* Setup up data that may be needed in __get_free_pages early */
1148+ loadsegment(fs, 0);
1149+ loadsegment(gs, 0);
1150+#ifndef CONFIG_XEN
1151+ /* Memory clobbers used to order PDA accessed */
1152+ mb();
1153+ wrmsrl(MSR_GS_BASE, pda);
1154+ mb();
1155+#else
1156+ if (HYPERVISOR_set_segment_base(SEGBASE_GS_KERNEL,
1157+ (unsigned long)pda))
1158+ BUG();
1159+#endif
1160+
1161+ pda->cpunumber = cpu;
1162+ pda->irqcount = -1;
1163+ pda->kernelstack = (unsigned long)stack_thread_info() -
1164+ PDA_STACKOFFSET + THREAD_SIZE;
1165+ pda->active_mm = &init_mm;
1166+ pda->mmu_state = 0;
1167+
1168+ if (cpu == 0) {
1169+ /* others are initialized in smpboot.c */
1170+ pda->pcurrent = &init_task;
1171+ pda->irqstackptr = boot_cpu_stack;
1172+ pda->irqstackptr += IRQSTACKSIZE - 64;
1173+ } else {
1174+ if (!pda->irqstackptr) {
1175+ pda->irqstackptr = (char *)
1176+ __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
1177+ if (!pda->irqstackptr)
1178+ panic("cannot allocate irqstack for cpu %d",
1179+ cpu);
1180+ pda->irqstackptr += IRQSTACKSIZE - 64;
1181+ }
1182+
1183+ if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE)
1184+ pda->nodenumber = cpu_to_node(cpu);
1185+ }
1186+
1187+ switch_pt(cpu);
1188+}
1189+
1190+#ifndef CONFIG_X86_NO_TSS
1191+char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ +
1192+ DEBUG_STKSZ] __page_aligned_bss;
1193+#endif
1194+
1195+extern asmlinkage void ignore_sysret(void);
1196+
1197+void __cpuinit syscall_init(void)
1198+{
1199+#ifndef CONFIG_XEN
1200+ /*
1201+ * LSTAR and STAR live in a bit strange symbiosis.
1202+ * They both write to the same internal register. STAR allows to
1203+ * set CS/DS but only a 32bit target. LSTAR sets the 64bit rip.
1204+ */
1205+ wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32);
1206+ wrmsrl(MSR_LSTAR, system_call);
1207+ wrmsrl(MSR_CSTAR, ignore_sysret);
1208+
1209+ /* Flags to clear on syscall */
1210+ wrmsrl(MSR_SYSCALL_MASK,
1211+ X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL);
1212+#endif
1213+#ifdef CONFIG_IA32_EMULATION
1214+ syscall32_cpu_init();
1215+#else
1216+ static const struct callback_register __cpuinitconst cstar = {
1217+ .type = CALLBACKTYPE_syscall32,
1218+ .address = (unsigned long)ignore_sysret
1219+ };
1220+
1221+ if (HYPERVISOR_callback_op(CALLBACKOP_register, &cstar))
1222+ printk(KERN_WARNING "Unable to register CSTAR callback\n");
1223+#endif
1224+}
1225+
1226+void __cpuinit check_efer(void)
1227+{
1228+ unsigned long efer;
1229+
1230+ rdmsrl(MSR_EFER, efer);
1231+ if (!(efer & EFER_NX) || do_not_nx)
1232+ __supported_pte_mask &= ~_PAGE_NX;
1233+}
1234+
1235+unsigned long kernel_eflags;
1236+
1237+#ifndef CONFIG_X86_NO_TSS
1238+/*
1239+ * Copies of the original ist values from the tss are only accessed during
1240+ * debugging, no special alignment required.
1241+ */
1242+DEFINE_PER_CPU(struct orig_ist, orig_ist);
1243+#endif
1244+
1245+/*
1246+ * cpu_init() initializes state that is per-CPU. Some data is already
1247+ * initialized (naturally) in the bootstrap process, such as the GDT
1248+ * and IDT. We reload them nevertheless, this function acts as a
1249+ * 'CPU state barrier', nothing should get across.
1250+ * A lot of state is already set up in PDA init.
1251+ */
1252+void __cpuinit cpu_init(void)
1253+{
1254+ int cpu = stack_smp_processor_id();
1255+#ifndef CONFIG_X86_NO_TSS
1256+ struct tss_struct *t = &per_cpu(init_tss, cpu);
1257+ struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu);
1258+ unsigned long v;
1259+ char *estacks = NULL;
1260+ int i;
1261+#endif
1262+ struct task_struct *me;
1263+
1264+ /* CPU 0 is initialised in head64.c */
1265+ if (cpu != 0)
1266+ pda_init(cpu);
1267+#ifndef CONFIG_X86_NO_TSS
1268+ else
1269+ estacks = boot_exception_stacks;
1270+#endif
1271+
1272+ me = current;
1273+
1274+ if (cpu_test_and_set(cpu, cpu_initialized))
1275+ panic("CPU#%d already initialized!\n", cpu);
1276+
1277+ printk(KERN_INFO "Initializing CPU#%d\n", cpu);
1278+
1279+ clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
1280+
1281+ /*
1282+ * Initialize the per-CPU GDT with the boot GDT,
1283+ * and set up the GDT descriptor:
1284+ */
1285+
1286+ switch_to_new_gdt();
1287+#ifndef CONFIG_X86_NO_IDT
1288+ load_idt((const struct desc_ptr *)&idt_descr);
1289+#endif
1290+
1291+ memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8);
1292+ syscall_init();
1293+
1294+ wrmsrl(MSR_FS_BASE, 0);
1295+ wrmsrl(MSR_KERNEL_GS_BASE, 0);
1296+ barrier();
1297+
1298+ check_efer();
1299+
1300+#ifndef CONFIG_X86_NO_TSS
1301+ /*
1302+ * set up and load the per-CPU TSS
1303+ */
1304+ if (!orig_ist->ist[0]) {
1305+ static const unsigned int order[N_EXCEPTION_STACKS] = {
1306+ [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
1307+ [DEBUG_STACK - 1] = DEBUG_STACK_ORDER
1308+ };
1309+ for (v = 0; v < N_EXCEPTION_STACKS; v++) {
1310+ if (cpu) {
1311+ estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]);
1312+ if (!estacks)
1313+ panic("Cannot allocate exception "
1314+ "stack %ld %d\n", v, cpu);
1315+ }
1316+ estacks += PAGE_SIZE << order[v];
1317+ orig_ist->ist[v] = t->x86_tss.ist[v] =
1318+ (unsigned long)estacks;
1319+ }
1320+ }
1321+
1322+ t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
1323+ /*
1324+ * <= is required because the CPU will access up to
1325+ * 8 bits beyond the end of the IO permission bitmap.
1326+ */
1327+ for (i = 0; i <= IO_BITMAP_LONGS; i++)
1328+ t->io_bitmap[i] = ~0UL;
1329+#endif
1330+
1331+ atomic_inc(&init_mm.mm_count);
1332+ me->active_mm = &init_mm;
1333+ if (me->mm)
1334+ BUG();
1335+ enter_lazy_tlb(&init_mm, me);
1336+
1337+ load_sp0(t, &current->thread);
1338+#ifndef CONFIG_X86_NO_TSS
1339+ set_tss_desc(cpu, t);
1340+ load_TR_desc();
1341+#endif
1342+ load_LDT(&init_mm.context);
1343+
1344+#ifdef CONFIG_KGDB
1345+ /*
1346+ * If the kgdb is connected no debug regs should be altered. This
1347+ * is only applicable when KGDB and a KGDB I/O module are built
1348+ * into the kernel and you are using early debugging with
1349+ * kgdbwait. KGDB will control the kernel HW breakpoint registers.
1350+ */
1351+ if (kgdb_connected && arch_kgdb_ops.correct_hw_break)
1352+ arch_kgdb_ops.correct_hw_break();
1353+ else {
1354+#endif
1355+ /*
1356+ * Clear all 6 debug registers:
1357+ */
1358+
1359+ set_debugreg(0UL, 0);
1360+ set_debugreg(0UL, 1);
1361+ set_debugreg(0UL, 2);
1362+ set_debugreg(0UL, 3);
1363+ set_debugreg(0UL, 6);
1364+ set_debugreg(0UL, 7);
1365+#ifdef CONFIG_KGDB
1366+ /* If the kgdb is connected no debug regs should be altered. */
1367+ }
1368+#endif
1369+
1370+ fpu_init();
1371+
1372+ asm ("pushfq; popq %0" : "=rm" (kernel_eflags));
1373+ if (raw_irqs_disabled())
1374+ kernel_eflags &= ~X86_EFLAGS_IF;
1375+
1376+ if (is_uv_system())
1377+ uv_cpu_init();
1378+}
1379--- /dev/null 1970-01-01 00:00:00.000000000 +0000
1380+++ sle11-2009-10-16/arch/x86/kernel/e820-xen.c	2009-06-04 10:21:39.000000000 +0200
1381@@ -0,0 +1,1545 @@
1382+/*
1383+ * Handle the memory map.
1384+ * The functions here do the job until bootmem takes over.
1385+ *
1386+ * Getting sanitize_e820_map() in sync with i386 version by applying change:
1387+ * - Provisions for empty E820 memory regions (reported by certain BIOSes).
1388+ * Alex Achenbach <xela@slit.de>, December 2002.
1389+ * Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
1390+ *
1391+ */
1392+#include <linux/kernel.h>
1393+#include <linux/types.h>
1394+#include <linux/init.h>
1395+#include <linux/bootmem.h>
1396+#include <linux/ioport.h>
1397+#include <linux/string.h>
1398+#include <linux/kexec.h>
1399+#include <linux/module.h>
1400+#include <linux/mm.h>
1401+#include <linux/pfn.h>
1402+#include <linux/suspend.h>
1403+#include <linux/firmware-map.h>
1404+
1405+#include <asm/pgtable.h>
1406+#include <asm/page.h>
1407+#include <asm/e820.h>
1408+#include <asm/proto.h>
1409+#include <asm/setup.h>
1410+#include <xen/interface/memory.h>
1411+
1412+/*
1413+ * The e820 map is the map that gets modified e.g. with command line parameters
1414+ * and that is also registered with modifications in the kernel resource tree
1415+ * with the iomem_resource as parent.
1416+ *
1417+ * The e820_saved is directly saved after the BIOS-provided memory map is
1418+ * copied. It doesn't get modified afterwards. It's registered for the
1419+ * /sys/firmware/memmap interface.
1420+ *
1421+ * That memory map is not modified and is used as base for kexec. The kexec'd
1422+ * kernel should get the same memory map as the firmware provides. Then the
1423+ * user can e.g. boot the original kernel with mem=1G while still booting the
1424+ * next kernel with full memory.
1425+ */
1426+struct e820map e820;
1427+#ifndef CONFIG_XEN
1428+struct e820map e820_saved;
1429+#else
1430+static struct e820map machine_e820;
1431+#define e820_saved machine_e820
1432+#endif
1433+
1434+/* For PCI or other memory-mapped resources */
1435+unsigned long pci_mem_start = 0xaeedbabe;
1436+#ifdef CONFIG_PCI
1437+EXPORT_SYMBOL(pci_mem_start);
1438+#endif
1439+
1440+/*
1441+ * This function checks if any part of the range <start,end> is mapped
1442+ * with type.
1443+ */
1444+int
1445+e820_any_mapped(u64 start, u64 end, unsigned type)
1446+{
1447+ int i;
1448+
1449+#ifndef CONFIG_XEN
1450+ for (i = 0; i < e820.nr_map; i++) {
1451+ struct e820entry *ei = &e820.map[i];
1452+#else
1453+ if (!is_initial_xendomain())
1454+ return 0;
1455+ for (i = 0; i < machine_e820.nr_map; ++i) {
1456+ const struct e820entry *ei = &machine_e820.map[i];
1457+#endif
1458+
1459+ if (type && ei->type != type)
1460+ continue;
1461+ if (ei->addr >= end || ei->addr + ei->size <= start)
1462+ continue;
1463+ return 1;
1464+ }
1465+ return 0;
1466+}
1467+EXPORT_SYMBOL_GPL(e820_any_mapped);
1468+
1469+/*
1470+ * This function checks if the entire range <start,end> is mapped with type.
1471+ *
1472+ * Note: this function only works correct if the e820 table is sorted and
1473+ * not-overlapping, which is the case
1474+ */
1475+int __init e820_all_mapped(u64 start, u64 end, unsigned type)
1476+{
1477+ int i;
1478+
1479+#ifndef CONFIG_XEN
1480+ for (i = 0; i < e820.nr_map; i++) {
1481+ struct e820entry *ei = &e820.map[i];
1482+#else
1483+ if (!is_initial_xendomain())
1484+ return 0;
1485+ for (i = 0; i < machine_e820.nr_map; ++i) {
1486+ const struct e820entry *ei = &machine_e820.map[i];
1487+#endif
1488+
1489+ if (type && ei->type != type)
1490+ continue;
1491+ /* is the region (part) in overlap with the current region ?*/
1492+ if (ei->addr >= end || ei->addr + ei->size <= start)
1493+ continue;
1494+
1495+ /* if the region is at the beginning of <start,end> we move
1496+ * start to the end of the region since it's ok until there
1497+ */
1498+ if (ei->addr <= start)
1499+ start = ei->addr + ei->size;
1500+ /*
1501+ * if start is now at or beyond end, we're done, full
1502+ * coverage
1503+ */
1504+ if (start >= end)
1505+ return 1;
1506+ }
1507+ return 0;
1508+}
1509+
1510+/*
1511+ * Add a memory region to the kernel e820 map.
1512+ */
1513+void __init e820_add_region(u64 start, u64 size, int type)
1514+{
1515+ int x = e820.nr_map;
1516+
1517+ if (x == ARRAY_SIZE(e820.map)) {
1518+ printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
1519+ return;
1520+ }
1521+
1522+ e820.map[x].addr = start;
1523+ e820.map[x].size = size;
1524+ e820.map[x].type = type;
1525+ e820.nr_map++;
1526+}
1527+
1528+void __init e820_print_map(char *who)
1529+{
1530+ int i;
1531+
1532+ for (i = 0; i < e820.nr_map; i++) {
1533+ printk(KERN_INFO " %s: %016Lx - %016Lx ", who,
1534+ (unsigned long long) e820.map[i].addr,
1535+ (unsigned long long)
1536+ (e820.map[i].addr + e820.map[i].size));
1537+ switch (e820.map[i].type) {
1538+ case E820_RAM:
1539+ case E820_RESERVED_KERN:
1540+ printk(KERN_CONT "(usable)\n");
1541+ break;
1542+ case E820_RESERVED:
1543+ printk(KERN_CONT "(reserved)\n");
1544+ break;
1545+ case E820_ACPI:
1546+ printk(KERN_CONT "(ACPI data)\n");
1547+ break;
1548+ case E820_NVS:
1549+ printk(KERN_CONT "(ACPI NVS)\n");
1550+ break;
1551+ default:
1552+ printk(KERN_CONT "type %u\n", e820.map[i].type);
1553+ break;
1554+ }
1555+ }
1556+}
1557+
1558+/*
1559+ * Sanitize the BIOS e820 map.
1560+ *
1561+ * Some e820 responses include overlapping entries. The following
1562+ * replaces the original e820 map with a new one, removing overlaps,
1563+ * and resolving conflicting memory types in favor of highest
1564+ * numbered type.
1565+ *
1566+ * The input parameter biosmap points to an array of 'struct
1567+ * e820entry' which on entry has elements in the range [0, *pnr_map)
1568+ * valid, and which has space for up to max_nr_map entries.
1569+ * On return, the resulting sanitized e820 map entries will be in
1570+ * overwritten in the same location, starting at biosmap.
1571+ *
1572+ * The integer pointed to by pnr_map must be valid on entry (the
1573+ * current number of valid entries located at biosmap) and will
1574+ * be updated on return, with the new number of valid entries
1575+ * (something no more than max_nr_map.)
1576+ *
1577+ * The return value from sanitize_e820_map() is zero if it
1578+ * successfully 'sanitized' the map entries passed in, and is -1
1579+ * if it did nothing, which can happen if either of (1) it was
1580+ * only passed one map entry, or (2) any of the input map entries
1581+ * were invalid (start + size < start, meaning that the size was
1582+ * so big the described memory range wrapped around through zero.)
1583+ *
1584+ * Visually we're performing the following
1585+ * (1,2,3,4 = memory types)...
1586+ *
1587+ * Sample memory map (w/overlaps):
1588+ * ____22__________________
1589+ * ______________________4_
1590+ * ____1111________________
1591+ * _44_____________________
1592+ * 11111111________________
1593+ * ____________________33__
1594+ * ___________44___________
1595+ * __________33333_________
1596+ * ______________22________
1597+ * ___________________2222_
1598+ * _________111111111______
1599+ * _____________________11_
1600+ * _________________4______
1601+ *
1602+ * Sanitized equivalent (no overlap):
1603+ * 1_______________________
1604+ * _44_____________________
1605+ * ___1____________________
1606+ * ____22__________________
1607+ * ______11________________
1608+ * _________1______________
1609+ * __________3_____________
1610+ * ___________44___________
1611+ * _____________33_________
1612+ * _______________2________
1613+ * ________________1_______
1614+ * _________________4______
1615+ * ___________________2____
1616+ * ____________________33__
1617+ * ______________________4_
1618+ */
1619+
1620+int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map,
1621+ int *pnr_map)
1622+{
1623+ struct change_member {
1624+ struct e820entry *pbios; /* pointer to original bios entry */
1625+ unsigned long long addr; /* address for this change point */
1626+ };
1627+ static struct change_member change_point_list[2*E820_X_MAX] __initdata;
1628+ static struct change_member *change_point[2*E820_X_MAX] __initdata;
1629+ static struct e820entry *overlap_list[E820_X_MAX] __initdata;
1630+ static struct e820entry new_bios[E820_X_MAX] __initdata;
1631+ struct change_member *change_tmp;
1632+ unsigned long current_type, last_type;
1633+ unsigned long long last_addr;
1634+ int chgidx, still_changing;
1635+ int overlap_entries;
1636+ int new_bios_entry;
1637+ int old_nr, new_nr, chg_nr;
1638+ int i;
1639+
1640+ /* if there's only one memory region, don't bother */
1641+#ifdef CONFIG_XEN
1642+ if (*pnr_map == 1)
1643+ return 0;
1644+#endif
1645+ if (*pnr_map < 2)
1646+ return -1;
1647+
1648+ old_nr = *pnr_map;
1649+ BUG_ON(old_nr > max_nr_map);
1650+
1651+ /* bail out if we find any unreasonable addresses in bios map */
1652+ for (i = 0; i < old_nr; i++)
1653+ if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
1654+ return -1;
1655+
1656+ /* create pointers for initial change-point information (for sorting) */
1657+ for (i = 0; i < 2 * old_nr; i++)
1658+ change_point[i] = &change_point_list[i];
1659+
1660+ /* record all known change-points (starting and ending addresses),
1661+ omitting those that are for empty memory regions */
1662+ chgidx = 0;
1663+ for (i = 0; i < old_nr; i++) {
1664+ if (biosmap[i].size != 0) {
1665+ change_point[chgidx]->addr = biosmap[i].addr;
1666+ change_point[chgidx++]->pbios = &biosmap[i];
1667+ change_point[chgidx]->addr = biosmap[i].addr +
1668+ biosmap[i].size;
1669+ change_point[chgidx++]->pbios = &biosmap[i];
1670+ }
1671+ }
1672+ chg_nr = chgidx;
1673+
1674+ /* sort change-point list by memory addresses (low -> high) */
1675+ still_changing = 1;
1676+ while (still_changing) {
1677+ still_changing = 0;
1678+ for (i = 1; i < chg_nr; i++) {
1679+ unsigned long long curaddr, lastaddr;
1680+ unsigned long long curpbaddr, lastpbaddr;
1681+
1682+ curaddr = change_point[i]->addr;
1683+ lastaddr = change_point[i - 1]->addr;
1684+ curpbaddr = change_point[i]->pbios->addr;
1685+ lastpbaddr = change_point[i - 1]->pbios->addr;
1686+
1687+ /*
1688+ * swap entries, when:
1689+ *
1690+ * curaddr > lastaddr or
1691+ * curaddr == lastaddr and curaddr == curpbaddr and
1692+ * lastaddr != lastpbaddr
1693+ */
1694+ if (curaddr < lastaddr ||
1695+ (curaddr == lastaddr && curaddr == curpbaddr &&
1696+ lastaddr != lastpbaddr)) {
1697+ change_tmp = change_point[i];
1698+ change_point[i] = change_point[i-1];
1699+ change_point[i-1] = change_tmp;
1700+ still_changing = 1;
1701+ }
1702+ }
1703+ }
1704+
1705+ /* create a new bios memory map, removing overlaps */
1706+ overlap_entries = 0; /* number of entries in the overlap table */
1707+ new_bios_entry = 0; /* index for creating new bios map entries */
1708+ last_type = 0; /* start with undefined memory type */
1709+ last_addr = 0; /* start with 0 as last starting address */
1710+
1711+ /* loop through change-points, determining affect on the new bios map */
1712+ for (chgidx = 0; chgidx < chg_nr; chgidx++) {
1713+ /* keep track of all overlapping bios entries */
1714+ if (change_point[chgidx]->addr ==
1715+ change_point[chgidx]->pbios->addr) {
1716+ /*
1717+ * add map entry to overlap list (> 1 entry
1718+ * implies an overlap)
1719+ */
1720+ overlap_list[overlap_entries++] =
1721+ change_point[chgidx]->pbios;
1722+ } else {
1723+ /*
1724+ * remove entry from list (order independent,
1725+ * so swap with last)
1726+ */
1727+ for (i = 0; i < overlap_entries; i++) {
1728+ if (overlap_list[i] ==
1729+ change_point[chgidx]->pbios)
1730+ overlap_list[i] =
1731+ overlap_list[overlap_entries-1];
1732+ }
1733+ overlap_entries--;
1734+ }
1735+ /*
1736+ * if there are overlapping entries, decide which
1737+ * "type" to use (larger value takes precedence --
1738+ * 1=usable, 2,3,4,4+=unusable)
1739+ */
1740+ current_type = 0;
1741+ for (i = 0; i < overlap_entries; i++)
1742+ if (overlap_list[i]->type > current_type)
1743+ current_type = overlap_list[i]->type;
1744+ /*
1745+ * continue building up new bios map based on this
1746+ * information
1747+ */
1748+ if (current_type != last_type) {
1749+ if (last_type != 0) {
1750+ new_bios[new_bios_entry].size =
1751+ change_point[chgidx]->addr - last_addr;
1752+ /*
1753+ * move forward only if the new size
1754+ * was non-zero
1755+ */
1756+ if (new_bios[new_bios_entry].size != 0)
1757+ /*
1758+ * no more space left for new
1759+ * bios entries ?
1760+ */
1761+ if (++new_bios_entry >= max_nr_map)
1762+ break;
1763+ }
1764+ if (current_type != 0) {
1765+ new_bios[new_bios_entry].addr =
1766+ change_point[chgidx]->addr;
1767+ new_bios[new_bios_entry].type = current_type;
1768+ last_addr = change_point[chgidx]->addr;
1769+ }
1770+ last_type = current_type;
1771+ }
1772+ }
1773+ /* retain count for new bios entries */
1774+ new_nr = new_bios_entry;
1775+
1776+ /* copy new bios mapping into original location */
1777+ memcpy(biosmap, new_bios, new_nr * sizeof(struct e820entry));
1778+ *pnr_map = new_nr;
1779+
1780+ return 0;
1781+}
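As a minimal illustration of the change-point pass above (the demo array and its values are made up; E820_RAM/E820_RESERVED are the standard type macros), a RAM entry overlapped by a reserved entry comes out as two non-overlapping entries, with the higher type winning inside the overlap:

	struct e820entry demo[4] = {
		{ .addr = 0x000000, .size = 0xa0000, .type = E820_RAM      },
		{ .addr = 0x090000, .size = 0x10000, .type = E820_RESERVED },
	};
	int demo_nr = 2;

	sanitize_e820_map(demo, ARRAY_SIZE(demo), &demo_nr);
	/*
	 * demo now holds (demo_nr == 2, no overlap left):
	 *   0x000000 - 0x08ffff  E820_RAM
	 *   0x090000 - 0x09ffff  E820_RESERVED
	 */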
1782+
1783+static int __init __append_e820_map(struct e820entry *biosmap, int nr_map)
1784+{
1785+ while (nr_map) {
1786+ u64 start = biosmap->addr;
1787+ u64 size = biosmap->size;
1788+ u64 end = start + size;
1789+ u32 type = biosmap->type;
1790+
1791+ /* Overflow in 64 bits? Ignore the memory map. */
1792+ if (start > end)
1793+ return -1;
1794+
1795+ e820_add_region(start, size, type);
1796+
1797+ biosmap++;
1798+ nr_map--;
1799+ }
1800+ return 0;
1801+}
1802+
1803+/*
1804+ * Copy the BIOS e820 map into a safe place.
1805+ *
1806+ * Sanity-check it while we're at it..
1807+ *
1808+ * If we're lucky and live on a modern system, the setup code
1809+ * will have given us a memory map that we can use to properly
1810+ * set up memory. If we aren't, we'll fake a memory map.
1811+ */
1812+static int __init append_e820_map(struct e820entry *biosmap, int nr_map)
1813+{
1814+#ifndef CONFIG_XEN
1815+ /* Only one memory region (or negative)? Ignore it */
1816+ if (nr_map < 2)
1817+ return -1;
1818+#else
1819+ BUG_ON(nr_map < 1);
1820+#endif
1821+
1822+ return __append_e820_map(biosmap, nr_map);
1823+}
1824+
1825+static u64 __init e820_update_range_map(struct e820map *e820x, u64 start,
1826+ u64 size, unsigned old_type,
1827+ unsigned new_type)
1828+{
1829+ unsigned int i, x;
1830+ u64 real_updated_size = 0;
1831+
1832+ BUG_ON(old_type == new_type);
1833+
1834+ if (size > (ULLONG_MAX - start))
1835+ size = ULLONG_MAX - start;
1836+
1837+ for (i = 0; i < e820x->nr_map; i++) {
1838+ struct e820entry *ei = &e820x->map[i];
1839+ u64 final_start, final_end;
1840+ if (ei->type != old_type)
1841+ continue;
1842+ /* totally covered? */
1843+ if (ei->addr >= start &&
1844+ (ei->addr + ei->size) <= (start + size)) {
1845+ ei->type = new_type;
1846+ real_updated_size += ei->size;
1847+ continue;
1848+ }
1849+ /* partially covered */
1850+ final_start = max(start, ei->addr);
1851+ final_end = min(start + size, ei->addr + ei->size);
1852+ if (final_start >= final_end)
1853+ continue;
1854+
1855+ x = e820x->nr_map;
1856+ if (x == ARRAY_SIZE(e820x->map)) {
1857+ printk(KERN_ERR "Too many memory map entries!\n");
1858+ break;
1859+ }
1860+ e820x->map[x].addr = final_start;
1861+ e820x->map[x].size = final_end - final_start;
1862+ e820x->map[x].type = new_type;
1863+ e820x->nr_map++;
1864+
1865+ real_updated_size += final_end - final_start;
1866+
1867+ if (ei->addr < final_start)
1868+ continue;
1869+ ei->addr = final_end;
1870+ ei->size -= final_end - final_start;
1871+ }
1872+ return real_updated_size;
1873+}
1874+
1875+u64 __init e820_update_range(u64 start, u64 size, unsigned old_type,
1876+ unsigned new_type)
1877+{
1878+ return e820_update_range_map(&e820, start, size, old_type, new_type);
1879+}
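A worked example may make the partial-overlap path above clearer (illustrative values, assuming the map currently holds a single RAM entry covering the first 1 MB):

	e820_update_range(0x80000, 0x80000, E820_RAM, E820_RESERVED);
	/*
	 * This appends { 0x80000, 0x80000, E820_RESERVED } and leaves the
	 * original RAM entry untouched, because that entry starts below the
	 * updated range; the temporary overlap is resolved by the next
	 * sanitize_e820_map() pass, where the larger type value (reserved)
	 * wins.
	 */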
1880+
1881+static u64 __init e820_update_range_saved(u64 start, u64 size,
1882+ unsigned old_type, unsigned new_type)
1883+{
1884+#ifdef CONFIG_XEN
1885+ if (is_initial_xendomain())
1886+ return e820_update_range_map(&machine_e820,
1887+ phys_to_machine(start), size,
1888+ old_type, new_type);
1889+#endif
1890+ return e820_update_range_map(&e820_saved, start, size, old_type,
1891+ new_type);
1892+}
1893+
1894+/* make e820 not cover the range */
1895+u64 __init e820_remove_range(u64 start, u64 size, unsigned old_type,
1896+ int checktype)
1897+{
1898+ int i;
1899+ u64 real_removed_size = 0;
1900+
1901+ if (size > (ULLONG_MAX - start))
1902+ size = ULLONG_MAX - start;
1903+
1904+ for (i = 0; i < e820.nr_map; i++) {
1905+ struct e820entry *ei = &e820.map[i];
1906+ u64 final_start, final_end;
1907+
1908+ if (checktype && ei->type != old_type)
1909+ continue;
1910+ /* totally covered? */
1911+ if (ei->addr >= start &&
1912+ (ei->addr + ei->size) <= (start + size)) {
1913+ real_removed_size += ei->size;
1914+ memset(ei, 0, sizeof(struct e820entry));
1915+ continue;
1916+ }
1917+ /* partially covered */
1918+ final_start = max(start, ei->addr);
1919+ final_end = min(start + size, ei->addr + ei->size);
1920+ if (final_start >= final_end)
1921+ continue;
1922+ real_removed_size += final_end - final_start;
1923+
1924+ ei->size -= final_end - final_start;
1925+ if (ei->addr < final_start)
1926+ continue;
1927+ ei->addr = final_end;
1928+ }
1929+ return real_removed_size;
1930+}
1931+
1932+void __init update_e820(void)
1933+{
1934+ int nr_map;
1935+
1936+ nr_map = e820.nr_map;
1937+ if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr_map))
1938+ return;
1939+ e820.nr_map = nr_map;
1940+ printk(KERN_INFO "modified physical RAM map:\n");
1941+ e820_print_map("modified");
1942+}
1943+static void __init update_e820_saved(void)
1944+{
1945+ int nr_map;
1946+
1947+ nr_map = e820_saved.nr_map;
1948+ if (sanitize_e820_map(e820_saved.map, ARRAY_SIZE(e820_saved.map), &nr_map))
1949+ return;
1950+ e820_saved.nr_map = nr_map;
1951+}
1952+
1953+#ifdef CONFIG_XEN
1954+#define e820 machine_e820
1955+#endif
1956+
1957+#define MAX_GAP_END 0x100000000ull
1958+/*
1959+ * Search for a gap in the e820 memory space from start_addr to end_addr.
1960+ */
1961+__init int e820_search_gap(unsigned long *gapstart, unsigned long *gapsize,
1962+ unsigned long start_addr, unsigned long long end_addr)
1963+{
1964+ unsigned long long last;
1965+ int i = e820.nr_map;
1966+ int found = 0;
1967+
1968+ last = (end_addr && end_addr < MAX_GAP_END) ? end_addr : MAX_GAP_END;
1969+#ifdef CONFIG_X86_64
1970+ if (start_addr >= MAX_GAP_END)
1971+ last = end_addr ?: (1UL << boot_cpu_data.x86_phys_bits);
1972+#endif
1973+
1974+ while (--i >= 0) {
1975+ unsigned long long start = e820.map[i].addr;
1976+ unsigned long long end = start + e820.map[i].size;
1977+
1978+ if (end < start_addr)
1979+ continue;
1980+
1981+ /*
1982+ * Since "last" is at most 4GB, we know we'll
1983+ * fit in 32 bits if this condition is true
1984+ */
1985+ if (last > end) {
1986+ unsigned long gap = last - end;
1987+
1988+ if (gap >= *gapsize) {
1989+ *gapsize = gap;
1990+ *gapstart = end;
1991+ found = 1;
1992+ }
1993+ }
1994+ if (start < last)
1995+ last = start;
1996+ }
1997+ return found;
1998+}
1999+
2000+/*
2001+ * Search for the biggest gap in the low 32 bits of the e820
2002+ * memory space. We pass this space to PCI to assign MMIO resources
2003+ * for hotplug or unconfigured devices in.
2004+ * Hopefully the BIOS left enough space for this.
2005+ */
2006+__init void e820_setup_gap(void)
2007+{
2008+ unsigned long gapstart, gapsize, round;
2009+ int found;
2010+
2011+ gapstart = 0x10000000;
2012+ gapsize = 0x400000;
2013+ found = e820_search_gap(&gapstart, &gapsize, 0, MAX_GAP_END);
2014+
2015+#ifdef CONFIG_X86_64
2016+ if (!found) {
2017+ printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit "
2018+ "address range\n"
2019+ KERN_ERR "PCI: Unassigned devices with 32bit resource "
2020+ "registers may break!\n");
2021+ found = e820_search_gap(&gapstart, &gapsize, MAX_GAP_END, 0);
2022+ BUG_ON(!found);
2023+ }
2024+#endif
2025+
2026+ /*
2027+ * See how much we want to round up: start off with
2028+ * rounding to the next 1MB area.
2029+ */
2030+ round = 0x100000;
2031+ while ((gapsize >> 4) > round)
2032+ round += round;
2033+ /* Fun with two's complement */
2034+ pci_mem_start = (gapstart + round) & -round;
2035+
2036+ printk(KERN_INFO
2037+ "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n",
2038+ pci_mem_start, gapstart, gapsize);
2039+}
2040+
2041+#undef e820
2042+
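The rounding used by e820_setup_gap() can be checked with a small user-space sketch (hypothetical gap values, not taken from the patch):

	#include <stdio.h>

	int main(void)
	{
		unsigned long gapstart = 0xe0000000UL;	/* gap as e820_search_gap() might report it */
		unsigned long gapsize  = 0x20000000UL;	/* a 512 MB hole below 4 GB */
		unsigned long round    = 0x100000UL;	/* start rounding at 1 MB */

		while ((gapsize >> 4) > round)
			round += round;			/* doubles up to 32 MB for this gap */

		/* -round == ~(round - 1), so the mask rounds up to a 'round' boundary */
		printf("pci_mem_start = %#lx\n", (gapstart + round) & -round);
		return 0;				/* prints pci_mem_start = 0xe2000000 */
	}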
2043+#ifndef CONFIG_XEN
2044+/**
2045+ * Because of the size limitation of struct boot_params, only first
2046+ * Because of the size limitation of struct boot_params, only the first
2047+ * 128 E820 memory entries are passed to the kernel via
2048+ * boot_params.e820_map; the rest are passed via the SETUP_E820_EXT node
2049+ * of the linked list of struct setup_data, which is parsed here.
2050+void __init parse_e820_ext(struct setup_data *sdata, unsigned long pa_data)
2051+{
2052+ u32 map_len;
2053+ int entries;
2054+ struct e820entry *extmap;
2055+
2056+ entries = sdata->len / sizeof(struct e820entry);
2057+ map_len = sdata->len + sizeof(struct setup_data);
2058+ if (map_len > PAGE_SIZE)
2059+ sdata = early_ioremap(pa_data, map_len);
2060+ extmap = (struct e820entry *)(sdata->data);
2061+ __append_e820_map(extmap, entries);
2062+ sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
2063+ if (map_len > PAGE_SIZE)
2064+ early_iounmap(sdata, map_len);
2065+ printk(KERN_INFO "extended physical RAM map:\n");
2066+ e820_print_map("extended");
2067+}
2068+
2069+#if defined(CONFIG_X86_64) || \
2070+ (defined(CONFIG_X86_32) && defined(CONFIG_HIBERNATION))
2071+/**
2072+ * Find the ranges of physical addresses that do not correspond to
2073+ * e820 RAM areas and mark the corresponding pages as nosave for
2074+ * hibernation (32 bit) or software suspend and suspend to RAM (64 bit).
2075+ *
2076+ * This function requires the e820 map to be sorted and without any
2077+ * overlapping entries and assumes the first e820 area to be RAM.
2078+ */
2079+void __init e820_mark_nosave_regions(unsigned long limit_pfn)
2080+{
2081+ int i;
2082+ unsigned long pfn;
2083+
2084+ pfn = PFN_DOWN(e820.map[0].addr + e820.map[0].size);
2085+ for (i = 1; i < e820.nr_map; i++) {
2086+ struct e820entry *ei = &e820.map[i];
2087+
2088+ if (pfn < PFN_UP(ei->addr))
2089+ register_nosave_region(pfn, PFN_UP(ei->addr));
2090+
2091+ pfn = PFN_DOWN(ei->addr + ei->size);
2092+ if (ei->type != E820_RAM && ei->type != E820_RESERVED_KERN)
2093+ register_nosave_region(PFN_UP(ei->addr), pfn);
2094+
2095+ if (pfn >= limit_pfn)
2096+ break;
2097+ }
2098+}
2099+#endif
2100+#endif
2101+
2102+/*
2103+ * Early reserved memory areas.
2104+ */
2105+#define MAX_EARLY_RES 20
2106+
2107+struct early_res {
2108+ u64 start, end;
2109+ char name[16];
2110+ char overlap_ok;
2111+};
2112+static struct early_res early_res[MAX_EARLY_RES] __initdata = {
2113+#ifndef CONFIG_XEN
2114+ { 0, PAGE_SIZE, "BIOS data page" }, /* BIOS data page */
2115+#if defined(CONFIG_X86_64) && defined(CONFIG_X86_TRAMPOLINE)
2116+ { TRAMPOLINE_BASE, TRAMPOLINE_BASE + 2 * PAGE_SIZE, "TRAMPOLINE" },
2117+#endif
2118+#if defined(CONFIG_X86_32) && defined(CONFIG_SMP)
2119+ /*
2120+ * But first pinch a few for the stack/trampoline stuff
2121+ * FIXME: Don't need the extra page at 4K, but need to fix
2122+ * trampoline before removing it. (see the GDT stuff)
2123+ */
2124+ { PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE" },
2125+ /*
2126+ * Has to be in very low memory so we can execute
2127+ * real-mode AP code.
2128+ */
2129+ { TRAMPOLINE_BASE, TRAMPOLINE_BASE + PAGE_SIZE, "TRAMPOLINE" },
2130+#endif
2131+#endif
2132+ {}
2133+};
2134+
2135+static int __init find_overlapped_early(u64 start, u64 end)
2136+{
2137+ int i;
2138+ struct early_res *r;
2139+
2140+ for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
2141+ r = &early_res[i];
2142+ if (end > r->start && start < r->end)
2143+ break;
2144+ }
2145+
2146+ return i;
2147+}
2148+
2149+/*
2150+ * Drop the i-th range from the early reservation map,
2151+ * by copying any higher ranges down one over it, and
2152+ * clearing what had been the last slot.
2153+ */
2154+static void __init drop_range(int i)
2155+{
2156+ int j;
2157+
2158+ for (j = i + 1; j < MAX_EARLY_RES && early_res[j].end; j++)
2159+ ;
2160+
2161+ memmove(&early_res[i], &early_res[i + 1],
2162+ (j - 1 - i) * sizeof(struct early_res));
2163+
2164+ early_res[j - 1].end = 0;
2165+}
2166+
2167+/*
2168+ * Split any existing ranges that:
2169+ * 1) are marked 'overlap_ok', and
2170+ * 2) overlap with the stated range [start, end)
2171+ * into whatever portion (if any) of the existing range is entirely
2172+ * below or entirely above the stated range. Drop the portion
2173+ * of the existing range that overlaps with the stated range,
2174+ * which will allow the caller of this routine to then add that
2175+ * stated range without conflicting with any existing range.
2176+ */
2177+static void __init drop_overlaps_that_are_ok(u64 start, u64 end)
2178+{
2179+ int i;
2180+ struct early_res *r;
2181+ u64 lower_start, lower_end;
2182+ u64 upper_start, upper_end;
2183+ char name[16];
2184+
2185+ for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
2186+ r = &early_res[i];
2187+
2188+ /* Continue past non-overlapping ranges */
2189+ if (end <= r->start || start >= r->end)
2190+ continue;
2191+
2192+ /*
2193+ * Leave non-ok overlaps as is; let caller
2194+ * panic "Overlapping early reservations"
2195+ * when it hits this overlap.
2196+ */
2197+ if (!r->overlap_ok)
2198+ return;
2199+
2200+ /*
2201+ * We have an ok overlap. We will drop it from the early
2202+ * reservation map, and add back in any non-overlapping
2203+ * portions (lower or upper) as separate, overlap_ok,
2204+ * non-overlapping ranges.
2205+ */
2206+
2207+ /* 1. Note any non-overlapping (lower or upper) ranges. */
2208+ strncpy(name, r->name, sizeof(name) - 1);
2209+
2210+ lower_start = lower_end = 0;
2211+ upper_start = upper_end = 0;
2212+ if (r->start < start) {
2213+ lower_start = r->start;
2214+ lower_end = start;
2215+ }
2216+ if (r->end > end) {
2217+ upper_start = end;
2218+ upper_end = r->end;
2219+ }
2220+
2221+ /* 2. Drop the original ok overlapping range */
2222+ drop_range(i);
2223+
2224+ i--; /* resume for-loop on copied down entry */
2225+
2226+ /* 3. Add back in any non-overlapping ranges. */
2227+ if (lower_end)
2228+ reserve_early_overlap_ok(lower_start, lower_end, name);
2229+ if (upper_end)
2230+ reserve_early_overlap_ok(upper_start, upper_end, name);
2231+ }
2232+}
2233+
2234+static void __init __reserve_early(u64 start, u64 end, char *name,
2235+ int overlap_ok)
2236+{
2237+ int i;
2238+ struct early_res *r;
2239+
2240+ i = find_overlapped_early(start, end);
2241+ if (i >= MAX_EARLY_RES)
2242+ panic("Too many early reservations");
2243+ r = &early_res[i];
2244+ if (r->end)
2245+ panic("Overlapping early reservations "
2246+ "%llx-%llx %s to %llx-%llx %s\n",
2247+ start, end - 1, name?name:"", r->start,
2248+ r->end - 1, r->name);
2249+ r->start = start;
2250+ r->end = end;
2251+ r->overlap_ok = overlap_ok;
2252+ if (name)
2253+ strncpy(r->name, name, sizeof(r->name) - 1);
2254+}
2255+
2256+/*
2257+ * A few early reservations come here.
2258+ *
2259+ * The 'overlap_ok' in the name of this routine does -not- mean it
2260+ * is ok for these reservations to overlap an earlier reservation.
2261+ * Rather it means that it is ok for subsequent reservations to
2262+ * overlap this one.
2263+ *
2264+ * Use this entry point to reserve early ranges when you are doing
2265+ * so out of "Paranoia", reserving perhaps more memory than you need,
2266+ * just in case, and don't mind a subsequent overlapping reservation
2267+ * that is known to be needed.
2268+ *
2269+ * The drop_overlaps_that_are_ok() call here isn't really needed.
2270+ * It would be needed if we had two colliding 'overlap_ok'
2271+ * reservations, so that the second such would not panic on the
2272+ * overlap with the first. We don't have any such as of this
2273+ * writing, but might as well tolerate such if it happens in
2274+ * the future.
2275+ */
2276+void __init reserve_early_overlap_ok(u64 start, u64 end, char *name)
2277+{
2278+ drop_overlaps_that_are_ok(start, end);
2279+ __reserve_early(start, end, name, 1);
2280+}
2281+
2282+/*
2283+ * Most early reservations come here.
2284+ *
2285+ * We first have drop_overlaps_that_are_ok() drop any pre-existing
2286+ * 'overlap_ok' ranges, so that we can then reserve this memory
2287+ * range without risk of panic'ing on an overlapping overlap_ok
2288+ * early reservation.
2289+ */
2290+void __init reserve_early(u64 start, u64 end, char *name)
2291+{
2292+ drop_overlaps_that_are_ok(start, end);
2293+ __reserve_early(start, end, name, 0);
2294+}
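A hypothetical usage sketch of the two entry points (addresses and names are made up for illustration):

	/* Paranoid early reservation; later callers are allowed to overlap it. */
	reserve_early_overlap_ok(0x8000, 0x10000, "EBDA (paranoid)");

	/* Later, a firmware table turns out to live inside that range: */
	reserve_early(0xc000, 0xd000, "MP-table");
	/*
	 * drop_overlaps_that_are_ok() splits the first reservation into
	 * [0x8000, 0xc000) and [0xd000, 0x10000), so the second call goes in
	 * without triggering the "Overlapping early reservations" panic.
	 */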
2295+
2296+void __init free_early(u64 start, u64 end)
2297+{
2298+ struct early_res *r;
2299+ int i;
2300+
2301+ i = find_overlapped_early(start, end);
2302+ r = &early_res[i];
2303+ if (i >= MAX_EARLY_RES || r->end != end || r->start != start)
2304+ panic("free_early on not reserved area: %llx-%llx!",
2305+ start, end - 1);
2306+
2307+ drop_range(i);
2308+}
2309+
2310+void __init early_res_to_bootmem(u64 start, u64 end)
2311+{
2312+ int i, count;
2313+ u64 final_start, final_end;
2314+
2315+ count = 0;
2316+ for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++)
2317+ count++;
2318+
2319+ printk(KERN_INFO "(%d early reservations) ==> bootmem [%010llx - %010llx]\n",
2320+ count, start, end);
2321+ for (i = 0; i < count; i++) {
2322+ struct early_res *r = &early_res[i];
2323+ printk(KERN_INFO " #%d [%010llx - %010llx] %16s", i,
2324+ r->start, r->end, r->name);
2325+ final_start = max(start, r->start);
2326+ final_end = min(end, r->end);
2327+ if (final_start >= final_end) {
2328+ printk(KERN_CONT "\n");
2329+ continue;
2330+ }
2331+ printk(KERN_CONT " ==> [%010llx - %010llx]\n",
2332+ final_start, final_end);
2333+ reserve_bootmem_generic(final_start, final_end - final_start,
2334+ BOOTMEM_DEFAULT);
2335+ }
2336+}
2337+
2338+/* Check for already reserved areas */
2339+static inline int __init bad_addr(u64 *addrp, u64 size, u64 align)
2340+{
2341+ int i;
2342+ u64 addr = *addrp;
2343+ int changed = 0;
2344+ struct early_res *r;
2345+again:
2346+ i = find_overlapped_early(addr, addr + size);
2347+ r = &early_res[i];
2348+ if (i < MAX_EARLY_RES && r->end) {
2349+ *addrp = addr = round_up(r->end, align);
2350+ changed = 1;
2351+ goto again;
2352+ }
2353+ return changed;
2354+}
2355+
2356+/* Check for already reserved areas */
2357+static inline int __init bad_addr_size(u64 *addrp, u64 *sizep, u64 align)
2358+{
2359+ int i;
2360+ u64 addr = *addrp, last;
2361+ u64 size = *sizep;
2362+ int changed = 0;
2363+again:
2364+ last = addr + size;
2365+ for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
2366+ struct early_res *r = &early_res[i];
2367+ if (last > r->start && addr < r->start) {
2368+ size = r->start - addr;
2369+ changed = 1;
2370+ goto again;
2371+ }
2372+ if (last > r->end && addr < r->end) {
2373+ addr = round_up(r->end, align);
2374+ size = last - addr;
2375+ changed = 1;
2376+ goto again;
2377+ }
2378+ if (last <= r->end && addr >= r->start) {
2379+ (*sizep)++;
2380+ return 0;
2381+ }
2382+ }
2383+ if (changed) {
2384+ *addrp = addr;
2385+ *sizep = size;
2386+ }
2387+ return changed;
2388+}
2389+
2390+/*
2391+ * Find a free area with specified alignment in a specific range.
2392+ */
2393+u64 __init find_e820_area(u64 start, u64 end, u64 size, u64 align)
2394+{
2395+ int i;
2396+
2397+ for (i = 0; i < e820.nr_map; i++) {
2398+ struct e820entry *ei = &e820.map[i];
2399+ u64 addr, last;
2400+ u64 ei_last;
2401+
2402+ if (ei->type != E820_RAM)
2403+ continue;
2404+ addr = round_up(ei->addr, align);
2405+ ei_last = ei->addr + ei->size;
2406+ if (addr < start)
2407+ addr = round_up(start, align);
2408+ if (addr >= ei_last)
2409+ continue;
2410+ while (bad_addr(&addr, size, align) && addr+size <= ei_last)
2411+ ;
2412+ last = addr + size;
2413+ if (last > ei_last)
2414+ continue;
2415+ if (last > end)
2416+ continue;
2417+ return addr;
2418+ }
2419+ return -1ULL;
2420+}
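A hypothetical caller sketch (size and name are illustrative): grab a page-aligned 16 KB chunk of free RAM below 1 MB and pin it with reserve_early():

	u64 addr = find_e820_area(0, 1ULL << 20, 4 * PAGE_SIZE, PAGE_SIZE);

	if (addr == -1ULL)
		panic("no room for the demo buffer below 1MB");
	reserve_early(addr, addr + 4 * PAGE_SIZE, "DEMO BUFFER");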
2421+
2422+/*
2423+ * Find next free range after *start
2424+ */
2425+u64 __init find_e820_area_size(u64 start, u64 *sizep, u64 align)
2426+{
2427+ int i;
2428+
2429+ for (i = 0; i < e820.nr_map; i++) {
2430+ struct e820entry *ei = &e820.map[i];
2431+ u64 addr, last;
2432+ u64 ei_last;
2433+
2434+ if (ei->type != E820_RAM)
2435+ continue;
2436+ addr = round_up(ei->addr, align);
2437+ ei_last = ei->addr + ei->size;
2438+ if (addr < start)
2439+ addr = round_up(start, align);
2440+ if (addr >= ei_last)
2441+ continue;
2442+ *sizep = ei_last - addr;
2443+ while (bad_addr_size(&addr, sizep, align) &&
2444+ addr + *sizep <= ei_last)
2445+ ;
2446+ last = addr + *sizep;
2447+ if (last > ei_last)
2448+ continue;
2449+ return addr;
2450+ }
2451+
2452+ return -1ULL;
2453+}
2454+
2455+/*
2456+ * Pre-allocate a chunk of memory (typically 4k) and reserve it in e820.
2457+ */
2458+u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align)
2459+{
2460+ u64 size = 0;
2461+ u64 addr;
2462+ u64 start;
2463+#ifdef CONFIG_XEN
2464+ unsigned int order = get_order(sizet);
2465+
2466+ if (is_initial_xendomain()) {
2467+ sizet = PAGE_SIZE << order;
2468+ if (align < PAGE_SIZE)
2469+ align = PAGE_SIZE;
2470+ }
2471+#endif
2472+ for (start = startt; ; start += size) {
2473+ start = find_e820_area_size(start, &size, align);
2474+ if (!(start + 1))
2475+ return 0;
2476+ if (size >= sizet)
2477+ break;
2478+ }
2479+
2480+#ifdef CONFIG_X86_32
2481+ if (start >= MAXMEM)
2482+ return 0;
2483+ if (start + size > MAXMEM)
2484+ size = MAXMEM - start;
2485+#endif
2486+#ifdef CONFIG_XEN
2487+ if ((start >> PAGE_SHIFT) >= xen_start_info->nr_pages)
2488+ return 0;
2489+ if (PFN_UP(start + size) > xen_start_info->nr_pages)
2490+ size = ((u64)xen_start_info->nr_pages << PAGE_SHIFT) - start;
2491+#endif
2492+
2493+ addr = round_down(start + size - sizet, align);
2494+ if (addr < start)
2495+ return 0;
2496+#ifdef CONFIG_XEN
2497+ if (is_initial_xendomain()) {
2498+ int rc;
2499+ unsigned long max_initmap_pfn;
2500+
2501+ max_initmap_pfn = ALIGN(PFN_UP(__pa(xen_start_info->pt_base))
2502+ + xen_start_info->nr_pt_frames
2503+ + 1 + (1 << (19 - PAGE_SHIFT)),
2504+ 1UL << (22 - PAGE_SHIFT));
2505+#ifdef CONFIG_X86_32
2506+ if ((addr >> PAGE_SHIFT)
2507+ < max(max_initmap_pfn, max_pfn_mapped))
2508+ rc = xen_create_contiguous_region((unsigned long)
2509+ __va(addr),
2510+ order, 32);
2511+#else
2512+ if ((addr >> PAGE_SHIFT) < max_pfn_mapped)
2513+ rc = xen_create_contiguous_region((unsigned long)
2514+ __va(addr),
2515+ order, 32);
2516+ else if ((addr >> PAGE_SHIFT) < max_initmap_pfn)
2517+ rc = xen_create_contiguous_region(__START_KERNEL_map
2518+ + addr,
2519+ order, 32);
2520+#endif
2521+ else
2522+ rc = early_create_contiguous_region(addr >> PAGE_SHIFT,
2523+ order, 32);
2524+ if (rc)
2525+ return 0;
2526+ }
2527+#endif
2528+ e820_update_range(addr, sizet, E820_RAM, E820_RESERVED);
2529+ e820_update_range_saved(addr, sizet, E820_RAM, E820_RESERVED);
2530+ printk(KERN_INFO "update e820 for early_reserve_e820\n");
2531+ update_e820();
2532+ update_e820_saved();
2533+
2534+ return addr;
2535+}
2536+
2537+#ifdef CONFIG_X86_32
2538+# ifdef CONFIG_X86_PAE
2539+# define MAX_ARCH_PFN (1ULL<<(40-PAGE_SHIFT))
2540+# else
2541+# define MAX_ARCH_PFN (1ULL<<(32-PAGE_SHIFT))
2542+# endif
2543+#else /* CONFIG_X86_32 */
2544+# define MAX_ARCH_PFN MAXMEM>>PAGE_SHIFT
2545+#endif
2546+
2547+/*
2548+ * Find the highest page frame number we have available
2549+ */
2550+static unsigned long __init e820_end_pfn(unsigned long limit_pfn, unsigned type)
2551+{
2552+ int i;
2553+ unsigned long last_pfn = 0;
2554+ unsigned long max_arch_pfn = MAX_ARCH_PFN;
2555+
2556+ for (i = 0; i < e820.nr_map; i++) {
2557+ struct e820entry *ei = &e820.map[i];
2558+ unsigned long start_pfn;
2559+ unsigned long end_pfn;
2560+
2561+ if (ei->type != type)
2562+ continue;
2563+
2564+ start_pfn = ei->addr >> PAGE_SHIFT;
2565+ end_pfn = (ei->addr + ei->size) >> PAGE_SHIFT;
2566+
2567+ if (start_pfn >= limit_pfn)
2568+ continue;
2569+ if (end_pfn > limit_pfn) {
2570+ last_pfn = limit_pfn;
2571+ break;
2572+ }
2573+ if (end_pfn > last_pfn)
2574+ last_pfn = end_pfn;
2575+ }
2576+
2577+ if (last_pfn > max_arch_pfn)
2578+ last_pfn = max_arch_pfn;
2579+
2580+ printk(KERN_INFO "last_pfn = %#lx max_arch_pfn = %#lx\n",
2581+ last_pfn, max_arch_pfn);
2582+ return last_pfn;
2583+}
2584+unsigned long __init e820_end_of_ram_pfn(void)
2585+{
2586+ return e820_end_pfn(MAX_ARCH_PFN, E820_RAM);
2587+}
2588+
2589+unsigned long __init e820_end_of_low_ram_pfn(void)
2590+{
2591+ return e820_end_pfn(1UL<<(32 - PAGE_SHIFT), E820_RAM);
2592+}
2593+/*
2594+ * Finds an active region in the address range from start_pfn to last_pfn and
2595+ * returns its range in ei_startpfn and ei_endpfn for the e820 entry.
2596+ */
2597+int __init e820_find_active_region(const struct e820entry *ei,
2598+ unsigned long start_pfn,
2599+ unsigned long last_pfn,
2600+ unsigned long *ei_startpfn,
2601+ unsigned long *ei_endpfn)
2602+{
2603+ u64 align = PAGE_SIZE;
2604+
2605+ *ei_startpfn = round_up(ei->addr, align) >> PAGE_SHIFT;
2606+ *ei_endpfn = round_down(ei->addr + ei->size, align) >> PAGE_SHIFT;
2607+
2608+ /* Skip map entries smaller than a page */
2609+ if (*ei_startpfn >= *ei_endpfn)
2610+ return 0;
2611+
2612+ /* Skip if map is outside the node */
2613+ if (ei->type != E820_RAM || *ei_endpfn <= start_pfn ||
2614+ *ei_startpfn >= last_pfn)
2615+ return 0;
2616+
2617+ /* Check for overlaps */
2618+ if (*ei_startpfn < start_pfn)
2619+ *ei_startpfn = start_pfn;
2620+ if (*ei_endpfn > last_pfn)
2621+ *ei_endpfn = last_pfn;
2622+
2623+ return 1;
2624+}
2625+
2626+/* Walk the e820 map and register active regions within a node */
2627+void __init e820_register_active_regions(int nid, unsigned long start_pfn,
2628+ unsigned long last_pfn)
2629+{
2630+ unsigned long ei_startpfn;
2631+ unsigned long ei_endpfn;
2632+ int i;
2633+
2634+ for (i = 0; i < e820.nr_map; i++)
2635+ if (e820_find_active_region(&e820.map[i],
2636+ start_pfn, last_pfn,
2637+ &ei_startpfn, &ei_endpfn))
2638+ add_active_range(nid, ei_startpfn, ei_endpfn);
2639+}
2640+
2641+/*
2642+ * Find the hole size (in bytes) in the memory range.
2643+ * @start: starting address of the memory range to scan
2644+ * @end: ending address of the memory range to scan
2645+ */
2646+u64 __init e820_hole_size(u64 start, u64 end)
2647+{
2648+ unsigned long start_pfn = start >> PAGE_SHIFT;
2649+ unsigned long last_pfn = end >> PAGE_SHIFT;
2650+ unsigned long ei_startpfn, ei_endpfn, ram = 0;
2651+ int i;
2652+
2653+ for (i = 0; i < e820.nr_map; i++) {
2654+ if (e820_find_active_region(&e820.map[i],
2655+ start_pfn, last_pfn,
2656+ &ei_startpfn, &ei_endpfn))
2657+ ram += ei_endpfn - ei_startpfn;
2658+ }
2659+ return end - start - ((u64)ram << PAGE_SHIFT);
2660+}
2661+
2662+static void early_panic(char *msg)
2663+{
2664+ early_printk(msg);
2665+ panic(msg);
2666+}
2667+
2668+static int userdef __initdata;
2669+
2670+/* "mem=nopentium" disables the 4MB page tables; "mem=<size>" trims the map. */
2671+static int __init parse_memopt(char *p)
2672+{
2673+ u64 mem_size, current_end;
2674+ unsigned int i;
2675+
2676+ if (!p)
2677+ return -EINVAL;
2678+
2679+#ifdef CONFIG_X86_32
2680+ if (!strcmp(p, "nopentium")) {
2681+ setup_clear_cpu_cap(X86_FEATURE_PSE);
2682+ return 0;
2683+ }
2684+#endif
2685+
2686+ userdef = 1;
2687+ mem_size = memparse(p, &p);
2688+ e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1);
2689+
2690+ i = e820.nr_map - 1;
2691+ current_end = e820.map[i].addr + e820.map[i].size;
2692+ if (current_end < mem_size) {
2693+ /*
2694+ * The e820 map ends before our requested size so
2695+ * extend the final entry to the requested address.
2696+ */
2697+ if (e820.map[i].type == E820_RAM)
2698+ e820.map[i].size = mem_size - e820.map[i].addr;
2699+ else
2700+ e820_add_region(current_end, mem_size - current_end, E820_RAM);
2701+ }
2702+
2703+ return 0;
2704+}
2705+early_param("mem", parse_memopt);
2706+
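For example, booting with "mem=512M" removes every E820_RAM byte above 512 MB, while "mem=nopentium" on 32-bit only clears the PSE feature and leaves the map alone; and if the supplied map stops short of the requested size, the code at the end of parse_memopt() grows the last entry (or appends a RAM region) up to that address.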
2707+#ifndef CONFIG_XEN
2708+static int __init parse_memmap_opt(char *p)
2709+{
2710+ char *oldp;
2711+ u64 start_at, mem_size;
2712+
2713+ if (!p)
2714+ return -EINVAL;
2715+
2716+ if (!strncmp(p, "exactmap", 8)) {
2717+#ifdef CONFIG_CRASH_DUMP
2718+ /*
2719+ * If we are doing a crash dump, we still need to know
2720+ * the real mem size before original memory map is
2721+ * reset.
2722+ */
2723+ saved_max_pfn = e820_end_of_ram_pfn();
2724+#endif
2725+ e820.nr_map = 0;
2726+ userdef = 1;
2727+ return 0;
2728+ }
2729+
2730+ oldp = p;
2731+ mem_size = memparse(p, &p);
2732+ if (p == oldp)
2733+ return -EINVAL;
2734+
2735+ userdef = 1;
2736+ if (*p == '@') {
2737+ start_at = memparse(p+1, &p);
2738+ e820_add_region(start_at, mem_size, E820_RAM);
2739+ } else if (*p == '#') {
2740+ start_at = memparse(p+1, &p);
2741+ e820_add_region(start_at, mem_size, E820_ACPI);
2742+ } else if (*p == '$') {
2743+ start_at = memparse(p+1, &p);
2744+ e820_add_region(start_at, mem_size, E820_RESERVED);
2745+ } else
2746+ e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1);
2747+
2748+ return *p == '\0' ? 0 : -EINVAL;
2749+}
2750+early_param("memmap", parse_memmap_opt);
2751+#endif
2752+
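For reference, the syntaxes handled by parse_memmap_opt() above (native, non-Xen kernels only in this patch; values illustrative): "memmap=exactmap memmap=640K@0 memmap=1G@1M" discards the firmware map and rebuilds it from scratch, "memmap=16M#0x30000000" marks a range as ACPI data, "memmap=64M$0x20000000" hides a range as reserved, and a bare "memmap=512M" trims RAM above that size just like mem=.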
2753+void __init finish_e820_parsing(void)
2754+{
2755+ if (userdef) {
2756+ int nr = e820.nr_map;
2757+
2758+ if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr) < 0)
2759+ early_panic("Invalid user supplied memory map");
2760+ e820.nr_map = nr;
2761+
2762+ printk(KERN_INFO "user-defined physical RAM map:\n");
2763+ e820_print_map("user");
2764+ }
2765+}
2766+
2767+static inline const char *e820_type_to_string(int e820_type)
2768+{
2769+ switch (e820_type) {
2770+ case E820_RESERVED_KERN:
2771+ case E820_RAM: return "System RAM";
2772+ case E820_ACPI: return "ACPI Tables";
2773+ case E820_NVS: return "ACPI Non-volatile Storage";
2774+ default: return "reserved";
2775+ }
2776+}
2777+
2778+#ifdef CONFIG_XEN
2779+#define e820 machine_e820
2780+#endif
2781+
2782+/*
2783+ * Mark e820 reserved areas as busy for the resource manager.
2784+ */
2785+void __init e820_reserve_resources(void)
2786+{
2787+ int i;
2788+ struct resource *res;
2789+ u64 end;
2790+
2791+ res = alloc_bootmem_low(sizeof(struct resource) * e820.nr_map);
2792+ for (i = 0; i < e820.nr_map; i++) {
2793+ end = e820.map[i].addr + e820.map[i].size - 1;
2794+#ifndef CONFIG_RESOURCES_64BIT
2795+ if (end > 0x100000000ULL) {
2796+ res++;
2797+ continue;
2798+ }
2799+#endif
2800+ res->name = e820_type_to_string(e820.map[i].type);
2801+ res->start = e820.map[i].addr;
2802+ res->end = end;
2803+
2804+ res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
2805+ insert_resource(&iomem_resource, res);
2806+ res++;
2807+ }
2808+
2809+ for (i = 0; i < e820_saved.nr_map; i++) {
2810+ struct e820entry *entry = &e820_saved.map[i];
2811+ firmware_map_add_early(entry->addr,
2812+ entry->addr + entry->size - 1,
2813+ e820_type_to_string(entry->type));
2814+ }
2815+}
2816+
2817+#undef e820
2818+
2819+#ifndef CONFIG_XEN
2820+char *__init default_machine_specific_memory_setup(void)
2821+{
2822+ char *who = "BIOS-e820";
2823+ int new_nr;
2824+ /*
2825+ * Try to copy the BIOS-supplied E820-map.
2826+ *
2827+ * Otherwise fake a memory map; one section from 0k->640k,
2828+ * the next section from 1mb->appropriate_mem_k
2829+ */
2830+ new_nr = boot_params.e820_entries;
2831+ sanitize_e820_map(boot_params.e820_map,
2832+ ARRAY_SIZE(boot_params.e820_map),
2833+ &new_nr);
2834+ boot_params.e820_entries = new_nr;
2835+ if (append_e820_map(boot_params.e820_map, boot_params.e820_entries)
2836+ < 0) {
2837+ u64 mem_size;
2838+
2839+ /* compare results from other methods and take the greater */
2840+ if (boot_params.alt_mem_k
2841+ < boot_params.screen_info.ext_mem_k) {
2842+ mem_size = boot_params.screen_info.ext_mem_k;
2843+ who = "BIOS-88";
2844+ } else {
2845+ mem_size = boot_params.alt_mem_k;
2846+ who = "BIOS-e801";
2847+ }
2848+
2849+ e820.nr_map = 0;
2850+ e820_add_region(0, LOWMEMSIZE(), E820_RAM);
2851+ e820_add_region(HIGH_MEMORY, mem_size << 10, E820_RAM);
2852+ }
2853+
2854+ /* In case someone cares... */
2855+ return who;
2856+}
2857+
2858+char *__init __attribute__((weak)) machine_specific_memory_setup(void)
2859+{
2860+ if (x86_quirks->arch_memory_setup) {
2861+ char *who = x86_quirks->arch_memory_setup();
2862+
2863+ if (who)
2864+ return who;
2865+ }
2866+ return default_machine_specific_memory_setup();
2867+}
2868+#endif
2869+
2870+char * __init memory_setup(void)
2871+{
2872+ int rc, nr_map;
2873+ struct xen_memory_map memmap;
2874+ /*
2875+ * This is rather large for a stack variable but this early in
2876+	 * the boot process we know we have plenty of slack space.
2877+ */
2878+ struct e820entry map[E820MAX];
2879+
2880+ memmap.nr_entries = E820MAX;
2881+ set_xen_guest_handle(memmap.buffer, map);
2882+
2883+ rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap);
2884+ if (rc == -ENOSYS) {
2885+ memmap.nr_entries = 1;
2886+ map[0].addr = 0ULL;
2887+ map[0].size = PFN_PHYS((unsigned long long)xen_start_info->nr_pages);
2888+ /* 8MB slack (to balance backend allocations). */
2889+ map[0].size += 8ULL << 20;
2890+ map[0].type = E820_RAM;
2891+ rc = 0;
2892+ }
2893+ BUG_ON(rc);
2894+
2895+ nr_map = memmap.nr_entries;
2896+ sanitize_e820_map(map, ARRAY_SIZE(map), &nr_map);
2897+
2898+ if (append_e820_map(map, nr_map) < 0)
2899+ BUG();
2900+
2901+#ifdef CONFIG_XEN
2902+ if (is_initial_xendomain()) {
2903+ memmap.nr_entries = E820MAX;
2904+ set_xen_guest_handle(memmap.buffer, machine_e820.map);
2905+
2906+ if (HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap))
2907+ BUG();
2908+ machine_e820.nr_map = memmap.nr_entries;
2909+ }
2910+#endif
2911+
2912+ return "Xen";
2913+}
2914+
2915+void __init setup_memory_map(void)
2916+{
2917+ char *who;
2918+
2919+ who = memory_setup();
2920+#ifdef CONFIG_XEN
2921+ if (!is_initial_xendomain())
2922+#endif
2923+ memcpy(&e820_saved, &e820, sizeof(struct e820map));
2924+ printk(KERN_INFO "Xen-provided physical RAM map:\n");
2925+ e820_print_map(who);
2926+}
2927--- sle11-2009-10-16.orig/arch/x86/kernel/e820_32-xen.c	2009-03-16 16:38:05.000000000 +0100
2928+++ /dev/null 1970-01-01 00:00:00.000000000 +0000
2929@@ -1,873 +0,0 @@
2930-#include <linux/kernel.h>
2931-#include <linux/types.h>
2932-#include <linux/init.h>
2933-#include <linux/bootmem.h>
2934-#include <linux/ioport.h>
2935-#include <linux/string.h>
2936-#include <linux/kexec.h>
2937-#include <linux/module.h>
2938-#include <linux/mm.h>
2939-#include <linux/pfn.h>
2940-#include <linux/uaccess.h>
2941-#include <linux/suspend.h>
2942-
2943-#include <asm/pgtable.h>
2944-#include <asm/page.h>
2945-#include <asm/e820.h>
2946-#include <asm/setup.h>
2947-#include <xen/interface/memory.h>
2948-
2949-struct e820map e820;
2950-struct change_member {
2951- struct e820entry *pbios; /* pointer to original bios entry */
2952- unsigned long long addr; /* address for this change point */
2953-};
2954-static struct change_member change_point_list[2*E820MAX] __initdata;
2955-static struct change_member *change_point[2*E820MAX] __initdata;
2956-static struct e820entry *overlap_list[E820MAX] __initdata;
2957-static struct e820entry new_bios[E820MAX] __initdata;
2958-/* For PCI or other memory-mapped resources */
2959-unsigned long pci_mem_start = 0x10000000;
2960-#ifdef CONFIG_PCI
2961-EXPORT_SYMBOL(pci_mem_start);
2962-#endif
2963-extern int user_defined_memmap;
2964-
2965-static struct resource system_rom_resource = {
2966- .name = "System ROM",
2967- .start = 0xf0000,
2968- .end = 0xfffff,
2969- .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
2970-};
2971-
2972-static struct resource extension_rom_resource = {
2973- .name = "Extension ROM",
2974- .start = 0xe0000,
2975- .end = 0xeffff,
2976- .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
2977-};
2978-
2979-static struct resource adapter_rom_resources[] = { {
2980- .name = "Adapter ROM",
2981- .start = 0xc8000,
2982- .end = 0,
2983- .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
2984-}, {
2985- .name = "Adapter ROM",
2986- .start = 0,
2987- .end = 0,
2988- .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
2989-}, {
2990- .name = "Adapter ROM",
2991- .start = 0,
2992- .end = 0,
2993- .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
2994-}, {
2995- .name = "Adapter ROM",
2996- .start = 0,
2997- .end = 0,
2998- .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
2999-}, {
3000- .name = "Adapter ROM",
3001- .start = 0,
3002- .end = 0,
3003- .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
3004-}, {
3005- .name = "Adapter ROM",
3006- .start = 0,
3007- .end = 0,
3008- .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
3009-} };
3010-
3011-static struct resource video_rom_resource = {
3012- .name = "Video ROM",
3013- .start = 0xc0000,
3014- .end = 0xc7fff,
3015- .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
3016-};
3017-
3018-#define ROMSIGNATURE 0xaa55
3019-
3020-static int __init romsignature(const unsigned char *rom)
3021-{
3022- const unsigned short * const ptr = (const unsigned short *)rom;
3023- unsigned short sig;
3024-
3025- return probe_kernel_address(ptr, sig) == 0 && sig == ROMSIGNATURE;
3026-}
3027-
3028-static int __init romchecksum(const unsigned char *rom, unsigned long length)
3029-{
3030- unsigned char sum, c;
3031-
3032- for (sum = 0; length && probe_kernel_address(rom++, c) == 0; length--)
3033- sum += c;
3034- return !length && !sum;
3035-}
3036-
3037-static void __init probe_roms(void)
3038-{
3039- const unsigned char *rom;
3040- unsigned long start, length, upper;
3041- unsigned char c;
3042- int i;
3043-
3044-#ifdef CONFIG_XEN
3045- /* Nothing to do if not running in dom0. */
3046- if (!is_initial_xendomain())
3047- return;
3048-#endif
3049-
3050- /* video rom */
3051- upper = adapter_rom_resources[0].start;
3052- for (start = video_rom_resource.start; start < upper; start += 2048) {
3053- rom = isa_bus_to_virt(start);
3054- if (!romsignature(rom))
3055- continue;
3056-
3057- video_rom_resource.start = start;
3058-
3059- if (probe_kernel_address(rom + 2, c) != 0)
3060- continue;
3061-
3062- /* 0 < length <= 0x7f * 512, historically */
3063- length = c * 512;
3064-
3065- /* if checksum okay, trust length byte */
3066- if (length && romchecksum(rom, length))
3067- video_rom_resource.end = start + length - 1;
3068-
3069- request_resource(&iomem_resource, &video_rom_resource);
3070- break;
3071- }
3072-
3073- start = (video_rom_resource.end + 1 + 2047) & ~2047UL;
3074- if (start < upper)
3075- start = upper;
3076-
3077- /* system rom */
3078- request_resource(&iomem_resource, &system_rom_resource);
3079- upper = system_rom_resource.start;
3080-
3081- /* check for extension rom (ignore length byte!) */
3082- rom = isa_bus_to_virt((unsigned long)extension_rom_resource.start);
3083- if (romsignature(rom)) {
3084- length = extension_rom_resource.end - extension_rom_resource.start + 1;
3085- if (romchecksum(rom, length)) {
3086- request_resource(&iomem_resource, &extension_rom_resource);
3087- upper = extension_rom_resource.start;
3088- }
3089- }
3090-
3091- /* check for adapter roms on 2k boundaries */
3092- for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; start += 2048) {
3093- rom = isa_bus_to_virt(start);
3094- if (!romsignature(rom))
3095- continue;
3096-
3097- if (probe_kernel_address(rom + 2, c) != 0)
3098- continue;
3099-
3100- /* 0 < length <= 0x7f * 512, historically */
3101- length = c * 512;
3102-
3103- /* but accept any length that fits if checksum okay */
3104- if (!length || start + length > upper || !romchecksum(rom, length))
3105- continue;
3106-
3107- adapter_rom_resources[i].start = start;
3108- adapter_rom_resources[i].end = start + length - 1;
3109- request_resource(&iomem_resource, &adapter_rom_resources[i]);
3110-
3111- start = adapter_rom_resources[i++].end & ~2047UL;
3112- }
3113-}
3114-
3115-#ifdef CONFIG_XEN
3116-static struct e820map machine_e820;
3117-#define e820 machine_e820
3118-#endif
3119-
3120-/*
3121- * Request address space for all standard RAM and ROM resources
3122- * and also for regions reported as reserved by the e820.
3123- */
3124-void __init init_iomem_resources(struct resource *code_resource,
3125- struct resource *data_resource,
3126- struct resource *bss_resource)
3127-{
3128- int i;
3129-
3130- probe_roms();
3131- for (i = 0; i < e820.nr_map; i++) {
3132- struct resource *res;
3133-#ifndef CONFIG_RESOURCES_64BIT
3134- if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL)
3135- continue;
3136-#endif
3137- res = kzalloc(sizeof(struct resource), GFP_ATOMIC);
3138- switch (e820.map[i].type) {
3139- case E820_RAM: res->name = "System RAM"; break;
3140- case E820_ACPI: res->name = "ACPI Tables"; break;
3141- case E820_NVS: res->name = "ACPI Non-volatile Storage"; break;
3142- default: res->name = "reserved";
3143- }
3144- res->start = e820.map[i].addr;
3145- res->end = res->start + e820.map[i].size - 1;
3146- res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
3147- if (request_resource(&iomem_resource, res)) {
3148- kfree(res);
3149- continue;
3150- }
3151- if (e820.map[i].type == E820_RAM) {
3152- /*
3153- * We don't know which RAM region contains kernel data,
3154- * so we try it repeatedly and let the resource manager
3155- * test it.
3156- */
3157-#ifndef CONFIG_XEN
3158- request_resource(res, code_resource);
3159- request_resource(res, data_resource);
3160- request_resource(res, bss_resource);
3161-#endif
3162-#ifdef CONFIG_KEXEC
3163- if (crashk_res.start != crashk_res.end)
3164- request_resource(res, &crashk_res);
3165-#ifdef CONFIG_XEN
3166- xen_machine_kexec_register_resources(res);
3167-#endif
3168-#endif
3169- }
3170- }
3171-}
3172-
3173-#undef e820
3174-
3175-#if defined(CONFIG_PM) && defined(CONFIG_HIBERNATION)
3176-/**
3177- * e820_mark_nosave_regions - Find the ranges of physical addresses that do not
3178- * correspond to e820 RAM areas and mark the corresponding pages as nosave for
3179- * hibernation.
3180- *
3181- * This function requires the e820 map to be sorted and without any
3182- * overlapping entries and assumes the first e820 area to be RAM.
3183- */
3184-void __init e820_mark_nosave_regions(void)
3185-{
3186- int i;
3187- unsigned long pfn;
3188-
3189- pfn = PFN_DOWN(e820.map[0].addr + e820.map[0].size);
3190- for (i = 1; i < e820.nr_map; i++) {
3191- struct e820entry *ei = &e820.map[i];
3192-
3193- if (pfn < PFN_UP(ei->addr))
3194- register_nosave_region(pfn, PFN_UP(ei->addr));
3195-
3196- pfn = PFN_DOWN(ei->addr + ei->size);
3197- if (ei->type != E820_RAM)
3198- register_nosave_region(PFN_UP(ei->addr), pfn);
3199-
3200- if (pfn >= max_low_pfn)
3201- break;
3202- }
3203-}
3204-#endif
3205-
3206-void __init add_memory_region(unsigned long long start,
3207- unsigned long long size, int type)
3208-{
3209- int x;
3210-
3211- x = e820.nr_map;
3212-
3213- if (x == E820MAX) {
3214- printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
3215- return;
3216- }
3217-
3218- e820.map[x].addr = start;
3219- e820.map[x].size = size;
3220- e820.map[x].type = type;
3221- e820.nr_map++;
3222-} /* add_memory_region */
3223-
3224-/*
3225- * Sanitize the BIOS e820 map.
3226- *
3227- * Some e820 responses include overlapping entries. The following
3228- * replaces the original e820 map with a new one, removing overlaps.
3229- *
3230- */
3231-int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
3232-{
3233- struct change_member *change_tmp;
3234- unsigned long current_type, last_type;
3235- unsigned long long last_addr;
3236- int chgidx, still_changing;
3237- int overlap_entries;
3238- int new_bios_entry;
3239- int old_nr, new_nr, chg_nr;
3240- int i;
3241-
3242- /*
3243- Visually we're performing the following (1,2,3,4 = memory types)...
3244-
3245- Sample memory map (w/overlaps):
3246- ____22__________________
3247- ______________________4_
3248- ____1111________________
3249- _44_____________________
3250- 11111111________________
3251- ____________________33__
3252- ___________44___________
3253- __________33333_________
3254- ______________22________
3255- ___________________2222_
3256- _________111111111______
3257- _____________________11_
3258- _________________4______
3259-
3260- Sanitized equivalent (no overlap):
3261- 1_______________________
3262- _44_____________________
3263- ___1____________________
3264- ____22__________________
3265- ______11________________
3266- _________1______________
3267- __________3_____________
3268- ___________44___________
3269- _____________33_________
3270- _______________2________
3271- ________________1_______
3272- _________________4______
3273- ___________________2____
3274- ____________________33__
3275- ______________________4_
3276- */
3277- /* if there's only one memory region, don't bother */
3278- if (*pnr_map < 2) {
3279- return -1;
3280- }
3281-
3282- old_nr = *pnr_map;
3283-
3284- /* bail out if we find any unreasonable addresses in bios map */
3285- for (i=0; i<old_nr; i++)
3286- if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr) {
3287- return -1;
3288- }
3289-
3290- /* create pointers for initial change-point information (for sorting) */
3291- for (i=0; i < 2*old_nr; i++)
3292- change_point[i] = &change_point_list[i];
3293-
3294- /* record all known change-points (starting and ending addresses),
3295- omitting those that are for empty memory regions */
3296- chgidx = 0;
3297- for (i=0; i < old_nr; i++) {
3298- if (biosmap[i].size != 0) {
3299- change_point[chgidx]->addr = biosmap[i].addr;
3300- change_point[chgidx++]->pbios = &biosmap[i];
3301- change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size;
3302- change_point[chgidx++]->pbios = &biosmap[i];
3303- }
3304- }
3305- chg_nr = chgidx; /* true number of change-points */
3306-
3307- /* sort change-point list by memory addresses (low -> high) */
3308- still_changing = 1;
3309- while (still_changing) {
3310- still_changing = 0;
3311- for (i=1; i < chg_nr; i++) {
3312- /* if <current_addr> > <last_addr>, swap */
3313- /* or, if current=<start_addr> & last=<end_addr>, swap */
3314- if ((change_point[i]->addr < change_point[i-1]->addr) ||
3315- ((change_point[i]->addr == change_point[i-1]->addr) &&
3316- (change_point[i]->addr == change_point[i]->pbios->addr) &&
3317- (change_point[i-1]->addr != change_point[i-1]->pbios->addr))
3318- )
3319- {
3320- change_tmp = change_point[i];
3321- change_point[i] = change_point[i-1];
3322- change_point[i-1] = change_tmp;
3323- still_changing=1;
3324- }
3325- }
3326- }
3327-
3328- /* create a new bios memory map, removing overlaps */
3329- overlap_entries=0; /* number of entries in the overlap table */
3330- new_bios_entry=0; /* index for creating new bios map entries */
3331- last_type = 0; /* start with undefined memory type */
3332- last_addr = 0; /* start with 0 as last starting address */
3333- /* loop through change-points, determining affect on the new bios map */
3334- for (chgidx=0; chgidx < chg_nr; chgidx++)
3335- {
3336- /* keep track of all overlapping bios entries */
3337- if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr)
3338- {
3339- /* add map entry to overlap list (> 1 entry implies an overlap) */
3340- overlap_list[overlap_entries++]=change_point[chgidx]->pbios;
3341- }
3342- else
3343- {
3344- /* remove entry from list (order independent, so swap with last) */
3345- for (i=0; i<overlap_entries; i++)
3346- {
3347- if (overlap_list[i] == change_point[chgidx]->pbios)
3348- overlap_list[i] = overlap_list[overlap_entries-1];
3349- }
3350- overlap_entries--;
3351- }
3352- /* if there are overlapping entries, decide which "type" to use */
3353- /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */
3354- current_type = 0;
3355- for (i=0; i<overlap_entries; i++)
3356- if (overlap_list[i]->type > current_type)
3357- current_type = overlap_list[i]->type;
3358- /* continue building up new bios map based on this information */
3359- if (current_type != last_type) {
3360- if (last_type != 0) {
3361- new_bios[new_bios_entry].size =
3362- change_point[chgidx]->addr - last_addr;
3363- /* move forward only if the new size was non-zero */
3364- if (new_bios[new_bios_entry].size != 0)
3365- if (++new_bios_entry >= E820MAX)
3366- break; /* no more space left for new bios entries */
3367- }
3368- if (current_type != 0) {
3369- new_bios[new_bios_entry].addr = change_point[chgidx]->addr;
3370- new_bios[new_bios_entry].type = current_type;
3371- last_addr=change_point[chgidx]->addr;
3372- }
3373- last_type = current_type;
3374- }
3375- }
3376- new_nr = new_bios_entry; /* retain count for new bios entries */
3377-
3378- /* copy new bios mapping into original location */
3379- memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry));
3380- *pnr_map = new_nr;
3381-
3382- return 0;
3383-}
3384-
3385-/*
3386- * Copy the BIOS e820 map into a safe place.
3387- *
3388- * Sanity-check it while we're at it..
3389- *
3390- * If we're lucky and live on a modern system, the setup code
3391- * will have given us a memory map that we can use to properly
3392- * set up memory. If we aren't, we'll fake a memory map.
3393- *
3394- * We check to see that the memory map contains at least 2 elements
3395- * before we'll use it, because the detection code in setup.S may
3396- * not be perfect and most every PC known to man has two memory
3397- * regions: one from 0 to 640k, and one from 1mb up. (The IBM
3398- * thinkpad 560x, for example, does not cooperate with the memory
3399- * detection code.)
3400- */
3401-int __init copy_e820_map(struct e820entry *biosmap, int nr_map)
3402-{
3403-#ifndef CONFIG_XEN
3404- /* Only one memory region (or negative)? Ignore it */
3405- if (nr_map < 2)
3406- return -1;
3407-#else
3408- BUG_ON(nr_map < 1);
3409-#endif
3410-
3411- do {
3412- u64 start = biosmap->addr;
3413- u64 size = biosmap->size;
3414- u64 end = start + size;
3415- u32 type = biosmap->type;
3416-
3417- /* Overflow in 64 bits? Ignore the memory map. */
3418- if (start > end)
3419- return -1;
3420-
3421- add_memory_region(start, size, type);
3422- } while (biosmap++, --nr_map);
3423-
3424-#ifdef CONFIG_XEN
3425- if (is_initial_xendomain()) {
3426- struct xen_memory_map memmap;
3427-
3428- memmap.nr_entries = E820MAX;
3429- set_xen_guest_handle(memmap.buffer, machine_e820.map);
3430-
3431- if (HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap))
3432- BUG();
3433- machine_e820.nr_map = memmap.nr_entries;
3434- } else
3435- machine_e820 = e820;
3436-#endif
3437-
3438- return 0;
3439-}
3440-
3441-/*
3442- * Find the highest page frame number we have available
3443- */
3444-void __init propagate_e820_map(void)
3445-{
3446- int i;
3447-
3448- max_pfn = 0;
3449-
3450- for (i = 0; i < e820.nr_map; i++) {
3451- unsigned long start, end;
3452- /* RAM? */
3453- if (e820.map[i].type != E820_RAM)
3454- continue;
3455- start = PFN_UP(e820.map[i].addr);
3456- end = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
3457- if (start >= end)
3458- continue;
3459- if (end > max_pfn)
3460- max_pfn = end;
3461- memory_present(0, start, end);
3462- }
3463-}
3464-
3465-/*
3466- * Register fully available low RAM pages with the bootmem allocator.
3467- */
3468-void __init register_bootmem_low_pages(unsigned long max_low_pfn)
3469-{
3470- int i;
3471-
3472- for (i = 0; i < e820.nr_map; i++) {
3473- unsigned long curr_pfn, last_pfn, size;
3474- /*
3475- * Reserve usable low memory
3476- */
3477- if (e820.map[i].type != E820_RAM)
3478- continue;
3479- /*
3480- * We are rounding up the start address of usable memory:
3481- */
3482- curr_pfn = PFN_UP(e820.map[i].addr);
3483- if (curr_pfn >= max_low_pfn)
3484- continue;
3485- /*
3486- * ... and at the end of the usable range downwards:
3487- */
3488- last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
3489-
3490-#ifdef CONFIG_XEN
3491- /*
3492- * Truncate to the number of actual pages currently
3493- * present.
3494- */
3495- if (last_pfn > xen_start_info->nr_pages)
3496- last_pfn = xen_start_info->nr_pages;
3497-#endif
3498-
3499- if (last_pfn > max_low_pfn)
3500- last_pfn = max_low_pfn;
3501-
3502- /*
3503- * .. finally, did all the rounding and playing
3504- * around just make the area go away?
3505- */
3506- if (last_pfn <= curr_pfn)
3507- continue;
3508-
3509- size = last_pfn - curr_pfn;
3510- free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size));
3511- }
3512-}
3513-
3514-void __init e820_register_memory(void)
3515-{
3516- unsigned long gapstart, gapsize, round;
3517- unsigned long long last;
3518- int i;
3519-
3520-#ifdef CONFIG_XEN
3521- if (is_initial_xendomain()) {
3522- struct xen_memory_map memmap;
3523-
3524- memmap.nr_entries = E820MAX;
3525- set_xen_guest_handle(memmap.buffer, machine_e820.map);
3526-
3527- if (HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap))
3528- BUG();
3529- machine_e820.nr_map = memmap.nr_entries;
3530- }
3531- else
3532- machine_e820 = e820;
3533-#define e820 machine_e820
3534-#endif
3535-
3536- /*
3537- * Search for the biggest gap in the low 32 bits of the e820
3538- * memory space.
3539- */
3540- last = 0x100000000ull;
3541- gapstart = 0x10000000;
3542- gapsize = 0x400000;
3543- i = e820.nr_map;
3544- while (--i >= 0) {
3545- unsigned long long start = e820.map[i].addr;
3546- unsigned long long end = start + e820.map[i].size;
3547-
3548- /*
3549- * Since "last" is at most 4GB, we know we'll
3550- * fit in 32 bits if this condition is true
3551- */
3552- if (last > end) {
3553- unsigned long gap = last - end;
3554-
3555- if (gap > gapsize) {
3556- gapsize = gap;
3557- gapstart = end;
3558- }
3559- }
3560- if (start < last)
3561- last = start;
3562- }
3563-#undef e820
3564-
3565- /*
3566- * See how much we want to round up: start off with
3567- * rounding to the next 1MB area.
3568- */
3569- round = 0x100000;
3570- while ((gapsize >> 4) > round)
3571- round += round;
3572- /* Fun with two's complement */
3573- pci_mem_start = (gapstart + round) & -round;
3574-
3575- printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n",
3576- pci_mem_start, gapstart, gapsize);
3577-}
3578-
3579-void __init print_memory_map(char *who)
3580-{
3581- int i;
3582-
3583- for (i = 0; i < e820.nr_map; i++) {
3584- printk(" %s: %016Lx - %016Lx ", who,
3585- e820.map[i].addr,
3586- e820.map[i].addr + e820.map[i].size);
3587- switch (e820.map[i].type) {
3588- case E820_RAM: printk("(usable)\n");
3589- break;
3590- case E820_RESERVED:
3591- printk("(reserved)\n");
3592- break;
3593- case E820_ACPI:
3594- printk("(ACPI data)\n");
3595- break;
3596- case E820_NVS:
3597- printk("(ACPI NVS)\n");
3598- break;
3599- default: printk("type %u\n", e820.map[i].type);
3600- break;
3601- }
3602- }
3603-}
3604-
3605-void __init limit_regions(unsigned long long size)
3606-{
3607- unsigned long long current_addr = 0;
3608- int i;
3609-
3610- print_memory_map("limit_regions start");
3611- for (i = 0; i < e820.nr_map; i++) {
3612- current_addr = e820.map[i].addr + e820.map[i].size;
3613- if (current_addr < size)
3614- continue;
3615-
3616- if (e820.map[i].type != E820_RAM)
3617- continue;
3618-
3619- if (e820.map[i].addr >= size) {
3620- /*
3621- * This region starts past the end of the
3622- * requested size, skip it completely.
3623- */
3624- e820.nr_map = i;
3625- } else {
3626- e820.nr_map = i + 1;
3627- e820.map[i].size -= current_addr - size;
3628- }
3629- print_memory_map("limit_regions endfor");
3630- return;
3631- }
3632-#ifdef CONFIG_XEN
3633- if (current_addr < size) {
3634- /*
3635- * The e820 map finished before our requested size so
3636- * extend the final entry to the requested address.
3637- */
3638- --i;
3639- if (e820.map[i].type == E820_RAM)
3640- e820.map[i].size -= current_addr - size;
3641- else
3642- add_memory_region(current_addr, size - current_addr, E820_RAM);
3643- }
3644-#endif
3645- print_memory_map("limit_regions endfunc");
3646-}
3647-
3648-/*
3649- * This function checks if any part of the range <start,end> is mapped
3650- * with type.
3651- */
3652-int
3653-e820_any_mapped(u64 start, u64 end, unsigned type)
3654-{
3655- int i;
3656-
3657-#ifndef CONFIG_XEN
3658- for (i = 0; i < e820.nr_map; i++) {
3659- const struct e820entry *ei = &e820.map[i];
3660-#else
3661- if (!is_initial_xendomain())
3662- return 0;
3663- for (i = 0; i < machine_e820.nr_map; ++i) {
3664- const struct e820entry *ei = &machine_e820.map[i];
3665-#endif
3666-
3667- if (type && ei->type != type)
3668- continue;
3669- if (ei->addr >= end || ei->addr + ei->size <= start)
3670- continue;
3671- return 1;
3672- }
3673- return 0;
3674-}
3675-EXPORT_SYMBOL_GPL(e820_any_mapped);
3676-
3677- /*
3678- * This function checks if the entire range <start,end> is mapped with type.
3679- *
3680- * Note: this function only works correct if the e820 table is sorted and
3681- * not-overlapping, which is the case
3682- */
3683-int __init
3684-e820_all_mapped(unsigned long s, unsigned long e, unsigned type)
3685-{
3686- u64 start = s;
3687- u64 end = e;
3688- int i;
3689-
3690-#ifndef CONFIG_XEN
3691- for (i = 0; i < e820.nr_map; i++) {
3692- struct e820entry *ei = &e820.map[i];
3693-#else
3694- if (!is_initial_xendomain())
3695- return 0;
3696- for (i = 0; i < machine_e820.nr_map; ++i) {
3697- const struct e820entry *ei = &machine_e820.map[i];
3698-#endif
3699-
3700- if (type && ei->type != type)
3701- continue;
3702- /* is the region (part) in overlap with the current region ?*/
3703- if (ei->addr >= end || ei->addr + ei->size <= start)
3704- continue;
3705- /* if the region is at the beginning of <start,end> we move
3706- * start to the end of the region since it's ok until there
3707- */
3708- if (ei->addr <= start)
3709- start = ei->addr + ei->size;
3710- /* if start is now at or beyond end, we're done, full
3711- * coverage */
3712- if (start >= end)
3713- return 1; /* we're done */
3714- }
3715- return 0;
3716-}
3717-
3718-static int __init parse_memmap(char *arg)
3719-{
3720- if (!arg)
3721- return -EINVAL;
3722-
3723- if (strcmp(arg, "exactmap") == 0) {
3724-#ifdef CONFIG_CRASH_DUMP
3725- /* If we are doing a crash dump, we
3726- * still need to know the real mem
3727- * size before original memory map is
3728- * reset.
3729- */
3730- propagate_e820_map();
3731- saved_max_pfn = max_pfn;
3732-#endif
3733- e820.nr_map = 0;
3734- user_defined_memmap = 1;
3735- } else {
3736- /* If the user specifies memory size, we
3737- * limit the BIOS-provided memory map to
3738- * that size. exactmap can be used to specify
3739- * the exact map. mem=number can be used to
3740- * trim the existing memory map.
3741- */
3742- unsigned long long start_at, mem_size;
3743-
3744- mem_size = memparse(arg, &arg);
3745- if (*arg == '@') {
3746- start_at = memparse(arg+1, &arg);
3747- add_memory_region(start_at, mem_size, E820_RAM);
3748- } else if (*arg == '#') {
3749- start_at = memparse(arg+1, &arg);
3750- add_memory_region(start_at, mem_size, E820_ACPI);
3751- } else if (*arg == '$') {
3752- start_at = memparse(arg+1, &arg);
3753- add_memory_region(start_at, mem_size, E820_RESERVED);
3754- } else {
3755- limit_regions(mem_size);
3756- user_defined_memmap = 1;
3757- }
3758- }
3759- return 0;
3760-}
3761-early_param("memmap", parse_memmap);
3762-
3763-#ifndef CONFIG_XEN
3764-void __init update_memory_range(u64 start, u64 size, unsigned old_type,
3765- unsigned new_type)
3766-{
3767- int i;
3768-
3769- BUG_ON(old_type == new_type);
3770-
3771- for (i = 0; i < e820.nr_map; i++) {
3772- struct e820entry *ei = &e820.map[i];
3773- u64 final_start, final_end;
3774- if (ei->type != old_type)
3775- continue;
3776- /* totally covered? */
3777- if (ei->addr >= start && ei->size <= size) {
3778- ei->type = new_type;
3779- continue;
3780- }
3781- /* partially covered */
3782- final_start = max(start, ei->addr);
3783- final_end = min(start + size, ei->addr + ei->size);
3784- if (final_start >= final_end)
3785- continue;
3786- add_memory_region(final_start, final_end - final_start,
3787- new_type);
3788- }
3789-}
3790-
3791-void __init update_e820(void)
3792-{
3793- u8 nr_map;
3794-
3795- nr_map = e820.nr_map;
3796- if (sanitize_e820_map(e820.map, &nr_map))
3797- return;
3798- e820.nr_map = nr_map;
3799- printk(KERN_INFO "modified physical RAM map:\n");
3800- print_memory_map("modified");
3801-}
3802-#endif
3803--- sle11-2009-10-16.orig/arch/x86/kernel/e820_64-xen.c	2009-03-16 16:38:05.000000000 +0100
3804+++ /dev/null 1970-01-01 00:00:00.000000000 +0000
3805@@ -1,1045 +0,0 @@
3806-/*
3807- * Handle the memory map.
3808- * The functions here do the job until bootmem takes over.
3809- *
3810- * Getting sanitize_e820_map() in sync with i386 version by applying change:
3811- * - Provisions for empty E820 memory regions (reported by certain BIOSes).
3812- * Alex Achenbach <xela@slit.de>, December 2002.
3813- * Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
3814- *
3815- */
3816-#include <linux/kernel.h>
3817-#include <linux/types.h>
3818-#include <linux/init.h>
3819-#include <linux/bootmem.h>
3820-#include <linux/ioport.h>
3821-#include <linux/string.h>
3822-#include <linux/kexec.h>
3823-#include <linux/module.h>
3824-#include <linux/mm.h>
3825-#include <linux/suspend.h>
3826-#include <linux/pfn.h>
3827-
3828-#include <asm/pgtable.h>
3829-#include <asm/page.h>
3830-#include <asm/e820.h>
3831-#include <asm/proto.h>
3832-#include <asm/setup.h>
3833-#include <asm/sections.h>
3834-#include <asm/kdebug.h>
3835-#include <xen/interface/memory.h>
3836-
3837-struct e820map e820 __initdata;
3838-#ifdef CONFIG_XEN
3839-struct e820map machine_e820;
3840-#endif
3841-
3842-/*
3843- * PFN of last memory page.
3844- */
3845-unsigned long end_pfn;
3846-
3847-/*
3848- * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
3849- * The direct mapping extends to max_pfn_mapped, so that we can directly access
3850- * apertures, ACPI and other tables without having to play with fixmaps.
3851- */
3852-unsigned long max_pfn_mapped;
3853-
3854-/*
3855- * Last pfn which the user wants to use.
3856- */
3857-static unsigned long __initdata end_user_pfn = MAXMEM>>PAGE_SHIFT;
3858-
3859-/*
3860- * Early reserved memory areas.
3861- */
3862-#define MAX_EARLY_RES 20
3863-
3864-struct early_res {
3865- unsigned long start, end;
3866- char name[16];
3867-};
3868-static struct early_res early_res[MAX_EARLY_RES] __initdata = {
3869-#ifndef CONFIG_XEN
3870- { 0, PAGE_SIZE, "BIOS data page" }, /* BIOS data page */
3871-#ifdef CONFIG_X86_TRAMPOLINE
3872- { TRAMPOLINE_BASE, TRAMPOLINE_BASE + 2 * PAGE_SIZE, "TRAMPOLINE" },
3873-#endif
3874-#endif
3875- {}
3876-};
3877-
3878-void __init reserve_early(unsigned long start, unsigned long end, char *name)
3879-{
3880- int i;
3881- struct early_res *r;
3882- for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
3883- r = &early_res[i];
3884- if (end > r->start && start < r->end)
3885- panic("Overlapping early reservations %lx-%lx %s to %lx-%lx %s\n",
3886- start, end - 1, name?name:"", r->start, r->end - 1, r->name);
3887- }
3888- if (i >= MAX_EARLY_RES)
3889- panic("Too many early reservations");
3890- r = &early_res[i];
3891- r->start = start;
3892- r->end = end;
3893- if (name)
3894- strncpy(r->name, name, sizeof(r->name) - 1);
3895-}
3896-
3897-void __init free_early(unsigned long start, unsigned long end)
3898-{
3899- struct early_res *r;
3900- int i, j;
3901-
3902- for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
3903- r = &early_res[i];
3904- if (start == r->start && end == r->end)
3905- break;
3906- }
3907- if (i >= MAX_EARLY_RES || !early_res[i].end)
3908- panic("free_early on not reserved area: %lx-%lx!", start, end);
3909-
3910- for (j = i + 1; j < MAX_EARLY_RES && early_res[j].end; j++)
3911- ;
3912-
3913- memmove(&early_res[i], &early_res[i + 1],
3914- (j - 1 - i) * sizeof(struct early_res));
3915-
3916- early_res[j - 1].end = 0;
3917-}
3918-
3919-void __init early_res_to_bootmem(unsigned long start, unsigned long end)
3920-{
3921- int i;
3922- unsigned long final_start, final_end;
3923- for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
3924- struct early_res *r = &early_res[i];
3925- final_start = max(start, r->start);
3926- final_end = min(end, r->end);
3927- if (final_start >= final_end)
3928- continue;
3929- printk(KERN_INFO " early res: %d [%lx-%lx] %s\n", i,
3930- final_start, final_end - 1, r->name);
3931- reserve_bootmem_generic(final_start, final_end - final_start);
3932- }
3933-}
3934-
3935-/* Check for already reserved areas */
3936-static inline int __init
3937-bad_addr(unsigned long *addrp, unsigned long size, unsigned long align)
3938-{
3939- int i;
3940- unsigned long addr = *addrp, last;
3941- int changed = 0;
3942-again:
3943- last = addr + size;
3944- for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
3945- struct early_res *r = &early_res[i];
3946- if (last >= r->start && addr < r->end) {
3947- *addrp = addr = round_up(r->end, align);
3948- changed = 1;
3949- goto again;
3950- }
3951- }
3952- return changed;
3953-}
3954-
3955-/* Check for already reserved areas */
3956-static inline int __init
3957-bad_addr_size(unsigned long *addrp, unsigned long *sizep, unsigned long align)
3958-{
3959- int i;
3960- unsigned long addr = *addrp, last;
3961- unsigned long size = *sizep;
3962- int changed = 0;
3963-again:
3964- last = addr + size;
3965- for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
3966- struct early_res *r = &early_res[i];
3967- if (last > r->start && addr < r->start) {
3968- size = r->start - addr;
3969- changed = 1;
3970- goto again;
3971- }
3972- if (last > r->end && addr < r->end) {
3973- addr = round_up(r->end, align);
3974- size = last - addr;
3975- changed = 1;
3976- goto again;
3977- }
3978- if (last <= r->end && addr >= r->start) {
3979- (*sizep)++;
3980- return 0;
3981- }
3982- }
3983- if (changed) {
3984- *addrp = addr;
3985- *sizep = size;
3986- }
3987- return changed;
3988-}
3989-/*
3990- * This function checks if any part of the range <start,end> is mapped
3991- * with type.
3992- */
3993-int
3994-e820_any_mapped(unsigned long start, unsigned long end, unsigned type)
3995-{
3996- int i;
3997-
3998-#ifndef CONFIG_XEN
3999- for (i = 0; i < e820.nr_map; i++) {
4000- struct e820entry *ei = &e820.map[i];
4001-#else
4002- if (!is_initial_xendomain())
4003- return 0;
4004- for (i = 0; i < machine_e820.nr_map; i++) {
4005- const struct e820entry *ei = &machine_e820.map[i];
4006-#endif
4007-
4008- if (type && ei->type != type)
4009- continue;
4010- if (ei->addr >= end || ei->addr + ei->size <= start)
4011- continue;
4012- return 1;
4013- }
4014- return 0;
4015-}
4016-EXPORT_SYMBOL_GPL(e820_any_mapped);
4017-
4018-/*
4019- * This function checks if the entire range <start,end> is mapped with type.
4020- *
4021- * Note: this function only works correct if the e820 table is sorted and
4022- * not-overlapping, which is the case
4023- */
4024-int __init e820_all_mapped(unsigned long start, unsigned long end,
4025- unsigned type)
4026-{
4027- int i;
4028-
4029-#ifndef CONFIG_XEN
4030- for (i = 0; i < e820.nr_map; i++) {
4031- struct e820entry *ei = &e820.map[i];
4032-#else
4033- if (!is_initial_xendomain())
4034- return 0;
4035- for (i = 0; i < machine_e820.nr_map; i++) {
4036- const struct e820entry *ei = &machine_e820.map[i];
4037-#endif
4038-
4039- if (type && ei->type != type)
4040- continue;
4041- /* is the region (part) in overlap with the current region ?*/
4042- if (ei->addr >= end || ei->addr + ei->size <= start)
4043- continue;
4044-
4045- /* if the region is at the beginning of <start,end> we move
4046- * start to the end of the region since it's ok until there
4047- */
4048- if (ei->addr <= start)
4049- start = ei->addr + ei->size;
4050- /*
4051- * if start is now at or beyond end, we're done, full
4052- * coverage
4053- */
4054- if (start >= end)
4055- return 1;
4056- }
4057- return 0;
4058-}
4059-
4060-/*
4061- * Find a free area with specified alignment in a specific range.
4062- */
4063-unsigned long __init find_e820_area(unsigned long start, unsigned long end,
4064- unsigned long size, unsigned long align)
4065-{
4066- int i;
4067-
4068- for (i = 0; i < e820.nr_map; i++) {
4069- struct e820entry *ei = &e820.map[i];
4070- unsigned long addr, last;
4071- unsigned long ei_last;
4072-
4073- if (ei->type != E820_RAM)
4074- continue;
4075- addr = round_up(ei->addr, align);
4076- ei_last = ei->addr + ei->size;
4077- if (addr < start)
4078- addr = round_up(start, align);
4079- if (addr >= ei_last)
4080- continue;
4081- while (bad_addr(&addr, size, align) && addr+size <= ei_last)
4082- ;
4083- last = addr + size;
4084- if (last > ei_last)
4085- continue;
4086- if (last > end)
4087- continue;
4088- return addr;
4089- }
4090- return -1UL;
4091-}
4092-
4093-/*
4094- * Find next free range after *start
4095- */
4096-unsigned long __init find_e820_area_size(unsigned long start,
4097- unsigned long *sizep,
4098- unsigned long align)
4099-{
4100- int i;
4101-
4102- for (i = 0; i < e820.nr_map; i++) {
4103- struct e820entry *ei = &e820.map[i];
4104- unsigned long addr, last;
4105- unsigned long ei_last;
4106-
4107- if (ei->type != E820_RAM)
4108- continue;
4109- addr = round_up(ei->addr, align);
4110- ei_last = ei->addr + ei->size;
4111- if (addr < start)
4112- addr = round_up(start, align);
4113- if (addr >= ei_last)
4114- continue;
4115- *sizep = ei_last - addr;
4116- while (bad_addr_size(&addr, sizep, align) &&
4117- addr + *sizep <= ei_last)
4118- ;
4119- last = addr + *sizep;
4120- if (last > ei_last)
4121- continue;
4122- return addr;
4123- }
4124- return -1UL;
4125-
4126-}
4127-/*
4128- * Find the highest page frame number we have available
4129- */
4130-unsigned long __init e820_end_of_ram(void)
4131-{
4132- unsigned long end_pfn;
4133-
4134- end_pfn = find_max_pfn_with_active_regions();
4135-
4136- if (end_pfn > max_pfn_mapped)
4137- max_pfn_mapped = end_pfn;
4138- if (max_pfn_mapped > MAXMEM>>PAGE_SHIFT)
4139- max_pfn_mapped = MAXMEM>>PAGE_SHIFT;
4140- if (end_pfn > end_user_pfn)
4141- end_pfn = end_user_pfn;
4142- if (end_pfn > max_pfn_mapped)
4143- end_pfn = max_pfn_mapped;
4144-
4145- printk(KERN_INFO "max_pfn_mapped = %lu\n", max_pfn_mapped);
4146- return end_pfn;
4147-}
4148-
4149-/*
4150- * Mark e820 reserved areas as busy for the resource manager.
4151- */
4152-void __init e820_reserve_resources(struct e820entry *e820, int nr_map)
4153-{
4154- int i;
4155- struct resource *res;
4156-
4157- res = alloc_bootmem_low(sizeof(struct resource) * nr_map);
4158- for (i = 0; i < nr_map; i++) {
4159- switch (e820[i].type) {
4160- case E820_RAM: res->name = "System RAM"; break;
4161- case E820_ACPI: res->name = "ACPI Tables"; break;
4162- case E820_NVS: res->name = "ACPI Non-volatile Storage"; break;
4163- default: res->name = "reserved";
4164- }
4165- res->start = e820[i].addr;
4166- res->end = res->start + e820[i].size - 1;
4167- res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
4168- insert_resource(&iomem_resource, res);
4169- res++;
4170- }
4171-}
4172-
4173-#ifndef CONFIG_XEN
4174-/*
4175- * Find the ranges of physical addresses that do not correspond to
4176- * e820 RAM areas and mark the corresponding pages as nosave for software
4177- * suspend and suspend to RAM.
4178- *
4179- * This function requires the e820 map to be sorted and without any
4180- * overlapping entries and assumes the first e820 area to be RAM.
4181- */
4182-void __init e820_mark_nosave_regions(void)
4183-{
4184- int i;
4185- unsigned long paddr;
4186-
4187- paddr = round_down(e820.map[0].addr + e820.map[0].size, PAGE_SIZE);
4188- for (i = 1; i < e820.nr_map; i++) {
4189- struct e820entry *ei = &e820.map[i];
4190-
4191- if (paddr < ei->addr)
4192- register_nosave_region(PFN_DOWN(paddr),
4193- PFN_UP(ei->addr));
4194-
4195- paddr = round_down(ei->addr + ei->size, PAGE_SIZE);
4196- if (ei->type != E820_RAM)
4197- register_nosave_region(PFN_UP(ei->addr),
4198- PFN_DOWN(paddr));
4199-
4200- if (paddr >= (end_pfn << PAGE_SHIFT))
4201- break;
4202- }
4203-}
4204-#endif
4205-
4206-/*
4207- * Finds an active region in the address range from start_pfn to end_pfn and
4208- * returns its range in ei_startpfn and ei_endpfn for the e820 entry.
4209- */
4210-static int __init e820_find_active_region(const struct e820entry *ei,
4211- unsigned long start_pfn,
4212- unsigned long end_pfn,
4213- unsigned long *ei_startpfn,
4214- unsigned long *ei_endpfn)
4215-{
4216- *ei_startpfn = round_up(ei->addr, PAGE_SIZE) >> PAGE_SHIFT;
4217- *ei_endpfn = round_down(ei->addr + ei->size, PAGE_SIZE) >> PAGE_SHIFT;
4218-
4219- /* Skip map entries smaller than a page */
4220- if (*ei_startpfn >= *ei_endpfn)
4221- return 0;
4222-
4223- /* Check if max_pfn_mapped should be updated */
4224- if (ei->type != E820_RAM && *ei_endpfn > max_pfn_mapped)
4225- max_pfn_mapped = *ei_endpfn;
4226-
4227- /* Skip if map is outside the node */
4228- if (ei->type != E820_RAM || *ei_endpfn <= start_pfn ||
4229- *ei_startpfn >= end_pfn)
4230- return 0;
4231-
4232- /* Check for overlaps */
4233- if (*ei_startpfn < start_pfn)
4234- *ei_startpfn = start_pfn;
4235- if (*ei_endpfn > end_pfn)
4236- *ei_endpfn = end_pfn;
4237-
4238- /* Obey end_user_pfn to save on memmap */
4239- if (*ei_startpfn >= end_user_pfn)
4240- return 0;
4241- if (*ei_endpfn > end_user_pfn)
4242- *ei_endpfn = end_user_pfn;
4243-
4244- return 1;
4245-}
4246-
4247-/* Walk the e820 map and register active regions within a node */
4248-void __init
4249-e820_register_active_regions(int nid, unsigned long start_pfn,
4250- unsigned long end_pfn)
4251-{
4252- unsigned long ei_startpfn;
4253- unsigned long ei_endpfn;
4254- int i;
4255-
4256- for (i = 0; i < e820.nr_map; i++)
4257- if (e820_find_active_region(&e820.map[i],
4258- start_pfn, end_pfn,
4259- &ei_startpfn, &ei_endpfn))
4260- add_active_range(nid, ei_startpfn, ei_endpfn);
4261-}
4262-
4263-/*
4264- * Add a memory region to the kernel e820 map.
4265- */
4266-void __init add_memory_region(unsigned long start, unsigned long size, int type)
4267-{
4268- int x = e820.nr_map;
4269-
4270- if (x == E820MAX) {
4271- printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
4272- return;
4273- }
4274-
4275- e820.map[x].addr = start;
4276- e820.map[x].size = size;
4277- e820.map[x].type = type;
4278- e820.nr_map++;
4279-}
4280-
4281-/*
4282- * Find the hole size (in bytes) in the memory range.
4283- * @start: starting address of the memory range to scan
4284- * @end: ending address of the memory range to scan
4285- */
4286-unsigned long __init e820_hole_size(unsigned long start, unsigned long end)
4287-{
4288- unsigned long start_pfn = start >> PAGE_SHIFT;
4289- unsigned long end_pfn = end >> PAGE_SHIFT;
4290- unsigned long ei_startpfn, ei_endpfn, ram = 0;
4291- int i;
4292-
4293- for (i = 0; i < e820.nr_map; i++) {
4294- if (e820_find_active_region(&e820.map[i],
4295- start_pfn, end_pfn,
4296- &ei_startpfn, &ei_endpfn))
4297- ram += ei_endpfn - ei_startpfn;
4298- }
4299- return end - start - (ram << PAGE_SHIFT);
4300-}
4301-
4302-static void __init e820_print_map(char *who)
4303-{
4304- int i;
4305-
4306- for (i = 0; i < e820.nr_map; i++) {
4307- printk(KERN_INFO " %s: %016Lx - %016Lx ", who,
4308- (unsigned long long) e820.map[i].addr,
4309- (unsigned long long)
4310- (e820.map[i].addr + e820.map[i].size));
4311- switch (e820.map[i].type) {
4312- case E820_RAM:
4313- printk(KERN_CONT "(usable)\n");
4314- break;
4315- case E820_RESERVED:
4316- printk(KERN_CONT "(reserved)\n");
4317- break;
4318- case E820_ACPI:
4319- printk(KERN_CONT "(ACPI data)\n");
4320- break;
4321- case E820_NVS:
4322- printk(KERN_CONT "(ACPI NVS)\n");
4323- break;
4324- default:
4325- printk(KERN_CONT "type %u\n", e820.map[i].type);
4326- break;
4327- }
4328- }
4329-}
4330-
4331-/*
4332- * Sanitize the BIOS e820 map.
4333- *
4334- * Some e820 responses include overlapping entries. The following
4335- * replaces the original e820 map with a new one, removing overlaps.
4336- *
4337- */
4338-static int __init sanitize_e820_map(struct e820entry *biosmap, char *pnr_map)
4339-{
4340- struct change_member {
4341- struct e820entry *pbios; /* pointer to original bios entry */
4342- unsigned long long addr; /* address for this change point */
4343- };
4344- static struct change_member change_point_list[2*E820MAX] __initdata;
4345- static struct change_member *change_point[2*E820MAX] __initdata;
4346- static struct e820entry *overlap_list[E820MAX] __initdata;
4347- static struct e820entry new_bios[E820MAX] __initdata;
4348- struct change_member *change_tmp;
4349- unsigned long current_type, last_type;
4350- unsigned long long last_addr;
4351- int chgidx, still_changing;
4352- int overlap_entries;
4353- int new_bios_entry;
4354- int old_nr, new_nr, chg_nr;
4355- int i;
4356-
4357- /*
4358- Visually we're performing the following
4359- (1,2,3,4 = memory types)...
4360-
4361- Sample memory map (w/overlaps):
4362- ____22__________________
4363- ______________________4_
4364- ____1111________________
4365- _44_____________________
4366- 11111111________________
4367- ____________________33__
4368- ___________44___________
4369- __________33333_________
4370- ______________22________
4371- ___________________2222_
4372- _________111111111______
4373- _____________________11_
4374- _________________4______
4375-
4376- Sanitized equivalent (no overlap):
4377- 1_______________________
4378- _44_____________________
4379- ___1____________________
4380- ____22__________________
4381- ______11________________
4382- _________1______________
4383- __________3_____________
4384- ___________44___________
4385- _____________33_________
4386- _______________2________
4387- ________________1_______
4388- _________________4______
4389- ___________________2____
4390- ____________________33__
4391- ______________________4_
4392- */
4393-
4394- /* if there's only one memory region, don't bother */
4395- if (*pnr_map < 2)
4396- return -1;
4397-
4398- old_nr = *pnr_map;
4399-
4400- /* bail out if we find any unreasonable addresses in bios map */
4401- for (i = 0; i < old_nr; i++)
4402- if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
4403- return -1;
4404-
4405- /* create pointers for initial change-point information (for sorting) */
4406- for (i = 0; i < 2 * old_nr; i++)
4407- change_point[i] = &change_point_list[i];
4408-
4409- /* record all known change-points (starting and ending addresses),
4410- omitting those that are for empty memory regions */
4411- chgidx = 0;
4412- for (i = 0; i < old_nr; i++) {
4413- if (biosmap[i].size != 0) {
4414- change_point[chgidx]->addr = biosmap[i].addr;
4415- change_point[chgidx++]->pbios = &biosmap[i];
4416- change_point[chgidx]->addr = biosmap[i].addr +
4417- biosmap[i].size;
4418- change_point[chgidx++]->pbios = &biosmap[i];
4419- }
4420- }
4421- chg_nr = chgidx;
4422-
4423- /* sort change-point list by memory addresses (low -> high) */
4424- still_changing = 1;
4425- while (still_changing) {
4426- still_changing = 0;
4427- for (i = 1; i < chg_nr; i++) {
4428- unsigned long long curaddr, lastaddr;
4429- unsigned long long curpbaddr, lastpbaddr;
4430-
4431- curaddr = change_point[i]->addr;
4432- lastaddr = change_point[i - 1]->addr;
4433- curpbaddr = change_point[i]->pbios->addr;
4434- lastpbaddr = change_point[i - 1]->pbios->addr;
4435-
4436- /*
4437- * swap entries, when:
4438- *
4439- * curaddr > lastaddr or
4440- * curaddr == lastaddr and curaddr == curpbaddr and
4441- * lastaddr != lastpbaddr
4442- */
4443- if (curaddr < lastaddr ||
4444- (curaddr == lastaddr && curaddr == curpbaddr &&
4445- lastaddr != lastpbaddr)) {
4446- change_tmp = change_point[i];
4447- change_point[i] = change_point[i-1];
4448- change_point[i-1] = change_tmp;
4449- still_changing = 1;
4450- }
4451- }
4452- }
4453-
4454- /* create a new bios memory map, removing overlaps */
4455- overlap_entries = 0; /* number of entries in the overlap table */
4456- new_bios_entry = 0; /* index for creating new bios map entries */
4457- last_type = 0; /* start with undefined memory type */
4458- last_addr = 0; /* start with 0 as last starting address */
4459-
4460- /* loop through change-points, determining affect on the new bios map */
4461- for (chgidx = 0; chgidx < chg_nr; chgidx++) {
4462- /* keep track of all overlapping bios entries */
4463- if (change_point[chgidx]->addr ==
4464- change_point[chgidx]->pbios->addr) {
4465- /*
4466- * add map entry to overlap list (> 1 entry
4467- * implies an overlap)
4468- */
4469- overlap_list[overlap_entries++] =
4470- change_point[chgidx]->pbios;
4471- } else {
4472- /*
4473- * remove entry from list (order independent,
4474- * so swap with last)
4475- */
4476- for (i = 0; i < overlap_entries; i++) {
4477- if (overlap_list[i] ==
4478- change_point[chgidx]->pbios)
4479- overlap_list[i] =
4480- overlap_list[overlap_entries-1];
4481- }
4482- overlap_entries--;
4483- }
4484- /*
4485- * if there are overlapping entries, decide which
4486- * "type" to use (larger value takes precedence --
4487- * 1=usable, 2,3,4,4+=unusable)
4488- */
4489- current_type = 0;
4490- for (i = 0; i < overlap_entries; i++)
4491- if (overlap_list[i]->type > current_type)
4492- current_type = overlap_list[i]->type;
4493- /*
4494- * continue building up new bios map based on this
4495- * information
4496- */
4497- if (current_type != last_type) {
4498- if (last_type != 0) {
4499- new_bios[new_bios_entry].size =
4500- change_point[chgidx]->addr - last_addr;
4501- /*
4502- * move forward only if the new size
4503- * was non-zero
4504- */
4505- if (new_bios[new_bios_entry].size != 0)
4506- /*
4507- * no more space left for new
4508- * bios entries ?
4509- */
4510- if (++new_bios_entry >= E820MAX)
4511- break;
4512- }
4513- if (current_type != 0) {
4514- new_bios[new_bios_entry].addr =
4515- change_point[chgidx]->addr;
4516- new_bios[new_bios_entry].type = current_type;
4517- last_addr = change_point[chgidx]->addr;
4518- }
4519- last_type = current_type;
4520- }
4521- }
4522- /* retain count for new bios entries */
4523- new_nr = new_bios_entry;
4524-
4525- /* copy new bios mapping into original location */
4526- memcpy(biosmap, new_bios, new_nr * sizeof(struct e820entry));
4527- *pnr_map = new_nr;
4528-
4529- return 0;
4530-}
4531-
4532-/*
4533- * Copy the BIOS e820 map into a safe place.
4534- *
4535- * Sanity-check it while we're at it..
4536- *
4537- * If we're lucky and live on a modern system, the setup code
4538- * will have given us a memory map that we can use to properly
4539- * set up memory. If we aren't, we'll fake a memory map.
4540- */
4541-static int __init copy_e820_map(struct e820entry *biosmap, int nr_map)
4542-{
4543-#ifndef CONFIG_XEN
4544- /* Only one memory region (or negative)? Ignore it */
4545- if (nr_map < 2)
4546- return -1;
4547-#else
4548- BUG_ON(nr_map < 1);
4549-#endif
4550-
4551- do {
4552- u64 start = biosmap->addr;
4553- u64 size = biosmap->size;
4554- u64 end = start + size;
4555- u32 type = biosmap->type;
4556-
4557- /* Overflow in 64 bits? Ignore the memory map. */
4558- if (start > end)
4559- return -1;
4560-
4561- add_memory_region(start, size, type);
4562- } while (biosmap++, --nr_map);
4563-
4564-#ifdef CONFIG_XEN
4565- if (is_initial_xendomain()) {
4566- struct xen_memory_map memmap;
4567-
4568- memmap.nr_entries = E820MAX;
4569- set_xen_guest_handle(memmap.buffer, machine_e820.map);
4570-
4571- if (HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap))
4572- BUG();
4573- machine_e820.nr_map = memmap.nr_entries;
4574- } else
4575- machine_e820 = e820;
4576-#endif
4577-
4578- return 0;
4579-}
4580-
4581-static void early_panic(char *msg)
4582-{
4583- early_printk(msg);
4584- panic(msg);
4585-}
4586-
4587-/* We're not void only for x86 32-bit compat */
4588-char * __init machine_specific_memory_setup(void)
4589-{
4590-#ifndef CONFIG_XEN
4591- char *who = "BIOS-e820";
4592- /*
4593- * Try to copy the BIOS-supplied E820-map.
4594- *
4595- * Otherwise fake a memory map; one section from 0k->640k,
4596- * the next section from 1mb->appropriate_mem_k
4597- */
4598- sanitize_e820_map(boot_params.e820_map, &boot_params.e820_entries);
4599- if (copy_e820_map(boot_params.e820_map, boot_params.e820_entries) < 0)
4600- early_panic("Cannot find a valid memory map");
4601-#else /* CONFIG_XEN */
4602- char *who = "Xen";
4603- int rc;
4604- struct xen_memory_map memmap;
4605- /*
4606- * This is rather large for a stack variable but this early in
4607- * the boot process we know we have plenty slack space.
4608- */
4609- struct e820entry map[E820MAX];
4610-
4611- memmap.nr_entries = E820MAX;
4612- set_xen_guest_handle(memmap.buffer, map);
4613-
4614- rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap);
4615- if ( rc == -ENOSYS ) {
4616- memmap.nr_entries = 1;
4617- map[0].addr = 0ULL;
4618- map[0].size = xen_start_info->nr_pages << PAGE_SHIFT;
4619- /* 8MB slack (to balance backend allocations). */
4620- map[0].size += 8 << 20;
4621- map[0].type = E820_RAM;
4622- rc = 0;
4623- }
4624- BUG_ON(rc);
4625-
4626- sanitize_e820_map(map, (char *)&memmap.nr_entries);
4627-
4628- if (copy_e820_map(map, (char)memmap.nr_entries) < 0)
4629- early_panic("Cannot find a valid memory map");
4630-#endif
4631- printk(KERN_INFO "BIOS-provided physical RAM map:\n");
4632- e820_print_map(who);
4633-
4634- /* In case someone cares... */
4635- return who;
4636-}
4637-
4638-static int __init parse_memopt(char *p)
4639-{
4640- int i;
4641- unsigned long current_end;
4642- unsigned long end;
4643-
4644- if (!p)
4645- return -EINVAL;
4646- end_user_pfn = memparse(p, &p);
4647- end_user_pfn >>= PAGE_SHIFT;
4648-
4649- end = end_user_pfn<<PAGE_SHIFT;
4650- i = e820.nr_map-1;
4651- current_end = e820.map[i].addr + e820.map[i].size;
4652-
4653- if (current_end < end) {
4654- /*
4655- * The e820 map ends before our requested size so
4656- * extend the final entry to the requested address.
4657- */
4658- if (e820.map[i].type == E820_RAM)
4659- e820.map[i].size = end - e820.map[i].addr;
4660- else
4661- add_memory_region(current_end, end - current_end, E820_RAM);
4662- }
4663-
4664- return 0;
4665-}
4666-early_param("mem", parse_memopt);
4667-
4668-static int userdef __initdata;
4669-
4670-static int __init parse_memmap_opt(char *p)
4671-{
4672- char *oldp;
4673- unsigned long long start_at, mem_size;
4674-
4675- if (!strcmp(p, "exactmap")) {
4676-#ifdef CONFIG_CRASH_DUMP
4677- /*
4678- * If we are doing a crash dump, we still need to know
4679- * the real mem size before original memory map is
4680- * reset.
4681- */
4682- e820_register_active_regions(0, 0, -1UL);
4683- saved_max_pfn = e820_end_of_ram();
4684- remove_all_active_ranges();
4685-#endif
4686- max_pfn_mapped = 0;
4687- e820.nr_map = 0;
4688- userdef = 1;
4689- return 0;
4690- }
4691-
4692- oldp = p;
4693- mem_size = memparse(p, &p);
4694- if (p == oldp)
4695- return -EINVAL;
4696-
4697- userdef = 1;
4698- if (*p == '@') {
4699- start_at = memparse(p+1, &p);
4700- add_memory_region(start_at, mem_size, E820_RAM);
4701- } else if (*p == '#') {
4702- start_at = memparse(p+1, &p);
4703- add_memory_region(start_at, mem_size, E820_ACPI);
4704- } else if (*p == '$') {
4705- start_at = memparse(p+1, &p);
4706- add_memory_region(start_at, mem_size, E820_RESERVED);
4707- } else {
4708- end_user_pfn = (mem_size >> PAGE_SHIFT);
4709- }
4710- return *p == '\0' ? 0 : -EINVAL;
4711-}
4712-early_param("memmap", parse_memmap_opt);
4713-
4714-void __init finish_e820_parsing(void)
4715-{
4716- if (userdef) {
4717- char nr = e820.nr_map;
4718-
4719- if (sanitize_e820_map(e820.map, &nr) < 0)
4720- early_panic("Invalid user supplied memory map");
4721- e820.nr_map = nr;
4722-
4723- printk(KERN_INFO "user-defined physical RAM map:\n");
4724- e820_print_map("user");
4725- }
4726-}
4727-
4728-#ifndef CONFIG_XEN
4729-void __init update_memory_range(u64 start, u64 size, unsigned old_type,
4730- unsigned new_type)
4731-{
4732- int i;
4733-
4734- BUG_ON(old_type == new_type);
4735-
4736- for (i = 0; i < e820.nr_map; i++) {
4737- struct e820entry *ei = &e820.map[i];
4738- u64 final_start, final_end;
4739- if (ei->type != old_type)
4740- continue;
4741- /* totally covered? */
4742- if (ei->addr >= start && ei->size <= size) {
4743- ei->type = new_type;
4744- continue;
4745- }
4746- /* partially covered */
4747- final_start = max(start, ei->addr);
4748- final_end = min(start + size, ei->addr + ei->size);
4749- if (final_start >= final_end)
4750- continue;
4751- add_memory_region(final_start, final_end - final_start,
4752- new_type);
4753- }
4754-}
4755-
4756-void __init update_e820(void)
4757-{
4758- u8 nr_map;
4759-
4760- nr_map = e820.nr_map;
4761- if (sanitize_e820_map(e820.map, &nr_map))
4762- return;
4763- e820.nr_map = nr_map;
4764- printk(KERN_INFO "modified physical RAM map:\n");
4765- e820_print_map("modified");
4766-}
4767-#endif
4768-
4769-unsigned long pci_mem_start = 0xaeedbabe;
4770-EXPORT_SYMBOL(pci_mem_start);
4771-
4772-/*
4773- * Search for the biggest gap in the low 32 bits of the e820
4774- * memory space. We pass this space to PCI to assign MMIO resources
4775- * for hotplug or unconfigured devices in.
4776- * Hopefully the BIOS let enough space left.
4777- */
4778-__init void e820_setup_gap(struct e820entry *e820, int nr_map)
4779-{
4780- unsigned long gapstart, gapsize, round;
4781- unsigned long last;
4782- int i;
4783- int found = 0;
4784-
4785- last = 0x100000000ull;
4786- gapstart = 0x10000000;
4787- gapsize = 0x400000;
4788- i = nr_map;
4789- while (--i >= 0) {
4790- unsigned long long start = e820[i].addr;
4791- unsigned long long end = start + e820[i].size;
4792-
4793- /*
4794- * Since "last" is at most 4GB, we know we'll
4795- * fit in 32 bits if this condition is true
4796- */
4797- if (last > end) {
4798- unsigned long gap = last - end;
4799-
4800- if (gap > gapsize) {
4801- gapsize = gap;
4802- gapstart = end;
4803- found = 1;
4804- }
4805- }
4806- if (start < last)
4807- last = start;
4808- }
4809-
4810- if (!found) {
4811- gapstart = (end_pfn << PAGE_SHIFT) + 1024*1024;
4812- printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit "
4813- "address range\n"
4814- KERN_ERR "PCI: Unassigned devices with 32bit resource "
4815- "registers may break!\n");
4816- }
4817-
4818- /*
4819- * See how much we want to round up: start off with
4820- * rounding to the next 1MB area.
4821- */
4822- round = 0x100000;
4823- while ((gapsize >> 4) > round)
4824- round += round;
4825- /* Fun with two's complement */
4826- pci_mem_start = (gapstart + round) & -round;
4827-
4828- printk(KERN_INFO
4829- "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n",
4830- pci_mem_start, gapstart, gapsize);
4831-}
4832-
4833-int __init arch_get_ram_range(int slot, u64 *addr, u64 *size)
4834-{
4835- int i;
4836-
4837- if (slot < 0 || slot >= e820.nr_map)
4838- return -1;
4839- for (i = slot; i < e820.nr_map; i++) {
4840- if (e820.map[i].type != E820_RAM)
4841- continue;
4842- break;
4843- }
4844- if (i == e820.nr_map || e820.map[i].addr > (max_pfn << PAGE_SHIFT))
4845- return -1;
4846- *addr = e820.map[i].addr;
4847- *size = min_t(u64, e820.map[i].size + e820.map[i].addr,
4848- max_pfn << PAGE_SHIFT) - *addr;
4849- return i + 1;
4850-}
4851--- sle11-2009-10-16.orig/arch/x86/kernel/early_printk-xen.c 2009-09-24 10:29:16.000000000 +0200
4852+++ sle11-2009-10-16/arch/x86/kernel/early_printk-xen.c 2009-06-04 10:21:39.000000000 +0200
4853@@ -225,7 +225,7 @@ static struct console simnow_console = {
4854 static struct console *early_console = &early_vga_console;
4855 static int early_console_initialized;
4856
4857-void early_printk(const char *fmt, ...)
4858+asmlinkage void early_printk(const char *fmt, ...)
4859 {
4860 char buf[512];
4861 int n;
4862--- sle11-2009-10-16.orig/arch/x86/kernel/entry_32-xen.S 2009-03-16 16:38:05.000000000 +0100
4863+++ sle11-2009-10-16/arch/x86/kernel/entry_32-xen.S 2009-06-04 10:21:39.000000000 +0200
4864@@ -51,15 +51,26 @@
4865 #include <asm/percpu.h>
4866 #include <asm/dwarf2.h>
4867 #include <asm/processor-flags.h>
4868-#include "irq_vectors.h"
4869+#include <asm/ftrace.h>
4870+#include <asm/irq_vectors.h>
4871 #include <xen/interface/xen.h>
4872
4873+/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
4874+#include <linux/elf-em.h>
4875+#define AUDIT_ARCH_I386 (EM_386|__AUDIT_ARCH_LE)
4876+#define __AUDIT_ARCH_LE 0x40000000
4877+
4878+#ifndef CONFIG_AUDITSYSCALL
4879+#define sysenter_audit syscall_trace_entry
4880+#define sysexit_audit syscall_exit_work
4881+#endif
4882+
4883 /*
4884 * We use macros for low-level operations which need to be overridden
4885 * for paravirtualization. The following will never clobber any registers:
4886 * INTERRUPT_RETURN (aka. "iret")
4887 * GET_CR0_INTO_EAX (aka. "movl %cr0, %eax")
4888- * ENABLE_INTERRUPTS_SYSCALL_RET (aka "sti; sysexit").
4889+ * ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit").
4890 *
4891 * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must
4892 * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY).
4893@@ -277,11 +288,6 @@ END(resume_kernel)
4894 #endif
4895 CFI_ENDPROC
4896
4897- .macro test_tif ti_reg # system call tracing in operation / emulation
4898- /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
4899- testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(\ti_reg)
4900- .endm
4901-
4902 /* SYSENTER_RETURN points to after the "sysenter" instruction in
4903 the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */
4904
4905@@ -338,8 +344,9 @@ sysenter_past_esp:
4906 .previous
4907
4908 GET_THREAD_INFO(%ebp)
4909- test_tif %ebp
4910- jnz syscall_trace_entry
4911+ testw $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
4912+ jnz sysenter_audit
4913+sysenter_do_call:
4914 cmpl $(nr_syscalls), %eax
4915 jae syscall_badsys
4916 call *sys_call_table(,%eax,4)
4917@@ -349,14 +356,54 @@ sysenter_past_esp:
4918 TRACE_IRQS_OFF
4919 movl TI_flags(%ebp), %ecx
4920 testw $_TIF_ALLWORK_MASK, %cx
4921- jne syscall_exit_work
4922+ jne sysexit_audit
4923+sysenter_exit:
4924 /* if something modifies registers it must also disable sysexit */
4925 movl PT_EIP(%esp), %edx
4926 movl PT_OLDESP(%esp), %ecx
4927 xorl %ebp,%ebp
4928 TRACE_IRQS_ON
4929 1: mov PT_FS(%esp), %fs
4930- ENABLE_INTERRUPTS_SYSCALL_RET
4931+ ENABLE_INTERRUPTS_SYSEXIT
4932+
4933+#ifdef CONFIG_AUDITSYSCALL
4934+sysenter_audit:
4935+ testw $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
4936+ jnz syscall_trace_entry
4937+ addl $4,%esp
4938+ CFI_ADJUST_CFA_OFFSET -4
4939+ /* %esi already in 8(%esp) 6th arg: 4th syscall arg */
4940+ /* %edx already in 4(%esp) 5th arg: 3rd syscall arg */
4941+ /* %ecx already in 0(%esp) 4th arg: 2nd syscall arg */
4942+ movl %ebx,%ecx /* 3rd arg: 1st syscall arg */
4943+ movl %eax,%edx /* 2nd arg: syscall number */
4944+ movl $AUDIT_ARCH_I386,%eax /* 1st arg: audit arch */
4945+ call audit_syscall_entry
4946+ pushl %ebx
4947+ CFI_ADJUST_CFA_OFFSET 4
4948+ movl PT_EAX(%esp),%eax /* reload syscall number */
4949+ jmp sysenter_do_call
4950+
4951+sysexit_audit:
4952+ testw $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %cx
4953+ jne syscall_exit_work
4954+ TRACE_IRQS_ON
4955+ ENABLE_INTERRUPTS(CLBR_ANY)
4956+ movl %eax,%edx /* second arg, syscall return value */
4957+ cmpl $0,%eax /* is it < 0? */
4958+ setl %al /* 1 if so, 0 if not */
4959+ movzbl %al,%eax /* zero-extend that */
4960+ inc %eax /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
4961+ call audit_syscall_exit
4962+ DISABLE_INTERRUPTS(CLBR_ANY)
4963+ TRACE_IRQS_OFF
4964+ movl TI_flags(%ebp), %ecx
4965+ testw $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %cx
4966+ jne syscall_exit_work
4967+ movl PT_EAX(%esp),%eax /* reload syscall return value */
4968+ jmp sysenter_exit
4969+#endif
4970+
4971 CFI_ENDPROC
4972 .pushsection .fixup,"ax"
4973 2: movl $0,PT_FS(%esp)
4974@@ -400,7 +447,7 @@ ENTRY(system_call)
4975 CFI_ADJUST_CFA_OFFSET 4
4976 SAVE_ALL
4977 GET_THREAD_INFO(%ebp)
4978- test_tif %ebp
4979+ testw $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
4980 jnz syscall_trace_entry
4981 cmpl $(nr_syscalls), %eax
4982 jae syscall_badsys
4983@@ -413,10 +460,6 @@ syscall_exit:
4984 # setting need_resched or sigpending
4985 # between sampling and the iret
4986 TRACE_IRQS_OFF
4987- testl $X86_EFLAGS_TF,PT_EFLAGS(%esp) # If tracing set singlestep flag on exit
4988- jz no_singlestep
4989- orl $_TIF_SINGLESTEP,TI_flags(%ebp)
4990-no_singlestep:
4991 movl TI_flags(%ebp), %ecx
4992 testw $_TIF_ALLWORK_MASK, %cx # current->work
4993 jne syscall_exit_work
4994@@ -588,12 +631,8 @@ END(work_pending)
4995 syscall_trace_entry:
4996 movl $-ENOSYS,PT_EAX(%esp)
4997 movl %esp, %eax
4998- xorl %edx,%edx
4999- call do_syscall_trace
5000- cmpl $0, %eax
5001- jne resume_userspace # ret != 0 -> running under PTRACE_SYSEMU,
5002- # so must skip actual syscall
5003- movl PT_ORIG_EAX(%esp), %eax
5004+ call syscall_trace_enter
5005+ /* What it returned is what we'll actually use. */
5006 cmpl $(nr_syscalls), %eax
5007 jnae syscall_call
5008 jmp syscall_exit
5009@@ -602,14 +641,13 @@ END(syscall_trace_entry)
5010 # perform syscall exit tracing
5011 ALIGN
5012 syscall_exit_work:
5013- testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl
5014+ testb $_TIF_WORK_SYSCALL_EXIT, %cl
5015 jz work_pending
5016 TRACE_IRQS_ON
5017- ENABLE_INTERRUPTS(CLBR_ANY) # could let do_syscall_trace() call
5018+ ENABLE_INTERRUPTS(CLBR_ANY) # could let syscall_trace_leave() call
5019 # schedule() instead
5020 movl %esp, %eax
5021- movl $1, %edx
5022- call do_syscall_trace
5023+ call syscall_trace_leave
5024 jmp resume_userspace
5025 END(syscall_exit_work)
5026 CFI_ENDPROC
5027@@ -1113,10 +1151,10 @@ ENTRY(native_iret)
5028 .previous
5029 END(native_iret)
5030
5031-ENTRY(native_irq_enable_syscall_ret)
5032+ENTRY(native_irq_enable_sysexit)
5033 sti
5034 sysexit
5035-END(native_irq_enable_syscall_ret)
5036+END(native_irq_enable_sysexit)
5037 #endif
5038
5039 KPROBE_ENTRY(int3)
5040@@ -1265,6 +1303,77 @@ ENTRY(kernel_thread_helper)
5041 CFI_ENDPROC
5042 ENDPROC(kernel_thread_helper)
5043
5044+#ifdef CONFIG_FTRACE
5045+#ifdef CONFIG_DYNAMIC_FTRACE
5046+
5047+ENTRY(mcount)
5048+ pushl %eax
5049+ pushl %ecx
5050+ pushl %edx
5051+ movl 0xc(%esp), %eax
5052+ subl $MCOUNT_INSN_SIZE, %eax
5053+
5054+.globl mcount_call
5055+mcount_call:
5056+ call ftrace_stub
5057+
5058+ popl %edx
5059+ popl %ecx
5060+ popl %eax
5061+
5062+ ret
5063+END(mcount)
5064+
5065+ENTRY(ftrace_caller)
5066+ pushl %eax
5067+ pushl %ecx
5068+ pushl %edx
5069+ movl 0xc(%esp), %eax
5070+ movl 0x4(%ebp), %edx
5071+ subl $MCOUNT_INSN_SIZE, %eax
5072+
5073+.globl ftrace_call
5074+ftrace_call:
5075+ call ftrace_stub
5076+
5077+ popl %edx
5078+ popl %ecx
5079+ popl %eax
5080+
5081+.globl ftrace_stub
5082+ftrace_stub:
5083+ ret
5084+END(ftrace_caller)
5085+
5086+#else /* ! CONFIG_DYNAMIC_FTRACE */
5087+
5088+ENTRY(mcount)
5089+ cmpl $ftrace_stub, ftrace_trace_function
5090+ jnz trace
5091+.globl ftrace_stub
5092+ftrace_stub:
5093+ ret
5094+
5095+ /* taken from glibc */
5096+trace:
5097+ pushl %eax
5098+ pushl %ecx
5099+ pushl %edx
5100+ movl 0xc(%esp), %eax
5101+ movl 0x4(%ebp), %edx
5102+ subl $MCOUNT_INSN_SIZE, %eax
5103+
5104+ call *ftrace_trace_function
5105+
5106+ popl %edx
5107+ popl %ecx
5108+ popl %eax
5109+
5110+ jmp ftrace_stub
5111+END(mcount)
5112+#endif /* CONFIG_DYNAMIC_FTRACE */
5113+#endif /* CONFIG_FTRACE */
5114+
5115 #include <asm/alternative-asm.h>
5116
5117 # pv syscall call handler stub
5118@@ -1290,7 +1399,7 @@ ENTRY(ia32pv_cstar_target)
5119 .previous
5120 SAVE_ALL
5121 GET_THREAD_INFO(%ebp)
5122- test_tif %ebp
5123+ testw $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
5124 jnz cstar_trace_entry
5125 cmpl $nr_syscalls,%eax
5126 jae cstar_badsys
5127@@ -1324,29 +1433,21 @@ cstar_trace_entry:
5128 btl %eax,cstar_special
5129 jc .Lcstar_trace_special
5130 1: movl %esp,%eax
5131- xorl %edx,%edx
5132 LOCK_PREFIX
5133 orl $_TIF_CSTAR,TI_flags(%ebp)
5134- call do_syscall_trace
5135+ call syscall_trace_enter
5136 LOCK_PREFIX
5137 andl $~_TIF_CSTAR,TI_flags(%ebp)
5138- testl %eax,%eax
5139- jne .Lcstar_resume # ret != 0 -> running under PTRACE_SYSEMU,
5140- # so must skip actual syscall
5141- movl PT_ORIG_EAX(%esp),%eax
5142+ /* What it returned is what we'll actually use. */
5143 cmpl $nr_syscalls,%eax
5144 jb .Lcstar_call
5145 jmp .Lcstar_exit
5146 .Lcstar_trace_special:
5147 movl PT_ECX(%esp),%ecx
5148 movl %esp,%eax
5149- xorl %edx,%edx
5150 movl %ecx,PT_EBP(%esp) # put user EBP back in place
5151- call do_syscall_trace
5152- testl %eax,%eax
5153- jne resume_userspace # ret != 0 -> running under PTRACE_SYSEMU,
5154- # so must skip actual syscall
5155- movl PT_ORIG_EAX(%esp),%eax
5156+ call syscall_trace_enter
5157+ /* What it returned is what we'll actually use. */
5158 cmpl $nr_syscalls,%eax
5159 jb syscall_call
5160 jmp syscall_exit
5161--- sle11-2009-10-16.orig/arch/x86/kernel/entry_64.S 2009-10-28 14:55:02.000000000 +0100
5162+++ sle11-2009-10-16/arch/x86/kernel/entry_64.S 2009-06-04 10:21:39.000000000 +0200
5163@@ -1409,7 +1409,7 @@ ENTRY(arch_unwind_init_running)
5164 ENDPROC(arch_unwind_init_running)
5165 #endif
5166
5167-#ifdef CONFIG_XEN
5168+#ifdef CONFIG_PARAVIRT_XEN
5169 ENTRY(xen_hypervisor_callback)
5170 zeroentry xen_do_hypervisor_callback
5171 END(xen_hypervisor_callback)
5172@@ -1507,7 +1507,7 @@ ENTRY(xen_failsafe_callback)
5173 CFI_ENDPROC
5174 END(xen_failsafe_callback)
5175
5176-#endif /* CONFIG_XEN */
5177+#endif /* CONFIG_PARAVIRT_XEN */
5178
5179 #ifdef CONFIG_KDB
5180
5181--- sle11-2009-10-16.orig/arch/x86/kernel/entry_64-xen.S 2009-03-16 16:38:05.000000000 +0100
5182+++ sle11-2009-10-16/arch/x86/kernel/entry_64-xen.S 2009-06-04 10:21:39.000000000 +0200
5183@@ -53,19 +53,130 @@
5184 #include <asm/hw_irq.h>
5185 #include <asm/page.h>
5186 #include <asm/irqflags.h>
5187+#include <asm/ftrace.h>
5188 #include <asm/errno.h>
5189 #include <xen/interface/xen.h>
5190 #include <xen/interface/features.h>
5191
5192+/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
5193+#include <linux/elf-em.h>
5194+#define AUDIT_ARCH_X86_64 (EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)
5195+#define __AUDIT_ARCH_64BIT 0x80000000
5196+#define __AUDIT_ARCH_LE 0x40000000
5197+
5198 .code64
5199
5200+#ifdef CONFIG_FTRACE
5201+#ifdef CONFIG_DYNAMIC_FTRACE
5202+ENTRY(mcount)
5203+
5204+ subq $0x38, %rsp
5205+ movq %rax, (%rsp)
5206+ movq %rcx, 8(%rsp)
5207+ movq %rdx, 16(%rsp)
5208+ movq %rsi, 24(%rsp)
5209+ movq %rdi, 32(%rsp)
5210+ movq %r8, 40(%rsp)
5211+ movq %r9, 48(%rsp)
5212+
5213+ movq 0x38(%rsp), %rdi
5214+ subq $MCOUNT_INSN_SIZE, %rdi
5215+
5216+.globl mcount_call
5217+mcount_call:
5218+ call ftrace_stub
5219+
5220+ movq 48(%rsp), %r9
5221+ movq 40(%rsp), %r8
5222+ movq 32(%rsp), %rdi
5223+ movq 24(%rsp), %rsi
5224+ movq 16(%rsp), %rdx
5225+ movq 8(%rsp), %rcx
5226+ movq (%rsp), %rax
5227+ addq $0x38, %rsp
5228+
5229+ retq
5230+END(mcount)
5231+
5232+ENTRY(ftrace_caller)
5233+
5234+ /* taken from glibc */
5235+ subq $0x38, %rsp
5236+ movq %rax, (%rsp)
5237+ movq %rcx, 8(%rsp)
5238+ movq %rdx, 16(%rsp)
5239+ movq %rsi, 24(%rsp)
5240+ movq %rdi, 32(%rsp)
5241+ movq %r8, 40(%rsp)
5242+ movq %r9, 48(%rsp)
5243+
5244+ movq 0x38(%rsp), %rdi
5245+ movq 8(%rbp), %rsi
5246+ subq $MCOUNT_INSN_SIZE, %rdi
5247+
5248+.globl ftrace_call
5249+ftrace_call:
5250+ call ftrace_stub
5251+
5252+ movq 48(%rsp), %r9
5253+ movq 40(%rsp), %r8
5254+ movq 32(%rsp), %rdi
5255+ movq 24(%rsp), %rsi
5256+ movq 16(%rsp), %rdx
5257+ movq 8(%rsp), %rcx
5258+ movq (%rsp), %rax
5259+ addq $0x38, %rsp
5260+
5261+.globl ftrace_stub
5262+ftrace_stub:
5263+ retq
5264+END(ftrace_caller)
5265+
5266+#else /* ! CONFIG_DYNAMIC_FTRACE */
5267+ENTRY(mcount)
5268+ cmpq $ftrace_stub, ftrace_trace_function
5269+ jnz trace
5270+.globl ftrace_stub
5271+ftrace_stub:
5272+ retq
5273+
5274+trace:
5275+ /* taken from glibc */
5276+ subq $0x38, %rsp
5277+ movq %rax, (%rsp)
5278+ movq %rcx, 8(%rsp)
5279+ movq %rdx, 16(%rsp)
5280+ movq %rsi, 24(%rsp)
5281+ movq %rdi, 32(%rsp)
5282+ movq %r8, 40(%rsp)
5283+ movq %r9, 48(%rsp)
5284+
5285+ movq 0x38(%rsp), %rdi
5286+ movq 8(%rbp), %rsi
5287+ subq $MCOUNT_INSN_SIZE, %rdi
5288+
5289+ call *ftrace_trace_function
5290+
5291+ movq 48(%rsp), %r9
5292+ movq 40(%rsp), %r8
5293+ movq 32(%rsp), %rdi
5294+ movq 24(%rsp), %rsi
5295+ movq 16(%rsp), %rdx
5296+ movq 8(%rsp), %rcx
5297+ movq (%rsp), %rax
5298+ addq $0x38, %rsp
5299+
5300+ jmp ftrace_stub
5301+END(mcount)
5302+#endif /* CONFIG_DYNAMIC_FTRACE */
5303+#endif /* CONFIG_FTRACE */
5304+
5305 #ifndef CONFIG_PREEMPT
5306 #define retint_kernel retint_restore_args
5307 #endif
5308
5309 #ifdef CONFIG_PARAVIRT
5310-ENTRY(native_irq_enable_syscall_ret)
5311- movq %gs:pda_oldrsp,%rsp
5312+ENTRY(native_usergs_sysret64)
5313 swapgs
5314 sysretq
5315 #endif /* CONFIG_PARAVIRT */
5316@@ -102,7 +213,7 @@ NMI_MASK = 0x80000000
5317 .macro FAKE_STACK_FRAME child_rip
5318 /* push in order ss, rsp, eflags, cs, rip */
5319 xorl %eax, %eax
5320- pushq %rax /* ss */
5321+ pushq $__KERNEL_DS /* ss */
5322 CFI_ADJUST_CFA_OFFSET 8
5323 /*CFI_REL_OFFSET ss,0*/
5324 pushq %rax /* rsp */
5325@@ -197,13 +308,13 @@ ENTRY(ret_from_fork)
5326 CFI_ADJUST_CFA_OFFSET -4
5327 call schedule_tail
5328 GET_THREAD_INFO(%rcx)
5329- testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx)
5330+ testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
5331 jnz rff_trace
5332 rff_action:
5333 RESTORE_REST
5334 testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread?
5335 je int_ret_from_sys_call
5336- testl $_TIF_IA32,threadinfo_flags(%rcx)
5337+ testl $_TIF_IA32,TI_flags(%rcx)
5338 jnz int_ret_from_sys_call
5339 RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
5340 jmp ret_from_sys_call
5341@@ -265,8 +376,9 @@ ENTRY(system_call)
5342 SAVE_ARGS -8,0
5343 movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
5344 GET_THREAD_INFO(%rcx)
5345- testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
5346+ testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx)
5347 jnz tracesys
5348+system_call_fastpath:
5349 cmpq $__NR_syscall_max,%rax
5350 ja badsys
5351 movq %r10,%rcx
5352@@ -284,7 +396,7 @@ sysret_check:
5353 GET_THREAD_INFO(%rcx)
5354 DISABLE_INTERRUPTS(CLBR_NONE)
5355 TRACE_IRQS_OFF
5356- movl threadinfo_flags(%rcx),%edx
5357+ movl TI_flags(%rcx),%edx
5358 andl %edi,%edx
5359 jnz sysret_careful
5360 CFI_REMEMBER_STATE
5361@@ -315,16 +427,16 @@ sysret_careful:
5362 sysret_signal:
5363 TRACE_IRQS_ON
5364 ENABLE_INTERRUPTS(CLBR_NONE)
5365- testl $_TIF_DO_NOTIFY_MASK,%edx
5366- jz 1f
5367-
5368- /* Really a signal */
5369+#ifdef CONFIG_AUDITSYSCALL
5370+ bt $TIF_SYSCALL_AUDIT,%edx
5371+ jc sysret_audit
5372+#endif
5373 /* edx: work flags (arg3) */
5374 leaq do_notify_resume(%rip),%rax
5375 leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
5376 xorl %esi,%esi # oldset -> arg2
5377 call ptregscall_common
5378-1: movl $_TIF_NEED_RESCHED,%edi
5379+ movl $_TIF_WORK_MASK,%edi
5380 /* Use IRET because user could have changed frame. This
5381 works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
5382 DISABLE_INTERRUPTS(CLBR_NONE)
5383@@ -335,14 +447,56 @@ badsys:
5384 movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
5385 jmp ret_from_sys_call
5386
5387+#ifdef CONFIG_AUDITSYSCALL
5388+ /*
5389+ * Fast path for syscall audit without full syscall trace.
5390+ * We just call audit_syscall_entry() directly, and then
5391+ * jump back to the normal fast path.
5392+ */
5393+auditsys:
5394+ movq %r10,%r9 /* 6th arg: 4th syscall arg */
5395+ movq %rdx,%r8 /* 5th arg: 3rd syscall arg */
5396+ movq %rsi,%rcx /* 4th arg: 2nd syscall arg */
5397+ movq %rdi,%rdx /* 3rd arg: 1st syscall arg */
5398+ movq %rax,%rsi /* 2nd arg: syscall number */
5399+ movl $AUDIT_ARCH_X86_64,%edi /* 1st arg: audit arch */
5400+ call audit_syscall_entry
5401+ LOAD_ARGS 0 /* reload call-clobbered registers */
5402+ jmp system_call_fastpath
5403+
5404+ /*
5405+ * Return fast path for syscall audit. Call audit_syscall_exit()
5406+ * directly and then jump back to the fast path with TIF_SYSCALL_AUDIT
5407+ * masked off.
5408+ */
5409+sysret_audit:
5410+ movq %rax,%rsi /* second arg, syscall return value */
5411+ cmpq $0,%rax /* is it < 0? */
5412+ setl %al /* 1 if so, 0 if not */
5413+ movzbl %al,%edi /* zero-extend that into %edi */
5414+ inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
5415+ call audit_syscall_exit
5416+ movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
5417+ jmp sysret_check
5418+#endif /* CONFIG_AUDITSYSCALL */
5419+
5420 /* Do syscall tracing */
5421 tracesys:
5422+#ifdef CONFIG_AUDITSYSCALL
5423+ testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
5424+ jz auditsys
5425+#endif
5426 SAVE_REST
5427 movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
5428 FIXUP_TOP_OF_STACK %rdi
5429 movq %rsp,%rdi
5430 call syscall_trace_enter
5431- LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */
5432+ /*
5433+ * Reload arg registers from stack in case ptrace changed them.
5434+ * We don't reload %rax because syscall_trace_enter() returned
5435+ * the value it wants us to use in the table lookup.
5436+ */
5437+ LOAD_ARGS ARGOFFSET, 1
5438 RESTORE_REST
5439 cmpq $__NR_syscall_max,%rax
5440 ja int_ret_from_sys_call /* RAX(%rsp) set to -ENOSYS above */
5441@@ -356,6 +510,7 @@ tracesys:
5442 * Has correct top of stack, but partial stack frame.
5443 */
5444 .globl int_ret_from_sys_call
5445+ .globl int_with_check
5446 int_ret_from_sys_call:
5447 DISABLE_INTERRUPTS(CLBR_NONE)
5448 TRACE_IRQS_OFF
5449@@ -370,10 +525,10 @@ int_ret_from_sys_call:
5450 int_with_check:
5451 LOCKDEP_SYS_EXIT_IRQ
5452 GET_THREAD_INFO(%rcx)
5453- movl threadinfo_flags(%rcx),%edx
5454+ movl TI_flags(%rcx),%edx
5455 andl %edi,%edx
5456 jnz int_careful
5457- andl $~TS_COMPAT,threadinfo_status(%rcx)
5458+ andl $~TS_COMPAT,TI_status(%rcx)
5459 jmp retint_restore_args
5460
5461 /* Either reschedule or signal or syscall exit tracking needed. */
5462@@ -399,7 +554,7 @@ int_very_careful:
5463 ENABLE_INTERRUPTS(CLBR_NONE)
5464 SAVE_REST
5465 /* Check for syscall exit trace */
5466- testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx
5467+ testl $_TIF_WORK_SYSCALL_EXIT,%edx
5468 jz int_signal
5469 pushq %rdi
5470 CFI_ADJUST_CFA_OFFSET 8
5471@@ -407,7 +562,7 @@ int_very_careful:
5472 call syscall_trace_leave
5473 popq %rdi
5474 CFI_ADJUST_CFA_OFFSET -8
5475- andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
5476+ andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi
5477 jmp int_restore_rest
5478
5479 int_signal:
5480@@ -416,7 +571,7 @@ int_signal:
5481 movq %rsp,%rdi # &ptregs -> arg1
5482 xorl %esi,%esi # oldset -> arg2
5483 call do_notify_resume
5484-1: movl $_TIF_NEED_RESCHED,%edi
5485+1: movl $_TIF_WORK_MASK,%edi
5486 int_restore_rest:
5487 RESTORE_REST
5488 DISABLE_INTERRUPTS(CLBR_NONE)
5489@@ -443,7 +598,6 @@ END(\label)
5490 PTREGSCALL stub_clone, sys_clone, %r8
5491 PTREGSCALL stub_fork, sys_fork, %rdi
5492 PTREGSCALL stub_vfork, sys_vfork, %rdi
5493- PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend, %rdx
5494 PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
5495 PTREGSCALL stub_iopl, sys_iopl, %rsi
5496
5497@@ -517,10 +671,12 @@ END(stub_rt_sigreturn)
5498 *
5499 */
5500
5501-retint_check:
5502+retint_with_reschedule:
5503 CFI_DEFAULT_STACK adj=1
5504+ movl $_TIF_WORK_MASK,%edi
5505+retint_check:
5506 LOCKDEP_SYS_EXIT_IRQ
5507- movl threadinfo_flags(%rcx),%edx
5508+ movl TI_flags(%rcx),%edx
5509 andl %edi,%edx
5510 CFI_REMEMBER_STATE
5511 jnz retint_careful
5512@@ -565,17 +721,16 @@ retint_signal:
5513 RESTORE_REST
5514 DISABLE_INTERRUPTS(CLBR_NONE)
5515 TRACE_IRQS_OFF
5516- movl $_TIF_NEED_RESCHED,%edi
5517 GET_THREAD_INFO(%rcx)
5518- jmp retint_check
5519+ jmp retint_with_reschedule
5520
5521 #ifdef CONFIG_PREEMPT
5522 /* Returning to kernel space. Check if we need preemption */
5523 /* rcx: threadinfo. interrupts off. */
5524 ENTRY(retint_kernel)
5525- cmpl $0,threadinfo_preempt_count(%rcx)
5526+ cmpl $0,TI_preempt_count(%rcx)
5527 jnz retint_restore_args
5528- bt $TIF_NEED_RESCHED,threadinfo_flags(%rcx)
5529+ bt $TIF_NEED_RESCHED,TI_flags(%rcx)
5530 jnc retint_restore_args
5531 bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */
5532 jnc retint_restore_args
5533@@ -630,6 +785,9 @@ END(invalidate_interrupt\num)
5534 ENTRY(call_function_interrupt)
5535 apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
5536 END(call_function_interrupt)
5537+ENTRY(call_function_single_interrupt)
5538+ apicinterrupt CALL_FUNCTION_SINGLE_VECTOR,smp_call_function_single_interrupt
5539+END(call_function_single_interrupt)
5540 ENTRY(irq_move_cleanup_interrupt)
5541 apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt
5542 END(irq_move_cleanup_interrupt)
5543@@ -639,6 +797,10 @@ ENTRY(apic_timer_interrupt)
5544 apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
5545 END(apic_timer_interrupt)
5546
5547+ENTRY(uv_bau_message_intr1)
5548+ apicinterrupt 220,uv_bau_message_interrupt
5549+END(uv_bau_message_intr1)
5550+
5551 ENTRY(error_interrupt)
5552 apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
5553 END(error_interrupt)
5554@@ -752,7 +914,7 @@ paranoid_restore\trace:
5555 jmp irq_return
5556 paranoid_userspace\trace:
5557 GET_THREAD_INFO(%rcx)
5558- movl threadinfo_flags(%rcx),%ebx
5559+ movl TI_flags(%rcx),%ebx
5560 andl $_TIF_WORK_MASK,%ebx
5561 jz paranoid_swapgs\trace
5562 movq %rsp,%rdi /* &pt_regs */
5563@@ -849,7 +1011,7 @@ error_exit:
5564 testb $3,CS-ARGOFFSET(%rsp)
5565 jz retint_kernel
5566 LOCKDEP_SYS_EXIT_IRQ
5567- movl threadinfo_flags(%rcx),%edx
5568+ movl TI_flags(%rcx),%edx
5569 movl $_TIF_WORK_MASK,%edi
5570 andl %edi,%edx
5571 jnz retint_careful
5572@@ -871,11 +1033,11 @@ error_kernelspace:
5573 iret run with kernel gs again, so don't set the user space flag.
5574 B stepping K8s sometimes report an truncated RIP for IRET
5575 exceptions returning to compat mode. Check for these here too. */
5576- leaq irq_return(%rip),%rbp
5577- cmpq %rbp,RIP(%rsp)
5578+ leaq irq_return(%rip),%rcx
5579+ cmpq %rcx,RIP(%rsp)
5580 je error_swapgs
5581- movl %ebp,%ebp /* zero extend */
5582- cmpq %rbp,RIP(%rsp)
5583+ movl %ecx,%ecx /* zero extend */
5584+ cmpq %rcx,RIP(%rsp)
5585 je error_swapgs
5586 cmpq $gs_change,RIP(%rsp)
5587 je error_swapgs
5588@@ -1121,6 +1283,7 @@ END(device_not_available)
5589 /* runs on exception stack */
5590 KPROBE_ENTRY(debug)
5591 /* INTR_FRAME
5592+ PARAVIRT_ADJUST_EXCEPTION_FRAME
5593 pushq $0
5594 CFI_ADJUST_CFA_OFFSET 8 */
5595 zeroentry do_debug
5596@@ -1148,6 +1311,7 @@ END(do_nmi_callback)
5597
5598 KPROBE_ENTRY(int3)
5599 /* INTR_FRAME
5600+ PARAVIRT_ADJUST_EXCEPTION_FRAME
5601 pushq $0
5602 CFI_ADJUST_CFA_OFFSET 8 */
5603 zeroentry do_int3
5604@@ -1171,14 +1335,11 @@ ENTRY(coprocessor_segment_overrun)
5605 zeroentry do_coprocessor_segment_overrun
5606 END(coprocessor_segment_overrun)
5607
5608-ENTRY(reserved)
5609- zeroentry do_reserved
5610-END(reserved)
5611-
5612 #if 0
5613 /* runs on exception stack */
5614 ENTRY(double_fault)
5615 XCPT_FRAME
5616+ PARAVIRT_ADJUST_EXCEPTION_FRAME
5617 paranoidentry do_double_fault
5618 jmp paranoid_exit1
5619 CFI_ENDPROC
5620@@ -1196,6 +1357,7 @@ END(segment_not_present)
5621 /* runs on exception stack */
5622 ENTRY(stack_segment)
5623 /* XCPT_FRAME
5624+ PARAVIRT_ADJUST_EXCEPTION_FRAME
5625 paranoidentry do_stack_segment */
5626 errorentry do_stack_segment
5627 /* jmp paranoid_exit1
5628@@ -1222,6 +1384,7 @@ END(spurious_interrupt_bug)
5629 /* runs on exception stack */
5630 ENTRY(machine_check)
5631 INTR_FRAME
5632+ PARAVIRT_ADJUST_EXCEPTION_FRAME
5633 pushq $0
5634 CFI_ADJUST_CFA_OFFSET 8
5635 paranoidentry do_machine_check
82094b55
AF
5636--- sle11-2009-10-16.orig/arch/x86/kernel/fixup.c 2009-03-16 16:33:40.000000000 +0100
5637+++ sle11-2009-10-16/arch/x86/kernel/fixup.c 2009-06-04 10:21:39.000000000 +0200
2cb7cef9
BS
5638@@ -33,6 +33,7 @@
5639 #include <linux/kernel.h>
5640 #include <linux/delay.h>
5641 #include <linux/version.h>
5642+#include <asm/traps.h>
5643
5644 #define DP(_f, _args...) printk(KERN_ALERT " " _f "\n" , ## _args )
5645
82094b55
AF
5646--- sle11-2009-10-16.orig/arch/x86/kernel/genapic_64-xen.c 2009-03-16 16:38:05.000000000 +0100
5647+++ sle11-2009-10-16/arch/x86/kernel/genapic_64-xen.c 2009-06-04 10:21:39.000000000 +0200
2cb7cef9
BS
5648@@ -58,7 +58,7 @@ void __init setup_apic_routing(void)
5649 else
5650 #endif
5651
5652- if (num_possible_cpus() <= 8)
5653+ if (max_physical_apicid < 8)
5654 genapic = &apic_flat;
5655 else
5656 genapic = &apic_physflat;
5657@@ -121,4 +121,5 @@ int is_uv_system(void)
5658 {
5659 return uv_system_type != UV_NONE;
5660 }
5661+EXPORT_SYMBOL_GPL(is_uv_system);
5662 #endif
82094b55
AF
5663--- sle11-2009-10-16.orig/arch/x86/kernel/genapic_xen_64.c 2009-03-16 16:38:05.000000000 +0100
5664+++ sle11-2009-10-16/arch/x86/kernel/genapic_xen_64.c 2009-06-04 10:21:39.000000000 +0200
2cb7cef9
BS
5665@@ -43,7 +43,7 @@ void xen_send_IPI_shortcut(unsigned int
5666 __send_IPI_one(smp_processor_id(), vector);
5667 break;
5668 case APIC_DEST_ALLBUT:
5669- for (cpu = 0; cpu < NR_CPUS; ++cpu) {
5670+ for_each_possible_cpu(cpu) {
5671 if (cpu == smp_processor_id())
5672 continue;
5673 if (cpu_isset(cpu, cpu_online_map)) {
5674@@ -52,7 +52,7 @@ void xen_send_IPI_shortcut(unsigned int
5675 }
5676 break;
5677 case APIC_DEST_ALLINC:
5678- for (cpu = 0; cpu < NR_CPUS; ++cpu) {
5679+ for_each_possible_cpu(cpu) {
5680 if (cpu_isset(cpu, cpu_online_map)) {
5681 __send_IPI_one(cpu, vector);
5682 }
5683@@ -81,8 +81,6 @@ static cpumask_t xen_vector_allocation_d
5684 */
5685 static void xen_init_apic_ldr(void)
5686 {
5687- Dprintk("%s\n", __FUNCTION__);
5688- return;
5689 }
5690
5691 static void xen_send_IPI_allbutself(int vector)
5692@@ -92,14 +90,12 @@ static void xen_send_IPI_allbutself(int
5693 * we get an APIC send error if we try to broadcast.
5694 * thus we have to avoid sending IPIs in this case.
5695 */
5696- Dprintk("%s\n", __FUNCTION__);
5697 if (num_online_cpus() > 1)
5698 xen_send_IPI_shortcut(APIC_DEST_ALLBUT, vector, APIC_DEST_LOGICAL);
5699 }
5700
5701 static void xen_send_IPI_all(int vector)
5702 {
5703- Dprintk("%s\n", __FUNCTION__);
5704 xen_send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL);
5705 }
5706
5707@@ -109,11 +105,10 @@ static void xen_send_IPI_mask(cpumask_t
5708 unsigned int cpu;
5709 unsigned long flags;
5710
5711- Dprintk("%s\n", __FUNCTION__);
5712 local_irq_save(flags);
5713 WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]);
5714
5715- for (cpu = 0; cpu < NR_CPUS; ++cpu) {
5716+ for_each_possible_cpu(cpu) {
5717 if (cpu_isset(cpu, cpumask)) {
5718 __send_IPI_one(cpu, vector);
5719 }
5720@@ -125,14 +120,12 @@ static void xen_send_IPI_mask(cpumask_t
5721 static int xen_apic_id_registered(void)
5722 {
5723 /* better be set */
5724- Dprintk("%s\n", __FUNCTION__);
5725 return physid_isset(smp_processor_id(), phys_cpu_present_map);
5726 }
5727 #endif
5728
5729 static unsigned int xen_cpu_mask_to_apicid(cpumask_t cpumask)
5730 {
5731- Dprintk("%s\n", __FUNCTION__);
5732 return cpus_addr(cpumask)[0] & APIC_ALL_CPUS;
5733 }
5734
5735@@ -140,7 +133,6 @@ static unsigned int phys_pkg_id(int inde
5736 {
5737 u32 ebx;
5738
5739- Dprintk("%s\n", __FUNCTION__);
5740 ebx = cpuid_ebx(1);
5741 return ((ebx >> 24) & 0xFF) >> index_msb;
5742 }
5743--- /dev/null 1970-01-01 00:00:00.000000000 +0000
82094b55 5744+++ sle11-2009-10-16/arch/x86/kernel/head-xen.c 2009-06-04 10:21:39.000000000 +0200
2cb7cef9
BS
5745@@ -0,0 +1,57 @@
5746+#include <linux/kernel.h>
5747+#include <linux/init.h>
5748+
5749+#include <asm/setup.h>
5750+#include <asm/bios_ebda.h>
5751+
5752+#define BIOS_LOWMEM_KILOBYTES 0x413
5753+
5754+/*
5755+ * The BIOS places the EBDA/XBDA at the top of conventional
5756+ * memory, and usually decreases the reported amount of
5757+ * conventional memory (int 0x12) too. This also contains a
5758+ * workaround for Dell systems that neglect to reserve EBDA.
5759+ * The same workaround also avoids a problem with the AMD768MPX
5760+ * chipset: reserve a page before VGA to prevent PCI prefetch
5761+ * into it (errata #56). Usually the page is reserved anyways,
5762+ * unless you have no PS/2 mouse plugged in.
5763+ */
5764+void __init reserve_ebda_region(void)
5765+{
5766+#ifndef CONFIG_XEN
5767+ unsigned int lowmem, ebda_addr;
5768+
5769+ /* To determine the position of the EBDA and the */
5770+ /* end of conventional memory, we need to look at */
5771+ /* the BIOS data area. In a paravirtual environment */
5772+ /* that area is absent. We'll just have to assume */
5773+ /* that the paravirt case can handle memory setup */
5774+ /* correctly, without our help. */
5775+ if (paravirt_enabled())
5776+ return;
5777+
5778+ /* end of low (conventional) memory */
5779+ lowmem = *(unsigned short *)__va(BIOS_LOWMEM_KILOBYTES);
5780+ lowmem <<= 10;
5781+
5782+ /* start of EBDA area */
5783+ ebda_addr = get_bios_ebda();
5784+
5785+ /* Fixup: bios puts an EBDA in the top 64K segment */
5786+ /* of conventional memory, but does not adjust lowmem. */
5787+ if ((lowmem - ebda_addr) <= 0x10000)
5788+ lowmem = ebda_addr;
5789+
5790+ /* Fixup: bios does not report an EBDA at all. */
5791+ /* Some old Dells seem to need 4k anyhow (bugzilla 2990) */
5792+ if ((ebda_addr == 0) && (lowmem >= 0x9f000))
5793+ lowmem = 0x9f000;
5794+
5795+ /* Paranoia: should never happen, but... */
5796+ if ((lowmem == 0) || (lowmem >= 0x100000))
5797+ lowmem = 0x9f000;
5798+
5799+ /* reserve all memory between lowmem and the 1MB mark */
5800+ reserve_early_overlap_ok(lowmem, 0x100000, "BIOS reserved");
5801+#endif
5802+}
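For concreteness, here is a minimal user-space C sketch of the same EBDA fixups as reserve_ebda_region() above, assuming a BIOS that reports 639 KiB of conventional memory and an EBDA segment of 0x9fc0; the sample values are made up, and the kernel of course reads them from the BIOS data area instead:

#include <stdio.h>

int main(void)
{
	unsigned int lowmem    = 639u << 10;	/* KiB count from the 0x413 word -> 0x9fc00 */
	unsigned int ebda_addr = 0x9fc0 << 4;	/* EBDA segment from the BIOS data area     */

	/* fixup 1: EBDA is in the top 64K but the reported lowmem was not lowered */
	if (lowmem - ebda_addr <= 0x10000)
		lowmem = ebda_addr;

	/* fixup 2: no EBDA reported at all -> still keep the top 4K free (old Dells) */
	if (ebda_addr == 0 && lowmem >= 0x9f000)
		lowmem = 0x9f000;

	/* paranoia: zero or >= 1MB is nonsense for conventional memory */
	if (lowmem == 0 || lowmem >= 0x100000)
		lowmem = 0x9f000;

	printf("reserve [%#x, 0x100000) as \"BIOS reserved\"\n", lowmem);
	return 0;
}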
5803--- /dev/null 1970-01-01 00:00:00.000000000 +0000
82094b55 5804+++ sle11-2009-10-16/arch/x86/kernel/head32-xen.c 2009-06-04 10:21:39.000000000 +0200
2cb7cef9
BS
5805@@ -0,0 +1,57 @@
5806+/*
5807+ * linux/arch/i386/kernel/head32.c -- prepare to run common code
5808+ *
5809+ * Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
5810+ * Copyright (C) 2007 Eric Biederman <ebiederm@xmission.com>
5811+ */
5812+
5813+#include <linux/init.h>
5814+#include <linux/start_kernel.h>
5815+
5816+#include <asm/setup.h>
5817+#include <asm/sections.h>
5818+#include <asm/e820.h>
5819+#include <asm/bios_ebda.h>
5820+
5821+void __init i386_start_kernel(void)
5822+{
5823+ reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS");
5824+
5825+#ifndef CONFIG_XEN
5826+#ifdef CONFIG_BLK_DEV_INITRD
5827+ /* Reserve INITRD */
5828+ if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) {
5829+ u64 ramdisk_image = boot_params.hdr.ramdisk_image;
5830+ u64 ramdisk_size = boot_params.hdr.ramdisk_size;
5831+ u64 ramdisk_end = ramdisk_image + ramdisk_size;
5832+ reserve_early(ramdisk_image, ramdisk_end, "RAMDISK");
5833+ }
5834+#endif
5835+ reserve_early(init_pg_tables_start, init_pg_tables_end,
5836+ "INIT_PG_TABLE");
5837+#else
5838+ reserve_early(ALIGN(__pa_symbol(&_end), PAGE_SIZE),
5839+ __pa(xen_start_info->pt_base)
5840+ + (xen_start_info->nr_pt_frames << PAGE_SHIFT),
5841+ "Xen provided");
5842+
5843+ {
5844+ int max_cmdline;
5845+
5846+ if ((max_cmdline = MAX_GUEST_CMDLINE) > COMMAND_LINE_SIZE)
5847+ max_cmdline = COMMAND_LINE_SIZE;
5848+ memcpy(boot_command_line, xen_start_info->cmd_line, max_cmdline);
5849+ boot_command_line[max_cmdline-1] = '\0';
5850+ }
5851+#endif
5852+
5853+ reserve_ebda_region();
5854+
5855+ /*
5856+ * At this point everything still needed from the boot loader
5857+ * or BIOS or kernel text should be early reserved or marked not
5858+ * RAM in e820. All other memory is free game.
5859+ */
5860+
5861+ start_kernel();
5862+}
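The Xen branch of i386_start_kernel() above ends with a clamp-and-copy of the guest command line: never copy more than the kernel's own buffer holds, then force a terminator in case the copy truncated. A stand-alone C sketch of that pattern, with MAX_GUEST_CMDLINE and COMMAND_LINE_SIZE as illustrative stand-ins for the real Xen/kernel constants:

#include <stdio.h>
#include <string.h>

#define MAX_GUEST_CMDLINE	1024	/* stand-in for the Xen interface limit   */
#define COMMAND_LINE_SIZE	256	/* stand-in for this kernel build's limit */

static char boot_command_line[COMMAND_LINE_SIZE];

int main(void)
{
	static const char xen_cmd_line[MAX_GUEST_CMDLINE] =
		"root=/dev/xvda1 console=hvc0 ro";
	int max_cmdline = MAX_GUEST_CMDLINE;

	if (max_cmdline > COMMAND_LINE_SIZE)		/* clamp to our own buffer size */
		max_cmdline = COMMAND_LINE_SIZE;
	memcpy(boot_command_line, xen_cmd_line, max_cmdline);
	boot_command_line[max_cmdline - 1] = '\0';	/* terminate even after truncation */

	puts(boot_command_line);
	return 0;
}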
82094b55
AF
5863--- sle11-2009-10-16.orig/arch/x86/kernel/head64-xen.c 2009-03-16 16:38:05.000000000 +0100
5864+++ sle11-2009-10-16/arch/x86/kernel/head64-xen.c 2009-06-04 10:21:39.000000000 +0200
2cb7cef9
BS
5865@@ -32,7 +32,26 @@
5866 #include <asm/e820.h>
5867 #include <asm/bios_ebda.h>
5868
5869-unsigned long start_pfn;
5870+/* boot cpu pda */
5871+static struct x8664_pda _boot_cpu_pda __read_mostly;
5872+
5873+#ifdef CONFIG_SMP
5874+/*
5875+ * We install an empty cpu_pda pointer table to indicate to early users
5876+ * (numa_set_node) that the cpu_pda pointer table for cpus other than
5877+ * the boot cpu is not yet setup.
5878+ */
5879+static struct x8664_pda *__cpu_pda[NR_CPUS] __initdata;
5880+#else
5881+static struct x8664_pda *__cpu_pda[NR_CPUS] __read_mostly;
5882+#endif
5883+
5884+void __init x86_64_init_pda(void)
5885+{
5886+ _cpu_pda = __cpu_pda;
5887+ cpu_pda(0) = &_boot_cpu_pda;
5888+ pda_init(0);
5889+}
5890
5891 #ifndef CONFIG_XEN
5892 static void __init zap_identity_mappings(void)
5893@@ -77,83 +96,10 @@ EXPORT_SYMBOL(machine_to_phys_mapping);
5894 unsigned int machine_to_phys_order;
5895 EXPORT_SYMBOL(machine_to_phys_order);
5896
5897-#define BIOS_LOWMEM_KILOBYTES 0x413
5898-
5899-/*
5900- * The BIOS places the EBDA/XBDA at the top of conventional
5901- * memory, and usually decreases the reported amount of
5902- * conventional memory (int 0x12) too. This also contains a
5903- * workaround for Dell systems that neglect to reserve EBDA.
5904- * The same workaround also avoids a problem with the AMD768MPX
5905- * chipset: reserve a page before VGA to prevent PCI prefetch
5906- * into it (errata #56). Usually the page is reserved anyways,
5907- * unless you have no PS/2 mouse plugged in.
5908- */
5909-static void __init reserve_ebda_region(void)
5910-{
5911-#ifndef CONFIG_XEN
5912- unsigned int lowmem, ebda_addr;
5913-
5914- /* To determine the position of the EBDA and the */
5915- /* end of conventional memory, we need to look at */
5916- /* the BIOS data area. In a paravirtual environment */
5917- /* that area is absent. We'll just have to assume */
5918- /* that the paravirt case can handle memory setup */
5919- /* correctly, without our help. */
5920- if (paravirt_enabled())
5921- return;
5922-
5923- /* end of low (conventional) memory */
5924- lowmem = *(unsigned short *)__va(BIOS_LOWMEM_KILOBYTES);
5925- lowmem <<= 10;
5926-
5927- /* start of EBDA area */
5928- ebda_addr = get_bios_ebda();
5929-
5930- /* Fixup: bios puts an EBDA in the top 64K segment */
5931- /* of conventional memory, but does not adjust lowmem. */
5932- if ((lowmem - ebda_addr) <= 0x10000)
5933- lowmem = ebda_addr;
5934-
5935- /* Fixup: bios does not report an EBDA at all. */
5936- /* Some old Dells seem to need 4k anyhow (bugzilla 2990) */
5937- if ((ebda_addr == 0) && (lowmem >= 0x9f000))
5938- lowmem = 0x9f000;
5939-
5940- /* Paranoia: should never happen, but... */
5941- if ((lowmem == 0) || (lowmem >= 0x100000))
5942- lowmem = 0x9f000;
5943-
5944- /* reserve all memory between lowmem and the 1MB mark */
5945- reserve_early(lowmem, 0x100000, "BIOS reserved");
5946-#endif
5947-}
5948-
5949-static void __init reserve_setup_data(void)
5950-{
5951-#ifndef CONFIG_XEN
5952- struct setup_data *data;
5953- unsigned long pa_data;
5954- char buf[32];
5955-
5956- if (boot_params.hdr.version < 0x0209)
5957- return;
5958- pa_data = boot_params.hdr.setup_data;
5959- while (pa_data) {
5960- data = early_ioremap(pa_data, sizeof(*data));
5961- sprintf(buf, "setup data %x", data->type);
5962- reserve_early(pa_data, pa_data+sizeof(*data)+data->len, buf);
5963- pa_data = data->next;
5964- early_iounmap(data, sizeof(*data));
5965- }
5966-#endif
5967-}
5968-
5969 void __init x86_64_start_kernel(char * real_mode_data)
5970 {
5971 struct xen_machphys_mapping mapping;
5972 unsigned long machine_to_phys_nr_ents;
5973- int i;
5974
5975 /*
5976 * Build-time sanity checks on the kernel image and module
5977@@ -167,6 +113,7 @@ void __init x86_64_start_kernel(char * r
5978 BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL));
5979 BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
5980 (__START_KERNEL & PGDIR_MASK)));
5981+ BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END);
5982
5983 xen_setup_features();
5984
5985@@ -174,8 +121,6 @@ void __init x86_64_start_kernel(char * r
5986 if (!xen_feature(XENFEAT_auto_translated_physmap))
5987 phys_to_machine_mapping =
5988 (unsigned long *)xen_start_info->mfn_list;
5989- start_pfn = (__pa(xen_start_info->pt_base) >> PAGE_SHIFT) +
5990- xen_start_info->nr_pt_frames;
5991
5992 machine_to_phys_mapping = (unsigned long *)MACH2PHYS_VIRT_START;
5993 machine_to_phys_nr_ents = MACH2PHYS_NR_ENTRIES;
5994@@ -208,19 +153,23 @@ void __init x86_64_start_kernel(char * r
5995
5996 early_printk("Kernel alive\n");
5997
5998- for (i = 0; i < NR_CPUS; i++)
5999- cpu_pda(i) = &boot_cpu_pda[i];
6000+ x86_64_init_pda();
6001
6002- pda_init(0);
6003+ early_printk("Kernel really alive\n");
6004+
6005+ x86_64_start_reservations(real_mode_data);
6006+}
6007+
6008+void __init x86_64_start_reservations(char *real_mode_data)
6009+{
6010 copy_bootdata(__va(real_mode_data));
6011
6012 reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS");
6013
6014 reserve_early(round_up(__pa_symbol(&_end), PAGE_SIZE),
6015- start_pfn << PAGE_SHIFT, "Xen provided");
6016-
6017- reserve_ebda_region();
6018- reserve_setup_data();
6019+ __pa(xen_start_info->pt_base)
6020+ + (xen_start_info->nr_pt_frames << PAGE_SHIFT),
6021+ "Xen provided");
6022
6023 /*
6024 * At this point everything still needed from the boot loader
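The "Xen provided" reservation introduced above (and mirrored in head32-xen.c) replaces the old start_pfn bookkeeping: everything from the page-aligned end of the kernel image up to the last of the nr_pt_frames initial page-table pages is reserved early. A plain-C sketch of that range computation, using made-up sample addresses in place of __pa(_end) and __pa(xen_start_info->pt_base):

#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)
#define PAGE_ALIGN(x)	(((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

int main(void)
{
	unsigned long pa_end       = 0x1402345UL;	/* sample __pa(_end)                     */
	unsigned long pa_pt_base   = 0x1403000UL;	/* sample __pa(xen_start_info->pt_base)  */
	unsigned long nr_pt_frames = 6;			/* sample number of page-table frames    */

	unsigned long start = PAGE_ALIGN(pa_end);
	unsigned long end   = pa_pt_base + (nr_pt_frames << PAGE_SHIFT);

	printf("reserve_early [%#lx, %#lx) \"Xen provided\"\n", start, end);
	return 0;
}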
82094b55
AF
6025--- sle11-2009-10-16.orig/arch/x86/kernel/head_64-xen.S 2009-02-16 16:17:21.000000000 +0100
6026+++ sle11-2009-10-16/arch/x86/kernel/head_64-xen.S 2009-06-04 10:21:39.000000000 +0200
2cb7cef9
BS
6027@@ -95,53 +95,6 @@ NEXT_PAGE(hypercall_page)
6028
6029 #undef NEXT_PAGE
6030
6031- .data
6032-
6033- .align 16
6034- .globl cpu_gdt_descr
6035-cpu_gdt_descr:
6036- .word gdt_end-cpu_gdt_table-1
6037-gdt:
6038- .quad cpu_gdt_table
6039-#ifdef CONFIG_SMP
6040- .rept NR_CPUS-1
6041- .word 0
6042- .quad 0
6043- .endr
6044-#endif
6045-
6046-/* We need valid kernel segments for data and code in long mode too
6047- * IRET will check the segment types kkeil 2000/10/28
6048- * Also sysret mandates a special GDT layout
6049- */
6050-
6051- .section .data.page_aligned, "aw"
6052- .align PAGE_SIZE
6053-
6054-/* The TLS descriptors are currently at a different place compared to i386.
6055- Hopefully nobody expects them at a fixed place (Wine?) */
6056-
6057-ENTRY(cpu_gdt_table)
6058- .quad 0x0000000000000000 /* NULL descriptor */
6059- .quad 0x00cf9b000000ffff /* __KERNEL32_CS */
6060- .quad 0x00af9b000000ffff /* __KERNEL_CS */
6061- .quad 0x00cf93000000ffff /* __KERNEL_DS */
6062- .quad 0x00cffb000000ffff /* __USER32_CS */
6063- .quad 0x00cff3000000ffff /* __USER_DS, __USER32_DS */
6064- .quad 0x00affb000000ffff /* __USER_CS */
6065- .quad 0x0 /* unused */
6066- .quad 0,0 /* TSS */
6067- .quad 0,0 /* LDT */
6068- .quad 0,0,0 /* three TLS descriptors */
6069- .quad 0x0000f40000000000 /* node/CPU stored in limit */
6070-gdt_end:
6071- /* asm/segment.h:GDT_ENTRIES must match this */
6072- /* This should be a multiple of the cache line size */
6073- /* GDTs of other CPUs are now dynamically allocated */
6074-
6075- /* zero the remaining page */
6076- .fill PAGE_SIZE / 8 - GDT_ENTRIES,8,0
6077-
6078 .section .bss.page_aligned, "aw", @nobits
6079 .align PAGE_SIZE
6080 ENTRY(empty_zero_page)
82094b55
AF
6081--- sle11-2009-10-16.orig/arch/x86/kernel/io_apic_32-xen.c 2009-03-16 16:38:05.000000000 +0100
6082+++ sle11-2009-10-16/arch/x86/kernel/io_apic_32-xen.c 2009-06-04 10:21:39.000000000 +0200
2cb7cef9
BS
6083@@ -25,6 +25,7 @@
6084 #include <linux/init.h>
6085 #include <linux/delay.h>
6086 #include <linux/sched.h>
6087+#include <linux/bootmem.h>
6088 #include <linux/mc146818rtc.h>
6089 #include <linux/compiler.h>
6090 #include <linux/acpi.h>
6091@@ -75,7 +76,7 @@ static struct { int pin, apic; } ioapic_
6092 static DEFINE_SPINLOCK(ioapic_lock);
6093 static DEFINE_SPINLOCK(vector_lock);
6094
6095-int timer_over_8254 __initdata = 1;
6096+int timer_through_8259 __initdata;
6097
6098 /*
6099 * Is the SiS APIC rmw bug present ?
6100@@ -89,15 +90,21 @@ int sis_apic_bug = -1;
6101 int nr_ioapic_registers[MAX_IO_APICS];
6102
6103 /* I/O APIC entries */
6104-struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
6105+struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
6106 int nr_ioapics;
6107
6108 /* MP IRQ source entries */
6109-struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
6110+struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
6111
6112 /* # of MP IRQ source entries */
6113 int mp_irq_entries;
6114
6115+#if defined (CONFIG_MCA) || defined (CONFIG_EISA)
6116+int mp_bus_id_to_type[MAX_MP_BUSSES];
6117+#endif
6118+
6119+DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
6120+
6121 static int disable_timer_pin_1 __initdata;
6122
6123 /*
6124@@ -128,7 +135,7 @@ struct io_apic {
6125 static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
6126 {
6127 return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
6128- + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK);
6129+ + (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK);
6130 }
6131 #endif
6132
6133@@ -142,7 +149,7 @@ static inline unsigned int io_apic_read(
6134 struct physdev_apic apic_op;
6135 int ret;
6136
6137- apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr;
6138+ apic_op.apic_physbase = mp_ioapics[apic].mp_apicaddr;
6139 apic_op.reg = reg;
6140 ret = HYPERVISOR_physdev_op(PHYSDEVOP_apic_read, &apic_op);
6141 if (ret)
6142@@ -160,7 +167,7 @@ static inline void io_apic_write(unsigne
6143 #else
6144 struct physdev_apic apic_op;
6145
6146- apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr;
6147+ apic_op.apic_physbase = mp_ioapics[apic].mp_apicaddr;
6148 apic_op.reg = reg;
6149 apic_op.value = value;
6150 WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic_op));
6151@@ -288,7 +295,7 @@ static void __init replace_pin_at_irq(un
6152 }
6153 }
6154
6155-static void __modify_IO_APIC_irq (unsigned int irq, unsigned long enable, unsigned long disable)
6156+static void __modify_IO_APIC_irq(unsigned int irq, unsigned long enable, unsigned long disable)
6157 {
6158 struct irq_pin_list *entry = irq_2_pin + irq;
6159 unsigned int pin, reg;
6160@@ -308,30 +315,32 @@ static void __modify_IO_APIC_irq (unsign
6161 }
6162
6163 /* mask = 1 */
6164-static void __mask_IO_APIC_irq (unsigned int irq)
6165+static void __mask_IO_APIC_irq(unsigned int irq)
6166 {
6167- __modify_IO_APIC_irq(irq, 0x00010000, 0);
6168+ __modify_IO_APIC_irq(irq, IO_APIC_REDIR_MASKED, 0);
6169 }
6170
6171 /* mask = 0 */
6172-static void __unmask_IO_APIC_irq (unsigned int irq)
6173+static void __unmask_IO_APIC_irq(unsigned int irq)
6174 {
6175- __modify_IO_APIC_irq(irq, 0, 0x00010000);
6176+ __modify_IO_APIC_irq(irq, 0, IO_APIC_REDIR_MASKED);
6177 }
6178
6179 /* mask = 1, trigger = 0 */
6180-static void __mask_and_edge_IO_APIC_irq (unsigned int irq)
6181+static void __mask_and_edge_IO_APIC_irq(unsigned int irq)
6182 {
6183- __modify_IO_APIC_irq(irq, 0x00010000, 0x00008000);
6184+ __modify_IO_APIC_irq(irq, IO_APIC_REDIR_MASKED,
6185+ IO_APIC_REDIR_LEVEL_TRIGGER);
6186 }
6187
6188 /* mask = 0, trigger = 1 */
6189-static void __unmask_and_level_IO_APIC_irq (unsigned int irq)
6190+static void __unmask_and_level_IO_APIC_irq(unsigned int irq)
6191 {
6192- __modify_IO_APIC_irq(irq, 0x00008000, 0x00010000);
6193+ __modify_IO_APIC_irq(irq, IO_APIC_REDIR_LEVEL_TRIGGER,
6194+ IO_APIC_REDIR_MASKED);
6195 }
6196
6197-static void mask_IO_APIC_irq (unsigned int irq)
6198+static void mask_IO_APIC_irq(unsigned int irq)
6199 {
6200 unsigned long flags;
6201
6202@@ -340,7 +349,7 @@ static void mask_IO_APIC_irq (unsigned i
6203 spin_unlock_irqrestore(&ioapic_lock, flags);
6204 }
6205
6206-static void unmask_IO_APIC_irq (unsigned int irq)
6207+static void unmask_IO_APIC_irq(unsigned int irq)
6208 {
6209 unsigned long flags;
6210
6211@@ -352,7 +361,7 @@ static void unmask_IO_APIC_irq (unsigned
6212 static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
6213 {
6214 struct IO_APIC_route_entry entry;
6215-
6216+
6217 /* Check delivery_mode to be sure we're not clearing an SMI pin */
6218 entry = ioapic_read_entry(apic, pin);
6219 if (entry.delivery_mode == dest_SMI)
6220@@ -364,7 +373,7 @@ static void clear_IO_APIC_pin(unsigned i
6221 ioapic_mask_entry(apic, pin);
6222 }
6223
6224-static void clear_IO_APIC (void)
6225+static void clear_IO_APIC(void)
6226 {
6227 int apic, pin;
6228
6229@@ -381,7 +390,7 @@ static void set_ioapic_affinity_irq(unsi
6230 struct irq_pin_list *entry = irq_2_pin + irq;
6231 unsigned int apicid_value;
6232 cpumask_t tmp;
6233-
6234+
6235 cpus_and(tmp, cpumask, cpu_online_map);
6236 if (cpus_empty(tmp))
6237 tmp = TARGET_CPUS;
6238@@ -410,7 +419,7 @@ static void set_ioapic_affinity_irq(unsi
6239 # include <linux/kernel_stat.h> /* kstat */
6240 # include <linux/slab.h> /* kmalloc() */
6241 # include <linux/timer.h>
6242-
6243+
6244 #define IRQBALANCE_CHECK_ARCH -999
6245 #define MAX_BALANCED_IRQ_INTERVAL (5*HZ)
6246 #define MIN_BALANCED_IRQ_INTERVAL (HZ/2)
6247@@ -422,14 +431,14 @@ static int physical_balance __read_mostl
6248 static long balanced_irq_interval __read_mostly = MAX_BALANCED_IRQ_INTERVAL;
6249
6250 static struct irq_cpu_info {
6251- unsigned long * last_irq;
6252- unsigned long * irq_delta;
6253+ unsigned long *last_irq;
6254+ unsigned long *irq_delta;
6255 unsigned long irq;
6256 } irq_cpu_data[NR_CPUS];
6257
6258 #define CPU_IRQ(cpu) (irq_cpu_data[cpu].irq)
6259-#define LAST_CPU_IRQ(cpu,irq) (irq_cpu_data[cpu].last_irq[irq])
6260-#define IRQ_DELTA(cpu,irq) (irq_cpu_data[cpu].irq_delta[irq])
6261+#define LAST_CPU_IRQ(cpu, irq) (irq_cpu_data[cpu].last_irq[irq])
6262+#define IRQ_DELTA(cpu, irq) (irq_cpu_data[cpu].irq_delta[irq])
6263
6264 #define IDLE_ENOUGH(cpu,now) \
6265 (idle_cpu(cpu) && ((now) - per_cpu(irq_stat, (cpu)).idle_timestamp > 1))
6266@@ -468,8 +477,8 @@ inside:
6267 if (cpu == -1)
6268 cpu = NR_CPUS-1;
6269 }
6270- } while (!cpu_online(cpu) || !IRQ_ALLOWED(cpu,allowed_mask) ||
6271- (search_idle && !IDLE_ENOUGH(cpu,now)));
6272+ } while (!cpu_online(cpu) || !IRQ_ALLOWED(cpu, allowed_mask) ||
6273+ (search_idle && !IDLE_ENOUGH(cpu, now)));
6274
6275 return cpu;
6276 }
6277@@ -479,15 +488,14 @@ static inline void balance_irq(int cpu,
6278 unsigned long now = jiffies;
6279 cpumask_t allowed_mask;
6280 unsigned int new_cpu;
6281-
6282+
6283 if (irqbalance_disabled)
6284- return;
6285+ return;
6286
6287 cpus_and(allowed_mask, cpu_online_map, balance_irq_affinity[irq]);
6288 new_cpu = move(cpu, allowed_mask, now, 1);
6289- if (cpu != new_cpu) {
6290+ if (cpu != new_cpu)
6291 set_pending_irq(irq, cpumask_of_cpu(new_cpu));
6292- }
6293 }
6294
6295 static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold)
6296@@ -499,14 +507,14 @@ static inline void rotate_irqs_among_cpu
6297 if (!irq_desc[j].action)
6298 continue;
6299 /* Is it a significant load ? */
6300- if (IRQ_DELTA(CPU_TO_PACKAGEINDEX(i),j) <
6301+ if (IRQ_DELTA(CPU_TO_PACKAGEINDEX(i), j) <
6302 useful_load_threshold)
6303 continue;
6304 balance_irq(i, j);
6305 }
6306 }
6307 balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
6308- balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);
6309+ balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);
6310 return;
6311 }
6312
6313@@ -535,22 +543,22 @@ static void do_irq_balance(void)
6314 /* Is this an active IRQ or balancing disabled ? */
6315 if (!irq_desc[j].action || irq_balancing_disabled(j))
6316 continue;
6317- if ( package_index == i )
6318- IRQ_DELTA(package_index,j) = 0;
6319+ if (package_index == i)
6320+ IRQ_DELTA(package_index, j) = 0;
6321 /* Determine the total count per processor per IRQ */
6322 value_now = (unsigned long) kstat_cpu(i).irqs[j];
6323
6324 /* Determine the activity per processor per IRQ */
6325- delta = value_now - LAST_CPU_IRQ(i,j);
6326+ delta = value_now - LAST_CPU_IRQ(i, j);
6327
6328 /* Update last_cpu_irq[][] for the next time */
6329- LAST_CPU_IRQ(i,j) = value_now;
6330+ LAST_CPU_IRQ(i, j) = value_now;
6331
6332 /* Ignore IRQs whose rate is less than the clock */
6333 if (delta < useful_load_threshold)
6334 continue;
6335 /* update the load for the processor or package total */
6336- IRQ_DELTA(package_index,j) += delta;
6337+ IRQ_DELTA(package_index, j) += delta;
6338
6339 /* Keep track of the higher numbered sibling as well */
6340 if (i != package_index)
6341@@ -576,7 +584,8 @@ static void do_irq_balance(void)
6342 max_cpu_irq = ULONG_MAX;
6343
6344 tryanothercpu:
6345- /* Look for heaviest loaded processor.
6346+ /*
6347+ * Look for heaviest loaded processor.
6348 * We may come back to get the next heaviest loaded processor.
6349 * Skip processors with trivial loads.
6350 */
6351@@ -585,7 +594,7 @@ tryanothercpu:
6352 for_each_online_cpu(i) {
6353 if (i != CPU_TO_PACKAGEINDEX(i))
6354 continue;
6355- if (max_cpu_irq <= CPU_IRQ(i))
6356+ if (max_cpu_irq <= CPU_IRQ(i))
6357 continue;
6358 if (tmp_cpu_irq < CPU_IRQ(i)) {
6359 tmp_cpu_irq = CPU_IRQ(i);
6360@@ -594,8 +603,9 @@ tryanothercpu:
6361 }
6362
6363 if (tmp_loaded == -1) {
6364- /* In the case of small number of heavy interrupt sources,
6365- * loading some of the cpus too much. We use Ingo's original
6366+ /*
6367+ * In the case of small number of heavy interrupt sources,
6368+ * loading some of the cpus too much. We use Ingo's original
6369 * approach to rotate them around.
6370 */
6371 if (!first_attempt && imbalance >= useful_load_threshold) {
6372@@ -604,13 +614,14 @@ tryanothercpu:
6373 }
6374 goto not_worth_the_effort;
6375 }
6376-
6377+
6378 first_attempt = 0; /* heaviest search */
6379 max_cpu_irq = tmp_cpu_irq; /* load */
6380 max_loaded = tmp_loaded; /* processor */
6381 imbalance = (max_cpu_irq - min_cpu_irq) / 2;
6382-
6383- /* if imbalance is less than approx 10% of max load, then
6384+
6385+ /*
6386+ * if imbalance is less than approx 10% of max load, then
6387 * observe diminishing returns action. - quit
6388 */
6389 if (imbalance < (max_cpu_irq >> 3))
6390@@ -626,26 +637,25 @@ tryanotherirq:
6391 /* Is this an active IRQ? */
6392 if (!irq_desc[j].action)
6393 continue;
6394- if (imbalance <= IRQ_DELTA(max_loaded,j))
6395+ if (imbalance <= IRQ_DELTA(max_loaded, j))
6396 continue;
6397 /* Try to find the IRQ that is closest to the imbalance
6398 * without going over.
6399 */
6400- if (move_this_load < IRQ_DELTA(max_loaded,j)) {
6401- move_this_load = IRQ_DELTA(max_loaded,j);
6402+ if (move_this_load < IRQ_DELTA(max_loaded, j)) {
6403+ move_this_load = IRQ_DELTA(max_loaded, j);
6404 selected_irq = j;
6405 }
6406 }
6407- if (selected_irq == -1) {
6408+ if (selected_irq == -1)
6409 goto tryanothercpu;
6410- }
6411
6412 imbalance = move_this_load;
6413-
6414+
6415 /* For physical_balance case, we accumulated both load
6416 * values in the one of the siblings cpu_irq[],
6417 * to use the same code for physical and logical processors
6418- * as much as possible.
6419+ * as much as possible.
6420 *
6421 * NOTE: the cpu_irq[] array holds the sum of the load for
6422 * sibling A and sibling B in the slot for the lowest numbered
6423@@ -674,11 +684,11 @@ tryanotherirq:
6424 /* mark for change destination */
6425 set_pending_irq(selected_irq, cpumask_of_cpu(min_loaded));
6426
6427- /* Since we made a change, come back sooner to
6428+ /* Since we made a change, come back sooner to
6429 * check for more variation.
6430 */
6431 balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
6432- balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);
6433+ balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);
6434 return;
6435 }
6436 goto tryanotherirq;
6437@@ -689,7 +699,7 @@ not_worth_the_effort:
6438 * upward
6439 */
6440 balanced_irq_interval = min((long)MAX_BALANCED_IRQ_INTERVAL,
6441- balanced_irq_interval + BALANCED_IRQ_MORE_DELTA);
6442+ balanced_irq_interval + BALANCED_IRQ_MORE_DELTA);
6443 return;
6444 }
6445
6446@@ -728,13 +738,13 @@ static int __init balanced_irq_init(void
6447 cpumask_t tmp;
6448
6449 cpus_shift_right(tmp, cpu_online_map, 2);
6450- c = &boot_cpu_data;
6451+ c = &boot_cpu_data;
6452 /* When not overwritten by the command line ask subarchitecture. */
6453 if (irqbalance_disabled == IRQBALANCE_CHECK_ARCH)
6454 irqbalance_disabled = NO_BALANCE_IRQ;
6455 if (irqbalance_disabled)
6456 return 0;
6457-
6458+
6459 /* disable irqbalance completely if there is only one processor online */
6460 if (num_online_cpus() < 2) {
6461 irqbalance_disabled = 1;
6462@@ -748,16 +758,14 @@ static int __init balanced_irq_init(void
6463 physical_balance = 1;
6464
6465 for_each_online_cpu(i) {
6466- irq_cpu_data[i].irq_delta = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
6467- irq_cpu_data[i].last_irq = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
6468+ irq_cpu_data[i].irq_delta = kzalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
6469+ irq_cpu_data[i].last_irq = kzalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
6470 if (irq_cpu_data[i].irq_delta == NULL || irq_cpu_data[i].last_irq == NULL) {
6471 printk(KERN_ERR "balanced_irq_init: out of memory");
6472 goto failed;
6473 }
6474- memset(irq_cpu_data[i].irq_delta,0,sizeof(unsigned long) * NR_IRQS);
6475- memset(irq_cpu_data[i].last_irq,0,sizeof(unsigned long) * NR_IRQS);
6476 }
6477-
6478+
6479 printk(KERN_INFO "Starting balanced_irq\n");
6480 if (!IS_ERR(kthread_run(balanced_irq, NULL, "kirqd")))
6481 return 0;
6482@@ -799,7 +807,7 @@ void send_IPI_self(int vector)
6483 /*
6484 * Send the IPI. The write to APIC_ICR fires this off.
6485 */
6486- apic_write_around(APIC_ICR, cfg);
6487+ apic_write(APIC_ICR, cfg);
6488 #endif
6489 }
6490 #endif /* !CONFIG_SMP */
6491@@ -853,10 +861,10 @@ static int find_irq_entry(int apic, int
6492 int i;
6493
6494 for (i = 0; i < mp_irq_entries; i++)
6495- if (mp_irqs[i].mpc_irqtype == type &&
6496- (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mpc_apicid ||
6497- mp_irqs[i].mpc_dstapic == MP_APIC_ALL) &&
6498- mp_irqs[i].mpc_dstirq == pin)
6499+ if (mp_irqs[i].mp_irqtype == type &&
6500+ (mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid ||
6501+ mp_irqs[i].mp_dstapic == MP_APIC_ALL) &&
6502+ mp_irqs[i].mp_dstirq == pin)
6503 return i;
6504
6505 return -1;
6506@@ -871,13 +879,13 @@ static int __init find_isa_irq_pin(int i
6507 int i;
6508
6509 for (i = 0; i < mp_irq_entries; i++) {
6510- int lbus = mp_irqs[i].mpc_srcbus;
6511+ int lbus = mp_irqs[i].mp_srcbus;
6512
6513 if (test_bit(lbus, mp_bus_not_pci) &&
6514- (mp_irqs[i].mpc_irqtype == type) &&
6515- (mp_irqs[i].mpc_srcbusirq == irq))
6516+ (mp_irqs[i].mp_irqtype == type) &&
6517+ (mp_irqs[i].mp_srcbusirq == irq))
6518
6519- return mp_irqs[i].mpc_dstirq;
6520+ return mp_irqs[i].mp_dstirq;
6521 }
6522 return -1;
6523 }
6524@@ -887,17 +895,17 @@ static int __init find_isa_irq_apic(int
6525 int i;
6526
6527 for (i = 0; i < mp_irq_entries; i++) {
6528- int lbus = mp_irqs[i].mpc_srcbus;
6529+ int lbus = mp_irqs[i].mp_srcbus;
6530
6531 if (test_bit(lbus, mp_bus_not_pci) &&
6532- (mp_irqs[i].mpc_irqtype == type) &&
6533- (mp_irqs[i].mpc_srcbusirq == irq))
6534+ (mp_irqs[i].mp_irqtype == type) &&
6535+ (mp_irqs[i].mp_srcbusirq == irq))
6536 break;
6537 }
6538 if (i < mp_irq_entries) {
6539 int apic;
6540- for(apic = 0; apic < nr_ioapics; apic++) {
6541- if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic)
6542+ for (apic = 0; apic < nr_ioapics; apic++) {
6543+ if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic)
6544 return apic;
6545 }
6546 }
6547@@ -918,28 +926,28 @@ int IO_APIC_get_PCI_irq_vector(int bus,
6548
6549 apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, "
6550 "slot:%d, pin:%d.\n", bus, slot, pin);
6551- if (mp_bus_id_to_pci_bus[bus] == -1) {
6552+ if (test_bit(bus, mp_bus_not_pci)) {
6553 printk(KERN_WARNING "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
6554 return -1;
6555 }
6556 for (i = 0; i < mp_irq_entries; i++) {
6557- int lbus = mp_irqs[i].mpc_srcbus;
6558+ int lbus = mp_irqs[i].mp_srcbus;
6559
6560 for (apic = 0; apic < nr_ioapics; apic++)
6561- if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic ||
6562- mp_irqs[i].mpc_dstapic == MP_APIC_ALL)
6563+ if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic ||
6564+ mp_irqs[i].mp_dstapic == MP_APIC_ALL)
6565 break;
6566
6567 if (!test_bit(lbus, mp_bus_not_pci) &&
6568- !mp_irqs[i].mpc_irqtype &&
6569+ !mp_irqs[i].mp_irqtype &&
6570 (bus == lbus) &&
6571- (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) {
6572- int irq = pin_2_irq(i,apic,mp_irqs[i].mpc_dstirq);
6573+ (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) {
6574+ int irq = pin_2_irq(i, apic, mp_irqs[i].mp_dstirq);
6575
6576 if (!(apic || IO_APIC_IRQ(irq)))
6577 continue;
6578
6579- if (pin == (mp_irqs[i].mpc_srcbusirq & 3))
6580+ if (pin == (mp_irqs[i].mp_srcbusirq & 3))
6581 return irq;
6582 /*
6583 * Use the first all-but-pin matching entry as a
6584@@ -954,7 +962,7 @@ int IO_APIC_get_PCI_irq_vector(int bus,
6585 EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
6586
6587 /*
6588- * This function currently is only a helper for the i386 smp boot process where
6589+ * This function currently is only a helper for the i386 smp boot process where
6590 * we need to reprogram the ioredtbls to cater for the cpus which have come online
6591 * so mask in all cases should simply be TARGET_CPUS
6592 */
6593@@ -1008,7 +1016,7 @@ static int EISA_ELCR(unsigned int irq)
6594 * EISA conforming in the MP table, that means its trigger type must
6595 * be read in from the ELCR */
6596
6597-#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mpc_srcbusirq))
6598+#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mp_srcbusirq))
6599 #define default_EISA_polarity(idx) default_ISA_polarity(idx)
6600
6601 /* PCI interrupts are always polarity one level triggered,
6602@@ -1025,118 +1033,115 @@ static int EISA_ELCR(unsigned int irq)
6603
6604 static int MPBIOS_polarity(int idx)
6605 {
6606- int bus = mp_irqs[idx].mpc_srcbus;
6607+ int bus = mp_irqs[idx].mp_srcbus;
6608 int polarity;
6609
6610 /*
6611 * Determine IRQ line polarity (high active or low active):
6612 */
6613- switch (mp_irqs[idx].mpc_irqflag & 3)
6614+ switch (mp_irqs[idx].mp_irqflag & 3) {
6615+ case 0: /* conforms, ie. bus-type dependent polarity */
6616 {
6617- case 0: /* conforms, ie. bus-type dependent polarity */
6618- {
6619- polarity = test_bit(bus, mp_bus_not_pci)?
6620- default_ISA_polarity(idx):
6621- default_PCI_polarity(idx);
6622- break;
6623- }
6624- case 1: /* high active */
6625- {
6626- polarity = 0;
6627- break;
6628- }
6629- case 2: /* reserved */
6630- {
6631- printk(KERN_WARNING "broken BIOS!!\n");
6632- polarity = 1;
6633- break;
6634- }
6635- case 3: /* low active */
6636- {
6637- polarity = 1;
6638- break;
6639- }
6640- default: /* invalid */
6641- {
6642- printk(KERN_WARNING "broken BIOS!!\n");
6643- polarity = 1;
6644- break;
6645- }
6646+ polarity = test_bit(bus, mp_bus_not_pci)?
6647+ default_ISA_polarity(idx):
6648+ default_PCI_polarity(idx);
6649+ break;
6650+ }
6651+ case 1: /* high active */
6652+ {
6653+ polarity = 0;
6654+ break;
6655+ }
6656+ case 2: /* reserved */
6657+ {
6658+ printk(KERN_WARNING "broken BIOS!!\n");
6659+ polarity = 1;
6660+ break;
6661+ }
6662+ case 3: /* low active */
6663+ {
6664+ polarity = 1;
6665+ break;
6666+ }
6667+ default: /* invalid */
6668+ {
6669+ printk(KERN_WARNING "broken BIOS!!\n");
6670+ polarity = 1;
6671+ break;
6672+ }
6673 }
6674 return polarity;
6675 }
6676
6677 static int MPBIOS_trigger(int idx)
6678 {
6679- int bus = mp_irqs[idx].mpc_srcbus;
6680+ int bus = mp_irqs[idx].mp_srcbus;
6681 int trigger;
6682
6683 /*
6684 * Determine IRQ trigger mode (edge or level sensitive):
6685 */
6686- switch ((mp_irqs[idx].mpc_irqflag>>2) & 3)
6687+ switch ((mp_irqs[idx].mp_irqflag>>2) & 3) {
6688+ case 0: /* conforms, ie. bus-type dependent */
6689 {
6690- case 0: /* conforms, ie. bus-type dependent */
6691- {
6692- trigger = test_bit(bus, mp_bus_not_pci)?
6693- default_ISA_trigger(idx):
6694- default_PCI_trigger(idx);
6695+ trigger = test_bit(bus, mp_bus_not_pci)?
6696+ default_ISA_trigger(idx):
6697+ default_PCI_trigger(idx);
6698 #if defined(CONFIG_EISA) || defined(CONFIG_MCA)
6699- switch (mp_bus_id_to_type[bus])
6700- {
6701- case MP_BUS_ISA: /* ISA pin */
6702- {
6703- /* set before the switch */
6704- break;
6705- }
6706- case MP_BUS_EISA: /* EISA pin */
6707- {
6708- trigger = default_EISA_trigger(idx);
6709- break;
6710- }
6711- case MP_BUS_PCI: /* PCI pin */
6712- {
6713- /* set before the switch */
6714- break;
6715- }
6716- case MP_BUS_MCA: /* MCA pin */
6717- {
6718- trigger = default_MCA_trigger(idx);
6719- break;
6720- }
6721- default:
6722- {
6723- printk(KERN_WARNING "broken BIOS!!\n");
6724- trigger = 1;
6725- break;
6726- }
6727- }
6728-#endif
6729+ switch (mp_bus_id_to_type[bus]) {
6730+ case MP_BUS_ISA: /* ISA pin */
6731+ {
6732+ /* set before the switch */
6733 break;
6734 }
6735- case 1: /* edge */
6736+ case MP_BUS_EISA: /* EISA pin */
6737 {
6738- trigger = 0;
6739+ trigger = default_EISA_trigger(idx);
6740 break;
6741 }
6742- case 2: /* reserved */
6743+ case MP_BUS_PCI: /* PCI pin */
6744 {
6745- printk(KERN_WARNING "broken BIOS!!\n");
6746- trigger = 1;
6747+ /* set before the switch */
6748 break;
6749 }
6750- case 3: /* level */
6751+ case MP_BUS_MCA: /* MCA pin */
6752 {
6753- trigger = 1;
6754+ trigger = default_MCA_trigger(idx);
6755 break;
6756 }
6757- default: /* invalid */
6758+ default:
6759 {
6760 printk(KERN_WARNING "broken BIOS!!\n");
6761- trigger = 0;
6762+ trigger = 1;
6763 break;
6764 }
6765 }
6766+#endif
6767+ break;
6768+ }
6769+ case 1: /* edge */
6770+ {
6771+ trigger = 0;
6772+ break;
6773+ }
6774+ case 2: /* reserved */
6775+ {
6776+ printk(KERN_WARNING "broken BIOS!!\n");
6777+ trigger = 1;
6778+ break;
6779+ }
6780+ case 3: /* level */
6781+ {
6782+ trigger = 1;
6783+ break;
6784+ }
6785+ default: /* invalid */
6786+ {
6787+ printk(KERN_WARNING "broken BIOS!!\n");
6788+ trigger = 0;
6789+ break;
6790+ }
6791+ }
6792 return trigger;
6793 }
6794
6795@@ -1153,16 +1158,16 @@ static inline int irq_trigger(int idx)
6796 static int pin_2_irq(int idx, int apic, int pin)
6797 {
6798 int irq, i;
6799- int bus = mp_irqs[idx].mpc_srcbus;
6800+ int bus = mp_irqs[idx].mp_srcbus;
6801
6802 /*
6803 * Debugging check, we are in big trouble if this message pops up!
6804 */
6805- if (mp_irqs[idx].mpc_dstirq != pin)
6806+ if (mp_irqs[idx].mp_dstirq != pin)
6807 printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
6808
6809 if (test_bit(bus, mp_bus_not_pci))
6810- irq = mp_irqs[idx].mpc_srcbusirq;
6811+ irq = mp_irqs[idx].mp_srcbusirq;
6812 else {
6813 /*
6814 * PCI IRQs are mapped in order
6815@@ -1204,8 +1209,8 @@ static inline int IO_APIC_irq_trigger(in
6816
6817 for (apic = 0; apic < nr_ioapics; apic++) {
6818 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
6819- idx = find_irq_entry(apic,pin,mp_INT);
6820- if ((idx != -1) && (irq == pin_2_irq(idx,apic,pin)))
6821+ idx = find_irq_entry(apic, pin, mp_INT);
6822+ if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin)))
6823 return irq_trigger(idx);
6824 }
6825 }
6826@@ -1291,25 +1296,25 @@ static void __init setup_IO_APIC_irqs(vo
6827 /*
6828 * add it to the IO-APIC irq-routing table:
6829 */
6830- memset(&entry,0,sizeof(entry));
6831+ memset(&entry, 0, sizeof(entry));
6832
6833 entry.delivery_mode = INT_DELIVERY_MODE;
6834 entry.dest_mode = INT_DEST_MODE;
6835 entry.mask = 0; /* enable IRQ */
6836- entry.dest.logical.logical_dest =
6837+ entry.dest.logical.logical_dest =
6838 cpu_mask_to_apicid(TARGET_CPUS);
6839
6840- idx = find_irq_entry(apic,pin,mp_INT);
6841+ idx = find_irq_entry(apic, pin, mp_INT);
6842 if (idx == -1) {
6843 if (first_notcon) {
6844 apic_printk(APIC_VERBOSE, KERN_DEBUG
6845 " IO-APIC (apicid-pin) %d-%d",
6846- mp_ioapics[apic].mpc_apicid,
6847+ mp_ioapics[apic].mp_apicid,
6848 pin);
6849 first_notcon = 0;
6850 } else
6851 apic_printk(APIC_VERBOSE, ", %d-%d",
6852- mp_ioapics[apic].mpc_apicid, pin);
6853+ mp_ioapics[apic].mp_apicid, pin);
6854 continue;
6855 }
6856
6857@@ -1343,7 +1348,7 @@ static void __init setup_IO_APIC_irqs(vo
6858 vector = assign_irq_vector(irq);
6859 entry.vector = vector;
6860 ioapic_register_intr(irq, vector, IOAPIC_AUTO);
6861-
6862+
6863 if (!apic && (irq < 16))
6864 disable_8259A_irq(irq);
6865 }
6866@@ -1355,27 +1360,23 @@ static void __init setup_IO_APIC_irqs(vo
6867 apic_printk(APIC_VERBOSE, " not connected.\n");
6868 }
6869
6870+#ifndef CONFIG_XEN
6871 /*
6872- * Set up the 8259A-master output pin:
6873+ * Set up the timer pin, possibly with the 8259A-master behind.
6874 */
6875-#ifndef CONFIG_XEN
6876-static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, int vector)
6877+static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
6878+ int vector)
6879 {
6880 struct IO_APIC_route_entry entry;
6881
6882- memset(&entry,0,sizeof(entry));
6883-
6884- disable_8259A_irq(0);
6885-
6886- /* mask LVT0 */
6887- apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
6888+ memset(&entry, 0, sizeof(entry));
6889
6890 /*
6891 * We use logical delivery to get the timer IRQ
6892 * to the first CPU.
6893 */
6894 entry.dest_mode = INT_DEST_MODE;
6895- entry.mask = 0; /* unmask IRQ now */
6896+ entry.mask = 1; /* mask IRQ now */
6897 entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
6898 entry.delivery_mode = INT_DELIVERY_MODE;
6899 entry.polarity = 0;
6900@@ -1384,17 +1385,14 @@ static void __init setup_ExtINT_IRQ0_pin
6901
6902 /*
6903 * The timer IRQ doesn't have to know that behind the
6904- * scene we have a 8259A-master in AEOI mode ...
6905+ * scene we may have a 8259A-master in AEOI mode ...
6906 */
6907- irq_desc[0].chip = &ioapic_chip;
6908- set_irq_handler(0, handle_edge_irq);
6909+ ioapic_register_intr(0, vector, IOAPIC_EDGE);
6910
6911 /*
6912 * Add it to the IO-APIC irq-routing table:
6913 */
6914 ioapic_write_entry(apic, pin, entry);
6915-
6916- enable_8259A_irq(0);
6917 }
6918
6919 void __init print_IO_APIC(void)
6920@@ -1409,10 +1407,10 @@ void __init print_IO_APIC(void)
6921 if (apic_verbosity == APIC_QUIET)
6922 return;
6923
6924- printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
6925+ printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
6926 for (i = 0; i < nr_ioapics; i++)
6927 printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
6928- mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]);
6929+ mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]);
6930
6931 /*
6932 * We are a bit conservative about what we expect. We have to
6933@@ -1431,7 +1429,7 @@ void __init print_IO_APIC(void)
6934 reg_03.raw = io_apic_read(apic, 3);
6935 spin_unlock_irqrestore(&ioapic_lock, flags);
6936
6937- printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid);
6938+ printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid);
6939 printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
6940 printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID);
6941 printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type);
6942@@ -1512,7 +1510,7 @@ void __init print_IO_APIC(void)
6943 return;
6944 }
6945
6946-static void print_APIC_bitfield (int base)
6947+static void print_APIC_bitfield(int base)
6948 {
6949 unsigned int v;
6950 int i, j;
6951@@ -1533,7 +1531,7 @@ static void print_APIC_bitfield (int bas
6952 }
6953 }
6954
6955-void /*__init*/ print_local_APIC(void * dummy)
6956+void /*__init*/ print_local_APIC(void *dummy)
6957 {
6958 unsigned int v, ver, maxlvt;
6959
6960@@ -1542,6 +1540,7 @@ void /*__init*/ print_local_APIC(void *
6961
6962 printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
6963 smp_processor_id(), hard_smp_processor_id());
6964+ v = apic_read(APIC_ID);
6965 printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v,
6966 GET_APIC_ID(read_apic_id()));
6967 v = apic_read(APIC_LVR);
6968@@ -1616,9 +1615,9 @@ void /*__init*/ print_local_APIC(void *
6969 printk("\n");
6970 }
6971
6972-void print_all_local_APICs (void)
6973+void print_all_local_APICs(void)
6974 {
6975- on_each_cpu(print_local_APIC, NULL, 1, 1);
6976+ on_each_cpu(print_local_APIC, NULL, 1);
6977 }
6978
6979 void /*__init*/ print_PIC(void)
6980@@ -1639,11 +1638,11 @@ void /*__init*/ print_PIC(void)
6981 v = inb(0xa0) << 8 | inb(0x20);
6982 printk(KERN_DEBUG "... PIC IRR: %04x\n", v);
6983
6984- outb(0x0b,0xa0);
6985- outb(0x0b,0x20);
6986+ outb(0x0b, 0xa0);
6987+ outb(0x0b, 0x20);
6988 v = inb(0xa0) << 8 | inb(0x20);
6989- outb(0x0a,0xa0);
6990- outb(0x0a,0x20);
6991+ outb(0x0a, 0xa0);
6992+ outb(0x0a, 0x20);
6993
6994 spin_unlock_irqrestore(&i8259A_lock, flags);
6995
6996@@ -1652,6 +1651,8 @@ void /*__init*/ print_PIC(void)
6997 v = inb(0x4d1) << 8 | inb(0x4d0);
6998 printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
6999 }
7000+#else
7001+void __init print_IO_APIC(void) {}
7002 #endif /* !CONFIG_XEN */
7003
7004 static void __init enable_IO_APIC(void)
7005@@ -1681,7 +1682,7 @@ static void __init enable_IO_APIC(void)
7006 nr_ioapic_registers[apic] = reg_01.bits.entries+1;
7007 }
7008 #ifndef CONFIG_XEN
7009- for(apic = 0; apic < nr_ioapics; apic++) {
7010+ for (apic = 0; apic < nr_ioapics; apic++) {
7011 int pin;
7012 /* See if any of the pins is in ExtINT mode */
7013 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
7014@@ -1774,7 +1775,7 @@ void disable_IO_APIC(void)
7015 * by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999
7016 */
7017
7018-#if !defined(CONFIG_XEN) && !defined(CONFIG_X86_NUMAQ)
7019+#ifndef CONFIG_XEN
7020 static void __init setup_ioapic_ids_from_mpc(void)
7021 {
7022 union IO_APIC_reg_00 reg_00;
7023@@ -1784,6 +1785,11 @@ static void __init setup_ioapic_ids_from
7024 unsigned char old_id;
7025 unsigned long flags;
7026
7027+#ifdef CONFIG_X86_NUMAQ
7028+ if (found_numaq)
7029+ return;
7030+#endif
7031+
7032 /*
7033 * Don't check I/O APIC IDs for xAPIC systems. They have
7034 * no meaning without the serial APIC bus.
7035@@ -1806,15 +1812,15 @@ static void __init setup_ioapic_ids_from
7036 spin_lock_irqsave(&ioapic_lock, flags);
7037 reg_00.raw = io_apic_read(apic, 0);
7038 spin_unlock_irqrestore(&ioapic_lock, flags);
7039-
7040- old_id = mp_ioapics[apic].mpc_apicid;
7041
7042- if (mp_ioapics[apic].mpc_apicid >= get_physical_broadcast()) {
7043+ old_id = mp_ioapics[apic].mp_apicid;
7044+
7045+ if (mp_ioapics[apic].mp_apicid >= get_physical_broadcast()) {
7046 printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
7047- apic, mp_ioapics[apic].mpc_apicid);
7048+ apic, mp_ioapics[apic].mp_apicid);
7049 printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
7050 reg_00.bits.ID);
7051- mp_ioapics[apic].mpc_apicid = reg_00.bits.ID;
7052+ mp_ioapics[apic].mp_apicid = reg_00.bits.ID;
7053 }
7054
7055 /*
7056@@ -1823,9 +1829,9 @@ static void __init setup_ioapic_ids_from
7057 * 'stuck on smp_invalidate_needed IPI wait' messages.
7058 */
7059 if (check_apicid_used(phys_id_present_map,
7060- mp_ioapics[apic].mpc_apicid)) {
7061+ mp_ioapics[apic].mp_apicid)) {
7062 printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
7063- apic, mp_ioapics[apic].mpc_apicid);
7064+ apic, mp_ioapics[apic].mp_apicid);
7065 for (i = 0; i < get_physical_broadcast(); i++)
7066 if (!physid_isset(i, phys_id_present_map))
7067 break;
7068@@ -1834,13 +1840,13 @@ static void __init setup_ioapic_ids_from
7069 printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
7070 i);
7071 physid_set(i, phys_id_present_map);
7072- mp_ioapics[apic].mpc_apicid = i;
7073+ mp_ioapics[apic].mp_apicid = i;
7074 } else {
7075 physid_mask_t tmp;
7076- tmp = apicid_to_cpu_present(mp_ioapics[apic].mpc_apicid);
7077+ tmp = apicid_to_cpu_present(mp_ioapics[apic].mp_apicid);
7078 apic_printk(APIC_VERBOSE, "Setting %d in the "
7079 "phys_id_present_map\n",
7080- mp_ioapics[apic].mpc_apicid);
7081+ mp_ioapics[apic].mp_apicid);
7082 physids_or(phys_id_present_map, phys_id_present_map, tmp);
7083 }
7084
7085@@ -1849,21 +1855,21 @@ static void __init setup_ioapic_ids_from
7086 * We need to adjust the IRQ routing table
7087 * if the ID changed.
7088 */
7089- if (old_id != mp_ioapics[apic].mpc_apicid)
7090+ if (old_id != mp_ioapics[apic].mp_apicid)
7091 for (i = 0; i < mp_irq_entries; i++)
7092- if (mp_irqs[i].mpc_dstapic == old_id)
7093- mp_irqs[i].mpc_dstapic
7094- = mp_ioapics[apic].mpc_apicid;
7095+ if (mp_irqs[i].mp_dstapic == old_id)
7096+ mp_irqs[i].mp_dstapic
7097+ = mp_ioapics[apic].mp_apicid;
7098
7099 /*
7100 * Read the right value from the MPC table and
7101 * write it into the ID register.
7102- */
7103+ */
7104 apic_printk(APIC_VERBOSE, KERN_INFO
7105 "...changing IO-APIC physical APIC ID to %d ...",
7106- mp_ioapics[apic].mpc_apicid);
7107+ mp_ioapics[apic].mp_apicid);
7108
7109- reg_00.bits.ID = mp_ioapics[apic].mpc_apicid;
7110+ reg_00.bits.ID = mp_ioapics[apic].mp_apicid;
7111 spin_lock_irqsave(&ioapic_lock, flags);
7112 io_apic_write(apic, 0, reg_00.raw);
7113 spin_unlock_irqrestore(&ioapic_lock, flags);
7114@@ -1874,17 +1880,13 @@ static void __init setup_ioapic_ids_from
7115 spin_lock_irqsave(&ioapic_lock, flags);
7116 reg_00.raw = io_apic_read(apic, 0);
7117 spin_unlock_irqrestore(&ioapic_lock, flags);
7118- if (reg_00.bits.ID != mp_ioapics[apic].mpc_apicid)
7119+ if (reg_00.bits.ID != mp_ioapics[apic].mp_apicid)
7120 printk("could not set ID!\n");
7121 else
7122 apic_printk(APIC_VERBOSE, " ok.\n");
7123 }
7124 }
7125-#else
7126-static void __init setup_ioapic_ids_from_mpc(void) { }
7127-#endif
7128
7129-#ifndef CONFIG_XEN
7130 int no_timer_check __initdata;
7131
7132 static int __init notimercheck(char *s)
7133@@ -2077,45 +2079,53 @@ static inline void init_IO_APIC_traps(vo
7134 * The local APIC irq-chip implementation:
7135 */
7136
7137-static void ack_apic(unsigned int irq)
7138+static void ack_lapic_irq(unsigned int irq)
7139 {
7140 ack_APIC_irq();
7141 }
7142
7143-static void mask_lapic_irq (unsigned int irq)
7144+static void mask_lapic_irq(unsigned int irq)
7145 {
7146 unsigned long v;
7147
7148 v = apic_read(APIC_LVT0);
7149- apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED);
7150+ apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
7151 }
7152
7153-static void unmask_lapic_irq (unsigned int irq)
7154+static void unmask_lapic_irq(unsigned int irq)
7155 {
7156 unsigned long v;
7157
7158 v = apic_read(APIC_LVT0);
7159- apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED);
7160+ apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
7161 }
7162
7163 static struct irq_chip lapic_chip __read_mostly = {
7164- .name = "local-APIC-edge",
7165+ .name = "local-APIC",
7166 .mask = mask_lapic_irq,
7167 .unmask = unmask_lapic_irq,
7168- .eoi = ack_apic,
7169+ .ack = ack_lapic_irq,
7170 };
7171
7172+static void lapic_register_intr(int irq, int vector)
7173+{
7174+ irq_desc[irq].status &= ~IRQ_LEVEL;
7175+ set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
7176+ "edge");
7177+ set_intr_gate(vector, interrupt[irq]);
7178+}
7179+
7180 static void __init setup_nmi(void)
7181 {
7182 /*
7183- * Dirty trick to enable the NMI watchdog ...
7184+ * Dirty trick to enable the NMI watchdog ...
7185 * We put the 8259A master into AEOI mode and
7186 * unmask on all local APICs LVT0 as NMI.
7187 *
7188 * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire')
7189 * is from Maciej W. Rozycki - so we do not have to EOI from
7190 * the NMI handler or the timer interrupt.
7191- */
7192+ */
7193 apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ...");
7194
7195 enable_NMI_through_LVT0();
7196@@ -2191,11 +2201,16 @@ static inline void __init unlock_ExtINT_
7197 static inline void __init check_timer(void)
7198 {
7199 int apic1, pin1, apic2, pin2;
7200+ int no_pin1 = 0;
7201 int vector;
7202+ unsigned int ver;
7203 unsigned long flags;
7204
7205 local_irq_save(flags);
7206
7207+ ver = apic_read(APIC_LVR);
7208+ ver = GET_APIC_VERSION(ver);
7209+
7210 /*
7211 * get/set the timer IRQ vector:
7212 */
7213@@ -2204,34 +2219,54 @@ static inline void __init check_timer(vo
7214 set_intr_gate(vector, interrupt[0]);
7215
7216 /*
7217- * Subtle, code in do_timer_interrupt() expects an AEOI
7218- * mode for the 8259A whenever interrupts are routed
7219- * through I/O APICs. Also IRQ0 has to be enabled in
7220- * the 8259A which implies the virtual wire has to be
7221- * disabled in the local APIC.
7222+ * As IRQ0 is to be enabled in the 8259A, the virtual
7223+ * wire has to be disabled in the local APIC. Also
7224+ * timer interrupts need to be acknowledged manually in
7225+ * the 8259A for the i82489DX when using the NMI
7226+ * watchdog as that APIC treats NMIs as level-triggered.
7227+ * The AEOI mode will finish them in the 8259A
7228+ * automatically.
7229 */
7230- apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
7231+ apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
7232 init_8259A(1);
7233- timer_ack = 1;
7234- if (timer_over_8254 > 0)
7235- enable_8259A_irq(0);
7236+ timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver));
7237
7238 pin1 = find_isa_irq_pin(0, mp_INT);
7239 apic1 = find_isa_irq_apic(0, mp_INT);
7240 pin2 = ioapic_i8259.pin;
7241 apic2 = ioapic_i8259.apic;
7242
7243- printk(KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n",
7244- vector, apic1, pin1, apic2, pin2);
7245+ apic_printk(APIC_QUIET, KERN_INFO "..TIMER: vector=0x%02X "
7246+ "apic1=%d pin1=%d apic2=%d pin2=%d\n",
7247+ vector, apic1, pin1, apic2, pin2);
7248+
7249+ /*
7250+ * Some BIOS writers are clueless and report the ExtINTA
7251+ * I/O APIC input from the cascaded 8259A as the timer
7252+ * interrupt input. So just in case, if only one pin
7253+ * was found above, try it both directly and through the
7254+ * 8259A.
7255+ */
7256+ if (pin1 == -1) {
7257+ pin1 = pin2;
7258+ apic1 = apic2;
7259+ no_pin1 = 1;
7260+ } else if (pin2 == -1) {
7261+ pin2 = pin1;
7262+ apic2 = apic1;
7263+ }
7264
7265 if (pin1 != -1) {
7266 /*
7267 * Ok, does IRQ0 through the IOAPIC work?
7268 */
7269+ if (no_pin1) {
7270+ add_pin_to_irq(0, apic1, pin1);
7271+ setup_timer_IRQ0_pin(apic1, pin1, vector);
7272+ }
7273 unmask_IO_APIC_irq(0);
7274 if (timer_irq_works()) {
7275 if (nmi_watchdog == NMI_IO_APIC) {
7276- disable_8259A_irq(0);
7277 setup_nmi();
7278 enable_8259A_irq(0);
7279 }
7280@@ -2240,71 +2275,77 @@ static inline void __init check_timer(vo
7281 goto out;
7282 }
7283 clear_IO_APIC_pin(apic1, pin1);
7284- printk(KERN_ERR "..MP-BIOS bug: 8254 timer not connected to "
7285- "IO-APIC\n");
7286- }
7287-
7288- printk(KERN_INFO "...trying to set up timer (IRQ0) through the 8259A ... ");
7289- if (pin2 != -1) {
7290- printk("\n..... (found pin %d) ...", pin2);
7291+ if (!no_pin1)
7292+ apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
7293+ "8254 timer not connected to IO-APIC\n");
7294+
7295+ apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer "
7296+ "(IRQ0) through the 8259A ...\n");
7297+ apic_printk(APIC_QUIET, KERN_INFO
7298+ "..... (found apic %d pin %d) ...\n", apic2, pin2);
7299 /*
7300 * legacy devices should be connected to IO APIC #0
7301 */
7302- setup_ExtINT_IRQ0_pin(apic2, pin2, vector);
7303+ replace_pin_at_irq(0, apic1, pin1, apic2, pin2);
7304+ setup_timer_IRQ0_pin(apic2, pin2, vector);
7305+ unmask_IO_APIC_irq(0);
7306+ enable_8259A_irq(0);
7307 if (timer_irq_works()) {
7308- printk("works.\n");
7309- if (pin1 != -1)
7310- replace_pin_at_irq(0, apic1, pin1, apic2, pin2);
7311- else
7312- add_pin_to_irq(0, apic2, pin2);
7313+ apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
7314+ timer_through_8259 = 1;
7315 if (nmi_watchdog == NMI_IO_APIC) {
7316+ disable_8259A_irq(0);
7317 setup_nmi();
7318+ enable_8259A_irq(0);
7319 }
7320 goto out;
7321 }
7322 /*
7323 * Cleanup, just in case ...
7324 */
7325+ disable_8259A_irq(0);
7326 clear_IO_APIC_pin(apic2, pin2);
7327+ apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
7328 }
7329- printk(" failed.\n");
7330
7331 if (nmi_watchdog == NMI_IO_APIC) {
7332- printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n");
7333- nmi_watchdog = 0;
7334+ apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work "
7335+ "through the IO-APIC - disabling NMI Watchdog!\n");
7336+ nmi_watchdog = NMI_NONE;
7337 }
7338+ timer_ack = 0;
7339
7340- printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ...");
7341+ apic_printk(APIC_QUIET, KERN_INFO
7342+ "...trying to set up timer as Virtual Wire IRQ...\n");
7343
7344- disable_8259A_irq(0);
7345- set_irq_chip_and_handler_name(0, &lapic_chip, handle_fasteoi_irq,
7346- "fasteoi");
7347- apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */
7348+ lapic_register_intr(0, vector);
7349+ apic_write(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */
7350 enable_8259A_irq(0);
7351
7352 if (timer_irq_works()) {
7353- printk(" works.\n");
7354+ apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
7355 goto out;
7356 }
7357- apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector);
7358- printk(" failed.\n");
7359+ disable_8259A_irq(0);
7360+ apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector);
7361+ apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n");
7362
7363- printk(KERN_INFO "...trying to set up timer as ExtINT IRQ...");
7364+ apic_printk(APIC_QUIET, KERN_INFO
7365+ "...trying to set up timer as ExtINT IRQ...\n");
7366
7367- timer_ack = 0;
7368 init_8259A(0);
7369 make_8259A_irq(0);
7370- apic_write_around(APIC_LVT0, APIC_DM_EXTINT);
7371+ apic_write(APIC_LVT0, APIC_DM_EXTINT);
7372
7373 unlock_ExtINT_logic();
7374
7375 if (timer_irq_works()) {
7376- printk(" works.\n");
7377+ apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
7378 goto out;
7379 }
7380- printk(" failed :(.\n");
7381+ apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n");
7382 panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a "
7383- "report. Then try booting with the 'noapic' option");
7384+ "report. Then try booting with the 'noapic' option.\n");
7385 out:
7386 local_irq_restore(flags);
7387 }
7388@@ -2314,11 +2355,21 @@ int timer_uses_ioapic_pin_0 = 0;
7389 #endif
7390
7391 /*
7392- *
7393- * IRQ's that are handled by the PIC in the MPS IOAPIC case.
7394- * - IRQ2 is the cascade IRQ, and cannot be a io-apic IRQ.
7395- * Linux doesn't really care, as it's not actually used
7396- * for any interrupt handling anyway.
7397+ * Traditionally ISA IRQ2 is the cascade IRQ, and is not available
7398+ * to devices. However there may be an I/O APIC pin available for
7399+ * this interrupt regardless. The pin may be left unconnected, but
7400+ * typically it will be reused as an ExtINT cascade interrupt for
7401+ * the master 8259A. In the MPS case such a pin will normally be
7402+ * reported as an ExtINT interrupt in the MP table. With ACPI
7403+ * there is no provision for ExtINT interrupts, and in the absence
7404+ * of an override it would be treated as an ordinary ISA I/O APIC
7405+ * interrupt, that is edge-triggered and unmasked by default. We
7406+ * used to do this, but it caused problems on some systems because
7407+ * of the NMI watchdog and sometimes IRQ0 of the 8254 timer using
7408+ * the same ExtINT cascade interrupt to drive the local APIC of the
7409+ * bootstrap processor. Therefore we refrain from routing IRQ2 to
7410+ * the I/O APIC in all cases now. No actual device should request
7411+ * it anyway. --macro
7412 */
7413 #define PIC_IRQS (1 << PIC_CASCADE_IR)
7414
7415@@ -2328,25 +2379,22 @@ void __init setup_IO_APIC(void)
7416 int i;
7417
7418 /* Reserve all the system vectors. */
7419- for (i = FIRST_SYSTEM_VECTOR; i < NR_VECTORS; i++)
7420+ for (i = first_system_vector; i < NR_VECTORS; i++)
7421 set_bit(i, used_vectors);
7422 #endif
7423
7424 enable_IO_APIC();
7425
7426- if (acpi_ioapic)
7427- io_apic_irqs = ~0; /* all IRQs go through IOAPIC */
7428- else
7429- io_apic_irqs = ~PIC_IRQS;
7430+ io_apic_irqs = ~PIC_IRQS;
7431
7432 printk("ENABLING IO-APIC IRQs\n");
7433
7434+#ifndef CONFIG_XEN
7435 /*
7436 * Set up IO-APIC IRQ routing.
7437 */
7438 if (!acpi_ioapic)
7439 setup_ioapic_ids_from_mpc();
7440-#ifndef CONFIG_XEN
7441 sync_Arb_IDs();
7442 #endif
7443 setup_IO_APIC_irqs();
7444@@ -2356,28 +2404,14 @@ void __init setup_IO_APIC(void)
7445 print_IO_APIC();
7446 }
7447
7448-static int __init setup_disable_8254_timer(char *s)
7449-{
7450- timer_over_8254 = -1;
7451- return 1;
7452-}
7453-static int __init setup_enable_8254_timer(char *s)
7454-{
7455- timer_over_8254 = 2;
7456- return 1;
7457-}
7458-
7459-__setup("disable_8254_timer", setup_disable_8254_timer);
7460-__setup("enable_8254_timer", setup_enable_8254_timer);
7461-
7462 /*
7463 * Called after all the initialization is done. If we didnt find any
7464 * APIC bugs then we can allow the modify fast path
7465 */
7466-
7467+
7468 static int __init io_apic_bug_finalize(void)
7469 {
7470- if(sis_apic_bug == -1)
7471+ if (sis_apic_bug == -1)
7472 sis_apic_bug = 0;
7473 if (is_initial_xendomain()) {
7474 struct xen_platform_op op = { .cmd = XENPF_platform_quirk };
7475@@ -2396,17 +2430,17 @@ struct sysfs_ioapic_data {
7476 struct sys_device dev;
7477 struct IO_APIC_route_entry entry[0];
7478 };
7479-static struct sysfs_ioapic_data * mp_ioapic_data[MAX_IO_APICS];
7480+static struct sysfs_ioapic_data *mp_ioapic_data[MAX_IO_APICS];
7481
7482 static int ioapic_suspend(struct sys_device *dev, pm_message_t state)
7483 {
7484 struct IO_APIC_route_entry *entry;
7485 struct sysfs_ioapic_data *data;
7486 int i;
7487-
7488+
7489 data = container_of(dev, struct sysfs_ioapic_data, dev);
7490 entry = data->entry;
7491- for (i = 0; i < nr_ioapic_registers[dev->id]; i ++)
7492+ for (i = 0; i < nr_ioapic_registers[dev->id]; i++)
7493 entry[i] = ioapic_read_entry(dev->id, i);
7494
7495 return 0;
7496@@ -2419,18 +2453,18 @@ static int ioapic_resume(struct sys_devi
7497 unsigned long flags;
7498 union IO_APIC_reg_00 reg_00;
7499 int i;
7500-
7501+
7502 data = container_of(dev, struct sysfs_ioapic_data, dev);
7503 entry = data->entry;
7504
7505 spin_lock_irqsave(&ioapic_lock, flags);
7506 reg_00.raw = io_apic_read(dev->id, 0);
7507- if (reg_00.bits.ID != mp_ioapics[dev->id].mpc_apicid) {
7508- reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid;
7509+ if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) {
7510+ reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid;
7511 io_apic_write(dev->id, 0, reg_00.raw);
7512 }
7513 spin_unlock_irqrestore(&ioapic_lock, flags);
7514- for (i = 0; i < nr_ioapic_registers[dev->id]; i ++)
7515+ for (i = 0; i < nr_ioapic_registers[dev->id]; i++)
7516 ioapic_write_entry(dev->id, i, entry[i]);
7517
7518 return 0;
7519@@ -2444,24 +2478,23 @@ static struct sysdev_class ioapic_sysdev
7520
7521 static int __init ioapic_init_sysfs(void)
7522 {
7523- struct sys_device * dev;
7524+ struct sys_device *dev;
7525 int i, size, error = 0;
7526
7527 error = sysdev_class_register(&ioapic_sysdev_class);
7528 if (error)
7529 return error;
7530
7531- for (i = 0; i < nr_ioapics; i++ ) {
7532- size = sizeof(struct sys_device) + nr_ioapic_registers[i]
7533+ for (i = 0; i < nr_ioapics; i++) {
7534+ size = sizeof(struct sys_device) + nr_ioapic_registers[i]
7535 * sizeof(struct IO_APIC_route_entry);
7536- mp_ioapic_data[i] = kmalloc(size, GFP_KERNEL);
7537+ mp_ioapic_data[i] = kzalloc(size, GFP_KERNEL);
7538 if (!mp_ioapic_data[i]) {
7539 printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
7540 continue;
7541 }
7542- memset(mp_ioapic_data[i], 0, size);
7543 dev = &mp_ioapic_data[i]->dev;
7544- dev->id = i;
7545+ dev->id = i;
7546 dev->cls = &ioapic_sysdev_class;
7547 error = sysdev_register(dev);
7548 if (error) {
7549@@ -2538,7 +2571,7 @@ static int msi_compose_msg(struct pci_de
7550 msg->address_lo =
7551 MSI_ADDR_BASE_LO |
7552 ((INT_DEST_MODE == 0) ?
7553- MSI_ADDR_DEST_MODE_PHYSICAL:
7554+MSI_ADDR_DEST_MODE_PHYSICAL:
7555 MSI_ADDR_DEST_MODE_LOGICAL) |
7556 ((INT_DELIVERY_MODE != dest_LowestPrio) ?
7557 MSI_ADDR_REDIRECTION_CPU:
7558@@ -2549,7 +2582,7 @@ static int msi_compose_msg(struct pci_de
7559 MSI_DATA_TRIGGER_EDGE |
7560 MSI_DATA_LEVEL_ASSERT |
7561 ((INT_DELIVERY_MODE != dest_LowestPrio) ?
7562- MSI_DATA_DELIVERY_FIXED:
7563+MSI_DATA_DELIVERY_FIXED:
7564 MSI_DATA_DELIVERY_LOWPRI) |
7565 MSI_DATA_VECTOR(vector);
7566 }
7567@@ -2720,12 +2753,12 @@ int arch_setup_ht_irq(unsigned int irq,
7568 #endif /* CONFIG_HT_IRQ */
7569
7570 /* --------------------------------------------------------------------------
7571- ACPI-based IOAPIC Configuration
7572+ ACPI-based IOAPIC Configuration
7573 -------------------------------------------------------------------------- */
7574
7575 #ifdef CONFIG_ACPI
7576
7577-int __init io_apic_get_unique_id (int ioapic, int apic_id)
7578+int __init io_apic_get_unique_id(int ioapic, int apic_id)
7579 {
7580 #ifndef CONFIG_XEN
7581 union IO_APIC_reg_00 reg_00;
7582@@ -2735,10 +2768,10 @@ int __init io_apic_get_unique_id (int io
7583 int i = 0;
7584
7585 /*
7586- * The P4 platform supports up to 256 APIC IDs on two separate APIC
7587- * buses (one for LAPICs, one for IOAPICs), where predecessors only
7588+ * The P4 platform supports up to 256 APIC IDs on two separate APIC
7589+ * buses (one for LAPICs, one for IOAPICs), where predecessors only
7590 * supports up to 16 on one shared APIC bus.
7591- *
7592+ *
7593 * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full
7594 * advantage of new APIC bus architecture.
7595 */
7596@@ -2757,7 +2790,7 @@ int __init io_apic_get_unique_id (int io
7597 }
7598
7599 /*
7600- * Every APIC in a system must have a unique ID or we get lots of nice
7601+ * Every APIC in a system must have a unique ID or we get lots of nice
7602 * 'stuck on smp_invalidate_needed IPI wait' messages.
7603 */
7604 if (check_apicid_used(apic_id_map, apic_id)) {
7605@@ -2774,7 +2807,7 @@ int __init io_apic_get_unique_id (int io
7606 "trying %d\n", ioapic, apic_id, i);
7607
7608 apic_id = i;
7609- }
7610+ }
7611
7612 tmp = apicid_to_cpu_present(apic_id);
7613 physids_or(apic_id_map, apic_id_map, tmp);
7614@@ -2802,7 +2835,7 @@ int __init io_apic_get_unique_id (int io
7615 }
7616
7617
7618-int __init io_apic_get_version (int ioapic)
7619+int __init io_apic_get_version(int ioapic)
7620 {
7621 union IO_APIC_reg_01 reg_01;
7622 unsigned long flags;
7623@@ -2815,7 +2848,7 @@ int __init io_apic_get_version (int ioap
7624 }
7625
7626
7627-int __init io_apic_get_redir_entries (int ioapic)
7628+int __init io_apic_get_redir_entries(int ioapic)
7629 {
7630 union IO_APIC_reg_01 reg_01;
7631 unsigned long flags;
7632@@ -2828,7 +2861,7 @@ int __init io_apic_get_redir_entries (in
7633 }
7634
7635
7636-int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int active_high_low)
7637+int io_apic_set_pci_routing(int ioapic, int pin, int irq, int edge_level, int active_high_low)
7638 {
7639 struct IO_APIC_route_entry entry;
7640
7641@@ -2844,7 +2877,7 @@ int io_apic_set_pci_routing (int ioapic,
7642 * corresponding device driver registers for this IRQ.
7643 */
7644
7645- memset(&entry,0,sizeof(entry));
7646+ memset(&entry, 0, sizeof(entry));
7647
7648 entry.delivery_mode = INT_DELIVERY_MODE;
7649 entry.dest_mode = INT_DEST_MODE;
7650@@ -2863,7 +2896,7 @@ int io_apic_set_pci_routing (int ioapic,
7651
7652 apic_printk(APIC_DEBUG, KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry "
7653 "(%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i)\n", ioapic,
7654- mp_ioapics[ioapic].mpc_apicid, pin, entry.vector, irq,
7655+ mp_ioapics[ioapic].mp_apicid, pin, entry.vector, irq,
7656 edge_level, active_high_low);
7657
7658 ioapic_register_intr(irq, entry.vector, edge_level);
7659@@ -2884,8 +2917,8 @@ int acpi_get_override_irq(int bus_irq, i
7660 return -1;
7661
7662 for (i = 0; i < mp_irq_entries; i++)
7663- if (mp_irqs[i].mpc_irqtype == mp_INT &&
7664- mp_irqs[i].mpc_srcbusirq == bus_irq)
7665+ if (mp_irqs[i].mp_irqtype == mp_INT &&
7666+ mp_irqs[i].mp_srcbusirq == bus_irq)
7667 break;
7668 if (i >= mp_irq_entries)
7669 return -1;
7670@@ -2918,3 +2951,35 @@ static int __init parse_noapic(char *arg
7671 return 0;
7672 }
7673 early_param("noapic", parse_noapic);
7674+
7675+#ifndef CONFIG_XEN
7676+void __init ioapic_init_mappings(void)
7677+{
7678+ unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
7679+ int i;
7680+
7681+ for (i = 0; i < nr_ioapics; i++) {
7682+ if (smp_found_config) {
7683+ ioapic_phys = mp_ioapics[i].mp_apicaddr;
7684+ if (!ioapic_phys) {
7685+ printk(KERN_ERR
7686+ "WARNING: bogus zero IO-APIC "
7687+ "address found in MPTABLE, "
7688+ "disabling IO/APIC support!\n");
7689+ smp_found_config = 0;
7690+ skip_ioapic_setup = 1;
7691+ goto fake_ioapic_page;
7692+ }
7693+ } else {
7694+fake_ioapic_page:
7695+ ioapic_phys = (unsigned long)
7696+ alloc_bootmem_pages(PAGE_SIZE);
7697+ ioapic_phys = __pa(ioapic_phys);
7698+ }
7699+ set_fixmap_nocache(idx, ioapic_phys);
7700+ printk(KERN_DEBUG "mapped IOAPIC to %08lx (%08lx)\n",
7701+ __fix_to_virt(idx), ioapic_phys);
7702+ idx++;
7703+ }
7704+}
7705+#endif
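The check_timer() rework above boils down to a fixed fallback order: IRQ0 is first tried through the I/O APIC pin, then routed through the 8259A ExtINT pin, then as a local APIC virtual-wire interrupt, and finally as ExtINT on LVT0 before the code gives up. A rough standalone sketch of that order follows; the probe functions are made-up stubs standing in for timer_irq_works(), not kernel interfaces.

/*
 * Illustrative sketch (not kernel code): the order in which check_timer()
 * probes for a working timer interrupt.  Each stub pretends to succeed or
 * fail; only the fallback ladder itself is the point.
 */
#include <stdio.h>

static int through_ioapic_pin(void)  { return 0; }  /* stub: pretend it fails */
static int through_8259_extint(void) { return 0; }  /* stub */
static int as_virtual_wire(void)     { return 1; }  /* stub: pretend this works */
static int as_extint_on_lvt0(void)   { return 0; }  /* stub */

int main(void)
{
	if (through_ioapic_pin())
		puts("IRQ0 works through the I/O APIC pin");
	else if (through_8259_extint())
		puts("IRQ0 works routed via the 8259A ExtINT pin");
	else if (as_virtual_wire())
		puts("IRQ0 works as a local APIC virtual-wire IRQ");
	else if (as_extint_on_lvt0())
		puts("IRQ0 works as ExtINT on LVT0");
	else
		puts("IO-APIC + timer doesn't work");
	return 0;
}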
7706--- sle11-2009-10-16.orig/arch/x86/kernel/io_apic_64-xen.c 2009-03-16 16:38:05.000000000 +0100
7707+++ sle11-2009-10-16/arch/x86/kernel/io_apic_64-xen.c 2009-06-04 10:21:39.000000000 +0200
7708@@ -45,6 +45,7 @@
7709 #include <asm/proto.h>
7710 #include <asm/acpi.h>
7711 #include <asm/dma.h>
7712+#include <asm/i8259.h>
7713 #include <asm/nmi.h>
7714 #include <asm/msidef.h>
7715 #include <asm/hypertransport.h>
7716@@ -63,10 +64,16 @@ struct irq_cfg {
7717 };
7718
7719 /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
7720-struct irq_cfg irq_cfg[NR_IRQS] __read_mostly;
7721+static struct irq_cfg irq_cfg[NR_IRQS] __read_mostly;
7722
7723 static int assign_irq_vector(int irq, cpumask_t mask);
7724
7725+#ifndef CONFIG_XEN
7726+int first_system_vector = 0xfe;
7727+
7728+char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE};
7729+#endif
7730+
7731 #define __apicdebuginit __init
7732
7733 int sis_apic_bug; /* not actually supported, dummy for compile */
7734@@ -89,14 +96,14 @@ unsigned long io_apic_irqs;
7735
7736 #define clear_IO_APIC() ((void)0)
7737 #else
7738-int timer_over_8254 __initdata = 1;
7739+int timer_through_8259 __initdata;
7740
7741 /* Where if anywhere is the i8259 connect in external int mode */
7742 static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
7743 #endif
7744
7745 static DEFINE_SPINLOCK(ioapic_lock);
7746-DEFINE_SPINLOCK(vector_lock);
7747+static DEFINE_SPINLOCK(vector_lock);
7748
7749 /*
7750 * # of IRQ routing registers
7751@@ -104,15 +111,17 @@ DEFINE_SPINLOCK(vector_lock);
7752 int nr_ioapic_registers[MAX_IO_APICS];
7753
7754 /* I/O APIC entries */
7755-struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
7756+struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
7757 int nr_ioapics;
7758
7759 /* MP IRQ source entries */
7760-struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
7761+struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
7762
7763 /* # of MP IRQ source entries */
7764 int mp_irq_entries;
7765
7766+DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
7767+
7768 /*
7769 * Rough estimation of how many shared IRQs there are, can
7770 * be changed anytime.
7771@@ -141,7 +150,7 @@ struct io_apic {
7772 static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
7773 {
7774 return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
7775- + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK);
7776+ + (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK);
7777 }
7778 #endif
7779
7780@@ -155,7 +164,7 @@ static inline unsigned int io_apic_read(
7781 struct physdev_apic apic_op;
7782 int ret;
7783
7784- apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr;
7785+ apic_op.apic_physbase = mp_ioapics[apic].mp_apicaddr;
7786 apic_op.reg = reg;
7787 ret = HYPERVISOR_physdev_op(PHYSDEVOP_apic_read, &apic_op);
7788 if (ret)
7789@@ -173,7 +182,7 @@ static inline void io_apic_write(unsigne
7790 #else
7791 struct physdev_apic apic_op;
7792
7793- apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr;
7794+ apic_op.apic_physbase = mp_ioapics[apic].mp_apicaddr;
7795 apic_op.reg = reg;
7796 apic_op.value = value;
7797 WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic_op));
7798@@ -209,7 +218,7 @@ static bool io_apic_level_ack_pending(un
7799 break;
7800 reg = io_apic_read(entry->apic, 0x10 + pin*2);
7801 /* Is the remote IRR bit set? */
7802- if ((reg >> 14) & 1) {
7803+ if (reg & IO_APIC_REDIR_REMOTE_IRR) {
7804 spin_unlock_irqrestore(&ioapic_lock, flags);
7805 return true;
7806 }
7807@@ -308,7 +317,7 @@ static void __target_IO_APIC_irq(unsigne
7808 break;
7809 io_apic_write(apic, 0x11 + pin*2, dest);
7810 reg = io_apic_read(apic, 0x10 + pin*2);
7811- reg &= ~0x000000ff;
7812+ reg &= ~IO_APIC_REDIR_VECTOR_MASK;
7813 reg |= vector;
7814 io_apic_modify(apic, reg);
7815 if (!entry->next)
7816@@ -372,6 +381,26 @@ static void add_pin_to_irq(unsigned int
7817 }
7818
7819 #ifndef CONFIG_XEN
7820+/*
7821+ * Reroute an IRQ to a different pin.
7822+ */
7823+static void __init replace_pin_at_irq(unsigned int irq,
7824+ int oldapic, int oldpin,
7825+ int newapic, int newpin)
7826+{
7827+ struct irq_pin_list *entry = irq_2_pin + irq;
7828+
7829+ while (1) {
7830+ if (entry->apic == oldapic && entry->pin == oldpin) {
7831+ entry->apic = newapic;
7832+ entry->pin = newpin;
7833+ }
7834+ if (!entry->next)
7835+ break;
7836+ entry = irq_2_pin + entry->next;
7837+ }
7838+}
7839+
7840 #define __DO_ACTION(R, ACTION, FINAL) \
7841 \
7842 { \
7843@@ -399,10 +428,11 @@ static void add_pin_to_irq(unsigned int
7844 static void name##_IO_APIC_irq (unsigned int irq) \
7845 __DO_ACTION(R, ACTION, FINAL)
7846
7847-DO_ACTION( __mask, 0, |= 0x00010000, io_apic_sync(entry->apic) )
7848- /* mask = 1 */
7849-DO_ACTION( __unmask, 0, &= 0xfffeffff, )
7850- /* mask = 0 */
7851+/* mask = 1 */
7852+DO_ACTION(__mask, 0, |= IO_APIC_REDIR_MASKED, io_apic_sync(entry->apic))
7853+
7854+/* mask = 0 */
7855+DO_ACTION(__unmask, 0, &= ~IO_APIC_REDIR_MASKED, )
7856
7857 static void mask_IO_APIC_irq (unsigned int irq)
7858 {
7859@@ -465,22 +495,6 @@ static int __init disable_timer_pin_setu
7860 }
7861 __setup("disable_timer_pin_1", disable_timer_pin_setup);
7862
7863-#ifndef CONFIG_XEN
7864-static int __init setup_disable_8254_timer(char *s)
7865-{
7866- timer_over_8254 = -1;
7867- return 1;
7868-}
7869-static int __init setup_enable_8254_timer(char *s)
7870-{
7871- timer_over_8254 = 2;
7872- return 1;
7873-}
7874-
7875-__setup("disable_8254_timer", setup_disable_8254_timer);
7876-__setup("enable_8254_timer", setup_enable_8254_timer);
7877-#endif /* !CONFIG_XEN */
7878-
7879
7880 /*
7881 * Find the IRQ entry number of a certain pin.
7882@@ -490,10 +504,10 @@ static int find_irq_entry(int apic, int
7883 int i;
7884
7885 for (i = 0; i < mp_irq_entries; i++)
7886- if (mp_irqs[i].mpc_irqtype == type &&
7887- (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mpc_apicid ||
7888- mp_irqs[i].mpc_dstapic == MP_APIC_ALL) &&
7889- mp_irqs[i].mpc_dstirq == pin)
7890+ if (mp_irqs[i].mp_irqtype == type &&
7891+ (mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid ||
7892+ mp_irqs[i].mp_dstapic == MP_APIC_ALL) &&
7893+ mp_irqs[i].mp_dstirq == pin)
7894 return i;
7895
7896 return -1;
7897@@ -508,13 +522,13 @@ static int __init find_isa_irq_pin(int i
7898 int i;
7899
7900 for (i = 0; i < mp_irq_entries; i++) {
7901- int lbus = mp_irqs[i].mpc_srcbus;
7902+ int lbus = mp_irqs[i].mp_srcbus;
7903
7904 if (test_bit(lbus, mp_bus_not_pci) &&
7905- (mp_irqs[i].mpc_irqtype == type) &&
7906- (mp_irqs[i].mpc_srcbusirq == irq))
7907+ (mp_irqs[i].mp_irqtype == type) &&
7908+ (mp_irqs[i].mp_srcbusirq == irq))
7909
7910- return mp_irqs[i].mpc_dstirq;
7911+ return mp_irqs[i].mp_dstirq;
7912 }
7913 return -1;
7914 }
7915@@ -524,17 +538,17 @@ static int __init find_isa_irq_apic(int
7916 int i;
7917
7918 for (i = 0; i < mp_irq_entries; i++) {
7919- int lbus = mp_irqs[i].mpc_srcbus;
7920+ int lbus = mp_irqs[i].mp_srcbus;
7921
7922 if (test_bit(lbus, mp_bus_not_pci) &&
7923- (mp_irqs[i].mpc_irqtype == type) &&
7924- (mp_irqs[i].mpc_srcbusirq == irq))
7925+ (mp_irqs[i].mp_irqtype == type) &&
7926+ (mp_irqs[i].mp_srcbusirq == irq))
7927 break;
7928 }
7929 if (i < mp_irq_entries) {
7930 int apic;
7931 for(apic = 0; apic < nr_ioapics; apic++) {
7932- if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic)
7933+ if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic)
7934 return apic;
7935 }
7936 }
7937@@ -555,28 +569,28 @@ int IO_APIC_get_PCI_irq_vector(int bus,
7938
7939 apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
7940 bus, slot, pin);
7941- if (mp_bus_id_to_pci_bus[bus] == -1) {
7942+ if (test_bit(bus, mp_bus_not_pci)) {
7943 apic_printk(APIC_VERBOSE, "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
7944 return -1;
7945 }
7946 for (i = 0; i < mp_irq_entries; i++) {
7947- int lbus = mp_irqs[i].mpc_srcbus;
7948+ int lbus = mp_irqs[i].mp_srcbus;
7949
7950 for (apic = 0; apic < nr_ioapics; apic++)
7951- if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic ||
7952- mp_irqs[i].mpc_dstapic == MP_APIC_ALL)
7953+ if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic ||
7954+ mp_irqs[i].mp_dstapic == MP_APIC_ALL)
7955 break;
7956
7957 if (!test_bit(lbus, mp_bus_not_pci) &&
7958- !mp_irqs[i].mpc_irqtype &&
7959+ !mp_irqs[i].mp_irqtype &&
7960 (bus == lbus) &&
7961- (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) {
7962- int irq = pin_2_irq(i,apic,mp_irqs[i].mpc_dstirq);
7963+ (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) {
7964+ int irq = pin_2_irq(i,apic,mp_irqs[i].mp_dstirq);
7965
7966 if (!(apic || IO_APIC_IRQ(irq)))
7967 continue;
7968
7969- if (pin == (mp_irqs[i].mpc_srcbusirq & 3))
7970+ if (pin == (mp_irqs[i].mp_srcbusirq & 3))
7971 return irq;
7972 /*
7973 * Use the first all-but-pin matching entry as a
7974@@ -604,13 +618,13 @@ int IO_APIC_get_PCI_irq_vector(int bus,
7975
7976 static int MPBIOS_polarity(int idx)
7977 {
7978- int bus = mp_irqs[idx].mpc_srcbus;
7979+ int bus = mp_irqs[idx].mp_srcbus;
7980 int polarity;
7981
7982 /*
7983 * Determine IRQ line polarity (high active or low active):
7984 */
7985- switch (mp_irqs[idx].mpc_irqflag & 3)
7986+ switch (mp_irqs[idx].mp_irqflag & 3)
7987 {
7988 case 0: /* conforms, ie. bus-type dependent polarity */
7989 if (test_bit(bus, mp_bus_not_pci))
7990@@ -646,13 +660,13 @@ static int MPBIOS_polarity(int idx)
7991
7992 static int MPBIOS_trigger(int idx)
7993 {
7994- int bus = mp_irqs[idx].mpc_srcbus;
7995+ int bus = mp_irqs[idx].mp_srcbus;
7996 int trigger;
7997
7998 /*
7999 * Determine IRQ trigger mode (edge or level sensitive):
8000 */
8001- switch ((mp_irqs[idx].mpc_irqflag>>2) & 3)
8002+ switch ((mp_irqs[idx].mp_irqflag>>2) & 3)
8003 {
8004 case 0: /* conforms, ie. bus-type dependent */
8005 if (test_bit(bus, mp_bus_not_pci))
8006@@ -699,16 +713,16 @@ static inline int irq_trigger(int idx)
8007 static int pin_2_irq(int idx, int apic, int pin)
8008 {
8009 int irq, i;
8010- int bus = mp_irqs[idx].mpc_srcbus;
8011+ int bus = mp_irqs[idx].mp_srcbus;
8012
8013 /*
8014 * Debugging check, we are in big trouble if this message pops up!
8015 */
8016- if (mp_irqs[idx].mpc_dstirq != pin)
8017+ if (mp_irqs[idx].mp_dstirq != pin)
8018 printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
8019
8020 if (test_bit(bus, mp_bus_not_pci)) {
8021- irq = mp_irqs[idx].mpc_srcbusirq;
8022+ irq = mp_irqs[idx].mp_srcbusirq;
8023 } else {
8024 /*
8025 * PCI IRQs are mapped in order
8026@@ -722,6 +736,19 @@ static int pin_2_irq(int idx, int apic,
8027 return irq;
8028 }
8029
8030+void lock_vector_lock(void)
8031+{
8032+ /* Used to the online set of cpus does not change
8033+ * during assign_irq_vector.
8034+ */
8035+ spin_lock(&vector_lock);
8036+}
8037+
8038+void unlock_vector_lock(void)
8039+{
8040+ spin_unlock(&vector_lock);
8041+}
8042+
8043 static int __assign_irq_vector(int irq, cpumask_t mask)
8044 {
8045 struct physdev_irq irq_op;
8046@@ -773,7 +800,7 @@ static void __clear_irq_vector(int irq)
8047
8048 vector = cfg->vector;
8049 cpus_and(mask, cfg->domain, cpu_online_map);
8050- for_each_cpu_mask(cpu, mask)
8051+ for_each_cpu_mask_nr(cpu, mask)
8052 per_cpu(vector_irq, cpu)[vector] = -1;
8053
8054 cfg->vector = 0;
8055@@ -842,7 +869,7 @@ static void setup_IO_APIC_irq(int apic,
8056 apic_printk(APIC_VERBOSE,KERN_DEBUG
8057 "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
8058 "IRQ %d Mode:%i Active:%i)\n",
8059- apic, mp_ioapics[apic].mpc_apicid, pin, cfg->vector,
8060+ apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector,
8061 irq, trigger, polarity);
8062
8063 /*
8064@@ -883,10 +910,10 @@ static void __init setup_IO_APIC_irqs(vo
8065 idx = find_irq_entry(apic,pin,mp_INT);
8066 if (idx == -1) {
8067 if (first_notcon) {
8068- apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mpc_apicid, pin);
8069+ apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mp_apicid, pin);
8070 first_notcon = 0;
8071 } else
8072- apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mpc_apicid, pin);
8073+ apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mp_apicid, pin);
8074 continue;
8075 }
8076 if (!first_notcon) {
8077@@ -908,26 +935,21 @@ static void __init setup_IO_APIC_irqs(vo
8078
8079 #ifndef CONFIG_XEN
8080 /*
8081- * Set up the 8259A-master output pin as broadcast to all
8082- * CPUs.
8083+ * Set up the timer pin, possibly with the 8259A-master behind.
8084 */
8085-static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, int vector)
8086+static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
8087+ int vector)
8088 {
8089 struct IO_APIC_route_entry entry;
8090
8091 memset(&entry, 0, sizeof(entry));
8092
8093- disable_8259A_irq(0);
8094-
8095- /* mask LVT0 */
8096- apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
8097-
8098 /*
8099 * We use logical delivery to get the timer IRQ
8100 * to the first CPU.
8101 */
8102 entry.dest_mode = INT_DEST_MODE;
8103- entry.mask = 0; /* unmask IRQ now */
8104+ entry.mask = 1; /* mask IRQ now */
8105 entry.dest = cpu_mask_to_apicid(TARGET_CPUS);
8106 entry.delivery_mode = INT_DELIVERY_MODE;
8107 entry.polarity = 0;
8108@@ -936,7 +958,7 @@ static void __init setup_ExtINT_IRQ0_pin
8109
8110 /*
8111 * The timer IRQ doesn't have to know that behind the
8112- * scene we have a 8259A-master in AEOI mode ...
8113+ * scene we may have a 8259A-master in AEOI mode ...
8114 */
8115 set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge");
8116
8117@@ -944,8 +966,6 @@ static void __init setup_ExtINT_IRQ0_pin
8118 * Add it to the IO-APIC irq-routing table:
8119 */
8120 ioapic_write_entry(apic, pin, entry);
8121-
8122- enable_8259A_irq(0);
8123 }
8124
8125 void __apicdebuginit print_IO_APIC(void)
8126@@ -962,7 +982,7 @@ void __apicdebuginit print_IO_APIC(void)
8127 printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
8128 for (i = 0; i < nr_ioapics; i++)
8129 printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
8130- mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]);
8131+ mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]);
8132
8133 /*
8134 * We are a bit conservative about what we expect. We have to
8135@@ -980,7 +1000,7 @@ void __apicdebuginit print_IO_APIC(void)
8136 spin_unlock_irqrestore(&ioapic_lock, flags);
8137
8138 printk("\n");
8139- printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid);
8140+ printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid);
8141 printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
8142 printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID);
8143
8144@@ -1072,6 +1092,7 @@ void __apicdebuginit print_local_APIC(vo
8145
8146 printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
8147 smp_processor_id(), hard_smp_processor_id());
8148+ v = apic_read(APIC_ID);
8149 printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, GET_APIC_ID(read_apic_id()));
8150 v = apic_read(APIC_LVR);
8151 printk(KERN_INFO "... APIC VERSION: %08x\n", v);
8152@@ -1141,7 +1162,7 @@ void __apicdebuginit print_local_APIC(vo
8153
8154 void print_all_local_APICs (void)
8155 {
8156- on_each_cpu(print_local_APIC, NULL, 1, 1);
8157+ on_each_cpu(print_local_APIC, NULL, 1);
8158 }
8159
8160 void __apicdebuginit print_PIC(void)
8161@@ -1175,6 +1196,8 @@ void __apicdebuginit print_PIC(void)
8162 v = inb(0x4d1) << 8 | inb(0x4d0);
8163 printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
8164 }
8165+#else
8166+void __apicdebuginit print_IO_APIC(void) {}
8167 #endif /* !CONFIG_XEN */
8168
8169 void __init enable_IO_APIC(void)
8170@@ -1359,12 +1382,10 @@ static unsigned int startup_ioapic_irq(u
8171 static int ioapic_retrigger_irq(unsigned int irq)
8172 {
8173 struct irq_cfg *cfg = &irq_cfg[irq];
8174- cpumask_t mask;
8175 unsigned long flags;
8176
8177 spin_lock_irqsave(&vector_lock, flags);
8178- mask = cpumask_of_cpu(first_cpu(cfg->domain));
8179- send_IPI_mask(mask, cfg->vector);
8180+ send_IPI_mask(cpumask_of_cpu(first_cpu(cfg->domain)), cfg->vector);
8181 spin_unlock_irqrestore(&vector_lock, flags);
8182
8183 return 1;
8184@@ -1545,7 +1566,7 @@ static inline void init_IO_APIC_traps(vo
8185 }
8186
8187 #ifndef CONFIG_XEN
8188-static void enable_lapic_irq (unsigned int irq)
8189+static void unmask_lapic_irq(unsigned int irq)
8190 {
8191 unsigned long v;
8192
8193@@ -1553,7 +1574,7 @@ static void enable_lapic_irq (unsigned i
8194 apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
8195 }
8196
8197-static void disable_lapic_irq (unsigned int irq)
8198+static void mask_lapic_irq(unsigned int irq)
8199 {
8200 unsigned long v;
8201
8202@@ -1566,19 +1587,20 @@ static void ack_lapic_irq (unsigned int
8203 ack_APIC_irq();
8204 }
8205
8206-static void end_lapic_irq (unsigned int i) { /* nothing */ }
8207-
8208-static struct hw_interrupt_type lapic_irq_type __read_mostly = {
8209- .name = "local-APIC",
8210- .typename = "local-APIC-edge",
8211- .startup = NULL, /* startup_irq() not used for IRQ0 */
8212- .shutdown = NULL, /* shutdown_irq() not used for IRQ0 */
8213- .enable = enable_lapic_irq,
8214- .disable = disable_lapic_irq,
8215- .ack = ack_lapic_irq,
8216- .end = end_lapic_irq,
8217+static struct irq_chip lapic_chip __read_mostly = {
8218+ .name = "local-APIC",
8219+ .mask = mask_lapic_irq,
8220+ .unmask = unmask_lapic_irq,
8221+ .ack = ack_lapic_irq,
8222 };
8223
8224+static void lapic_register_intr(int irq)
8225+{
8226+ irq_desc[irq].status &= ~IRQ_LEVEL;
8227+ set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
8228+ "edge");
8229+}
8230+
8231 static void __init setup_nmi(void)
8232 {
8233 /*
8234@@ -1664,6 +1686,7 @@ static inline void __init check_timer(vo
8235 struct irq_cfg *cfg = irq_cfg + 0;
8236 int apic1, pin1, apic2, pin2;
8237 unsigned long flags;
8238+ int no_pin1 = 0;
8239
8240 local_irq_save(flags);
8241
8242@@ -1674,34 +1697,48 @@ static inline void __init check_timer(vo
8243 assign_irq_vector(0, TARGET_CPUS);
8244
8245 /*
8246- * Subtle, code in do_timer_interrupt() expects an AEOI
8247- * mode for the 8259A whenever interrupts are routed
8248- * through I/O APICs. Also IRQ0 has to be enabled in
8249- * the 8259A which implies the virtual wire has to be
8250- * disabled in the local APIC.
8251+ * As IRQ0 is to be enabled in the 8259A, the virtual
8252+ * wire has to be disabled in the local APIC.
8253 */
8254 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
8255 init_8259A(1);
8256- if (timer_over_8254 > 0)
8257- enable_8259A_irq(0);
8258
8259 pin1 = find_isa_irq_pin(0, mp_INT);
8260 apic1 = find_isa_irq_apic(0, mp_INT);
8261 pin2 = ioapic_i8259.pin;
8262 apic2 = ioapic_i8259.apic;
8263
8264- apic_printk(APIC_VERBOSE,KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n",
8265- cfg->vector, apic1, pin1, apic2, pin2);
8266+ apic_printk(APIC_QUIET, KERN_INFO "..TIMER: vector=0x%02X "
8267+ "apic1=%d pin1=%d apic2=%d pin2=%d\n",
8268+ cfg->vector, apic1, pin1, apic2, pin2);
8269+
8270+ /*
8271+ * Some BIOS writers are clueless and report the ExtINTA
8272+ * I/O APIC input from the cascaded 8259A as the timer
8273+ * interrupt input. So just in case, if only one pin
8274+ * was found above, try it both directly and through the
8275+ * 8259A.
8276+ */
8277+ if (pin1 == -1) {
8278+ pin1 = pin2;
8279+ apic1 = apic2;
8280+ no_pin1 = 1;
8281+ } else if (pin2 == -1) {
8282+ pin2 = pin1;
8283+ apic2 = apic1;
8284+ }
8285
8286 if (pin1 != -1) {
8287 /*
8288 * Ok, does IRQ0 through the IOAPIC work?
8289 */
8290+ if (no_pin1) {
8291+ add_pin_to_irq(0, apic1, pin1);
8292+ setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
8293+ }
8294 unmask_IO_APIC_irq(0);
8295 if (!no_timer_check && timer_irq_works()) {
8296- nmi_watchdog_default();
8297 if (nmi_watchdog == NMI_IO_APIC) {
8298- disable_8259A_irq(0);
8299 setup_nmi();
8300 enable_8259A_irq(0);
8301 }
8302@@ -1710,54 +1747,62 @@ static inline void __init check_timer(vo
8303 goto out;
8304 }
8305 clear_IO_APIC_pin(apic1, pin1);
8306- apic_printk(APIC_QUIET,KERN_ERR "..MP-BIOS bug: 8254 timer not "
8307- "connected to IO-APIC\n");
8308- }
8309-
8310- apic_printk(APIC_VERBOSE,KERN_INFO "...trying to set up timer (IRQ0) "
8311- "through the 8259A ... ");
8312- if (pin2 != -1) {
8313- apic_printk(APIC_VERBOSE,"\n..... (found apic %d pin %d) ...",
8314- apic2, pin2);
8315+ if (!no_pin1)
8316+ apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
8317+ "8254 timer not connected to IO-APIC\n");
8318+
8319+ apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer "
8320+ "(IRQ0) through the 8259A ...\n");
8321+ apic_printk(APIC_QUIET, KERN_INFO
8322+ "..... (found apic %d pin %d) ...\n", apic2, pin2);
8323 /*
8324 * legacy devices should be connected to IO APIC #0
8325 */
8326- setup_ExtINT_IRQ0_pin(apic2, pin2, cfg->vector);
8327+ replace_pin_at_irq(0, apic1, pin1, apic2, pin2);
8328+ setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
8329+ unmask_IO_APIC_irq(0);
8330+ enable_8259A_irq(0);
8331 if (timer_irq_works()) {
8332- apic_printk(APIC_VERBOSE," works.\n");
8333- nmi_watchdog_default();
8334+ apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
8335+ timer_through_8259 = 1;
8336 if (nmi_watchdog == NMI_IO_APIC) {
8337+ disable_8259A_irq(0);
8338 setup_nmi();
8339+ enable_8259A_irq(0);
8340 }
8341 goto out;
8342 }
8343 /*
8344 * Cleanup, just in case ...
8345 */
8346+ disable_8259A_irq(0);
8347 clear_IO_APIC_pin(apic2, pin2);
8348+ apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
8349 }
8350- apic_printk(APIC_VERBOSE," failed.\n");
8351
8352 if (nmi_watchdog == NMI_IO_APIC) {
8353- printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n");
8354- nmi_watchdog = 0;
8355+ apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work "
8356+ "through the IO-APIC - disabling NMI Watchdog!\n");
8357+ nmi_watchdog = NMI_NONE;
8358 }
8359
8360- apic_printk(APIC_VERBOSE, KERN_INFO "...trying to set up timer as Virtual Wire IRQ...");
8361+ apic_printk(APIC_QUIET, KERN_INFO
8362+ "...trying to set up timer as Virtual Wire IRQ...\n");
8363
8364- disable_8259A_irq(0);
8365- irq_desc[0].chip = &lapic_irq_type;
8366+ lapic_register_intr(0);
8367 apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */
8368 enable_8259A_irq(0);
8369
8370 if (timer_irq_works()) {
8371- apic_printk(APIC_VERBOSE," works.\n");
8372+ apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
8373 goto out;
8374 }
8375+ disable_8259A_irq(0);
8376 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector);
8377- apic_printk(APIC_VERBOSE," failed.\n");
8378+ apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n");
8379
8380- apic_printk(APIC_VERBOSE, KERN_INFO "...trying to set up timer as ExtINT IRQ...");
8381+ apic_printk(APIC_QUIET, KERN_INFO
8382+ "...trying to set up timer as ExtINT IRQ...\n");
8383
8384 init_8259A(0);
8385 make_8259A_irq(0);
8386@@ -1766,11 +1811,12 @@ static inline void __init check_timer(vo
8387 unlock_ExtINT_logic();
8388
8389 if (timer_irq_works()) {
8390- apic_printk(APIC_VERBOSE," works.\n");
8391+ apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
8392 goto out;
8393 }
8394- apic_printk(APIC_VERBOSE," failed :(.\n");
8395- panic("IO-APIC + timer doesn't work! Try using the 'noapic' kernel parameter\n");
8396+ apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n");
8397+ panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a "
8398+ "report. Then try booting with the 'noapic' option.\n");
8399 out:
8400 local_irq_restore(flags);
8401 }
8402@@ -1788,10 +1834,21 @@ __setup("no_timer_check", notimercheck);
8403
8404 /*
8405 *
8406- * IRQs that are handled by the PIC in the MPS IOAPIC case.
8407- * - IRQ2 is the cascade IRQ, and cannot be a io-apic IRQ.
8408- * Linux doesn't really care, as it's not actually used
8409- * for any interrupt handling anyway.
8410+ * Traditionally ISA IRQ2 is the cascade IRQ, and is not available
8411+ * to devices. However there may be an I/O APIC pin available for
8412+ * this interrupt regardless. The pin may be left unconnected, but
8413+ * typically it will be reused as an ExtINT cascade interrupt for
8414+ * the master 8259A. In the MPS case such a pin will normally be
8415+ * reported as an ExtINT interrupt in the MP table. With ACPI
8416+ * there is no provision for ExtINT interrupts, and in the absence
8417+ * of an override it would be treated as an ordinary ISA I/O APIC
8418+ * interrupt, that is edge-triggered and unmasked by default. We
8419+ * used to do this, but it caused problems on some systems because
8420+ * of the NMI watchdog and sometimes IRQ0 of the 8254 timer using
8421+ * the same ExtINT cascade interrupt to drive the local APIC of the
8422+ * bootstrap processor. Therefore we refrain from routing IRQ2 to
8423+ * the I/O APIC in all cases now. No actual device should request
8424+ * it anyway. --macro
8425 */
8426 #define PIC_IRQS (1<<2)
8427
8428@@ -1799,10 +1856,7 @@ void __init setup_IO_APIC(void)
8429 {
8430 enable_IO_APIC();
8431
8432- if (acpi_ioapic)
8433- io_apic_irqs = ~0; /* all IRQs go through IOAPIC */
8434- else
8435- io_apic_irqs = ~PIC_IRQS;
8436+ io_apic_irqs = ~PIC_IRQS;
8437
8438 apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");
8439
8440@@ -1851,8 +1905,8 @@ static int ioapic_resume(struct sys_devi
8441
8442 spin_lock_irqsave(&ioapic_lock, flags);
8443 reg_00.raw = io_apic_read(dev->id, 0);
8444- if (reg_00.bits.ID != mp_ioapics[dev->id].mpc_apicid) {
8445- reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid;
8446+ if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) {
8447+ reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid;
8448 io_apic_write(dev->id, 0, reg_00.raw);
8449 }
8450 spin_unlock_irqrestore(&ioapic_lock, flags);
8451@@ -2254,8 +2308,8 @@ int acpi_get_override_irq(int bus_irq, i
8452 return -1;
8453
8454 for (i = 0; i < mp_irq_entries; i++)
8455- if (mp_irqs[i].mpc_irqtype == mp_INT &&
8456- mp_irqs[i].mpc_srcbusirq == bus_irq)
8457+ if (mp_irqs[i].mp_irqtype == mp_INT &&
8458+ mp_irqs[i].mp_srcbusirq == bus_irq)
8459 break;
8460 if (i >= mp_irq_entries)
8461 return -1;
8462@@ -2349,7 +2403,7 @@ void __init ioapic_init_mappings(void)
8463 ioapic_res = ioapic_setup_resources();
8464 for (i = 0; i < nr_ioapics; i++) {
8465 if (smp_found_config) {
8466- ioapic_phys = mp_ioapics[i].mpc_apicaddr;
8467+ ioapic_phys = mp_ioapics[i].mp_apicaddr;
8468 } else {
8469 ioapic_phys = (unsigned long)
8470 alloc_bootmem_pages(PAGE_SIZE);
8471--- sle11-2009-10-16.orig/arch/x86/kernel/ipi-xen.c 2009-03-16 16:38:05.000000000 +0100
8472+++ sle11-2009-10-16/arch/x86/kernel/ipi-xen.c 2009-06-04 10:21:39.000000000 +0200
8473@@ -8,7 +8,6 @@
8474 #include <linux/kernel_stat.h>
8475 #include <linux/mc146818rtc.h>
8476 #include <linux/cache.h>
8477-#include <linux/interrupt.h>
8478 #include <linux/cpu.h>
8479 #include <linux/module.h>
8480
8481@@ -85,7 +84,7 @@ void __send_IPI_shortcut(unsigned int sh
8482 /*
8483 * Send the IPI. The write to APIC_ICR fires this off.
8484 */
8485- apic_write_around(APIC_ICR, cfg);
8486+ apic_write(APIC_ICR, cfg);
8487 #else
8488 int cpu;
8489
8490@@ -132,7 +131,7 @@ static inline void __send_IPI_dest_field
8491 * prepare target chip field
8492 */
8493 cfg = __prepare_ICR2(mask);
8494- apic_write_around(APIC_ICR2, cfg);
8495+ apic_write(APIC_ICR2, cfg);
8496
8497 /*
8498 * program the ICR
8499@@ -142,7 +141,7 @@ static inline void __send_IPI_dest_field
8500 /*
8501 * Send the IPI. The write to APIC_ICR fires this off.
8502 */
8503- apic_write_around(APIC_ICR, cfg);
8504+ apic_write(APIC_ICR, cfg);
8505 }
8506 #endif
8507
8508--- sle11-2009-10-16.orig/arch/x86/kernel/irq_32-xen.c 2009-03-16 16:38:05.000000000 +0100
8509+++ sle11-2009-10-16/arch/x86/kernel/irq_32-xen.c 2009-06-04 10:21:39.000000000 +0200
8510@@ -48,6 +48,29 @@ void ack_bad_irq(unsigned int irq)
8511 #endif
8512 }
8513
8514+#ifdef CONFIG_DEBUG_STACKOVERFLOW
8515+/* Debugging check for stack overflow: is there less than 1KB free? */
8516+static int check_stack_overflow(void)
8517+{
8518+ long sp;
8519+
8520+ __asm__ __volatile__("andl %%esp,%0" :
8521+ "=r" (sp) : "0" (THREAD_SIZE - 1));
8522+
8523+ return sp < (sizeof(struct thread_info) + STACK_WARN);
8524+}
8525+
8526+static void print_stack_overflow(void)
8527+{
8528+ printk(KERN_WARNING "low stack detected by irq handler\n");
8529+ dump_stack();
8530+}
8531+
8532+#else
8533+static inline int check_stack_overflow(void) { return 0; }
8534+static inline void print_stack_overflow(void) { }
8535+#endif
8536+
8537 #ifdef CONFIG_4KSTACKS
8538 /*
8539 * per-CPU IRQ handling contexts (thread information and stack)
8540@@ -59,48 +82,26 @@ union irq_ctx {
8541
8542 static union irq_ctx *hardirq_ctx[NR_CPUS] __read_mostly;
8543 static union irq_ctx *softirq_ctx[NR_CPUS] __read_mostly;
8544-#endif
8545-
8546-/*
8547- * do_IRQ handles all normal device IRQ's (the special
8548- * SMP cross-CPU interrupts have their own specific
8549- * handlers).
8550- */
8551-unsigned int do_IRQ(struct pt_regs *regs)
8552-{
8553- struct pt_regs *old_regs;
8554- /* high bit used in ret_from_ code */
8555- int irq = ~regs->orig_ax;
8556- struct irq_desc *desc = irq_desc + irq;
8557-#ifdef CONFIG_4KSTACKS
8558- union irq_ctx *curctx, *irqctx;
8559- u32 *isp;
8560-#endif
8561
8562- if (unlikely((unsigned)irq >= NR_IRQS)) {
8563- printk(KERN_EMERG "%s: cannot handle IRQ %d\n",
8564- __func__, irq);
8565- BUG();
8566- }
8567+static char softirq_stack[NR_CPUS * THREAD_SIZE] __page_aligned_bss;
8568+static char hardirq_stack[NR_CPUS * THREAD_SIZE] __page_aligned_bss;
8569
8570- old_regs = set_irq_regs(regs);
8571- /*irq_enter();*/
8572-#ifdef CONFIG_DEBUG_STACKOVERFLOW
8573- /* Debugging check for stack overflow: is there less than 1KB free? */
8574- {
8575- long sp;
8576-
8577- __asm__ __volatile__("andl %%esp,%0" :
8578- "=r" (sp) : "0" (THREAD_SIZE - 1));
8579- if (unlikely(sp < (sizeof(struct thread_info) + STACK_WARN))) {
8580- printk("do_IRQ: stack overflow: %ld\n",
8581- sp - sizeof(struct thread_info));
8582- dump_stack();
8583- }
8584- }
8585-#endif
8586+static void call_on_stack(void *func, void *stack)
8587+{
8588+ asm volatile("xchgl %%ebx,%%esp \n"
8589+ "call *%%edi \n"
8590+ "movl %%ebx,%%esp \n"
8591+ : "=b" (stack)
8592+ : "0" (stack),
8593+ "D"(func)
8594+ : "memory", "cc", "edx", "ecx", "eax");
8595+}
8596
8597-#ifdef CONFIG_4KSTACKS
8598+static inline int
8599+execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq)
8600+{
8601+ union irq_ctx *curctx, *irqctx;
8602+ u32 *isp, arg1, arg2;
8603
8604 curctx = (union irq_ctx *) current_thread_info();
8605 irqctx = hardirq_ctx[smp_processor_id()];
8606@@ -111,52 +112,39 @@ unsigned int do_IRQ(struct pt_regs *regs
8607 * handler) we can't do that and just have to keep using the
8608 * current stack (which is the irq stack already after all)
8609 */
8610- if (curctx != irqctx) {
8611- int arg1, arg2, bx;
8612-
8613- /* build the stack frame on the IRQ stack */
8614- isp = (u32*) ((char*)irqctx + sizeof(*irqctx));
8615- irqctx->tinfo.task = curctx->tinfo.task;
8616- irqctx->tinfo.previous_esp = current_stack_pointer;
8617+ if (unlikely(curctx == irqctx))
8618+ return 0;
8619
8620- /*
8621- * Copy the softirq bits in preempt_count so that the
8622- * softirq checks work in the hardirq context.
8623- */
8624- irqctx->tinfo.preempt_count =
8625- (irqctx->tinfo.preempt_count & ~SOFTIRQ_MASK) |
8626- (curctx->tinfo.preempt_count & SOFTIRQ_MASK);
8627-
8628- asm volatile(
8629- " xchgl %%ebx,%%esp \n"
8630- " call *%%edi \n"
8631- " movl %%ebx,%%esp \n"
8632- : "=a" (arg1), "=d" (arg2), "=b" (bx)
8633- : "0" (irq), "1" (desc), "2" (isp),
8634- "D" (desc->handle_irq)
8635- : "memory", "cc", "ecx"
8636- );
8637- } else
8638-#endif
8639- desc->handle_irq(irq, desc);
8640+ /* build the stack frame on the IRQ stack */
8641+ isp = (u32 *) ((char*)irqctx + sizeof(*irqctx));
8642+ irqctx->tinfo.task = curctx->tinfo.task;
8643+ irqctx->tinfo.previous_esp = current_stack_pointer;
8644
8645- /*irq_exit();*/
8646- set_irq_regs(old_regs);
8647+ /*
8648+ * Copy the softirq bits in preempt_count so that the
8649+ * softirq checks work in the hardirq context.
8650+ */
8651+ irqctx->tinfo.preempt_count =
8652+ (irqctx->tinfo.preempt_count & ~SOFTIRQ_MASK) |
8653+ (curctx->tinfo.preempt_count & SOFTIRQ_MASK);
8654+
8655+ if (unlikely(overflow))
8656+ call_on_stack(print_stack_overflow, isp);
8657+
8658+ asm volatile("xchgl %%ebx,%%esp \n"
8659+ "call *%%edi \n"
8660+ "movl %%ebx,%%esp \n"
8661+ : "=a" (arg1), "=d" (arg2), "=b" (isp)
8662+ : "0" (irq), "1" (desc), "2" (isp),
8663+ "D" (desc->handle_irq)
8664+ : "memory", "cc", "ecx");
8665 return 1;
8666 }
8667
8668-#ifdef CONFIG_4KSTACKS
8669-
8670-static char softirq_stack[NR_CPUS * THREAD_SIZE]
8671- __attribute__((__section__(".bss.page_aligned")));
8672-
8673-static char hardirq_stack[NR_CPUS * THREAD_SIZE]
8674- __attribute__((__section__(".bss.page_aligned")));
8675-
8676 /*
8677 * allocate per-cpu stacks for hardirq and for softirq processing
8678 */
8679-void irq_ctx_init(int cpu)
8680+void __cpuinit irq_ctx_init(int cpu)
8681 {
8682 union irq_ctx *irqctx;
8683
8684@@ -164,25 +152,25 @@ void irq_ctx_init(int cpu)
8685 return;
8686
8687 irqctx = (union irq_ctx*) &hardirq_stack[cpu*THREAD_SIZE];
8688- irqctx->tinfo.task = NULL;
8689- irqctx->tinfo.exec_domain = NULL;
8690- irqctx->tinfo.cpu = cpu;
8691- irqctx->tinfo.preempt_count = HARDIRQ_OFFSET;
8692- irqctx->tinfo.addr_limit = MAKE_MM_SEG(0);
8693+ irqctx->tinfo.task = NULL;
8694+ irqctx->tinfo.exec_domain = NULL;
8695+ irqctx->tinfo.cpu = cpu;
8696+ irqctx->tinfo.preempt_count = HARDIRQ_OFFSET;
8697+ irqctx->tinfo.addr_limit = MAKE_MM_SEG(0);
8698
8699 hardirq_ctx[cpu] = irqctx;
8700
8701 irqctx = (union irq_ctx*) &softirq_stack[cpu*THREAD_SIZE];
8702- irqctx->tinfo.task = NULL;
8703- irqctx->tinfo.exec_domain = NULL;
8704- irqctx->tinfo.cpu = cpu;
8705- irqctx->tinfo.preempt_count = 0;
8706- irqctx->tinfo.addr_limit = MAKE_MM_SEG(0);
8707+ irqctx->tinfo.task = NULL;
8708+ irqctx->tinfo.exec_domain = NULL;
8709+ irqctx->tinfo.cpu = cpu;
8710+ irqctx->tinfo.preempt_count = 0;
8711+ irqctx->tinfo.addr_limit = MAKE_MM_SEG(0);
8712
8713 softirq_ctx[cpu] = irqctx;
8714
8715- printk("CPU %u irqstacks, hard=%p soft=%p\n",
8716- cpu,hardirq_ctx[cpu],softirq_ctx[cpu]);
8717+ printk(KERN_DEBUG "CPU %u irqstacks, hard=%p soft=%p\n",
8718+ cpu,hardirq_ctx[cpu],softirq_ctx[cpu]);
8719 }
8720
8721 void irq_ctx_exit(int cpu)
8722@@ -211,25 +199,56 @@ asmlinkage void do_softirq(void)
8723 /* build the stack frame on the softirq stack */
8724 isp = (u32*) ((char*)irqctx + sizeof(*irqctx));
8725
8726- asm volatile(
8727- " xchgl %%ebx,%%esp \n"
8728- " call __do_softirq \n"
8729- " movl %%ebx,%%esp \n"
8730- : "=b"(isp)
8731- : "0"(isp)
8732- : "memory", "cc", "edx", "ecx", "eax"
8733- );
8734+ call_on_stack(__do_softirq, isp);
8735 /*
8736 * Shouldnt happen, we returned above if in_interrupt():
8737- */
8738+ */
8739 WARN_ON_ONCE(softirq_count());
8740 }
8741
8742 local_irq_restore(flags);
8743 }
8744+
8745+#else
8746+static inline int
8747+execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) { return 0; }
8748 #endif
8749
8750 /*
8751+ * do_IRQ handles all normal device IRQ's (the special
8752+ * SMP cross-CPU interrupts have their own specific
8753+ * handlers).
8754+ */
8755+unsigned int do_IRQ(struct pt_regs *regs)
8756+{
8757+ struct pt_regs *old_regs;
8758+ /* high bit used in ret_from_ code */
8759+ int overflow, irq = ~regs->orig_ax;
8760+ struct irq_desc *desc = irq_desc + irq;
8761+
8762+ if (unlikely((unsigned)irq >= NR_IRQS)) {
8763+ printk(KERN_EMERG "%s: cannot handle IRQ %d\n",
8764+ __func__, irq);
8765+ BUG();
8766+ }
8767+
8768+ old_regs = set_irq_regs(regs);
8769+ /*irq_enter();*/
8770+
8771+ overflow = check_stack_overflow();
8772+
8773+ if (!execute_on_irq_stack(overflow, desc, irq)) {
8774+ if (unlikely(overflow))
8775+ print_stack_overflow();
8776+ desc->handle_irq(irq, desc);
8777+ }
8778+
8779+ /*irq_exit();*/
8780+ set_irq_regs(old_regs);
8781+ return 1;
8782+}
8783+
8784+/*
8785 * Interrupt statistics:
8786 */
8787
8788@@ -337,6 +356,42 @@ skip:
8789 return 0;
8790 }
8791
8792+/*
8793+ * /proc/stat helpers
8794+ */
8795+u64 arch_irq_stat_cpu(unsigned int cpu)
8796+{
8797+ u64 sum = nmi_count(cpu);
8798+
8799+#ifdef CONFIG_X86_LOCAL_APIC
8800+ sum += per_cpu(irq_stat, cpu).apic_timer_irqs;
8801+#endif
8802+#ifdef CONFIG_SMP
8803+ sum += per_cpu(irq_stat, cpu).irq_resched_count;
8804+ sum += per_cpu(irq_stat, cpu).irq_call_count;
8805+#ifndef CONFIG_XEN
8806+ sum += per_cpu(irq_stat, cpu).irq_tlb_count;
8807+#endif
8808+#endif
8809+#ifdef CONFIG_X86_MCE
8810+ sum += per_cpu(irq_stat, cpu).irq_thermal_count;
8811+#endif
8812+#ifdef CONFIG_X86_LOCAL_APIC
8813+ sum += per_cpu(irq_stat, cpu).irq_spurious_count;
8814+#endif
8815+ return sum;
8816+}
8817+
8818+u64 arch_irq_stat(void)
8819+{
8820+ u64 sum = atomic_read(&irq_err_count);
8821+
8822+#ifdef CONFIG_X86_IO_APIC
8823+ sum += atomic_read(&irq_mis_count);
8824+#endif
8825+ return sum;
8826+}
8827+
8828 #ifdef CONFIG_HOTPLUG_CPU
8829
8830 void fixup_irqs(cpumask_t map)
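The new check_stack_overflow() above relies on the thread stack being a THREAD_SIZE-aligned block with struct thread_info at its base, so masking the stack pointer with THREAD_SIZE - 1 gives the distance from that base. A small standalone sketch of the arithmetic follows; the sizes and the example address are invented, and the same layout is assumed.

/*
 * Illustrative sketch (not kernel code): why "esp & (THREAD_SIZE - 1)"
 * measures how much room is left before the handler tramples thread_info.
 */
#include <stdio.h>

#define THREAD_SIZE      8192UL   /* CONFIG_4KSTACKS would make this 4096 */
#define STACK_WARN       1024UL   /* stand-in for THREAD_SIZE / 8 */
#define THREAD_INFO_SIZE 56UL     /* stand-in for sizeof(struct thread_info) */

int main(void)
{
	unsigned long esp  = 0xc2340230UL;             /* example stack pointer */
	unsigned long room = esp & (THREAD_SIZE - 1);  /* bytes above the stack base */

	printf("room left on stack: %lu bytes\n", room);
	if (room < THREAD_INFO_SIZE + STACK_WARN)
		printf("low stack detected by irq handler\n");
	return 0;
}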
8831--- sle11-2009-10-16.orig/arch/x86/kernel/irq_64-xen.c 2009-03-16 16:33:40.000000000 +0100
8832+++ sle11-2009-10-16/arch/x86/kernel/irq_64-xen.c 2009-06-04 10:21:39.000000000 +0200
8833@@ -163,6 +163,34 @@ skip:
8834 }
8835
8836 /*
8837+ * /proc/stat helpers
8838+ */
8839+u64 arch_irq_stat_cpu(unsigned int cpu)
8840+{
8841+ u64 sum = cpu_pda(cpu)->__nmi_count;
8842+
8843+ sum += cpu_pda(cpu)->apic_timer_irqs;
8844+#ifdef CONFIG_SMP
8845+ sum += cpu_pda(cpu)->irq_resched_count;
8846+ sum += cpu_pda(cpu)->irq_call_count;
8847+#ifndef CONFIG_XEN
8848+ sum += cpu_pda(cpu)->irq_tlb_count;
8849+#endif
8850+#endif
8851+#ifdef CONFIG_X86_MCE
8852+ sum += cpu_pda(cpu)->irq_thermal_count;
8853+ sum += cpu_pda(cpu)->irq_threshold_count;
8854+#endif
8855+ sum += cpu_pda(cpu)->irq_spurious_count;
8856+ return sum;
8857+}
8858+
8859+u64 arch_irq_stat(void)
8860+{
8861+ return atomic_read(&irq_err_count);
8862+}
8863+
8864+/*
8865 * do_IRQ handles all normal device IRQ's (the special
8866 * SMP cross-CPU interrupts have their own specific
8867 * handlers).
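The /proc/stat helpers added above simply fold the per-CPU PDA counters into one total per CPU, with the global error counts added once by arch_irq_stat(). A rough standalone model of that aggregation follows; the struct, field names and numbers are invented for the example.

/*
 * Illustrative sketch (not kernel code): summing per-CPU interrupt counters
 * plus a single global error count, the way the arch_irq_stat helpers feed
 * the "intr" line of /proc/stat.
 */
#include <stdio.h>

struct cpu_irq_counters {
	unsigned long long nmi, apic_timer, resched, call, spurious;
};

static unsigned long long irq_stat_cpu(const struct cpu_irq_counters *c)
{
	/* mirrors the shape of arch_irq_stat_cpu(): one running sum */
	return c->nmi + c->apic_timer + c->resched + c->call + c->spurious;
}

int main(void)
{
	struct cpu_irq_counters cpu_stat[2] = {
		{ 3, 120000, 4500, 320, 1 },   /* made-up counters for cpu0 */
		{ 2, 119800, 4391, 287, 0 },   /* made-up counters for cpu1 */
	};
	unsigned long long irq_err_count = 5;      /* global, counted once */
	unsigned long long total = irq_err_count;  /* the arch_irq_stat() part */
	int cpu;

	for (cpu = 0; cpu < 2; cpu++)
		total += irq_stat_cpu(&cpu_stat[cpu]);
	printf("total interrupts for /proc/stat: %llu\n", total);
	return 0;
}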
8868--- sle11-2009-10-16.orig/arch/x86/kernel/ldt-xen.c 2009-03-16 16:33:40.000000000 +0100
8869+++ sle11-2009-10-16/arch/x86/kernel/ldt-xen.c 2009-06-04 10:21:39.000000000 +0200
8870@@ -20,9 +20,9 @@
8871 #include <asm/mmu_context.h>
8872
8873 #ifdef CONFIG_SMP
8874-static void flush_ldt(void *null)
8875+static void flush_ldt(void *current_mm)
8876 {
8877- if (current->active_mm)
8878+ if (current->active_mm == current_mm)
8879 load_LDT(&current->active_mm->context);
8880 }
8881 #endif
8882@@ -62,8 +62,6 @@ static int alloc_ldt(mm_context_t *pc, i
8883
8884 if (reload) {
8885 #ifdef CONFIG_SMP
8886- cpumask_t mask;
8887-
8888 preempt_disable();
8889 #endif
8890 make_pages_readonly(newldt,
8891@@ -71,9 +69,9 @@ static int alloc_ldt(mm_context_t *pc, i
8892 XENFEAT_writable_descriptor_tables);
8893 load_LDT(pc);
8894 #ifdef CONFIG_SMP
8895- mask = cpumask_of_cpu(smp_processor_id());
8896- if (!cpus_equal(current->mm->cpu_vm_mask, mask))
8897- smp_call_function(flush_ldt, NULL, 1, 1);
8898+ if (!cpus_equal(current->mm->cpu_vm_mask,
8899+ cpumask_of_cpu(smp_processor_id())))
8900+ smp_call_function(flush_ldt, current->mm, 1);
8901 preempt_enable();
8902 #endif
8903 }
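The flush_ldt() change above passes the mm whose LDT was modified to the cross-call, so each CPU reloads its LDT only when it is actually running that mm rather than on any active mm. A toy standalone model of that filtering pattern follows; the simulated CPUs, mm ids and loop standing in for smp_call_function() are all invented.

/*
 * Illustrative sketch (not kernel code): broadcast a callback with an
 * argument and let each CPU decide whether the work applies to it.
 */
#include <stdio.h>

#define NR_CPUS 4

static int active_mm[NR_CPUS] = { 7, 3, 7, 5 };   /* made-up mm ids per CPU */

static void flush_ldt(int cpu, const void *arg)
{
	int mm = *(const int *)arg;

	if (active_mm[cpu] == mm)                  /* the new check */
		printf("cpu%d: reloading LDT for mm %d\n", cpu, mm);
	else
		printf("cpu%d: not running mm %d, nothing to do\n", cpu, mm);
}

int main(void)
{
	int changed_mm = 7;
	int cpu;

	for (cpu = 0; cpu < NR_CPUS; cpu++)        /* stand-in for smp_call_function() */
		flush_ldt(cpu, &changed_mm);
	return 0;
}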
8904--- sle11-2009-10-16.orig/arch/x86/kernel/machine_kexec_32.c 2008-11-25 12:35:53.000000000 +0100
8905+++ sle11-2009-10-16/arch/x86/kernel/machine_kexec_32.c 2009-06-04 10:21:39.000000000 +0200
8906@@ -68,6 +68,8 @@ void machine_kexec_setup_load_arg(xen_ke
8907 xki->page_list[PA_PTE_0] = __ma(kexec_pte0);
8908 xki->page_list[PA_PTE_1] = __ma(kexec_pte1);
8909
8910+ if (image->type == KEXEC_TYPE_DEFAULT)
8911+ xki->page_list[PA_SWAP_PAGE] = page_to_phys(image->swap_page);
8912 }
8913
8914 int __init machine_kexec_setup_resources(struct resource *hypervisor,
8915--- sle11-2009-10-16.orig/arch/x86/kernel/microcode-xen.c 2009-03-16 16:38:05.000000000 +0100
8916+++ sle11-2009-10-16/arch/x86/kernel/microcode-xen.c 2009-06-04 10:21:39.000000000 +0200
8917@@ -5,13 +5,14 @@
8918 * 2006 Shaohua Li <shaohua.li@intel.com>
8919 *
8920 * This driver allows to upgrade microcode on Intel processors
8921- * belonging to IA-32 family - PentiumPro, Pentium II,
8922+ * belonging to IA-32 family - PentiumPro, Pentium II,
8923 * Pentium III, Xeon, Pentium 4, etc.
8924 *
8925- * Reference: Section 8.10 of Volume III, Intel Pentium 4 Manual,
8926- * Order Number 245472 or free download from:
8927- *
8928- * http://developer.intel.com/design/pentium4/manuals/245472.htm
8929+ * Reference: Section 8.11 of Volume 3a, IA-32 Intel? Architecture
8930+ * Software Developer's Manual
8931+ * Order Number 253668 or free download from:
8932+ *
8933+ * http://developer.intel.com/design/pentium4/manuals/253668.htm
8934 *
8935 * For more information, go to http://www.urbanmyth.org/microcode
8936 *
8937@@ -26,6 +27,7 @@
8938 #include <linux/kernel.h>
8939 #include <linux/init.h>
8940 #include <linux/sched.h>
8941+#include <linux/smp_lock.h>
8942 #include <linux/cpumask.h>
8943 #include <linux/module.h>
8944 #include <linux/slab.h>
8945@@ -86,6 +88,7 @@ static int do_microcode_update (const vo
8946
8947 static int microcode_open (struct inode *unused1, struct file *unused2)
8948 {
8949+ cycle_kernel_lock();
8950 return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
8951 }
8952
8953@@ -162,7 +165,7 @@ static int request_microcode(void)
8954 c->x86, c->x86_model, c->x86_mask);
8955 error = request_firmware(&firmware, name, &microcode_pdev->dev);
8956 if (error) {
8957- pr_debug("microcode: ucode data file %s load failed\n", name);
8958+ pr_debug("microcode: data file %s load failed\n", name);
8959 return error;
8960 }
8961
8962@@ -183,6 +186,9 @@ static int __init microcode_init (void)
8963 {
8964 int error;
8965
8966+ printk(KERN_INFO
8967+ "IA-32 Microcode Update Driver: v" MICROCODE_VERSION " <tigran@aivazian.fsnet.co.uk>\n");
8968+
8969 error = microcode_dev_init();
8970 if (error)
8971 return error;
8972@@ -195,8 +201,6 @@ static int __init microcode_init (void)
8973
8974 request_microcode();
8975
8976- printk(KERN_INFO
8977- "IA-32 Microcode Update Driver: v" MICROCODE_VERSION " <tigran@aivazian.fsnet.co.uk>\n");
8978 return 0;
8979 }
8980
8981--- sle11-2009-10-16.orig/arch/x86/kernel/mpparse-xen.c 2009-03-16 16:38:05.000000000 +0100
8982+++ sle11-2009-10-16/arch/x86/kernel/mpparse-xen.c 2009-06-04 10:21:39.000000000 +0200
8983@@ -25,6 +25,9 @@
8984 #include <asm/proto.h>
8985 #include <asm/acpi.h>
8986 #include <asm/bios_ebda.h>
8987+#include <asm/e820.h>
8988+#include <asm/trampoline.h>
8989+#include <asm/setup.h>
8990
8991 #include <mach_apic.h>
8992 #ifdef CONFIG_X86_32
8993@@ -32,27 +35,10 @@
8994 #include <mach_mpparse.h>
8995 #endif
8996
8997-/* Have we found an MP table */
8998-int smp_found_config;
8999-
9000-/*
9001- * Various Linux-internal data structures created from the
9002- * MP-table.
9003- */
9004-#if defined (CONFIG_MCA) || defined (CONFIG_EISA)
9005-int mp_bus_id_to_type[MAX_MP_BUSSES];
9006-#endif
9007-
9008-DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
9009-int mp_bus_id_to_pci_bus[MAX_MP_BUSSES] = {[0 ... MAX_MP_BUSSES - 1] = -1 };
9010-
9011-static int mp_current_pci_id;
9012-
9013-int pic_mode;
9014-
9015-/*
9016- * Intel MP BIOS table parsing routines:
9017- */
9018+static void *_bus_to_virt(unsigned long ma)
9019+{
9020+ return is_ISA_range(ma, ma) ? isa_bus_to_virt(ma) : bus_to_virt(ma);
9021+}
9022
9023 /*
9024 * Checksum an MP configuration block.
9025@@ -68,19 +54,7 @@ static int __init mpf_checksum(unsigned
9026 return sum & 0xFF;
9027 }
9028
9029-#ifdef CONFIG_X86_NUMAQ
9030-/*
9031- * Have to match translation table entries to main table entries by counter
9032- * hence the mpc_record variable .... can't see a less disgusting way of
9033- * doing this ....
9034- */
9035-
9036-static int mpc_record;
9037-static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY]
9038- __cpuinitdata;
9039-#endif
9040-
9041-static void __cpuinit MP_processor_info(struct mpc_config_processor *m)
9042+static void __init MP_processor_info(struct mpc_config_processor *m)
9043 {
9044 #ifndef CONFIG_XEN
9045 int apicid;
9046@@ -90,11 +64,12 @@ static void __cpuinit MP_processor_info(
9047 disabled_cpus++;
9048 return;
9049 }
9050-#ifdef CONFIG_X86_NUMAQ
9051- apicid = mpc_apic_id(m, translation_table[mpc_record]);
9052-#else
9053- apicid = m->mpc_apicid;
9054-#endif
9055+
9056+ if (x86_quirks->mpc_apic_id)
9057+ apicid = x86_quirks->mpc_apic_id(m);
9058+ else
9059+ apicid = m->mpc_apicid;
9060+
9061 if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
9062 bootup_cpu = " (Bootup-CPU)";
9063 boot_cpu_physical_apicid = m->mpc_apicid;
9064@@ -107,18 +82,17 @@ static void __cpuinit MP_processor_info(
9065 #endif
9066 }
9067
9068+#ifdef CONFIG_X86_IO_APIC
9069 static void __init MP_bus_info(struct mpc_config_bus *m)
9070 {
9071 char str[7];
9072-
9073 memcpy(str, m->mpc_bustype, 6);
9074 str[6] = 0;
9075
9076-#ifdef CONFIG_X86_NUMAQ
9077- mpc_oem_bus_info(m, str, translation_table[mpc_record]);
9078-#else
9079- Dprintk("Bus #%d is %s\n", m->mpc_busid, str);
9080-#endif
9081+ if (x86_quirks->mpc_oem_bus_info)
9082+ x86_quirks->mpc_oem_bus_info(m, str);
9083+ else
9084+ apic_printk(APIC_VERBOSE, "Bus #%d is %s\n", m->mpc_busid, str);
9085
9086 #if MAX_MP_BUSSES < 256
9087 if (m->mpc_busid >= MAX_MP_BUSSES) {
9088@@ -135,12 +109,10 @@ static void __init MP_bus_info(struct mp
9089 mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
9090 #endif
9091 } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) {
9092-#ifdef CONFIG_X86_NUMAQ
9093- mpc_oem_pci_bus(m, translation_table[mpc_record]);
9094-#endif
9095+ if (x86_quirks->mpc_oem_pci_bus)
9096+ x86_quirks->mpc_oem_pci_bus(m);
9097+
9098 clear_bit(m->mpc_busid, mp_bus_not_pci);
9099- mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id;
9100- mp_current_pci_id++;
9101 #if defined(CONFIG_EISA) || defined (CONFIG_MCA)
9102 mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI;
9103 } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA) - 1) == 0) {
9104@@ -151,6 +123,7 @@ static void __init MP_bus_info(struct mp
9105 } else
9106 printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str);
9107 }
9108+#endif
9109
9110 #ifdef CONFIG_X86_IO_APIC
9111
9112@@ -180,117 +153,111 @@ static void __init MP_ioapic_info(struct
9113 if (bad_ioapic(m->mpc_apicaddr))
9114 return;
9115
9116- mp_ioapics[nr_ioapics] = *m;
9117+ mp_ioapics[nr_ioapics].mp_apicaddr = m->mpc_apicaddr;
9118+ mp_ioapics[nr_ioapics].mp_apicid = m->mpc_apicid;
9119+ mp_ioapics[nr_ioapics].mp_type = m->mpc_type;
9120+ mp_ioapics[nr_ioapics].mp_apicver = m->mpc_apicver;
9121+ mp_ioapics[nr_ioapics].mp_flags = m->mpc_flags;
9122 nr_ioapics++;
9123 }
9124
9125-static void __init MP_intsrc_info(struct mpc_config_intsrc *m)
9126+static void print_MP_intsrc_info(struct mpc_config_intsrc *m)
9127 {
9128- mp_irqs[mp_irq_entries] = *m;
9129- Dprintk("Int: type %d, pol %d, trig %d, bus %d,"
9130+ apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x,"
9131 " IRQ %02x, APIC ID %x, APIC INT %02x\n",
9132 m->mpc_irqtype, m->mpc_irqflag & 3,
9133 (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus,
9134 m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq);
9135- if (++mp_irq_entries == MAX_IRQ_SOURCES)
9136- panic("Max # of irq sources exceeded!!\n");
9137 }
9138
9139-#endif
9140-
9141-static void __init MP_lintsrc_info(struct mpc_config_lintsrc *m)
9142+static void __init print_mp_irq_info(struct mp_config_intsrc *mp_irq)
9143 {
9144- Dprintk("Lint: type %d, pol %d, trig %d, bus %d,"
9145- " IRQ %02x, APIC ID %x, APIC LINT %02x\n",
9146- m->mpc_irqtype, m->mpc_irqflag & 3,
9147- (m->mpc_irqflag >> 2) & 3, m->mpc_srcbusid,
9148- m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
9149+ apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x,"
9150+ " IRQ %02x, APIC ID %x, APIC INT %02x\n",
9151+ mp_irq->mp_irqtype, mp_irq->mp_irqflag & 3,
9152+ (mp_irq->mp_irqflag >> 2) & 3, mp_irq->mp_srcbus,
9153+ mp_irq->mp_srcbusirq, mp_irq->mp_dstapic, mp_irq->mp_dstirq);
9154 }
9155
9156-#ifdef CONFIG_X86_NUMAQ
9157-static void __init MP_translation_info(struct mpc_config_translation *m)
9158+static void __init assign_to_mp_irq(struct mpc_config_intsrc *m,
9159+ struct mp_config_intsrc *mp_irq)
9160 {
9161- printk(KERN_INFO
9162- "Translation: record %d, type %d, quad %d, global %d, local %d\n",
9163- mpc_record, m->trans_type, m->trans_quad, m->trans_global,
9164- m->trans_local);
9165+ mp_irq->mp_dstapic = m->mpc_dstapic;
9166+ mp_irq->mp_type = m->mpc_type;
9167+ mp_irq->mp_irqtype = m->mpc_irqtype;
9168+ mp_irq->mp_irqflag = m->mpc_irqflag;
9169+ mp_irq->mp_srcbus = m->mpc_srcbus;
9170+ mp_irq->mp_srcbusirq = m->mpc_srcbusirq;
9171+ mp_irq->mp_dstirq = m->mpc_dstirq;
9172+}
9173
9174- if (mpc_record >= MAX_MPC_ENTRY)
9175- printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n");
9176- else
9177- translation_table[mpc_record] = m; /* stash this for later */
9178- if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad))
9179- node_set_online(m->trans_quad);
9180+static void __init assign_to_mpc_intsrc(struct mp_config_intsrc *mp_irq,
9181+ struct mpc_config_intsrc *m)
9182+{
9183+ m->mpc_dstapic = mp_irq->mp_dstapic;
9184+ m->mpc_type = mp_irq->mp_type;
9185+ m->mpc_irqtype = mp_irq->mp_irqtype;
9186+ m->mpc_irqflag = mp_irq->mp_irqflag;
9187+ m->mpc_srcbus = mp_irq->mp_srcbus;
9188+ m->mpc_srcbusirq = mp_irq->mp_srcbusirq;
9189+ m->mpc_dstirq = mp_irq->mp_dstirq;
9190 }
9191
9192-/*
9193- * Read/parse the MPC oem tables
9194- */
9195+static int __init mp_irq_mpc_intsrc_cmp(struct mp_config_intsrc *mp_irq,
9196+ struct mpc_config_intsrc *m)
9197+{
9198+ if (mp_irq->mp_dstapic != m->mpc_dstapic)
9199+ return 1;
9200+ if (mp_irq->mp_type != m->mpc_type)
9201+ return 2;
9202+ if (mp_irq->mp_irqtype != m->mpc_irqtype)
9203+ return 3;
9204+ if (mp_irq->mp_irqflag != m->mpc_irqflag)
9205+ return 4;
9206+ if (mp_irq->mp_srcbus != m->mpc_srcbus)
9207+ return 5;
9208+ if (mp_irq->mp_srcbusirq != m->mpc_srcbusirq)
9209+ return 6;
9210+ if (mp_irq->mp_dstirq != m->mpc_dstirq)
9211+ return 7;
9212
9213-static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable,
9214- unsigned short oemsize)
9215+ return 0;
9216+}
9217+
9218+static void __init MP_intsrc_info(struct mpc_config_intsrc *m)
9219 {
9220- int count = sizeof(*oemtable); /* the header size */
9221- unsigned char *oemptr = ((unsigned char *)oemtable) + count;
9222+ int i;
9223
9224- mpc_record = 0;
9225- printk(KERN_INFO "Found an OEM MPC table at %8p - parsing it ... \n",
9226- oemtable);
9227- if (memcmp(oemtable->oem_signature, MPC_OEM_SIGNATURE, 4)) {
9228- printk(KERN_WARNING
9229- "SMP mpc oemtable: bad signature [%c%c%c%c]!\n",
9230- oemtable->oem_signature[0], oemtable->oem_signature[1],
9231- oemtable->oem_signature[2], oemtable->oem_signature[3]);
9232- return;
9233- }
9234- if (mpf_checksum((unsigned char *)oemtable, oemtable->oem_length)) {
9235- printk(KERN_WARNING "SMP oem mptable: checksum error!\n");
9236- return;
9237- }
9238- while (count < oemtable->oem_length) {
9239- switch (*oemptr) {
9240- case MP_TRANSLATION:
9241- {
9242- struct mpc_config_translation *m =
9243- (struct mpc_config_translation *)oemptr;
9244- MP_translation_info(m);
9245- oemptr += sizeof(*m);
9246- count += sizeof(*m);
9247- ++mpc_record;
9248- break;
9249- }
9250- default:
9251- {
9252- printk(KERN_WARNING
9253- "Unrecognised OEM table entry type! - %d\n",
9254- (int)*oemptr);
9255- return;
9256- }
9257- }
9258+ print_MP_intsrc_info(m);
9259+
9260+ for (i = 0; i < mp_irq_entries; i++) {
9261+ if (!mp_irq_mpc_intsrc_cmp(&mp_irqs[i], m))
9262+ return;
9263 }
9264+
9265+ assign_to_mp_irq(m, &mp_irqs[mp_irq_entries]);
9266+ if (++mp_irq_entries == MAX_IRQ_SOURCES)
9267+ panic("Max # of irq sources exceeded!!\n");
9268 }
9269
9270-static inline void mps_oem_check(struct mp_config_table *mpc, char *oem,
9271- char *productid)
9272+#endif
9273+
9274+static void __init MP_lintsrc_info(struct mpc_config_lintsrc *m)
9275 {
9276- if (strncmp(oem, "IBM NUMA", 8))
9277- printk("Warning! May not be a NUMA-Q system!\n");
9278- if (mpc->mpc_oemptr)
9279- smp_read_mpc_oem((struct mp_config_oemtable *)mpc->mpc_oemptr,
9280- mpc->mpc_oemsize);
9281+ apic_printk(APIC_VERBOSE, "Lint: type %d, pol %d, trig %d, bus %02x,"
9282+ " IRQ %02x, APIC ID %x, APIC LINT %02x\n",
9283+ m->mpc_irqtype, m->mpc_irqflag & 3,
9284+ (m->mpc_irqflag >> 2) & 3, m->mpc_srcbusid,
9285+ m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
9286 }
9287-#endif /* CONFIG_X86_NUMAQ */
9288
9289 /*
9290 * Read/parse the MPC
9291 */
9292
9293-static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early)
9294+static int __init smp_check_mpc(struct mp_config_table *mpc, char *oem,
9295+ char *str)
9296 {
9297- char str[16];
9298- char oem[10];
9299- int count = sizeof(*mpc);
9300- unsigned char *mpt = ((unsigned char *)mpc) + count;
9301
9302 if (memcmp(mpc->mpc_signature, MPC_SIGNATURE, 4)) {
9303 printk(KERN_ERR "MPTABLE: bad signature [%c%c%c%c]!\n",
9304@@ -313,19 +280,41 @@ static int __init smp_read_mpc(struct mp
9305 }
9306 memcpy(oem, mpc->mpc_oem, 8);
9307 oem[8] = 0;
9308- printk(KERN_INFO "MPTABLE: OEM ID: %s ", oem);
9309+ printk(KERN_INFO "MPTABLE: OEM ID: %s\n", oem);
9310
9311 memcpy(str, mpc->mpc_productid, 12);
9312 str[12] = 0;
9313- printk("Product ID: %s ", str);
9314
9315-#ifdef CONFIG_X86_32
9316- mps_oem_check(mpc, oem, str);
9317-#endif
9318- printk(KERN_INFO "MPTABLE: Product ID: %s ", str);
9319+ printk(KERN_INFO "MPTABLE: Product ID: %s\n", str);
9320
9321 printk(KERN_INFO "MPTABLE: APIC at: 0x%X\n", mpc->mpc_lapic);
9322
9323+ return 1;
9324+}
9325+
9326+static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early)
9327+{
9328+ char str[16];
9329+ char oem[10];
9330+
9331+ int count = sizeof(*mpc);
9332+ unsigned char *mpt = ((unsigned char *)mpc) + count;
9333+
9334+ if (!smp_check_mpc(mpc, oem, str))
9335+ return 0;
9336+
9337+#ifdef CONFIG_X86_32
9338+ /*
9339+ * need to make sure summit and es7000's mps_oem_check is safe to be
9340+ * called early via genericarch 's mps_oem_check
9341+ */
9342+ if (early) {
9343+#ifdef CONFIG_X86_NUMAQ
9344+ numaq_mps_oem_check(mpc, oem, str);
9345+#endif
9346+ } else
9347+ mps_oem_check(mpc, oem, str);
9348+#endif
9349 /* save the local APIC address, it might be non-default */
9350 if (!acpi_lapic)
9351 mp_lapic_addr = mpc->mpc_lapic;
9352@@ -333,12 +322,17 @@ static int __init smp_read_mpc(struct mp
9353 if (early)
9354 return 1;
9355
9356+ if (mpc->mpc_oemptr && x86_quirks->smp_read_mpc_oem) {
9357+ struct mp_config_oemtable *oem_table = (struct mp_config_oemtable *)(unsigned long)mpc->mpc_oemptr;
9358+ x86_quirks->smp_read_mpc_oem(oem_table, mpc->mpc_oemsize);
9359+ }
9360+
9361 /*
9362 * Now process the configuration blocks.
9363 */
9364-#ifdef CONFIG_X86_NUMAQ
9365- mpc_record = 0;
9366-#endif
9367+ if (x86_quirks->mpc_record)
9368+ *x86_quirks->mpc_record = 0;
9369+
9370 while (count < mpc->mpc_length) {
9371 switch (*mpt) {
9372 case MP_PROCESSOR:
9373@@ -356,7 +350,9 @@ static int __init smp_read_mpc(struct mp
9374 {
9375 struct mpc_config_bus *m =
9376 (struct mpc_config_bus *)mpt;
9377+#ifdef CONFIG_X86_IO_APIC
9378 MP_bus_info(m);
9379+#endif
9380 mpt += sizeof(*m);
9381 count += sizeof(*m);
9382 break;
9383@@ -402,10 +398,14 @@ static int __init smp_read_mpc(struct mp
9384 count = mpc->mpc_length;
9385 break;
9386 }
9387-#ifdef CONFIG_X86_NUMAQ
9388- ++mpc_record;
9389-#endif
9390+ if (x86_quirks->mpc_record)
9391+ (*x86_quirks->mpc_record)++;
9392 }
9393+
9394+#ifdef CONFIG_X86_GENERICARCH
9395+ generic_bigsmp_probe();
9396+#endif
9397+
9398 setup_apic_routing();
9399 if (!num_processors)
9400 printk(KERN_ERR "MPTABLE: no processors registered!\n");
9401@@ -431,7 +431,7 @@ static void __init construct_default_ioi
9402 intsrc.mpc_type = MP_INTSRC;
9403 intsrc.mpc_irqflag = 0; /* conforming */
9404 intsrc.mpc_srcbus = 0;
9405- intsrc.mpc_dstapic = mp_ioapics[0].mpc_apicid;
9406+ intsrc.mpc_dstapic = mp_ioapics[0].mp_apicid;
9407
9408 intsrc.mpc_irqtype = mp_INT;
9409
9410@@ -492,40 +492,11 @@ static void __init construct_default_ioi
9411 MP_intsrc_info(&intsrc);
9412 }
9413
9414-#endif
9415
9416-static inline void __init construct_default_ISA_mptable(int mpc_default_type)
9417+static void __init construct_ioapic_table(int mpc_default_type)
9418 {
9419- struct mpc_config_processor processor;
9420- struct mpc_config_bus bus;
9421-#ifdef CONFIG_X86_IO_APIC
9422 struct mpc_config_ioapic ioapic;
9423-#endif
9424- struct mpc_config_lintsrc lintsrc;
9425- int linttypes[2] = { mp_ExtINT, mp_NMI };
9426- int i;
9427-
9428- /*
9429- * local APIC has default address
9430- */
9431- mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
9432-
9433- /*
9434- * 2 CPUs, numbered 0 & 1.
9435- */
9436- processor.mpc_type = MP_PROCESSOR;
9437- /* Either an integrated APIC or a discrete 82489DX. */
9438- processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
9439- processor.mpc_cpuflag = CPU_ENABLED;
9440- processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
9441- (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask;
9442- processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
9443- processor.mpc_reserved[0] = 0;
9444- processor.mpc_reserved[1] = 0;
9445- for (i = 0; i < 2; i++) {
9446- processor.mpc_apicid = i;
9447- MP_processor_info(&processor);
9448- }
9449+ struct mpc_config_bus bus;
9450
9451 bus.mpc_type = MP_BUS;
9452 bus.mpc_busid = 0;
9453@@ -554,7 +525,6 @@ static inline void __init construct_defa
9454 MP_bus_info(&bus);
9455 }
9456
9457-#ifdef CONFIG_X86_IO_APIC
9458 ioapic.mpc_type = MP_IOAPIC;
9459 ioapic.mpc_apicid = 2;
9460 ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
9461@@ -566,7 +536,42 @@ static inline void __init construct_defa
9462 * We set up most of the low 16 IO-APIC pins according to MPS rules.
9463 */
9464 construct_default_ioirq_mptable(mpc_default_type);
9465+}
9466+#else
9467+static inline void __init construct_ioapic_table(int mpc_default_type) { }
9468 #endif
9469+
9470+static inline void __init construct_default_ISA_mptable(int mpc_default_type)
9471+{
9472+ struct mpc_config_processor processor;
9473+ struct mpc_config_lintsrc lintsrc;
9474+ int linttypes[2] = { mp_ExtINT, mp_NMI };
9475+ int i;
9476+
9477+ /*
9478+ * local APIC has default address
9479+ */
9480+ mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
9481+
9482+ /*
9483+ * 2 CPUs, numbered 0 & 1.
9484+ */
9485+ processor.mpc_type = MP_PROCESSOR;
9486+ /* Either an integrated APIC or a discrete 82489DX. */
9487+ processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
9488+ processor.mpc_cpuflag = CPU_ENABLED;
9489+ processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
9490+ (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask;
9491+ processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
9492+ processor.mpc_reserved[0] = 0;
9493+ processor.mpc_reserved[1] = 0;
9494+ for (i = 0; i < 2; i++) {
9495+ processor.mpc_apicid = i;
9496+ MP_processor_info(&processor);
9497+ }
9498+
9499+ construct_ioapic_table(mpc_default_type);
9500+
9501 lintsrc.mpc_type = MP_LINTSRC;
9502 lintsrc.mpc_irqflag = 0; /* conforming */
9503 lintsrc.mpc_srcbusid = 0;
9504@@ -584,10 +589,14 @@ static struct intel_mp_floating *mpf_fou
9505 /*
9506 * Scan the memory blocks for an SMP configuration block.
9507 */
9508-static void __init __get_smp_config(unsigned early)
9509+static void __init __get_smp_config(unsigned int early)
9510 {
9511 struct intel_mp_floating *mpf = mpf_found;
9512
9513+ if (x86_quirks->mach_get_smp_config) {
9514+ if (x86_quirks->mach_get_smp_config(early))
9515+ return;
9516+ }
9517 if (acpi_lapic && early)
9518 return;
9519 /*
9520@@ -604,7 +613,7 @@ static void __init __get_smp_config(unsi
9521
9522 printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n",
9523 mpf->mpf_specification);
9524-#ifdef CONFIG_X86_32
9525+#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) && !defined(CONFIG_XEN)
9526 if (mpf->mpf_feature2 & (1 << 7)) {
9527 printk(KERN_INFO " IMCR and PIC compatibility mode.\n");
9528 pic_mode = 1;
9529@@ -635,8 +644,10 @@ static void __init __get_smp_config(unsi
9530 * Read the physical hardware table. Anything here will
9531 * override the defaults.
9532 */
9533- if (!smp_read_mpc(isa_bus_to_virt(mpf->mpf_physptr), early)) {
9534+ if (!smp_read_mpc(_bus_to_virt(mpf->mpf_physptr), early)) {
9535+#ifdef CONFIG_X86_LOCAL_APIC
9536 smp_found_config = 0;
9537+#endif
9538 printk(KERN_ERR
9539 "BIOS bug, MP table errors detected!...\n");
9540 printk(KERN_ERR "... disabling SMP support. "
9541@@ -690,10 +701,11 @@ void __init get_smp_config(void)
9542 static int __init smp_scan_config(unsigned long base, unsigned long length,
9543 unsigned reserve)
9544 {
9545- unsigned int *bp = isa_bus_to_virt(base);
9546+ unsigned int *bp = _bus_to_virt(base);
9547 struct intel_mp_floating *mpf;
9548
9549- Dprintk("Scan SMP from %p for %ld bytes.\n", bp, length);
9550+ apic_printk(APIC_VERBOSE, "Scan SMP from %p for %ld bytes.\n",
9551+ bp, length);
9552 BUILD_BUG_ON(sizeof(*mpf) != 16);
9553
9554 while (length > 0) {
9555@@ -703,16 +715,22 @@ static int __init smp_scan_config(unsign
9556 !mpf_checksum((unsigned char *)bp, 16) &&
9557 ((mpf->mpf_specification == 1)
9558 || (mpf->mpf_specification == 4))) {
9559-
9560+#ifdef CONFIG_X86_LOCAL_APIC
9561 smp_found_config = 1;
9562+#endif
9563 mpf_found = mpf;
9564-#ifdef CONFIG_X86_32
9565+
9566 #ifndef CONFIG_XEN
9567 printk(KERN_INFO "found SMP MP-table at [%p] %08lx\n",
9568 mpf, virt_to_phys(mpf));
9569- reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE,
9570+
9571+ if (!reserve)
9572+ return 1;
9573+ reserve_bootmem_generic(virt_to_phys(mpf), PAGE_SIZE,
9574 BOOTMEM_DEFAULT);
9575 if (mpf->mpf_physptr) {
9576+ unsigned long size = PAGE_SIZE;
9577+#ifdef CONFIG_X86_32
9578 /*
9579 * We cannot access to MPC table to compute
9580 * table size yet, as only few megabytes from
9581@@ -722,27 +740,18 @@ static int __init smp_scan_config(unsign
9582 * PAGE_SIZE from mpg->mpf_physptr yields BUG()
9583 * in reserve_bootmem.
9584 */
9585- unsigned long size = PAGE_SIZE;
9586 unsigned long end = max_low_pfn * PAGE_SIZE;
9587 if (mpf->mpf_physptr + size > end)
9588 size = end - mpf->mpf_physptr;
9589- reserve_bootmem(mpf->mpf_physptr, size,
9590+#endif
9591+ reserve_bootmem_generic(mpf->mpf_physptr, size,
9592 BOOTMEM_DEFAULT);
9593 }
9594 #else
9595 printk(KERN_INFO "found SMP MP-table at [%p] %08lx\n",
9596- mpf, ((void *)bp - isa_bus_to_virt(base)) + base);
9597-#endif
9598-#elif !defined(CONFIG_XEN)
9599- if (!reserve)
9600- return 1;
9601-
9602- reserve_bootmem_generic(virt_to_phys(mpf), PAGE_SIZE);
9603- if (mpf->mpf_physptr)
9604- reserve_bootmem_generic(mpf->mpf_physptr,
9605- PAGE_SIZE);
9606+ mpf, ((void *)bp - _bus_to_virt(base)) + base);
9607 #endif
9608- return 1;
9609+ return 1;
9610 }
9611 bp += 4;
9612 length -= 16;
9613@@ -750,12 +759,16 @@ static int __init smp_scan_config(unsign
9614 return 0;
9615 }
9616
9617-static void __init __find_smp_config(unsigned reserve)
9618+static void __init __find_smp_config(unsigned int reserve)
9619 {
9620 #ifndef CONFIG_XEN
9621 unsigned int address;
9622 #endif
9623
9624+ if (x86_quirks->mach_find_smp_config) {
9625+ if (x86_quirks->mach_find_smp_config(reserve))
9626+ return;
9627+ }
9628 /*
9629 * FIXME: Linux assumes you have 640K of base ram..
9630 * this continues the error...
9631@@ -802,300 +815,297 @@ void __init find_smp_config(void)
9632 __find_smp_config(1);
9633 }
9634
9635-/* --------------------------------------------------------------------------
9636- ACPI-based MP Configuration
9637- -------------------------------------------------------------------------- */
9638-
9639-/*
9640- * Keep this outside and initialized to 0, for !CONFIG_ACPI builds:
9641- */
9642-int es7000_plat;
9643-
9644-#ifdef CONFIG_ACPI
9645+#ifdef CONFIG_X86_IO_APIC
9646+static u8 __initdata irq_used[MAX_IRQ_SOURCES];
9647
9648-#ifdef CONFIG_X86_IO_APIC
9649+static int __init get_MP_intsrc_index(struct mpc_config_intsrc *m)
9650+{
9651+ int i;
9652
9653-#define MP_ISA_BUS 0
9654+ if (m->mpc_irqtype != mp_INT)
9655+ return 0;
9656
9657-extern struct mp_ioapic_routing mp_ioapic_routing[MAX_IO_APICS];
9658+ if (m->mpc_irqflag != 0x0f)
9659+ return 0;
9660
9661-static int mp_find_ioapic(int gsi)
9662-{
9663- int i = 0;
9664+ /* not legacy */
9665
9666- /* Find the IOAPIC that manages this GSI. */
9667- for (i = 0; i < nr_ioapics; i++) {
9668- if ((gsi >= mp_ioapic_routing[i].gsi_base)
9669- && (gsi <= mp_ioapic_routing[i].gsi_end))
9670- return i;
9671+ for (i = 0; i < mp_irq_entries; i++) {
9672+ if (mp_irqs[i].mp_irqtype != mp_INT)
9673+ continue;
9674+
9675+ if (mp_irqs[i].mp_irqflag != 0x0f)
9676+ continue;
9677+
9678+ if (mp_irqs[i].mp_srcbus != m->mpc_srcbus)
9679+ continue;
9680+ if (mp_irqs[i].mp_srcbusirq != m->mpc_srcbusirq)
9681+ continue;
9682+ if (irq_used[i]) {
9683+ /* already claimed */
9684+ return -2;
9685+ }
9686+ irq_used[i] = 1;
9687+ return i;
9688 }
9689
9690- printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi);
9691+ /* not found */
9692 return -1;
9693 }
9694
9695-static u8 __init uniq_ioapic_id(u8 id)
9696-{
9697-#ifdef CONFIG_X86_32
9698- if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
9699- !APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
9700- return io_apic_get_unique_id(nr_ioapics, id);
9701- else
9702- return id;
9703-#else
9704- int i;
9705- DECLARE_BITMAP(used, 256);
9706- bitmap_zero(used, 256);
9707- for (i = 0; i < nr_ioapics; i++) {
9708- struct mpc_config_ioapic *ia = &mp_ioapics[i];
9709- __set_bit(ia->mpc_apicid, used);
9710- }
9711- if (!test_bit(id, used))
9712- return id;
9713- return find_first_zero_bit(used, 256);
9714+#define SPARE_SLOT_NUM 20
9715+
9716+static struct mpc_config_intsrc __initdata *m_spare[SPARE_SLOT_NUM];
9717 #endif
9718-}
9719
9720-void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
9721+static int __init replace_intsrc_all(struct mp_config_table *mpc,
9722+ unsigned long mpc_new_phys,
9723+ unsigned long mpc_new_length)
9724 {
9725- int idx = 0;
9726-
9727- if (bad_ioapic(address))
9728- return;
9729+#ifdef CONFIG_X86_IO_APIC
9730+ int i;
9731+ int nr_m_spare = 0;
9732+#endif
9733
9734- idx = nr_ioapics;
9735+ int count = sizeof(*mpc);
9736+ unsigned char *mpt = ((unsigned char *)mpc) + count;
9737
9738- mp_ioapics[idx].mpc_type = MP_IOAPIC;
9739- mp_ioapics[idx].mpc_flags = MPC_APIC_USABLE;
9740- mp_ioapics[idx].mpc_apicaddr = address;
9741+ printk(KERN_INFO "mpc_length %x\n", mpc->mpc_length);
9742+ while (count < mpc->mpc_length) {
9743+ switch (*mpt) {
9744+ case MP_PROCESSOR:
9745+ {
9746+ struct mpc_config_processor *m =
9747+ (struct mpc_config_processor *)mpt;
9748+ mpt += sizeof(*m);
9749+ count += sizeof(*m);
9750+ break;
9751+ }
9752+ case MP_BUS:
9753+ {
9754+ struct mpc_config_bus *m =
9755+ (struct mpc_config_bus *)mpt;
9756+ mpt += sizeof(*m);
9757+ count += sizeof(*m);
9758+ break;
9759+ }
9760+ case MP_IOAPIC:
9761+ {
9762+ mpt += sizeof(struct mpc_config_ioapic);
9763+ count += sizeof(struct mpc_config_ioapic);
9764+ break;
9765+ }
9766+ case MP_INTSRC:
9767+ {
9768+#ifdef CONFIG_X86_IO_APIC
9769+ struct mpc_config_intsrc *m =
9770+ (struct mpc_config_intsrc *)mpt;
9771
9772-#ifndef CONFIG_XEN
9773- set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
9774+ apic_printk(APIC_VERBOSE, "OLD ");
9775+ print_MP_intsrc_info(m);
9776+ i = get_MP_intsrc_index(m);
9777+ if (i > 0) {
9778+ assign_to_mpc_intsrc(&mp_irqs[i], m);
9779+ apic_printk(APIC_VERBOSE, "NEW ");
9780+ print_mp_irq_info(&mp_irqs[i]);
9781+ } else if (!i) {
9782+ /* legacy, do nothing */
9783+ } else if (nr_m_spare < SPARE_SLOT_NUM) {
9784+ /*
9785+ * not found (-1), or duplicated (-2)
9786+ * are invalid entries,
9787+ * we need to use the slot later
9788+ */
9789+ m_spare[nr_m_spare] = m;
9790+ nr_m_spare++;
9791+ }
9792 #endif
9793- mp_ioapics[idx].mpc_apicid = uniq_ioapic_id(id);
9794-#ifdef CONFIG_X86_32
9795- mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx);
9796-#else
9797- mp_ioapics[idx].mpc_apicver = 0;
9798+ mpt += sizeof(struct mpc_config_intsrc);
9799+ count += sizeof(struct mpc_config_intsrc);
9800+ break;
9801+ }
9802+ case MP_LINTSRC:
9803+ {
9804+ struct mpc_config_lintsrc *m =
9805+ (struct mpc_config_lintsrc *)mpt;
9806+ mpt += sizeof(*m);
9807+ count += sizeof(*m);
9808+ break;
9809+ }
9810+ default:
9811+ /* wrong mptable */
9812+ printk(KERN_ERR "Your mptable is wrong, contact your HW vendor!\n");
9813+ printk(KERN_ERR "type %x\n", *mpt);
9814+ print_hex_dump(KERN_ERR, " ", DUMP_PREFIX_ADDRESS, 16,
9815+ 1, mpc, mpc->mpc_length, 1);
9816+ goto out;
9817+ }
9818+ }
9819+
9820+#ifdef CONFIG_X86_IO_APIC
9821+ for (i = 0; i < mp_irq_entries; i++) {
9822+ if (irq_used[i])
9823+ continue;
9824+
9825+ if (mp_irqs[i].mp_irqtype != mp_INT)
9826+ continue;
9827+
9828+ if (mp_irqs[i].mp_irqflag != 0x0f)
9829+ continue;
9830+
9831+ if (nr_m_spare > 0) {
9832+ apic_printk(APIC_VERBOSE, "*NEW* found\n");
9833+ nr_m_spare--;
9834+ assign_to_mpc_intsrc(&mp_irqs[i], m_spare[nr_m_spare]);
9835+ m_spare[nr_m_spare] = NULL;
9836+ } else {
9837+ struct mpc_config_intsrc *m =
9838+ (struct mpc_config_intsrc *)mpt;
9839+ count += sizeof(struct mpc_config_intsrc);
9840+ if (!mpc_new_phys) {
9841+ printk(KERN_INFO "No spare slots, try to append...take your risk, new mpc_length %x\n", count);
9842+ } else {
9843+ if (count <= mpc_new_length)
9844+ printk(KERN_INFO "No spare slots, try to append..., new mpc_length %x\n", count);
9845+ else {
9846+ printk(KERN_ERR "mpc_new_length %lx is too small\n", mpc_new_length);
9847+ goto out;
9848+ }
9849+ }
9850+ assign_to_mpc_intsrc(&mp_irqs[i], m);
9851+ mpc->mpc_length = count;
9852+ mpt += sizeof(struct mpc_config_intsrc);
9853+ }
9854+ print_mp_irq_info(&mp_irqs[i]);
9855+ }
9856 #endif
9857- /*
9858- * Build basic GSI lookup table to facilitate gsi->io_apic lookups
9859- * and to prevent reprogramming of IOAPIC pins (PCI GSIs).
9860- */
9861- mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mpc_apicid;
9862- mp_ioapic_routing[idx].gsi_base = gsi_base;
9863- mp_ioapic_routing[idx].gsi_end = gsi_base +
9864- io_apic_get_redir_entries(idx);
9865-
9866- printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, "
9867- "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid,
9868- mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr,
9869- mp_ioapic_routing[idx].gsi_base, mp_ioapic_routing[idx].gsi_end);
9870+out:
9871+ /* update checksum */
9872+ mpc->mpc_checksum = 0;
9873+ mpc->mpc_checksum -= mpf_checksum((unsigned char *)mpc,
9874+ mpc->mpc_length);
9875
9876- nr_ioapics++;
9877+ return 0;
9878 }
9879
9880-void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
9881-{
9882- struct mpc_config_intsrc intsrc;
9883- int ioapic = -1;
9884- int pin = -1;
9885-
9886- /*
9887- * Convert 'gsi' to 'ioapic.pin'.
9888- */
9889- ioapic = mp_find_ioapic(gsi);
9890- if (ioapic < 0)
9891- return;
9892- pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
9893+static int __initdata enable_update_mptable;
9894
9895- /*
9896- * TBD: This check is for faulty timer entries, where the override
9897- * erroneously sets the trigger to level, resulting in a HUGE
9898- * increase of timer interrupts!
9899- */
9900- if ((bus_irq == 0) && (trigger == 3))
9901- trigger = 1;
9902+static int __init update_mptable_setup(char *str)
9903+{
9904+ enable_update_mptable = 1;
9905+ return 0;
9906+}
9907+early_param("update_mptable", update_mptable_setup);
9908
9909- intsrc.mpc_type = MP_INTSRC;
9910- intsrc.mpc_irqtype = mp_INT;
9911- intsrc.mpc_irqflag = (trigger << 2) | polarity;
9912- intsrc.mpc_srcbus = MP_ISA_BUS;
9913- intsrc.mpc_srcbusirq = bus_irq; /* IRQ */
9914- intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; /* APIC ID */
9915- intsrc.mpc_dstirq = pin; /* INTIN# */
9916+static unsigned long __initdata mpc_new_phys;
9917+static unsigned long mpc_new_length __initdata = 4096;
9918
9919- MP_intsrc_info(&intsrc);
9920+/* alloc_mptable or alloc_mptable=4k */
9921+static int __initdata alloc_mptable;
9922+static int __init parse_alloc_mptable_opt(char *p)
9923+{
9924+ enable_update_mptable = 1;
9925+ alloc_mptable = 1;
9926+ if (!p)
9927+ return 0;
9928+ mpc_new_length = memparse(p, &p);
9929+ return 0;
9930 }
9931+early_param("alloc_mptable", parse_alloc_mptable_opt);
9932
9933-void __init mp_config_acpi_legacy_irqs(void)
9934+void __init early_reserve_e820_mpc_new(void)
9935 {
9936- struct mpc_config_intsrc intsrc;
9937- int i = 0;
9938- int ioapic = -1;
9939-
9940-#if defined (CONFIG_MCA) || defined (CONFIG_EISA)
9941- /*
9942- * Fabricate the legacy ISA bus (bus #31).
9943- */
9944- mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA;
9945+ if (enable_update_mptable && alloc_mptable) {
9946+ u64 startt = 0;
9947+#ifdef CONFIG_X86_TRAMPOLINE
9948+ startt = TRAMPOLINE_BASE;
9949 #endif
9950- set_bit(MP_ISA_BUS, mp_bus_not_pci);
9951- Dprintk("Bus #%d is ISA\n", MP_ISA_BUS);
9952-
9953- /*
9954- * Older generations of ES7000 have no legacy identity mappings
9955- */
9956- if (es7000_plat == 1)
9957- return;
9958-
9959- /*
9960- * Locate the IOAPIC that manages the ISA IRQs (0-15).
9961- */
9962- ioapic = mp_find_ioapic(0);
9963- if (ioapic < 0)
9964- return;
9965-
9966- intsrc.mpc_type = MP_INTSRC;
9967- intsrc.mpc_irqflag = 0; /* Conforming */
9968- intsrc.mpc_srcbus = MP_ISA_BUS;
9969-#ifdef CONFIG_X86_IO_APIC
9970- intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid;
9971-#endif
9972- /*
9973- * Use the default configuration for the IRQs 0-15. Unless
9974- * overridden by (MADT) interrupt source override entries.
9975- */
9976- for (i = 0; i < 16; i++) {
9977- int idx;
9978-
9979- for (idx = 0; idx < mp_irq_entries; idx++) {
9980- struct mpc_config_intsrc *irq = mp_irqs + idx;
9981-
9982- /* Do we already have a mapping for this ISA IRQ? */
9983- if (irq->mpc_srcbus == MP_ISA_BUS
9984- && irq->mpc_srcbusirq == i)
9985- break;
9986-
9987- /* Do we already have a mapping for this IOAPIC pin */
9988- if ((irq->mpc_dstapic == intsrc.mpc_dstapic) &&
9989- (irq->mpc_dstirq == i))
9990- break;
9991- }
9992-
9993- if (idx != mp_irq_entries) {
9994- printk(KERN_DEBUG "ACPI: IRQ%d used by override.\n", i);
9995- continue; /* IRQ already used */
9996- }
9997-
9998- intsrc.mpc_irqtype = mp_INT;
9999- intsrc.mpc_srcbusirq = i; /* Identity mapped */
10000- intsrc.mpc_dstirq = i;
10001-
10002- MP_intsrc_info(&intsrc);
10003+ mpc_new_phys = early_reserve_e820(startt, mpc_new_length, 4);
10004 }
10005 }
10006
10007-int mp_register_gsi(u32 gsi, int triggering, int polarity)
10008+static int __init update_mp_table(void)
10009 {
10010- int ioapic;
10011- int ioapic_pin;
10012-#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN)
10013-#define MAX_GSI_NUM 4096
10014-#define IRQ_COMPRESSION_START 64
10015+ char str[16];
10016+ char oem[10];
10017+ struct intel_mp_floating *mpf;
10018+ struct mp_config_table *mpc;
10019+ struct mp_config_table *mpc_new;
10020+
10021+ if (!enable_update_mptable)
10022+ return 0;
10023+
10024+ mpf = mpf_found;
10025+ if (!mpf)
10026+ return 0;
10027
10028- static int pci_irq = IRQ_COMPRESSION_START;
10029 /*
10030- * Mapping between Global System Interrupts, which
10031- * represent all possible interrupts, and IRQs
10032- * assigned to actual devices.
10033+ * Now see if we need to go further.
10034 */
10035- static int gsi_to_irq[MAX_GSI_NUM];
10036-#else
10037+ if (mpf->mpf_feature1 != 0)
10038+ return 0;
10039
10040- if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC)
10041- return gsi;
10042-#endif
10043+ if (!mpf->mpf_physptr)
10044+ return 0;
10045
10046- /* Don't set up the ACPI SCI because it's already set up */
10047- if (acpi_gbl_FADT.sci_interrupt == gsi)
10048- return gsi;
10049+ mpc = _bus_to_virt(mpf->mpf_physptr);
10050
10051- ioapic = mp_find_ioapic(gsi);
10052- if (ioapic < 0) {
10053- printk(KERN_WARNING "No IOAPIC for GSI %u\n", gsi);
10054- return gsi;
10055- }
10056+ if (!smp_check_mpc(mpc, oem, str))
10057+ return 0;
10058
10059- ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
10060+ printk(KERN_INFO "mpf: %lx\n", (long)arbitrary_virt_to_machine(mpf));
10061+ printk(KERN_INFO "mpf_physptr: %x\n", mpf->mpf_physptr);
10062
10063-#ifndef CONFIG_X86_32
10064- if (ioapic_renumber_irq)
10065- gsi = ioapic_renumber_irq(ioapic, gsi);
10066-#endif
10067+ if (mpc_new_phys && mpc->mpc_length > mpc_new_length) {
10068+ mpc_new_phys = 0;
10069+ printk(KERN_INFO "mpc_new_length is %ld, please use alloc_mptable=8k\n",
10070+ mpc_new_length);
10071+ }
10072+
10073+ if (!mpc_new_phys) {
10074+ unsigned char old, new;
10075+ /* check if we can change the position */
10076+ mpc->mpc_checksum = 0;
10077+ old = mpf_checksum((unsigned char *)mpc, mpc->mpc_length);
10078+ mpc->mpc_checksum = 0xff;
10079+ new = mpf_checksum((unsigned char *)mpc, mpc->mpc_length);
10080+ if (old == new) {
10081+ printk(KERN_INFO "mpc is readonly, please try alloc_mptable instead\n");
10082+ return 0;
10083+ }
10084+ printk(KERN_INFO "use in-positon replacing\n");
10085+ } else {
10086+ maddr_t mpc_new_bus;
10087
10088- /*
10089- * Avoid pin reprogramming. PRTs typically include entries
10090- * with redundant pin->gsi mappings (but unique PCI devices);
10091- * we only program the IOAPIC on the first.
10092- */
10093- if (ioapic_pin > MP_MAX_IOAPIC_PIN) {
10094- printk(KERN_ERR "Invalid reference to IOAPIC pin "
10095- "%d-%d\n", mp_ioapic_routing[ioapic].apic_id,
10096- ioapic_pin);
10097- return gsi;
10098- }
10099- if (test_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed)) {
10100- Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n",
10101- mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
10102-#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN)
10103- return (gsi < IRQ_COMPRESSION_START ? gsi : gsi_to_irq[gsi]);
10104-#else
10105- return gsi;
10106-#endif
10107+ mpc_new_bus = phys_to_machine(mpc_new_phys);
10108+ mpf->mpf_physptr = mpc_new_bus;
10109+ mpc_new = phys_to_virt(mpc_new_phys);
10110+ memcpy(mpc_new, mpc, mpc->mpc_length);
10111+ mpc = mpc_new;
10112+ /* check if we can modify that */
10113+ if (mpc_new_bus - mpf->mpf_physptr) {
10114+ struct intel_mp_floating *mpf_new;
10115+ /* steal 16 bytes from [0, 1k) */
10116+ printk(KERN_INFO "mpf new: %x\n", 0x400 - 16);
10117+ mpf_new = isa_bus_to_virt(0x400 - 16);
10118+ memcpy(mpf_new, mpf, 16);
10119+ mpf = mpf_new;
10120+ mpf->mpf_physptr = mpc_new_bus;
10121+ }
10122+ mpf->mpf_checksum = 0;
10123+ mpf->mpf_checksum -= mpf_checksum((unsigned char *)mpf, 16);
10124+ printk(KERN_INFO "mpf_physptr new: %x\n", mpf->mpf_physptr);
10125 }
10126
10127- set_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed);
10128-#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN)
10129 /*
10130- * For GSI >= 64, use IRQ compression
10131+ * only replace the one with mp_INT and
10132+ * MP_IRQ_TRIGGER_LEVEL|MP_IRQ_POLARITY_LOW,
10133+ * already in mp_irqs , stored by ... and mp_config_acpi_gsi,
10134+ * may need pci=routeirq for all coverage
10135 */
10136- if ((gsi >= IRQ_COMPRESSION_START)
10137- && (triggering == ACPI_LEVEL_SENSITIVE)) {
10138- /*
10139- * For PCI devices assign IRQs in order, avoiding gaps
10140- * due to unused I/O APIC pins.
10141- */
10142- int irq = gsi;
10143- if (gsi < MAX_GSI_NUM) {
10144- /*
10145- * Retain the VIA chipset work-around (gsi > 15), but
10146- * avoid a problem where the 8254 timer (IRQ0) is setup
10147- * via an override (so it's not on pin 0 of the ioapic),
10148- * and at the same time, the pin 0 interrupt is a PCI
10149- * type. The gsi > 15 test could cause these two pins
10150- * to be shared as IRQ0, and they are not shareable.
10151- * So test for this condition, and if necessary, avoid
10152- * the pin collision.
10153- */
10154- gsi = pci_irq++;
10155- /*
10156- * Don't assign IRQ used by ACPI SCI
10157- */
10158- if (gsi == acpi_gbl_FADT.sci_interrupt)
10159- gsi = pci_irq++;
10160- gsi_to_irq[irq] = gsi;
10161- } else {
10162- printk(KERN_ERR "GSI %u is too high\n", gsi);
10163- return gsi;
10164- }
10165- }
10166-#endif
10167- io_apic_set_pci_routing(ioapic, ioapic_pin, gsi,
10168- triggering == ACPI_EDGE_SENSITIVE ? 0 : 1,
10169- polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
10170- return gsi;
10171+ replace_intsrc_all(mpc, mpc_new_phys, mpc_new_length);
10172+
10173+ return 0;
10174 }
10175
10176-#endif /* CONFIG_X86_IO_APIC */
10177-#endif /* CONFIG_ACPI */
10178+late_initcall(update_mp_table);
82094b55
AF
10179--- sle11-2009-10-16.orig/arch/x86/kernel/nmi.c 2009-10-28 14:55:02.000000000 +0100
10180+++ sle11-2009-10-16/arch/x86/kernel/nmi.c 2009-06-04 10:21:39.000000000 +0200
2cb7cef9
BS
10181@@ -27,7 +27,9 @@
10182 #include <linux/kdebug.h>
10183 #include <linux/smp.h>
10184
10185+#ifndef CONFIG_XEN
10186 #include <asm/i8259.h>
10187+#endif
10188 #include <asm/io_apic.h>
10189 #include <asm/smp.h>
10190 #include <asm/nmi.h>
10191@@ -179,8 +181,10 @@ int __init check_nmi_watchdog(void)
10192 kfree(prev_nmi_count);
10193 return 0;
10194 error:
10195+#ifndef CONFIG_XEN
10196 if (nmi_watchdog == NMI_IO_APIC && !timer_through_8259)
10197 disable_8259A_irq(0);
10198+#endif
10199 #ifdef CONFIG_X86_32
10200 timer_ack = 0;
10201 #endif
82094b55
AF
10202--- sle11-2009-10-16.orig/arch/x86/kernel/pci-dma-xen.c 2009-10-22 11:31:59.000000000 +0200
10203+++ sle11-2009-10-16/arch/x86/kernel/pci-dma-xen.c 2009-06-04 10:21:39.000000000 +0200
2cb7cef9
BS
10204@@ -5,13 +5,13 @@
10205
10206 #include <asm/proto.h>
10207 #include <asm/dma.h>
10208-#include <asm/gart.h>
10209+#include <asm/iommu.h>
10210 #include <asm/calgary.h>
10211+#include <asm/amd_iommu.h>
10212
10213-int forbid_dac __read_mostly;
10214-EXPORT_SYMBOL(forbid_dac);
10215+static int forbid_dac __read_mostly;
10216
10217-const struct dma_mapping_ops *dma_ops;
10218+struct dma_mapping_ops *dma_ops;
10219 EXPORT_SYMBOL(dma_ops);
10220
10221 static int iommu_sac_force __read_mostly;
10222@@ -74,13 +74,17 @@ early_param("dma32_size", parse_dma32_si
10223 void __init dma32_reserve_bootmem(void)
10224 {
10225 unsigned long size, align;
10226- if (end_pfn <= MAX_DMA32_PFN)
10227+ if (max_pfn <= MAX_DMA32_PFN)
10228 return;
10229
10230+ /*
10231+ * check aperture_64.c allocate_aperture() for reason about
10232+ * using 512M as goal
10233+ */
10234 align = 64ULL<<20;
10235 size = round_up(dma32_bootmem_size, align);
10236 dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align,
10237- __pa(MAX_DMA_ADDRESS));
10238+ 512ULL<<20);
10239 if (dma32_bootmem_ptr)
10240 dma32_bootmem_size = size;
10241 else
10242@@ -88,17 +92,14 @@ void __init dma32_reserve_bootmem(void)
10243 }
10244 static void __init dma32_free_bootmem(void)
10245 {
10246- int node;
10247
10248- if (end_pfn <= MAX_DMA32_PFN)
10249+ if (max_pfn <= MAX_DMA32_PFN)
10250 return;
10251
10252 if (!dma32_bootmem_ptr)
10253 return;
10254
10255- for_each_online_node(node)
10256- free_bootmem_node(NODE_DATA(node), __pa(dma32_bootmem_ptr),
10257- dma32_bootmem_size);
10258+ free_bootmem(__pa(dma32_bootmem_ptr), dma32_bootmem_size);
10259
10260 dma32_bootmem_ptr = NULL;
10261 dma32_bootmem_size = 0;
10262@@ -107,7 +108,7 @@ static void __init dma32_free_bootmem(vo
10263 #define dma32_free_bootmem() ((void)0)
10264 #endif
10265
10266-static const struct dma_mapping_ops swiotlb_dma_ops = {
10267+static struct dma_mapping_ops swiotlb_dma_ops = {
10268 .mapping_error = swiotlb_dma_mapping_error,
10269 .map_single = swiotlb_map_single_phys,
10270 .unmap_single = swiotlb_unmap_single,
10271@@ -130,25 +131,31 @@ void __init pci_iommu_alloc(void)
10272 * The order of these functions is important for
10273 * fall-back/fail-over reasons
10274 */
10275-#ifdef CONFIG_GART_IOMMU
10276 gart_iommu_hole_init();
10277-#endif
10278
10279-#ifdef CONFIG_CALGARY_IOMMU
10280 detect_calgary();
10281-#endif
10282
10283 detect_intel_iommu();
10284
10285-#ifdef CONFIG_SWIOTLB
10286+ amd_iommu_detect();
10287+
10288 swiotlb_init();
10289 if (swiotlb) {
10290 printk(KERN_INFO "PCI-DMA: Using software bounce buffering for IO (SWIOTLB)\n");
10291 dma_ops = &swiotlb_dma_ops;
10292 }
10293-#endif
10294 }
10295
10296+#ifndef CONFIG_XEN
10297+unsigned long iommu_num_pages(unsigned long addr, unsigned long len)
10298+{
10299+ unsigned long size = roundup((addr & ~PAGE_MASK) + len, PAGE_SIZE);
10300+
10301+ return size >> PAGE_SHIFT;
10302+}
10303+EXPORT_SYMBOL(iommu_num_pages);
10304+#endif
10305+
10306 /*
10307 * See <Documentation/x86_64/boot-options.txt> for the iommu kernel parameter
10308 * documentation.
10309@@ -201,9 +208,7 @@ static __init int iommu_setup(char *p)
10310 swiotlb = 1;
10311 #endif
10312
10313-#ifdef CONFIG_GART_IOMMU
10314 gart_parse_options(p);
10315-#endif
10316
10317 #ifdef CONFIG_CALGARY_IOMMU
10318 if (!strncmp(p, "calgary", 7))
10319@@ -245,136 +250,19 @@ int range_straddles_page_boundary(paddr_
10320 !check_pages_physically_contiguous(pfn, offset, size));
10321 }
10322
10323-#ifdef CONFIG_X86_32
10324-int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
10325- dma_addr_t device_addr, size_t size, int flags)
10326-{
10327- void __iomem *mem_base = NULL;
10328- int pages = size >> PAGE_SHIFT;
10329- int bitmap_size = BITS_TO_LONGS(pages) * sizeof(long);
10330-
10331- if ((flags & (DMA_MEMORY_MAP | DMA_MEMORY_IO)) == 0)
10332- goto out;
10333- if (!size)
10334- goto out;
10335- if (dev->dma_mem)
10336- goto out;
10337-
10338- /* FIXME: this routine just ignores DMA_MEMORY_INCLUDES_CHILDREN */
10339-
10340- mem_base = ioremap(bus_addr, size);
10341- if (!mem_base)
10342- goto out;
10343-
10344- dev->dma_mem = kzalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL);
10345- if (!dev->dma_mem)
10346- goto out;
10347- dev->dma_mem->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
10348- if (!dev->dma_mem->bitmap)
10349- goto free1_out;
10350-
10351- dev->dma_mem->virt_base = mem_base;
10352- dev->dma_mem->device_base = device_addr;
10353- dev->dma_mem->size = pages;
10354- dev->dma_mem->flags = flags;
10355-
10356- if (flags & DMA_MEMORY_MAP)
10357- return DMA_MEMORY_MAP;
10358-
10359- return DMA_MEMORY_IO;
10360-
10361- free1_out:
10362- kfree(dev->dma_mem);
10363- out:
10364- if (mem_base)
10365- iounmap(mem_base);
10366- return 0;
10367-}
10368-EXPORT_SYMBOL(dma_declare_coherent_memory);
10369-
10370-void dma_release_declared_memory(struct device *dev)
10371-{
10372- struct dma_coherent_mem *mem = dev->dma_mem;
10373-
10374- if (!mem)
10375- return;
10376- dev->dma_mem = NULL;
10377- iounmap(mem->virt_base);
10378- kfree(mem->bitmap);
10379- kfree(mem);
10380-}
10381-EXPORT_SYMBOL(dma_release_declared_memory);
10382-
10383-void *dma_mark_declared_memory_occupied(struct device *dev,
10384- dma_addr_t device_addr, size_t size)
10385-{
10386- struct dma_coherent_mem *mem = dev->dma_mem;
10387- int pos, err;
10388- int pages = (size + (device_addr & ~PAGE_MASK) + PAGE_SIZE - 1);
10389-
10390- pages >>= PAGE_SHIFT;
10391-
10392- if (!mem)
10393- return ERR_PTR(-EINVAL);
10394-
10395- pos = (device_addr - mem->device_base) >> PAGE_SHIFT;
10396- err = bitmap_allocate_region(mem->bitmap, pos, get_order(pages));
10397- if (err != 0)
10398- return ERR_PTR(err);
10399- return mem->virt_base + (pos << PAGE_SHIFT);
10400-}
10401-EXPORT_SYMBOL(dma_mark_declared_memory_occupied);
10402-
10403-static int dma_alloc_from_coherent_mem(struct device *dev, ssize_t size,
10404- dma_addr_t *dma_handle, void **ret)
10405-{
10406- struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
10407- int order = get_order(size);
10408-
10409- if (mem) {
10410- int page = bitmap_find_free_region(mem->bitmap, mem->size,
10411- order);
10412- if (page >= 0) {
10413- *dma_handle = mem->device_base + (page << PAGE_SHIFT);
10414- *ret = mem->virt_base + (page << PAGE_SHIFT);
10415- memset(*ret, 0, size);
10416- }
10417- if (mem->flags & DMA_MEMORY_EXCLUSIVE)
10418- *ret = NULL;
10419- }
10420- return (mem != NULL);
10421-}
10422-
10423-static int dma_release_coherent(struct device *dev, int order, void *vaddr)
10424-{
10425- struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
10426-
10427- if (mem && vaddr >= mem->virt_base && vaddr <
10428- (mem->virt_base + (mem->size << PAGE_SHIFT))) {
10429- int page = (vaddr - mem->virt_base) >> PAGE_SHIFT;
10430-
10431- bitmap_release_region(mem->bitmap, page, order);
10432- return 1;
10433- }
10434- return 0;
10435-}
10436-#else
10437-#define dma_alloc_from_coherent_mem(dev, size, handle, ret) (0)
10438-#define dma_release_coherent(dev, order, vaddr) (0)
10439-#endif /* CONFIG_X86_32 */
10440-
10441 int dma_supported(struct device *dev, u64 mask)
10442 {
10443+ struct dma_mapping_ops *ops = get_dma_ops(dev);
10444+
10445 #ifdef CONFIG_PCI
10446 if (mask > 0xffffffff && forbid_dac > 0) {
10447- printk(KERN_INFO "PCI: Disallowing DAC for device %s\n",
10448- dev->bus_id);
10449+ dev_info(dev, "PCI: Disallowing DAC for device\n");
10450 return 0;
10451 }
10452 #endif
10453
10454- if (dma_ops->dma_supported)
10455- return dma_ops->dma_supported(dev, mask);
10456+ if (ops->dma_supported)
10457+ return ops->dma_supported(dev, mask);
10458
10459 /* Copied from i386. Doesn't make much sense, because it will
10460 only work for pci_alloc_coherent.
10461@@ -395,8 +283,7 @@ int dma_supported(struct device *dev, u6
10462 type. Normally this doesn't make any difference, but gives
10463 more gentle handling of IOMMU overflow. */
10464 if (iommu_sac_force && (mask >= DMA_40BIT_MASK)) {
10465- printk(KERN_INFO "%s: Force SAC with mask %Lx\n",
10466- dev->bus_id, mask);
10467+ dev_info(dev, "Force SAC with mask %Lx\n", mask);
10468 return 0;
10469 }
10470
10471@@ -422,6 +309,9 @@ void *
10472 dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
10473 gfp_t gfp)
10474 {
10475+#ifndef CONFIG_XEN
10476+ struct dma_mapping_ops *ops = get_dma_ops(dev);
10477+#endif
10478 void *memory = NULL;
10479 struct page *page;
10480 unsigned long dma_mask = 0;
10481@@ -431,7 +321,7 @@ dma_alloc_coherent(struct device *dev, s
10482 /* ignore region specifiers */
10483 gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
10484
10485- if (dma_alloc_from_coherent_mem(dev, size, dma_handle, &memory))
10486+ if (dma_alloc_from_coherent(dev, size, dma_handle, &memory))
10487 return memory;
10488
10489 if (!dev) {
10490@@ -491,8 +381,8 @@ dma_alloc_coherent(struct device *dev, s
10491 /* Let low level make its own zone decisions */
10492 gfp &= ~(GFP_DMA32|GFP_DMA);
10493
10494- if (dma_ops->alloc_coherent)
10495- return dma_ops->alloc_coherent(dev, size,
10496+ if (ops->alloc_coherent)
10497+ return ops->alloc_coherent(dev, size,
10498 dma_handle, gfp);
10499 return NULL;
10500 }
10501@@ -504,14 +394,14 @@ dma_alloc_coherent(struct device *dev, s
10502 }
10503 }
10504
10505- if (dma_ops->alloc_coherent) {
10506+ if (ops->alloc_coherent) {
10507 free_pages((unsigned long)memory, order);
10508 gfp &= ~(GFP_DMA|GFP_DMA32);
10509- return dma_ops->alloc_coherent(dev, size, dma_handle, gfp);
10510+ return ops->alloc_coherent(dev, size, dma_handle, gfp);
10511 }
10512
10513- if (dma_ops->map_simple) {
10514- *dma_handle = dma_ops->map_simple(dev, virt_to_bus(memory),
10515+ if (ops->map_simple) {
10516+ *dma_handle = ops->map_simple(dev, virt_to_bus(memory),
10517 size,
10518 PCI_DMA_BIDIRECTIONAL);
10519 if (*dma_handle != bad_dma_address)
10520@@ -542,13 +432,17 @@ EXPORT_SYMBOL(dma_alloc_coherent);
10521 void dma_free_coherent(struct device *dev, size_t size,
10522 void *vaddr, dma_addr_t bus)
10523 {
10524+#ifndef CONFIG_XEN
10525+ struct dma_mapping_ops *ops = get_dma_ops(dev);
10526+#endif
10527+
10528 int order = get_order(size);
10529 WARN_ON(irqs_disabled()); /* for portability */
10530- if (dma_release_coherent(dev, order, vaddr))
10531+ if (dma_release_from_coherent(dev, order, vaddr))
10532 return;
10533 #ifndef CONFIG_XEN
10534- if (dma_ops->unmap_single)
10535- dma_ops->unmap_single(dev, bus, size, 0);
10536+ if (ops->unmap_single)
10537+ ops->unmap_single(dev, bus, size, 0);
10538 #endif
10539 xen_destroy_contiguous_region((unsigned long)vaddr, order);
10540 free_pages((unsigned long)vaddr, order);
10541@@ -557,15 +451,13 @@ EXPORT_SYMBOL(dma_free_coherent);
10542
10543 static int __init pci_iommu_init(void)
10544 {
10545-#ifdef CONFIG_CALGARY_IOMMU
10546 calgary_iommu_init();
10547-#endif
10548
10549 intel_iommu_init();
10550
10551-#ifdef CONFIG_GART_IOMMU
10552+ amd_iommu_init();
10553+
10554 gart_iommu_init();
10555-#endif
10556
10557 no_iommu_init();
10558 return 0;
82094b55
AF
10559--- sle11-2009-10-16.orig/arch/x86/kernel/pci-nommu-xen.c 2009-03-16 16:38:05.000000000 +0100
10560+++ sle11-2009-10-16/arch/x86/kernel/pci-nommu-xen.c 2009-06-04 10:21:39.000000000 +0200
2cb7cef9
BS
10561@@ -79,18 +79,12 @@ gnttab_unmap_single(struct device *dev,
10562 gnttab_dma_unmap_page(dma_addr);
10563 }
10564
10565-static int nommu_mapping_error(dma_addr_t dma_addr)
10566-{
10567- return (dma_addr == bad_dma_address);
10568-}
10569-
10570-static const struct dma_mapping_ops nommu_dma_ops = {
10571+static struct dma_mapping_ops nommu_dma_ops = {
10572 .map_single = gnttab_map_single,
10573 .unmap_single = gnttab_unmap_single,
10574 .map_sg = gnttab_map_sg,
10575 .unmap_sg = gnttab_unmap_sg,
10576 .dma_supported = swiotlb_dma_supported,
10577- .mapping_error = nommu_mapping_error
10578 };
10579
10580 void __init no_iommu_init(void)
82094b55
AF
10581--- sle11-2009-10-16.orig/arch/x86/kernel/probe_roms_32.c 2009-10-28 14:55:02.000000000 +0100
10582+++ sle11-2009-10-16/arch/x86/kernel/probe_roms_32.c 2009-06-04 10:21:39.000000000 +0200
2cb7cef9
BS
10583@@ -99,6 +99,11 @@ void __init probe_roms(void)
10584 unsigned char c;
10585 int i;
10586
10587+#ifdef CONFIG_XEN
10588+ if (!is_initial_xendomain())
10589+ return;
10590+#endif
10591+
10592 /* video rom */
10593 upper = adapter_rom_resources[0].start;
10594 for (start = video_rom_resource.start; start < upper; start += 2048) {
10595@@ -131,7 +136,7 @@ void __init probe_roms(void)
10596 upper = system_rom_resource.start;
10597
10598 /* check for extension rom (ignore length byte!) */
10599- rom = isa_bus_to_virt(extension_rom_resource.start);
10600+ rom = isa_bus_to_virt((unsigned long)extension_rom_resource.start);
10601 if (romsignature(rom)) {
10602 length = extension_rom_resource.end - extension_rom_resource.start + 1;
10603 if (romchecksum(rom, length)) {
82094b55
AF
10604--- sle11-2009-10-16.orig/arch/x86/kernel/process-xen.c 2009-03-16 16:38:05.000000000 +0100
10605+++ sle11-2009-10-16/arch/x86/kernel/process-xen.c 2009-06-04 10:21:39.000000000 +0200
2cb7cef9
BS
10606@@ -6,6 +6,13 @@
10607 #include <linux/sched.h>
10608 #include <linux/module.h>
10609 #include <linux/pm.h>
10610+#include <linux/clockchips.h>
10611+#include <asm/system.h>
10612+
10613+unsigned long idle_halt;
10614+EXPORT_SYMBOL(idle_halt);
10615+unsigned long idle_nomwait;
10616+EXPORT_SYMBOL(idle_nomwait);
10617
10618 struct kmem_cache *task_xstate_cachep;
10619
10620@@ -45,6 +52,70 @@ void arch_task_cache_init(void)
10621 SLAB_PANIC, NULL);
10622 }
10623
10624+/*
10625+ * Idle related variables and functions
10626+ */
10627+unsigned long boot_option_idle_override = 0;
10628+EXPORT_SYMBOL(boot_option_idle_override);
10629+
10630+/*
10631+ * Powermanagement idle function, if any..
10632+ */
10633+void (*pm_idle)(void);
10634+EXPORT_SYMBOL(pm_idle);
10635+
10636+#ifdef CONFIG_X86_32
10637+/*
10638+ * This halt magic was a workaround for ancient floppy DMA
10639+ * wreckage. It should be safe to remove.
10640+ */
10641+static int hlt_counter;
10642+void disable_hlt(void)
10643+{
10644+ hlt_counter++;
10645+}
10646+EXPORT_SYMBOL(disable_hlt);
10647+
10648+void enable_hlt(void)
10649+{
10650+ hlt_counter--;
10651+}
10652+EXPORT_SYMBOL(enable_hlt);
10653+
10654+static inline int hlt_use_halt(void)
10655+{
10656+ return (!hlt_counter && boot_cpu_data.hlt_works_ok);
10657+}
10658+#else
10659+static inline int hlt_use_halt(void)
10660+{
10661+ return 1;
10662+}
10663+#endif
10664+
10665+/*
10666+ * We use this if we don't have any better
10667+ * idle routine..
10668+ */
10669+void xen_idle(void)
10670+{
10671+ current_thread_info()->status &= ~TS_POLLING;
10672+ /*
10673+ * TS_POLLING-cleared state must be visible before we
10674+ * test NEED_RESCHED:
10675+ */
10676+ smp_mb();
10677+
10678+ if (!need_resched())
10679+ safe_halt(); /* enables interrupts racelessly */
10680+ else
10681+ local_irq_enable();
10682+ current_thread_info()->status |= TS_POLLING;
10683+}
10684+#ifdef CONFIG_APM_MODULE
10685+EXPORT_SYMBOL(default_idle);
10686+#endif
10687+
10688 static void do_nothing(void *unused)
10689 {
10690 }
10691@@ -61,7 +132,7 @@ void cpu_idle_wait(void)
10692 {
10693 smp_mb();
10694 /* kick all the CPUs so that they exit out of pm_idle */
10695- smp_call_function(do_nothing, NULL, 0, 1);
10696+ smp_call_function(do_nothing, NULL, 1);
10697 }
10698 EXPORT_SYMBOL_GPL(cpu_idle_wait);
10699
10700@@ -125,60 +196,175 @@ static void poll_idle(void)
10701 *
10702 * idle=mwait overrides this decision and forces the usage of mwait.
10703 */
10704+static int __cpuinitdata force_mwait;
10705+
10706+#define MWAIT_INFO 0x05
10707+#define MWAIT_ECX_EXTENDED_INFO 0x01
10708+#define MWAIT_EDX_C1 0xf0
10709+
10710 static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c)
10711 {
10712+ u32 eax, ebx, ecx, edx;
10713+
10714 if (force_mwait)
10715 return 1;
10716
10717- if (c->x86_vendor == X86_VENDOR_AMD) {
10718- switch(c->x86) {
10719- case 0x10:
10720- case 0x11:
10721- return 0;
10722+ if (c->cpuid_level < MWAIT_INFO)
10723+ return 0;
10724+
10725+ cpuid(MWAIT_INFO, &eax, &ebx, &ecx, &edx);
10726+ /* Check, whether EDX has extended info about MWAIT */
10727+ if (!(ecx & MWAIT_ECX_EXTENDED_INFO))
10728+ return 1;
10729+
10730+ /*
10731+ * edx enumerates MONITOR/MWAIT extensions. Check, whether
10732+ * C1 supports MWAIT
10733+ */
10734+ return (edx & MWAIT_EDX_C1);
10735+}
10736+
10737+/*
10738+ * Check for AMD CPUs, which have potentially C1E support
10739+ */
10740+static int __cpuinit check_c1e_idle(const struct cpuinfo_x86 *c)
10741+{
10742+ if (c->x86_vendor != X86_VENDOR_AMD)
10743+ return 0;
10744+
10745+ if (c->x86 < 0x0F)
10746+ return 0;
10747+
10748+ /* Family 0x0f models < rev F do not have C1E */
10749+ if (c->x86 == 0x0f && c->x86_model < 0x40)
10750+ return 0;
10751+
10752+ return 1;
10753+}
10754+
10755+static cpumask_t c1e_mask = CPU_MASK_NONE;
10756+static int c1e_detected;
10757+
10758+void c1e_remove_cpu(int cpu)
10759+{
10760+ cpu_clear(cpu, c1e_mask);
10761+}
10762+
10763+/*
10764+ * C1E aware idle routine. We check for C1E active in the interrupt
10765+ * pending message MSR. If we detect C1E, then we handle it the same
10766+ * way as C3 power states (local apic timer and TSC stop)
10767+ */
10768+static void c1e_idle(void)
10769+{
10770+ if (need_resched())
10771+ return;
10772+
10773+ if (!c1e_detected) {
10774+ u32 lo, hi;
10775+
10776+ rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi);
10777+ if (lo & K8_INTP_C1E_ACTIVE_MASK) {
10778+ c1e_detected = 1;
10779+ if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
10780+ mark_tsc_unstable("TSC halt in AMD C1E");
10781+ printk(KERN_INFO "System has AMD C1E enabled\n");
10782+ set_cpu_cap(&boot_cpu_data, X86_FEATURE_AMDC1E);
10783 }
10784 }
10785- return 1;
10786+
10787+ if (c1e_detected) {
10788+ int cpu = smp_processor_id();
10789+
10790+ if (!cpu_isset(cpu, c1e_mask)) {
10791+ cpu_set(cpu, c1e_mask);
10792+ /*
10793+ * Force broadcast so ACPI can not interfere. Needs
10794+ * to run with interrupts enabled as it uses
10795+ * smp_function_call.
10796+ */
10797+ local_irq_enable();
10798+ clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE,
10799+ &cpu);
10800+ printk(KERN_INFO "Switch to broadcast mode on CPU%d\n",
10801+ cpu);
10802+ local_irq_disable();
10803+ }
10804+ clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu);
10805+
10806+ default_idle();
10807+
10808+ /*
10809+ * The switch back from broadcast mode needs to be
10810+ * called with interrupts disabled.
10811+ */
10812+ local_irq_disable();
10813+ clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu);
10814+ local_irq_enable();
10815+ } else
10816+ default_idle();
10817 }
10818 #endif
10819
10820 void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
10821 {
10822 #ifndef CONFIG_XEN
10823- static int selected;
10824-
10825- if (selected)
10826- return;
10827 #ifdef CONFIG_X86_SMP
10828 if (pm_idle == poll_idle && smp_num_siblings > 1) {
10829 printk(KERN_WARNING "WARNING: polling idle and HT enabled,"
10830 " performance may degrade.\n");
10831 }
10832 #endif
10833+ if (pm_idle)
10834+ return;
10835+
10836 if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) {
10837 /*
10838- * Skip, if setup has overridden idle.
10839 * One CPU supports mwait => All CPUs supports mwait
10840 */
10841- if (!pm_idle) {
10842- printk(KERN_INFO "using mwait in idle threads.\n");
10843- pm_idle = mwait_idle;
10844- }
10845- }
10846- selected = 1;
10847+ printk(KERN_INFO "using mwait in idle threads.\n");
10848+ pm_idle = mwait_idle;
10849+ } else if (check_c1e_idle(c)) {
10850+ printk(KERN_INFO "using C1E aware idle routine\n");
10851+ pm_idle = c1e_idle;
10852+ } else
10853+ pm_idle = default_idle;
10854 #endif
10855 }
10856
10857 static int __init idle_setup(char *str)
10858 {
10859+ if (!str)
10860+ return -EINVAL;
10861+
10862 if (!strcmp(str, "poll")) {
10863 printk("using polling idle threads.\n");
10864 pm_idle = poll_idle;
10865- }
10866 #ifndef CONFIG_XEN
10867- else if (!strcmp(str, "mwait"))
10868+ } else if (!strcmp(str, "mwait"))
10869 force_mwait = 1;
10870+ else if (!strcmp(str, "halt")) {
10871+ /*
10872+ * When the boot option of idle=halt is added, halt is
10873+ * forced to be used for CPU idle. In such case CPU C2/C3
10874+ * won't be used again.
10875+ * To continue to load the CPU idle driver, don't touch
10876+ * the boot_option_idle_override.
10877+ */
10878+ pm_idle = default_idle;
10879+ idle_halt = 1;
10880+ return 0;
10881+ } else if (!strcmp(str, "nomwait")) {
10882+ /*
10883+ * If the boot option of "idle=nomwait" is added,
10884+ * it means that mwait will be disabled for CPU C2/C3
10885+ * states. In such case it won't touch the variable
10886+ * of boot_option_idle_override.
10887+ */
10888+ idle_nomwait = 1;
10889+ return 0;
10890 #endif
10891- else
10892+ } else
10893 return -1;
10894
10895 boot_option_idle_override = 1;
10896--- sle11-2009-10-16.orig/arch/x86/kernel/process_32-xen.c 2009-03-16 16:38:05.000000000 +0100
10897+++ sle11-2009-10-16/arch/x86/kernel/process_32-xen.c 2009-06-04 10:21:39.000000000 +0200
10898@@ -59,15 +59,11 @@
10899 #include <asm/tlbflush.h>
10900 #include <asm/cpu.h>
10901 #include <asm/kdebug.h>
10902+#include <asm/idle.h>
10903
10904 asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
10905 asmlinkage void cstar_ret_from_fork(void) __asm__("cstar_ret_from_fork");
10906
10907-static int hlt_counter;
10908-
10909-unsigned long boot_option_idle_override = 0;
10910-EXPORT_SYMBOL(boot_option_idle_override);
10911-
10912 DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
10913 EXPORT_PER_CPU_SYMBOL(current_task);
10914
10915@@ -82,46 +78,27 @@ unsigned long thread_saved_pc(struct tas
10916 return ((unsigned long *)tsk->thread.sp)[3];
10917 }
10918
10919-/*
10920- * Powermanagement idle function, if any..
10921- */
10922-void (*pm_idle)(void);
10923-EXPORT_SYMBOL(pm_idle);
10924+#ifdef CONFIG_HOTPLUG_CPU
10925+#ifndef CONFIG_XEN
10926+#include <asm/nmi.h>
10927
10928-void disable_hlt(void)
10929+static void cpu_exit_clear(void)
10930 {
10931- hlt_counter++;
10932-}
10933+ int cpu = raw_smp_processor_id();
10934
10935-EXPORT_SYMBOL(disable_hlt);
10936-
10937-void enable_hlt(void)
10938-{
10939- hlt_counter--;
10940-}
10941+ idle_task_exit();
10942
10943-EXPORT_SYMBOL(enable_hlt);
10944+ cpu_uninit();
10945+ irq_ctx_exit(cpu);
10946
10947-static void xen_idle(void)
10948-{
10949- current_thread_info()->status &= ~TS_POLLING;
10950- /*
10951- * TS_POLLING-cleared state must be visible before we
10952- * test NEED_RESCHED:
10953- */
10954- smp_mb();
10955+ cpu_clear(cpu, cpu_callout_map);
10956+ cpu_clear(cpu, cpu_callin_map);
10957
10958- if (!need_resched())
10959- safe_halt(); /* enables interrupts racelessly */
10960- else
10961- local_irq_enable();
10962- current_thread_info()->status |= TS_POLLING;
10963+ numa_remove_cpu(cpu);
10964+ c1e_remove_cpu(cpu);
10965 }
10966-#ifdef CONFIG_APM_MODULE
10967-EXPORT_SYMBOL(default_idle);
10968 #endif
10969
10970-#ifdef CONFIG_HOTPLUG_CPU
10971 static inline void play_dead(void)
10972 {
10973 idle_task_exit();
10974@@ -152,13 +129,11 @@ void cpu_idle(void)
10975
10976 /* endless idle loop with no priority at all */
10977 while (1) {
10978- tick_nohz_stop_sched_tick();
10979+ tick_nohz_stop_sched_tick(1);
10980 while (!need_resched()) {
10981- void (*idle)(void);
10982
10983 check_pgt_cache();
10984 rmb();
10985- idle = xen_idle; /* no alternatives */
10986
10987 if (rcu_pending(cpu))
10988 rcu_check_callbacks(cpu, 0);
10989@@ -168,7 +143,10 @@ void cpu_idle(void)
10990
10991 local_irq_disable();
10992 __get_cpu_var(irq_stat).idle_timestamp = jiffies;
10993- idle();
10994+ /* Don't trace irqs off for idle */
10995+ stop_critical_timings();
10996+ xen_idle();
10997+ start_critical_timings();
10998 }
10999 tick_nohz_restart_sched_tick();
11000 preempt_enable_no_resched();
11001--- sle11-2009-10-16.orig/arch/x86/kernel/process_64-xen.c 2009-03-16 16:38:05.000000000 +0100
11002+++ sle11-2009-10-16/arch/x86/kernel/process_64-xen.c 2009-06-04 10:21:39.000000000 +0200
11003@@ -65,15 +65,6 @@ asmlinkage extern void ret_from_fork(voi
11004
11005 unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;
11006
11007-unsigned long boot_option_idle_override = 0;
11008-EXPORT_SYMBOL(boot_option_idle_override);
11009-
11010-/*
11011- * Powermanagement idle function, if any..
11012- */
11013-void (*pm_idle)(void);
11014-EXPORT_SYMBOL(pm_idle);
11015-
11016 static ATOMIC_NOTIFIER_HEAD(idle_notifier);
11017
11018 void idle_notifier_register(struct notifier_block *n)
11019@@ -103,25 +94,13 @@ void exit_idle(void)
11020 __exit_idle();
11021 }
11022
11023-static void xen_idle(void)
11024-{
11025- current_thread_info()->status &= ~TS_POLLING;
11026- /*
11027- * TS_POLLING-cleared state must be visible before we
11028- * test NEED_RESCHED:
11029- */
11030- smp_mb();
11031- if (!need_resched())
11032- safe_halt(); /* enables interrupts racelessly */
11033- else
11034- local_irq_enable();
11035- current_thread_info()->status |= TS_POLLING;
11036-}
11037-
11038 #ifdef CONFIG_HOTPLUG_CPU
11039 static inline void play_dead(void)
11040 {
11041 idle_task_exit();
11042+#ifndef CONFIG_XEN
11043+ c1e_remove_cpu(raw_smp_processor_id());
11044+#endif
11045 local_irq_disable();
11046 cpu_clear(smp_processor_id(), cpu_initialized);
11047 preempt_enable_no_resched();
11048@@ -146,12 +125,11 @@ void cpu_idle(void)
11049 current_thread_info()->status |= TS_POLLING;
11050 /* endless idle loop with no priority at all */
11051 while (1) {
11052- tick_nohz_stop_sched_tick();
11053+ tick_nohz_stop_sched_tick(1);
11054 while (!need_resched()) {
11055- void (*idle)(void);
11056
11057 rmb();
11058- idle = xen_idle; /* no alternatives */
11059+
11060 if (cpu_is_offline(smp_processor_id()))
11061 play_dead();
11062 /*
11063@@ -161,7 +139,10 @@ void cpu_idle(void)
11064 */
11065 local_irq_disable();
11066 enter_idle();
11067- idle();
11068+ /* Don't trace irqs off for idle */
11069+ stop_critical_timings();
11070+ xen_idle();
11071+ start_critical_timings();
11072 /* In many cases the interrupt that ended idle
11073 has already called exit_idle. But some idle
11074 loops can be woken up without interrupt. */
11075@@ -271,7 +252,7 @@ void exit_thread(void)
11076 }
11077 }
11078
11079-void load_gs_index(unsigned gs)
11080+void xen_load_gs_index(unsigned gs)
11081 {
11082 WARN_ON(HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, gs));
11083 }
11084@@ -373,10 +354,10 @@ int copy_thread(int nr, unsigned long cl
11085 p->thread.fs = me->thread.fs;
11086 p->thread.gs = me->thread.gs;
11087
11088- asm("mov %%gs,%0" : "=m" (p->thread.gsindex));
11089- asm("mov %%fs,%0" : "=m" (p->thread.fsindex));
11090- asm("mov %%es,%0" : "=m" (p->thread.es));
11091- asm("mov %%ds,%0" : "=m" (p->thread.ds));
11092+ savesegment(gs, p->thread.gsindex);
11093+ savesegment(fs, p->thread.fsindex);
11094+ savesegment(es, p->thread.es);
11095+ savesegment(ds, p->thread.ds);
11096
11097 if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
11098 p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
11099@@ -417,7 +398,9 @@ out:
11100 void
11101 start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
11102 {
11103- asm volatile("movl %0, %%fs; movl %0, %%es; movl %0, %%ds" :: "r"(0));
11104+ loadsegment(fs, 0);
11105+ loadsegment(es, 0);
11106+ loadsegment(ds, 0);
11107 load_gs_index(0);
11108 regs->ip = new_ip;
11109 regs->sp = new_sp;
11110@@ -557,8 +540,8 @@ static inline void __switch_to_xtra(stru
11111 struct task_struct *
11112 __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
11113 {
11114- struct thread_struct *prev = &prev_p->thread,
11115- *next = &next_p->thread;
11116+ struct thread_struct *prev = &prev_p->thread;
11117+ struct thread_struct *next = &next_p->thread;
11118 int cpu = smp_processor_id();
11119 #ifndef CONFIG_X86_NO_TSS
11120 struct tss_struct *tss = &per_cpu(init_tss, cpu);
11121@@ -661,12 +644,25 @@ __switch_to(struct task_struct *prev_p,
11122 */
11123 if (unlikely(next->es))
11124 loadsegment(es, next->es);
11125-
11126+
11127 if (unlikely(next->ds))
11128 loadsegment(ds, next->ds);
11129
11130+ /*
11131+ * Leave lazy mode, flushing any hypercalls made here.
11132+ * This must be done before restoring TLS segments so
11133+ * the GDT and LDT are properly updated, and must be
11134+ * done before math_state_restore, so the TS bit is up
11135+ * to date.
11136+ */
11137+ arch_leave_lazy_cpu_mode();
11138+
11139 /*
11140 * Switch FS and GS.
11141+ *
11142+ * Segment register != 0 always requires a reload. Also
11143+ * reload when it has changed. When prev process used 64bit
11144+ * base always reload to avoid an information leak.
11145 */
11146 if (unlikely(next->fsindex))
11147 loadsegment(fs, next->fsindex);
11148@@ -687,7 +683,8 @@ __switch_to(struct task_struct *prev_p,
11149 write_pda(oldrsp, next->usersp);
11150 write_pda(pcurrent, next_p);
11151 write_pda(kernelstack,
11152- (unsigned long)task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET);
11153+ (unsigned long)task_stack_page(next_p) +
11154+ THREAD_SIZE - PDA_STACKOFFSET);
11155 #ifdef CONFIG_CC_STACKPROTECTOR
11156 write_pda(stack_canary, next_p->stack_canary);
11157
11158@@ -848,7 +845,7 @@ long do_arch_prctl(struct task_struct *t
11159 set_32bit_tls(task, FS_TLS, addr);
11160 if (doit) {
11161 load_TLS(&task->thread, cpu);
11162- asm volatile("movl %0,%%fs" :: "r"(FS_TLS_SEL));
11163+ loadsegment(fs, FS_TLS_SEL);
11164 }
11165 task->thread.fsindex = FS_TLS_SEL;
11166 task->thread.fs = 0;
11167@@ -858,7 +855,7 @@ long do_arch_prctl(struct task_struct *t
11168 if (doit) {
11169 /* set the selector to 0 to not confuse
11170 __switch_to */
11171- asm volatile("movl %0,%%fs" :: "r" (0));
11172+ loadsegment(fs, 0);
11173 ret = HYPERVISOR_set_segment_base(SEGBASE_FS,
11174 addr);
11175 }
11176@@ -882,7 +879,7 @@ long do_arch_prctl(struct task_struct *t
11177 if (task->thread.gsindex == GS_TLS_SEL)
11178 base = read_32bit_tls(task, GS_TLS);
11179 else if (doit) {
11180- asm("movl %%gs,%0" : "=r" (gsindex));
11181+ savesegment(gs, gsindex);
11182 if (gsindex)
11183 rdmsrl(MSR_KERNEL_GS_BASE, base);
11184 else
11185--- sle11-2009-10-16.orig/arch/x86/kernel/quirks-xen.c 2009-03-16 16:33:40.000000000 +0100
11186+++ sle11-2009-10-16/arch/x86/kernel/quirks-xen.c 2009-06-04 10:21:39.000000000 +0200
11187@@ -63,6 +63,7 @@ static enum {
11188 ICH_FORCE_HPET_RESUME,
11189 VT8237_FORCE_HPET_RESUME,
11190 NVIDIA_FORCE_HPET_RESUME,
11191+ ATI_FORCE_HPET_RESUME,
11192 } force_hpet_resume_type;
11193
11194 static void __iomem *rcba_base;
11195@@ -156,6 +157,8 @@ static void ich_force_enable_hpet(struct
11196
11197 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB2_0,
11198 ich_force_enable_hpet);
11199+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH6_0,
11200+ ich_force_enable_hpet);
11201 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH6_1,
11202 ich_force_enable_hpet);
11203 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH7_0,
11204@@ -172,6 +175,12 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_I
11205
11206 static struct pci_dev *cached_dev;
11207
11208+static void hpet_print_force_info(void)
11209+{
11210+ printk(KERN_INFO "HPET not enabled in BIOS. "
11211+ "You might try hpet=force boot option\n");
11212+}
11213+
11214 static void old_ich_force_hpet_resume(void)
11215 {
11216 u32 val;
11217@@ -251,8 +260,12 @@ static void old_ich_force_enable_hpet_us
11218 {
11219 if (hpet_force_user)
11220 old_ich_force_enable_hpet(dev);
11221+ else
11222+ hpet_print_force_info();
11223 }
11224
11225+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB_1,
11226+ old_ich_force_enable_hpet_user);
11227 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_0,
11228 old_ich_force_enable_hpet_user);
11229 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_12,
11230@@ -288,9 +301,14 @@ static void vt8237_force_enable_hpet(str
11231 {
11232 u32 uninitialized_var(val);
11233
11234- if (!hpet_force_user || hpet_address || force_hpet_address)
11235+ if (hpet_address || force_hpet_address)
11236 return;
11237
11238+ if (!hpet_force_user) {
11239+ hpet_print_force_info();
11240+ return;
11241+ }
11242+
11243 pci_read_config_dword(dev, 0x68, &val);
11244 /*
11245 * Bit 7 is HPET enable bit.
11246@@ -328,6 +346,36 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_V
11247 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8237,
11248 vt8237_force_enable_hpet);
11249
11250+static void ati_force_hpet_resume(void)
11251+{
11252+ pci_write_config_dword(cached_dev, 0x14, 0xfed00000);
11253+ printk(KERN_DEBUG "Force enabled HPET at resume\n");
11254+}
11255+
11256+static void ati_force_enable_hpet(struct pci_dev *dev)
11257+{
11258+ u32 uninitialized_var(val);
11259+
11260+ if (hpet_address || force_hpet_address)
11261+ return;
11262+
11263+ if (!hpet_force_user) {
11264+ hpet_print_force_info();
11265+ return;
11266+ }
11267+
11268+ pci_write_config_dword(dev, 0x14, 0xfed00000);
11269+ pci_read_config_dword(dev, 0x14, &val);
11270+ force_hpet_address = val;
11271+ force_hpet_resume_type = ATI_FORCE_HPET_RESUME;
11272+ dev_printk(KERN_DEBUG, &dev->dev, "Force enabled HPET at 0x%lx\n",
11273+ force_hpet_address);
11274+ cached_dev = dev;
11275+ return;
11276+}
11277+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_IXP400_SMBUS,
11278+ ati_force_enable_hpet);
11279+
11280 /*
11281 * Undocumented chipset feature taken from LinuxBIOS.
11282 */
11283@@ -341,8 +389,13 @@ static void nvidia_force_enable_hpet(str
11284 {
11285 u32 uninitialized_var(val);
11286
11287- if (!hpet_force_user || hpet_address || force_hpet_address)
11288+ if (hpet_address || force_hpet_address)
11289+ return;
11290+
11291+ if (!hpet_force_user) {
11292+ hpet_print_force_info();
11293 return;
11294+ }
11295
11296 pci_write_config_dword(dev, 0x44, 0xfed00001);
11297 pci_read_config_dword(dev, 0x44, &val);
11298@@ -395,6 +448,9 @@ void force_hpet_resume(void)
11299 case NVIDIA_FORCE_HPET_RESUME:
11300 nvidia_force_hpet_resume();
11301 return;
11302+ case ATI_FORCE_HPET_RESUME:
11303+ ati_force_hpet_resume();
11304+ return;
11305 default:
11306 break;
11307 }
11308--- sle11-2009-10-16.orig/arch/x86/kernel/setup-xen.c 2009-03-16 16:38:05.000000000 +0100
11309+++ sle11-2009-10-16/arch/x86/kernel/setup-xen.c 2009-06-04 10:21:39.000000000 +0200
11310@@ -1,141 +1,1132 @@
11311-#include <linux/kernel.h>
11312+/*
11313+ * Copyright (C) 1995 Linus Torvalds
11314+ *
11315+ * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
11316+ *
11317+ * Memory region support
11318+ * David Parsons <orc@pell.chi.il.us>, July-August 1999
11319+ *
11320+ * Added E820 sanitization routine (removes overlapping memory regions);
11321+ * Brian Moyle <bmoyle@mvista.com>, February 2001
11322+ *
11323+ * Moved CPU detection code to cpu/${cpu}.c
11324+ * Patrick Mochel <mochel@osdl.org>, March 2002
11325+ *
11326+ * Provisions for empty E820 memory regions (reported by certain BIOSes).
11327+ * Alex Achenbach <xela@slit.de>, December 2002.
11328+ *
11329+ */
11330+
11331+/*
11332+ * This file handles the architecture-dependent parts of initialization
11333+ */
11334+
11335+#include <linux/sched.h>
11336+#include <linux/mm.h>
11337+#include <linux/mmzone.h>
11338+#include <linux/screen_info.h>
11339+#include <linux/ioport.h>
11340+#include <linux/acpi.h>
11341+#include <linux/apm_bios.h>
11342+#include <linux/initrd.h>
11343+#include <linux/bootmem.h>
11344+#include <linux/seq_file.h>
11345+#include <linux/console.h>
11346+#include <linux/mca.h>
11347+#include <linux/root_dev.h>
11348+#include <linux/highmem.h>
11349 #include <linux/module.h>
11350+#include <linux/efi.h>
11351 #include <linux/init.h>
11352-#include <linux/bootmem.h>
11353+#include <linux/edd.h>
11354+#include <linux/iscsi_ibft.h>
11355+#include <linux/nodemask.h>
11356+#include <linux/kexec.h>
11357+#include <linux/dmi.h>
11358+#include <linux/pfn.h>
11359+#include <linux/pci.h>
11360+#include <asm/pci-direct.h>
11361+#include <linux/init_ohci1394_dma.h>
11362+#include <linux/kvm_para.h>
11363+
11364+#include <linux/errno.h>
11365+#include <linux/kernel.h>
11366+#include <linux/stddef.h>
11367+#include <linux/unistd.h>
11368+#include <linux/ptrace.h>
11369+#include <linux/slab.h>
11370+#include <linux/user.h>
11371+#include <linux/delay.h>
11372+
11373+#include <linux/kallsyms.h>
11374+#include <linux/cpufreq.h>
11375+#include <linux/dma-mapping.h>
11376+#include <linux/ctype.h>
11377+#include <linux/uaccess.h>
11378+
11379 #include <linux/percpu.h>
11380-#include <asm/smp.h>
11381-#include <asm/percpu.h>
11382+#include <linux/crash_dump.h>
11383+
11384+#include <video/edid.h>
11385+
11386+#include <asm/mtrr.h>
11387+#include <asm/apic.h>
11388+#include <asm/e820.h>
11389+#include <asm/mpspec.h>
11390+#include <asm/setup.h>
11391+#include <asm/arch_hooks.h>
11392+#include <asm/efi.h>
11393 #include <asm/sections.h>
11394+#include <asm/dmi.h>
11395+#include <asm/io_apic.h>
11396+#include <asm/ist.h>
11397+#include <asm/vmi.h>
11398+#include <setup_arch.h>
11399+#include <asm/bios_ebda.h>
11400+#include <asm/cacheflush.h>
11401 #include <asm/processor.h>
11402-#include <asm/setup.h>
11403+#include <asm/bugs.h>
11404+
11405+#include <asm/system.h>
11406+#include <asm/vsyscall.h>
11407+#include <asm/smp.h>
11408+#include <asm/desc.h>
11409+#include <asm/dma.h>
11410+#include <asm/iommu.h>
11411+#include <asm/mmu_context.h>
11412+#include <asm/proto.h>
11413+
11414+#include <mach_apic.h>
11415+#include <asm/paravirt.h>
11416+
11417+#include <asm/percpu.h>
11418 #include <asm/topology.h>
11419-#include <asm/mpspec.h>
11420 #include <asm/apicdef.h>
11421+#ifdef CONFIG_X86_64
11422+#include <asm/numa_64.h>
11423+#endif
11424+
11425+#ifdef CONFIG_XEN
11426+#include <asm/hypervisor.h>
11427+#include <xen/interface/kexec.h>
11428+#include <xen/interface/memory.h>
11429+#include <xen/interface/nmi.h>
11430+#include <xen/interface/physdev.h>
11431+#include <xen/features.h>
11432+#include <xen/firmware.h>
11433+#include <xen/xencons.h>
11434+
11435+shared_info_t *HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page;
11436+EXPORT_SYMBOL(HYPERVISOR_shared_info);
11437
11438-#ifdef CONFIG_X86_LOCAL_APIC
11439-unsigned int num_processors;
11440-unsigned disabled_cpus __cpuinitdata;
11441-/* Processor that is doing the boot up */
11442-unsigned int boot_cpu_physical_apicid = -1U;
11443-EXPORT_SYMBOL(boot_cpu_physical_apicid);
11444+static int xen_panic_event(struct notifier_block *, unsigned long, void *);
11445+static struct notifier_block xen_panic_block = {
11446+ xen_panic_event, NULL, 0 /* try to go last */
11447+};
11448
11449-DEFINE_PER_CPU(u16, x86_cpu_to_apicid) = BAD_APICID;
11450-EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid);
11451+unsigned long *phys_to_machine_mapping;
11452+EXPORT_SYMBOL(phys_to_machine_mapping);
11453
11454-/* Bitmask of physically existing CPUs */
11455-physid_mask_t phys_cpu_present_map;
11456+unsigned long *pfn_to_mfn_frame_list_list,
11457+#ifdef CONFIG_X86_64
11458+ *pfn_to_mfn_frame_list[512];
11459+#else
11460+ *pfn_to_mfn_frame_list[128];
11461+#endif
11462+
11463+/* Raw start-of-day parameters from the hypervisor. */
11464+start_info_t *xen_start_info;
11465+EXPORT_SYMBOL(xen_start_info);
11466+#endif
11467+
11468+#ifndef ARCH_SETUP
11469+#define ARCH_SETUP
11470+#endif
11471+
11472+#ifndef CONFIG_XEN
11473+#ifndef CONFIG_DEBUG_BOOT_PARAMS
11474+struct boot_params __initdata boot_params;
11475+#else
11476+struct boot_params boot_params;
11477+#endif
11478 #endif
11479
11480-#if defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) && defined(CONFIG_X86_SMP)
11481 /*
11482- * Copy data used in early init routines from the initial arrays to the
11483- * per cpu data areas. These arrays then become expendable and the
11484- * *_early_ptr's are zeroed indicating that the static arrays are gone.
11485+ * Machine setup..
11486 */
11487-static void __init setup_per_cpu_maps(void)
11488+static struct resource data_resource = {
11489+ .name = "Kernel data",
11490+ .start = 0,
11491+ .end = 0,
11492+ .flags = IORESOURCE_BUSY | IORESOURCE_MEM
11493+};
11494+
11495+static struct resource code_resource = {
11496+ .name = "Kernel code",
11497+ .start = 0,
11498+ .end = 0,
11499+ .flags = IORESOURCE_BUSY | IORESOURCE_MEM
11500+};
11501+
11502+static struct resource bss_resource = {
11503+ .name = "Kernel bss",
11504+ .start = 0,
11505+ .end = 0,
11506+ .flags = IORESOURCE_BUSY | IORESOURCE_MEM
11507+};
11508+
11509+
11510+#ifdef CONFIG_X86_32
11511+#ifndef CONFIG_XEN
11512+/* This value is set up by the early boot code to point to the value
11513+ immediately after the boot time page tables. It contains a *physical*
11514+ address, and must not be in the .bss segment! */
11515+unsigned long init_pg_tables_start __initdata = ~0UL;
11516+unsigned long init_pg_tables_end __initdata = ~0UL;
11517+#endif
11518+
11519+static struct resource video_ram_resource = {
11520+ .name = "Video RAM area",
11521+ .start = 0xa0000,
11522+ .end = 0xbffff,
11523+ .flags = IORESOURCE_BUSY | IORESOURCE_MEM
11524+};
11525+
11526+/* cpu data as detected by the assembly code in head.S */
11527+struct cpuinfo_x86 new_cpu_data __cpuinitdata = {0, 0, 0, 0, -1, 1, 0, 0, -1};
11528+/* common cpu data for all cpus */
11529+struct cpuinfo_x86 boot_cpu_data __read_mostly = {0, 0, 0, 0, -1, 1, 0, 0, -1};
11530+EXPORT_SYMBOL(boot_cpu_data);
11531+#ifndef CONFIG_XEN
11532+static void set_mca_bus(int x)
11533+{
11534+#ifdef CONFIG_MCA
11535+ MCA_bus = x;
11536+#endif
11537+}
11538+
11539+unsigned int def_to_bigsmp;
11540+
11541+/* for MCA, but anyone else can use it if they want */
11542+unsigned int machine_id;
11543+unsigned int machine_submodel_id;
11544+unsigned int BIOS_revision;
11545+
11546+struct apm_info apm_info;
11547+EXPORT_SYMBOL(apm_info);
11548+#endif
11549+
11550+#if defined(CONFIG_X86_SPEEDSTEP_SMI_MODULE)
11551+struct ist_info ist_info;
11552+EXPORT_SYMBOL(ist_info);
11553+#elif defined(CONFIG_X86_SPEEDSTEP_SMI)
11554+struct ist_info ist_info;
11555+#endif
11556+
11557+#else
11558+struct cpuinfo_x86 boot_cpu_data __read_mostly;
11559+EXPORT_SYMBOL(boot_cpu_data);
11560+#endif
11561+
11562+
11563+#if !defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64)
11564+unsigned long mmu_cr4_features;
11565+#else
11566+unsigned long mmu_cr4_features = X86_CR4_PAE;
11567+#endif
11568+
11569+/* Boot loader ID as an integer, for the benefit of proc_dointvec */
11570+int bootloader_type;
11571+
11572+/*
11573+ * Early DMI memory
11574+ */
11575+int dmi_alloc_index;
11576+char dmi_alloc_data[DMI_MAX_DATA];
11577+
11578+/*
11579+ * Setup options
11580+ */
11581+struct screen_info screen_info;
11582+EXPORT_SYMBOL(screen_info);
11583+struct edid_info edid_info;
11584+EXPORT_SYMBOL_GPL(edid_info);
11585+
11586+extern int root_mountflags;
11587+
11588+unsigned long saved_video_mode;
11589+
11590+#define RAMDISK_IMAGE_START_MASK 0x07FF
11591+#define RAMDISK_PROMPT_FLAG 0x8000
11592+#define RAMDISK_LOAD_FLAG 0x4000
11593+
11594+static char __initdata command_line[COMMAND_LINE_SIZE];
11595+
11596+#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
11597+struct edd edd;
11598+#ifdef CONFIG_EDD_MODULE
11599+EXPORT_SYMBOL(edd);
11600+#endif
11601+#ifndef CONFIG_XEN
11602+/**
11603+ * copy_edd() - Copy the BIOS EDD information
11604+ * from boot_params into a safe place.
11605+ *
11606+ */
11607+static inline void copy_edd(void)
11608+{
11609+ memcpy(edd.mbr_signature, boot_params.edd_mbr_sig_buffer,
11610+ sizeof(edd.mbr_signature));
11611+ memcpy(edd.edd_info, boot_params.eddbuf, sizeof(edd.edd_info));
11612+ edd.mbr_signature_nr = boot_params.edd_mbr_sig_buf_entries;
11613+ edd.edd_info_nr = boot_params.eddbuf_entries;
11614+}
11615+#endif
11616+#else
11617+static inline void copy_edd(void)
11618+{
11619+}
11620+#endif
11621+
11622+#ifdef CONFIG_BLK_DEV_INITRD
11623+
11624+#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN)
11625+
11626+#define MAX_MAP_CHUNK (NR_FIX_BTMAPS << PAGE_SHIFT)
11627+static void __init relocate_initrd(void)
11628+{
11629+
11630+ u64 ramdisk_image = boot_params.hdr.ramdisk_image;
11631+ u64 ramdisk_size = boot_params.hdr.ramdisk_size;
11632+ u64 end_of_lowmem = max_low_pfn << PAGE_SHIFT;
11633+ u64 ramdisk_here;
11634+ unsigned long slop, clen, mapaddr;
11635+ char *p, *q;
11636+
11637+ /* We need to move the initrd down into lowmem */
11638+ ramdisk_here = find_e820_area(0, end_of_lowmem, ramdisk_size,
11639+ PAGE_SIZE);
11640+
11641+ if (ramdisk_here == -1ULL)
11642+ panic("Cannot find place for new RAMDISK of size %lld\n",
11643+ ramdisk_size);
11644+
11645+ /* Note: this includes all the lowmem currently occupied by
11646+ the initrd, we rely on that fact to keep the data intact. */
11647+ reserve_early(ramdisk_here, ramdisk_here + ramdisk_size,
11648+ "NEW RAMDISK");
11649+ initrd_start = ramdisk_here + PAGE_OFFSET;
11650+ initrd_end = initrd_start + ramdisk_size;
11651+ printk(KERN_INFO "Allocated new RAMDISK: %08llx - %08llx\n",
11652+ ramdisk_here, ramdisk_here + ramdisk_size);
11653+
11654+ q = (char *)initrd_start;
11655+
11656+ /* Copy any lowmem portion of the initrd */
11657+ if (ramdisk_image < end_of_lowmem) {
11658+ clen = end_of_lowmem - ramdisk_image;
11659+ p = (char *)__va(ramdisk_image);
11660+ memcpy(q, p, clen);
11661+ q += clen;
11662+ ramdisk_image += clen;
11663+ ramdisk_size -= clen;
11664+ }
11665+
11666+ /* Copy the highmem portion of the initrd */
11667+ while (ramdisk_size) {
11668+ slop = ramdisk_image & ~PAGE_MASK;
11669+ clen = ramdisk_size;
11670+ if (clen > MAX_MAP_CHUNK-slop)
11671+ clen = MAX_MAP_CHUNK-slop;
11672+ mapaddr = ramdisk_image & PAGE_MASK;
11673+ p = early_ioremap(mapaddr, clen+slop);
11674+ memcpy(q, p+slop, clen);
11675+ early_iounmap(p, clen+slop);
11676+ q += clen;
11677+ ramdisk_image += clen;
11678+ ramdisk_size -= clen;
11679+ }
11680+ /* high pages is not converted by early_res_to_bootmem */
11681+ ramdisk_image = boot_params.hdr.ramdisk_image;
11682+ ramdisk_size = boot_params.hdr.ramdisk_size;
11683+ printk(KERN_INFO "Move RAMDISK from %016llx - %016llx to"
11684+ " %08llx - %08llx\n",
11685+ ramdisk_image, ramdisk_image + ramdisk_size - 1,
11686+ ramdisk_here, ramdisk_here + ramdisk_size - 1);
11687+}
11688+#endif
11689+
11690+static void __init reserve_initrd(void)
11691 {
11692 #ifndef CONFIG_XEN
11693- int cpu;
11694+ u64 ramdisk_image = boot_params.hdr.ramdisk_image;
11695+ u64 ramdisk_size = boot_params.hdr.ramdisk_size;
11696+ u64 ramdisk_end = ramdisk_image + ramdisk_size;
11697+ u64 end_of_lowmem = max_low_pfn << PAGE_SHIFT;
11698+
11699+ if (!boot_params.hdr.type_of_loader ||
11700+ !ramdisk_image || !ramdisk_size)
11701+ return; /* No initrd provided by bootloader */
11702+#else
11703+ unsigned long ramdisk_image = __pa(xen_start_info->mod_start);
11704+ unsigned long ramdisk_size = xen_start_info->mod_len;
11705+ unsigned long ramdisk_end = ramdisk_image + ramdisk_size;
11706+ unsigned long end_of_lowmem = max_low_pfn << PAGE_SHIFT;
11707
11708- for_each_possible_cpu(cpu) {
11709- per_cpu(x86_cpu_to_apicid, cpu) = x86_cpu_to_apicid_init[cpu];
11710- per_cpu(x86_bios_cpu_apicid, cpu) =
11711- x86_bios_cpu_apicid_init[cpu];
11712-#ifdef CONFIG_NUMA
11713- per_cpu(x86_cpu_to_node_map, cpu) =
11714- x86_cpu_to_node_map_init[cpu];
11715+ if (!xen_start_info->mod_start || !ramdisk_size)
11716+ return; /* No initrd provided by bootloader */
11717 #endif
11718+
11719+ initrd_start = 0;
11720+
11721+ if (ramdisk_size >= (end_of_lowmem>>1)) {
11722+ free_early(ramdisk_image, ramdisk_end);
11723+ printk(KERN_ERR "initrd too large to handle, "
11724+ "disabling initrd\n");
11725+ return;
11726 }
11727
11728- /* indicate the early static arrays will soon be gone */
11729- x86_cpu_to_apicid_early_ptr = NULL;
11730- x86_bios_cpu_apicid_early_ptr = NULL;
11731-#ifdef CONFIG_NUMA
11732- x86_cpu_to_node_map_early_ptr = NULL;
11733+ printk(KERN_INFO "RAMDISK: %08lx - %08lx\n", ramdisk_image,
11734+ ramdisk_end);
11735+
11736+
11737+ if (ramdisk_end <= end_of_lowmem) {
11738+ /* All in lowmem, easy case */
11739+ /*
11740+ * don't need to reserve again, already reserved early
11741+ * in i386_start_kernel
11742+ */
11743+ initrd_start = ramdisk_image + PAGE_OFFSET;
11744+ initrd_end = initrd_start + ramdisk_size;
11745+#ifdef CONFIG_X86_64_XEN
11746+ initrd_below_start_ok = 1;
11747 #endif
11748+ return;
11749+ }
11750+
11751+#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN)
11752+ relocate_initrd();
11753+#else
11754+ printk(KERN_ERR "initrd extends beyond end of memory "
11755+ "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
11756+ ramdisk_end, end_of_lowmem);
11757+ initrd_start = 0;
11758 #endif
11759+ free_early(ramdisk_image, ramdisk_end);
11760 }
11761+#else
11762+static void __init reserve_initrd(void)
11763+{
11764+}
11765+#endif /* CONFIG_BLK_DEV_INITRD */
11766+
11767+static void __init parse_setup_data(void)
11768+{
11769+#ifndef CONFIG_XEN
11770+ struct setup_data *data;
11771+ u64 pa_data;
11772
11773-#ifdef CONFIG_HAVE_CPUMASK_OF_CPU_MAP
11774-cpumask_t *cpumask_of_cpu_map __read_mostly;
11775-EXPORT_SYMBOL(cpumask_of_cpu_map);
11776+ if (boot_params.hdr.version < 0x0209)
11777+ return;
11778+ pa_data = boot_params.hdr.setup_data;
11779+ while (pa_data) {
11780+ data = early_ioremap(pa_data, PAGE_SIZE);
11781+ switch (data->type) {
11782+ case SETUP_E820_EXT:
11783+ parse_e820_ext(data, pa_data);
11784+ break;
11785+ default:
11786+ break;
11787+ }
11788+ pa_data = data->next;
11789+ early_iounmap(data, PAGE_SIZE);
11790+ }
11791+#endif
11792+}
11793
11794-/* requires nr_cpu_ids to be initialized */
11795-static void __init setup_cpumask_of_cpu(void)
11796+static void __init e820_reserve_setup_data(void)
11797 {
11798- int i;
11799+#ifndef CONFIG_XEN
11800+ struct setup_data *data;
11801+ u64 pa_data;
11802+ int found = 0;
11803+
11804+ if (boot_params.hdr.version < 0x0209)
11805+ return;
11806+ pa_data = boot_params.hdr.setup_data;
11807+ while (pa_data) {
11808+ data = early_ioremap(pa_data, sizeof(*data));
11809+ e820_update_range(pa_data, sizeof(*data)+data->len,
11810+ E820_RAM, E820_RESERVED_KERN);
11811+ found = 1;
11812+ pa_data = data->next;
11813+ early_iounmap(data, sizeof(*data));
11814+ }
11815+ if (!found)
11816+ return;
11817
11818- /* alloc_bootmem zeroes memory */
11819- cpumask_of_cpu_map = alloc_bootmem_low(sizeof(cpumask_t) * nr_cpu_ids);
11820- for (i = 0; i < nr_cpu_ids; i++)
11821- cpu_set(i, cpumask_of_cpu_map[i]);
11822+ sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
11823+ memcpy(&e820_saved, &e820, sizeof(struct e820map));
11824+ printk(KERN_INFO "extended physical RAM map:\n");
11825+ e820_print_map("reserve setup_data");
11826+#endif
11827 }
11828-#else
11829-static inline void setup_cpumask_of_cpu(void) { }
11830+
11831+static void __init reserve_early_setup_data(void)
11832+{
11833+#ifndef CONFIG_XEN
11834+ struct setup_data *data;
11835+ u64 pa_data;
11836+ char buf[32];
11837+
11838+ if (boot_params.hdr.version < 0x0209)
11839+ return;
11840+ pa_data = boot_params.hdr.setup_data;
11841+ while (pa_data) {
11842+ data = early_ioremap(pa_data, sizeof(*data));
11843+ sprintf(buf, "setup data %x", data->type);
11844+ reserve_early(pa_data, pa_data+sizeof(*data)+data->len, buf);
11845+ pa_data = data->next;
11846+ early_iounmap(data, sizeof(*data));
11847+ }
11848 #endif
11849+}
11850
11851-#ifdef CONFIG_X86_32
11852 /*
11853- * Great future not-so-futuristic plan: make i386 and x86_64 do it
11854- * the same way
11855+ * --------- Crashkernel reservation ------------------------------
11856+ */
11857+
11858+#ifdef CONFIG_KEXEC
11859+
11860+#ifndef CONFIG_XEN
11861+/**
11862+ * Reserve @size bytes of crashkernel memory at any suitable offset.
11863+ *
11864+ * @size: Size of the crashkernel memory to reserve.
11865+ * Returns the base address on success, and -1ULL on failure.
11866+ */
11867+unsigned long long __init find_and_reserve_crashkernel(unsigned long long size)
11868+{
11869+ const unsigned long long alignment = 16<<20; /* 16M */
11870+ unsigned long long start = 0LL;
11871+
11872+ while (1) {
11873+ int ret;
11874+
11875+ start = find_e820_area(start, ULONG_MAX, size, alignment);
11876+ if (start == -1ULL)
11877+ return start;
11878+
11879+ /* try to reserve it */
11880+ ret = reserve_bootmem_generic(start, size, BOOTMEM_EXCLUSIVE);
11881+ if (ret >= 0)
11882+ return start;
11883+
11884+ start += alignment;
11885+ }
11886+}
11887+
11888+static inline unsigned long long get_total_mem(void)
11889+{
11890+ unsigned long long total;
11891+
11892+ total = max_low_pfn - min_low_pfn;
11893+#ifdef CONFIG_HIGHMEM
11894+ total += highend_pfn - highstart_pfn;
11895+#endif
11896+
11897+ return total << PAGE_SHIFT;
11898+}
11899+
11900+static void __init reserve_crashkernel(void)
11901+{
11902+ unsigned long long total_mem;
11903+ unsigned long long crash_size, crash_base;
11904+ int ret;
11905+
11906+ total_mem = get_total_mem();
11907+
11908+ ret = parse_crashkernel(boot_command_line, total_mem,
11909+ &crash_size, &crash_base);
11910+ if (ret != 0 || crash_size <= 0)
11911+ return;
11912+
11913+ /* 0 means: find the address automatically */
11914+ if (crash_base <= 0) {
11915+ crash_base = find_and_reserve_crashkernel(crash_size);
11916+ if (crash_base == -1ULL) {
11917+ pr_info("crashkernel reservation failed. "
11918+ "No suitable area found.\n");
11919+ return;
11920+ }
11921+ } else {
11922+ ret = reserve_bootmem_generic(crash_base, crash_size,
11923+ BOOTMEM_EXCLUSIVE);
11924+ if (ret < 0) {
11925+ pr_info("crashkernel reservation failed - "
11926+ "memory is in use\n");
11927+ return;
11928+ }
11929+ }
11930+
11931+ printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
11932+ "for crashkernel (System RAM: %ldMB)\n",
11933+ (unsigned long)(crash_size >> 20),
11934+ (unsigned long)(crash_base >> 20),
11935+ (unsigned long)(total_mem >> 20));
11936+
11937+ crashk_res.start = crash_base;
11938+ crashk_res.end = crash_base + crash_size - 1;
11939+ insert_resource(&iomem_resource, &crashk_res);
11940+}
11941+#else
11942+#define reserve_crashkernel xen_machine_kexec_setup_resources
11943+#endif
11944+#else
11945+static void __init reserve_crashkernel(void)
11946+{
11947+}
11948+#endif
11949+
11950+static struct resource standard_io_resources[] = {
11951+ { .name = "dma1", .start = 0x00, .end = 0x1f,
11952+ .flags = IORESOURCE_BUSY | IORESOURCE_IO },
11953+ { .name = "pic1", .start = 0x20, .end = 0x21,
11954+ .flags = IORESOURCE_BUSY | IORESOURCE_IO },
11955+ { .name = "timer0", .start = 0x40, .end = 0x43,
11956+ .flags = IORESOURCE_BUSY | IORESOURCE_IO },
11957+ { .name = "timer1", .start = 0x50, .end = 0x53,
11958+ .flags = IORESOURCE_BUSY | IORESOURCE_IO },
11959+ { .name = "keyboard", .start = 0x60, .end = 0x60,
11960+ .flags = IORESOURCE_BUSY | IORESOURCE_IO },
11961+ { .name = "keyboard", .start = 0x64, .end = 0x64,
11962+ .flags = IORESOURCE_BUSY | IORESOURCE_IO },
11963+ { .name = "dma page reg", .start = 0x80, .end = 0x8f,
11964+ .flags = IORESOURCE_BUSY | IORESOURCE_IO },
11965+ { .name = "pic2", .start = 0xa0, .end = 0xa1,
11966+ .flags = IORESOURCE_BUSY | IORESOURCE_IO },
11967+ { .name = "dma2", .start = 0xc0, .end = 0xdf,
11968+ .flags = IORESOURCE_BUSY | IORESOURCE_IO },
11969+ { .name = "fpu", .start = 0xf0, .end = 0xff,
11970+ .flags = IORESOURCE_BUSY | IORESOURCE_IO }
11971+};
11972+
11973+static void __init reserve_standard_io_resources(void)
11974+{
11975+ int i;
11976+
11977+ /* Nothing to do if not running in dom0. */
11978+ if (!is_initial_xendomain())
11979+ return;
11980+
11981+ /* request I/O space for devices used on all i[345]86 PCs */
11982+ for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++)
11983+ request_resource(&ioport_resource, &standard_io_resources[i]);
11984+
11985+}
11986+
11987+#ifdef CONFIG_PROC_VMCORE
11988+/* elfcorehdr= specifies the location of elf core header
11989+ * stored by the crashed kernel. This option will be passed
11990+ * by kexec loader to the capture kernel.
11991 */
11992-unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
11993-EXPORT_SYMBOL(__per_cpu_offset);
11994+static int __init setup_elfcorehdr(char *arg)
11995+{
11996+ char *end;
11997+ if (!arg)
11998+ return -EINVAL;
11999+ elfcorehdr_addr = memparse(arg, &end);
12000+ return end > arg ? 0 : -EINVAL;
12001+}
12002+early_param("elfcorehdr", setup_elfcorehdr);
12003 #endif
12004
12005+static struct x86_quirks default_x86_quirks __initdata;
12006+
12007+struct x86_quirks *x86_quirks __initdata = &default_x86_quirks;
12008+
12009+/*
12010+ * Determine if we were loaded by an EFI loader. If so, then we have also been
12011+ * passed the efi memmap, systab, etc., so we should use these data structures
12012+ * for initialization. Note, the efi init code path is determined by the
12013+ * global efi_enabled. This allows the same kernel image to be used on existing
12014+ * systems (with a traditional BIOS) as well as on EFI systems.
12015+ */
12016 /*
12017- * Great future plan:
12018- * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data.
12019- * Always point %gs to its beginning
12020+ * setup_arch - architecture-specific boot-time initializations
12021+ *
12022+ * Note: On x86_64, fixmaps are ready for use even before this is called.
12023 */
12024-void __init setup_per_cpu_areas(void)
12025+
12026+void __init setup_arch(char **cmdline_p)
12027 {
12028- int i, highest_cpu = 0;
12029- unsigned long size;
12030+#ifdef CONFIG_XEN
12031+ unsigned int i;
12032+ unsigned long p2m_pages;
12033+ struct physdev_set_iopl set_iopl;
12034
12035-#ifdef CONFIG_HOTPLUG_CPU
12036- prefill_possible_map();
12037+#ifdef CONFIG_X86_32
12038+ /* Force a quick death if the kernel panics (not domain 0). */
12039+ extern int panic_timeout;
12040+ if (!panic_timeout && !is_initial_xendomain())
12041+ panic_timeout = 1;
12042 #endif
12043
12044- /* Copy section for each CPU (we discard the original) */
12045- size = PERCPU_ENOUGH_ROOM;
12046- printk(KERN_INFO "PERCPU: Allocating %lu bytes of per cpu data\n",
12047- size);
12048-
12049- for_each_possible_cpu(i) {
12050- char *ptr;
12051-#ifndef CONFIG_NEED_MULTIPLE_NODES
12052- ptr = alloc_bootmem_pages(size);
12053-#else
12054- int node = early_cpu_to_node(i);
12055- if (!node_online(node) || !NODE_DATA(node)) {
12056- ptr = alloc_bootmem_pages(size);
12057- printk(KERN_INFO
12058- "cpu %d has no node or node-local memory\n", i);
12059- }
12060- else
12061- ptr = alloc_bootmem_pages_node(NODE_DATA(node), size);
12062+ /* Register a call for panic conditions. */
12063+ atomic_notifier_chain_register(&panic_notifier_list, &xen_panic_block);
12064+
12065+ WARN_ON(HYPERVISOR_vm_assist(VMASST_CMD_enable,
12066+ VMASST_TYPE_writable_pagetables));
12067+#ifdef CONFIG_X86_32
12068+ WARN_ON(HYPERVISOR_vm_assist(VMASST_CMD_enable,
12069+ VMASST_TYPE_4gb_segments));
12070+#endif
12071+#endif /* CONFIG_XEN */
12072+
12073+#ifdef CONFIG_X86_32
12074+ memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
12075+ visws_early_detect();
12076+ pre_setup_arch_hook();
12077+#else
12078+ printk(KERN_INFO "Command line: %s\n", boot_command_line);
12079+#endif
12080+
12081+ early_cpu_init();
12082+ early_ioremap_init();
12083+
12084+#ifndef CONFIG_XEN
12085+ ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev);
12086+ screen_info = boot_params.screen_info;
12087+ edid_info = boot_params.edid_info;
12088+#ifdef CONFIG_X86_32
12089+ apm_info.bios = boot_params.apm_bios_info;
12090+ ist_info = boot_params.ist_info;
12091+ if (boot_params.sys_desc_table.length != 0) {
12092+ set_mca_bus(boot_params.sys_desc_table.table[3] & 0x2);
12093+ machine_id = boot_params.sys_desc_table.table[0];
12094+ machine_submodel_id = boot_params.sys_desc_table.table[1];
12095+ BIOS_revision = boot_params.sys_desc_table.table[2];
12096+ }
12097+#endif
12098+ saved_video_mode = boot_params.hdr.vid_mode;
12099+ bootloader_type = boot_params.hdr.type_of_loader;
12100+
12101+#ifdef CONFIG_BLK_DEV_RAM
12102+ rd_image_start = boot_params.hdr.ram_size & RAMDISK_IMAGE_START_MASK;
12103+ rd_prompt = ((boot_params.hdr.ram_size & RAMDISK_PROMPT_FLAG) != 0);
12104+ rd_doload = ((boot_params.hdr.ram_size & RAMDISK_LOAD_FLAG) != 0);
12105+#endif
12106+#ifdef CONFIG_EFI
12107+ if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature,
12108+#ifdef CONFIG_X86_32
12109+ "EL32",
12110+#else
12111+ "EL64",
12112 #endif
12113- if (!ptr)
12114- panic("Cannot allocate cpu data for CPU %d\n", i);
12115+ 4)) {
12116+ efi_enabled = 1;
12117+ efi_reserve_early();
12118+ }
12119+#endif
12120+#else /* CONFIG_XEN */
12121+#ifdef CONFIG_X86_32
12122+ /* This must be initialized to UNNAMED_MAJOR for ipconfig to work
12123+ properly. Setting ROOT_DEV to default to /dev/ram0 breaks initrd.
12124+ */
12125+ ROOT_DEV = MKDEV(UNNAMED_MAJOR,0);
12126+#else
12127+ ROOT_DEV = MKDEV(RAMDISK_MAJOR,0);
12128+#endif
12129+ if (is_initial_xendomain()) {
12130+ const struct dom0_vga_console_info *info =
12131+ (void *)((char *)xen_start_info +
12132+ xen_start_info->console.dom0.info_off);
12133+
12134+ dom0_init_screen_info(info,
12135+ xen_start_info->console.dom0.info_size);
12136+ xen_start_info->console.domU.mfn = 0;
12137+ xen_start_info->console.domU.evtchn = 0;
12138+ } else
12139+ screen_info.orig_video_isVGA = 0;
12140+ copy_edid();
12141+#endif /* CONFIG_XEN */
12142+
12143+ ARCH_SETUP
12144+
12145+ setup_memory_map();
12146+ parse_setup_data();
12147+ /* update the e820_saved too */
12148+ e820_reserve_setup_data();
12149+
12150+ copy_edd();
12151+
12152+#ifndef CONFIG_XEN
12153+ if (!boot_params.hdr.root_flags)
12154+ root_mountflags &= ~MS_RDONLY;
12155+#endif
12156+ init_mm.start_code = (unsigned long) _text;
12157+ init_mm.end_code = (unsigned long) _etext;
12158+ init_mm.end_data = (unsigned long) _edata;
12159+#ifdef CONFIG_X86_32
12160+#ifndef CONFIG_XEN
12161+ init_mm.brk = init_pg_tables_end + PAGE_OFFSET;
12162+#else
12163+ init_mm.brk = (PFN_UP(__pa(xen_start_info->pt_base)) +
12164+ xen_start_info->nr_pt_frames) << PAGE_SHIFT;
12165+#endif
12166+#else
12167+ init_mm.brk = (unsigned long) &_end;
12168+#endif
12169+
12170+ code_resource.start = virt_to_phys(_text);
12171+ code_resource.end = virt_to_phys(_etext)-1;
12172+ data_resource.start = virt_to_phys(_etext);
12173+ data_resource.end = virt_to_phys(_edata)-1;
12174+ bss_resource.start = virt_to_phys(&__bss_start);
12175+ bss_resource.end = virt_to_phys(&__bss_stop)-1;
12176+
12177+ strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
12178+ *cmdline_p = command_line;
12179+
12180+ parse_early_param();
12181+
12182 #ifdef CONFIG_X86_64
12183- cpu_pda(i)->data_offset = ptr - __per_cpu_start;
12184+ check_efer();
12185+#endif
12186+
12187+#if defined(CONFIG_VMI) && defined(CONFIG_X86_32)
12188+ /*
12189+ * Must be before kernel pagetables are setup
12190+ * or fixmap area is touched.
12191+ */
12192+ vmi_init();
12193+#endif
12194+
12195+ /* after early param, so could get panic from serial */
12196+ reserve_early_setup_data();
12197+
12198+ if (acpi_mps_check()) {
12199+#if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_XEN)
12200+ disable_apic = 1;
12201+#endif
12202+ setup_clear_cpu_cap(X86_FEATURE_APIC);
12203+ }
12204+
12205+#ifdef CONFIG_PCI
12206+ if (pci_early_dump_regs)
12207+ early_dump_pci_devices();
12208+#endif
12209+
12210+ finish_e820_parsing();
12211+
12212+#ifdef CONFIG_X86_32
12213+ probe_roms();
12214+#endif
12215+
12216+#ifndef CONFIG_XEN
12217+ /* after parse_early_param, so could debug it */
12218+ insert_resource(&iomem_resource, &code_resource);
12219+ insert_resource(&iomem_resource, &data_resource);
12220+ insert_resource(&iomem_resource, &bss_resource);
12221+
12222+ if (efi_enabled)
12223+ efi_init();
12224+
12225+#ifdef CONFIG_X86_32
12226+ if (ppro_with_ram_bug()) {
12227+ e820_update_range(0x70000000ULL, 0x40000ULL, E820_RAM,
12228+ E820_RESERVED);
12229+ sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
12230+ printk(KERN_INFO "fixed physical RAM map:\n");
12231+ e820_print_map("bad_ppro");
12232+ }
12233 #else
12234- __per_cpu_offset[i] = ptr - __per_cpu_start;
12235+ early_gart_iommu_check();
12236 #endif
12237- memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
12238+#endif /* CONFIG_XEN */
12239
12240- highest_cpu = i;
12241+ /*
12242+ * partially used pages are not usable - thus
12243+ * we are rounding upwards:
12244+ */
12245+ max_pfn = e820_end_of_ram_pfn();
12246+
12247+ /* preallocate 4k for mptable mpc */
12248+ early_reserve_e820_mpc_new();
12249+ /* update e820 for memory not covered by WB MTRRs */
12250+ mtrr_bp_init();
12251+#ifndef CONFIG_XEN
12252+ if (mtrr_trim_uncached_memory(max_pfn))
12253+ max_pfn = e820_end_of_ram_pfn();
12254+#endif
12255+
12256+#ifdef CONFIG_X86_32
12257+ /* max_low_pfn get updated here */
12258+ find_low_pfn_range();
12259+#else
12260+ num_physpages = max_pfn;
12261+ max_mapnr = max_pfn;
12262+
12263+
12264+ /* How many end-of-memory variables you have, grandma! */
12265+ /* need this before calling reserve_initrd */
12266+ if (max_pfn > (1UL<<(32 - PAGE_SHIFT)))
12267+ max_low_pfn = e820_end_of_low_ram_pfn();
12268+ else
12269+ max_low_pfn = max_pfn;
12270+
12271+ high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1;
12272+#endif
12273+
12274+ /* max_pfn_mapped is updated here */
12275+ max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT);
12276+ max_pfn_mapped = max_low_pfn_mapped;
12277+
12278+#ifdef CONFIG_X86_64
12279+ if (max_pfn > max_low_pfn) {
12280+ max_pfn_mapped = init_memory_mapping(1UL<<32,
12281+ max_pfn<<PAGE_SHIFT);
12282+ /* can we preseve max_low_pfn ?*/
12283+ max_low_pfn = max_pfn;
12284 }
12285+#endif
12286
12287- nr_cpu_ids = highest_cpu + 1;
12288- printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d\n", NR_CPUS, nr_cpu_ids);
12289+ /*
12290+ * NOTE: On x86-32, only from this point on, fixmaps are ready for use.
12291+ */
12292
12293- /* Setup percpu data maps */
12294- setup_per_cpu_maps();
12295+#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
12296+ if (init_ohci1394_dma_early)
12297+ init_ohci1394_dma_on_all_controllers();
12298+#endif
12299
12300- /* Setup cpumask_of_cpu map */
12301- setup_cpumask_of_cpu();
12302-}
12303+ reserve_initrd();
12304+
12305+#if defined(CONFIG_X86_64) && !defined(CONFIG_XEN)
12306+ vsmp_init();
12307+#endif
12308+
12309+ if (is_initial_xendomain())
12310+ dmi_scan_machine();
12311+
12312+ io_delay_init();
12313+
12314+#ifdef CONFIG_ACPI
12315+ if (!is_initial_xendomain()) {
12316+ printk(KERN_INFO "ACPI in unprivileged domain disabled\n");
12317+ disable_acpi();
12318+ }
12319+#endif
12320+
12321+ /*
12322+ * Parse the ACPI tables for possible boot-time SMP configuration.
12323+ */
12324+ acpi_boot_table_init();
12325+
12326+#ifdef CONFIG_ACPI_NUMA
12327+ /*
12328+ * Parse SRAT to discover nodes.
12329+ */
12330+ acpi_numa_init();
12331+#endif
12332+
12333+ initmem_init(0, max_pfn);
12334
12335+#ifdef CONFIG_ACPI_SLEEP
12336+ /*
12337+ * Reserve low memory region for sleep support.
12338+ */
12339+ acpi_reserve_bootmem();
12340 #endif
12341+#ifdef CONFIG_X86_FIND_SMP_CONFIG
12342+ /*
12343+ * Find and reserve possible boot-time SMP configuration:
12344+ */
12345+ find_smp_config();
12346+#endif
12347+ reserve_crashkernel();
12348+
12349+#if defined(CONFIG_X86_64) && !defined(CONFIG_XEN)
12350+ /*
12351+ * dma32_reserve_bootmem() allocates bootmem which may conflict
12352+ * with the crashkernel command line, so do that after
12353+ * reserve_crashkernel()
12354+ */
12355+ dma32_reserve_bootmem();
12356+#endif
12357+
12358+ reserve_ibft_region();
12359+
12360+#ifdef CONFIG_KVM_CLOCK
12361+ kvmclock_init();
12362+#endif
12363+
12364+ xen_pagetable_setup_start(swapper_pg_dir);
12365+ paging_init();
12366+ xen_pagetable_setup_done(swapper_pg_dir);
12367+ paravirt_post_allocator_init();
12368+
12369+#ifdef CONFIG_X86_64
12370+ map_vsyscall();
12371+#endif
12372+
12373+#ifdef CONFIG_XEN
12374+ p2m_pages = max_pfn;
12375+ if (xen_start_info->nr_pages > max_pfn) {
12376+ /*
12377+ * the max_pfn was shrunk (probably by mem= or highmem=
12378+ * kernel parameter); shrink reservation with the HV
12379+ */
12380+ struct xen_memory_reservation reservation = {
12381+ .address_bits = 0,
12382+ .extent_order = 0,
12383+ .domid = DOMID_SELF
12384+ };
12385+ unsigned int difference;
12386+ int ret;
12387+
12388+ difference = xen_start_info->nr_pages - max_pfn;
12389+
12390+ set_xen_guest_handle(reservation.extent_start,
12391+ ((unsigned long *)xen_start_info->mfn_list) + max_pfn);
12392+ reservation.nr_extents = difference;
12393+ ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
12394+ &reservation);
12395+ BUG_ON(ret != difference);
12396+ }
12397+ else if (max_pfn > xen_start_info->nr_pages)
12398+ p2m_pages = xen_start_info->nr_pages;
12399+
12400+ if (!xen_feature(XENFEAT_auto_translated_physmap)) {
12401+ unsigned long i, j;
12402+ unsigned int k, fpp;
12403+
12404+ /* Make sure we have a large enough P->M table. */
12405+ phys_to_machine_mapping = alloc_bootmem_pages(
12406+ max_pfn * sizeof(unsigned long));
12407+ memset(phys_to_machine_mapping, ~0,
12408+ max_pfn * sizeof(unsigned long));
12409+ memcpy(phys_to_machine_mapping,
12410+ (unsigned long *)xen_start_info->mfn_list,
12411+ p2m_pages * sizeof(unsigned long));
12412+ free_bootmem(
12413+ __pa(xen_start_info->mfn_list),
12414+ PFN_PHYS(PFN_UP(xen_start_info->nr_pages *
12415+ sizeof(unsigned long))));
12416+
12417+ /*
12418+ * Initialise the list of the frames that specify the list of
12419+ * frames that make up the p2m table. Used by save/restore.
12420+ */
12421+ pfn_to_mfn_frame_list_list = alloc_bootmem_pages(PAGE_SIZE);
12422+
12423+ fpp = PAGE_SIZE/sizeof(unsigned long);
12424+ for (i = j = 0, k = -1; i < max_pfn; i += fpp, j++) {
12425+ if (j == fpp)
12426+ j = 0;
12427+ if (j == 0) {
12428+ k++;
12429+ BUG_ON(k>=ARRAY_SIZE(pfn_to_mfn_frame_list));
12430+ pfn_to_mfn_frame_list[k] =
12431+ alloc_bootmem_pages(PAGE_SIZE);
12432+ pfn_to_mfn_frame_list_list[k] =
12433+ virt_to_mfn(pfn_to_mfn_frame_list[k]);
12434+ }
12435+ pfn_to_mfn_frame_list[k][j] =
12436+ virt_to_mfn(&phys_to_machine_mapping[i]);
12437+ }
12438+ HYPERVISOR_shared_info->arch.max_pfn = max_pfn;
12439+ HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
12440+ virt_to_mfn(pfn_to_mfn_frame_list_list);
12441+ }
12442+
12443+ /* Mark all ISA DMA channels in-use - using them wouldn't work. */
12444+ for (i = 0; i < MAX_DMA_CHANNELS; ++i)
12445+ if (i != 4 && request_dma(i, "xen") != 0)
12446+ BUG();
12447+#endif /* CONFIG_XEN */
12448+
12449+#ifdef CONFIG_X86_GENERICARCH
12450+ generic_apic_probe();
12451+#endif
12452+
12453+#ifndef CONFIG_XEN
12454+ early_quirks();
12455+#endif
12456+
12457+ /*
12458+ * Read APIC and some other early information from ACPI tables.
12459+ */
12460+ acpi_boot_init();
12461+
12462+#if defined(CONFIG_X86_MPPARSE) || defined(CONFIG_X86_VISWS)
12463+ /*
12464+ * get boot-time SMP configuration:
12465+ */
12466+ if (smp_found_config)
12467+ get_smp_config();
12468+#endif
12469+
12470+ prefill_possible_map();
12471+#ifdef CONFIG_X86_64
12472+ init_cpu_to_node();
12473+#endif
12474+
12475+#ifndef CONFIG_XEN
12476+ init_apic_mappings();
12477+ ioapic_init_mappings();
12478+
12479+ kvm_guest_init();
12480+
12481+ e820_reserve_resources();
12482+ e820_mark_nosave_regions(max_low_pfn);
12483+#else
12484+ if (is_initial_xendomain())
12485+ e820_reserve_resources();
12486+#endif
12487+
12488+#ifdef CONFIG_X86_32
12489+ if (is_initial_xendomain())
12490+ request_resource(&iomem_resource, &video_ram_resource);
12491+#endif
12492+ reserve_standard_io_resources();
12493+
12494+#ifndef CONFIG_XEN
12495+ e820_setup_gap();
12496+
12497+#ifdef CONFIG_VT
12498+#if defined(CONFIG_VGA_CONSOLE)
12499+ if (!efi_enabled || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY))
12500+ conswitchp = &vga_con;
12501+#elif defined(CONFIG_DUMMY_CONSOLE)
12502+ conswitchp = &dummy_con;
12503+#endif
12504+#endif
12505+#else /* CONFIG_XEN */
12506+ if (is_initial_xendomain())
12507+ e820_setup_gap();
12508+
12509+ set_iopl.iopl = 1;
12510+ WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl));
12511+
12512+#ifdef CONFIG_VT
12513+#ifdef CONFIG_DUMMY_CONSOLE
12514+ conswitchp = &dummy_con;
12515+#endif
12516+#ifdef CONFIG_VGA_CONSOLE
12517+ if (is_initial_xendomain())
12518+ conswitchp = &vga_con;
12519+#endif
12520+#endif
12521+#endif /* CONFIG_XEN */
12522+}
12523+
12524+#ifdef CONFIG_XEN
12525+static int
12526+xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr)
12527+{
12528+ HYPERVISOR_shutdown(SHUTDOWN_crash);
12529+ /* we're never actually going to get here... */
12530+ return NOTIFY_DONE;
12531+}
12532+#endif /* CONFIG_XEN */
12533--- sle11-2009-10-16.orig/arch/x86/kernel/setup64-xen.c 2009-03-16 16:38:05.000000000 +0100
12534+++ /dev/null 1970-01-01 00:00:00.000000000 +0000
12535@@ -1,370 +0,0 @@
12536-/*
12537- * X86-64 specific CPU setup.
12538- * Copyright (C) 1995 Linus Torvalds
12539- * Copyright 2001, 2002, 2003 SuSE Labs / Andi Kleen.
12540- * See setup.c for older changelog.
12541- *
12542- * Jun Nakajima <jun.nakajima@intel.com>
12543- * Modified for Xen
12544- *
12545- */
12546-#include <linux/init.h>
12547-#include <linux/kernel.h>
12548-#include <linux/sched.h>
12549-#include <linux/string.h>
12550-#include <linux/bootmem.h>
12551-#include <linux/bitops.h>
12552-#include <linux/module.h>
12553-#include <linux/kgdb.h>
12554-#include <asm/pda.h>
12555-#include <asm/pgtable.h>
12556-#include <asm/processor.h>
12557-#include <asm/desc.h>
12558-#include <asm/atomic.h>
12559-#include <asm/mmu_context.h>
12560-#include <asm/smp.h>
12561-#include <asm/i387.h>
12562-#include <asm/percpu.h>
12563-#include <asm/proto.h>
12564-#include <asm/sections.h>
12565-#include <asm/setup.h>
12566-#include <asm/genapic.h>
12567-#ifdef CONFIG_XEN
12568-#include <asm/hypervisor.h>
12569-#endif
12570-
12571-#ifndef CONFIG_DEBUG_BOOT_PARAMS
12572-struct boot_params __initdata boot_params;
12573-#else
12574-struct boot_params boot_params;
12575-#endif
12576-
12577-cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
12578-
12579-struct x8664_pda *_cpu_pda[NR_CPUS] __read_mostly;
12580-EXPORT_SYMBOL(_cpu_pda);
12581-struct x8664_pda boot_cpu_pda[NR_CPUS] __cacheline_aligned;
12582-
12583-#ifndef CONFIG_X86_NO_IDT
12584-struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
12585-#endif
12586-
12587-char boot_cpu_stack[IRQSTACKSIZE] __attribute__((section(".bss.page_aligned")));
12588-
12589-unsigned long __supported_pte_mask __read_mostly = ~0UL;
12590-EXPORT_SYMBOL(__supported_pte_mask);
12591-
12592-static int do_not_nx __cpuinitdata = 0;
12593-
12594-/* noexec=on|off
12595-Control non executable mappings for 64bit processes.
12596-
12597-on Enable(default)
12598-off Disable
12599-*/
12600-static int __init nonx_setup(char *str)
12601-{
12602- if (!str)
12603- return -EINVAL;
12604- if (!strncmp(str, "on", 2)) {
12605- __supported_pte_mask |= _PAGE_NX;
12606- do_not_nx = 0;
12607- } else if (!strncmp(str, "off", 3)) {
12608- do_not_nx = 1;
12609- __supported_pte_mask &= ~_PAGE_NX;
12610- }
12611- return 0;
12612-}
12613-early_param("noexec", nonx_setup);
12614-
12615-int force_personality32 = 0;
12616-
12617-/* noexec32=on|off
12618-Control non executable heap for 32bit processes.
12619-To control the stack too use noexec=off
12620-
12621-on PROT_READ does not imply PROT_EXEC for 32bit processes (default)
12622-off PROT_READ implies PROT_EXEC
12623-*/
12624-static int __init nonx32_setup(char *str)
12625-{
12626- if (!strcmp(str, "on"))
12627- force_personality32 &= ~READ_IMPLIES_EXEC;
12628- else if (!strcmp(str, "off"))
12629- force_personality32 |= READ_IMPLIES_EXEC;
12630- return 1;
12631-}
12632-__setup("noexec32=", nonx32_setup);
12633-
12634-#ifdef CONFIG_XEN
12635-static void __init_refok switch_pt(int cpu)
12636-{
12637- if (cpu == 0)
12638- xen_init_pt();
12639- xen_pt_switch(__pa_symbol(init_level4_pgt));
12640- xen_new_user_pt(__pa_symbol(__user_pgd(init_level4_pgt)));
12641-}
12642-#define switch_pt() switch_pt(cpu)
12643-
12644-static void __cpuinit cpu_gdt_init(const struct desc_ptr *gdt_descr)
12645-{
12646- unsigned long frames[16];
12647- unsigned long va;
12648- int f;
12649-
12650- for (va = gdt_descr->address, f = 0;
12651- va < gdt_descr->address + gdt_descr->size;
12652- va += PAGE_SIZE, f++) {
12653- frames[f] = virt_to_mfn(va);
12654- make_page_readonly(
12655- (void *)va, XENFEAT_writable_descriptor_tables);
12656- }
12657- if (HYPERVISOR_set_gdt(frames, (gdt_descr->size + 1) /
12658- sizeof (struct desc_struct)))
12659- BUG();
12660-}
12661-#else
12662-static void switch_pt(void)
12663-{
12664- asm volatile("movq %0,%%cr3" :: "r" (__pa_symbol(&init_level4_pgt)));
12665-}
12666-
12667-static void __cpuinit cpu_gdt_init(const struct desc_ptr *gdt_descr)
12668-{
12669- load_gdt(gdt_descr);
12670- load_idt(idt_descr);
12671-}
12672-#endif
12673-
12674-void pda_init(int cpu)
12675-{
12676- struct x8664_pda *pda = cpu_pda(cpu);
12677-
12678- /* Setup up data that may be needed in __get_free_pages early */
12679- asm volatile("movl %0,%%fs ; movl %0,%%gs" :: "r" (0));
12680-#ifndef CONFIG_XEN
12681- /* Memory clobbers used to order PDA accessed */
12682- mb();
12683- wrmsrl(MSR_GS_BASE, pda);
12684- mb();
12685-#else
12686- if (HYPERVISOR_set_segment_base(SEGBASE_GS_KERNEL,
12687- (unsigned long)pda))
12688- BUG();
12689-#endif
12690- pda->cpunumber = cpu;
12691- pda->irqcount = -1;
12692- pda->kernelstack =
12693- (unsigned long)stack_thread_info() - PDA_STACKOFFSET + THREAD_SIZE;
12694- pda->active_mm = &init_mm;
12695- pda->mmu_state = 0;
12696-
12697- if (cpu == 0) {
12698- /* others are initialized in smpboot.c */
12699- pda->pcurrent = &init_task;
12700- pda->irqstackptr = boot_cpu_stack;
12701- } else {
12702- pda->irqstackptr = (char *)
12703- __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
12704- if (!pda->irqstackptr)
12705- panic("cannot allocate irqstack for cpu %d", cpu);
12706- }
12707-
12708- switch_pt();
12709-
12710- pda->irqstackptr += IRQSTACKSIZE-64;
12711-}
12712-
12713-#ifndef CONFIG_X86_NO_TSS
12714-char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]
12715-__attribute__((section(".bss.page_aligned")));
12716-#endif
12717-
12718-extern asmlinkage void ignore_sysret(void);
12719-
12720-/* May not be marked __init: used by software suspend */
12721-void syscall_init(void)
12722-{
12723-#ifndef CONFIG_XEN
12724- /*
12725- * LSTAR and STAR live in a bit strange symbiosis.
12726- * They both write to the same internal register. STAR allows to set CS/DS
12727- * but only a 32bit target. LSTAR sets the 64bit rip.
12728- */
12729- wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32);
12730- wrmsrl(MSR_LSTAR, system_call);
12731- wrmsrl(MSR_CSTAR, ignore_sysret);
12732-
12733- /* Flags to clear on syscall */
12734- wrmsrl(MSR_SYSCALL_MASK,
12735- X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL);
12736-#endif
12737-#ifdef CONFIG_IA32_EMULATION
12738- syscall32_cpu_init ();
12739-#else
12740- {
12741- static const struct callback_register cstar = {
12742- .type = CALLBACKTYPE_syscall32,
12743- .address = (unsigned long)ignore_sysret
12744- };
12745- if (HYPERVISOR_callback_op(CALLBACKOP_register, &cstar))
12746- printk(KERN_WARNING "Unable to register CSTAR callback\n");
12747- }
12748-#endif
12749-}
12750-
12751-void __cpuinit check_efer(void)
12752-{
12753- unsigned long efer;
12754-
12755- rdmsrl(MSR_EFER, efer);
12756- if (!(efer & EFER_NX) || do_not_nx) {
12757- __supported_pte_mask &= ~_PAGE_NX;
12758- }
12759-}
12760-
12761-unsigned long kernel_eflags;
12762-
12763-#ifndef CONFIG_X86_NO_TSS
12764-/*
12765- * Copies of the original ist values from the tss are only accessed during
12766- * debugging, no special alignment required.
12767- */
12768-DEFINE_PER_CPU(struct orig_ist, orig_ist);
12769-#endif
12770-
12771-/*
12772- * cpu_init() initializes state that is per-CPU. Some data is already
12773- * initialized (naturally) in the bootstrap process, such as the GDT
12774- * and IDT. We reload them nevertheless, this function acts as a
12775- * 'CPU state barrier', nothing should get across.
12776- * A lot of state is already set up in PDA init.
12777- */
12778-void __cpuinit cpu_init (void)
12779-{
12780- int cpu = stack_smp_processor_id();
12781-#ifndef CONFIG_X86_NO_TSS
12782- struct tss_struct *t = &per_cpu(init_tss, cpu);
12783- struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu);
12784- unsigned long v;
12785- char *estacks = NULL;
12786- unsigned i;
12787-#endif
12788- struct task_struct *me;
12789-
12790- /* CPU 0 is initialised in head64.c */
12791- if (cpu != 0) {
12792- pda_init(cpu);
12793- }
12794-#ifndef CONFIG_X86_NO_TSS
12795- else
12796- estacks = boot_exception_stacks;
12797-#endif
12798-
12799- me = current;
12800-
12801- if (cpu_test_and_set(cpu, cpu_initialized))
12802- panic("CPU#%d already initialized!\n", cpu);
12803-
12804- printk("Initializing CPU#%d\n", cpu);
12805-
12806- clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
12807-
12808- /*
12809- * Initialize the per-CPU GDT with the boot GDT,
12810- * and set up the GDT descriptor:
12811- */
12812-#ifndef CONFIG_XEN
12813- if (cpu)
12814- memcpy(get_cpu_gdt_table(cpu), cpu_gdt_table, GDT_SIZE);
12815-#endif
12816-
12817- cpu_gdt_descr[cpu].size = GDT_SIZE;
12818- cpu_gdt_init(&cpu_gdt_descr[cpu]);
12819-
12820- memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8);
12821- syscall_init();
12822-
12823- wrmsrl(MSR_FS_BASE, 0);
12824- wrmsrl(MSR_KERNEL_GS_BASE, 0);
12825- barrier();
12826-
12827- check_efer();
12828-
12829-#ifndef CONFIG_X86_NO_TSS
12830- /*
12831- * set up and load the per-CPU TSS
12832- */
12833- for (v = 0; v < N_EXCEPTION_STACKS; v++) {
12834- static const unsigned int order[N_EXCEPTION_STACKS] = {
12835- [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
12836- [DEBUG_STACK - 1] = DEBUG_STACK_ORDER
12837- };
12838- if (cpu) {
12839- estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]);
12840- if (!estacks)
12841- panic("Cannot allocate exception stack %ld %d\n",
12842- v, cpu);
12843- }
12844- estacks += PAGE_SIZE << order[v];
12845- orig_ist->ist[v] = t->x86_tss.ist[v] = (unsigned long)estacks;
12846- }
12847-
12848- t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
12849- /*
12850- * <= is required because the CPU will access up to
12851- * 8 bits beyond the end of the IO permission bitmap.
12852- */
12853- for (i = 0; i <= IO_BITMAP_LONGS; i++)
12854- t->io_bitmap[i] = ~0UL;
12855-#endif
12856-
12857- atomic_inc(&init_mm.mm_count);
12858- me->active_mm = &init_mm;
12859- if (me->mm)
12860- BUG();
12861- enter_lazy_tlb(&init_mm, me);
12862-
12863-#ifndef CONFIG_X86_NO_TSS
12864- set_tss_desc(cpu, t);
12865-#endif
12866-#ifndef CONFIG_XEN
12867- load_TR_desc();
12868-#endif
12869- load_LDT(&init_mm.context);
12870-
12871-#ifdef CONFIG_KGDB
12872- /*
12873- * If the kgdb is connected no debug regs should be altered. This
12874- * is only applicable when KGDB and a KGDB I/O module are built
12875- * into the kernel and you are using early debugging with
12876- * kgdbwait. KGDB will control the kernel HW breakpoint registers.
12877- */
12878- if (kgdb_connected && arch_kgdb_ops.correct_hw_break)
12879- arch_kgdb_ops.correct_hw_break();
12880- else {
12881-#endif
12882- /*
12883- * Clear all 6 debug registers:
12884- */
12885-
12886- set_debugreg(0UL, 0);
12887- set_debugreg(0UL, 1);
12888- set_debugreg(0UL, 2);
12889- set_debugreg(0UL, 3);
12890- set_debugreg(0UL, 6);
12891- set_debugreg(0UL, 7);
12892-#ifdef CONFIG_KGDB
12893- /* If the kgdb is connected no debug regs should be altered. */
12894- }
12895-#endif
12896-
12897- fpu_init();
12898-
12899- asm ("pushfq; popq %0" : "=rm" (kernel_eflags));
12900- if (raw_irqs_disabled())
12901- kernel_eflags &= ~X86_EFLAGS_IF;
12902-
12903- if (is_uv_system())
12904- uv_cpu_init();
12905-}
12906--- sle11-2009-10-16.orig/arch/x86/kernel/setup_32-xen.c 2009-03-16 16:38:05.000000000 +0100
12907+++ /dev/null 1970-01-01 00:00:00.000000000 +0000
12908@@ -1,1151 +0,0 @@
12909-/*
12910- * Copyright (C) 1995 Linus Torvalds
12911- *
12912- * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
12913- *
12914- * Memory region support
12915- * David Parsons <orc@pell.chi.il.us>, July-August 1999
12916- *
12917- * Added E820 sanitization routine (removes overlapping memory regions);
12918- * Brian Moyle <bmoyle@mvista.com>, February 2001
12919- *
12920- * Moved CPU detection code to cpu/${cpu}.c
12921- * Patrick Mochel <mochel@osdl.org>, March 2002
12922- *
12923- * Provisions for empty E820 memory regions (reported by certain BIOSes).
12924- * Alex Achenbach <xela@slit.de>, December 2002.
12925- *
12926- */
12927-
12928-/*
12929- * This file handles the architecture-dependent parts of initialization
12930- */
12931-
12932-#include <linux/sched.h>
12933-#include <linux/mm.h>
12934-#include <linux/mmzone.h>
12935-#include <linux/screen_info.h>
12936-#include <linux/ioport.h>
12937-#include <linux/acpi.h>
12938-#include <linux/apm_bios.h>
12939-#include <linux/initrd.h>
12940-#include <linux/bootmem.h>
12941-#include <linux/seq_file.h>
12942-#include <linux/console.h>
12943-#include <linux/mca.h>
12944-#include <linux/root_dev.h>
12945-#include <linux/highmem.h>
12946-#include <linux/module.h>
12947-#include <linux/efi.h>
12948-#include <linux/init.h>
12949-#include <linux/edd.h>
12950-#include <linux/iscsi_ibft.h>
12951-#include <linux/nodemask.h>
12952-#include <linux/kernel.h>
12953-#include <linux/percpu.h>
12954-#include <linux/notifier.h>
12955-#include <linux/kexec.h>
12956-#include <linux/crash_dump.h>
12957-#include <linux/dmi.h>
12958-#include <linux/pfn.h>
12959-#include <linux/pci.h>
12960-#include <linux/init_ohci1394_dma.h>
12961-#include <linux/kvm_para.h>
12962-
12963-#include <video/edid.h>
12964-
12965-#include <asm/mtrr.h>
12966-#include <asm/apic.h>
12967-#include <asm/e820.h>
12968-#include <asm/mpspec.h>
12969-#include <asm/mmzone.h>
12970-#include <asm/setup.h>
12971-#include <asm/arch_hooks.h>
12972-#include <asm/sections.h>
12973-#include <asm/io_apic.h>
12974-#include <asm/ist.h>
12975-#include <asm/io.h>
12976-#include <asm/hypervisor.h>
12977-#include <xen/interface/physdev.h>
12978-#include <xen/interface/memory.h>
12979-#include <xen/features.h>
12980-#include <xen/firmware.h>
12981-#include <xen/xencons.h>
12982-#include <setup_arch.h>
12983-#include <asm/bios_ebda.h>
12984-#include <asm/cacheflush.h>
12985-#include <asm/processor.h>
12986-
12987-#ifdef CONFIG_XEN
12988-#include <xen/interface/kexec.h>
12989-#endif
12990-
12991-static int xen_panic_event(struct notifier_block *, unsigned long, void *);
12992-static struct notifier_block xen_panic_block = {
12993- xen_panic_event, NULL, 0 /* try to go last */
12994-};
12995-
12996-/*
12997- * Machine setup..
12998- */
12999-static struct resource data_resource = {
13000- .name = "Kernel data",
13001- .start = 0,
13002- .end = 0,
13003- .flags = IORESOURCE_BUSY | IORESOURCE_MEM
13004-};
13005-
13006-static struct resource code_resource = {
13007- .name = "Kernel code",
13008- .start = 0,
13009- .end = 0,
13010- .flags = IORESOURCE_BUSY | IORESOURCE_MEM
13011-};
13012-
13013-static struct resource bss_resource = {
13014- .name = "Kernel bss",
13015- .start = 0,
13016- .end = 0,
13017- .flags = IORESOURCE_BUSY | IORESOURCE_MEM
13018-};
13019-
13020-static struct resource video_ram_resource = {
13021- .name = "Video RAM area",
13022- .start = 0xa0000,
13023- .end = 0xbffff,
13024- .flags = IORESOURCE_BUSY | IORESOURCE_MEM
13025-};
13026-
13027-static struct resource standard_io_resources[] = { {
13028- .name = "dma1",
13029- .start = 0x0000,
13030- .end = 0x001f,
13031- .flags = IORESOURCE_BUSY | IORESOURCE_IO
13032-}, {
13033- .name = "pic1",
13034- .start = 0x0020,
13035- .end = 0x0021,
13036- .flags = IORESOURCE_BUSY | IORESOURCE_IO
13037-}, {
13038- .name = "timer0",
13039- .start = 0x0040,
13040- .end = 0x0043,
13041- .flags = IORESOURCE_BUSY | IORESOURCE_IO
13042-}, {
13043- .name = "timer1",
13044- .start = 0x0050,
13045- .end = 0x0053,
13046- .flags = IORESOURCE_BUSY | IORESOURCE_IO
13047-}, {
13048- .name = "keyboard",
13049- .start = 0x0060,
13050- .end = 0x0060,
13051- .flags = IORESOURCE_BUSY | IORESOURCE_IO
13052-}, {
13053- .name = "keyboard",
13054- .start = 0x0064,
13055- .end = 0x0064,
13056- .flags = IORESOURCE_BUSY | IORESOURCE_IO
13057-}, {
13058- .name = "dma page reg",
13059- .start = 0x0080,
13060- .end = 0x008f,
13061- .flags = IORESOURCE_BUSY | IORESOURCE_IO
13062-}, {
13063- .name = "pic2",
13064- .start = 0x00a0,
13065- .end = 0x00a1,
13066- .flags = IORESOURCE_BUSY | IORESOURCE_IO
13067-}, {
13068- .name = "dma2",
13069- .start = 0x00c0,
13070- .end = 0x00df,
13071- .flags = IORESOURCE_BUSY | IORESOURCE_IO
13072-}, {
13073- .name = "fpu",
13074- .start = 0x00f0,
13075- .end = 0x00ff,
13076- .flags = IORESOURCE_BUSY | IORESOURCE_IO
13077-} };
13078-
13079-/* cpu data as detected by the assembly code in head.S */
13080-struct cpuinfo_x86 new_cpu_data __cpuinitdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
13081-/* common cpu data for all cpus */
13082-struct cpuinfo_x86 boot_cpu_data __read_mostly = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
13083-EXPORT_SYMBOL(boot_cpu_data);
13084-
13085-unsigned int def_to_bigsmp;
13086-
13087-#ifndef CONFIG_X86_PAE
13088-unsigned long mmu_cr4_features;
13089-#else
13090-unsigned long mmu_cr4_features = X86_CR4_PAE;
13091-#endif
13092-
13093-/* for MCA, but anyone else can use it if they want */
13094-unsigned int machine_id;
13095-unsigned int machine_submodel_id;
13096-unsigned int BIOS_revision;
13097-
13098-/* Boot loader ID as an integer, for the benefit of proc_dointvec */
13099-int bootloader_type;
13100-
13101-/* user-defined highmem size */
13102-static unsigned int highmem_pages = -1;
13103-
13104-/*
13105- * Setup options
13106- */
13107-struct screen_info screen_info;
13108-EXPORT_SYMBOL(screen_info);
13109-struct apm_info apm_info;
13110-EXPORT_SYMBOL(apm_info);
13111-struct edid_info edid_info;
13112-EXPORT_SYMBOL_GPL(edid_info);
13113-#ifndef CONFIG_XEN
13114-#define copy_edid() (edid_info = boot_params.edid_info)
13115-#endif
13116-struct ist_info ist_info;
13117-#if defined(CONFIG_X86_SPEEDSTEP_SMI) || \
13118- defined(CONFIG_X86_SPEEDSTEP_SMI_MODULE)
13119-EXPORT_SYMBOL(ist_info);
13120-#endif
13121-
13122-extern void early_cpu_init(void);
13123-extern int root_mountflags;
13124-
13125-unsigned long saved_video_mode;
13126-
13127-#define RAMDISK_IMAGE_START_MASK 0x07FF
13128-#define RAMDISK_PROMPT_FLAG 0x8000
13129-#define RAMDISK_LOAD_FLAG 0x4000
13130-
13131-static char __initdata command_line[COMMAND_LINE_SIZE];
13132-
13133-#ifndef CONFIG_DEBUG_BOOT_PARAMS
13134-struct boot_params __initdata boot_params;
13135-#else
13136-struct boot_params boot_params;
13137-#endif
13138-
13139-/*
13140- * Point at the empty zero page to start with. We map the real shared_info
13141- * page as soon as fixmap is up and running.
13142- */
13143-shared_info_t *HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page;
13144-EXPORT_SYMBOL(HYPERVISOR_shared_info);
13145-
13146-unsigned long *phys_to_machine_mapping;
13147-unsigned long *pfn_to_mfn_frame_list_list, *pfn_to_mfn_frame_list[16];
13148-EXPORT_SYMBOL(phys_to_machine_mapping);
13149-
13150-/* Raw start-of-day parameters from the hypervisor. */
13151-start_info_t *xen_start_info;
13152-EXPORT_SYMBOL(xen_start_info);
13153-
13154-#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
13155-struct edd edd;
13156-#ifdef CONFIG_EDD_MODULE
13157-EXPORT_SYMBOL(edd);
13158-#endif
13159-#ifndef CONFIG_XEN
13160-/**
13161- * copy_edd() - Copy the BIOS EDD information
13162- * from boot_params into a safe place.
13163- *
13164- */
13165-static inline void copy_edd(void)
13166-{
13167- memcpy(edd.mbr_signature, boot_params.edd_mbr_sig_buffer,
13168- sizeof(edd.mbr_signature));
13169- memcpy(edd.edd_info, boot_params.eddbuf, sizeof(edd.edd_info));
13170- edd.mbr_signature_nr = boot_params.edd_mbr_sig_buf_entries;
13171- edd.edd_info_nr = boot_params.eddbuf_entries;
13172-}
13173-#endif
13174-#else
13175-static inline void copy_edd(void)
13176-{
13177-}
13178-#endif
13179-
13180-int __initdata user_defined_memmap;
13181-
13182-/*
13183- * "mem=nopentium" disables the 4MB page tables.
13184- * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
13185- * to <mem>, overriding the bios size.
13186- * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from
13187- * <start> to <start>+<mem>, overriding the bios size.
13188- *
13189- * HPA tells me bootloaders need to parse mem=, so no new
13190- * option should be mem= [also see Documentation/i386/boot.txt]
13191- */
13192-static int __init parse_mem(char *arg)
13193-{
13194- if (!arg)
13195- return -EINVAL;
13196-
13197- if (strcmp(arg, "nopentium") == 0) {
13198- setup_clear_cpu_cap(X86_FEATURE_PSE);
13199- } else {
13200- /* If the user specifies memory size, we
13201- * limit the BIOS-provided memory map to
13202- * that size. exactmap can be used to specify
13203- * the exact map. mem=number can be used to
13204- * trim the existing memory map.
13205- */
13206- unsigned long long mem_size;
13207-
13208- mem_size = memparse(arg, &arg);
13209- limit_regions(mem_size);
13210- user_defined_memmap = 1;
13211- }
13212- return 0;
13213-}
13214-early_param("mem", parse_mem);
13215-
13216-#ifdef CONFIG_PROC_VMCORE
13217-/* elfcorehdr= specifies the location of elf core header
13218- * stored by the crashed kernel.
13219- */
13220-static int __init parse_elfcorehdr(char *arg)
13221-{
13222- if (!arg)
13223- return -EINVAL;
13224-
13225- elfcorehdr_addr = memparse(arg, &arg);
13226- return 0;
13227-}
13228-early_param("elfcorehdr", parse_elfcorehdr);
13229-#endif /* CONFIG_PROC_VMCORE */
13230-
13231-/*
13232- * highmem=size forces highmem to be exactly 'size' bytes.
13233- * This works even on boxes that have no highmem otherwise.
13234- * This also works to reduce highmem size on bigger boxes.
13235- */
13236-static int __init parse_highmem(char *arg)
13237-{
13238- if (!arg)
13239- return -EINVAL;
13240-
13241- highmem_pages = memparse(arg, &arg) >> PAGE_SHIFT;
13242- return 0;
13243-}
13244-early_param("highmem", parse_highmem);
13245-
13246-/*
13247- * vmalloc=size forces the vmalloc area to be exactly 'size'
13248- * bytes. This can be used to increase (or decrease) the
13249- * vmalloc area - the default is 128m.
13250- */
13251-static int __init parse_vmalloc(char *arg)
13252-{
13253- if (!arg)
13254- return -EINVAL;
13255-
13256- __VMALLOC_RESERVE = memparse(arg, &arg);
13257- return 0;
13258-}
13259-early_param("vmalloc", parse_vmalloc);
13260-
13261-#ifndef CONFIG_XEN
13262-/*
13263- * reservetop=size reserves a hole at the top of the kernel address space which
13264- * a hypervisor can load into later. Needed for dynamically loaded hypervisors,
13265- * so relocating the fixmap can be done before paging initialization.
13266- */
13267-static int __init parse_reservetop(char *arg)
13268-{
13269- unsigned long address;
13270-
13271- if (!arg)
13272- return -EINVAL;
13273-
13274- address = memparse(arg, &arg);
13275- reserve_top_address(address);
13276- return 0;
13277-}
13278-early_param("reservetop", parse_reservetop);
13279-#endif
13280-
13281-/*
13282- * Determine low and high memory ranges:
13283- */
13284-unsigned long __init find_max_low_pfn(void)
13285-{
13286- unsigned long max_low_pfn;
13287-
13288- max_low_pfn = max_pfn;
13289- if (max_low_pfn > MAXMEM_PFN) {
13290- if (highmem_pages == -1)
13291- highmem_pages = max_pfn - MAXMEM_PFN;
13292- if (highmem_pages + MAXMEM_PFN < max_pfn)
13293- max_pfn = MAXMEM_PFN + highmem_pages;
13294- if (highmem_pages + MAXMEM_PFN > max_pfn) {
13295- printk("only %luMB highmem pages available, ignoring highmem size of %uMB.\n", pages_to_mb(max_pfn - MAXMEM_PFN), pages_to_mb(highmem_pages));
13296- highmem_pages = 0;
13297- }
13298- max_low_pfn = MAXMEM_PFN;
13299-#ifndef CONFIG_HIGHMEM
13300- /* Maximum memory usable is what is directly addressable */
13301- printk(KERN_WARNING "Warning only %ldMB will be used.\n",
13302- MAXMEM>>20);
13303- if (max_pfn > MAX_NONPAE_PFN)
13304- printk(KERN_WARNING "Use a HIGHMEM64G enabled kernel.\n");
13305- else
13306- printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
13307- max_pfn = MAXMEM_PFN;
13308-#else /* !CONFIG_HIGHMEM */
13309-#ifndef CONFIG_HIGHMEM64G
13310- if (max_pfn > MAX_NONPAE_PFN) {
13311- max_pfn = MAX_NONPAE_PFN;
13312- printk(KERN_WARNING "Warning only 4GB will be used.\n");
13313- printk(KERN_WARNING "Use a HIGHMEM64G enabled kernel.\n");
13314- }
13315-#endif /* !CONFIG_HIGHMEM64G */
13316-#endif /* !CONFIG_HIGHMEM */
13317- } else {
13318- if (highmem_pages == -1)
13319- highmem_pages = 0;
13320-#ifdef CONFIG_HIGHMEM
13321- if (highmem_pages >= max_pfn) {
13322- printk(KERN_ERR "highmem size specified (%uMB) is bigger than pages available (%luMB)!.\n", pages_to_mb(highmem_pages), pages_to_mb(max_pfn));
13323- highmem_pages = 0;
13324- }
13325- if (highmem_pages) {
13326- if (max_low_pfn-highmem_pages < 64*1024*1024/PAGE_SIZE){
13327- printk(KERN_ERR "highmem size %uMB results in smaller than 64MB lowmem, ignoring it.\n", pages_to_mb(highmem_pages));
13328- highmem_pages = 0;
13329- }
13330- max_low_pfn -= highmem_pages;
13331- }
13332-#else
13333- if (highmem_pages)
13334- printk(KERN_ERR "ignoring highmem size on non-highmem kernel!\n");
13335-#endif
13336- }
13337- return max_low_pfn;
13338-}
13339-
13340-#ifndef CONFIG_XEN
13341-#define BIOS_LOWMEM_KILOBYTES 0x413
13342-
13343-/*
13344- * The BIOS places the EBDA/XBDA at the top of conventional
13345- * memory, and usually decreases the reported amount of
13346- * conventional memory (int 0x12) too. This also contains a
13347- * workaround for Dell systems that neglect to reserve EBDA.
13348- * The same workaround also avoids a problem with the AMD768MPX
13349- * chipset: reserve a page before VGA to prevent PCI prefetch
13350- * into it (errata #56). Usually the page is reserved anyways,
13351- * unless you have no PS/2 mouse plugged in.
13352- */
13353-static void __init reserve_ebda_region(void)
13354-{
13355- unsigned int lowmem, ebda_addr;
13356-
13357- /* To determine the position of the EBDA and the */
13358- /* end of conventional memory, we need to look at */
13359- /* the BIOS data area. In a paravirtual environment */
13360- /* that area is absent. We'll just have to assume */
13361- /* that the paravirt case can handle memory setup */
13362- /* correctly, without our help. */
13363- if (paravirt_enabled())
13364- return;
13365-
13366- /* end of low (conventional) memory */
13367- lowmem = *(unsigned short *)__va(BIOS_LOWMEM_KILOBYTES);
13368- lowmem <<= 10;
13369-
13370- /* start of EBDA area */
13371- ebda_addr = get_bios_ebda();
13372-
13373- /* Fixup: bios puts an EBDA in the top 64K segment */
13374- /* of conventional memory, but does not adjust lowmem. */
13375- if ((lowmem - ebda_addr) <= 0x10000)
13376- lowmem = ebda_addr;
13377-
13378- /* Fixup: bios does not report an EBDA at all. */
13379- /* Some old Dells seem to need 4k anyhow (bugzilla 2990) */
13380- if ((ebda_addr == 0) && (lowmem >= 0x9f000))
13381- lowmem = 0x9f000;
13382-
13383- /* Paranoia: should never happen, but... */
13384- if ((lowmem == 0) || (lowmem >= 0x100000))
13385- lowmem = 0x9f000;
13386-
13387- /* reserve all memory between lowmem and the 1MB mark */
13388- reserve_bootmem(lowmem, 0x100000 - lowmem, BOOTMEM_DEFAULT);
13389-}
13390-#endif
13391-
13392-#ifndef CONFIG_NEED_MULTIPLE_NODES
13393-static void __init setup_bootmem_allocator(void);
13394-static unsigned long __init setup_memory(void)
13395-{
13396- /*
13397- * partially used pages are not usable - thus
13398- * we are rounding upwards:
13399- */
13400- min_low_pfn = PFN_UP(__pa(xen_start_info->pt_base)) +
13401- xen_start_info->nr_pt_frames;
13402-
13403- max_low_pfn = find_max_low_pfn();
13404-
13405-#ifdef CONFIG_HIGHMEM
13406- highstart_pfn = highend_pfn = max_pfn;
13407- if (max_pfn > max_low_pfn) {
13408- highstart_pfn = max_low_pfn;
13409- }
13410- printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
13411- pages_to_mb(highend_pfn - highstart_pfn));
13412- num_physpages = highend_pfn;
13413- high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
13414-#else
13415- num_physpages = max_low_pfn;
13416- high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
13417-#endif
13418-#ifdef CONFIG_FLATMEM
13419- max_mapnr = num_physpages;
13420-#endif
13421- printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
13422- pages_to_mb(max_low_pfn));
13423-
13424- setup_bootmem_allocator();
13425-
13426- return max_low_pfn;
13427-}
13428-
13429-static void __init zone_sizes_init(void)
13430-{
13431- unsigned long max_zone_pfns[MAX_NR_ZONES];
13432- memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
13433- max_zone_pfns[ZONE_DMA] =
13434- virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
13435- max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
13436-#ifdef CONFIG_HIGHMEM
13437- max_zone_pfns[ZONE_HIGHMEM] = highend_pfn;
13438- add_active_range(0, 0, highend_pfn);
13439-#else
13440- add_active_range(0, 0, max_low_pfn);
13441-#endif
13442-
13443- free_area_init_nodes(max_zone_pfns);
13444-}
13445-#else
13446-extern unsigned long __init setup_memory(void);
13447-extern void zone_sizes_init(void);
13448-#endif /* !CONFIG_NEED_MULTIPLE_NODES */
13449-
13450-static inline unsigned long long get_total_mem(void)
13451-{
13452- unsigned long long total;
13453-
13454- total = max_low_pfn - min_low_pfn;
13455-#ifdef CONFIG_HIGHMEM
13456- total += highend_pfn - highstart_pfn;
13457-#endif
13458-
13459- return total << PAGE_SHIFT;
13460-}
13461-
13462-#ifdef CONFIG_KEXEC
13463-#ifndef CONFIG_XEN
13464-static void __init reserve_crashkernel(void)
13465-{
13466- unsigned long long total_mem;
13467- unsigned long long crash_size, crash_base;
13468- int ret;
13469-
13470- total_mem = get_total_mem();
13471-
13472- ret = parse_crashkernel(boot_command_line, total_mem,
13473- &crash_size, &crash_base);
13474- if (ret == 0 && crash_size > 0) {
13475- if (crash_base > 0) {
13476- printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
13477- "for crashkernel (System RAM: %ldMB)\n",
13478- (unsigned long)(crash_size >> 20),
13479- (unsigned long)(crash_base >> 20),
13480- (unsigned long)(total_mem >> 20));
13481-
13482- if (reserve_bootmem(crash_base, crash_size,
13483- BOOTMEM_EXCLUSIVE) < 0) {
13484- printk(KERN_INFO "crashkernel reservation "
13485- "failed - memory is in use\n");
13486- return;
13487- }
13488-
13489- crashk_res.start = crash_base;
13490- crashk_res.end = crash_base + crash_size - 1;
13491- } else
13492- printk(KERN_INFO "crashkernel reservation failed - "
13493- "you have to specify a base address\n");
13494- }
13495-}
13496-#else
13497-#define reserve_crashkernel xen_machine_kexec_setup_resources
13498-#endif
13499-#else
13500-static inline void __init reserve_crashkernel(void)
13501-{}
13502-#endif
13503-
13504-#ifdef CONFIG_BLK_DEV_INITRD
13505-
13506-static bool do_relocate_initrd = false;
13507-
13508-static void __init reserve_initrd(void)
13509-{
13510- unsigned long ramdisk_image = __pa(xen_start_info->mod_start);
13511- unsigned long ramdisk_size = xen_start_info->mod_len;
13512- unsigned long ramdisk_end = ramdisk_image + ramdisk_size;
13513- unsigned long end_of_lowmem = max_low_pfn << PAGE_SHIFT;
13514- unsigned long ramdisk_here;
13515-
13516- initrd_start = 0;
13517-
13518- if (!xen_start_info->mod_start || !ramdisk_size)
13519- return; /* No initrd provided by bootloader */
13520-
13521- if (ramdisk_end < ramdisk_image) {
13522- printk(KERN_ERR "initrd wraps around end of memory, "
13523- "disabling initrd\n");
13524- return;
13525- }
13526- if (ramdisk_size >= end_of_lowmem/2) {
13527- printk(KERN_ERR "initrd too large to handle, "
13528- "disabling initrd\n");
13529- return;
13530- }
13531- if (ramdisk_end <= end_of_lowmem) {
13532- /* All in lowmem, easy case */
13533- reserve_bootmem(ramdisk_image, ramdisk_size, BOOTMEM_DEFAULT);
13534- initrd_start = ramdisk_image + PAGE_OFFSET;
13535- initrd_end = initrd_start+ramdisk_size;
13536- return;
13537- }
13538-
13539- /* We need to move the initrd down into lowmem */
13540- ramdisk_here = (end_of_lowmem - ramdisk_size) & PAGE_MASK;
13541-
13542- /* Note: this includes all the lowmem currently occupied by
13543- the initrd, we rely on that fact to keep the data intact. */
13544- reserve_bootmem(ramdisk_here, ramdisk_size, BOOTMEM_DEFAULT);
13545- initrd_start = ramdisk_here + PAGE_OFFSET;
13546- initrd_end = initrd_start + ramdisk_size;
13547-
13548- do_relocate_initrd = true;
13549-}
13550-
13551-#define MAX_MAP_CHUNK (NR_FIX_BTMAPS << PAGE_SHIFT)
13552-
13553-static void __init relocate_initrd(void)
13554-{
13555- unsigned long ramdisk_image = boot_params.hdr.ramdisk_image;
13556- unsigned long ramdisk_size = boot_params.hdr.ramdisk_size;
13557- unsigned long end_of_lowmem = max_low_pfn << PAGE_SHIFT;
13558- unsigned long ramdisk_here;
13559- unsigned long slop, clen, mapaddr;
13560- char *p, *q;
13561-
13562- if (!do_relocate_initrd)
13563- return;
13564-
13565- ramdisk_here = initrd_start - PAGE_OFFSET;
13566-
13567- q = (char *)initrd_start;
13568-
13569- /* Copy any lowmem portion of the initrd */
13570- if (ramdisk_image < end_of_lowmem) {
13571- clen = end_of_lowmem - ramdisk_image;
13572- p = (char *)__va(ramdisk_image);
13573- memcpy(q, p, clen);
13574- q += clen;
13575- ramdisk_image += clen;
13576- ramdisk_size -= clen;
13577- }
13578-
13579- /* Copy the highmem portion of the initrd */
13580- while (ramdisk_size) {
13581- slop = ramdisk_image & ~PAGE_MASK;
13582- clen = ramdisk_size;
13583- if (clen > MAX_MAP_CHUNK-slop)
13584- clen = MAX_MAP_CHUNK-slop;
13585- mapaddr = ramdisk_image & PAGE_MASK;
13586- p = early_ioremap(mapaddr, clen+slop);
13587- memcpy(q, p+slop, clen);
13588- early_iounmap(p, clen+slop);
13589- q += clen;
13590- ramdisk_image += clen;
13591- ramdisk_size -= clen;
13592- }
13593-}
13594-
13595-#endif /* CONFIG_BLK_DEV_INITRD */
13596-
13597-void __init setup_bootmem_allocator(void)
13598-{
13599- unsigned long bootmap_size;
13600- /*
13601- * Initialize the boot-time allocator (with low memory only):
13602- */
13603- bootmap_size = init_bootmem(min_low_pfn, max_low_pfn);
13604-
13605- register_bootmem_low_pages(max_low_pfn);
13606-
13607- /*
13608- * Reserve the bootmem bitmap itself as well. We do this in two
13609- * steps (first step was init_bootmem()) because this catches
13610- * the (very unlikely) case of us accidentally initializing the
13611- * bootmem allocator with an invalid RAM area.
13612- */
13613- reserve_bootmem(__pa_symbol(_text), (PFN_PHYS(min_low_pfn) +
13614- bootmap_size + PAGE_SIZE-1) - __pa_symbol(_text),
13615- BOOTMEM_DEFAULT);
13616-
13617-#ifndef CONFIG_XEN
13618- /*
13619- * reserve physical page 0 - it's a special BIOS page on many boxes,
13620- * enabling clean reboots, SMP operation, laptop functions.
13621- */
13622- reserve_bootmem(0, PAGE_SIZE, BOOTMEM_DEFAULT);
13623-
13624- /* reserve EBDA region */
13625- reserve_ebda_region();
13626-
13627-#ifdef CONFIG_SMP
13628- /*
13629- * But first pinch a few for the stack/trampoline stuff
13630- * FIXME: Don't need the extra page at 4K, but need to fix
13631- * trampoline before removing it. (see the GDT stuff)
13632- */
13633- reserve_bootmem(PAGE_SIZE, PAGE_SIZE, BOOTMEM_DEFAULT);
13634-#endif
13635-#ifdef CONFIG_ACPI_SLEEP
13636- /*
13637- * Reserve low memory region for sleep support.
13638- */
13639- acpi_reserve_bootmem();
13640-#endif
13641-#endif /* !CONFIG_XEN */
13642-
13643-#ifdef CONFIG_BLK_DEV_INITRD
13644- reserve_initrd();
13645-#endif
13646- numa_kva_reserve();
13647- reserve_crashkernel();
13648-
13649- reserve_ibft_region();
13650-}
13651-
13652-/*
13653- * The node 0 pgdat is initialized before all of these because
13654- * it's needed for bootmem. node>0 pgdats have their virtual
13655- * space allocated before the pagetables are in place to access
13656- * them, so they can't be cleared then.
13657- *
13658- * This should all compile down to nothing when NUMA is off.
13659- */
13660-static void __init remapped_pgdat_init(void)
13661-{
13662- int nid;
13663-
13664- for_each_online_node(nid) {
13665- if (nid != 0)
13666- memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
13667- }
13668-}
13669-
13670-#ifdef CONFIG_MCA
13671-static void set_mca_bus(int x)
13672-{
13673- MCA_bus = x;
13674-}
13675-#else
13676-static void set_mca_bus(int x) { }
13677-#endif
13678-
13679-/* Overridden in paravirt.c if CONFIG_PARAVIRT */
13680-char * __init __attribute__((weak)) memory_setup(void)
13681-{
13682- return machine_specific_memory_setup();
13683-}
13684-
13685-#ifdef CONFIG_NUMA
13686-/*
13687- * In the golden day, when everything among i386 and x86_64 will be
13688- * integrated, this will not live here
13689- */
13690-void *x86_cpu_to_node_map_early_ptr;
13691-int x86_cpu_to_node_map_init[NR_CPUS] = {
13692- [0 ... NR_CPUS-1] = NUMA_NO_NODE
13693-};
13694-DEFINE_PER_CPU(int, x86_cpu_to_node_map) = NUMA_NO_NODE;
13695-#endif
13696-
13697-/*
13698- * Determine if we were loaded by an EFI loader. If so, then we have also been
13699- * passed the efi memmap, systab, etc., so we should use these data structures
13700- * for initialization. Note, the efi init code path is determined by the
13701- * global efi_enabled. This allows the same kernel image to be used on existing
13702- * systems (with a traditional BIOS) as well as on EFI systems.
13703- */
13704-void __init setup_arch(char **cmdline_p)
13705-{
13706- int i, j, k, fpp;
13707- struct physdev_set_iopl set_iopl;
13708- unsigned long max_low_pfn;
13709- unsigned long p2m_pages;
13710-
13711- /* Force a quick death if the kernel panics (not domain 0). */
13712- extern int panic_timeout;
13713- if (!panic_timeout && !is_initial_xendomain())
13714- panic_timeout = 1;
13715-
13716- /* Register a call for panic conditions. */
13717- atomic_notifier_chain_register(&panic_notifier_list, &xen_panic_block);
13718-
13719- WARN_ON(HYPERVISOR_vm_assist(VMASST_CMD_enable,
13720- VMASST_TYPE_4gb_segments));
13721- WARN_ON(HYPERVISOR_vm_assist(VMASST_CMD_enable,
13722- VMASST_TYPE_writable_pagetables));
13723-
13724- memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
13725- pre_setup_arch_hook();
13726- early_cpu_init();
13727- early_ioremap_init();
13728-#ifdef CONFIG_SMP
13729- prefill_possible_map();
13730-#endif
13731-
13732-#ifdef CONFIG_EFI
13733- if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature,
13734- "EL32", 4))
13735- efi_enabled = 1;
13736-#endif
13737-
13738- /* This must be initialized to UNNAMED_MAJOR for ipconfig to work
13739- properly. Setting ROOT_DEV to default to /dev/ram0 breaks initrd.
13740- */
13741- ROOT_DEV = MKDEV(UNNAMED_MAJOR,0);
13742- screen_info = boot_params.screen_info;
13743- copy_edid();
13744- apm_info.bios = boot_params.apm_bios_info;
13745- ist_info = boot_params.ist_info;
13746- saved_video_mode = boot_params.hdr.vid_mode;
13747- if( boot_params.sys_desc_table.length != 0 ) {
13748- set_mca_bus(boot_params.sys_desc_table.table[3] & 0x2);
13749- machine_id = boot_params.sys_desc_table.table[0];
13750- machine_submodel_id = boot_params.sys_desc_table.table[1];
13751- BIOS_revision = boot_params.sys_desc_table.table[2];
13752- }
13753- bootloader_type = boot_params.hdr.type_of_loader;
13754-
13755- if (is_initial_xendomain()) {
13756- const struct dom0_vga_console_info *info =
13757- (void *)((char *)xen_start_info +
13758- xen_start_info->console.dom0.info_off);
13759-
13760- dom0_init_screen_info(info,
13761- xen_start_info->console.dom0.info_size);
13762- xen_start_info->console.domU.mfn = 0;
13763- xen_start_info->console.domU.evtchn = 0;
13764- } else
13765- screen_info.orig_video_isVGA = 0;
13766-
13767-#ifdef CONFIG_BLK_DEV_RAM
13768- rd_image_start = boot_params.hdr.ram_size & RAMDISK_IMAGE_START_MASK;
13769- rd_prompt = ((boot_params.hdr.ram_size & RAMDISK_PROMPT_FLAG) != 0);
13770- rd_doload = ((boot_params.hdr.ram_size & RAMDISK_LOAD_FLAG) != 0);
13771-#endif
13772-
13773- ARCH_SETUP
13774-
13775- printk(KERN_INFO "BIOS-provided physical RAM map:\n");
13776- print_memory_map(memory_setup());
13777-
13778- copy_edd();
13779-
13780- if (!boot_params.hdr.root_flags)
13781- root_mountflags &= ~MS_RDONLY;
13782- init_mm.start_code = (unsigned long) _text;
13783- init_mm.end_code = (unsigned long) _etext;
13784- init_mm.end_data = (unsigned long) _edata;
13785- init_mm.brk = (PFN_UP(__pa(xen_start_info->pt_base)) +
13786- xen_start_info->nr_pt_frames) << PAGE_SHIFT;
13787-
13788- code_resource.start = virt_to_phys(_text);
13789- code_resource.end = virt_to_phys(_etext)-1;
13790- data_resource.start = virt_to_phys(_etext);
13791- data_resource.end = virt_to_phys(_edata)-1;
13792- bss_resource.start = virt_to_phys(&__bss_start);
13793- bss_resource.end = virt_to_phys(&__bss_stop)-1;
13794-
13795- if ((i = MAX_GUEST_CMDLINE) > COMMAND_LINE_SIZE)
13796- i = COMMAND_LINE_SIZE;
13797- memcpy(boot_command_line, xen_start_info->cmd_line, i);
13798- boot_command_line[i - 1] = '\0';
13799- parse_early_param();
13800-
13801- if (user_defined_memmap) {
13802- printk(KERN_INFO "user-defined physical RAM map:\n");
13803- print_memory_map("user");
13804- }
13805-
13806- strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
13807- *cmdline_p = command_line;
13808-
13809- if (efi_enabled)
13810- efi_init();
13811-
13812- /* update e820 for memory not covered by WB MTRRs */
13813- propagate_e820_map();
13814- mtrr_bp_init();
13815-#ifndef CONFIG_XEN
13816- if (mtrr_trim_uncached_memory(max_pfn))
13817- propagate_e820_map();
13818-#endif
13819-
13820- max_low_pfn = setup_memory();
13821-
13822-#ifdef CONFIG_KVM_CLOCK
13823- kvmclock_init();
13824-#endif
13825-
13826-#ifdef CONFIG_VMI
13827- /*
13828- * Must be after max_low_pfn is determined, and before kernel
13829- * pagetables are setup.
13830- */
13831- vmi_init();
13832-#endif
13833- kvm_guest_init();
13834-
13835- /*
13836- * NOTE: before this point _nobody_ is allowed to allocate
13837- * any memory using the bootmem allocator. Although the
13838- * allocator is now initialised only the first 8Mb of the kernel
13839- * virtual address space has been mapped. All allocations before
13840- * paging_init() has completed must use the alloc_bootmem_low_pages()
13841- * variant (which allocates DMA'able memory) and care must be taken
13842- * not to exceed the 8Mb limit.
13843- */
13844-
13845-#ifdef CONFIG_SMP
13846- smp_alloc_memory(); /* AP processor realmode stacks in low memory*/
13847-#endif
13848- paging_init();
13849-
13850- /*
13851- * NOTE: On x86-32, only from this point on, fixmaps are ready for use.
13852- */
13853-
13854-#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
13855- if (init_ohci1394_dma_early)
13856- init_ohci1394_dma_on_all_controllers();
13857-#endif
13858-
13859- remapped_pgdat_init();
13860- sparse_init();
13861- zone_sizes_init();
13862-
13863-#ifdef CONFIG_X86_FIND_SMP_CONFIG
13864- /*
13865- * Find and reserve possible boot-time SMP configuration:
13866- */
13867- find_smp_config();
13868-#endif
13869-
13870- p2m_pages = max_pfn;
13871- if (xen_start_info->nr_pages > max_pfn) {
13872- /*
13873- * the max_pfn was shrunk (probably by mem= or highmem=
13874- * kernel parameter); shrink reservation with the HV
13875- */
13876- struct xen_memory_reservation reservation = {
13877- .address_bits = 0,
13878- .extent_order = 0,
13879- .domid = DOMID_SELF
13880- };
13881- unsigned int difference;
13882- int ret;
13883-
13884- difference = xen_start_info->nr_pages - max_pfn;
13885-
13886- set_xen_guest_handle(reservation.extent_start,
13887- ((unsigned long *)xen_start_info->mfn_list) + max_pfn);
13888- reservation.nr_extents = difference;
13889- ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
13890- &reservation);
13891- BUG_ON (ret != difference);
13892- }
13893- else if (max_pfn > xen_start_info->nr_pages)
13894- p2m_pages = xen_start_info->nr_pages;
13895-
13896- /* Make sure we have a correctly sized P->M table. */
13897- if (!xen_feature(XENFEAT_auto_translated_physmap)) {
13898- phys_to_machine_mapping = alloc_bootmem_low_pages(
13899- max_pfn * sizeof(unsigned long));
13900- memset(phys_to_machine_mapping, ~0,
13901- max_pfn * sizeof(unsigned long));
13902- memcpy(phys_to_machine_mapping,
13903- (unsigned long *)xen_start_info->mfn_list,
13904- p2m_pages * sizeof(unsigned long));
13905- free_bootmem(
13906- __pa(xen_start_info->mfn_list),
13907- PFN_PHYS(PFN_UP(xen_start_info->nr_pages *
13908- sizeof(unsigned long))));
13909-
13910- /*
13911- * Initialise the list of the frames that specify the list of
13912- * frames that make up the p2m table. Used by save/restore
13913- */
13914- pfn_to_mfn_frame_list_list = alloc_bootmem_low_pages(PAGE_SIZE);
13915-
13916- fpp = PAGE_SIZE/sizeof(unsigned long);
13917- for (i=0, j=0, k=-1; i< max_pfn; i+=fpp, j++) {
13918- if ((j % fpp) == 0) {
13919- k++;
13920- BUG_ON(k>=16);
13921- pfn_to_mfn_frame_list[k] =
13922- alloc_bootmem_low_pages(PAGE_SIZE);
13923- pfn_to_mfn_frame_list_list[k] =
13924- virt_to_mfn(pfn_to_mfn_frame_list[k]);
13925- j=0;
13926- }
13927- pfn_to_mfn_frame_list[k][j] =
13928- virt_to_mfn(&phys_to_machine_mapping[i]);
13929- }
13930- HYPERVISOR_shared_info->arch.max_pfn = max_pfn;
13931- HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
13932- virt_to_mfn(pfn_to_mfn_frame_list_list);
13933- }
13934-
13935- /* Mark all ISA DMA channels in-use - using them wouldn't work. */
13936- for (i = 0; i < MAX_DMA_CHANNELS; ++i)
13937- if (i != 4 && request_dma(i, "xen") != 0)
13938- BUG();
13939-
13940- /*
13941- * NOTE: at this point the bootmem allocator is fully available.
13942- */
13943-
13944-#ifdef CONFIG_BLK_DEV_INITRD
13945- relocate_initrd();
13946-#endif
13947-
13948- paravirt_post_allocator_init();
13949-
13950- if (is_initial_xendomain())
13951- dmi_scan_machine();
13952-
13953- io_delay_init();
13954-
13955-#if defined(CONFIG_X86_SMP) && !defined(CONFIG_XEN)
13956- /*
13957- * setup to use the early static init tables during kernel startup
13958- * X86_SMP will exclude sub-arches that don't deal well with it.
13959- */
13960- x86_cpu_to_apicid_early_ptr = (void *)x86_cpu_to_apicid_init;
13961- x86_bios_cpu_apicid_early_ptr = (void *)x86_bios_cpu_apicid_init;
13962-#ifdef CONFIG_NUMA
13963- x86_cpu_to_node_map_early_ptr = (void *)x86_cpu_to_node_map_init;
13964-#endif
13965-#endif
13966-
13967-#ifdef CONFIG_X86_GENERICARCH
13968- generic_apic_probe();
13969-#endif
13970-
13971- set_iopl.iopl = 1;
13972- WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl));
13973-
13974-#ifdef CONFIG_ACPI
13975- if (!is_initial_xendomain()) {
13976- printk(KERN_INFO "ACPI in unprivileged domain disabled\n");
13977- acpi_disabled = 1;
13978- acpi_ht = 0;
13979- }
13980-
13981- /*
13982- * Parse the ACPI tables for possible boot-time SMP configuration.
13983- */
13984- acpi_boot_table_init();
13985-#endif
13986-
13987-#ifndef CONFIG_XEN
13988- early_quirks();
13989-#endif
13990-
13991-#ifdef CONFIG_ACPI
13992- acpi_boot_init();
13993-
13994-#if defined(CONFIG_SMP) && defined(CONFIG_X86_PC)
13995- if (def_to_bigsmp)
13996- printk(KERN_WARNING "More than 8 CPUs detected and "
13997- "CONFIG_X86_PC cannot handle it.\nUse "
13998- "CONFIG_X86_GENERICARCH or CONFIG_X86_BIGSMP.\n");
13999-#endif
14000-#endif
14001-#ifdef CONFIG_X86_LOCAL_APIC
14002- if (smp_found_config)
14003- get_smp_config();
14004-#endif
14005-
14006- e820_register_memory();
14007- e820_mark_nosave_regions();
14008-
14009- if (is_initial_xendomain()) {
14010-#ifdef CONFIG_VT
14011-#if defined(CONFIG_VGA_CONSOLE)
14012- if (!efi_enabled ||
14013- (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY))
14014- conswitchp = &vga_con;
14015-#elif defined(CONFIG_DUMMY_CONSOLE)
14016- conswitchp = &dummy_con;
14017-#endif
14018-#endif
14019- } else {
14020-#if defined(CONFIG_VT) && defined(CONFIG_DUMMY_CONSOLE)
14021- conswitchp = &dummy_con;
14022-#endif
14023- }
14024-}
14025-
14026-static int
14027-xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr)
14028-{
14029- HYPERVISOR_shutdown(SHUTDOWN_crash);
14030- /* we're never actually going to get here... */
14031- return NOTIFY_DONE;
14032-}
14033-
14034-/*
14035- * Request address space for all standard resources
14036- *
14037- * This is called just before pcibios_init(), which is also a
14038- * subsys_initcall, but is linked in later (in arch/i386/pci/common.c).
14039- */
14040-static int __init request_standard_resources(void)
14041-{
14042- int i;
14043-
14044- /* Nothing to do if not running in dom0. */
14045- if (!is_initial_xendomain())
14046- return 0;
14047-
14048- printk(KERN_INFO "Setting up standard PCI resources\n");
14049- init_iomem_resources(&code_resource, &data_resource, &bss_resource);
14050-
14051- request_resource(&iomem_resource, &video_ram_resource);
14052-
14053- /* request I/O space for devices used on all i[345]86 PCs */
14054- for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++)
14055- request_resource(&ioport_resource, &standard_io_resources[i]);
14056- return 0;
14057-}
14058-
14059-subsys_initcall(request_standard_resources);
14060--- sle11-2009-10-16.orig/arch/x86/kernel/setup_64-xen.c 2009-03-16 16:38:05.000000000 +0100
14061+++ /dev/null 1970-01-01 00:00:00.000000000 +0000
14062@@ -1,1433 +0,0 @@
14063-/*
14064- * Copyright (C) 1995 Linus Torvalds
14065- */
14066-
14067-/*
14068- * This file handles the architecture-dependent parts of initialization
14069- */
14070-
14071-#include <linux/errno.h>
14072-#include <linux/sched.h>
14073-#include <linux/kernel.h>
14074-#include <linux/mm.h>
14075-#include <linux/stddef.h>
14076-#include <linux/unistd.h>
14077-#include <linux/ptrace.h>
14078-#include <linux/slab.h>
14079-#include <linux/user.h>
14080-#include <linux/screen_info.h>
14081-#include <linux/ioport.h>
14082-#include <linux/delay.h>
14083-#include <linux/init.h>
14084-#include <linux/initrd.h>
14085-#include <linux/highmem.h>
14086-#include <linux/bootmem.h>
14087-#include <linux/module.h>
14088-#include <asm/processor.h>
14089-#include <linux/console.h>
14090-#include <linux/seq_file.h>
14091-#include <linux/crash_dump.h>
14092-#include <linux/root_dev.h>
14093-#include <linux/pci.h>
14094-#include <asm/pci-direct.h>
14095-#include <linux/efi.h>
14096-#include <linux/acpi.h>
14097-#include <linux/kallsyms.h>
14098-#include <linux/edd.h>
14099-#include <linux/iscsi_ibft.h>
14100-#include <linux/mmzone.h>
14101-#include <linux/kexec.h>
14102-#include <linux/cpufreq.h>
14103-#include <linux/dmi.h>
14104-#include <linux/dma-mapping.h>
14105-#include <linux/ctype.h>
14106-#include <linux/sort.h>
14107-#include <linux/uaccess.h>
14108-#include <linux/init_ohci1394_dma.h>
14109-#include <linux/kvm_para.h>
14110-
14111-#include <asm/mtrr.h>
14112-#include <asm/uaccess.h>
14113-#include <asm/system.h>
14114-#include <asm/vsyscall.h>
14115-#include <asm/io.h>
14116-#include <asm/smp.h>
14117-#include <asm/msr.h>
14118-#include <asm/desc.h>
14119-#include <video/edid.h>
14120-#include <asm/e820.h>
14121-#include <asm/dma.h>
14122-#include <asm/gart.h>
14123-#include <asm/mpspec.h>
14124-#include <asm/mmu_context.h>
14125-#include <asm/proto.h>
14126-#include <asm/setup.h>
14127-#include <asm/numa.h>
14128-#include <asm/sections.h>
14129-#include <asm/dmi.h>
14130-#include <asm/cacheflush.h>
14131-#include <asm/mce.h>
14132-#include <asm/ds.h>
14133-#include <asm/topology.h>
14134-#include <asm/pat.h>
14135-
14136-#include <mach_apic.h>
14137-#ifdef CONFIG_XEN
14138-#include <linux/percpu.h>
14139-#include <xen/interface/physdev.h>
14140-#include "setup_arch_pre.h"
14141-#include <asm/hypervisor.h>
14142-#include <xen/interface/nmi.h>
14143-#include <xen/features.h>
14144-#include <xen/firmware.h>
14145-#include <xen/xencons.h>
14146-#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
14147-#define PFN_PHYS(x) ((x) << PAGE_SHIFT)
14148-#include <asm/mach-xen/setup_arch_post.h>
14149-#include <xen/interface/memory.h>
14150-
14151-#ifdef CONFIG_XEN
14152-#include <xen/interface/kexec.h>
14153-#endif
14154-
14155-extern unsigned long start_pfn;
14156-extern struct edid_info edid_info;
14157-
14158-shared_info_t *HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page;
14159-EXPORT_SYMBOL(HYPERVISOR_shared_info);
14160-
14161-static int xen_panic_event(struct notifier_block *, unsigned long, void *);
14162-static struct notifier_block xen_panic_block = {
14163- xen_panic_event, NULL, 0 /* try to go last */
14164-};
14165-
14166-unsigned long *phys_to_machine_mapping;
14167-unsigned long *pfn_to_mfn_frame_list_list, *pfn_to_mfn_frame_list[512];
14168-
14169-EXPORT_SYMBOL(phys_to_machine_mapping);
14170-
14171-DEFINE_PER_CPU(multicall_entry_t, multicall_list[8]);
14172-DEFINE_PER_CPU(int, nr_multicall_ents);
14173-
14174-/* Raw start-of-day parameters from the hypervisor. */
14175-start_info_t *xen_start_info;
14176-EXPORT_SYMBOL(xen_start_info);
14177-#endif
14178-
14179-/*
14180- * Machine setup..
14181- */
14182-
14183-struct cpuinfo_x86 boot_cpu_data __read_mostly;
14184-EXPORT_SYMBOL(boot_cpu_data);
14185-
14186-__u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata;
14187-
14188-unsigned long mmu_cr4_features;
14189-
14190-/* Boot loader ID as an integer, for the benefit of proc_dointvec */
14191-int bootloader_type;
14192-
14193-unsigned long saved_video_mode;
14194-
14195-int force_mwait __cpuinitdata;
14196-
14197-/*
14198- * Early DMI memory
14199- */
14200-int dmi_alloc_index;
14201-char dmi_alloc_data[DMI_MAX_DATA];
14202-
14203-/*
14204- * Setup options
14205- */
14206-struct screen_info screen_info;
14207-EXPORT_SYMBOL(screen_info);
14208-struct sys_desc_table_struct {
14209- unsigned short length;
14210- unsigned char table[0];
14211-};
14212-
14213-struct edid_info edid_info;
14214-EXPORT_SYMBOL_GPL(edid_info);
14215-
14216-extern int root_mountflags;
14217-
14218-char __initdata command_line[COMMAND_LINE_SIZE];
14219-
14220-static struct resource standard_io_resources[] = {
14221- { .name = "dma1", .start = 0x00, .end = 0x1f,
14222- .flags = IORESOURCE_BUSY | IORESOURCE_IO },
14223- { .name = "pic1", .start = 0x20, .end = 0x21,
14224- .flags = IORESOURCE_BUSY | IORESOURCE_IO },
14225- { .name = "timer0", .start = 0x40, .end = 0x43,
14226- .flags = IORESOURCE_BUSY | IORESOURCE_IO },
14227- { .name = "timer1", .start = 0x50, .end = 0x53,
14228- .flags = IORESOURCE_BUSY | IORESOURCE_IO },
14229- { .name = "keyboard", .start = 0x60, .end = 0x60,
14230- .flags = IORESOURCE_BUSY | IORESOURCE_IO },
14231- { .name = "keyboard", .start = 0x64, .end = 0x64,
14232- .flags = IORESOURCE_BUSY | IORESOURCE_IO },
14233- { .name = "dma page reg", .start = 0x80, .end = 0x8f,
14234- .flags = IORESOURCE_BUSY | IORESOURCE_IO },
14235- { .name = "pic2", .start = 0xa0, .end = 0xa1,
14236- .flags = IORESOURCE_BUSY | IORESOURCE_IO },
14237- { .name = "dma2", .start = 0xc0, .end = 0xdf,
14238- .flags = IORESOURCE_BUSY | IORESOURCE_IO },
14239- { .name = "fpu", .start = 0xf0, .end = 0xff,
14240- .flags = IORESOURCE_BUSY | IORESOURCE_IO }
14241-};
14242-
14243-#define IORESOURCE_RAM (IORESOURCE_BUSY | IORESOURCE_MEM)
14244-
14245-static struct resource data_resource = {
14246- .name = "Kernel data",
14247- .start = 0,
14248- .end = 0,
14249- .flags = IORESOURCE_RAM,
14250-};
14251-static struct resource code_resource = {
14252- .name = "Kernel code",
14253- .start = 0,
14254- .end = 0,
14255- .flags = IORESOURCE_RAM,
14256-};
14257-static struct resource bss_resource = {
14258- .name = "Kernel bss",
14259- .start = 0,
14260- .end = 0,
14261- .flags = IORESOURCE_RAM,
14262-};
14263-
14264-static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c);
14265-
14266-#ifdef CONFIG_PROC_VMCORE
14267-/* elfcorehdr= specifies the location of elf core header
14268- * stored by the crashed kernel. This option will be passed
14269- * by kexec loader to the capture kernel.
14270- */
14271-static int __init setup_elfcorehdr(char *arg)
14272-{
14273- char *end;
14274- if (!arg)
14275- return -EINVAL;
14276- elfcorehdr_addr = memparse(arg, &end);
14277- return end > arg ? 0 : -EINVAL;
14278-}
14279-early_param("elfcorehdr", setup_elfcorehdr);
14280-#endif
14281-
14282-#ifndef CONFIG_NUMA
14283-static void __init
14284-contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
14285-{
14286- unsigned long bootmap_size, bootmap;
14287-
14288- bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT;
14289- bootmap = find_e820_area(0, end_pfn<<PAGE_SHIFT, bootmap_size,
14290- PAGE_SIZE);
14291- if (bootmap == -1L)
14292- panic("Cannot find bootmem map of size %ld\n", bootmap_size);
14293- bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn);
14294- e820_register_active_regions(0, start_pfn, end_pfn);
14295-#ifdef CONFIG_XEN
14296- free_bootmem_with_active_regions(0, xen_start_info->nr_pages);
14297- early_res_to_bootmem(0, xen_start_info->nr_pages<<PAGE_SHIFT);
14298-#else
14299- free_bootmem_with_active_regions(0, end_pfn);
14300- early_res_to_bootmem(0, end_pfn<<PAGE_SHIFT);
14301-#endif
14302- reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT);
14303-}
14304-#endif
14305-
14306-#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
14307-struct edd edd;
14308-#ifdef CONFIG_EDD_MODULE
14309-EXPORT_SYMBOL(edd);
14310-#endif
14311-#ifndef CONFIG_XEN
14312-/**
14313- * copy_edd() - Copy the BIOS EDD information
14314- * from boot_params into a safe place.
14315- *
14316- */
14317-static inline void copy_edd(void)
14318-{
14319- memcpy(edd.mbr_signature, boot_params.edd_mbr_sig_buffer,
14320- sizeof(edd.mbr_signature));
14321- memcpy(edd.edd_info, boot_params.eddbuf, sizeof(edd.edd_info));
14322- edd.mbr_signature_nr = boot_params.edd_mbr_sig_buf_entries;
14323- edd.edd_info_nr = boot_params.eddbuf_entries;
14324-}
14325-#endif
14326-#else
14327-static inline void copy_edd(void)
14328-{
14329-}
14330-#endif
14331-
14332-#ifdef CONFIG_KEXEC
14333-#ifndef CONFIG_XEN
14334-static void __init reserve_crashkernel(void)
14335-{
14336- unsigned long long total_mem;
14337- unsigned long long crash_size, crash_base;
14338- int ret;
14339-
14340- total_mem = ((unsigned long long)max_low_pfn - min_low_pfn) << PAGE_SHIFT;
14341-
14342- ret = parse_crashkernel(boot_command_line, total_mem,
14343- &crash_size, &crash_base);
14344- if (ret == 0 && crash_size) {
14345- if (crash_base <= 0) {
14346- printk(KERN_INFO "crashkernel reservation failed - "
14347- "you have to specify a base address\n");
14348- return;
14349- }
14350-
14351- if (reserve_bootmem(crash_base, crash_size,
14352- BOOTMEM_EXCLUSIVE) < 0) {
14353- printk(KERN_INFO "crashkernel reservation failed - "
14354- "memory is in use\n");
14355- return;
14356- }
14357-
14358- printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
14359- "for crashkernel (System RAM: %ldMB)\n",
14360- (unsigned long)(crash_size >> 20),
14361- (unsigned long)(crash_base >> 20),
14362- (unsigned long)(total_mem >> 20));
14363- crashk_res.start = crash_base;
14364- crashk_res.end = crash_base + crash_size - 1;
14365- insert_resource(&iomem_resource, &crashk_res);
14366- }
14367-}
14368-#else
14369-#define reserve_crashkernel xen_machine_kexec_setup_resources
14370-#endif
14371-#else
14372-static inline void __init reserve_crashkernel(void)
14373-{}
14374-#endif
14375-
14376-/* Overridden in paravirt.c if CONFIG_PARAVIRT */
14377-void __attribute__((weak)) __init memory_setup(void)
14378-{
14379- machine_specific_memory_setup();
14380-}
14381-
14382-static void __init parse_setup_data(void)
14383-{
14384- struct setup_data *data;
14385- unsigned long pa_data;
14386-
14387- if (boot_params.hdr.version < 0x0209)
14388- return;
14389- pa_data = boot_params.hdr.setup_data;
14390- while (pa_data) {
14391- data = early_ioremap(pa_data, PAGE_SIZE);
14392- switch (data->type) {
14393- default:
14394- break;
14395- }
14396-#ifndef CONFIG_DEBUG_BOOT_PARAMS
14397- free_early(pa_data, pa_data+sizeof(*data)+data->len);
14398-#endif
14399- pa_data = data->next;
14400- early_iounmap(data, PAGE_SIZE);
14401- }
14402-}
14403-
14404-#ifdef CONFIG_PCI_MMCONFIG
14405-extern void __cpuinit fam10h_check_enable_mmcfg(void);
14406-extern void __init check_enable_amd_mmconf_dmi(void);
14407-#else
14408-void __cpuinit fam10h_check_enable_mmcfg(void)
14409-{
14410-}
14411-void __init check_enable_amd_mmconf_dmi(void)
14412-{
14413-}
14414-#endif
14415-
14416-/*
14417- * setup_arch - architecture-specific boot-time initializations
14418- *
14419- * Note: On x86_64, fixmaps are ready for use even before this is called.
14420- */
14421-void __init setup_arch(char **cmdline_p)
14422-{
14423- unsigned i;
14424-
14425-#ifdef CONFIG_XEN
14426- extern struct e820map machine_e820;
14427-
14428- printk(KERN_INFO "Command line: %s\n", boot_command_line);
14429-
14430- /* Register a call for panic conditions. */
14431- atomic_notifier_chain_register(&panic_notifier_list, &xen_panic_block);
14432-
14433- WARN_ON(HYPERVISOR_vm_assist(VMASST_CMD_enable,
14434- VMASST_TYPE_writable_pagetables));
14435-
14436- early_ioremap_init();
14437-
14438- ROOT_DEV = MKDEV(RAMDISK_MAJOR,0);
14439- screen_info = boot_params.screen_info;
14440-
14441- if (is_initial_xendomain()) {
14442- const struct dom0_vga_console_info *info =
14443- (void *)((char *)xen_start_info +
14444- xen_start_info->console.dom0.info_off);
14445-
14446- dom0_init_screen_info(info,
14447- xen_start_info->console.dom0.info_size);
14448- xen_start_info->console.domU.mfn = 0;
14449- xen_start_info->console.domU.evtchn = 0;
14450- } else
14451- screen_info.orig_video_isVGA = 0;
14452-
14453- copy_edid();
14454-#else
14455- printk(KERN_INFO "Command line: %s\n", boot_command_line);
14456-
14457- ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev);
14458- screen_info = boot_params.screen_info;
14459- edid_info = boot_params.edid_info;
14460-#endif /* !CONFIG_XEN */
14461- saved_video_mode = boot_params.hdr.vid_mode;
14462- bootloader_type = boot_params.hdr.type_of_loader;
14463-
14464-#ifdef CONFIG_BLK_DEV_RAM
14465- rd_image_start = boot_params.hdr.ram_size & RAMDISK_IMAGE_START_MASK;
14466- rd_prompt = ((boot_params.hdr.ram_size & RAMDISK_PROMPT_FLAG) != 0);
14467- rd_doload = ((boot_params.hdr.ram_size & RAMDISK_LOAD_FLAG) != 0);
14468-#endif
14469-#ifdef CONFIG_EFI
14470- if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature,
14471- "EL64", 4))
14472- efi_enabled = 1;
14473-#endif
14474-
14475- ARCH_SETUP
14476-
14477- memory_setup();
14478- copy_edd();
14479-
14480- if (!boot_params.hdr.root_flags)
14481- root_mountflags &= ~MS_RDONLY;
14482- init_mm.start_code = (unsigned long) &_text;
14483- init_mm.end_code = (unsigned long) &_etext;
14484- init_mm.end_data = (unsigned long) &_edata;
14485- init_mm.brk = (unsigned long) &_end;
14486-
14487- code_resource.start = virt_to_phys(&_text);
14488- code_resource.end = virt_to_phys(&_etext)-1;
14489- data_resource.start = virt_to_phys(&_etext);
14490- data_resource.end = virt_to_phys(&_edata)-1;
14491- bss_resource.start = virt_to_phys(&__bss_start);
14492- bss_resource.end = virt_to_phys(&__bss_stop)-1;
14493-
14494- early_identify_cpu(&boot_cpu_data);
14495-
14496- strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
14497- *cmdline_p = command_line;
14498-
14499- parse_setup_data();
14500-
14501- parse_early_param();
14502-
14503-#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
14504- if (init_ohci1394_dma_early)
14505- init_ohci1394_dma_on_all_controllers();
14506-#endif
14507-
14508- finish_e820_parsing();
14509-
14510-#ifndef CONFIG_XEN
14511- /* after parse_early_param, so could debug it */
14512- insert_resource(&iomem_resource, &code_resource);
14513- insert_resource(&iomem_resource, &data_resource);
14514- insert_resource(&iomem_resource, &bss_resource);
14515-#endif
14516-
14517- early_gart_iommu_check();
14518-
14519- e820_register_active_regions(0, 0, -1UL);
14520- /*
14521- * partially used pages are not usable - thus
14522- * we are rounding upwards:
14523- */
14524- end_pfn = e820_end_of_ram();
14525- /* update e820 for memory not covered by WB MTRRs */
14526- mtrr_bp_init();
14527-#ifndef CONFIG_XEN
14528- if (mtrr_trim_uncached_memory(end_pfn)) {
14529- e820_register_active_regions(0, 0, -1UL);
14530- end_pfn = e820_end_of_ram();
14531- }
14532-#endif
14533-
14534- num_physpages = end_pfn;
14535- max_mapnr = end_pfn;
14536-
14537- check_efer();
14538-
14539- max_pfn_mapped = init_memory_mapping(0, (max_pfn_mapped << PAGE_SHIFT));
14540- if (efi_enabled)
14541- efi_init();
14542-
14543-#ifndef CONFIG_XEN
14544- vsmp_init();
14545-#endif
14546-
14547- if (is_initial_xendomain())
14548- dmi_scan_machine();
14549-
14550- io_delay_init();
14551-
14552-#ifdef CONFIG_KVM_CLOCK
14553- kvmclock_init();
14554-#endif
14555-
14556-#if defined(CONFIG_SMP) && !defined(CONFIG_XEN)
14557- /* setup to use the early static init tables during kernel startup */
14558- x86_cpu_to_apicid_early_ptr = (void *)x86_cpu_to_apicid_init;
14559- x86_bios_cpu_apicid_early_ptr = (void *)x86_bios_cpu_apicid_init;
14560-#ifdef CONFIG_NUMA
14561- x86_cpu_to_node_map_early_ptr = (void *)x86_cpu_to_node_map_init;
14562-#endif
14563-#endif
14564-
14565- /* How many end-of-memory variables you have, grandma! */
14566- max_low_pfn = end_pfn;
14567- max_pfn = end_pfn;
14568- high_memory = (void *)__va(end_pfn * PAGE_SIZE - 1) + 1;
14569-
14570- /* Remove active ranges so rediscovery with NUMA-awareness happens */
14571- remove_all_active_ranges();
14572-
14573-#ifdef CONFIG_ACPI_NUMA
14574- /*
14575- * Parse SRAT to discover nodes.
14576- */
14577- acpi_numa_init();
14578-#endif
14579-
14580-#ifdef CONFIG_NUMA
14581- numa_initmem_init(0, end_pfn);
14582-#else
14583- contig_initmem_init(0, end_pfn);
14584-#endif
14585-
14586-#ifndef CONFIG_XEN
14587- dma32_reserve_bootmem();
14588-
14589-#ifdef CONFIG_ACPI_SLEEP
14590- /*
14591- * Reserve low memory region for sleep support.
14592- */
14593- acpi_reserve_bootmem();
14594-#endif
14595-
14596- if (efi_enabled)
14597- efi_reserve_bootmem();
14598-#endif
14599-
14600-#ifdef CONFIG_BLK_DEV_INITRD
14601-#ifdef CONFIG_XEN
14602- if (xen_start_info->mod_start) {
14603- unsigned long ramdisk_image = __pa(xen_start_info->mod_start);
14604- unsigned long ramdisk_size = xen_start_info->mod_len;
14605-#else
14606- if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) {
14607- unsigned long ramdisk_image = boot_params.hdr.ramdisk_image;
14608- unsigned long ramdisk_size = boot_params.hdr.ramdisk_size;
14609-#endif
14610- unsigned long ramdisk_end = ramdisk_image + ramdisk_size;
14611- unsigned long end_of_mem = end_pfn << PAGE_SHIFT;
14612-
14613- if (ramdisk_end <= end_of_mem) {
14614- /*
14615- * don't need to reserve again, already reserved early
14616- * in x86_64_start_kernel, and early_res_to_bootmem
14617- * convert that to reserved in bootmem
14618- */
14619- initrd_start = ramdisk_image + PAGE_OFFSET;
14620- initrd_end = initrd_start+ramdisk_size;
14621-#ifdef CONFIG_XEN
14622- initrd_below_start_ok = 1;
14623-#endif
14624- } else {
14625- free_bootmem(ramdisk_image, ramdisk_size);
14626- printk(KERN_ERR "initrd extends beyond end of memory "
14627- "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
14628- ramdisk_end, end_of_mem);
14629- initrd_start = 0;
14630- }
14631- }
14632-#endif
14633- reserve_crashkernel();
14634-
14635- reserve_ibft_region();
14636-
14637- paging_init();
14638- map_vsyscall();
14639-#ifdef CONFIG_X86_LOCAL_APIC
14640- /*
14641- * Find and reserve possible boot-time SMP configuration:
14642- */
14643- find_smp_config();
14644-#endif
14645-#ifdef CONFIG_XEN
14646- {
14647- int i, j, k, fpp;
14648- unsigned long p2m_pages;
14649-
14650- p2m_pages = end_pfn;
14651- if (xen_start_info->nr_pages > end_pfn) {
14652- /*
14653- * the end_pfn was shrunk (probably by mem= or highmem=
14654- * kernel parameter); shrink reservation with the HV
14655- */
14656- struct xen_memory_reservation reservation = {
14657- .address_bits = 0,
14658- .extent_order = 0,
14659- .domid = DOMID_SELF
14660- };
14661- unsigned int difference;
14662- int ret;
14663-
14664- difference = xen_start_info->nr_pages - end_pfn;
14665-
14666- set_xen_guest_handle(reservation.extent_start,
14667- ((unsigned long *)xen_start_info->mfn_list) + end_pfn);
14668- reservation.nr_extents = difference;
14669- ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
14670- &reservation);
14671- BUG_ON (ret != difference);
14672- }
14673- else if (end_pfn > xen_start_info->nr_pages)
14674- p2m_pages = xen_start_info->nr_pages;
14675-
14676- if (!xen_feature(XENFEAT_auto_translated_physmap)) {
14677- /* Make sure we have a large enough P->M table. */
14678- phys_to_machine_mapping = alloc_bootmem_pages(
14679- end_pfn * sizeof(unsigned long));
14680- memset(phys_to_machine_mapping, ~0,
14681- end_pfn * sizeof(unsigned long));
14682- memcpy(phys_to_machine_mapping,
14683- (unsigned long *)xen_start_info->mfn_list,
14684- p2m_pages * sizeof(unsigned long));
14685- free_bootmem(
14686- __pa(xen_start_info->mfn_list),
14687- PFN_PHYS(PFN_UP(xen_start_info->nr_pages *
14688- sizeof(unsigned long))));
14689-
14690- /*
14691- * Initialise the list of the frames that specify the
14692- * list of frames that make up the p2m table. Used by
14693- * save/restore.
14694- */
14695- pfn_to_mfn_frame_list_list = alloc_bootmem_pages(PAGE_SIZE);
14696-
14697- fpp = PAGE_SIZE/sizeof(unsigned long);
14698- for (i=0, j=0, k=-1; i< end_pfn; i+=fpp, j++) {
14699- if ((j % fpp) == 0) {
14700- k++;
14701- BUG_ON(k>=fpp);
14702- pfn_to_mfn_frame_list[k] =
14703- alloc_bootmem_pages(PAGE_SIZE);
14704- pfn_to_mfn_frame_list_list[k] =
14705- virt_to_mfn(pfn_to_mfn_frame_list[k]);
14706- j=0;
14707- }
14708- pfn_to_mfn_frame_list[k][j] =
14709- virt_to_mfn(&phys_to_machine_mapping[i]);
14710- }
14711- HYPERVISOR_shared_info->arch.max_pfn = end_pfn;
14712- HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
14713- virt_to_mfn(pfn_to_mfn_frame_list_list);
14714- }
14715-
14716- /* Mark all ISA DMA channels in-use - using them wouldn't work. */
14717- for (i = 0; i < MAX_DMA_CHANNELS; ++i)
14718- if (i != 4 && request_dma(i, "xen") != 0)
14719- BUG();
14720- }
14721-
14722-#ifdef CONFIG_ACPI
14723- if (!is_initial_xendomain()) {
14724- acpi_disabled = 1;
14725- acpi_ht = 0;
14726- }
14727-#endif
14728-#endif
14729-
14730-#ifndef CONFIG_XEN
14731- early_quirks();
14732-#endif
14733-
14734-#ifdef CONFIG_ACPI
14735- /*
14736- * Initialize the ACPI boot-time table parser (gets the RSDP and SDT).
14737- * Call this early for SRAT node setup.
14738- */
14739- acpi_boot_table_init();
14740-
14741- /*
14742- * Read APIC and some other early information from ACPI tables.
14743- */
14744- acpi_boot_init();
14745-#endif
14746-
14747- init_cpu_to_node();
14748-
14749-#ifdef CONFIG_X86_LOCAL_APIC
14750- /*
14751- * get boot-time SMP configuration:
14752- */
14753- if (smp_found_config)
14754- get_smp_config();
14755-#ifndef CONFIG_XEN
14756- init_apic_mappings();
14757- ioapic_init_mappings();
14758-#endif
14759-#endif
14760-#if defined(CONFIG_XEN) && defined(CONFIG_SMP) && !defined(CONFIG_HOTPLUG_CPU)
14761- prefill_possible_map();
14762-#endif
14763-
14764- kvm_guest_init();
14765-
14766- /*
14767- * We trust e820 completely. No explicit ROM probing in memory.
14768- */
14769-#ifdef CONFIG_XEN
14770- if (is_initial_xendomain())
14771- e820_reserve_resources(machine_e820.map, machine_e820.nr_map);
14772-#else
14773- e820_reserve_resources(e820.map, e820.nr_map);
14774- e820_mark_nosave_regions();
14775-#endif
14776-
14777- /* request I/O space for devices used on all i[345]86 PCs */
14778- for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++)
14779- request_resource(&ioport_resource, &standard_io_resources[i]);
14780-
14781-#ifdef CONFIG_XEN
14782- if (is_initial_xendomain())
14783- e820_setup_gap(machine_e820.map, machine_e820.nr_map);
14784-#else
14785- e820_setup_gap(e820.map, e820.nr_map);
14786-#endif
14787-
14788-#ifdef CONFIG_XEN
14789- {
14790- struct physdev_set_iopl set_iopl;
14791-
14792- set_iopl.iopl = 1;
14793- WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl));
14794-
14795- if (is_initial_xendomain()) {
14796-#ifdef CONFIG_VT
14797-#if defined(CONFIG_VGA_CONSOLE)
14798- conswitchp = &vga_con;
14799-#elif defined(CONFIG_DUMMY_CONSOLE)
14800- conswitchp = &dummy_con;
14801-#endif
14802-#endif
14803- } else {
14804-#if defined(CONFIG_VT) && defined(CONFIG_DUMMY_CONSOLE)
14805- conswitchp = &dummy_con;
14806-#endif
14807- }
14808- }
14809-#else /* CONFIG_XEN */
14810-
14811-#ifdef CONFIG_VT
14812-#if defined(CONFIG_VGA_CONSOLE)
14813- if (!efi_enabled || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY))
14814- conswitchp = &vga_con;
14815-#elif defined(CONFIG_DUMMY_CONSOLE)
14816- conswitchp = &dummy_con;
14817-#endif
14818-#endif
14819-
14820-#endif /* !CONFIG_XEN */
14821-
14822- /* do this before identify_cpu for boot cpu */
14823- check_enable_amd_mmconf_dmi();
14824-}
14825-
14826-#ifdef CONFIG_XEN
14827-static int
14828-xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr)
14829-{
14830- HYPERVISOR_shutdown(SHUTDOWN_crash);
14831- /* we're never actually going to get here... */
14832- return NOTIFY_DONE;
14833-}
14834-#endif /* !CONFIG_XEN */
14835-
14836-
14837-static int __cpuinit get_model_name(struct cpuinfo_x86 *c)
14838-{
14839- unsigned int *v;
14840-
14841- if (c->extended_cpuid_level < 0x80000004)
14842- return 0;
14843-
14844- v = (unsigned int *) c->x86_model_id;
14845- cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]);
14846- cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]);
14847- cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]);
14848- c->x86_model_id[48] = 0;
14849- return 1;
14850-}
14851-
14852-
14853-static void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
14854-{
14855- unsigned int n, dummy, eax, ebx, ecx, edx;
14856-
14857- n = c->extended_cpuid_level;
14858-
14859- if (n >= 0x80000005) {
14860- cpuid(0x80000005, &dummy, &ebx, &ecx, &edx);
14861- printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), "
14862- "D cache %dK (%d bytes/line)\n",
14863- edx>>24, edx&0xFF, ecx>>24, ecx&0xFF);
14864- c->x86_cache_size = (ecx>>24) + (edx>>24);
14865- /* On K8 L1 TLB is inclusive, so don't count it */
14866- c->x86_tlbsize = 0;
14867- }
14868-
14869- if (n >= 0x80000006) {
14870- cpuid(0x80000006, &dummy, &ebx, &ecx, &edx);
14871- ecx = cpuid_ecx(0x80000006);
14872- c->x86_cache_size = ecx >> 16;
14873- c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff);
14874-
14875- printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n",
14876- c->x86_cache_size, ecx & 0xFF);
14877- }
14878- if (n >= 0x80000008) {
14879- cpuid(0x80000008, &eax, &dummy, &dummy, &dummy);
14880- c->x86_virt_bits = (eax >> 8) & 0xff;
14881- c->x86_phys_bits = eax & 0xff;
14882- }
14883-}
14884-
14885-#ifdef CONFIG_NUMA
14886-static int __cpuinit nearby_node(int apicid)
14887-{
14888- int i, node;
14889-
14890- for (i = apicid - 1; i >= 0; i--) {
14891- node = apicid_to_node[i];
14892- if (node != NUMA_NO_NODE && node_online(node))
14893- return node;
14894- }
14895- for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) {
14896- node = apicid_to_node[i];
14897- if (node != NUMA_NO_NODE && node_online(node))
14898- return node;
14899- }
14900- return first_node(node_online_map); /* Shouldn't happen */
14901-}
14902-#endif
14903-
14904-/*
14905- * On a AMD dual core setup the lower bits of the APIC id distingush the cores.
14906- * Assumes number of cores is a power of two.
14907- */
14908-static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c)
14909-{
14910-#ifdef CONFIG_SMP
14911- unsigned bits;
14912-#ifdef CONFIG_NUMA
14913- int cpu = smp_processor_id();
14914- int node = 0;
14915- unsigned apicid = hard_smp_processor_id();
14916-#endif
14917- bits = c->x86_coreid_bits;
14918-
14919- /* Low order bits define the core id (index of core in socket) */
14920- c->cpu_core_id = c->initial_apicid & ((1 << bits)-1);
14921- /* Convert the initial APIC ID into the socket ID */
14922- c->phys_proc_id = c->initial_apicid >> bits;
14923-
14924-#ifdef CONFIG_NUMA
14925- node = c->phys_proc_id;
14926- if (apicid_to_node[apicid] != NUMA_NO_NODE)
14927- node = apicid_to_node[apicid];
14928- if (!node_online(node)) {
14929- /* Two possibilities here:
14930- - The CPU is missing memory and no node was created.
14931- In that case try picking one from a nearby CPU
14932- - The APIC IDs differ from the HyperTransport node IDs
14933- which the K8 northbridge parsing fills in.
14934- Assume they are all increased by a constant offset,
14935- but in the same order as the HT nodeids.
14936- If that doesn't result in a usable node fall back to the
14937- path for the previous case. */
14938-
14939- int ht_nodeid = c->initial_apicid;
14940-
14941- if (ht_nodeid >= 0 &&
14942- apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
14943- node = apicid_to_node[ht_nodeid];
14944- /* Pick a nearby node */
14945- if (!node_online(node))
14946- node = nearby_node(apicid);
14947- }
14948- numa_set_node(cpu, node);
14949-
14950- printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
14951-#endif
14952-#endif
14953-}
14954-
14955-static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c)
14956-{
14957-#ifdef CONFIG_SMP
14958- unsigned bits, ecx;
14959-
14960- /* Multi core CPU? */
14961- if (c->extended_cpuid_level < 0x80000008)
14962- return;
14963-
14964- ecx = cpuid_ecx(0x80000008);
14965-
14966- c->x86_max_cores = (ecx & 0xff) + 1;
14967-
14968- /* CPU telling us the core id bits shift? */
14969- bits = (ecx >> 12) & 0xF;
14970-
14971- /* Otherwise recompute */
14972- if (bits == 0) {
14973- while ((1 << bits) < c->x86_max_cores)
14974- bits++;
14975- }
14976-
14977- c->x86_coreid_bits = bits;
14978-
14979-#endif
14980-}
14981-
14982-#define ENABLE_C1E_MASK 0x18000000
14983-#define CPUID_PROCESSOR_SIGNATURE 1
14984-#define CPUID_XFAM 0x0ff00000
14985-#define CPUID_XFAM_K8 0x00000000
14986-#define CPUID_XFAM_10H 0x00100000
14987-#define CPUID_XFAM_11H 0x00200000
14988-#define CPUID_XMOD 0x000f0000
14989-#define CPUID_XMOD_REV_F 0x00040000
14990-
14991-#ifndef CONFIG_XEN
14992-/* AMD systems with C1E don't have a working lAPIC timer. Check for that. */
14993-static __cpuinit int amd_apic_timer_broken(void)
14994-{
14995- u32 lo, hi, eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE);
14996-
14997- switch (eax & CPUID_XFAM) {
14998- case CPUID_XFAM_K8:
14999- if ((eax & CPUID_XMOD) < CPUID_XMOD_REV_F)
15000- break;
15001- case CPUID_XFAM_10H:
15002- case CPUID_XFAM_11H:
15003- rdmsr(MSR_K8_ENABLE_C1E, lo, hi);
15004- if (lo & ENABLE_C1E_MASK)
15005- return 1;
15006- break;
15007- default:
15008- /* err on the side of caution */
15009- return 1;
15010- }
15011- return 0;
15012-}
15013-#endif
15014-
15015-static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
15016-{
15017- early_init_amd_mc(c);
15018-
15019- /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */
15020- if (c->x86_power & (1<<8))
15021- set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
15022-}
15023-
15024-static void __cpuinit init_amd(struct cpuinfo_x86 *c)
15025-{
15026- unsigned level;
15027-
15028-#ifdef CONFIG_SMP
15029- unsigned long value;
15030-
15031- /*
15032- * Disable TLB flush filter by setting HWCR.FFDIS on K8
15033- * bit 6 of msr C001_0015
15034- *
15035- * Errata 63 for SH-B3 steppings
15036- * Errata 122 for all steppings (F+ have it disabled by default)
15037- */
15038- if (c->x86 == 15) {
15039- rdmsrl(MSR_K8_HWCR, value);
15040- value |= 1 << 6;
15041- wrmsrl(MSR_K8_HWCR, value);
15042- }
15043-#endif
15044-
15045- /* Bit 31 in normal CPUID used for nonstandard 3DNow ID;
15046- 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */
15047- clear_cpu_cap(c, 0*32+31);
15048-
15049- /* On C+ stepping K8 rep microcode works well for copy/memset */
15050- level = cpuid_eax(1);
15051- if (c->x86 == 15 && ((level >= 0x0f48 && level < 0x0f50) ||
15052- level >= 0x0f58))
15053- set_cpu_cap(c, X86_FEATURE_REP_GOOD);
15054- if (c->x86 == 0x10 || c->x86 == 0x11)
15055- set_cpu_cap(c, X86_FEATURE_REP_GOOD);
15056-
15057- /* Enable workaround for FXSAVE leak */
15058- if (c->x86 >= 6)
15059- set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK);
15060-
15061- level = get_model_name(c);
15062- if (!level) {
15063- switch (c->x86) {
15064- case 15:
15065- /* Should distinguish Models here, but this is only
15066- a fallback anyways. */
15067- strcpy(c->x86_model_id, "Hammer");
15068- break;
15069- }
15070- }
15071- display_cacheinfo(c);
15072-
15073- /* Multi core CPU? */
15074- if (c->extended_cpuid_level >= 0x80000008)
15075- amd_detect_cmp(c);
15076-
15077- if (c->extended_cpuid_level >= 0x80000006 &&
15078- (cpuid_edx(0x80000006) & 0xf000))
15079- num_cache_leaves = 4;
15080- else
15081- num_cache_leaves = 3;
15082-
15083- if (c->x86 == 0xf || c->x86 == 0x10 || c->x86 == 0x11)
15084- set_cpu_cap(c, X86_FEATURE_K8);
15085-
15086- /* MFENCE stops RDTSC speculation */
15087- set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
15088-
15089- if (c->x86 == 0x10)
15090- fam10h_check_enable_mmcfg();
15091-
15092-#ifndef CONFIG_XEN
15093- if (amd_apic_timer_broken())
15094- disable_apic_timer = 1;
15095-
15096- if (c == &boot_cpu_data && c->x86 >= 0xf && c->x86 <= 0x11) {
15097- unsigned long long tseg;
15098-
15099- /*
15100- * Split up direct mapping around the TSEG SMM area.
15101- * Don't do it for gbpages because there seems very little
15102- * benefit in doing so.
15103- */
15104- if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg) &&
15105- (tseg >> PMD_SHIFT) < (max_pfn_mapped >> (PMD_SHIFT-PAGE_SHIFT)))
15106- set_memory_4k((unsigned long)__va(tseg), 1);
15107- }
15108-#endif
15109-}
15110-
15111-void __cpuinit detect_ht(struct cpuinfo_x86 *c)
15112-{
15113-#ifdef CONFIG_SMP
15114- u32 eax, ebx, ecx, edx;
15115- int index_msb, core_bits;
15116-
15117- cpuid(1, &eax, &ebx, &ecx, &edx);
15118-
15119-
15120- if (!cpu_has(c, X86_FEATURE_HT))
15121- return;
15122- if (cpu_has(c, X86_FEATURE_CMP_LEGACY))
15123- goto out;
15124-
15125- smp_num_siblings = (ebx & 0xff0000) >> 16;
15126-
15127- if (smp_num_siblings == 1) {
15128- printk(KERN_INFO "CPU: Hyper-Threading is disabled\n");
15129- } else if (smp_num_siblings > 1) {
15130-
15131- if (smp_num_siblings > NR_CPUS) {
15132- printk(KERN_WARNING "CPU: Unsupported number of "
15133- "siblings %d", smp_num_siblings);
15134- smp_num_siblings = 1;
15135- return;
15136- }
15137-
15138- index_msb = get_count_order(smp_num_siblings);
15139- c->phys_proc_id = phys_pkg_id(index_msb);
15140-
15141- smp_num_siblings = smp_num_siblings / c->x86_max_cores;
15142-
15143- index_msb = get_count_order(smp_num_siblings);
15144-
15145- core_bits = get_count_order(c->x86_max_cores);
15146-
15147- c->cpu_core_id = phys_pkg_id(index_msb) &
15148- ((1 << core_bits) - 1);
15149- }
15150-out:
15151- if ((c->x86_max_cores * smp_num_siblings) > 1) {
15152- printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
15153- c->phys_proc_id);
15154- printk(KERN_INFO "CPU: Processor Core ID: %d\n",
15155- c->cpu_core_id);
15156- }
15157-
15158-#endif
15159-}
15160-
15161-/*
15162- * find out the number of processor cores on the die
15163- */
15164-static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c)
15165-{
15166- unsigned int eax, t;
15167-
15168- if (c->cpuid_level < 4)
15169- return 1;
15170-
15171- cpuid_count(4, 0, &eax, &t, &t, &t);
15172-
15173- if (eax & 0x1f)
15174- return ((eax >> 26) + 1);
15175- else
15176- return 1;
15177-}
15178-
15179-static void __cpuinit srat_detect_node(void)
15180-{
15181-#ifdef CONFIG_NUMA
15182- unsigned node;
15183- int cpu = smp_processor_id();
15184- int apicid = hard_smp_processor_id();
15185-
15186- /* Don't do the funky fallback heuristics the AMD version employs
15187- for now. */
15188- node = apicid_to_node[apicid];
15189- if (node == NUMA_NO_NODE || !node_online(node))
15190- node = first_node(node_online_map);
15191- numa_set_node(cpu, node);
15192-
15193- printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
15194-#endif
15195-}
15196-
15197-static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
15198-{
15199- if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
15200- (c->x86 == 0x6 && c->x86_model >= 0x0e))
15201- set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
15202-}
15203-
15204-static void __cpuinit init_intel(struct cpuinfo_x86 *c)
15205-{
15206- /* Cache sizes */
15207- unsigned n;
15208-
15209- init_intel_cacheinfo(c);
15210- if (c->cpuid_level > 9) {
15211- unsigned eax = cpuid_eax(10);
15212- /* Check for version and the number of counters */
15213- if ((eax & 0xff) && (((eax>>8) & 0xff) > 1))
15214- set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON);
15215- }
15216-
15217- if (cpu_has_ds) {
15218- unsigned int l1, l2;
15219- rdmsr(MSR_IA32_MISC_ENABLE, l1, l2);
15220- if (!(l1 & (1<<11)))
15221- set_cpu_cap(c, X86_FEATURE_BTS);
15222- if (!(l1 & (1<<12)))
15223- set_cpu_cap(c, X86_FEATURE_PEBS);
15224- }
15225-
15226-
15227- if (cpu_has_bts)
15228- ds_init_intel(c);
15229-
15230- n = c->extended_cpuid_level;
15231- if (n >= 0x80000008) {
15232- unsigned eax = cpuid_eax(0x80000008);
15233- c->x86_virt_bits = (eax >> 8) & 0xff;
15234- c->x86_phys_bits = eax & 0xff;
15235- /* CPUID workaround for Intel 0F34 CPU */
15236- if (c->x86_vendor == X86_VENDOR_INTEL &&
15237- c->x86 == 0xF && c->x86_model == 0x3 &&
15238- c->x86_mask == 0x4)
15239- c->x86_phys_bits = 36;
15240- }
15241-
15242- if (c->x86 == 15)
15243- c->x86_cache_alignment = c->x86_clflush_size * 2;
15244- if (c->x86 == 6)
15245- set_cpu_cap(c, X86_FEATURE_REP_GOOD);
15246- set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
15247- c->x86_max_cores = intel_num_cpu_cores(c);
15248-
15249- srat_detect_node();
15250-}
15251-
15252-static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c)
15253-{
15254- if (c->x86 == 0x6 && c->x86_model >= 0xf)
15255- set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
15256-}
15257-
15258-static void __cpuinit init_centaur(struct cpuinfo_x86 *c)
15259-{
15260- /* Cache sizes */
15261- unsigned n;
15262-
15263- n = c->extended_cpuid_level;
15264- if (n >= 0x80000008) {
15265- unsigned eax = cpuid_eax(0x80000008);
15266- c->x86_virt_bits = (eax >> 8) & 0xff;
15267- c->x86_phys_bits = eax & 0xff;
15268- }
15269-
15270- if (c->x86 == 0x6 && c->x86_model >= 0xf) {
15271- c->x86_cache_alignment = c->x86_clflush_size * 2;
15272- set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
15273- set_cpu_cap(c, X86_FEATURE_REP_GOOD);
15274- }
15275- set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
15276-}
15277-
15278-static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
15279-{
15280- char *v = c->x86_vendor_id;
15281-
15282- if (!strcmp(v, "AuthenticAMD"))
15283- c->x86_vendor = X86_VENDOR_AMD;
15284- else if (!strcmp(v, "GenuineIntel"))
15285- c->x86_vendor = X86_VENDOR_INTEL;
15286- else if (!strcmp(v, "CentaurHauls"))
15287- c->x86_vendor = X86_VENDOR_CENTAUR;
15288- else
15289- c->x86_vendor = X86_VENDOR_UNKNOWN;
15290-}
15291-
15292-/* Do some early cpuid on the boot CPU to get some parameter that are
15293- needed before check_bugs. Everything advanced is in identify_cpu
15294- below. */
15295-static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
15296-{
15297- u32 tfms, xlvl;
15298-
15299- c->loops_per_jiffy = loops_per_jiffy;
15300- c->x86_cache_size = -1;
15301- c->x86_vendor = X86_VENDOR_UNKNOWN;
15302- c->x86_model = c->x86_mask = 0; /* So far unknown... */
15303- c->x86_vendor_id[0] = '\0'; /* Unset */
15304- c->x86_model_id[0] = '\0'; /* Unset */
15305- c->x86_clflush_size = 64;
15306- c->x86_cache_alignment = c->x86_clflush_size;
15307- c->x86_max_cores = 1;
15308- c->x86_coreid_bits = 0;
15309- c->extended_cpuid_level = 0;
15310- memset(&c->x86_capability, 0, sizeof c->x86_capability);
15311-
15312- /* Get vendor name */
15313- cpuid(0x00000000, (unsigned int *)&c->cpuid_level,
15314- (unsigned int *)&c->x86_vendor_id[0],
15315- (unsigned int *)&c->x86_vendor_id[8],
15316- (unsigned int *)&c->x86_vendor_id[4]);
15317-
15318- get_cpu_vendor(c);
15319-
15320- /* Initialize the standard set of capabilities */
15321- /* Note that the vendor-specific code below might override */
15322-
15323- /* Intel-defined flags: level 0x00000001 */
15324- if (c->cpuid_level >= 0x00000001) {
15325- __u32 misc;
15326- cpuid(0x00000001, &tfms, &misc, &c->x86_capability[4],
15327- &c->x86_capability[0]);
15328- c->x86 = (tfms >> 8) & 0xf;
15329- c->x86_model = (tfms >> 4) & 0xf;
15330- c->x86_mask = tfms & 0xf;
15331- if (c->x86 == 0xf)
15332- c->x86 += (tfms >> 20) & 0xff;
15333- if (c->x86 >= 0x6)
15334- c->x86_model += ((tfms >> 16) & 0xF) << 4;
15335- if (test_cpu_cap(c, X86_FEATURE_CLFLSH))
15336- c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
15337- } else {
15338- /* Have CPUID level 0 only - unheard of */
15339- c->x86 = 4;
15340- }
15341-
15342- c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xff;
15343-#ifdef CONFIG_SMP
15344- c->phys_proc_id = c->initial_apicid;
15345-#endif
15346- /* AMD-defined flags: level 0x80000001 */
15347- xlvl = cpuid_eax(0x80000000);
15348- c->extended_cpuid_level = xlvl;
15349- if ((xlvl & 0xffff0000) == 0x80000000) {
15350- if (xlvl >= 0x80000001) {
15351- c->x86_capability[1] = cpuid_edx(0x80000001);
15352- c->x86_capability[6] = cpuid_ecx(0x80000001);
15353- }
15354- if (xlvl >= 0x80000004)
15355- get_model_name(c); /* Default name */
15356- }
15357-
15358- /* Transmeta-defined flags: level 0x80860001 */
15359- xlvl = cpuid_eax(0x80860000);
15360- if ((xlvl & 0xffff0000) == 0x80860000) {
15361- /* Don't set x86_cpuid_level here for now to not confuse. */
15362- if (xlvl >= 0x80860001)
15363- c->x86_capability[2] = cpuid_edx(0x80860001);
15364- }
15365-
15366- c->extended_cpuid_level = cpuid_eax(0x80000000);
15367- if (c->extended_cpuid_level >= 0x80000007)
15368- c->x86_power = cpuid_edx(0x80000007);
15369-
15370- switch (c->x86_vendor) {
15371- case X86_VENDOR_AMD:
15372- early_init_amd(c);
15373- break;
15374- case X86_VENDOR_INTEL:
15375- early_init_intel(c);
15376- break;
15377- case X86_VENDOR_CENTAUR:
15378- early_init_centaur(c);
15379- break;
15380- }
15381-
15382- validate_pat_support(c);
15383-}
15384-
15385-/*
15386- * This does the hard work of actually picking apart the CPU stuff...
15387- */
15388-void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
15389-{
15390- int i;
15391-
15392- early_identify_cpu(c);
15393-
15394- init_scattered_cpuid_features(c);
15395-
15396- c->apicid = phys_pkg_id(0);
15397-
15398- /*
15399- * Vendor-specific initialization. In this section we
15400- * canonicalize the feature flags, meaning if there are
15401- * features a certain CPU supports which CPUID doesn't
15402- * tell us, CPUID claiming incorrect flags, or other bugs,
15403- * we handle them here.
15404- *
15405- * At the end of this section, c->x86_capability better
15406- * indicate the features this CPU genuinely supports!
15407- */
15408- switch (c->x86_vendor) {
15409- case X86_VENDOR_AMD:
15410- init_amd(c);
15411- break;
15412-
15413- case X86_VENDOR_INTEL:
15414- init_intel(c);
15415- break;
15416-
15417- case X86_VENDOR_CENTAUR:
15418- init_centaur(c);
15419- break;
15420-
15421- case X86_VENDOR_UNKNOWN:
15422- default:
15423- display_cacheinfo(c);
15424- break;
15425- }
15426-
15427- detect_ht(c);
15428-
15429- /*
15430- * On SMP, boot_cpu_data holds the common feature set between
15431- * all CPUs; so make sure that we indicate which features are
15432- * common between the CPUs. The first time this routine gets
15433- * executed, c == &boot_cpu_data.
15434- */
15435- if (c != &boot_cpu_data) {
15436- /* AND the already accumulated flags with these */
15437- for (i = 0; i < NCAPINTS; i++)
15438- boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
15439- }
15440-
15441- /* Clear all flags overriden by options */
15442- for (i = 0; i < NCAPINTS; i++)
15443- c->x86_capability[i] &= ~cleared_cpu_caps[i];
15444-
15445-#ifdef CONFIG_X86_MCE
15446- mcheck_init(c);
15447-#endif
15448- select_idle_routine(c);
15449-
15450-#ifdef CONFIG_NUMA
15451- numa_add_cpu(smp_processor_id());
15452-#endif
15453-
15454-}
15455-
15456-void __cpuinit identify_boot_cpu(void)
15457-{
15458- identify_cpu(&boot_cpu_data);
15459-}
15460-
15461-void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
15462-{
15463- BUG_ON(c == &boot_cpu_data);
15464- identify_cpu(c);
15465- mtrr_ap_init();
15466-}
15467-
15468-static __init int setup_noclflush(char *arg)
15469-{
15470- setup_clear_cpu_cap(X86_FEATURE_CLFLSH);
15471- return 1;
15472-}
15473-__setup("noclflush", setup_noclflush);
15474-
15475-void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
15476-{
15477- if (c->x86_model_id[0])
15478- printk(KERN_CONT "%s", c->x86_model_id);
15479-
15480- if (c->x86_mask || c->cpuid_level >= 0)
15481- printk(KERN_CONT " stepping %02x\n", c->x86_mask);
15482- else
15483- printk(KERN_CONT "\n");
15484-}
15485-
15486-static __init int setup_disablecpuid(char *arg)
15487-{
15488- int bit;
15489- if (get_option(&arg, &bit) && bit < NCAPINTS*32)
15490- setup_clear_cpu_cap(bit);
15491- else
15492- return 0;
15493- return 1;
15494-}
15495-__setup("clearcpuid=", setup_disablecpuid);
15496--- /dev/null 1970-01-01 00:00:00.000000000 +0000
15497+++ sle11-2009-10-16/arch/x86/kernel/setup_percpu-xen.c	2009-06-04 10:21:39.000000000 +0200
15498@@ -0,0 +1,385 @@
15499+#include <linux/kernel.h>
15500+#include <linux/module.h>
15501+#include <linux/init.h>
15502+#include <linux/bootmem.h>
15503+#include <linux/percpu.h>
15504+#include <linux/kexec.h>
15505+#include <linux/crash_dump.h>
15506+#include <asm/smp.h>
15507+#include <asm/percpu.h>
15508+#include <asm/sections.h>
15509+#include <asm/processor.h>
15510+#include <asm/setup.h>
15511+#include <asm/topology.h>
15512+#include <asm/mpspec.h>
15513+#include <asm/apicdef.h>
15514+#include <asm/highmem.h>
15515+
15516+#ifdef CONFIG_X86_LOCAL_APIC
15517+unsigned int num_processors;
15518+unsigned disabled_cpus __cpuinitdata;
15519+/* Processor that is doing the boot up */
15520+unsigned int boot_cpu_physical_apicid = -1U;
15521+unsigned int max_physical_apicid;
15522+EXPORT_SYMBOL(boot_cpu_physical_apicid);
15523+
15524+/* Bitmask of physically existing CPUs */
15525+physid_mask_t phys_cpu_present_map;
15526+#endif
15527+
15528+/* map cpu index to physical APIC ID */
15529+#ifndef CONFIG_XEN
15530+DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID);
15531+DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID);
15532+EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
15533+EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
15534+#else
15535+DEFINE_PER_CPU(u16, x86_cpu_to_apicid) = BAD_APICID;
15536+EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid);
15537+#endif
15538+
15539+#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
15540+#define X86_64_NUMA 1
15541+
15542+/* map cpu index to node index */
15543+DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
15544+EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);
15545+
15546+/* which logical CPUs are on which nodes */
15547+cpumask_t *node_to_cpumask_map;
15548+EXPORT_SYMBOL(node_to_cpumask_map);
15549+
15550+/* setup node_to_cpumask_map */
15551+static void __init setup_node_to_cpumask_map(void);
15552+
15553+#else
15554+static inline void setup_node_to_cpumask_map(void) { }
15555+#endif
15556+
15557+#if defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) && defined(CONFIG_X86_SMP)
15558+/*
15559+ * Copy data used in early init routines from the initial arrays to the
15560+ * per cpu data areas. These arrays then become expendable and the
15561+ * *_early_ptr's are zeroed indicating that the static arrays are gone.
15562+ */
15563+static void __init setup_per_cpu_maps(void)
15564+{
15565+#ifndef CONFIG_XEN
15566+ int cpu;
15567+
15568+ for_each_possible_cpu(cpu) {
15569+ per_cpu(x86_cpu_to_apicid, cpu) =
15570+ early_per_cpu_map(x86_cpu_to_apicid, cpu);
15571+ per_cpu(x86_bios_cpu_apicid, cpu) =
15572+ early_per_cpu_map(x86_bios_cpu_apicid, cpu);
15573+#ifdef X86_64_NUMA
15574+ per_cpu(x86_cpu_to_node_map, cpu) =
15575+ early_per_cpu_map(x86_cpu_to_node_map, cpu);
15576+#endif
15577+ }
15578+
15579+ /* indicate the early static arrays will soon be gone */
15580+ early_per_cpu_ptr(x86_cpu_to_apicid) = NULL;
15581+ early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL;
15582+#ifdef X86_64_NUMA
15583+ early_per_cpu_ptr(x86_cpu_to_node_map) = NULL;
15584+#endif
15585+#endif
15586+}
15587+
15588+#ifdef CONFIG_X86_32
15589+/*
15590+ * Great future not-so-futuristic plan: make i386 and x86_64 do it
15591+ * the same way
15592+ */
15593+unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
15594+EXPORT_SYMBOL(__per_cpu_offset);
15595+static inline void setup_cpu_pda_map(void) { }
15596+
15597+#elif !defined(CONFIG_SMP)
15598+static inline void setup_cpu_pda_map(void) { }
15599+
15600+#else /* CONFIG_SMP && CONFIG_X86_64 */
15601+
15602+/*
15603+ * Allocate cpu_pda pointer table and array via alloc_bootmem.
15604+ */
15605+static void __init setup_cpu_pda_map(void)
15606+{
15607+ char *pda;
15608+ struct x8664_pda **new_cpu_pda;
15609+ unsigned long size;
15610+ int cpu;
15611+
15612+ size = roundup(sizeof(struct x8664_pda), cache_line_size());
15613+
15614+ /* allocate cpu_pda array and pointer table */
15615+ {
15616+ unsigned long tsize = nr_cpu_ids * sizeof(void *);
15617+ unsigned long asize = size * (nr_cpu_ids - 1);
15618+
15619+ tsize = roundup(tsize, cache_line_size());
15620+ new_cpu_pda = alloc_bootmem(tsize + asize);
15621+ pda = (char *)new_cpu_pda + tsize;
15622+ }
15623+
15624+ /* initialize pointer table to static pda's */
15625+ for_each_possible_cpu(cpu) {
15626+ if (cpu == 0) {
15627+ /* leave boot cpu pda in place */
15628+ new_cpu_pda[0] = cpu_pda(0);
15629+ continue;
15630+ }
15631+ new_cpu_pda[cpu] = (struct x8664_pda *)pda;
15632+ new_cpu_pda[cpu]->in_bootmem = 1;
15633+ pda += size;
15634+ }
15635+
15636+ /* point to new pointer table */
15637+ _cpu_pda = new_cpu_pda;
15638+}
15639+#endif
15640+
15641+/*
15642+ * Great future plan:
15643+ * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data.
15644+ * Always point %gs to its beginning
15645+ */
15646+void __init setup_per_cpu_areas(void)
15647+{
15648+ ssize_t size = PERCPU_ENOUGH_ROOM;
15649+ char *ptr;
15650+ int cpu;
15651+
15652+ /* Setup cpu_pda map */
15653+ setup_cpu_pda_map();
15654+
15655+ /* Copy section for each CPU (we discard the original) */
15656+ size = PERCPU_ENOUGH_ROOM;
15657+ printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n",
15658+ size);
15659+
15660+ for_each_possible_cpu(cpu) {
15661+#ifndef CONFIG_NEED_MULTIPLE_NODES
15662+ ptr = alloc_bootmem_pages(size);
15663+#else
15664+ int node = early_cpu_to_node(cpu);
15665+ if (!node_online(node) || !NODE_DATA(node)) {
15666+ ptr = alloc_bootmem_pages(size);
15667+ printk(KERN_INFO
15668+ "cpu %d has no node %d or node-local memory\n",
15669+ cpu, node);
15670+ }
15671+ else
15672+ ptr = alloc_bootmem_pages_node(NODE_DATA(node), size);
15673+#endif
15674+ per_cpu_offset(cpu) = ptr - __per_cpu_start;
15675+ memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
15676+
15677+ }
15678+
15679+ printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d, nr_node_ids %d\n",
15680+ NR_CPUS, nr_cpu_ids, nr_node_ids);
15681+
15682+ /* Setup percpu data maps */
15683+ setup_per_cpu_maps();
15684+
15685+ /* Setup node to cpumask map */
15686+ setup_node_to_cpumask_map();
15687+}
15688+
15689+#endif
15690+
15691+#ifdef X86_64_NUMA
15692+
15693+/*
15694+ * Allocate node_to_cpumask_map based on number of available nodes
15695+ * Requires node_possible_map to be valid.
15696+ *
15697+ * Note: node_to_cpumask() is not valid until after this is done.
15698+ */
15699+static void __init setup_node_to_cpumask_map(void)
15700+{
15701+ unsigned int node, num = 0;
15702+ cpumask_t *map;
15703+
15704+ /* setup nr_node_ids if not done yet */
15705+ if (nr_node_ids == MAX_NUMNODES) {
15706+ for_each_node_mask(node, node_possible_map)
15707+ num = node;
15708+ nr_node_ids = num + 1;
15709+ }
15710+
15711+ /* allocate the map */
15712+ map = alloc_bootmem_low(nr_node_ids * sizeof(cpumask_t));
15713+
15714+ pr_debug(KERN_DEBUG "Node to cpumask map at %p for %d nodes\n",
15715+ map, nr_node_ids);
15716+
15717+ /* node_to_cpumask() will now work */
15718+ node_to_cpumask_map = map;
15719+}
15720+
15721+void __cpuinit numa_set_node(int cpu, int node)
15722+{
15723+ int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);
15724+
15725+ if (cpu_pda(cpu) && node != NUMA_NO_NODE)
15726+ cpu_pda(cpu)->nodenumber = node;
15727+
15728+ if (cpu_to_node_map)
15729+ cpu_to_node_map[cpu] = node;
15730+
15731+ else if (per_cpu_offset(cpu))
15732+ per_cpu(x86_cpu_to_node_map, cpu) = node;
15733+
15734+ else
15735+ pr_debug("Setting node for non-present cpu %d\n", cpu);
15736+}
15737+
15738+void __cpuinit numa_clear_node(int cpu)
15739+{
15740+ numa_set_node(cpu, NUMA_NO_NODE);
15741+}
15742+
15743+#ifndef CONFIG_DEBUG_PER_CPU_MAPS
15744+
15745+void __cpuinit numa_add_cpu(int cpu)
15746+{
15747+ cpu_set(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
15748+}
15749+
15750+void __cpuinit numa_remove_cpu(int cpu)
15751+{
15752+ cpu_clear(cpu, node_to_cpumask_map[cpu_to_node(cpu)]);
15753+}
15754+
15755+#else /* CONFIG_DEBUG_PER_CPU_MAPS */
15756+
15757+/*
15758+ * --------- debug versions of the numa functions ---------
15759+ */
15760+static void __cpuinit numa_set_cpumask(int cpu, int enable)
15761+{
15762+ int node = cpu_to_node(cpu);
15763+ cpumask_t *mask;
15764+ char buf[64];
15765+
15766+ if (node_to_cpumask_map == NULL) {
15767+ printk(KERN_ERR "node_to_cpumask_map NULL\n");
15768+ dump_stack();
15769+ return;
15770+ }
15771+
15772+ mask = &node_to_cpumask_map[node];
15773+ if (enable)
15774+ cpu_set(cpu, *mask);
15775+ else
15776+ cpu_clear(cpu, *mask);
15777+
15778+ cpulist_scnprintf(buf, sizeof(buf), *mask);
15779+ printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
15780+ enable? "numa_add_cpu":"numa_remove_cpu", cpu, node, buf);
15781+ }
15782+
15783+void __cpuinit numa_add_cpu(int cpu)
15784+{
15785+ numa_set_cpumask(cpu, 1);
15786+}
15787+
15788+void __cpuinit numa_remove_cpu(int cpu)
15789+{
15790+ numa_set_cpumask(cpu, 0);
15791+}
15792+
15793+int cpu_to_node(int cpu)
15794+{
15795+ if (early_per_cpu_ptr(x86_cpu_to_node_map)) {
15796+ printk(KERN_WARNING
15797+ "cpu_to_node(%d): usage too early!\n", cpu);
15798+ dump_stack();
15799+ return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
15800+ }
15801+ return per_cpu(x86_cpu_to_node_map, cpu);
15802+}
15803+EXPORT_SYMBOL(cpu_to_node);
15804+
15805+/*
15806+ * Same function as cpu_to_node() but used if called before the
15807+ * per_cpu areas are setup.
15808+ */
15809+int early_cpu_to_node(int cpu)
15810+{
15811+ if (early_per_cpu_ptr(x86_cpu_to_node_map))
15812+ return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
15813+
15814+ if (!per_cpu_offset(cpu)) {
15815+ printk(KERN_WARNING
15816+ "early_cpu_to_node(%d): no per_cpu area!\n", cpu);
15817+ dump_stack();
15818+ return NUMA_NO_NODE;
15819+ }
15820+ return per_cpu(x86_cpu_to_node_map, cpu);
15821+}
15822+
15823+
15824+/* empty cpumask */
15825+static const cpumask_t cpu_mask_none;
15826+
15827+/*
15828+ * Returns a pointer to the bitmask of CPUs on Node 'node'.
15829+ */
15830+const cpumask_t *_node_to_cpumask_ptr(int node)
15831+{
15832+ if (node_to_cpumask_map == NULL) {
15833+ printk(KERN_WARNING
15834+ "_node_to_cpumask_ptr(%d): no node_to_cpumask_map!\n",
15835+ node);
15836+ dump_stack();
15837+ return (const cpumask_t *)&cpu_online_map;
15838+ }
15839+ if (node >= nr_node_ids) {
15840+ printk(KERN_WARNING
15841+ "_node_to_cpumask_ptr(%d): node > nr_node_ids(%d)\n",
15842+ node, nr_node_ids);
15843+ dump_stack();
15844+ return &cpu_mask_none;
15845+ }
15846+ return &node_to_cpumask_map[node];
15847+}
15848+EXPORT_SYMBOL(_node_to_cpumask_ptr);
15849+
15850+/*
15851+ * Returns a bitmask of CPUs on Node 'node'.
15852+ *
15853+ * Side note: this function creates the returned cpumask on the stack
15854+ * so with a high NR_CPUS count, excessive stack space is used. The
15855+ * node_to_cpumask_ptr function should be used whenever possible.
15856+ */
15857+cpumask_t node_to_cpumask(int node)
15858+{
15859+ if (node_to_cpumask_map == NULL) {
15860+ printk(KERN_WARNING
15861+ "node_to_cpumask(%d): no node_to_cpumask_map!\n", node);
15862+ dump_stack();
15863+ return cpu_online_map;
15864+ }
15865+ if (node >= nr_node_ids) {
15866+ printk(KERN_WARNING
15867+ "node_to_cpumask(%d): node > nr_node_ids(%d)\n",
15868+ node, nr_node_ids);
15869+ dump_stack();
15870+ return cpu_mask_none;
15871+ }
15872+ return node_to_cpumask_map[node];
15873+}
15874+EXPORT_SYMBOL(node_to_cpumask);
15875+
15876+/*
15877+ * --------- end of debug versions of the numa functions ---------
15878+ */
15879+
15880+#endif /* CONFIG_DEBUG_PER_CPU_MAPS */
15881+
15882+#endif /* X86_64_NUMA */
15883+
15884--- sle11-2009-10-16.orig/arch/x86/kernel/smp-xen.c 2009-03-16 16:38:05.000000000 +0100
15885+++ sle11-2009-10-16/arch/x86/kernel/smp-xen.c 2009-06-04 10:21:39.000000000 +0200
15886@@ -121,132 +121,14 @@ void xen_smp_send_reschedule(int cpu)
15887 send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
15888 }
15889
15890-/*
15891- * Structure and data for smp_call_function(). This is designed to minimise
15892- * static memory requirements. It also looks cleaner.
15893- */
15894-static DEFINE_SPINLOCK(call_lock);
15895-
15896-struct call_data_struct {
15897- void (*func) (void *info);
15898- void *info;
15899- atomic_t started;
15900- atomic_t finished;
15901- int wait;
15902-};
15903-
15904-void lock_ipi_call_lock(void)
15905+void xen_send_call_func_single_ipi(int cpu)
15906 {
15907- spin_lock_irq(&call_lock);
15908+ send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNC_SINGLE_VECTOR);
15909 }
15910
15911-void unlock_ipi_call_lock(void)
15912+void xen_send_call_func_ipi(cpumask_t mask)
15913 {
15914- spin_unlock_irq(&call_lock);
15915-}
15916-
15917-static struct call_data_struct *call_data;
15918-
15919-static void __smp_call_function(void (*func) (void *info), void *info,
15920- int nonatomic, int wait)
15921-{
15922- struct call_data_struct data;
15923- int cpus = num_online_cpus() - 1;
15924-
15925- if (!cpus)
15926- return;
15927-
15928- data.func = func;
15929- data.info = info;
15930- atomic_set(&data.started, 0);
15931- data.wait = wait;
15932- if (wait)
15933- atomic_set(&data.finished, 0);
15934-
15935- call_data = &data;
15936- mb();
15937-
15938- /* Send a message to all other CPUs and wait for them to respond */
15939- send_IPI_allbutself(CALL_FUNCTION_VECTOR);
15940-
15941- /* Wait for response */
15942- while (atomic_read(&data.started) != cpus)
15943- cpu_relax();
15944-
15945- if (wait)
15946- while (atomic_read(&data.finished) != cpus)
15947- cpu_relax();
15948-}
15949-
15950-
15951-/**
15952- * smp_call_function_mask(): Run a function on a set of other CPUs.
15953- * @mask: The set of cpus to run on. Must not include the current cpu.
15954- * @func: The function to run. This must be fast and non-blocking.
15955- * @info: An arbitrary pointer to pass to the function.
15956- * @wait: If true, wait (atomically) until function has completed on other CPUs.
15957- *
15958- * Returns 0 on success, else a negative status code.
15959- *
15960- * If @wait is true, then returns once @func has returned; otherwise
15961- * it returns just before the target cpu calls @func.
15962- *
15963- * You must not call this function with disabled interrupts or from a
15964- * hardware interrupt handler or from a bottom half handler.
15965- */
15966-int
15967-xen_smp_call_function_mask(cpumask_t mask,
15968- void (*func)(void *), void *info,
15969- int wait)
15970-{
15971- struct call_data_struct data;
15972- cpumask_t allbutself;
15973- int cpus;
15974-
15975- /* Can deadlock when called with interrupts disabled */
15976- WARN_ON(irqs_disabled());
15977-
15978- /* Holding any lock stops cpus from going down. */
15979- spin_lock(&call_lock);
15980-
15981- allbutself = cpu_online_map;
15982- cpu_clear(smp_processor_id(), allbutself);
15983-
15984- cpus_and(mask, mask, allbutself);
15985- cpus = cpus_weight(mask);
15986-
15987- if (!cpus) {
15988- spin_unlock(&call_lock);
15989- return 0;
15990- }
15991-
15992- data.func = func;
15993- data.info = info;
15994- atomic_set(&data.started, 0);
15995- data.wait = wait;
15996- if (wait)
15997- atomic_set(&data.finished, 0);
15998-
15999- call_data = &data;
16000- wmb();
16001-
16002- /* Send a message to other CPUs */
16003- if (cpus_equal(mask, allbutself) &&
16004- cpus_equal(cpu_online_map, cpu_callout_map))
16005- send_IPI_allbutself(CALL_FUNCTION_VECTOR);
16006- else
16007- send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
16008-
16009- /* Wait for response */
16010- while (atomic_read(&data.started) != cpus)
16011- cpu_relax();
16012-
16013- if (wait)
16014- while (atomic_read(&data.finished) != cpus)
16015- cpu_relax();
16016- spin_unlock(&call_lock);
16017-
16018- return 0;
16019+ send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
16020 }
16021
16022 static void stop_this_cpu(void *dummy)
16023@@ -268,15 +150,10 @@ static void stop_this_cpu(void *dummy)
16024
16025 void xen_smp_send_stop(void)
16026 {
16027- int nolock;
16028 unsigned long flags;
16029
16030- /* Don't deadlock on the call lock in panic */
16031- nolock = !spin_trylock(&call_lock);
16032+ smp_call_function(stop_this_cpu, NULL, 0);
16033 local_irq_save(flags);
16034- __smp_call_function(stop_this_cpu, NULL, 0, 0);
16035- if (!nolock)
16036- spin_unlock(&call_lock);
16037 disable_all_local_evtchn();
16038 local_irq_restore(flags);
16039 }
16040@@ -298,21 +175,8 @@ irqreturn_t smp_reschedule_interrupt(int
16041
16042 irqreturn_t smp_call_function_interrupt(int irq, void *dev_id)
16043 {
16044- void (*func) (void *info) = call_data->func;
16045- void *info = call_data->info;
16046- int wait = call_data->wait;
16047-
16048- /*
16049- * Notify initiating CPU that I've grabbed the data and am
16050- * about to execute the function
16051- */
16052- mb();
16053- atomic_inc(&call_data->started);
16054- /*
16055- * At this point the info structure may be out of scope unless wait==1
16056- */
16057 irq_enter();
16058- (*func)(info);
16059+ generic_smp_call_function_interrupt();
16060 #ifdef CONFIG_X86_32
16061 __get_cpu_var(irq_stat).irq_call_count++;
16062 #else
16063@@ -320,10 +184,19 @@ irqreturn_t smp_call_function_interrupt(
16064 #endif
16065 irq_exit();
16066
16067- if (wait) {
16068- mb();
16069- atomic_inc(&call_data->finished);
16070- }
16071+ return IRQ_HANDLED;
16072+}
16073+
16074+irqreturn_t smp_call_function_single_interrupt(int irq, void *dev_id)
16075+{
16076+ irq_enter();
16077+ generic_smp_call_function_single_interrupt();
16078+#ifdef CONFIG_X86_32
16079+ __get_cpu_var(irq_stat).irq_call_count++;
16080+#else
16081+ add_pda(irq_call_count, 1);
16082+#endif
16083+ irq_exit();
16084
16085 return IRQ_HANDLED;
16086 }
16087--- sle11-2009-10-16.orig/arch/x86/kernel/time_32-xen.c 2009-10-28 14:58:12.000000000 +0100
16088+++ sle11-2009-10-16/arch/x86/kernel/time_32-xen.c 2009-10-28 14:58:19.000000000 +0100
16089@@ -468,7 +468,7 @@ irqreturn_t timer_interrupt(int irq, voi
16090
16091 /* Keep nmi watchdog up to date */
16092 #ifdef __i386__
16093- per_cpu(irq_stat, smp_processor_id()).irq0_irqs++;
16094+ x86_add_percpu(irq_stat.irq0_irqs, 1);
16095 #else
16096 add_pda(irq0_irqs, 1);
16097 #endif
16098@@ -747,9 +747,7 @@ void __init time_init(void)
16099
16100 update_wallclock();
16101
16102-#ifndef CONFIG_X86_64
16103 use_tsc_delay();
16104-#endif
16105
16106 /* Cannot request_irq() until kmem is initialised. */
16107 late_time_init = setup_cpu0_timer_irq;
16108@@ -806,7 +804,8 @@ static void stop_hz_timer(void)
16109
16110 /* Leave ourselves in tick mode if rcu or softirq or timer pending. */
16111 if (rcu_needs_cpu(cpu) || local_softirq_pending() ||
16112- (j = next_timer_interrupt(), time_before_eq(j, jiffies))) {
16113+ (j = get_next_timer_interrupt(jiffies),
16114+ time_before_eq(j, jiffies))) {
16115 cpu_clear(cpu, nohz_cpu_mask);
16116 j = jiffies + 1;
16117 }
16118--- sle11-2009-10-16.orig/arch/x86/kernel/traps_32-xen.c 2009-03-16 16:38:05.000000000 +0100
16119+++ sle11-2009-10-16/arch/x86/kernel/traps_32-xen.c 2009-06-04 10:21:39.000000000 +0200
16120@@ -1,5 +1,6 @@
16121 /*
16122 * Copyright (C) 1991, 1992 Linus Torvalds
16123+ * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
16124 *
16125 * Pentium III FXSR, SSE support
16126 * Gareth Hughes <gareth@valinux.com>, May 2000
16127@@ -57,11 +58,10 @@
16128 #include <asm/nmi.h>
16129 #include <asm/smp.h>
16130 #include <asm/io.h>
16131+#include <asm/traps.h>
16132
16133 #include "mach_traps.h"
16134
16135-int panic_on_unrecovered_nmi;
16136-
16137 #ifndef CONFIG_XEN
16138 DECLARE_BITMAP(used_vectors, NR_VECTORS);
16139 EXPORT_SYMBOL_GPL(used_vectors);
16140@@ -82,43 +82,22 @@ gate_desc idt_table[256]
16141 __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, };
16142 #endif
16143
16144-asmlinkage void divide_error(void);
16145-asmlinkage void debug(void);
16146-asmlinkage void nmi(void);
16147-asmlinkage void int3(void);
16148-asmlinkage void overflow(void);
16149-asmlinkage void bounds(void);
16150-asmlinkage void invalid_op(void);
16151-asmlinkage void device_not_available(void);
16152-asmlinkage void coprocessor_segment_overrun(void);
16153-asmlinkage void invalid_TSS(void);
16154-asmlinkage void segment_not_present(void);
16155-asmlinkage void stack_segment(void);
16156-asmlinkage void general_protection(void);
16157-asmlinkage void page_fault(void);
16158-asmlinkage void coprocessor_error(void);
16159-asmlinkage void simd_coprocessor_error(void);
16160-asmlinkage void alignment_check(void);
16161-#ifndef CONFIG_XEN
16162-asmlinkage void spurious_interrupt_bug(void);
16163-#else
16164-asmlinkage void fixup_4gb_segment(void);
16165-#endif
16166-asmlinkage void machine_check(void);
16167-
16168+int panic_on_unrecovered_nmi;
16169 int kstack_depth_to_print = 24;
16170 static unsigned int code_bytes = 64;
16171+static int ignore_nmis;
16172+static int die_counter;
16173
16174 void printk_address(unsigned long address, int reliable)
16175 {
16176 #ifdef CONFIG_KALLSYMS
16177- char namebuf[KSYM_NAME_LEN];
16178 unsigned long offset = 0;
16179 unsigned long symsize;
16180 const char *symname;
16181- char reliab[4] = "";
16182- char *delim = ":";
16183 char *modname;
16184+ char *delim = ":";
16185+ char namebuf[KSYM_NAME_LEN];
16186+ char reliab[4] = "";
16187
16188 symname = kallsyms_lookup(address, &symsize, &offset,
16189 &modname, namebuf);
16190@@ -138,22 +117,23 @@ void printk_address(unsigned long addres
16191 #endif
16192 }
16193
16194-static inline int valid_stack_ptr(struct thread_info *tinfo, void *p, unsigned size)
16195+static inline int valid_stack_ptr(struct thread_info *tinfo,
16196+ void *p, unsigned int size)
16197 {
16198- return p > (void *)tinfo &&
16199- p <= (void *)tinfo + THREAD_SIZE - size;
16200+ void *t = tinfo;
16201+ return p > t && p <= t + THREAD_SIZE - size;
16202 }
16203
16204 /* The form of the top of the frame on the stack */
16205 struct stack_frame {
16206- struct stack_frame *next_frame;
16207- unsigned long return_address;
16208+ struct stack_frame *next_frame;
16209+ unsigned long return_address;
16210 };
16211
16212 static inline unsigned long
16213 print_context_stack(struct thread_info *tinfo,
16214- unsigned long *stack, unsigned long bp,
16215- const struct stacktrace_ops *ops, void *data)
16216+ unsigned long *stack, unsigned long bp,
16217+ const struct stacktrace_ops *ops, void *data)
16218 {
16219 struct stack_frame *frame = (struct stack_frame *)bp;
16220
16221@@ -175,8 +155,6 @@ print_context_stack(struct thread_info *
16222 return bp;
16223 }
16224
16225-#define MSG(msg) ops->warning(data, msg)
16226-
16227 void dump_trace(struct task_struct *task, struct pt_regs *regs,
16228 unsigned long *stack, unsigned long bp,
16229 const struct stacktrace_ops *ops, void *data)
16230@@ -186,7 +164,6 @@ void dump_trace(struct task_struct *task
16231
16232 if (!stack) {
16233 unsigned long dummy;
16234-
16235 stack = &dummy;
16236 if (task != current)
16237 stack = (unsigned long *)task->thread.sp;
16238@@ -204,7 +181,7 @@ void dump_trace(struct task_struct *task
16239 }
16240 #endif
16241
16242- while (1) {
16243+ for (;;) {
16244 struct thread_info *context;
16245
16246 context = (struct thread_info *)
16247@@ -256,15 +233,15 @@ static void print_trace_address(void *da
16248 }
16249
16250 static const struct stacktrace_ops print_trace_ops = {
16251- .warning = print_trace_warning,
16252- .warning_symbol = print_trace_warning_symbol,
16253- .stack = print_trace_stack,
16254- .address = print_trace_address,
16255+ .warning = print_trace_warning,
16256+ .warning_symbol = print_trace_warning_symbol,
16257+ .stack = print_trace_stack,
16258+ .address = print_trace_address,
16259 };
16260
16261 static void
16262 show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
16263- unsigned long *stack, unsigned long bp, char *log_lvl)
16264+ unsigned long *stack, unsigned long bp, char *log_lvl)
16265 {
16266 dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
16267 printk("%s =======================\n", log_lvl);
16268@@ -359,15 +336,14 @@ void show_registers(struct pt_regs *regs
16269 printk(KERN_EMERG "Code: ");
16270
16271 ip = (u8 *)regs->ip - code_prologue;
16272- if (ip < (u8 *)PAGE_OFFSET ||
16273- probe_kernel_address(ip, c)) {
16274+ if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) {
16275 /* try starting at EIP */
16276 ip = (u8 *)regs->ip;
16277 code_len = code_len - code_prologue + 1;
16278 }
16279 for (i = 0; i < code_len; i++, ip++) {
16280 if (ip < (u8 *)PAGE_OFFSET ||
16281- probe_kernel_address(ip, c)) {
16282+ probe_kernel_address(ip, c)) {
16283 printk(" Bad EIP value.");
16284 break;
16285 }
16286@@ -392,7 +368,53 @@ int is_valid_bugaddr(unsigned long ip)
16287 return ud2 == 0x0b0f;
16288 }
16289
16290-static int die_counter;
16291+static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED;
16292+static int die_owner = -1;
16293+static unsigned int die_nest_count;
16294+
16295+unsigned __kprobes long oops_begin(void)
16296+{
16297+ unsigned long flags;
16298+
16299+ oops_enter();
16300+
16301+ if (die_owner != raw_smp_processor_id()) {
16302+ console_verbose();
16303+ raw_local_irq_save(flags);
16304+ __raw_spin_lock(&die_lock);
16305+ die_owner = smp_processor_id();
16306+ die_nest_count = 0;
16307+ bust_spinlocks(1);
16308+ } else {
16309+ raw_local_irq_save(flags);
16310+ }
16311+ die_nest_count++;
16312+ return flags;
16313+}
16314+
16315+void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
16316+{
16317+ bust_spinlocks(0);
16318+ die_owner = -1;
16319+ add_taint(TAINT_DIE);
16320+ __raw_spin_unlock(&die_lock);
16321+ raw_local_irq_restore(flags);
16322+
16323+ if (!regs)
16324+ return;
16325+
16326+ if (kexec_should_crash(current))
16327+ crash_kexec(regs);
16328+
16329+ if (in_interrupt())
16330+ panic("Fatal exception in interrupt");
16331+
16332+ if (panic_on_oops)
16333+ panic("Fatal exception");
16334+
16335+ oops_exit();
16336+ do_exit(signr);
16337+}
16338
16339 int __kprobes __die(const char *str, struct pt_regs *regs, long err)
16340 {
16341@@ -410,26 +432,22 @@ int __kprobes __die(const char *str, str
16342 printk("DEBUG_PAGEALLOC");
16343 #endif
16344 printk("\n");
16345-
16346 if (notify_die(DIE_OOPS, str, regs, err,
16347- current->thread.trap_no, SIGSEGV) != NOTIFY_STOP) {
16348+ current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
16349+ return 1;
16350
16351- show_registers(regs);
16352- /* Executive summary in case the oops scrolled away */
16353- sp = (unsigned long) (&regs->sp);
16354- savesegment(ss, ss);
16355- if (user_mode(regs)) {
16356- sp = regs->sp;
16357- ss = regs->ss & 0xffff;
16358- }
16359- printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip);
16360- print_symbol("%s", regs->ip);
16361- printk(" SS:ESP %04x:%08lx\n", ss, sp);
16362-
16363- return 0;
16364- }
16365-
16366- return 1;
16367+ show_registers(regs);
16368+ /* Executive summary in case the oops scrolled away */
16369+ sp = (unsigned long) (&regs->sp);
16370+ savesegment(ss, ss);
16371+ if (user_mode(regs)) {
16372+ sp = regs->sp;
16373+ ss = regs->ss & 0xffff;
16374+ }
16375+ printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip);
16376+ print_symbol("%s", regs->ip);
16377+ printk(" SS:ESP %04x:%08lx\n", ss, sp);
16378+ return 0;
16379 }
16380
16381 /*
16382@@ -438,31 +456,9 @@ int __kprobes __die(const char *str, str
16383 */
16384 void die(const char *str, struct pt_regs *regs, long err)
16385 {
16386- static struct {
16387- raw_spinlock_t lock;
16388- u32 lock_owner;
16389- int lock_owner_depth;
16390- } die = {
16391- .lock = __RAW_SPIN_LOCK_UNLOCKED,
16392- .lock_owner = -1,
16393- .lock_owner_depth = 0
16394- };
16395- unsigned long flags;
16396-
16397- oops_enter();
16398+ unsigned long flags = oops_begin();
16399
16400- if (die.lock_owner != raw_smp_processor_id()) {
16401- console_verbose();
16402- raw_local_irq_save(flags);
16403- __raw_spin_lock(&die.lock);
16404- die.lock_owner = smp_processor_id();
16405- die.lock_owner_depth = 0;
16406- bust_spinlocks(1);
16407- } else {
16408- raw_local_irq_save(flags);
16409- }
16410-
16411- if (++die.lock_owner_depth < 3) {
16412+ if (die_nest_count < 3) {
16413 report_bug(regs->ip, regs);
16414
16415 if (__die(str, regs, err))
16416@@ -471,26 +467,7 @@ void die(const char *str, struct pt_regs
16417 printk(KERN_EMERG "Recursive die() failure, output suppressed\n");
16418 }
16419
16420- bust_spinlocks(0);
16421- die.lock_owner = -1;
16422- add_taint(TAINT_DIE);
16423- __raw_spin_unlock(&die.lock);
16424- raw_local_irq_restore(flags);
16425-
16426- if (!regs)
16427- return;
16428-
16429- if (kexec_should_crash(current))
16430- crash_kexec(regs);
16431-
16432- if (in_interrupt())
16433- panic("Fatal exception in interrupt");
16434-
16435- if (panic_on_oops)
16436- panic("Fatal exception");
16437-
16438- oops_exit();
16439- do_exit(SIGSEGV);
16440+ oops_end(flags, regs, SIGSEGV);
16441 }
16442
16443 static inline void
16444@@ -554,7 +531,7 @@ void do_##name(struct pt_regs *regs, lon
16445 { \
16446 trace_hardirqs_fixup(); \
16447 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
16448- == NOTIFY_STOP) \
16449+ == NOTIFY_STOP) \
16450 return; \
16451 do_trap(trapnr, signr, str, 0, regs, error_code, NULL); \
16452 }
16453@@ -570,7 +547,7 @@ void do_##name(struct pt_regs *regs, lon
16454 info.si_code = sicode; \
16455 info.si_addr = (void __user *)siaddr; \
16456 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
16457- == NOTIFY_STOP) \
16458+ == NOTIFY_STOP) \
16459 return; \
16460 do_trap(trapnr, signr, str, 0, regs, error_code, &info); \
16461 }
16462@@ -579,7 +556,7 @@ void do_##name(struct pt_regs *regs, lon
16463 void do_##name(struct pt_regs *regs, long error_code) \
16464 { \
16465 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
16466- == NOTIFY_STOP) \
16467+ == NOTIFY_STOP) \
16468 return; \
16469 do_trap(trapnr, signr, str, 1, regs, error_code, NULL); \
16470 }
16471@@ -594,28 +571,29 @@ void do_##name(struct pt_regs *regs, lon
16472 info.si_addr = (void __user *)siaddr; \
16473 trace_hardirqs_fixup(); \
16474 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
16475- == NOTIFY_STOP) \
16476+ == NOTIFY_STOP) \
16477 return; \
16478 do_trap(trapnr, signr, str, 1, regs, error_code, &info); \
16479 }
16480
16481-DO_VM86_ERROR_INFO(0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip)
16482+DO_VM86_ERROR_INFO(0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip)
16483 #ifndef CONFIG_KPROBES
16484 DO_VM86_ERROR(3, SIGTRAP, "int3", int3)
16485 #endif
16486 DO_VM86_ERROR(4, SIGSEGV, "overflow", overflow)
16487 DO_VM86_ERROR(5, SIGSEGV, "bounds", bounds)
16488-DO_ERROR_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip, 0)
16489-DO_ERROR(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun)
16490+DO_ERROR_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip, 0)
16491+DO_ERROR(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun)
16492 DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
16493-DO_ERROR(11, SIGBUS, "segment not present", segment_not_present)
16494-DO_ERROR(12, SIGBUS, "stack segment", stack_segment)
16495+DO_ERROR(11, SIGBUS, "segment not present", segment_not_present)
16496+DO_ERROR(12, SIGBUS, "stack segment", stack_segment)
16497 DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0, 0)
16498 DO_ERROR_INFO(32, SIGILL, "iret exception", iret_error, ILL_BADSTK, 0, 1)
16499
16500-void __kprobes do_general_protection(struct pt_regs * regs,
16501- long error_code)
16502+void __kprobes
16503+do_general_protection(struct pt_regs *regs, long error_code)
16504 {
16505+ struct task_struct *tsk;
16506 struct thread_struct *thread;
16507
16508 thread = &current->thread;
16509@@ -623,23 +601,24 @@ void __kprobes do_general_protection(str
16510 if (regs->flags & X86_VM_MASK)
16511 goto gp_in_vm86;
16512
16513+ tsk = current;
16514 if (!user_mode(regs))
16515 goto gp_in_kernel;
16516
16517- current->thread.error_code = error_code;
16518- current->thread.trap_no = 13;
16519+ tsk->thread.error_code = error_code;
16520+ tsk->thread.trap_no = 13;
16521
16522- if (show_unhandled_signals && unhandled_signal(current, SIGSEGV) &&
16523- printk_ratelimit()) {
16524+ if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
16525+ printk_ratelimit()) {
16526 printk(KERN_INFO
16527- "%s[%d] general protection ip:%lx sp:%lx error:%lx",
16528- current->comm, task_pid_nr(current),
16529- regs->ip, regs->sp, error_code);
16530+ "%s[%d] general protection ip:%lx sp:%lx error:%lx",
16531+ tsk->comm, task_pid_nr(tsk),
16532+ regs->ip, regs->sp, error_code);
16533 print_vma_addr(" in ", regs->ip);
16534 printk("\n");
16535 }
16536
16537- force_sig(SIGSEGV, current);
16538+ force_sig(SIGSEGV, tsk);
16539 return;
16540
16541 gp_in_vm86:
16542@@ -648,14 +627,15 @@ gp_in_vm86:
16543 return;
16544
16545 gp_in_kernel:
16546- if (!fixup_exception(regs)) {
16547- current->thread.error_code = error_code;
16548- current->thread.trap_no = 13;
16549- if (notify_die(DIE_GPF, "general protection fault", regs,
16550+ if (fixup_exception(regs))
16551+ return;
16552+
16553+ tsk->thread.error_code = error_code;
16554+ tsk->thread.trap_no = 13;
16555+ if (notify_die(DIE_GPF, "general protection fault", regs,
16556 error_code, 13, SIGSEGV) == NOTIFY_STOP)
16557- return;
16558- die("general protection fault", regs, error_code);
16559- }
16560+ return;
16561+ die("general protection fault", regs, error_code);
16562 }
16563
16564 static notrace __kprobes void
16565@@ -722,9 +702,9 @@ unknown_nmi_error(unsigned char reason,
16566
16567 static DEFINE_SPINLOCK(nmi_print_lock);
16568
16569-void notrace __kprobes die_nmi(struct pt_regs *regs, const char *msg)
16570+void notrace __kprobes die_nmi(char *str, struct pt_regs *regs, int do_panic)
16571 {
16572- if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 2, SIGINT) == NOTIFY_STOP)
16573+ if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP)
16574 return;
16575
16576 spin_lock(&nmi_print_lock);
16577@@ -733,10 +713,12 @@ void notrace __kprobes die_nmi(struct pt
16578 * to get a message out:
16579 */
16580 bust_spinlocks(1);
16581- printk(KERN_EMERG "%s", msg);
16582+ printk(KERN_EMERG "%s", str);
16583 printk(" on CPU%d, ip %08lx, registers:\n",
16584 smp_processor_id(), regs->ip);
16585 show_registers(regs);
16586+ if (do_panic)
16587+ panic("Non maskable interrupt");
16588 console_silent();
16589 spin_unlock(&nmi_print_lock);
16590 bust_spinlocks(0);
16591@@ -756,14 +738,17 @@ void notrace __kprobes die_nmi(struct pt
16592 static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
16593 {
16594 unsigned char reason = 0;
16595+ int cpu;
16596
16597- /* Only the BSP gets external NMIs from the system: */
16598- if (!smp_processor_id())
16599+ cpu = smp_processor_id();
16600+
16601+ /* Only the BSP gets external NMIs from the system. */
16602+ if (!cpu)
16603 reason = get_nmi_reason();
16604
16605 if (!(reason & 0xc0)) {
16606 if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT)
16607- == NOTIFY_STOP)
16608+ == NOTIFY_STOP)
16609 return;
16610 #ifdef CONFIG_X86_LOCAL_APIC
16611 /*
16612@@ -772,7 +757,7 @@ static notrace __kprobes void default_do
16613 */
16614 if (nmi_watchdog_tick(regs, reason))
16615 return;
16616- if (!do_nmi_callback(regs, smp_processor_id()))
16617+ if (!do_nmi_callback(regs, cpu))
16618 unknown_nmi_error(reason, regs);
16619 #else
16620 unknown_nmi_error(reason, regs);
16621@@ -782,6 +767,8 @@ static notrace __kprobes void default_do
16622 }
16623 if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
16624 return;
16625+
16626+ /* AK: following checks seem to be broken on modern chipsets. FIXME */
16627 if (reason & 0x80)
16628 mem_parity_error(reason, regs);
16629 if (reason & 0x40)
16630@@ -793,8 +780,6 @@ static notrace __kprobes void default_do
16631 reassert_nmi();
16632 }
16633
16634-static int ignore_nmis;
16635-
16636 notrace __kprobes void do_nmi(struct pt_regs *regs, long error_code)
16637 {
16638 int cpu;
16639@@ -879,7 +864,7 @@ void __kprobes do_debug(struct pt_regs *
16640 tsk->thread.debugctlmsr = 0;
16641
16642 if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code,
16643- SIGTRAP) == NOTIFY_STOP)
16644+ SIGTRAP) == NOTIFY_STOP)
16645 return;
16646 /* It's safe to allow irq's after DR6 has been saved */
16647 if (regs->flags & X86_EFLAGS_IF)
16648@@ -940,9 +925,8 @@ clear_TF_reenable:
16649 void math_error(void __user *ip)
16650 {
16651 struct task_struct *task;
16652- unsigned short cwd;
16653- unsigned short swd;
16654 siginfo_t info;
16655+ unsigned short cwd, swd;
16656
16657 /*
16658 * Save the info for the exception handler and clear the error.
16659@@ -961,7 +945,7 @@ void math_error(void __user *ip)
16660 * C1 reg you need in case of a stack fault, 0x040 is the stack
16661 * fault bit. We should only be taking one exception at a time,
16662 * so if this combination doesn't produce any single exception,
16663- * then we have a bad program that isn't syncronizing its FPU usage
16664+ * then we have a bad program that isn't synchronizing its FPU usage
16665 * and it will suffer the consequences since we won't be able to
16666 * fully reproduce the context of the exception
16667 */
16668@@ -970,7 +954,7 @@ void math_error(void __user *ip)
16669 switch (swd & ~cwd & 0x3f) {
16670 case 0x000: /* No unmasked exception */
16671 return;
16672- default: /* Multiple exceptions */
16673+ default: /* Multiple exceptions */
16674 break;
16675 case 0x001: /* Invalid Op */
16676 /*
16677@@ -1006,8 +990,8 @@ void do_coprocessor_error(struct pt_regs
16678 static void simd_math_error(void __user *ip)
16679 {
16680 struct task_struct *task;
16681- unsigned short mxcsr;
16682 siginfo_t info;
16683+ unsigned short mxcsr;
16684
16685 /*
16686 * Save the info for the exception handler and clear the error.
16687@@ -1084,7 +1068,7 @@ void do_spurious_interrupt_bug(struct pt
16688
16689 unsigned long patch_espfix_desc(unsigned long uesp, unsigned long kesp)
16690 {
16691- struct desc_struct *gdt = __get_cpu_var(gdt_page).gdt;
16692+ struct desc_struct *gdt = get_cpu_gdt_table(smp_processor_id());
16693 unsigned long base = (kesp - uesp) & -THREAD_SIZE;
16694 unsigned long new_kesp = kesp - base;
16695 unsigned long lim_pages = (new_kesp | (THREAD_SIZE - 1)) >> PAGE_SHIFT;
16696--- sle11-2009-10-16.orig/arch/x86/kernel/traps_64-xen.c 2009-03-16 16:38:05.000000000 +0100
16697+++ sle11-2009-10-16/arch/x86/kernel/traps_64-xen.c 2009-06-04 10:21:39.000000000 +0200
16698@@ -10,73 +10,56 @@
16699 * 'Traps.c' handles hardware traps and faults after we have saved some
16700 * state in 'entry.S'.
16701 */
16702-#include <linux/sched.h>
16703+#include <linux/moduleparam.h>
16704+#include <linux/interrupt.h>
16705+#include <linux/kallsyms.h>
16706+#include <linux/spinlock.h>
16707+#include <linux/kprobes.h>
16708+#include <linux/uaccess.h>
16709+#include <linux/utsname.h>
16710+#include <linux/kdebug.h>
16711 #include <linux/kernel.h>
16712+#include <linux/module.h>
16713+#include <linux/ptrace.h>
16714 #include <linux/string.h>
16715+#include <linux/unwind.h>
16716+#include <linux/delay.h>
16717 #include <linux/errno.h>
16718-#include <linux/ptrace.h>
16719+#include <linux/kexec.h>
16720+#include <linux/sched.h>
16721 #include <linux/timer.h>
16722-#include <linux/mm.h>
16723 #include <linux/init.h>
16724-#include <linux/delay.h>
16725-#include <linux/spinlock.h>
16726-#include <linux/interrupt.h>
16727-#include <linux/kallsyms.h>
16728-#include <linux/module.h>
16729-#include <linux/moduleparam.h>
16730-#include <linux/nmi.h>
16731-#include <linux/kprobes.h>
16732-#include <linux/kexec.h>
16733-#include <linux/unwind.h>
16734-#include <linux/uaccess.h>
16735 #include <linux/bug.h>
16736-#include <linux/kdebug.h>
16737-#include <linux/utsname.h>
16738-
16739-#include <mach_traps.h>
16740+#include <linux/nmi.h>
16741+#include <linux/mm.h>
16742
16743 #if defined(CONFIG_EDAC)
16744 #include <linux/edac.h>
16745 #endif
16746
16747-#include <asm/system.h>
16748-#include <asm/io.h>
16749-#include <asm/atomic.h>
16750+#include <asm/stacktrace.h>
16751+#include <asm/processor.h>
16752 #include <asm/debugreg.h>
16753+#include <asm/atomic.h>
16754+#include <asm/system.h>
16755+#include <asm/unwind.h>
16756 #include <asm/desc.h>
16757 #include <asm/i387.h>
16758-#include <asm/processor.h>
16759-#include <asm/unwind.h>
16760+#include <asm/nmi.h>
16761 #include <asm/smp.h>
16762+#include <asm/io.h>
16763 #include <asm/pgalloc.h>
16764-#include <asm/pda.h>
16765 #include <asm/proto.h>
16766-#include <asm/nmi.h>
16767-#include <asm/stacktrace.h>
16768+#include <asm/pda.h>
16769+#include <asm/traps.h>
16770
16771-asmlinkage void divide_error(void);
16772-asmlinkage void debug(void);
16773-asmlinkage void nmi(void);
16774-asmlinkage void int3(void);
16775-asmlinkage void overflow(void);
16776-asmlinkage void bounds(void);
16777-asmlinkage void invalid_op(void);
16778-asmlinkage void device_not_available(void);
16779-asmlinkage void double_fault(void);
16780-asmlinkage void coprocessor_segment_overrun(void);
16781-asmlinkage void invalid_TSS(void);
16782-asmlinkage void segment_not_present(void);
16783-asmlinkage void stack_segment(void);
16784-asmlinkage void general_protection(void);
16785-asmlinkage void page_fault(void);
16786-asmlinkage void coprocessor_error(void);
16787-asmlinkage void simd_coprocessor_error(void);
16788-asmlinkage void reserved(void);
16789-asmlinkage void alignment_check(void);
16790-asmlinkage void machine_check(void);
16791-asmlinkage void spurious_interrupt_bug(void);
16792+#include <mach_traps.h>
16793
16794+int panic_on_unrecovered_nmi;
16795+int kstack_depth_to_print = 12;
16796 static unsigned int code_bytes = 64;
16797+static int ignore_nmis;
16798+static int die_counter;
16799
16800 static inline void conditional_sti(struct pt_regs *regs)
16801 {
16802@@ -100,34 +83,9 @@ static inline void preempt_conditional_c
16803 dec_preempt_count();
16804 }
16805
16806-int kstack_depth_to_print = 12;
16807-
16808 void printk_address(unsigned long address, int reliable)
16809 {
16810-#ifdef CONFIG_KALLSYMS
16811- unsigned long offset = 0, symsize;
16812- const char *symname;
16813- char *modname;
16814- char *delim = ":";
16815- char namebuf[KSYM_NAME_LEN];
16816- char reliab[4] = "";
16817-
16818- symname = kallsyms_lookup(address, &symsize, &offset,
16819- &modname, namebuf);
16820- if (!symname) {
16821- printk(" [<%016lx>]\n", address);
16822- return;
16823- }
16824- if (!reliable)
16825- strcpy(reliab, "? ");
16826-
16827- if (!modname)
16828- modname = delim = "";
16829- printk(" [<%016lx>] %s%s%s%s%s+0x%lx/0x%lx\n",
16830- address, reliab, delim, modname, delim, symname, offset, symsize);
16831-#else
16832- printk(" [<%016lx>]\n", address);
16833-#endif
16834+ printk(" [<%016lx>] %s%pS\n", address, reliable ? "": "? ", (void *) address);
16835 }
16836
16837 static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
16838@@ -206,8 +164,6 @@ static unsigned long *in_exception_stack
16839 return NULL;
16840 }
16841
16842-#define MSG(txt) ops->warning(data, txt)
16843-
16844 /*
16845 * x86-64 can have up to three kernel stacks:
16846 * process stack
16847@@ -234,11 +190,11 @@ struct stack_frame {
16848 unsigned long return_address;
16849 };
16850
16851-
16852-static inline unsigned long print_context_stack(struct thread_info *tinfo,
16853- unsigned long *stack, unsigned long bp,
16854- const struct stacktrace_ops *ops, void *data,
16855- unsigned long *end)
16856+static inline unsigned long
16857+print_context_stack(struct thread_info *tinfo,
16858+ unsigned long *stack, unsigned long bp,
16859+ const struct stacktrace_ops *ops, void *data,
16860+ unsigned long *end)
16861 {
16862 struct stack_frame *frame = (struct stack_frame *)bp;
16863
16864@@ -260,7 +216,7 @@ static inline unsigned long print_contex
16865 return bp;
16866 }
16867
16868-void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
16869+void dump_trace(struct task_struct *task, struct pt_regs *regs,
16870 unsigned long *stack, unsigned long bp,
16871 const struct stacktrace_ops *ops, void *data)
16872 {
16873@@ -269,36 +225,34 @@ void dump_trace(struct task_struct *tsk,
16874 unsigned used = 0;
16875 struct thread_info *tinfo;
16876
16877- if (!tsk)
16878- tsk = current;
16879- tinfo = task_thread_info(tsk);
16880+ if (!task)
16881+ task = current;
16882
16883 if (!stack) {
16884 unsigned long dummy;
16885 stack = &dummy;
16886- if (tsk && tsk != current)
16887- stack = (unsigned long *)tsk->thread.sp;
16888+ if (task && task != current)
16889+ stack = (unsigned long *)task->thread.sp;
16890 }
16891
16892 #ifdef CONFIG_FRAME_POINTER
16893 if (!bp) {
16894- if (tsk == current) {
16895+ if (task == current) {
16896 /* Grab bp right from our regs */
16897- asm("movq %%rbp, %0" : "=r" (bp):);
16898+ asm("movq %%rbp, %0" : "=r" (bp) :);
16899 } else {
16900 /* bp is the last reg pushed by switch_to */
16901- bp = *(unsigned long *) tsk->thread.sp;
16902+ bp = *(unsigned long *) task->thread.sp;
16903 }
16904 }
16905 #endif
16906
16907-
16908-
16909 /*
16910 * Print function call entries in all stacks, starting at the
16911 * current stack address. If the stacks consist of nested
16912 * exceptions
16913 */
16914+ tinfo = task_thread_info(task);
16915 for (;;) {
16916 char *id;
16917 unsigned long *estack_end;
16918@@ -383,18 +337,24 @@ static const struct stacktrace_ops print
16919 .address = print_trace_address,
16920 };
16921
16922-void
16923-show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long *stack,
16924- unsigned long bp)
16925+static void
16926+show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
16927+ unsigned long *stack, unsigned long bp, char *log_lvl)
16928 {
16929 printk("\nCall Trace:\n");
16930- dump_trace(tsk, regs, stack, bp, &print_trace_ops, NULL);
16931+ dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
16932 printk("\n");
16933 }
16934
16935+void show_trace(struct task_struct *task, struct pt_regs *regs,
16936+ unsigned long *stack, unsigned long bp)
16937+{
16938+ show_trace_log_lvl(task, regs, stack, bp, "");
16939+}
16940+
16941 static void
16942-_show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long *sp,
16943- unsigned long bp)
16944+show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
16945+ unsigned long *sp, unsigned long bp, char *log_lvl)
16946 {
16947 unsigned long *stack;
16948 int i;
16949@@ -406,14 +366,14 @@ _show_stack(struct task_struct *tsk, str
16950 // back trace for this cpu.
16951
16952 if (sp == NULL) {
16953- if (tsk)
16954- sp = (unsigned long *)tsk->thread.sp;
16955+ if (task)
16956+ sp = (unsigned long *)task->thread.sp;
16957 else
16958 sp = (unsigned long *)&sp;
16959 }
16960
16961 stack = sp;
16962- for(i=0; i < kstack_depth_to_print; i++) {
16963+ for (i = 0; i < kstack_depth_to_print; i++) {
16964 if (stack >= irqstack && stack <= irqstack_end) {
16965 if (stack == irqstack_end) {
16966 stack = (unsigned long *) (irqstack_end[-1]);
16967@@ -428,12 +388,12 @@ _show_stack(struct task_struct *tsk, str
16968 printk(" %016lx", *stack++);
16969 touch_nmi_watchdog();
16970 }
16971- show_trace(tsk, regs, sp, bp);
16972+ show_trace_log_lvl(task, regs, sp, bp, log_lvl);
16973 }
16974
16975-void show_stack(struct task_struct *tsk, unsigned long * sp)
16976+void show_stack(struct task_struct *task, unsigned long *sp)
16977 {
16978- _show_stack(tsk, NULL, sp, 0);
16979+ show_stack_log_lvl(task, NULL, sp, 0, "");
16980 }
16981
16982 /*
16983@@ -441,8 +401,8 @@ void show_stack(struct task_struct *tsk,
16984 */
16985 void dump_stack(void)
16986 {
16987- unsigned long dummy;
16988 unsigned long bp = 0;
16989+ unsigned long stack;
16990
16991 #ifdef CONFIG_FRAME_POINTER
16992 if (!bp)
16993@@ -454,7 +414,7 @@ void dump_stack(void)
16994 init_utsname()->release,
16995 (int)strcspn(init_utsname()->version, " "),
16996 init_utsname()->version);
16997- show_trace(NULL, NULL, &dummy, bp);
16998+ show_trace(NULL, NULL, &stack, bp);
16999 }
17000
17001 EXPORT_SYMBOL(dump_stack);
17002@@ -465,12 +425,8 @@ void show_registers(struct pt_regs *regs
17003 unsigned long sp;
17004 const int cpu = smp_processor_id();
17005 struct task_struct *cur = cpu_pda(cpu)->pcurrent;
17006- u8 *ip;
17007- unsigned int code_prologue = code_bytes * 43 / 64;
17008- unsigned int code_len = code_bytes;
17009
17010 sp = regs->sp;
17011- ip = (u8 *) regs->ip - code_prologue;
17012 printk("CPU %d ", cpu);
17013 __show_regs(regs);
17014 printk("Process %s (pid: %d, threadinfo %p, task %p)\n",
17015@@ -481,15 +437,22 @@ void show_registers(struct pt_regs *regs
17016 * time of the fault..
17017 */
17018 if (!user_mode(regs)) {
17019+ unsigned int code_prologue = code_bytes * 43 / 64;
17020+ unsigned int code_len = code_bytes;
17021 unsigned char c;
17022+ u8 *ip;
17023+
17024 printk("Stack: ");
17025- _show_stack(NULL, regs, (unsigned long *)sp, regs->bp);
17026+ show_stack_log_lvl(NULL, regs, (unsigned long *)sp,
17027+ regs->bp, "");
17028 printk("\n");
17029
17030 printk(KERN_EMERG "Code: ");
17031+
17032+ ip = (u8 *)regs->ip - code_prologue;
17033 if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) {
17034 /* try starting at RIP */
17035- ip = (u8 *) regs->ip;
17036+ ip = (u8 *)regs->ip;
17037 code_len = code_len - code_prologue + 1;
17038 }
17039 for (i = 0; i < code_len; i++, ip++) {
17040@@ -505,7 +468,7 @@ void show_registers(struct pt_regs *regs
17041 }
17042 }
17043 printk("\n");
17044-}
17045+}
17046
17047 int is_valid_bugaddr(unsigned long ip)
17048 {
17049@@ -545,7 +508,7 @@ unsigned __kprobes long oops_begin(void)
17050 }
17051
17052 void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
17053-{
17054+{
17055 die_owner = -1;
17056 bust_spinlocks(0);
17057 die_nest_count--;
17058@@ -563,10 +526,9 @@ void __kprobes oops_end(unsigned long fl
17059 do_exit(signr);
17060 }
17061
17062-int __kprobes __die(const char * str, struct pt_regs * regs, long err)
17063+int __kprobes __die(const char *str, struct pt_regs *regs, long err)
17064 {
17065- static int die_counter;
17066- printk(KERN_EMERG "%s: %04lx [%u] ", str, err & 0xffff,++die_counter);
17067+ printk(KERN_EMERG "%s: %04lx [%u] ", str, err & 0xffff, ++die_counter);
17068 #ifdef CONFIG_PREEMPT
17069 printk("PREEMPT ");
17070 #endif
17071@@ -577,8 +539,10 @@ int __kprobes __die(const char * str, st
17072 printk("DEBUG_PAGEALLOC");
17073 #endif
17074 printk("\n");
17075- if (notify_die(DIE_OOPS, str, regs, err, current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
17076+ if (notify_die(DIE_OOPS, str, regs, err,
17077+ current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
17078 return 1;
17079+
17080 show_registers(regs);
17081 add_taint(TAINT_DIE);
17082 /* Executive summary in case the oops scrolled away */
17083@@ -590,7 +554,7 @@ int __kprobes __die(const char * str, st
17084 return 0;
17085 }
17086
17087-void die(const char * str, struct pt_regs * regs, long err)
17088+void die(const char *str, struct pt_regs *regs, long err)
17089 {
17090 unsigned long flags = oops_begin();
17091
17092@@ -608,8 +572,7 @@ die_nmi(char *str, struct pt_regs *regs,
17093 {
17094 unsigned long flags;
17095
17096- if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) ==
17097- NOTIFY_STOP)
17098+ if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP)
17099 return;
17100
17101 flags = oops_begin();
17102@@ -617,7 +580,9 @@ die_nmi(char *str, struct pt_regs *regs,
17103 * We are in trouble anyway, lets at least try
17104 * to get a message out.
17105 */
17106- printk(str, smp_processor_id());
17107+ printk(KERN_EMERG "%s", str);
17108+ printk(" on CPU%d, ip %08lx, registers:\n",
17109+ smp_processor_id(), regs->ip);
17110 show_registers(regs);
17111 if (kexec_should_crash(current))
17112 crash_kexec(regs);
17113@@ -630,44 +595,44 @@ die_nmi(char *str, struct pt_regs *regs,
17114 }
17115 #endif
17116
17117-static void __kprobes do_trap(int trapnr, int signr, char *str,
17118- struct pt_regs * regs, long error_code,
17119- siginfo_t *info)
17120+static void __kprobes
17121+do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
17122+ long error_code, siginfo_t *info)
17123 {
17124 struct task_struct *tsk = current;
17125
17126- if (user_mode(regs)) {
17127- /*
17128- * We want error_code and trap_no set for userspace
17129- * faults and kernelspace faults which result in
17130- * die(), but not kernelspace faults which are fixed
17131- * up. die() gives the process no chance to handle
17132- * the signal and notice the kernel fault information,
17133- * so that won't result in polluting the information
17134- * about previously queued, but not yet delivered,
17135- * faults. See also do_general_protection below.
17136- */
17137- tsk->thread.error_code = error_code;
17138- tsk->thread.trap_no = trapnr;
17139+ if (!user_mode(regs))
17140+ goto kernel_trap;
17141
17142- if (show_unhandled_signals && unhandled_signal(tsk, signr) &&
17143- printk_ratelimit()) {
17144- printk(KERN_INFO
17145- "%s[%d] trap %s ip:%lx sp:%lx error:%lx",
17146- tsk->comm, tsk->pid, str,
17147- regs->ip, regs->sp, error_code);
17148- print_vma_addr(" in ", regs->ip);
17149- printk("\n");
17150- }
17151+ /*
17152+ * We want error_code and trap_no set for userspace faults and
17153+ * kernelspace faults which result in die(), but not
17154+ * kernelspace faults which are fixed up. die() gives the
17155+ * process no chance to handle the signal and notice the
17156+ * kernel fault information, so that won't result in polluting
17157+ * the information about previously queued, but not yet
17158+ * delivered, faults. See also do_general_protection below.
17159+ */
17160+ tsk->thread.error_code = error_code;
17161+ tsk->thread.trap_no = trapnr;
17162
17163- if (info)
17164- force_sig_info(signr, info, tsk);
17165- else
17166- force_sig(signr, tsk);
17167- return;
17168+ if (show_unhandled_signals && unhandled_signal(tsk, signr) &&
17169+ printk_ratelimit()) {
17170+ printk(KERN_INFO
17171+ "%s[%d] trap %s ip:%lx sp:%lx error:%lx",
17172+ tsk->comm, tsk->pid, str,
17173+ regs->ip, regs->sp, error_code);
17174+ print_vma_addr(" in ", regs->ip);
17175+ printk("\n");
17176 }
17177
17178+ if (info)
17179+ force_sig_info(signr, info, tsk);
17180+ else
17181+ force_sig(signr, tsk);
17182+ return;
17183
17184+kernel_trap:
17185 if (!fixup_exception(regs)) {
17186 tsk->thread.error_code = error_code;
17187 tsk->thread.trap_no = trapnr;
17188@@ -677,41 +642,39 @@ static void __kprobes do_trap(int trapnr
17189 }
17190
17191 #define DO_ERROR(trapnr, signr, str, name) \
17192-asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
17193-{ \
17194- if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
17195- == NOTIFY_STOP) \
17196- return; \
17197+asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
17198+{ \
17199+ if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
17200+ == NOTIFY_STOP) \
17201+ return; \
17202 conditional_sti(regs); \
17203- do_trap(trapnr, signr, str, regs, error_code, NULL); \
17204+ do_trap(trapnr, signr, str, regs, error_code, NULL); \
17205 }
17206
17207-#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \
17208-asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
17209-{ \
17210- siginfo_t info; \
17211- info.si_signo = signr; \
17212- info.si_errno = 0; \
17213- info.si_code = sicode; \
17214- info.si_addr = (void __user *)siaddr; \
17215- trace_hardirqs_fixup(); \
17216- if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
17217- == NOTIFY_STOP) \
17218- return; \
17219+#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \
17220+asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
17221+{ \
17222+ siginfo_t info; \
17223+ info.si_signo = signr; \
17224+ info.si_errno = 0; \
17225+ info.si_code = sicode; \
17226+ info.si_addr = (void __user *)siaddr; \
17227+ trace_hardirqs_fixup(); \
17228+ if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
17229+ == NOTIFY_STOP) \
17230+ return; \
17231 conditional_sti(regs); \
17232- do_trap(trapnr, signr, str, regs, error_code, &info); \
17233+ do_trap(trapnr, signr, str, regs, error_code, &info); \
17234 }
17235
17236-DO_ERROR_INFO( 0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip)
17237-DO_ERROR( 4, SIGSEGV, "overflow", overflow)
17238-DO_ERROR( 5, SIGSEGV, "bounds", bounds)
17239-DO_ERROR_INFO( 6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip)
17240-DO_ERROR( 7, SIGSEGV, "device not available", device_not_available)
17241-DO_ERROR( 9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun)
17242+DO_ERROR_INFO(0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip)
17243+DO_ERROR(4, SIGSEGV, "overflow", overflow)
17244+DO_ERROR(5, SIGSEGV, "bounds", bounds)
17245+DO_ERROR_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip)
17246+DO_ERROR(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun)
17247 DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
17248-DO_ERROR(11, SIGBUS, "segment not present", segment_not_present)
17249+DO_ERROR(11, SIGBUS, "segment not present", segment_not_present)
17250 DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0)
17251-DO_ERROR(18, SIGSEGV, "reserved", reserved)
17252
17253 /* Runs on IST stack */
17254 asmlinkage void do_stack_segment(struct pt_regs *regs, long error_code)
17255@@ -741,31 +704,34 @@ asmlinkage void do_double_fault(struct p
17256 die(str, regs, error_code);
17257 }
17258
17259-asmlinkage void __kprobes do_general_protection(struct pt_regs * regs,
17260- long error_code)
17261+asmlinkage void __kprobes
17262+do_general_protection(struct pt_regs *regs, long error_code)
17263 {
17264- struct task_struct *tsk = current;
17265+ struct task_struct *tsk;
17266
17267 conditional_sti(regs);
17268
17269- if (user_mode(regs)) {
17270- tsk->thread.error_code = error_code;
17271- tsk->thread.trap_no = 13;
17272+ tsk = current;
17273+ if (!user_mode(regs))
17274+ goto gp_in_kernel;
17275
17276- if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
17277- printk_ratelimit()) {
17278- printk(KERN_INFO
17279- "%s[%d] general protection ip:%lx sp:%lx error:%lx",
17280- tsk->comm, tsk->pid,
17281- regs->ip, regs->sp, error_code);
17282- print_vma_addr(" in ", regs->ip);
17283- printk("\n");
17284- }
17285+ tsk->thread.error_code = error_code;
17286+ tsk->thread.trap_no = 13;
17287
17288- force_sig(SIGSEGV, tsk);
17289- return;
17290- }
17291+ if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
17292+ printk_ratelimit()) {
17293+ printk(KERN_INFO
17294+ "%s[%d] general protection ip:%lx sp:%lx error:%lx",
17295+ tsk->comm, tsk->pid,
17296+ regs->ip, regs->sp, error_code);
17297+ print_vma_addr(" in ", regs->ip);
17298+ printk("\n");
17299+ }
17300
17301+ force_sig(SIGSEGV, tsk);
17302+ return;
17303+
17304+gp_in_kernel:
17305 if (fixup_exception(regs))
17306 return;
17307
17308@@ -778,14 +744,14 @@ asmlinkage void __kprobes do_general_pro
17309 }
17310
17311 static notrace __kprobes void
17312-mem_parity_error(unsigned char reason, struct pt_regs * regs)
17313+mem_parity_error(unsigned char reason, struct pt_regs *regs)
17314 {
17315 printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n",
17316 reason);
17317 printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n");
17318
17319 #if defined(CONFIG_EDAC)
17320- if(edac_handler_set()) {
17321+ if (edac_handler_set()) {
17322 edac_atomic_assert_error();
17323 return;
17324 }
17325@@ -801,7 +767,7 @@ mem_parity_error(unsigned char reason, s
17326 }
17327
17328 static notrace __kprobes void
17329-io_check_error(unsigned char reason, struct pt_regs * regs)
17330+io_check_error(unsigned char reason, struct pt_regs *regs)
17331 {
17332 printk("NMI: IOCK error (debug interrupt?)\n");
17333 show_registers(regs);
17334@@ -827,14 +793,14 @@ unknown_nmi_error(unsigned char reason,
17335
17336 /* Runs on IST stack. This code must keep interrupts off all the time.
17337 Nested NMIs are prevented by the CPU. */
17338-asmlinkage notrace __kprobes void default_do_nmi(struct pt_regs *regs)
17339+asmlinkage notrace __kprobes void default_do_nmi(struct pt_regs *regs)
17340 {
17341 unsigned char reason = 0;
17342 int cpu;
17343
17344 cpu = smp_processor_id();
17345
17346- /* Only the BSP gets external NMIs from the system. */
17347+ /* Only the BSP gets external NMIs from the system. */
17348 if (!cpu)
17349 reason = get_nmi_reason();
17350
17351@@ -847,33 +813,58 @@ asmlinkage notrace __kprobes void defau
17352 * Ok, so this is none of the documented NMI sources,
17353 * so it must be the NMI watchdog.
17354 */
17355- if (nmi_watchdog_tick(regs,reason))
17356+ if (nmi_watchdog_tick(regs, reason))
17357 return;
17358 #endif
17359- if (!do_nmi_callback(regs,cpu))
17360+ if (!do_nmi_callback(regs, cpu))
17361 unknown_nmi_error(reason, regs);
17362
17363 return;
17364 }
17365 if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
17366- return;
17367+ return;
17368
17369 /* AK: following checks seem to be broken on modern chipsets. FIXME */
17370-
17371 if (reason & 0x80)
17372 mem_parity_error(reason, regs);
17373 if (reason & 0x40)
17374 io_check_error(reason, regs);
17375 }
17376
17377+asmlinkage notrace __kprobes void
17378+do_nmi(struct pt_regs *regs, long error_code)
17379+{
17380+ nmi_enter();
17381+
17382+ add_pda(__nmi_count, 1);
17383+
17384+ if (!ignore_nmis)
17385+ default_do_nmi(regs);
17386+
17387+ nmi_exit();
17388+}
17389+
17390+void stop_nmi(void)
17391+{
17392+ acpi_nmi_disable();
17393+ ignore_nmis++;
17394+}
17395+
17396+void restart_nmi(void)
17397+{
17398+ ignore_nmis--;
17399+ acpi_nmi_enable();
17400+}
17401+
17402 /* runs on IST stack. */
17403 asmlinkage void __kprobes do_int3(struct pt_regs * regs, long error_code)
17404 {
17405 trace_hardirqs_fixup();
17406
17407- if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) == NOTIFY_STOP) {
17408+ if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP)
17409+ == NOTIFY_STOP)
17410 return;
17411- }
17412+
17413 preempt_conditional_sti(regs);
17414 do_trap(3, SIGTRAP, "int3", regs, error_code, NULL);
17415 preempt_conditional_cli(regs);
17416@@ -904,8 +895,8 @@ asmlinkage __kprobes struct pt_regs *syn
17417 asmlinkage void __kprobes do_debug(struct pt_regs * regs,
17418 unsigned long error_code)
17419 {
17420- unsigned long condition;
17421 struct task_struct *tsk = current;
17422+ unsigned long condition;
17423 siginfo_t info;
17424
17425 trace_hardirqs_fixup();
17426@@ -926,21 +917,19 @@ asmlinkage void __kprobes do_debug(struc
17427
17428 /* Mask out spurious debug traps due to lazy DR7 setting */
17429 if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) {
17430- if (!tsk->thread.debugreg7) {
17431+ if (!tsk->thread.debugreg7)
17432 goto clear_dr7;
17433- }
17434 }
17435
17436 tsk->thread.debugreg6 = condition;
17437
17438-
17439 /*
17440 * Single-stepping through TF: make sure we ignore any events in
17441 * kernel space (but re-enable TF when returning to user mode).
17442 */
17443 if (condition & DR_STEP) {
17444- if (!user_mode(regs))
17445- goto clear_TF_reenable;
17446+ if (!user_mode(regs))
17447+ goto clear_TF_reenable;
17448 }
17449
17450 /* Ok, finally something we can handle */
17451@@ -953,7 +942,7 @@ asmlinkage void __kprobes do_debug(struc
17452 force_sig_info(SIGTRAP, &info, tsk);
17453
17454 clear_dr7:
17455- set_debugreg(0UL, 7);
17456+ set_debugreg(0, 7);
17457 preempt_conditional_cli(regs);
17458 return;
17459
17460@@ -961,6 +950,7 @@ clear_TF_reenable:
17461 set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
17462 regs->flags &= ~X86_EFLAGS_TF;
17463 preempt_conditional_cli(regs);
17464+ return;
17465 }
17466
17467 static int kernel_math_error(struct pt_regs *regs, const char *str, int trapnr)
17468@@ -983,7 +973,7 @@ static int kernel_math_error(struct pt_r
17469 asmlinkage void do_coprocessor_error(struct pt_regs *regs)
17470 {
17471 void __user *ip = (void __user *)(regs->ip);
17472- struct task_struct * task;
17473+ struct task_struct *task;
17474 siginfo_t info;
17475 unsigned short cwd, swd;
17476
17477@@ -1016,30 +1006,30 @@ asmlinkage void do_coprocessor_error(str
17478 cwd = get_fpu_cwd(task);
17479 swd = get_fpu_swd(task);
17480 switch (swd & ~cwd & 0x3f) {
17481- case 0x000:
17482- default:
17483- break;
17484- case 0x001: /* Invalid Op */
17485- /*
17486- * swd & 0x240 == 0x040: Stack Underflow
17487- * swd & 0x240 == 0x240: Stack Overflow
17488- * User must clear the SF bit (0x40) if set
17489- */
17490- info.si_code = FPE_FLTINV;
17491- break;
17492- case 0x002: /* Denormalize */
17493- case 0x010: /* Underflow */
17494- info.si_code = FPE_FLTUND;
17495- break;
17496- case 0x004: /* Zero Divide */
17497- info.si_code = FPE_FLTDIV;
17498- break;
17499- case 0x008: /* Overflow */
17500- info.si_code = FPE_FLTOVF;
17501- break;
17502- case 0x020: /* Precision */
17503- info.si_code = FPE_FLTRES;
17504- break;
17505+ case 0x000: /* No unmasked exception */
17506+ default: /* Multiple exceptions */
17507+ break;
17508+ case 0x001: /* Invalid Op */
17509+ /*
17510+ * swd & 0x240 == 0x040: Stack Underflow
17511+ * swd & 0x240 == 0x240: Stack Overflow
17512+ * User must clear the SF bit (0x40) if set
17513+ */
17514+ info.si_code = FPE_FLTINV;
17515+ break;
17516+ case 0x002: /* Denormalize */
17517+ case 0x010: /* Underflow */
17518+ info.si_code = FPE_FLTUND;
17519+ break;
17520+ case 0x004: /* Zero Divide */
17521+ info.si_code = FPE_FLTDIV;
17522+ break;
17523+ case 0x008: /* Overflow */
17524+ info.si_code = FPE_FLTOVF;
17525+ break;
17526+ case 0x020: /* Precision */
17527+ info.si_code = FPE_FLTRES;
17528+ break;
17529 }
17530 force_sig_info(SIGFPE, &info, task);
17531 }
17532@@ -1052,7 +1042,7 @@ asmlinkage void bad_intr(void)
17533 asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs)
17534 {
17535 void __user *ip = (void __user *)(regs->ip);
17536- struct task_struct * task;
17537+ struct task_struct *task;
17538 siginfo_t info;
17539 unsigned short mxcsr;
17540
17541@@ -1080,25 +1070,25 @@ asmlinkage void do_simd_coprocessor_erro
17542 */
17543 mxcsr = get_fpu_mxcsr(task);
17544 switch (~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f)) {
17545- case 0x000:
17546- default:
17547- break;
17548- case 0x001: /* Invalid Op */
17549- info.si_code = FPE_FLTINV;
17550- break;
17551- case 0x002: /* Denormalize */
17552- case 0x010: /* Underflow */
17553- info.si_code = FPE_FLTUND;
17554- break;
17555- case 0x004: /* Zero Divide */
17556- info.si_code = FPE_FLTDIV;
17557- break;
17558- case 0x008: /* Overflow */
17559- info.si_code = FPE_FLTOVF;
17560- break;
17561- case 0x020: /* Precision */
17562- info.si_code = FPE_FLTRES;
17563- break;
17564+ case 0x000:
17565+ default:
17566+ break;
17567+ case 0x001: /* Invalid Op */
17568+ info.si_code = FPE_FLTINV;
17569+ break;
17570+ case 0x002: /* Denormalize */
17571+ case 0x010: /* Underflow */
17572+ info.si_code = FPE_FLTUND;
17573+ break;
17574+ case 0x004: /* Zero Divide */
17575+ info.si_code = FPE_FLTDIV;
17576+ break;
17577+ case 0x008: /* Overflow */
17578+ info.si_code = FPE_FLTOVF;
17579+ break;
17580+ case 0x020: /* Precision */
17581+ info.si_code = FPE_FLTRES;
17582+ break;
17583 }
17584 force_sig_info(SIGFPE, &info, task);
17585 }
17586@@ -1118,7 +1108,7 @@ asmlinkage void __attribute__((weak)) mc
17587 }
17588
17589 /*
17590- * 'math_state_restore()' saves the current math information in the
17591+ * 'math_state_restore()' saves the current math information in the
17592 * old math state array, and gets the new ones from the current task
17593 *
17594 * Careful.. There are problems with IBM-designed IRQ13 behaviour.
17595@@ -1145,7 +1135,14 @@ asmlinkage void math_state_restore(void)
17596
17597 /* clts(); */ /* 'clts' is done for us by Xen during virtual trap. */
17598
17599- restore_fpu_checking(&me->thread.xstate->fxsave);
17600+ /*
17601+ * Paranoid restore. send a SIGSEGV if we fail to restore the state.
17602+ */
17603+ if (unlikely(restore_fpu_checking(&me->thread.xstate->fxsave))) {
17604+ stts();
17605+ force_sig(SIGSEGV, me);
17606+ return;
17607+ }
17608 task_thread_info(me)->status |= TS_USEDFPU;
17609 me->fpu_counter++;
17610 }
17611@@ -1190,13 +1187,12 @@ void __init trap_init(void)
17612 ret = HYPERVISOR_set_trap_table(trap_table);
17613 if (ret)
17614 printk("HYPERVISOR_set_trap_table failed: error %d\n", ret);
17615-
17616 /*
17617 * initialize the per thread extended state:
17618 */
17619- init_thread_xstate();
17620+ init_thread_xstate();
17621 /*
17622- * Should be a barrier for any external CPU state.
17623+ * Should be a barrier for any external CPU state:
17624 */
17625 cpu_init();
17626 }
17627@@ -1212,27 +1208,25 @@ void __cpuinit smp_trap_init(trap_info_t
17628 }
17629 }
17630
17631-
17632 static int __init oops_setup(char *s)
17633-{
17634+{
17635 if (!s)
17636 return -EINVAL;
17637 if (!strcmp(s, "panic"))
17638 panic_on_oops = 1;
17639 return 0;
17640-}
17641+}
17642 early_param("oops", oops_setup);
17643
17644 static int __init kstack_setup(char *s)
17645 {
17646 if (!s)
17647 return -EINVAL;
17648- kstack_depth_to_print = simple_strtoul(s,NULL,0);
17649+ kstack_depth_to_print = simple_strtoul(s, NULL, 0);
17650 return 0;
17651 }
17652 early_param("kstack", kstack_setup);
17653
17654-
17655 static int __init code_bytes_setup(char *s)
17656 {
17657 code_bytes = simple_strtoul(s, NULL, 0);
17658--- sle11-2009-10-16.orig/arch/x86/kernel/vsyscall_64-xen.c 2009-03-16 16:38:05.000000000 +0100
17659+++ sle11-2009-10-16/arch/x86/kernel/vsyscall_64-xen.c 2009-06-04 10:21:39.000000000 +0200
17660@@ -42,7 +42,8 @@
17661 #include <asm/topology.h>
17662 #include <asm/vgtod.h>
17663
17664-#define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
17665+#define __vsyscall(nr) \
17666+ __attribute__ ((unused, __section__(".vsyscall_" #nr))) notrace
17667 #define __syscall_clobber "r11","cx","memory"
17668
17669 /*
17670@@ -264,10 +265,7 @@ static void __cpuinit vsyscall_set_cpu(i
17671 d |= cpu;
17672 d |= (node & 0xf) << 12;
17673 d |= (node >> 4) << 48;
17674- if (HYPERVISOR_update_descriptor(virt_to_machine(get_cpu_gdt_table(cpu)
17675- + GDT_ENTRY_PER_CPU),
17676- d))
17677- BUG();
17678+ write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S);
17679 }
17680
17681 static void __cpuinit cpu_vsyscall_init(void *arg)
17682@@ -281,7 +279,7 @@ cpu_vsyscall_notifier(struct notifier_bl
17683 {
17684 long cpu = (long)arg;
17685 if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN)
17686- smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 0, 1);
17687+ smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 1);
17688 return NOTIFY_DONE;
17689 }
17690
17691@@ -311,7 +309,7 @@ static int __init vsyscall_init(void)
17692 #ifdef CONFIG_SYSCTL
17693 register_sysctl_table(kernel_root_table2);
17694 #endif
17695- on_each_cpu(cpu_vsyscall_init, NULL, 0, 1);
17696+ on_each_cpu(cpu_vsyscall_init, NULL, 1);
17697 hotcpu_notifier(cpu_vsyscall_notifier, 0);
17698 return 0;
17699 }
17700--- sle11-2009-10-16.orig/arch/x86/mach-xen/setup.c 2009-03-16 16:33:40.000000000 +0100
17701+++ sle11-2009-10-16/arch/x86/mach-xen/setup.c 2009-06-04 10:21:39.000000000 +0200
17702@@ -17,6 +17,8 @@
17703 #include <xen/interface/callback.h>
17704 #include <xen/interface/memory.h>
17705
17706+#ifdef CONFIG_X86_32
17707+
17708 #ifdef CONFIG_HOTPLUG_CPU
17709 #define DEFAULT_SEND_IPI (1)
17710 #else
17711@@ -44,51 +46,6 @@ static int __init print_ipi_mode(void)
17712
17713 late_initcall(print_ipi_mode);
17714
17715-/**
17716- * machine_specific_memory_setup - Hook for machine specific memory setup.
17717- *
17718- * Description:
17719- * This is included late in kernel/setup.c so that it can make
17720- * use of all of the static functions.
17721- **/
17722-
17723-char * __init machine_specific_memory_setup(void)
17724-{
17725- int rc;
17726- struct xen_memory_map memmap;
17727- /*
17728- * This is rather large for a stack variable but this early in
17729- * the boot process we know we have plenty slack space.
17730- */
17731- struct e820entry map[E820MAX];
17732-
17733- memmap.nr_entries = E820MAX;
17734- set_xen_guest_handle(memmap.buffer, map);
17735-
17736- rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap);
17737- if ( rc == -ENOSYS ) {
17738- memmap.nr_entries = 1;
17739- map[0].addr = 0ULL;
17740- map[0].size = PFN_PHYS((unsigned long long)xen_start_info->nr_pages);
17741- /* 8MB slack (to balance backend allocations). */
17742- map[0].size += 8ULL << 20;
17743- map[0].type = E820_RAM;
17744- rc = 0;
17745- }
17746- BUG_ON(rc);
17747-
17748- sanitize_e820_map(map, (char *)&memmap.nr_entries);
17749-
17750- BUG_ON(copy_e820_map(map, (char)memmap.nr_entries) < 0);
17751-
17752- return "Xen";
17753-}
17754-
17755-
17756-extern void hypervisor_callback(void);
17757-extern void failsafe_callback(void);
17758-extern void nmi(void);
17759-
17760 unsigned long *machine_to_phys_mapping = (void *)MACH2PHYS_VIRT_START;
17761 EXPORT_SYMBOL(machine_to_phys_mapping);
17762 unsigned int machine_to_phys_order;
17763@@ -121,33 +78,66 @@ void __init pre_setup_arch_hook(void)
17764 (unsigned long *)xen_start_info->mfn_list;
17765 }
17766
17767+#endif /* CONFIG_X86_32 */
17768+
17769+extern void hypervisor_callback(void);
17770+extern void failsafe_callback(void);
17771+extern void nmi(void);
17772+
17773+#ifdef CONFIG_X86_64
17774+#include <asm/proto.h>
17775+#define CALLBACK_ADDR(fn) ((unsigned long)(fn))
17776+#else
17777+#define CALLBACK_ADDR(fn) { __KERNEL_CS, (unsigned long)(fn) }
17778+#endif
17779+
17780 void __init machine_specific_arch_setup(void)
17781 {
17782 int ret;
17783 static struct callback_register __initdata event = {
17784 .type = CALLBACKTYPE_event,
17785- .address = { __KERNEL_CS, (unsigned long)hypervisor_callback },
17786+ .address = CALLBACK_ADDR(hypervisor_callback)
17787 };
17788 static struct callback_register __initdata failsafe = {
17789 .type = CALLBACKTYPE_failsafe,
17790- .address = { __KERNEL_CS, (unsigned long)failsafe_callback },
17791+ .address = CALLBACK_ADDR(failsafe_callback)
17792+ };
17793+#ifdef CONFIG_X86_64
17794+ static struct callback_register __initdata syscall = {
17795+ .type = CALLBACKTYPE_syscall,
17796+ .address = CALLBACK_ADDR(system_call)
17797 };
17798+#endif
17799+#if defined(CONFIG_X86_LOCAL_APIC) || defined(CONFIG_X86_32)
17800 static struct callback_register __initdata nmi_cb = {
17801 .type = CALLBACKTYPE_nmi,
17802- .address = { __KERNEL_CS, (unsigned long)nmi },
17803+ .address = CALLBACK_ADDR(nmi)
17804 };
17805+#endif
17806
17807 ret = HYPERVISOR_callback_op(CALLBACKOP_register, &event);
17808 if (ret == 0)
17809 ret = HYPERVISOR_callback_op(CALLBACKOP_register, &failsafe);
17810+#ifdef CONFIG_X86_64
17811+ if (ret == 0)
17812+ ret = HYPERVISOR_callback_op(CALLBACKOP_register, &syscall);
17813+#endif
17814 #if CONFIG_XEN_COMPAT <= 0x030002
17815+#ifdef CONFIG_X86_32
17816 if (ret == -ENOSYS)
17817 ret = HYPERVISOR_set_callbacks(
17818 event.address.cs, event.address.eip,
17819 failsafe.address.cs, failsafe.address.eip);
17820+#else
17821+ ret = HYPERVISOR_set_callbacks(
17822+ event.address,
17823+ failsafe.address,
17824+ syscall.address);
17825+#endif
17826 #endif
17827 BUG_ON(ret);
17828
17829+#if defined(CONFIG_X86_LOCAL_APIC) || defined(CONFIG_X86_32)
17830 ret = HYPERVISOR_callback_op(CALLBACKOP_register, &nmi_cb);
17831 #if CONFIG_XEN_COMPAT <= 0x030002
17832 if (ret == -ENOSYS) {
17833@@ -158,15 +148,43 @@ void __init machine_specific_arch_setup(
17834 HYPERVISOR_nmi_op(XENNMI_register_callback, &cb);
17835 }
17836 #endif
17837+#endif
17838
17839+#ifdef CONFIG_X86_32
17840 /* Do an early initialization of the fixmap area */
17841 {
17842 extern pte_t swapper_pg_fixmap[PTRS_PER_PTE];
17843 unsigned long addr = __fix_to_virt(FIX_EARLYCON_MEM_BASE);
17844 pud_t *pud = pud_offset(swapper_pg_dir + pgd_index(addr), addr);
17845 pmd_t *pmd = pmd_offset(pud, addr);
17846+ unsigned int i;
17847
17848 make_lowmem_page_readonly(swapper_pg_fixmap, XENFEAT_writable_page_tables);
17849 set_pmd(pmd, __pmd(__pa_symbol(swapper_pg_fixmap) | _PAGE_TABLE));
17850+
17851+#define __FIXADDR_TOP (-PAGE_SIZE)
17852+#define FIX_BUG_ON(fix) BUILD_BUG_ON(pmd_index(__fix_to_virt(FIX_##fix)) \
17853+ != pmd_index(__fix_to_virt(FIX_EARLYCON_MEM_BASE)))
17854+ FIX_BUG_ON(SHARED_INFO);
17855+ FIX_BUG_ON(ISAMAP_BEGIN);
17856+ FIX_BUG_ON(ISAMAP_END);
17857+#undef __FIXADDR_TOP
17858+ BUG_ON(pte_index(hypervisor_virt_start));
17859+
17860+ /* Switch to the real shared_info page, and clear the
17861+ * dummy page. */
17862+ set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info);
17863+ HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
17864+ memset(empty_zero_page, 0, sizeof(empty_zero_page));
17865+
17866+ /* Setup mapping of lower 1st MB */
17867+ for (i = 0; i < NR_FIX_ISAMAPS; i++)
17868+ if (is_initial_xendomain())
17869+ set_fixmap(FIX_ISAMAP_BEGIN - i, i * PAGE_SIZE);
17870+ else
17871+ __set_fixmap(FIX_ISAMAP_BEGIN - i,
17872+ virt_to_machine(empty_zero_page),
17873+ PAGE_KERNEL_RO);
17874 }
17875+#endif
17876 }
17877--- sle11-2009-10-16.orig/arch/x86/mm/fault-xen.c 2009-03-16 16:38:05.000000000 +0100
17878+++ sle11-2009-10-16/arch/x86/mm/fault-xen.c 2009-06-04 10:21:39.000000000 +0200
17879@@ -10,6 +10,7 @@
17880 #include <linux/string.h>
17881 #include <linux/types.h>
17882 #include <linux/ptrace.h>
17883+#include <linux/mmiotrace.h>
17884 #include <linux/mman.h>
17885 #include <linux/mm.h>
17886 #include <linux/smp.h>
17887@@ -49,17 +50,23 @@
17888 #define PF_RSVD (1<<3)
17889 #define PF_INSTR (1<<4)
17890
17891+static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr)
17892+{
17893+#ifdef CONFIG_MMIOTRACE_HOOKS
17894+ if (unlikely(is_kmmio_active()))
17895+ if (kmmio_handler(regs, addr) == 1)
17896+ return -1;
17897+#endif
17898+ return 0;
17899+}
17900+
17901 static inline int notify_page_fault(struct pt_regs *regs)
17902 {
17903 #ifdef CONFIG_KPROBES
17904 int ret = 0;
17905
17906 /* kprobe_running() needs smp_processor_id() */
17907-#ifdef CONFIG_X86_32
17908 if (!user_mode_vm(regs)) {
17909-#else
17910- if (!user_mode(regs)) {
17911-#endif
17912 preempt_disable();
17913 if (kprobe_running() && kprobe_fault_handler(regs, 14))
17914 ret = 1;
17915@@ -409,11 +416,7 @@ static void show_fault_oops(struct pt_re
17916 printk(KERN_CONT "NULL pointer dereference");
17917 else
17918 printk(KERN_CONT "paging request");
17919-#ifdef CONFIG_X86_32
17920- printk(KERN_CONT " at %08lx\n", address);
17921-#else
17922- printk(KERN_CONT " at %016lx\n", address);
17923-#endif
17924+ printk(KERN_CONT " at %p\n", (void *) address);
17925 printk(KERN_ALERT "IP:");
17926 printk_address(regs->ip, 1);
17927 dump_pagetable(address);
17928@@ -628,6 +631,8 @@ void __kprobes do_page_fault(struct pt_r
17929
17930 if (notify_page_fault(regs))
17931 return;
17932+ if (unlikely(kmmio_fault(regs, address)))
17933+ return;
17934
17935 /*
17936 * We fault-in kernel-space virtual memory on-demand. The
17937@@ -831,14 +836,10 @@ bad_area_nosemaphore:
17938 if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
17939 printk_ratelimit()) {
17940 printk(
17941-#ifdef CONFIG_X86_32
17942- "%s%s[%d]: segfault at %lx ip %08lx sp %08lx error %lx",
17943-#else
17944- "%s%s[%d]: segfault at %lx ip %lx sp %lx error %lx",
17945-#endif
17946+ "%s%s[%d]: segfault at %lx ip %p sp %p error %lx",
17947 task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
17948- tsk->comm, task_pid_nr(tsk), address, regs->ip,
17949- regs->sp, error_code);
17950+ tsk->comm, task_pid_nr(tsk), address,
17951+ (void *) regs->ip, (void *) regs->sp, error_code);
17952 print_vma_addr(" in ", regs->ip);
17953 printk("\n");
17954 }
17955@@ -946,81 +947,45 @@ LIST_HEAD(pgd_list);
17956 void vmalloc_sync_all(void)
17957 {
17958 #ifdef CONFIG_X86_32
17959- /*
17960- * Note that races in the updates of insync and start aren't
17961- * problematic: insync can only get set bits added, and updates to
17962- * start are only improving performance (without affecting correctness
17963- * if undone).
17964- * XEN: To work on PAE, we need to iterate over PMDs rather than PGDs.
17965- * This change works just fine with 2-level paging too.
17966- */
17967-#define sync_index(a) ((a) >> PMD_SHIFT)
17968- static DECLARE_BITMAP(insync, PTRS_PER_PGD*PTRS_PER_PMD);
17969- static unsigned long start = TASK_SIZE;
17970- unsigned long address;
17971+ unsigned long address = VMALLOC_START & PGDIR_MASK;
17972
17973 if (SHARED_KERNEL_PMD)
17974 return;
17975
17976 BUILD_BUG_ON(TASK_SIZE & ~PMD_MASK);
17977- for (address = start;
17978- address < hypervisor_virt_start;
17979- address += PMD_SIZE) {
17980- if (!test_bit(sync_index(address), insync)) {
17981- unsigned long flags;
17982- struct page *page;
17983-
17984- spin_lock_irqsave(&pgd_lock, flags);
17985- /* XEN: failure path assumes non-empty pgd_list. */
17986- if (unlikely(list_empty(&pgd_list))) {
17987- spin_unlock_irqrestore(&pgd_lock, flags);
17988- return;
17989- }
17990- list_for_each_entry(page, &pgd_list, lru) {
17991- if (!vmalloc_sync_one(page_address(page),
17992- address))
17993- break;
17994- }
17995- spin_unlock_irqrestore(&pgd_lock, flags);
17996- if (!page)
17997- set_bit(sync_index(address), insync);
17998+ for (; address < hypervisor_virt_start; address += PMD_SIZE) {
17999+ unsigned long flags;
18000+ struct page *page;
18001+
18002+ spin_lock_irqsave(&pgd_lock, flags);
18003+ list_for_each_entry(page, &pgd_list, lru) {
18004+ if (!vmalloc_sync_one(page_address(page),
18005+ address))
18006+ break;
18007 }
18008- if (address == start && test_bit(sync_index(address), insync))
18009- start = address + PMD_SIZE;
18010+ spin_unlock_irqrestore(&pgd_lock, flags);
18011 }
18012 #else /* CONFIG_X86_64 */
18013- /*
18014- * Note that races in the updates of insync and start aren't
18015- * problematic: insync can only get set bits added, and updates to
18016- * start are only improving performance (without affecting correctness
18017- * if undone).
18018- */
18019- static DECLARE_BITMAP(insync, PTRS_PER_PGD);
18020- static unsigned long start = VMALLOC_START & PGDIR_MASK;
18021+ unsigned long start = VMALLOC_START & PGDIR_MASK;
18022 unsigned long address;
18023
18024 for (address = start; address <= VMALLOC_END; address += PGDIR_SIZE) {
18025- if (!test_bit(pgd_index(address), insync)) {
18026- const pgd_t *pgd_ref = pgd_offset_k(address);
18027- unsigned long flags;
18028- struct page *page;
18029-
18030- if (pgd_none(*pgd_ref))
18031- continue;
18032- spin_lock_irqsave(&pgd_lock, flags);
18033- list_for_each_entry(page, &pgd_list, lru) {
18034- pgd_t *pgd;
18035- pgd = (pgd_t *)page_address(page) + pgd_index(address);
18036- if (pgd_none(*pgd))
18037- set_pgd(pgd, *pgd_ref);
18038- else
18039- BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
18040- }
18041- spin_unlock_irqrestore(&pgd_lock, flags);
18042- set_bit(pgd_index(address), insync);
18043+ const pgd_t *pgd_ref = pgd_offset_k(address);
18044+ unsigned long flags;
18045+ struct page *page;
18046+
18047+ if (pgd_none(*pgd_ref))
18048+ continue;
18049+ spin_lock_irqsave(&pgd_lock, flags);
18050+ list_for_each_entry(page, &pgd_list, lru) {
18051+ pgd_t *pgd;
18052+ pgd = (pgd_t *)page_address(page) + pgd_index(address);
18053+ if (pgd_none(*pgd))
18054+ set_pgd(pgd, *pgd_ref);
18055+ else
18056+ BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
18057 }
18058- if (address == start)
18059- start = address + PGDIR_SIZE;
18060+ spin_unlock_irqrestore(&pgd_lock, flags);
18061 }
18062 #endif
18063 }
18064--- sle11-2009-10-16.orig/arch/x86/mm/hypervisor.c 2009-05-14 11:18:39.000000000 +0200
18065+++ sle11-2009-10-16/arch/x86/mm/hypervisor.c 2009-06-04 10:21:39.000000000 +0200
18066@@ -709,6 +709,72 @@ void xen_destroy_contiguous_region(unsig
18067 }
18068 EXPORT_SYMBOL_GPL(xen_destroy_contiguous_region);
18069
18070+int __init early_create_contiguous_region(unsigned long pfn,
18071+ unsigned int order,
18072+ unsigned int address_bits)
18073+{
18074+ unsigned long *in_frames = discontig_frames, out_frame = pfn;
18075+ unsigned int i;
18076+ int rc, success;
18077+ struct xen_memory_exchange exchange = {
18078+ .in = {
18079+ .nr_extents = 1UL << order,
18080+ .extent_order = 0,
18081+ .domid = DOMID_SELF
18082+ },
18083+ .out = {
18084+ .nr_extents = 1,
18085+ .extent_order = order,
18086+ .address_bits = address_bits,
18087+ .domid = DOMID_SELF
18088+ }
18089+ };
18090+
18091+ if (xen_feature(XENFEAT_auto_translated_physmap))
18092+ return 0;
18093+
18094+ if (unlikely(order > MAX_CONTIG_ORDER))
18095+ return -ENOMEM;
18096+
18097+ for (i = 0; i < (1U << order); ++i) {
18098+ in_frames[i] = pfn_to_mfn(pfn + i);
18099+ set_phys_to_machine(pfn + i, INVALID_P2M_ENTRY);
18100+ }
18101+
18102+ set_xen_guest_handle(exchange.in.extent_start, in_frames);
18103+ set_xen_guest_handle(exchange.out.extent_start, &out_frame);
18104+
18105+ rc = HYPERVISOR_memory_op(XENMEM_exchange, &exchange);
18106+ success = (exchange.nr_exchanged == (1UL << order));
18107+ BUG_ON(!success && (exchange.nr_exchanged || !rc));
18108+ BUG_ON(success && rc);
18109+#if CONFIG_XEN_COMPAT <= 0x030002
18110+ if (unlikely(rc == -ENOSYS)) {
18111+ /* Compatibility when XENMEM_exchange is unavailable. */
18112+ if (HYPERVISOR_memory_op(XENMEM_decrease_reservation,
18113+ &exchange.in) != (1UL << order))
18114+ BUG();
18115+ success = (HYPERVISOR_memory_op(XENMEM_populate_physmap,
18116+ &exchange.out) == 1);
18117+ if (!success) {
18118+ for (i = 0; i < (1U << order); ++i)
18119+ in_frames[i] = pfn + i;
18120+ if (HYPERVISOR_memory_op(XENMEM_populate_physmap,
18121+ &exchange.in) != (1UL << order))
18122+ BUG();
18123+ }
18124+ }
18125+#endif
18126+
18127+ for (i = 0; i < (1U << order); ++i, ++out_frame) {
18128+ if (!success)
18129+ out_frame = in_frames[i];
18130+ set_phys_to_machine(pfn + i, out_frame);
18131+ }
18132+
18133+ return success ? 0 : -ENOMEM;
18134+}
18135+
18136 static void undo_limit_pages(struct page *pages, unsigned int order)
18137 {
18138 BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));
18139@@ -875,42 +941,9 @@ int write_ldt_entry(struct desc_struct *
18140 return HYPERVISOR_update_descriptor(mach_lp, *(const u64*)desc);
18141 }
18142
18143-#define MAX_BATCHED_FULL_PTES 32
18144-
18145-int xen_change_pte_range(struct mm_struct *mm, pmd_t *pmd,
18146- unsigned long addr, unsigned long end, pgprot_t newprot,
18147- int dirty_accountable)
18148+int write_gdt_entry(struct desc_struct *gdt, int entry, const void *desc,
18149+ int type)
18150 {
18151- int rc = 0, i = 0;
18152- mmu_update_t u[MAX_BATCHED_FULL_PTES];
18153- pte_t *pte;
18154- spinlock_t *ptl;
18155-
18156- if (!xen_feature(XENFEAT_mmu_pt_update_preserve_ad))
18157- return 0;
18158-
18159- pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
18160- do {
18161- if (pte_present(*pte)) {
18162- pte_t ptent = pte_modify(*pte, newprot);
18163-
18164- if (dirty_accountable && pte_dirty(ptent))
18165- ptent = pte_mkwrite(ptent);
18166- u[i].ptr = (__pmd_val(*pmd) & PHYSICAL_PAGE_MASK)
18167- | ((unsigned long)pte & ~PAGE_MASK)
18168- | MMU_PT_UPDATE_PRESERVE_AD;
18169- u[i].val = __pte_val(ptent);
18170- if (++i == MAX_BATCHED_FULL_PTES) {
18171- if ((rc = HYPERVISOR_mmu_update(
18172- &u[0], i, NULL, DOMID_SELF)) != 0)
18173- break;
18174- i = 0;
18175- }
18176- }
18177- } while (pte++, addr += PAGE_SIZE, addr != end);
18178- if (i)
18179- rc = HYPERVISOR_mmu_update( &u[0], i, NULL, DOMID_SELF);
18180- pte_unmap_unlock(pte - 1, ptl);
18181- BUG_ON(rc && rc != -ENOSYS);
18182- return !rc;
18183+ maddr_t mach_gp = virt_to_machine(gdt + entry);
18184+ return HYPERVISOR_update_descriptor(mach_gp, *(const u64*)desc);
18185 }
18186--- sle11-2009-10-16.orig/arch/x86/mm/init_32-xen.c 2009-03-16 16:38:05.000000000 +0100
18187+++ sle11-2009-10-16/arch/x86/mm/init_32-xen.c 2009-06-04 10:21:39.000000000 +0200
18188@@ -54,6 +54,7 @@
18189
18190 unsigned int __VMALLOC_RESERVE = 128 << 20;
18191
18192+unsigned long max_low_pfn_mapped;
18193 unsigned long max_pfn_mapped;
18194
18195 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
18196@@ -61,6 +62,27 @@ unsigned long highstart_pfn, highend_pfn
18197
18198 static noinline int do_test_wp_bit(void);
18199
18200+
18201+static unsigned long __initdata table_start;
18202+static unsigned long __initdata table_end;
18203+static unsigned long __initdata table_top;
18204+
18205+static int __initdata after_init_bootmem;
18206+
18207+static __init void *alloc_low_page(unsigned long *phys)
18208+{
18209+ unsigned long pfn = table_end++;
18210+ void *adr;
18211+
18212+ if (pfn >= table_top)
18213+ panic("alloc_low_page: ran out of memory");
18214+
18215+ adr = __va(pfn * PAGE_SIZE);
18216+ memset(adr, 0, PAGE_SIZE);
18217+ *phys = pfn * PAGE_SIZE;
18218+ return adr;
18219+}
18220+
18221 /*
18222 * Creates a middle page table and puts a pointer to it in the
18223 * given global directory entry. This only returns the gd entry
18224@@ -72,9 +94,12 @@ static pmd_t * __init one_md_table_init(
18225 pmd_t *pmd_table;
18226
18227 #ifdef CONFIG_X86_PAE
18228+ unsigned long phys;
18229 if (!(__pgd_val(*pgd) & _PAGE_PRESENT)) {
18230- pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
18231-
18232+ if (after_init_bootmem)
18233+ pmd_table = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE);
18234+ else
18235+ pmd_table = (pmd_t *)alloc_low_page(&phys);
18236 paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
18237 make_lowmem_page_readonly(pmd_table, XENFEAT_writable_page_tables);
18238 set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
18239@@ -101,12 +126,16 @@ static pte_t * __init one_page_table_ini
18240 #endif
18241 pte_t *page_table = NULL;
18242
18243+ if (after_init_bootmem) {
18244 #ifdef CONFIG_DEBUG_PAGEALLOC
18245- page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE);
18246+ page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE);
18247 #endif
18248- if (!page_table) {
18249- page_table =
18250+ if (!page_table)
18251+ page_table =
18252 (pte_t *)alloc_bootmem_low_pages(PAGE_SIZE);
18253+ } else {
18254+ unsigned long phys;
18255+ page_table = (pte_t *)alloc_low_page(&phys);
18256 }
18257
18258 paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT);
18259@@ -167,24 +196,24 @@ static inline int is_kernel_text(unsigne
18260 * of max_low_pfn pages, by creating page tables starting from address
18261 * PAGE_OFFSET:
18262 */
18263-static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
18264+static void __init kernel_physical_mapping_init(pgd_t *pgd_base,
18265+ unsigned long start_pfn,
18266+ unsigned long end_pfn,
18267+ int use_pse)
18268 {
18269 int pgd_idx, pmd_idx, pte_ofs;
18270 unsigned long pfn;
18271 pgd_t *pgd;
18272 pmd_t *pmd;
18273 pte_t *pte;
18274+ unsigned pages_2m = 0, pages_4k = 0;
18275
18276- unsigned long max_ram_pfn = xen_start_info->nr_pages;
18277- if (max_ram_pfn > max_low_pfn)
18278- max_ram_pfn = max_low_pfn;
18279+ if (!cpu_has_pse)
18280+ use_pse = 0;
18281
18282- pgd_idx = pgd_index(PAGE_OFFSET);
18283+ pfn = start_pfn;
18284+ pgd_idx = pgd_index((pfn<<PAGE_SHIFT) + PAGE_OFFSET);
18285 pgd = pgd_base + pgd_idx;
18286- pfn = 0;
18287- pmd_idx = pmd_index(PAGE_OFFSET);
18288- pte_ofs = pte_index(PAGE_OFFSET);
18289-
18290 for (; pgd_idx < PTRS_PER_PGD; pgd++, pgd_idx++) {
18291 #ifdef CONFIG_XEN
18292 /*
18293@@ -198,10 +227,16 @@ static void __init kernel_physical_mappi
18294 #else
18295 pmd = one_md_table_init(pgd);
18296 #endif
18297- if (pfn >= max_low_pfn)
18298+
18299+ if (pfn >= end_pfn)
18300 continue;
18301+#ifdef CONFIG_X86_PAE
18302+ pmd_idx = pmd_index((pfn<<PAGE_SHIFT) + PAGE_OFFSET);
18303 pmd += pmd_idx;
18304- for (; pmd_idx < PTRS_PER_PMD && pfn < max_low_pfn;
18305+#else
18306+ pmd_idx = 0;
18307+#endif
18308+ for (; pmd_idx < PTRS_PER_PMD && pfn < end_pfn;
18309 pmd++, pmd_idx++) {
18310 unsigned int addr = pfn * PAGE_SIZE + PAGE_OFFSET;
18311
18312@@ -211,13 +246,8 @@ static void __init kernel_physical_mappi
18313 /*
18314 * Map with big pages if possible, otherwise
18315 * create normal page tables:
18316- *
18317- * Don't use a large page for the first 2/4MB of memory
18318- * because there are often fixed size MTRRs in there
18319- * and overlapping MTRRs into large pages can cause
18320- * slowdowns.
18321 */
18322- if (cpu_has_pse && !(pgd_idx == 0 && pmd_idx == 0)) {
18323+ if (use_pse) {
18324 unsigned int addr2;
18325 pgprot_t prot = PAGE_KERNEL_LARGE;
18326
18327@@ -228,49 +258,35 @@ static void __init kernel_physical_mappi
18328 is_kernel_text(addr2))
18329 prot = PAGE_KERNEL_LARGE_EXEC;
18330
18331+ pages_2m++;
18332 set_pmd(pmd, pfn_pmd(pfn, prot));
18333
18334 pfn += PTRS_PER_PTE;
18335- max_pfn_mapped = pfn;
18336 continue;
18337 }
18338 pte = one_page_table_init(pmd);
18339
18340- for (pte += pte_ofs;
18341- pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn;
18342+ pte_ofs = pte_index((pfn<<PAGE_SHIFT) + PAGE_OFFSET);
18343+ pte += pte_ofs;
18344+ for (; pte_ofs < PTRS_PER_PTE && pfn < end_pfn;
18345 pte++, pfn++, pte_ofs++, addr += PAGE_SIZE) {
18346 pgprot_t prot = PAGE_KERNEL;
18347
18348 /* XEN: Only map initial RAM allocation. */
18349- if ((pfn >= max_ram_pfn) || pte_present(*pte))
18350+ if (pfn >= xen_start_info->nr_pages || pte_present(*pte))
18351 continue;
18352 if (is_kernel_text(addr))
18353 prot = PAGE_KERNEL_EXEC;
18354
18355+ pages_4k++;
18356 set_pte(pte, pfn_pte(pfn, prot));
18357 }
18358- max_pfn_mapped = pfn;
18359- pte_ofs = 0;
18360 }
18361- pmd_idx = 0;
18362 }
18363+ update_page_count(PG_LEVEL_2M, pages_2m);
18364+ update_page_count(PG_LEVEL_4K, pages_4k);
18365 }
18366
18367-#ifndef CONFIG_XEN
18368-
18369-static inline int page_kills_ppro(unsigned long pagenr)
18370-{
18371- if (pagenr >= 0x70000 && pagenr <= 0x7003F)
18372- return 1;
18373- return 0;
18374-}
18375-
18376-#else
18377-
18378-#define page_kills_ppro(p) 0
18379-
18380-#endif
18381-
18382 /*
18383 * devmem_is_allowed() checks to see if /dev/mem access to a certain address
18384 * is valid. The argument is a physical page number.
18385@@ -331,30 +347,63 @@ static void __init permanent_kmaps_init(
18386 pkmap_page_table = pte;
18387 }
18388
18389-void __init add_one_highpage_init(struct page *page, int pfn, int bad_ppro)
18390+static void __init add_one_highpage_init(struct page *page, int pfn)
18391 {
18392- if (page_is_ram(pfn) && !(bad_ppro && page_kills_ppro(pfn))) {
18393- ClearPageReserved(page);
18394- init_page_count(page);
18395- if (pfn < xen_start_info->nr_pages)
18396- __free_page(page);
18397- totalhigh_pages++;
18398- } else
18399- SetPageReserved(page);
18400+ ClearPageReserved(page);
18401+ init_page_count(page);
18402+ if (pfn < xen_start_info->nr_pages)
18403+ __free_page(page);
18404+ totalhigh_pages++;
18405+}
18406+
18407+struct add_highpages_data {
18408+ unsigned long start_pfn;
18409+ unsigned long end_pfn;
18410+};
18411+
18412+static int __init add_highpages_work_fn(unsigned long start_pfn,
18413+ unsigned long end_pfn, void *datax)
18414+{
18415+ int node_pfn;
18416+ struct page *page;
18417+ unsigned long final_start_pfn, final_end_pfn;
18418+ struct add_highpages_data *data;
18419+
18420+ data = (struct add_highpages_data *)datax;
18421+
18422+ final_start_pfn = max(start_pfn, data->start_pfn);
18423+ final_end_pfn = min(end_pfn, data->end_pfn);
18424+ if (final_start_pfn >= final_end_pfn)
18425+ return 0;
18426+
18427+ for (node_pfn = final_start_pfn; node_pfn < final_end_pfn;
18428+ node_pfn++) {
18429+ if (!pfn_valid(node_pfn))
18430+ continue;
18431+ page = pfn_to_page(node_pfn);
18432+ add_one_highpage_init(page, node_pfn);
18433+ }
18434+
18435+ return 0;
18436+
18437+}
18438+
18439+void __init add_highpages_with_active_regions(int nid, unsigned long start_pfn,
18440+ unsigned long end_pfn)
18441+{
18442+ struct add_highpages_data data;
18443+
18444+ data.start_pfn = start_pfn;
18445+ data.end_pfn = end_pfn;
18446+
18447+ work_with_active_regions(nid, add_highpages_work_fn, &data);
18448 }
18449
18450 #ifndef CONFIG_NUMA
18451-static void __init set_highmem_pages_init(int bad_ppro)
18452+static void __init set_highmem_pages_init(void)
18453 {
18454- int pfn;
18455+ add_highpages_with_active_regions(0, highstart_pfn, highend_pfn);
18456
18457- for (pfn = highstart_pfn; pfn < highend_pfn; pfn++) {
18458- /*
18459- * Holes under sparsemem might not have no mem_map[]:
18460- */
18461- if (pfn_valid(pfn))
18462- add_one_highpage_init(pfn_to_page(pfn), pfn, bad_ppro);
18463- }
18464 totalram_pages += totalhigh_pages;
18465 }
18466 #endif /* !CONFIG_NUMA */
18467@@ -362,24 +411,11 @@ static void __init set_highmem_pages_ini
18468 #else
18469 # define kmap_init() do { } while (0)
18470 # define permanent_kmaps_init(pgd_base) do { } while (0)
18471-# define set_highmem_pages_init(bad_ppro) do { } while (0)
18472+# define set_highmem_pages_init() do { } while (0)
18473 #endif /* CONFIG_HIGHMEM */
18474
18475-pteval_t __PAGE_KERNEL = _PAGE_KERNEL;
18476-EXPORT_SYMBOL(__PAGE_KERNEL);
18477-
18478-pteval_t __PAGE_KERNEL_EXEC = _PAGE_KERNEL_EXEC;
18479-
18480 pgd_t *swapper_pg_dir;
18481
18482-static void __init xen_pagetable_setup_start(pgd_t *base)
18483-{
18484-}
18485-
18486-static void __init xen_pagetable_setup_done(pgd_t *base)
18487-{
18488-}
18489-
18490 /*
18491 * Build a proper pagetable for the kernel mappings. Up until this
18492 * point, we've been running on some set of pagetables constructed by
18493@@ -399,27 +435,10 @@ static void __init xen_pagetable_setup_d
18494 * be partially populated, and so it avoids stomping on any existing
18495 * mappings.
18496 */
18497-static void __init pagetable_init(void)
18498+static void __init early_ioremap_page_table_range_init(pgd_t *pgd_base)
18499 {
18500- pgd_t *pgd_base = (pgd_t *)xen_start_info->pt_base;
18501 unsigned long vaddr, end;
18502
18503- xen_pagetable_setup_start(pgd_base);
18504-
18505- /* Enable PSE if available */
18506- if (cpu_has_pse)
18507- set_in_cr4(X86_CR4_PSE);
18508-
18509- /* Enable PGE if available */
18510- if (cpu_has_pge) {
18511- set_in_cr4(X86_CR4_PGE);
18512- __PAGE_KERNEL |= _PAGE_GLOBAL;
18513- __PAGE_KERNEL_EXEC |= _PAGE_GLOBAL;
18514- }
18515-
18516- kernel_physical_mapping_init(pgd_base);
18517- remap_numa_kva();
18518-
18519 /*
18520 * Fixed mappings, only the page table structure has to be
18521 * created - mappings will be set by set_fixmap():
18522@@ -429,10 +448,13 @@ static void __init pagetable_init(void)
18523 end = (FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK;
18524 page_table_range_init(vaddr, end, pgd_base);
18525 early_ioremap_reset();
18526+}
18527
18528- permanent_kmaps_init(pgd_base);
18529+static void __init pagetable_init(void)
18530+{
18531+ pgd_t *pgd_base = (pgd_t *)xen_start_info->pt_base;
18532
18533- xen_pagetable_setup_done(pgd_base);
18534+ permanent_kmaps_init(pgd_base);
18535 }
18536
18537 #if defined(CONFIG_ACPI_SLEEP) && !defined(CONFIG_XEN)
18538@@ -475,7 +497,7 @@ void zap_low_mappings(void)
18539
18540 int nx_enabled;
18541
18542-pteval_t __supported_pte_mask __read_mostly = ~_PAGE_NX;
18543+pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL);
18544 EXPORT_SYMBOL_GPL(__supported_pte_mask);
18545
18546 #ifdef CONFIG_X86_PAE
18547@@ -528,42 +550,364 @@ static void __init set_nx(void)
18548 }
18549 #endif
18550
18551+/* user-defined highmem size */
18552+static unsigned int highmem_pages = -1;
18553+
18554 /*
18555- * paging_init() sets up the page tables - note that the first 8MB are
18556- * already mapped by head.S.
18557- *
18558- * This routines also unmaps the page at virtual kernel address 0, so
18559- * that we can trap those pesky NULL-reference errors in the kernel.
18560+ * highmem=size forces highmem to be exactly 'size' bytes.
18561+ * This works even on boxes that have no highmem otherwise.
18562+ * This also works to reduce highmem size on bigger boxes.
18563 */
18564-void __init paging_init(void)
18565+static int __init parse_highmem(char *arg)
18566+{
18567+ if (!arg)
18568+ return -EINVAL;
18569+
18570+ highmem_pages = memparse(arg, &arg) >> PAGE_SHIFT;
18571+ return 0;
18572+}
18573+early_param("highmem", parse_highmem);
18574+
18575+/*
18576+ * Determine low and high memory ranges:
18577+ */
18578+void __init find_low_pfn_range(void)
18579+{
18580+ /* it could update max_pfn */
18581+
18582+ /* max_low_pfn is 0, we already have early_res support */
18583+
18584+ max_low_pfn = max_pfn;
18585+ if (max_low_pfn > MAXMEM_PFN) {
18586+ if (highmem_pages == -1)
18587+ highmem_pages = max_pfn - MAXMEM_PFN;
18588+ if (highmem_pages + MAXMEM_PFN < max_pfn)
18589+ max_pfn = MAXMEM_PFN + highmem_pages;
18590+ if (highmem_pages + MAXMEM_PFN > max_pfn) {
18591+ printk(KERN_WARNING "only %luMB highmem pages "
18592+ "available, ignoring highmem size of %uMB.\n",
18593+ pages_to_mb(max_pfn - MAXMEM_PFN),
18594+ pages_to_mb(highmem_pages));
18595+ highmem_pages = 0;
18596+ }
18597+ max_low_pfn = MAXMEM_PFN;
18598+#ifndef CONFIG_HIGHMEM
18599+ /* Maximum memory usable is what is directly addressable */
18600+ printk(KERN_WARNING "Warning only %ldMB will be used.\n",
18601+ MAXMEM>>20);
18602+ if (max_pfn > MAX_NONPAE_PFN)
18603+ printk(KERN_WARNING
18604+ "Use a HIGHMEM64G enabled kernel.\n");
18605+ else
18606+ printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
18607+ max_pfn = MAXMEM_PFN;
18608+#else /* !CONFIG_HIGHMEM */
18609+#ifndef CONFIG_HIGHMEM64G
18610+ if (max_pfn > MAX_NONPAE_PFN) {
18611+ max_pfn = MAX_NONPAE_PFN;
18612+ printk(KERN_WARNING "Warning only 4GB will be used."
18613+ "Use a HIGHMEM64G enabled kernel.\n");
18614+ }
18615+#endif /* !CONFIG_HIGHMEM64G */
18616+#endif /* !CONFIG_HIGHMEM */
18617+ } else {
18618+ if (highmem_pages == -1)
18619+ highmem_pages = 0;
18620+#ifdef CONFIG_HIGHMEM
18621+ if (highmem_pages >= max_pfn) {
18622+ printk(KERN_ERR "highmem size specified (%uMB) is "
18623+ "bigger than pages available (%luMB)!.\n",
18624+ pages_to_mb(highmem_pages),
18625+ pages_to_mb(max_pfn));
18626+ highmem_pages = 0;
18627+ }
18628+ if (highmem_pages) {
18629+ if (max_low_pfn - highmem_pages <
18630+ 64*1024*1024/PAGE_SIZE){
18631+ printk(KERN_ERR "highmem size %uMB results in "
18632+ "smaller than 64MB lowmem, ignoring it.\n"
18633+ , pages_to_mb(highmem_pages));
18634+ highmem_pages = 0;
18635+ }
18636+ max_low_pfn -= highmem_pages;
18637+ }
18638+#else
18639+ if (highmem_pages)
18640+ printk(KERN_ERR "ignoring highmem size on non-highmem"
18641+ " kernel!\n");
18642+#endif
18643+ }
18644+}
18645+
18646+#ifndef CONFIG_NEED_MULTIPLE_NODES
18647+void __init initmem_init(unsigned long start_pfn,
18648+ unsigned long end_pfn)
18649+{
18650+#ifdef CONFIG_HIGHMEM
18651+ highstart_pfn = highend_pfn = max_pfn;
18652+ if (max_pfn > max_low_pfn)
18653+ highstart_pfn = max_low_pfn;
18654+ memory_present(0, 0, highend_pfn);
18655+ e820_register_active_regions(0, 0, highend_pfn);
18656+ printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
18657+ pages_to_mb(highend_pfn - highstart_pfn));
18658+ num_physpages = highend_pfn;
18659+ high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
18660+#else
18661+ memory_present(0, 0, max_low_pfn);
18662+ e820_register_active_regions(0, 0, max_low_pfn);
18663+ num_physpages = max_low_pfn;
18664+ high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
18665+#endif
18666+#ifdef CONFIG_FLATMEM
18667+ max_mapnr = num_physpages;
18668+#endif
18669+ printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
18670+ pages_to_mb(max_low_pfn));
18671+
18672+ setup_bootmem_allocator();
18673+}
18674+#endif /* !CONFIG_NEED_MULTIPLE_NODES */
18675+
18676+static void __init zone_sizes_init(void)
18677+{
18678+ unsigned long max_zone_pfns[MAX_NR_ZONES];
18679+ memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
18680+ max_zone_pfns[ZONE_DMA] =
18681+ virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
18682+ max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
18683+#ifdef CONFIG_HIGHMEM
18684+ max_zone_pfns[ZONE_HIGHMEM] = highend_pfn;
18685+#endif
18686+
18687+ free_area_init_nodes(max_zone_pfns);
18688+}
18689+
18690+void __init setup_bootmem_allocator(void)
18691 {
18692 int i;
18693+ unsigned long bootmap_size, bootmap;
18694+ unsigned long end_pfn = min(max_low_pfn, xen_start_info->nr_pages);
18695+
18696+ /*
18697+ * Initialize the boot-time allocator (with low memory only):
18698+ */
18699+ bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT;
18700+ bootmap = find_e820_area(min_low_pfn<<PAGE_SHIFT,
18701+ min(max_pfn_mapped, xen_start_info->nr_pages)<<PAGE_SHIFT,
18702+ bootmap_size, PAGE_SIZE);
18703+ if (bootmap == -1L)
18704+ panic("Cannot find bootmem map of size %ld\n", bootmap_size);
18705+ reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP");
18706+
18707+ /* don't touch min_low_pfn */
18708+ bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap >> PAGE_SHIFT,
18709+ min_low_pfn, end_pfn);
18710+ printk(KERN_INFO " mapped low ram: 0 - %08lx\n",
18711+ max_pfn_mapped<<PAGE_SHIFT);
18712+ printk(KERN_INFO " low ram: %08lx - %08lx\n",
18713+ min_low_pfn<<PAGE_SHIFT, max_low_pfn<<PAGE_SHIFT);
18714+ printk(KERN_INFO " bootmap %08lx - %08lx\n",
18715+ bootmap, bootmap + bootmap_size);
18716+ for_each_online_node(i)
18717+ free_bootmem_with_active_regions(i, end_pfn);
18718+ early_res_to_bootmem(0, end_pfn<<PAGE_SHIFT);
18719+
18720+ after_init_bootmem = 1;
18721+}
18722+
18723+static unsigned long __init extend_init_mapping(unsigned long tables_space)
18724+{
18725+ unsigned long start_pfn = (__pa(xen_start_info->pt_base) >> PAGE_SHIFT)
18726+ + xen_start_info->nr_pt_frames;
18727+ unsigned long start = start_pfn, va = (unsigned long)&_text;
18728+ pgd_t *pgd;
18729+ pud_t *pud;
18730+ pmd_t *pmd;
18731+ pte_t *pte;
18732+
18733+ /* Ensure init mappings cover kernel text/data and initial tables. */
18734+ while (va < PAGE_OFFSET + (start_pfn << PAGE_SHIFT) + tables_space) {
18735+ pgd = pgd_offset_k(va);
18736+ pud = pud_offset(pgd, va);
18737+ pmd = pmd_offset(pud, va);
18738+ if (pmd_none(*pmd)) {
18739+ unsigned long pa = start_pfn++ << PAGE_SHIFT;
18740+
18741+ memset(__va(pa), 0, PAGE_SIZE);
18742+ make_lowmem_page_readonly(__va(pa),
18743+ XENFEAT_writable_page_tables);
18744+ xen_l2_entry_update(pmd, __pmd(pa | _KERNPG_TABLE));
18745+ }
18746+ pte = pte_offset_kernel(pmd, va);
18747+ if (pte_none(*pte)) {
18748+ pte_t new_pte = __pte(__pa(va) | _KERNPG_TABLE);
18749+
18750+ if (HYPERVISOR_update_va_mapping(va, new_pte, 0))
18751+ BUG();
18752+ }
18753+ va += PAGE_SIZE;
18754+ }
18755+
18756+ /* Finally, blow away any spurious initial mappings. */
18757+ while (1) {
18758+ pgd = pgd_offset_k(va);
18759+ pud = pud_offset(pgd, va);
18760+ pmd = pmd_offset(pud, va);
18761+ if (pmd_none(*pmd))
18762+ break;
18763+ if (HYPERVISOR_update_va_mapping(va, __pte_ma(0), 0))
18764+ BUG();
18765+ va += PAGE_SIZE;
18766+ }
18767+
18768+ if (start_pfn > start)
18769+ reserve_early(start << PAGE_SHIFT,
18770+ start_pfn << PAGE_SHIFT, "INITMAP");
18771+
18772+ return start_pfn;
18773+}
18774+
18775+static void __init find_early_table_space(unsigned long end)
18776+{
18777+ unsigned long puds, pmds, ptes, tables;
18778+
18779+ puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
18780+ tables = PAGE_ALIGN(puds * sizeof(pud_t));
18781+
18782+ pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
18783+ tables += PAGE_ALIGN(pmds * sizeof(pmd_t));
18784+
18785+ if (cpu_has_pse) {
18786+ unsigned long extra;
18787+
18788+ extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT);
18789+ extra += PMD_SIZE;
18790+ ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT;
18791+ } else
18792+ ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT;
18793+
18794+ tables += PAGE_ALIGN(ptes * sizeof(pte_t));
18795+
18796+ /* for fixmap */
18797+ tables += PAGE_SIZE
18798+ * ((((FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK)
18799+ - (__fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK))
18800+ >> PMD_SHIFT);
18801+
18802+ table_start = extend_init_mapping(tables);
18803+
18804+ table_end = table_start;
18805+ table_top = table_start + (tables>>PAGE_SHIFT);
18806+
18807+ printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n",
18808+ end, table_start << PAGE_SHIFT,
18809+ (table_start << PAGE_SHIFT) + tables);
18810+}
18811+
18812+unsigned long __init_refok init_memory_mapping(unsigned long start,
18813+ unsigned long end)
18814+{
18815+ pgd_t *pgd_base = swapper_pg_dir;
18816+ unsigned long start_pfn, end_pfn;
18817+ unsigned long big_page_start;
18818+
18819+ /*
18820+ * Find space for the kernel direct mapping tables.
18821+ */
18822+ if (!after_init_bootmem)
18823+ find_early_table_space(end);
18824
18825 #ifdef CONFIG_X86_PAE
18826 set_nx();
18827 if (nx_enabled)
18828 printk(KERN_INFO "NX (Execute Disable) protection: active\n");
18829 #endif
18830+
18831+ /* Enable PSE if available */
18832+ if (cpu_has_pse)
18833+ set_in_cr4(X86_CR4_PSE);
18834+
18835+ /* Enable PGE if available */
18836+ if (cpu_has_pge) {
18837+ set_in_cr4(X86_CR4_PGE);
18838+ __supported_pte_mask |= _PAGE_GLOBAL;
18839+ }
18840+
18841+ /*
18842+ * Don't use a large page for the first 2/4MB of memory
18843+ * because there are often fixed size MTRRs in there
18844+ * and overlapping MTRRs into large pages can cause
18845+ * slowdowns.
18846+ */
18847+ big_page_start = PMD_SIZE;
18848+
18849+ if (start < big_page_start) {
18850+ start_pfn = start >> PAGE_SHIFT;
18851+ end_pfn = min(big_page_start>>PAGE_SHIFT, end>>PAGE_SHIFT);
18852+ } else {
18853+ /* head is not big page alignment ? */
18854+ start_pfn = start >> PAGE_SHIFT;
18855+ end_pfn = ((start + (PMD_SIZE - 1))>>PMD_SHIFT)
18856+ << (PMD_SHIFT - PAGE_SHIFT);
18857+ }
18858+ if (start_pfn < end_pfn)
18859+ kernel_physical_mapping_init(pgd_base, start_pfn, end_pfn, 0);
18860+
18861+ /* big page range */
18862+ start_pfn = ((start + (PMD_SIZE - 1))>>PMD_SHIFT)
18863+ << (PMD_SHIFT - PAGE_SHIFT);
18864+ if (start_pfn < (big_page_start >> PAGE_SHIFT))
18865+ start_pfn = big_page_start >> PAGE_SHIFT;
18866+ end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT);
18867+ if (start_pfn < end_pfn)
18868+ kernel_physical_mapping_init(pgd_base, start_pfn, end_pfn,
18869+ cpu_has_pse);
18870+
18871+ /* tail is not big page alignment ? */
18872+ start_pfn = end_pfn;
18873+ if (start_pfn > (big_page_start>>PAGE_SHIFT)) {
18874+ end_pfn = end >> PAGE_SHIFT;
18875+ if (start_pfn < end_pfn)
18876+ kernel_physical_mapping_init(pgd_base, start_pfn,
18877+ end_pfn, 0);
18878+ }
18879+
18880+ early_ioremap_page_table_range_init(pgd_base);
18881+
18882+ __flush_tlb_all();
18883+
18884+ if (!after_init_bootmem)
18885+ reserve_early(table_start << PAGE_SHIFT,
18886+ table_end << PAGE_SHIFT, "PGTABLE");
18887+
18888+ if (!after_init_bootmem)
18889+ early_memtest(start, end);
18890+
18891+ return end >> PAGE_SHIFT;
18892+}
18893+
18894+
18895+/*
18896+ * paging_init() sets up the page tables - note that the first 8MB are
18897+ * already mapped by head.S.
18898+ *
18899+ * This routines also unmaps the page at virtual kernel address 0, so
18900+ * that we can trap those pesky NULL-reference errors in the kernel.
18901+ */
18902+void __init paging_init(void)
18903+{
18904 pagetable_init();
18905
18906 __flush_tlb_all();
18907
18908 kmap_init();
18909
18910- /* Switch to the real shared_info page, and clear the
18911- * dummy page. */
18912- set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info);
18913- HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
18914- memset(empty_zero_page, 0, sizeof(empty_zero_page));
18915-
18916- /* Setup mapping of lower 1st MB */
18917- for (i = 0; i < NR_FIX_ISAMAPS; i++)
18918- if (is_initial_xendomain())
18919- set_fixmap(FIX_ISAMAP_BEGIN - i, i * PAGE_SIZE);
18920- else
18921- __set_fixmap(FIX_ISAMAP_BEGIN - i,
18922- virt_to_machine(empty_zero_page),
18923- PAGE_KERNEL_RO);
18924+ /*
18925+ * NOTE: at this point the bootmem allocator is fully available.
18926+ */
18927+ sparse_init();
18928+ zone_sizes_init();
18929 }
18930
18931 /*
18932@@ -598,7 +942,7 @@ static struct kcore_list kcore_mem, kcor
18933 void __init mem_init(void)
18934 {
18935 int codesize, reservedpages, datasize, initsize;
18936- int tmp, bad_ppro;
18937+ int tmp;
18938 unsigned long pfn;
18939
18940 pci_iommu_alloc();
18941@@ -606,19 +950,6 @@ void __init mem_init(void)
18942 #ifdef CONFIG_FLATMEM
18943 BUG_ON(!mem_map);
18944 #endif
18945- bad_ppro = ppro_with_ram_bug();
18946-
18947-#ifdef CONFIG_HIGHMEM
18948- /* check that fixmap and pkmap do not overlap */
18949- if (PKMAP_BASE + LAST_PKMAP*PAGE_SIZE >= FIXADDR_START) {
18950- printk(KERN_ERR
18951- "fixmap and kmap areas overlap - this will crash\n");
18952- printk(KERN_ERR "pkstart: %lxh pkend: %lxh fixstart %lxh\n",
18953- PKMAP_BASE, PKMAP_BASE + LAST_PKMAP*PAGE_SIZE,
18954- FIXADDR_START);
18955- BUG();
18956- }
18957-#endif
18958 /* this will put all low memory onto the freelists */
18959 totalram_pages += free_all_bootmem();
18960 /* XEN: init and count low-mem pages outside initial allocation. */
18961@@ -636,7 +967,7 @@ void __init mem_init(void)
18962 if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp)))
18963 reservedpages++;
18964
18965- set_highmem_pages_init(bad_ppro);
18966+ set_highmem_pages_init();
18967
18968 codesize = (unsigned long) &_etext - (unsigned long) &_text;
18969 datasize = (unsigned long) &_edata - (unsigned long) &_etext;
18970@@ -657,7 +988,6 @@ void __init mem_init(void)
18971 (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))
18972 );
18973
18974-#if 1 /* double-sanity-check paranoia */
18975 printk(KERN_INFO "virtual kernel memory layout:\n"
18976 " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
18977 #ifdef CONFIG_HIGHMEM
18978@@ -698,7 +1028,6 @@ void __init mem_init(void)
18979 #endif
18980 BUG_ON(VMALLOC_START > VMALLOC_END);
18981 BUG_ON((unsigned long)high_memory > VMALLOC_START);
18982-#endif /* double-sanity-check paranoia */
18983
18984 if (boot_cpu_data.wp_works_ok < 0)
18985 test_wp_bit();
18986@@ -755,6 +1084,8 @@ void mark_rodata_ro(void)
18987 unsigned long start = PFN_ALIGN(_text);
18988 unsigned long size = PFN_ALIGN(_etext) - start;
18989
18990+#ifndef CONFIG_DYNAMIC_FTRACE
18991+ /* Dynamic tracing modifies the kernel text section */
18992 set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
18993 printk(KERN_INFO "Write protecting the kernel text: %luk\n",
18994 size >> 10);
18995@@ -767,6 +1098,8 @@ void mark_rodata_ro(void)
18996 printk(KERN_INFO "Testing CPA: write protecting again\n");
18997 set_pages_ro(virt_to_page(start), size>>PAGE_SHIFT);
18998 #endif
18999+#endif /* CONFIG_DYNAMIC_FTRACE */
19000+
19001 start += size;
19002 size = (unsigned long)__end_rodata - start;
19003 set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
19004@@ -829,3 +1162,9 @@ void free_initrd_mem(unsigned long start
19005 free_init_pages("initrd memory", start, end);
19006 }
19007 #endif
19008+
19009+int __init reserve_bootmem_generic(unsigned long phys, unsigned long len,
19010+ int flags)
19011+{
19012+ return reserve_bootmem(phys, len, flags);
19013+}
19014--- sle11-2009-10-16.orig/arch/x86/mm/init_64-xen.c 2009-03-16 16:38:05.000000000 +0100
19015+++ sle11-2009-10-16/arch/x86/mm/init_64-xen.c 2009-06-04 10:21:39.000000000 +0200
19016@@ -21,6 +21,7 @@
19017 #include <linux/swap.h>
19018 #include <linux/smp.h>
19019 #include <linux/init.h>
19020+#include <linux/initrd.h>
19021 #include <linux/pagemap.h>
19022 #include <linux/bootmem.h>
19023 #include <linux/proc_fs.h>
19024@@ -52,6 +53,14 @@
19025
19026 #include <xen/features.h>
19027
19028+/*
19029+ * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
19030+ * The direct mapping extends to max_pfn_mapped, so that we can directly access
19031+ * apertures, ACPI and other tables without having to play with fixmaps.
19032+ */
19033+unsigned long max_low_pfn_mapped;
19034+unsigned long max_pfn_mapped;
19035+
19036 #if CONFIG_XEN_COMPAT <= 0x030002
19037 unsigned int __kernel_page_user;
19038 EXPORT_SYMBOL(__kernel_page_user);
19039@@ -60,13 +69,12 @@ EXPORT_SYMBOL(__kernel_page_user);
19040 int after_bootmem;
19041
19042 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
19043-extern unsigned long start_pfn;
19044
19045 extern pmd_t level2_fixmap_pgt[PTRS_PER_PMD];
19046 extern pte_t level1_fixmap_pgt[PTRS_PER_PTE];
19047
19048 #ifndef CONFIG_XEN
19049-int direct_gbpages __meminitdata
19050+int direct_gbpages
19051 #ifdef CONFIG_DIRECT_GBPAGES
19052 = 1
19053 #endif
19054@@ -145,55 +153,23 @@ void __meminit early_make_page_readonly(
19055 * around without checking the pgd every time.
19056 */
19057
19058-void show_mem(void)
19059-{
19060- long i, total = 0, reserved = 0;
19061- long shared = 0, cached = 0;
19062- struct page *page;
19063- pg_data_t *pgdat;
19064-
19065- printk(KERN_INFO "Mem-info:\n");
19066- show_free_areas();
19067- for_each_online_pgdat(pgdat) {
19068- for (i = 0; i < pgdat->node_spanned_pages; ++i) {
19069- /*
19070- * This loop can take a while with 256 GB and
19071- * 4k pages so defer the NMI watchdog:
19072- */
19073- if (unlikely(i % MAX_ORDER_NR_PAGES == 0))
19074- touch_nmi_watchdog();
19075-
19076- if (!pfn_valid(pgdat->node_start_pfn + i))
19077- continue;
19078-
19079- page = pfn_to_page(pgdat->node_start_pfn + i);
19080- total++;
19081- if (PageReserved(page))
19082- reserved++;
19083- else if (PageSwapCache(page))
19084- cached++;
19085- else if (page_count(page))
19086- shared += page_count(page) - 1;
19087- }
19088- }
19089- printk(KERN_INFO "%lu pages of RAM\n", total);
19090- printk(KERN_INFO "%lu reserved pages\n", reserved);
19091- printk(KERN_INFO "%lu pages shared\n", shared);
19092- printk(KERN_INFO "%lu pages swap cached\n", cached);
19093-}
19094-
19095 static unsigned long __meminitdata table_start;
19096-static unsigned long __meminitdata table_end;
19097+static unsigned long __meminitdata table_cur;
19098+static unsigned long __meminitdata table_top;
19099
19100-static __init void *spp_getpage(void)
19101+/*
19102+ * NOTE: This function is marked __ref because it calls __init function
19103+ * (alloc_bootmem_pages). It's safe to do it ONLY when after_bootmem == 0.
19104+ */
19105+static __ref void *spp_getpage(void)
19106 {
19107 void *ptr;
19108
19109 if (after_bootmem)
19110 ptr = (void *) get_zeroed_page(GFP_ATOMIC);
19111- else if (start_pfn < table_end) {
19112- ptr = __va(start_pfn << PAGE_SHIFT);
19113- start_pfn++;
19114+ else if (table_cur < table_top) {
19115+ ptr = __va(table_cur << PAGE_SHIFT);
19116+ table_cur++;
19117 memset(ptr, 0, PAGE_SIZE);
19118 } else
19119 ptr = alloc_bootmem_pages(PAGE_SIZE);
19120@@ -208,30 +184,18 @@ static __init void *spp_getpage(void)
19121 return ptr;
19122 }
19123
19124-#define pgd_offset_u(address) (__user_pgd(init_level4_pgt) + pgd_index(address))
19125-#define pud_offset_u(address) (level3_user_pgt + pud_index(address))
19126-
19127-static __init void
19128-set_pte_phys(unsigned long vaddr, unsigned long phys, pgprot_t prot, int user_mode)
19129+void
19130+set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte)
19131 {
19132- pgd_t *pgd;
19133 pud_t *pud;
19134 pmd_t *pmd;
19135- pte_t *pte, new_pte;
19136-
19137- pr_debug("set_pte_phys %lx to %lx\n", vaddr, phys);
19138+ pte_t *pte;
19139
19140- pgd = (user_mode ? pgd_offset_u(vaddr) : pgd_offset_k(vaddr));
19141- if (pgd_none(*pgd)) {
19142- printk(KERN_ERR
19143- "PGD FIXMAP MISSING, it should be setup in head.S!\n");
19144- return;
19145- }
19146- pud = (user_mode ? pud_offset_u(vaddr) : pud_offset(pgd, vaddr));
19147+ pud = pud_page + pud_index(vaddr);
19148 if (pud_none(*pud)) {
19149 pmd = (pmd_t *) spp_getpage();
19150 make_page_readonly(pmd, XENFEAT_writable_page_tables);
19151- set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE | _PAGE_USER));
19152+ pud_populate(&init_mm, pud, pmd);
19153 if (pmd != pmd_offset(pud, 0)) {
19154 printk(KERN_ERR "PAGETABLE BUG #01! %p <-> %p\n",
19155 pmd, pmd_offset(pud, 0));
19156@@ -242,19 +206,20 @@ set_pte_phys(unsigned long vaddr, unsign
19157 if (pmd_none(*pmd)) {
19158 pte = (pte_t *) spp_getpage();
19159 make_page_readonly(pte, XENFEAT_writable_page_tables);
19160- set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE | _PAGE_USER));
19161+ pmd_populate_kernel(&init_mm, pmd, pte);
19162 if (pte != pte_offset_kernel(pmd, 0)) {
19163 printk(KERN_ERR "PAGETABLE BUG #02!\n");
19164 return;
19165 }
19166 }
19167- if (pgprot_val(prot))
19168- new_pte = pfn_pte(phys >> PAGE_SHIFT, prot);
19169- else
19170- new_pte = __pte(0);
19171
19172 pte = pte_offset_kernel(pmd, vaddr);
19173 if (!pte_none(*pte) && __pte_val(new_pte) &&
19174+#ifdef CONFIG_ACPI
19175+ /* __acpi_map_table() fails to properly call clear_fixmap() */
19176+ (vaddr < __fix_to_virt(FIX_ACPI_END) ||
19177+ vaddr > __fix_to_virt(FIX_ACPI_BEGIN)) &&
19178+#endif
19179 __pte_val(*pte) != (__pte_val(new_pte) & __supported_pte_mask))
19180 pte_ERROR(*pte);
19181 set_pte(pte, new_pte);
19182@@ -266,15 +231,13 @@ set_pte_phys(unsigned long vaddr, unsign
19183 __flush_tlb_one(vaddr);
19184 }
19185
19186-static __init void
19187-set_pte_phys_ma(unsigned long vaddr, unsigned long phys, pgprot_t prot)
19188+void
19189+set_pte_vaddr(unsigned long vaddr, pte_t pteval)
19190 {
19191 pgd_t *pgd;
19192- pud_t *pud;
19193- pmd_t *pmd;
19194- pte_t *pte, new_pte;
19195+ pud_t *pud_page;
19196
19197- pr_debug("set_pte_phys_ma %lx to %lx\n", vaddr, phys);
19198+ pr_debug("set_pte_vaddr %lx to %lx\n", vaddr, __pte_val(pteval));
19199
19200 pgd = pgd_offset_k(vaddr);
19201 if (pgd_none(*pgd)) {
19202@@ -282,47 +245,51 @@ set_pte_phys_ma(unsigned long vaddr, uns
19203 "PGD FIXMAP MISSING, it should be setup in head.S!\n");
19204 return;
19205 }
19206- pud = pud_offset(pgd, vaddr);
19207- if (pud_none(*pud)) {
19208- pmd = (pmd_t *) spp_getpage();
19209- make_page_readonly(pmd, XENFEAT_writable_page_tables);
19210- set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE | _PAGE_USER));
19211- if (pmd != pmd_offset(pud, 0)) {
19212- printk(KERN_ERR "PAGETABLE BUG #01! %p <-> %p\n",
19213- pmd, pmd_offset(pud, 0));
19214+ pud_page = (pud_t*)pgd_page_vaddr(*pgd);
19215+ set_pte_vaddr_pud(pud_page, vaddr, pteval);
19216+}
19217+
19218+#ifndef CONFIG_XEN
19219+/*
19220+ * Create large page table mappings for a range of physical addresses.
19221+ */
19222+static void __init __init_extra_mapping(unsigned long phys, unsigned long size,
19223+ pgprot_t prot)
19224+{
19225+ pgd_t *pgd;
19226+ pud_t *pud;
19227+ pmd_t *pmd;
19228+
19229+ BUG_ON((phys & ~PMD_MASK) || (size & ~PMD_MASK));
19230+ for (; size; phys += PMD_SIZE, size -= PMD_SIZE) {
19231+ pgd = pgd_offset_k((unsigned long)__va(phys));
19232+ if (pgd_none(*pgd)) {
19233+ pud = (pud_t *) spp_getpage();
19234+ set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE |
19235+ _PAGE_USER));
19236 }
19237- }
19238- pmd = pmd_offset(pud, vaddr);
19239- if (pmd_none(*pmd)) {
19240- pte = (pte_t *) spp_getpage();
19241- make_page_readonly(pte, XENFEAT_writable_page_tables);
19242- set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE | _PAGE_USER));
19243- if (pte != pte_offset_kernel(pmd, 0)) {
19244- printk(KERN_ERR "PAGETABLE BUG #02!\n");
19245- return;
19246+ pud = pud_offset(pgd, (unsigned long)__va(phys));
19247+ if (pud_none(*pud)) {
19248+ pmd = (pmd_t *) spp_getpage();
19249+ set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE |
19250+ _PAGE_USER));
19251 }
19252+ pmd = pmd_offset(pud, phys);
19253+ BUG_ON(!pmd_none(*pmd));
19254+ set_pmd(pmd, __pmd(phys | pgprot_val(prot)));
19255 }
19256- new_pte = pfn_pte_ma(phys >> PAGE_SHIFT, prot);
19257+}
19258
19259- pte = pte_offset_kernel(pmd, vaddr);
19260- if (!pte_none(*pte) && __pte_val(new_pte) &&
19261-#ifdef CONFIG_ACPI
19262- /* __acpi_map_table() fails to properly call clear_fixmap() */
19263- (vaddr < __fix_to_virt(FIX_ACPI_END) ||
19264- vaddr > __fix_to_virt(FIX_ACPI_BEGIN)) &&
19265-#endif
19266- __pte_val(*pte) != (__pte_val(new_pte) & __supported_pte_mask))
19267- pte_ERROR(*pte);
19268- set_pte(pte, new_pte);
19269+void __init init_extra_mapping_wb(unsigned long phys, unsigned long size)
19270+{
19271+ __init_extra_mapping(phys, size, PAGE_KERNEL_LARGE);
19272+}
19273
19274- /*
19275- * It's enough to flush this one mapping.
19276- * (PGE mappings get flushed as well)
19277- */
19278- __flush_tlb_one(vaddr);
19279+void __init init_extra_mapping_uc(unsigned long phys, unsigned long size)
19280+{
19281+ __init_extra_mapping(phys, size, PAGE_KERNEL_LARGE_NOCACHE);
19282 }
19283
19284-#ifndef CONFIG_XEN
19285 /*
19286 * The head.S code sets up the kernel high mapping:
19287 *
19288@@ -352,63 +319,52 @@ void __init cleanup_highmap(void)
19289 }
19290 #endif
19291
19292-/* NOTE: this is meant to be run only at boot */
19293-void __init __set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
19294-{
19295- unsigned long address = __fix_to_virt(idx);
19296-
19297- if (idx >= __end_of_fixed_addresses) {
19298- printk(KERN_ERR "Invalid __set_fixmap\n");
19299- return;
19300- }
19301- switch (idx) {
19302- case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE:
19303- set_pte_phys(address, phys, prot, 0);
19304- set_pte_phys(address, phys, prot, 1);
19305- break;
19306- case FIX_EARLYCON_MEM_BASE:
19307- xen_l1_entry_update(level1_fixmap_pgt + pte_index(address),
19308- pfn_pte_ma(phys >> PAGE_SHIFT, prot));
19309- break;
19310- default:
19311- set_pte_phys_ma(address, phys, prot);
19312- break;
19313- }
19314-}
19315-
19316-static __meminit void *alloc_static_page(unsigned long *phys)
19317+static __ref void *alloc_low_page(unsigned long *phys)
19318 {
19319- unsigned long va = (start_pfn << PAGE_SHIFT) + __START_KERNEL_map;
19320+ unsigned long pfn;
19321+ void *adr;
19322
19323 if (after_bootmem) {
19324- void *adr = (void *)get_zeroed_page(GFP_ATOMIC);
19325+ adr = (void *)get_zeroed_page(GFP_ATOMIC);
19326 *phys = __pa(adr);
19327
19328 return adr;
19329 }
19330
19331- *phys = start_pfn << PAGE_SHIFT;
19332- start_pfn++;
19333- memset((void *)va, 0, PAGE_SIZE);
19334- return (void *)va;
19335+ BUG_ON(!table_cur);
19336+ pfn = table_cur++;
19337+ if (pfn >= table_top)
19338+ panic("alloc_low_page: ran out of memory");
19339+
19340+ adr = early_ioremap(pfn_to_mfn(pfn) * PAGE_SIZE, PAGE_SIZE);
19341+ memset(adr, 0, PAGE_SIZE);
19342+ *phys = pfn * PAGE_SIZE;
19343+ return adr;
19344 }
19345
19346-#define PTE_SIZE PAGE_SIZE
19347+static __ref void unmap_low_page(void *adr)
19348+{
19349+ if (after_bootmem)
19350+ return;
19351+
19352+ early_iounmap(adr, PAGE_SIZE);
19353+}
19354
19355 static inline int __meminit make_readonly(unsigned long paddr)
19356 {
19357 extern char __vsyscall_0;
19358 int readonly = 0;
19359
19360- /* Make new page tables read-only. */
19361+ /* Make new page tables read-only on the first pass. */
19362 if (!xen_feature(XENFEAT_writable_page_tables)
19363+ && !max_pfn_mapped
19364 && (paddr >= (table_start << PAGE_SHIFT))
19365- && (paddr < (table_end << PAGE_SHIFT)))
19366+ && (paddr < (table_top << PAGE_SHIFT)))
19367 readonly = 1;
19368 /* Make old page tables read-only. */
19369 if (!xen_feature(XENFEAT_writable_page_tables)
19370 && (paddr >= (xen_start_info->pt_base - __START_KERNEL_map))
19371- && (paddr < (start_pfn << PAGE_SHIFT)))
19372+ && (paddr < (table_cur << PAGE_SHIFT)))
19373 readonly = 1;
19374
19375 /*
19376@@ -425,118 +381,131 @@ static inline int __meminit make_readonl
19377 return readonly;
19378 }
19379
19380-#ifndef CONFIG_XEN
19381-/* Must run before zap_low_mappings */
19382-__meminit void *early_ioremap(unsigned long addr, unsigned long size)
19383+static unsigned long __meminit
19384+phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end)
19385 {
19386- pmd_t *pmd, *last_pmd;
19387- unsigned long vaddr;
19388- int i, pmds;
19389-
19390- pmds = ((addr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE;
19391- vaddr = __START_KERNEL_map;
19392- pmd = level2_kernel_pgt;
19393- last_pmd = level2_kernel_pgt + PTRS_PER_PMD - 1;
19394-
19395- for (; pmd <= last_pmd; pmd++, vaddr += PMD_SIZE) {
19396- for (i = 0; i < pmds; i++) {
19397- if (pmd_present(pmd[i]))
19398- goto continue_outer_loop;
19399- }
19400- vaddr += addr & ~PMD_MASK;
19401- addr &= PMD_MASK;
19402+ unsigned pages = 0;
19403+ unsigned long last_map_addr = end;
19404+ int i;
19405+
19406+ pte_t *pte = pte_page + pte_index(addr);
19407+
19408+ for(i = pte_index(addr); i < PTRS_PER_PTE; i++, addr += PAGE_SIZE, pte++) {
19409+ unsigned long pteval = addr | __PAGE_KERNEL;
19410
19411- for (i = 0; i < pmds; i++, addr += PMD_SIZE)
19412- set_pmd(pmd+i, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC));
19413- __flush_tlb_all();
19414-
19415- return (void *)vaddr;
19416-continue_outer_loop:
19417- ;
19418+ if (addr >= end ||
19419+ (!after_bootmem &&
19420+ (addr >> PAGE_SHIFT) >= xen_start_info->nr_pages))
19421+ break;
19422+
19423+ if (__pte_val(*pte))
19424+ continue;
19425+
19426+ if (make_readonly(addr))
19427+ pteval &= ~_PAGE_RW;
19428+ if (0)
19429+ printk(" pte=%p addr=%lx pte=%016lx\n",
19430+ pte, addr, pteval);
19431+ if (!after_bootmem)
19432+ *pte = __pte(pteval & __supported_pte_mask);
19433+ else
19434+ set_pte(pte, __pte(pteval & __supported_pte_mask));
19435+ last_map_addr = (addr & PAGE_MASK) + PAGE_SIZE;
19436+ pages++;
19437 }
19438- printk("early_ioremap(0x%lx, %lu) failed\n", addr, size);
19439- return NULL;
19440+ update_page_count(PG_LEVEL_4K, pages);
19441+
19442+ return last_map_addr;
19443 }
19444
19445-/*
19446- * To avoid virtual aliases later:
19447- */
19448-__meminit void early_iounmap(void *addr, unsigned long size)
19449+static unsigned long __meminit
19450+phys_pte_update(pmd_t *pmd, unsigned long address, unsigned long end)
19451 {
19452- unsigned long vaddr;
19453- pmd_t *pmd;
19454- int i, pmds;
19455-
19456- vaddr = (unsigned long)addr;
19457- pmds = ((vaddr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE;
19458- pmd = level2_kernel_pgt + pmd_index(vaddr);
19459-
19460- for (i = 0; i < pmds; i++)
19461- pmd_clear(pmd + i);
19462+ pte_t *pte = (pte_t *)pmd_page_vaddr(*pmd);
19463
19464- __flush_tlb_all();
19465+ BUG_ON(!max_pfn_mapped);
19466+ return phys_pte_init(pte, address, end);
19467 }
19468-#endif
19469
19470 static unsigned long __meminit
19471-phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
19472+phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
19473+ unsigned long page_size_mask)
19474 {
19475+ unsigned long pages = 0;
19476+ unsigned long last_map_addr = end;
19477+ unsigned long start = address;
19478+
19479 int i = pmd_index(address);
19480
19481- for (; i < PTRS_PER_PMD; i++) {
19482+ for (; i < PTRS_PER_PMD; i++, address += PMD_SIZE) {
19483 unsigned long pte_phys;
19484- pmd_t *pmd = pmd_page + i;
19485- pte_t *pte, *pte_save;
19486- int k;
19487+ pmd_t *pmd = pmd_page + pmd_index(address);
19488+ pte_t *pte;
19489
19490 if (address >= end)
19491 break;
19492
19493 if (__pmd_val(*pmd)) {
19494- address += PMD_SIZE;
19495+ if (!pmd_large(*pmd)) {
19496+ spin_lock(&init_mm.page_table_lock);
19497+ last_map_addr = phys_pte_update(pmd, address,
19498+ end);
19499+ spin_unlock(&init_mm.page_table_lock);
19500+ }
19501+ /* Count entries we're using from level2_ident_pgt */
19502+ if (start == 0)
19503+ pages++;
19504 continue;
19505 }
19506
19507- pte = alloc_static_page(&pte_phys);
19508- pte_save = pte;
19509- for (k = 0; k < PTRS_PER_PTE; pte++, k++, address += PTE_SIZE) {
19510- unsigned long pteval = address | _PAGE_NX | _KERNPG_TABLE;
19511-
19512- if (address >= (after_bootmem
19513- ? end
19514- : xen_start_info->nr_pages << PAGE_SHIFT))
19515- pteval = 0;
19516- else if (make_readonly(address))
19517- pteval &= ~_PAGE_RW;
19518- set_pte(pte, __pte(pteval & __supported_pte_mask));
19519+ if (page_size_mask & (1<<PG_LEVEL_2M)) {
19520+ pages++;
19521+ spin_lock(&init_mm.page_table_lock);
19522+ set_pte((pte_t *)pmd,
19523+ pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL_LARGE));
19524+ spin_unlock(&init_mm.page_table_lock);
19525+ last_map_addr = (address & PMD_MASK) + PMD_SIZE;
19526+ continue;
19527 }
19528+
19529+ pte = alloc_low_page(&pte_phys);
19530+ last_map_addr = phys_pte_init(pte, address, end);
19531+ unmap_low_page(pte);
19532+
19533 if (!after_bootmem) {
19534- early_make_page_readonly(pte_save, XENFEAT_writable_page_tables);
19535- *pmd = __pmd(pte_phys | _KERNPG_TABLE);
19536+ if (max_pfn_mapped)
19537+ make_page_readonly(__va(pte_phys),
19538+ XENFEAT_writable_page_tables);
19539+ *pmd = __pmd(pte_phys | _PAGE_TABLE);
19540 } else {
19541- make_page_readonly(pte_save, XENFEAT_writable_page_tables);
19542- set_pmd(pmd, __pmd(pte_phys | _KERNPG_TABLE));
19543+ make_page_readonly(pte, XENFEAT_writable_page_tables);
19544+ spin_lock(&init_mm.page_table_lock);
19545+ pmd_populate_kernel(&init_mm, pmd, __va(pte_phys));
19546+ spin_unlock(&init_mm.page_table_lock);
19547 }
19548 }
19549- return address;
19550+ update_page_count(PG_LEVEL_2M, pages);
19551+ return last_map_addr;
19552 }
19553
19554 static unsigned long __meminit
19555-phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end)
19556+phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end,
19557+ unsigned long page_size_mask)
19558 {
19559 pmd_t *pmd = pmd_offset(pud, 0);
19560 unsigned long last_map_addr;
19561
19562- spin_lock(&init_mm.page_table_lock);
19563- last_map_addr = phys_pmd_init(pmd, address, end);
19564- spin_unlock(&init_mm.page_table_lock);
19565+ BUG_ON(!max_pfn_mapped);
19566+ last_map_addr = phys_pmd_init(pmd, address, end, page_size_mask);
19567 __flush_tlb_all();
19568 return last_map_addr;
19569 }
19570
19571 static unsigned long __meminit
19572-phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end)
19573+phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
19574+ unsigned long page_size_mask)
19575 {
19576+ unsigned long pages = 0;
19577 unsigned long last_map_addr = end;
19578 int i = pud_index(addr);
19579
19580@@ -550,29 +519,55 @@ phys_pud_init(pud_t *pud_page, unsigned
19581
19582 if (__pud_val(*pud)) {
19583 if (!pud_large(*pud))
19584- last_map_addr = phys_pmd_update(pud, addr, end);
19585+ last_map_addr = phys_pmd_update(pud, addr, end,
19586+ page_size_mask);
19587 continue;
19588 }
19589
19590- if (direct_gbpages) {
19591+ if (page_size_mask & (1<<PG_LEVEL_1G)) {
19592+ pages++;
19593+ spin_lock(&init_mm.page_table_lock);
19594 set_pte((pte_t *)pud,
19595 pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL_LARGE));
19596+ spin_unlock(&init_mm.page_table_lock);
19597 last_map_addr = (addr & PUD_MASK) + PUD_SIZE;
19598 continue;
19599 }
19600
19601- pmd = alloc_static_page(&pmd_phys);
19602-
19603- spin_lock(&init_mm.page_table_lock);
19604- *pud = __pud(pmd_phys | _KERNPG_TABLE);
19605- last_map_addr = phys_pmd_init(pmd, addr, end);
19606- spin_unlock(&init_mm.page_table_lock);
19607+ pmd = alloc_low_page(&pmd_phys);
19608+ last_map_addr = phys_pmd_init(pmd, addr, end, page_size_mask);
19609+ unmap_low_page(pmd);
19610
19611- early_make_page_readonly(pmd, XENFEAT_writable_page_tables);
19612+ if (!after_bootmem) {
19613+ if (max_pfn_mapped)
19614+ make_page_readonly(__va(pmd_phys),
19615+ XENFEAT_writable_page_tables);
19616+ if (page_size_mask & (1 << PG_LEVEL_NUM))
19617+ xen_l3_entry_update(pud, __pud(pmd_phys | _PAGE_TABLE));
19618+ else
19619+ *pud = __pud(pmd_phys | _PAGE_TABLE);
19620+ } else {
19621+ make_page_readonly(pmd, XENFEAT_writable_page_tables);
19622+ spin_lock(&init_mm.page_table_lock);
19623+ pud_populate(&init_mm, pud, __va(pmd_phys));
19624+ spin_unlock(&init_mm.page_table_lock);
19625+ }
19626 }
19627 __flush_tlb_all();
19628+ update_page_count(PG_LEVEL_1G, pages);
19629
19630- return last_map_addr >> PAGE_SHIFT;
19631+ return last_map_addr;
19632+}
19633+
19634+static unsigned long __meminit
19635+phys_pud_update(pgd_t *pgd, unsigned long addr, unsigned long end,
19636+ unsigned long page_size_mask)
19637+{
19638+ pud_t *pud;
19639+
19640+ pud = (pud_t *)pgd_page_vaddr(*pgd);
19641+
19642+ return phys_pud_init(pud, addr, end, page_size_mask | (1 << PG_LEVEL_NUM));
19643 }
19644
19645 void __init xen_init_pt(void)
19646@@ -651,86 +646,36 @@ void __init xen_init_pt(void)
19647 }
19648 }
19649
19650-static void __init extend_init_mapping(unsigned long tables_space)
19651-{
19652- unsigned long va = __START_KERNEL_map;
19653- unsigned long start = start_pfn;
19654- unsigned long phys, addr, *pte_page;
19655- pmd_t *pmd;
19656- pte_t *pte, new_pte;
19657- unsigned long *page = (unsigned long *)init_level4_pgt;
19658-
19659- addr = page[pgd_index(va)];
19660- addr_to_page(addr, page);
19661- addr = page[pud_index(va)];
19662- addr_to_page(addr, page);
19663-
19664- /* Kill mapping of low 1MB. */
19665- while (va < (unsigned long)&_text) {
19666- if (HYPERVISOR_update_va_mapping(va, __pte_ma(0), 0))
19667- BUG();
19668- va += PAGE_SIZE;
19669- }
19670-
19671- /* Ensure init mappings cover kernel text/data and initial tables. */
19672- while (va < (__START_KERNEL_map
19673- + (start_pfn << PAGE_SHIFT)
19674- + tables_space)) {
19675- pmd = (pmd_t *)&page[pmd_index(va)];
19676- if (pmd_none(*pmd)) {
19677- pte_page = alloc_static_page(&phys);
19678- early_make_page_readonly(
19679- pte_page, XENFEAT_writable_page_tables);
19680- set_pmd(pmd, __pmd(phys | _KERNPG_TABLE));
19681- } else {
19682- addr = page[pmd_index(va)];
19683- addr_to_page(addr, pte_page);
19684- }
19685- pte = (pte_t *)&pte_page[pte_index(va)];
19686- if (pte_none(*pte)) {
19687- new_pte = pfn_pte(
19688- (va - __START_KERNEL_map) >> PAGE_SHIFT,
19689- __pgprot(_KERNPG_TABLE));
19690- xen_l1_entry_update(pte, new_pte);
19691- }
19692- va += PAGE_SIZE;
19693- }
19694-
19695- /* Finally, blow away any spurious initial mappings. */
19696- while (1) {
19697- pmd = (pmd_t *)&page[pmd_index(va)];
19698- if (pmd_none(*pmd))
19699- break;
19700- if (HYPERVISOR_update_va_mapping(va, __pte_ma(0), 0))
19701- BUG();
19702- va += PAGE_SIZE;
19703- }
19704-
19705- if (start_pfn > start)
19706- reserve_early(start << PAGE_SHIFT,
19707- start_pfn << PAGE_SHIFT, "INITMAP");
19708-}
19709-
19710 static void __init find_early_table_space(unsigned long end)
19711 {
19712 unsigned long puds, pmds, ptes, tables;
19713
19714 puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
19715+ tables = round_up(puds * sizeof(pud_t), PAGE_SIZE);
19716 pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
19717- ptes = (end + PTE_SIZE - 1) >> PAGE_SHIFT;
19718+ tables += round_up(pmds * sizeof(pmd_t), PAGE_SIZE);
19719
19720- tables = round_up(puds * 8, PAGE_SIZE) +
19721- round_up(pmds * 8, PAGE_SIZE) +
19722- round_up(ptes * 8, PAGE_SIZE);
19723+ ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT;
19724+ tables += round_up(ptes * sizeof(pte_t), PAGE_SIZE);
19725
19726- extend_init_mapping(tables);
19727+ if (!table_top) {
19728+ table_start = (__pa(xen_start_info->pt_base) >> PAGE_SHIFT) +
19729+ xen_start_info->nr_pt_frames;
19730+ table_cur = table_start;
19731+ } else {
19732+ /*
19733+ * [table_start, table_top) gets passed to reserve_early(),
19734+ * so we must not use table_cur here, despite continuing
19735+ * to allocate from there. table_cur possibly being below
19736+ * table_start is otoh not a problem.
19737+ */
19738+ table_start = table_top;
19739+ }
19740
19741- table_start = start_pfn;
19742- table_end = table_start + (tables>>PAGE_SHIFT);
19743+ table_top = table_cur + (tables >> PAGE_SHIFT);
19744
19745- early_printk("kernel direct mapping tables up to %lx @ %lx-%lx\n",
19746- end, table_start << PAGE_SHIFT,
19747- (table_start << PAGE_SHIFT) + tables);
19748+ printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n",
19749+ end, table_cur << PAGE_SHIFT, table_top << PAGE_SHIFT);
19750 }
19751
19752 static void __init xen_finish_init_mapping(void)
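
find_early_table_space() above sizes the worst case for the early page tables: one pud_t per GiB, one pmd_t per 2 MiB and one pte_t per 4 KiB of mapped memory, each array rounded up to whole pages. A standalone model of that arithmetic, assuming 8-byte table entries and the usual x86-64 shift values:

/* Standalone sketch of the table-space estimate; constants are assumptions. */
#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)
#define PMD_SHIFT  21
#define PMD_SIZE   (1UL << PMD_SHIFT)
#define PUD_SHIFT  30
#define PUD_SIZE   (1UL << PUD_SHIFT)

static unsigned long round_up(unsigned long x, unsigned long a)
{
        return (x + a - 1) & ~(a - 1);          /* a must be a power of two */
}

int main(void)
{
        unsigned long end = 4UL << 30;          /* map the first 4 GiB */
        unsigned long puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
        unsigned long pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
        unsigned long ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT;
        unsigned long tables = round_up(puds * 8, PAGE_SIZE) +
                               round_up(pmds * 8, PAGE_SIZE) +
                               round_up(ptes * 8, PAGE_SIZE);

        printf("worst-case page-table space: %lu kB\n", tables >> 10);
        return 0;
}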
19753@@ -752,18 +697,18 @@ static void __init xen_finish_init_mappi
19754 xen_start_info->mod_start = (unsigned long)
19755 __va(__pa(xen_start_info->mod_start));
19756
19757- /* Destroy the Xen-created mappings beyond the kernel image as
19758- * well as the temporary mappings created above. Prevents
19759- * overlap with modules area (if init mapping is very big).
19760- */
19761+ /* Destroy the Xen-created mappings beyond the kernel image. */
19762 start = PAGE_ALIGN((unsigned long)_end);
19763- end = __START_KERNEL_map + (table_end << PAGE_SHIFT);
19764+ end = __START_KERNEL_map + (table_start << PAGE_SHIFT);
19765 for (; start < end; start += PAGE_SIZE)
19766 if (HYPERVISOR_update_va_mapping(start, __pte_ma(0), 0))
19767 BUG();
19768
19769- /* Allocate pte's for initial fixmaps from 'start_pfn' allocator. */
19770- table_end = ~0UL;
19771+ /* Allocate pte's for initial fixmaps from 'table_cur' allocator. */
19772+ start = table_top;
19773+ WARN(table_cur != start, "start=%lx cur=%lx top=%lx\n",
19774+ table_start, table_cur, start);
19775+ table_top = ~0UL;
19776
19777 /* Switch to the real shared_info page, and clear the dummy page. */
19778 set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info);
19779@@ -780,8 +725,7 @@ static void __init xen_finish_init_mappi
19780 << PAGE_SHIFT,
19781 PAGE_KERNEL_RO);
19782
19783- /* Disable the 'start_pfn' allocator. */
19784- table_end = start_pfn;
19785+ table_top = max(table_cur, start);
19786 }
19787
19788 static void __init init_gbpages(void)
19789@@ -794,126 +738,91 @@ static void __init init_gbpages(void)
19790 #endif
19791 }
19792
19793-#ifdef CONFIG_MEMTEST_BOOTPARAM
19794-
19795-static void __init memtest(unsigned long start_phys, unsigned long size,
19796- unsigned pattern)
19797+static unsigned long __init kernel_physical_mapping_init(unsigned long start,
19798+ unsigned long end,
19799+ unsigned long page_size_mask)
19800 {
19801- unsigned long i;
19802- unsigned long *start;
19803- unsigned long start_bad;
19804- unsigned long last_bad;
19805- unsigned long val;
19806- unsigned long start_phys_aligned;
19807- unsigned long count;
19808- unsigned long incr;
19809-
19810- switch (pattern) {
19811- case 0:
19812- val = 0UL;
19813- break;
19814- case 1:
19815- val = -1UL;
19816- break;
19817- case 2:
19818- val = 0x5555555555555555UL;
19819- break;
19820- case 3:
19821- val = 0xaaaaaaaaaaaaaaaaUL;
19822- break;
19823- default:
19824- return;
19825- }
19826-
19827- incr = sizeof(unsigned long);
19828- start_phys_aligned = ALIGN(start_phys, incr);
19829- count = (size - (start_phys_aligned - start_phys))/incr;
19830- start = __va(start_phys_aligned);
19831- start_bad = 0;
19832- last_bad = 0;
19833-
19834- for (i = 0; i < count; i++)
19835- start[i] = val;
19836- for (i = 0; i < count; i++, start++, start_phys_aligned += incr) {
19837- if (*start != val) {
19838- if (start_phys_aligned == last_bad + incr) {
19839- last_bad += incr;
19840- } else {
19841- if (start_bad) {
19842- printk(KERN_CONT "\n %016lx bad mem addr %016lx - %016lx reserved",
19843- val, start_bad, last_bad + incr);
19844- reserve_early(start_bad, last_bad - start_bad, "BAD RAM");
19845- }
19846- start_bad = last_bad = start_phys_aligned;
19847- }
19848- }
19849- }
19850- if (start_bad) {
19851- printk(KERN_CONT "\n %016lx bad mem addr %016lx - %016lx reserved",
19852- val, start_bad, last_bad + incr);
19853- reserve_early(start_bad, last_bad - start_bad, "BAD RAM");
19854- }
19855
19856-}
19857+ unsigned long next, last_map_addr = end;
19858
19859-static int memtest_pattern __initdata = CONFIG_MEMTEST_BOOTPARAM_VALUE;
19860+ start = (unsigned long)__va(start);
19861+ end = (unsigned long)__va(end);
19862
19863-static int __init parse_memtest(char *arg)
19864-{
19865- if (arg)
19866- memtest_pattern = simple_strtoul(arg, NULL, 0);
19867- return 0;
19868-}
19869+ for (; start < end; start = next) {
19870+ pgd_t *pgd = pgd_offset_k(start);
19871+ unsigned long pud_phys;
19872+ pud_t *pud;
19873
19874-early_param("memtest", parse_memtest);
19875+ next = (start + PGDIR_SIZE) & PGDIR_MASK;
19876+ if (next > end)
19877+ next = end;
19878
19879-static void __init early_memtest(unsigned long start, unsigned long end)
19880-{
19881- u64 t_start, t_size;
19882- unsigned pattern;
19883+ if (__pgd_val(*pgd)) {
19884+ last_map_addr = phys_pud_update(pgd, __pa(start),
19885+ __pa(end), page_size_mask);
19886+ continue;
19887+ }
19888
19889- if (!memtest_pattern)
19890- return;
19891+ pud = alloc_low_page(&pud_phys);
19892+ last_map_addr = phys_pud_init(pud, __pa(start), __pa(next),
19893+ page_size_mask);
19894+ unmap_low_page(pud);
19895+
19896+ if(!after_bootmem) {
19897+ if (max_pfn_mapped)
19898+ make_page_readonly(__va(pud_phys),
19899+ XENFEAT_writable_page_tables);
19900+ xen_l4_entry_update(pgd, __pgd(pud_phys | _PAGE_TABLE));
19901+ } else {
19902+ make_page_readonly(pud, XENFEAT_writable_page_tables);
19903+ spin_lock(&init_mm.page_table_lock);
19904+ pgd_populate(&init_mm, pgd, __va(pud_phys));
19905+ spin_unlock(&init_mm.page_table_lock);
19906+ }
19907+ }
19908
19909- printk(KERN_INFO "early_memtest: pattern num %d", memtest_pattern);
19910- for (pattern = 0; pattern < memtest_pattern; pattern++) {
19911- t_start = start;
19912- t_size = 0;
19913- while (t_start < end) {
19914- t_start = find_e820_area_size(t_start, &t_size, 1);
19915+ return last_map_addr;
19916+}
19917
19918- /* done ? */
19919- if (t_start >= end)
19920- break;
19921- if (t_start + t_size > end)
19922- t_size = end - t_start;
19923+struct map_range {
19924+ unsigned long start;
19925+ unsigned long end;
19926+ unsigned page_size_mask;
19927+};
19928
19929- printk(KERN_CONT "\n %016llx - %016llx pattern %d",
19930- (unsigned long long)t_start,
19931- (unsigned long long)t_start + t_size, pattern);
19932+#define NR_RANGE_MR 5
19933
19934- memtest(t_start, t_size, pattern);
19935+static int save_mr(struct map_range *mr, int nr_range,
19936+ unsigned long start_pfn, unsigned long end_pfn,
19937+ unsigned long page_size_mask)
19938+{
19939
19940- t_start += t_size;
19941- }
19942+ if (start_pfn < end_pfn) {
19943+ if (nr_range >= NR_RANGE_MR)
19944+ panic("run out of range for init_memory_mapping\n");
19945+ mr[nr_range].start = start_pfn<<PAGE_SHIFT;
19946+ mr[nr_range].end = end_pfn<<PAGE_SHIFT;
19947+ mr[nr_range].page_size_mask = page_size_mask;
19948+ nr_range++;
19949 }
19950- printk(KERN_CONT "\n");
19951-}
19952-#else
19953-static void __init early_memtest(unsigned long start, unsigned long end)
19954-{
19955+
19956+ return nr_range;
19957 }
19958-#endif
19959
19960 /*
19961 * Setup the direct mapping of the physical memory at PAGE_OFFSET.
19962 * This runs before bootmem is initialized and gets pages directly from
19963 * the physical memory. To access them they are temporarily mapped.
19964 */
19965-unsigned long __init_refok init_memory_mapping(unsigned long start, unsigned long end)
19966+unsigned long __init_refok init_memory_mapping(unsigned long start,
19967+ unsigned long end)
19968 {
19969- unsigned long next, last_map_addr = end;
19970- unsigned long start_phys = start, end_phys = end;
19971+ unsigned long last_map_addr = 0;
19972+ unsigned long page_size_mask = 0;
19973+ unsigned long start_pfn, end_pfn;
19974+
19975+ struct map_range mr[NR_RANGE_MR];
19976+ int nr_range, i;
19977
19978 printk(KERN_INFO "init_memory_mapping\n");
19979
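
save_mr() above appends a pfn range plus its allowed page sizes to a small fixed array, skipping empty ranges and refusing to overflow the NR_RANGE_MR slots. A self-contained model of the same helper; the mask bit and the sample ranges below are arbitrary, illustrative values:

/* Standalone model of save_mr(); not the kernel code itself. */
#include <stdio.h>
#include <stdlib.h>

#define PAGE_SHIFT 12
#define NR_RANGE_MR 5

struct map_range {
        unsigned long start;
        unsigned long end;
        unsigned page_size_mask;
};

static int save_mr(struct map_range *mr, int nr_range,
                   unsigned long start_pfn, unsigned long end_pfn,
                   unsigned long page_size_mask)
{
        if (start_pfn < end_pfn) {
                if (nr_range >= NR_RANGE_MR) {
                        fprintf(stderr, "out of map_range slots\n");
                        exit(1);
                }
                mr[nr_range].start = start_pfn << PAGE_SHIFT;
                mr[nr_range].end = end_pfn << PAGE_SHIFT;
                mr[nr_range].page_size_mask = page_size_mask;
                nr_range++;
        }
        return nr_range;
}

int main(void)
{
        struct map_range mr[NR_RANGE_MR] = { { 0 } };
        int n = 0;

        n = save_mr(mr, n, 0, 0, 0);            /* empty head: ignored */
        n = save_mr(mr, n, 0, 0x200, 1 << 1);   /* arbitrary illustrative mask */
        printf("%d range(s), first: %#lx-%#lx\n", n, mr[0].start, mr[0].end);
        return 0;
}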
19980@@ -924,51 +833,150 @@ unsigned long __init_refok init_memory_m
19981 * memory mapped. Unfortunately this is done currently before the
19982 * nodes are discovered.
19983 */
19984- if (!after_bootmem) {
19985+ if (!after_bootmem)
19986 init_gbpages();
19987- find_early_table_space(end);
19988+
19989+ if (direct_gbpages)
19990+ page_size_mask |= 1 << PG_LEVEL_1G;
19991+ if (cpu_has_pse)
19992+ page_size_mask |= 1 << PG_LEVEL_2M;
19993+
19994+ memset(mr, 0, sizeof(mr));
19995+ nr_range = 0;
19996+
19997+ /* head if not big page alignment ?*/
19998+ start_pfn = start >> PAGE_SHIFT;
19999+ end_pfn = ((start + (PMD_SIZE - 1)) >> PMD_SHIFT)
20000+ << (PMD_SHIFT - PAGE_SHIFT);
20001+ nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);
20002+
20003+ /* big page (2M) range*/
20004+ start_pfn = ((start + (PMD_SIZE - 1))>>PMD_SHIFT)
20005+ << (PMD_SHIFT - PAGE_SHIFT);
20006+ end_pfn = ((start + (PUD_SIZE - 1))>>PUD_SHIFT)
20007+ << (PUD_SHIFT - PAGE_SHIFT);
20008+ if (end_pfn > ((end>>PUD_SHIFT)<<(PUD_SHIFT - PAGE_SHIFT)))
20009+ end_pfn = ((end>>PUD_SHIFT)<<(PUD_SHIFT - PAGE_SHIFT));
20010+ nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
20011+ page_size_mask & (1<<PG_LEVEL_2M));
20012+
20013+ /* big page (1G) range */
20014+ start_pfn = end_pfn;
20015+ end_pfn = (end>>PUD_SHIFT) << (PUD_SHIFT - PAGE_SHIFT);
20016+ nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
20017+ page_size_mask &
20018+ ((1<<PG_LEVEL_2M)|(1<<PG_LEVEL_1G)));
20019+
20020+ /* tail is not big page (1G) alignment */
20021+ start_pfn = end_pfn;
20022+ end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT);
20023+ nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
20024+ page_size_mask & (1<<PG_LEVEL_2M));
20025+
20026+ /* tail is not big page (2M) alignment */
20027+ start_pfn = end_pfn;
20028+ end_pfn = end>>PAGE_SHIFT;
20029+ nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);
20030+
20031+ /* try to merge same page size and continuous */
20032+ for (i = 0; nr_range > 1 && i < nr_range - 1; i++) {
20033+ unsigned long old_start;
20034+ if (mr[i].end != mr[i+1].start ||
20035+ mr[i].page_size_mask != mr[i+1].page_size_mask)
20036+ continue;
20037+ /* move it */
20038+ old_start = mr[i].start;
20039+ memmove(&mr[i], &mr[i+1],
20040+ (nr_range - 1 - i) * sizeof (struct map_range));
20041+ mr[i--].start = old_start;
20042+ nr_range--;
20043 }
20044
20045- start = (unsigned long)__va(start);
20046- end = (unsigned long)__va(end);
20047+ for (i = 0; i < nr_range; i++)
20048+ printk(KERN_DEBUG " %010lx - %010lx page %s\n",
20049+ mr[i].start, mr[i].end,
20050+ (mr[i].page_size_mask & (1<<PG_LEVEL_1G))?"1G":(
20051+ (mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k"));
20052
20053- for (; start < end; start = next) {
20054- pgd_t *pgd = pgd_offset_k(start);
20055- unsigned long pud_phys;
20056- pud_t *pud;
20057+ if (!after_bootmem)
20058+ find_early_table_space(end);
20059
20060- if (after_bootmem)
20061- pud = pud_offset(pgd, start & PGDIR_MASK);
20062- else
20063- pud = alloc_static_page(&pud_phys);
20064- next = start + PGDIR_SIZE;
20065- if (next > end)
20066- next = end;
20067- last_map_addr = phys_pud_init(pud, __pa(start), __pa(next));
20068- if (!after_bootmem) {
20069- early_make_page_readonly(pud, XENFEAT_writable_page_tables);
20070- set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys));
20071+ if (!start) {
20072+ unsigned long addr, va = __START_KERNEL_map;
20073+ unsigned long *page = (unsigned long *)init_level4_pgt;
20074+
20075+ /* Kill mapping of memory below _text. */
20076+ while (va < (unsigned long)&_text) {
20077+ if (HYPERVISOR_update_va_mapping(va, __pte_ma(0), 0))
20078+ BUG();
20079+ va += PAGE_SIZE;
20080+ }
20081+
20082+ /* Blow away any spurious initial mappings. */
20083+ va = __START_KERNEL_map + (table_start << PAGE_SHIFT);
20084+ addr = page[pgd_index(va)];
20085+ addr_to_page(addr, page);
20086+ addr = page[pud_index(va)];
20087+ addr_to_page(addr, page);
20088+ while (pmd_index(va) | pte_index(va)) {
20089+ if (pmd_none(*(pmd_t *)&page[pmd_index(va)]))
20090+ break;
20091+ if (HYPERVISOR_update_va_mapping(va, __pte_ma(0), 0))
20092+ BUG();
20093+ va += PAGE_SIZE;
20094 }
20095 }
20096
20097- if (!after_bootmem) {
20098- BUG_ON(start_pfn != table_end);
20099+ for (i = 0; i < nr_range; i++)
20100+ last_map_addr = kernel_physical_mapping_init(
20101+ mr[i].start, mr[i].end,
20102+ mr[i].page_size_mask);
20103+
20104+ BUG_ON(table_cur > table_top);
20105+ if (!start)
20106 xen_finish_init_mapping();
20107- }
20108+ else if (table_cur < table_top)
20109+ /* Disable the 'table_cur' allocator. */
20110+ table_top = table_cur;
20111
20112 __flush_tlb_all();
20113
20114- if (!after_bootmem)
20115+ if (!after_bootmem && table_top > table_start)
20116 reserve_early(table_start << PAGE_SHIFT,
20117- table_end << PAGE_SHIFT, "PGTABLE");
20118+ table_top << PAGE_SHIFT, "PGTABLE");
20119+
20120+ printk(KERN_INFO "last_map_addr: %lx end: %lx\n",
20121+ last_map_addr, end);
20122
20123 if (!after_bootmem)
20124- early_memtest(start_phys, end_phys);
20125+ early_memtest(start, end);
20126
20127- return last_map_addr;
20128+ return last_map_addr >> PAGE_SHIFT;
20129 }
20130
20131 #ifndef CONFIG_NUMA
20132+void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn)
20133+{
20134+ unsigned long bootmap_size, bootmap;
20135+
20136+ e820_register_active_regions(0, start_pfn, end_pfn);
20137+#ifdef CONFIG_XEN
20138+ if (end_pfn > xen_start_info->nr_pages)
20139+ end_pfn = xen_start_info->nr_pages;
20140+#endif
20141+ bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT;
20142+ bootmap = find_e820_area(0, end_pfn<<PAGE_SHIFT, bootmap_size,
20143+ PAGE_SIZE);
20144+ if (bootmap == -1L)
20145+ panic("Cannot find bootmem map of size %ld\n", bootmap_size);
20146+ /* don't touch min_low_pfn */
20147+ bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap >> PAGE_SHIFT,
20148+ 0, end_pfn);
20149+ free_bootmem_with_active_regions(0, end_pfn);
20150+ early_res_to_bootmem(0, end_pfn<<PAGE_SHIFT);
20151+ reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT);
20152+}
20153+
20154 void __init paging_init(void)
20155 {
20156 unsigned long max_zone_pfns[MAX_NR_ZONES];
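
After init_memory_mapping() above splits [start, end) into head/2M/1G/tail pieces, a merge pass coalesces neighbouring entries that ended up with the same page_size_mask, so the mapping loop sees as few ranges as possible. A standalone model of that merge loop, with made-up sample ranges:

/* Illustrative model of the map_range merge pass; values are examples. */
#include <stdio.h>
#include <string.h>

struct map_range { unsigned long start, end; unsigned page_size_mask; };

static int merge_ranges(struct map_range *mr, int nr_range)
{
        int i;

        for (i = 0; nr_range > 1 && i < nr_range - 1; i++) {
                unsigned long old_start;

                if (mr[i].end != mr[i + 1].start ||
                    mr[i].page_size_mask != mr[i + 1].page_size_mask)
                        continue;
                old_start = mr[i].start;
                memmove(&mr[i], &mr[i + 1],
                        (nr_range - 1 - i) * sizeof(struct map_range));
                mr[i--].start = old_start;
                nr_range--;
        }
        return nr_range;
}

int main(void)
{
        struct map_range mr[3] = {
                { 0x000000, 0x200000, 0 },
                { 0x200000, 0x400000, 0 },      /* same mask, contiguous */
                { 0x400000, 0x800000, 2 },      /* different mask */
        };
        int n = merge_ranges(mr, 3);

        printf("ranges after merge: %d\n", n);  /* prints 2 */
        return 0;
}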
20157@@ -976,9 +984,9 @@ void __init paging_init(void)
20158 memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
20159 max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;
20160 max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
20161- max_zone_pfns[ZONE_NORMAL] = end_pfn;
20162+ max_zone_pfns[ZONE_NORMAL] = max_pfn;
20163
20164- memory_present(0, 0, end_pfn);
20165+ memory_present(0, 0, max_pfn);
20166 sparse_init();
20167 free_area_init_nodes(max_zone_pfns);
20168
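
paging_init() above fills max_zone_pfns[] with the upper pfn of each zone before calling free_area_init_nodes(). A standalone illustration of those boundaries, assuming the usual x86-64 limits of 16 MiB for ZONE_DMA and 4 GiB for ZONE_DMA32 plus an example max_pfn:

/* Standalone illustration; zone limits and max_pfn are assumed values. */
#include <stdio.h>

#define PAGE_SHIFT    12
#define MAX_DMA_PFN   ((16UL * 1024 * 1024) >> PAGE_SHIFT)
#define MAX_DMA32_PFN ((4UL << 30) >> PAGE_SHIFT)

int main(void)
{
        unsigned long max_pfn = 8UL << (30 - PAGE_SHIFT);  /* e.g. an 8 GiB box */
        unsigned long zones[3] = { MAX_DMA_PFN, MAX_DMA32_PFN, max_pfn };
        const char *names[3] = { "DMA", "DMA32", "NORMAL" };
        int i;

        for (i = 0; i < 3; i++)
                printf("ZONE_%-6s ends at pfn %lu\n", names[i], zones[i]);
        return 0;
}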
20169@@ -1069,8 +1077,8 @@ void __init mem_init(void)
20170 init_page_count(pfn_to_page(pfn));
20171 totalram_pages++;
20172 }
20173- reservedpages = end_pfn - totalram_pages -
20174- absent_pages_in_range(0, end_pfn);
20175+ reservedpages = max_pfn - totalram_pages -
20176+ absent_pages_in_range(0, max_pfn);
20177 after_bootmem = 1;
20178
20179 codesize = (unsigned long) &_etext - (unsigned long) &_text;
20180@@ -1089,7 +1097,7 @@ void __init mem_init(void)
20181 printk(KERN_INFO "Memory: %luk/%luk available (%ldk kernel code, "
20182 "%ldk reserved, %ldk data, %ldk init)\n",
20183 (unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
20184- end_pfn << (PAGE_SHIFT-10),
20185+ max_pfn << (PAGE_SHIFT-10),
20186 codesize >> 10,
20187 reservedpages << (PAGE_SHIFT-10),
20188 datasize >> 10,
20189@@ -1152,6 +1160,13 @@ EXPORT_SYMBOL_GPL(rodata_test_data);
20190 void mark_rodata_ro(void)
20191 {
20192 unsigned long start = PFN_ALIGN(_stext), end = PFN_ALIGN(__end_rodata);
20193+ unsigned long rodata_start =
20194+ ((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK;
20195+
20196+#ifdef CONFIG_DYNAMIC_FTRACE
20197+ /* Dynamic tracing modifies the kernel text section */
20198+ start = rodata_start;
20199+#endif
20200
20201 printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
20202 (end - start) >> 10);
20203@@ -1161,8 +1176,7 @@ void mark_rodata_ro(void)
20204 * The rodata section (but not the kernel text!) should also be
20205 * not-executable.
20206 */
20207- start = ((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK;
20208- set_memory_nx(start, (end - start) >> PAGE_SHIFT);
20209+ set_memory_nx(rodata_start, (end - rodata_start) >> PAGE_SHIFT);
20210
20211 rodata_test();
20212
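
The rodata_start computation above rounds __start_rodata up to the next page boundary with (addr + PAGE_SIZE - 1) & PAGE_MASK. A tiny standalone check of that expression; the sample address is arbitrary:

/* Round-up-to-page-boundary check; illustration only, not kernel code. */
#include <assert.h>
#include <stdio.h>

#define PAGE_SIZE 4096UL
#define PAGE_MASK (~(PAGE_SIZE - 1))

int main(void)
{
        unsigned long addr = 0x1234567;
        unsigned long aligned = (addr + PAGE_SIZE - 1) & PAGE_MASK;

        assert(aligned % PAGE_SIZE == 0 && aligned >= addr);
        printf("%#lx -> %#lx\n", addr, aligned);
        return 0;
}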
20213@@ -1184,24 +1198,26 @@ void free_initrd_mem(unsigned long start
20214 }
20215 #endif
20216
20217-void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
20218+int __init reserve_bootmem_generic(unsigned long phys, unsigned long len,
20219+ int flags)
20220 {
20221 #ifdef CONFIG_NUMA
20222 int nid, next_nid;
20223+ int ret;
20224 #endif
20225 unsigned long pfn = phys >> PAGE_SHIFT;
20226
20227- if (pfn >= end_pfn) {
20228+ if (pfn >= max_pfn) {
20229 /*
20230 * This can happen with kdump kernels when accessing
20231 * firmware tables:
20232 */
20233 if (pfn < max_pfn_mapped)
20234- return;
20235+ return -EFAULT;
20236
20237- printk(KERN_ERR "reserve_bootmem: illegal reserve %lx %u\n",
20238+ printk(KERN_ERR "reserve_bootmem: illegal reserve %lx %lu\n",
20239 phys, len);
20240- return;
20241+ return -EFAULT;
20242 }
20243
20244 /* Should check here against the e820 map to avoid double free */
20245@@ -1209,9 +1225,13 @@ void __init reserve_bootmem_generic(unsi
20246 nid = phys_to_nid(phys);
20247 next_nid = phys_to_nid(phys + len - 1);
20248 if (nid == next_nid)
20249- reserve_bootmem_node(NODE_DATA(nid), phys, len, BOOTMEM_DEFAULT);
20250+ ret = reserve_bootmem_node(NODE_DATA(nid), phys, len, flags);
20251 else
20252- reserve_bootmem(phys, len, BOOTMEM_DEFAULT);
20253+ ret = reserve_bootmem(phys, len, flags);
20254+
20255+ if (ret != 0)
20256+ return ret;
20257+
20258 #else
20259 reserve_bootmem(phys, len, BOOTMEM_DEFAULT);
20260 #endif
20261@@ -1222,6 +1242,8 @@ void __init reserve_bootmem_generic(unsi
20262 set_dma_reserve(dma_reserve);
20263 }
20264 #endif
20265+
20266+ return 0;
20267 }
20268
20269 int kern_addr_valid(unsigned long addr)
20270@@ -1326,7 +1348,7 @@ vmemmap_populate(struct page *start_page
20271 pmd_t *pmd;
20272
20273 for (; addr < end; addr = next) {
20274- next = pmd_addr_end(addr, end);
20275+ void *p = NULL;
20276
20277 pgd = vmemmap_pgd_populate(addr, node);
20278 if (!pgd)
20279@@ -1336,33 +1358,51 @@ vmemmap_populate(struct page *start_page
20280 if (!pud)
20281 return -ENOMEM;
20282
20283- pmd = pmd_offset(pud, addr);
20284- if (pmd_none(*pmd)) {
20285- pte_t entry;
20286- void *p;
20287+ if (!cpu_has_pse) {
20288+ next = (addr + PAGE_SIZE) & PAGE_MASK;
20289+ pmd = vmemmap_pmd_populate(pud, addr, node);
20290+
20291+ if (!pmd)
20292+ return -ENOMEM;
20293+
20294+ p = vmemmap_pte_populate(pmd, addr, node);
20295
20296- p = vmemmap_alloc_block(PMD_SIZE, node);
20297 if (!p)
20298 return -ENOMEM;
20299
20300- entry = pfn_pte(__pa(p) >> PAGE_SHIFT,
20301- PAGE_KERNEL_LARGE);
20302- set_pmd(pmd, __pmd_ma(__pte_val(entry)));
20303-
20304- /* check to see if we have contiguous blocks */
20305- if (p_end != p || node_start != node) {
20306- if (p_start)
20307- printk(KERN_DEBUG " [%lx-%lx] PMD -> [%p-%p] on node %d\n",
20308- addr_start, addr_end-1, p_start, p_end-1, node_start);
20309- addr_start = addr;
20310- node_start = node;
20311- p_start = p;
20312- }
20313- addr_end = addr + PMD_SIZE;
20314- p_end = p + PMD_SIZE;
20315+ addr_end = addr + PAGE_SIZE;
20316+ p_end = p + PAGE_SIZE;
20317 } else {
20318- vmemmap_verify((pte_t *)pmd, node, addr, next);
20319+ next = pmd_addr_end(addr, end);
20320+
20321+ pmd = pmd_offset(pud, addr);
20322+ if (pmd_none(*pmd)) {
20323+ pte_t entry;
20324+
20325+ p = vmemmap_alloc_block(PMD_SIZE, node);
20326+ if (!p)
20327+ return -ENOMEM;
20328+
20329+ entry = pfn_pte(__pa(p) >> PAGE_SHIFT,
20330+ PAGE_KERNEL_LARGE);
20331+ set_pmd(pmd, __pmd_ma(__pte_val(entry)));
20332+
20333+ /* check to see if we have contiguous blocks */
20334+ if (p_end != p || node_start != node) {
20335+ if (p_start)
20336+ printk(KERN_DEBUG " [%lx-%lx] PMD -> [%p-%p] on node %d\n",
20337+ addr_start, addr_end-1, p_start, p_end-1, node_start);
20338+ addr_start = addr;
20339+ node_start = node;
20340+ p_start = p;
20341+ }
20342+
20343+ addr_end = addr + PMD_SIZE;
20344+ p_end = p + PMD_SIZE;
20345+ } else
20346+ vmemmap_verify((pte_t *)pmd, node, addr, next);
20347 }
20348+
20349 }
20350 return 0;
20351 }
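
The vmemmap_populate() change above keeps the 2 MiB PMD-backed vmemmap when the CPU has PSE and falls back to per-page PTEs otherwise. A standalone sketch of just that size decision, with the usual x86-64 sizes assumed:

/* Sketch of the PSE/no-PSE vmemmap granularity choice; purely illustrative. */
#include <stdio.h>
#include <stdbool.h>

#define PMD_SIZE  (2UL << 20)
#define PAGE_SIZE (4UL << 10)

static unsigned long vmemmap_step(bool cpu_has_pse)
{
        return cpu_has_pse ? PMD_SIZE : PAGE_SIZE;
}

int main(void)
{
        unsigned long size = 64UL << 20;        /* 64 MiB of struct pages */

        printf("with PSE:    %lu mappings\n", size / vmemmap_step(true));
        printf("without PSE: %lu mappings\n", size / vmemmap_step(false));
        return 0;
}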
20352--- sle11-2009-10-16.orig/arch/x86/mm/ioremap-xen.c 2009-03-16 16:38:05.000000000 +0100
20353+++ sle11-2009-10-16/arch/x86/mm/ioremap-xen.c 2009-06-04 10:21:39.000000000 +0200
20354@@ -13,6 +13,7 @@
20355 #include <linux/pfn.h>
20356 #include <linux/slab.h>
20357 #include <linux/vmalloc.h>
20358+#include <linux/mmiotrace.h>
20359
20360 #include <asm/cacheflush.h>
20361 #include <asm/e820.h>
20362@@ -274,7 +275,8 @@ int ioremap_check_change_attr(unsigned l
20363 for (sz = rc = 0; sz < size && !rc; ++mfn, sz += PAGE_SIZE) {
20364 unsigned long pfn = mfn_to_local_pfn(mfn);
20365
20366- if (pfn >= max_pfn_mapped)
20367+ if (pfn >= max_low_pfn_mapped &&
20368+ (pfn < (1UL<<(32 - PAGE_SHIFT)) || pfn >= max_pfn_mapped))
20369 continue;
20370 rc = ioremap_change_attr((unsigned long)__va(pfn << PAGE_SHIFT),
20371 PAGE_SIZE, prot_val);
20372@@ -297,11 +299,14 @@ static void __iomem *__ioremap_caller(re
20373 {
20374 unsigned long mfn, offset, vaddr;
20375 resource_size_t last_addr;
20376+ const resource_size_t unaligned_phys_addr = phys_addr;
20377+ const unsigned long unaligned_size = size;
20378 struct vm_struct *area;
20379 unsigned long new_prot_val;
20380 pgprot_t prot;
20381 int retval;
20382 domid_t domid = DOMID_IO;
20383+ void __iomem *ret_addr;
20384
20385 /* Don't allow wraparound or zero size */
20386 last_addr = phys_addr + size - 1;
20387@@ -318,7 +323,7 @@ static void __iomem *__ioremap_caller(re
20388 /*
20389 * Don't remap the low PCI/ISA area, it's always mapped..
20390 */
20391- if (is_initial_xendomain() && last_addr < ISA_END_ADDRESS)
20392+ if (is_initial_xendomain() && is_ISA_range(phys_addr, last_addr))
20393 return (__force void __iomem *)isa_bus_to_virt((unsigned long)phys_addr);
20394
20395 /*
20396@@ -342,7 +347,7 @@ static void __iomem *__ioremap_caller(re
20397 phys_addr &= PAGE_MASK;
20398 size = PAGE_ALIGN(last_addr+1) - phys_addr;
20399
20400- retval = reserve_memtype(phys_addr, phys_addr + size,
20401+ retval = reserve_memtype(phys_addr, (u64)phys_addr + size,
20402 prot_val, &new_prot_val);
20403 if (retval) {
20404 pr_debug("Warning: reserve_memtype returned %d\n", retval);
20405@@ -410,7 +415,10 @@ static void __iomem *__ioremap_caller(re
20406 return NULL;
20407 }
20408
20409- return (void __iomem *) (vaddr + offset);
20410+ ret_addr = (void __iomem *) (vaddr + offset);
20411+ mmiotrace_ioremap(unaligned_phys_addr, unaligned_size, ret_addr);
20412+
20413+ return ret_addr;
20414 }
20415
20416 /**
20417@@ -438,7 +446,7 @@ void __iomem *ioremap_nocache(resource_s
20418 {
20419 /*
20420 * Ideally, this should be:
20421- * pat_wc_enabled ? _PAGE_CACHE_UC : _PAGE_CACHE_UC_MINUS;
20422+ * pat_enabled ? _PAGE_CACHE_UC : _PAGE_CACHE_UC_MINUS;
20423 *
20424 * Till we fix all X drivers to use ioremap_wc(), we will use
20425 * UC MINUS.
20426@@ -462,7 +470,7 @@ EXPORT_SYMBOL(ioremap_nocache);
20427 */
20428 void __iomem *ioremap_wc(unsigned long phys_addr, unsigned long size)
20429 {
20430- if (pat_wc_enabled)
20431+ if (pat_enabled)
20432 return __ioremap_caller(phys_addr, size, _PAGE_CACHE_WC,
20433 __builtin_return_address(0));
20434 else
20435@@ -502,6 +510,14 @@ static void __iomem *ioremap_default(res
20436 }
20437 #endif
20438
20439+void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size,
20440+ unsigned long prot_val)
20441+{
20442+ return __ioremap_caller(phys_addr, size, (prot_val & _PAGE_CACHE_MASK),
20443+ __builtin_return_address(0));
20444+}
20445+EXPORT_SYMBOL(ioremap_prot);
20446+
20447 /**
20448 * iounmap - Free a IO remapping
20449 * @addr: virtual address from ioremap_*
20450@@ -526,6 +542,8 @@ void iounmap(volatile void __iomem *addr
20451 addr = (volatile void __iomem *)
20452 (PAGE_MASK & (unsigned long __force)addr);
20453
20454+ mmiotrace_iounmap(addr);
20455+
20456 /* Use the vm area unlocked, assuming the caller
20457 ensures there isn't another iounmap for the same address
20458 in parallel. Reuse of the virtual address is prevented by
20459@@ -533,7 +551,7 @@ void iounmap(volatile void __iomem *addr
20460 cpa takes care of the direct mappings. */
20461 read_lock(&vmlist_lock);
20462 for (p = vmlist; p; p = p->next) {
20463- if (p->addr == addr)
20464+ if (p->addr == (void __force *)addr)
20465 break;
20466 }
20467 read_unlock(&vmlist_lock);
20468@@ -547,7 +565,7 @@ void iounmap(volatile void __iomem *addr
20469 free_memtype(p->phys_addr, p->phys_addr + get_vm_area_size(p));
20470
20471 /* Finally remove it */
20472- o = remove_vm_area((void *)addr);
20473+ o = remove_vm_area((void __force *)addr);
20474 BUG_ON(p != o || o == NULL);
20475 kfree(p);
20476 }
20477@@ -567,7 +585,7 @@ void *xlate_dev_mem_ptr(unsigned long ph
20478 if (page_is_ram(start >> PAGE_SHIFT))
20479 return __va(phys);
20480
20481- addr = (void *)ioremap_default(start, PAGE_SIZE);
20482+ addr = (void __force *)ioremap_default(start, PAGE_SIZE);
20483 if (addr)
20484 addr = (void *)((unsigned long)addr | (phys & ~PAGE_MASK));
20485
20486@@ -595,8 +613,7 @@ static int __init early_ioremap_debug_se
20487 early_param("early_ioremap_debug", early_ioremap_debug_setup);
20488
20489 static __initdata int after_paging_init;
20490-static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)]
20491- __section(.bss.page_aligned);
20492+static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] __page_aligned_bss;
20493
20494 #ifdef CONFIG_X86_32
20495 static inline pmd_t * __init early_ioremap_pmd(unsigned long addr)
20496@@ -695,10 +712,11 @@ static void __init __early_set_fixmap(en
20497 return;
20498 }
20499 pte = early_ioremap_pte(addr);
20500+
20501 if (pgprot_val(flags))
20502 set_pte(pte, pfn_pte_ma(phys >> PAGE_SHIFT, flags));
20503 else
20504- pte_clear(NULL, addr, pte);
20505+ pte_clear(&init_mm, addr, pte);
20506 __flush_tlb_one(addr);
20507 }
20508
20509@@ -726,13 +744,11 @@ static int __init check_early_ioremap_le
20510 {
20511 if (!early_ioremap_nested)
20512 return 0;
20513-
20514- printk(KERN_WARNING
20515+ WARN(1, KERN_WARNING
20516 "Debug warning: early ioremap leak of %d areas detected.\n",
20517- early_ioremap_nested);
20518+ early_ioremap_nested);
20519 printk(KERN_WARNING
20520- "please boot with early_ioremap_debug and report the dmesg.\n");
20521- WARN_ON(1);
20522+ "please boot with early_ioremap_debug and report the dmesg.\n");
20523
20524 return 1;
20525 }
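
__ioremap_caller() above maps whole pages and hands back the page-aligned virtual address plus the caller's sub-page offset; the unaligned address and size are kept only so mmiotrace can record the original request. A standalone model of that alignment bookkeeping; the sample MMIO address is made up:

/* Alignment bookkeeping model; example values, not kernel code. */
#include <stdio.h>

#define PAGE_SIZE 4096UL
#define PAGE_MASK (~(PAGE_SIZE - 1))
#define PAGE_ALIGN(x) (((x) + PAGE_SIZE - 1) & PAGE_MASK)

int main(void)
{
        unsigned long phys_addr = 0xfebc1044;   /* example MMIO register */
        unsigned long size = 0x20;
        unsigned long last_addr = phys_addr + size - 1;
        unsigned long offset = phys_addr & ~PAGE_MASK;

        phys_addr &= PAGE_MASK;
        size = PAGE_ALIGN(last_addr + 1) - phys_addr;

        printf("map %#lx for %lu bytes, return base + %#lx\n",
               phys_addr, size, offset);
        return 0;
}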
20526--- sle11-2009-10-16.orig/arch/x86/mm/pageattr-xen.c 2009-03-16 16:38:05.000000000 +0100
20527+++ sle11-2009-10-16/arch/x86/mm/pageattr-xen.c 2009-06-04 10:21:39.000000000 +0200
20528@@ -34,6 +34,47 @@ struct cpa_data {
20529 unsigned force_split : 1;
20530 };
20531
20532+#ifdef CONFIG_PROC_FS
20533+static unsigned long direct_pages_count[PG_LEVEL_NUM];
20534+
20535+void update_page_count(int level, unsigned long pages)
20536+{
20537+ unsigned long flags;
20538+
20539+ /* Protect against CPA */
20540+ spin_lock_irqsave(&pgd_lock, flags);
20541+ direct_pages_count[level] += pages;
20542+ spin_unlock_irqrestore(&pgd_lock, flags);
20543+}
20544+
20545+static void split_page_count(int level)
20546+{
20547+ direct_pages_count[level]--;
20548+ direct_pages_count[level - 1] += PTRS_PER_PTE;
20549+}
20550+
20551+int arch_report_meminfo(char *page)
20552+{
20553+ int n = sprintf(page, "DirectMap4k: %8lu kB\n",
20554+ direct_pages_count[PG_LEVEL_4K] << 2);
20555+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
20556+ n += sprintf(page + n, "DirectMap2M: %8lu kB\n",
20557+ direct_pages_count[PG_LEVEL_2M] << 11);
20558+#else
20559+ n += sprintf(page + n, "DirectMap4M: %8lu kB\n",
20560+ direct_pages_count[PG_LEVEL_2M] << 12);
20561+#endif
20562+#ifdef CONFIG_X86_64
20563+ if (direct_gbpages)
20564+ n += sprintf(page + n, "DirectMap1G: %8lu kB\n",
20565+ direct_pages_count[PG_LEVEL_1G] << 20);
20566+#endif
20567+ return n;
20568+}
20569+#else
20570+static inline void split_page_count(int level) { }
20571+#endif
20572+
20573 #ifdef CONFIG_X86_64
20574
20575 static inline unsigned long highmap_start_pfn(void)
20576@@ -106,7 +147,7 @@ static void cpa_flush_all(unsigned long
20577 {
20578 BUG_ON(irqs_disabled());
20579
20580- on_each_cpu(__cpa_flush_all, (void *) cache, 1, 1);
20581+ on_each_cpu(__cpa_flush_all, (void *) cache, 1);
20582 }
20583
20584 static void __cpa_flush_range(void *arg)
20585@@ -127,7 +168,7 @@ static void cpa_flush_range(unsigned lon
20586 BUG_ON(irqs_disabled());
20587 WARN_ON(PAGE_ALIGN(start) != start);
20588
20589- on_each_cpu(__cpa_flush_range, NULL, 1, 1);
20590+ on_each_cpu(__cpa_flush_range, NULL, 1);
20591
20592 if (!cache)
20593 return;
20594@@ -229,6 +270,7 @@ pte_t *lookup_address(unsigned long addr
20595
20596 return pte_offset_kernel(pmd, address);
20597 }
20598+EXPORT_SYMBOL_GPL(lookup_address);
20599
20600 /*
20601 * Set the new pmd in all the pgds we know about:
20602@@ -509,6 +551,16 @@ static int split_large_page(pte_t *kpte,
20603 }
20604 #endif
20605
20606+ if (address >= (unsigned long)__va(0) &&
20607+ address < (unsigned long)__va(max_low_pfn_mapped << PAGE_SHIFT))
20608+ split_page_count(level);
20609+
20610+#ifdef CONFIG_X86_64
20611+ if (address >= (unsigned long)__va(1UL<<32) &&
20612+ address < (unsigned long)__va(max_pfn_mapped << PAGE_SHIFT))
20613+ split_page_count(level);
20614+#endif
20615+
20616 /*
20617 * Get the target mfn from the original entry:
20618 */
20619@@ -566,10 +618,9 @@ repeat:
20620 if (!__pte_val(old_pte)) {
20621 if (!primary)
20622 return 0;
20623- printk(KERN_WARNING "CPA: called for zero pte. "
20624+ WARN(1, KERN_WARNING "CPA: called for zero pte. "
20625 "vaddr = %lx cpa->vaddr = %lx\n", address,
20626 cpa->vaddr);
20627- WARN_ON(1);
20628 return -EINVAL;
20629 }
20630
20631@@ -634,15 +685,24 @@ static int cpa_process_alias(struct cpa_
20632 struct cpa_data alias_cpa;
20633 int ret = 0;
20634
20635- if (cpa->pfn > max_pfn_mapped)
20636+ if (cpa->pfn >= max_pfn_mapped)
20637 return 0;
20638
20639+#ifdef CONFIG_X86_64
20640+ if (cpa->pfn >= max_low_pfn_mapped && cpa->pfn < (1UL<<(32-PAGE_SHIFT)))
20641+ return 0;
20642+#endif
20643 /*
20644 * No need to redo, when the primary call touched the direct
20645 * mapping already:
20646 */
20647- if (!within(cpa->vaddr, PAGE_OFFSET,
20648- PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT))) {
20649+ if (!(within(cpa->vaddr, PAGE_OFFSET,
20650+ PAGE_OFFSET + (max_low_pfn_mapped << PAGE_SHIFT))
20651+#ifdef CONFIG_X86_64
20652+ || within(cpa->vaddr, PAGE_OFFSET + (1UL<<32),
20653+ PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT))
20654+#endif
20655+ )) {
20656
20657 alias_cpa = *cpa;
20658 alias_cpa.vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT);
20659@@ -796,6 +856,51 @@ static inline int change_page_attr_clear
20660 return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask, 0);
20661 }
20662
20663+#ifdef CONFIG_XEN
20664+static void _free_memtype(u64 pstart, u64 pend)
20665+{
20666+ u64 pa = pstart &= __PHYSICAL_MASK;
20667+ u64 ma = phys_to_machine(pa);
20668+
20669+ while ((pa += PAGE_SIZE) < pend) {
20670+ if (phys_to_machine(pa) != ma + (pa - pstart)) {
20671+ free_memtype(ma, ma + (pa - pstart));
20672+ pstart = pa;
20673+ ma = phys_to_machine(pa);
20674+ }
20675+ }
20676+ free_memtype(ma, ma + (pend - pstart));
20677+}
20678+#define free_memtype _free_memtype
20679+
20680+static int _reserve_memtype(u64 pstart, u64 pend, unsigned long req_type)
20681+{
20682+ u64 pcur = pstart &= __PHYSICAL_MASK, pa = pcur;
20683+ u64 ma = phys_to_machine(pa);
20684+ int rc = 0;
20685+
20686+ while ((pa += PAGE_SIZE) < pend) {
20687+ if (phys_to_machine(pa) != ma + (pa - pcur)) {
20688+ rc = reserve_memtype(ma, ma + (pa - pcur),
20689+ req_type, NULL);
20690+ if (rc)
20691+ break;
20692+ pcur = pa;
20693+ ma = phys_to_machine(pa);
20694+ }
20695+ }
20696+ if (likely(!rc))
20697+ rc = reserve_memtype(ma, ma + (pend - pcur), req_type, NULL);
20698+
20699+ if (unlikely(!rc) && pstart < pcur)
20700+ _free_memtype(pstart, pcur);
20701+
20702+ return rc;
20703+}
20704+#define reserve_memtype(s, e, r, n) \
20705+ _reserve_memtype(s, e, BUILD_BUG_ON_ZERO(n) ?: (r))
20706+#endif
20707+
20708 int _set_memory_uc(unsigned long addr, int numpages)
20709 {
20710 /*
20711@@ -810,7 +915,7 @@ int set_memory_uc(unsigned long addr, in
20712 /*
20713 * for now UC MINUS. see comments in ioremap_nocache()
20714 */
20715- if (reserve_memtype(addr, addr + numpages * PAGE_SIZE,
20716+ if (reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE,
20717 _PAGE_CACHE_UC_MINUS, NULL))
20718 return -EINVAL;
20719
20720@@ -826,10 +931,10 @@ int _set_memory_wc(unsigned long addr, i
20721
20722 int set_memory_wc(unsigned long addr, int numpages)
20723 {
20724- if (!pat_wc_enabled)
20725+ if (!pat_enabled)
20726 return set_memory_uc(addr, numpages);
20727
20728- if (reserve_memtype(addr, addr + numpages * PAGE_SIZE,
20729+ if (reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE,
20730 _PAGE_CACHE_WC, NULL))
20731 return -EINVAL;
20732
20733@@ -845,7 +950,7 @@ int _set_memory_wb(unsigned long addr, i
20734
20735 int set_memory_wb(unsigned long addr, int numpages)
20736 {
20737- free_memtype(addr, addr + numpages * PAGE_SIZE);
20738+ free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
20739
20740 return _set_memory_wb(addr, numpages);
20741 }
20742--- sle11-2009-10-16.orig/arch/x86/mm/pat-xen.c 2009-03-16 16:38:05.000000000 +0100
20743+++ sle11-2009-10-16/arch/x86/mm/pat-xen.c 2009-06-04 10:21:39.000000000 +0200
20744@@ -12,6 +12,8 @@
20745 #include <linux/gfp.h>
20746 #include <linux/fs.h>
20747 #include <linux/bootmem.h>
20748+#include <linux/debugfs.h>
20749+#include <linux/seq_file.h>
20750
20751 #include <asm/msr.h>
20752 #include <asm/tlbflush.h>
20753@@ -26,11 +28,11 @@
20754 #include <asm/io.h>
20755
20756 #ifdef CONFIG_X86_PAT
20757-int __read_mostly pat_wc_enabled = 1;
20758+int __read_mostly pat_enabled = 1;
20759
20760 void __cpuinit pat_disable(char *reason)
20761 {
20762- pat_wc_enabled = 0;
20763+ pat_enabled = 0;
20764 printk(KERN_INFO "%s\n", reason);
20765 }
20766
20767@@ -42,6 +44,19 @@ static int __init nopat(char *str)
20768 early_param("nopat", nopat);
20769 #endif
20770
20771+
20772+static int debug_enable;
20773+static int __init pat_debug_setup(char *str)
20774+{
20775+ debug_enable = 1;
20776+ return 0;
20777+}
20778+__setup("debugpat", pat_debug_setup);
20779+
20780+#define dprintk(fmt, arg...) \
20781+ do { if (debug_enable) printk(KERN_INFO fmt, ##arg); } while (0)
20782+
20783+
20784 static u64 __read_mostly boot_pat_state;
20785
20786 enum {
20787@@ -53,24 +68,25 @@ enum {
20788 PAT_UC_MINUS = 7, /* UC, but can be overriden by MTRR */
20789 };
20790
20791-#define PAT(x,y) ((u64)PAT_ ## y << ((x)*8))
20792+#define PAT(x, y) ((u64)PAT_ ## y << ((x)*8))
20793
20794 void pat_init(void)
20795 {
20796 u64 pat;
20797
20798- if (!pat_wc_enabled)
20799+ if (!pat_enabled)
20800 return;
20801
20802 /* Paranoia check. */
20803- if (!cpu_has_pat) {
20804- printk(KERN_ERR "PAT enabled, but CPU feature cleared\n");
20805+ if (!cpu_has_pat && boot_pat_state) {
20806 /*
20807- * Panic if this happens on the secondary CPU, and we
20808+ * If this happens we are on a secondary CPU, but
20809 * switched to PAT on the boot CPU. We have no way to
20810 * undo PAT.
20811- */
20812- BUG_ON(boot_pat_state);
20813+ */
20814+ printk(KERN_ERR "PAT enabled, "
20815+ "but not supported by secondary CPU\n");
20816+ BUG();
20817 }
20818
20819 #ifndef CONFIG_XEN
20820@@ -87,8 +103,8 @@ void pat_init(void)
20821 * 011 UC _PAGE_CACHE_UC
20822 * PAT bit unused
20823 */
20824- pat = PAT(0,WB) | PAT(1,WC) | PAT(2,UC_MINUS) | PAT(3,UC) |
20825- PAT(4,WB) | PAT(5,WC) | PAT(6,UC_MINUS) | PAT(7,UC);
20826+ pat = PAT(0, WB) | PAT(1, WC) | PAT(2, UC_MINUS) | PAT(3, UC) |
20827+ PAT(4, WB) | PAT(5, WC) | PAT(6, UC_MINUS) | PAT(7, UC);
20828
20829 /* Boot CPU check */
20830 if (!boot_pat_state)
20831@@ -113,13 +129,13 @@ void pat_init(void)
20832 static char *cattr_name(unsigned long flags)
20833 {
20834 switch (flags & _PAGE_CACHE_MASK) {
20835- case _PAGE_CACHE_UC: return "uncached";
20836- case _PAGE_CACHE_UC_MINUS: return "uncached-minus";
20837- case _PAGE_CACHE_WB: return "write-back";
20838- case _PAGE_CACHE_WC: return "write-combining";
20839- case _PAGE_CACHE_WP: return "write-protected";
20840- case _PAGE_CACHE_WT: return "write-through";
20841- default: return "broken";
20842+ case _PAGE_CACHE_UC: return "uncached";
20843+ case _PAGE_CACHE_UC_MINUS: return "uncached-minus";
20844+ case _PAGE_CACHE_WB: return "write-back";
20845+ case _PAGE_CACHE_WC: return "write-combining";
20846+ case _PAGE_CACHE_WP: return "write-protected";
20847+ case _PAGE_CACHE_WT: return "write-through";
20848+ default: return "broken";
20849 }
20850 }
20851
20852@@ -157,49 +173,55 @@ static DEFINE_SPINLOCK(memtype_lock); /
20853 * The intersection is based on "Effective Memory Type" tables in IA-32
20854 * SDM vol 3a
20855 */
20856-static int pat_x_mtrr_type(u64 start, u64 end, unsigned long prot,
20857- unsigned long *ret_prot)
20858+static unsigned long pat_x_mtrr_type(u64 start, u64 end, unsigned long req_type)
20859 {
20860- unsigned long pat_type;
20861- u8 mtrr_type;
20862-
20863- pat_type = prot & _PAGE_CACHE_MASK;
20864- prot &= (~_PAGE_CACHE_MASK);
20865-
20866- /*
20867- * We return the PAT request directly for types where PAT takes
20868- * precedence with respect to MTRR and for UC_MINUS.
20869- * Consistency checks with other PAT requests is done later
20870- * while going through memtype list.
20871- */
20872- if (pat_type == _PAGE_CACHE_WC) {
20873- *ret_prot = prot | _PAGE_CACHE_WC;
20874- return 0;
20875- } else if (pat_type == _PAGE_CACHE_UC_MINUS) {
20876- *ret_prot = prot | _PAGE_CACHE_UC_MINUS;
20877- return 0;
20878- } else if (pat_type == _PAGE_CACHE_UC) {
20879- *ret_prot = prot | _PAGE_CACHE_UC;
20880- return 0;
20881- }
20882-
20883 /*
20884 * Look for MTRR hint to get the effective type in case where PAT
20885 * request is for WB.
20886 */
20887- mtrr_type = mtrr_type_lookup(start, end);
20888+ if (req_type == _PAGE_CACHE_WB) {
20889+ u8 mtrr_type;
20890
20891- if (mtrr_type == MTRR_TYPE_UNCACHABLE) {
20892- *ret_prot = prot | _PAGE_CACHE_UC;
20893- } else if (mtrr_type == MTRR_TYPE_WRCOMB) {
20894- *ret_prot = prot | _PAGE_CACHE_WC;
20895- } else {
20896- *ret_prot = prot | _PAGE_CACHE_WB;
20897+ mtrr_type = mtrr_type_lookup(start, end);
20898+ if (mtrr_type == MTRR_TYPE_UNCACHABLE)
20899+ return _PAGE_CACHE_UC;
20900+ if (mtrr_type == MTRR_TYPE_WRCOMB)
20901+ return _PAGE_CACHE_WC;
20902+ }
20903+
20904+ return req_type;
20905+}
20906+
20907+static int chk_conflict(struct memtype *new, struct memtype *entry,
20908+ unsigned long *type)
20909+{
20910+ if (new->type != entry->type) {
20911+ if (type) {
20912+ new->type = entry->type;
20913+ *type = entry->type;
20914+ } else
20915+ goto conflict;
20916 }
20917
20918+ /* check overlaps with more than one entry in the list */
20919+ list_for_each_entry_continue(entry, &memtype_list, nd) {
20920+ if (new->end <= entry->start)
20921+ break;
20922+ else if (new->type != entry->type)
20923+ goto conflict;
20924+ }
20925 return 0;
20926+
20927+ conflict:
20928+ printk(KERN_INFO "%s:%d conflicting memory types "
20929+ "%Lx-%Lx %s<->%s\n", current->comm, current->pid, new->start,
20930+ new->end, cattr_name(new->type), cattr_name(entry->type));
20931+ return -EBUSY;
20932 }
20933
20934+static struct memtype *cached_entry;
20935+static u64 cached_start;
20936+
20937 /*
20938 * req_type typically has one of the:
20939 * - _PAGE_CACHE_WB
20940@@ -210,37 +232,36 @@ static int pat_x_mtrr_type(u64 start, u6
20941 * req_type will have a special case value '-1', when requester want to inherit
20942 * the memory type from mtrr (if WB), existing PAT, defaulting to UC_MINUS.
20943 *
20944- * If ret_type is NULL, function will return an error if it cannot reserve the
20945- * region with req_type. If ret_type is non-null, function will return
20946- * available type in ret_type in case of no error. In case of any error
20947+ * If new_type is NULL, function will return an error if it cannot reserve the
20948+ * region with req_type. If new_type is non-NULL, function will return
20949+ * available type in new_type in case of no error. In case of any error
20950 * it will return a negative return value.
20951 */
20952 int reserve_memtype(u64 start, u64 end, unsigned long req_type,
20953- unsigned long *ret_type)
20954+ unsigned long *new_type)
20955 {
20956- struct memtype *new_entry = NULL;
20957- struct memtype *parse;
20958+ struct memtype *new, *entry;
20959 unsigned long actual_type;
20960+ struct list_head *where;
20961 int err = 0;
20962
20963- /* Only track when pat_wc_enabled */
20964- if (!pat_wc_enabled) {
20965+ BUG_ON(start >= end); /* end is exclusive */
20966+
20967+ if (!pat_enabled) {
20968 /* This is identical to page table setting without PAT */
20969- if (ret_type) {
20970- if (req_type == -1) {
20971- *ret_type = _PAGE_CACHE_WB;
20972- } else {
20973- *ret_type = req_type;
20974- }
20975+ if (new_type) {
20976+ if (req_type == -1)
20977+ *new_type = _PAGE_CACHE_WB;
20978+ else
20979+ *new_type = req_type & _PAGE_CACHE_MASK;
20980 }
20981 return 0;
20982 }
20983
20984 /* Low ISA region is always mapped WB in page table. No need to track */
20985- if (start >= ISA_START_ADDRESS && (end - 1) <= ISA_END_ADDRESS) {
20986- if (ret_type)
20987- *ret_type = _PAGE_CACHE_WB;
20988-
20989+ if (is_ISA_range(start, end - 1)) {
20990+ if (new_type)
20991+ *new_type = _PAGE_CACHE_WB;
20992 return 0;
20993 }
20994
20995@@ -253,206 +274,118 @@ int reserve_memtype(u64 start, u64 end,
20996 */
20997 u8 mtrr_type = mtrr_type_lookup(start, end);
20998
20999- if (mtrr_type == MTRR_TYPE_WRBACK) {
21000- req_type = _PAGE_CACHE_WB;
21001+ if (mtrr_type == MTRR_TYPE_WRBACK)
21002 actual_type = _PAGE_CACHE_WB;
21003- } else {
21004- req_type = _PAGE_CACHE_UC_MINUS;
21005+ else
21006 actual_type = _PAGE_CACHE_UC_MINUS;
21007- }
21008- } else {
21009- req_type &= _PAGE_CACHE_MASK;
21010- err = pat_x_mtrr_type(start, end, req_type, &actual_type);
21011- }
21012-
21013- if (err) {
21014- if (ret_type)
21015- *ret_type = actual_type;
21016+ } else
21017+ actual_type = pat_x_mtrr_type(start, end,
21018+ req_type & _PAGE_CACHE_MASK);
21019
21020- return -EINVAL;
21021- }
21022-
21023- new_entry = kmalloc(sizeof(struct memtype), GFP_KERNEL);
21024- if (!new_entry)
21025+ new = kmalloc(sizeof(struct memtype), GFP_KERNEL);
21026+ if (!new)
21027 return -ENOMEM;
21028
21029- new_entry->start = start;
21030- new_entry->end = end;
21031- new_entry->type = actual_type;
21032+ new->start = start;
21033+ new->end = end;
21034+ new->type = actual_type;
21035
21036- if (ret_type)
21037- *ret_type = actual_type;
21038+ if (new_type)
21039+ *new_type = actual_type;
21040
21041 spin_lock(&memtype_lock);
21042
21043- /* Search for existing mapping that overlaps the current range */
21044- list_for_each_entry(parse, &memtype_list, nd) {
21045- struct memtype *saved_ptr;
21046+ if (cached_entry && start >= cached_start)
21047+ entry = cached_entry;
21048+ else
21049+ entry = list_entry(&memtype_list, struct memtype, nd);
21050
21051- if (parse->start >= end) {
21052- pr_debug("New Entry\n");
21053- list_add(&new_entry->nd, parse->nd.prev);
21054- new_entry = NULL;
21055+ /* Search for existing mapping that overlaps the current range */
21056+ where = NULL;
21057+ list_for_each_entry_continue(entry, &memtype_list, nd) {
21058+ if (end <= entry->start) {
21059+ where = entry->nd.prev;
21060+ cached_entry = list_entry(where, struct memtype, nd);
21061 break;
21062- }
21063-
21064- if (start <= parse->start && end >= parse->start) {
21065- if (actual_type != parse->type && ret_type) {
21066- actual_type = parse->type;
21067- *ret_type = actual_type;
21068- new_entry->type = actual_type;
21069- }
21070-
21071- if (actual_type != parse->type) {
21072- printk(
21073- KERN_INFO "%s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
21074- current->comm, current->pid,
21075- start, end,
21076- cattr_name(actual_type),
21077- cattr_name(parse->type));
21078- err = -EBUSY;
21079- break;
21080- }
21081-
21082- saved_ptr = parse;
21083- /*
21084- * Check to see whether the request overlaps more
21085- * than one entry in the list
21086- */
21087- list_for_each_entry_continue(parse, &memtype_list, nd) {
21088- if (end <= parse->start) {
21089- break;
21090- }
21091-
21092- if (actual_type != parse->type) {
21093- printk(
21094- KERN_INFO "%s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
21095- current->comm, current->pid,
21096- start, end,
21097- cattr_name(actual_type),
21098- cattr_name(parse->type));
21099- err = -EBUSY;
21100- break;
21101- }
21102- }
21103-
21104- if (err) {
21105- break;
21106+ } else if (start <= entry->start) { /* end > entry->start */
21107+ err = chk_conflict(new, entry, new_type);
21108+ if (!err) {
21109+ dprintk("Overlap at 0x%Lx-0x%Lx\n",
21110+ entry->start, entry->end);
21111+ where = entry->nd.prev;
21112+ cached_entry = list_entry(where,
21113+ struct memtype, nd);
21114 }
21115-
21116- pr_debug("Overlap at 0x%Lx-0x%Lx\n",
21117- saved_ptr->start, saved_ptr->end);
21118- /* No conflict. Go ahead and add this new entry */
21119- list_add(&new_entry->nd, saved_ptr->nd.prev);
21120- new_entry = NULL;
21121 break;
21122- }
21123-
21124- if (start < parse->end) {
21125- if (actual_type != parse->type && ret_type) {
21126- actual_type = parse->type;
21127- *ret_type = actual_type;
21128- new_entry->type = actual_type;
21129- }
21130-
21131- if (actual_type != parse->type) {
21132- printk(
21133- KERN_INFO "%s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
21134- current->comm, current->pid,
21135- start, end,
21136- cattr_name(actual_type),
21137- cattr_name(parse->type));
21138- err = -EBUSY;
21139- break;
21140- }
21141-
21142- saved_ptr = parse;
21143- /*
21144- * Check to see whether the request overlaps more
21145- * than one entry in the list
21146- */
21147- list_for_each_entry_continue(parse, &memtype_list, nd) {
21148- if (end <= parse->start) {
21149- break;
21150- }
21151-
21152- if (actual_type != parse->type) {
21153- printk(
21154- KERN_INFO "%s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
21155- current->comm, current->pid,
21156- start, end,
21157- cattr_name(actual_type),
21158- cattr_name(parse->type));
21159- err = -EBUSY;
21160- break;
21161+ } else if (start < entry->end) { /* start > entry->start */
21162+ err = chk_conflict(new, entry, new_type);
21163+ if (!err) {
21164+ dprintk("Overlap at 0x%Lx-0x%Lx\n",
21165+ entry->start, entry->end);
21166+ cached_entry = list_entry(entry->nd.prev,
21167+ struct memtype, nd);
21168+
21169+ /*
21170+ * Move to right position in the linked
21171+ * list to add this new entry
21172+ */
21173+ list_for_each_entry_continue(entry,
21174+ &memtype_list, nd) {
21175+ if (start <= entry->start) {
21176+ where = entry->nd.prev;
21177+ break;
21178+ }
21179 }
21180 }
21181-
21182- if (err) {
21183- break;
21184- }
21185-
21186- pr_debug(KERN_INFO "Overlap at 0x%Lx-0x%Lx\n",
21187- saved_ptr->start, saved_ptr->end);
21188- /* No conflict. Go ahead and add this new entry */
21189- list_add(&new_entry->nd, &saved_ptr->nd);
21190- new_entry = NULL;
21191 break;
21192 }
21193 }
21194
21195 if (err) {
21196- printk(KERN_INFO
21197- "reserve_memtype failed 0x%Lx-0x%Lx, track %s, req %s\n",
21198- start, end, cattr_name(new_entry->type),
21199- cattr_name(req_type));
21200- kfree(new_entry);
21201+ printk(KERN_INFO "reserve_memtype failed 0x%Lx-0x%Lx, "
21202+ "track %s, req %s\n",
21203+ start, end, cattr_name(new->type), cattr_name(req_type));
21204+ kfree(new);
21205 spin_unlock(&memtype_lock);
21206 return err;
21207 }
21208
21209- if (new_entry) {
21210- /* No conflict. Not yet added to the list. Add to the tail */
21211- list_add_tail(&new_entry->nd, &memtype_list);
21212- pr_debug("New Entry\n");
21213- }
21214+ cached_start = start;
21215
21216- if (ret_type) {
21217- pr_debug(
21218- "reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n",
21219- start, end, cattr_name(actual_type),
21220- cattr_name(req_type), cattr_name(*ret_type));
21221- } else {
21222- pr_debug(
21223- "reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s\n",
21224- start, end, cattr_name(actual_type),
21225- cattr_name(req_type));
21226- }
21227+ if (where)
21228+ list_add(&new->nd, where);
21229+ else
21230+ list_add_tail(&new->nd, &memtype_list);
21231
21232 spin_unlock(&memtype_lock);
21233+
21234+ dprintk("reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n",
21235+ start, end, cattr_name(new->type), cattr_name(req_type),
21236+ new_type ? cattr_name(*new_type) : "-");
21237+
21238 return err;
21239 }
21240
21241 int free_memtype(u64 start, u64 end)
21242 {
21243- struct memtype *ml;
21244+ struct memtype *entry;
21245 int err = -EINVAL;
21246
21247- /* Only track when pat_wc_enabled */
21248- if (!pat_wc_enabled) {
21249+ if (!pat_enabled)
21250 return 0;
21251- }
21252
21253 /* Low ISA region is always mapped WB. No need to track */
21254- if (start >= ISA_START_ADDRESS && end <= ISA_END_ADDRESS) {
21255+ if (is_ISA_range(start, end - 1))
21256 return 0;
21257- }
21258
21259 spin_lock(&memtype_lock);
21260- list_for_each_entry(ml, &memtype_list, nd) {
21261- if (ml->start == start && ml->end == end) {
21262- list_del(&ml->nd);
21263- kfree(ml);
21264+ list_for_each_entry(entry, &memtype_list, nd) {
21265+ if (entry->start == start && entry->end == end) {
21266+ if (cached_entry == entry || cached_start == start)
21267+ cached_entry = NULL;
21268+
21269+ list_del(&entry->nd);
21270+ kfree(entry);
21271 err = 0;
21272 break;
21273 }
21274@@ -464,27 +397,19 @@ int free_memtype(u64 start, u64 end)
21275 current->comm, current->pid, start, end);
21276 }
21277
21278- pr_debug("free_memtype request 0x%Lx-0x%Lx\n", start, end);
21279+ dprintk("free_memtype request 0x%Lx-0x%Lx\n", start, end);
21280 return err;
21281 }
21282
21283
21284-/*
21285- * /dev/mem mmap interface. The memtype used for mapping varies:
21286- * - Use UC for mappings with O_SYNC flag
21287- * - Without O_SYNC flag, if there is any conflict in reserve_memtype,
21288- * inherit the memtype from existing mapping.
21289- * - Else use UC_MINUS memtype (for backward compatibility with existing
21290- * X drivers.
21291- */
21292 pgprot_t phys_mem_access_prot(struct file *file, unsigned long mfn,
21293 unsigned long size, pgprot_t vma_prot)
21294 {
21295 return vma_prot;
21296 }
21297
21298-#ifdef CONFIG_NONPROMISC_DEVMEM
21299-/* This check is done in drivers/char/mem.c in case of NONPROMISC_DEVMEM*/
21300+#ifdef CONFIG_STRICT_DEVMEM
21301+/* This check is done in drivers/char/mem.c in case of STRICT_DEVMEM*/
21302 static inline int range_is_allowed(unsigned long mfn, unsigned long size)
21303 {
21304 return 1;
21305@@ -508,20 +433,20 @@ static inline int range_is_allowed(unsig
21306 }
21307 return 1;
21308 }
21309-#endif /* CONFIG_NONPROMISC_DEVMEM */
21310+#endif /* CONFIG_STRICT_DEVMEM */
21311
21312 int phys_mem_access_prot_allowed(struct file *file, unsigned long mfn,
21313 unsigned long size, pgprot_t *vma_prot)
21314 {
21315 u64 addr = (u64)mfn << PAGE_SHIFT;
21316- unsigned long flags = _PAGE_CACHE_UC_MINUS;
21317+ unsigned long flags = -1;
21318 int retval;
21319
21320 if (!range_is_allowed(mfn, size))
21321 return 0;
21322
21323 if (file->f_flags & O_SYNC) {
21324- flags = _PAGE_CACHE_UC;
21325+ flags = _PAGE_CACHE_UC_MINUS;
21326 }
21327
21328 #ifndef CONFIG_X86_32
21329@@ -534,25 +459,26 @@ int phys_mem_access_prot_allowed(struct
21330 * caching for the high addresses through the KEN pin, but
21331 * we maintain the tradition of paranoia in this code.
21332 */
21333- if (!pat_wc_enabled &&
21334- ! ( test_bit(X86_FEATURE_MTRR, boot_cpu_data.x86_capability) ||
21335- test_bit(X86_FEATURE_K6_MTRR, boot_cpu_data.x86_capability) ||
21336- test_bit(X86_FEATURE_CYRIX_ARR, boot_cpu_data.x86_capability) ||
21337- test_bit(X86_FEATURE_CENTAUR_MCR, boot_cpu_data.x86_capability)) &&
21338- (pfn << PAGE_SHIFT) >= __pa(high_memory)) {
21339+ if (!pat_enabled &&
21340+ !(boot_cpu_has(X86_FEATURE_MTRR) ||
21341+ boot_cpu_has(X86_FEATURE_K6_MTRR) ||
21342+ boot_cpu_has(X86_FEATURE_CYRIX_ARR) ||
21343+ boot_cpu_has(X86_FEATURE_CENTAUR_MCR)) &&
21344+ (pfn << PAGE_SHIFT) >= __pa(high_memory)) {
21345 flags = _PAGE_CACHE_UC;
21346 }
21347 #endif
21348 #endif
21349
21350 /*
21351- * With O_SYNC, we can only take UC mapping. Fail if we cannot.
21352+ * With O_SYNC, we can only take UC_MINUS mapping. Fail if we cannot.
21353+ *
21354 * Without O_SYNC, we want to get
21355 * - WB for WB-able memory and no other conflicting mappings
21356 * - UC_MINUS for non-WB-able memory with no other conflicting mappings
21357 * - Inherit from confliting mappings otherwise
21358 */
21359- if (flags != _PAGE_CACHE_UC_MINUS) {
21360+ if (flags != -1) {
21361 retval = reserve_memtype(addr, addr + size, flags, NULL);
21362 } else {
21363 retval = reserve_memtype(addr, addr + size, -1, &flags);
21364@@ -600,3 +526,88 @@ void unmap_devmem(unsigned long mfn, uns
21365 free_memtype(addr, addr + size);
21366 }
21367
21368+#if defined(CONFIG_DEBUG_FS)
21369+
21370+/* get Nth element of the linked list */
21371+static struct memtype *memtype_get_idx(loff_t pos)
21372+{
21373+ struct memtype *list_node, *print_entry;
21374+ int i = 1;
21375+
21376+ print_entry = kmalloc(sizeof(struct memtype), GFP_KERNEL);
21377+ if (!print_entry)
21378+ return NULL;
21379+
21380+ spin_lock(&memtype_lock);
21381+ list_for_each_entry(list_node, &memtype_list, nd) {
21382+ if (pos == i) {
21383+ *print_entry = *list_node;
21384+ spin_unlock(&memtype_lock);
21385+ return print_entry;
21386+ }
21387+ ++i;
21388+ }
21389+ spin_unlock(&memtype_lock);
21390+ kfree(print_entry);
21391+ return NULL;
21392+}
21393+
21394+static void *memtype_seq_start(struct seq_file *seq, loff_t *pos)
21395+{
21396+ if (*pos == 0) {
21397+ ++*pos;
21398+ seq_printf(seq, "PAT memtype list:\n");
21399+ }
21400+
21401+ return memtype_get_idx(*pos);
21402+}
21403+
21404+static void *memtype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
21405+{
21406+ ++*pos;
21407+ return memtype_get_idx(*pos);
21408+}
21409+
21410+static void memtype_seq_stop(struct seq_file *seq, void *v)
21411+{
21412+}
21413+
21414+static int memtype_seq_show(struct seq_file *seq, void *v)
21415+{
21416+ struct memtype *print_entry = (struct memtype *)v;
21417+
21418+ seq_printf(seq, "%s @ 0x%Lx-0x%Lx\n", cattr_name(print_entry->type),
21419+ print_entry->start, print_entry->end);
21420+ kfree(print_entry);
21421+ return 0;
21422+}
21423+
21424+static struct seq_operations memtype_seq_ops = {
21425+ .start = memtype_seq_start,
21426+ .next = memtype_seq_next,
21427+ .stop = memtype_seq_stop,
21428+ .show = memtype_seq_show,
21429+};
21430+
21431+static int memtype_seq_open(struct inode *inode, struct file *file)
21432+{
21433+ return seq_open(file, &memtype_seq_ops);
21434+}
21435+
21436+static const struct file_operations memtype_fops = {
21437+ .open = memtype_seq_open,
21438+ .read = seq_read,
21439+ .llseek = seq_lseek,
21440+ .release = seq_release,
21441+};
21442+
21443+static int __init pat_memtype_list_init(void)
21444+{
21445+ debugfs_create_file("pat_memtype_list", S_IRUSR, arch_debugfs_dir,
21446+ NULL, &memtype_fops);
21447+ return 0;
21448+}
21449+
21450+late_initcall(pat_memtype_list_init);
21451+
21452+#endif /* CONFIG_DEBUG_FS */
21453--- sle11-2009-10-16.orig/arch/x86/mm/pgtable-xen.c 2009-03-16 16:38:05.000000000 +0100
21454+++ sle11-2009-10-16/arch/x86/mm/pgtable-xen.c 2009-06-04 10:21:39.000000000 +0200
21455@@ -4,6 +4,7 @@
21456 #include <asm/pgalloc.h>
21457 #include <asm/pgtable.h>
21458 #include <asm/tlb.h>
21459+#include <asm/fixmap.h>
21460 #include <asm/hypervisor.h>
21461 #include <asm/mmu_context.h>
21462
21463@@ -410,15 +411,9 @@ static inline void pgd_list_del(pgd_t *p
21464 static void pgd_ctor(void *p)
21465 {
21466 pgd_t *pgd = p;
21467- unsigned long flags;
21468
21469 pgd_test_and_unpin(pgd);
21470
21471- /* Clear usermode parts of PGD */
21472- memset(pgd, 0, KERNEL_PGD_BOUNDARY*sizeof(pgd_t));
21473-
21474- spin_lock_irqsave(&pgd_lock, flags);
21475-
21476 /* If the pgd points to a shared pagetable level (either the
21477 ptes in non-PAE, or shared PMD in PAE), then just copy the
21478 references from swapper_pg_dir. */
21479@@ -440,13 +435,9 @@ static void pgd_ctor(void *p)
21480 __pgd(__pa_symbol(level3_user_pgt) | _PAGE_TABLE);
21481 #endif
21482
21483-#ifndef CONFIG_X86_PAE
21484 /* list required to sync kernel mapping updates */
21485 if (!SHARED_KERNEL_PMD)
21486 pgd_list_add(pgd);
21487-#endif
21488-
21489- spin_unlock_irqrestore(&pgd_lock, flags);
21490 }
21491
21492 static void pgd_dtor(void *pgd)
21493@@ -475,33 +466,6 @@ static void pgd_dtor(void *pgd)
21494
21495 #ifdef CONFIG_X86_PAE
21496 /*
21497- * Mop up any pmd pages which may still be attached to the pgd.
21498- * Normally they will be freed by munmap/exit_mmap, but any pmd we
21499- * preallocate which never got a corresponding vma will need to be
21500- * freed manually.
21501- */
21502-static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
21503-{
21504- int i;
21505-
21506- for(i = 0; i < UNSHARED_PTRS_PER_PGD; i++) {
21507- pgd_t pgd = pgdp[i];
21508-
21509- if (__pgd_val(pgd) != 0) {
21510- pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd);
21511-
21512- pgdp[i] = xen_make_pgd(0);
21513-
21514- paravirt_release_pmd(pgd_val(pgd) >> PAGE_SHIFT);
21515- pmd_free(mm, pmd);
21516- }
21517- }
21518-
21519- if (!xen_feature(XENFEAT_pae_pgdir_above_4gb))
21520- xen_destroy_contiguous_region((unsigned long)pgdp, 0);
21521-}
21522-
21523-/*
21524 * In PAE mode, we need to do a cr3 reload (=tlb flush) when
21525 * updating the top-level pagetable entries to guarantee the
21526 * processor notices the update. Since this is expensive, and
21527@@ -512,61 +476,7 @@ static void pgd_mop_up_pmds(struct mm_st
21528 * not shared between pagetables (!SHARED_KERNEL_PMDS), we allocate
21529 * and initialize the kernel pmds here.
21530 */
21531-static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
21532-{
21533- pud_t *pud;
21534- pmd_t *pmds[UNSHARED_PTRS_PER_PGD];
21535- unsigned long addr, flags;
21536- int i;
21537-
21538- /*
21539- * We can race save/restore (if we sleep during a GFP_KERNEL memory
21540- * allocation). We therefore store virtual addresses of pmds as they
21541- * do not change across save/restore, and poke the machine addresses
21542- * into the pgdir under the pgd_lock.
21543- */
21544- for (addr = i = 0; i < UNSHARED_PTRS_PER_PGD; i++, addr += PUD_SIZE) {
21545- pmds[i] = pmd_alloc_one(mm, addr);
21546- if (!pmds[i])
21547- goto out_oom;
21548- }
21549-
21550- spin_lock_irqsave(&pgd_lock, flags);
21551-
21552- /* Protect against save/restore: move below 4GB under pgd_lock. */
21553- if (!xen_feature(XENFEAT_pae_pgdir_above_4gb)
21554- && xen_create_contiguous_region((unsigned long)pgd, 0, 32)) {
21555- spin_unlock_irqrestore(&pgd_lock, flags);
21556-out_oom:
21557- while (i--)
21558- pmd_free(mm, pmds[i]);
21559- return 0;
21560- }
21561-
21562- /* Copy kernel pmd contents and write-protect the new pmds. */
21563- pud = pud_offset(pgd, 0);
21564- for (addr = i = 0; i < UNSHARED_PTRS_PER_PGD;
21565- i++, pud++, addr += PUD_SIZE) {
21566- if (i >= KERNEL_PGD_BOUNDARY) {
21567- memcpy(pmds[i],
21568- (pmd_t *)pgd_page_vaddr(swapper_pg_dir[i]),
21569- sizeof(pmd_t) * PTRS_PER_PMD);
21570- make_lowmem_page_readonly(
21571- pmds[i], XENFEAT_writable_page_tables);
21572- }
21573-
21574- /* It is safe to poke machine addresses of pmds under the pgd_lock. */
21575- pud_populate(mm, pud, pmds[i]);
21576- }
21577-
21578- /* List required to sync kernel mapping updates and
21579- * to pin/unpin on save/restore. */
21580- pgd_list_add(pgd);
21581-
21582- spin_unlock_irqrestore(&pgd_lock, flags);
21583-
21584- return 1;
21585-}
21586+#define PREALLOCATED_PMDS UNSHARED_PTRS_PER_PGD
21587
21588 void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd)
21589 {
21590@@ -596,16 +506,101 @@ void pud_populate(struct mm_struct *mm,
21591 xen_tlb_flush();
21592 }
21593 #else /* !CONFIG_X86_PAE */
21594+
21595 /* No need to prepopulate any pagetable entries in non-PAE modes. */
21596-static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
21597+#define PREALLOCATED_PMDS 0
21598+
21599+#endif /* CONFIG_X86_PAE */
21600+
21601+static void free_pmds(pmd_t *pmds[], struct mm_struct *mm, bool contig)
21602 {
21603- return 1;
21604+ int i;
21605+
21606+#ifdef CONFIG_X86_PAE
21607+ if (contig)
21608+ xen_destroy_contiguous_region((unsigned long)mm->pgd, 0);
21609+#endif
21610+
21611+ for(i = 0; i < PREALLOCATED_PMDS; i++)
21612+ if (pmds[i])
21613+ pmd_free(mm, pmds[i]);
21614 }
21615
21616-static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgd)
21617+static int preallocate_pmds(pmd_t *pmds[], struct mm_struct *mm)
21618 {
21619+ int i;
21620+ bool failed = false;
21621+
21622+ for(i = 0; i < PREALLOCATED_PMDS; i++) {
21623+ pmd_t *pmd = pmd_alloc_one(mm, i << PUD_SHIFT);
21624+ if (pmd == NULL)
21625+ failed = true;
21626+ pmds[i] = pmd;
21627+ }
21628+
21629+ if (failed) {
21630+ free_pmds(pmds, mm, false);
21631+ return -ENOMEM;
21632+ }
21633+
21634+ return 0;
21635+}
21636+
21637+/*
21638+ * Mop up any pmd pages which may still be attached to the pgd.
21639+ * Normally they will be freed by munmap/exit_mmap, but any pmd we
21640+ * preallocate which never got a corresponding vma will need to be
21641+ * freed manually.
21642+ */
21643+static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
21644+{
21645+ int i;
21646+
21647+ for(i = 0; i < PREALLOCATED_PMDS; i++) {
21648+ pgd_t pgd = pgdp[i];
21649+
21650+ if (__pgd_val(pgd) != 0) {
21651+ pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd);
21652+
21653+ pgdp[i] = xen_make_pgd(0);
21654+
21655+ paravirt_release_pmd(pgd_val(pgd) >> PAGE_SHIFT);
21656+ pmd_free(mm, pmd);
21657+ }
21658+ }
21659+
21660+#ifdef CONFIG_X86_PAE
21661+ if (!xen_feature(XENFEAT_pae_pgdir_above_4gb))
21662+ xen_destroy_contiguous_region((unsigned long)pgdp, 0);
21663+#endif
21664+}
21665+
21666+static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[])
21667+{
21668+ pud_t *pud;
21669+ unsigned long addr;
21670+ int i;
21671+
21672+ if (PREALLOCATED_PMDS == 0) /* Work around gcc-3.4.x bug */
21673+ return;
21674+
21675+ pud = pud_offset(pgd, 0);
21676+ for (addr = i = 0; i < PREALLOCATED_PMDS;
21677+ i++, pud++, addr += PUD_SIZE) {
21678+ pmd_t *pmd = pmds[i];
21679+
21680+ if (i >= KERNEL_PGD_BOUNDARY) {
21681+ memcpy(pmd,
21682+ (pmd_t *)pgd_page_vaddr(swapper_pg_dir[i]),
21683+ sizeof(pmd_t) * PTRS_PER_PMD);
21684+ make_lowmem_page_readonly(
21685+ pmd, XENFEAT_writable_page_tables);
21686+ }
21687+
21688+ /* It is safe to poke machine addresses of pmds under the pgd_lock. */
21689+ pud_populate(mm, pud, pmd);
21690+ }
21691 }
21692-#endif /* CONFIG_X86_PAE */
21693
21694 #ifdef CONFIG_X86_64
21695 /* We allocate two contiguous pages for kernel and user. */
21696@@ -616,19 +611,52 @@ static void pgd_mop_up_pmds(struct mm_st
21697
21698 pgd_t *pgd_alloc(struct mm_struct *mm)
21699 {
21700- pgd_t *pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, PGD_ORDER);
21701+ pgd_t *pgd;
21702+ pmd_t *pmds[PREALLOCATED_PMDS];
21703+ unsigned long flags;
21704+
21705+ pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, PGD_ORDER);
21706+
21707+ if (pgd == NULL)
21708+ goto out;
21709
21710- /* so that alloc_pd can use it */
21711 mm->pgd = pgd;
21712- if (pgd)
21713- pgd_ctor(pgd);
21714
21715- if (pgd && !pgd_prepopulate_pmd(mm, pgd)) {
21716- free_pages((unsigned long)pgd, PGD_ORDER);
21717- pgd = NULL;
21718+ if (preallocate_pmds(pmds, mm) != 0)
21719+ goto out_free_pgd;
21720+
21721+ if (paravirt_pgd_alloc(mm) != 0)
21722+ goto out_free_pmds;
21723+
21724+ /*
21725+ * Make sure that pre-populating the pmds is atomic with
21726+ * respect to anything walking the pgd_list, so that they
21727+ * never see a partially populated pgd.
21728+ */
21729+ spin_lock_irqsave(&pgd_lock, flags);
21730+
21731+#ifdef CONFIG_X86_PAE
21732+ /* Protect against save/restore: move below 4GB under pgd_lock. */
21733+ if (!xen_feature(XENFEAT_pae_pgdir_above_4gb)
21734+ && xen_create_contiguous_region((unsigned long)pgd, 0, 32)) {
21735+ spin_unlock_irqrestore(&pgd_lock, flags);
21736+ goto out_free_pmds;
21737 }
21738+#endif
21739+
21740+ pgd_ctor(pgd);
21741+ pgd_prepopulate_pmd(mm, pgd, pmds);
21742+
21743+ spin_unlock_irqrestore(&pgd_lock, flags);
21744
21745 return pgd;
21746+
21747+out_free_pmds:
21748+ free_pmds(pmds, mm, !xen_feature(XENFEAT_pae_pgdir_above_4gb));
21749+out_free_pgd:
21750+ free_pages((unsigned long)pgd, PGD_ORDER);
21751+out:
21752+ return NULL;
21753 }
21754
21755 void pgd_free(struct mm_struct *mm, pgd_t *pgd)
21756@@ -644,6 +672,7 @@ void pgd_free(struct mm_struct *mm, pgd_
21757 pgd_dtor(pgd);
21758
21759 pgd_mop_up_pmds(mm, pgd);
21760+ paravirt_pgd_free(mm, pgd);
21761 free_pages((unsigned long)pgd, PGD_ORDER);
21762 }
21763
21764@@ -685,7 +714,7 @@ int ptep_test_and_clear_young(struct vm_
21765
21766 if (pte_young(*ptep))
21767 ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
21768- &ptep->pte);
21769+ (unsigned long *) &ptep->pte);
21770
21771 if (ret)
21772 pte_update(vma->vm_mm, addr, ptep);
21773@@ -707,3 +736,42 @@ int ptep_clear_flush_young(struct vm_are
21774
21775 return young;
21776 }
21777+
21778+int fixmaps_set;
21779+
21780+void xen_set_fixmap(enum fixed_addresses idx, maddr_t phys, pgprot_t flags)
21781+{
21782+ unsigned long address = __fix_to_virt(idx);
21783+ pte_t pte;
21784+
21785+ if (idx >= __end_of_fixed_addresses) {
21786+ BUG();
21787+ return;
21788+ }
21789+
21790+ switch (idx) {
21791+#ifdef CONFIG_X86_64
21792+ extern pte_t level1_fixmap_pgt[PTRS_PER_PTE];
21793+
21794+ case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE:
21795+ pte = pfn_pte(phys >> PAGE_SHIFT, flags);
21796+ set_pte_vaddr_pud(level3_user_pgt, address, pte);
21797+ break;
21798+ case FIX_EARLYCON_MEM_BASE:
21799+ xen_l1_entry_update(level1_fixmap_pgt + pte_index(address),
21800+ pfn_pte_ma(phys >> PAGE_SHIFT, flags));
21801+ fixmaps_set++;
21802+ return;
21803+#else
21804+ case FIX_WP_TEST:
21805+ case FIX_VDSO:
21806+ pte = pfn_pte(phys >> PAGE_SHIFT, flags);
21807+ break;
21808+#endif
21809+ default:
21810+ pte = pfn_pte_ma(phys >> PAGE_SHIFT, flags);
21811+ break;
21812+ }
21813+ set_pte_vaddr(address, pte);
21814+ fixmaps_set++;
21815+}
21816--- sle11-2009-10-16.orig/arch/x86/mm/pgtable_32-xen.c 2009-03-16 16:38:05.000000000 +0100
21817+++ sle11-2009-10-16/arch/x86/mm/pgtable_32-xen.c 2009-06-04 10:21:39.000000000 +0200
21818@@ -25,51 +25,49 @@
21819 #include <xen/features.h>
21820 #include <asm/hypervisor.h>
21821
21822-void show_mem(void)
21823+/*
21824+ * Associate a virtual page frame with a given physical page frame
21825+ * and protection flags for that frame.
21826+ */
21827+void set_pte_vaddr(unsigned long vaddr, pte_t pteval)
21828 {
21829- int total = 0, reserved = 0;
21830- int shared = 0, cached = 0;
21831- int highmem = 0;
21832- struct page *page;
21833- pg_data_t *pgdat;
21834- unsigned long i;
21835- unsigned long flags;
21836-
21837- printk(KERN_INFO "Mem-info:\n");
21838- show_free_areas();
21839- for_each_online_pgdat(pgdat) {
21840- pgdat_resize_lock(pgdat, &flags);
21841- for (i = 0; i < pgdat->node_spanned_pages; ++i) {
21842- if (unlikely(i % MAX_ORDER_NR_PAGES == 0))
21843- touch_nmi_watchdog();
21844- page = pgdat_page_nr(pgdat, i);
21845- total++;
21846- if (PageHighMem(page))
21847- highmem++;
21848- if (PageReserved(page))
21849- reserved++;
21850- else if (PageSwapCache(page))
21851- cached++;
21852- else if (page_count(page))
21853- shared += page_count(page) - 1;
21854- }
21855- pgdat_resize_unlock(pgdat, &flags);
21856- }
21857- printk(KERN_INFO "%d pages of RAM\n", total);
21858- printk(KERN_INFO "%d pages of HIGHMEM\n", highmem);
21859- printk(KERN_INFO "%d reserved pages\n", reserved);
21860- printk(KERN_INFO "%d pages shared\n", shared);
21861- printk(KERN_INFO "%d pages swap cached\n", cached);
21862-
21863- printk(KERN_INFO "%lu pages dirty\n", global_page_state(NR_FILE_DIRTY));
21864- printk(KERN_INFO "%lu pages writeback\n",
21865- global_page_state(NR_WRITEBACK));
21866- printk(KERN_INFO "%lu pages mapped\n", global_page_state(NR_FILE_MAPPED));
21867- printk(KERN_INFO "%lu pages slab\n",
21868- global_page_state(NR_SLAB_RECLAIMABLE) +
21869- global_page_state(NR_SLAB_UNRECLAIMABLE));
21870- printk(KERN_INFO "%lu pages pagetables\n",
21871- global_page_state(NR_PAGETABLE));
21872+#ifndef CONFIG_XEN
21873+ pgd_t *pgd;
21874+ pud_t *pud;
21875+ pmd_t *pmd;
21876+ pte_t *pte;
21877+
21878+ pgd = swapper_pg_dir + pgd_index(vaddr);
21879+ if (pgd_none(*pgd)) {
21880+ BUG();
21881+ return;
21882+ }
21883+ pud = pud_offset(pgd, vaddr);
21884+ if (pud_none(*pud)) {
21885+ BUG();
21886+ return;
21887+ }
21888+ pmd = pmd_offset(pud, vaddr);
21889+ if (pmd_none(*pmd)) {
21890+ BUG();
21891+ return;
21892+ }
21893+ pte = pte_offset_kernel(pmd, vaddr);
21894+ if (pte_val(pteval))
21895+ set_pte_present(&init_mm, vaddr, pte, pteval);
21896+ else
21897+ pte_clear(&init_mm, vaddr, pte);
21898+
21899+ /*
21900+ * It's enough to flush this one mapping.
21901+ * (PGE mappings get flushed as well)
21902+ */
21903+ __flush_tlb_one(vaddr);
21904+#else
21905+ if (HYPERVISOR_update_va_mapping(vaddr, pteval,
21906+ UVMF_INVLPG|UVMF_ALL))
21907+ BUG();
21908+#endif
21909 }
21910
21911 /*
21912@@ -107,35 +105,10 @@ void set_pmd_pfn(unsigned long vaddr, un
21913 __flush_tlb_one(vaddr);
21914 }
21915
21916-static int fixmaps;
21917 unsigned long hypervisor_virt_start = HYPERVISOR_VIRT_START;
21918 unsigned long __FIXADDR_TOP = (HYPERVISOR_VIRT_START - PAGE_SIZE);
21919 EXPORT_SYMBOL(__FIXADDR_TOP);
21920
21921-void __set_fixmap (enum fixed_addresses idx, maddr_t phys, pgprot_t flags)
21922-{
21923- unsigned long address = __fix_to_virt(idx);
21924- pte_t pte;
21925-
21926- if (idx >= __end_of_fixed_addresses) {
21927- BUG();
21928- return;
21929- }
21930- switch (idx) {
21931- case FIX_WP_TEST:
21932- case FIX_VDSO:
21933- pte = pfn_pte(phys >> PAGE_SHIFT, flags);
21934- break;
21935- default:
21936- pte = pfn_pte_ma(phys >> PAGE_SHIFT, flags);
21937- break;
21938- }
21939- if (HYPERVISOR_update_va_mapping(address, pte,
21940- UVMF_INVLPG|UVMF_ALL))
21941- BUG();
21942- fixmaps++;
21943-}
21944-
21945 /**
21946 * reserve_top_address - reserves a hole in the top of kernel address space
21947 * @reserve - size of hole to reserve
21948@@ -145,13 +118,48 @@ void __set_fixmap (enum fixed_addresses
21949 */
21950 void __init reserve_top_address(unsigned long reserve)
21951 {
21952- BUG_ON(fixmaps > 0);
21953+ BUG_ON(fixmaps_set > 0);
21954 printk(KERN_INFO "Reserving virtual address space above 0x%08x\n",
21955 (int)-reserve);
21956 __FIXADDR_TOP = -reserve - PAGE_SIZE;
21957 __VMALLOC_RESERVE += reserve;
21958 }
21959
21960+/*
21961+ * vmalloc=size forces the vmalloc area to be exactly 'size'
21962+ * bytes. This can be used to increase (or decrease) the
21963+ * vmalloc area - the default is 128m.
21964+ */
21965+static int __init parse_vmalloc(char *arg)
21966+{
21967+ if (!arg)
21968+ return -EINVAL;
21969+
21970+ __VMALLOC_RESERVE = memparse(arg, &arg);
21971+ return 0;
21972+}
21973+early_param("vmalloc", parse_vmalloc);
21974+
21975+#ifndef CONFIG_XEN
21976+/*
21977+ * reservetop=size reserves a hole at the top of the kernel address space which
21978+ * a hypervisor can load into later. Needed for dynamically loaded hypervisors,
21979+ * so relocating the fixmap can be done before paging initialization.
21980+ */
21981+static int __init parse_reservetop(char *arg)
21982+{
21983+ unsigned long address;
21984+
21985+ if (!arg)
21986+ return -EINVAL;
21987+
21988+ address = memparse(arg, &arg);
21989+ reserve_top_address(address);
21990+ return 0;
21991+}
21992+early_param("reservetop", parse_reservetop);
21993+#endif
21994+
21995 void make_lowmem_page_readonly(void *va, unsigned int feature)
21996 {
21997 pte_t *pte;
21998--- sle11-2009-10-16.orig/arch/x86/pci/amd_bus.c 2009-10-28 14:55:02.000000000 +0100
21999+++ sle11-2009-10-16/arch/x86/pci/amd_bus.c 2009-06-04 10:21:39.000000000 +0200
22000@@ -607,6 +607,14 @@ static int __init pci_io_ecs_init(void)
22001 for_each_online_cpu(cpu)
22002 amd_cpu_notify(&amd_cpu_notifier, (unsigned long)CPU_ONLINE,
22003 (void *)(long)cpu);
22004+#ifdef CONFIG_XEN
22005+ {
22006+ u64 reg;
22007+ rdmsrl(MSR_AMD64_NB_CFG, reg);
22008+ if (!(reg & ENABLE_CF8_EXT_CFG))
22009+ return 0;
22010+ }
22011+#endif
22012 pci_probe |= PCI_HAS_IO_ECS;
22013
22014 return 0;
22015@@ -614,6 +622,10 @@ static int __init pci_io_ecs_init(void)
22016
22017 static int __init amd_postcore_init(void)
22018 {
22019+#ifdef CONFIG_XEN
22020+ if (!is_initial_xendomain())
22021+ return 0;
22022+#endif
22023 if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
22024 return 0;
22025
22026--- sle11-2009-10-16.orig/arch/x86/pci/irq-xen.c 2009-03-16 16:38:05.000000000 +0100
22027+++ sle11-2009-10-16/arch/x86/pci/irq-xen.c 2009-06-04 10:21:39.000000000 +0200
22028@@ -11,8 +11,8 @@
22029 #include <linux/slab.h>
22030 #include <linux/interrupt.h>
22031 #include <linux/dmi.h>
22032-#include <asm/io.h>
22033-#include <asm/smp.h>
22034+#include <linux/io.h>
22035+#include <linux/smp.h>
22036 #include <asm/io_apic.h>
22037 #include <linux/irq.h>
22038 #include <linux/acpi.h>
22039@@ -45,7 +45,8 @@ struct irq_router {
22040 char *name;
22041 u16 vendor, device;
22042 int (*get)(struct pci_dev *router, struct pci_dev *dev, int pirq);
22043- int (*set)(struct pci_dev *router, struct pci_dev *dev, int pirq, int new);
22044+ int (*set)(struct pci_dev *router, struct pci_dev *dev, int pirq,
22045+ int new);
22046 };
22047
22048 struct irq_router_handler {
22049@@ -61,7 +62,7 @@ void (*pcibios_disable_irq)(struct pci_d
22050 * and perform checksum verification.
22051 */
22052
22053-static inline struct irq_routing_table * pirq_check_routing_table(u8 *addr)
22054+static inline struct irq_routing_table *pirq_check_routing_table(u8 *addr)
22055 {
22056 struct irq_routing_table *rt;
22057 int i;
22058@@ -74,10 +75,11 @@ static inline struct irq_routing_table *
22059 rt->size < sizeof(struct irq_routing_table))
22060 return NULL;
22061 sum = 0;
22062- for (i=0; i < rt->size; i++)
22063+ for (i = 0; i < rt->size; i++)
22064 sum += addr[i];
22065 if (!sum) {
22066- DBG(KERN_DEBUG "PCI: Interrupt Routing Table found at 0x%p\n", rt);
22067+ DBG(KERN_DEBUG "PCI: Interrupt Routing Table found at 0x%p\n",
22068+ rt);
22069 return rt;
22070 }
22071 return NULL;
22072@@ -104,7 +106,9 @@ static struct irq_routing_table * __init
22073 return rt;
22074 printk(KERN_WARNING "PCI: PIRQ table NOT found at pirqaddr\n");
22075 }
22076- for(addr = (u8 *) isa_bus_to_virt(0xf0000); addr < (u8 *) isa_bus_to_virt(0x100000); addr += 16) {
22077+ for (addr = (u8 *) isa_bus_to_virt(0xf0000);
22078+ addr < (u8 *) isa_bus_to_virt(0x100000);
22079+ addr += 16) {
22080 rt = pirq_check_routing_table(addr);
22081 if (rt)
22082 return rt;
22083@@ -126,20 +130,20 @@ static void __init pirq_peer_trick(void)
22084 struct irq_info *e;
22085
22086 memset(busmap, 0, sizeof(busmap));
22087- for(i=0; i < (rt->size - sizeof(struct irq_routing_table)) / sizeof(struct irq_info); i++) {
22088+ for (i = 0; i < (rt->size - sizeof(struct irq_routing_table)) / sizeof(struct irq_info); i++) {
22089 e = &rt->slots[i];
22090 #ifdef DEBUG
22091 {
22092 int j;
22093 DBG(KERN_DEBUG "%02x:%02x slot=%02x", e->bus, e->devfn/8, e->slot);
22094- for(j=0; j<4; j++)
22095+ for (j = 0; j < 4; j++)
22096 DBG(" %d:%02x/%04x", j, e->irq[j].link, e->irq[j].bitmap);
22097 DBG("\n");
22098 }
22099 #endif
22100 busmap[e->bus] = 1;
22101 }
22102- for(i = 1; i < 256; i++) {
22103+ for (i = 1; i < 256; i++) {
22104 int node;
22105 if (!busmap[i] || pci_find_bus(0, i))
22106 continue;
22107@@ -187,7 +191,8 @@ static unsigned int read_config_nybble(s
22108 return (nr & 1) ? (x >> 4) : (x & 0xf);
22109 }
22110
22111-static void write_config_nybble(struct pci_dev *router, unsigned offset, unsigned nr, unsigned int val)
22112+static void write_config_nybble(struct pci_dev *router, unsigned offset,
22113+ unsigned nr, unsigned int val)
22114 {
22115 u8 x;
22116 unsigned reg = offset + (nr >> 1);
22117@@ -289,7 +294,7 @@ static int pirq_ite_get(struct pci_dev *
22118 static const unsigned char pirqmap[4] = { 1, 0, 2, 3 };
22119
22120 WARN_ON_ONCE(pirq > 4);
22121- return read_config_nybble(router,0x43, pirqmap[pirq-1]);
22122+ return read_config_nybble(router, 0x43, pirqmap[pirq-1]);
22123 }
22124
22125 static int pirq_ite_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
22126@@ -318,7 +323,7 @@ static int pirq_opti_set(struct pci_dev
22127
22128 /*
22129 * Cyrix: nibble offset 0x5C
22130- * 0x5C bits 7:4 is INTB bits 3:0 is INTA
22131+ * 0x5C bits 7:4 is INTB bits 3:0 is INTA
22132 * 0x5D bits 7:4 is INTD bits 3:0 is INTC
22133 */
22134 static int pirq_cyrix_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
22135@@ -354,7 +359,7 @@ static int pirq_cyrix_set(struct pci_dev
22136 * Apparently there are systems implementing PCI routing table using
22137 * link values 0x01-0x04 and others using 0x41-0x44 for PCI INTA..D.
22138 * We try our best to handle both link mappings.
22139- *
22140+ *
22141 * Currently (2003-05-21) it appears most SiS chipsets follow the
22142 * definition of routing registers from the SiS-5595 southbridge.
22143 * According to the SiS 5595 datasheets the revision id's of the
22144@@ -374,7 +379,7 @@ static int pirq_cyrix_set(struct pci_dev
22145 *
22146 * 0x62: USBIRQ:
22147 * bit 6 OHCI function disabled (0), enabled (1)
22148- *
22149+ *
22150 * 0x6a: ACPI/SCI IRQ: bits 4-6 reserved
22151 *
22152 * 0x7e: Data Acq. Module IRQ - bits 4-6 reserved
22153@@ -437,7 +442,7 @@ static int pirq_vlsi_get(struct pci_dev
22154 {
22155 WARN_ON_ONCE(pirq >= 9);
22156 if (pirq > 8) {
22157- printk(KERN_INFO "VLSI router pirq escape (%d)\n", pirq);
22158+ dev_info(&dev->dev, "VLSI router PIRQ escape (%d)\n", pirq);
22159 return 0;
22160 }
22161 return read_config_nybble(router, 0x74, pirq-1);
22162@@ -447,7 +452,7 @@ static int pirq_vlsi_set(struct pci_dev
22163 {
22164 WARN_ON_ONCE(pirq >= 9);
22165 if (pirq > 8) {
22166- printk(KERN_INFO "VLSI router pirq escape (%d)\n", pirq);
22167+ dev_info(&dev->dev, "VLSI router PIRQ escape (%d)\n", pirq);
22168 return 0;
22169 }
22170 write_config_nybble(router, 0x74, pirq-1, irq);
22171@@ -471,7 +476,8 @@ static int pirq_serverworks_get(struct p
22172 return inb(0xc01) & 0xf;
22173 }
22174
22175-static int pirq_serverworks_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
22176+static int pirq_serverworks_set(struct pci_dev *router, struct pci_dev *dev,
22177+ int pirq, int irq)
22178 {
22179 outb(pirq, 0xc00);
22180 outb(irq, 0xc01);
22181@@ -491,22 +497,20 @@ static int pirq_amd756_get(struct pci_de
22182 u8 irq;
22183 irq = 0;
22184 if (pirq <= 4)
22185- {
22186 irq = read_config_nybble(router, 0x56, pirq - 1);
22187- }
22188- printk(KERN_INFO "AMD756: dev %04x:%04x, router pirq : %d get irq : %2d\n",
22189- dev->vendor, dev->device, pirq, irq);
22190+ dev_info(&dev->dev,
22191+ "AMD756: dev [%04x/%04x], router PIRQ %d get IRQ %d\n",
22192+ dev->vendor, dev->device, pirq, irq);
22193 return irq;
22194 }
22195
22196 static int pirq_amd756_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
22197 {
22198- printk(KERN_INFO "AMD756: dev %04x:%04x, router pirq : %d SET irq : %2d\n",
22199- dev->vendor, dev->device, pirq, irq);
22200+ dev_info(&dev->dev,
22201+ "AMD756: dev [%04x/%04x], router PIRQ %d set IRQ %d\n",
22202+ dev->vendor, dev->device, pirq, irq);
22203 if (pirq <= 4)
22204- {
22205 write_config_nybble(router, 0x56, pirq - 1, irq);
22206- }
22207 return 1;
22208 }
22209
22210@@ -553,50 +557,51 @@ static __init int intel_router_probe(str
22211 if (pci_dev_present(pirq_440gx))
22212 return 0;
22213
22214- switch(device)
22215- {
22216- case PCI_DEVICE_ID_INTEL_82371FB_0:
22217- case PCI_DEVICE_ID_INTEL_82371SB_0:
22218- case PCI_DEVICE_ID_INTEL_82371AB_0:
22219- case PCI_DEVICE_ID_INTEL_82371MX:
22220- case PCI_DEVICE_ID_INTEL_82443MX_0:
22221- case PCI_DEVICE_ID_INTEL_82801AA_0:
22222- case PCI_DEVICE_ID_INTEL_82801AB_0:
22223- case PCI_DEVICE_ID_INTEL_82801BA_0:
22224- case PCI_DEVICE_ID_INTEL_82801BA_10:
22225- case PCI_DEVICE_ID_INTEL_82801CA_0:
22226- case PCI_DEVICE_ID_INTEL_82801CA_12:
22227- case PCI_DEVICE_ID_INTEL_82801DB_0:
22228- case PCI_DEVICE_ID_INTEL_82801E_0:
22229- case PCI_DEVICE_ID_INTEL_82801EB_0:
22230- case PCI_DEVICE_ID_INTEL_ESB_1:
22231- case PCI_DEVICE_ID_INTEL_ICH6_0:
22232- case PCI_DEVICE_ID_INTEL_ICH6_1:
22233- case PCI_DEVICE_ID_INTEL_ICH7_0:
22234- case PCI_DEVICE_ID_INTEL_ICH7_1:
22235- case PCI_DEVICE_ID_INTEL_ICH7_30:
22236- case PCI_DEVICE_ID_INTEL_ICH7_31:
22237- case PCI_DEVICE_ID_INTEL_ESB2_0:
22238- case PCI_DEVICE_ID_INTEL_ICH8_0:
22239- case PCI_DEVICE_ID_INTEL_ICH8_1:
22240- case PCI_DEVICE_ID_INTEL_ICH8_2:
22241- case PCI_DEVICE_ID_INTEL_ICH8_3:
22242- case PCI_DEVICE_ID_INTEL_ICH8_4:
22243- case PCI_DEVICE_ID_INTEL_ICH9_0:
22244- case PCI_DEVICE_ID_INTEL_ICH9_1:
22245- case PCI_DEVICE_ID_INTEL_ICH9_2:
22246- case PCI_DEVICE_ID_INTEL_ICH9_3:
22247- case PCI_DEVICE_ID_INTEL_ICH9_4:
22248- case PCI_DEVICE_ID_INTEL_ICH9_5:
22249- case PCI_DEVICE_ID_INTEL_TOLAPAI_0:
22250- case PCI_DEVICE_ID_INTEL_ICH10_0:
22251- case PCI_DEVICE_ID_INTEL_ICH10_1:
22252- case PCI_DEVICE_ID_INTEL_ICH10_2:
22253- case PCI_DEVICE_ID_INTEL_ICH10_3:
22254- r->name = "PIIX/ICH";
22255- r->get = pirq_piix_get;
22256- r->set = pirq_piix_set;
22257- return 1;
22258+ switch (device) {
22259+ case PCI_DEVICE_ID_INTEL_82371FB_0:
22260+ case PCI_DEVICE_ID_INTEL_82371SB_0:
22261+ case PCI_DEVICE_ID_INTEL_82371AB_0:
22262+ case PCI_DEVICE_ID_INTEL_82371MX:
22263+ case PCI_DEVICE_ID_INTEL_82443MX_0:
22264+ case PCI_DEVICE_ID_INTEL_82801AA_0:
22265+ case PCI_DEVICE_ID_INTEL_82801AB_0:
22266+ case PCI_DEVICE_ID_INTEL_82801BA_0:
22267+ case PCI_DEVICE_ID_INTEL_82801BA_10:
22268+ case PCI_DEVICE_ID_INTEL_82801CA_0:
22269+ case PCI_DEVICE_ID_INTEL_82801CA_12:
22270+ case PCI_DEVICE_ID_INTEL_82801DB_0:
22271+ case PCI_DEVICE_ID_INTEL_82801E_0:
22272+ case PCI_DEVICE_ID_INTEL_82801EB_0:
22273+ case PCI_DEVICE_ID_INTEL_ESB_1:
22274+ case PCI_DEVICE_ID_INTEL_ICH6_0:
22275+ case PCI_DEVICE_ID_INTEL_ICH6_1:
22276+ case PCI_DEVICE_ID_INTEL_ICH7_0:
22277+ case PCI_DEVICE_ID_INTEL_ICH7_1:
22278+ case PCI_DEVICE_ID_INTEL_ICH7_30:
22279+ case PCI_DEVICE_ID_INTEL_ICH7_31:
22280+ case PCI_DEVICE_ID_INTEL_ESB2_0:
22281+ case PCI_DEVICE_ID_INTEL_ICH8_0:
22282+ case PCI_DEVICE_ID_INTEL_ICH8_1:
22283+ case PCI_DEVICE_ID_INTEL_ICH8_2:
22284+ case PCI_DEVICE_ID_INTEL_ICH8_3:
22285+ case PCI_DEVICE_ID_INTEL_ICH8_4:
22286+ case PCI_DEVICE_ID_INTEL_ICH9_0:
22287+ case PCI_DEVICE_ID_INTEL_ICH9_1:
22288+ case PCI_DEVICE_ID_INTEL_ICH9_2:
22289+ case PCI_DEVICE_ID_INTEL_ICH9_3:
22290+ case PCI_DEVICE_ID_INTEL_ICH9_4:
22291+ case PCI_DEVICE_ID_INTEL_ICH9_5:
22292+ case PCI_DEVICE_ID_INTEL_TOLAPAI_0:
22293+ case PCI_DEVICE_ID_INTEL_ICH10_0:
22294+ case PCI_DEVICE_ID_INTEL_ICH10_1:
22295+ case PCI_DEVICE_ID_INTEL_ICH10_2:
22296+ case PCI_DEVICE_ID_INTEL_ICH10_3:
22297+ case PCI_DEVICE_ID_INTEL_PCH_0:
22298+ case PCI_DEVICE_ID_INTEL_PCH_1:
22299+ r->name = "PIIX/ICH";
22300+ r->get = pirq_piix_get;
22301+ r->set = pirq_piix_set;
22302+ return 1;
22303 }
22304 return 0;
22305 }
22306@@ -610,7 +615,7 @@ static __init int via_router_probe(struc
22307 * workarounds for some buggy BIOSes
22308 */
22309 if (device == PCI_DEVICE_ID_VIA_82C586_0) {
22310- switch(router->device) {
22311+ switch (router->device) {
22312 case PCI_DEVICE_ID_VIA_82C686:
22313 /*
22314 * Asus k7m bios wrongly reports 82C686A
22315@@ -635,7 +640,7 @@ static __init int via_router_probe(struc
22316 }
22317 }
22318
22319- switch(device) {
22320+ switch (device) {
22321 case PCI_DEVICE_ID_VIA_82C586_0:
22322 r->name = "VIA";
22323 r->get = pirq_via586_get;
22324@@ -658,28 +663,27 @@ static __init int via_router_probe(struc
22325
22326 static __init int vlsi_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
22327 {
22328- switch(device)
22329- {
22330- case PCI_DEVICE_ID_VLSI_82C534:
22331- r->name = "VLSI 82C534";
22332- r->get = pirq_vlsi_get;
22333- r->set = pirq_vlsi_set;
22334- return 1;
22335+ switch (device) {
22336+ case PCI_DEVICE_ID_VLSI_82C534:
22337+ r->name = "VLSI 82C534";
22338+ r->get = pirq_vlsi_get;
22339+ r->set = pirq_vlsi_set;
22340+ return 1;
22341 }
22342 return 0;
22343 }
22344
22345
22346-static __init int serverworks_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
22347+static __init int serverworks_router_probe(struct irq_router *r,
22348+ struct pci_dev *router, u16 device)
22349 {
22350- switch(device)
22351- {
22352- case PCI_DEVICE_ID_SERVERWORKS_OSB4:
22353- case PCI_DEVICE_ID_SERVERWORKS_CSB5:
22354- r->name = "ServerWorks";
22355- r->get = pirq_serverworks_get;
22356- r->set = pirq_serverworks_set;
22357- return 1;
22358+ switch (device) {
22359+ case PCI_DEVICE_ID_SERVERWORKS_OSB4:
22360+ case PCI_DEVICE_ID_SERVERWORKS_CSB5:
22361+ r->name = "ServerWorks";
22362+ r->get = pirq_serverworks_get;
22363+ r->set = pirq_serverworks_set;
22364+ return 1;
22365 }
22366 return 0;
22367 }
22368@@ -688,7 +692,7 @@ static __init int sis_router_probe(struc
22369 {
22370 if (device != PCI_DEVICE_ID_SI_503)
22371 return 0;
22372-
22373+
22374 r->name = "SIS";
22375 r->get = pirq_sis_get;
22376 r->set = pirq_sis_set;
22377@@ -697,50 +701,45 @@ static __init int sis_router_probe(struc
22378
22379 static __init int cyrix_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
22380 {
22381- switch(device)
22382- {
22383- case PCI_DEVICE_ID_CYRIX_5520:
22384- r->name = "NatSemi";
22385- r->get = pirq_cyrix_get;
22386- r->set = pirq_cyrix_set;
22387- return 1;
22388+ switch (device) {
22389+ case PCI_DEVICE_ID_CYRIX_5520:
22390+ r->name = "NatSemi";
22391+ r->get = pirq_cyrix_get;
22392+ r->set = pirq_cyrix_set;
22393+ return 1;
22394 }
22395 return 0;
22396 }
22397
22398 static __init int opti_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
22399 {
22400- switch(device)
22401- {
22402- case PCI_DEVICE_ID_OPTI_82C700:
22403- r->name = "OPTI";
22404- r->get = pirq_opti_get;
22405- r->set = pirq_opti_set;
22406- return 1;
22407+ switch (device) {
22408+ case PCI_DEVICE_ID_OPTI_82C700:
22409+ r->name = "OPTI";
22410+ r->get = pirq_opti_get;
22411+ r->set = pirq_opti_set;
22412+ return 1;
22413 }
22414 return 0;
22415 }
22416
22417 static __init int ite_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
22418 {
22419- switch(device)
22420- {
22421- case PCI_DEVICE_ID_ITE_IT8330G_0:
22422- r->name = "ITE";
22423- r->get = pirq_ite_get;
22424- r->set = pirq_ite_set;
22425- return 1;
22426+ switch (device) {
22427+ case PCI_DEVICE_ID_ITE_IT8330G_0:
22428+ r->name = "ITE";
22429+ r->get = pirq_ite_get;
22430+ r->set = pirq_ite_set;
22431+ return 1;
22432 }
22433 return 0;
22434 }
22435
22436 static __init int ali_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
22437 {
22438- switch(device)
22439- {
22440+ switch (device) {
22441 case PCI_DEVICE_ID_AL_M1533:
22442 case PCI_DEVICE_ID_AL_M1563:
22443- printk(KERN_DEBUG "PCI: Using ALI IRQ Router\n");
22444 r->name = "ALI";
22445 r->get = pirq_ali_get;
22446 r->set = pirq_ali_set;
22447@@ -751,25 +750,24 @@ static __init int ali_router_probe(struc
22448
22449 static __init int amd_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
22450 {
22451- switch(device)
22452- {
22453- case PCI_DEVICE_ID_AMD_VIPER_740B:
22454- r->name = "AMD756";
22455- break;
22456- case PCI_DEVICE_ID_AMD_VIPER_7413:
22457- r->name = "AMD766";
22458- break;
22459- case PCI_DEVICE_ID_AMD_VIPER_7443:
22460- r->name = "AMD768";
22461- break;
22462- default:
22463- return 0;
22464+ switch (device) {
22465+ case PCI_DEVICE_ID_AMD_VIPER_740B:
22466+ r->name = "AMD756";
22467+ break;
22468+ case PCI_DEVICE_ID_AMD_VIPER_7413:
22469+ r->name = "AMD766";
22470+ break;
22471+ case PCI_DEVICE_ID_AMD_VIPER_7443:
22472+ r->name = "AMD768";
22473+ break;
22474+ default:
22475+ return 0;
22476 }
22477 r->get = pirq_amd756_get;
22478 r->set = pirq_amd756_set;
22479 return 1;
22480 }
22481-
22482+
22483 static __init int pico_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
22484 {
22485 switch (device) {
22486@@ -811,7 +809,7 @@ static struct pci_dev *pirq_router_dev;
22487 * FIXME: should we have an option to say "generic for
22488 * chipset" ?
22489 */
22490-
22491+
22492 static void __init pirq_find_router(struct irq_router *r)
22493 {
22494 struct irq_routing_table *rt = pirq_table;
22495@@ -830,7 +828,7 @@ static void __init pirq_find_router(stru
22496 r->name = "default";
22497 r->get = NULL;
22498 r->set = NULL;
22499-
22500+
22501 DBG(KERN_DEBUG "PCI: Attempting to find IRQ router for %04x:%04x\n",
22502 rt->rtr_vendor, rt->rtr_device);
22503
22504@@ -841,19 +839,19 @@ static void __init pirq_find_router(stru
22505 return;
22506 }
22507
22508- for( h = pirq_routers; h->vendor; h++) {
22509+ for (h = pirq_routers; h->vendor; h++) {
22510 /* First look for a router match */
22511- if (rt->rtr_vendor == h->vendor && h->probe(r, pirq_router_dev, rt->rtr_device))
22512+ if (rt->rtr_vendor == h->vendor &&
22513+ h->probe(r, pirq_router_dev, rt->rtr_device))
22514 break;
22515 /* Fall back to a device match */
22516- if (pirq_router_dev->vendor == h->vendor && h->probe(r, pirq_router_dev, pirq_router_dev->device))
22517+ if (pirq_router_dev->vendor == h->vendor &&
22518+ h->probe(r, pirq_router_dev, pirq_router_dev->device))
22519 break;
22520 }
22521- printk(KERN_INFO "PCI: Using IRQ router %s [%04x/%04x] at %s\n",
22522- pirq_router.name,
22523- pirq_router_dev->vendor,
22524- pirq_router_dev->device,
22525- pci_name(pirq_router_dev));
22526+ dev_info(&pirq_router_dev->dev, "%s IRQ router [%04x/%04x]\n",
22527+ pirq_router.name,
22528+ pirq_router_dev->vendor, pirq_router_dev->device);
22529
22530 /* The device remains referenced for the kernel lifetime */
22531 }
22532@@ -861,11 +859,13 @@ static void __init pirq_find_router(stru
22533 static struct irq_info *pirq_get_info(struct pci_dev *dev)
22534 {
22535 struct irq_routing_table *rt = pirq_table;
22536- int entries = (rt->size - sizeof(struct irq_routing_table)) / sizeof(struct irq_info);
22537+ int entries = (rt->size - sizeof(struct irq_routing_table)) /
22538+ sizeof(struct irq_info);
22539 struct irq_info *info;
22540
22541 for (info = rt->slots; entries--; info++)
22542- if (info->bus == dev->bus->number && PCI_SLOT(info->devfn) == PCI_SLOT(dev->devfn))
22543+ if (info->bus == dev->bus->number &&
22544+ PCI_SLOT(info->devfn) == PCI_SLOT(dev->devfn))
22545 return info;
22546 return NULL;
22547 }
22548@@ -884,7 +884,7 @@ static int pcibios_lookup_irq(struct pci
22549 /* Find IRQ pin */
22550 pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
22551 if (!pin) {
22552- DBG(KERN_DEBUG " -> no interrupt pin\n");
22553+ dev_dbg(&dev->dev, "no interrupt pin\n");
22554 return 0;
22555 }
22556 pin = pin - 1;
22557@@ -893,20 +893,21 @@ static int pcibios_lookup_irq(struct pci
22558
22559 if (!pirq_table)
22560 return 0;
22561-
22562- DBG(KERN_DEBUG "IRQ for %s[%c]", pci_name(dev), 'A' + pin);
22563+
22564 info = pirq_get_info(dev);
22565 if (!info) {
22566- DBG(" -> not found in routing table\n" KERN_DEBUG);
22567+ dev_dbg(&dev->dev, "PCI INT %c not found in routing table\n",
22568+ 'A' + pin);
22569 return 0;
22570 }
22571 pirq = info->irq[pin].link;
22572 mask = info->irq[pin].bitmap;
22573 if (!pirq) {
22574- DBG(" -> not routed\n" KERN_DEBUG);
22575+ dev_dbg(&dev->dev, "PCI INT %c not routed\n", 'A' + pin);
22576 return 0;
22577 }
22578- DBG(" -> PIRQ %02x, mask %04x, excl %04x", pirq, mask, pirq_table->exclusive_irqs);
22579+ dev_dbg(&dev->dev, "PCI INT %c -> PIRQ %02x, mask %04x, excl %04x",
22580+ 'A' + pin, pirq, mask, pirq_table->exclusive_irqs);
22581 mask &= pcibios_irq_mask;
22582
22583 /* Work around broken HP Pavilion Notebooks which assign USB to
22584@@ -919,7 +920,8 @@ static int pcibios_lookup_irq(struct pci
22585 }
22586
22587 /* same for Acer Travelmate 360, but with CB and irq 11 -> 10 */
22588- if (acer_tm360_irqrouting && dev->irq == 11 && dev->vendor == PCI_VENDOR_ID_O2) {
22589+ if (acer_tm360_irqrouting && dev->irq == 11 &&
22590+ dev->vendor == PCI_VENDOR_ID_O2) {
22591 pirq = 0x68;
22592 mask = 0x400;
22593 dev->irq = r->get(pirq_router_dev, dev, pirq);
22594@@ -932,51 +934,50 @@ static int pcibios_lookup_irq(struct pci
22595 */
22596 newirq = dev->irq;
22597 if (newirq && !((1 << newirq) & mask)) {
22598- if ( pci_probe & PCI_USE_PIRQ_MASK) newirq = 0;
22599- else printk("\n" KERN_WARNING
22600- "PCI: IRQ %i for device %s doesn't match PIRQ mask "
22601- "- try pci=usepirqmask\n" KERN_DEBUG, newirq,
22602- pci_name(dev));
22603+ if (pci_probe & PCI_USE_PIRQ_MASK)
22604+ newirq = 0;
22605+ else
22606+ dev_warn(&dev->dev, "IRQ %d doesn't match PIRQ mask "
22607+ "%#x; try pci=usepirqmask\n", newirq, mask);
22608 }
22609 if (!newirq && assign) {
22610 for (i = 0; i < 16; i++) {
22611 if (!(mask & (1 << i)))
22612 continue;
22613- if (pirq_penalty[i] < pirq_penalty[newirq] && can_request_irq(i, IRQF_SHARED))
22614+ if (pirq_penalty[i] < pirq_penalty[newirq] &&
22615+ can_request_irq(i, IRQF_SHARED))
22616 newirq = i;
22617 }
22618 }
22619- DBG(" -> newirq=%d", newirq);
22620+ dev_dbg(&dev->dev, "PCI INT %c -> newirq %d", 'A' + pin, newirq);
22621
22622 /* Check if it is hardcoded */
22623 if ((pirq & 0xf0) == 0xf0) {
22624 irq = pirq & 0xf;
22625- DBG(" -> hardcoded IRQ %d\n", irq);
22626- msg = "Hardcoded";
22627- } else if ( r->get && (irq = r->get(pirq_router_dev, dev, pirq)) && \
22628- ((!(pci_probe & PCI_USE_PIRQ_MASK)) || ((1 << irq) & mask)) ) {
22629- DBG(" -> got IRQ %d\n", irq);
22630- msg = "Found";
22631+ msg = "hardcoded";
22632+ } else if (r->get && (irq = r->get(pirq_router_dev, dev, pirq)) && \
22633+ ((!(pci_probe & PCI_USE_PIRQ_MASK)) || ((1 << irq) & mask))) {
22634+ msg = "found";
22635 eisa_set_level_irq(irq);
22636- } else if (newirq && r->set && (dev->class >> 8) != PCI_CLASS_DISPLAY_VGA) {
22637- DBG(" -> assigning IRQ %d", newirq);
22638+ } else if (newirq && r->set &&
22639+ (dev->class >> 8) != PCI_CLASS_DISPLAY_VGA) {
22640 if (r->set(pirq_router_dev, dev, pirq, newirq)) {
22641 eisa_set_level_irq(newirq);
22642- DBG(" ... OK\n");
22643- msg = "Assigned";
22644+ msg = "assigned";
22645 irq = newirq;
22646 }
22647 }
22648
22649 if (!irq) {
22650- DBG(" ... failed\n");
22651 if (newirq && mask == (1 << newirq)) {
22652- msg = "Guessed";
22653+ msg = "guessed";
22654 irq = newirq;
22655- } else
22656+ } else {
22657+ dev_dbg(&dev->dev, "can't route interrupt\n");
22658 return 0;
22659+ }
22660 }
22661- printk(KERN_INFO "PCI: %s IRQ %d for device %s\n", msg, irq, pci_name(dev));
22662+ dev_info(&dev->dev, "%s PCI INT %c -> IRQ %d\n", msg, 'A' + pin, irq);
22663
22664 /* Update IRQ for all devices with the same pirq value */
22665 while ((dev2 = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev2)) != NULL) {
22666@@ -988,20 +989,25 @@ static int pcibios_lookup_irq(struct pci
22667 if (!info)
22668 continue;
22669 if (info->irq[pin].link == pirq) {
22670- /* We refuse to override the dev->irq information. Give a warning! */
22671- if ( dev2->irq && dev2->irq != irq && \
22672+ /*
22673+ * We refuse to override the dev->irq
22674+ * information. Give a warning!
22675+ */
22676+ if (dev2->irq && dev2->irq != irq && \
22677 (!(pci_probe & PCI_USE_PIRQ_MASK) || \
22678- ((1 << dev2->irq) & mask)) ) {
22679+ ((1 << dev2->irq) & mask))) {
22680 #ifndef CONFIG_PCI_MSI
22681- printk(KERN_INFO "IRQ routing conflict for %s, have irq %d, want irq %d\n",
22682- pci_name(dev2), dev2->irq, irq);
22683+ dev_info(&dev2->dev, "IRQ routing conflict: "
22684+ "have IRQ %d, want IRQ %d\n",
22685+ dev2->irq, irq);
22686 #endif
22687- continue;
22688- }
22689+ continue;
22690+ }
22691 dev2->irq = irq;
22692 pirq_penalty[irq]++;
22693 if (dev != dev2)
22694- printk(KERN_INFO "PCI: Sharing IRQ %d with %s\n", irq, pci_name(dev2));
22695+ dev_info(&dev->dev, "sharing IRQ %d with %s\n",
22696+ irq, pci_name(dev2));
22697 }
22698 }
22699 return 1;
22700@@ -1015,15 +1021,20 @@ static void __init pcibios_fixup_irqs(vo
22701 DBG(KERN_DEBUG "PCI: IRQ fixup\n");
22702 while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
22703 /*
22704- * If the BIOS has set an out of range IRQ number, just ignore it.
22705- * Also keep track of which IRQ's are already in use.
22706+ * If the BIOS has set an out of range IRQ number, just
22707+ * ignore it. Also keep track of which IRQ's are
22708+ * already in use.
22709 */
22710 if (dev->irq >= 16) {
22711- DBG(KERN_DEBUG "%s: ignoring bogus IRQ %d\n", pci_name(dev), dev->irq);
22712+ dev_dbg(&dev->dev, "ignoring bogus IRQ %d\n", dev->irq);
22713 dev->irq = 0;
22714 }
22715- /* If the IRQ is already assigned to a PCI device, ignore its ISA use penalty */
22716- if (pirq_penalty[dev->irq] >= 100 && pirq_penalty[dev->irq] < 100000)
22717+ /*
22718+ * If the IRQ is already assigned to a PCI device,
22719+ * ignore its ISA use penalty
22720+ */
22721+ if (pirq_penalty[dev->irq] >= 100 &&
22722+ pirq_penalty[dev->irq] < 100000)
22723 pirq_penalty[dev->irq] = 0;
22724 pirq_penalty[dev->irq]++;
22725 }
22726@@ -1035,13 +1046,17 @@ static void __init pcibios_fixup_irqs(vo
22727 /*
22728 * Recalculate IRQ numbers if we use the I/O APIC.
22729 */
22730- if (io_apic_assign_pci_irqs)
22731- {
22732+ if (io_apic_assign_pci_irqs) {
22733 int irq;
22734
22735 if (pin) {
22736- pin--; /* interrupt pins are numbered starting from 1 */
22737- irq = IO_APIC_get_PCI_irq_vector(dev->bus->number, PCI_SLOT(dev->devfn), pin);
22738+ /*
22739+ * interrupt pins are numbered starting
22740+ * from 1
22741+ */
22742+ pin--;
22743+ irq = IO_APIC_get_PCI_irq_vector(dev->bus->number,
22744+ PCI_SLOT(dev->devfn), pin);
22745 /*
22746 * Busses behind bridges are typically not listed in the MP-table.
22747 * In this case we have to look up the IRQ based on the parent bus,
22748@@ -1049,18 +1064,18 @@ static void __init pcibios_fixup_irqs(vo
22749 * busses itself so we should get into this branch reliably.
22750 */
22751 if (irq < 0 && dev->bus->parent) { /* go back to the bridge */
22752- struct pci_dev * bridge = dev->bus->self;
22753+ struct pci_dev *bridge = dev->bus->self;
22754
22755 pin = (pin + PCI_SLOT(dev->devfn)) % 4;
22756- irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number,
22757+ irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number,
22758 PCI_SLOT(bridge->devfn), pin);
22759 if (irq >= 0)
22760- printk(KERN_WARNING "PCI: using PPB %s[%c] to get irq %d\n",
22761- pci_name(bridge), 'A' + pin, irq);
22762+ dev_warn(&dev->dev, "using bridge %s INT %c to get IRQ %d\n",
22763+ pci_name(bridge),
22764+ 'A' + pin, irq);
22765 }
22766 if (irq >= 0) {
22767- printk(KERN_INFO "PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n",
22768- pci_name(dev), 'A' + pin, irq);
22769+ dev_info(&dev->dev, "PCI->APIC IRQ transform: INT %c -> IRQ %d\n", 'A' + pin, irq);
22770 dev->irq = irq;
22771 }
22772 }
22773@@ -1082,7 +1097,8 @@ static int __init fix_broken_hp_bios_irq
22774 {
22775 if (!broken_hp_bios_irq9) {
22776 broken_hp_bios_irq9 = 1;
22777- printk(KERN_INFO "%s detected - fixing broken IRQ routing\n", d->ident);
22778+ printk(KERN_INFO "%s detected - fixing broken IRQ routing\n",
22779+ d->ident);
22780 }
22781 return 0;
22782 }
22783@@ -1095,7 +1111,8 @@ static int __init fix_acer_tm360_irqrout
22784 {
22785 if (!acer_tm360_irqrouting) {
22786 acer_tm360_irqrouting = 1;
22787- printk(KERN_INFO "%s detected - fixing broken IRQ routing\n", d->ident);
22788+ printk(KERN_INFO "%s detected - fixing broken IRQ routing\n",
22789+ d->ident);
22790 }
22791 return 0;
22792 }
22793@@ -1107,7 +1124,8 @@ static struct dmi_system_id __initdata p
22794 .matches = {
22795 DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
22796 DMI_MATCH(DMI_BIOS_VERSION, "GE.M1.03"),
22797- DMI_MATCH(DMI_PRODUCT_VERSION, "HP Pavilion Notebook Model GE"),
22798+ DMI_MATCH(DMI_PRODUCT_VERSION,
22799+ "HP Pavilion Notebook Model GE"),
22800 DMI_MATCH(DMI_BOARD_VERSION, "OmniBook N32N-736"),
22801 },
22802 },
22803@@ -1122,7 +1140,7 @@ static struct dmi_system_id __initdata p
22804 { }
22805 };
22806
22807-static int __init pcibios_irq_init(void)
22808+int __init pcibios_irq_init(void)
22809 {
22810 DBG(KERN_DEBUG "PCI: IRQ init\n");
22811
22812@@ -1142,11 +1160,14 @@ static int __init pcibios_irq_init(void)
22813 pirq_find_router(&pirq_router);
22814 if (pirq_table->exclusive_irqs) {
22815 int i;
22816- for (i=0; i<16; i++)
22817+ for (i = 0; i < 16; i++)
22818 if (!(pirq_table->exclusive_irqs & (1 << i)))
22819 pirq_penalty[i] += 100;
22820 }
22821- /* If we're using the I/O APIC, avoid using the PCI IRQ routing table */
22822+ /*
22823+ * If we're using the I/O APIC, avoid using the PCI IRQ
22824+ * routing table
22825+ */
22826 if (io_apic_assign_pci_irqs)
22827 pirq_table = NULL;
22828 }
22829@@ -1157,9 +1178,6 @@ static int __init pcibios_irq_init(void)
22830 return 0;
22831 }
22832
22833-subsys_initcall(pcibios_irq_init);
22834-
22835-
22836 static void pirq_penalize_isa_irq(int irq, int active)
22837 {
22838 /*
22839@@ -1193,7 +1211,7 @@ static int pirq_enable_irq(struct pci_de
22840 if (pin && !pcibios_lookup_irq(dev, 1) && !dev->irq) {
22841 char *msg = "";
22842
22843- pin--; /* interrupt pins are numbered starting from 1 */
22844+ pin--; /* interrupt pins are numbered starting from 1 */
22845
22846 if (io_apic_assign_pci_irqs) {
22847 int irq;
22848@@ -1207,35 +1225,41 @@ static int pirq_enable_irq(struct pci_de
22849 */
22850 temp_dev = dev;
22851 while (irq < 0 && dev->bus->parent) { /* go back to the bridge */
22852- struct pci_dev * bridge = dev->bus->self;
22853+ struct pci_dev *bridge = dev->bus->self;
22854
22855 pin = (pin + PCI_SLOT(dev->devfn)) % 4;
22856- irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number,
22857+ irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number,
22858 PCI_SLOT(bridge->devfn), pin);
22859 if (irq >= 0)
22860- printk(KERN_WARNING "PCI: using PPB %s[%c] to get irq %d\n",
22861- pci_name(bridge), 'A' + pin, irq);
22862+ dev_warn(&dev->dev, "using bridge %s "
22863+ "INT %c to get IRQ %d\n",
22864+ pci_name(bridge), 'A' + pin,
22865+ irq);
22866 dev = bridge;
22867 }
22868 dev = temp_dev;
22869 if (irq >= 0) {
22870- printk(KERN_INFO "PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n",
22871- pci_name(dev), 'A' + pin, irq);
22872+ dev_info(&dev->dev, "PCI->APIC IRQ transform: "
22873+ "INT %c -> IRQ %d\n", 'A' + pin, irq);
22874 dev->irq = irq;
22875 return 0;
22876 } else
22877- msg = " Probably buggy MP table.";
22878+ msg = "; probably buggy MP table";
22879 } else if (pci_probe & PCI_BIOS_IRQ_SCAN)
22880 msg = "";
22881 else
22882- msg = " Please try using pci=biosirq.";
22883+ msg = "; please try using pci=biosirq";
22884
22885- /* With IDE legacy devices the IRQ lookup failure is not a problem.. */
22886- if (dev->class >> 8 == PCI_CLASS_STORAGE_IDE && !(dev->class & 0x5))
22887+ /*
22888+ * With IDE legacy devices the IRQ lookup failure is not
22889+ * a problem..
22890+ */
22891+ if (dev->class >> 8 == PCI_CLASS_STORAGE_IDE &&
22892+ !(dev->class & 0x5))
22893 return 0;
22894
22895- printk(KERN_WARNING "PCI: No IRQ known for interrupt pin %c of device %s.%s\n",
22896- 'A' + pin, pci_name(dev), msg);
22897+ dev_warn(&dev->dev, "can't find IRQ for PCI INT %c%s\n",
22898+ 'A' + pin, msg);
22899 }
22900 return 0;
22901 }
22902--- sle11-2009-10-16.orig/arch/x86/vdso/Makefile 2009-03-16 16:33:40.000000000 +0100
22903+++ sle11-2009-10-16/arch/x86/vdso/Makefile 2009-06-04 10:21:39.000000000 +0200
22904@@ -65,9 +65,7 @@ obj-$(VDSO32-y) += vdso32-syms.lds
22905 vdso32.so-$(VDSO32-y) += int80
22906 vdso32.so-$(CONFIG_COMPAT) += syscall
22907 vdso32.so-$(VDSO32-y) += sysenter
22908-xen-vdso32-$(subst 1,$(CONFIG_COMPAT),$(shell expr $(CONFIG_XEN_COMPAT)0 '<' 0x0302000)) += int80
22909-xen-vdso32-$(CONFIG_X86_32) += syscall
22910-vdso32.so-$(CONFIG_XEN) += $(xen-vdso32-y)
22911+vdso32.so-$(CONFIG_X86_XEN) += syscall
22912
22913 vdso32-images = $(vdso32.so-y:%=vdso32-%.so)
22914
22915--- sle11-2009-10-16.orig/arch/x86/vdso/vdso32.S 2009-03-16 16:33:40.000000000 +0100
22916+++ sle11-2009-10-16/arch/x86/vdso/vdso32.S 2009-06-04 10:21:39.000000000 +0200
22917@@ -9,7 +9,7 @@ vdso32_int80_end:
22918
22919 .globl vdso32_syscall_start, vdso32_syscall_end
22920 vdso32_syscall_start:
22921-#ifdef CONFIG_COMPAT
22922+#if defined(CONFIG_COMPAT) || defined(CONFIG_X86_XEN)
22923 .incbin "arch/x86/vdso/vdso32-syscall.so"
22924 #endif
22925 vdso32_syscall_end:
22926@@ -19,16 +19,4 @@ vdso32_sysenter_start:
22927 .incbin "arch/x86/vdso/vdso32-sysenter.so"
22928 vdso32_sysenter_end:
22929
22930-#if defined(CONFIG_X86_64_XEN) && CONFIG_XEN_COMPAT < 0x030200
22931- .globl vdso32_int80_start, vdso32_int80_end
22932-vdso32_int80_start:
22933- .incbin "arch/x86/vdso/vdso32-int80.so"
22934-vdso32_int80_end:
22935-#elif defined(CONFIG_X86_XEN)
22936- .globl vdso32_syscall_start, vdso32_syscall_end
22937-vdso32_syscall_start:
22938- .incbin "arch/x86/vdso/vdso32-syscall.so"
22939-vdso32_syscall_end:
22940-#endif
22941-
22942 __FINIT
22943--- sle11-2009-10-16.orig/arch/x86/vdso/vdso32-setup-xen.c 2009-03-16 16:38:05.000000000 +0100
22944+++ sle11-2009-10-16/arch/x86/vdso/vdso32-setup-xen.c 2009-06-04 10:21:39.000000000 +0200
22945@@ -195,50 +195,28 @@ static __init void relocate_vdso(Elf32_E
22946 }
22947 }
22948
22949-/*
22950- * These symbols are defined by vdso32.S to mark the bounds
22951- * of the ELF DSO images included therein.
22952- */
22953-extern const char vdso32_default_start, vdso32_default_end;
22954-extern const char vdso32_sysenter_start, vdso32_sysenter_end;
22955 static struct page *vdso32_pages[1];
22956
22957 #ifdef CONFIG_X86_64
22958
22959-#if CONFIG_XEN_COMPAT < 0x030200
22960-static int use_int80 = 1;
22961-#endif
22962-static int use_sysenter __read_mostly = -1;
22963-
22964-#define vdso32_sysenter() (use_sysenter > 0)
22965+#define vdso32_sysenter() (boot_cpu_has(X86_FEATURE_SYSENTER32))
22966+#define vdso32_syscall() (boot_cpu_has(X86_FEATURE_SYSCALL32))
22967
22968-/* May not be __init: called during resume */
22969-void syscall32_cpu_init(void)
22970+void __cpuinit syscall32_cpu_init(void)
22971 {
22972- static const struct callback_register cstar = {
22973+ static const struct callback_register __cpuinitconst cstar = {
22974 .type = CALLBACKTYPE_syscall32,
22975 .address = (unsigned long)ia32_cstar_target
22976 };
22977- static const struct callback_register sysenter = {
22978+ static const struct callback_register __cpuinitconst sysenter = {
22979 .type = CALLBACKTYPE_sysenter,
22980 .address = (unsigned long)ia32_sysenter_target
22981 };
22982
22983- if ((HYPERVISOR_callback_op(CALLBACKOP_register, &sysenter) < 0) ||
22984- (HYPERVISOR_callback_op(CALLBACKOP_register, &cstar) < 0))
22985-#if CONFIG_XEN_COMPAT < 0x030200
22986- return;
22987- use_int80 = 0;
22988-#else
22989- BUG();
22990-#endif
22991-
22992- if (use_sysenter < 0) {
22993- if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
22994- use_sysenter = 1;
22995- if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR)
22996- use_sysenter = 1;
22997- }
22998+ if (HYPERVISOR_callback_op(CALLBACKOP_register, &sysenter) < 0)
22999+ setup_clear_cpu_cap(X86_FEATURE_SYSENTER32);
23000+ if (HYPERVISOR_callback_op(CALLBACKOP_register, &cstar) < 0)
23001+ setup_clear_cpu_cap(X86_FEATURE_SYSCALL32);
23002 }
23003
23004 #define compat_uses_vma 1
23005@@ -250,6 +228,7 @@ static inline void map_compat_vdso(int m
23006 #else /* CONFIG_X86_32 */
23007
23008 #define vdso32_sysenter() (boot_cpu_has(X86_FEATURE_SEP))
23009+#define vdso32_syscall() (boot_cpu_has(X86_FEATURE_SYSCALL32))
23010
23011 extern asmlinkage void ia32pv_cstar_target(void);
23012 static const struct callback_register __cpuinitconst cstar = {
23013@@ -265,13 +244,13 @@ void __cpuinit enable_sep_cpu(void)
23014 .address = { __KERNEL_CS, (unsigned long)ia32pv_sysenter_target },
23015 };
23016
23017- if (boot_cpu_has(X86_FEATURE_SYSCALL)) {
23018+ if (vdso32_syscall()) {
23019 if (HYPERVISOR_callback_op(CALLBACKOP_register, &cstar) != 0)
23020 BUG();
23021 return;
23022 }
23023
23024- if (!boot_cpu_has(X86_FEATURE_SEP))
23025+ if (!vdso32_sysenter())
23026 return;
23027
23028 if (xen_feature(XENFEAT_supervisor_mode_kernel))
23029@@ -341,34 +320,26 @@ int __init sysenter_setup(void)
23030
23031 #ifdef CONFIG_X86_32
23032 gate_vma_init();
23033-#endif
23034
23035-#if defined(CONFIG_X86_64) && CONFIG_XEN_COMPAT < 0x030200
23036- if (use_int80) {
23037- extern const char vdso32_int80_start, vdso32_int80_end;
23038-
23039- vsyscall = &vdso32_int80_start;
23040- vsyscall_len = &vdso32_int80_end - &vdso32_int80_start;
23041- } else
23042-#elif defined(CONFIG_X86_32)
23043- if (boot_cpu_has(X86_FEATURE_SYSCALL)
23044- && (boot_cpu_data.x86_vendor != X86_VENDOR_AMD
23045- || HYPERVISOR_callback_op(CALLBACKOP_register, &cstar) != 0))
23046- setup_clear_cpu_cap(X86_FEATURE_SYSCALL);
23047- barrier(); /* until clear_bit()'s constraints are correct ... */
23048 if (boot_cpu_has(X86_FEATURE_SYSCALL)) {
23049- extern const char vdso32_syscall_start, vdso32_syscall_end;
23050-
23051+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD
23052+ && HYPERVISOR_callback_op(CALLBACKOP_register, &cstar) == 0)
23053+ setup_force_cpu_cap(X86_FEATURE_SYSCALL32);
23054+ else {
23055+ setup_clear_cpu_cap(X86_FEATURE_SYSCALL);
23056+ setup_clear_cpu_cap(X86_FEATURE_SYSCALL32);
23057+ }
23058+ }
23059+#endif
23060+ if (vdso32_syscall()) {
23061 vsyscall = &vdso32_syscall_start;
23062 vsyscall_len = &vdso32_syscall_end - &vdso32_syscall_start;
23063- } else
23064-#endif
23065- if (!vdso32_sysenter()) {
23066- vsyscall = &vdso32_default_start;
23067- vsyscall_len = &vdso32_default_end - &vdso32_default_start;
23068- } else {
23069+ } else if (vdso32_sysenter()){
23070 vsyscall = &vdso32_sysenter_start;
23071 vsyscall_len = &vdso32_sysenter_end - &vdso32_sysenter_start;
23072+ } else {
23073+ vsyscall = &vdso32_int80_start;
23074+ vsyscall_len = &vdso32_int80_end - &vdso32_int80_start;
23075 }
23076
23077 memcpy(syscall_page, vsyscall, vsyscall_len);
23078--- sle11-2009-10-16.orig/arch/x86/xen/Kconfig 2009-02-16 16:17:21.000000000 +0100
23079+++ sle11-2009-10-16/arch/x86/xen/Kconfig 2009-06-04 10:21:39.000000000 +0200
23080@@ -17,7 +17,7 @@ config XEN_MAX_DOMAIN_MEMORY
23081 int "Maximum allowed size of a domain in gigabytes"
23082 default 8 if X86_32
23083 default 32 if X86_64
23084- depends on XEN
23085+ depends on PARAVIRT_XEN
23086 help
23087 The pseudo-physical to machine address array is sized
23088 according to the maximum possible memory size of a Xen
23089@@ -26,5 +26,5 @@ config XEN_MAX_DOMAIN_MEMORY
23090
23091 config XEN_SAVE_RESTORE
23092 bool
23093- depends on PM
23094+ depends on PARAVIRT_XEN && PM
23095 default y
23096\ No newline at end of file
23097--- sle11-2009-10-16.orig/drivers/acpi/processor_core.c 2009-08-26 11:54:44.000000000 +0200
23098+++ sle11-2009-10-16/drivers/acpi/processor_core.c 2009-08-26 12:04:00.000000000 +0200
23099@@ -730,9 +730,11 @@ static int __cpuinit acpi_processor_star
23100 if (result)
23101 goto end;
23102
23103- sysdev = get_cpu_sysdev(pr->id);
23104- if (sysfs_create_link(&device->dev.kobj, &sysdev->kobj, "sysdev"))
23105- return -EFAULT;
23106+ if (pr->id != -1) {
23107+ sysdev = get_cpu_sysdev(pr->id);
23108+ if (sysfs_create_link(&device->dev.kobj, &sysdev->kobj, "sysdev"))
23109+ return -EFAULT;
23110+ }
23111
23112 status = acpi_install_notify_handler(pr->handle, ACPI_DEVICE_NOTIFY,
23113 acpi_processor_notify, pr);
23114@@ -904,7 +906,8 @@ static int acpi_processor_remove(struct
23115 status = acpi_remove_notify_handler(pr->handle, ACPI_DEVICE_NOTIFY,
23116 acpi_processor_notify);
23117
23118- sysfs_remove_link(&device->dev.kobj, "sysdev");
23119+ if (pr->id != -1)
23120+ sysfs_remove_link(&device->dev.kobj, "sysdev");
23121
23122 acpi_processor_remove_fs(device);
23123
23124--- sle11-2009-10-16.orig/drivers/char/tpm/tpm_vtpm.c 2009-08-26 11:52:33.000000000 +0200
23125+++ sle11-2009-10-16/drivers/char/tpm/tpm_vtpm.c 2009-06-04 10:21:39.000000000 +0200
23126@@ -347,7 +347,7 @@ static int _vtpm_send_queued(struct tpm_
23127 {
23128 int rc;
23129 int error = 0;
23130- long flags;
23131+ unsigned long flags;
23132 unsigned char buffer[1];
23133 struct vtpm_state *vtpms;
23134 vtpms = (struct vtpm_state *)chip_get_private(chip);
23135--- sle11-2009-10-16.orig/drivers/misc/Kconfig 2009-10-28 14:55:02.000000000 +0100
23136+++ sle11-2009-10-16/drivers/misc/Kconfig 2009-08-26 12:04:11.000000000 +0200
23137@@ -440,7 +440,7 @@ config ENCLOSURE_SERVICES
23138 config SGI_XP
23139 tristate "Support communication between SGI SSIs"
23140 depends on NET
23141- depends on (IA64_GENERIC || IA64_SGI_SN2 || IA64_SGI_UV || X86_64) && SMP
23142+ depends on (IA64_GENERIC || IA64_SGI_SN2 || IA64_SGI_UV || X86_64) && SMP && !XEN
23143 select IA64_UNCACHED_ALLOCATOR if IA64_GENERIC || IA64_SGI_SN2
23144 select GENERIC_ALLOCATOR if IA64_GENERIC || IA64_SGI_SN2
23145 select SGI_GRU if (IA64_GENERIC || IA64_SGI_UV || X86_64) && SMP
23146@@ -467,7 +467,7 @@ config HP_ILO
23147
23148 config SGI_GRU
23149 tristate "SGI GRU driver"
23150- depends on (X86_64 || IA64_SGI_UV || IA64_GENERIC) && SMP
23151+ depends on (X86_64 || IA64_SGI_UV || IA64_GENERIC) && SMP && !XEN
23152 default n
23153 select MMU_NOTIFIER
23154 ---help---
23155--- sle11-2009-10-16.orig/drivers/pci/msi-xen.c 2009-03-16 16:38:05.000000000 +0100
23156+++ sle11-2009-10-16/drivers/pci/msi-xen.c 2009-06-04 10:21:39.000000000 +0200
23157@@ -90,12 +90,10 @@ arch_teardown_msi_irqs(struct pci_dev *d
23158 }
23159 #endif
23160
23161-static void msi_set_enable(struct pci_dev *dev, int enable)
23162+static void __msi_set_enable(struct pci_dev *dev, int pos, int enable)
23163 {
23164- int pos;
23165 u16 control;
23166
23167- pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
23168 if (pos) {
23169 pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &control);
23170 control &= ~PCI_MSI_FLAGS_ENABLE;
23171@@ -105,6 +103,11 @@ static void msi_set_enable(struct pci_de
23172 }
23173 }
23174
23175+static void msi_set_enable(struct pci_dev *dev, int enable)
23176+{
23177+ __msi_set_enable(dev, pci_find_capability(dev, PCI_CAP_ID_MSI), enable);
23178+}
23179+
23180 static void msix_set_enable(struct pci_dev *dev, int enable)
23181 {
23182 int pos;
23183@@ -568,9 +571,8 @@ int pci_enable_msi(struct pci_dev* dev)
23184
23185 /* Check whether driver already requested for MSI-X irqs */
23186 if (dev->msix_enabled) {
23187- printk(KERN_INFO "PCI: %s: Can't enable MSI. "
23188- "Device already has MSI-X enabled\n",
23189- pci_name(dev));
23190+ dev_info(&dev->dev, "can't enable MSI "
23191+ "(MSI-X already enabled)\n");
23192 return -EINVAL;
23193 }
23194
23195@@ -702,9 +704,8 @@ int pci_enable_msix(struct pci_dev* dev,
23196 temp = dev->irq;
23197 /* Check whether driver already requested for MSI vector */
23198 if (dev->msi_enabled) {
23199- printk(KERN_INFO "PCI: %s: Can't enable MSI-X. "
23200- "Device already has an MSI irq assigned\n",
23201- pci_name(dev));
23202+ dev_info(&dev->dev, "can't enable MSI-X "
23203+ "(MSI IRQ already assigned)\n");
23204 return -EINVAL;
23205 }
23206
23207--- sle11-2009-10-16.orig/drivers/pci/quirks.c 2009-10-28 14:55:02.000000000 +0100
23208+++ sle11-2009-10-16/drivers/pci/quirks.c 2009-06-04 10:21:39.000000000 +0200
23209@@ -44,9 +44,8 @@ static void __devinit quirk_release_reso
23210 /* PCI Host Bridge isn't a target device */
23211 return;
23212 }
23213- printk(KERN_INFO
23214- "PCI: Disable memory decoding and release memory resources [%s].\n",
23215- pci_name(dev));
23216+ dev_info(&dev->dev,
23217+ "disable memory decoding and release memory resources\n");
23218 pci_read_config_word(dev, PCI_COMMAND, &command);
23219 command &= ~PCI_COMMAND_MEMORY;
23220 pci_write_config_word(dev, PCI_COMMAND, command);
23221--- sle11-2009-10-16.orig/drivers/pci/setup-res.c 2009-10-28 14:55:02.000000000 +0100
23222+++ sle11-2009-10-16/drivers/pci/setup-res.c 2009-06-04 10:21:39.000000000 +0200
23223@@ -129,7 +129,7 @@ int pci_claim_resource(struct pci_dev *d
23224 #ifdef CONFIG_PCI_REASSIGN
23225 void pci_disable_bridge_window(struct pci_dev *dev)
23226 {
23227- printk(KERN_DEBUG "PCI: Disable bridge window on %s\n", pci_name(dev));
23228+ dev_dbg(&dev->dev, "disable bridge window\n");
23229
23230 /* MMIO Base/Limit */
23231 pci_write_config_dword(dev, PCI_MEMORY_BASE, 0x0000fff0);
23232@@ -190,8 +190,8 @@ int pci_assign_resource(struct pci_dev *
23233 res->flags &= ~IORESOURCE_STARTALIGN;
23234 if (resno < PCI_BRIDGE_RESOURCES) {
23235 #ifdef CONFIG_PCI_REASSIGN
23236- printk(KERN_DEBUG "PCI: Assign resource(%d) on %s "
23237- "%016llx - %016llx\n", resno, pci_name(dev),
23238+ dev_dbg(&dev->dev, "assign resource(%d) "
23239+ "%016llx - %016llx\n", resno,
23240 (unsigned long long)res->start,
23241 (unsigned long long)res->end);
23242 #endif
23243@@ -235,8 +235,8 @@ int pci_assign_resource_fixed(struct pci
23244 (unsigned long long)res->end);
23245 } else if (resno < PCI_BRIDGE_RESOURCES) {
23246 #ifdef CONFIG_PCI_REASSIGN
23247- printk(KERN_DEBUG "PCI: Assign resource(%d) on %s "
23248- "%016llx - %016llx\n", resno, pci_name(dev),
23249+ dev_dbg(&dev->dev, "assign resource(%d) "
23250+ "%016llx - %016llx\n", resno,
23251 (unsigned long long)res->start,
23252 (unsigned long long)res->end);
23253 #endif
23254--- sle11-2009-10-16.orig/drivers/xen/Makefile 2009-03-16 16:38:05.000000000 +0100
23255+++ sle11-2009-10-16/drivers/xen/Makefile 2009-06-04 10:21:39.000000000 +0200
23256@@ -1,4 +1,4 @@
23257-obj-$(CONFIG_PARAVIRT_XEN) += grant-table.o features.o events.o
23258+obj-$(CONFIG_PARAVIRT_XEN) += grant-table.o features.o events.o manage.o
23259 xen-xencomm-$(CONFIG_PARAVIRT_XEN) := xencomm.o
23260 xen-balloon-$(CONFIG_PARAVIRT_XEN) := balloon.o
23261
23262--- sle11-2009-10-16.orig/drivers/xen/balloon/balloon.c 2009-06-29 15:28:36.000000000 +0200
23263+++ sle11-2009-10-16/drivers/xen/balloon/balloon.c 2009-06-29 15:30:29.000000000 +0200
23264@@ -84,7 +84,7 @@ static unsigned long frame_list[PAGE_SIZ
23265 /* VM /proc information for memory */
23266 extern unsigned long totalram_pages;
23267
23268-#if !defined(MODULE) && defined(CONFIG_HIGHMEM)
23269+#ifdef CONFIG_HIGHMEM
23270 extern unsigned long totalhigh_pages;
23271 #define inc_totalhigh_pages() (totalhigh_pages++)
23272 #define dec_totalhigh_pages() (totalhigh_pages--)
23273--- sle11-2009-10-16.orig/drivers/xen/balloon/sysfs.c 2009-06-29 15:29:24.000000000 +0200
23274+++ sle11-2009-10-16/drivers/xen/balloon/sysfs.c 2009-06-29 15:31:06.000000000 +0200
23275@@ -45,6 +45,7 @@
23276
23277 #define BALLOON_SHOW(name, format, args...) \
23278 static ssize_t show_##name(struct sys_device *dev, \
23279+ struct sysdev_attribute *attr, \
23280 char *buf) \
23281 { \
23282 return sprintf(buf, format, ##args); \
23283@@ -56,14 +57,15 @@ BALLOON_SHOW(low_kb, "%lu\n", PAGES2KB(b
23284 BALLOON_SHOW(high_kb, "%lu\n", PAGES2KB(bs.balloon_high));
23285 BALLOON_SHOW(driver_kb, "%lu\n", PAGES2KB(bs.driver_pages));
23286
23287-static ssize_t show_target_kb(struct sys_device *dev, char *buf)
23288+static ssize_t show_target_kb(struct sys_device *dev,
23289+ struct sysdev_attribute *attr, char *buf)
23290 {
23291 return sprintf(buf, "%lu\n", PAGES2KB(bs.target_pages));
23292 }
23293
23294 static ssize_t store_target_kb(struct sys_device *dev,
23295- const char *buf,
23296- size_t count)
23297+ struct sysdev_attribute *attr,
23298+ const char *buf, size_t count)
23299 {
23300 char memstring[64], *endchar;
23301 unsigned long long target_bytes;
23302--- sle11-2009-10-16.orig/drivers/xen/blktap/blktap.c 2009-04-20 11:40:14.000000000 +0200
23303+++ sle11-2009-10-16/drivers/xen/blktap/blktap.c 2009-06-04 10:21:39.000000000 +0200
23304@@ -54,6 +54,7 @@
23305 #include <linux/gfp.h>
23306 #include <linux/poll.h>
23307 #include <linux/delay.h>
23308+#include <linux/nsproxy.h>
23309 #include <asm/tlbflush.h>
23310
23311 #define MAX_TAP_DEV 256 /*the maximum number of tapdisk ring devices */
23312@@ -498,7 +499,7 @@ found:
23313
23314 if ((class = get_xen_class()) != NULL)
23315 device_create(class, NULL, MKDEV(blktap_major, minor),
23316- "blktap%d", minor);
23317+ NULL, "blktap%d", minor);
23318 }
23319
23320 out:
23321@@ -1683,7 +1684,8 @@ static int __init blkif_init(void)
23322 * We only create the device when a request of a new device is
23323 * made.
23324 */
23325- device_create(class, NULL, MKDEV(blktap_major, 0), "blktap0");
23326+ device_create(class, NULL, MKDEV(blktap_major, 0), NULL,
23327+ "blktap0");
23328 } else {
23329 /* this is bad, but not fatal */
23330 WPRINTK("blktap: sysfs xen_class not created\n");
23331--- sle11-2009-10-16.orig/drivers/xen/char/mem.c 2009-03-16 16:38:05.000000000 +0100
23332+++ sle11-2009-10-16/drivers/xen/char/mem.c 2009-06-04 10:21:39.000000000 +0200
23333@@ -35,7 +35,7 @@ static inline int uncached_access(struct
23334
23335 static inline int range_is_allowed(unsigned long pfn, unsigned long size)
23336 {
23337-#ifdef CONFIG_NONPROMISC_DEVMEM
23338+#ifdef CONFIG_STRICT_DEVMEM
23339 u64 from = ((u64)pfn) << PAGE_SHIFT;
23340 u64 to = from + size;
23341 u64 cursor = from;
23342@@ -172,7 +172,10 @@ static void mmap_mem_close(struct vm_are
23343
23344 static struct vm_operations_struct mmap_mem_ops = {
23345 .open = mmap_mem_open,
23346- .close = mmap_mem_close
23347+ .close = mmap_mem_close,
23348+#ifdef CONFIG_HAVE_IOREMAP_PROT
23349+ .access = generic_access_phys
23350+#endif
23351 };
23352
23353 static int xen_mmap_mem(struct file * file, struct vm_area_struct * vma)
23354--- sle11-2009-10-16.orig/drivers/xen/console/console.c 2009-03-16 16:38:05.000000000 +0100
23355+++ sle11-2009-10-16/drivers/xen/console/console.c 2009-06-04 10:21:39.000000000 +0200
23356@@ -432,9 +432,7 @@ static void __xencons_tx_flush(void)
23357
23358 if (work_done && (xencons_tty != NULL)) {
23359 wake_up_interruptible(&xencons_tty->write_wait);
23360- if ((xencons_tty->flags & (1 << TTY_DO_WRITE_WAKEUP)) &&
23361- (xencons_tty->ldisc.write_wakeup != NULL))
23362- (xencons_tty->ldisc.write_wakeup)(xencons_tty);
23363+ tty_wakeup(xencons_tty);
23364 }
23365 }
23366
23367@@ -635,8 +633,8 @@ static void xencons_close(struct tty_str
23368 tty->closing = 1;
23369 tty_wait_until_sent(tty, 0);
23370 tty_driver_flush_buffer(tty);
23371- if (tty->ldisc.flush_buffer != NULL)
23372- tty->ldisc.flush_buffer(tty);
23373+ if (tty->ldisc.ops->flush_buffer != NULL)
23374+ tty->ldisc.ops->flush_buffer(tty);
23375 tty->closing = 0;
23376 spin_lock_irqsave(&xencons_lock, flags);
23377 xencons_tty = NULL;
23378--- sle11-2009-10-16.orig/drivers/xen/core/evtchn.c 2009-03-16 16:33:40.000000000 +0100
23379+++ sle11-2009-10-16/drivers/xen/core/evtchn.c 2009-06-04 10:21:39.000000000 +0200
23380@@ -746,8 +746,9 @@ static struct irq_chip dynirq_chip = {
23381 };
23382
23383 /* Bitmap indicating which PIRQs require Xen to be notified on unmask. */
23384-static int pirq_eoi_does_unmask;
23385+static bool pirq_eoi_does_unmask;
23386 static unsigned long *pirq_needs_eoi;
23387+static DECLARE_BITMAP(probing_pirq, NR_PIRQS);
23388
23389 static void pirq_unmask_and_notify(unsigned int evtchn, unsigned int irq)
23390 {
23391@@ -794,25 +795,31 @@ static inline void pirq_query_unmask(int
23392 set_bit(irq - PIRQ_BASE, pirq_needs_eoi);
23393 }
23394
23395-/*
23396- * On startup, if there is no action associated with the IRQ then we are
23397- * probing. In this case we should not share with others as it will confuse us.
23398- */
23399-#define probing_irq(_irq) (irq_desc[(_irq)].action == NULL)
23400+static int set_type_pirq(unsigned int irq, unsigned int type)
23401+{
23402+ if (type != IRQ_TYPE_PROBE)
23403+ return -EINVAL;
23404+ set_bit(irq - PIRQ_BASE, probing_pirq);
23405+ return 0;
23406+}
23407
23408 static unsigned int startup_pirq(unsigned int irq)
23409 {
23410 struct evtchn_bind_pirq bind_pirq;
23411 int evtchn = evtchn_from_irq(irq);
23412
23413- if (VALID_EVTCHN(evtchn))
23414+ if (VALID_EVTCHN(evtchn)) {
23415+ clear_bit(irq - PIRQ_BASE, probing_pirq);
23416 goto out;
23417+ }
23418
23419 bind_pirq.pirq = evtchn_get_xen_pirq(irq);
23420 /* NB. We are happy to share unless we are probing. */
23421- bind_pirq.flags = probing_irq(irq) ? 0 : BIND_PIRQ__WILL_SHARE;
23422+ bind_pirq.flags = test_and_clear_bit(irq - PIRQ_BASE, probing_pirq)
23423+ || (irq_desc[irq].status & IRQ_AUTODETECT)
23424+ ? 0 : BIND_PIRQ__WILL_SHARE;
23425 if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_pirq, &bind_pirq) != 0) {
23426- if (!probing_irq(irq))
23427+ if (bind_pirq.flags)
23428 printk(KERN_INFO "Failed to obtain physical IRQ %d\n",
23429 irq);
23430 return 0;
23431@@ -891,6 +898,7 @@ static struct irq_chip pirq_chip = {
23432 .mask_ack = ack_pirq,
23433 .ack = ack_pirq,
23434 .end = end_pirq,
23435+ .set_type = set_type_pirq,
23436 #ifdef CONFIG_SMP
23437 .set_affinity = set_affinity_irq,
23438 #endif
23439@@ -1003,6 +1011,7 @@ void xen_poll_irq(int irq)
23440 BUG();
23441 }
23442
23443+#ifdef CONFIG_PM_SLEEP
23444 static void restore_cpu_virqs(unsigned int cpu)
23445 {
23446 struct evtchn_bind_virq bind_virq;
23447@@ -1095,6 +1104,7 @@ void irq_resume(void)
23448 }
23449
23450 }
23451+#endif
23452
23453 #if defined(CONFIG_X86_IO_APIC)
23454 #define identity_mapped_irq(irq) (!IO_APIC_IRQ((irq) - PIRQ_BASE))
23455@@ -1177,7 +1187,7 @@ void __init xen_init_IRQ(void)
23456 * BITS_TO_LONGS(ALIGN(NR_PIRQS, PAGE_SIZE * 8)));
23457 eoi_gmfn.gmfn = virt_to_machine(pirq_needs_eoi) >> PAGE_SHIFT;
23458 if (HYPERVISOR_physdev_op(PHYSDEVOP_pirq_eoi_gmfn, &eoi_gmfn) == 0)
23459- pirq_eoi_does_unmask = 1;
23460+ pirq_eoi_does_unmask = true;
23461
23462 /* No event channels are 'live' right now. */
23463 for (i = 0; i < NR_EVENT_CHANNELS; i++)
23464--- sle11-2009-10-16.orig/drivers/xen/core/gnttab.c 2008-12-01 11:25:57.000000000 +0100
23465+++ sle11-2009-10-16/drivers/xen/core/gnttab.c 2009-06-04 10:21:39.000000000 +0200
23466@@ -449,6 +449,7 @@ static int map_pte_fn(pte_t *pte, struct
23467 return 0;
23468 }
23469
23470+#ifdef CONFIG_PM_SLEEP
23471 static int unmap_pte_fn(pte_t *pte, struct page *pmd_page,
23472 unsigned long addr, void *data)
23473 {
23474@@ -456,6 +457,7 @@ static int unmap_pte_fn(pte_t *pte, stru
23475 set_pte_at(&init_mm, addr, pte, __pte(0));
23476 return 0;
23477 }
23478+#endif
23479
23480 void *arch_gnttab_alloc_shared(unsigned long *frames)
23481 {
23482@@ -633,6 +635,75 @@ void __gnttab_dma_map_page(struct page *
23483 } while (unlikely(read_seqretry(&gnttab_dma_lock, seq)));
23484 }
23485
23486+#ifdef __HAVE_ARCH_PTE_SPECIAL
23487+
23488+static unsigned int GNTMAP_pte_special;
23489+
23490+bool gnttab_pre_map_adjust(unsigned int cmd, struct gnttab_map_grant_ref *map,
23491+ unsigned int count)
23492+{
23493+ unsigned int i;
23494+
23495+ if (unlikely(cmd != GNTTABOP_map_grant_ref))
23496+ count = 0;
23497+
23498+ for (i = 0; i < count; ++i, ++map) {
23499+ if (!(map->flags & GNTMAP_host_map)
23500+ || !(map->flags & GNTMAP_application_map))
23501+ continue;
23502+ if (GNTMAP_pte_special)
23503+ map->flags |= GNTMAP_pte_special;
23504+ else {
23505+ BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));
23506+ return true;
23507+ }
23508+ }
23509+
23510+ return false;
23511+}
23512+EXPORT_SYMBOL(gnttab_pre_map_adjust);
23513+
23514+#if CONFIG_XEN_COMPAT < 0x030400
23515+int gnttab_post_map_adjust(const struct gnttab_map_grant_ref *map, unsigned int count)
23516+{
23517+ unsigned int i;
23518+ int rc = 0;
23519+
23520+ for (i = 0; i < count && rc == 0; ++i, ++map) {
23521+ pte_t pte;
23522+
23523+ if (!(map->flags & GNTMAP_host_map)
23524+ || !(map->flags & GNTMAP_application_map))
23525+ continue;
23526+
23527+#ifdef CONFIG_X86
23528+ pte = __pte_ma((map->dev_bus_addr | _PAGE_PRESENT | _PAGE_USER
23529+ | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_NX
23530+ | _PAGE_SPECIAL)
23531+ & __supported_pte_mask);
23532+#else
23533+#error Architecture not yet supported.
23534+#endif
23535+ if (!(map->flags & GNTMAP_readonly))
23536+ pte = pte_mkwrite(pte);
23537+
23538+ if (map->flags & GNTMAP_contains_pte) {
23539+ mmu_update_t u;
23540+
23541+ u.ptr = map->host_addr;
23542+ u.val = __pte_val(pte);
23543+ rc = HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF);
23544+ } else
23545+ rc = HYPERVISOR_update_va_mapping(map->host_addr, pte, 0);
23546+ }
23547+
23548+ return rc;
23549+}
23550+EXPORT_SYMBOL(gnttab_post_map_adjust);
23551+#endif
23552+
23553+#endif /* __HAVE_ARCH_PTE_SPECIAL */
23554+
23555 int gnttab_resume(void)
23556 {
23557 if (max_nr_grant_frames() < nr_grant_frames)
23558@@ -640,6 +711,7 @@ int gnttab_resume(void)
23559 return gnttab_map(0, nr_grant_frames - 1);
23560 }
23561
23562+#ifdef CONFIG_PM_SLEEP
23563 int gnttab_suspend(void)
23564 {
23565 #ifdef CONFIG_X86
23566@@ -649,6 +721,7 @@ int gnttab_suspend(void)
23567 #endif
23568 return 0;
23569 }
23570+#endif
23571
23572 #else /* !CONFIG_XEN */
23573
23574@@ -759,6 +832,18 @@ int __devinit gnttab_init(void)
23575 gnttab_free_count = nr_init_grefs - NR_RESERVED_ENTRIES;
23576 gnttab_free_head = NR_RESERVED_ENTRIES;
23577
23578+#if defined(CONFIG_XEN) && defined(__HAVE_ARCH_PTE_SPECIAL)
23579+ if (!xen_feature(XENFEAT_auto_translated_physmap)
23580+ && xen_feature(XENFEAT_gnttab_map_avail_bits)) {
23581+#ifdef CONFIG_X86
23582+ GNTMAP_pte_special = (__pte_val(pte_mkspecial(__pte_ma(0)))
23583+ >> _PAGE_BIT_UNUSED1) << _GNTMAP_guest_avail0;
23584+#else
23585+#error Architecture not yet supported.
23586+#endif
23587+ }
23588+#endif
23589+
23590 return 0;
23591
23592 ini_nomem:
23593--- sle11-2009-10-16.orig/drivers/xen/core/machine_kexec.c 2009-03-16 16:38:05.000000000 +0100
23594+++ sle11-2009-10-16/drivers/xen/core/machine_kexec.c 2009-06-04 10:21:39.000000000 +0200
23595@@ -91,7 +91,7 @@ void __init xen_machine_kexec_setup_reso
23596 xen_hypervisor_res.start = range.start;
23597 xen_hypervisor_res.end = range.start + range.size - 1;
23598 xen_hypervisor_res.flags = IORESOURCE_BUSY | IORESOURCE_MEM;
23599-#ifdef CONFIG_X86_64
23600+#ifdef CONFIG_X86
23601 insert_resource(&iomem_resource, &xen_hypervisor_res);
23602 #endif
23603
23604@@ -106,7 +106,7 @@ void __init xen_machine_kexec_setup_reso
23605 if (range.size) {
23606 crashk_res.start = range.start;
23607 crashk_res.end = range.start + range.size - 1;
23608-#ifdef CONFIG_X86_64
23609+#ifdef CONFIG_X86
23610 insert_resource(&iomem_resource, &crashk_res);
23611 #endif
23612 }
23613@@ -160,7 +160,7 @@ void __init xen_machine_kexec_setup_reso
23614 return;
23615 }
23616
23617-#ifndef CONFIG_X86_64
23618+#ifndef CONFIG_X86
23619 void __init xen_machine_kexec_register_resources(struct resource *res)
23620 {
23621 request_resource(res, &xen_hypervisor_res);
23622--- sle11-2009-10-16.orig/drivers/xen/core/machine_reboot.c 2009-10-28 14:55:02.000000000 +0100
23623+++ sle11-2009-10-16/drivers/xen/core/machine_reboot.c 2009-06-04 10:21:39.000000000 +0200
23624@@ -57,6 +57,7 @@ EXPORT_SYMBOL(machine_restart);
23625 EXPORT_SYMBOL(machine_halt);
23626 EXPORT_SYMBOL(machine_power_off);
23627
23628+#ifdef CONFIG_PM_SLEEP
23629 static void pre_suspend(void)
23630 {
23631 HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page;
23632@@ -111,6 +112,7 @@ static void post_suspend(int suspend_can
23633 HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
23634 virt_to_mfn(pfn_to_mfn_frame_list_list);
23635 }
23636+#endif
23637
23638 #else /* !(defined(__i386__) || defined(__x86_64__)) */
23639
23640@@ -129,6 +131,7 @@ static void post_suspend(int suspend_can
23641
23642 #endif
23643
23644+#ifdef CONFIG_PM_SLEEP
23645 struct suspend {
23646 int fast_suspend;
23647 void (*resume_notifier)(int);
23648@@ -222,7 +225,8 @@ int __xen_suspend(int fast_suspend, void
23649
23650 if (fast_suspend) {
23651 xenbus_suspend();
23652- err = stop_machine_run(take_machine_down, &suspend, 0);
23653+ err = stop_machine(take_machine_down, &suspend,
23654+ &cpumask_of_cpu(0));
23655 if (err < 0)
23656 xenbus_suspend_cancel();
23657 } else {
23658@@ -245,3 +249,4 @@ int __xen_suspend(int fast_suspend, void
23659
23660 return 0;
23661 }
23662+#endif
23663--- sle11-2009-10-16.orig/drivers/xen/core/reboot.c 2009-02-16 16:17:21.000000000 +0100
23664+++ sle11-2009-10-16/drivers/xen/core/reboot.c 2009-06-04 10:21:39.000000000 +0200
23665@@ -29,17 +29,12 @@ MODULE_LICENSE("Dual BSD/GPL");
23666 /* Ignore multiple shutdown requests. */
23667 static int shutting_down = SHUTDOWN_INVALID;
23668
23669-/* Was last suspend request cancelled? */
23670-static int suspend_cancelled;
23671-
23672 /* Can we leave APs online when we suspend? */
23673 static int fast_suspend;
23674
23675 static void __shutdown_handler(struct work_struct *unused);
23676 static DECLARE_DELAYED_WORK(shutdown_work, __shutdown_handler);
23677
23678-static int setup_suspend_evtchn(void);
23679-
23680 int __xen_suspend(int fast_suspend, void (*resume_notifier)(int));
23681
23682 static int shutdown_process(void *__unused)
23683@@ -69,6 +64,13 @@ static int shutdown_process(void *__unus
23684 return 0;
23685 }
23686
23687+#ifdef CONFIG_PM_SLEEP
23688+
23689+static int setup_suspend_evtchn(void);
23690+
23691+/* Was last suspend request cancelled? */
23692+static int suspend_cancelled;
23693+
23694 static void xen_resume_notifier(int _suspend_cancelled)
23695 {
23696 int old_state = xchg(&shutting_down, SHUTDOWN_RESUMING);
23697@@ -118,6 +120,10 @@ static int xen_suspend(void *__unused)
23698 return 0;
23699 }
23700
23701+#else
23702+# define xen_suspend NULL
23703+#endif
23704+
23705 static void switch_shutdown_state(int new_state)
23706 {
23707 int prev_state, old_state = SHUTDOWN_INVALID;
23708@@ -194,8 +200,10 @@ static void shutdown_handler(struct xenb
23709 new_state = SHUTDOWN_POWEROFF;
23710 else if (strcmp(str, "reboot") == 0)
23711 ctrl_alt_del();
23712+#ifdef CONFIG_PM_SLEEP
23713 else if (strcmp(str, "suspend") == 0)
23714 new_state = SHUTDOWN_SUSPEND;
23715+#endif
23716 else if (strcmp(str, "halt") == 0)
23717 new_state = SHUTDOWN_HALT;
23718 else
23719@@ -247,6 +255,7 @@ static struct xenbus_watch sysrq_watch =
23720 .callback = sysrq_handler
23721 };
23722
23723+#ifdef CONFIG_PM_SLEEP
23724 static irqreturn_t suspend_int(int irq, void* dev_id)
23725 {
23726 switch_shutdown_state(SHUTDOWN_SUSPEND);
23727@@ -274,6 +283,9 @@ static int setup_suspend_evtchn(void)
23728
23729 return 0;
23730 }
23731+#else
23732+#define setup_suspend_evtchn() 0
23733+#endif
23734
23735 static int setup_shutdown_watcher(void)
23736 {
23737--- sle11-2009-10-16.orig/drivers/xen/core/smpboot.c 2009-03-16 16:38:05.000000000 +0100
23738+++ sle11-2009-10-16/drivers/xen/core/smpboot.c 2009-06-04 10:21:39.000000000 +0200
23739@@ -27,6 +27,7 @@
23740
23741 extern irqreturn_t smp_reschedule_interrupt(int, void *);
23742 extern irqreturn_t smp_call_function_interrupt(int, void *);
23743+extern irqreturn_t smp_call_function_single_interrupt(int, void *);
23744
23745 extern int local_setup_timer(unsigned int cpu);
23746 extern void local_teardown_timer(unsigned int cpu);
23747@@ -50,8 +51,10 @@ EXPORT_PER_CPU_SYMBOL(cpu_info);
23748
23749 static DEFINE_PER_CPU(int, resched_irq);
23750 static DEFINE_PER_CPU(int, callfunc_irq);
23751+static DEFINE_PER_CPU(int, call1func_irq);
23752 static char resched_name[NR_CPUS][15];
23753 static char callfunc_name[NR_CPUS][15];
23754+static char call1func_name[NR_CPUS][15];
23755
23756 #ifdef CONFIG_X86_LOCAL_APIC
23757 #define set_cpu_to_apicid(cpu, apicid) (per_cpu(x86_cpu_to_apicid, cpu) = (apicid))
23758@@ -73,15 +76,13 @@ void __init prefill_possible_map(void)
23759
23760 for (i = 0; i < NR_CPUS; i++) {
23761 rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL);
23762- if (rc >= 0)
23763+ if (rc >= 0) {
23764 cpu_set(i, cpu_possible_map);
23765+ nr_cpu_ids = i + 1;
23766+ }
23767 }
23768 }
23769
23770-void __init smp_alloc_memory(void)
23771-{
23772-}
23773-
23774 static inline void
23775 set_cpu_sibling_map(unsigned int cpu)
23776 {
23777@@ -110,7 +111,8 @@ static int __cpuinit xen_smp_intr_init(u
23778 {
23779 int rc;
23780
23781- per_cpu(resched_irq, cpu) = per_cpu(callfunc_irq, cpu) = -1;
23782+ per_cpu(resched_irq, cpu) = per_cpu(callfunc_irq, cpu) =
23783+ per_cpu(call1func_irq, cpu) = -1;
23784
23785 sprintf(resched_name[cpu], "resched%u", cpu);
23786 rc = bind_ipi_to_irqhandler(RESCHEDULE_VECTOR,
23787@@ -134,6 +136,17 @@ static int __cpuinit xen_smp_intr_init(u
23788 goto fail;
23789 per_cpu(callfunc_irq, cpu) = rc;
23790
23791+ sprintf(call1func_name[cpu], "call1func%u", cpu);
23792+ rc = bind_ipi_to_irqhandler(CALL_FUNC_SINGLE_VECTOR,
23793+ cpu,
23794+ smp_call_function_single_interrupt,
23795+ IRQF_DISABLED|IRQF_NOBALANCING,
23796+ call1func_name[cpu],
23797+ NULL);
23798+ if (rc < 0)
23799+ goto fail;
23800+ per_cpu(call1func_irq, cpu) = rc;
23801+
23802 rc = xen_spinlock_init(cpu);
23803 if (rc < 0)
23804 goto fail;
23805@@ -148,6 +161,8 @@ static int __cpuinit xen_smp_intr_init(u
23806 unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL);
23807 if (per_cpu(callfunc_irq, cpu) >= 0)
23808 unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL);
23809+ if (per_cpu(call1func_irq, cpu) >= 0)
23810+ unbind_from_irqhandler(per_cpu(call1func_irq, cpu), NULL);
23811 xen_spinlock_cleanup(cpu);
23812 return rc;
23813 }
23814@@ -160,6 +175,7 @@ static void __cpuexit xen_smp_intr_exit(
23815
23816 unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL);
23817 unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL);
23818+ unbind_from_irqhandler(per_cpu(call1func_irq, cpu), NULL);
23819 xen_spinlock_cleanup(cpu);
23820 }
23821 #endif
23822@@ -167,11 +183,7 @@ static void __cpuexit xen_smp_intr_exit(
23823 void __cpuinit cpu_bringup(void)
23824 {
23825 cpu_init();
23826-#ifdef __i386__
23827 identify_secondary_cpu(&current_cpu_data);
23828-#else
23829- identify_cpu(&current_cpu_data);
23830-#endif
23831 touch_softlockup_watchdog();
23832 preempt_disable();
23833 local_irq_enable();
23834@@ -251,9 +263,6 @@ void __init smp_prepare_cpus(unsigned in
23835 struct task_struct *idle;
23836 int apicid;
23837 struct vcpu_get_physid cpu_id;
23838-#ifdef __x86_64__
23839- struct desc_ptr *gdt_descr;
23840-#endif
23841 void *gdt_addr;
23842
23843 apicid = 0;
23844@@ -266,7 +275,7 @@ void __init smp_prepare_cpus(unsigned in
23845
23846 current_thread_info()->cpu = 0;
23847
23848- for (cpu = 0; cpu < NR_CPUS; cpu++) {
23849+ for_each_possible_cpu (cpu) {
23850 cpus_clear(per_cpu(cpu_sibling_map, cpu));
23851 cpus_clear(per_cpu(cpu_core_map, cpu));
23852 }
23853@@ -293,21 +302,10 @@ void __init smp_prepare_cpus(unsigned in
23854 if (IS_ERR(idle))
23855 panic("failed fork for CPU %d", cpu);
23856
23857-#ifdef __x86_64__
23858- gdt_descr = &cpu_gdt_descr[cpu];
23859- gdt_descr->address = get_zeroed_page(GFP_KERNEL);
23860- if (unlikely(!gdt_descr->address)) {
23861- printk(KERN_CRIT "CPU%d failed to allocate GDT\n",
23862- cpu);
23863- continue;
23864- }
23865- gdt_descr->size = GDT_SIZE;
23866- memcpy((void *)gdt_descr->address, cpu_gdt_table, GDT_SIZE);
23867- gdt_addr = (void *)gdt_descr->address;
23868-#else
23869+#ifdef __i386__
23870 init_gdt(cpu);
23871- gdt_addr = get_cpu_gdt_table(cpu);
23872 #endif
23873+ gdt_addr = get_cpu_gdt_table(cpu);
23874 make_page_readonly(gdt_addr, XENFEAT_writable_descriptor_tables);
23875
23876 apicid = cpu;
23877@@ -353,8 +351,8 @@ void __init smp_prepare_boot_cpu(void)
23878 {
23879 #ifdef __i386__
23880 init_gdt(smp_processor_id());
23881- switch_to_new_gdt();
23882 #endif
23883+ switch_to_new_gdt();
23884 prefill_possible_map();
23885 }
23886
23887--- sle11-2009-10-16.orig/drivers/xen/core/spinlock.c 2009-03-16 16:33:40.000000000 +0100
23888+++ sle11-2009-10-16/drivers/xen/core/spinlock.c 2009-06-04 10:36:24.000000000 +0200
23889@@ -5,6 +5,8 @@
23890 * portions of this file.
23891 */
23892
23893+#if CONFIG_XEN_COMPAT >= 0x030200
23894+
23895 #include <linux/init.h>
23896 #include <linux/irq.h>
23897 #include <linux/kernel.h>
23898@@ -73,9 +75,9 @@ int xen_spin_wait(raw_spinlock_t *lock,
23899 /* announce we're spinning */
23900 spinning.ticket = token;
23901 spinning.lock = lock;
23902- spinning.prev = __get_cpu_var(spinning);
23903+ spinning.prev = x86_read_percpu(spinning);
23904 smp_wmb();
23905- __get_cpu_var(spinning) = &spinning;
23906+ x86_write_percpu(spinning, &spinning);
23907
23908 /* clear pending */
23909 xen_clear_irq_pending(irq);
23910@@ -102,7 +104,7 @@ int xen_spin_wait(raw_spinlock_t *lock,
23911 kstat_this_cpu.irqs[irq] += !rc;
23912
23913 /* announce we're done */
23914- __get_cpu_var(spinning) = spinning.prev;
23915+ x86_write_percpu(spinning, spinning.prev);
23916 rm_lock = &__get_cpu_var(spinning_rm_lock);
23917 raw_local_irq_save(flags);
23918 __raw_write_lock(rm_lock);
23919@@ -159,3 +161,5 @@ void xen_spin_kick(raw_spinlock_t *lock,
23920 }
23921 }
23922 EXPORT_SYMBOL(xen_spin_kick);
23923+
23924+#endif /* CONFIG_XEN_COMPAT >= 0x030200 */
23925--- sle11-2009-10-16.orig/drivers/xen/fbfront/xenfb.c 2009-03-16 16:38:05.000000000 +0100
23926+++ sle11-2009-10-16/drivers/xen/fbfront/xenfb.c 2009-06-04 10:21:39.000000000 +0200
23927@@ -18,6 +18,7 @@
23928 * frame buffer.
23929 */
23930
23931+#include <linux/console.h>
23932 #include <linux/kernel.h>
23933 #include <linux/errno.h>
23934 #include <linux/fb.h>
23935@@ -545,6 +546,28 @@ static unsigned long vmalloc_to_mfn(void
23936 return pfn_to_mfn(vmalloc_to_pfn(address));
23937 }
23938
23939+static __devinit void
23940+xenfb_make_preferred_console(void)
23941+{
23942+ struct console *c;
23943+
23944+ if (console_set_on_cmdline)
23945+ return;
23946+
23947+ acquire_console_sem();
23948+ for (c = console_drivers; c; c = c->next) {
23949+ if (!strcmp(c->name, "tty") && c->index == 0)
23950+ break;
23951+ }
23952+ release_console_sem();
23953+ if (c) {
23954+ unregister_console(c);
23955+ c->flags |= CON_CONSDEV;
23956+ c->flags &= ~CON_PRINTBUFFER; /* don't print again */
23957+ register_console(c);
23958+ }
23959+}
23960+
23961 static int __devinit xenfb_probe(struct xenbus_device *dev,
23962 const struct xenbus_device_id *id)
23963 {
23964@@ -665,6 +688,7 @@ static int __devinit xenfb_probe(struct
23965 if (ret < 0)
23966 goto error;
23967
23968+ xenfb_make_preferred_console();
23969 return 0;
23970
23971 error_nomem:
23972@@ -882,4 +906,5 @@ static void __exit xenfb_cleanup(void)
23973 module_init(xenfb_init);
23974 module_exit(xenfb_cleanup);
23975
23976+MODULE_DESCRIPTION("Xen virtual framebuffer device frontend");
23977 MODULE_LICENSE("GPL");
23978--- sle11-2009-10-16.orig/drivers/xen/fbfront/xenkbd.c 2009-03-04 11:25:55.000000000 +0100
23979+++ sle11-2009-10-16/drivers/xen/fbfront/xenkbd.c 2009-06-04 10:21:39.000000000 +0200
23980@@ -350,4 +350,5 @@ static void __exit xenkbd_cleanup(void)
23981 module_init(xenkbd_init);
23982 module_exit(xenkbd_cleanup);
23983
23984+MODULE_DESCRIPTION("Xen virtual keyboard/pointer device frontend");
23985 MODULE_LICENSE("GPL");
23986--- sle11-2009-10-16.orig/drivers/xen/gntdev/gntdev.c 2009-03-16 16:38:05.000000000 +0100
23987+++ sle11-2009-10-16/drivers/xen/gntdev/gntdev.c 2009-06-04 10:21:39.000000000 +0200
23988@@ -418,7 +418,7 @@ static int __init gntdev_init(void)
23989 }
23990
23991 device = device_create(class, NULL, MKDEV(gntdev_major, 0),
23992- GNTDEV_NAME);
23993+ NULL, GNTDEV_NAME);
23994 if (IS_ERR(device)) {
23995 printk(KERN_ERR "Error creating gntdev device in xen_class\n");
23996 printk(KERN_ERR "gntdev created with major number = %d\n",
23997--- sle11-2009-10-16.orig/drivers/xen/netfront/accel.c 2009-03-30 16:39:19.000000000 +0200
23998+++ sle11-2009-10-16/drivers/xen/netfront/accel.c 2009-06-04 10:21:39.000000000 +0200
23999@@ -28,6 +28,7 @@
24000 * IN THE SOFTWARE.
24001 */
24002
24003+#include <linux/version.h>
24004 #include <linux/netdevice.h>
24005 #include <linux/skbuff.h>
24006 #include <linux/list.h>
24007--- sle11-2009-10-16.orig/drivers/xen/netfront/netfront.c 2009-03-30 16:40:17.000000000 +0200
24008+++ sle11-2009-10-16/drivers/xen/netfront/netfront.c 2009-06-04 10:21:39.000000000 +0200
24009@@ -640,7 +640,7 @@ static int network_open(struct net_devic
24010 }
24011 spin_unlock_bh(&np->rx_lock);
24012
24013- network_maybe_wake_tx(dev);
24014+ netif_start_queue(dev);
24015
24016 return 0;
24017 }
24018--- sle11-2009-10-16.orig/drivers/xen/sfc_netback/accel.h 2009-03-30 16:00:09.000000000 +0200
24019+++ sle11-2009-10-16/drivers/xen/sfc_netback/accel.h 2009-06-04 10:21:39.000000000 +0200
24020@@ -25,6 +25,7 @@
24021 #ifndef NETBACK_ACCEL_H
24022 #define NETBACK_ACCEL_H
24023
24024+#include <linux/version.h>
24025 #include <linux/slab.h>
24026 #include <linux/ip.h>
24027 #include <linux/tcp.h>
24028--- sle11-2009-10-16.orig/drivers/xen/sfc_netfront/accel.h 2009-03-30 16:34:56.000000000 +0200
24029+++ sle11-2009-10-16/drivers/xen/sfc_netfront/accel.h 2009-06-04 10:21:39.000000000 +0200
24030@@ -35,6 +35,7 @@
24031 #include <xen/evtchn.h>
24032
24033 #include <linux/kernel.h>
24034+#include <linux/version.h>
24035 #include <linux/list.h>
24036
24037 enum netfront_accel_post_status {
24038--- sle11-2009-10-16.orig/drivers/xen/xenbus/xenbus_client.c 2009-03-24 10:13:17.000000000 +0100
24039+++ sle11-2009-10-16/drivers/xen/xenbus/xenbus_client.c 2009-06-04 10:21:39.000000000 +0200
24040@@ -150,7 +150,7 @@ int xenbus_watch_pathfmt(struct xenbus_d
24041 char *path;
24042
24043 va_start(ap, pathfmt);
24044- path = kvasprintf(GFP_KERNEL, pathfmt, ap);
24045+ path = kvasprintf(GFP_NOIO | __GFP_HIGH, pathfmt, ap);
24046 va_end(ap);
24047
24048 if (!path) {
24049--- sle11-2009-10-16.orig/drivers/xen/xenbus/xenbus_comms.c 2009-02-16 16:17:21.000000000 +0100
24050+++ sle11-2009-10-16/drivers/xen/xenbus/xenbus_comms.c 2009-06-04 10:21:39.000000000 +0200
24051@@ -228,14 +228,11 @@ int xb_init_comms(void)
24052 intf->rsp_cons = intf->rsp_prod;
24053 }
24054
24055+#if defined(CONFIG_XEN) || defined(MODULE)
24056 if (xenbus_irq)
24057 unbind_from_irqhandler(xenbus_irq, &xb_waitq);
24058
24059-#if defined(CONFIG_XEN) || defined(MODULE)
24060 err = bind_caller_port_to_irqhandler(
24061-#else
24062- err = bind_evtchn_to_irqhandler(
24063-#endif
24064 xen_store_evtchn, wake_waiting,
24065 0, "xenbus", &xb_waitq);
24066 if (err <= 0) {
24067@@ -244,6 +241,20 @@ int xb_init_comms(void)
24068 }
24069
24070 xenbus_irq = err;
24071+#else
24072+ if (xenbus_irq) {
24073+ /* Already have an irq; assume we're resuming */
24074+ rebind_evtchn_irq(xen_store_evtchn, xenbus_irq);
24075+ } else {
24076+ err = bind_evtchn_to_irqhandler(xen_store_evtchn, wake_waiting,
24077+ 0, "xenbus", &xb_waitq);
24078+ if (err <= 0) {
24079+ printk(KERN_ERR "XENBUS request irq failed %i\n", err);
24080+ return err;
24081+ }
24082+ xenbus_irq = err;
24083+ }
24084+#endif
24085
24086 return 0;
24087 }
24088--- sle11-2009-10-16.orig/drivers/xen/xenbus/xenbus_probe.c 2009-03-16 16:38:05.000000000 +0100
24089+++ sle11-2009-10-16/drivers/xen/xenbus/xenbus_probe.c 2009-06-04 10:21:39.000000000 +0200
24090@@ -36,6 +36,7 @@
24091 __FUNCTION__, __LINE__, ##args)
24092
24093 #include <linux/kernel.h>
24094+#include <linux/version.h>
24095 #include <linux/err.h>
24096 #include <linux/string.h>
24097 #include <linux/ctype.h>
82094b55
AF
24098--- sle11-2009-10-16.orig/fs/aio.c 2009-03-24 10:13:25.000000000 +0100
24099+++ sle11-2009-10-16/fs/aio.c 2009-06-04 10:21:39.000000000 +0200
2cb7cef9
BS
24100@@ -1335,7 +1335,7 @@ static int make_aio_fd(struct kioctx *io
24101 int fd;
24102 struct file *file;
24103
24104- fd = anon_inode_getfd("[aioq]", &aioq_fops, ioctx);
24105+ fd = anon_inode_getfd("[aioq]", &aioq_fops, ioctx, 0);
24106 if (fd < 0)
24107 return fd;
24108
82094b55
AF
24109--- sle11-2009-10-16.orig/include/asm-generic/pgtable.h 2009-03-04 11:28:34.000000000 +0100
24110+++ sle11-2009-10-16/include/asm-generic/pgtable.h 2009-06-04 10:21:39.000000000 +0200
2cb7cef9
BS
24111@@ -99,10 +99,6 @@ static inline void ptep_set_wrprotect(st
24112 }
24113 #endif
24114
24115-#ifndef arch_change_pte_range
24116-#define arch_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable) 0
24117-#endif
24118-
24119 #ifndef __HAVE_ARCH_PTE_SAME
24120 #define pte_same(A,B) (pte_val(A) == pte_val(B))
24121 #endif
82094b55
AF
24122--- sle11-2009-10-16.orig/include/asm-x86/dma-mapping.h 2009-03-16 16:38:05.000000000 +0100
24123+++ sle11-2009-10-16/include/asm-x86/dma-mapping.h 2009-06-04 10:21:39.000000000 +0200
2cb7cef9
BS
24124@@ -74,7 +74,7 @@ static inline struct dma_mapping_ops *ge
24125 /* Make sure we keep the same behaviour */
24126 static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
24127 {
24128-#ifdef CONFIG_X86_32
24129+#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN)
24130 return 0;
24131 #else
24132 struct dma_mapping_ops *ops = get_dma_ops(dev);
82094b55
AF
24133--- sle11-2009-10-16.orig/include/asm-x86/kexec.h 2008-12-01 11:11:08.000000000 +0100
24134+++ sle11-2009-10-16/include/asm-x86/kexec.h 2009-06-04 10:21:39.000000000 +0200
2cb7cef9
BS
24135@@ -10,6 +10,7 @@
24136 # define VA_PTE_0 5
24137 # define PA_PTE_1 6
24138 # define VA_PTE_1 7
24139+# ifndef CONFIG_XEN
24140 # define PA_SWAP_PAGE 8
24141 # ifdef CONFIG_X86_PAE
24142 # define PA_PMD_0 9
24143@@ -20,6 +21,18 @@
24144 # else
24145 # define PAGES_NR 9
24146 # endif
24147+# else /* CONFIG_XEN */
24148+/*
24149+ * The hypervisor interface implicitly requires that all entries (except
24150+ * for possibly the final one) are arranged in matching PA_/VA_ pairs.
24151+ */
24152+# define PA_PMD_0 8
24153+# define VA_PMD_0 9
24154+# define PA_PMD_1 10
24155+# define VA_PMD_1 11
24156+# define PA_SWAP_PAGE 12
24157+# define PAGES_NR 13
24158+# endif /* CONFIG_XEN */
24159 #else
24160 # define PA_CONTROL_PAGE 0
24161 # define VA_CONTROL_PAGE 1
82094b55
AF
24162--- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/desc.h 2009-03-16 16:38:05.000000000 +0100
24163+++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/desc.h 2009-06-04 10:21:39.000000000 +0200
2cb7cef9
BS
24164@@ -31,11 +31,17 @@ extern struct desc_ptr idt_descr;
24165 extern gate_desc idt_table[];
24166 #endif
24167
24168+struct gdt_page {
24169+ struct desc_struct gdt[GDT_ENTRIES];
24170+} __attribute__((aligned(PAGE_SIZE)));
24171+DECLARE_PER_CPU(struct gdt_page, gdt_page);
24172+
24173+static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu)
24174+{
24175+ return per_cpu(gdt_page, cpu).gdt;
24176+}
24177+
24178 #ifdef CONFIG_X86_64
24179-extern struct desc_struct cpu_gdt_table[GDT_ENTRIES];
24180-extern struct desc_ptr cpu_gdt_descr[];
24181-/* the cpu gdt accessor */
24182-#define get_cpu_gdt_table(x) ((struct desc_struct *)cpu_gdt_descr[x].address)
24183
24184 static inline void pack_gate(gate_desc *gate, unsigned type, unsigned long func,
24185 unsigned dpl, unsigned ist, unsigned seg)
24186@@ -53,16 +59,6 @@ static inline void pack_gate(gate_desc *
24187 }
24188
24189 #else
24190-struct gdt_page {
24191- struct desc_struct gdt[GDT_ENTRIES];
24192-} __attribute__((aligned(PAGE_SIZE)));
24193-DECLARE_PER_CPU(struct gdt_page, gdt_page);
24194-
24195-static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu)
24196-{
24197- return per_cpu(gdt_page, cpu).gdt;
24198-}
24199-
24200 static inline void pack_gate(gate_desc *gate, unsigned char type,
24201 unsigned long base, unsigned dpl, unsigned flags,
24202 unsigned short seg)
24203@@ -333,6 +329,28 @@ static inline void set_intr_gate(unsigne
24204 _set_gate(n, GATE_INTERRUPT, addr, 0, 0, __KERNEL_CS);
24205 }
24206
24207+#define SYS_VECTOR_FREE 0
24208+#define SYS_VECTOR_ALLOCED 1
24209+
24210+extern int first_system_vector;
24211+extern char system_vectors[];
24212+
24213+static inline void alloc_system_vector(int vector)
24214+{
24215+ if (system_vectors[vector] == SYS_VECTOR_FREE) {
24216+ system_vectors[vector] = SYS_VECTOR_ALLOCED;
24217+ if (first_system_vector > vector)
24218+ first_system_vector = vector;
24219+ } else
24220+ BUG();
24221+}
24222+
24223+static inline void alloc_intr_gate(unsigned int n, void *addr)
24224+{
24225+ alloc_system_vector(n);
24226+ set_intr_gate(n, addr);
24227+}
24228+
24229 /*
24230 * This routine sets up an interrupt gate at directory privilege level 3.
24231 */
82094b55
AF
24232--- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/fixmap.h 2009-03-16 16:38:05.000000000 +0100
24233+++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/fixmap.h 2009-06-04 10:21:39.000000000 +0200
2cb7cef9
BS
24234@@ -7,7 +7,58 @@
24235 # include "fixmap_64.h"
24236 #endif
24237
24238+extern int fixmaps_set;
24239+
24240+void xen_set_fixmap(enum fixed_addresses, maddr_t, pgprot_t);
24241+
24242+static inline void __set_fixmap(enum fixed_addresses idx,
24243+ maddr_t phys, pgprot_t flags)
24244+{
24245+ xen_set_fixmap(idx, phys, flags);
24246+}
24247+
24248+#define set_fixmap(idx, phys) \
24249+ __set_fixmap(idx, phys, PAGE_KERNEL)
24250+
24251+/*
24252+ * Some hardware wants to get fixmapped without caching.
24253+ */
24254+#define set_fixmap_nocache(idx, phys) \
24255+ __set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE)
24256+
24257 #define clear_fixmap(idx) \
24258 __set_fixmap(idx, 0, __pgprot(0))
24259
24260+#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT))
24261+#define __virt_to_fix(x) ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT)
24262+
24263+extern void __this_fixmap_does_not_exist(void);
24264+
24265+/*
24266+ * 'index to address' translation. If anyone tries to use the idx
24267+ * directly without translation, we catch the bug with a NULL-deference
24268+ * kernel oops. Illegal ranges of incoming indices are caught too.
24269+ */
24270+static __always_inline unsigned long fix_to_virt(const unsigned int idx)
24271+{
24272+ /*
24273+ * this branch gets completely eliminated after inlining,
24274+ * except when someone tries to use fixaddr indices in an
24275+ * illegal way. (such as mixing up address types or using
24276+ * out-of-range indices).
24277+ *
24278+ * If it doesn't get removed, the linker will complain
24279+ * loudly with a reasonably clear error message..
24280+ */
24281+ if (idx >= __end_of_fixed_addresses)
24282+ __this_fixmap_does_not_exist();
24283+
24284+ return __fix_to_virt(idx);
24285+}
24286+
24287+static inline unsigned long virt_to_fix(const unsigned long vaddr)
24288+{
24289+ BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START);
24290+ return __virt_to_fix(vaddr);
24291+}
24292 #endif
82094b55
AF
24293--- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/fixmap_32.h 2009-03-16 16:38:05.000000000 +0100
24294+++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/fixmap_32.h 2009-06-04 10:21:39.000000000 +0200
2cb7cef9
BS
24295@@ -58,10 +58,17 @@ enum fixed_addresses {
24296 #ifdef CONFIG_X86_LOCAL_APIC
24297 FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */
24298 #endif
24299-#if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_XEN)
24300+#ifndef CONFIG_XEN
24301+#ifdef CONFIG_X86_IO_APIC
24302 FIX_IO_APIC_BASE_0,
24303 FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS-1,
24304 #endif
24305+#else
24306+ FIX_SHARED_INFO,
24307+#define NR_FIX_ISAMAPS 256
24308+ FIX_ISAMAP_END,
24309+ FIX_ISAMAP_BEGIN = FIX_ISAMAP_END + NR_FIX_ISAMAPS - 1,
24310+#endif
24311 #ifdef CONFIG_X86_VISWS_APIC
24312 FIX_CO_CPU, /* Cobalt timer */
24313 FIX_CO_APIC, /* Cobalt APIC Redirection Table */
24314@@ -78,51 +85,38 @@ enum fixed_addresses {
24315 FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */
24316 FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
24317 #endif
24318-#ifdef CONFIG_ACPI
24319- FIX_ACPI_BEGIN,
24320- FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1,
24321-#endif
24322 #ifdef CONFIG_PCI_MMCONFIG
24323 FIX_PCIE_MCFG,
24324 #endif
24325 #ifdef CONFIG_PARAVIRT
24326 FIX_PARAVIRT_BOOTMAP,
24327 #endif
24328- FIX_SHARED_INFO,
24329-#define NR_FIX_ISAMAPS 256
24330- FIX_ISAMAP_END,
24331- FIX_ISAMAP_BEGIN = FIX_ISAMAP_END + NR_FIX_ISAMAPS - 1,
24332 __end_of_permanent_fixed_addresses,
24333 /*
24334 * 256 temporary boot-time mappings, used by early_ioremap(),
24335 * before ioremap() is functional.
24336 *
24337- * We round it up to the next 512 pages boundary so that we
24338+ * We round it up to the next 256 pages boundary so that we
24339 * can have a single pgd entry and a single pte table:
24340 */
24341 #define NR_FIX_BTMAPS 64
24342 #define FIX_BTMAPS_NESTING 4
24343- FIX_BTMAP_END = __end_of_permanent_fixed_addresses + 512 -
24344- (__end_of_permanent_fixed_addresses & 511),
24345+ FIX_BTMAP_END = __end_of_permanent_fixed_addresses + 256 -
24346+ (__end_of_permanent_fixed_addresses & 255),
24347 FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_NESTING - 1,
24348 FIX_WP_TEST,
24349+#ifdef CONFIG_ACPI
24350+ FIX_ACPI_BEGIN,
24351+ FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1,
24352+#endif
24353 #ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
24354 FIX_OHCI1394_BASE,
24355 #endif
24356 __end_of_fixed_addresses
24357 };
24358
24359-extern void __set_fixmap(enum fixed_addresses idx,
24360- maddr_t phys, pgprot_t flags);
24361 extern void reserve_top_address(unsigned long reserve);
24362
24363-#define set_fixmap(idx, phys) \
24364- __set_fixmap(idx, phys, PAGE_KERNEL)
24365-/*
24366- * Some hardware wants to get fixmapped without caching.
24367- */
24368-#define set_fixmap_nocache(idx, phys) \
24369- __set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE)
24370
24371 #define FIXADDR_TOP ((unsigned long)__FIXADDR_TOP)
24372
24373@@ -131,38 +125,5 @@ extern void reserve_top_address(unsigned
24374 #define FIXADDR_START (FIXADDR_TOP - __FIXADDR_SIZE)
24375 #define FIXADDR_BOOT_START (FIXADDR_TOP - __FIXADDR_BOOT_SIZE)
24376
24377-#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT))
24378-#define __virt_to_fix(x) ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT)
24379-
24380-extern void __this_fixmap_does_not_exist(void);
24381-
24382-/*
24383- * 'index to address' translation. If anyone tries to use the idx
24384- * directly without tranlation, we catch the bug with a NULL-deference
24385- * kernel oops. Illegal ranges of incoming indices are caught too.
24386- */
24387-static __always_inline unsigned long fix_to_virt(const unsigned int idx)
24388-{
24389- /*
24390- * this branch gets completely eliminated after inlining,
24391- * except when someone tries to use fixaddr indices in an
24392- * illegal way. (such as mixing up address types or using
24393- * out-of-range indices).
24394- *
24395- * If it doesn't get removed, the linker will complain
24396- * loudly with a reasonably clear error message..
24397- */
24398- if (idx >= __end_of_fixed_addresses)
24399- __this_fixmap_does_not_exist();
24400-
24401- return __fix_to_virt(idx);
24402-}
24403-
24404-static inline unsigned long virt_to_fix(const unsigned long vaddr)
24405-{
24406- BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START);
24407- return __virt_to_fix(vaddr);
24408-}
24409-
24410 #endif /* !__ASSEMBLY__ */
24411 #endif
82094b55
AF
24412--- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/fixmap_64.h 2009-03-16 16:38:05.000000000 +0100
24413+++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/fixmap_64.h 2009-06-04 10:21:39.000000000 +0200
2cb7cef9
BS
24414@@ -12,6 +12,7 @@
24415 #define _ASM_FIXMAP_64_H
24416
24417 #include <linux/kernel.h>
24418+#include <asm/acpi.h>
24419 #include <asm/apicdef.h>
24420 #include <asm/page.h>
24421 #include <asm/vsyscall.h>
24422@@ -40,7 +41,6 @@ enum fixed_addresses {
24423 VSYSCALL_HPET,
24424 FIX_DBGP_BASE,
24425 FIX_EARLYCON_MEM_BASE,
24426- FIX_HPET_BASE,
24427 #ifdef CONFIG_X86_LOCAL_APIC
24428 FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */
24429 #endif
24430@@ -53,14 +53,21 @@ enum fixed_addresses {
24431 FIX_EFI_IO_MAP_FIRST_PAGE = FIX_EFI_IO_MAP_LAST_PAGE
24432 + MAX_EFI_IO_PAGES - 1,
24433 #endif
24434+#ifdef CONFIG_PARAVIRT
24435+ FIX_PARAVIRT_BOOTMAP,
24436+#else
24437+ FIX_SHARED_INFO,
24438+#endif
24439 #ifdef CONFIG_ACPI
24440 FIX_ACPI_BEGIN,
24441 FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1,
24442 #endif
24443- FIX_SHARED_INFO,
24444 #define NR_FIX_ISAMAPS 256
24445 FIX_ISAMAP_END,
24446 FIX_ISAMAP_BEGIN = FIX_ISAMAP_END + NR_FIX_ISAMAPS - 1,
24447+#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
24448+ FIX_OHCI1394_BASE,
24449+#endif
24450 __end_of_permanent_fixed_addresses,
24451 /*
24452 * 256 temporary boot-time mappings, used by early_ioremap(),
24453@@ -71,27 +78,12 @@ enum fixed_addresses {
24454 */
24455 #define NR_FIX_BTMAPS 64
24456 #define FIX_BTMAPS_NESTING 4
24457- FIX_BTMAP_END =
24458- __end_of_permanent_fixed_addresses + 512 -
24459+ FIX_BTMAP_END = __end_of_permanent_fixed_addresses + 512 -
24460 (__end_of_permanent_fixed_addresses & 511),
24461 FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_NESTING - 1,
24462-#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
24463- FIX_OHCI1394_BASE,
24464-#endif
24465 __end_of_fixed_addresses
24466 };
24467
24468-extern void __set_fixmap(enum fixed_addresses idx,
24469- unsigned long phys, pgprot_t flags);
24470-
24471-#define set_fixmap(idx, phys) \
24472- __set_fixmap(idx, phys, PAGE_KERNEL)
24473-/*
24474- * Some hardware wants to get fixmapped without caching.
24475- */
24476-#define set_fixmap_nocache(idx, phys) \
24477- __set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE)
24478-
24479 #define FIXADDR_TOP (VSYSCALL_END-PAGE_SIZE)
24480 #define FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT)
24481 #define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)
24482@@ -100,30 +92,4 @@ extern void __set_fixmap(enum fixed_addr
24483 #define FIXADDR_USER_START ((unsigned long)VSYSCALL32_VSYSCALL)
24484 #define FIXADDR_USER_END (FIXADDR_USER_START + PAGE_SIZE)
24485
24486-#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT))
24487-
24488-extern void __this_fixmap_does_not_exist(void);
24489-
24490-/*
24491- * 'index to address' translation. If anyone tries to use the idx
24492- * directly without translation, we catch the bug with a NULL-deference
24493- * kernel oops. Illegal ranges of incoming indices are caught too.
24494- */
24495-static __always_inline unsigned long fix_to_virt(const unsigned int idx)
24496-{
24497- /*
24498- * this branch gets completely eliminated after inlining,
24499- * except when someone tries to use fixaddr indices in an
24500- * illegal way. (such as mixing up address types or using
24501- * out-of-range indices).
24502- *
24503- * If it doesn't get removed, the linker will complain
24504- * loudly with a reasonably clear error message..
24505- */
24506- if (idx >= __end_of_fixed_addresses)
24507- __this_fixmap_does_not_exist();
24508-
24509- return __fix_to_virt(idx);
24510-}
24511-
24512 #endif
82094b55
AF
24513--- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/highmem.h 2009-03-16 16:38:05.000000000 +0100
24514+++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/highmem.h 2009-06-04 10:21:39.000000000 +0200
2cb7cef9
BS
24515@@ -74,6 +74,9 @@ struct page *kmap_atomic_to_page(void *p
24516
24517 #define flush_cache_kmaps() do { } while (0)
24518
24519+extern void add_highpages_with_active_regions(int nid, unsigned long start_pfn,
24520+ unsigned long end_pfn);
24521+
24522 void clear_highpage(struct page *);
24523 static inline void clear_user_highpage(struct page *page, unsigned long vaddr)
24524 {
82094b55
AF
24525--- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/hypercall.h 2009-02-16 16:18:36.000000000 +0100
24526+++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/hypercall.h 2009-06-04 10:21:39.000000000 +0200
2cb7cef9
BS
24527@@ -323,9 +323,19 @@ static inline int __must_check
24528 HYPERVISOR_grant_table_op(
24529 unsigned int cmd, void *uop, unsigned int count)
24530 {
24531+ bool fixup = false;
24532+ int rc;
24533+
24534 if (arch_use_lazy_mmu_mode())
24535 xen_multicall_flush(false);
24536- return _hypercall3(int, grant_table_op, cmd, uop, count);
24537+#ifdef GNTTABOP_map_grant_ref
24538+ if (cmd == GNTTABOP_map_grant_ref)
24539+#endif
24540+ fixup = gnttab_pre_map_adjust(cmd, uop, count);
24541+ rc = _hypercall3(int, grant_table_op, cmd, uop, count);
24542+ if (rc == 0 && fixup)
24543+ rc = gnttab_post_map_adjust(uop, count);
24544+ return rc;
24545 }
24546
24547 static inline int __must_check
82094b55
AF
24548--- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/hypervisor.h 2009-03-16 16:33:40.000000000 +0100
24549+++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/hypervisor.h 2009-06-04 10:21:39.000000000 +0200
2cb7cef9
BS
24550@@ -35,7 +35,6 @@
24551
24552 #include <linux/types.h>
24553 #include <linux/kernel.h>
24554-#include <linux/version.h>
24555 #include <linux/errno.h>
24556 #include <xen/interface/xen.h>
24557 #include <xen/interface/platform.h>
24558@@ -112,6 +111,8 @@ int xen_create_contiguous_region(
24559 unsigned long vstart, unsigned int order, unsigned int address_bits);
24560 void xen_destroy_contiguous_region(
24561 unsigned long vstart, unsigned int order);
24562+int early_create_contiguous_region(unsigned long pfn, unsigned int order,
24563+ unsigned int address_bits);
24564
24565 struct page;
24566
24567@@ -181,6 +182,29 @@ static inline void xen_multicall_flush(b
24568
24569 #endif /* CONFIG_XEN && !MODULE */
24570
24571+#ifdef CONFIG_XEN
24572+
24573+struct gnttab_map_grant_ref;
24574+bool gnttab_pre_map_adjust(unsigned int cmd, struct gnttab_map_grant_ref *,
24575+ unsigned int count);
24576+#if CONFIG_XEN_COMPAT < 0x030400
24577+int gnttab_post_map_adjust(const struct gnttab_map_grant_ref *, unsigned int);
24578+#else
24579+static inline int gnttab_post_map_adjust(const struct gnttab_map_grant_ref *m,
24580+ unsigned int count)
24581+{
24582+ BUG();
24583+ return -ENOSYS;
24584+}
24585+#endif
24586+
24587+#else /* !CONFIG_XEN */
24588+
24589+#define gnttab_pre_map_adjust(...) false
24590+#define gnttab_post_map_adjust(...) ({ BUG(); -ENOSYS; })
24591+
24592+#endif /* CONFIG_XEN */
24593+
24594 #if defined(CONFIG_X86_64)
24595 #define MULTI_UVMFLAGS_INDEX 2
24596 #define MULTI_UVMDOMID_INDEX 3
24597--- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/io.h 2009-03-16 16:38:05.000000000 +0100
24598+++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/io.h 2009-09-24 11:02:00.000000000 +0200
24599@@ -3,20 +3,140 @@
24600
24601 #define ARCH_HAS_IOREMAP_WC
24602
24603+#include <linux/compiler.h>
24604+
24605+/*
24606+ * early_ioremap() and early_iounmap() are for temporary early boot-time
24607+ * mappings, before the real ioremap() is functional.
24608+ * A boot-time mapping is currently limited to at most 16 pages.
24609+ */
24610+#ifndef __ASSEMBLY__
24611+extern void early_ioremap_init(void);
24612+extern void early_ioremap_clear(void);
24613+extern void early_ioremap_reset(void);
24614+extern void *early_ioremap(unsigned long offset, unsigned long size);
24615+extern void early_iounmap(void *addr, unsigned long size);
24616+extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys);
24617+#endif
24618+
24619+#define build_mmio_read(name, size, type, reg, barrier) \
24620+static inline type name(const volatile void __iomem *addr) \
24621+{ type ret; asm volatile("mov" size " %1,%0":reg (ret) \
24622+:"m" (*(volatile type __force *)addr) barrier); return ret; }
24623+
24624+#define build_mmio_write(name, size, type, reg, barrier) \
24625+static inline void name(type val, volatile void __iomem *addr) \
24626+{ asm volatile("mov" size " %0,%1": :reg (val), \
24627+"m" (*(volatile type __force *)addr) barrier); }
24628+
24629+build_mmio_read(readb, "b", unsigned char, "=q", :"memory")
24630+build_mmio_read(readw, "w", unsigned short, "=r", :"memory")
24631+build_mmio_read(readl, "l", unsigned int, "=r", :"memory")
24632+
24633+build_mmio_read(__readb, "b", unsigned char, "=q", )
24634+build_mmio_read(__readw, "w", unsigned short, "=r", )
24635+build_mmio_read(__readl, "l", unsigned int, "=r", )
24636+
24637+build_mmio_write(writeb, "b", unsigned char, "q", :"memory")
24638+build_mmio_write(writew, "w", unsigned short, "r", :"memory")
24639+build_mmio_write(writel, "l", unsigned int, "r", :"memory")
24640+
24641+build_mmio_write(__writeb, "b", unsigned char, "q", )
24642+build_mmio_write(__writew, "w", unsigned short, "r", )
24643+build_mmio_write(__writel, "l", unsigned int, "r", )
24644+
24645+#define readb_relaxed(a) __readb(a)
24646+#define readw_relaxed(a) __readw(a)
24647+#define readl_relaxed(a) __readl(a)
24648+#define __raw_readb __readb
24649+#define __raw_readw __readw
24650+#define __raw_readl __readl
24651+
24652+#define __raw_writeb __writeb
24653+#define __raw_writew __writew
24654+#define __raw_writel __writel
24655+
24656+#define mmiowb() barrier()
24657+
24658+#ifdef CONFIG_X86_64
24659+build_mmio_read(readq, "q", unsigned long, "=r", :"memory")
24660+build_mmio_read(__readq, "q", unsigned long, "=r", )
24661+build_mmio_write(writeq, "q", unsigned long, "r", :"memory")
24662+build_mmio_write(__writeq, "q", unsigned long, "r", )
24663+
24664+#define readq_relaxed(a) __readq(a)
24665+#define __raw_readq __readq
24666+#define __raw_writeq writeq
24667+
24668+/* Let people know we have them */
24669+#define readq readq
24670+#define writeq writeq
24671+#endif
24672+
24673+#define native_io_delay xen_io_delay
24674+
24675 #ifdef CONFIG_X86_32
24676-# include "io_32.h"
24677+# include "../../io_32.h"
24678 #else
24679-# include "io_64.h"
24680+# include "../../io_64.h"
24681+#endif
24682+
24683+#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
24684+
24685+/* We will be supplying our own /dev/mem implementation */
24686+#define ARCH_HAS_DEV_MEM
24687+
24688+#define page_to_pseudophys(page) ((dma_addr_t)page_to_pfn(page) << PAGE_SHIFT)
24689+#undef page_to_phys
24690+#define page_to_phys(page) (phys_to_machine(page_to_pseudophys(page)))
24691+#define page_to_bus(page) (phys_to_machine(page_to_pseudophys(page)))
24692+
24693+#define bvec_to_pseudophys(bv) (page_to_pseudophys((bv)->bv_page) + \
24694+ (unsigned long) (bv)->bv_offset)
24695+
24696+#define BIOVEC_PHYS_MERGEABLE(vec1, vec2) \
24697+ (bvec_to_phys(vec1) + (vec1)->bv_len == bvec_to_phys(vec2) \
24698+ && bvec_to_pseudophys(vec1) + (vec1)->bv_len \
24699+ == bvec_to_pseudophys(vec2))
24700+
24701+#undef virt_to_bus
24702+#undef bus_to_virt
24703+#define virt_to_bus(_x) phys_to_machine(__pa(_x))
24704+#define bus_to_virt(_x) __va(machine_to_phys(_x))
24705+
24706+#include <asm/fixmap.h>
24707+
24708+#undef __ISA_IO_base
24709+#undef isa_virt_to_bus
24710+#undef isa_page_to_bus
24711+#undef isa_bus_to_virt
24712+#define isa_virt_to_bus(_x) ({ \
24713+ unsigned long _va_ = (unsigned long)(_x); \
24714+ _va_ - fix_to_virt(FIX_ISAMAP_BEGIN) < (NR_FIX_ISAMAPS << PAGE_SHIFT) \
24715+ ? _va_ - fix_to_virt(FIX_ISAMAP_BEGIN) \
24716+ : ({ BUG(); (unsigned long)virt_to_bus(_va_); }); })
24717+#define isa_bus_to_virt(_x) ((void *)fix_to_virt(FIX_ISAMAP_BEGIN) + (_x))
24718+
24719 #endif
24720
24721 extern void *xlate_dev_mem_ptr(unsigned long phys);
24722 extern void unxlate_dev_mem_ptr(unsigned long phys, void *addr);
24723
24724-extern void map_devmem(unsigned long pfn, unsigned long len, pgprot_t);
24725-extern void unmap_devmem(unsigned long pfn, unsigned long len, pgprot_t);
24726-
24727 extern int ioremap_check_change_attr(unsigned long mfn, unsigned long size,
24728 unsigned long prot_val);
24729 extern void __iomem *ioremap_wc(unsigned long offset, unsigned long size);
24730
24731+/*
24732+ * early_ioremap() and early_iounmap() are for temporary early boot-time
24733+ * mappings, before the real ioremap() is functional.
24734+ * A boot-time mapping is currently limited to at most 16 pages.
24735+ */
24736+extern void early_ioremap_init(void);
24737+extern void early_ioremap_clear(void);
24738+extern void early_ioremap_reset(void);
24739+extern void *early_ioremap(unsigned long offset, unsigned long size);
24740+extern void early_iounmap(void *addr, unsigned long size);
24741+extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys);
24742+
24743+
24744 #endif /* _ASM_X86_IO_H */
24745--- /dev/null 1970-01-01 00:00:00.000000000 +0000
24746+++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/irq_vectors.h	2009-06-04 10:21:39.000000000 +0200
24747@@ -0,0 +1,52 @@
24748+#ifndef _ASM_IRQ_VECTORS_H
24749+#define _ASM_IRQ_VECTORS_H
24750+
24751+#ifdef CONFIG_X86_32
24752+# define SYSCALL_VECTOR 0x80
24753+#else
24754+# define IA32_SYSCALL_VECTOR 0x80
24755+#endif
24756+
24757+#define RESCHEDULE_VECTOR 0
24758+#define CALL_FUNCTION_VECTOR 1
24759+#define CALL_FUNC_SINGLE_VECTOR 2
24760+#define SPIN_UNLOCK_VECTOR 3
24761+#define NR_IPIS 4
24762+
24763+/*
24764+ * The maximum number of vectors supported by i386 processors
24765+ * is limited to 256. For processors other than i386, NR_VECTORS
24766+ * should be changed accordingly.
24767+ */
24768+#define NR_VECTORS 256
24769+
24770+#define FIRST_VM86_IRQ 3
24771+#define LAST_VM86_IRQ 15
24772+#define invalid_vm86_irq(irq) ((irq) < 3 || (irq) > 15)
24773+
24774+/*
24775+ * The flat IRQ space is divided into two regions:
24776+ * 1. A one-to-one mapping of real physical IRQs. This space is only used
24777+ * if we have physical device-access privilege. This region is at the
24778+ * start of the IRQ space so that existing device drivers do not need
24779+ * to be modified to translate physical IRQ numbers into our IRQ space.
24780+ * 2. A dynamic mapping of inter-domain and Xen-sourced virtual IRQs. These
24781+ * are bound using the provided bind/unbind functions.
24782+ */
24783+
24784+#define PIRQ_BASE 0
24785+#if defined(NR_CPUS) && defined(MAX_IO_APICS)
24786+# if NR_CPUS < MAX_IO_APICS
24787+# define NR_PIRQS (NR_VECTORS + 32 * NR_CPUS)
24788+# else
24789+# define NR_PIRQS (NR_VECTORS + 32 * MAX_IO_APICS)
24790+# endif
24791+#endif
24792+
24793+#define DYNIRQ_BASE (PIRQ_BASE + NR_PIRQS)
24794+#define NR_DYNIRQS 256
24795+
24796+#define NR_IRQS (NR_PIRQS + NR_DYNIRQS)
24797+#define NR_IRQ_VECTORS NR_IRQS
24798+
24799+#endif /* _ASM_IRQ_VECTORS_H */
24800--- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/irqflags.h 2009-03-16 16:38:05.000000000 +0100
24801+++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/irqflags.h 2009-06-04 10:21:39.000000000 +0200
24802@@ -118,7 +118,7 @@ static inline void halt(void)
24803
24804 #ifndef CONFIG_X86_64
24805 #define INTERRUPT_RETURN iret
24806-#define ENABLE_INTERRUPTS_SYSCALL_RET __ENABLE_INTERRUPTS ; \
24807+#define ENABLE_INTERRUPTS_SYSEXIT __ENABLE_INTERRUPTS ; \
24808 sysexit_scrit: /**** START OF SYSEXIT CRITICAL REGION ****/ ; \
24809 __TEST_PENDING ; \
24810 jnz 14f /* process more events if necessary... */ ; \
24811@@ -175,18 +175,6 @@ static inline void trace_hardirqs_fixup_
24812 #else
24813
24814 #ifdef CONFIG_X86_64
24815-/*
24816- * Currently paravirt can't handle swapgs nicely when we
24817- * don't have a stack we can rely on (such as a user space
24818- * stack). So we either find a way around these or just fault
24819- * and emulate if a guest tries to call swapgs directly.
24820- *
24821- * Either way, this is a good way to document that we don't
24822- * have a reliable stack. x86_64 only.
24823- */
24824-#define SWAPGS_UNSAFE_STACK swapgs
24825-#define ARCH_TRACE_IRQS_ON call trace_hardirqs_on_thunk
24826-#define ARCH_TRACE_IRQS_OFF call trace_hardirqs_off_thunk
24827 #define ARCH_LOCKDEP_SYS_EXIT call lockdep_sys_exit_thunk
24828 #define ARCH_LOCKDEP_SYS_EXIT_IRQ \
24829 TRACE_IRQS_ON; \
24830@@ -198,24 +186,6 @@ static inline void trace_hardirqs_fixup_
24831 TRACE_IRQS_OFF;
24832
24833 #else
24834-#define ARCH_TRACE_IRQS_ON \
24835- pushl %eax; \
24836- pushl %ecx; \
24837- pushl %edx; \
24838- call trace_hardirqs_on; \
24839- popl %edx; \
24840- popl %ecx; \
24841- popl %eax;
24842-
24843-#define ARCH_TRACE_IRQS_OFF \
24844- pushl %eax; \
24845- pushl %ecx; \
24846- pushl %edx; \
24847- call trace_hardirqs_off; \
24848- popl %edx; \
24849- popl %ecx; \
24850- popl %eax;
24851-
24852 #define ARCH_LOCKDEP_SYS_EXIT \
24853 pushl %eax; \
24854 pushl %ecx; \
24855@@ -229,8 +199,8 @@ static inline void trace_hardirqs_fixup_
24856 #endif
24857
24858 #ifdef CONFIG_TRACE_IRQFLAGS
24859-# define TRACE_IRQS_ON ARCH_TRACE_IRQS_ON
24860-# define TRACE_IRQS_OFF ARCH_TRACE_IRQS_OFF
24861+# define TRACE_IRQS_ON call trace_hardirqs_on_thunk;
24862+# define TRACE_IRQS_OFF call trace_hardirqs_off_thunk;
24863 #else
24864 # define TRACE_IRQS_ON
24865 # define TRACE_IRQS_OFF
24866--- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/mmu_context.h 2009-02-16 16:18:36.000000000 +0100
24867+++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/mmu_context.h 2009-06-04 10:21:39.000000000 +0200
24868@@ -1,5 +1,42 @@
24869+#ifndef __ASM_X86_MMU_CONTEXT_H
24870+#define __ASM_X86_MMU_CONTEXT_H
24871+
24872+#include <asm/desc.h>
24873+#include <asm/atomic.h>
24874+#include <asm/pgalloc.h>
24875+#include <asm/tlbflush.h>
24876+
24877+void arch_exit_mmap(struct mm_struct *mm);
24878+void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm);
24879+
24880+void mm_pin(struct mm_struct *mm);
24881+void mm_unpin(struct mm_struct *mm);
24882+void mm_pin_all(void);
24883+
24884+static inline void xen_activate_mm(struct mm_struct *prev,
24885+ struct mm_struct *next)
24886+{
24887+ if (!PagePinned(virt_to_page(next->pgd)))
24888+ mm_pin(next);
24889+}
24890+
24891+/*
24892+ * Used for LDT copy/destruction.
24893+ */
24894+int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
24895+void destroy_context(struct mm_struct *mm);
24896+
24897 #ifdef CONFIG_X86_32
24898 # include "mmu_context_32.h"
24899 #else
24900 # include "mmu_context_64.h"
24901 #endif
24902+
24903+#define activate_mm(prev, next) \
24904+do { \
24905+ xen_activate_mm(prev, next); \
24906+ switch_mm((prev), (next), NULL); \
24907+} while (0);
24908+
24909+
24910+#endif /* __ASM_X86_MMU_CONTEXT_H */
24911--- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/mmu_context_32.h 2009-03-16 16:38:05.000000000 +0100
24912+++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/mmu_context_32.h 2009-06-04 10:21:39.000000000 +0200
24913@@ -1,32 +1,6 @@
24914 #ifndef __I386_SCHED_H
24915 #define __I386_SCHED_H
24916
24917-#include <asm/desc.h>
24918-#include <asm/atomic.h>
24919-#include <asm/pgalloc.h>
24920-#include <asm/tlbflush.h>
24921-
24922-void arch_exit_mmap(struct mm_struct *mm);
24923-void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm);
24924-
24925-void mm_pin(struct mm_struct *mm);
24926-void mm_unpin(struct mm_struct *mm);
24927-void mm_pin_all(void);
24928-
24929-static inline void xen_activate_mm(struct mm_struct *prev,
24930- struct mm_struct *next)
24931-{
24932- if (!PagePinned(virt_to_page(next->pgd)))
24933- mm_pin(next);
24934-}
24935-
24936-/*
24937- * Used for LDT copy/destruction.
24938- */
24939-int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
24940-void destroy_context(struct mm_struct *mm);
24941-
24942-
24943 static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
24944 {
24945 #if 0 /* XEN: no lazy tlb */
24946@@ -107,10 +81,4 @@ static inline void switch_mm(struct mm_s
24947 #define deactivate_mm(tsk, mm) \
24948 asm("movl %0,%%gs": :"r" (0));
24949
24950-#define activate_mm(prev, next) \
24951-do { \
24952- xen_activate_mm(prev, next); \
24953- switch_mm((prev), (next), NULL); \
24954-} while (0)
24955-
24956 #endif
24957--- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/mmu_context_64.h 2009-03-16 16:38:05.000000000 +0100
24958+++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/mmu_context_64.h 2009-06-04 10:21:39.000000000 +0200
24959@@ -1,23 +1,6 @@
24960 #ifndef __X86_64_MMU_CONTEXT_H
24961 #define __X86_64_MMU_CONTEXT_H
24962
24963-#include <asm/desc.h>
24964-#include <asm/atomic.h>
24965-#include <asm/pgalloc.h>
24966-#include <asm/page.h>
24967-#include <asm/pda.h>
24968-#include <asm/pgtable.h>
24969-#include <asm/tlbflush.h>
24970-
24971-void arch_exit_mmap(struct mm_struct *mm);
24972-void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm);
24973-
24974-/*
24975- * possibly do the LDT unload here?
24976- */
24977-int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
24978-void destroy_context(struct mm_struct *mm);
24979-
24980 static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
24981 {
24982 #if defined(CONFIG_SMP) && !defined(CONFIG_XEN)
24983@@ -58,10 +41,6 @@ static inline void __prepare_arch_switch
24984 }
24985 }
24986
24987-extern void mm_pin(struct mm_struct *mm);
24988-extern void mm_unpin(struct mm_struct *mm);
24989-void mm_pin_all(void);
24990-
24991 static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
24992 struct task_struct *tsk)
24993 {
24994@@ -124,11 +103,4 @@ do { \
24995 asm volatile("movl %0,%%fs"::"r"(0)); \
24996 } while (0)
24997
24998-static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next)
24999-{
25000- if (!PagePinned(virt_to_page(next->pgd)))
25001- mm_pin(next);
25002- switch_mm(prev, next, NULL);
25003-}
25004-
25005 #endif
25006--- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/page.h 2009-03-16 16:38:05.000000000 +0100
25007+++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/page.h 2009-06-04 10:21:39.000000000 +0200
25008@@ -16,9 +16,9 @@
25009 * below. The preprocessor will warn if the two definitions aren't identical.
25010 */
25011 #define _PAGE_BIT_PRESENT 0
25012-#define _PAGE_PRESENT (_AC(1, L)<<_PAGE_BIT_PRESENT)
25013-#define _PAGE_BIT_IO 9
25014-#define _PAGE_IO (_AC(1, L)<<_PAGE_BIT_IO)
25015+#define _PAGE_PRESENT (_AT(pteval_t, 1) << _PAGE_BIT_PRESENT)
25016+#define _PAGE_BIT_IO 11
25017+#define _PAGE_IO (_AT(pteval_t, 1) << _PAGE_BIT_IO)
25018
25019 #define __PHYSICAL_MASK ((phys_addr_t)(1ULL << __PHYSICAL_MASK_SHIFT) - 1)
25020 #define __VIRTUAL_MASK ((1UL << __VIRTUAL_MASK_SHIFT) - 1)
25021@@ -28,8 +28,11 @@
25022 (ie, 32-bit PAE). */
25023 #define PHYSICAL_PAGE_MASK (((signed long)PAGE_MASK) & __PHYSICAL_MASK)
25024
25025-/* PTE_MASK extracts the PFN from a (pte|pmd|pud|pgd)val_t */
25026-#define PTE_MASK ((pteval_t)PHYSICAL_PAGE_MASK)
25027+/* PTE_PFN_MASK extracts the PFN from a (pte|pmd|pud|pgd)val_t */
25028+#define PTE_PFN_MASK ((pteval_t)PHYSICAL_PAGE_MASK)
25029+
25030+/* PTE_FLAGS_MASK extracts the flags from a (pte|pmd|pud|pgd)val_t */
25031+#define PTE_FLAGS_MASK (~PTE_PFN_MASK)
25032
25033 #define PMD_PAGE_SIZE (_AC(1, UL) << PMD_SHIFT)
25034 #define PMD_PAGE_MASK (~(PMD_PAGE_SIZE-1))
25035@@ -39,8 +42,7 @@
25036 #define HPAGE_MASK (~(HPAGE_SIZE - 1))
25037 #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
25038
25039-/* to align the pointer to the (next) page boundary */
25040-#define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK)
25041+#define HUGE_MAX_HSTATE 2
25042
25043 #ifndef __ASSEMBLY__
25044 #include <linux/types.h>
25045@@ -61,9 +63,17 @@
25046
25047 #ifndef __ASSEMBLY__
25048
25049+typedef struct { pgdval_t pgd; } pgd_t;
25050+typedef struct { pgprotval_t pgprot; } pgprot_t;
25051+
25052 extern int page_is_ram(unsigned long pagenr);
25053 extern int devmem_is_allowed(unsigned long pagenr);
25054+extern void map_devmem(unsigned long pfn, unsigned long size,
25055+ pgprot_t vma_prot);
25056+extern void unmap_devmem(unsigned long pfn, unsigned long size,
25057+ pgprot_t vma_prot);
25058
25059+extern unsigned long max_low_pfn_mapped;
25060 extern unsigned long max_pfn_mapped;
25061
25062 struct page;
25063@@ -84,15 +94,11 @@ static inline void copy_user_page(void *
25064 alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr)
25065 #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
25066
25067-typedef struct { pgprotval_t pgprot; } pgprot_t;
25068-
25069 #define pgprot_val(x) ((x).pgprot)
25070 #define __pgprot(x) ((pgprot_t) { (x) } )
25071
25072 #include <asm/maddr.h>
25073
25074-typedef struct { pgdval_t pgd; } pgd_t;
25075-
25076 #define __pgd_ma(x) ((pgd_t) { (x) } )
25077 static inline pgd_t xen_make_pgd(pgdval_t val)
25078 {
25079@@ -196,6 +202,11 @@ static inline pteval_t xen_pte_val(pte_t
25080 return ret;
25081 }
25082
25083+static inline pteval_t xen_pte_flags(pte_t pte)
25084+{
25085+ return __pte_val(pte) & PTE_FLAGS_MASK;
25086+}
25087+
25088 #define pgd_val(x) xen_pgd_val(x)
25089 #define __pgd(x) xen_make_pgd(x)
25090
25091@@ -210,6 +221,7 @@ static inline pteval_t xen_pte_val(pte_t
25092 #endif
25093
25094 #define pte_val(x) xen_pte_val(x)
25095+#define pte_flags(x) xen_pte_flags(x)
25096 #define __pte(x) xen_make_pte(x)
25097
25098 #define __pa(x) __phys_addr((unsigned long)(x))
25099--- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/page_64.h 2009-03-16 16:38:05.000000000 +0100
25100+++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/page_64.h 2009-06-04 10:21:39.000000000 +0200
25101@@ -26,6 +26,12 @@
25102 #define PUD_PAGE_SIZE (_AC(1, UL) << PUD_SHIFT)
25103 #define PUD_PAGE_MASK (~(PUD_PAGE_SIZE-1))
25104
25105+/*
25106+ * Set __PAGE_OFFSET to the most negative possible address +
25107+ * PGDIR_SIZE*16 (pgd slot 272). The gap is to allow a space for a
25108+ * hypervisor to fit. Choosing 16 slots here is arbitrary, but it's
25109+ * what Xen requires.
25110+ */
25111 #define __PAGE_OFFSET _AC(0xffff880000000000, UL)
25112
25113 #define __PHYSICAL_START CONFIG_PHYSICAL_START
25114@@ -63,7 +69,8 @@
25115 void clear_page(void *page);
25116 void copy_page(void *to, void *from);
25117
25118-extern unsigned long end_pfn;
25119+/* duplicated to the one in bootmem.h */
25120+extern unsigned long max_pfn;
25121
25122 static inline unsigned long __phys_addr(unsigned long x)
25123 {
25124@@ -91,6 +98,11 @@ typedef union { pteval_t pte; unsigned i
25125 extern unsigned long init_memory_mapping(unsigned long start,
25126 unsigned long end);
25127
25128+extern void initmem_init(unsigned long start_pfn, unsigned long end_pfn);
25129+
25130+extern void init_extra_mapping_uc(unsigned long phys, unsigned long size);
25131+extern void init_extra_mapping_wb(unsigned long phys, unsigned long size);
25132+
25133 #endif /* !__ASSEMBLY__ */
25134
25135 #ifdef CONFIG_FLATMEM
25136--- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/pci.h 2009-03-16 16:38:05.000000000 +0100
25137+++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/pci.h 2009-06-04 10:21:39.000000000 +0200
25138@@ -21,6 +21,8 @@ struct pci_sysdata {
25139 #endif
25140 };
25141
25142+extern int pci_routeirq;
25143+
25144 /* scan a bus after allocating a pci_sysdata for it */
25145 extern struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops,
25146 int node);
25147--- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/pci_32.h 2009-02-16 16:18:36.000000000 +0100
25148+++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/pci_32.h 2009-06-04 10:21:39.000000000 +0200
25149@@ -38,12 +38,14 @@ struct pci_dev;
25150 #define PCI_DMA_BUS_IS_PHYS (1)
25151
25152 /* pci_unmap_{page,single} is a nop so... */
25153-#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME)
25154-#define DECLARE_PCI_UNMAP_LEN(LEN_NAME)
25155-#define pci_unmap_addr(PTR, ADDR_NAME) (0)
25156-#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) do { } while (0)
25157-#define pci_unmap_len(PTR, LEN_NAME) (0)
25158-#define pci_unmap_len_set(PTR, LEN_NAME, VAL) do { } while (0)
25159+#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) dma_addr_t ADDR_NAME[0];
25160+#define DECLARE_PCI_UNMAP_LEN(LEN_NAME) unsigned LEN_NAME[0];
25161+#define pci_unmap_addr(PTR, ADDR_NAME) sizeof((PTR)->ADDR_NAME)
25162+#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) \
25163+ do { break; } while (pci_unmap_addr(PTR, ADDR_NAME))
25164+#define pci_unmap_len(PTR, LEN_NAME) sizeof((PTR)->LEN_NAME)
25165+#define pci_unmap_len_set(PTR, LEN_NAME, VAL) \
25166+ do { break; } while (pci_unmap_len(PTR, LEN_NAME))
25167
25168 #endif
25169
25170--- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/pgalloc.h 2009-03-16 16:38:05.000000000 +0100
25171+++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/pgalloc.h 2009-06-04 10:21:39.000000000 +0200
25172@@ -7,6 +7,9 @@
25173
25174 #include <asm/io.h> /* for phys_to_virt and page_to_pseudophys */
25175
25176+static inline int paravirt_pgd_alloc(struct mm_struct *mm) { return 0; }
25177+static inline void paravirt_pgd_free(struct mm_struct *mm, pgd_t *pgd) {}
25178+
25179 static inline void paravirt_alloc_pte(struct mm_struct *mm, unsigned long pfn) {}
25180 static inline void paravirt_alloc_pmd(struct mm_struct *mm, unsigned long pfn) {}
25181 static inline void paravirt_alloc_pmd_clone(unsigned long pfn, unsigned long clonepfn,
25182--- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/pgtable.h 2009-03-16 16:38:05.000000000 +0100
25183+++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/pgtable.h 2009-06-04 10:21:39.000000000 +0200
25184@@ -13,11 +13,12 @@
25185 #define _PAGE_BIT_PSE 7 /* 4 MB (or 2MB) page */
25186 #define _PAGE_BIT_PAT 7 /* on 4KB pages */
25187 #define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */
25188-#define _PAGE_BIT_IO 9 /* Mapped page is I/O or foreign and
25189+#define _PAGE_BIT_UNUSED1 9 /* available for programmer */
25190+#define _PAGE_BIT_UNUSED2 10
25191+#define _PAGE_BIT_IO 11 /* Mapped page is I/O or foreign and
25192 * has no associated page struct. */
25193-#define _PAGE_BIT_UNUSED2 10 /* available for programmer */
25194-#define _PAGE_BIT_UNUSED3 11
25195 #define _PAGE_BIT_PAT_LARGE 12 /* On 2MB or 1GB pages */
25196+#define _PAGE_BIT_SPECIAL _PAGE_BIT_UNUSED1
25197 #define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */
25198
25199 /* If _PAGE_BIT_PRESENT is clear, we use these: */
25200@@ -28,34 +29,31 @@
25201 /* if the user mapped it with PROT_NONE; pte_present gives true */
25202 #define _PAGE_BIT_PROTNONE _PAGE_BIT_GLOBAL
25203
25204-/*
25205- * Note: we use _AC(1, L) instead of _AC(1, UL) so that we get a
25206- * sign-extended value on 32-bit with all 1's in the upper word,
25207- * which preserves the upper pte values on 64-bit ptes:
25208- */
25209-#define _PAGE_PRESENT (_AC(1, L)<<_PAGE_BIT_PRESENT)
25210-#define _PAGE_RW (_AC(1, L)<<_PAGE_BIT_RW)
25211-#define _PAGE_USER (_AC(1, L)<<_PAGE_BIT_USER)
25212-#define _PAGE_PWT (_AC(1, L)<<_PAGE_BIT_PWT)
25213-#define _PAGE_PCD (_AC(1, L)<<_PAGE_BIT_PCD)
25214-#define _PAGE_ACCESSED (_AC(1, L)<<_PAGE_BIT_ACCESSED)
25215-#define _PAGE_DIRTY (_AC(1, L)<<_PAGE_BIT_DIRTY)
25216-#define _PAGE_PSE (_AC(1, L)<<_PAGE_BIT_PSE) /* 2MB page */
25217-#define _PAGE_GLOBAL (_AC(1, L)<<_PAGE_BIT_GLOBAL) /* Global TLB entry */
25218-#define _PAGE_IO (_AC(1, L)<<_PAGE_BIT_IO)
25219-#define _PAGE_UNUSED2 (_AC(1, L)<<_PAGE_BIT_UNUSED2)
25220-#define _PAGE_UNUSED3 (_AC(1, L)<<_PAGE_BIT_UNUSED3)
25221-#define _PAGE_PAT (_AC(1, L)<<_PAGE_BIT_PAT)
25222-#define _PAGE_PAT_LARGE (_AC(1, L)<<_PAGE_BIT_PAT_LARGE)
25223+#define _PAGE_PRESENT (_AT(pteval_t, 1) << _PAGE_BIT_PRESENT)
25224+#define _PAGE_RW (_AT(pteval_t, 1) << _PAGE_BIT_RW)
25225+#define _PAGE_USER (_AT(pteval_t, 1) << _PAGE_BIT_USER)
25226+#define _PAGE_PWT (_AT(pteval_t, 1) << _PAGE_BIT_PWT)
25227+#define _PAGE_PCD (_AT(pteval_t, 1) << _PAGE_BIT_PCD)
25228+#define _PAGE_ACCESSED (_AT(pteval_t, 1) << _PAGE_BIT_ACCESSED)
25229+#define _PAGE_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_DIRTY)
25230+#define _PAGE_PSE (_AT(pteval_t, 1) << _PAGE_BIT_PSE)
25231+#define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL)
25232+#define _PAGE_UNUSED1 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED1)
25233+#define _PAGE_UNUSED2 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED2)
25234+#define _PAGE_IO (_AT(pteval_t, 1) << _PAGE_BIT_IO)
25235+#define _PAGE_PAT (_AT(pteval_t, 1) << _PAGE_BIT_PAT)
25236+#define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE)
25237+#define _PAGE_SPECIAL (_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL)
25238+#define __HAVE_ARCH_PTE_SPECIAL
25239
25240 #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
25241-#define _PAGE_NX (_AC(1, ULL) << _PAGE_BIT_NX)
25242+#define _PAGE_NX (_AT(pteval_t, 1) << _PAGE_BIT_NX)
25243 #else
25244-#define _PAGE_NX 0
25245+#define _PAGE_NX (_AT(pteval_t, 0))
25246 #endif
25247
25248-#define _PAGE_FILE (_AC(1, L)<<_PAGE_BIT_FILE)
25249-#define _PAGE_PROTNONE (_AC(1, L)<<_PAGE_BIT_PROTNONE)
25250+#define _PAGE_FILE (_AT(pteval_t, 1) << _PAGE_BIT_FILE)
25251+#define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE)
25252
25253 #ifndef __ASSEMBLY__
25254 #if defined(CONFIG_X86_64) && CONFIG_XEN_COMPAT <= 0x030002
25255@@ -71,8 +69,8 @@ extern unsigned int __kernel_page_user;
25256 _PAGE_DIRTY | __kernel_page_user)
25257
25258 /* Set of bits not changed in pte_modify */
25259-#define _PAGE_CHG_MASK (PTE_MASK | _PAGE_CACHE_MASK | _PAGE_IO | \
25260- _PAGE_ACCESSED | _PAGE_DIRTY)
25261+#define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_CACHE_MASK | _PAGE_IO | \
25262+ _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY)
25263
25264 /*
25265 * PAT settings are part of the hypervisor interface, which sets the
25266@@ -102,19 +100,9 @@ extern unsigned int __kernel_page_user;
25267 #define PAGE_READONLY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \
25268 _PAGE_ACCESSED)
25269
25270-#ifdef CONFIG_X86_32
25271-#define _PAGE_KERNEL_EXEC \
25272- (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED)
25273-#define _PAGE_KERNEL (_PAGE_KERNEL_EXEC | _PAGE_NX)
25274-
25275-#ifndef __ASSEMBLY__
25276-extern pteval_t __PAGE_KERNEL, __PAGE_KERNEL_EXEC;
25277-#endif /* __ASSEMBLY__ */
25278-#else
25279 #define __PAGE_KERNEL_EXEC \
25280 (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | __kernel_page_user)
25281 #define __PAGE_KERNEL (__PAGE_KERNEL_EXEC | _PAGE_NX)
25282-#endif
25283
25284 #define __PAGE_KERNEL_RO (__PAGE_KERNEL & ~_PAGE_RW)
25285 #define __PAGE_KERNEL_RX (__PAGE_KERNEL_EXEC & ~_PAGE_RW)
25286@@ -125,25 +113,22 @@ extern pteval_t __PAGE_KERNEL, __PAGE_KE
25287 #define __PAGE_KERNEL_VSYSCALL (__PAGE_KERNEL_RX | _PAGE_USER)
25288 #define __PAGE_KERNEL_VSYSCALL_NOCACHE (__PAGE_KERNEL_VSYSCALL | _PAGE_PCD | _PAGE_PWT)
25289 #define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE)
25290+#define __PAGE_KERNEL_LARGE_NOCACHE (__PAGE_KERNEL | _PAGE_CACHE_UC | _PAGE_PSE)
25291 #define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE)
25292
25293-/*
25294- * We don't support GLOBAL page in xenolinux64
25295- */
25296-#define MAKE_GLOBAL(x) __pgprot((x))
25297-
25298-#define PAGE_KERNEL MAKE_GLOBAL(__PAGE_KERNEL)
25299-#define PAGE_KERNEL_RO MAKE_GLOBAL(__PAGE_KERNEL_RO)
25300-#define PAGE_KERNEL_EXEC MAKE_GLOBAL(__PAGE_KERNEL_EXEC)
25301-#define PAGE_KERNEL_RX MAKE_GLOBAL(__PAGE_KERNEL_RX)
25302-#define PAGE_KERNEL_WC MAKE_GLOBAL(__PAGE_KERNEL_WC)
25303-#define PAGE_KERNEL_NOCACHE MAKE_GLOBAL(__PAGE_KERNEL_NOCACHE)
25304-#define PAGE_KERNEL_UC_MINUS MAKE_GLOBAL(__PAGE_KERNEL_UC_MINUS)
25305-#define PAGE_KERNEL_EXEC_NOCACHE MAKE_GLOBAL(__PAGE_KERNEL_EXEC_NOCACHE)
25306-#define PAGE_KERNEL_LARGE MAKE_GLOBAL(__PAGE_KERNEL_LARGE)
25307-#define PAGE_KERNEL_LARGE_EXEC MAKE_GLOBAL(__PAGE_KERNEL_LARGE_EXEC)
25308-#define PAGE_KERNEL_VSYSCALL MAKE_GLOBAL(__PAGE_KERNEL_VSYSCALL)
25309-#define PAGE_KERNEL_VSYSCALL_NOCACHE MAKE_GLOBAL(__PAGE_KERNEL_VSYSCALL_NOCACHE)
25310+#define PAGE_KERNEL __pgprot(__PAGE_KERNEL)
25311+#define PAGE_KERNEL_RO __pgprot(__PAGE_KERNEL_RO)
25312+#define PAGE_KERNEL_EXEC __pgprot(__PAGE_KERNEL_EXEC)
25313+#define PAGE_KERNEL_RX __pgprot(__PAGE_KERNEL_RX)
25314+#define PAGE_KERNEL_WC __pgprot(__PAGE_KERNEL_WC)
25315+#define PAGE_KERNEL_NOCACHE __pgprot(__PAGE_KERNEL_NOCACHE)
25316+#define PAGE_KERNEL_UC_MINUS __pgprot(__PAGE_KERNEL_UC_MINUS)
25317+#define PAGE_KERNEL_EXEC_NOCACHE __pgprot(__PAGE_KERNEL_EXEC_NOCACHE)
25318+#define PAGE_KERNEL_LARGE __pgprot(__PAGE_KERNEL_LARGE)
25319+#define PAGE_KERNEL_LARGE_NOCACHE __pgprot(__PAGE_KERNEL_LARGE_NOCACHE)
25320+#define PAGE_KERNEL_LARGE_EXEC __pgprot(__PAGE_KERNEL_LARGE_EXEC)
25321+#define PAGE_KERNEL_VSYSCALL __pgprot(__PAGE_KERNEL_VSYSCALL)
25322+#define PAGE_KERNEL_VSYSCALL_NOCACHE __pgprot(__PAGE_KERNEL_VSYSCALL_NOCACHE)
25323
25324 /* xwr */
25325 #define __P000 PAGE_NONE
25326@@ -182,27 +167,27 @@ extern struct list_head pgd_list;
25327 */
25328 static inline int pte_dirty(pte_t pte)
25329 {
25330- return __pte_val(pte) & _PAGE_DIRTY;
25331+ return pte_flags(pte) & _PAGE_DIRTY;
25332 }
25333
25334 static inline int pte_young(pte_t pte)
25335 {
25336- return __pte_val(pte) & _PAGE_ACCESSED;
25337+ return pte_flags(pte) & _PAGE_ACCESSED;
25338 }
25339
25340 static inline int pte_write(pte_t pte)
25341 {
25342- return __pte_val(pte) & _PAGE_RW;
25343+ return pte_flags(pte) & _PAGE_RW;
25344 }
25345
25346 static inline int pte_file(pte_t pte)
25347 {
25348- return __pte_val(pte) & _PAGE_FILE;
25349+ return pte_flags(pte) & _PAGE_FILE;
25350 }
25351
25352 static inline int pte_huge(pte_t pte)
25353 {
25354- return __pte_val(pte) & _PAGE_PSE;
25355+ return pte_flags(pte) & _PAGE_PSE;
25356 }
25357
25358 static inline int pte_global(pte_t pte)
25359@@ -212,12 +197,12 @@ static inline int pte_global(pte_t pte)
25360
25361 static inline int pte_exec(pte_t pte)
25362 {
25363- return !(__pte_val(pte) & _PAGE_NX);
25364+ return !(pte_flags(pte) & _PAGE_NX);
25365 }
25366
25367 static inline int pte_special(pte_t pte)
25368 {
25369- return 0;
25370+ return pte_flags(pte) & _PAGE_SPECIAL;
25371 }
25372
25373 static inline int pmd_large(pmd_t pte)
25374@@ -228,22 +213,22 @@ static inline int pmd_large(pmd_t pte)
25375
25376 static inline pte_t pte_mkclean(pte_t pte)
25377 {
25378- return __pte_ma(__pte_val(pte) & ~(pteval_t)_PAGE_DIRTY);
25379+ return __pte_ma(__pte_val(pte) & ~_PAGE_DIRTY);
25380 }
25381
25382 static inline pte_t pte_mkold(pte_t pte)
25383 {
25384- return __pte_ma(__pte_val(pte) & ~(pteval_t)_PAGE_ACCESSED);
25385+ return __pte_ma(__pte_val(pte) & ~_PAGE_ACCESSED);
25386 }
25387
25388 static inline pte_t pte_wrprotect(pte_t pte)
25389 {
25390- return __pte_ma(__pte_val(pte) & ~(pteval_t)_PAGE_RW);
25391+ return __pte_ma(__pte_val(pte) & ~_PAGE_RW);
25392 }
25393
25394 static inline pte_t pte_mkexec(pte_t pte)
25395 {
25396- return __pte_ma(__pte_val(pte) & ~(pteval_t)_PAGE_NX);
25397+ return __pte_ma(__pte_val(pte) & ~_PAGE_NX);
25398 }
25399
25400 static inline pte_t pte_mkdirty(pte_t pte)
25401@@ -268,7 +253,7 @@ static inline pte_t pte_mkhuge(pte_t pte
25402
25403 static inline pte_t pte_clrhuge(pte_t pte)
25404 {
25405- return __pte_ma(__pte_val(pte) & ~(pteval_t)_PAGE_PSE);
25406+ return __pte_ma(__pte_val(pte) & ~_PAGE_PSE);
25407 }
25408
25409 static inline pte_t pte_mkglobal(pte_t pte)
25410@@ -283,35 +268,46 @@ static inline pte_t pte_clrglobal(pte_t
25411
25412 static inline pte_t pte_mkspecial(pte_t pte)
25413 {
25414- return pte;
25415+ return __pte_ma(__pte_val(pte) | _PAGE_SPECIAL);
25416 }
25417
25418 extern pteval_t __supported_pte_mask;
25419
25420 static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot)
25421 {
25422- return __pte((((phys_addr_t)page_nr << PAGE_SHIFT) |
25423- pgprot_val(pgprot)) & __supported_pte_mask);
25424+ pgprotval_t prot = pgprot_val(pgprot);
25425+
25426+ if (prot & _PAGE_PRESENT)
25427+ prot &= __supported_pte_mask;
25428+ return __pte(((phys_addr_t)page_nr << PAGE_SHIFT) | prot);
25429 }
25430
25431 static inline pte_t pfn_pte_ma(unsigned long page_nr, pgprot_t pgprot)
25432 {
25433- return __pte_ma((((phys_addr_t)page_nr << PAGE_SHIFT) |
25434- pgprot_val(pgprot)) & __supported_pte_mask);
25435+ pgprotval_t prot = pgprot_val(pgprot);
25436+
25437+ if (prot & _PAGE_PRESENT)
25438+ prot &= __supported_pte_mask;
25439+ return __pte_ma(((phys_addr_t)page_nr << PAGE_SHIFT) | prot);
25440 }
25441
25442 static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot)
25443 {
25444- return __pmd((((phys_addr_t)page_nr << PAGE_SHIFT) |
25445- pgprot_val(pgprot)) & __supported_pte_mask);
25446+ pgprotval_t prot = pgprot_val(pgprot);
25447+
25448+ if (prot & _PAGE_PRESENT)
25449+ prot &= __supported_pte_mask;
25450+ return __pmd(((phys_addr_t)page_nr << PAGE_SHIFT) | prot);
25451 }
25452
25453 static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
25454 {
25455- pteval_t val = pte_val(pte);
25456+ pgprotval_t prot = pgprot_val(newprot);
25457+ pteval_t val = pte_val(pte) & _PAGE_CHG_MASK;
25458
25459- val &= _PAGE_CHG_MASK;
25460- val |= pgprot_val(newprot) & (~_PAGE_CHG_MASK) & __supported_pte_mask;
25461+ if (prot & _PAGE_PRESENT)
25462+ prot &= __supported_pte_mask;
25463+ val |= prot & ~_PAGE_CHG_MASK;
25464
25465 return __pte(val);
25466 }
25467@@ -325,9 +321,11 @@ static inline pgprot_t pgprot_modify(pgp
25468 return __pgprot(preservebits | addbits);
25469 }
25470
25471-#define pte_pgprot(x) __pgprot(__pte_val(x) & ~PTE_MASK)
25472+#define pte_pgprot(x) __pgprot(pte_flags(x) & PTE_FLAGS_MASK)
25473
25474-#define canon_pgprot(p) __pgprot(pgprot_val(p) & __supported_pte_mask)
25475+#define canon_pgprot(p) __pgprot(pgprot_val(p) & _PAGE_PRESENT \
25476+ ? pgprot_val(p) & __supported_pte_mask \
25477+ : pgprot_val(p))
25478
25479 #ifndef __ASSEMBLY__
25480 #define __HAVE_PHYS_MEM_ACCESS_PROT
25481@@ -338,6 +336,17 @@ int phys_mem_access_prot_allowed(struct
25482 unsigned long size, pgprot_t *vma_prot);
25483 #endif
25484
25485+/* Install a pte for a particular vaddr in kernel space. */
25486+void set_pte_vaddr(unsigned long vaddr, pte_t pte);
25487+
25488+#ifndef CONFIG_XEN
25489+extern void native_pagetable_setup_start(pgd_t *base);
25490+extern void native_pagetable_setup_done(pgd_t *base);
25491+#else
25492+static inline void xen_pagetable_setup_start(pgd_t *base) {}
25493+static inline void xen_pagetable_setup_done(pgd_t *base) {}
25494+#endif
25495+
25496 #define set_pte(ptep, pte) xen_set_pte(ptep, pte)
25497 #define set_pte_at(mm, addr, ptep, pte) xen_set_pte_at(mm, addr, ptep, pte)
25498
25499@@ -373,6 +382,26 @@ int phys_mem_access_prot_allowed(struct
25500 # include "pgtable_64.h"
25501 #endif
25502
25503+/*
25504+ * the pgd page can be thought of an array like this: pgd_t[PTRS_PER_PGD]
25505+ *
25506+ * this macro returns the index of the entry in the pgd page which would
25507+ * control the given virtual address
25508+ */
25509+#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
25510+
25511+/*
25512+ * pgd_offset() returns a (pgd_t *)
25513+ * pgd_index() is used get the offset into the pgd page's array of pgd_t's;
25514+ */
25515+#define pgd_offset(mm, address) ((mm)->pgd + pgd_index((address)))
25516+/*
25517+ * a shortcut which implies the use of the kernel's pgd, instead
25518+ * of a process's
25519+ */
25520+#define pgd_offset_k(address) pgd_offset(&init_mm, (address))
25521+
25522+
25523 #define KERNEL_PGD_BOUNDARY pgd_index(PAGE_OFFSET)
25524 #define KERNEL_PGD_PTRS (PTRS_PER_PGD - KERNEL_PGD_BOUNDARY)
25525
25526@@ -383,8 +412,15 @@ enum {
25527 PG_LEVEL_4K,
25528 PG_LEVEL_2M,
25529 PG_LEVEL_1G,
25530+ PG_LEVEL_NUM
25531 };
25532
25533+#ifdef CONFIG_PROC_FS
25534+extern void update_page_count(int level, unsigned long pages);
25535+#else
25536+static inline void update_page_count(int level, unsigned long pages) { }
25537+#endif
25538+
25539 /*
25540 * Helper function that returns the kernel pagetable entry controlling
25541 * the virtual address 'address'. NULL means no pagetable entry present.
25542@@ -441,6 +477,8 @@ static inline void xen_pte_clear(struct
25543 * race with other CPU's that might be updating the dirty
25544 * bit at the same time.
25545 */
25546+struct vm_area_struct;
25547+
25548 #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
25549 extern int ptep_set_access_flags(struct vm_area_struct *vma,
25550 unsigned long address, pte_t *ptep,
25551@@ -523,9 +561,6 @@ static inline void clone_pgd_range(pgd_t
25552 memcpy(dst, src, count * sizeof(pgd_t));
25553 }
25554
25555-#define arch_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable) \
25556- xen_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable)
25557-
25558 #define arbitrary_virt_to_machine(va) \
25559 ({ \
25560 unsigned int __lvl; \
25561@@ -548,6 +583,34 @@ struct page *kmap_atomic_to_page(void *)
25562 #define ptep_to_machine(ptep) virt_to_machine(ptep)
25563 #endif
25564
25565+#define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
25566+static inline pte_t ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr,
25567+ pte_t *ptep)
25568+{
25569+#if CONFIG_XEN_COMPAT < 0x030300
25570+ if (unlikely(!xen_feature(XENFEAT_mmu_pt_update_preserve_ad)))
25571+ return ptep_get_and_clear(mm, addr, ptep);
25572+#endif
25573+ return *ptep;
25574+}
25575+
25576+static inline void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
25577+ pte_t *ptep, pte_t pte)
25578+{
25579+ mmu_update_t u;
25580+
25581+#if CONFIG_XEN_COMPAT < 0x030300
25582+ if (unlikely(!xen_feature(XENFEAT_mmu_pt_update_preserve_ad))) {
25583+ set_pte_at(mm, addr, ptep, pte);
25584+ return;
25585+ }
25586+#endif
25587+ u.ptr = ptep_to_machine(ptep) | MMU_PT_UPDATE_PRESERVE_AD;
25588+ u.val = __pte_val(pte);
25589+ if (HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF))
25590+ BUG();
25591+}
25592+
25593 #include <asm-generic/pgtable.h>
25594
25595 #include <xen/features.h>
25596@@ -576,10 +639,6 @@ int touch_pte_range(struct mm_struct *mm
25597 unsigned long address,
25598 unsigned long size);
25599
25600-int xen_change_pte_range(struct mm_struct *mm, pmd_t *pmd,
25601- unsigned long addr, unsigned long end, pgprot_t newprot,
25602- int dirty_accountable);
25603-
25604 #endif /* __ASSEMBLY__ */
25605
25606 #endif /* _ASM_X86_PGTABLE_H */
25607--- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/pgtable-3level.h 2009-03-16 16:38:05.000000000 +0100
25608+++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/pgtable-3level.h 2009-06-04 10:21:39.000000000 +0200
25609@@ -14,11 +14,11 @@
25610 #define pmd_ERROR(e) \
25611 printk("%s:%d: bad pmd %p(%016Lx pfn %08Lx).\n", \
25612 __FILE__, __LINE__, &(e), __pmd_val(e), \
25613- (pmd_val(e) & PTE_MASK) >> PAGE_SHIFT)
25614+ (pmd_val(e) & PTE_PFN_MASK) >> PAGE_SHIFT)
25615 #define pgd_ERROR(e) \
25616 printk("%s:%d: bad pgd %p(%016Lx pfn %08Lx).\n", \
25617 __FILE__, __LINE__, &(e), __pgd_val(e), \
25618- (pgd_val(e) & PTE_MASK) >> PAGE_SHIFT)
25619+ (pgd_val(e) & PTE_PFN_MASK) >> PAGE_SHIFT)
25620
25621 static inline int pud_none(pud_t pud)
25622 {
25623@@ -27,7 +27,7 @@ static inline int pud_none(pud_t pud)
25624 }
25625 static inline int pud_bad(pud_t pud)
25626 {
25627- return (__pud_val(pud) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER)) != 0;
25628+ return (__pud_val(pud) & ~(PTE_PFN_MASK | _KERNPG_TABLE | _PAGE_USER)) != 0;
25629 }
25630
25631 static inline int pud_present(pud_t pud)
25632@@ -102,9 +102,9 @@ static inline void pud_clear(pud_t *pudp
25633 xen_tlb_flush();
25634 }
25635
25636-#define pud_page(pud) ((struct page *) __va(pud_val(pud) & PTE_MASK))
25637+#define pud_page(pud) ((struct page *) __va(pud_val(pud) & PTE_PFN_MASK))
25638
25639-#define pud_page_vaddr(pud) ((unsigned long) __va(pud_val(pud) & PTE_MASK))
25640+#define pud_page_vaddr(pud) ((unsigned long) __va(pud_val(pud) & PTE_PFN_MASK))
25641
25642
25643 /* Find an entry in the second-level page table.. */
25644--- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/pgtable_32.h 2009-03-16 16:38:05.000000000 +0100
25645+++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/pgtable_32.h 2009-06-04 10:21:39.000000000 +0200
25646@@ -89,10 +89,10 @@ extern unsigned long pg0[];
25647 /* pmd_present doesn't just test the _PAGE_PRESENT bit since wr.p.t.
25648 can temporarily clear it. */
25649 #define pmd_present(x) (__pmd_val(x))
25650-#define pmd_bad(x) ((__pmd_val(x) & (~PTE_MASK & ~_PAGE_USER & ~_PAGE_PRESENT)) != (_KERNPG_TABLE & ~_PAGE_PRESENT))
25651+#define pmd_bad(x) ((__pmd_val(x) & (PTE_FLAGS_MASK & ~_PAGE_USER & ~_PAGE_PRESENT)) != (_KERNPG_TABLE & ~_PAGE_PRESENT))
25652 #else
25653 #define pmd_present(x) (__pmd_val(x) & _PAGE_PRESENT)
25654-#define pmd_bad(x) ((__pmd_val(x) & (~PTE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE)
25655+#define pmd_bad(x) ((__pmd_val(x) & (PTE_FLAGS_MASK & ~_PAGE_USER)) != _KERNPG_TABLE)
25656 #endif
25657
25658
25659@@ -119,26 +119,6 @@ extern unsigned long pg0[];
25660 */
25661 #define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot))
25662
25663-/*
25664- * the pgd page can be thought of an array like this: pgd_t[PTRS_PER_PGD]
25665- *
25666- * this macro returns the index of the entry in the pgd page which would
25667- * control the given virtual address
25668- */
25669-#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
25670-#define pgd_index_k(addr) pgd_index((addr))
25671-
25672-/*
25673- * pgd_offset() returns a (pgd_t *)
25674- * pgd_index() is used get the offset into the pgd page's array of pgd_t's;
25675- */
25676-#define pgd_offset(mm, address) ((mm)->pgd + pgd_index((address)))
25677-
25678-/*
25679- * a shortcut which implies the use of the kernel's pgd, instead
25680- * of a process's
25681- */
25682-#define pgd_offset_k(address) pgd_offset(&init_mm, (address))
25683
25684 static inline int pud_large(pud_t pud) { return 0; }
25685
25686@@ -165,7 +145,7 @@ static inline int pud_large(pud_t pud) {
25687 #define pmd_page(pmd) (pfn_to_page(pmd_val((pmd)) >> PAGE_SHIFT))
25688
25689 #define pmd_page_vaddr(pmd) \
25690- ((unsigned long)__va(pmd_val((pmd)) & PTE_MASK))
25691+ ((unsigned long)__va(pmd_val((pmd)) & PTE_PFN_MASK))
25692
25693 #if defined(CONFIG_HIGHPTE)
25694 #define pte_offset_map(dir, address) \
25695--- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/pgtable_64.h 2009-03-16 16:38:05.000000000 +0100
25696+++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/pgtable_64.h 2009-06-04 10:21:39.000000000 +0200
25697@@ -23,6 +23,8 @@ extern void xen_init_pt(void);
25698 extern pud_t level3_kernel_pgt[512];
25699 extern pud_t level3_ident_pgt[512];
25700 extern pmd_t level2_kernel_pgt[512];
25701+extern pmd_t level2_fixmap_pgt[512];
25702+extern pmd_t level2_ident_pgt[512];
25703 extern pgd_t init_level4_pgt[];
25704
25705 #define swapper_pg_dir init_level4_pgt
25706@@ -79,6 +81,9 @@ extern void paging_init(void);
25707
25708 struct mm_struct;
25709
25710+void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte);
25711+
25712+
25713 #define __xen_pte_clear(ptep) xen_set_pte(ptep, __pte(0))
25714
25715 static inline void xen_set_pte(pte_t *ptep, pte_t pte)
25716@@ -145,29 +150,29 @@ static inline void xen_pgd_clear(pgd_t *
25717 #define PGDIR_MASK (~(PGDIR_SIZE - 1))
25718
25719
25720-#define MAXMEM _AC(0x00003fffffffffff, UL)
25721+#define MAXMEM _AC(0x000004ffffffffff, UL)
25722 #define VMALLOC_START _AC(0xffffc20000000000, UL)
25723 #define VMALLOC_END _AC(0xffffe1ffffffffff, UL)
25724 #define VMEMMAP_START _AC(0xffffe20000000000, UL)
25725 #define MODULES_VADDR _AC(0xffffffffa0000000, UL)
25726-#define MODULES_END _AC(0xfffffffffff00000, UL)
25727+#define MODULES_END _AC(0xffffffffff000000, UL)
25728 #define MODULES_LEN (MODULES_END - MODULES_VADDR)
25729
25730 #ifndef __ASSEMBLY__
25731
25732 static inline int pgd_bad(pgd_t pgd)
25733 {
25734- return (__pgd_val(pgd) & ~(PTE_MASK | _PAGE_USER)) != _KERNPG_TABLE;
25735+ return (__pgd_val(pgd) & ~(PTE_PFN_MASK | _PAGE_USER)) != _KERNPG_TABLE;
25736 }
25737
25738 static inline int pud_bad(pud_t pud)
25739 {
25740- return (__pud_val(pud) & ~(PTE_MASK | _PAGE_USER)) != _KERNPG_TABLE;
25741+ return (__pud_val(pud) & ~(PTE_PFN_MASK | _PAGE_USER)) != _KERNPG_TABLE;
25742 }
25743
25744 static inline int pmd_bad(pmd_t pmd)
25745 {
25746- return (__pmd_val(pmd) & ~(PTE_MASK | _PAGE_USER)) != _KERNPG_TABLE;
25747+ return (__pmd_val(pmd) & ~(PTE_PFN_MASK | _PAGE_USER)) != _KERNPG_TABLE;
25748 }
25749
25750 #define pte_none(x) (!(x).pte)
25751@@ -175,7 +180,7 @@ static inline int pmd_bad(pmd_t pmd)
25752
25753 #define pages_to_mb(x) ((x) >> (20 - PAGE_SHIFT)) /* FIXME: is this right? */
25754
25755-#define __pte_mfn(_pte) (((_pte).pte & PTE_MASK) >> PAGE_SHIFT)
25756+#define __pte_mfn(_pte) (((_pte).pte & PTE_PFN_MASK) >> PAGE_SHIFT)
25757 #define pte_mfn(_pte) ((_pte).pte & _PAGE_PRESENT ? \
25758 __pte_mfn(_pte) : pfn_to_mfn(__pte_mfn(_pte)))
25759 #define pte_pfn(_pte) ((_pte).pte & _PAGE_IO ? max_mapnr : \
25760@@ -200,11 +205,8 @@ static inline int pmd_bad(pmd_t pmd)
25761 * Level 4 access.
25762 */
25763 #define pgd_page_vaddr(pgd) \
25764- ((unsigned long)__va((unsigned long)pgd_val((pgd)) & PTE_MASK))
25765+ ((unsigned long)__va((unsigned long)pgd_val((pgd)) & PTE_PFN_MASK))
25766 #define pgd_page(pgd) (pfn_to_page(pgd_val((pgd)) >> PAGE_SHIFT))
25767-#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
25768-#define pgd_offset(mm, address) ((mm)->pgd + pgd_index((address)))
25769-#define pgd_offset_k(address) (init_level4_pgt + pgd_index((address)))
25770 #define pgd_present(pgd) (__pgd_val(pgd) & _PAGE_PRESENT)
25771 static inline int pgd_large(pgd_t pgd) { return 0; }
25772 #define mk_kernel_pgd(address) __pgd((address) | _KERNPG_TABLE)
25773@@ -226,7 +228,7 @@ static inline int pud_large(pud_t pte)
25774 }
25775
25776 /* PMD - Level 2 access */
25777-#define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_val((pmd)) & PTE_MASK))
25778+#define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_val((pmd)) & PTE_PFN_MASK))
25779 #define pmd_page(pmd) (pfn_to_page(pmd_val((pmd)) >> PAGE_SHIFT))
25780
25781 #define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))
25782--- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/processor.h 2009-03-16 16:38:05.000000000 +0100
25783+++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/processor.h 2009-06-04 10:21:39.000000000 +0200
25784@@ -134,7 +134,7 @@ extern __u32 cleared_cpu_caps[NCAPINTS
25785 #ifdef CONFIG_SMP
25786 DECLARE_PER_CPU(struct cpuinfo_x86, cpu_info);
25787 #define cpu_data(cpu) per_cpu(cpu_info, cpu)
25788-#define current_cpu_data cpu_data(smp_processor_id())
25789+#define current_cpu_data __get_cpu_var(cpu_info)
25790 #else
25791 #define cpu_data(cpu) boot_cpu_data
25792 #define current_cpu_data boot_cpu_data
25793@@ -153,7 +153,7 @@ static inline int hlt_works(int cpu)
25794
25795 extern void cpu_detect(struct cpuinfo_x86 *c);
25796
25797-extern void identify_cpu(struct cpuinfo_x86 *);
25798+extern void early_cpu_init(void);
25799 extern void identify_boot_cpu(void);
25800 extern void identify_secondary_cpu(struct cpuinfo_x86 *);
25801 extern void print_cpu_info(struct cpuinfo_x86 *);
25802@@ -267,15 +267,11 @@ struct tss_struct {
25803 struct thread_struct *io_bitmap_owner;
25804
25805 /*
25806- * Pad the TSS to be cacheline-aligned (size is 0x100):
25807- */
25808- unsigned long __cacheline_filler[35];
25809- /*
25810 * .. and then another 0x100 bytes for the emergency kernel stack:
25811 */
25812 unsigned long stack[64];
25813
25814-} __attribute__((packed));
25815+} ____cacheline_aligned;
25816
25817 DECLARE_PER_CPU(struct tss_struct, init_tss);
25818
25819@@ -668,11 +664,36 @@ static inline void __sti_mwait(unsigned
25820
25821 extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx);
25822
25823-extern int force_mwait;
25824-
25825 extern void select_idle_routine(const struct cpuinfo_x86 *c);
25826
25827 extern unsigned long boot_option_idle_override;
25828+extern unsigned long idle_halt;
25829+extern unsigned long idle_nomwait;
25830+
25831+#ifndef CONFIG_XEN
25832+/*
25833+ * on systems with caches, caches must be flashed as the absolute
25834+ * last instruction before going into a suspended halt. Otherwise,
25835+ * dirty data can linger in the cache and become stale on resume,
25836+ * leading to strange errors.
25837+ *
25838+ * perform a variety of operations to guarantee that the compiler
25839+ * will not reorder instructions. wbinvd itself is serializing
25840+ * so the processor will not reorder.
25841+ *
25842+ * Systems without cache can just go into halt.
25843+ */
25844+static inline void wbinvd_halt(void)
25845+{
25846+ mb();
25847+ /* check for clflush to determine if wbinvd is legal */
25848+ if (cpu_has_clflush)
25849+ asm volatile("cli; wbinvd; 1: hlt; jmp 1b" : : : "memory");
25850+ else
25851+ while (1)
25852+ halt();
25853+}
25854+#endif
25855
25856 extern void enable_sep_cpu(void);
25857 extern int sysenter_setup(void);
25858--- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/segment.h 2009-03-16 16:38:05.000000000 +0100
25859+++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/segment.h 2009-06-04 10:21:39.000000000 +0200
25860@@ -1,6 +1,15 @@
25861 #ifndef _ASM_X86_SEGMENT_H_
25862 #define _ASM_X86_SEGMENT_H_
25863
25864+/* Constructor for a conventional segment GDT (or LDT) entry */
25865+/* This is a macro so it can be used in initializers */
25866+#define GDT_ENTRY(flags, base, limit) \
25867+ ((((base) & 0xff000000ULL) << (56-24)) | \
25868+ (((flags) & 0x0000f0ffULL) << 40) | \
25869+ (((limit) & 0x000f0000ULL) << (48-16)) | \
25870+ (((base) & 0x00ffffffULL) << 16) | \
25871+ (((limit) & 0x0000ffffULL)))
25872+
25873 /* Simple and small GDT entries for booting only */
25874
25875 #define GDT_ENTRY_BOOT_CS 2
25876@@ -61,18 +70,14 @@
25877 #define GDT_ENTRY_TLS_MAX (GDT_ENTRY_TLS_MIN + GDT_ENTRY_TLS_ENTRIES - 1)
25878
25879 #define GDT_ENTRY_DEFAULT_USER_CS 14
25880-#define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS * 8 + 3)
25881
25882 #define GDT_ENTRY_DEFAULT_USER_DS 15
25883-#define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS * 8 + 3)
25884
25885 #define GDT_ENTRY_KERNEL_BASE 12
25886
25887 #define GDT_ENTRY_KERNEL_CS (GDT_ENTRY_KERNEL_BASE + 0)
25888-#define __KERNEL_CS (GDT_ENTRY_KERNEL_CS * 8)
25889
25890 #define GDT_ENTRY_KERNEL_DS (GDT_ENTRY_KERNEL_BASE + 1)
25891-#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS * 8)
25892
25893 #define GDT_ENTRY_TSS (GDT_ENTRY_KERNEL_BASE + 4)
25894 #define GDT_ENTRY_LDT (GDT_ENTRY_KERNEL_BASE + 5)
25895@@ -143,10 +148,11 @@
25896 #else
25897 #include <asm/cache.h>
25898
25899-#define __KERNEL_CS 0x10
25900-#define __KERNEL_DS 0x18
25901+#define GDT_ENTRY_KERNEL32_CS 1
25902+#define GDT_ENTRY_KERNEL_CS 2
25903+#define GDT_ENTRY_KERNEL_DS 3
25904
25905-#define __KERNEL32_CS 0x08
25906+#define __KERNEL32_CS (GDT_ENTRY_KERNEL32_CS * 8)
25907
25908 /*
25909 * we cannot use the same code segment descriptor for user and kernel
25910@@ -154,10 +160,10 @@
25911 * The segment offset needs to contain a RPL. Grr. -AK
25912 * GDT layout to get 64bit syscall right (sysret hardcodes gdt offsets)
25913 */
25914-
25915-#define __USER32_CS 0x23 /* 4*8+3 */
25916-#define __USER_DS 0x2b /* 5*8+3 */
25917-#define __USER_CS 0x33 /* 6*8+3 */
25918+#define GDT_ENTRY_DEFAULT_USER32_CS 4
25919+#define GDT_ENTRY_DEFAULT_USER_DS 5
25920+#define GDT_ENTRY_DEFAULT_USER_CS 6
25921+#define __USER32_CS (GDT_ENTRY_DEFAULT_USER32_CS * 8 + 3)
25922 #define __USER32_DS __USER_DS
25923
25924 #define GDT_ENTRY_TSS 8 /* needs two entries */
25925@@ -179,6 +185,11 @@
25926
25927 #endif
25928
25929+#define __KERNEL_CS (GDT_ENTRY_KERNEL_CS * 8)
25930+#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS * 8)
25931+#define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS* 8 + 3)
25932+#define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS* 8 + 3)
25933+
25934 /* User mode is privilege level 3 */
25935 #define USER_RPL 0x3
25936 /* LDT segment has TI set, GDT has it cleared */
25937--- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/smp.h 2009-03-16 16:38:05.000000000 +0100
25938+++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/smp.h 2009-06-04 10:21:39.000000000 +0200
25939@@ -25,23 +25,16 @@ extern cpumask_t cpu_initialized;
25940 extern void (*mtrr_hook)(void);
25941 extern void zap_low_mappings(void);
25942
25943+extern int __cpuinit get_local_pda(int cpu);
25944+
25945 extern int smp_num_siblings;
25946 extern unsigned int num_processors;
25947 extern cpumask_t cpu_initialized;
25948
25949-#if defined(CONFIG_SMP) && !defined(CONFIG_XEN)
25950-extern u16 x86_cpu_to_apicid_init[];
25951-extern u16 x86_bios_cpu_apicid_init[];
25952-extern void *x86_cpu_to_apicid_early_ptr;
25953-extern void *x86_bios_cpu_apicid_early_ptr;
25954-#else
25955-#define x86_cpu_to_apicid_early_ptr NULL
25956-#define x86_bios_cpu_apicid_early_ptr NULL
25957-#endif
25958-
25959 DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);
25960 DECLARE_PER_CPU(cpumask_t, cpu_core_map);
25961 DECLARE_PER_CPU(u16, cpu_llc_id);
25962+
25963 DECLARE_PER_CPU(u16, x86_cpu_to_apicid);
25964 DECLARE_PER_CPU(u16, x86_bios_cpu_apicid);
25965
25966@@ -63,9 +56,9 @@ struct smp_ops {
25967
25968 void (*smp_send_stop)(void);
25969 void (*smp_send_reschedule)(int cpu);
25970- int (*smp_call_function_mask)(cpumask_t mask,
25971- void (*func)(void *info), void *info,
25972- int wait);
25973+
25974+ void (*send_call_func_ipi)(cpumask_t mask);
25975+ void (*send_call_func_single_ipi)(int cpu);
25976 };
25977
25978 /* Globals due to paravirt */
25979@@ -106,11 +99,14 @@ static inline void smp_send_reschedule(i
25980 smp_ops.smp_send_reschedule(cpu);
25981 }
25982
25983-static inline int smp_call_function_mask(cpumask_t mask,
25984- void (*func) (void *info), void *info,
25985- int wait)
25986+static inline void arch_send_call_function_single_ipi(int cpu)
25987 {
25988- return smp_ops.smp_call_function_mask(mask, func, info, wait);
25989+ smp_ops.send_call_func_single_ipi(cpu);
25990+}
25991+
25992+static inline void arch_send_call_function_ipi(cpumask_t mask)
25993+{
25994+ smp_ops.send_call_func_ipi(mask);
25995 }
25996
25997 void native_smp_prepare_boot_cpu(void);
25998@@ -122,23 +118,19 @@ int native_cpu_up(unsigned int cpunum);
25999
26000 void xen_smp_send_stop(void);
26001 void xen_smp_send_reschedule(int cpu);
26002-int xen_smp_call_function_mask(cpumask_t mask,
26003- void (*func) (void *info), void *info,
26004- int wait);
26005+void xen_send_call_func_ipi(cpumask_t mask);
26006+void xen_send_call_func_single_ipi(int cpu);
26007
26008 #define smp_send_stop xen_smp_send_stop
26009 #define smp_send_reschedule xen_smp_send_reschedule
26010-#define smp_call_function_mask xen_smp_call_function_mask
26011-
26012-extern void prefill_possible_map(void);
26013+#define arch_send_call_function_single_ipi xen_send_call_func_single_ipi
26014+#define arch_send_call_function_ipi xen_send_call_func_ipi
26015
26016 #endif /* CONFIG_XEN */
26017
26018 extern int __cpu_disable(void);
26019 extern void __cpu_die(unsigned int cpu);
26020
26021-extern void prefill_possible_map(void);
26022-
26023 void smp_store_cpu_info(int id);
26024 #define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu)
26025
26026@@ -149,6 +141,14 @@ static inline int num_booting_cpus(void)
26027 }
26028 #endif /* CONFIG_SMP */
26029
26030+#if defined(CONFIG_SMP) && (defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_XEN))
26031+extern void prefill_possible_map(void);
26032+#else
26033+static inline void prefill_possible_map(void)
26034+{
26035+}
26036+#endif
26037+
26038 extern unsigned disabled_cpus __cpuinitdata;
26039
26040 #ifdef CONFIG_X86_32_SMP
26041@@ -216,12 +216,8 @@ static inline int hard_smp_processor_id(
26042 #endif /* CONFIG_X86_LOCAL_APIC */
26043
26044 #ifdef CONFIG_HOTPLUG_CPU
26045-extern void cpu_exit_clear(void);
26046 extern void cpu_uninit(void);
26047 #endif
26048
26049-extern void smp_alloc_memory(void);
26050-extern void lock_ipi_call_lock(void);
26051-extern void unlock_ipi_call_lock(void);
26052 #endif /* __ASSEMBLY__ */
26053 #endif
26054--- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/spinlock.h 2009-03-16 16:38:05.000000000 +0100
26055+++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/spinlock.h 2009-06-04 11:09:05.000000000 +0200
26056@@ -38,6 +38,11 @@
26057 # define UNLOCK_LOCK_PREFIX
26058 #endif
26059
26060+/*
26061+ * Xen versions prior to 3.2.x have a race condition with HYPERVISOR_poll().
26062+ */
26063+#if CONFIG_XEN_COMPAT >= 0x030200
26064+
26065 int xen_spinlock_init(unsigned int cpu);
26066 void xen_spinlock_cleanup(unsigned int cpu);
26067 extern int xen_spin_wait(raw_spinlock_t *, unsigned int token);
26068@@ -65,14 +70,14 @@ extern void xen_spin_kick(raw_spinlock_t
26069 */
26070 #if (NR_CPUS < 256)
26071 #define TICKET_SHIFT 8
26072-#define __raw_spin_lock_preamble \
26073+#define __ticket_spin_lock_preamble \
26074 asm(LOCK_PREFIX "xaddw %w0, %2\n\t" \
26075 "cmpb %h0, %b0\n\t" \
26076 "sete %1" \
26077 : "=&Q" (token), "=qm" (free), "+m" (lock->slock) \
26078 : "0" (0x0100) \
26079 : "memory", "cc")
26080-#define __raw_spin_lock_body \
26081+#define __ticket_spin_lock_body \
26082 asm("1:\t" \
26083 "cmpb %h0, %b0\n\t" \
26084 "je 2f\n\t" \
26085@@ -88,7 +93,7 @@ extern void xen_spin_kick(raw_spinlock_t
26086 : "memory", "cc")
26087
26088
26089-static __always_inline int __raw_spin_trylock(raw_spinlock_t *lock)
26090+static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock)
26091 {
26092 int tmp, new;
26093
26094@@ -107,7 +112,7 @@ static __always_inline int __raw_spin_tr
26095 return tmp;
26096 }
26097
26098-static __always_inline void __raw_spin_unlock(raw_spinlock_t *lock)
26099+static __always_inline void __ticket_spin_unlock(raw_spinlock_t *lock)
26100 {
26101 unsigned int token;
26102 unsigned char kick;
26103@@ -124,7 +129,7 @@ static __always_inline void __raw_spin_u
26104 }
26105 #else
26106 #define TICKET_SHIFT 16
26107-#define __raw_spin_lock_preamble \
26108+#define __ticket_spin_lock_preamble \
26109 do { \
26110 unsigned int tmp; \
26111 asm(LOCK_PREFIX "xaddl %0, %2\n\t" \
26112@@ -136,7 +141,7 @@ static __always_inline void __raw_spin_u
26113 : "0" (0x00010000) \
26114 : "memory", "cc"); \
26115 } while (0)
26116-#define __raw_spin_lock_body \
26117+#define __ticket_spin_lock_body \
26118 do { \
26119 unsigned int tmp; \
26120 asm("shldl $16, %0, %2\n" \
26121@@ -155,7 +160,7 @@ static __always_inline void __raw_spin_u
26122 : "memory", "cc"); \
26123 } while (0)
26124
26125-static __always_inline int __raw_spin_trylock(raw_spinlock_t *lock)
26126+static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock)
26127 {
26128 int tmp;
26129 int new;
26130@@ -177,7 +182,7 @@ static __always_inline int __raw_spin_tr
26131 return tmp;
26132 }
26133
26134-static __always_inline void __raw_spin_unlock(raw_spinlock_t *lock)
26135+static __always_inline void __ticket_spin_unlock(raw_spinlock_t *lock)
26136 {
26137 unsigned int token, tmp;
26138 bool kick;
26139@@ -195,49 +200,161 @@ static __always_inline void __raw_spin_u
26140 }
26141 #endif
26142
26143-static inline int __raw_spin_is_locked(raw_spinlock_t *lock)
26144+static inline int __ticket_spin_is_locked(raw_spinlock_t *lock)
26145 {
26146 int tmp = ACCESS_ONCE(lock->slock);
26147
26148 return !!(((tmp >> TICKET_SHIFT) ^ tmp) & ((1 << TICKET_SHIFT) - 1));
26149 }
26150
26151-static inline int __raw_spin_is_contended(raw_spinlock_t *lock)
26152+static inline int __ticket_spin_is_contended(raw_spinlock_t *lock)
26153 {
26154 int tmp = ACCESS_ONCE(lock->slock);
26155
26156 return (((tmp >> TICKET_SHIFT) - tmp) & ((1 << TICKET_SHIFT) - 1)) > 1;
26157 }
26158
26159-static __always_inline void __raw_spin_lock(raw_spinlock_t *lock)
26160+static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock)
26161 {
26162 unsigned int token, count;
26163 bool free;
26164
26165- __raw_spin_lock_preamble;
26166+ __ticket_spin_lock_preamble;
26167 if (unlikely(!free))
26168 token = xen_spin_adjust(lock, token);
26169 do {
26170 count = 1 << 10;
26171- __raw_spin_lock_body;
26172+ __ticket_spin_lock_body;
26173 } while (unlikely(!count) && !xen_spin_wait(lock, token));
26174 }
26175
26176-static __always_inline void __raw_spin_lock_flags(raw_spinlock_t *lock,
26177- unsigned long flags)
26178+static __always_inline void __ticket_spin_lock_flags(raw_spinlock_t *lock,
26179+ unsigned long flags)
26180 {
26181 unsigned int token, count;
26182 bool free;
26183
26184- __raw_spin_lock_preamble;
26185+ __ticket_spin_lock_preamble;
26186 if (unlikely(!free))
26187 token = xen_spin_adjust(lock, token);
26188 do {
26189 count = 1 << 10;
26190- __raw_spin_lock_body;
26191+ __ticket_spin_lock_body;
26192 } while (unlikely(!count) && !xen_spin_wait_flags(lock, &token, flags));
26193 }
26194
26195+#define __raw_spin(n) __ticket_spin_##n
26196+
26197+#else /* CONFIG_XEN_COMPAT < 0x030200 */
26198+/*
26199+ * Define virtualization-friendly old-style lock byte lock, for use in
26200+ * pv_lock_ops if desired.
26201+ *
26202+ * This differs from the pre-2.6.24 spinlock by always using xchgb
26203+ * rather than decb to take the lock; this allows it to use a
26204+ * zero-initialized lock structure. It also maintains a 1-byte
26205+ * contention counter, so that we can implement
26206+ * __byte_spin_is_contended.
26207+ */
26208+struct __byte_spinlock {
26209+ u8 lock;
26210+#if NR_CPUS < 256
26211+ u8 spinners;
26212+#else
26213+#error NR_CPUS >= 256 support not implemented
26214+#endif
26215+};
26216+
26217+static inline int xen_spinlock_init(unsigned int cpu) { return 0; }
26218+static inline void xen_spinlock_cleanup(unsigned int cpu) {}
26219+
26220+static inline int __byte_spin_is_locked(raw_spinlock_t *lock)
26221+{
26222+ struct __byte_spinlock *bl = (struct __byte_spinlock *)lock;
26223+ return bl->lock != 0;
26224+}
26225+
26226+static inline int __byte_spin_is_contended(raw_spinlock_t *lock)
26227+{
26228+ struct __byte_spinlock *bl = (struct __byte_spinlock *)lock;
26229+ return bl->spinners != 0;
26230+}
26231+
26232+static inline void __byte_spin_lock(raw_spinlock_t *lock)
26233+{
26234+ struct __byte_spinlock *bl = (struct __byte_spinlock *)lock;
26235+ s8 val = 1;
26236+
26237+ asm("1: xchgb %1, %0\n"
26238+ " test %1,%1\n"
26239+ " jz 3f\n"
26240+ " " LOCK_PREFIX "incb %2\n"
26241+ "2: rep;nop\n"
26242+ " cmpb $1, %0\n"
26243+ " je 2b\n"
26244+ " " LOCK_PREFIX "decb %2\n"
26245+ " jmp 1b\n"
26246+ "3:"
26247+ : "+m" (bl->lock), "+q" (val), "+m" (bl->spinners): : "memory");
26248+}
26249+
26250+#define __byte_spin_lock_flags(lock, flags) __byte_spin_lock(lock)
26251+
26252+static inline int __byte_spin_trylock(raw_spinlock_t *lock)
26253+{
26254+ struct __byte_spinlock *bl = (struct __byte_spinlock *)lock;
26255+ u8 old = 1;
26256+
26257+ asm("xchgb %1,%0"
26258+ : "+m" (bl->lock), "+q" (old) : : "memory");
26259+
26260+ return old == 0;
26261+}
26262+
26263+static inline void __byte_spin_unlock(raw_spinlock_t *lock)
26264+{
26265+ struct __byte_spinlock *bl = (struct __byte_spinlock *)lock;
26266+ smp_wmb();
26267+ bl->lock = 0;
26268+}
26269+
26270+#define __raw_spin(n) __byte_spin_##n
26271+
26272+#endif /* CONFIG_XEN_COMPAT */
26273+
26274+static inline int __raw_spin_is_locked(raw_spinlock_t *lock)
26275+{
26276+ return __raw_spin(is_locked)(lock);
26277+}
26278+
26279+static inline int __raw_spin_is_contended(raw_spinlock_t *lock)
26280+{
26281+ return __raw_spin(is_contended)(lock);
26282+}
26283+
26284+static __always_inline void __raw_spin_lock(raw_spinlock_t *lock)
26285+{
26286+ __raw_spin(lock)(lock);
26287+}
26288+
26289+static __always_inline void __raw_spin_lock_flags(raw_spinlock_t *lock,
26290+ unsigned long flags)
26291+{
26292+ __raw_spin(lock_flags)(lock, flags);
26293+}
26294+
26295+static __always_inline int __raw_spin_trylock(raw_spinlock_t *lock)
26296+{
26297+ return __raw_spin(trylock)(lock);
26298+}
26299+
26300+static __always_inline void __raw_spin_unlock(raw_spinlock_t *lock)
26301+{
26302+ __raw_spin(unlock)(lock);
26303+}
26304+
26305+#undef __raw_spin
26306+
26307 static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock)
26308 {
26309 while (__raw_spin_is_locked(lock))
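
The spinlock.h changes above split the implementation in two: for CONFIG_XEN_COMPAT >= 0x030200 the existing ticket-lock paths are renamed to __ticket_spin_*, while older hypervisors fall back to the xchg-based byte lock with a one-byte contention counter that the in-file comment describes, and the temporary __raw_spin(n) macro routes the __raw_spin_* API to whichever family is active. The following user-space sketch restates only the byte-lock idea using C11 atomics; it is a hedged illustration, not the patch's asm implementation, and the demo_* names are invented.

/* Illustrative byte lock with a contention counter, mirroring __byte_spin_*. */
#include <stdatomic.h>
#include <stdbool.h>
#include <sched.h>

struct demo_byte_spinlock {
	atomic_uchar lock;      /* 0 = free, 1 = held */
	atomic_uchar spinners;  /* number of waiting threads (contention) */
};

static void demo_byte_spin_lock(struct demo_byte_spinlock *bl)
{
	/* Unconditional exchange, like the patch's xchgb loop. */
	while (atomic_exchange(&bl->lock, 1) != 0) {
		atomic_fetch_add(&bl->spinners, 1);     /* advertise contention */
		while (atomic_load(&bl->lock) != 0)
			sched_yield();                  /* stand-in for rep;nop */
		atomic_fetch_sub(&bl->spinners, 1);
	}
}

static bool demo_byte_spin_trylock(struct demo_byte_spinlock *bl)
{
	return atomic_exchange(&bl->lock, 1) == 0;
}

static bool demo_byte_spin_is_contended(struct demo_byte_spinlock *bl)
{
	return atomic_load(&bl->spinners) != 0;
}

static void demo_byte_spin_unlock(struct demo_byte_spinlock *bl)
{
	atomic_store(&bl->lock, 0);
}

int main(void)
{
	static struct demo_byte_spinlock bl;    /* zero-initialized: unlocked */

	demo_byte_spin_lock(&bl);
	demo_byte_spin_unlock(&bl);
	return demo_byte_spin_trylock(&bl) && !demo_byte_spin_is_contended(&bl) ? 0 : 1;
}
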
26310--- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/system.h 2009-03-16 16:38:05.000000000 +0100
26311+++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/system.h 2009-06-04 10:21:39.000000000 +0200
26312@@ -137,7 +137,7 @@ __asm__ __volatile__ ("movw %%dx,%1\n\t"
26313 #define set_base(ldt, base) _set_base(((char *)&(ldt)) , (base))
26314 #define set_limit(ldt, limit) _set_limit(((char *)&(ldt)) , ((limit)-1))
26315
26316-extern void load_gs_index(unsigned);
26317+extern void xen_load_gs_index(unsigned);
26318
26319 /*
26320 * Load a segment. Fall back on loading the zero
26321@@ -154,14 +154,14 @@ extern void load_gs_index(unsigned);
26322 "jmp 2b\n" \
26323 ".previous\n" \
26324 _ASM_EXTABLE(1b,3b) \
26325- : :"r" (value), "r" (0))
26326+ : :"r" (value), "r" (0) : "memory")
26327
26328
26329 /*
26330 * Save a segment register away
26331 */
26332 #define savesegment(seg, value) \
26333- asm volatile("mov %%" #seg ",%0":"=rm" (value))
26334+ asm("mov %%" #seg ",%0":"=r" (value) : : "memory")
26335
26336 static inline unsigned long get_limit(unsigned long segment)
26337 {
26338@@ -269,6 +269,7 @@ static inline void xen_wbinvd(void)
26339 #ifdef CONFIG_X86_64
26340 #define read_cr8() (xen_read_cr8())
26341 #define write_cr8(x) (xen_write_cr8(x))
26342+#define load_gs_index xen_load_gs_index
26343 #endif
26344
26345 /* Clear the 'TS' bit */
26346@@ -287,13 +288,12 @@ static inline void clflush(volatile void
26347 void disable_hlt(void);
26348 void enable_hlt(void);
26349
26350-extern int es7000_plat;
26351 void cpu_idle_wait(void);
26352
26353 extern unsigned long arch_align_stack(unsigned long sp);
26354 extern void free_init_pages(char *what, unsigned long begin, unsigned long end);
26355
26356-void default_idle(void);
26357+void xen_idle(void);
26358
26359 /*
26360 * Force strict CPU ordering.
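
One detail worth noting in the system.h hunk above: loadsegment() and savesegment() gain a "memory" clobber, which acts as a compiler barrier so values are not cached in registers or reordered across the segment-register update. The snippet below is a generic illustration of what a "memory" clobber does in GNU C inline asm; it is not the patch's macros, and the demo_* names are invented.

/* Illustrative compiler barrier via an empty asm with a "memory" clobber. */
static inline void demo_compiler_barrier(void)
{
	__asm__ __volatile__("" : : : "memory");
}

int demo_flag;

void demo_publish(int *data)
{
	*data = 42;
	demo_compiler_barrier();  /* compiler must emit the store to *data before  */
	demo_flag = 1;            /* the store to demo_flag (compiler-level only;  */
	                          /* CPU-level ordering would need a real fence)   */
}
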
26361--- sle11-2009-10-16.orig/include/asm-x86/mach-xen/asm/xor_64.h 2009-03-16 16:38:05.000000000 +0100
26362+++ sle11-2009-10-16/include/asm-x86/mach-xen/asm/xor_64.h 2009-06-04 10:21:39.000000000 +0200
26363@@ -1,3 +1,6 @@
26364+#ifndef ASM_X86__XOR_64_H
26365+#define ASM_X86__XOR_64_H
26366+
26367 /*
26368 * x86-64 changes / gcc fixes from Andi Kleen.
26369 * Copyright 2002 Andi Kleen, SuSE Labs.
26370@@ -330,3 +333,5 @@ do { \
26371 We may also be able to load into the L1 only depending on how the cpu
26372 deals with a load to a line that is being prefetched. */
26373 #define XOR_SELECT_TEMPLATE(FASTEST) (&xor_block_sse)
26374+
26375+#endif /* ASM_X86__XOR_64_H */
26376--- sle11-2009-10-16.orig/include/asm-x86/mach-xen/irq_vectors.h	2009-03-16 16:33:40.000000000 +0100
26377+++ /dev/null 1970-01-01 00:00:00.000000000 +0000
26378@@ -1,126 +0,0 @@
26379-/*
26380- * This file should contain #defines for all of the interrupt vector
26381- * numbers used by this architecture.
26382- *
26383- * In addition, there are some standard defines:
26384- *
26385- * FIRST_EXTERNAL_VECTOR:
26386- * The first free place for external interrupts
26387- *
26388- * SYSCALL_VECTOR:
26389- * The IRQ vector a syscall makes the user to kernel transition
26390- * under.
26391- *
26392- * TIMER_IRQ:
26393- * The IRQ number the timer interrupt comes in at.
26394- *
26395- * NR_IRQS:
26396- * The total number of interrupt vectors (including all the
26397- * architecture specific interrupts) needed.
26398- *
26399- */
26400-#ifndef _ASM_IRQ_VECTORS_H
26401-#define _ASM_IRQ_VECTORS_H
26402-
26403-/*
26404- * IDT vectors usable for external interrupt sources start
26405- * at 0x20:
26406- */
26407-#define FIRST_EXTERNAL_VECTOR 0x20
26408-
26409-#define SYSCALL_VECTOR 0x80
26410-
26411-/*
26412- * Vectors 0x20-0x2f are used for ISA interrupts.
26413- */
26414-
26415-#if 0
26416-/*
26417- * Special IRQ vectors used by the SMP architecture, 0xf0-0xff
26418- *
26419- * some of the following vectors are 'rare', they are merged
26420- * into a single vector (CALL_FUNCTION_VECTOR) to save vector space.
26421- * TLB, reschedule and local APIC vectors are performance-critical.
26422- *
26423- * Vectors 0xf0-0xfa are free (reserved for future Linux use).
26424- */
26425-#define SPURIOUS_APIC_VECTOR 0xff
26426-#define ERROR_APIC_VECTOR 0xfe
26427-#define INVALIDATE_TLB_VECTOR 0xfd
26428-#define RESCHEDULE_VECTOR 0xfc
26429-#define CALL_FUNCTION_VECTOR 0xfb
26430-
26431-#define THERMAL_APIC_VECTOR 0xf0
26432-/*
26433- * Local APIC timer IRQ vector is on a different priority level,
26434- * to work around the 'lost local interrupt if more than 2 IRQ
26435- * sources per level' errata.
26436- */
26437-#define LOCAL_TIMER_VECTOR 0xef
26438-#endif
26439-
26440-#define SPURIOUS_APIC_VECTOR 0xff
26441-#define ERROR_APIC_VECTOR 0xfe
26442-
26443-/*
26444- * First APIC vector available to drivers: (vectors 0x30-0xee)
26445- * we start at 0x31 to spread out vectors evenly between priority
26446- * levels. (0x80 is the syscall vector)
26447- */
26448-#define FIRST_DEVICE_VECTOR 0x31
26449-#define FIRST_SYSTEM_VECTOR 0xef
26450-
26451-/*
26452- * 16 8259A IRQ's, 208 potential APIC interrupt sources.
26453- * Right now the APIC is mostly only used for SMP.
26454- * 256 vectors is an architectural limit. (we can have
26455- * more than 256 devices theoretically, but they will
26456- * have to use shared interrupts)
26457- * Since vectors 0x00-0x1f are used/reserved for the CPU,
26458- * the usable vector space is 0x20-0xff (224 vectors)
26459- */
26460-
26461-#define RESCHEDULE_VECTOR 0
26462-#define CALL_FUNCTION_VECTOR 1
26463-#define SPIN_UNLOCK_VECTOR 2
26464-#define NR_IPIS 3
26465-
26466-/*
26467- * The maximum number of vectors supported by i386 processors
26468- * is limited to 256. For processors other than i386, NR_VECTORS
26469- * should be changed accordingly.
26470- */
26471-#define NR_VECTORS 256
26472-
26473-#define FPU_IRQ 13
26474-
26475-#define FIRST_VM86_IRQ 3
26476-#define LAST_VM86_IRQ 15
26477-#define invalid_vm86_irq(irq) ((irq) < 3 || (irq) > 15)
26478-
26479-/*
26480- * The flat IRQ space is divided into two regions:
26481- * 1. A one-to-one mapping of real physical IRQs. This space is only used
26482- * if we have physical device-access privilege. This region is at the
26483- * start of the IRQ space so that existing device drivers do not need
26484- * to be modified to translate physical IRQ numbers into our IRQ space.
26485- * 3. A dynamic mapping of inter-domain and Xen-sourced virtual IRQs. These
26486- * are bound using the provided bind/unbind functions.
26487- */
26488-
26489-#define PIRQ_BASE 0
26490-#if !defined(MAX_IO_APICS)
26491-# define NR_PIRQS (NR_VECTORS + 32 * NR_CPUS)
26492-#elif NR_CPUS < MAX_IO_APICS
26493-# define NR_PIRQS (NR_VECTORS + 32 * NR_CPUS)
26494-#else
26495-# define NR_PIRQS (NR_VECTORS + 32 * MAX_IO_APICS)
26496-#endif
26497-
26498-#define DYNIRQ_BASE (PIRQ_BASE + NR_PIRQS)
26499-#define NR_DYNIRQS 256
26500-
26501-#define NR_IRQS (NR_PIRQS + NR_DYNIRQS)
26502-#define NR_IRQ_VECTORS NR_IRQS
26503-
26504-#endif /* _ASM_IRQ_VECTORS_H */
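
The deleted irq_vectors.h above laid the Xen IRQ space out as a physical-IRQ region (starting at PIRQ_BASE) followed by a dynamic region for Xen-sourced virtual IRQs. Purely as a worked illustration of that arithmetic, the snippet below plugs in NR_VECTORS = 256 and NR_CPUS = 32 (values assumed here for the example, with MAX_IO_APICS taken as undefined):

/* Worked example of the removed IRQ-space layout, with assumed constants. */
#include <stdio.h>

#define NR_VECTORS  256
#define NR_CPUS     32                           /* assumption for illustration */
#define PIRQ_BASE   0
#define NR_PIRQS    (NR_VECTORS + 32 * NR_CPUS)  /* 256 + 1024 = 1280           */
#define DYNIRQ_BASE (PIRQ_BASE + NR_PIRQS)       /* dynamic IRQs start at 1280  */
#define NR_DYNIRQS  256
#define NR_IRQS     (NR_PIRQS + NR_DYNIRQS)      /* 1536 IRQs in total          */

int main(void)
{
	printf("physical IRQs: %d..%d\n", PIRQ_BASE, PIRQ_BASE + NR_PIRQS - 1);
	printf("dynamic  IRQs: %d..%d\n", DYNIRQ_BASE, DYNIRQ_BASE + NR_DYNIRQS - 1);
	printf("NR_IRQS      : %d\n", NR_IRQS);
	return 0;
}
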
26505--- sle11-2009-10-16.orig/include/asm-x86/mach-xen/setup_arch_post.h	2009-10-28 14:55:02.000000000 +0100
26506+++ /dev/null 1970-01-01 00:00:00.000000000 +0000
26507@@ -1,63 +0,0 @@
26508-/**
26509- * machine_specific_* - Hooks for machine specific setup.
26510- *
26511- * Description:
26512- * This is included late in kernel/setup.c so that it can make
26513- * use of all of the static functions.
26514- **/
26515-
26516-#include <xen/interface/callback.h>
26517-
26518-extern void hypervisor_callback(void);
26519-extern void failsafe_callback(void);
26520-extern void nmi(void);
26521-
26522-static void __init machine_specific_arch_setup(void)
26523-{
26524- int ret;
26525- static struct callback_register __initdata event = {
26526- .type = CALLBACKTYPE_event,
26527- .address = (unsigned long) hypervisor_callback,
26528- };
26529- static struct callback_register __initdata failsafe = {
26530- .type = CALLBACKTYPE_failsafe,
26531- .address = (unsigned long)failsafe_callback,
26532- };
26533- static struct callback_register __initdata syscall = {
26534- .type = CALLBACKTYPE_syscall,
26535- .address = (unsigned long)system_call,
26536- };
26537-#ifdef CONFIG_X86_LOCAL_APIC
26538- static struct callback_register __initdata nmi_cb = {
26539- .type = CALLBACKTYPE_nmi,
26540- .address = (unsigned long)nmi,
26541- };
26542-#endif
26543-
26544- ret = HYPERVISOR_callback_op(CALLBACKOP_register, &event);
26545- if (ret == 0)
26546- ret = HYPERVISOR_callback_op(CALLBACKOP_register, &failsafe);
26547- if (ret == 0)
26548- ret = HYPERVISOR_callback_op(CALLBACKOP_register, &syscall);
26549-#if CONFIG_XEN_COMPAT <= 0x030002
26550- if (ret == -ENOSYS)
26551- ret = HYPERVISOR_set_callbacks(
26552- event.address,
26553- failsafe.address,
26554- syscall.address);
26555-#endif
26556- BUG_ON(ret);
26557-
26558-#ifdef CONFIG_X86_LOCAL_APIC
26559- ret = HYPERVISOR_callback_op(CALLBACKOP_register, &nmi_cb);
26560-#if CONFIG_XEN_COMPAT <= 0x030002
26561- if (ret == -ENOSYS) {
26562- static struct xennmi_callback __initdata cb = {
26563- .handler_address = (unsigned long)nmi
26564- };
26565-
26566- HYPERVISOR_nmi_op(XENNMI_register_callback, &cb);
26567- }
26568-#endif
26569-#endif
26570-}
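
The removed machine_specific_arch_setup() above registers the event, failsafe and syscall callbacks via CALLBACKOP_register and, when built with CONFIG_XEN_COMPAT <= 0x030002, falls back to the older HYPERVISOR_set_callbacks interface if the hypervisor answers -ENOSYS. A minimal user-space sketch of that probe-then-fall-back pattern (the demo_* names are invented stand-ins, not hypercalls) could read:

/* Illustrative "try the new interface, fall back on -ENOSYS" pattern. */
#include <errno.h>
#include <stdio.h>

static int demo_register_new(void)    { return -ENOSYS; }  /* pretend: old hypervisor */
static int demo_register_legacy(void) { return 0; }

static int demo_register_callbacks(void)
{
	int ret = demo_register_new();

	if (ret == -ENOSYS)                 /* new op unknown: use the legacy call */
		ret = demo_register_legacy();
	return ret;
}

int main(void)
{
	printf("register_callbacks: %d\n", demo_register_callbacks());
	return 0;
}
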
26571--- sle11-2009-10-16.orig/include/asm-x86/mach-xen/setup_arch_pre.h	2009-10-28 14:55:02.000000000 +0100
26572+++ /dev/null 1970-01-01 00:00:00.000000000 +0000
26573@@ -1,5 +0,0 @@
26574-/* Hook to call BIOS initialisation function */
26575-
26576-#define ARCH_SETUP machine_specific_arch_setup();
26577-
26578-static void __init machine_specific_arch_setup(void);
26579--- sle11-2009-10-16.orig/include/asm-x86/traps.h 2009-10-28 14:55:02.000000000 +0100
26580+++ sle11-2009-10-16/include/asm-x86/traps.h 2009-06-04 10:21:39.000000000 +0200
26581@@ -23,6 +23,9 @@ asmlinkage void spurious_interrupt_bug(v
26582 #ifdef CONFIG_X86_MCE
26583 asmlinkage void machine_check(void);
26584 #endif /* CONFIG_X86_MCE */
26585+#ifdef CONFIG_X86_XEN
26586+asmlinkage void fixup_4gb_segment(void);
26587+#endif
26588
26589 void do_divide_error(struct pt_regs *, long);
26590 void do_overflow(struct pt_regs *, long);
26591@@ -48,6 +51,9 @@ void math_error(void __user *);
26592 void do_coprocessor_error(struct pt_regs *, long);
26593 void do_simd_coprocessor_error(struct pt_regs *, long);
26594 void do_spurious_interrupt_bug(struct pt_regs *, long);
26595+#ifdef CONFIG_XEN
26596+void do_fixup_4gb_segment(struct pt_regs *, long);
26597+#endif
26598 unsigned long patch_espfix_desc(unsigned long, unsigned long);
26599 asmlinkage void math_emulate(long);
26600
26601--- sle11-2009-10-16.orig/include/asm-x86/xen/interface_64.h 2009-10-28 14:55:02.000000000 +0100
26602+++ sle11-2009-10-16/include/asm-x86/xen/interface_64.h 2009-06-04 10:21:39.000000000 +0200
26603@@ -136,7 +136,7 @@ struct cpu_user_regs {
26604 uint16_t fs, _pad5[3]; /* Non-zero => takes precedence over fs_base. */
26605 uint16_t gs, _pad6[3]; /* Non-zero => takes precedence over gs_base_usr. */
26606 };
26607-DEFINE_GUEST_HANDLE_STRUCT(cpu_user_regs);
26608+DEFINE_XEN_GUEST_HANDLE_STRUCT(cpu_user_regs);
26609
26610 #undef __DECL_REG
26611
26612--- sle11-2009-10-16.orig/include/linux/page-flags.h 2009-03-16 16:38:05.000000000 +0100
26613+++ sle11-2009-10-16/include/linux/page-flags.h 2009-06-04 10:21:39.000000000 +0200
26614@@ -110,9 +110,11 @@ enum pageflags {
26615 /* Filesystems */
26616 PG_checked = PG_owner_priv_1,
26617
26618+#ifdef CONFIG_PARAVIRT_XEN
26619 /* XEN */
26620 PG_pinned = PG_owner_priv_1,
26621 PG_savepinned = PG_dirty,
26622+#endif
26623
26624 /* SLOB */
26625 PG_slob_page = PG_active,
26626@@ -187,8 +189,12 @@ PAGEFLAG(LRU, lru) __CLEARPAGEFLAG(LRU,
26627 PAGEFLAG(Active, active) __CLEARPAGEFLAG(Active, active)
26628 __PAGEFLAG(Slab, slab)
26629 PAGEFLAG(Checked, checked) /* Used by some filesystems */
26630+#if defined(CONFIG_XEN) || defined(CONFIG_PARAVIRT_XEN)
26631 PAGEFLAG(Pinned, pinned) TESTSCFLAG(Pinned, pinned) /* Xen */
26632+#endif
26633+#ifdef CONFIG_PARAVIRT_XEN
26634 PAGEFLAG(SavePinned, savepinned); /* Xen */
26635+#endif
26636 PAGEFLAG(Reserved, reserved) __CLEARPAGEFLAG(Reserved, reserved)
26637 PAGEFLAG(Private, private) __CLEARPAGEFLAG(Private, private)
26638 __SETPAGEFLAG(Private, private)
26639--- sle11-2009-10-16.orig/include/xen/interface/memory.h 2009-03-16 16:38:05.000000000 +0100
26640+++ sle11-2009-10-16/include/xen/interface/memory.h 2009-06-04 10:21:39.000000000 +0200
26641@@ -82,6 +82,7 @@ struct xen_memory_reservation {
26642 domid_t domid;
26643
26644 };
26645+DEFINE_XEN_GUEST_HANDLE_STRUCT(xen_memory_reservation);
26646 typedef struct xen_memory_reservation xen_memory_reservation_t;
26647 DEFINE_XEN_GUEST_HANDLE(xen_memory_reservation_t);
26648
26649@@ -167,11 +168,7 @@ struct xen_machphys_mfn_list {
26650 * any large discontiguities in the machine address space, 2MB gaps in
26651 * the machphys table will be represented by an MFN base of zero.
26652 */
26653-#ifndef CONFIG_PARAVIRT_XEN
26654 XEN_GUEST_HANDLE(xen_pfn_t) extent_start;
26655-#else
26656- ulong extent_start;
26657-#endif
26658
26659 /*
26660 * Number of extents written to the above array. This will be smaller
26661@@ -179,6 +176,7 @@ struct xen_machphys_mfn_list {
26662 */
26663 unsigned int nr_extents;
26664 };
26665+DEFINE_XEN_GUEST_HANDLE_STRUCT(xen_machphys_mfn_list);
26666 typedef struct xen_machphys_mfn_list xen_machphys_mfn_list_t;
26667 DEFINE_XEN_GUEST_HANDLE(xen_machphys_mfn_list_t);
26668
26669@@ -218,6 +216,7 @@ struct xen_add_to_physmap {
26670 /* GPFN where the source mapping page should appear. */
26671 xen_pfn_t gpfn;
26672 };
26673+DEFINE_XEN_GUEST_HANDLE_STRUCT(xen_add_to_physmap);
26674 typedef struct xen_add_to_physmap xen_add_to_physmap_t;
26675 DEFINE_XEN_GUEST_HANDLE(xen_add_to_physmap_t);
26676
26677@@ -250,21 +249,13 @@ struct xen_translate_gpfn_list {
26678 xen_ulong_t nr_gpfns;
26679
26680 /* List of GPFNs to translate. */
26681-#ifndef CONFIG_PARAVIRT_XEN
26682 XEN_GUEST_HANDLE(xen_pfn_t) gpfn_list;
26683-#else
26684- ulong gpfn_list;
26685-#endif
26686
26687 /*
26688 * Output list to contain MFN translations. May be the same as the input
26689 * list (in which case each input GPFN is overwritten with the output MFN).
26690 */
26691-#ifndef CONFIG_PARAVIRT_XEN
26692 XEN_GUEST_HANDLE(xen_pfn_t) mfn_list;
26693-#else
26694- ulong mfn_list;
26695-#endif
26696 };
26697 DEFINE_XEN_GUEST_HANDLE_STRUCT(xen_translate_gpfn_list);
26698 typedef struct xen_translate_gpfn_list xen_translate_gpfn_list_t;
26699--- sle11-2009-10-16.orig/kernel/hrtimer.c 2009-10-28 14:55:02.000000000 +0100
26700+++ sle11-2009-10-16/kernel/hrtimer.c 2009-06-04 10:21:39.000000000 +0200
26701@@ -1084,7 +1084,7 @@ ktime_t hrtimer_get_remaining(const stru
26702 }
26703 EXPORT_SYMBOL_GPL(hrtimer_get_remaining);
26704
26705-#ifdef CONFIG_NO_HZ
26706+#if defined(CONFIG_NO_HZ) || defined(CONFIG_NO_IDLE_HZ)
26707 /**
26708 * hrtimer_get_next_event - get the time until next expiry event
26709 *
26710--- sle11-2009-10-16.orig/kernel/kexec.c 2009-02-17 12:38:20.000000000 +0100
26711+++ sle11-2009-10-16/kernel/kexec.c 2009-06-04 10:21:39.000000000 +0200
26712@@ -54,7 +54,7 @@ int dump_after_notifier;
26713 unsigned char vmcoreinfo_data[VMCOREINFO_BYTES];
26714 u32
26715 #if defined(CONFIG_XEN) && defined(CONFIG_X86)
26716-__attribute__((__section__(".bss.page_aligned"), __aligned__(PAGE_SIZE)))
26717+__page_aligned_bss
26718 #endif
26719 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
26720 size_t vmcoreinfo_size;
26721--- sle11-2009-10-16.orig/kernel/timer.c 2009-10-28 14:55:02.000000000 +0100
26722+++ sle11-2009-10-16/kernel/timer.c 2009-06-04 10:21:39.000000000 +0200
26723@@ -884,7 +884,7 @@ static inline void __run_timers(struct t
26724 spin_unlock_irq(&base->lock);
26725 }
26726
26727-#ifdef CONFIG_NO_HZ
26728+#if defined(CONFIG_NO_HZ) || defined(CONFIG_NO_IDLE_HZ)
26729 /*
26730 * Find out when the next timer event is due to happen. This
26731 * is used on S/390 to stop all activity when a cpus is idle.
26732--- sle11-2009-10-16.orig/lib/swiotlb-xen.c 2009-03-16 16:38:05.000000000 +0100
26733+++ sle11-2009-10-16/lib/swiotlb-xen.c 2009-06-04 10:21:39.000000000 +0200
26734@@ -750,7 +750,7 @@ swiotlb_sync_sg_for_device(struct device
26735 }
26736
26737 int
26738-swiotlb_dma_mapping_error(dma_addr_t dma_addr)
26739+swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr)
26740 {
26741 return (dma_addr == virt_to_bus(io_tlb_overflow_buffer));
26742 }
26743--- sle11-2009-10-16.orig/mm/mprotect.c 2009-03-04 11:28:34.000000000 +0100
26744+++ sle11-2009-10-16/mm/mprotect.c 2009-06-04 10:21:39.000000000 +0200
26745@@ -92,8 +92,6 @@ static inline void change_pmd_range(stru
26746 next = pmd_addr_end(addr, end);
26747 if (pmd_none_or_clear_bad(pmd))
26748 continue;
26749- if (arch_change_pte_range(mm, pmd, addr, next, newprot, dirty_accountable))
26750- continue;
26751 change_pte_range(mm, pmd, addr, next, newprot, dirty_accountable);
26752 } while (pmd++, addr = next, addr != end);
26753 }